{ "best_metric": null, "best_model_checkpoint": null, "epoch": 9.995276334435522, "eval_steps": 50, "global_step": 5290, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "dpo_loss": 0.6931471824645996, "epoch": 0.001889466225791214, "grad_norm": 0.0, "learning_rate": 0.0, "logits": -1.2554917335510254, "logps": -90.04276275634766, "loss": 0.0748, "objective": 0.07446074485778809, "ranking_idealized": 0.4375, "ranking_idealized_expo": 0.4375, "ranking_simple": 0.4375, "regularize": 0.005146026611328125, "step": 1 }, { "dpo_loss": 0.6931471824645996, "epoch": 0.00944733112895607, "grad_norm": 0.0, "learning_rate": 0.0, "logits": -1.3566728830337524, "logps": -93.35393524169922, "loss": 0.0753, "objective": 0.07558518648147583, "ranking_idealized": 0.5859375, "ranking_idealized_expo": 0.53125, "ranking_simple": 0.53125, "regularize": 0.006270468235015869, "step": 5 }, { "dpo_loss": 0.6931472420692444, "epoch": 0.01889466225791214, "grad_norm": 96.03528762444088, "learning_rate": 4.7258979206049145e-09, "logits": -1.314980149269104, "logps": -92.82933044433594, "loss": 0.0748, "objective": 0.074301578104496, "ranking_idealized": 0.574999988079071, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.5249999761581421, "regularize": 0.004986858461052179, "step": 10 }, { "dpo_loss": 0.6931472420692444, "epoch": 0.02834199338686821, "grad_norm": 106.03244133035287, "learning_rate": 8.506616257088846e-09, "logits": -1.303232192993164, "logps": -91.14088439941406, "loss": 0.0751, "objective": 0.07503427565097809, "ranking_idealized": 0.59375, "ranking_idealized_expo": 0.5687500238418579, "ranking_simple": 0.5687500238418579, "regularize": 0.005719560198485851, "step": 15 }, { "dpo_loss": 0.6926484704017639, "epoch": 0.03778932451582428, "grad_norm": 96.31144733105825, "learning_rate": 1.323251417769376e-08, "logits": -1.454129934310913, "logps": -93.50907897949219, "loss": 0.0796, "objective": 0.07954580336809158, "ranking_idealized": 0.5874999761581421, "ranking_idealized_expo": 0.5687500238418579, "ranking_simple": 0.5687500238418579, "regularize": 0.01028094906359911, "step": 20 }, { "dpo_loss": 0.6928218007087708, "epoch": 0.04723665564478035, "grad_norm": 92.14233006839046, "learning_rate": 1.7958412098298676e-08, "logits": -1.3390535116195679, "logps": -90.30491638183594, "loss": 0.08, "objective": 0.07990650832653046, "ranking_idealized": 0.5625, "ranking_idealized_expo": 0.518750011920929, "ranking_simple": 0.512499988079071, "regularize": 0.01062433235347271, "step": 25 }, { "dpo_loss": 0.6925551295280457, "epoch": 0.05668398677373642, "grad_norm": 91.05770478423896, "learning_rate": 2.268431001890359e-08, "logits": -1.3069889545440674, "logps": -91.43885803222656, "loss": 0.0801, "objective": 0.08068785816431046, "ranking_idealized": 0.5625, "ranking_idealized_expo": 0.518750011920929, "ranking_simple": 0.518750011920929, "regularize": 0.011432340368628502, "step": 30 }, { "dpo_loss": 0.6934799551963806, "epoch": 0.06613131790269249, "grad_norm": 95.27624930021037, "learning_rate": 2.7410207939508506e-08, "logits": -1.283834457397461, "logps": -92.07040405273438, "loss": 0.081, "objective": 0.08080057054758072, "ranking_idealized": 0.581250011920929, "ranking_idealized_expo": 0.5562499761581421, "ranking_simple": 0.5562499761581421, "regularize": 0.011452572420239449, "step": 35 }, { "dpo_loss": 0.6932560205459595, "epoch": 0.07557864903164856, "grad_norm": 95.669774962342, "learning_rate": 3.213610586011342e-08, "logits": -1.4033466577529907, "logps": -92.30833435058594, "loss": 0.0809, "objective": 0.08118347078561783, "ranking_idealized": 0.5062500238418579, "ranking_idealized_expo": 0.518750011920929, "ranking_simple": 0.512499988079071, "regularize": 0.011857859790325165, "step": 40 }, { "dpo_loss": 0.6935173273086548, "epoch": 0.08502598016060463, "grad_norm": 87.06329457602646, "learning_rate": 3.6862003780718335e-08, "logits": -1.3774633407592773, "logps": -91.71311950683594, "loss": 0.0798, "objective": 0.07959654182195663, "ranking_idealized": 0.53125, "ranking_idealized_expo": 0.5062500238418579, "ranking_simple": 0.5, "regularize": 0.010244803503155708, "step": 45 }, { "dpo_loss": 0.6932135820388794, "epoch": 0.0944733112895607, "grad_norm": 90.79462025433014, "learning_rate": 4.158790170132325e-08, "logits": -1.343457579612732, "logps": -91.4620132446289, "loss": 0.0798, "objective": 0.0787525326013565, "ranking_idealized": 0.574999988079071, "ranking_idealized_expo": 0.543749988079071, "ranking_simple": 0.5375000238418579, "regularize": 0.00943115632981062, "step": 50 }, { "epoch": 0.0944733112895607, "eval_dpo_loss": 0.6932454705238342, "eval_logits": -1.3072285652160645, "eval_logps": -98.50404357910156, "eval_loss": 0.08073805272579193, "eval_objective": 0.08079613000154495, "eval_ranking_idealized": 0.5398550629615784, "eval_ranking_idealized_expo": 0.5243270993232727, "eval_ranking_simple": 0.523809552192688, "eval_regularize": 0.01147159282118082, "eval_runtime": 159.4296, "eval_samples_per_second": 36.317, "eval_steps_per_second": 3.03, "step": 50 }, { "dpo_loss": 0.6931546926498413, "epoch": 0.10392064241851677, "grad_norm": 95.1381387245793, "learning_rate": 4.6313799621928164e-08, "logits": -1.3497530221939087, "logps": -91.14179229736328, "loss": 0.0799, "objective": 0.08008146286010742, "ranking_idealized": 0.6187499761581421, "ranking_idealized_expo": 0.581250011920929, "ranking_simple": 0.581250011920929, "regularize": 0.010765998624265194, "step": 55 }, { "dpo_loss": 0.6940718293190002, "epoch": 0.11336797354747284, "grad_norm": 85.84378688398391, "learning_rate": 5.103969754253308e-08, "logits": -1.326425313949585, "logps": -91.68214416503906, "loss": 0.08, "objective": 0.08076535165309906, "ranking_idealized": 0.4937500059604645, "ranking_idealized_expo": 0.48124998807907104, "ranking_simple": 0.48124998807907104, "regularize": 0.011358163319528103, "step": 60 }, { "dpo_loss": 0.6940113306045532, "epoch": 0.12281530467642891, "grad_norm": 94.98552874921538, "learning_rate": 5.576559546313799e-08, "logits": -1.3491050004959106, "logps": -91.77569580078125, "loss": 0.0814, "objective": 0.08189814537763596, "ranking_idealized": 0.5249999761581421, "ranking_idealized_expo": 0.4937500059604645, "ranking_simple": 0.4937500059604645, "regularize": 0.012497019954025745, "step": 65 }, { "dpo_loss": 0.693263828754425, "epoch": 0.13226263580538497, "grad_norm": 107.33910773824219, "learning_rate": 6.049149338374291e-08, "logits": -1.3523788452148438, "logps": -91.95494842529297, "loss": 0.0801, "objective": 0.08068925887346268, "ranking_idealized": 0.5687500238418579, "ranking_idealized_expo": 0.550000011920929, "ranking_simple": 0.550000011920929, "regularize": 0.01136286836117506, "step": 70 }, { "dpo_loss": 0.6932634115219116, "epoch": 0.14170996693434104, "grad_norm": 97.18132543333557, "learning_rate": 6.521739130434782e-08, "logits": -1.3806952238082886, "logps": -93.421630859375, "loss": 0.0805, "objective": 0.08125968277454376, "ranking_idealized": 0.5, "ranking_idealized_expo": 0.46875, "ranking_simple": 0.46875, "regularize": 0.011933336034417152, "step": 75 }, { "dpo_loss": 0.6934519410133362, "epoch": 0.1511572980632971, "grad_norm": 90.58288269347415, "learning_rate": 6.994328922495274e-08, "logits": -1.2946566343307495, "logps": -93.24769592285156, "loss": 0.0808, "objective": 0.08052171766757965, "ranking_idealized": 0.512499988079071, "ranking_idealized_expo": 0.4749999940395355, "ranking_simple": 0.4749999940395355, "regularize": 0.011176527477800846, "step": 80 }, { "dpo_loss": 0.6921312212944031, "epoch": 0.16060462919225318, "grad_norm": 88.36217330236191, "learning_rate": 7.466918714555766e-08, "logits": -1.3575432300567627, "logps": -92.65788269042969, "loss": 0.0806, "objective": 0.08070734888315201, "ranking_idealized": 0.5562499761581421, "ranking_idealized_expo": 0.53125, "ranking_simple": 0.53125, "regularize": 0.011494224891066551, "step": 85 }, { "dpo_loss": 0.6927862167358398, "epoch": 0.17005196032120926, "grad_norm": 105.19633396106833, "learning_rate": 7.939508506616256e-08, "logits": -1.2258633375167847, "logps": -93.01042175292969, "loss": 0.0798, "objective": 0.07944050431251526, "ranking_idealized": 0.5, "ranking_idealized_expo": 0.5062500238418579, "ranking_simple": 0.5062500238418579, "regularize": 0.01016187947243452, "step": 90 }, { "dpo_loss": 0.6920500993728638, "epoch": 0.17949929145016533, "grad_norm": 96.30196423359166, "learning_rate": 8.412098298676749e-08, "logits": -1.3657176494598389, "logps": -89.35536193847656, "loss": 0.0807, "objective": 0.08035007864236832, "ranking_idealized": 0.574999988079071, "ranking_idealized_expo": 0.518750011920929, "ranking_simple": 0.518750011920929, "regularize": 0.011145063675940037, "step": 95 }, { "dpo_loss": 0.6925091743469238, "epoch": 0.1889466225791214, "grad_norm": 93.35552835482645, "learning_rate": 8.88468809073724e-08, "logits": -1.4030728340148926, "logps": -93.86295318603516, "loss": 0.081, "objective": 0.08110513538122177, "ranking_idealized": 0.5625, "ranking_idealized_expo": 0.550000011920929, "ranking_simple": 0.543749988079071, "regularize": 0.01185421273112297, "step": 100 }, { "epoch": 0.1889466225791214, "eval_dpo_loss": 0.6933540105819702, "eval_logits": -1.3084157705307007, "eval_logps": -98.49323272705078, "eval_loss": 0.0818963274359703, "eval_objective": 0.08186686784029007, "eval_ranking_idealized": 0.5398550629615784, "eval_ranking_idealized_expo": 0.5243270993232727, "eval_ranking_simple": 0.523809552192688, "eval_regularize": 0.012531458400189877, "eval_runtime": 158.2662, "eval_samples_per_second": 36.584, "eval_steps_per_second": 3.052, "step": 100 }, { "dpo_loss": 0.691580593585968, "epoch": 0.19839395370807747, "grad_norm": 91.03790534738991, "learning_rate": 9.357277882797732e-08, "logits": -1.3384134769439697, "logps": -91.71461486816406, "loss": 0.0805, "objective": 0.08078125864267349, "ranking_idealized": 0.53125, "ranking_idealized_expo": 0.518750011920929, "ranking_simple": 0.518750011920929, "regularize": 0.011623199097812176, "step": 105 }, { "dpo_loss": 0.6921985149383545, "epoch": 0.20784128483703354, "grad_norm": 93.5398896154081, "learning_rate": 9.829867674858222e-08, "logits": -1.2784953117370605, "logps": -91.62667083740234, "loss": 0.081, "objective": 0.0812302827835083, "ranking_idealized": 0.59375, "ranking_idealized_expo": 0.543749988079071, "ranking_simple": 0.543749988079071, "regularize": 0.012010429054498672, "step": 110 }, { "dpo_loss": 0.6925197839736938, "epoch": 0.2172886159659896, "grad_norm": 114.1699761501263, "learning_rate": 1.0302457466918714e-07, "logits": -1.321418285369873, "logps": -93.04322814941406, "loss": 0.0809, "objective": 0.08117768913507462, "ranking_idealized": 0.5249999761581421, "ranking_idealized_expo": 0.4937500059604645, "ranking_simple": 0.4937500059604645, "regularize": 0.01192571222782135, "step": 115 }, { "dpo_loss": 0.691599428653717, "epoch": 0.22673594709494568, "grad_norm": 100.677720262526, "learning_rate": 1.0775047258979206e-07, "logits": -1.3501144647598267, "logps": -91.29630279541016, "loss": 0.0822, "objective": 0.08226821571588516, "ranking_idealized": 0.5625, "ranking_idealized_expo": 0.5375000238418579, "ranking_simple": 0.53125, "regularize": 0.013108278624713421, "step": 120 }, { "dpo_loss": 0.6913750767707825, "epoch": 0.23618327822390175, "grad_norm": 107.79833398829818, "learning_rate": 1.1247637051039697e-07, "logits": -1.3559753894805908, "logps": -91.870849609375, "loss": 0.0816, "objective": 0.08255833387374878, "ranking_idealized": 0.5874999761581421, "ranking_idealized_expo": 0.574999988079071, "ranking_simple": 0.574999988079071, "regularize": 0.013420837000012398, "step": 125 }, { "dpo_loss": 0.6917082667350769, "epoch": 0.24563060935285783, "grad_norm": 87.3599630697971, "learning_rate": 1.1720226843100187e-07, "logits": -1.3852940797805786, "logps": -93.1257095336914, "loss": 0.0821, "objective": 0.08141469210386276, "ranking_idealized": 0.48750001192092896, "ranking_idealized_expo": 0.4937500059604645, "ranking_simple": 0.4937500059604645, "regularize": 0.012243862263858318, "step": 130 }, { "dpo_loss": 0.6916366815567017, "epoch": 0.25507794048181387, "grad_norm": 85.96528652950495, "learning_rate": 1.219281663516068e-07, "logits": -1.4204727411270142, "logps": -91.48652648925781, "loss": 0.0819, "objective": 0.08242340385913849, "ranking_idealized": 0.5625, "ranking_idealized_expo": 0.53125, "ranking_simple": 0.53125, "regularize": 0.013259735889732838, "step": 135 }, { "dpo_loss": 0.6917210221290588, "epoch": 0.26452527161076994, "grad_norm": 87.464567760636, "learning_rate": 1.266540642722117e-07, "logits": -1.4328950643539429, "logps": -91.34432220458984, "loss": 0.0815, "objective": 0.08194929361343384, "ranking_idealized": 0.5625, "ranking_idealized_expo": 0.5562499761581421, "ranking_simple": 0.5562499761581421, "regularize": 0.012777194380760193, "step": 140 }, { "dpo_loss": 0.6909077167510986, "epoch": 0.273972602739726, "grad_norm": 98.17150093607263, "learning_rate": 1.3137996219281664e-07, "logits": -1.2796088457107544, "logps": -94.21308898925781, "loss": 0.0841, "objective": 0.08374190330505371, "ranking_idealized": 0.4312500059604645, "ranking_idealized_expo": 0.41874998807907104, "ranking_simple": 0.41874998807907104, "regularize": 0.014651129022240639, "step": 145 }, { "dpo_loss": 0.6928467154502869, "epoch": 0.2834199338686821, "grad_norm": 99.65212586497037, "learning_rate": 1.3610586011342153e-07, "logits": -1.299617052078247, "logps": -88.99101257324219, "loss": 0.0839, "objective": 0.08220638334751129, "ranking_idealized": 0.53125, "ranking_idealized_expo": 0.512499988079071, "ranking_simple": 0.512499988079071, "regularize": 0.012921703048050404, "step": 150 }, { "epoch": 0.2834199338686821, "eval_dpo_loss": 0.6930533647537231, "eval_logits": -1.3078515529632568, "eval_logps": -98.5417251586914, "eval_loss": 0.08232778310775757, "eval_objective": 0.08233249187469482, "eval_ranking_idealized": 0.5398550629615784, "eval_ranking_idealized_expo": 0.5243270993232727, "eval_ranking_simple": 0.5232919454574585, "eval_regularize": 0.013027146458625793, "eval_runtime": 157.9508, "eval_samples_per_second": 36.657, "eval_steps_per_second": 3.058, "step": 150 }, { "dpo_loss": 0.6929137110710144, "epoch": 0.29286726499763815, "grad_norm": 118.8781397472593, "learning_rate": 1.4083175803402647e-07, "logits": -1.4878828525543213, "logps": -93.26710510253906, "loss": 0.0856, "objective": 0.08600855618715286, "ranking_idealized": 0.59375, "ranking_idealized_expo": 0.5562499761581421, "ranking_simple": 0.5562499761581421, "regularize": 0.016717184334993362, "step": 155 }, { "dpo_loss": 0.6932097673416138, "epoch": 0.3023145961265942, "grad_norm": 97.86517664421345, "learning_rate": 1.455576559546314e-07, "logits": -1.348954200744629, "logps": -93.52423858642578, "loss": 0.0843, "objective": 0.0843389481306076, "ranking_idealized": 0.5625, "ranking_idealized_expo": 0.550000011920929, "ranking_simple": 0.550000011920929, "regularize": 0.015017963945865631, "step": 160 }, { "dpo_loss": 0.6927060484886169, "epoch": 0.3117619272555503, "grad_norm": 96.31846370142493, "learning_rate": 1.5028355387523628e-07, "logits": -1.3225867748260498, "logps": -92.67644500732422, "loss": 0.0848, "objective": 0.08455438911914825, "ranking_idealized": 0.543749988079071, "ranking_idealized_expo": 0.5062500238418579, "ranking_simple": 0.5062500238418579, "regularize": 0.01528378389775753, "step": 165 }, { "dpo_loss": 0.6917023658752441, "epoch": 0.32120925838450637, "grad_norm": 104.27989577822625, "learning_rate": 1.5500945179584122e-07, "logits": -1.3173997402191162, "logps": -90.8909912109375, "loss": 0.085, "objective": 0.08586680889129639, "ranking_idealized": 0.5375000238418579, "ranking_idealized_expo": 0.53125, "ranking_simple": 0.53125, "regularize": 0.01669657602906227, "step": 170 }, { "dpo_loss": 0.6916298270225525, "epoch": 0.33065658951346244, "grad_norm": 83.6068402947374, "learning_rate": 1.597353497164461e-07, "logits": -1.3807752132415771, "logps": -92.2252426147461, "loss": 0.0851, "objective": 0.08548159152269363, "ranking_idealized": 0.518750011920929, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.5249999761581421, "regularize": 0.016318608075380325, "step": 175 }, { "dpo_loss": 0.691738486289978, "epoch": 0.3401039206424185, "grad_norm": 105.18661952420578, "learning_rate": 1.6446124763705102e-07, "logits": -1.3236361742019653, "logps": -92.10786437988281, "loss": 0.0869, "objective": 0.08777324855327606, "ranking_idealized": 0.5687500238418579, "ranking_idealized_expo": 0.550000011920929, "ranking_simple": 0.550000011920929, "regularize": 0.01859939657151699, "step": 180 }, { "dpo_loss": 0.6920045018196106, "epoch": 0.3495512517713746, "grad_norm": 96.87931928806626, "learning_rate": 1.6918714555765596e-07, "logits": -1.3653669357299805, "logps": -94.2525405883789, "loss": 0.0866, "objective": 0.08760502189397812, "ranking_idealized": 0.4937500059604645, "ranking_idealized_expo": 0.45625001192092896, "ranking_simple": 0.4625000059604645, "regularize": 0.01840457320213318, "step": 185 }, { "dpo_loss": 0.6901124715805054, "epoch": 0.35899858290033065, "grad_norm": 101.71062430711056, "learning_rate": 1.7391304347826085e-07, "logits": -1.2960126399993896, "logps": -94.1360855102539, "loss": 0.0873, "objective": 0.08737105876207352, "ranking_idealized": 0.574999988079071, "ranking_idealized_expo": 0.53125, "ranking_simple": 0.53125, "regularize": 0.018359806388616562, "step": 190 }, { "dpo_loss": 0.693382740020752, "epoch": 0.3684459140292867, "grad_norm": 108.0065626001207, "learning_rate": 1.786389413988658e-07, "logits": -1.3760709762573242, "logps": -92.12065124511719, "loss": 0.088, "objective": 0.08760654181241989, "ranking_idealized": 0.574999988079071, "ranking_idealized_expo": 0.5562499761581421, "ranking_simple": 0.5562499761581421, "regularize": 0.018268268555402756, "step": 195 }, { "dpo_loss": 0.6914373636245728, "epoch": 0.3778932451582428, "grad_norm": 124.97011802709235, "learning_rate": 1.833648393194707e-07, "logits": -1.4247692823410034, "logps": -92.43400573730469, "loss": 0.0891, "objective": 0.09008407592773438, "ranking_idealized": 0.48750001192092896, "ranking_idealized_expo": 0.4625000059604645, "ranking_simple": 0.4625000059604645, "regularize": 0.02094034105539322, "step": 200 }, { "epoch": 0.3778932451582428, "eval_dpo_loss": 0.6926960945129395, "eval_logits": -1.3062703609466553, "eval_logps": -98.65170288085938, "eval_loss": 0.08403145521879196, "eval_objective": 0.0839371606707573, "eval_ranking_idealized": 0.5398550629615784, "eval_ranking_idealized_expo": 0.5243270993232727, "eval_ranking_simple": 0.523809552192688, "eval_regularize": 0.014667555689811707, "eval_runtime": 158.3979, "eval_samples_per_second": 36.554, "eval_steps_per_second": 3.049, "step": 200 }, { "dpo_loss": 0.6928633451461792, "epoch": 0.38734057628719887, "grad_norm": 102.87640442387767, "learning_rate": 1.880907372400756e-07, "logits": -1.2827486991882324, "logps": -94.01448059082031, "loss": 0.089, "objective": 0.0891382023692131, "ranking_idealized": 0.53125, "ranking_idealized_expo": 0.45625001192092896, "ranking_simple": 0.45625001192092896, "regularize": 0.019851867109537125, "step": 205 }, { "dpo_loss": 0.6920486688613892, "epoch": 0.39678790741615494, "grad_norm": 90.49515192046192, "learning_rate": 1.9281663516068053e-07, "logits": -1.3010823726654053, "logps": -92.64601135253906, "loss": 0.0896, "objective": 0.09058623015880585, "ranking_idealized": 0.6187499761581421, "ranking_idealized_expo": 0.5874999761581421, "ranking_simple": 0.5874999761581421, "regularize": 0.021381361410021782, "step": 210 }, { "dpo_loss": 0.6928970813751221, "epoch": 0.406235238545111, "grad_norm": 110.965752990135, "learning_rate": 1.9754253308128542e-07, "logits": -1.3557052612304688, "logps": -93.74603271484375, "loss": 0.0935, "objective": 0.09472814947366714, "ranking_idealized": 0.612500011920929, "ranking_idealized_expo": 0.518750011920929, "ranking_simple": 0.5249999761581421, "regularize": 0.02543843351304531, "step": 215 }, { "dpo_loss": 0.6901801824569702, "epoch": 0.4156825696740671, "grad_norm": 111.3886176276061, "learning_rate": 2.0226843100189034e-07, "logits": -1.4235761165618896, "logps": -94.99393463134766, "loss": 0.0967, "objective": 0.09937222301959991, "ranking_idealized": 0.581250011920929, "ranking_idealized_expo": 0.543749988079071, "ranking_simple": 0.543749988079071, "regularize": 0.030354226008057594, "step": 220 }, { "dpo_loss": 0.6928871870040894, "epoch": 0.42512990080302315, "grad_norm": 92.08065964120154, "learning_rate": 2.0699432892249528e-07, "logits": -1.3547779321670532, "logps": -92.96757507324219, "loss": 0.0922, "objective": 0.09014561027288437, "ranking_idealized": 0.581250011920929, "ranking_idealized_expo": 0.53125, "ranking_simple": 0.5375000238418579, "regularize": 0.020856894552707672, "step": 225 }, { "dpo_loss": 0.6924599409103394, "epoch": 0.4345772319319792, "grad_norm": 100.38486388141452, "learning_rate": 2.1172022684310017e-07, "logits": -1.351255178451538, "logps": -91.6011734008789, "loss": 0.0928, "objective": 0.0948859453201294, "ranking_idealized": 0.4749999940395355, "ranking_idealized_expo": 0.4625000059604645, "ranking_simple": 0.4625000059604645, "regularize": 0.02563994750380516, "step": 230 }, { "dpo_loss": 0.6900066137313843, "epoch": 0.4440245630609353, "grad_norm": 97.70828365629224, "learning_rate": 2.164461247637051e-07, "logits": -1.4029417037963867, "logps": -92.07453918457031, "loss": 0.0961, "objective": 0.09497665613889694, "ranking_idealized": 0.5562499761581421, "ranking_idealized_expo": 0.53125, "ranking_simple": 0.53125, "regularize": 0.025975991040468216, "step": 235 }, { "dpo_loss": 0.6901751756668091, "epoch": 0.45347189418989137, "grad_norm": 110.99658377067563, "learning_rate": 2.2117202268431002e-07, "logits": -1.3475522994995117, "logps": -93.2119140625, "loss": 0.0951, "objective": 0.0938529521226883, "ranking_idealized": 0.5687500238418579, "ranking_idealized_expo": 0.48750001192092896, "ranking_simple": 0.48124998807907104, "regularize": 0.024835431948304176, "step": 240 }, { "dpo_loss": 0.691163957118988, "epoch": 0.46291922531884744, "grad_norm": 100.7251612930126, "learning_rate": 2.258979206049149e-07, "logits": -1.4657642841339111, "logps": -92.83688354492188, "loss": 0.0988, "objective": 0.09848640859127045, "ranking_idealized": 0.581250011920929, "ranking_idealized_expo": 0.543749988079071, "ranking_simple": 0.550000011920929, "regularize": 0.029370009899139404, "step": 245 }, { "dpo_loss": 0.689066469669342, "epoch": 0.4723665564478035, "grad_norm": 132.46845729407312, "learning_rate": 2.3062381852551985e-07, "logits": -1.3548660278320312, "logps": -93.12916564941406, "loss": 0.1019, "objective": 0.09985167533159256, "ranking_idealized": 0.5062500238418579, "ranking_idealized_expo": 0.48124998807907104, "ranking_simple": 0.48750001192092896, "regularize": 0.03094502165913582, "step": 250 }, { "epoch": 0.4723665564478035, "eval_dpo_loss": 0.6928930878639221, "eval_logits": -1.3058041334152222, "eval_logps": -98.67533111572266, "eval_loss": 0.08647485077381134, "eval_objective": 0.08637857437133789, "eval_ranking_idealized": 0.5398550629615784, "eval_ranking_idealized_expo": 0.5243270993232727, "eval_ranking_simple": 0.5232919454574585, "eval_regularize": 0.01708926260471344, "eval_runtime": 158.8308, "eval_samples_per_second": 36.454, "eval_steps_per_second": 3.041, "step": 250 }, { "dpo_loss": 0.6961080431938171, "epoch": 0.4818138875767596, "grad_norm": 103.43649747338549, "learning_rate": 2.3534971644612476e-07, "logits": -1.4218876361846924, "logps": -91.47744750976562, "loss": 0.0992, "objective": 0.09804344922304153, "ranking_idealized": 0.550000011920929, "ranking_idealized_expo": 0.512499988079071, "ranking_simple": 0.512499988079071, "regularize": 0.028432641178369522, "step": 255 }, { "dpo_loss": 0.6928529739379883, "epoch": 0.49126121870571565, "grad_norm": 89.70604380119666, "learning_rate": 2.400756143667297e-07, "logits": -1.355158805847168, "logps": -94.02273559570312, "loss": 0.1005, "objective": 0.10035960376262665, "ranking_idealized": 0.5375000238418579, "ranking_idealized_expo": 0.53125, "ranking_simple": 0.53125, "regularize": 0.031074311584234238, "step": 260 }, { "dpo_loss": 0.6879535913467407, "epoch": 0.5007085498346717, "grad_norm": 95.56790417868868, "learning_rate": 2.448015122873346e-07, "logits": -1.2801698446273804, "logps": -93.42267608642578, "loss": 0.1003, "objective": 0.09648537635803223, "ranking_idealized": 0.4937500059604645, "ranking_idealized_expo": 0.48124998807907104, "ranking_simple": 0.48124998807907104, "regularize": 0.027690013870596886, "step": 265 }, { "dpo_loss": 0.6904926300048828, "epoch": 0.5101558809636277, "grad_norm": 100.34085108311201, "learning_rate": 2.495274102079395e-07, "logits": -1.2917563915252686, "logps": -92.55985260009766, "loss": 0.1033, "objective": 0.10361208021640778, "ranking_idealized": 0.5, "ranking_idealized_expo": 0.5062500238418579, "ranking_simple": 0.5, "regularize": 0.034562818706035614, "step": 270 }, { "dpo_loss": 0.6918493509292603, "epoch": 0.5196032120925839, "grad_norm": 102.50008848820325, "learning_rate": 2.542533081285444e-07, "logits": -1.304248571395874, "logps": -93.20156860351562, "loss": 0.1024, "objective": 0.1000850573182106, "ranking_idealized": 0.4937500059604645, "ranking_idealized_expo": 0.4749999940395355, "ranking_simple": 0.4749999940395355, "regularize": 0.030900120735168457, "step": 275 }, { "dpo_loss": 0.6872326731681824, "epoch": 0.5290505432215399, "grad_norm": 91.85551703188915, "learning_rate": 2.589792060491493e-07, "logits": -1.4421064853668213, "logps": -91.59370422363281, "loss": 0.1063, "objective": 0.10864508152008057, "ranking_idealized": 0.53125, "ranking_idealized_expo": 0.512499988079071, "ranking_simple": 0.512499988079071, "regularize": 0.0399218313395977, "step": 280 }, { "dpo_loss": 0.6879855394363403, "epoch": 0.538497874350496, "grad_norm": 102.22439237075963, "learning_rate": 2.6370510396975425e-07, "logits": -1.3105735778808594, "logps": -92.08065032958984, "loss": 0.1029, "objective": 0.101453498005867, "ranking_idealized": 0.6187499761581421, "ranking_idealized_expo": 0.5874999761581421, "ranking_simple": 0.5874999761581421, "regularize": 0.03265494108200073, "step": 285 }, { "dpo_loss": 0.6928714513778687, "epoch": 0.547945205479452, "grad_norm": 97.93507304637171, "learning_rate": 2.6843100189035917e-07, "logits": -1.302470326423645, "logps": -91.57052612304688, "loss": 0.1071, "objective": 0.10190291702747345, "ranking_idealized": 0.53125, "ranking_idealized_expo": 0.4625000059604645, "ranking_simple": 0.4625000059604645, "regularize": 0.03261576220393181, "step": 290 }, { "dpo_loss": 0.6913698315620422, "epoch": 0.5573925366084082, "grad_norm": 93.2934267056007, "learning_rate": 2.731568998109641e-07, "logits": -1.309118628501892, "logps": -91.43770599365234, "loss": 0.1105, "objective": 0.11521486192941666, "ranking_idealized": 0.574999988079071, "ranking_idealized_expo": 0.512499988079071, "ranking_simple": 0.518750011920929, "regularize": 0.04607786983251572, "step": 295 }, { "dpo_loss": 0.689521074295044, "epoch": 0.5668398677373642, "grad_norm": 87.42650287342656, "learning_rate": 2.77882797731569e-07, "logits": -1.3558248281478882, "logps": -93.9669418334961, "loss": 0.1094, "objective": 0.11233736574649811, "ranking_idealized": 0.518750011920929, "ranking_idealized_expo": 0.5375000238418579, "ranking_simple": 0.5375000238418579, "regularize": 0.04338525980710983, "step": 300 }, { "epoch": 0.5668398677373642, "eval_dpo_loss": 0.6925599575042725, "eval_logits": -1.308714509010315, "eval_logps": -98.34481811523438, "eval_loss": 0.09280507266521454, "eval_objective": 0.09301475435495377, "eval_ranking_idealized": 0.5398550629615784, "eval_ranking_idealized_expo": 0.5243270993232727, "eval_ranking_simple": 0.523809552192688, "eval_regularize": 0.02375875413417816, "eval_runtime": 160.5672, "eval_samples_per_second": 36.06, "eval_steps_per_second": 3.008, "step": 300 }, { "dpo_loss": 0.6917223930358887, "epoch": 0.5762871988663203, "grad_norm": 111.48363245589638, "learning_rate": 2.8260869565217386e-07, "logits": -1.283851981163025, "logps": -92.88015747070312, "loss": 0.1157, "objective": 0.11618302762508392, "ranking_idealized": 0.45625001192092896, "ranking_idealized_expo": 0.4312500059604645, "ranking_simple": 0.4312500059604645, "regularize": 0.047010790556669235, "step": 305 }, { "dpo_loss": 0.6910533905029297, "epoch": 0.5857345299952763, "grad_norm": 122.66031310330784, "learning_rate": 2.873345935727788e-07, "logits": -1.3154358863830566, "logps": -91.87257385253906, "loss": 0.1096, "objective": 0.10647080838680267, "ranking_idealized": 0.637499988079071, "ranking_idealized_expo": 0.606249988079071, "ranking_simple": 0.612500011920929, "regularize": 0.03736545890569687, "step": 310 }, { "dpo_loss": 0.6930454969406128, "epoch": 0.5951818611242324, "grad_norm": 97.36343792538779, "learning_rate": 2.911153119092628e-07, "logits": -1.2765394449234009, "logps": -93.13780212402344, "loss": 0.1144, "objective": 0.1138184517621994, "ranking_idealized": 0.518750011920929, "ranking_idealized_expo": 0.5, "ranking_simple": 0.5, "regularize": 0.044513899832963943, "step": 315 }, { "dpo_loss": 0.689300000667572, "epoch": 0.6046291922531885, "grad_norm": 101.14091961784702, "learning_rate": 2.9584120982986764e-07, "logits": -1.3148740530014038, "logps": -91.87626647949219, "loss": 0.1129, "objective": 0.1134437695145607, "ranking_idealized": 0.53125, "ranking_idealized_expo": 0.48750001192092896, "ranking_simple": 0.4749999940395355, "regularize": 0.0445137694478035, "step": 320 }, { "dpo_loss": 0.6846424341201782, "epoch": 0.6140765233821446, "grad_norm": 125.67390238555228, "learning_rate": 3.0056710775047255e-07, "logits": -1.3823215961456299, "logps": -91.69265747070312, "loss": 0.1176, "objective": 0.12137912213802338, "ranking_idealized": 0.48124998807907104, "ranking_idealized_expo": 0.45625001192092896, "ranking_simple": 0.45625001192092896, "regularize": 0.05291489511728287, "step": 325 }, { "dpo_loss": 0.690563440322876, "epoch": 0.6235238545111006, "grad_norm": 103.15774875968413, "learning_rate": 3.0529300567107747e-07, "logits": -1.376556396484375, "logps": -92.85224914550781, "loss": 0.1143, "objective": 0.11595580726861954, "ranking_idealized": 0.543749988079071, "ranking_idealized_expo": 0.5062500238418579, "ranking_simple": 0.5062500238418579, "regularize": 0.0468994677066803, "step": 330 }, { "dpo_loss": 0.6915899515151978, "epoch": 0.6329711856400567, "grad_norm": 93.95463269073304, "learning_rate": 3.1001890359168243e-07, "logits": -1.3574047088623047, "logps": -93.62723541259766, "loss": 0.1184, "objective": 0.11612270027399063, "ranking_idealized": 0.4937500059604645, "ranking_idealized_expo": 0.46875, "ranking_simple": 0.46875, "regularize": 0.04696370288729668, "step": 335 }, { "dpo_loss": 0.6864021420478821, "epoch": 0.6424185167690127, "grad_norm": 98.76696875690139, "learning_rate": 3.1474480151228735e-07, "logits": -1.3963720798492432, "logps": -92.58355712890625, "loss": 0.1177, "objective": 0.11631828546524048, "ranking_idealized": 0.5687500238418579, "ranking_idealized_expo": 0.550000011920929, "ranking_simple": 0.5562499761581421, "regularize": 0.04767807573080063, "step": 340 }, { "dpo_loss": 0.688799262046814, "epoch": 0.6518658478979689, "grad_norm": 95.66225766173847, "learning_rate": 3.194706994328922e-07, "logits": -1.336104154586792, "logps": -91.61724090576172, "loss": 0.1219, "objective": 0.12555508315563202, "ranking_idealized": 0.5874999761581421, "ranking_idealized_expo": 0.581250011920929, "ranking_simple": 0.581250011920929, "regularize": 0.05667515844106674, "step": 345 }, { "dpo_loss": 0.6907819509506226, "epoch": 0.6613131790269249, "grad_norm": 102.20491139749774, "learning_rate": 3.241965973534971e-07, "logits": -1.2857093811035156, "logps": -94.47968292236328, "loss": 0.1267, "objective": 0.12202408164739609, "ranking_idealized": 0.550000011920929, "ranking_idealized_expo": 0.518750011920929, "ranking_simple": 0.512499988079071, "regularize": 0.05294587463140488, "step": 350 }, { "epoch": 0.6613131790269249, "eval_dpo_loss": 0.694220244884491, "eval_logits": -1.3096948862075806, "eval_logps": -98.48033142089844, "eval_loss": 0.09951319545507431, "eval_objective": 0.10041753947734833, "eval_ranking_idealized": 0.5398550629615784, "eval_ranking_idealized_expo": 0.5243270993232727, "eval_ranking_simple": 0.5243270993232727, "eval_regularize": 0.030995529145002365, "eval_runtime": 158.8978, "eval_samples_per_second": 36.439, "eval_steps_per_second": 3.04, "step": 350 }, { "dpo_loss": 0.6890446543693542, "epoch": 0.670760510155881, "grad_norm": 107.47087696174563, "learning_rate": 3.2892249527410204e-07, "logits": -1.3915925025939941, "logps": -93.3402099609375, "loss": 0.1234, "objective": 0.12433197349309921, "ranking_idealized": 0.5, "ranking_idealized_expo": 0.512499988079071, "ranking_simple": 0.518750011920929, "regularize": 0.055427514016628265, "step": 355 }, { "dpo_loss": 0.6911527514457703, "epoch": 0.680207841284837, "grad_norm": 102.70324138647139, "learning_rate": 3.33648393194707e-07, "logits": -1.3372268676757812, "logps": -92.9881591796875, "loss": 0.121, "objective": 0.12949387729167938, "ranking_idealized": 0.574999988079071, "ranking_idealized_expo": 0.5062500238418579, "ranking_simple": 0.518750011920929, "regularize": 0.06037859991192818, "step": 360 }, { "dpo_loss": 0.6866102814674377, "epoch": 0.6896551724137931, "grad_norm": 88.6752156599502, "learning_rate": 3.383742911153119e-07, "logits": -1.3452448844909668, "logps": -93.36683654785156, "loss": 0.1229, "objective": 0.12604525685310364, "ranking_idealized": 0.5687500238418579, "ranking_idealized_expo": 0.5375000238418579, "ranking_simple": 0.543749988079071, "regularize": 0.05738421529531479, "step": 365 }, { "dpo_loss": 0.6896581053733826, "epoch": 0.6991025035427492, "grad_norm": 91.19850585461762, "learning_rate": 3.431001890359168e-07, "logits": -1.3197768926620483, "logps": -91.77013397216797, "loss": 0.1276, "objective": 0.11843843758106232, "ranking_idealized": 0.581250011920929, "ranking_idealized_expo": 0.53125, "ranking_simple": 0.53125, "regularize": 0.0494726225733757, "step": 370 }, { "dpo_loss": 0.691429615020752, "epoch": 0.7085498346717053, "grad_norm": 113.5680710791941, "learning_rate": 3.478260869565217e-07, "logits": -1.2648178339004517, "logps": -91.7554702758789, "loss": 0.1275, "objective": 0.12085701525211334, "ranking_idealized": 0.574999988079071, "ranking_idealized_expo": 0.5375000238418579, "ranking_simple": 0.53125, "regularize": 0.05171404033899307, "step": 375 }, { "dpo_loss": 0.6935691833496094, "epoch": 0.7179971658006613, "grad_norm": 108.42021457365978, "learning_rate": 3.525519848771266e-07, "logits": -1.4277188777923584, "logps": -91.43479919433594, "loss": 0.1292, "objective": 0.12868951261043549, "ranking_idealized": 0.4749999940395355, "ranking_idealized_expo": 0.4437499940395355, "ranking_simple": 0.4437499940395355, "regularize": 0.05933259800076485, "step": 380 }, { "dpo_loss": 0.6908777952194214, "epoch": 0.7274444969296174, "grad_norm": 112.83132769591303, "learning_rate": 3.572778827977316e-07, "logits": -1.428979516029358, "logps": -94.34440612792969, "loss": 0.1285, "objective": 0.126731738448143, "ranking_idealized": 0.5375000238418579, "ranking_idealized_expo": 0.44999998807907104, "ranking_simple": 0.44999998807907104, "regularize": 0.05764396861195564, "step": 385 }, { "dpo_loss": 0.6864285469055176, "epoch": 0.7368918280585735, "grad_norm": 86.03614856711587, "learning_rate": 3.620037807183365e-07, "logits": -1.3407856225967407, "logps": -92.52519226074219, "loss": 0.1402, "objective": 0.14342442154884338, "ranking_idealized": 0.46875, "ranking_idealized_expo": 0.45625001192092896, "ranking_simple": 0.4437499940395355, "regularize": 0.07478158175945282, "step": 390 }, { "dpo_loss": 0.6894232034683228, "epoch": 0.7463391591875296, "grad_norm": 90.02582661821339, "learning_rate": 3.667296786389414e-07, "logits": -1.450365424156189, "logps": -94.80558776855469, "loss": 0.1342, "objective": 0.13283126056194305, "ranking_idealized": 0.581250011920929, "ranking_idealized_expo": 0.53125, "ranking_simple": 0.5375000238418579, "regularize": 0.06388893723487854, "step": 395 }, { "dpo_loss": 0.6896533370018005, "epoch": 0.7557864903164856, "grad_norm": 111.61106880900202, "learning_rate": 3.7145557655954627e-07, "logits": -1.3269095420837402, "logps": -92.5389633178711, "loss": 0.1414, "objective": 0.13954514265060425, "ranking_idealized": 0.543749988079071, "ranking_idealized_expo": 0.5, "ranking_simple": 0.4937500059604645, "regularize": 0.07057979702949524, "step": 400 }, { "epoch": 0.7557864903164856, "eval_dpo_loss": 0.6920130848884583, "eval_logits": -1.3138436079025269, "eval_logps": -98.69994354248047, "eval_loss": 0.1019834652543068, "eval_objective": 0.10271409153938293, "eval_ranking_idealized": 0.5398550629615784, "eval_ranking_idealized_expo": 0.5243270993232727, "eval_ranking_simple": 0.5248447060585022, "eval_regularize": 0.033512771129608154, "eval_runtime": 158.4351, "eval_samples_per_second": 36.545, "eval_steps_per_second": 3.049, "step": 400 }, { "dpo_loss": 0.6916826367378235, "epoch": 0.7652338214454416, "grad_norm": 103.02854860900352, "learning_rate": 3.761814744801512e-07, "logits": -1.463388204574585, "logps": -93.37408447265625, "loss": 0.1505, "objective": 0.1511785387992859, "ranking_idealized": 0.5062500238418579, "ranking_idealized_expo": 0.4625000059604645, "ranking_simple": 0.45625001192092896, "regularize": 0.08201026916503906, "step": 405 }, { "dpo_loss": 0.6840475797653198, "epoch": 0.7746811525743977, "grad_norm": 100.25779449592062, "learning_rate": 3.809073724007561e-07, "logits": -1.330974817276001, "logps": -91.45512390136719, "loss": 0.1453, "objective": 0.1453799307346344, "ranking_idealized": 0.5562499761581421, "ranking_idealized_expo": 0.518750011920929, "ranking_simple": 0.512499988079071, "regularize": 0.07697516679763794, "step": 410 }, { "dpo_loss": 0.6831516027450562, "epoch": 0.7841284837033538, "grad_norm": 114.27467092070809, "learning_rate": 3.8563327032136107e-07, "logits": -1.4295470714569092, "logps": -94.0125961303711, "loss": 0.165, "objective": 0.15841113030910492, "ranking_idealized": 0.5249999761581421, "ranking_idealized_expo": 0.518750011920929, "ranking_simple": 0.5062500238418579, "regularize": 0.09009597450494766, "step": 415 }, { "dpo_loss": 0.6889381408691406, "epoch": 0.7935758148323099, "grad_norm": 99.17563424148113, "learning_rate": 3.90359168241966e-07, "logits": -1.3562965393066406, "logps": -91.68006896972656, "loss": 0.1444, "objective": 0.14152219891548157, "ranking_idealized": 0.5375000238418579, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.518750011920929, "regularize": 0.07262839376926422, "step": 420 }, { "dpo_loss": 0.6892581582069397, "epoch": 0.8030231459612659, "grad_norm": 100.05408578170393, "learning_rate": 3.9508506616257084e-07, "logits": -1.3055548667907715, "logps": -91.72136688232422, "loss": 0.1538, "objective": 0.14655420184135437, "ranking_idealized": 0.46875, "ranking_idealized_expo": 0.40625, "ranking_simple": 0.4000000059604645, "regularize": 0.07762838155031204, "step": 425 }, { "dpo_loss": 0.6880292892456055, "epoch": 0.812470477090222, "grad_norm": 97.66357035709157, "learning_rate": 3.9981096408317576e-07, "logits": -1.3861249685287476, "logps": -95.13639831542969, "loss": 0.1585, "objective": 0.14511282742023468, "ranking_idealized": 0.5687500238418579, "ranking_idealized_expo": 0.5062500238418579, "ranking_simple": 0.5, "regularize": 0.07630989700555801, "step": 430 }, { "dpo_loss": 0.6850839257240295, "epoch": 0.821917808219178, "grad_norm": 116.23193822053362, "learning_rate": 4.0453686200378067e-07, "logits": -1.4044657945632935, "logps": -91.6175765991211, "loss": 0.1652, "objective": 0.1648859977722168, "ranking_idealized": 0.543749988079071, "ranking_idealized_expo": 0.5062500238418579, "ranking_simple": 0.512499988079071, "regularize": 0.09637759625911713, "step": 435 }, { "dpo_loss": 0.6872573494911194, "epoch": 0.8313651393481342, "grad_norm": 101.47384172618189, "learning_rate": 4.0926275992438564e-07, "logits": -1.3521515130996704, "logps": -92.96221923828125, "loss": 0.1529, "objective": 0.1533818542957306, "ranking_idealized": 0.512499988079071, "ranking_idealized_expo": 0.5, "ranking_simple": 0.512499988079071, "regularize": 0.08465610444545746, "step": 440 }, { "dpo_loss": 0.6908755898475647, "epoch": 0.8408124704770902, "grad_norm": 91.82268216912694, "learning_rate": 4.1398865784499055e-07, "logits": -1.3667224645614624, "logps": -90.73509979248047, "loss": 0.1527, "objective": 0.14873163402080536, "ranking_idealized": 0.5249999761581421, "ranking_idealized_expo": 0.512499988079071, "ranking_simple": 0.5062500238418579, "regularize": 0.0796440839767456, "step": 445 }, { "dpo_loss": 0.7019797563552856, "epoch": 0.8502598016060463, "grad_norm": 100.61548180012701, "learning_rate": 4.1871455576559547e-07, "logits": -1.3356683254241943, "logps": -92.12760162353516, "loss": 0.156, "objective": 0.1631532460451126, "ranking_idealized": 0.5625, "ranking_idealized_expo": 0.574999988079071, "ranking_simple": 0.574999988079071, "regularize": 0.09295526146888733, "step": 450 }, { "epoch": 0.8502598016060463, "eval_dpo_loss": 0.6917065382003784, "eval_logits": -1.3000538349151611, "eval_logps": -98.69611358642578, "eval_loss": 0.11016573011875153, "eval_objective": 0.11068187654018402, "eval_ranking_idealized": 0.5398550629615784, "eval_ranking_idealized_expo": 0.5243270993232727, "eval_ranking_simple": 0.522774338722229, "eval_regularize": 0.041511211544275284, "eval_runtime": 158.2413, "eval_samples_per_second": 36.59, "eval_steps_per_second": 3.052, "step": 450 }, { "dpo_loss": 0.6912184357643127, "epoch": 0.8597071327350023, "grad_norm": 103.74989373255174, "learning_rate": 4.2344045368620033e-07, "logits": -1.311997413635254, "logps": -91.93920135498047, "loss": 0.1648, "objective": 0.16276951134204865, "ranking_idealized": 0.59375, "ranking_idealized_expo": 0.5562499761581421, "ranking_simple": 0.5625, "regularize": 0.09364765137434006, "step": 455 }, { "dpo_loss": 0.6932337880134583, "epoch": 0.8691544638639584, "grad_norm": 99.03071397788196, "learning_rate": 4.2816635160680524e-07, "logits": -1.3874913454055786, "logps": -92.34394836425781, "loss": 0.1556, "objective": 0.15355022251605988, "ranking_idealized": 0.4749999940395355, "ranking_idealized_expo": 0.4375, "ranking_simple": 0.4375, "regularize": 0.08422684669494629, "step": 460 }, { "dpo_loss": 0.6835826635360718, "epoch": 0.8786017949929145, "grad_norm": 84.6790640072118, "learning_rate": 4.328922495274102e-07, "logits": -1.3000524044036865, "logps": -91.39022064208984, "loss": 0.154, "objective": 0.15162508189678192, "ranking_idealized": 0.53125, "ranking_idealized_expo": 0.518750011920929, "ranking_simple": 0.518750011920929, "regularize": 0.08326681703329086, "step": 465 }, { "dpo_loss": 0.6978387832641602, "epoch": 0.8880491261218706, "grad_norm": 107.18167519452933, "learning_rate": 4.3761814744801513e-07, "logits": -1.3547214269638062, "logps": -90.24058532714844, "loss": 0.1673, "objective": 0.156182661652565, "ranking_idealized": 0.53125, "ranking_idealized_expo": 0.48750001192092896, "ranking_simple": 0.4937500059604645, "regularize": 0.08639879524707794, "step": 470 }, { "dpo_loss": 0.68897944688797, "epoch": 0.8974964572508266, "grad_norm": 94.54535920638504, "learning_rate": 4.4234404536862004e-07, "logits": -1.3638577461242676, "logps": -91.9796371459961, "loss": 0.1689, "objective": 0.16064883768558502, "ranking_idealized": 0.48750001192092896, "ranking_idealized_expo": 0.4749999940395355, "ranking_simple": 0.48124998807907104, "regularize": 0.09175089001655579, "step": 475 }, { "dpo_loss": 0.690127968788147, "epoch": 0.9069437883797827, "grad_norm": 96.98421932494537, "learning_rate": 4.470699432892249e-07, "logits": -1.296614646911621, "logps": -93.49898529052734, "loss": 0.1654, "objective": 0.16247372329235077, "ranking_idealized": 0.606249988079071, "ranking_idealized_expo": 0.574999988079071, "ranking_simple": 0.5874999761581421, "regularize": 0.09346093982458115, "step": 480 }, { "dpo_loss": 0.6874816417694092, "epoch": 0.9163911195087387, "grad_norm": 109.5139852284968, "learning_rate": 4.517958412098298e-07, "logits": -1.393359661102295, "logps": -95.6229019165039, "loss": 0.1731, "objective": 0.16404297947883606, "ranking_idealized": 0.5625, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.518750011920929, "regularize": 0.09529478847980499, "step": 485 }, { "dpo_loss": 0.6885187029838562, "epoch": 0.9258384506376949, "grad_norm": 112.83140218708049, "learning_rate": 4.5652173913043473e-07, "logits": -1.2763055562973022, "logps": -93.39484405517578, "loss": 0.1892, "objective": 0.18885581195354462, "ranking_idealized": 0.512499988079071, "ranking_idealized_expo": 0.48124998807907104, "ranking_simple": 0.48750001192092896, "regularize": 0.12000392377376556, "step": 490 }, { "dpo_loss": 0.7008720636367798, "epoch": 0.9352857817666509, "grad_norm": 105.14659658145943, "learning_rate": 4.612476370510397e-07, "logits": -1.2833281755447388, "logps": -92.49003601074219, "loss": 0.1824, "objective": 0.1885298490524292, "ranking_idealized": 0.6000000238418579, "ranking_idealized_expo": 0.5625, "ranking_simple": 0.5562499761581421, "regularize": 0.11844261735677719, "step": 495 }, { "dpo_loss": 0.7026808261871338, "epoch": 0.944733112895607, "grad_norm": 104.20649038619608, "learning_rate": 4.659735349716446e-07, "logits": -1.2983553409576416, "logps": -93.92900085449219, "loss": 0.1843, "objective": 0.1905163675546646, "ranking_idealized": 0.550000011920929, "ranking_idealized_expo": 0.518750011920929, "ranking_simple": 0.53125, "regularize": 0.12024825811386108, "step": 500 }, { "epoch": 0.944733112895607, "eval_dpo_loss": 0.6934216022491455, "eval_logits": -1.2985299825668335, "eval_logps": -98.3685073852539, "eval_loss": 0.14251917600631714, "eval_objective": 0.14164087176322937, "eval_ranking_idealized": 0.5398550629615784, "eval_ranking_idealized_expo": 0.5243270993232727, "eval_ranking_simple": 0.52173912525177, "eval_regularize": 0.07229872047901154, "eval_runtime": 158.351, "eval_samples_per_second": 36.564, "eval_steps_per_second": 3.05, "step": 500 }, { "dpo_loss": 0.6825562715530396, "epoch": 0.954180444024563, "grad_norm": 87.49461844052641, "learning_rate": 4.7069943289224953e-07, "logits": -1.3763777017593384, "logps": -92.05134582519531, "loss": 0.1862, "objective": 0.1869305521249771, "ranking_idealized": 0.53125, "ranking_idealized_expo": 0.5062500238418579, "ranking_simple": 0.5062500238418579, "regularize": 0.11867489665746689, "step": 505 }, { "dpo_loss": 0.7000191807746887, "epoch": 0.9636277751535192, "grad_norm": 99.68210245118057, "learning_rate": 4.754253308128544e-07, "logits": -1.3264052867889404, "logps": -93.01531982421875, "loss": 0.1855, "objective": 0.18150749802589417, "ranking_idealized": 0.5874999761581421, "ranking_idealized_expo": 0.5625, "ranking_simple": 0.550000011920929, "regularize": 0.11150559037923813, "step": 510 }, { "dpo_loss": 0.6989023685455322, "epoch": 0.9730751062824752, "grad_norm": 100.37728881230282, "learning_rate": 4.801512287334594e-07, "logits": -1.3409388065338135, "logps": -92.48466491699219, "loss": 0.1815, "objective": 0.17680883407592773, "ranking_idealized": 0.5562499761581421, "ranking_idealized_expo": 0.5687500238418579, "ranking_simple": 0.5562499761581421, "regularize": 0.1069185882806778, "step": 515 }, { "dpo_loss": 0.6951934695243835, "epoch": 0.9825224374114313, "grad_norm": 109.03096603881598, "learning_rate": 4.848771266540643e-07, "logits": -1.4067871570587158, "logps": -93.4749984741211, "loss": 0.1818, "objective": 0.18318508565425873, "ranking_idealized": 0.581250011920929, "ranking_idealized_expo": 0.5874999761581421, "ranking_simple": 0.581250011920929, "regularize": 0.11366574466228485, "step": 520 }, { "dpo_loss": 0.6849699020385742, "epoch": 0.9919697685403873, "grad_norm": 88.97267098704813, "learning_rate": 4.896030245746692e-07, "logits": -1.2617175579071045, "logps": -93.6603775024414, "loss": 0.1789, "objective": 0.17417797446250916, "ranking_idealized": 0.606249988079071, "ranking_idealized_expo": 0.574999988079071, "ranking_simple": 0.5687500238418579, "regularize": 0.10568100214004517, "step": 525 }, { "dpo_loss": 0.6823090314865112, "epoch": 1.0014170996693434, "grad_norm": 89.5976454923623, "learning_rate": 4.943289224952741e-07, "logits": -1.4235401153564453, "logps": -92.53488159179688, "loss": 0.1967, "objective": 0.2006862610578537, "ranking_idealized": 0.550000011920929, "ranking_idealized_expo": 0.518750011920929, "ranking_simple": 0.53125, "regularize": 0.13245537877082825, "step": 530 }, { "dpo_loss": 0.6965185403823853, "epoch": 1.0108644307982995, "grad_norm": 96.30445573525516, "learning_rate": 4.99054820415879e-07, "logits": -1.4111480712890625, "logps": -93.36735534667969, "loss": 0.1921, "objective": 0.19494469463825226, "ranking_idealized": 0.5687500238418579, "ranking_idealized_expo": 0.5375000238418579, "ranking_simple": 0.5375000238418579, "regularize": 0.1252928525209427, "step": 535 }, { "dpo_loss": 0.6989037394523621, "epoch": 1.0203117619272555, "grad_norm": 97.20685088312915, "learning_rate": 4.999991291705134e-07, "logits": -1.384477138519287, "logps": -93.18318176269531, "loss": 0.2125, "objective": 0.2088538110256195, "ranking_idealized": 0.512499988079071, "ranking_idealized_expo": 0.4937500059604645, "ranking_simple": 0.5062500238418579, "regularize": 0.13896343111991882, "step": 540 }, { "dpo_loss": 0.6883914470672607, "epoch": 1.0297590930562117, "grad_norm": 90.69429715375348, "learning_rate": 4.999955914361218e-07, "logits": -1.380433201789856, "logps": -93.4023208618164, "loss": 0.2032, "objective": 0.1992042511701584, "ranking_idealized": 0.48750001192092896, "ranking_idealized_expo": 0.4625000059604645, "ranking_simple": 0.4625000059604645, "regularize": 0.13036508858203888, "step": 545 }, { "dpo_loss": 0.6875864267349243, "epoch": 1.0392064241851677, "grad_norm": 91.50992212902428, "learning_rate": 4.999893324084622e-07, "logits": -1.3838626146316528, "logps": -91.31867980957031, "loss": 0.1954, "objective": 0.2028588354587555, "ranking_idealized": 0.518750011920929, "ranking_idealized_expo": 0.5375000238418579, "ranking_simple": 0.5375000238418579, "regularize": 0.13410018384456635, "step": 550 }, { "epoch": 1.0392064241851677, "eval_dpo_loss": 0.6945993900299072, "eval_logits": -1.3153595924377441, "eval_logps": -98.2336196899414, "eval_loss": 0.13877831399440765, "eval_objective": 0.13831962645053864, "eval_ranking_idealized": 0.5398550629615784, "eval_ranking_idealized_expo": 0.5243270993232727, "eval_ranking_simple": 0.522774338722229, "eval_regularize": 0.06885971128940582, "eval_runtime": 158.0034, "eval_samples_per_second": 36.645, "eval_steps_per_second": 3.057, "step": 550 }, { "dpo_loss": 0.6934968829154968, "epoch": 1.0486537553141237, "grad_norm": 100.42594403491987, "learning_rate": 4.999803521556664e-07, "logits": -1.3395583629608154, "logps": -90.90914916992188, "loss": 0.2073, "objective": 0.1963643729686737, "ranking_idealized": 0.5625, "ranking_idealized_expo": 0.518750011920929, "ranking_simple": 0.5249999761581421, "regularize": 0.12701468169689178, "step": 555 }, { "dpo_loss": 0.6871433854103088, "epoch": 1.0581010864430798, "grad_norm": 103.31670555425323, "learning_rate": 4.999686507754875e-07, "logits": -1.367850661277771, "logps": -91.52650451660156, "loss": 0.2065, "objective": 0.18664252758026123, "ranking_idealized": 0.53125, "ranking_idealized_expo": 0.5, "ranking_simple": 0.48750001192092896, "regularize": 0.11792820692062378, "step": 560 }, { "dpo_loss": 0.6820244789123535, "epoch": 1.067548417572036, "grad_norm": 109.55842755689235, "learning_rate": 4.999542283952998e-07, "logits": -1.3654028177261353, "logps": -93.674560546875, "loss": 0.2142, "objective": 0.21235975623130798, "ranking_idealized": 0.550000011920929, "ranking_idealized_expo": 0.53125, "ranking_simple": 0.53125, "regularize": 0.1441573202610016, "step": 565 }, { "dpo_loss": 0.6821997761726379, "epoch": 1.076995748700992, "grad_norm": 87.6265285859963, "learning_rate": 4.999370851720956e-07, "logits": -1.356442928314209, "logps": -92.73716735839844, "loss": 0.2023, "objective": 0.2152920961380005, "ranking_idealized": 0.59375, "ranking_idealized_expo": 0.574999988079071, "ranking_simple": 0.581250011920929, "regularize": 0.14707210659980774, "step": 570 }, { "dpo_loss": 0.6839498281478882, "epoch": 1.086443079829948, "grad_norm": 94.30475178073908, "learning_rate": 4.999172212924856e-07, "logits": -1.3030678033828735, "logps": -92.93643951416016, "loss": 0.2005, "objective": 0.1996048092842102, "ranking_idealized": 0.53125, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.518750011920929, "regularize": 0.1312098205089569, "step": 575 }, { "dpo_loss": 0.6901076436042786, "epoch": 1.095890410958904, "grad_norm": 94.21853528392039, "learning_rate": 4.99894636972695e-07, "logits": -1.310119390487671, "logps": -92.42277526855469, "loss": 0.2094, "objective": 0.22037836909294128, "ranking_idealized": 0.6499999761581421, "ranking_idealized_expo": 0.6187499761581421, "ranking_simple": 0.6187499761581421, "regularize": 0.15136758983135223, "step": 580 }, { "dpo_loss": 0.6884266138076782, "epoch": 1.10533774208786, "grad_norm": 88.706538539552, "learning_rate": 4.998693324585628e-07, "logits": -1.304504632949829, "logps": -93.57672119140625, "loss": 0.2051, "objective": 0.20268912613391876, "ranking_idealized": 0.6000000238418579, "ranking_idealized_expo": 0.59375, "ranking_simple": 0.5874999761581421, "regularize": 0.1338464766740799, "step": 585 }, { "dpo_loss": 0.6907263398170471, "epoch": 1.1147850732168163, "grad_norm": 105.11352044037606, "learning_rate": 4.998413080255375e-07, "logits": -1.4110699892044067, "logps": -92.94535827636719, "loss": 0.2072, "objective": 0.19032931327819824, "ranking_idealized": 0.5249999761581421, "ranking_idealized_expo": 0.518750011920929, "ranking_simple": 0.512499988079071, "regularize": 0.12125667184591293, "step": 590 }, { "dpo_loss": 0.6772922277450562, "epoch": 1.1242324043457723, "grad_norm": 95.3174741961889, "learning_rate": 4.998105639786754e-07, "logits": -1.3170404434204102, "logps": -91.73414611816406, "loss": 0.2182, "objective": 0.19899888336658478, "ranking_idealized": 0.550000011920929, "ranking_idealized_expo": 0.512499988079071, "ranking_simple": 0.512499988079071, "regularize": 0.1312696635723114, "step": 595 }, { "dpo_loss": 0.6846722364425659, "epoch": 1.1336797354747283, "grad_norm": 107.8425091882789, "learning_rate": 4.997771006526367e-07, "logits": -1.3956836462020874, "logps": -93.02611541748047, "loss": 0.2073, "objective": 0.20752784609794617, "ranking_idealized": 0.59375, "ranking_idealized_expo": 0.5375000238418579, "ranking_simple": 0.543749988079071, "regularize": 0.13906064629554749, "step": 600 }, { "epoch": 1.1336797354747283, "eval_dpo_loss": 0.693564236164093, "eval_logits": -1.307108998298645, "eval_logps": -98.72145080566406, "eval_loss": 0.1374109834432602, "eval_objective": 0.13693860173225403, "eval_ranking_idealized": 0.5398550629615784, "eval_ranking_idealized_expo": 0.5243270993232727, "eval_ranking_simple": 0.522774338722229, "eval_regularize": 0.06758218258619308, "eval_runtime": 158.0564, "eval_samples_per_second": 36.632, "eval_steps_per_second": 3.056, "step": 600 }, { "dpo_loss": 0.6941686272621155, "epoch": 1.1431270666036846, "grad_norm": 84.49370124503615, "learning_rate": 4.997409184116819e-07, "logits": -1.3777289390563965, "logps": -93.79177856445312, "loss": 0.2128, "objective": 0.2253679782152176, "ranking_idealized": 0.48750001192092896, "ranking_idealized_expo": 0.46875, "ranking_simple": 0.46875, "regularize": 0.1559511125087738, "step": 605 }, { "dpo_loss": 0.6771566271781921, "epoch": 1.1525743977326406, "grad_norm": 88.09246493054597, "learning_rate": 4.997020176496679e-07, "logits": -1.3691322803497314, "logps": -93.24571990966797, "loss": 0.2237, "objective": 0.21853280067443848, "ranking_idealized": 0.4937500059604645, "ranking_idealized_expo": 0.48124998807907104, "ranking_simple": 0.5, "regularize": 0.1508171707391739, "step": 610 }, { "dpo_loss": 0.6988297700881958, "epoch": 1.1620217288615966, "grad_norm": 96.71294215621295, "learning_rate": 4.996603987900437e-07, "logits": -1.4297274351119995, "logps": -91.73030853271484, "loss": 0.2175, "objective": 0.2077571451663971, "ranking_idealized": 0.5562499761581421, "ranking_idealized_expo": 0.5375000238418579, "ranking_simple": 0.5249999761581421, "regularize": 0.13787415623664856, "step": 615 }, { "dpo_loss": 0.6964720487594604, "epoch": 1.1714690599905526, "grad_norm": 96.89701629175714, "learning_rate": 4.996160622858458e-07, "logits": -1.3424708843231201, "logps": -90.92454528808594, "loss": 0.2191, "objective": 0.21006684005260468, "ranking_idealized": 0.606249988079071, "ranking_idealized_expo": 0.5874999761581421, "ranking_simple": 0.574999988079071, "regularize": 0.1404196172952652, "step": 620 }, { "dpo_loss": 0.7060242295265198, "epoch": 1.1809163911195086, "grad_norm": 85.87331082123467, "learning_rate": 4.995690086196932e-07, "logits": -1.4066816568374634, "logps": -93.66090393066406, "loss": 0.2397, "objective": 0.24251866340637207, "ranking_idealized": 0.5062500238418579, "ranking_idealized_expo": 0.46875, "ranking_simple": 0.46875, "regularize": 0.17191624641418457, "step": 625 }, { "dpo_loss": 0.6906918287277222, "epoch": 1.1903637222484649, "grad_norm": 112.18287918083412, "learning_rate": 4.995192383037823e-07, "logits": -1.3011844158172607, "logps": -90.5967025756836, "loss": 0.2062, "objective": 0.21339142322540283, "ranking_idealized": 0.5, "ranking_idealized_expo": 0.48750001192092896, "ranking_simple": 0.512499988079071, "regularize": 0.1443222463130951, "step": 630 }, { "dpo_loss": 0.7012017965316772, "epoch": 1.1998110533774209, "grad_norm": 91.04006298092757, "learning_rate": 4.994667518798809e-07, "logits": -1.3534437417984009, "logps": -90.98788452148438, "loss": 0.2146, "objective": 0.21284246444702148, "ranking_idealized": 0.5249999761581421, "ranking_idealized_expo": 0.4937500059604645, "ranking_simple": 0.4937500059604645, "regularize": 0.14272227883338928, "step": 635 }, { "dpo_loss": 0.6859739422798157, "epoch": 1.209258384506377, "grad_norm": 95.64760986526014, "learning_rate": 4.994115499193233e-07, "logits": -1.3624138832092285, "logps": -90.53767395019531, "loss": 0.2284, "objective": 0.23482659459114075, "ranking_idealized": 0.606249988079071, "ranking_idealized_expo": 0.543749988079071, "ranking_simple": 0.5687500238418579, "regularize": 0.16622920334339142, "step": 640 }, { "dpo_loss": 0.6897287368774414, "epoch": 1.2187057156353331, "grad_norm": 85.2353798998164, "learning_rate": 4.993536330230027e-07, "logits": -1.265354871749878, "logps": -91.70381164550781, "loss": 0.2109, "objective": 0.2076033651828766, "ranking_idealized": 0.48750001192092896, "ranking_idealized_expo": 0.46875, "ranking_simple": 0.45625001192092896, "regularize": 0.13863049447536469, "step": 645 }, { "dpo_loss": 0.6822940707206726, "epoch": 1.2281530467642892, "grad_norm": 94.6596625093207, "learning_rate": 4.992930018213657e-07, "logits": -1.286306619644165, "logps": -92.8187484741211, "loss": 0.2165, "objective": 0.22189109027385712, "ranking_idealized": 0.59375, "ranking_idealized_expo": 0.5687500238418579, "ranking_simple": 0.5625, "regularize": 0.15366169810295105, "step": 650 }, { "epoch": 1.2281530467642892, "eval_dpo_loss": 0.6915929317474365, "eval_logits": -1.2926098108291626, "eval_logps": -97.9261474609375, "eval_loss": 0.14777907729148865, "eval_objective": 0.14874477684497833, "eval_ranking_idealized": 0.5398550629615784, "eval_ranking_idealized_expo": 0.5243270993232727, "eval_ranking_simple": 0.5232919454574585, "eval_regularize": 0.0795854777097702, "eval_runtime": 157.6426, "eval_samples_per_second": 36.729, "eval_steps_per_second": 3.064, "step": 650 }, { "dpo_loss": 0.687442421913147, "epoch": 1.2376003778932452, "grad_norm": 104.63327142074685, "learning_rate": 4.992296569744051e-07, "logits": -1.2754095792770386, "logps": -91.40235900878906, "loss": 0.2313, "objective": 0.24213269352912903, "ranking_idealized": 0.6499999761581421, "ranking_idealized_expo": 0.606249988079071, "ranking_simple": 0.59375, "regularize": 0.17338842153549194, "step": 655 }, { "dpo_loss": 0.6873584389686584, "epoch": 1.2470477090222012, "grad_norm": 92.75152345942683, "learning_rate": 4.991635991716527e-07, "logits": -1.2639495134353638, "logps": -90.1284408569336, "loss": 0.2221, "objective": 0.2178734540939331, "ranking_idealized": 0.518750011920929, "ranking_idealized_expo": 0.5, "ranking_simple": 0.512499988079071, "regularize": 0.14913764595985413, "step": 660 }, { "dpo_loss": 0.6985175609588623, "epoch": 1.2564950401511572, "grad_norm": 88.00328633474199, "learning_rate": 4.990948291321719e-07, "logits": -1.385095477104187, "logps": -92.58069610595703, "loss": 0.2272, "objective": 0.23054857552051544, "ranking_idealized": 0.5687500238418579, "ranking_idealized_expo": 0.543749988079071, "ranking_simple": 0.5625, "regularize": 0.1606968343257904, "step": 665 }, { "dpo_loss": 0.6855772137641907, "epoch": 1.2659423712801134, "grad_norm": 85.58324526077789, "learning_rate": 4.990233476045493e-07, "logits": -1.2797305583953857, "logps": -89.69403839111328, "loss": 0.2226, "objective": 0.21690480411052704, "ranking_idealized": 0.5625, "ranking_idealized_expo": 0.4625000059604645, "ranking_simple": 0.4749999940395355, "regularize": 0.14834709465503693, "step": 670 }, { "dpo_loss": 0.6901682615280151, "epoch": 1.2753897024090695, "grad_norm": 89.94100156988536, "learning_rate": 4.989491553668878e-07, "logits": -1.4622950553894043, "logps": -92.04087829589844, "loss": 0.2133, "objective": 0.21802285313606262, "ranking_idealized": 0.5062500238418579, "ranking_idealized_expo": 0.5, "ranking_simple": 0.5062500238418579, "regularize": 0.14900599420070648, "step": 675 }, { "dpo_loss": 0.684799313545227, "epoch": 1.2848370335380255, "grad_norm": 85.69424074902147, "learning_rate": 4.988722532267968e-07, "logits": -1.2466356754302979, "logps": -90.41310119628906, "loss": 0.2247, "objective": 0.22920887172222137, "ranking_idealized": 0.4937500059604645, "ranking_idealized_expo": 0.4437499940395355, "ranking_simple": 0.44999998807907104, "regularize": 0.16072896122932434, "step": 680 }, { "dpo_loss": 0.6969146132469177, "epoch": 1.2942843646669817, "grad_norm": 83.29941926709483, "learning_rate": 4.987926420213843e-07, "logits": -1.2692553997039795, "logps": -90.69180297851562, "loss": 0.2207, "objective": 0.2164468765258789, "ranking_idealized": 0.5375000238418579, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.53125, "regularize": 0.1467554122209549, "step": 685 }, { "dpo_loss": 0.7128168940544128, "epoch": 1.3037316957959377, "grad_norm": 89.95838972455667, "learning_rate": 4.987103226172473e-07, "logits": -1.3931699991226196, "logps": -91.45069885253906, "loss": 0.2298, "objective": 0.2287740260362625, "ranking_idealized": 0.46875, "ranking_idealized_expo": 0.4437499940395355, "ranking_simple": 0.4437499940395355, "regularize": 0.15749232470989227, "step": 690 }, { "dpo_loss": 0.6920118927955627, "epoch": 1.3131790269248937, "grad_norm": 92.07098509412019, "learning_rate": 4.986252959104624e-07, "logits": -1.3434112071990967, "logps": -89.19305419921875, "loss": 0.2318, "objective": 0.2392226755619049, "ranking_idealized": 0.550000011920929, "ranking_idealized_expo": 0.518750011920929, "ranking_simple": 0.5249999761581421, "regularize": 0.17002148926258087, "step": 695 }, { "dpo_loss": 0.6873451471328735, "epoch": 1.3226263580538498, "grad_norm": 79.45369435451617, "learning_rate": 4.985375628265765e-07, "logits": -1.3786038160324097, "logps": -90.81690979003906, "loss": 0.2333, "objective": 0.2435322254896164, "ranking_idealized": 0.46875, "ranking_idealized_expo": 0.4375, "ranking_simple": 0.45625001192092896, "regularize": 0.17479772865772247, "step": 700 }, { "epoch": 1.3226263580538498, "eval_dpo_loss": 0.691523015499115, "eval_logits": -1.292966365814209, "eval_logps": -97.1070785522461, "eval_loss": 0.14701269567012787, "eval_objective": 0.14496096968650818, "eval_ranking_idealized": 0.5398550629615784, "eval_ranking_idealized_expo": 0.5243270993232727, "eval_ranking_simple": 0.5243270993232727, "eval_regularize": 0.07580868154764175, "eval_runtime": 159.9237, "eval_samples_per_second": 36.205, "eval_steps_per_second": 3.02, "step": 700 }, { "dpo_loss": 0.6949089765548706, "epoch": 1.3320736891828058, "grad_norm": 84.44151860295028, "learning_rate": 4.984471243205964e-07, "logits": -1.3508937358856201, "logps": -87.85145568847656, "loss": 0.2232, "objective": 0.19557249546051025, "ranking_idealized": 0.574999988079071, "ranking_idealized_expo": 0.512499988079071, "ranking_simple": 0.5062500238418579, "regularize": 0.12608161568641663, "step": 705 }, { "dpo_loss": 0.6862300038337708, "epoch": 1.3415210203117618, "grad_norm": 80.57908902050966, "learning_rate": 4.983539813769778e-07, "logits": -1.283381700515747, "logps": -90.93116760253906, "loss": 0.2168, "objective": 0.2195473164319992, "ranking_idealized": 0.543749988079071, "ranking_idealized_expo": 0.5375000238418579, "ranking_simple": 0.5375000238418579, "regularize": 0.15092433989048004, "step": 710 }, { "dpo_loss": 0.7053779363632202, "epoch": 1.350968351440718, "grad_norm": 89.9139558928414, "learning_rate": 4.98258135009616e-07, "logits": -1.34377121925354, "logps": -92.65654754638672, "loss": 0.2232, "objective": 0.22837159037590027, "ranking_idealized": 0.5625, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.5249999761581421, "regularize": 0.15783381462097168, "step": 715 }, { "dpo_loss": 0.7014961838722229, "epoch": 1.360415682569674, "grad_norm": 90.6743435807847, "learning_rate": 4.981595862618335e-07, "logits": -1.3795337677001953, "logps": -89.71587371826172, "loss": 0.2222, "objective": 0.20193001627922058, "ranking_idealized": 0.550000011920929, "ranking_idealized_expo": 0.53125, "ranking_simple": 0.5249999761581421, "regularize": 0.13178040087223053, "step": 720 }, { "dpo_loss": 0.6928237080574036, "epoch": 1.36986301369863, "grad_norm": 84.96138592580893, "learning_rate": 4.980583362063696e-07, "logits": -1.3667652606964111, "logps": -88.43769073486328, "loss": 0.2381, "objective": 0.22285565733909607, "ranking_idealized": 0.44999998807907104, "ranking_idealized_expo": 0.4375, "ranking_simple": 0.4375, "regularize": 0.15357330441474915, "step": 725 }, { "dpo_loss": 0.6958102583885193, "epoch": 1.3793103448275863, "grad_norm": 81.63702429187545, "learning_rate": 4.97954385945368e-07, "logits": -1.3288919925689697, "logps": -93.19049072265625, "loss": 0.2209, "objective": 0.2079770565032959, "ranking_idealized": 0.5562499761581421, "ranking_idealized_expo": 0.543749988079071, "ranking_simple": 0.550000011920929, "regularize": 0.1383960247039795, "step": 730 }, { "dpo_loss": 0.691160261631012, "epoch": 1.3887576759565423, "grad_norm": 88.03221762034345, "learning_rate": 4.978477366103651e-07, "logits": -1.3048386573791504, "logps": -94.25810241699219, "loss": 0.2502, "objective": 0.25141897797584534, "ranking_idealized": 0.543749988079071, "ranking_idealized_expo": 0.543749988079071, "ranking_simple": 0.518750011920929, "regularize": 0.18230298161506653, "step": 735 }, { "dpo_loss": 0.6944637298583984, "epoch": 1.3982050070854983, "grad_norm": 87.28974817363158, "learning_rate": 4.977383893622782e-07, "logits": -1.3183424472808838, "logps": -94.75113677978516, "loss": 0.2423, "objective": 0.26133081316947937, "ranking_idealized": 0.5062500238418579, "ranking_idealized_expo": 0.4937500059604645, "ranking_simple": 0.4749999940395355, "regularize": 0.19188445806503296, "step": 740 }, { "dpo_loss": 0.6745160818099976, "epoch": 1.4076523382144543, "grad_norm": 96.76417312133702, "learning_rate": 4.976263453913917e-07, "logits": -1.2408394813537598, "logps": -93.50255584716797, "loss": 0.2262, "objective": 0.23426973819732666, "ranking_idealized": 0.46875, "ranking_idealized_expo": 0.44999998807907104, "ranking_simple": 0.45625001192092896, "regularize": 0.16681811213493347, "step": 745 }, { "dpo_loss": 0.679398238658905, "epoch": 1.4170996693434104, "grad_norm": 83.00130223599811, "learning_rate": 4.975116059173451e-07, "logits": -1.2838294506072998, "logps": -92.10679626464844, "loss": 0.229, "objective": 0.23130765557289124, "ranking_idealized": 0.518750011920929, "ranking_idealized_expo": 0.4937500059604645, "ranking_simple": 0.4937500059604645, "regularize": 0.16336780786514282, "step": 750 }, { "epoch": 1.4170996693434104, "eval_dpo_loss": 0.6928658485412598, "eval_logits": -1.2689110040664673, "eval_logps": -97.09227752685547, "eval_loss": 0.17181387543678284, "eval_objective": 0.17250551283359528, "eval_ranking_idealized": 0.5398550629615784, "eval_ranking_idealized_expo": 0.5243270993232727, "eval_ranking_simple": 0.523809552192688, "eval_regularize": 0.1032189130783081, "eval_runtime": 157.4279, "eval_samples_per_second": 36.779, "eval_steps_per_second": 3.068, "step": 750 }, { "dpo_loss": 0.6830964684486389, "epoch": 1.4265470004723666, "grad_norm": 95.57749503310629, "learning_rate": 4.973941721891196e-07, "logits": -1.342911958694458, "logps": -92.39205169677734, "loss": 0.2388, "objective": 0.2358742505311966, "ranking_idealized": 0.581250011920929, "ranking_idealized_expo": 0.5687500238418579, "ranking_simple": 0.5625, "regularize": 0.16756460070610046, "step": 755 }, { "dpo_loss": 0.6861435174942017, "epoch": 1.4359943316013226, "grad_norm": 94.3425647575578, "learning_rate": 4.972740454850243e-07, "logits": -1.2989110946655273, "logps": -91.51603698730469, "loss": 0.2407, "objective": 0.24070945382118225, "ranking_idealized": 0.4937500059604645, "ranking_idealized_expo": 0.48124998807907104, "ranking_simple": 0.46875, "regularize": 0.17209510505199432, "step": 760 }, { "dpo_loss": 0.7200834155082703, "epoch": 1.4454416627302786, "grad_norm": 83.8026116090842, "learning_rate": 4.971512271126819e-07, "logits": -1.3317606449127197, "logps": -92.68598937988281, "loss": 0.252, "objective": 0.24466517567634583, "ranking_idealized": 0.550000011920929, "ranking_idealized_expo": 0.5375000238418579, "ranking_simple": 0.53125, "regularize": 0.1726568043231964, "step": 765 }, { "dpo_loss": 0.6894422769546509, "epoch": 1.4548889938592349, "grad_norm": 88.18228914725576, "learning_rate": 4.970257184090156e-07, "logits": -1.3856732845306396, "logps": -94.49053192138672, "loss": 0.2368, "objective": 0.2500324249267578, "ranking_idealized": 0.5, "ranking_idealized_expo": 0.44999998807907104, "ranking_simple": 0.46875, "regularize": 0.18108820915222168, "step": 770 }, { "dpo_loss": 0.6978266835212708, "epoch": 1.4643363249881909, "grad_norm": 84.42047599647562, "learning_rate": 4.968975207402331e-07, "logits": -1.299525260925293, "logps": -90.47492218017578, "loss": 0.2412, "objective": 0.2401023656129837, "ranking_idealized": 0.53125, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.48750001192092896, "regularize": 0.17031969130039215, "step": 775 }, { "dpo_loss": 0.6908974051475525, "epoch": 1.473783656117147, "grad_norm": 93.16672039442666, "learning_rate": 4.96766635501813e-07, "logits": -1.2777698040008545, "logps": -90.28379821777344, "loss": 0.2397, "objective": 0.23492303490638733, "ranking_idealized": 0.5, "ranking_idealized_expo": 0.46875, "ranking_simple": 0.46875, "regularize": 0.1658332794904709, "step": 780 }, { "dpo_loss": 0.7020236849784851, "epoch": 1.483230987246103, "grad_norm": 96.38647725910744, "learning_rate": 4.966330641184889e-07, "logits": -1.2058895826339722, "logps": -89.30281829833984, "loss": 0.2408, "objective": 0.2349843531847, "ranking_idealized": 0.53125, "ranking_idealized_expo": 0.512499988079071, "ranking_simple": 0.512499988079071, "regularize": 0.16478195786476135, "step": 785 }, { "dpo_loss": 0.6749367117881775, "epoch": 1.492678318375059, "grad_norm": 80.80313448250223, "learning_rate": 4.964968080442341e-07, "logits": -1.3213014602661133, "logps": -89.11174011230469, "loss": 0.237, "objective": 0.23509947955608368, "ranking_idealized": 0.543749988079071, "ranking_idealized_expo": 0.4749999940395355, "ranking_simple": 0.48124998807907104, "regularize": 0.16760580241680145, "step": 790 }, { "dpo_loss": 0.6985687613487244, "epoch": 1.5021256495040152, "grad_norm": 87.52434485748383, "learning_rate": 4.963578687622455e-07, "logits": -1.3071268796920776, "logps": -92.3751449584961, "loss": 0.2348, "objective": 0.23419027030467987, "ranking_idealized": 0.5375000238418579, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.5249999761581421, "regularize": 0.16433341801166534, "step": 795 }, { "dpo_loss": 0.6958016157150269, "epoch": 1.5115729806329712, "grad_norm": 87.68870839452211, "learning_rate": 4.962162477849281e-07, "logits": -1.3120272159576416, "logps": -90.5923843383789, "loss": 0.2565, "objective": 0.2664094567298889, "ranking_idealized": 0.512499988079071, "ranking_idealized_expo": 0.512499988079071, "ranking_simple": 0.518750011920929, "regularize": 0.19682928919792175, "step": 800 }, { "epoch": 1.5115729806329712, "eval_dpo_loss": 0.6944370865821838, "eval_logits": -1.2540355920791626, "eval_logps": -97.26213073730469, "eval_loss": 0.18168330192565918, "eval_objective": 0.1830022782087326, "eval_ranking_idealized": 0.5398550629615784, "eval_ranking_idealized_expo": 0.5243270993232727, "eval_ranking_simple": 0.5243270993232727, "eval_regularize": 0.1135585755109787, "eval_runtime": 158.4981, "eval_samples_per_second": 36.53, "eval_steps_per_second": 3.047, "step": 800 }, { "dpo_loss": 0.7026247978210449, "epoch": 1.5210203117619272, "grad_norm": 81.65317643314856, "learning_rate": 4.96071946653878e-07, "logits": -1.3086163997650146, "logps": -90.99162292480469, "loss": 0.238, "objective": 0.2365531474351883, "ranking_idealized": 0.6000000238418579, "ranking_idealized_expo": 0.543749988079071, "ranking_simple": 0.5375000238418579, "regularize": 0.16629067063331604, "step": 805 }, { "dpo_loss": 0.6891599893569946, "epoch": 1.5304676428908834, "grad_norm": 82.12999785328469, "learning_rate": 4.959249669398655e-07, "logits": -1.2432034015655518, "logps": -91.92169189453125, "loss": 0.2353, "objective": 0.2502974569797516, "ranking_idealized": 0.518750011920929, "ranking_idealized_expo": 0.53125, "ranking_simple": 0.5249999761581421, "regularize": 0.1813814640045166, "step": 810 }, { "dpo_loss": 0.6978787779808044, "epoch": 1.5399149740198395, "grad_norm": 86.46569624354272, "learning_rate": 4.957753102428184e-07, "logits": -1.3116600513458252, "logps": -91.28858947753906, "loss": 0.2343, "objective": 0.23753111064434052, "ranking_idealized": 0.612500011920929, "ranking_idealized_expo": 0.6000000238418579, "ranking_simple": 0.59375, "regularize": 0.1677432358264923, "step": 815 }, { "dpo_loss": 0.6800965070724487, "epoch": 1.5493623051487955, "grad_norm": 86.12978638347721, "learning_rate": 4.956229781918047e-07, "logits": -1.3178225755691528, "logps": -92.11973571777344, "loss": 0.23, "objective": 0.23273181915283203, "ranking_idealized": 0.53125, "ranking_idealized_expo": 0.4625000059604645, "ranking_simple": 0.4625000059604645, "regularize": 0.16472215950489044, "step": 820 }, { "dpo_loss": 0.6855944395065308, "epoch": 1.5588096362777515, "grad_norm": 91.07443851138821, "learning_rate": 4.954679724450142e-07, "logits": -1.3200221061706543, "logps": -91.27191162109375, "loss": 0.2389, "objective": 0.23568864166736603, "ranking_idealized": 0.5375000238418579, "ranking_idealized_expo": 0.5062500238418579, "ranking_simple": 0.5375000238418579, "regularize": 0.16712923347949982, "step": 825 }, { "dpo_loss": 0.6887711882591248, "epoch": 1.5682569674067075, "grad_norm": 79.19312631115046, "learning_rate": 4.953102946897411e-07, "logits": -1.2187261581420898, "logps": -90.7856216430664, "loss": 0.2384, "objective": 0.24422509968280792, "ranking_idealized": 0.518750011920929, "ranking_idealized_expo": 0.4749999940395355, "ranking_simple": 0.48750001192092896, "regularize": 0.1753479689359665, "step": 830 }, { "dpo_loss": 0.6980452537536621, "epoch": 1.5777042985356635, "grad_norm": 94.31876516475398, "learning_rate": 4.951499466423653e-07, "logits": -1.2923799753189087, "logps": -92.07442474365234, "loss": 0.239, "objective": 0.24555882811546326, "ranking_idealized": 0.581250011920929, "ranking_idealized_expo": 0.574999988079071, "ranking_simple": 0.574999988079071, "regularize": 0.17575430870056152, "step": 835 }, { "dpo_loss": 0.7045887112617493, "epoch": 1.5871516296646198, "grad_norm": 81.47411045744263, "learning_rate": 4.949869300483338e-07, "logits": -1.2757551670074463, "logps": -90.0054702758789, "loss": 0.2412, "objective": 0.24159309267997742, "ranking_idealized": 0.5062500238418579, "ranking_idealized_expo": 0.5, "ranking_simple": 0.5062500238418579, "regularize": 0.17113421857357025, "step": 840 }, { "dpo_loss": 0.6911923885345459, "epoch": 1.5965989607935758, "grad_norm": 80.29449667582183, "learning_rate": 4.948212466821419e-07, "logits": -1.2297747135162354, "logps": -91.9012680053711, "loss": 0.2365, "objective": 0.2445167601108551, "ranking_idealized": 0.48750001192092896, "ranking_idealized_expo": 0.4312500059604645, "ranking_simple": 0.44999998807907104, "regularize": 0.17539751529693604, "step": 845 }, { "dpo_loss": 0.7042365074157715, "epoch": 1.606046291922532, "grad_norm": 86.21783815042541, "learning_rate": 4.946528983473133e-07, "logits": -1.2861218452453613, "logps": -88.93782043457031, "loss": 0.2479, "objective": 0.24865977466106415, "ranking_idealized": 0.4749999940395355, "ranking_idealized_expo": 0.4312500059604645, "ranking_simple": 0.4312500059604645, "regularize": 0.17823612689971924, "step": 850 }, { "epoch": 1.606046291922532, "eval_dpo_loss": 0.6945503950119019, "eval_logits": -1.2707864046096802, "eval_logps": -96.34225463867188, "eval_loss": 0.18642590939998627, "eval_objective": 0.18530656397342682, "eval_ranking_idealized": 0.5398550629615784, "eval_ranking_idealized_expo": 0.5243270993232727, "eval_ranking_simple": 0.5243270993232727, "eval_regularize": 0.11585152894258499, "eval_runtime": 159.8355, "eval_samples_per_second": 36.225, "eval_steps_per_second": 3.022, "step": 850 }, { "dpo_loss": 0.7014453411102295, "epoch": 1.615493623051488, "grad_norm": 94.62150643181256, "learning_rate": 4.944818868763813e-07, "logits": -1.446413278579712, "logps": -92.63004302978516, "loss": 0.2459, "objective": 0.2486976683139801, "ranking_idealized": 0.5562499761581421, "ranking_idealized_expo": 0.59375, "ranking_simple": 0.581250011920929, "regularize": 0.17855311930179596, "step": 855 }, { "dpo_loss": 0.7022618055343628, "epoch": 1.624940954180444, "grad_norm": 84.34142777503043, "learning_rate": 4.943082141308679e-07, "logits": -1.2727842330932617, "logps": -88.67359161376953, "loss": 0.2329, "objective": 0.21688945591449738, "ranking_idealized": 0.606249988079071, "ranking_idealized_expo": 0.543749988079071, "ranking_simple": 0.5249999761581421, "regularize": 0.14666327834129333, "step": 860 }, { "dpo_loss": 0.698417603969574, "epoch": 1.6343882853094, "grad_norm": 82.98354606550087, "learning_rate": 4.941318820012645e-07, "logits": -1.3707373142242432, "logps": -89.36933898925781, "loss": 0.2314, "objective": 0.22971661388874054, "ranking_idealized": 0.606249988079071, "ranking_idealized_expo": 0.574999988079071, "ranking_simple": 0.574999988079071, "regularize": 0.159874826669693, "step": 865 }, { "dpo_loss": 0.6885509490966797, "epoch": 1.643835616438356, "grad_norm": 81.18325168892137, "learning_rate": 4.939528924070107e-07, "logits": -1.3568060398101807, "logps": -90.24173736572266, "loss": 0.2252, "objective": 0.2341945618391037, "ranking_idealized": 0.550000011920929, "ranking_idealized_expo": 0.5375000238418579, "ranking_simple": 0.5375000238418579, "regularize": 0.16533946990966797, "step": 870 }, { "dpo_loss": 0.7120766639709473, "epoch": 1.653282947567312, "grad_norm": 89.83602487849788, "learning_rate": 4.937712472964736e-07, "logits": -1.3132705688476562, "logps": -89.10626220703125, "loss": 0.2492, "objective": 0.2398935854434967, "ranking_idealized": 0.5249999761581421, "ranking_idealized_expo": 0.4749999940395355, "ranking_simple": 0.48750001192092896, "regularize": 0.1686859279870987, "step": 875 }, { "dpo_loss": 0.6932774186134338, "epoch": 1.6627302786962683, "grad_norm": 78.3424148808078, "learning_rate": 4.935869486469266e-07, "logits": -1.2213243246078491, "logps": -90.08100891113281, "loss": 0.2424, "objective": 0.2528541684150696, "ranking_idealized": 0.46875, "ranking_idealized_expo": 0.4375, "ranking_simple": 0.4312500059604645, "regularize": 0.1835264414548874, "step": 880 }, { "dpo_loss": 0.7122358083724976, "epoch": 1.6721776098252243, "grad_norm": 81.83908311068858, "learning_rate": 4.933999984645275e-07, "logits": -1.3308547735214233, "logps": -88.36769104003906, "loss": 0.2362, "objective": 0.23830123245716095, "ranking_idealized": 0.5687500238418579, "ranking_idealized_expo": 0.53125, "ranking_simple": 0.518750011920929, "regularize": 0.16707763075828552, "step": 885 }, { "dpo_loss": 0.6975988745689392, "epoch": 1.6816249409541806, "grad_norm": 85.91803463556262, "learning_rate": 4.932103987842975e-07, "logits": -1.2521806955337524, "logps": -88.85072326660156, "loss": 0.2473, "objective": 0.237627312541008, "ranking_idealized": 0.5375000238418579, "ranking_idealized_expo": 0.4937500059604645, "ranking_simple": 0.4937500059604645, "regularize": 0.16786742210388184, "step": 890 }, { "dpo_loss": 0.7044867277145386, "epoch": 1.6910722720831366, "grad_norm": 79.98253861185327, "learning_rate": 4.930181516700982e-07, "logits": -1.269334077835083, "logps": -91.34991455078125, "loss": 0.2509, "objective": 0.2616916298866272, "ranking_idealized": 0.5625, "ranking_idealized_expo": 0.512499988079071, "ranking_simple": 0.518750011920929, "regularize": 0.19124294817447662, "step": 895 }, { "dpo_loss": 0.6823757886886597, "epoch": 1.7005196032120926, "grad_norm": 81.00522927194797, "learning_rate": 4.928232592146097e-07, "logits": -1.3042701482772827, "logps": -92.00179290771484, "loss": 0.2586, "objective": 0.26876914501190186, "ranking_idealized": 0.5625, "ranking_idealized_expo": 0.518750011920929, "ranking_simple": 0.518750011920929, "regularize": 0.20053155720233917, "step": 900 }, { "epoch": 1.7005196032120926, "eval_dpo_loss": 0.6944207549095154, "eval_logits": -1.2622801065444946, "eval_logps": -97.21571350097656, "eval_loss": 0.18386565148830414, "eval_objective": 0.18254661560058594, "eval_ranking_idealized": 0.5398550629615784, "eval_ranking_idealized_expo": 0.5243270993232727, "eval_ranking_simple": 0.5222567319869995, "eval_regularize": 0.11310453712940216, "eval_runtime": 157.6535, "eval_samples_per_second": 36.726, "eval_steps_per_second": 3.064, "step": 900 }, { "dpo_loss": 0.7131351828575134, "epoch": 1.7099669343410486, "grad_norm": 92.4488887165358, "learning_rate": 4.926257235393077e-07, "logits": -1.273389220237732, "logps": -91.80742645263672, "loss": 0.263, "objective": 0.273505836725235, "ranking_idealized": 0.5249999761581421, "ranking_idealized_expo": 0.512499988079071, "ranking_simple": 0.5062500238418579, "regularize": 0.2021923065185547, "step": 905 }, { "dpo_loss": 0.6990777254104614, "epoch": 1.7194142654700046, "grad_norm": 89.78081376469598, "learning_rate": 4.924255467944397e-07, "logits": -1.21916925907135, "logps": -93.84732818603516, "loss": 0.2557, "objective": 0.2540630102157593, "ranking_idealized": 0.574999988079071, "ranking_idealized_expo": 0.5625, "ranking_simple": 0.550000011920929, "regularize": 0.18415522575378418, "step": 910 }, { "dpo_loss": 0.7060036659240723, "epoch": 1.7288615965989607, "grad_norm": 84.34105627107965, "learning_rate": 4.922227311590029e-07, "logits": -1.328016996383667, "logps": -89.12452697753906, "loss": 0.251, "objective": 0.24675559997558594, "ranking_idealized": 0.625, "ranking_idealized_expo": 0.574999988079071, "ranking_simple": 0.5874999761581421, "regularize": 0.17615528404712677, "step": 915 }, { "dpo_loss": 0.7035702466964722, "epoch": 1.738308927727917, "grad_norm": 101.23265714387522, "learning_rate": 4.920172788407195e-07, "logits": -1.3243465423583984, "logps": -90.22145080566406, "loss": 0.2595, "objective": 0.2632547616958618, "ranking_idealized": 0.5874999761581421, "ranking_idealized_expo": 0.5625, "ranking_simple": 0.5625, "regularize": 0.1928977072238922, "step": 920 }, { "dpo_loss": 0.704595685005188, "epoch": 1.747756258856873, "grad_norm": 86.12783856162052, "learning_rate": 4.91809192076013e-07, "logits": -1.3240232467651367, "logps": -90.23241424560547, "loss": 0.2428, "objective": 0.23700585961341858, "ranking_idealized": 0.5562499761581421, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.5375000238418579, "regularize": 0.1665463149547577, "step": 925 }, { "dpo_loss": 0.6912989020347595, "epoch": 1.7572035899858292, "grad_norm": 88.12461065279146, "learning_rate": 4.915984731299838e-07, "logits": -1.3094854354858398, "logps": -87.85308837890625, "loss": 0.2451, "objective": 0.24931573867797852, "ranking_idealized": 0.550000011920929, "ranking_idealized_expo": 0.518750011920929, "ranking_simple": 0.5249999761581421, "regularize": 0.18018585443496704, "step": 930 }, { "dpo_loss": 0.6917027831077576, "epoch": 1.7666509211147852, "grad_norm": 82.18255854656601, "learning_rate": 4.913851242963846e-07, "logits": -1.3129820823669434, "logps": -89.41729736328125, "loss": 0.2462, "objective": 0.22966830432415009, "ranking_idealized": 0.550000011920929, "ranking_idealized_expo": 0.5062500238418579, "ranking_simple": 0.518750011920929, "regularize": 0.1604980230331421, "step": 935 }, { "dpo_loss": 0.6987249255180359, "epoch": 1.7760982522437412, "grad_norm": 84.73374743748703, "learning_rate": 4.91169147897595e-07, "logits": -1.2820937633514404, "logps": -89.26692199707031, "loss": 0.2313, "objective": 0.2306392639875412, "ranking_idealized": 0.518750011920929, "ranking_idealized_expo": 0.4749999940395355, "ranking_simple": 0.4749999940395355, "regularize": 0.16076678037643433, "step": 940 }, { "dpo_loss": 0.6959508061408997, "epoch": 1.7855455833726972, "grad_norm": 83.20896646756478, "learning_rate": 4.909505462845974e-07, "logits": -1.3393566608428955, "logps": -89.69905090332031, "loss": 0.2387, "objective": 0.23756399750709534, "ranking_idealized": 0.5249999761581421, "ranking_idealized_expo": 0.518750011920929, "ranking_simple": 0.512499988079071, "regularize": 0.16796889901161194, "step": 945 }, { "dpo_loss": 0.6779564023017883, "epoch": 1.7949929145016532, "grad_norm": 84.28401023606136, "learning_rate": 4.907293218369498e-07, "logits": -1.368841290473938, "logps": -88.40916442871094, "loss": 0.2347, "objective": 0.21868371963500977, "ranking_idealized": 0.612500011920929, "ranking_idealized_expo": 0.581250011920929, "ranking_simple": 0.581250011920929, "regularize": 0.1508881002664566, "step": 950 }, { "epoch": 1.7949929145016532, "eval_dpo_loss": 0.6945188641548157, "eval_logits": -1.2677661180496216, "eval_logps": -94.84024810791016, "eval_loss": 0.19953110814094543, "eval_objective": 0.19893400371074677, "eval_ranking_idealized": 0.5398550629615784, "eval_ranking_idealized_expo": 0.5243270993232727, "eval_ranking_simple": 0.523809552192688, "eval_regularize": 0.12948210537433624, "eval_runtime": 157.0713, "eval_samples_per_second": 36.862, "eval_steps_per_second": 3.075, "step": 950 }, { "dpo_loss": 0.6959952712059021, "epoch": 1.8044402456306092, "grad_norm": 82.19115094683073, "learning_rate": 4.905054769627612e-07, "logits": -1.3279144763946533, "logps": -89.822021484375, "loss": 0.2426, "objective": 0.22453975677490234, "ranking_idealized": 0.5375000238418579, "ranking_idealized_expo": 0.518750011920929, "ranking_simple": 0.53125, "regularize": 0.15494023263454437, "step": 955 }, { "dpo_loss": 0.6977975964546204, "epoch": 1.8138875767595655, "grad_norm": 79.54284804882354, "learning_rate": 4.902790140986649e-07, "logits": -1.3207134008407593, "logps": -89.81251525878906, "loss": 0.2421, "objective": 0.23721659183502197, "ranking_idealized": 0.5249999761581421, "ranking_idealized_expo": 0.5062500238418579, "ranking_simple": 0.4937500059604645, "regularize": 0.16743679344654083, "step": 960 }, { "dpo_loss": 0.700217604637146, "epoch": 1.8233349078885215, "grad_norm": 80.11794966712507, "learning_rate": 4.900499357097915e-07, "logits": -1.3014113903045654, "logps": -89.26399230957031, "loss": 0.2313, "objective": 0.24520444869995117, "ranking_idealized": 0.637499988079071, "ranking_idealized_expo": 0.581250011920929, "ranking_simple": 0.581250011920929, "regularize": 0.17518267035484314, "step": 965 }, { "dpo_loss": 0.6830806732177734, "epoch": 1.8327822390174777, "grad_norm": 85.50728061889993, "learning_rate": 4.898182442897432e-07, "logits": -1.3395977020263672, "logps": -90.12285614013672, "loss": 0.2441, "objective": 0.2601337432861328, "ranking_idealized": 0.6000000238418579, "ranking_idealized_expo": 0.5562499761581421, "ranking_simple": 0.574999988079071, "regularize": 0.19182565808296204, "step": 970 }, { "dpo_loss": 0.7140229940414429, "epoch": 1.8422295701464337, "grad_norm": 92.90130732582263, "learning_rate": 4.895839423605656e-07, "logits": -1.364551305770874, "logps": -90.04396057128906, "loss": 0.2498, "objective": 0.24496057629585266, "ranking_idealized": 0.512499988079071, "ranking_idealized_expo": 0.5, "ranking_simple": 0.4937500059604645, "regularize": 0.17355826497077942, "step": 975 }, { "dpo_loss": 0.6796795725822449, "epoch": 1.8516769012753898, "grad_norm": 83.19455027012233, "learning_rate": 4.893470324727208e-07, "logits": -1.2256380319595337, "logps": -87.5365982055664, "loss": 0.2305, "objective": 0.21892723441123962, "ranking_idealized": 0.53125, "ranking_idealized_expo": 0.518750011920929, "ranking_simple": 0.53125, "regularize": 0.15095928311347961, "step": 980 }, { "dpo_loss": 0.6931859254837036, "epoch": 1.8611242324043458, "grad_norm": 81.35496778915645, "learning_rate": 4.891075172050591e-07, "logits": -1.340255856513977, "logps": -88.29389953613281, "loss": 0.2303, "objective": 0.22776278853416443, "ranking_idealized": 0.48750001192092896, "ranking_idealized_expo": 0.48124998807907104, "ranking_simple": 0.48124998807907104, "regularize": 0.15844415128231049, "step": 985 }, { "dpo_loss": 0.7015528678894043, "epoch": 1.8705715635333018, "grad_norm": 77.84141782754642, "learning_rate": 4.888653991647919e-07, "logits": -1.3628294467926025, "logps": -90.3727035522461, "loss": 0.2384, "objective": 0.2526736855506897, "ranking_idealized": 0.5562499761581421, "ranking_idealized_expo": 0.5375000238418579, "ranking_simple": 0.53125, "regularize": 0.1825183928012848, "step": 990 }, { "dpo_loss": 0.6984178423881531, "epoch": 1.8800188946622578, "grad_norm": 84.78671948973349, "learning_rate": 4.886206809874623e-07, "logits": -1.2810890674591064, "logps": -91.11246490478516, "loss": 0.2554, "objective": 0.25629252195358276, "ranking_idealized": 0.6000000238418579, "ranking_idealized_expo": 0.574999988079071, "ranking_simple": 0.574999988079071, "regularize": 0.18645073473453522, "step": 995 }, { "dpo_loss": 0.7016985416412354, "epoch": 1.8894662257912138, "grad_norm": 77.84474982912333, "learning_rate": 4.883733653369172e-07, "logits": -1.3840316534042358, "logps": -87.68888854980469, "loss": 0.2414, "objective": 0.23510241508483887, "ranking_idealized": 0.574999988079071, "ranking_idealized_expo": 0.518750011920929, "ranking_simple": 0.5249999761581421, "regularize": 0.16493254899978638, "step": 1000 }, { "epoch": 1.8894662257912138, "eval_dpo_loss": 0.692449152469635, "eval_logits": -1.2579281330108643, "eval_logps": -95.87931823730469, "eval_loss": 0.18945148587226868, "eval_objective": 0.19014763832092285, "eval_ranking_idealized": 0.5398550629615784, "eval_ranking_idealized_expo": 0.5243270993232727, "eval_ranking_simple": 0.5253623127937317, "eval_regularize": 0.12090273201465607, "eval_runtime": 157.7109, "eval_samples_per_second": 36.713, "eval_steps_per_second": 3.063, "step": 1000 }, { "dpo_loss": 0.6800759434700012, "epoch": 1.89891355692017, "grad_norm": 81.6394205444093, "learning_rate": 4.881234549052775e-07, "logits": -1.2515195608139038, "logps": -88.47236633300781, "loss": 0.2396, "objective": 0.2439851462841034, "ranking_idealized": 0.48750001192092896, "ranking_idealized_expo": 0.46875, "ranking_simple": 0.4625000059604645, "regularize": 0.17597754299640656, "step": 1005 }, { "dpo_loss": 0.6951671838760376, "epoch": 1.908360888049126, "grad_norm": 82.6142617114122, "learning_rate": 4.878709524129096e-07, "logits": -1.1769020557403564, "logps": -88.99317169189453, "loss": 0.233, "objective": 0.2350684106349945, "ranking_idealized": 0.550000011920929, "ranking_idealized_expo": 0.5, "ranking_simple": 0.4937500059604645, "regularize": 0.16555170714855194, "step": 1010 }, { "dpo_loss": 0.704579770565033, "epoch": 1.9178082191780823, "grad_norm": 109.58609659876082, "learning_rate": 4.876158606083952e-07, "logits": -1.350940227508545, "logps": -90.60091400146484, "loss": 0.2504, "objective": 0.26667410135269165, "ranking_idealized": 0.574999988079071, "ranking_idealized_expo": 0.53125, "ranking_simple": 0.543749988079071, "regularize": 0.1962161660194397, "step": 1015 }, { "dpo_loss": 0.684603214263916, "epoch": 1.9272555503070383, "grad_norm": 75.7008642804927, "learning_rate": 4.873581822685019e-07, "logits": -1.298457384109497, "logps": -92.027099609375, "loss": 0.2523, "objective": 0.24609248340129852, "ranking_idealized": 0.5687500238418579, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.518750011920929, "regularize": 0.1776321530342102, "step": 1020 }, { "dpo_loss": 0.6828368306159973, "epoch": 1.9367028814359943, "grad_norm": 78.59042762552068, "learning_rate": 4.870979201981523e-07, "logits": -1.2884193658828735, "logps": -88.74574279785156, "loss": 0.2481, "objective": 0.23535391688346863, "ranking_idealized": 0.48750001192092896, "ranking_idealized_expo": 0.48124998807907104, "ranking_simple": 0.4625000059604645, "regularize": 0.16707023978233337, "step": 1025 }, { "dpo_loss": 0.7131926417350769, "epoch": 1.9461502125649504, "grad_norm": 78.49738347088316, "learning_rate": 4.86835077230394e-07, "logits": -1.2955366373062134, "logps": -88.70613861083984, "loss": 0.2431, "objective": 0.25340279936790466, "ranking_idealized": 0.4937500059604645, "ranking_idealized_expo": 0.48750001192092896, "ranking_simple": 0.4937500059604645, "regularize": 0.18208353221416473, "step": 1030 }, { "dpo_loss": 0.6981402635574341, "epoch": 1.9555975436939064, "grad_norm": 80.29900254916205, "learning_rate": 4.865696562263689e-07, "logits": -1.2850407361984253, "logps": -90.81657409667969, "loss": 0.2442, "objective": 0.24822275340557098, "ranking_idealized": 0.574999988079071, "ranking_idealized_expo": 0.5687500238418579, "ranking_simple": 0.5874999761581421, "regularize": 0.17840874195098877, "step": 1035 }, { "dpo_loss": 0.6980287432670593, "epoch": 1.9650448748228624, "grad_norm": 83.18234069075524, "learning_rate": 4.863016600752813e-07, "logits": -1.2231897115707397, "logps": -89.87462615966797, "loss": 0.2368, "objective": 0.24762320518493652, "ranking_idealized": 0.512499988079071, "ranking_idealized_expo": 0.5, "ranking_simple": 0.5062500238418579, "regularize": 0.1778203248977661, "step": 1040 }, { "dpo_loss": 0.6901535987854004, "epoch": 1.9744922059518186, "grad_norm": 78.95679943145011, "learning_rate": 4.860310916943672e-07, "logits": -1.2009848356246948, "logps": -90.7292709350586, "loss": 0.2456, "objective": 0.22756969928741455, "ranking_idealized": 0.574999988079071, "ranking_idealized_expo": 0.53125, "ranking_simple": 0.5249999761581421, "regularize": 0.1585543304681778, "step": 1045 }, { "dpo_loss": 0.6983585357666016, "epoch": 1.9839395370807746, "grad_norm": 82.01184585459328, "learning_rate": 4.857579540288622e-07, "logits": -1.270129680633545, "logps": -91.57951354980469, "loss": 0.2433, "objective": 0.2582865059375763, "ranking_idealized": 0.518750011920929, "ranking_idealized_expo": 0.48124998807907104, "ranking_simple": 0.4749999940395355, "regularize": 0.1884506642818451, "step": 1050 }, { "epoch": 1.9839395370807746, "eval_dpo_loss": 0.6923243403434753, "eval_logits": -1.2552448511123657, "eval_logps": -95.79696655273438, "eval_loss": 0.209737628698349, "eval_objective": 0.20681917667388916, "eval_ranking_idealized": 0.5398550629615784, "eval_ranking_idealized_expo": 0.5243270993232727, "eval_ranking_simple": 0.5258799195289612, "eval_regularize": 0.13758672773838043, "eval_runtime": 163.6808, "eval_samples_per_second": 35.374, "eval_steps_per_second": 2.951, "step": 1050 }, { "dpo_loss": 0.6885973811149597, "epoch": 1.9933868682097309, "grad_norm": 88.44691340438358, "learning_rate": 4.854822500519694e-07, "logits": -1.1901785135269165, "logps": -89.351806640625, "loss": 0.2419, "objective": 0.23133370280265808, "ranking_idealized": 0.48124998807907104, "ranking_idealized_expo": 0.4749999940395355, "ranking_simple": 0.4749999940395355, "regularize": 0.16247394680976868, "step": 1055 }, { "dpo_loss": 0.7313130497932434, "epoch": 2.002834199338687, "grad_norm": 77.72279899826333, "learning_rate": 4.852039827648274e-07, "logits": -1.2519927024841309, "logps": -90.05610656738281, "loss": 0.257, "objective": 0.25717490911483765, "ranking_idealized": 0.5625, "ranking_idealized_expo": 0.543749988079071, "ranking_simple": 0.53125, "regularize": 0.18404361605644226, "step": 1060 }, { "dpo_loss": 0.6962462067604065, "epoch": 2.012281530467643, "grad_norm": 81.63125239898291, "learning_rate": 4.849231551964771e-07, "logits": -1.2965519428253174, "logps": -88.1960220336914, "loss": 0.2471, "objective": 0.23747217655181885, "ranking_idealized": 0.5249999761581421, "ranking_idealized_expo": 0.5, "ranking_simple": 0.5, "regularize": 0.16784754395484924, "step": 1065 }, { "dpo_loss": 0.7073962688446045, "epoch": 2.021728861596599, "grad_norm": 77.69276192791024, "learning_rate": 4.84639770403829e-07, "logits": -1.2943260669708252, "logps": -89.67262268066406, "loss": 0.2268, "objective": 0.22726266086101532, "ranking_idealized": 0.643750011920929, "ranking_idealized_expo": 0.59375, "ranking_simple": 0.581250011920929, "regularize": 0.15652304887771606, "step": 1070 }, { "dpo_loss": 0.6832243204116821, "epoch": 2.031176192725555, "grad_norm": 83.4346543281612, "learning_rate": 4.843538314716303e-07, "logits": -1.2915871143341064, "logps": -89.31999969482422, "loss": 0.2427, "objective": 0.24280743300914764, "ranking_idealized": 0.5249999761581421, "ranking_idealized_expo": 0.5062500238418579, "ranking_simple": 0.4937500059604645, "regularize": 0.1744849979877472, "step": 1075 }, { "dpo_loss": 0.7055403590202332, "epoch": 2.040623523854511, "grad_norm": 77.27965034026705, "learning_rate": 4.840653415124302e-07, "logits": -1.2398439645767212, "logps": -89.6719970703125, "loss": 0.2294, "objective": 0.2320195883512497, "ranking_idealized": 0.581250011920929, "ranking_idealized_expo": 0.518750011920929, "ranking_simple": 0.512499988079071, "regularize": 0.1614655703306198, "step": 1080 }, { "dpo_loss": 0.6916980743408203, "epoch": 2.050070854983467, "grad_norm": 77.59782281381496, "learning_rate": 4.837743036665476e-07, "logits": -1.2724249362945557, "logps": -91.91465759277344, "loss": 0.2537, "objective": 0.2678667902946472, "ranking_idealized": 0.53125, "ranking_idealized_expo": 0.4937500059604645, "ranking_simple": 0.4937500059604645, "regularize": 0.19869700074195862, "step": 1085 }, { "dpo_loss": 0.6935771107673645, "epoch": 2.0595181861124234, "grad_norm": 80.82666392609994, "learning_rate": 4.834807211020356e-07, "logits": -1.2286088466644287, "logps": -93.31673431396484, "loss": 0.2434, "objective": 0.25225305557250977, "ranking_idealized": 0.59375, "ranking_idealized_expo": 0.5375000238418579, "ranking_simple": 0.53125, "regularize": 0.18289534747600555, "step": 1090 }, { "dpo_loss": 0.6847450733184814, "epoch": 2.0689655172413794, "grad_norm": 77.40627853328184, "learning_rate": 4.831845970146474e-07, "logits": -1.3850970268249512, "logps": -90.1637191772461, "loss": 0.2472, "objective": 0.23211999237537384, "ranking_idealized": 0.4625000059604645, "ranking_idealized_expo": 0.4437499940395355, "ranking_simple": 0.45625001192092896, "regularize": 0.16364550590515137, "step": 1095 }, { "dpo_loss": 0.6987076997756958, "epoch": 2.0784128483703355, "grad_norm": 81.51363848488977, "learning_rate": 4.82885934627802e-07, "logits": -1.3133985996246338, "logps": -91.41629028320312, "loss": 0.2393, "objective": 0.2526317238807678, "ranking_idealized": 0.5375000238418579, "ranking_idealized_expo": 0.4937500059604645, "ranking_simple": 0.5, "regularize": 0.18276092410087585, "step": 1100 }, { "epoch": 2.0784128483703355, "eval_dpo_loss": 0.6961632370948792, "eval_logits": -1.242217779159546, "eval_logps": -96.93132019042969, "eval_loss": 0.21558791399002075, "eval_objective": 0.21485954523086548, "eval_ranking_idealized": 0.5398550629615784, "eval_ranking_idealized_expo": 0.5243270993232727, "eval_ranking_simple": 0.5263975262641907, "eval_regularize": 0.14524322748184204, "eval_runtime": 159.0028, "eval_samples_per_second": 36.414, "eval_steps_per_second": 3.038, "step": 1100 }, { "dpo_loss": 0.6975056529045105, "epoch": 2.0878601794992915, "grad_norm": 75.26532295370068, "learning_rate": 4.825847371925484e-07, "logits": -1.272645115852356, "logps": -89.9799575805664, "loss": 0.2312, "objective": 0.2198951244354248, "ranking_idealized": 0.5249999761581421, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.5249999761581421, "regularize": 0.1501445770263672, "step": 1105 }, { "dpo_loss": 0.6822449564933777, "epoch": 2.0973075106282475, "grad_norm": 76.11229862634485, "learning_rate": 4.822810079875308e-07, "logits": -1.2977901697158813, "logps": -88.89505767822266, "loss": 0.2413, "objective": 0.2520579695701599, "ranking_idealized": 0.5375000238418579, "ranking_idealized_expo": 0.518750011920929, "ranking_simple": 0.5249999761581421, "regularize": 0.18383343517780304, "step": 1110 }, { "dpo_loss": 0.6917838454246521, "epoch": 2.1067548417572035, "grad_norm": 81.10051542367798, "learning_rate": 4.819747503189522e-07, "logits": -1.268223524093628, "logps": -89.8685073852539, "loss": 0.234, "objective": 0.23825547099113464, "ranking_idealized": 0.5874999761581421, "ranking_idealized_expo": 0.574999988079071, "ranking_simple": 0.5625, "regularize": 0.1690770834684372, "step": 1115 }, { "dpo_loss": 0.6925947070121765, "epoch": 2.1162021728861595, "grad_norm": 73.79299474366525, "learning_rate": 4.816659675205392e-07, "logits": -1.2154170274734497, "logps": -89.65646362304688, "loss": 0.2313, "objective": 0.2250944823026657, "ranking_idealized": 0.5562499761581421, "ranking_idealized_expo": 0.512499988079071, "ranking_simple": 0.5062500238418579, "regularize": 0.15583501756191254, "step": 1120 }, { "dpo_loss": 0.6929166913032532, "epoch": 2.1256495040151155, "grad_norm": 79.44167597980932, "learning_rate": 4.813546629535053e-07, "logits": -1.3260807991027832, "logps": -87.94298553466797, "loss": 0.235, "objective": 0.2192649394273758, "ranking_idealized": 0.4937500059604645, "ranking_idealized_expo": 0.4437499940395355, "ranking_simple": 0.44999998807907104, "regularize": 0.1499732881784439, "step": 1125 }, { "dpo_loss": 0.6933780312538147, "epoch": 2.135096835144072, "grad_norm": 75.59134488100315, "learning_rate": 4.810408400065144e-07, "logits": -1.2588220834732056, "logps": -90.21156311035156, "loss": 0.2508, "objective": 0.25041699409484863, "ranking_idealized": 0.550000011920929, "ranking_idealized_expo": 0.53125, "ranking_simple": 0.5249999761581421, "regularize": 0.1810791939496994, "step": 1130 }, { "dpo_loss": 0.6917892694473267, "epoch": 2.144544166273028, "grad_norm": 78.62106956394454, "learning_rate": 4.807245020956437e-07, "logits": -1.3259994983673096, "logps": -89.59319305419922, "loss": 0.2293, "objective": 0.23598890006542206, "ranking_idealized": 0.543749988079071, "ranking_idealized_expo": 0.48750001192092896, "ranking_simple": 0.4937500059604645, "regularize": 0.16680999100208282, "step": 1135 }, { "dpo_loss": 0.7026151418685913, "epoch": 2.153991497401984, "grad_norm": 85.29348168581552, "learning_rate": 4.804056526643471e-07, "logits": -1.217145562171936, "logps": -90.12454986572266, "loss": 0.2412, "objective": 0.22202345728874207, "ranking_idealized": 0.512499988079071, "ranking_idealized_expo": 0.48124998807907104, "ranking_simple": 0.48124998807907104, "regularize": 0.15176193416118622, "step": 1140 }, { "dpo_loss": 0.701118528842926, "epoch": 2.16343882853094, "grad_norm": 82.7848574013617, "learning_rate": 4.800842951834168e-07, "logits": -1.3022010326385498, "logps": -90.82298278808594, "loss": 0.2479, "objective": 0.23917004466056824, "ranking_idealized": 0.581250011920929, "ranking_idealized_expo": 0.550000011920929, "ranking_simple": 0.543749988079071, "regularize": 0.1690582036972046, "step": 1145 }, { "dpo_loss": 0.6887177228927612, "epoch": 2.172886159659896, "grad_norm": 81.51256071950469, "learning_rate": 4.797604331509462e-07, "logits": -1.2722337245941162, "logps": -90.06324768066406, "loss": 0.2476, "objective": 0.24817052483558655, "ranking_idealized": 0.518750011920929, "ranking_idealized_expo": 0.5, "ranking_simple": 0.4937500059604645, "regularize": 0.17929872870445251, "step": 1150 }, { "epoch": 2.172886159659896, "eval_dpo_loss": 0.6958022713661194, "eval_logits": -1.2484749555587769, "eval_logps": -95.86177062988281, "eval_loss": 0.21953806281089783, "eval_objective": 0.21911680698394775, "eval_ranking_idealized": 0.5398550629615784, "eval_ranking_idealized_expo": 0.5243270993232727, "eval_ranking_simple": 0.523809552192688, "eval_regularize": 0.149536594748497, "eval_runtime": 157.4146, "eval_samples_per_second": 36.782, "eval_steps_per_second": 3.068, "step": 1150 }, { "dpo_loss": 0.7213719487190247, "epoch": 2.182333490788852, "grad_norm": 85.26999836671722, "learning_rate": 4.794340700922921e-07, "logits": -1.289499044418335, "logps": -88.68782806396484, "loss": 0.2405, "objective": 0.23491954803466797, "ranking_idealized": 0.518750011920929, "ranking_idealized_expo": 0.5, "ranking_simple": 0.5, "regularize": 0.16278235614299774, "step": 1155 }, { "dpo_loss": 0.7010141015052795, "epoch": 2.191780821917808, "grad_norm": 82.49417059894672, "learning_rate": 4.791052095600351e-07, "logits": -1.2551113367080688, "logps": -91.86698150634766, "loss": 0.2499, "objective": 0.23400485515594482, "ranking_idealized": 0.5874999761581421, "ranking_idealized_expo": 0.5687500238418579, "ranking_simple": 0.5562499761581421, "regularize": 0.16390344500541687, "step": 1160 }, { "dpo_loss": 0.6905627250671387, "epoch": 2.201228153046764, "grad_norm": 76.1785158086783, "learning_rate": 4.787738551339425e-07, "logits": -1.3215975761413574, "logps": -90.5303726196289, "loss": 0.2336, "objective": 0.24319115281105042, "ranking_idealized": 0.5625, "ranking_idealized_expo": 0.5625, "ranking_simple": 0.5625, "regularize": 0.17413488030433655, "step": 1165 }, { "dpo_loss": 0.6864585876464844, "epoch": 2.21067548417572, "grad_norm": 90.3766310186037, "learning_rate": 4.784400104209281e-07, "logits": -1.2362306118011475, "logps": -91.8830795288086, "loss": 0.2504, "objective": 0.24644558131694794, "ranking_idealized": 0.4312500059604645, "ranking_idealized_expo": 0.4312500059604645, "ranking_simple": 0.4312500059604645, "regularize": 0.1777997463941574, "step": 1170 }, { "dpo_loss": 0.6967033743858337, "epoch": 2.2201228153046766, "grad_norm": 81.5391367066229, "learning_rate": 4.781036790550133e-07, "logits": -1.2316911220550537, "logps": -90.62294006347656, "loss": 0.2378, "objective": 0.23412732779979706, "ranking_idealized": 0.518750011920929, "ranking_idealized_expo": 0.518750011920929, "ranking_simple": 0.48750001192092896, "regularize": 0.16445696353912354, "step": 1175 }, { "dpo_loss": 0.7060146331787109, "epoch": 2.2295701464336326, "grad_norm": 85.7375377050115, "learning_rate": 4.777648646972879e-07, "logits": -1.2484381198883057, "logps": -94.7322998046875, "loss": 0.2545, "objective": 0.24841733276844025, "ranking_idealized": 0.46875, "ranking_idealized_expo": 0.4437499940395355, "ranking_simple": 0.4437499940395355, "regularize": 0.17781586945056915, "step": 1180 }, { "dpo_loss": 0.7063915729522705, "epoch": 2.2390174775625886, "grad_norm": 74.78902429353171, "learning_rate": 4.774235710358699e-07, "logits": -1.1517055034637451, "logps": -91.82881927490234, "loss": 0.2337, "objective": 0.25040021538734436, "ranking_idealized": 0.612500011920929, "ranking_idealized_expo": 0.5687500238418579, "ranking_simple": 0.5687500238418579, "regularize": 0.17976108193397522, "step": 1185 }, { "dpo_loss": 0.6940587759017944, "epoch": 2.2484648086915446, "grad_norm": 79.15932210088977, "learning_rate": 4.770798017858653e-07, "logits": -1.2461541891098022, "logps": -91.99650573730469, "loss": 0.2465, "objective": 0.24219770729541779, "ranking_idealized": 0.4937500059604645, "ranking_idealized_expo": 0.4437499940395355, "ranking_simple": 0.46875, "regularize": 0.17279183864593506, "step": 1190 }, { "dpo_loss": 0.7148272395133972, "epoch": 2.2579121398205007, "grad_norm": 76.05933709488173, "learning_rate": 4.7673356068932786e-07, "logits": -1.22458815574646, "logps": -93.11870574951172, "loss": 0.2487, "objective": 0.26240792870521545, "ranking_idealized": 0.59375, "ranking_idealized_expo": 0.5874999761581421, "ranking_simple": 0.59375, "regularize": 0.19092519581317902, "step": 1195 }, { "dpo_loss": 0.7013410329818726, "epoch": 2.2673594709494567, "grad_norm": 77.60037125978356, "learning_rate": 4.763848515152184e-07, "logits": -1.3251944780349731, "logps": -91.91200256347656, "loss": 0.2443, "objective": 0.23794111609458923, "ranking_idealized": 0.5062500238418579, "ranking_idealized_expo": 0.46875, "ranking_simple": 0.4625000059604645, "regularize": 0.16780701279640198, "step": 1200 }, { "epoch": 2.2673594709494567, "eval_dpo_loss": 0.6998235583305359, "eval_logits": -1.224057912826538, "eval_logps": -97.13621520996094, "eval_loss": 0.23180466890335083, "eval_objective": 0.23165498673915863, "eval_ranking_idealized": 0.5398550629615784, "eval_ranking_idealized_expo": 0.5243270993232727, "eval_ranking_simple": 0.5258799195289612, "eval_regularize": 0.16167263686656952, "eval_runtime": 157.8532, "eval_samples_per_second": 36.68, "eval_steps_per_second": 3.06, "step": 1200 }, { "dpo_loss": 0.6951106786727905, "epoch": 2.2768068020784127, "grad_norm": 80.22359104079362, "learning_rate": 4.7603367805936347e-07, "logits": -1.2757624387741089, "logps": -91.18368530273438, "loss": 0.2394, "objective": 0.2431495636701584, "ranking_idealized": 0.48750001192092896, "ranking_idealized_expo": 0.45625001192092896, "ranking_simple": 0.45625001192092896, "regularize": 0.1736384928226471, "step": 1205 }, { "dpo_loss": 0.6933736801147461, "epoch": 2.286254133207369, "grad_norm": 72.68834763111872, "learning_rate": 4.7568004414441444e-07, "logits": -1.2526779174804688, "logps": -88.98243713378906, "loss": 0.2351, "objective": 0.22677263617515564, "ranking_idealized": 0.574999988079071, "ranking_idealized_expo": 0.5687500238418579, "ranking_simple": 0.543749988079071, "regularize": 0.15743525326251984, "step": 1210 }, { "dpo_loss": 0.6766241788864136, "epoch": 2.295701464336325, "grad_norm": 79.38107843409844, "learning_rate": 4.7532395361980544e-07, "logits": -1.312524437904358, "logps": -91.68379974365234, "loss": 0.2463, "objective": 0.23765353858470917, "ranking_idealized": 0.5874999761581421, "ranking_idealized_expo": 0.5625, "ranking_simple": 0.5687500238418579, "regularize": 0.16999110579490662, "step": 1215 }, { "dpo_loss": 0.6962811946868896, "epoch": 2.305148795465281, "grad_norm": 85.13780816275877, "learning_rate": 4.749654103617119e-07, "logits": -1.2287237644195557, "logps": -92.60826110839844, "loss": 0.2328, "objective": 0.22278353571891785, "ranking_idealized": 0.46875, "ranking_idealized_expo": 0.4437499940395355, "ranking_simple": 0.4437499940395355, "regularize": 0.15315541625022888, "step": 1220 }, { "dpo_loss": 0.6947506666183472, "epoch": 2.314596126594237, "grad_norm": 78.24452291589246, "learning_rate": 4.74604418273008e-07, "logits": -1.3358386754989624, "logps": -90.82719421386719, "loss": 0.2346, "objective": 0.228725865483284, "ranking_idealized": 0.5, "ranking_idealized_expo": 0.4749999940395355, "ranking_simple": 0.4749999940395355, "regularize": 0.15925081074237823, "step": 1225 }, { "dpo_loss": 0.7003086805343628, "epoch": 2.324043457723193, "grad_norm": 81.50256143225303, "learning_rate": 4.742409812832244e-07, "logits": -1.2636182308197021, "logps": -90.55316925048828, "loss": 0.2489, "objective": 0.25676098465919495, "ranking_idealized": 0.5249999761581421, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.53125, "regularize": 0.18673010170459747, "step": 1230 }, { "dpo_loss": 0.6981230974197388, "epoch": 2.3334907888521492, "grad_norm": 80.31828285987623, "learning_rate": 4.738751033485055e-07, "logits": -1.239471197128296, "logps": -89.82279205322266, "loss": 0.2352, "objective": 0.2329610139131546, "ranking_idealized": 0.5874999761581421, "ranking_idealized_expo": 0.53125, "ranking_simple": 0.5375000238418579, "regularize": 0.1631486713886261, "step": 1235 }, { "dpo_loss": 0.7123819589614868, "epoch": 2.3429381199811052, "grad_norm": 83.16849127640717, "learning_rate": 4.7350678845156613e-07, "logits": -1.2978795766830444, "logps": -92.16786193847656, "loss": 0.2402, "objective": 0.2497522085905075, "ranking_idealized": 0.581250011920929, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.518750011920929, "regularize": 0.1785140037536621, "step": 1240 }, { "dpo_loss": 0.6852763891220093, "epoch": 2.3523854511100613, "grad_norm": 85.40390965968454, "learning_rate": 4.7313604060164824e-07, "logits": -1.2600769996643066, "logps": -93.20845031738281, "loss": 0.254, "objective": 0.27139025926589966, "ranking_idealized": 0.59375, "ranking_idealized_expo": 0.5562499761581421, "ranking_simple": 0.543749988079071, "regularize": 0.20286257565021515, "step": 1245 }, { "dpo_loss": 0.7078602910041809, "epoch": 2.3618327822390173, "grad_norm": 78.24810295158233, "learning_rate": 4.7276286383447766e-07, "logits": -1.2538678646087646, "logps": -91.44749450683594, "loss": 0.2337, "objective": 0.23821282386779785, "ranking_idealized": 0.5375000238418579, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.518750011920929, "regularize": 0.16742679476737976, "step": 1250 }, { "epoch": 2.3618327822390173, "eval_dpo_loss": 0.6949954628944397, "eval_logits": -1.23129403591156, "eval_logps": -96.26293182373047, "eval_loss": 0.2493569552898407, "eval_objective": 0.25145968794822693, "eval_ranking_idealized": 0.5398550629615784, "eval_ranking_idealized_expo": 0.5243270993232727, "eval_ranking_simple": 0.5269151329994202, "eval_regularize": 0.18196015059947968, "eval_runtime": 158.1128, "eval_samples_per_second": 36.619, "eval_steps_per_second": 3.055, "step": 1250 }, { "dpo_loss": 0.6774023175239563, "epoch": 2.3712801133679737, "grad_norm": 73.61944794598868, "learning_rate": 4.723872622122197e-07, "logits": -1.222249150276184, "logps": -92.29754638671875, "loss": 0.2413, "objective": 0.2529022693634033, "ranking_idealized": 0.53125, "ranking_idealized_expo": 0.518750011920929, "ranking_simple": 0.518750011920929, "regularize": 0.1851620376110077, "step": 1255 }, { "dpo_loss": 0.6773152351379395, "epoch": 2.3807274444969297, "grad_norm": 75.01411541839315, "learning_rate": 4.720092398234351e-07, "logits": -1.1812235116958618, "logps": -91.16864013671875, "loss": 0.242, "objective": 0.2524015009403229, "ranking_idealized": 0.4375, "ranking_idealized_expo": 0.4375, "ranking_simple": 0.4375, "regularize": 0.18466997146606445, "step": 1260 }, { "dpo_loss": 0.7058506011962891, "epoch": 2.3901747756258858, "grad_norm": 85.94887253603434, "learning_rate": 4.716288007830356e-07, "logits": -1.2233319282531738, "logps": -91.88188171386719, "loss": 0.2412, "objective": 0.24302363395690918, "ranking_idealized": 0.550000011920929, "ranking_idealized_expo": 0.5375000238418579, "ranking_simple": 0.543749988079071, "regularize": 0.1724385917186737, "step": 1265 }, { "dpo_loss": 0.6782275438308716, "epoch": 2.3996221067548418, "grad_norm": 84.19950426466906, "learning_rate": 4.71245949232239e-07, "logits": -1.353999376296997, "logps": -92.9189682006836, "loss": 0.2592, "objective": 0.2692815661430359, "ranking_idealized": 0.5062500238418579, "ranking_idealized_expo": 0.5, "ranking_simple": 0.4937500059604645, "regularize": 0.20145881175994873, "step": 1270 }, { "dpo_loss": 0.6916183233261108, "epoch": 2.409069437883798, "grad_norm": 87.47559767434763, "learning_rate": 4.708606893385241e-07, "logits": -1.2686707973480225, "logps": -89.87593078613281, "loss": 0.2319, "objective": 0.22510790824890137, "ranking_idealized": 0.5562499761581421, "ranking_idealized_expo": 0.5, "ranking_simple": 0.5062500238418579, "regularize": 0.15594607591629028, "step": 1275 }, { "dpo_loss": 0.6960471868515015, "epoch": 2.418516769012754, "grad_norm": 81.01535668345583, "learning_rate": 4.7047302529558556e-07, "logits": -1.2857545614242554, "logps": -89.09248352050781, "loss": 0.2388, "objective": 0.22974196076393127, "ranking_idealized": 0.59375, "ranking_idealized_expo": 0.574999988079071, "ranking_simple": 0.5687500238418579, "regularize": 0.16013725101947784, "step": 1280 }, { "dpo_loss": 0.6761273145675659, "epoch": 2.42796410014171, "grad_norm": 74.11716901737486, "learning_rate": 4.7008296132328805e-07, "logits": -1.279343843460083, "logps": -92.09466552734375, "loss": 0.2444, "objective": 0.2343457192182541, "ranking_idealized": 0.4937500059604645, "ranking_idealized_expo": 0.45625001192092896, "ranking_simple": 0.4625000059604645, "regularize": 0.1667329967021942, "step": 1285 }, { "dpo_loss": 0.708074688911438, "epoch": 2.4374114312706663, "grad_norm": 77.08424586936, "learning_rate": 4.6969050166762013e-07, "logits": -1.232253909111023, "logps": -89.49784851074219, "loss": 0.2379, "objective": 0.24165663123130798, "ranking_idealized": 0.5375000238418579, "ranking_idealized_expo": 0.5375000238418579, "ranking_simple": 0.53125, "regularize": 0.17084915935993195, "step": 1290 }, { "dpo_loss": 0.7073941230773926, "epoch": 2.4468587623996223, "grad_norm": 77.54215896924674, "learning_rate": 4.692956506006486e-07, "logits": -1.2533094882965088, "logps": -89.46601104736328, "loss": 0.2512, "objective": 0.25510329008102417, "ranking_idealized": 0.543749988079071, "ranking_idealized_expo": 0.5375000238418579, "ranking_simple": 0.53125, "regularize": 0.18436387181282043, "step": 1295 }, { "dpo_loss": 0.6897347569465637, "epoch": 2.4563060935285783, "grad_norm": 81.92124464995868, "learning_rate": 4.688984124204712e-07, "logits": -1.279669165611267, "logps": -88.2325668334961, "loss": 0.2264, "objective": 0.22791297733783722, "ranking_idealized": 0.5375000238418579, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.5375000238418579, "regularize": 0.15893949568271637, "step": 1300 }, { "epoch": 2.4563060935285783, "eval_dpo_loss": 0.6981492042541504, "eval_logits": -1.2534995079040527, "eval_logps": -94.45039367675781, "eval_loss": 0.24725377559661865, "eval_objective": 0.24559155106544495, "eval_ranking_idealized": 0.5398550629615784, "eval_ranking_idealized_expo": 0.5243270993232727, "eval_ranking_simple": 0.5222567319869995, "eval_regularize": 0.1757766604423523, "eval_runtime": 157.5273, "eval_samples_per_second": 36.756, "eval_steps_per_second": 3.066, "step": 1300 }, { "dpo_loss": 0.7108772397041321, "epoch": 2.4657534246575343, "grad_norm": 82.39715959123795, "learning_rate": 4.684987914511708e-07, "logits": -1.3086609840393066, "logps": -89.1966323852539, "loss": 0.2314, "objective": 0.23325006663799286, "ranking_idealized": 0.512499988079071, "ranking_idealized_expo": 0.4749999940395355, "ranking_simple": 0.46875, "regularize": 0.16216234862804413, "step": 1305 }, { "dpo_loss": 0.6940029859542847, "epoch": 2.4752007557864903, "grad_norm": 77.69520048627098, "learning_rate": 4.6809679204276735e-07, "logits": -1.3097118139266968, "logps": -88.96656799316406, "loss": 0.2339, "objective": 0.24012359976768494, "ranking_idealized": 0.59375, "ranking_idealized_expo": 0.574999988079071, "ranking_simple": 0.581250011920929, "regularize": 0.1707233041524887, "step": 1310 }, { "dpo_loss": 0.6831144094467163, "epoch": 2.4846480869154464, "grad_norm": 80.97079870828857, "learning_rate": 4.6769241857117127e-07, "logits": -1.223602294921875, "logps": -88.93707275390625, "loss": 0.2369, "objective": 0.25154465436935425, "ranking_idealized": 0.5375000238418579, "ranking_idealized_expo": 0.518750011920929, "ranking_simple": 0.5249999761581421, "regularize": 0.18323318660259247, "step": 1315 }, { "dpo_loss": 0.7016611695289612, "epoch": 2.4940954180444024, "grad_norm": 76.56858236464322, "learning_rate": 4.6728567543813524e-07, "logits": -1.142772912979126, "logps": -87.71903991699219, "loss": 0.2368, "objective": 0.2456255704164505, "ranking_idealized": 0.543749988079071, "ranking_idealized_expo": 0.512499988079071, "ranking_simple": 0.53125, "regularize": 0.17545947432518005, "step": 1320 }, { "dpo_loss": 0.6933158040046692, "epoch": 2.5035427491733584, "grad_norm": 78.36609479465805, "learning_rate": 4.6687656707120693e-07, "logits": -1.2576102018356323, "logps": -87.36424255371094, "loss": 0.2364, "objective": 0.2189684808254242, "ranking_idealized": 0.59375, "ranking_idealized_expo": 0.581250011920929, "ranking_simple": 0.5375000238418579, "regularize": 0.1496368795633316, "step": 1325 }, { "dpo_loss": 0.7006167769432068, "epoch": 2.5129900803023144, "grad_norm": 80.0154519809813, "learning_rate": 4.664650979236801e-07, "logits": -1.2859307527542114, "logps": -87.9940185546875, "loss": 0.2438, "objective": 0.23237256705760956, "ranking_idealized": 0.5375000238418579, "ranking_idealized_expo": 0.512499988079071, "ranking_simple": 0.512499988079071, "regularize": 0.1623109132051468, "step": 1330 }, { "dpo_loss": 0.6959272623062134, "epoch": 2.5224374114312704, "grad_norm": 85.35335471365272, "learning_rate": 4.660512724745467e-07, "logits": -1.27793288230896, "logps": -87.02171325683594, "loss": 0.2368, "objective": 0.2255460023880005, "ranking_idealized": 0.5062500238418579, "ranking_idealized_expo": 0.4625000059604645, "ranking_simple": 0.45625001192092896, "regularize": 0.15595325827598572, "step": 1335 }, { "dpo_loss": 0.6886266469955444, "epoch": 2.531884742560227, "grad_norm": 78.40692183070935, "learning_rate": 4.656350952284478e-07, "logits": -1.1710107326507568, "logps": -89.88359069824219, "loss": 0.2381, "objective": 0.2395767718553543, "ranking_idealized": 0.5062500238418579, "ranking_idealized_expo": 0.46875, "ranking_simple": 0.4625000059604645, "regularize": 0.17071406543254852, "step": 1340 }, { "dpo_loss": 0.6858821511268616, "epoch": 2.541332073689183, "grad_norm": 81.74921881509104, "learning_rate": 4.652165707156246e-07, "logits": -1.2932147979736328, "logps": -90.06592559814453, "loss": 0.2398, "objective": 0.24520452320575714, "ranking_idealized": 0.581250011920929, "ranking_idealized_expo": 0.53125, "ranking_simple": 0.5249999761581421, "regularize": 0.17661629617214203, "step": 1345 }, { "dpo_loss": 0.6906551122665405, "epoch": 2.550779404818139, "grad_norm": 78.20617287848113, "learning_rate": 4.6479570349186913e-07, "logits": -1.212925672531128, "logps": -88.88258361816406, "loss": 0.2398, "objective": 0.222016841173172, "ranking_idealized": 0.5562499761581421, "ranking_idealized_expo": 0.512499988079071, "ranking_simple": 0.5062500238418579, "regularize": 0.15295132994651794, "step": 1350 }, { "epoch": 2.550779404818139, "eval_dpo_loss": 0.7027136087417603, "eval_logits": -1.2349364757537842, "eval_logps": -96.20651245117188, "eval_loss": 0.24674533307552338, "eval_objective": 0.24622555077075958, "eval_ranking_idealized": 0.5398550629615784, "eval_ranking_idealized_expo": 0.5243270993232727, "eval_ranking_simple": 0.5196687579154968, "eval_regularize": 0.1759541779756546, "eval_runtime": 158.2341, "eval_samples_per_second": 36.591, "eval_steps_per_second": 3.052, "step": 1350 }, { "dpo_loss": 0.6958358883857727, "epoch": 2.560226735947095, "grad_norm": 75.25806832805698, "learning_rate": 4.643724981384749e-07, "logits": -1.2679038047790527, "logps": -91.00750732421875, "loss": 0.2358, "objective": 0.23495018482208252, "ranking_idealized": 0.53125, "ranking_idealized_expo": 0.5062500238418579, "ranking_simple": 0.512499988079071, "regularize": 0.16536661982536316, "step": 1355 }, { "dpo_loss": 0.6821821331977844, "epoch": 2.569674067076051, "grad_norm": 77.03521477955098, "learning_rate": 4.6394695926218656e-07, "logits": -1.2167763710021973, "logps": -89.78504180908203, "loss": 0.2247, "objective": 0.2250729501247406, "ranking_idealized": 0.574999988079071, "ranking_idealized_expo": 0.5625, "ranking_simple": 0.574999988079071, "regularize": 0.1568547487258911, "step": 1360 }, { "dpo_loss": 0.692377507686615, "epoch": 2.579121398205007, "grad_norm": 77.31482925879361, "learning_rate": 4.635190914951502e-07, "logits": -1.2918310165405273, "logps": -91.2199935913086, "loss": 0.2385, "objective": 0.2363658845424652, "ranking_idealized": 0.48750001192092896, "ranking_idealized_expo": 0.4375, "ranking_simple": 0.44999998807907104, "regularize": 0.16712811589241028, "step": 1365 }, { "dpo_loss": 0.6883034110069275, "epoch": 2.5885687293339634, "grad_norm": 81.58418573926582, "learning_rate": 4.6308889949486256e-07, "logits": -1.392380952835083, "logps": -91.57077026367188, "loss": 0.2318, "objective": 0.23946337401866913, "ranking_idealized": 0.59375, "ranking_idealized_expo": 0.574999988079071, "ranking_simple": 0.5687500238418579, "regularize": 0.17063304781913757, "step": 1370 }, { "dpo_loss": 0.7015324234962463, "epoch": 2.5980160604629194, "grad_norm": 72.99558544647365, "learning_rate": 4.6265638794412067e-07, "logits": -1.193394660949707, "logps": -89.87769317626953, "loss": 0.2282, "objective": 0.2171776294708252, "ranking_idealized": 0.5, "ranking_idealized_expo": 0.4937500059604645, "ranking_simple": 0.4749999940395355, "regularize": 0.14702437818050385, "step": 1375 }, { "dpo_loss": 0.6938923597335815, "epoch": 2.6074633915918755, "grad_norm": 77.25092850818334, "learning_rate": 4.622215615509705e-07, "logits": -1.2323808670043945, "logps": -87.7343978881836, "loss": 0.2284, "objective": 0.2326710969209671, "ranking_idealized": 0.5375000238418579, "ranking_idealized_expo": 0.5062500238418579, "ranking_simple": 0.5062500238418579, "regularize": 0.1632818579673767, "step": 1380 }, { "dpo_loss": 0.6813127398490906, "epoch": 2.6169107227208315, "grad_norm": 81.808009092451, "learning_rate": 4.6178442504865623e-07, "logits": -1.3131296634674072, "logps": -89.78125, "loss": 0.2463, "objective": 0.24866609275341034, "ranking_idealized": 0.5249999761581421, "ranking_idealized_expo": 0.5062500238418579, "ranking_simple": 0.5062500238418579, "regularize": 0.18053480982780457, "step": 1385 }, { "dpo_loss": 0.6854914426803589, "epoch": 2.6263580538497875, "grad_norm": 82.80323980610244, "learning_rate": 4.6134498319556815e-07, "logits": -1.1897776126861572, "logps": -89.03984832763672, "loss": 0.2411, "objective": 0.22924157977104187, "ranking_idealized": 0.48750001192092896, "ranking_idealized_expo": 0.4625000059604645, "ranking_simple": 0.4625000059604645, "regularize": 0.16069245338439941, "step": 1390 }, { "dpo_loss": 0.6865602135658264, "epoch": 2.6358053849787435, "grad_norm": 74.25016710243104, "learning_rate": 4.6090324077519127e-07, "logits": -1.2304832935333252, "logps": -89.58457946777344, "loss": 0.2412, "objective": 0.23623552918434143, "ranking_idealized": 0.5062500238418579, "ranking_idealized_expo": 0.48750001192092896, "ranking_simple": 0.4937500059604645, "regularize": 0.1675795018672943, "step": 1395 }, { "dpo_loss": 0.6881572008132935, "epoch": 2.6452527161076995, "grad_norm": 73.86263634495297, "learning_rate": 4.6045920259605306e-07, "logits": -1.2435492277145386, "logps": -87.33223724365234, "loss": 0.2346, "objective": 0.23662078380584717, "ranking_idealized": 0.59375, "ranking_idealized_expo": 0.5687500238418579, "ranking_simple": 0.5562499761581421, "regularize": 0.1678050458431244, "step": 1400 }, { "epoch": 2.6452527161076995, "eval_dpo_loss": 0.7001627683639526, "eval_logits": -1.2562015056610107, "eval_logps": -94.65913391113281, "eval_loss": 0.2565145790576935, "eval_objective": 0.25667497515678406, "eval_ranking_idealized": 0.5398550629615784, "eval_ranking_idealized_expo": 0.5243270993232727, "eval_ranking_simple": 0.5212215185165405, "eval_regularize": 0.18665869534015656, "eval_runtime": 158.5988, "eval_samples_per_second": 36.507, "eval_steps_per_second": 3.045, "step": 1400 }, { "dpo_loss": 0.689062237739563, "epoch": 2.6547000472366555, "grad_norm": 78.01273938173455, "learning_rate": 4.600128734916713e-07, "logits": -1.2227166891098022, "logps": -89.48971557617188, "loss": 0.2422, "objective": 0.2384214699268341, "ranking_idealized": 0.574999988079071, "ranking_idealized_expo": 0.5625, "ranking_simple": 0.5687500238418579, "regularize": 0.1695152223110199, "step": 1405 }, { "dpo_loss": 0.6784166693687439, "epoch": 2.6641473783656116, "grad_norm": 74.9551460062836, "learning_rate": 4.595642583205011e-07, "logits": -1.321109652519226, "logps": -90.31436920166016, "loss": 0.2429, "objective": 0.23836779594421387, "ranking_idealized": 0.5375000238418579, "ranking_idealized_expo": 0.53125, "ranking_simple": 0.53125, "regularize": 0.17052613198757172, "step": 1410 }, { "dpo_loss": 0.6802228689193726, "epoch": 2.6735947094945676, "grad_norm": 75.504272279789, "learning_rate": 4.5911336196588237e-07, "logits": -1.294541597366333, "logps": -88.68775939941406, "loss": 0.2384, "objective": 0.2413848638534546, "ranking_idealized": 0.5562499761581421, "ranking_idealized_expo": 0.53125, "ranking_simple": 0.5375000238418579, "regularize": 0.1733625829219818, "step": 1415 }, { "dpo_loss": 0.6930629014968872, "epoch": 2.6830420406235236, "grad_norm": 72.22345932978276, "learning_rate": 4.586601893359865e-07, "logits": -1.230058193206787, "logps": -88.73162841796875, "loss": 0.2386, "objective": 0.2470715045928955, "ranking_idealized": 0.5249999761581421, "ranking_idealized_expo": 0.512499988079071, "ranking_simple": 0.518750011920929, "regularize": 0.17776522040367126, "step": 1420 }, { "dpo_loss": 0.7063760757446289, "epoch": 2.69248937175248, "grad_norm": 81.14829878526038, "learning_rate": 4.58204745363763e-07, "logits": -1.2439881563186646, "logps": -88.5297622680664, "loss": 0.2369, "objective": 0.2180163860321045, "ranking_idealized": 0.5874999761581421, "ranking_idealized_expo": 0.5562499761581421, "ranking_simple": 0.5625, "regularize": 0.14737875759601593, "step": 1425 }, { "dpo_loss": 0.6979953646659851, "epoch": 2.701936702881436, "grad_norm": 72.60233904196467, "learning_rate": 4.577470350068858e-07, "logits": -1.2875802516937256, "logps": -87.95441436767578, "loss": 0.2324, "objective": 0.23016265034675598, "ranking_idealized": 0.5062500238418579, "ranking_idealized_expo": 0.48124998807907104, "ranking_simple": 0.48124998807907104, "regularize": 0.1603630930185318, "step": 1430 }, { "dpo_loss": 0.6804190278053284, "epoch": 2.711384034010392, "grad_norm": 73.85158200914744, "learning_rate": 4.5728706324769914e-07, "logits": -1.2435171604156494, "logps": -89.67928314208984, "loss": 0.2336, "objective": 0.24185959994792938, "ranking_idealized": 0.5062500238418579, "ranking_idealized_expo": 0.48750001192092896, "ranking_simple": 0.48750001192092896, "regularize": 0.1738176941871643, "step": 1435 }, { "dpo_loss": 0.6941691040992737, "epoch": 2.720831365139348, "grad_norm": 76.60837269004416, "learning_rate": 4.5682483509316367e-07, "logits": -1.2151108980178833, "logps": -88.1698989868164, "loss": 0.245, "objective": 0.24525687098503113, "ranking_idealized": 0.550000011920929, "ranking_idealized_expo": 0.512499988079071, "ranking_simple": 0.53125, "regularize": 0.17583994567394257, "step": 1440 }, { "dpo_loss": 0.6980563998222351, "epoch": 2.730278696268304, "grad_norm": 75.43164801037526, "learning_rate": 4.5636035557480144e-07, "logits": -1.2762441635131836, "logps": -88.98656463623047, "loss": 0.2434, "objective": 0.23745755851268768, "ranking_idealized": 0.5687500238418579, "ranking_idealized_expo": 0.518750011920929, "ranking_simple": 0.518750011920929, "regularize": 0.1676519215106964, "step": 1445 }, { "dpo_loss": 0.7027378082275391, "epoch": 2.73972602739726, "grad_norm": 75.21672455179534, "learning_rate": 4.558936297486415e-07, "logits": -1.306553602218628, "logps": -90.38543701171875, "loss": 0.242, "objective": 0.24809806048870087, "ranking_idealized": 0.59375, "ranking_idealized_expo": 0.581250011920929, "ranking_simple": 0.5562499761581421, "regularize": 0.17782428860664368, "step": 1450 }, { "epoch": 2.73972602739726, "eval_dpo_loss": 0.7014594078063965, "eval_logits": -1.214097261428833, "eval_logps": -94.65546417236328, "eval_loss": 0.2640208899974823, "eval_objective": 0.26405468583106995, "eval_ranking_idealized": 0.5398550629615784, "eval_ranking_idealized_expo": 0.5243270993232727, "eval_ranking_simple": 0.5243270993232727, "eval_regularize": 0.1939087212085724, "eval_runtime": 159.1959, "eval_samples_per_second": 36.37, "eval_steps_per_second": 3.034, "step": 1450 }, { "dpo_loss": 0.6996809244155884, "epoch": 2.7491733585262166, "grad_norm": 76.00464187434298, "learning_rate": 4.5542466269516503e-07, "logits": -1.1624560356140137, "logps": -88.36128234863281, "loss": 0.2355, "objective": 0.231034517288208, "ranking_idealized": 0.550000011920929, "ranking_idealized_expo": 0.5062500238418579, "ranking_simple": 0.512499988079071, "regularize": 0.1610664278268814, "step": 1455 }, { "dpo_loss": 0.6926349401473999, "epoch": 2.7586206896551726, "grad_norm": 77.37875288927383, "learning_rate": 4.5495345951924944e-07, "logits": -1.146475076675415, "logps": -88.392333984375, "loss": 0.2214, "objective": 0.22251549363136292, "ranking_idealized": 0.5874999761581421, "ranking_idealized_expo": 0.574999988079071, "ranking_simple": 0.5562499761581421, "regularize": 0.15325197577476501, "step": 1460 }, { "dpo_loss": 0.6899169087409973, "epoch": 2.7680680207841286, "grad_norm": 77.90428885902477, "learning_rate": 4.544800253501134e-07, "logits": -1.310323715209961, "logps": -88.71553802490234, "loss": 0.2326, "objective": 0.23641350865364075, "ranking_idealized": 0.5249999761581421, "ranking_idealized_expo": 0.48124998807907104, "ranking_simple": 0.4937500059604645, "regularize": 0.16742180287837982, "step": 1465 }, { "dpo_loss": 0.7080496549606323, "epoch": 2.7775153519130846, "grad_norm": 73.39837293853236, "learning_rate": 4.540043653412606e-07, "logits": -1.247336983680725, "logps": -89.10823059082031, "loss": 0.2301, "objective": 0.2354445904493332, "ranking_idealized": 0.4937500059604645, "ranking_idealized_expo": 0.4625000059604645, "ranking_simple": 0.44999998807907104, "regularize": 0.16463959217071533, "step": 1470 }, { "dpo_loss": 0.6979535818099976, "epoch": 2.7869626830420406, "grad_norm": 75.71179419593179, "learning_rate": 4.5352648467042384e-07, "logits": -1.1792867183685303, "logps": -88.86894226074219, "loss": 0.234, "objective": 0.22246937453746796, "ranking_idealized": 0.53125, "ranking_idealized_expo": 0.48750001192092896, "ranking_simple": 0.48124998807907104, "regularize": 0.15267404913902283, "step": 1475 }, { "dpo_loss": 0.678043782711029, "epoch": 2.7964100141709967, "grad_norm": 92.17559432395329, "learning_rate": 4.5304638853950866e-07, "logits": -1.198436975479126, "logps": -88.4832992553711, "loss": 0.2211, "objective": 0.2273280918598175, "ranking_idealized": 0.5562499761581421, "ranking_idealized_expo": 0.5, "ranking_simple": 0.4937500059604645, "regularize": 0.15952369570732117, "step": 1480 }, { "dpo_loss": 0.6995588541030884, "epoch": 2.8058573452999527, "grad_norm": 75.6219150180191, "learning_rate": 4.525640821745368e-07, "logits": -1.2838484048843384, "logps": -90.87583923339844, "loss": 0.2283, "objective": 0.23600324988365173, "ranking_idealized": 0.5, "ranking_idealized_expo": 0.46875, "ranking_simple": 0.4749999940395355, "regularize": 0.1660473495721817, "step": 1485 }, { "dpo_loss": 0.6786238551139832, "epoch": 2.8153046764289087, "grad_norm": 74.59167039994438, "learning_rate": 4.52079570825589e-07, "logits": -1.2329105138778687, "logps": -86.96043395996094, "loss": 0.2333, "objective": 0.23706336319446564, "ranking_idealized": 0.543749988079071, "ranking_idealized_expo": 0.53125, "ranking_simple": 0.53125, "regularize": 0.16920097172260284, "step": 1490 }, { "dpo_loss": 0.7082782983779907, "epoch": 2.8247520075578647, "grad_norm": 83.74958128471566, "learning_rate": 4.515928597667481e-07, "logits": -1.3093531131744385, "logps": -90.46369171142578, "loss": 0.2388, "objective": 0.26155704259872437, "ranking_idealized": 0.5625, "ranking_idealized_expo": 0.5375000238418579, "ranking_simple": 0.543749988079071, "regularize": 0.19072921574115753, "step": 1495 }, { "dpo_loss": 0.7037413716316223, "epoch": 2.8341993386868207, "grad_norm": 71.00748372107614, "learning_rate": 4.5110395429604183e-07, "logits": -1.2782231569290161, "logps": -87.0984115600586, "loss": 0.2372, "objective": 0.2213272601366043, "ranking_idealized": 0.5375000238418579, "ranking_idealized_expo": 0.4937500059604645, "ranking_simple": 0.5, "regularize": 0.15095312893390656, "step": 1500 }, { "epoch": 2.8341993386868207, "eval_dpo_loss": 0.7027208805084229, "eval_logits": -1.2472012042999268, "eval_logps": -94.92886352539062, "eval_loss": 0.2747000455856323, "eval_objective": 0.2726268768310547, "eval_ranking_idealized": 0.5398550629615784, "eval_ranking_idealized_expo": 0.5243270993232727, "eval_ranking_simple": 0.5201863646507263, "eval_regularize": 0.2023547887802124, "eval_runtime": 162.0193, "eval_samples_per_second": 35.736, "eval_steps_per_second": 2.981, "step": 1500 }, { "dpo_loss": 0.691096305847168, "epoch": 2.843646669815777, "grad_norm": 75.52449443233994, "learning_rate": 4.5061285973538456e-07, "logits": -1.2361116409301758, "logps": -87.37306213378906, "loss": 0.2267, "objective": 0.2288275510072708, "ranking_idealized": 0.53125, "ranking_idealized_expo": 0.5, "ranking_simple": 0.4937500059604645, "regularize": 0.15971791744232178, "step": 1505 }, { "dpo_loss": 0.6853073835372925, "epoch": 2.853094000944733, "grad_norm": 75.06432079116681, "learning_rate": 4.5011958143051987e-07, "logits": -1.3150286674499512, "logps": -87.7445297241211, "loss": 0.2295, "objective": 0.2292981892824173, "ranking_idealized": 0.5625, "ranking_idealized_expo": 0.543749988079071, "ranking_simple": 0.550000011920929, "regularize": 0.16076745092868805, "step": 1510 }, { "dpo_loss": 0.6877030730247498, "epoch": 2.862541332073689, "grad_norm": 71.09686183807273, "learning_rate": 4.496241247509621e-07, "logits": -1.3267923593521118, "logps": -88.32781982421875, "loss": 0.2355, "objective": 0.2435312271118164, "ranking_idealized": 0.5687500238418579, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.550000011920929, "regularize": 0.17476090788841248, "step": 1515 }, { "dpo_loss": 0.7023721933364868, "epoch": 2.8719886632026452, "grad_norm": 84.59529154677602, "learning_rate": 4.4912649508993827e-07, "logits": -1.3224318027496338, "logps": -87.6534194946289, "loss": 0.2331, "objective": 0.22857289016246796, "ranking_idealized": 0.574999988079071, "ranking_idealized_expo": 0.5562499761581421, "ranking_simple": 0.543749988079071, "regularize": 0.15833568572998047, "step": 1520 }, { "dpo_loss": 0.7165266275405884, "epoch": 2.8814359943316012, "grad_norm": 74.94683227752313, "learning_rate": 4.486266978643286e-07, "logits": -1.1609947681427002, "logps": -86.48796081542969, "loss": 0.2349, "objective": 0.2357398271560669, "ranking_idealized": 0.5625, "ranking_idealized_expo": 0.5562499761581421, "ranking_simple": 0.550000011920929, "regularize": 0.16408716142177582, "step": 1525 }, { "dpo_loss": 0.6916718482971191, "epoch": 2.8908833254605573, "grad_norm": 71.49489799909206, "learning_rate": 4.481247385146086e-07, "logits": -1.1969927549362183, "logps": -87.98089599609375, "loss": 0.2218, "objective": 0.21987979114055634, "ranking_idealized": 0.543749988079071, "ranking_idealized_expo": 0.512499988079071, "ranking_simple": 0.518750011920929, "regularize": 0.15071257948875427, "step": 1530 }, { "dpo_loss": 0.7046507596969604, "epoch": 2.9003306565895137, "grad_norm": 72.52640605772251, "learning_rate": 4.476206225047889e-07, "logits": -1.2879106998443604, "logps": -88.4914321899414, "loss": 0.2265, "objective": 0.23516707122325897, "ranking_idealized": 0.5687500238418579, "ranking_idealized_expo": 0.48750001192092896, "ranking_simple": 0.4937500059604645, "regularize": 0.16470198333263397, "step": 1535 }, { "dpo_loss": 0.6844728589057922, "epoch": 2.9097779877184697, "grad_norm": 73.91357746088326, "learning_rate": 4.4711435532235624e-07, "logits": -1.2798715829849243, "logps": -89.1636962890625, "loss": 0.2236, "objective": 0.21921619772911072, "ranking_idealized": 0.45625001192092896, "ranking_idealized_expo": 0.4437499940395355, "ranking_simple": 0.44999998807907104, "regularize": 0.15076890587806702, "step": 1540 }, { "dpo_loss": 0.6933543086051941, "epoch": 2.9192253188474258, "grad_norm": 72.93335086466251, "learning_rate": 4.4660594247821384e-07, "logits": -1.2931110858917236, "logps": -88.17526245117188, "loss": 0.2147, "objective": 0.20991010963916779, "ranking_idealized": 0.512499988079071, "ranking_idealized_expo": 0.5062500238418579, "ranking_simple": 0.5, "regularize": 0.14057467877864838, "step": 1545 }, { "dpo_loss": 0.6930429339408875, "epoch": 2.9286726499763818, "grad_norm": 81.90726634302081, "learning_rate": 4.460953895066209e-07, "logits": -1.2895206212997437, "logps": -90.56971740722656, "loss": 0.2133, "objective": 0.2167246788740158, "ranking_idealized": 0.53125, "ranking_idealized_expo": 0.48124998807907104, "ranking_simple": 0.48124998807907104, "regularize": 0.14742037653923035, "step": 1550 }, { "epoch": 2.9286726499763818, "eval_dpo_loss": 0.7006378769874573, "eval_logits": -1.2344614267349243, "eval_logps": -95.19911193847656, "eval_loss": 0.2529480755329132, "eval_objective": 0.251154363155365, "eval_ranking_idealized": 0.5398550629615784, "eval_ranking_idealized_expo": 0.5243270993232727, "eval_ranking_simple": 0.5243270993232727, "eval_regularize": 0.1810905784368515, "eval_runtime": 157.1881, "eval_samples_per_second": 36.835, "eval_steps_per_second": 3.073, "step": 1550 }, { "dpo_loss": 0.7084794044494629, "epoch": 2.938119981105338, "grad_norm": 81.37590112985177, "learning_rate": 4.4558270196513306e-07, "logits": -1.3027656078338623, "logps": -89.90327453613281, "loss": 0.226, "objective": 0.2371251881122589, "ranking_idealized": 0.5375000238418579, "ranking_idealized_expo": 0.5062500238418579, "ranking_simple": 0.4937500059604645, "regularize": 0.16627724468708038, "step": 1555 }, { "dpo_loss": 0.6968989968299866, "epoch": 2.947567312234294, "grad_norm": 74.4110488665179, "learning_rate": 4.450678854345412e-07, "logits": -1.3281409740447998, "logps": -89.05107116699219, "loss": 0.2284, "objective": 0.2369905412197113, "ranking_idealized": 0.543749988079071, "ranking_idealized_expo": 0.5, "ranking_simple": 0.5062500238418579, "regularize": 0.16730067133903503, "step": 1560 }, { "dpo_loss": 0.6882658004760742, "epoch": 2.95701464336325, "grad_norm": 74.9308740448357, "learning_rate": 4.445509455188113e-07, "logits": -1.2605291604995728, "logps": -91.87565612792969, "loss": 0.2284, "objective": 0.22609011828899384, "ranking_idealized": 0.59375, "ranking_idealized_expo": 0.53125, "ranking_simple": 0.5375000238418579, "regularize": 0.15726353228092194, "step": 1565 }, { "dpo_loss": 0.6850372552871704, "epoch": 2.966461974492206, "grad_norm": 75.73765777029178, "learning_rate": 4.44031887845023e-07, "logits": -1.2321079969406128, "logps": -88.75463104248047, "loss": 0.229, "objective": 0.2340477705001831, "ranking_idealized": 0.543749988079071, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.5249999761581421, "regularize": 0.1655440479516983, "step": 1570 }, { "dpo_loss": 0.6979643702507019, "epoch": 2.975909305621162, "grad_norm": 75.89769576525151, "learning_rate": 4.4351071806330856e-07, "logits": -1.2928818464279175, "logps": -87.43384552001953, "loss": 0.2337, "objective": 0.22396059334278107, "ranking_idealized": 0.6000000238418579, "ranking_idealized_expo": 0.5562499761581421, "ranking_simple": 0.550000011920929, "regularize": 0.1541641652584076, "step": 1575 }, { "dpo_loss": 0.6913705468177795, "epoch": 2.985356636750118, "grad_norm": 79.25695866196861, "learning_rate": 4.429874418467913e-07, "logits": -1.309133768081665, "logps": -86.86026000976562, "loss": 0.2402, "objective": 0.23291504383087158, "ranking_idealized": 0.6187499761581421, "ranking_idealized_expo": 0.59375, "ranking_simple": 0.581250011920929, "regularize": 0.16377797722816467, "step": 1580 }, { "dpo_loss": 0.703238844871521, "epoch": 2.9948039678790743, "grad_norm": 79.54613475110943, "learning_rate": 4.4246206489152375e-07, "logits": -1.2111042737960815, "logps": -88.62483978271484, "loss": 0.2274, "objective": 0.23201140761375427, "ranking_idealized": 0.5562499761581421, "ranking_idealized_expo": 0.512499988079071, "ranking_simple": 0.518750011920929, "regularize": 0.16168752312660217, "step": 1585 }, { "dpo_loss": 0.6940716505050659, "epoch": 3.0042512990080303, "grad_norm": 75.03175294996458, "learning_rate": 4.41934592916426e-07, "logits": -1.2947633266448975, "logps": -87.53929138183594, "loss": 0.2249, "objective": 0.2187323272228241, "ranking_idealized": 0.5249999761581421, "ranking_idealized_expo": 0.5, "ranking_simple": 0.5, "regularize": 0.1493251621723175, "step": 1590 }, { "dpo_loss": 0.693493664264679, "epoch": 3.0136986301369864, "grad_norm": 73.25790878410778, "learning_rate": 4.414050316632229e-07, "logits": -1.3149116039276123, "logps": -89.59700012207031, "loss": 0.2326, "objective": 0.2274354249238968, "ranking_idealized": 0.5, "ranking_idealized_expo": 0.46875, "ranking_simple": 0.4749999940395355, "regularize": 0.15808603167533875, "step": 1595 }, { "dpo_loss": 0.6876460313796997, "epoch": 3.0231459612659424, "grad_norm": 70.71771437549256, "learning_rate": 4.408733868963821e-07, "logits": -1.2373759746551514, "logps": -89.33880615234375, "loss": 0.2292, "objective": 0.2364555299282074, "ranking_idealized": 0.5687500238418579, "ranking_idealized_expo": 0.5687500238418579, "ranking_simple": 0.5687500238418579, "regularize": 0.1676909476518631, "step": 1600 }, { "epoch": 3.0231459612659424, "eval_dpo_loss": 0.7038150429725647, "eval_logits": -1.2436572313308716, "eval_logps": -93.63340759277344, "eval_loss": 0.28403371572494507, "eval_objective": 0.2860759198665619, "eval_ranking_idealized": 0.5398550629615784, "eval_ranking_idealized_expo": 0.5243270993232727, "eval_ranking_simple": 0.5196687579154968, "eval_regularize": 0.21569444239139557, "eval_runtime": 157.8424, "eval_samples_per_second": 36.682, "eval_steps_per_second": 3.06, "step": 1600 }, { "dpo_loss": 0.6990983486175537, "epoch": 3.0325932923948984, "grad_norm": 71.79146138509809, "learning_rate": 4.403396644030509e-07, "logits": -1.225997805595398, "logps": -86.30064392089844, "loss": 0.2438, "objective": 0.2267124354839325, "ranking_idealized": 0.606249988079071, "ranking_idealized_expo": 0.574999988079071, "ranking_simple": 0.5687500238418579, "regularize": 0.15680257976055145, "step": 1605 }, { "dpo_loss": 0.6825001239776611, "epoch": 3.0420406235238544, "grad_norm": 84.9864448074978, "learning_rate": 4.398038699929935e-07, "logits": -1.2466906309127808, "logps": -88.25871276855469, "loss": 0.2262, "objective": 0.21322885155677795, "ranking_idealized": 0.5687500238418579, "ranking_idealized_expo": 0.543749988079071, "ranking_simple": 0.5562499761581421, "regularize": 0.1449788361787796, "step": 1610 }, { "dpo_loss": 0.7017780542373657, "epoch": 3.0514879546528104, "grad_norm": 71.05456640227479, "learning_rate": 4.392660094985276e-07, "logits": -1.2647464275360107, "logps": -88.5762710571289, "loss": 0.2331, "objective": 0.23695608973503113, "ranking_idealized": 0.4937500059604645, "ranking_idealized_expo": 0.5, "ranking_simple": 0.4937500059604645, "regularize": 0.1667782962322235, "step": 1615 }, { "dpo_loss": 0.6860125660896301, "epoch": 3.0609352857817664, "grad_norm": 73.51188299769646, "learning_rate": 4.38726088774461e-07, "logits": -1.3439459800720215, "logps": -88.92823791503906, "loss": 0.2232, "objective": 0.21951046586036682, "ranking_idealized": 0.6000000238418579, "ranking_idealized_expo": 0.5687500238418579, "ranking_simple": 0.5687500238418579, "regularize": 0.1509092003107071, "step": 1620 }, { "dpo_loss": 0.6869057416915894, "epoch": 3.070382616910723, "grad_norm": 81.04643217611545, "learning_rate": 4.3818411369802795e-07, "logits": -1.281882882118225, "logps": -88.72843933105469, "loss": 0.222, "objective": 0.22023515403270721, "ranking_idealized": 0.518750011920929, "ranking_idealized_expo": 0.4937500059604645, "ranking_simple": 0.5, "regularize": 0.15154457092285156, "step": 1625 }, { "dpo_loss": 0.6761130094528198, "epoch": 3.079829948039679, "grad_norm": 77.28947505018488, "learning_rate": 4.3764009016882484e-07, "logits": -1.2218523025512695, "logps": -88.16899108886719, "loss": 0.2295, "objective": 0.22817304730415344, "ranking_idealized": 0.5, "ranking_idealized_expo": 0.48124998807907104, "ranking_simple": 0.48124998807907104, "regularize": 0.1605617254972458, "step": 1630 }, { "dpo_loss": 0.6863864064216614, "epoch": 3.089277279168635, "grad_norm": 80.79575367937582, "learning_rate": 4.370940241087466e-07, "logits": -1.2691013813018799, "logps": -85.53553771972656, "loss": 0.2408, "objective": 0.22911615669727325, "ranking_idealized": 0.5874999761581421, "ranking_idealized_expo": 0.53125, "ranking_simple": 0.518750011920929, "regularize": 0.16047748923301697, "step": 1635 }, { "dpo_loss": 0.6985718011856079, "epoch": 3.098724610297591, "grad_norm": 76.70049039283718, "learning_rate": 4.3654592146192137e-07, "logits": -1.193575382232666, "logps": -88.6602554321289, "loss": 0.2323, "objective": 0.22806735336780548, "ranking_idealized": 0.5562499761581421, "ranking_idealized_expo": 0.5375000238418579, "ranking_simple": 0.5375000238418579, "regularize": 0.1582101434469223, "step": 1640 }, { "dpo_loss": 0.6904804706573486, "epoch": 3.108171941426547, "grad_norm": 73.8243508955083, "learning_rate": 4.3599578819464664e-07, "logits": -1.2254277467727661, "logps": -88.09333801269531, "loss": 0.2229, "objective": 0.2212776392698288, "ranking_idealized": 0.59375, "ranking_idealized_expo": 0.5375000238418579, "ranking_simple": 0.5375000238418579, "regularize": 0.15222959220409393, "step": 1645 }, { "dpo_loss": 0.6988715529441833, "epoch": 3.117619272555503, "grad_norm": 70.58207807986498, "learning_rate": 4.3544363029532394e-07, "logits": -1.288346767425537, "logps": -88.28787231445312, "loss": 0.2227, "objective": 0.22656592726707458, "ranking_idealized": 0.5, "ranking_idealized_expo": 0.48124998807907104, "ranking_simple": 0.4749999940395355, "regularize": 0.15667878091335297, "step": 1650 }, { "epoch": 3.117619272555503, "eval_dpo_loss": 0.7025489807128906, "eval_logits": -1.233197569847107, "eval_logps": -93.4762954711914, "eval_loss": 0.2854098975658417, "eval_objective": 0.28514111042022705, "eval_ranking_idealized": 0.5398550629615784, "eval_ranking_idealized_expo": 0.5243270993232727, "eval_ranking_simple": 0.52173912525177, "eval_regularize": 0.21488620340824127, "eval_runtime": 159.1208, "eval_samples_per_second": 36.387, "eval_steps_per_second": 3.035, "step": 1650 }, { "dpo_loss": 0.6795647144317627, "epoch": 3.127066603684459, "grad_norm": 77.06489456346192, "learning_rate": 4.3488945377439344e-07, "logits": -1.2286312580108643, "logps": -87.36393737792969, "loss": 0.2162, "objective": 0.21354565024375916, "ranking_idealized": 0.543749988079071, "ranking_idealized_expo": 0.48750001192092896, "ranking_simple": 0.48750001192092896, "regularize": 0.14558915793895721, "step": 1655 }, { "dpo_loss": 0.6925145387649536, "epoch": 3.136513934813415, "grad_norm": 67.06324257323814, "learning_rate": 4.3433326466426884e-07, "logits": -1.2775371074676514, "logps": -88.46916198730469, "loss": 0.2086, "objective": 0.1956256926059723, "ranking_idealized": 0.518750011920929, "ranking_idealized_expo": 0.512499988079071, "ranking_simple": 0.512499988079071, "regularize": 0.1263742297887802, "step": 1660 }, { "dpo_loss": 0.6909645795822144, "epoch": 3.1459612659423715, "grad_norm": 77.02733659224373, "learning_rate": 4.337750690192717e-07, "logits": -1.1966722011566162, "logps": -88.89064025878906, "loss": 0.2323, "objective": 0.23447394371032715, "ranking_idealized": 0.543749988079071, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.5375000238418579, "regularize": 0.16537746787071228, "step": 1665 }, { "dpo_loss": 0.6765434741973877, "epoch": 3.1554085970713275, "grad_norm": 76.79340939576753, "learning_rate": 4.3321487291556537e-07, "logits": -1.2806079387664795, "logps": -88.34075927734375, "loss": 0.2341, "objective": 0.22429628670215607, "ranking_idealized": 0.512499988079071, "ranking_idealized_expo": 0.46875, "ranking_simple": 0.46875, "regularize": 0.15664193034172058, "step": 1670 }, { "dpo_loss": 0.6903152465820312, "epoch": 3.1648559282002835, "grad_norm": 71.06959581471867, "learning_rate": 4.326526824510888e-07, "logits": -1.1770598888397217, "logps": -87.21501922607422, "loss": 0.2203, "objective": 0.22811034321784973, "ranking_idealized": 0.574999988079071, "ranking_idealized_expo": 0.581250011920929, "ranking_simple": 0.5874999761581421, "regularize": 0.15907880663871765, "step": 1675 }, { "dpo_loss": 0.6848000288009644, "epoch": 3.1743032593292395, "grad_norm": 71.53335354112947, "learning_rate": 4.3208850374549066e-07, "logits": -1.2417142391204834, "logps": -85.82376861572266, "loss": 0.2254, "objective": 0.22943010926246643, "ranking_idealized": 0.5062500238418579, "ranking_idealized_expo": 0.48750001192092896, "ranking_simple": 0.4937500059604645, "regularize": 0.16095009446144104, "step": 1680 }, { "dpo_loss": 0.6956906914710999, "epoch": 3.1837505904581955, "grad_norm": 68.21329452472561, "learning_rate": 4.3152234294006185e-07, "logits": -1.2465617656707764, "logps": -87.61761474609375, "loss": 0.2211, "objective": 0.22904673218727112, "ranking_idealized": 0.5562499761581421, "ranking_idealized_expo": 0.512499988079071, "ranking_simple": 0.5, "regularize": 0.15947763621807098, "step": 1685 }, { "dpo_loss": 0.6885348558425903, "epoch": 3.1931979215871515, "grad_norm": 72.44090594485864, "learning_rate": 4.309542061976695e-07, "logits": -1.2819534540176392, "logps": -89.31121063232422, "loss": 0.2324, "objective": 0.23005850613117218, "ranking_idealized": 0.5249999761581421, "ranking_idealized_expo": 0.48750001192092896, "ranking_simple": 0.4937500059604645, "regularize": 0.16120502352714539, "step": 1690 }, { "dpo_loss": 0.6953254342079163, "epoch": 3.2026452527161076, "grad_norm": 80.57191405511507, "learning_rate": 4.303840997026895e-07, "logits": -1.3103854656219482, "logps": -89.79373168945312, "loss": 0.2321, "objective": 0.24063341319561005, "ranking_idealized": 0.543749988079071, "ranking_idealized_expo": 0.53125, "ranking_simple": 0.5375000238418579, "regularize": 0.17110088467597961, "step": 1695 }, { "dpo_loss": 0.6881821751594543, "epoch": 3.2120925838450636, "grad_norm": 71.42605462474872, "learning_rate": 4.298120296609391e-07, "logits": -1.2566523551940918, "logps": -88.62003326416016, "loss": 0.2123, "objective": 0.205051988363266, "ranking_idealized": 0.4937500059604645, "ranking_idealized_expo": 0.48750001192092896, "ranking_simple": 0.48124998807907104, "regularize": 0.13623377680778503, "step": 1700 }, { "epoch": 3.2120925838450636, "eval_dpo_loss": 0.7008152604103088, "eval_logits": -1.2311402559280396, "eval_logps": -95.69064331054688, "eval_loss": 0.275156170129776, "eval_objective": 0.27562105655670166, "eval_ranking_idealized": 0.5398550629615784, "eval_ranking_idealized_expo": 0.5243270993232727, "eval_ranking_simple": 0.5232919454574585, "eval_regularize": 0.2055395096540451, "eval_runtime": 157.5719, "eval_samples_per_second": 36.745, "eval_steps_per_second": 3.065, "step": 1700 }, { "dpo_loss": 0.7004303932189941, "epoch": 3.22153991497402, "grad_norm": 78.61435190569276, "learning_rate": 4.292380022996094e-07, "logits": -1.2356593608856201, "logps": -88.43986511230469, "loss": 0.2149, "objective": 0.2104000598192215, "ranking_idealized": 0.44999998807907104, "ranking_idealized_expo": 0.4124999940395355, "ranking_simple": 0.42500001192092896, "regularize": 0.14035701751708984, "step": 1705 }, { "dpo_loss": 0.6839348673820496, "epoch": 3.230987246102976, "grad_norm": 72.2767944523041, "learning_rate": 4.2866202386719806e-07, "logits": -1.186471939086914, "logps": -89.38927459716797, "loss": 0.218, "objective": 0.20395776629447937, "ranking_idealized": 0.5874999761581421, "ranking_idealized_expo": 0.5562499761581421, "ranking_simple": 0.550000011920929, "regularize": 0.13556428253650665, "step": 1710 }, { "dpo_loss": 0.6864286661148071, "epoch": 3.240434577231932, "grad_norm": 77.24847322414985, "learning_rate": 4.2808410063344025e-07, "logits": -1.2315725088119507, "logps": -90.61546325683594, "loss": 0.2224, "objective": 0.22162394225597382, "ranking_idealized": 0.5562499761581421, "ranking_idealized_expo": 0.518750011920929, "ranking_simple": 0.5249999761581421, "regularize": 0.15298107266426086, "step": 1715 }, { "dpo_loss": 0.6974970102310181, "epoch": 3.249881908360888, "grad_norm": 76.03703557994152, "learning_rate": 4.2750423888924156e-07, "logits": -1.2265657186508179, "logps": -90.89115905761719, "loss": 0.2318, "objective": 0.2390907108783722, "ranking_idealized": 0.574999988079071, "ranking_idealized_expo": 0.5375000238418579, "ranking_simple": 0.5375000238418579, "regularize": 0.16934101283550262, "step": 1720 }, { "dpo_loss": 0.6898042559623718, "epoch": 3.259329239489844, "grad_norm": 69.31924665387766, "learning_rate": 4.2692244494660854e-07, "logits": -1.256665587425232, "logps": -87.42665100097656, "loss": 0.2255, "objective": 0.21517562866210938, "ranking_idealized": 0.59375, "ranking_idealized_expo": 0.5625, "ranking_simple": 0.550000011920929, "regularize": 0.1461952179670334, "step": 1725 }, { "dpo_loss": 0.6960927248001099, "epoch": 3.2687765706188, "grad_norm": 74.63257715863116, "learning_rate": 4.263387251385808e-07, "logits": -1.2075088024139404, "logps": -88.08040618896484, "loss": 0.2275, "objective": 0.2128901183605194, "ranking_idealized": 0.5874999761581421, "ranking_idealized_expo": 0.5562499761581421, "ranking_simple": 0.550000011920929, "regularize": 0.14328083395957947, "step": 1730 }, { "dpo_loss": 0.6903256177902222, "epoch": 3.278223901747756, "grad_norm": 75.40242294372213, "learning_rate": 4.2575308581916147e-07, "logits": -1.2131913900375366, "logps": -92.84107971191406, "loss": 0.2352, "objective": 0.2373063564300537, "ranking_idealized": 0.5249999761581421, "ranking_idealized_expo": 0.53125, "ranking_simple": 0.53125, "regularize": 0.16827379167079926, "step": 1735 }, { "dpo_loss": 0.6846415400505066, "epoch": 3.287671232876712, "grad_norm": 76.10083653558631, "learning_rate": 4.251655333632482e-07, "logits": -1.273949384689331, "logps": -90.49128723144531, "loss": 0.2297, "objective": 0.21975067257881165, "ranking_idealized": 0.48124998807907104, "ranking_idealized_expo": 0.4749999940395355, "ranking_simple": 0.48750001192092896, "regularize": 0.1512865275144577, "step": 1740 }, { "dpo_loss": 0.6826716065406799, "epoch": 3.297118564005668, "grad_norm": 74.85403338341007, "learning_rate": 4.2457607416656417e-07, "logits": -1.2469581365585327, "logps": -90.03531646728516, "loss": 0.2201, "objective": 0.2303483486175537, "ranking_idealized": 0.53125, "ranking_idealized_expo": 0.5, "ranking_simple": 0.5, "regularize": 0.16208119690418243, "step": 1745 }, { "dpo_loss": 0.688482403755188, "epoch": 3.3065658951346246, "grad_norm": 69.73525123761033, "learning_rate": 4.239847146455877e-07, "logits": -1.254630208015442, "logps": -89.3567886352539, "loss": 0.218, "objective": 0.23068399727344513, "ranking_idealized": 0.5625, "ranking_idealized_expo": 0.53125, "ranking_simple": 0.53125, "regularize": 0.16183575987815857, "step": 1750 }, { "epoch": 3.3065658951346246, "eval_dpo_loss": 0.7037237882614136, "eval_logits": -1.2166752815246582, "eval_logps": -95.90421295166016, "eval_loss": 0.279991090297699, "eval_objective": 0.27828940749168396, "eval_ranking_idealized": 0.5398550629615784, "eval_ranking_idealized_expo": 0.5243270993232727, "eval_ranking_simple": 0.523809552192688, "eval_regularize": 0.20791704952716827, "eval_runtime": 158.1426, "eval_samples_per_second": 36.613, "eval_steps_per_second": 3.054, "step": 1750 }, { "dpo_loss": 0.7040959596633911, "epoch": 3.3160132262635806, "grad_norm": 79.0729106207761, "learning_rate": 4.233914612374832e-07, "logits": -1.2124968767166138, "logps": -91.5573959350586, "loss": 0.2265, "objective": 0.23880453407764435, "ranking_idealized": 0.512499988079071, "ranking_idealized_expo": 0.4749999940395355, "ranking_simple": 0.4749999940395355, "regularize": 0.16839495301246643, "step": 1755 }, { "dpo_loss": 0.6873825788497925, "epoch": 3.3254605573925367, "grad_norm": 79.43729294619003, "learning_rate": 4.227963204000304e-07, "logits": -1.2346994876861572, "logps": -90.03487396240234, "loss": 0.2216, "objective": 0.2150363028049469, "ranking_idealized": 0.518750011920929, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.5249999761581421, "regularize": 0.14629802107810974, "step": 1760 }, { "dpo_loss": 0.7011836767196655, "epoch": 3.3349078885214927, "grad_norm": 73.36113823463724, "learning_rate": 4.221992986115548e-07, "logits": -1.309787392616272, "logps": -89.97632598876953, "loss": 0.2261, "objective": 0.23244591057300568, "ranking_idealized": 0.4749999940395355, "ranking_idealized_expo": 0.41874998807907104, "ranking_simple": 0.42500001192092896, "regularize": 0.16232752799987793, "step": 1765 }, { "dpo_loss": 0.6768354177474976, "epoch": 3.3443552196504487, "grad_norm": 79.18185589393623, "learning_rate": 4.2160040237085627e-07, "logits": -1.3097981214523315, "logps": -93.37566375732422, "loss": 0.2236, "objective": 0.23846200108528137, "ranking_idealized": 0.5062500238418579, "ranking_idealized_expo": 0.5062500238418579, "ranking_simple": 0.5062500238418579, "regularize": 0.17077843844890594, "step": 1770 }, { "dpo_loss": 0.6806414127349854, "epoch": 3.3538025507794047, "grad_norm": 76.07180693015962, "learning_rate": 4.2099963819713896e-07, "logits": -1.2354665994644165, "logps": -92.22715759277344, "loss": 0.2161, "objective": 0.22700223326683044, "ranking_idealized": 0.6000000238418579, "ranking_idealized_expo": 0.543749988079071, "ranking_simple": 0.550000011920929, "regularize": 0.15893810987472534, "step": 1775 }, { "dpo_loss": 0.6920955777168274, "epoch": 3.3632498819083607, "grad_norm": 78.86169700611478, "learning_rate": 4.203970126299404e-07, "logits": -1.1942596435546875, "logps": -89.15409088134766, "loss": 0.236, "objective": 0.22877366840839386, "ranking_idealized": 0.543749988079071, "ranking_idealized_expo": 0.518750011920929, "ranking_simple": 0.518750011920929, "regularize": 0.15956410765647888, "step": 1780 }, { "dpo_loss": 0.7036693692207336, "epoch": 3.372697213037317, "grad_norm": 75.60890885363388, "learning_rate": 4.1979253222905984e-07, "logits": -1.3075898885726929, "logps": -91.54581451416016, "loss": 0.2256, "objective": 0.2090461701154709, "ranking_idealized": 0.53125, "ranking_idealized_expo": 0.5062500238418579, "ranking_simple": 0.512499988079071, "regularize": 0.13867922127246857, "step": 1785 }, { "dpo_loss": 0.684840977191925, "epoch": 3.382144544166273, "grad_norm": 74.14098312646254, "learning_rate": 4.19186203574487e-07, "logits": -1.1911137104034424, "logps": -91.33808135986328, "loss": 0.2194, "objective": 0.2209484875202179, "ranking_idealized": 0.53125, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.518750011920929, "regularize": 0.1524643748998642, "step": 1790 }, { "dpo_loss": 0.7015503644943237, "epoch": 3.391591875295229, "grad_norm": 75.93493261152722, "learning_rate": 4.185780332663306e-07, "logits": -1.3089752197265625, "logps": -90.48160552978516, "loss": 0.2183, "objective": 0.21567769348621368, "ranking_idealized": 0.518750011920929, "ranking_idealized_expo": 0.5, "ranking_simple": 0.5, "regularize": 0.14552268385887146, "step": 1795 }, { "dpo_loss": 0.7126961350440979, "epoch": 3.4010392064241852, "grad_norm": 75.44154451389085, "learning_rate": 4.1796802792474655e-07, "logits": -1.282238245010376, "logps": -89.86241912841797, "loss": 0.2086, "objective": 0.21801860630512238, "ranking_idealized": 0.5687500238418579, "ranking_idealized_expo": 0.550000011920929, "ranking_simple": 0.53125, "regularize": 0.14674898982048035, "step": 1800 }, { "epoch": 3.4010392064241852, "eval_dpo_loss": 0.7027232646942139, "eval_logits": -1.2183377742767334, "eval_logps": -95.69831085205078, "eval_loss": 0.29446694254875183, "eval_objective": 0.2932322025299072, "eval_ranking_idealized": 0.5398550629615784, "eval_ranking_idealized_expo": 0.5243270993232727, "eval_ranking_simple": 0.5232919454574585, "eval_regularize": 0.22295984625816345, "eval_runtime": 157.9813, "eval_samples_per_second": 36.65, "eval_steps_per_second": 3.057, "step": 1800 }, { "dpo_loss": 0.6810536980628967, "epoch": 3.4104865375531412, "grad_norm": 71.63965639419007, "learning_rate": 4.1735619418986554e-07, "logits": -1.2107884883880615, "logps": -90.13643646240234, "loss": 0.2135, "objective": 0.20621566474437714, "ranking_idealized": 0.5874999761581421, "ranking_idealized_expo": 0.5625, "ranking_simple": 0.5687500238418579, "regularize": 0.13811030983924866, "step": 1805 }, { "dpo_loss": 0.6831521987915039, "epoch": 3.4199338686820973, "grad_norm": 72.26883405440417, "learning_rate": 4.1674253872172126e-07, "logits": -1.2936310768127441, "logps": -90.55397033691406, "loss": 0.2152, "objective": 0.21073338389396667, "ranking_idealized": 0.5249999761581421, "ranking_idealized_expo": 0.4625000059604645, "ranking_simple": 0.4749999940395355, "regularize": 0.14241817593574524, "step": 1810 }, { "dpo_loss": 0.6987210512161255, "epoch": 3.4293811998110533, "grad_norm": 76.83410855661971, "learning_rate": 4.1612706820017735e-07, "logits": -1.214812994003296, "logps": -90.34785461425781, "loss": 0.2151, "objective": 0.21492168307304382, "ranking_idealized": 0.5375000238418579, "ranking_idealized_expo": 0.5062500238418579, "ranking_simple": 0.4937500059604645, "regularize": 0.145049586892128, "step": 1815 }, { "dpo_loss": 0.6953445672988892, "epoch": 3.4388285309400093, "grad_norm": 73.25854640302302, "learning_rate": 4.1550978932485516e-07, "logits": -1.2482576370239258, "logps": -90.07608795166016, "loss": 0.2118, "objective": 0.2104315310716629, "ranking_idealized": 0.5249999761581421, "ranking_idealized_expo": 0.46875, "ranking_simple": 0.46875, "regularize": 0.14089706540107727, "step": 1820 }, { "dpo_loss": 0.6868658065795898, "epoch": 3.4482758620689653, "grad_norm": 70.67689663662213, "learning_rate": 4.1489070881506053e-07, "logits": -1.217498779296875, "logps": -90.81259155273438, "loss": 0.2224, "objective": 0.21237485110759735, "ranking_idealized": 0.5062500238418579, "ranking_idealized_expo": 0.5, "ranking_simple": 0.512499988079071, "regularize": 0.14368824660778046, "step": 1825 }, { "dpo_loss": 0.7178629636764526, "epoch": 3.4577231931979218, "grad_norm": 74.0418276231077, "learning_rate": 4.142698334097109e-07, "logits": -1.2087634801864624, "logps": -89.52534484863281, "loss": 0.2271, "objective": 0.24181203544139862, "ranking_idealized": 0.550000011920929, "ranking_idealized_expo": 0.53125, "ranking_simple": 0.5375000238418579, "regularize": 0.17002572119235992, "step": 1830 }, { "dpo_loss": 0.6907800436019897, "epoch": 3.467170524326878, "grad_norm": 74.29390968359742, "learning_rate": 4.1364716986726147e-07, "logits": -1.2132540941238403, "logps": -87.84049987792969, "loss": 0.2119, "objective": 0.22763434052467346, "ranking_idealized": 0.518750011920929, "ranking_idealized_expo": 0.48124998807907104, "ranking_simple": 0.4749999940395355, "regularize": 0.15855631232261658, "step": 1835 }, { "dpo_loss": 0.6834974884986877, "epoch": 3.476617855455834, "grad_norm": 70.530010249313, "learning_rate": 4.130227249656324e-07, "logits": -1.290542483329773, "logps": -89.50587463378906, "loss": 0.2227, "objective": 0.22068659961223602, "ranking_idealized": 0.53125, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.518750011920929, "regularize": 0.15233686566352844, "step": 1840 }, { "dpo_loss": 0.6878833770751953, "epoch": 3.48606518658479, "grad_norm": 66.897938998051, "learning_rate": 4.1239650550213435e-07, "logits": -1.2337359189987183, "logps": -87.03044128417969, "loss": 0.2064, "objective": 0.22119465470314026, "ranking_idealized": 0.543749988079071, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.550000011920929, "regularize": 0.15240630507469177, "step": 1845 }, { "dpo_loss": 0.7014085650444031, "epoch": 3.495512517713746, "grad_norm": 70.40529049471338, "learning_rate": 4.1176851829339465e-07, "logits": -1.3215398788452148, "logps": -87.68231201171875, "loss": 0.216, "objective": 0.21836349368095398, "ranking_idealized": 0.5375000238418579, "ranking_idealized_expo": 0.5062500238418579, "ranking_simple": 0.5, "regularize": 0.1482226401567459, "step": 1850 }, { "epoch": 3.495512517713746, "eval_dpo_loss": 0.702814519405365, "eval_logits": -1.22354257106781, "eval_logps": -93.07842254638672, "eval_loss": 0.28953778743743896, "eval_objective": 0.28734228014945984, "eval_ranking_idealized": 0.5398550629615784, "eval_ranking_idealized_expo": 0.5243270993232727, "eval_ranking_simple": 0.5212215185165405, "eval_regularize": 0.21706083416938782, "eval_runtime": 157.9047, "eval_samples_per_second": 36.668, "eval_steps_per_second": 3.059, "step": 1850 }, { "dpo_loss": 0.6787363290786743, "epoch": 3.504959848842702, "grad_norm": 73.96509104787444, "learning_rate": 4.111387701752834e-07, "logits": -1.1543210744857788, "logps": -88.16014099121094, "loss": 0.2149, "objective": 0.23128505051136017, "ranking_idealized": 0.59375, "ranking_idealized_expo": 0.5687500238418579, "ranking_simple": 0.574999988079071, "regularize": 0.16341140866279602, "step": 1855 }, { "dpo_loss": 0.6856968998908997, "epoch": 3.514407179971658, "grad_norm": 68.14542483518404, "learning_rate": 4.1050726800283886e-07, "logits": -1.2676688432693481, "logps": -85.95804595947266, "loss": 0.2151, "objective": 0.20490717887878418, "ranking_idealized": 0.48124998807907104, "ranking_idealized_expo": 0.45625001192092896, "ranking_simple": 0.4625000059604645, "regularize": 0.13633747398853302, "step": 1860 }, { "dpo_loss": 0.6999458074569702, "epoch": 3.5238545111006143, "grad_norm": 74.92773908196327, "learning_rate": 4.0987401865019246e-07, "logits": -1.2837023735046387, "logps": -87.63172149658203, "loss": 0.2158, "objective": 0.21518392860889435, "ranking_idealized": 0.574999988079071, "ranking_idealized_expo": 0.5625, "ranking_simple": 0.5625, "regularize": 0.1451893448829651, "step": 1865 }, { "dpo_loss": 0.6849756836891174, "epoch": 3.5333018422295703, "grad_norm": 71.18170455631198, "learning_rate": 4.092390290104946e-07, "logits": -1.2384998798370361, "logps": -89.23136901855469, "loss": 0.2155, "objective": 0.22379860281944275, "ranking_idealized": 0.5562499761581421, "ranking_idealized_expo": 0.5375000238418579, "ranking_simple": 0.5375000238418579, "regularize": 0.1553010642528534, "step": 1870 }, { "dpo_loss": 0.6910787224769592, "epoch": 3.5427491733585263, "grad_norm": 70.20472742023536, "learning_rate": 4.086023059958393e-07, "logits": -1.22196364402771, "logps": -87.11416625976562, "loss": 0.2314, "objective": 0.23368553817272186, "ranking_idealized": 0.581250011920929, "ranking_idealized_expo": 0.543749988079071, "ranking_simple": 0.550000011920929, "regularize": 0.1645776778459549, "step": 1875 }, { "dpo_loss": 0.6986586451530457, "epoch": 3.5521965044874824, "grad_norm": 68.61044209679825, "learning_rate": 4.0796385653718916e-07, "logits": -1.2331111431121826, "logps": -87.99723815917969, "loss": 0.2188, "objective": 0.23793819546699524, "ranking_idealized": 0.4625000059604645, "ranking_idealized_expo": 0.45625001192092896, "ranking_simple": 0.46875, "regularize": 0.16807231307029724, "step": 1880 }, { "dpo_loss": 0.6858953237533569, "epoch": 3.5616438356164384, "grad_norm": 75.74083195147854, "learning_rate": 4.073236875842995e-07, "logits": -1.2994650602340698, "logps": -87.44940185546875, "loss": 0.2144, "objective": 0.22179841995239258, "ranking_idealized": 0.5687500238418579, "ranking_idealized_expo": 0.5, "ranking_simple": 0.4937500059604645, "regularize": 0.1532088816165924, "step": 1885 }, { "dpo_loss": 0.6975304484367371, "epoch": 3.5710911667453944, "grad_norm": 74.25528642912279, "learning_rate": 4.06681806105643e-07, "logits": -1.2362072467803955, "logps": -88.1231689453125, "loss": 0.2207, "objective": 0.21597766876220703, "ranking_idealized": 0.581250011920929, "ranking_idealized_expo": 0.550000011920929, "ranking_simple": 0.550000011920929, "regularize": 0.14622461795806885, "step": 1890 }, { "dpo_loss": 0.6833354234695435, "epoch": 3.5805384978743504, "grad_norm": 71.48737502356961, "learning_rate": 4.060382190883338e-07, "logits": -1.2433570623397827, "logps": -87.79743194580078, "loss": 0.2115, "objective": 0.23286516964435577, "ranking_idealized": 0.581250011920929, "ranking_idealized_expo": 0.543749988079071, "ranking_simple": 0.550000011920929, "regularize": 0.1645316183567047, "step": 1895 }, { "dpo_loss": 0.7012842297554016, "epoch": 3.5899858290033064, "grad_norm": 70.89129174284837, "learning_rate": 4.053929335380516e-07, "logits": -1.219341516494751, "logps": -88.85891723632812, "loss": 0.2182, "objective": 0.19823125004768372, "ranking_idealized": 0.48124998807907104, "ranking_idealized_expo": 0.45625001192092896, "ranking_simple": 0.45625001192092896, "regularize": 0.12810282409191132, "step": 1900 }, { "epoch": 3.5899858290033064, "eval_dpo_loss": 0.7018768191337585, "eval_logits": -1.2137829065322876, "eval_logps": -95.2384033203125, "eval_loss": 0.29732540249824524, "eval_objective": 0.2976545989513397, "eval_ranking_idealized": 0.5398550629615784, "eval_ranking_idealized_expo": 0.5243270993232727, "eval_ranking_simple": 0.520703911781311, "eval_regularize": 0.2274669110774994, "eval_runtime": 157.4719, "eval_samples_per_second": 36.768, "eval_steps_per_second": 3.067, "step": 1900 }, { "dpo_loss": 0.7078115344047546, "epoch": 3.5994331601322624, "grad_norm": 66.4607315520783, "learning_rate": 4.047459564789653e-07, "logits": -1.29160737991333, "logps": -89.04077911376953, "loss": 0.2096, "objective": 0.20699355006217957, "ranking_idealized": 0.4937500059604645, "ranking_idealized_expo": 0.4749999940395355, "ranking_simple": 0.48750001192092896, "regularize": 0.13621237874031067, "step": 1905 }, { "dpo_loss": 0.6720970869064331, "epoch": 3.6088804912612185, "grad_norm": 71.70509436040507, "learning_rate": 4.040972949536561e-07, "logits": -1.1920585632324219, "logps": -89.29122161865234, "loss": 0.2159, "objective": 0.20662541687488556, "ranking_idealized": 0.53125, "ranking_idealized_expo": 0.518750011920929, "ranking_simple": 0.518750011920929, "regularize": 0.13941572606563568, "step": 1910 }, { "dpo_loss": 0.6973280310630798, "epoch": 3.618327822390175, "grad_norm": 71.88875462075627, "learning_rate": 4.0344695602304157e-07, "logits": -1.2554800510406494, "logps": -90.129150390625, "loss": 0.211, "objective": 0.2120811492204666, "ranking_idealized": 0.53125, "ranking_idealized_expo": 0.46875, "ranking_simple": 0.4625000059604645, "regularize": 0.14234834909439087, "step": 1915 }, { "dpo_loss": 0.6800395846366882, "epoch": 3.627775153519131, "grad_norm": 74.42477707252212, "learning_rate": 4.0279494676629844e-07, "logits": -1.2180017232894897, "logps": -88.39971160888672, "loss": 0.2152, "objective": 0.21058933436870575, "ranking_idealized": 0.46875, "ranking_idealized_expo": 0.4437499940395355, "ranking_simple": 0.44999998807907104, "regularize": 0.1425853818655014, "step": 1920 }, { "dpo_loss": 0.6923831701278687, "epoch": 3.637222484648087, "grad_norm": 73.34941427015573, "learning_rate": 4.021412742807854e-07, "logits": -1.1889969110488892, "logps": -86.75862121582031, "loss": 0.2262, "objective": 0.21546685695648193, "ranking_idealized": 0.5249999761581421, "ranking_idealized_expo": 0.48124998807907104, "ranking_simple": 0.48124998807907104, "regularize": 0.14622853696346283, "step": 1925 }, { "dpo_loss": 0.6955797076225281, "epoch": 3.646669815777043, "grad_norm": 73.03460385154673, "learning_rate": 4.01485945681966e-07, "logits": -1.1690335273742676, "logps": -90.11813354492188, "loss": 0.2297, "objective": 0.2290828675031662, "ranking_idealized": 0.550000011920929, "ranking_idealized_expo": 0.543749988079071, "ranking_simple": 0.543749988079071, "regularize": 0.15952490270137787, "step": 1930 }, { "dpo_loss": 0.6710019111633301, "epoch": 3.656117146905999, "grad_norm": 74.1516440000823, "learning_rate": 4.0082896810333144e-07, "logits": -1.1948826313018799, "logps": -86.81245422363281, "loss": 0.2282, "objective": 0.234673410654068, "ranking_idealized": 0.543749988079071, "ranking_idealized_expo": 0.512499988079071, "ranking_simple": 0.5, "regularize": 0.1675732135772705, "step": 1935 }, { "dpo_loss": 0.6779486536979675, "epoch": 3.665564478034955, "grad_norm": 73.38625711928812, "learning_rate": 4.001703486963223e-07, "logits": -1.2244865894317627, "logps": -88.3202896118164, "loss": 0.2144, "objective": 0.21537482738494873, "ranking_idealized": 0.5062500238418579, "ranking_idealized_expo": 0.5, "ranking_simple": 0.48750001192092896, "regularize": 0.14757993817329407, "step": 1940 }, { "dpo_loss": 0.6879987716674805, "epoch": 3.6750118091639115, "grad_norm": 71.15714667512788, "learning_rate": 3.9951009463025125e-07, "logits": -1.2248257398605347, "logps": -85.404541015625, "loss": 0.212, "objective": 0.20684568583965302, "ranking_idealized": 0.53125, "ranking_idealized_expo": 0.512499988079071, "ranking_simple": 0.5249999761581421, "regularize": 0.1380458027124405, "step": 1945 }, { "dpo_loss": 0.6955716013908386, "epoch": 3.6844591402928675, "grad_norm": 69.03202990753444, "learning_rate": 3.988482130922249e-07, "logits": -1.2241899967193604, "logps": -87.9198226928711, "loss": 0.2097, "objective": 0.21265700459480286, "ranking_idealized": 0.59375, "ranking_idealized_expo": 0.5562499761581421, "ranking_simple": 0.543749988079071, "regularize": 0.143099844455719, "step": 1950 }, { "epoch": 3.6844591402928675, "eval_dpo_loss": 0.7046319842338562, "eval_logits": -1.2111308574676514, "eval_logps": -93.49398040771484, "eval_loss": 0.3022589683532715, "eval_objective": 0.29996559023857117, "eval_ranking_idealized": 0.5398550629615784, "eval_ranking_idealized_expo": 0.5243270993232727, "eval_ranking_simple": 0.52173912525177, "eval_regularize": 0.2295023649930954, "eval_runtime": 160.3461, "eval_samples_per_second": 36.109, "eval_steps_per_second": 3.012, "step": 1950 }, { "dpo_loss": 0.6958789229393005, "epoch": 3.6939064714218235, "grad_norm": 68.69646746773823, "learning_rate": 3.981847112870654e-07, "logits": -1.222076177597046, "logps": -87.19930267333984, "loss": 0.214, "objective": 0.22020037472248077, "ranking_idealized": 0.5249999761581421, "ranking_idealized_expo": 0.4749999940395355, "ranking_simple": 0.4625000059604645, "regularize": 0.1506124883890152, "step": 1955 }, { "dpo_loss": 0.6883679032325745, "epoch": 3.7033538025507795, "grad_norm": 74.56605724250167, "learning_rate": 3.9751959643723215e-07, "logits": -1.209473729133606, "logps": -89.5036849975586, "loss": 0.2277, "objective": 0.2188255488872528, "ranking_idealized": 0.4937500059604645, "ranking_idealized_expo": 0.46875, "ranking_simple": 0.4749999940395355, "regularize": 0.14998875558376312, "step": 1960 }, { "dpo_loss": 0.6816436052322388, "epoch": 3.7128011336797355, "grad_norm": 73.39352968814994, "learning_rate": 3.9685287578274284e-07, "logits": -1.2119733095169067, "logps": -87.68373107910156, "loss": 0.2122, "objective": 0.22281642258167267, "ranking_idealized": 0.5625, "ranking_idealized_expo": 0.5062500238418579, "ranking_simple": 0.512499988079071, "regularize": 0.15465207397937775, "step": 1965 }, { "dpo_loss": 0.6812302470207214, "epoch": 3.7222484648086915, "grad_norm": 73.99180921100901, "learning_rate": 3.961845565810954e-07, "logits": -1.2347557544708252, "logps": -87.96427917480469, "loss": 0.2161, "objective": 0.20007386803627014, "ranking_idealized": 0.5062500238418579, "ranking_idealized_expo": 0.4749999940395355, "ranking_simple": 0.46875, "regularize": 0.13195081055164337, "step": 1970 }, { "dpo_loss": 0.6964179277420044, "epoch": 3.7316957959376476, "grad_norm": 72.84292160439442, "learning_rate": 3.9551464610718815e-07, "logits": -1.2395890951156616, "logps": -87.82969665527344, "loss": 0.2033, "objective": 0.19435453414916992, "ranking_idealized": 0.53125, "ranking_idealized_expo": 0.5375000238418579, "ranking_simple": 0.512499988079071, "regularize": 0.12471272796392441, "step": 1975 }, { "dpo_loss": 0.6921079158782959, "epoch": 3.7411431270666036, "grad_norm": 66.93544436043771, "learning_rate": 3.9484315165324123e-07, "logits": -1.3096370697021484, "logps": -89.51728820800781, "loss": 0.2112, "objective": 0.22608831524848938, "ranking_idealized": 0.6312500238418579, "ranking_idealized_expo": 0.6000000238418579, "ranking_simple": 0.59375, "regularize": 0.1568775177001953, "step": 1980 }, { "dpo_loss": 0.6992356777191162, "epoch": 3.7505904581955596, "grad_norm": 70.59081285310172, "learning_rate": 3.941700805287168e-07, "logits": -1.2646957635879517, "logps": -88.4402084350586, "loss": 0.2132, "objective": 0.21550500392913818, "ranking_idealized": 0.5375000238418579, "ranking_idealized_expo": 0.5, "ranking_simple": 0.48750001192092896, "regularize": 0.1455814391374588, "step": 1985 }, { "dpo_loss": 0.695839524269104, "epoch": 3.7600377893245156, "grad_norm": 75.7270912521511, "learning_rate": 3.9349544006023976e-07, "logits": -1.186018466949463, "logps": -88.2295150756836, "loss": 0.2182, "objective": 0.219081312417984, "ranking_idealized": 0.48750001192092896, "ranking_idealized_expo": 0.46875, "ranking_simple": 0.4749999940395355, "regularize": 0.1494973599910736, "step": 1990 }, { "dpo_loss": 0.6939408779144287, "epoch": 3.769485120453472, "grad_norm": 69.76456500792314, "learning_rate": 3.928192375915179e-07, "logits": -1.260425329208374, "logps": -88.10365295410156, "loss": 0.2134, "objective": 0.22218754887580872, "ranking_idealized": 0.5375000238418579, "ranking_idealized_expo": 0.543749988079071, "ranking_simple": 0.543749988079071, "regularize": 0.15279348194599152, "step": 1995 }, { "dpo_loss": 0.6960574388504028, "epoch": 3.778932451582428, "grad_norm": 67.23828756636841, "learning_rate": 3.9214148048326203e-07, "logits": -1.2658439874649048, "logps": -89.23229217529297, "loss": 0.2076, "objective": 0.2040717601776123, "ranking_idealized": 0.550000011920929, "ranking_idealized_expo": 0.5375000238418579, "ranking_simple": 0.543749988079071, "regularize": 0.1344660073518753, "step": 2000 }, { "epoch": 3.778932451582428, "eval_dpo_loss": 0.7034488320350647, "eval_logits": -1.2337108850479126, "eval_logps": -93.09392547607422, "eval_loss": 0.3083517551422119, "eval_objective": 0.3067256808280945, "eval_ranking_idealized": 0.5398550629615784, "eval_ranking_idealized_expo": 0.5243270993232727, "eval_ranking_simple": 0.5243270993232727, "eval_regularize": 0.23638078570365906, "eval_runtime": 157.9882, "eval_samples_per_second": 36.648, "eval_steps_per_second": 3.057, "step": 2000 }, { "dpo_loss": 0.6904383301734924, "epoch": 3.788379782711384, "grad_norm": 70.81937143215949, "learning_rate": 3.914621761131054e-07, "logits": -1.3027160167694092, "logps": -87.96573638916016, "loss": 0.2112, "objective": 0.2065231055021286, "ranking_idealized": 0.59375, "ranking_idealized_expo": 0.550000011920929, "ranking_simple": 0.5625, "regularize": 0.1374792605638504, "step": 2005 }, { "dpo_loss": 0.6968480944633484, "epoch": 3.79782711384034, "grad_norm": 72.90625556362093, "learning_rate": 3.907813318755243e-07, "logits": -1.2627811431884766, "logps": -86.23448181152344, "loss": 0.2151, "objective": 0.22150051593780518, "ranking_idealized": 0.5562499761581421, "ranking_idealized_expo": 0.512499988079071, "ranking_simple": 0.5249999761581421, "regularize": 0.15181571245193481, "step": 2010 }, { "dpo_loss": 0.6842600703239441, "epoch": 3.807274444969296, "grad_norm": 73.31719528122754, "learning_rate": 3.9009895518175665e-07, "logits": -1.215425968170166, "logps": -87.07940673828125, "loss": 0.2091, "objective": 0.2033880203962326, "ranking_idealized": 0.5562499761581421, "ranking_idealized_expo": 0.518750011920929, "ranking_simple": 0.5249999761581421, "regularize": 0.1349620223045349, "step": 2015 }, { "dpo_loss": 0.6808702349662781, "epoch": 3.816721776098252, "grad_norm": 74.57110438173568, "learning_rate": 3.894150534597219e-07, "logits": -1.236825704574585, "logps": -87.09656524658203, "loss": 0.2082, "objective": 0.22032049298286438, "ranking_idealized": 0.574999988079071, "ranking_idealized_expo": 0.5562499761581421, "ranking_simple": 0.543749988079071, "regularize": 0.15223348140716553, "step": 2020 }, { "dpo_loss": 0.7069395780563354, "epoch": 3.826169107227208, "grad_norm": 68.4209155562685, "learning_rate": 3.887296341539399e-07, "logits": -1.2843348979949951, "logps": -86.17149353027344, "loss": 0.2156, "objective": 0.22795970737934113, "ranking_idealized": 0.5249999761581421, "ranking_idealized_expo": 0.4937500059604645, "ranking_simple": 0.48750001192092896, "regularize": 0.15726572275161743, "step": 2025 }, { "dpo_loss": 0.6793303489685059, "epoch": 3.8356164383561646, "grad_norm": 75.1413449223689, "learning_rate": 3.880427047254501e-07, "logits": -1.2218055725097656, "logps": -86.53273010253906, "loss": 0.2093, "objective": 0.2060566246509552, "ranking_idealized": 0.518750011920929, "ranking_idealized_expo": 0.48124998807907104, "ranking_simple": 0.48750001192092896, "regularize": 0.13812358677387238, "step": 2030 }, { "dpo_loss": 0.6906328201293945, "epoch": 3.8450637694851206, "grad_norm": 68.98202095352073, "learning_rate": 3.8735427265172994e-07, "logits": -1.2316806316375732, "logps": -87.60394287109375, "loss": 0.2064, "objective": 0.20692899823188782, "ranking_idealized": 0.5687500238418579, "ranking_idealized_expo": 0.5687500238418579, "ranking_simple": 0.5687500238418579, "regularize": 0.13786569237709045, "step": 2035 }, { "dpo_loss": 0.6925719976425171, "epoch": 3.8545111006140766, "grad_norm": 70.40093898247615, "learning_rate": 3.8666434542661384e-07, "logits": -1.2409096956253052, "logps": -85.93083190917969, "loss": 0.2128, "objective": 0.21727947890758514, "ranking_idealized": 0.6000000238418579, "ranking_idealized_expo": 0.5562499761581421, "ranking_simple": 0.5562499761581421, "regularize": 0.14802229404449463, "step": 2040 }, { "dpo_loss": 0.6921749114990234, "epoch": 3.8639584317430327, "grad_norm": 70.45551281696774, "learning_rate": 3.859729305602116e-07, "logits": -1.2216551303863525, "logps": -86.43045043945312, "loss": 0.2027, "objective": 0.2071777880191803, "ranking_idealized": 0.5687500238418579, "ranking_idealized_expo": 0.550000011920929, "ranking_simple": 0.550000011920929, "regularize": 0.137960284948349, "step": 2045 }, { "dpo_loss": 0.6951195597648621, "epoch": 3.8734057628719887, "grad_norm": 84.99479776157791, "learning_rate": 3.852800355788263e-07, "logits": -1.257700800895691, "logps": -88.80815124511719, "loss": 0.2099, "objective": 0.21275556087493896, "ranking_idealized": 0.543749988079071, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.518750011920929, "regularize": 0.14324359595775604, "step": 2050 }, { "epoch": 3.8734057628719887, "eval_dpo_loss": 0.7044119238853455, "eval_logits": -1.2279720306396484, "eval_logps": -93.17267608642578, "eval_loss": 0.29618221521377563, "eval_objective": 0.29535168409347534, "eval_ranking_idealized": 0.5398550629615784, "eval_ranking_idealized_expo": 0.5243270993232727, "eval_ranking_simple": 0.5212215185165405, "eval_regularize": 0.22491048276424408, "eval_runtime": 158.2417, "eval_samples_per_second": 36.59, "eval_steps_per_second": 3.052, "step": 2050 }, { "dpo_loss": 0.6845670938491821, "epoch": 3.8828530940009447, "grad_norm": 72.36813730314171, "learning_rate": 3.845856680248729e-07, "logits": -1.26739501953125, "logps": -88.06434631347656, "loss": 0.2088, "objective": 0.2010469138622284, "ranking_idealized": 0.581250011920929, "ranking_idealized_expo": 0.543749988079071, "ranking_simple": 0.5687500238418579, "regularize": 0.132590189576149, "step": 2055 }, { "dpo_loss": 0.6887670755386353, "epoch": 3.8923004251299007, "grad_norm": 66.16332670938284, "learning_rate": 3.8388983545679546e-07, "logits": -1.178450345993042, "logps": -86.03468322753906, "loss": 0.2002, "objective": 0.20420917868614197, "ranking_idealized": 0.5375000238418579, "ranking_idealized_expo": 0.512499988079071, "ranking_simple": 0.5062500238418579, "regularize": 0.13533246517181396, "step": 2060 }, { "dpo_loss": 0.6974113583564758, "epoch": 3.9017477562588567, "grad_norm": 67.85463149312577, "learning_rate": 3.831925454489857e-07, "logits": -1.2947609424591064, "logps": -88.52572631835938, "loss": 0.2062, "objective": 0.2186674177646637, "ranking_idealized": 0.512499988079071, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.5249999761581421, "regularize": 0.1489262729883194, "step": 2065 }, { "dpo_loss": 0.6959481835365295, "epoch": 3.9111950873878127, "grad_norm": 70.49600282460919, "learning_rate": 3.824938055916998e-07, "logits": -1.2146751880645752, "logps": -85.87166595458984, "loss": 0.2144, "objective": 0.21881794929504395, "ranking_idealized": 0.625, "ranking_idealized_expo": 0.581250011920929, "ranking_simple": 0.59375, "regularize": 0.14922314882278442, "step": 2070 }, { "dpo_loss": 0.7003825306892395, "epoch": 3.9206424185167688, "grad_norm": 68.41116746766701, "learning_rate": 3.8179362349097624e-07, "logits": -1.2074339389801025, "logps": -88.32437133789062, "loss": 0.2036, "objective": 0.20541544258594513, "ranking_idealized": 0.5249999761581421, "ranking_idealized_expo": 0.512499988079071, "ranking_simple": 0.4937500059604645, "regularize": 0.1353771686553955, "step": 2075 }, { "dpo_loss": 0.6963819265365601, "epoch": 3.930089749645725, "grad_norm": 64.33739691711719, "learning_rate": 3.810920067685525e-07, "logits": -1.2292484045028687, "logps": -87.91120147705078, "loss": 0.1955, "objective": 0.20319481194019318, "ranking_idealized": 0.48124998807907104, "ranking_idealized_expo": 0.44999998807907104, "ranking_simple": 0.4437499940395355, "regularize": 0.13355661928653717, "step": 2080 }, { "dpo_loss": 0.6952013969421387, "epoch": 3.9395370807746812, "grad_norm": 68.36853706112721, "learning_rate": 3.8038896306178304e-07, "logits": -1.244873046875, "logps": -88.29363250732422, "loss": 0.1996, "objective": 0.19693097472190857, "ranking_idealized": 0.44999998807907104, "ranking_idealized_expo": 0.4375, "ranking_simple": 0.4375, "regularize": 0.12741082906723022, "step": 2085 }, { "dpo_loss": 0.6985955238342285, "epoch": 3.9489844119036372, "grad_norm": 72.63484898585152, "learning_rate": 3.79684500023555e-07, "logits": -1.3063652515411377, "logps": -88.39873504638672, "loss": 0.1975, "objective": 0.18639475107192993, "ranking_idealized": 0.5562499761581421, "ranking_idealized_expo": 0.5062500238418579, "ranking_simple": 0.512499988079071, "regularize": 0.11653516441583633, "step": 2090 }, { "dpo_loss": 0.6845866441726685, "epoch": 3.9584317430325933, "grad_norm": 75.90816419725637, "learning_rate": 3.7897862532220594e-07, "logits": -1.1452820301055908, "logps": -88.32160949707031, "loss": 0.2137, "objective": 0.22087030112743378, "ranking_idealized": 0.4937500059604645, "ranking_idealized_expo": 0.46875, "ranking_simple": 0.4625000059604645, "regularize": 0.15241165459156036, "step": 2095 }, { "dpo_loss": 0.7027496099472046, "epoch": 3.9678790741615493, "grad_norm": 65.23926199909047, "learning_rate": 3.7827134664143944e-07, "logits": -1.2793314456939697, "logps": -87.33671569824219, "loss": 0.2001, "objective": 0.20044174790382385, "ranking_idealized": 0.5062500238418579, "ranking_idealized_expo": 0.5, "ranking_simple": 0.4937500059604645, "regularize": 0.13016676902770996, "step": 2100 }, { "epoch": 3.9678790741615493, "eval_dpo_loss": 0.7062909007072449, "eval_logits": -1.2079051733016968, "eval_logps": -93.9210433959961, "eval_loss": 0.31389835476875305, "eval_objective": 0.31231164932250977, "eval_ranking_idealized": 0.5398550629615784, "eval_ranking_idealized_expo": 0.5243270993232727, "eval_ranking_simple": 0.5186335444450378, "eval_regularize": 0.24168255925178528, "eval_runtime": 157.946, "eval_samples_per_second": 36.658, "eval_steps_per_second": 3.058, "step": 2100 }, { "dpo_loss": 0.694995641708374, "epoch": 3.9773264052905053, "grad_norm": 70.6276970104338, "learning_rate": 3.7756267168024216e-07, "logits": -1.1883465051651, "logps": -87.5169448852539, "loss": 0.2032, "objective": 0.20670051872730255, "ranking_idealized": 0.574999988079071, "ranking_idealized_expo": 0.512499988079071, "ranking_simple": 0.512499988079071, "regularize": 0.1372009664773941, "step": 2105 }, { "dpo_loss": 0.682449996471405, "epoch": 3.9867737364194618, "grad_norm": 73.84783896819107, "learning_rate": 3.7685260815279985e-07, "logits": -1.191896677017212, "logps": -87.0994644165039, "loss": 0.2025, "objective": 0.20286063849925995, "ranking_idealized": 0.550000011920929, "ranking_idealized_expo": 0.512499988079071, "ranking_simple": 0.512499988079071, "regularize": 0.13461564481258392, "step": 2110 }, { "dpo_loss": 0.6851739287376404, "epoch": 3.9962210675484178, "grad_norm": 72.15356901954534, "learning_rate": 3.7614116378841304e-07, "logits": -1.1415256261825562, "logps": -87.24793243408203, "loss": 0.1994, "objective": 0.18393009901046753, "ranking_idealized": 0.543749988079071, "ranking_idealized_expo": 0.53125, "ranking_simple": 0.5249999761581421, "regularize": 0.11541268974542618, "step": 2115 }, { "dpo_loss": 0.6915415525436401, "epoch": 4.005668398677374, "grad_norm": 69.87138624592257, "learning_rate": 3.754283463314134e-07, "logits": -1.2651426792144775, "logps": -87.78424072265625, "loss": 0.2082, "objective": 0.2031985968351364, "ranking_idealized": 0.550000011920929, "ranking_idealized_expo": 0.48124998807907104, "ranking_simple": 0.48124998807907104, "regularize": 0.13404445350170135, "step": 2120 }, { "dpo_loss": 0.6839936375617981, "epoch": 4.01511572980633, "grad_norm": 65.95346613098634, "learning_rate": 3.747141635410792e-07, "logits": -1.2902952432632446, "logps": -89.2015380859375, "loss": 0.2008, "objective": 0.1982060968875885, "ranking_idealized": 0.4937500059604645, "ranking_idealized_expo": 0.45625001192092896, "ranking_simple": 0.45625001192092896, "regularize": 0.1298067271709442, "step": 2125 }, { "dpo_loss": 0.6753361821174622, "epoch": 4.024563060935286, "grad_norm": 68.66890119551623, "learning_rate": 3.739986231915508e-07, "logits": -1.2251384258270264, "logps": -90.5170669555664, "loss": 0.1999, "objective": 0.2000185251235962, "ranking_idealized": 0.5062500238418579, "ranking_idealized_expo": 0.45625001192092896, "ranking_simple": 0.4625000059604645, "regularize": 0.13248488306999207, "step": 2130 }, { "dpo_loss": 0.6820967197418213, "epoch": 4.034010392064242, "grad_norm": 67.84715622068745, "learning_rate": 3.7328173307174597e-07, "logits": -1.2330976724624634, "logps": -89.2086410522461, "loss": 0.2065, "objective": 0.20815534889698029, "ranking_idealized": 0.5562499761581421, "ranking_idealized_expo": 0.512499988079071, "ranking_simple": 0.518750011920929, "regularize": 0.13994565606117249, "step": 2135 }, { "dpo_loss": 0.6867682337760925, "epoch": 4.043457723193198, "grad_norm": 71.14832059721319, "learning_rate": 3.725635009852755e-07, "logits": -1.1594212055206299, "logps": -87.94404602050781, "loss": 0.2092, "objective": 0.21153667569160461, "ranking_idealized": 0.574999988079071, "ranking_idealized_expo": 0.5375000238418579, "ranking_simple": 0.53125, "regularize": 0.14285986125469208, "step": 2140 }, { "dpo_loss": 0.7056056261062622, "epoch": 4.052905054322154, "grad_norm": 68.73986524633295, "learning_rate": 3.718439347503578e-07, "logits": -1.1812673807144165, "logps": -87.79216766357422, "loss": 0.2071, "objective": 0.2009545862674713, "ranking_idealized": 0.5, "ranking_idealized_expo": 0.518750011920929, "ranking_simple": 0.512499988079071, "regularize": 0.13039401173591614, "step": 2145 }, { "dpo_loss": 0.6813782453536987, "epoch": 4.06235238545111, "grad_norm": 68.82741945060675, "learning_rate": 3.7112304219973394e-07, "logits": -1.1798770427703857, "logps": -91.46490478515625, "loss": 0.2082, "objective": 0.21020595729351044, "ranking_idealized": 0.48750001192092896, "ranking_idealized_expo": 0.46875, "ranking_simple": 0.48124998807907104, "regularize": 0.14206811785697937, "step": 2150 }, { "epoch": 4.06235238545111, "eval_dpo_loss": 0.7036926746368408, "eval_logits": -1.2147855758666992, "eval_logps": -93.67681884765625, "eval_loss": 0.311942994594574, "eval_objective": 0.3123517334461212, "eval_ranking_idealized": 0.5398550629615784, "eval_ranking_idealized_expo": 0.5243270993232727, "eval_ranking_simple": 0.52173912525177, "eval_regularize": 0.24198248982429504, "eval_runtime": 158.0386, "eval_samples_per_second": 36.637, "eval_steps_per_second": 3.056, "step": 2150 }, { "dpo_loss": 0.6815483570098877, "epoch": 4.071799716580066, "grad_norm": 74.77954309287684, "learning_rate": 3.7040083118058243e-07, "logits": -1.1801261901855469, "logps": -88.95193481445312, "loss": 0.2014, "objective": 0.20084276795387268, "ranking_idealized": 0.4625000059604645, "ranking_idealized_expo": 0.46875, "ranking_simple": 0.4625000059604645, "regularize": 0.13268791139125824, "step": 2155 }, { "dpo_loss": 0.6933214664459229, "epoch": 4.081247047709022, "grad_norm": 68.95274496215063, "learning_rate": 3.69677309554434e-07, "logits": -1.2751498222351074, "logps": -89.47291564941406, "loss": 0.19, "objective": 0.17904631793498993, "ranking_idealized": 0.606249988079071, "ranking_idealized_expo": 0.574999988079071, "ranking_simple": 0.574999988079071, "regularize": 0.10971417278051376, "step": 2160 }, { "dpo_loss": 0.6934170126914978, "epoch": 4.090694378837978, "grad_norm": 70.3491425212816, "learning_rate": 3.689524851970855e-07, "logits": -1.1636548042297363, "logps": -88.32524871826172, "loss": 0.2041, "objective": 0.20672444999217987, "ranking_idealized": 0.612500011920929, "ranking_idealized_expo": 0.543749988079071, "ranking_simple": 0.5625, "regularize": 0.13738274574279785, "step": 2165 }, { "dpo_loss": 0.6837751865386963, "epoch": 4.100141709966934, "grad_norm": 76.80389850526132, "learning_rate": 3.682263659985148e-07, "logits": -1.2340781688690186, "logps": -89.10033416748047, "loss": 0.2021, "objective": 0.196573406457901, "ranking_idealized": 0.53125, "ranking_idealized_expo": 0.5062500238418579, "ranking_simple": 0.518750011920929, "regularize": 0.1281958818435669, "step": 2170 }, { "dpo_loss": 0.7011173963546753, "epoch": 4.109589041095891, "grad_norm": 69.35694017187159, "learning_rate": 3.674989598627943e-07, "logits": -1.1500046253204346, "logps": -90.71090698242188, "loss": 0.2032, "objective": 0.20607486367225647, "ranking_idealized": 0.518750011920929, "ranking_idealized_expo": 0.5, "ranking_simple": 0.5, "regularize": 0.13596312701702118, "step": 2175 }, { "dpo_loss": 0.6869430541992188, "epoch": 4.119036372224847, "grad_norm": 69.15222854312827, "learning_rate": 3.6677027470800534e-07, "logits": -1.2302781343460083, "logps": -88.79737854003906, "loss": 0.2057, "objective": 0.22396686673164368, "ranking_idealized": 0.581250011920929, "ranking_idealized_expo": 0.574999988079071, "ranking_simple": 0.574999988079071, "regularize": 0.15527252852916718, "step": 2180 }, { "dpo_loss": 0.6947880983352661, "epoch": 4.128483703353803, "grad_norm": 65.82727901584528, "learning_rate": 3.660403184661518e-07, "logits": -1.2679330110549927, "logps": -89.92203521728516, "loss": 0.1949, "objective": 0.21502026915550232, "ranking_idealized": 0.574999988079071, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.53125, "regularize": 0.1455414742231369, "step": 2185 }, { "dpo_loss": 0.6966627836227417, "epoch": 4.137931034482759, "grad_norm": 69.60605335663098, "learning_rate": 3.653090990830739e-07, "logits": -1.2208318710327148, "logps": -89.22146606445312, "loss": 0.2042, "objective": 0.19856971502304077, "ranking_idealized": 0.5249999761581421, "ranking_idealized_expo": 0.5, "ranking_simple": 0.512499988079071, "regularize": 0.12890341877937317, "step": 2190 }, { "dpo_loss": 0.6860800385475159, "epoch": 4.147378365611715, "grad_norm": 70.06283430115266, "learning_rate": 3.645766245183615e-07, "logits": -1.1613614559173584, "logps": -89.69837951660156, "loss": 0.2059, "objective": 0.194590762257576, "ranking_idealized": 0.574999988079071, "ranking_idealized_expo": 0.53125, "ranking_simple": 0.550000011920929, "regularize": 0.12598276138305664, "step": 2195 }, { "dpo_loss": 0.6867005228996277, "epoch": 4.156825696740671, "grad_norm": 67.94271181770046, "learning_rate": 3.6384290274526766e-07, "logits": -1.181180715560913, "logps": -90.18212890625, "loss": 0.1914, "objective": 0.19875545799732208, "ranking_idealized": 0.53125, "ranking_idealized_expo": 0.5062500238418579, "ranking_simple": 0.5062500238418579, "regularize": 0.13008537888526917, "step": 2200 }, { "epoch": 4.156825696740671, "eval_dpo_loss": 0.7032468318939209, "eval_logits": -1.217943549156189, "eval_logps": -94.57373046875, "eval_loss": 0.31386545300483704, "eval_objective": 0.3137575387954712, "eval_ranking_idealized": 0.5398550629615784, "eval_ranking_idealized_expo": 0.5243270993232727, "eval_ranking_simple": 0.5196687579154968, "eval_regularize": 0.24343283474445343, "eval_runtime": 157.6862, "eval_samples_per_second": 36.718, "eval_steps_per_second": 3.063, "step": 2200 }, { "dpo_loss": 0.6863278150558472, "epoch": 4.166273027869627, "grad_norm": 70.79608630866144, "learning_rate": 3.6310794175062156e-07, "logits": -1.2975205183029175, "logps": -87.41703033447266, "loss": 0.1941, "objective": 0.18734896183013916, "ranking_idealized": 0.5375000238418579, "ranking_idealized_expo": 0.4749999940395355, "ranking_simple": 0.4937500059604645, "regularize": 0.11871619522571564, "step": 2205 }, { "dpo_loss": 0.6975613832473755, "epoch": 4.175720358998583, "grad_norm": 77.54815208083042, "learning_rate": 3.62371749534742e-07, "logits": -1.221123456954956, "logps": -90.33836364746094, "loss": 0.2056, "objective": 0.20308546721935272, "ranking_idealized": 0.5874999761581421, "ranking_idealized_expo": 0.5562499761581421, "ranking_simple": 0.5375000238418579, "regularize": 0.13332930207252502, "step": 2210 }, { "dpo_loss": 0.6789113283157349, "epoch": 4.185167690127539, "grad_norm": 75.631051881529, "learning_rate": 3.6163433411135e-07, "logits": -1.1596550941467285, "logps": -88.44036102294922, "loss": 0.2012, "objective": 0.20634588599205017, "ranking_idealized": 0.5687500238418579, "ranking_idealized_expo": 0.5375000238418579, "ranking_simple": 0.5562499761581421, "regularize": 0.13845475018024445, "step": 2215 }, { "dpo_loss": 0.6980286836624146, "epoch": 4.194615021256495, "grad_norm": 66.30888177673542, "learning_rate": 3.6089570350748167e-07, "logits": -1.157200813293457, "logps": -89.20658111572266, "loss": 0.1931, "objective": 0.187965989112854, "ranking_idealized": 0.518750011920929, "ranking_idealized_expo": 0.4937500059604645, "ranking_simple": 0.4937500059604645, "regularize": 0.11816313117742538, "step": 2220 }, { "dpo_loss": 0.6964994668960571, "epoch": 4.204062352385451, "grad_norm": 69.89753056570058, "learning_rate": 3.601558657634006e-07, "logits": -1.2606405019760132, "logps": -89.00025939941406, "loss": 0.201, "objective": 0.18100889027118683, "ranking_idealized": 0.543749988079071, "ranking_idealized_expo": 0.5375000238418579, "ranking_simple": 0.53125, "regularize": 0.11135894060134888, "step": 2225 }, { "dpo_loss": 0.6851469874382019, "epoch": 4.213509683514407, "grad_norm": 73.28546857906582, "learning_rate": 3.594148289325108e-07, "logits": -1.2857199907302856, "logps": -87.86104583740234, "loss": 0.198, "objective": 0.19738662242889404, "ranking_idealized": 0.48750001192092896, "ranking_idealized_expo": 0.45625001192092896, "ranking_simple": 0.4375, "regularize": 0.12887193262577057, "step": 2230 }, { "dpo_loss": 0.6863463521003723, "epoch": 4.222957014643363, "grad_norm": 69.20984946205131, "learning_rate": 3.586726010812687e-07, "logits": -1.2645803689956665, "logps": -87.96028137207031, "loss": 0.1942, "objective": 0.1901693493127823, "ranking_idealized": 0.53125, "ranking_idealized_expo": 0.4749999940395355, "ranking_simple": 0.5062500238418579, "regularize": 0.12153474241495132, "step": 2235 }, { "dpo_loss": 0.691120982170105, "epoch": 4.232404345772319, "grad_norm": 80.25805309412497, "learning_rate": 3.5792919028909537e-07, "logits": -1.3437623977661133, "logps": -87.26004791259766, "loss": 0.1951, "objective": 0.19092616438865662, "ranking_idealized": 0.543749988079071, "ranking_idealized_expo": 0.512499988079071, "ranking_simple": 0.518750011920929, "regularize": 0.1218140497803688, "step": 2240 }, { "dpo_loss": 0.6999267935752869, "epoch": 4.241851676901275, "grad_norm": 74.39106991623802, "learning_rate": 3.571846046482886e-07, "logits": -1.2108876705169678, "logps": -88.40174102783203, "loss": 0.2089, "objective": 0.21814508736133575, "ranking_idealized": 0.5249999761581421, "ranking_idealized_expo": 0.518750011920929, "ranking_simple": 0.512499988079071, "regularize": 0.14815238118171692, "step": 2245 }, { "dpo_loss": 0.6846331357955933, "epoch": 4.251299008030231, "grad_norm": 72.48586230381743, "learning_rate": 3.564388522639349e-07, "logits": -1.2497327327728271, "logps": -89.22590637207031, "loss": 0.2026, "objective": 0.20532198250293732, "ranking_idealized": 0.518750011920929, "ranking_idealized_expo": 0.5, "ranking_simple": 0.5062500238418579, "regularize": 0.13685865700244904, "step": 2250 }, { "epoch": 4.251299008030231, "eval_dpo_loss": 0.7034680843353271, "eval_logits": -1.2043732404708862, "eval_logps": -93.22201538085938, "eval_loss": 0.31791311502456665, "eval_objective": 0.31768348813056946, "eval_ranking_idealized": 0.5398550629615784, "eval_ranking_idealized_expo": 0.5243270993232727, "eval_ranking_simple": 0.5201863646507263, "eval_regularize": 0.2473367154598236, "eval_runtime": 157.3903, "eval_samples_per_second": 36.788, "eval_steps_per_second": 3.069, "step": 2250 }, { "dpo_loss": 0.6916313171386719, "epoch": 4.260746339159187, "grad_norm": 84.41450589555723, "learning_rate": 3.556919412538212e-07, "logits": -1.2482655048370361, "logps": -88.18809509277344, "loss": 0.1989, "objective": 0.19382549822330475, "ranking_idealized": 0.512499988079071, "ranking_idealized_expo": 0.48124998807907104, "ranking_simple": 0.48124998807907104, "regularize": 0.1246623545885086, "step": 2255 }, { "dpo_loss": 0.6791214346885681, "epoch": 4.270193670288144, "grad_norm": 70.00311355913053, "learning_rate": 3.549438797483465e-07, "logits": -1.157104730606079, "logps": -88.4947738647461, "loss": 0.2055, "objective": 0.20276562869548798, "ranking_idealized": 0.5375000238418579, "ranking_idealized_expo": 0.512499988079071, "ranking_simple": 0.512499988079071, "regularize": 0.13485348224639893, "step": 2260 }, { "dpo_loss": 0.673402726650238, "epoch": 4.2796410014171, "grad_norm": 65.09377603534938, "learning_rate": 3.5419467589043337e-07, "logits": -1.128641128540039, "logps": -88.3416519165039, "loss": 0.2032, "objective": 0.22014395892620087, "ranking_idealized": 0.5375000238418579, "ranking_idealized_expo": 0.518750011920929, "ranking_simple": 0.53125, "regularize": 0.1528036892414093, "step": 2265 }, { "dpo_loss": 0.6994603872299194, "epoch": 4.289088332546056, "grad_norm": 70.89538912066566, "learning_rate": 3.5344433783543927e-07, "logits": -1.2988563776016235, "logps": -90.05706024169922, "loss": 0.2001, "objective": 0.19624318182468414, "ranking_idealized": 0.5874999761581421, "ranking_idealized_expo": 0.550000011920929, "ranking_simple": 0.550000011920929, "regularize": 0.12629713118076324, "step": 2270 }, { "dpo_loss": 0.6809098124504089, "epoch": 4.298535663675012, "grad_norm": 68.30870403869821, "learning_rate": 3.526928737510678e-07, "logits": -1.1839253902435303, "logps": -87.8452377319336, "loss": 0.1948, "objective": 0.199173703789711, "ranking_idealized": 0.574999988079071, "ranking_idealized_expo": 0.543749988079071, "ranking_simple": 0.5375000238418579, "regularize": 0.13108272850513458, "step": 2275 }, { "dpo_loss": 0.7003562450408936, "epoch": 4.307982994803968, "grad_norm": 72.49712985866785, "learning_rate": 3.519402918172798e-07, "logits": -1.1665685176849365, "logps": -89.95878601074219, "loss": 0.1994, "objective": 0.19495268166065216, "ranking_idealized": 0.581250011920929, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.543749988079071, "regularize": 0.12491704523563385, "step": 2280 }, { "dpo_loss": 0.6910146474838257, "epoch": 4.317430325932924, "grad_norm": 68.88559864938857, "learning_rate": 3.511866002262044e-07, "logits": -1.1572844982147217, "logps": -88.31980895996094, "loss": 0.1855, "objective": 0.18461644649505615, "ranking_idealized": 0.5874999761581421, "ranking_idealized_expo": 0.5562499761581421, "ranking_simple": 0.550000011920929, "regularize": 0.11551499366760254, "step": 2285 }, { "dpo_loss": 0.6953611969947815, "epoch": 4.32687765706188, "grad_norm": 72.10111152081335, "learning_rate": 3.504318071820496e-07, "logits": -1.2646400928497314, "logps": -90.25203704833984, "loss": 0.1979, "objective": 0.20109014213085175, "ranking_idealized": 0.574999988079071, "ranking_idealized_expo": 0.518750011920929, "ranking_simple": 0.518750011920929, "regularize": 0.1315540075302124, "step": 2290 }, { "dpo_loss": 0.6853431463241577, "epoch": 4.336324988190836, "grad_norm": 74.46522176713073, "learning_rate": 3.4967592090101326e-07, "logits": -1.2266438007354736, "logps": -90.8377456665039, "loss": 0.2014, "objective": 0.2001165896654129, "ranking_idealized": 0.5562499761581421, "ranking_idealized_expo": 0.518750011920929, "ranking_simple": 0.518750011920929, "regularize": 0.13158227503299713, "step": 2295 }, { "dpo_loss": 0.6854650974273682, "epoch": 4.345772319319792, "grad_norm": 67.16113769652895, "learning_rate": 3.489189496111936e-07, "logits": -1.1785436868667603, "logps": -88.6437759399414, "loss": 0.1908, "objective": 0.19480295479297638, "ranking_idealized": 0.550000011920929, "ranking_idealized_expo": 0.5375000238418579, "ranking_simple": 0.5249999761581421, "regularize": 0.12625643610954285, "step": 2300 }, { "epoch": 4.345772319319792, "eval_dpo_loss": 0.702228844165802, "eval_logits": -1.2116835117340088, "eval_logps": -94.31507110595703, "eval_loss": 0.3066709339618683, "eval_objective": 0.3084842562675476, "eval_ranking_idealized": 0.5398550629615784, "eval_ranking_idealized_expo": 0.5243270993232727, "eval_ranking_simple": 0.5232919454574585, "eval_regularize": 0.2382613569498062, "eval_runtime": 157.4043, "eval_samples_per_second": 36.784, "eval_steps_per_second": 3.069, "step": 2300 }, { "dpo_loss": 0.6954362988471985, "epoch": 4.355219650448748, "grad_norm": 72.34445643960167, "learning_rate": 3.481609015524991e-07, "logits": -1.2407293319702148, "logps": -89.92005157470703, "loss": 0.1938, "objective": 0.19215351343154907, "ranking_idealized": 0.4937500059604645, "ranking_idealized_expo": 0.44999998807907104, "ranking_simple": 0.44999998807907104, "regularize": 0.12260987609624863, "step": 2305 }, { "dpo_loss": 0.6934856176376343, "epoch": 4.364666981577704, "grad_norm": 78.51664449321001, "learning_rate": 3.474017849765599e-07, "logits": -1.2324168682098389, "logps": -87.94395446777344, "loss": 0.1936, "objective": 0.1962895691394806, "ranking_idealized": 0.550000011920929, "ranking_idealized_expo": 0.5625, "ranking_simple": 0.5562499761581421, "regularize": 0.1269409954547882, "step": 2310 }, { "dpo_loss": 0.6825065016746521, "epoch": 4.37411431270666, "grad_norm": 69.72656724525673, "learning_rate": 3.466416081466369e-07, "logits": -1.2817673683166504, "logps": -89.84367370605469, "loss": 0.1833, "objective": 0.1848260760307312, "ranking_idealized": 0.5874999761581421, "ranking_idealized_expo": 0.5562499761581421, "ranking_simple": 0.5375000238418579, "regularize": 0.11657543480396271, "step": 2315 }, { "dpo_loss": 0.6836963891983032, "epoch": 4.383561643835616, "grad_norm": 65.28015042658268, "learning_rate": 3.458803793375324e-07, "logits": -1.2592960596084595, "logps": -88.8481674194336, "loss": 0.1879, "objective": 0.1888824999332428, "ranking_idealized": 0.5687500238418579, "ranking_idealized_expo": 0.550000011920929, "ranking_simple": 0.5562499761581421, "regularize": 0.12051286548376083, "step": 2320 }, { "dpo_loss": 0.6949820518493652, "epoch": 4.393008974964572, "grad_norm": 67.61356747192421, "learning_rate": 3.451181068354998e-07, "logits": -1.1924588680267334, "logps": -87.05536651611328, "loss": 0.1871, "objective": 0.1929215043783188, "ranking_idealized": 0.512499988079071, "ranking_idealized_expo": 0.4625000059604645, "ranking_simple": 0.4749999940395355, "regularize": 0.12342329323291779, "step": 2325 }, { "dpo_loss": 0.6871247887611389, "epoch": 4.402456306093528, "grad_norm": 68.57928876300109, "learning_rate": 3.4435479893815355e-07, "logits": -1.2161206007003784, "logps": -87.45741271972656, "loss": 0.194, "objective": 0.19727759063243866, "ranking_idealized": 0.5375000238418579, "ranking_idealized_expo": 0.5, "ranking_simple": 0.512499988079071, "regularize": 0.12856510281562805, "step": 2330 }, { "dpo_loss": 0.696916937828064, "epoch": 4.411903637222484, "grad_norm": 64.66076626257149, "learning_rate": 3.435904639543789e-07, "logits": -1.2180455923080444, "logps": -86.78954315185547, "loss": 0.195, "objective": 0.19943666458129883, "ranking_idealized": 0.53125, "ranking_idealized_expo": 0.518750011920929, "ranking_simple": 0.512499988079071, "regularize": 0.1297449767589569, "step": 2335 }, { "dpo_loss": 0.6831547617912292, "epoch": 4.42135096835144, "grad_norm": 76.92527405322872, "learning_rate": 3.428251102042409e-07, "logits": -1.2242004871368408, "logps": -87.0202865600586, "loss": 0.1862, "objective": 0.18791408836841583, "ranking_idealized": 0.53125, "ranking_idealized_expo": 0.512499988079071, "ranking_simple": 0.5062500238418579, "regularize": 0.11959860473871231, "step": 2340 }, { "dpo_loss": 0.6820409893989563, "epoch": 4.430798299480397, "grad_norm": 65.42855768283592, "learning_rate": 3.4205874601889464e-07, "logits": -1.2483327388763428, "logps": -85.98213195800781, "loss": 0.1889, "objective": 0.1903569996356964, "ranking_idealized": 0.5687500238418579, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.5375000238418579, "regularize": 0.12215292453765869, "step": 2345 }, { "dpo_loss": 0.6897372007369995, "epoch": 4.440245630609353, "grad_norm": 67.23230975440292, "learning_rate": 3.414449327628279e-07, "logits": -1.2460027933120728, "logps": -86.58792877197266, "loss": 0.1931, "objective": 0.18547531962394714, "ranking_idealized": 0.518750011920929, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.53125, "regularize": 0.11650160700082779, "step": 2350 }, { "epoch": 4.440245630609353, "eval_dpo_loss": 0.7057645320892334, "eval_logits": -1.206606388092041, "eval_logps": -93.412353515625, "eval_loss": 0.3241415023803711, "eval_objective": 0.32362499833106995, "eval_ranking_idealized": 0.5398550629615784, "eval_ranking_idealized_expo": 0.5243270993232727, "eval_ranking_simple": 0.5222567319869995, "eval_regularize": 0.25304850935935974, "eval_runtime": 157.4891, "eval_samples_per_second": 36.764, "eval_steps_per_second": 3.067, "step": 2350 }, { "dpo_loss": 0.6985042691230774, "epoch": 4.449692961738309, "grad_norm": 68.68278228628853, "learning_rate": 3.406767708236679e-07, "logits": -1.237305998802185, "logps": -87.68280029296875, "loss": 0.1871, "objective": 0.18133942782878876, "ranking_idealized": 0.46875, "ranking_idealized_expo": 0.4375, "ranking_simple": 0.45625001192092896, "regularize": 0.11148901283740997, "step": 2355 }, { "dpo_loss": 0.7006803750991821, "epoch": 4.459140292867265, "grad_norm": 64.88989596384992, "learning_rate": 3.39907621834758e-07, "logits": -1.2617131471633911, "logps": -91.06695556640625, "loss": 0.2024, "objective": 0.20287685096263885, "ranking_idealized": 0.512499988079071, "ranking_idealized_expo": 0.5, "ranking_simple": 0.5062500238418579, "regularize": 0.13280881941318512, "step": 2360 }, { "dpo_loss": 0.6917591691017151, "epoch": 4.468587623996221, "grad_norm": 72.92629822125772, "learning_rate": 3.391374941685656e-07, "logits": -1.15776789188385, "logps": -89.98072052001953, "loss": 0.1974, "objective": 0.20786471664905548, "ranking_idealized": 0.637499988079071, "ranking_idealized_expo": 0.59375, "ranking_simple": 0.5874999761581421, "regularize": 0.13868877291679382, "step": 2365 }, { "dpo_loss": 0.6847442388534546, "epoch": 4.478034955125177, "grad_norm": 72.59186575547133, "learning_rate": 3.3836639620821164e-07, "logits": -1.2330033779144287, "logps": -87.796875, "loss": 0.1977, "objective": 0.19836851954460144, "ranking_idealized": 0.4437499940395355, "ranking_idealized_expo": 0.44999998807907104, "ranking_simple": 0.44999998807907104, "regularize": 0.12989410758018494, "step": 2370 }, { "dpo_loss": 0.6927186250686646, "epoch": 4.487482286254133, "grad_norm": 73.8071218413957, "learning_rate": 3.3759433634737875e-07, "logits": -1.2164292335510254, "logps": -88.21554565429688, "loss": 0.1954, "objective": 0.1947944313287735, "ranking_idealized": 0.5562499761581421, "ranking_idealized_expo": 0.5375000238418579, "ranking_simple": 0.5375000238418579, "regularize": 0.12552256882190704, "step": 2375 }, { "dpo_loss": 0.6942063570022583, "epoch": 4.496929617383089, "grad_norm": 69.69165122272695, "learning_rate": 3.3682132299022037e-07, "logits": -1.171820878982544, "logps": -88.99403381347656, "loss": 0.1888, "objective": 0.18754351139068604, "ranking_idealized": 0.574999988079071, "ranking_idealized_expo": 0.5375000238418579, "ranking_simple": 0.518750011920929, "regularize": 0.1181228905916214, "step": 2380 }, { "dpo_loss": 0.6920589208602905, "epoch": 4.506376948512045, "grad_norm": 76.6672759543614, "learning_rate": 3.360473645512691e-07, "logits": -1.1179250478744507, "logps": -89.265869140625, "loss": 0.1898, "objective": 0.1791161596775055, "ranking_idealized": 0.5249999761581421, "ranking_idealized_expo": 0.4937500059604645, "ranking_simple": 0.4937500059604645, "regularize": 0.1099102646112442, "step": 2385 }, { "dpo_loss": 0.6984947919845581, "epoch": 4.515824279641001, "grad_norm": 64.60478702112378, "learning_rate": 3.3527246945534503e-07, "logits": -1.2097110748291016, "logps": -88.06912994384766, "loss": 0.1893, "objective": 0.2067548781633377, "ranking_idealized": 0.512499988079071, "ranking_idealized_expo": 0.48750001192092896, "ranking_simple": 0.4749999940395355, "regularize": 0.13690538704395294, "step": 2390 }, { "dpo_loss": 0.6983093619346619, "epoch": 4.525271610769957, "grad_norm": 68.74712299235686, "learning_rate": 3.3449664613746423e-07, "logits": -1.1768536567687988, "logps": -89.36795806884766, "loss": 0.1867, "objective": 0.1928076446056366, "ranking_idealized": 0.5874999761581421, "ranking_idealized_expo": 0.550000011920929, "ranking_simple": 0.53125, "regularize": 0.12297670543193817, "step": 2395 }, { "dpo_loss": 0.6859962344169617, "epoch": 4.534718941898913, "grad_norm": 69.57898877547159, "learning_rate": 3.337199030427465e-07, "logits": -1.221806526184082, "logps": -89.45655822753906, "loss": 0.195, "objective": 0.19366537034511566, "ranking_idealized": 0.612500011920929, "ranking_idealized_expo": 0.5625, "ranking_simple": 0.5562499761581421, "regularize": 0.12506575882434845, "step": 2400 }, { "epoch": 4.534718941898913, "eval_dpo_loss": 0.7035441994667053, "eval_logits": -1.2061595916748047, "eval_logps": -94.24186706542969, "eval_loss": 0.31109294295310974, "eval_objective": 0.31131625175476074, "eval_ranking_idealized": 0.5398550629615784, "eval_ranking_idealized_expo": 0.5243270993232727, "eval_ranking_simple": 0.52173912525177, "eval_regularize": 0.24096183478832245, "eval_runtime": 157.5383, "eval_samples_per_second": 36.753, "eval_steps_per_second": 3.066, "step": 2400 }, { "dpo_loss": 0.6786166429519653, "epoch": 4.544166273027869, "grad_norm": 76.04442874927135, "learning_rate": 3.329422486263242e-07, "logits": -1.2290821075439453, "logps": -90.7866439819336, "loss": 0.1908, "objective": 0.188669353723526, "ranking_idealized": 0.543749988079071, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.53125, "regularize": 0.1208076924085617, "step": 2405 }, { "dpo_loss": 0.6833987236022949, "epoch": 4.553613604156825, "grad_norm": 69.9120836600003, "learning_rate": 3.321636913532494e-07, "logits": -1.184873104095459, "logps": -87.97506713867188, "loss": 0.1921, "objective": 0.201020747423172, "ranking_idealized": 0.4749999940395355, "ranking_idealized_expo": 0.4124999940395355, "ranking_simple": 0.41874998807907104, "regularize": 0.13268086314201355, "step": 2410 }, { "dpo_loss": 0.688135027885437, "epoch": 4.563060935285781, "grad_norm": 71.7826426033307, "learning_rate": 3.3138423969840214e-07, "logits": -1.273437261581421, "logps": -89.36498260498047, "loss": 0.1885, "objective": 0.19135358929634094, "ranking_idealized": 0.518750011920929, "ranking_idealized_expo": 0.4937500059604645, "ranking_simple": 0.48750001192092896, "regularize": 0.12254010140895844, "step": 2415 }, { "dpo_loss": 0.6903122663497925, "epoch": 4.572508266414738, "grad_norm": 72.09130382817256, "learning_rate": 3.3060390214639834e-07, "logits": -1.1458829641342163, "logps": -86.36959075927734, "loss": 0.1962, "objective": 0.1846756488084793, "ranking_idealized": 0.46875, "ranking_idealized_expo": 0.4437499940395355, "ranking_simple": 0.44999998807907104, "regularize": 0.1156444177031517, "step": 2420 }, { "dpo_loss": 0.6769827008247375, "epoch": 4.581955597543693, "grad_norm": 75.79810035141197, "learning_rate": 3.29822687191497e-07, "logits": -1.2757484912872314, "logps": -88.91954040527344, "loss": 0.1881, "objective": 0.19819346070289612, "ranking_idealized": 0.53125, "ranking_idealized_expo": 0.543749988079071, "ranking_simple": 0.5249999761581421, "regularize": 0.13049522042274475, "step": 2425 }, { "dpo_loss": 0.7002481818199158, "epoch": 4.59140292867265, "grad_norm": 72.47474751481887, "learning_rate": 3.2904060333750817e-07, "logits": -1.3062492609024048, "logps": -87.92079162597656, "loss": 0.1892, "objective": 0.18215011060237885, "ranking_idealized": 0.53125, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.5249999761581421, "regularize": 0.11212529242038727, "step": 2430 }, { "dpo_loss": 0.7003153562545776, "epoch": 4.600850259801606, "grad_norm": 69.56974706989557, "learning_rate": 3.282576590977e-07, "logits": -1.2487523555755615, "logps": -87.8376235961914, "loss": 0.19, "objective": 0.18513301014900208, "ranking_idealized": 0.6187499761581421, "ranking_idealized_expo": 0.59375, "ranking_simple": 0.581250011920929, "regularize": 0.11510147899389267, "step": 2435 }, { "dpo_loss": 0.6834243535995483, "epoch": 4.610297590930562, "grad_norm": 68.29661178867353, "learning_rate": 3.2747386299470625e-07, "logits": -1.1830635070800781, "logps": -89.48603820800781, "loss": 0.1829, "objective": 0.1890353560447693, "ranking_idealized": 0.606249988079071, "ranking_idealized_expo": 0.6187499761581421, "ranking_simple": 0.612500011920929, "regularize": 0.12069292366504669, "step": 2440 }, { "dpo_loss": 0.6962495446205139, "epoch": 4.619744922059518, "grad_norm": 67.06846784717705, "learning_rate": 3.2668922356043385e-07, "logits": -1.1794134378433228, "logps": -88.9986801147461, "loss": 0.1937, "objective": 0.1998957395553589, "ranking_idealized": 0.6499999761581421, "ranking_idealized_expo": 0.606249988079071, "ranking_simple": 0.606249988079071, "regularize": 0.1302708089351654, "step": 2445 }, { "dpo_loss": 0.6916170120239258, "epoch": 4.629192253188474, "grad_norm": 69.63472871808763, "learning_rate": 3.2590374933596934e-07, "logits": -1.212580919265747, "logps": -88.09070587158203, "loss": 0.1947, "objective": 0.19347377121448517, "ranking_idealized": 0.6499999761581421, "ranking_idealized_expo": 0.574999988079071, "ranking_simple": 0.581250011920929, "regularize": 0.12431205809116364, "step": 2450 }, { "epoch": 4.629192253188474, "eval_dpo_loss": 0.7066665291786194, "eval_logits": -1.1955713033676147, "eval_logps": -93.6715316772461, "eval_loss": 0.3311574161052704, "eval_objective": 0.3316709101200104, "eval_ranking_idealized": 0.5398550629615784, "eval_ranking_idealized_expo": 0.5243270993232727, "eval_ranking_simple": 0.522774338722229, "eval_regularize": 0.2610042691230774, "eval_runtime": 158.4938, "eval_samples_per_second": 36.531, "eval_steps_per_second": 3.047, "step": 2450 }, { "dpo_loss": 0.6930611729621887, "epoch": 4.63863958431743, "grad_norm": 68.9826360071532, "learning_rate": 3.251174488714863e-07, "logits": -1.1573054790496826, "logps": -87.21949768066406, "loss": 0.1898, "objective": 0.1879061758518219, "ranking_idealized": 0.4749999940395355, "ranking_idealized_expo": 0.44999998807907104, "ranking_simple": 0.44999998807907104, "regularize": 0.11860009282827377, "step": 2455 }, { "dpo_loss": 0.6947464346885681, "epoch": 4.648086915446386, "grad_norm": 69.37793486917724, "learning_rate": 3.2433033072615237e-07, "logits": -1.285698413848877, "logps": -88.887939453125, "loss": 0.1954, "objective": 0.20384371280670166, "ranking_idealized": 0.6000000238418579, "ranking_idealized_expo": 0.5375000238418579, "ranking_simple": 0.53125, "regularize": 0.13436904549598694, "step": 2460 }, { "dpo_loss": 0.6881365180015564, "epoch": 4.657534246575342, "grad_norm": 76.27390048138683, "learning_rate": 3.2354240346803587e-07, "logits": -1.3076812028884888, "logps": -88.25764465332031, "loss": 0.189, "objective": 0.19885292649269104, "ranking_idealized": 0.543749988079071, "ranking_idealized_expo": 0.512499988079071, "ranking_simple": 0.512499988079071, "regularize": 0.1300393044948578, "step": 2465 }, { "dpo_loss": 0.6903897523880005, "epoch": 4.6669815777042984, "grad_norm": 68.2251787946993, "learning_rate": 3.227536756740127e-07, "logits": -1.1892402172088623, "logps": -86.03571319580078, "loss": 0.1829, "objective": 0.18689945340156555, "ranking_idealized": 0.48750001192092896, "ranking_idealized_expo": 0.5, "ranking_simple": 0.4937500059604645, "regularize": 0.11786048114299774, "step": 2470 }, { "dpo_loss": 0.6937160491943359, "epoch": 4.6764289088332545, "grad_norm": 63.17076907075886, "learning_rate": 3.219641559296726e-07, "logits": -1.1472514867782593, "logps": -86.34375762939453, "loss": 0.1906, "objective": 0.18023869395256042, "ranking_idealized": 0.5874999761581421, "ranking_idealized_expo": 0.5874999761581421, "ranking_simple": 0.5874999761581421, "regularize": 0.11086708307266235, "step": 2475 }, { "dpo_loss": 0.6965736150741577, "epoch": 4.6858762399622105, "grad_norm": 64.71300253439841, "learning_rate": 3.2117385282922636e-07, "logits": -1.2553402185440063, "logps": -87.43341827392578, "loss": 0.1767, "objective": 0.16977284848690033, "ranking_idealized": 0.5562499761581421, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.5249999761581421, "regularize": 0.10011549293994904, "step": 2480 }, { "dpo_loss": 0.6819189786911011, "epoch": 4.6953235710911665, "grad_norm": 69.46266935910216, "learning_rate": 3.2038277497541177e-07, "logits": -1.2258949279785156, "logps": -87.9898910522461, "loss": 0.1924, "objective": 0.19613954424858093, "ranking_idealized": 0.53125, "ranking_idealized_expo": 0.518750011920929, "ranking_simple": 0.518750011920929, "regularize": 0.12794767320156097, "step": 2485 }, { "dpo_loss": 0.6897075772285461, "epoch": 4.7047709022201225, "grad_norm": 69.39400619680565, "learning_rate": 3.195909309793998e-07, "logits": -1.1648635864257812, "logps": -87.18431091308594, "loss": 0.1797, "objective": 0.1739298552274704, "ranking_idealized": 0.5562499761581421, "ranking_idealized_expo": 0.5062500238418579, "ranking_simple": 0.5062500238418579, "regularize": 0.10495909303426743, "step": 2490 }, { "dpo_loss": 0.690790593624115, "epoch": 4.7142182333490785, "grad_norm": 75.1319131589367, "learning_rate": 3.187983294607016e-07, "logits": -1.1756292581558228, "logps": -88.00474548339844, "loss": 0.1948, "objective": 0.2014426738023758, "ranking_idealized": 0.4937500059604645, "ranking_idealized_expo": 0.48124998807907104, "ranking_simple": 0.48124998807907104, "regularize": 0.13236361742019653, "step": 2495 }, { "dpo_loss": 0.6772107481956482, "epoch": 4.7236655644780345, "grad_norm": 70.41393937300322, "learning_rate": 3.1800497904707393e-07, "logits": -1.1602323055267334, "logps": -87.19842529296875, "loss": 0.1837, "objective": 0.1814550906419754, "ranking_idealized": 0.4124999940395355, "ranking_idealized_expo": 0.40625, "ranking_simple": 0.40625, "regularize": 0.11373400688171387, "step": 2500 }, { "epoch": 4.7236655644780345, "eval_dpo_loss": 0.7076886296272278, "eval_logits": -1.2040798664093018, "eval_logps": -93.6178970336914, "eval_loss": 0.32894209027290344, "eval_objective": 0.3304065465927124, "eval_ranking_idealized": 0.5398550629615784, "eval_ranking_idealized_expo": 0.5243270993232727, "eval_ranking_simple": 0.5222567319869995, "eval_regularize": 0.2596376836299896, "eval_runtime": 157.472, "eval_samples_per_second": 36.768, "eval_steps_per_second": 3.067, "step": 2500 }, { "dpo_loss": 0.6912451982498169, "epoch": 4.733112895606991, "grad_norm": 63.32014936670936, "learning_rate": 3.1721088837442563e-07, "logits": -1.177161455154419, "logps": -89.26594543457031, "loss": 0.1772, "objective": 0.1801118403673172, "ranking_idealized": 0.543749988079071, "ranking_idealized_expo": 0.512499988079071, "ranking_simple": 0.512499988079071, "regularize": 0.11098729074001312, "step": 2505 }, { "dpo_loss": 0.6862069368362427, "epoch": 4.7425602267359475, "grad_norm": 69.92808489362727, "learning_rate": 3.1641606608672357e-07, "logits": -1.2042205333709717, "logps": -89.10171508789062, "loss": 0.1854, "objective": 0.18454326689243317, "ranking_idealized": 0.4625000059604645, "ranking_idealized_expo": 0.4625000059604645, "ranking_simple": 0.45625001192092896, "regularize": 0.11592257022857666, "step": 2510 }, { "dpo_loss": 0.6823688745498657, "epoch": 4.7520075578649035, "grad_norm": 68.15826407150226, "learning_rate": 3.1562052083589843e-07, "logits": -1.1668407917022705, "logps": -89.30625915527344, "loss": 0.1939, "objective": 0.19946186244487762, "ranking_idealized": 0.550000011920929, "ranking_idealized_expo": 0.53125, "ranking_simple": 0.5375000238418579, "regularize": 0.13122497498989105, "step": 2515 }, { "dpo_loss": 0.6842549443244934, "epoch": 4.7614548889938595, "grad_norm": 68.69748953052088, "learning_rate": 3.1482426128175075e-07, "logits": -1.1746580600738525, "logps": -86.11698913574219, "loss": 0.1809, "objective": 0.17297717928886414, "ranking_idealized": 0.59375, "ranking_idealized_expo": 0.5687500238418579, "ranking_simple": 0.574999988079071, "regularize": 0.10455168783664703, "step": 2520 }, { "dpo_loss": 0.6946545243263245, "epoch": 4.7709022201228155, "grad_norm": 73.811610364557, "learning_rate": 3.1402729609185633e-07, "logits": -1.2330520153045654, "logps": -87.93450927734375, "loss": 0.1861, "objective": 0.19090914726257324, "ranking_idealized": 0.53125, "ranking_idealized_expo": 0.518750011920929, "ranking_simple": 0.518750011920929, "regularize": 0.12144370377063751, "step": 2525 }, { "dpo_loss": 0.697222888469696, "epoch": 4.7803495512517715, "grad_norm": 70.51840780607554, "learning_rate": 3.132296339414723e-07, "logits": -1.2591397762298584, "logps": -86.95104217529297, "loss": 0.1856, "objective": 0.1920265108346939, "ranking_idealized": 0.5562499761581421, "ranking_idealized_expo": 0.518750011920929, "ranking_simple": 0.4937500059604645, "regularize": 0.12230421602725983, "step": 2530 }, { "dpo_loss": 0.6979044675827026, "epoch": 4.7897968823807275, "grad_norm": 69.26217964070176, "learning_rate": 3.124312835134422e-07, "logits": -1.2490675449371338, "logps": -87.75285339355469, "loss": 0.1804, "objective": 0.18386594951152802, "ranking_idealized": 0.53125, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.5249999761581421, "regularize": 0.11407549679279327, "step": 2535 }, { "dpo_loss": 0.6813572645187378, "epoch": 4.7992442135096836, "grad_norm": 65.90857751588676, "learning_rate": 3.1163225349810197e-07, "logits": -1.1830145120620728, "logps": -86.79169464111328, "loss": 0.1827, "objective": 0.1742682158946991, "ranking_idealized": 0.550000011920929, "ranking_idealized_expo": 0.5375000238418579, "ranking_simple": 0.543749988079071, "regularize": 0.10613247007131577, "step": 2540 }, { "dpo_loss": 0.6973792314529419, "epoch": 4.80869154463864, "grad_norm": 70.5678238381887, "learning_rate": 3.10832552593185e-07, "logits": -1.1652858257293701, "logps": -86.21405029296875, "loss": 0.1766, "objective": 0.17289616167545319, "ranking_idealized": 0.5687500238418579, "ranking_idealized_expo": 0.512499988079071, "ranking_simple": 0.5062500238418579, "regularize": 0.10315822064876556, "step": 2545 }, { "dpo_loss": 0.6942178010940552, "epoch": 4.818138875767596, "grad_norm": 69.54271350405952, "learning_rate": 3.100321895037274e-07, "logits": -1.2170817852020264, "logps": -86.79609680175781, "loss": 0.1751, "objective": 0.1728823482990265, "ranking_idealized": 0.581250011920929, "ranking_idealized_expo": 0.5562499761581421, "ranking_simple": 0.5375000238418579, "regularize": 0.10346055030822754, "step": 2550 }, { "epoch": 4.818138875767596, "eval_dpo_loss": 0.7060064077377319, "eval_logits": -1.1993141174316406, "eval_logps": -93.47087860107422, "eval_loss": 0.32536742091178894, "eval_objective": 0.32468801736831665, "eval_ranking_idealized": 0.5398550629615784, "eval_ranking_idealized_expo": 0.5243270993232727, "eval_ranking_simple": 0.5212215185165405, "eval_regularize": 0.25408732891082764, "eval_runtime": 157.7178, "eval_samples_per_second": 36.711, "eval_steps_per_second": 3.062, "step": 2550 }, { "dpo_loss": 0.6955153346061707, "epoch": 4.827586206896552, "grad_norm": 69.02890069280883, "learning_rate": 3.092311729419737e-07, "logits": -1.2395265102386475, "logps": -88.45282745361328, "loss": 0.1858, "objective": 0.17923080921173096, "ranking_idealized": 0.625, "ranking_idealized_expo": 0.612500011920929, "ranking_simple": 0.612500011920929, "regularize": 0.10967928171157837, "step": 2555 }, { "dpo_loss": 0.6916204690933228, "epoch": 4.837033538025508, "grad_norm": 69.16848391532415, "learning_rate": 3.0842951162728157e-07, "logits": -1.1788740158081055, "logps": -86.39543914794922, "loss": 0.1829, "objective": 0.17950484156608582, "ranking_idealized": 0.53125, "ranking_idealized_expo": 0.4937500059604645, "ranking_simple": 0.5062500238418579, "regularize": 0.11034281551837921, "step": 2560 }, { "dpo_loss": 0.693740725517273, "epoch": 4.846480869154464, "grad_norm": 68.5339569030316, "learning_rate": 3.0762721428602695e-07, "logits": -1.2411420345306396, "logps": -86.57717895507812, "loss": 0.1871, "objective": 0.1871481090784073, "ranking_idealized": 0.550000011920929, "ranking_idealized_expo": 0.550000011920929, "ranking_simple": 0.5625, "regularize": 0.11777403205633163, "step": 2565 }, { "dpo_loss": 0.6866006851196289, "epoch": 4.85592820028342, "grad_norm": 73.61264970416494, "learning_rate": 3.068242896515093e-07, "logits": -1.1418663263320923, "logps": -86.59040069580078, "loss": 0.1737, "objective": 0.16396993398666382, "ranking_idealized": 0.5687500238418579, "ranking_idealized_expo": 0.53125, "ranking_simple": 0.518750011920929, "regularize": 0.09530988335609436, "step": 2570 }, { "dpo_loss": 0.6778737306594849, "epoch": 4.865375531412376, "grad_norm": 70.3681521743756, "learning_rate": 3.060207464638564e-07, "logits": -1.3109599351882935, "logps": -87.58135223388672, "loss": 0.1818, "objective": 0.1783079206943512, "ranking_idealized": 0.512499988079071, "ranking_idealized_expo": 0.4625000059604645, "ranking_simple": 0.4625000059604645, "regularize": 0.11052054166793823, "step": 2575 }, { "dpo_loss": 0.698595404624939, "epoch": 4.874822862541333, "grad_norm": 66.61584272656816, "learning_rate": 3.052165934699291e-07, "logits": -1.1655247211456299, "logps": -87.35575866699219, "loss": 0.179, "objective": 0.18281424045562744, "ranking_idealized": 0.5874999761581421, "ranking_idealized_expo": 0.5562499761581421, "ranking_simple": 0.5625, "regularize": 0.11295469850301743, "step": 2580 }, { "dpo_loss": 0.6869684457778931, "epoch": 4.884270193670288, "grad_norm": 64.53729015678043, "learning_rate": 3.044118394232263e-07, "logits": -1.1486632823944092, "logps": -85.10508728027344, "loss": 0.1744, "objective": 0.1802261918783188, "ranking_idealized": 0.5625, "ranking_idealized_expo": 0.5625, "ranking_simple": 0.5625, "regularize": 0.11152936518192291, "step": 2585 }, { "dpo_loss": 0.6830800175666809, "epoch": 4.893717524799245, "grad_norm": 64.62619594802779, "learning_rate": 3.0360649308378965e-07, "logits": -1.2419785261154175, "logps": -87.8184814453125, "loss": 0.1761, "objective": 0.1736673265695572, "ranking_idealized": 0.512499988079071, "ranking_idealized_expo": 0.5062500238418579, "ranking_simple": 0.5062500238418579, "regularize": 0.10535933077335358, "step": 2590 }, { "dpo_loss": 0.6906985640525818, "epoch": 4.903164855928201, "grad_norm": 70.30080047514623, "learning_rate": 3.0280056321810793e-07, "logits": -1.2014786005020142, "logps": -86.3695297241211, "loss": 0.1827, "objective": 0.17873261868953705, "ranking_idealized": 0.48124998807907104, "ranking_idealized_expo": 0.4437499940395355, "ranking_simple": 0.4437499940395355, "regularize": 0.10966275632381439, "step": 2595 }, { "dpo_loss": 0.6820223927497864, "epoch": 4.912612187057157, "grad_norm": 65.23956981989254, "learning_rate": 3.019940585990219e-07, "logits": -1.3367946147918701, "logps": -88.90962982177734, "loss": 0.1717, "objective": 0.17627449333667755, "ranking_idealized": 0.574999988079071, "ranking_idealized_expo": 0.518750011920929, "ranking_simple": 0.5249999761581421, "regularize": 0.10807224363088608, "step": 2600 }, { "epoch": 4.912612187057157, "eval_dpo_loss": 0.7049936652183533, "eval_logits": -1.2077727317810059, "eval_logps": -94.28855895996094, "eval_loss": 0.32867535948753357, "eval_objective": 0.32916179299354553, "eval_ranking_idealized": 0.5398550629615784, "eval_ranking_idealized_expo": 0.5243270993232727, "eval_ranking_simple": 0.520703911781311, "eval_regularize": 0.2586624324321747, "eval_runtime": 157.7677, "eval_samples_per_second": 36.7, "eval_steps_per_second": 3.061, "step": 2600 }, { "dpo_loss": 0.6886630058288574, "epoch": 4.922059518186113, "grad_norm": 72.59462116847205, "learning_rate": 3.013484469806475e-07, "logits": -1.2272846698760986, "logps": -89.29021453857422, "loss": 0.1751, "objective": 0.17364418506622314, "ranking_idealized": 0.5874999761581421, "ranking_idealized_expo": 0.5625, "ranking_simple": 0.5625, "regularize": 0.10477789491415024, "step": 2605 }, { "dpo_loss": 0.6812406778335571, "epoch": 4.931506849315069, "grad_norm": 67.46210838128205, "learning_rate": 3.0054092993290186e-07, "logits": -1.1636372804641724, "logps": -87.42491149902344, "loss": 0.1778, "objective": 0.17332686483860016, "ranking_idealized": 0.574999988079071, "ranking_idealized_expo": 0.53125, "ranking_simple": 0.5375000238418579, "regularize": 0.10520277917385101, "step": 2610 }, { "dpo_loss": 0.688554584980011, "epoch": 4.940954180444025, "grad_norm": 73.04114552356964, "learning_rate": 2.997328627286852e-07, "logits": -1.2463303804397583, "logps": -86.92112731933594, "loss": 0.1727, "objective": 0.17535845935344696, "ranking_idealized": 0.53125, "ranking_idealized_expo": 0.5375000238418579, "ranking_simple": 0.5249999761581421, "regularize": 0.10650300979614258, "step": 2615 }, { "dpo_loss": 0.6917858719825745, "epoch": 4.950401511572981, "grad_norm": 64.2146343951538, "learning_rate": 2.9892425416410385e-07, "logits": -1.2629753351211548, "logps": -88.22035217285156, "loss": 0.1788, "objective": 0.1773729771375656, "ranking_idealized": 0.5375000238418579, "ranking_idealized_expo": 0.48750001192092896, "ranking_simple": 0.48124998807907104, "regularize": 0.10819437354803085, "step": 2620 }, { "dpo_loss": 0.699199914932251, "epoch": 4.959848842701937, "grad_norm": 70.39578465923876, "learning_rate": 2.9811511304115715e-07, "logits": -1.1896778345108032, "logps": -89.34644317626953, "loss": 0.1783, "objective": 0.18884512782096863, "ranking_idealized": 0.5625, "ranking_idealized_expo": 0.550000011920929, "ranking_simple": 0.5562499761581421, "regularize": 0.11892513185739517, "step": 2625 }, { "dpo_loss": 0.6905206441879272, "epoch": 4.969296173830893, "grad_norm": 69.32265365835673, "learning_rate": 2.9730544816764175e-07, "logits": -1.1768453121185303, "logps": -89.61418151855469, "loss": 0.1782, "objective": 0.18465279042720795, "ranking_idealized": 0.5375000238418579, "ranking_idealized_expo": 0.53125, "ranking_simple": 0.53125, "regularize": 0.11560072749853134, "step": 2630 }, { "dpo_loss": 0.6918953657150269, "epoch": 4.978743504959849, "grad_norm": 72.72456109597668, "learning_rate": 2.9649526835705517e-07, "logits": -1.255723476409912, "logps": -87.71105194091797, "loss": 0.1797, "objective": 0.18056915700435638, "ranking_idealized": 0.574999988079071, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.543749988079071, "regularize": 0.11137963831424713, "step": 2635 }, { "dpo_loss": 0.6785508990287781, "epoch": 4.988190836088805, "grad_norm": 71.85403127935078, "learning_rate": 2.9568458242850053e-07, "logits": -1.1688531637191772, "logps": -88.31727600097656, "loss": 0.1755, "objective": 0.18157070875167847, "ranking_idealized": 0.5874999761581421, "ranking_idealized_expo": 0.574999988079071, "ranking_simple": 0.581250011920929, "regularize": 0.11371561139822006, "step": 2640 }, { "dpo_loss": 0.6890773773193359, "epoch": 4.997638167217761, "grad_norm": 67.62187141331893, "learning_rate": 2.9487339920659005e-07, "logits": -1.3337209224700928, "logps": -88.54768371582031, "loss": 0.1825, "objective": 0.17994090914726257, "ranking_idealized": 0.4749999940395355, "ranking_idealized_expo": 0.46875, "ranking_simple": 0.46875, "regularize": 0.11103316396474838, "step": 2645 }, { "dpo_loss": 0.6844527721405029, "epoch": 5.007085498346717, "grad_norm": 70.1158522407508, "learning_rate": 2.9406172752134914e-07, "logits": -1.1873667240142822, "logps": -85.24104309082031, "loss": 0.1761, "objective": 0.18002694845199585, "ranking_idealized": 0.5375000238418579, "ranking_idealized_expo": 0.550000011920929, "ranking_simple": 0.5562499761581421, "regularize": 0.11158166825771332, "step": 2650 }, { "epoch": 5.007085498346717, "eval_dpo_loss": 0.7060586214065552, "eval_logits": -1.205496907234192, "eval_logps": -93.62104034423828, "eval_loss": 0.3257303535938263, "eval_objective": 0.3239457905292511, "eval_ranking_idealized": 0.5398550629615784, "eval_ranking_idealized_expo": 0.5243270993232727, "eval_ranking_simple": 0.52173912525177, "eval_regularize": 0.253339946269989, "eval_runtime": 157.8318, "eval_samples_per_second": 36.685, "eval_steps_per_second": 3.06, "step": 2650 }, { "dpo_loss": 0.6852763295173645, "epoch": 5.016532829475673, "grad_norm": 71.3277288070423, "learning_rate": 2.932495762081205e-07, "logits": -1.1914253234863281, "logps": -89.50672912597656, "loss": 0.1789, "objective": 0.1824033111333847, "ranking_idealized": 0.53125, "ranking_idealized_expo": 0.518750011920929, "ranking_simple": 0.518750011920929, "regularize": 0.11387567222118378, "step": 2655 }, { "dpo_loss": 0.6922017335891724, "epoch": 5.025980160604629, "grad_norm": 70.69002903393073, "learning_rate": 2.924369541074674e-07, "logits": -1.1777479648590088, "logps": -87.5579833984375, "loss": 0.1814, "objective": 0.18187366425991058, "ranking_idealized": 0.45625001192092896, "ranking_idealized_expo": 0.44999998807907104, "ranking_simple": 0.44999998807907104, "regularize": 0.11265347898006439, "step": 2660 }, { "dpo_loss": 0.6896432638168335, "epoch": 5.035427491733585, "grad_norm": 69.459829412544, "learning_rate": 2.916238700650783e-07, "logits": -1.2779085636138916, "logps": -86.95489501953125, "loss": 0.1774, "objective": 0.1773608922958374, "ranking_idealized": 0.5249999761581421, "ranking_idealized_expo": 0.518750011920929, "ranking_simple": 0.518750011920929, "regularize": 0.10839656740427017, "step": 2665 }, { "dpo_loss": 0.6889906525611877, "epoch": 5.044874822862542, "grad_norm": 66.60019086580874, "learning_rate": 2.908103329316697e-07, "logits": -1.1906511783599854, "logps": -87.39493560791016, "loss": 0.171, "objective": 0.1724904477596283, "ranking_idealized": 0.53125, "ranking_idealized_expo": 0.512499988079071, "ranking_simple": 0.5062500238418579, "regularize": 0.10359140485525131, "step": 2670 }, { "dpo_loss": 0.6954147219657898, "epoch": 5.054322153991498, "grad_norm": 69.47070958379493, "learning_rate": 2.8999635156289027e-07, "logits": -1.275161862373352, "logps": -87.87858581542969, "loss": 0.1752, "objective": 0.18025653064250946, "ranking_idealized": 0.5687500238418579, "ranking_idealized_expo": 0.518750011920929, "ranking_simple": 0.512499988079071, "regularize": 0.11071505397558212, "step": 2675 }, { "dpo_loss": 0.6879990696907043, "epoch": 5.063769485120454, "grad_norm": 63.420675987238475, "learning_rate": 2.8918193481922425e-07, "logits": -1.1934435367584229, "logps": -86.3830795288086, "loss": 0.1634, "objective": 0.16555842757225037, "ranking_idealized": 0.5, "ranking_idealized_expo": 0.4937500059604645, "ranking_simple": 0.48750001192092896, "regularize": 0.09675852209329605, "step": 2680 }, { "dpo_loss": 0.6914385557174683, "epoch": 5.07321681624941, "grad_norm": 70.46171647986608, "learning_rate": 2.883670915658952e-07, "logits": -1.1908118724822998, "logps": -88.31812286376953, "loss": 0.1767, "objective": 0.17208442091941833, "ranking_idealized": 0.4937500059604645, "ranking_idealized_expo": 0.4749999940395355, "ranking_simple": 0.46875, "regularize": 0.10294058173894882, "step": 2685 }, { "dpo_loss": 0.6840112209320068, "epoch": 5.082664147378366, "grad_norm": 66.86402457294675, "learning_rate": 2.8755183067276955e-07, "logits": -1.1934614181518555, "logps": -88.08988952636719, "loss": 0.1767, "objective": 0.17896486818790436, "ranking_idealized": 0.48124998807907104, "ranking_idealized_expo": 0.4937500059604645, "ranking_simple": 0.4937500059604645, "regularize": 0.1105637326836586, "step": 2690 }, { "dpo_loss": 0.6855770349502563, "epoch": 5.092111478507322, "grad_norm": 65.28727826104723, "learning_rate": 2.8673616101425946e-07, "logits": -1.1915392875671387, "logps": -87.99581146240234, "loss": 0.1719, "objective": 0.17346800863742828, "ranking_idealized": 0.625, "ranking_idealized_expo": 0.59375, "ranking_simple": 0.6000000238418579, "regularize": 0.10491027683019638, "step": 2695 }, { "dpo_loss": 0.6846620440483093, "epoch": 5.101558809636278, "grad_norm": 67.24661354657333, "learning_rate": 2.85920091469227e-07, "logits": -1.2413551807403564, "logps": -87.92008972167969, "loss": 0.1692, "objective": 0.17094926536083221, "ranking_idealized": 0.5249999761581421, "ranking_idealized_expo": 0.48124998807907104, "ranking_simple": 0.4937500059604645, "regularize": 0.10248307138681412, "step": 2700 }, { "epoch": 5.101558809636278, "eval_dpo_loss": 0.7072012424468994, "eval_logits": -1.2062513828277588, "eval_logps": -93.01093292236328, "eval_loss": 0.33959639072418213, "eval_objective": 0.3377668559551239, "eval_ranking_idealized": 0.5398550629615784, "eval_ranking_idealized_expo": 0.5243270993232727, "eval_ranking_simple": 0.5222567319869995, "eval_regularize": 0.267046719789505, "eval_runtime": 158.2175, "eval_samples_per_second": 36.595, "eval_steps_per_second": 3.053, "step": 2700 }, { "dpo_loss": 0.6864423751831055, "epoch": 5.111006140765234, "grad_norm": 68.97419904948683, "learning_rate": 2.8510363092088725e-07, "logits": -1.2034504413604736, "logps": -88.89812469482422, "loss": 0.1761, "objective": 0.17688480019569397, "ranking_idealized": 0.550000011920929, "ranking_idealized_expo": 0.518750011920929, "ranking_simple": 0.5375000238418579, "regularize": 0.10824055969715118, "step": 2705 }, { "dpo_loss": 0.696429967880249, "epoch": 5.12045347189419, "grad_norm": 66.60888495594311, "learning_rate": 2.8428678825671097e-07, "logits": -1.2295548915863037, "logps": -88.55594635009766, "loss": 0.1749, "objective": 0.1697665899991989, "ranking_idealized": 0.6187499761581421, "ranking_idealized_expo": 0.5562499761581421, "ranking_simple": 0.5687500238418579, "regularize": 0.10012359917163849, "step": 2710 }, { "dpo_loss": 0.6905234456062317, "epoch": 5.129900803023146, "grad_norm": 67.47223601522833, "learning_rate": 2.83469572368329e-07, "logits": -1.206521987915039, "logps": -90.2388916015625, "loss": 0.1675, "objective": 0.1722148209810257, "ranking_idealized": 0.550000011920929, "ranking_idealized_expo": 0.5, "ranking_simple": 0.5, "regularize": 0.10316245257854462, "step": 2715 }, { "dpo_loss": 0.6935364007949829, "epoch": 5.139348134152102, "grad_norm": 65.87343663255949, "learning_rate": 2.8265199215143444e-07, "logits": -1.2320802211761475, "logps": -88.0063705444336, "loss": 0.1752, "objective": 0.18072409927845, "ranking_idealized": 0.4749999940395355, "ranking_idealized_expo": 0.4437499940395355, "ranking_simple": 0.4375, "regularize": 0.11137046664953232, "step": 2720 }, { "dpo_loss": 0.6894115209579468, "epoch": 5.148795465281058, "grad_norm": 70.65549570326904, "learning_rate": 2.818340565056864e-07, "logits": -1.2492519617080688, "logps": -87.02153778076172, "loss": 0.172, "objective": 0.18207569420337677, "ranking_idealized": 0.5562499761581421, "ranking_idealized_expo": 0.543749988079071, "ranking_simple": 0.5249999761581421, "regularize": 0.11313454061746597, "step": 2725 }, { "dpo_loss": 0.6915232539176941, "epoch": 5.158242796410014, "grad_norm": 64.75353275942425, "learning_rate": 2.810157743346131e-07, "logits": -1.1639604568481445, "logps": -86.55883026123047, "loss": 0.1667, "objective": 0.17392504215240479, "ranking_idealized": 0.6187499761581421, "ranking_idealized_expo": 0.574999988079071, "ranking_simple": 0.5874999761581421, "regularize": 0.10477272421121597, "step": 2730 }, { "dpo_loss": 0.6925600171089172, "epoch": 5.16769012753897, "grad_norm": 68.39462262880215, "learning_rate": 2.801971545455144e-07, "logits": -1.2426865100860596, "logps": -86.77247619628906, "loss": 0.1793, "objective": 0.18478263914585114, "ranking_idealized": 0.5375000238418579, "ranking_idealized_expo": 0.512499988079071, "ranking_simple": 0.512499988079071, "regularize": 0.11552663892507553, "step": 2735 }, { "dpo_loss": 0.6853238344192505, "epoch": 5.177137458667926, "grad_norm": 63.78064347024799, "learning_rate": 2.7937820604936583e-07, "logits": -1.2348917722702026, "logps": -88.9598388671875, "loss": 0.1679, "objective": 0.17504046857357025, "ranking_idealized": 0.574999988079071, "ranking_idealized_expo": 0.518750011920929, "ranking_simple": 0.543749988079071, "regularize": 0.10650809109210968, "step": 2740 }, { "dpo_loss": 0.693293571472168, "epoch": 5.186584789796882, "grad_norm": 66.4897429476268, "learning_rate": 2.785589377607205e-07, "logits": -1.23585045337677, "logps": -89.13365936279297, "loss": 0.1648, "objective": 0.164475217461586, "ranking_idealized": 0.606249988079071, "ranking_idealized_expo": 0.5687500238418579, "ranking_simple": 0.543749988079071, "regularize": 0.09514583647251129, "step": 2745 }, { "dpo_loss": 0.6954046487808228, "epoch": 5.196032120925839, "grad_norm": 68.61846330346695, "learning_rate": 2.7773935859761287e-07, "logits": -1.1489399671554565, "logps": -89.15138244628906, "loss": 0.1676, "objective": 0.17117753624916077, "ranking_idealized": 0.5375000238418579, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.512499988079071, "regularize": 0.10163706541061401, "step": 2750 }, { "epoch": 5.196032120925839, "eval_dpo_loss": 0.7083860039710999, "eval_logits": -1.1977908611297607, "eval_logps": -93.95914459228516, "eval_loss": 0.340188592672348, "eval_objective": 0.3383520543575287, "eval_ranking_idealized": 0.5398550629615784, "eval_ranking_idealized_expo": 0.5243270993232727, "eval_ranking_simple": 0.5201863646507263, "eval_regularize": 0.2675134837627411, "eval_runtime": 162.1543, "eval_samples_per_second": 35.707, "eval_steps_per_second": 2.979, "step": 2750 }, { "dpo_loss": 0.6867315769195557, "epoch": 5.205479452054795, "grad_norm": 68.40852812072636, "learning_rate": 2.769194774814613e-07, "logits": -1.1747679710388184, "logps": -88.01338958740234, "loss": 0.1723, "objective": 0.17285940051078796, "ranking_idealized": 0.512499988079071, "ranking_idealized_expo": 0.518750011920929, "ranking_simple": 0.5249999761581421, "regularize": 0.10418625175952911, "step": 2755 }, { "dpo_loss": 0.7033735513687134, "epoch": 5.214926783183751, "grad_norm": 71.86539992150202, "learning_rate": 2.7609930333697094e-07, "logits": -1.1481572389602661, "logps": -89.25263977050781, "loss": 0.1727, "objective": 0.1682891547679901, "ranking_idealized": 0.5249999761581421, "ranking_idealized_expo": 0.512499988079071, "ranking_simple": 0.5062500238418579, "regularize": 0.09795178472995758, "step": 2760 }, { "dpo_loss": 0.6900596022605896, "epoch": 5.224374114312707, "grad_norm": 65.34349513478105, "learning_rate": 2.7527884509203686e-07, "logits": -1.1207449436187744, "logps": -88.28308868408203, "loss": 0.1599, "objective": 0.14985518157482147, "ranking_idealized": 0.5687500238418579, "ranking_idealized_expo": 0.543749988079071, "ranking_simple": 0.543749988079071, "regularize": 0.08084921538829803, "step": 2765 }, { "dpo_loss": 0.6909135580062866, "epoch": 5.233821445441663, "grad_norm": 71.32727022935785, "learning_rate": 2.7445811167764644e-07, "logits": -1.1768852472305298, "logps": -90.43959045410156, "loss": 0.1741, "objective": 0.1675560474395752, "ranking_idealized": 0.5375000238418579, "ranking_idealized_expo": 0.543749988079071, "ranking_simple": 0.5375000238418579, "regularize": 0.0984647125005722, "step": 2770 }, { "dpo_loss": 0.6930040121078491, "epoch": 5.243268776570619, "grad_norm": 68.91942074445643, "learning_rate": 2.736371120277825e-07, "logits": -1.2606618404388428, "logps": -89.94786071777344, "loss": 0.1732, "objective": 0.1796959936618805, "ranking_idealized": 0.53125, "ranking_idealized_expo": 0.53125, "ranking_simple": 0.5249999761581421, "regularize": 0.11039556562900543, "step": 2775 }, { "dpo_loss": 0.6956884860992432, "epoch": 5.252716107699575, "grad_norm": 75.06042603871374, "learning_rate": 2.72815855079326e-07, "logits": -1.1935651302337646, "logps": -86.52474212646484, "loss": 0.169, "objective": 0.1645137369632721, "ranking_idealized": 0.6000000238418579, "ranking_idealized_expo": 0.5874999761581421, "ranking_simple": 0.59375, "regularize": 0.09494488686323166, "step": 2780 }, { "dpo_loss": 0.690733790397644, "epoch": 5.262163438828531, "grad_norm": 76.75556002327785, "learning_rate": 2.719943497719585e-07, "logits": -1.2216724157333374, "logps": -89.10572814941406, "loss": 0.1631, "objective": 0.15377160906791687, "ranking_idealized": 0.5375000238418579, "ranking_idealized_expo": 0.512499988079071, "ranking_simple": 0.5062500238418579, "regularize": 0.08469823747873306, "step": 2785 }, { "dpo_loss": 0.6916483640670776, "epoch": 5.271610769957487, "grad_norm": 75.0828149872277, "learning_rate": 2.711726050480653e-07, "logits": -1.1828960180282593, "logps": -90.04365539550781, "loss": 0.1778, "objective": 0.1791430115699768, "ranking_idealized": 0.5562499761581421, "ranking_idealized_expo": 0.543749988079071, "ranking_simple": 0.550000011920929, "regularize": 0.10997817665338516, "step": 2790 }, { "dpo_loss": 0.6929569244384766, "epoch": 5.281058101086443, "grad_norm": 67.80140559185041, "learning_rate": 2.703506298526374e-07, "logits": -1.246824860572815, "logps": -87.59806060791016, "loss": 0.1671, "objective": 0.17096053063869476, "ranking_idealized": 0.5687500238418579, "ranking_idealized_expo": 0.543749988079071, "ranking_simple": 0.53125, "regularize": 0.10166484117507935, "step": 2795 }, { "dpo_loss": 0.6785708069801331, "epoch": 5.290505432215399, "grad_norm": 66.61830847165874, "learning_rate": 2.6952843313317506e-07, "logits": -1.168810486793518, "logps": -87.19744873046875, "loss": 0.1743, "objective": 0.1652020812034607, "ranking_idealized": 0.48750001192092896, "ranking_idealized_expo": 0.4937500059604645, "ranking_simple": 0.4937500059604645, "regularize": 0.0973450094461441, "step": 2800 }, { "epoch": 5.290505432215399, "eval_dpo_loss": 0.7076205611228943, "eval_logits": -1.1971802711486816, "eval_logps": -93.91000366210938, "eval_loss": 0.33709466457366943, "eval_objective": 0.3350926339626312, "eval_ranking_idealized": 0.5398550629615784, "eval_ranking_idealized_expo": 0.5243270993232727, "eval_ranking_simple": 0.52173912525177, "eval_regularize": 0.26433053612709045, "eval_runtime": 158.1327, "eval_samples_per_second": 36.615, "eval_steps_per_second": 3.054, "step": 2800 }, { "dpo_loss": 0.6910735368728638, "epoch": 5.299952763344355, "grad_norm": 70.67384600159596, "learning_rate": 2.687060238395898e-07, "logits": -1.2108272314071655, "logps": -87.95372009277344, "loss": 0.1681, "objective": 0.15209585428237915, "ranking_idealized": 0.574999988079071, "ranking_idealized_expo": 0.543749988079071, "ranking_simple": 0.543749988079071, "regularize": 0.08298849314451218, "step": 2805 }, { "dpo_loss": 0.6852573752403259, "epoch": 5.309400094473311, "grad_norm": 70.07310569800453, "learning_rate": 2.6788341092410667e-07, "logits": -1.1896169185638428, "logps": -89.82859802246094, "loss": 0.1607, "objective": 0.1615303009748459, "ranking_idealized": 0.4312500059604645, "ranking_idealized_expo": 0.4124999940395355, "ranking_simple": 0.41874998807907104, "regularize": 0.09300456941127777, "step": 2810 }, { "dpo_loss": 0.6889798045158386, "epoch": 5.318847425602267, "grad_norm": 68.20006338317981, "learning_rate": 2.6706060334116775e-07, "logits": -1.1706857681274414, "logps": -88.37882995605469, "loss": 0.1694, "objective": 0.16437754034996033, "ranking_idealized": 0.512499988079071, "ranking_idealized_expo": 0.5062500238418579, "ranking_simple": 0.518750011920929, "regularize": 0.0954795628786087, "step": 2815 }, { "dpo_loss": 0.6930587291717529, "epoch": 5.328294756731223, "grad_norm": 65.44290392226539, "learning_rate": 2.6623761004733384e-07, "logits": -1.1820690631866455, "logps": -89.46732330322266, "loss": 0.17, "objective": 0.17971068620681763, "ranking_idealized": 0.518750011920929, "ranking_idealized_expo": 0.4937500059604645, "ranking_simple": 0.5, "regularize": 0.11040481179952621, "step": 2820 }, { "dpo_loss": 0.6904914975166321, "epoch": 5.337742087860179, "grad_norm": 66.90833905570693, "learning_rate": 2.6541444000118744e-07, "logits": -1.1795885562896729, "logps": -87.92036437988281, "loss": 0.1717, "objective": 0.16590887308120728, "ranking_idealized": 0.5625, "ranking_idealized_expo": 0.512499988079071, "ranking_simple": 0.5062500238418579, "regularize": 0.09685972332954407, "step": 2825 }, { "dpo_loss": 0.6931673288345337, "epoch": 5.347189418989135, "grad_norm": 79.47113987338702, "learning_rate": 2.6459110216323494e-07, "logits": -1.1880987882614136, "logps": -88.9898910522461, "loss": 0.1688, "objective": 0.17713353037834167, "ranking_idealized": 0.612500011920929, "ranking_idealized_expo": 0.5687500238418579, "ranking_simple": 0.5874999761581421, "regularize": 0.10781680047512054, "step": 2830 }, { "dpo_loss": 0.6927000284194946, "epoch": 5.356636750118092, "grad_norm": 77.112287607801, "learning_rate": 2.637676054958092e-07, "logits": -1.1750524044036865, "logps": -88.32490539550781, "loss": 0.1701, "objective": 0.1717822402715683, "ranking_idealized": 0.550000011920929, "ranking_idealized_expo": 0.5, "ranking_simple": 0.4937500059604645, "regularize": 0.10251224040985107, "step": 2835 }, { "dpo_loss": 0.6875866651535034, "epoch": 5.366084081247048, "grad_norm": 66.31189469268375, "learning_rate": 2.629439589629722e-07, "logits": -1.1873130798339844, "logps": -88.2190170288086, "loss": 0.1628, "objective": 0.16570058465003967, "ranking_idealized": 0.5062500238418579, "ranking_idealized_expo": 0.5062500238418579, "ranking_simple": 0.5062500238418579, "regularize": 0.09694191813468933, "step": 2840 }, { "dpo_loss": 0.6951637864112854, "epoch": 5.375531412376004, "grad_norm": 75.85897729374831, "learning_rate": 2.621201715304168e-07, "logits": -1.2368756532669067, "logps": -88.91191101074219, "loss": 0.1712, "objective": 0.1714092344045639, "ranking_idealized": 0.5249999761581421, "ranking_idealized_expo": 0.5062500238418579, "ranking_simple": 0.5062500238418579, "regularize": 0.1018928661942482, "step": 2845 }, { "dpo_loss": 0.689228892326355, "epoch": 5.38497874350496, "grad_norm": 66.51978594280956, "learning_rate": 2.612962521653702e-07, "logits": -1.1629688739776611, "logps": -88.8650131225586, "loss": 0.1715, "objective": 0.16678547859191895, "ranking_idealized": 0.53125, "ranking_idealized_expo": 0.4937500059604645, "ranking_simple": 0.4937500059604645, "regularize": 0.09786257147789001, "step": 2850 }, { "epoch": 5.38497874350496, "eval_dpo_loss": 0.7084411978721619, "eval_logits": -1.1938939094543457, "eval_logps": -93.68083953857422, "eval_loss": 0.3407943844795227, "eval_objective": 0.34045931696891785, "eval_ranking_idealized": 0.5398550629615784, "eval_ranking_idealized_expo": 0.5243270993232727, "eval_ranking_simple": 0.5212215185165405, "eval_regularize": 0.26961514353752136, "eval_runtime": 158.2513, "eval_samples_per_second": 36.587, "eval_steps_per_second": 3.052, "step": 2850 }, { "dpo_loss": 0.6896392107009888, "epoch": 5.394426074633916, "grad_norm": 71.43890694528095, "learning_rate": 2.6047220983649535e-07, "logits": -1.1977884769439697, "logps": -89.16146087646484, "loss": 0.1634, "objective": 0.16076752543449402, "ranking_idealized": 0.550000011920929, "ranking_idealized_expo": 0.543749988079071, "ranking_simple": 0.53125, "regularize": 0.09180359542369843, "step": 2855 }, { "dpo_loss": 0.6991931796073914, "epoch": 5.403873405762872, "grad_norm": 70.78967226486061, "learning_rate": 2.596480535137938e-07, "logits": -1.1725437641143799, "logps": -87.13787841796875, "loss": 0.1664, "objective": 0.17251865565776825, "ranking_idealized": 0.5562499761581421, "ranking_idealized_expo": 0.53125, "ranking_simple": 0.53125, "regularize": 0.10259934514760971, "step": 2860 }, { "dpo_loss": 0.6924434900283813, "epoch": 5.413320736891828, "grad_norm": 71.78447378915702, "learning_rate": 2.588237921685079e-07, "logits": -1.279209852218628, "logps": -88.69322967529297, "loss": 0.1646, "objective": 0.15512174367904663, "ranking_idealized": 0.5625, "ranking_idealized_expo": 0.5375000238418579, "ranking_simple": 0.5375000238418579, "regularize": 0.08587740361690521, "step": 2865 }, { "dpo_loss": 0.6857197880744934, "epoch": 5.422768068020784, "grad_norm": 71.6129346374041, "learning_rate": 2.579994347730233e-07, "logits": -1.1856725215911865, "logps": -87.46627044677734, "loss": 0.1714, "objective": 0.1694221794605255, "ranking_idealized": 0.543749988079071, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.5375000238418579, "regularize": 0.10085020214319229, "step": 2870 }, { "dpo_loss": 0.6933203935623169, "epoch": 5.43221539914974, "grad_norm": 71.14208242063751, "learning_rate": 2.571749903007712e-07, "logits": -1.2006828784942627, "logps": -87.65222930908203, "loss": 0.1645, "objective": 0.17003345489501953, "ranking_idealized": 0.518750011920929, "ranking_idealized_expo": 0.48750001192092896, "ranking_simple": 0.48750001192092896, "regularize": 0.10070140659809113, "step": 2875 }, { "dpo_loss": 0.6922504305839539, "epoch": 5.441662730278696, "grad_norm": 64.76173144858346, "learning_rate": 2.563504677261307e-07, "logits": -1.0994261503219604, "logps": -87.58671569824219, "loss": 0.1664, "objective": 0.16446347534656525, "ranking_idealized": 0.5062500238418579, "ranking_idealized_expo": 0.44999998807907104, "ranking_simple": 0.4312500059604645, "regularize": 0.09523843228816986, "step": 2880 }, { "dpo_loss": 0.6941710710525513, "epoch": 5.451110061407652, "grad_norm": 71.57354676807199, "learning_rate": 2.555258760243308e-07, "logits": -1.1873737573623657, "logps": -86.51579284667969, "loss": 0.1661, "objective": 0.1644514799118042, "ranking_idealized": 0.543749988079071, "ranking_idealized_expo": 0.5062500238418579, "ranking_simple": 0.5, "regularize": 0.09503436088562012, "step": 2885 }, { "dpo_loss": 0.6851297616958618, "epoch": 5.460557392536608, "grad_norm": 68.40112111684203, "learning_rate": 2.5470122417135325e-07, "logits": -1.2447443008422852, "logps": -87.82207489013672, "loss": 0.1715, "objective": 0.17926739156246185, "ranking_idealized": 0.5062500238418579, "ranking_idealized_expo": 0.48750001192092896, "ranking_simple": 0.4749999940395355, "regularize": 0.11075443029403687, "step": 2890 }, { "dpo_loss": 0.6831666827201843, "epoch": 5.470004723665564, "grad_norm": 70.97494885343163, "learning_rate": 2.538765211438346e-07, "logits": -1.1744649410247803, "logps": -87.33937072753906, "loss": 0.1686, "objective": 0.17264780402183533, "ranking_idealized": 0.59375, "ranking_idealized_expo": 0.581250011920929, "ranking_simple": 0.581250011920929, "regularize": 0.10433115065097809, "step": 2895 }, { "dpo_loss": 0.6850699186325073, "epoch": 5.47945205479452, "grad_norm": 67.03454129134816, "learning_rate": 2.5305177591896827e-07, "logits": -1.2211939096450806, "logps": -87.41989135742188, "loss": 0.1643, "objective": 0.17151689529418945, "ranking_idealized": 0.4937500059604645, "ranking_idealized_expo": 0.42500001192092896, "ranking_simple": 0.4375, "regularize": 0.1030099019408226, "step": 2900 }, { "epoch": 5.47945205479452, "eval_dpo_loss": 0.709491491317749, "eval_logits": -1.1941238641738892, "eval_logps": -93.03810119628906, "eval_loss": 0.3434217870235443, "eval_objective": 0.34335386753082275, "eval_ranking_idealized": 0.5398550629615784, "eval_ranking_idealized_expo": 0.5243270993232727, "eval_ranking_simple": 0.5191511511802673, "eval_regularize": 0.2724047005176544, "eval_runtime": 157.9263, "eval_samples_per_second": 36.663, "eval_steps_per_second": 3.058, "step": 2900 }, { "dpo_loss": 0.6883138418197632, "epoch": 5.488899385923476, "grad_norm": 68.25764695131512, "learning_rate": 2.5222699747440705e-07, "logits": -1.1506329774856567, "logps": -87.58967590332031, "loss": 0.1628, "objective": 0.1579241305589676, "ranking_idealized": 0.550000011920929, "ranking_idealized_expo": 0.48750001192092896, "ranking_simple": 0.4937500059604645, "regularize": 0.08909274637699127, "step": 2905 }, { "dpo_loss": 0.6868668794631958, "epoch": 5.498346717052432, "grad_norm": 63.82252969817784, "learning_rate": 2.514021947881654e-07, "logits": -1.1989600658416748, "logps": -85.99334716796875, "loss": 0.1579, "objective": 0.15128464996814728, "ranking_idealized": 0.574999988079071, "ranking_idealized_expo": 0.5625, "ranking_simple": 0.5625, "regularize": 0.08259795606136322, "step": 2910 }, { "dpo_loss": 0.6849532723426819, "epoch": 5.507794048181388, "grad_norm": 70.75728445226883, "learning_rate": 2.5057737683852166e-07, "logits": -1.18284273147583, "logps": -86.6116943359375, "loss": 0.1592, "objective": 0.1478767693042755, "ranking_idealized": 0.574999988079071, "ranking_idealized_expo": 0.550000011920929, "ranking_simple": 0.5562499761581421, "regularize": 0.07938142865896225, "step": 2915 }, { "dpo_loss": 0.6933680772781372, "epoch": 5.517241379310345, "grad_norm": 71.53835495050187, "learning_rate": 2.497525526039202e-07, "logits": -1.2268903255462646, "logps": -87.72315979003906, "loss": 0.1604, "objective": 0.16375622153282166, "ranking_idealized": 0.5625, "ranking_idealized_expo": 0.543749988079071, "ranking_simple": 0.543749988079071, "regularize": 0.09441942721605301, "step": 2920 }, { "dpo_loss": 0.6884834170341492, "epoch": 5.526688710439301, "grad_norm": 70.7363668045601, "learning_rate": 2.4892773106287406e-07, "logits": -1.2739144563674927, "logps": -87.93558502197266, "loss": 0.1538, "objective": 0.15243694186210632, "ranking_idealized": 0.518750011920929, "ranking_idealized_expo": 0.5, "ranking_simple": 0.512499988079071, "regularize": 0.0835886001586914, "step": 2925 }, { "dpo_loss": 0.6854166984558105, "epoch": 5.536136041568257, "grad_norm": 64.52862440957706, "learning_rate": 2.4810292119386674e-07, "logits": -1.2274558544158936, "logps": -87.8212890625, "loss": 0.1554, "objective": 0.15207795798778534, "ranking_idealized": 0.4937500059604645, "ranking_idealized_expo": 0.44999998807907104, "ranking_simple": 0.44999998807907104, "regularize": 0.08353628218173981, "step": 2930 }, { "dpo_loss": 0.6835038661956787, "epoch": 5.545583372697213, "grad_norm": 71.03427758426722, "learning_rate": 2.472781319752546e-07, "logits": -1.1815805435180664, "logps": -88.67829895019531, "loss": 0.161, "objective": 0.15246529877185822, "ranking_idealized": 0.5625, "ranking_idealized_expo": 0.53125, "ranking_simple": 0.53125, "regularize": 0.0841149091720581, "step": 2935 }, { "dpo_loss": 0.6861475110054016, "epoch": 5.555030703826169, "grad_norm": 69.93722884891055, "learning_rate": 2.4645337238516953e-07, "logits": -1.2078359127044678, "logps": -89.95475006103516, "loss": 0.1645, "objective": 0.1642766147851944, "ranking_idealized": 0.53125, "ranking_idealized_expo": 0.5375000238418579, "ranking_simple": 0.5375000238418579, "regularize": 0.09566183388233185, "step": 2940 }, { "dpo_loss": 0.6943419575691223, "epoch": 5.564478034955125, "grad_norm": 67.34370411603598, "learning_rate": 2.4562865140142065e-07, "logits": -1.2549326419830322, "logps": -87.40699768066406, "loss": 0.1626, "objective": 0.16093124449253082, "ranking_idealized": 0.48750001192092896, "ranking_idealized_expo": 0.4375, "ranking_simple": 0.4437499940395355, "regularize": 0.0914970338344574, "step": 2945 }, { "dpo_loss": 0.6921701431274414, "epoch": 5.573925366084081, "grad_norm": 67.90112389845122, "learning_rate": 2.4480397800139697e-07, "logits": -1.2807310819625854, "logps": -85.22374725341797, "loss": 0.1569, "objective": 0.1513904631137848, "ranking_idealized": 0.5687500238418579, "ranking_idealized_expo": 0.5375000238418579, "ranking_simple": 0.550000011920929, "regularize": 0.08217345923185349, "step": 2950 }, { "epoch": 5.573925366084081, "eval_dpo_loss": 0.7083237767219543, "eval_logits": -1.1993257999420166, "eval_logps": -94.44893646240234, "eval_loss": 0.34027528762817383, "eval_objective": 0.340644896030426, "eval_ranking_idealized": 0.5398550629615784, "eval_ranking_idealized_expo": 0.5243270993232727, "eval_ranking_simple": 0.5191511511802673, "eval_regularize": 0.2698124647140503, "eval_runtime": 157.6445, "eval_samples_per_second": 36.728, "eval_steps_per_second": 3.064, "step": 2950 }, { "dpo_loss": 0.699337363243103, "epoch": 5.583372697213037, "grad_norm": 70.66002533656673, "learning_rate": 2.439793611619693e-07, "logits": -1.195084810256958, "logps": -89.92610168457031, "loss": 0.1672, "objective": 0.16342179477214813, "ranking_idealized": 0.5687500238418579, "ranking_idealized_expo": 0.53125, "ranking_simple": 0.53125, "regularize": 0.09348806738853455, "step": 2955 }, { "dpo_loss": 0.6918556690216064, "epoch": 5.592820028341993, "grad_norm": 64.9097814778515, "learning_rate": 2.431548098593932e-07, "logits": -1.2656776905059814, "logps": -88.05064392089844, "loss": 0.1528, "objective": 0.15726497769355774, "ranking_idealized": 0.581250011920929, "ranking_idealized_expo": 0.543749988079071, "ranking_simple": 0.5562499761581421, "regularize": 0.08807940781116486, "step": 2960 }, { "dpo_loss": 0.6896753311157227, "epoch": 5.602267359470949, "grad_norm": 66.60639839782714, "learning_rate": 2.4233033306921044e-07, "logits": -1.2398788928985596, "logps": -87.45914459228516, "loss": 0.1555, "objective": 0.15855464339256287, "ranking_idealized": 0.5, "ranking_idealized_expo": 0.4749999940395355, "ranking_simple": 0.4749999940395355, "regularize": 0.08958712220191956, "step": 2965 }, { "dpo_loss": 0.6900865435600281, "epoch": 5.611714690599905, "grad_norm": 68.83649894986999, "learning_rate": 2.415059397661519e-07, "logits": -1.2571332454681396, "logps": -89.16593170166016, "loss": 0.1615, "objective": 0.160673126578331, "ranking_idealized": 0.543749988079071, "ranking_idealized_expo": 0.5, "ranking_simple": 0.5, "regularize": 0.09166448563337326, "step": 2970 }, { "dpo_loss": 0.6876694560050964, "epoch": 5.621162021728861, "grad_norm": 71.4594888607687, "learning_rate": 2.4068163892403954e-07, "logits": -1.2201224565505981, "logps": -91.45027160644531, "loss": 0.1677, "objective": 0.16672007739543915, "ranking_idealized": 0.5375000238418579, "ranking_idealized_expo": 0.512499988079071, "ranking_simple": 0.512499988079071, "regularize": 0.09795314073562622, "step": 2975 }, { "dpo_loss": 0.6961122751235962, "epoch": 5.630609352857817, "grad_norm": 66.06190204842538, "learning_rate": 2.3985743951568896e-07, "logits": -1.1683927774429321, "logps": -89.21009826660156, "loss": 0.1615, "objective": 0.16139890253543854, "ranking_idealized": 0.543749988079071, "ranking_idealized_expo": 0.53125, "ranking_simple": 0.543749988079071, "regularize": 0.0917876735329628, "step": 2980 }, { "dpo_loss": 0.6950571537017822, "epoch": 5.640056683986773, "grad_norm": 71.8921965768204, "learning_rate": 2.3903335051281155e-07, "logits": -1.184612512588501, "logps": -88.78089904785156, "loss": 0.162, "objective": 0.15779440104961395, "ranking_idealized": 0.46875, "ranking_idealized_expo": 0.44999998807907104, "ranking_simple": 0.44999998807907104, "regularize": 0.08828870207071304, "step": 2985 }, { "dpo_loss": 0.6960257291793823, "epoch": 5.649504015115729, "grad_norm": 73.58902577978184, "learning_rate": 2.3820938088591694e-07, "logits": -1.1792681217193604, "logps": -88.75108337402344, "loss": 0.163, "objective": 0.1602451205253601, "ranking_idealized": 0.5375000238418579, "ranking_idealized_expo": 0.4937500059604645, "ranking_simple": 0.5, "regularize": 0.0906425416469574, "step": 2990 }, { "dpo_loss": 0.6898195147514343, "epoch": 5.658951346244686, "grad_norm": 67.15580408040853, "learning_rate": 2.373855396042152e-07, "logits": -1.2541872262954712, "logps": -91.73573303222656, "loss": 0.1571, "objective": 0.16459956765174866, "ranking_idealized": 0.518750011920929, "ranking_idealized_expo": 0.5, "ranking_simple": 0.5, "regularize": 0.09561760723590851, "step": 2995 }, { "dpo_loss": 0.6943185329437256, "epoch": 5.6683986773736414, "grad_norm": 71.35092730092892, "learning_rate": 2.3656183563551954e-07, "logits": -1.2561254501342773, "logps": -89.2706527709961, "loss": 0.16, "objective": 0.16651032865047455, "ranking_idealized": 0.5249999761581421, "ranking_idealized_expo": 0.512499988079071, "ranking_simple": 0.512499988079071, "regularize": 0.09707846492528915, "step": 3000 }, { "epoch": 5.6683986773736414, "eval_dpo_loss": 0.7067892551422119, "eval_logits": -1.1952264308929443, "eval_logps": -94.13389587402344, "eval_loss": 0.3336896300315857, "eval_objective": 0.33315029740333557, "eval_ranking_idealized": 0.5398550629615784, "eval_ranking_idealized_expo": 0.5243270993232727, "eval_ranking_simple": 0.5232919454574585, "eval_regularize": 0.26247134804725647, "eval_runtime": 157.4877, "eval_samples_per_second": 36.765, "eval_steps_per_second": 3.067, "step": 3000 }, { "dpo_loss": 0.6860445141792297, "epoch": 5.677846008502598, "grad_norm": 69.9695559274476, "learning_rate": 2.3573827794614836e-07, "logits": -1.235999345779419, "logps": -88.295654296875, "loss": 0.1581, "objective": 0.15271826088428497, "ranking_idealized": 0.637499988079071, "ranking_idealized_expo": 0.612500011920929, "ranking_simple": 0.6312500238418579, "regularize": 0.08411379158496857, "step": 3005 }, { "dpo_loss": 0.6918520927429199, "epoch": 5.687293339631554, "grad_norm": 68.61279004584298, "learning_rate": 2.3491487550082759e-07, "logits": -1.1995729207992554, "logps": -88.19017028808594, "loss": 0.1579, "objective": 0.1600116491317749, "ranking_idealized": 0.5625, "ranking_idealized_expo": 0.518750011920929, "ranking_simple": 0.518750011920929, "regularize": 0.09082644432783127, "step": 3010 }, { "dpo_loss": 0.690693736076355, "epoch": 5.69674067076051, "grad_norm": 71.44504599708077, "learning_rate": 2.3409163726259354e-07, "logits": -1.2030599117279053, "logps": -89.19579315185547, "loss": 0.1578, "objective": 0.15412333607673645, "ranking_idealized": 0.59375, "ranking_idealized_expo": 0.5687500238418579, "ranking_simple": 0.5687500238418579, "regularize": 0.08505398035049438, "step": 3015 }, { "dpo_loss": 0.6911080479621887, "epoch": 5.706188001889466, "grad_norm": 62.79376194718201, "learning_rate": 2.332685721926948e-07, "logits": -1.1986589431762695, "logps": -88.55294036865234, "loss": 0.1619, "objective": 0.15914146602153778, "ranking_idealized": 0.53125, "ranking_idealized_expo": 0.518750011920929, "ranking_simple": 0.518750011920929, "regularize": 0.09003065526485443, "step": 3020 }, { "dpo_loss": 0.6796199083328247, "epoch": 5.715635333018422, "grad_norm": 69.44076468937035, "learning_rate": 2.3244568925049522e-07, "logits": -1.2015893459320068, "logps": -89.00029754638672, "loss": 0.1662, "objective": 0.16765353083610535, "ranking_idealized": 0.518750011920929, "ranking_idealized_expo": 0.4937500059604645, "ranking_simple": 0.5, "regularize": 0.09969155490398407, "step": 3025 }, { "dpo_loss": 0.6879712343215942, "epoch": 5.725082664147378, "grad_norm": 69.49885636121284, "learning_rate": 2.3162299739337586e-07, "logits": -1.2396279573440552, "logps": -88.59342956542969, "loss": 0.1609, "objective": 0.163058340549469, "ranking_idealized": 0.5687500238418579, "ranking_idealized_expo": 0.518750011920929, "ranking_simple": 0.518750011920929, "regularize": 0.09426120668649673, "step": 3030 }, { "dpo_loss": 0.6858317255973816, "epoch": 5.7345299952763344, "grad_norm": 65.93617691088278, "learning_rate": 2.3080050557663807e-07, "logits": -1.1753551959991455, "logps": -89.18077087402344, "loss": 0.1518, "objective": 0.1514325886964798, "ranking_idealized": 0.5, "ranking_idealized_expo": 0.44999998807907104, "ranking_simple": 0.46875, "regularize": 0.08284942060709, "step": 3035 }, { "dpo_loss": 0.6900348663330078, "epoch": 5.7439773264052905, "grad_norm": 66.57478285269204, "learning_rate": 2.2997822275340545e-07, "logits": -1.1697237491607666, "logps": -87.06428527832031, "loss": 0.1562, "objective": 0.1557130068540573, "ranking_idealized": 0.53125, "ranking_idealized_expo": 0.518750011920929, "ranking_simple": 0.518750011920929, "regularize": 0.08670951426029205, "step": 3040 }, { "dpo_loss": 0.6868686079978943, "epoch": 5.7534246575342465, "grad_norm": 73.35640138982335, "learning_rate": 2.2915615787452664e-07, "logits": -1.164442777633667, "logps": -88.9676742553711, "loss": 0.1539, "objective": 0.1539611518383026, "ranking_idealized": 0.5062500238418579, "ranking_idealized_expo": 0.5, "ranking_simple": 0.4937500059604645, "regularize": 0.08527431637048721, "step": 3045 }, { "dpo_loss": 0.6946436762809753, "epoch": 5.7628719886632025, "grad_norm": 69.71603742581622, "learning_rate": 2.283343198884779e-07, "logits": -1.171112060546875, "logps": -88.559326171875, "loss": 0.1556, "objective": 0.159796804189682, "ranking_idealized": 0.512499988079071, "ranking_idealized_expo": 0.4937500059604645, "ranking_simple": 0.5, "regularize": 0.09033244848251343, "step": 3050 }, { "epoch": 5.7628719886632025, "eval_dpo_loss": 0.7074865102767944, "eval_logits": -1.1943256855010986, "eval_logps": -93.7010726928711, "eval_loss": 0.3378671705722809, "eval_objective": 0.3365963399410248, "eval_ranking_idealized": 0.5398550629615784, "eval_ranking_idealized_expo": 0.5243270993232727, "eval_ranking_simple": 0.5196687579154968, "eval_regularize": 0.26584771275520325, "eval_runtime": 158.4185, "eval_samples_per_second": 36.549, "eval_steps_per_second": 3.049, "step": 3050 }, { "dpo_loss": 0.6892920136451721, "epoch": 5.7723193197921585, "grad_norm": 70.77530028281073, "learning_rate": 2.2751271774126578e-07, "logits": -1.29044508934021, "logps": -89.32376861572266, "loss": 0.1527, "objective": 0.15522916615009308, "ranking_idealized": 0.5249999761581421, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.53125, "regularize": 0.08629997074604034, "step": 3055 }, { "dpo_loss": 0.691388726234436, "epoch": 5.7817666509211145, "grad_norm": 67.76333355139332, "learning_rate": 2.266913603763295e-07, "logits": -1.2957406044006348, "logps": -90.00084686279297, "loss": 0.1522, "objective": 0.14783525466918945, "ranking_idealized": 0.581250011920929, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.5249999761581421, "regularize": 0.07869637757539749, "step": 3060 }, { "dpo_loss": 0.6861189603805542, "epoch": 5.7912139820500705, "grad_norm": 67.53343152706475, "learning_rate": 2.2587025673444384e-07, "logits": -1.228858232498169, "logps": -88.78462219238281, "loss": 0.1601, "objective": 0.15433308482170105, "ranking_idealized": 0.5375000238418579, "ranking_idealized_expo": 0.48124998807907104, "ranking_simple": 0.48750001192092896, "regularize": 0.08572117984294891, "step": 3065 }, { "dpo_loss": 0.6877793073654175, "epoch": 5.800661313179027, "grad_norm": 64.90782659817482, "learning_rate": 2.2504941575362153e-07, "logits": -1.1816461086273193, "logps": -88.59249877929688, "loss": 0.147, "objective": 0.14250199496746063, "ranking_idealized": 0.48750001192092896, "ranking_idealized_expo": 0.4749999940395355, "ranking_simple": 0.4749999940395355, "regularize": 0.07372405380010605, "step": 3070 }, { "dpo_loss": 0.6855846047401428, "epoch": 5.810108644307983, "grad_norm": 68.67570881638909, "learning_rate": 2.2422884636901648e-07, "logits": -1.1656111478805542, "logps": -89.04707336425781, "loss": 0.152, "objective": 0.1549055278301239, "ranking_idealized": 0.46875, "ranking_idealized_expo": 0.4625000059604645, "ranking_simple": 0.4625000059604645, "regularize": 0.0863470733165741, "step": 3075 }, { "dpo_loss": 0.6973013877868652, "epoch": 5.8195559754369395, "grad_norm": 75.69732008065817, "learning_rate": 2.2340855751282593e-07, "logits": -1.1823872327804565, "logps": -85.95021057128906, "loss": 0.1618, "objective": 0.15959547460079193, "ranking_idealized": 0.5375000238418579, "ranking_idealized_expo": 0.5, "ranking_simple": 0.5062500238418579, "regularize": 0.08986534923315048, "step": 3080 }, { "dpo_loss": 0.6956144571304321, "epoch": 5.8290033065658955, "grad_norm": 72.67073003432586, "learning_rate": 2.2258855811419338e-07, "logits": -1.1929875612258911, "logps": -88.76408386230469, "loss": 0.1562, "objective": 0.15549372136592865, "ranking_idealized": 0.518750011920929, "ranking_idealized_expo": 0.518750011920929, "ranking_simple": 0.518750011920929, "regularize": 0.08593227714300156, "step": 3085 }, { "dpo_loss": 0.6876565217971802, "epoch": 5.8384506376948515, "grad_norm": 72.90703070061888, "learning_rate": 2.2176885709911174e-07, "logits": -1.2129268646240234, "logps": -88.34392547607422, "loss": 0.1516, "objective": 0.1628263145685196, "ranking_idealized": 0.574999988079071, "ranking_idealized_expo": 0.543749988079071, "ranking_simple": 0.543749988079071, "regularize": 0.09406064450740814, "step": 3090 }, { "dpo_loss": 0.6911213397979736, "epoch": 5.8478979688238075, "grad_norm": 70.10000752386972, "learning_rate": 2.209494633903257e-07, "logits": -1.2557677030563354, "logps": -89.69297790527344, "loss": 0.1528, "objective": 0.15556392073631287, "ranking_idealized": 0.5249999761581421, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.5375000238418579, "regularize": 0.08645178377628326, "step": 3095 }, { "dpo_loss": 0.6917442083358765, "epoch": 5.8573452999527635, "grad_norm": 68.58660189854848, "learning_rate": 2.201303859072349e-07, "logits": -1.174304723739624, "logps": -87.0228500366211, "loss": 0.1544, "objective": 0.15769553184509277, "ranking_idealized": 0.5625, "ranking_idealized_expo": 0.53125, "ranking_simple": 0.53125, "regularize": 0.08852110803127289, "step": 3100 }, { "epoch": 5.8573452999527635, "eval_dpo_loss": 0.7081683278083801, "eval_logits": -1.1895536184310913, "eval_logps": -93.8059310913086, "eval_loss": 0.3406558930873871, "eval_objective": 0.3385447859764099, "eval_ranking_idealized": 0.5398550629615784, "eval_ranking_idealized_expo": 0.5243270993232727, "eval_ranking_simple": 0.5212215185165405, "eval_regularize": 0.26772794127464294, "eval_runtime": 157.8418, "eval_samples_per_second": 36.682, "eval_steps_per_second": 3.06, "step": 3100 }, { "dpo_loss": 0.6924247145652771, "epoch": 5.8667926310817196, "grad_norm": 70.60312821876914, "learning_rate": 2.1931163356579667e-07, "logits": -1.2612760066986084, "logps": -87.61418151855469, "loss": 0.1603, "objective": 0.15718653798103333, "ranking_idealized": 0.53125, "ranking_idealized_expo": 0.512499988079071, "ranking_simple": 0.5249999761581421, "regularize": 0.08794407546520233, "step": 3105 }, { "dpo_loss": 0.6939336657524109, "epoch": 5.876239962210676, "grad_norm": 66.79816676685348, "learning_rate": 2.184932152784292e-07, "logits": -1.2125117778778076, "logps": -87.36097717285156, "loss": 0.154, "objective": 0.14958223700523376, "ranking_idealized": 0.550000011920929, "ranking_idealized_expo": 0.512499988079071, "ranking_simple": 0.518750011920929, "regularize": 0.08018886297941208, "step": 3110 }, { "dpo_loss": 0.6892750263214111, "epoch": 5.885687293339632, "grad_norm": 61.589115768630904, "learning_rate": 2.176751399539143e-07, "logits": -1.1249029636383057, "logps": -85.64997100830078, "loss": 0.1516, "objective": 0.1503055989742279, "ranking_idealized": 0.5562499761581421, "ranking_idealized_expo": 0.5375000238418579, "ranking_simple": 0.543749988079071, "regularize": 0.08137810230255127, "step": 3115 }, { "dpo_loss": 0.6893929839134216, "epoch": 5.895134624468588, "grad_norm": 67.96856168245768, "learning_rate": 2.168574164973005e-07, "logits": -1.2797582149505615, "logps": -86.67450714111328, "loss": 0.1526, "objective": 0.15556205809116364, "ranking_idealized": 0.5625, "ranking_idealized_expo": 0.512499988079071, "ranking_simple": 0.5062500238418579, "regularize": 0.08662275969982147, "step": 3120 }, { "dpo_loss": 0.6934850811958313, "epoch": 5.904581955597544, "grad_norm": 61.93077678604341, "learning_rate": 2.16040053809806e-07, "logits": -1.2393492460250854, "logps": -89.31920623779297, "loss": 0.1476, "objective": 0.14929169416427612, "ranking_idealized": 0.5, "ranking_idealized_expo": 0.4749999940395355, "ranking_simple": 0.48750001192092896, "regularize": 0.07994318008422852, "step": 3125 }, { "dpo_loss": 0.6860083937644958, "epoch": 5.9140292867265, "grad_norm": 65.98308041473744, "learning_rate": 2.1522306078872217e-07, "logits": -1.1525981426239014, "logps": -87.80781555175781, "loss": 0.1452, "objective": 0.15540987253189087, "ranking_idealized": 0.5625, "ranking_idealized_expo": 0.5062500238418579, "ranking_simple": 0.512499988079071, "regularize": 0.08680902421474457, "step": 3130 }, { "dpo_loss": 0.6895954012870789, "epoch": 5.923476617855456, "grad_norm": 69.45472051004639, "learning_rate": 2.1440644632731608e-07, "logits": -1.2585550546646118, "logps": -86.21622467041016, "loss": 0.1526, "objective": 0.1478964388370514, "ranking_idealized": 0.6187499761581421, "ranking_idealized_expo": 0.5562499761581421, "ranking_simple": 0.5625, "regularize": 0.07893688976764679, "step": 3135 }, { "dpo_loss": 0.6982107758522034, "epoch": 5.932923948984412, "grad_norm": 69.75620955548241, "learning_rate": 2.1359021931473444e-07, "logits": -1.1937921047210693, "logps": -87.94801330566406, "loss": 0.1535, "objective": 0.16531306505203247, "ranking_idealized": 0.5249999761581421, "ranking_idealized_expo": 0.512499988079071, "ranking_simple": 0.5062500238418579, "regularize": 0.09549199044704437, "step": 3140 }, { "dpo_loss": 0.6916354894638062, "epoch": 5.942371280113368, "grad_norm": 69.26169680454808, "learning_rate": 2.1277438863590602e-07, "logits": -1.2406847476959229, "logps": -86.54442596435547, "loss": 0.1513, "objective": 0.14869888126850128, "ranking_idealized": 0.5562499761581421, "ranking_idealized_expo": 0.518750011920929, "ranking_simple": 0.5249999761581421, "regularize": 0.0795353353023529, "step": 3145 }, { "dpo_loss": 0.6927730441093445, "epoch": 5.951818611242324, "grad_norm": 69.08292513293352, "learning_rate": 2.119589631714457e-07, "logits": -1.1690309047698975, "logps": -88.31249237060547, "loss": 0.1539, "objective": 0.15352843701839447, "ranking_idealized": 0.637499988079071, "ranking_idealized_expo": 0.5874999761581421, "ranking_simple": 0.581250011920929, "regularize": 0.08425113558769226, "step": 3150 }, { "epoch": 5.951818611242324, "eval_dpo_loss": 0.7079322338104248, "eval_logits": -1.2013435363769531, "eval_logps": -93.3647232055664, "eval_loss": 0.3377290964126587, "eval_objective": 0.3357836604118347, "eval_ranking_idealized": 0.5398550629615784, "eval_ranking_idealized_expo": 0.5243270993232727, "eval_ranking_simple": 0.520703911781311, "eval_regularize": 0.2649904191493988, "eval_runtime": 158.4666, "eval_samples_per_second": 36.538, "eval_steps_per_second": 3.048, "step": 3150 }, { "dpo_loss": 0.6902034878730774, "epoch": 5.961265942371281, "grad_norm": 63.635186729008495, "learning_rate": 2.1114395179755736e-07, "logits": -1.2969659566879272, "logps": -90.55731201171875, "loss": 0.1479, "objective": 0.14589950442314148, "ranking_idealized": 0.637499988079071, "ranking_idealized_expo": 0.5874999761581421, "ranking_simple": 0.5874999761581421, "regularize": 0.07687915861606598, "step": 3155 }, { "dpo_loss": 0.677855372428894, "epoch": 5.970713273500236, "grad_norm": 72.69121739480386, "learning_rate": 2.1032936338593717e-07, "logits": -1.2355512380599976, "logps": -88.95880126953125, "loss": 0.1504, "objective": 0.14666247367858887, "ranking_idealized": 0.4937500059604645, "ranking_idealized_expo": 0.48750001192092896, "ranking_simple": 0.48750001192092896, "regularize": 0.07887694984674454, "step": 3160 }, { "dpo_loss": 0.6994338631629944, "epoch": 5.980160604629193, "grad_norm": 66.8268105376101, "learning_rate": 2.0951520680367742e-07, "logits": -1.246654748916626, "logps": -89.81675720214844, "loss": 0.1491, "objective": 0.15047189593315125, "ranking_idealized": 0.46875, "ranking_idealized_expo": 0.45625001192092896, "ranking_simple": 0.45625001192092896, "regularize": 0.08052850514650345, "step": 3165 }, { "dpo_loss": 0.6910912394523621, "epoch": 5.989607935758149, "grad_norm": 74.09902751093182, "learning_rate": 2.0870149091316966e-07, "logits": -1.2551653385162354, "logps": -86.7063980102539, "loss": 0.1467, "objective": 0.14258627593517303, "ranking_idealized": 0.550000011920929, "ranking_idealized_expo": 0.512499988079071, "ranking_simple": 0.5249999761581421, "regularize": 0.07347716391086578, "step": 3170 }, { "dpo_loss": 0.6935809850692749, "epoch": 5.999055266887105, "grad_norm": 66.65350456772539, "learning_rate": 2.0788822457200842e-07, "logits": -1.154550313949585, "logps": -88.93148803710938, "loss": 0.1516, "objective": 0.15211038291454315, "ranking_idealized": 0.581250011920929, "ranking_idealized_expo": 0.5562499761581421, "ranking_simple": 0.5562499761581421, "regularize": 0.0827522873878479, "step": 3175 }, { "dpo_loss": 0.6875017881393433, "epoch": 6.008502598016061, "grad_norm": 67.3310083616562, "learning_rate": 2.0707541663289462e-07, "logits": -1.1374294757843018, "logps": -87.5985107421875, "loss": 0.1529, "objective": 0.15485554933547974, "ranking_idealized": 0.5687500238418579, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.518750011920929, "regularize": 0.0861053541302681, "step": 3180 }, { "dpo_loss": 0.6881211400032043, "epoch": 6.017949929145017, "grad_norm": 64.40493710346342, "learning_rate": 2.0626307594353936e-07, "logits": -1.2522262334823608, "logps": -87.79068756103516, "loss": 0.1497, "objective": 0.14534898102283478, "ranking_idealized": 0.574999988079071, "ranking_idealized_expo": 0.543749988079071, "ranking_simple": 0.5375000238418579, "regularize": 0.0765368640422821, "step": 3185 }, { "dpo_loss": 0.6818069219589233, "epoch": 6.027397260273973, "grad_norm": 65.13273445841992, "learning_rate": 2.0545121134656777e-07, "logits": -1.1388541460037231, "logps": -86.80610656738281, "loss": 0.1491, "objective": 0.15392068028450012, "ranking_idealized": 0.5562499761581421, "ranking_idealized_expo": 0.518750011920929, "ranking_simple": 0.518750011920929, "regularize": 0.08573999255895615, "step": 3190 }, { "dpo_loss": 0.6863843202590942, "epoch": 6.036844591402929, "grad_norm": 65.24808997845832, "learning_rate": 2.0463983167942218e-07, "logits": -1.2672401666641235, "logps": -86.259033203125, "loss": 0.1502, "objective": 0.1560548096895218, "ranking_idealized": 0.5062500238418579, "ranking_idealized_expo": 0.5062500238418579, "ranking_simple": 0.5, "regularize": 0.08741637319326401, "step": 3195 }, { "dpo_loss": 0.6929386258125305, "epoch": 6.046291922531885, "grad_norm": 62.711010349308516, "learning_rate": 2.0382894577426642e-07, "logits": -1.1413090229034424, "logps": -86.51998138427734, "loss": 0.1448, "objective": 0.14656969904899597, "ranking_idealized": 0.543749988079071, "ranking_idealized_expo": 0.512499988079071, "ranking_simple": 0.512499988079071, "regularize": 0.07727585732936859, "step": 3200 }, { "epoch": 6.046291922531885, "eval_dpo_loss": 0.7086135149002075, "eval_logits": -1.1912150382995605, "eval_logps": -93.06739044189453, "eval_loss": 0.34175461530685425, "eval_objective": 0.34017154574394226, "eval_ranking_idealized": 0.5398550629615784, "eval_ranking_idealized_expo": 0.5243270993232727, "eval_ranking_simple": 0.5181159377098083, "eval_regularize": 0.2693101763725281, "eval_runtime": 157.8644, "eval_samples_per_second": 36.677, "eval_steps_per_second": 3.06, "step": 3200 }, { "dpo_loss": 0.6970037221908569, "epoch": 6.055739253660841, "grad_norm": 65.08813909383588, "learning_rate": 2.0301856245788965e-07, "logits": -1.2222968339920044, "logps": -86.71208190917969, "loss": 0.1466, "objective": 0.14986823499202728, "ranking_idealized": 0.550000011920929, "ranking_idealized_expo": 0.550000011920929, "ranking_simple": 0.5375000238418579, "regularize": 0.08016785979270935, "step": 3205 }, { "dpo_loss": 0.6925697922706604, "epoch": 6.065186584789797, "grad_norm": 67.16033624454055, "learning_rate": 2.0220869055160998e-07, "logits": -1.1997841596603394, "logps": -87.62628173828125, "loss": 0.1476, "objective": 0.1423240303993225, "ranking_idealized": 0.53125, "ranking_idealized_expo": 0.4749999940395355, "ranking_simple": 0.46875, "regularize": 0.07306705415248871, "step": 3210 }, { "dpo_loss": 0.6911340355873108, "epoch": 6.074633915918753, "grad_norm": 69.8806686059364, "learning_rate": 2.0139933887117886e-07, "logits": -1.2009985446929932, "logps": -86.01728057861328, "loss": 0.1462, "objective": 0.148556187748909, "ranking_idealized": 0.5687500238418579, "ranking_idealized_expo": 0.543749988079071, "ranking_simple": 0.543749988079071, "regularize": 0.07944278419017792, "step": 3215 }, { "dpo_loss": 0.6944746375083923, "epoch": 6.084081247047709, "grad_norm": 70.31322329461948, "learning_rate": 2.0059051622668456e-07, "logits": -1.1811740398406982, "logps": -89.98876190185547, "loss": 0.1503, "objective": 0.15364637970924377, "ranking_idealized": 0.48750001192092896, "ranking_idealized_expo": 0.46875, "ranking_simple": 0.4625000059604645, "regularize": 0.08419889211654663, "step": 3220 }, { "dpo_loss": 0.6908897161483765, "epoch": 6.093528578176665, "grad_norm": 65.60153658686136, "learning_rate": 1.9978223142245707e-07, "logits": -1.1639667749404907, "logps": -87.06336975097656, "loss": 0.1469, "objective": 0.14311951398849487, "ranking_idealized": 0.543749988079071, "ranking_idealized_expo": 0.5, "ranking_simple": 0.48124998807907104, "regularize": 0.07403053343296051, "step": 3225 }, { "dpo_loss": 0.6866973638534546, "epoch": 6.102975909305621, "grad_norm": 65.29973630577604, "learning_rate": 1.989744932569714e-07, "logits": -1.2155070304870605, "logps": -85.58670043945312, "loss": 0.1466, "objective": 0.14489760994911194, "ranking_idealized": 0.512499988079071, "ranking_idealized_expo": 0.48124998807907104, "ranking_simple": 0.48124998807907104, "regularize": 0.07622788101434708, "step": 3230 }, { "dpo_loss": 0.6991506218910217, "epoch": 6.112423240434577, "grad_norm": 76.05159682239864, "learning_rate": 1.9816731052275233e-07, "logits": -1.231654167175293, "logps": -88.26829528808594, "loss": 0.1494, "objective": 0.15017834305763245, "ranking_idealized": 0.543749988079071, "ranking_idealized_expo": 0.4937500059604645, "ranking_simple": 0.4937500059604645, "regularize": 0.08026325702667236, "step": 3235 }, { "dpo_loss": 0.6891576647758484, "epoch": 6.121870571563533, "grad_norm": 70.81562644042008, "learning_rate": 1.973606920062786e-07, "logits": -1.22501802444458, "logps": -89.1203842163086, "loss": 0.1463, "objective": 0.1460135132074356, "ranking_idealized": 0.4937500059604645, "ranking_idealized_expo": 0.48124998807907104, "ranking_simple": 0.4749999940395355, "regularize": 0.07709775865077972, "step": 3240 }, { "dpo_loss": 0.6927801370620728, "epoch": 6.13131790269249, "grad_norm": 65.23971186277288, "learning_rate": 1.965546464878871e-07, "logits": -1.1830025911331177, "logps": -88.51461791992188, "loss": 0.1431, "objective": 0.14562612771987915, "ranking_idealized": 0.543749988079071, "ranking_idealized_expo": 0.518750011920929, "ranking_simple": 0.518750011920929, "regularize": 0.07634811103343964, "step": 3245 }, { "dpo_loss": 0.688566267490387, "epoch": 6.140765233821446, "grad_norm": 67.00376402860445, "learning_rate": 1.957491827416777e-07, "logits": -1.1757619380950928, "logps": -88.70887756347656, "loss": 0.1479, "objective": 0.16261467337608337, "ranking_idealized": 0.5062500238418579, "ranking_idealized_expo": 0.5, "ranking_simple": 0.5, "regularize": 0.09375804662704468, "step": 3250 }, { "epoch": 6.140765233821446, "eval_dpo_loss": 0.7078864574432373, "eval_logits": -1.1883435249328613, "eval_logps": -93.16507720947266, "eval_loss": 0.34374916553497314, "eval_objective": 0.34232527017593384, "eval_ranking_idealized": 0.5398550629615784, "eval_ranking_idealized_expo": 0.5243270993232727, "eval_ranking_simple": 0.52173912525177, "eval_regularize": 0.27153661847114563, "eval_runtime": 159.3838, "eval_samples_per_second": 36.327, "eval_steps_per_second": 3.03, "step": 3250 }, { "dpo_loss": 0.6872513294219971, "epoch": 6.150212564950402, "grad_norm": 70.56196265247418, "learning_rate": 1.9494430953541719e-07, "logits": -1.215637445449829, "logps": -87.37667083740234, "loss": 0.1434, "objective": 0.13861486315727234, "ranking_idealized": 0.5249999761581421, "ranking_idealized_expo": 0.5062500238418579, "ranking_simple": 0.5062500238418579, "regularize": 0.06988973170518875, "step": 3255 }, { "dpo_loss": 0.6876817941665649, "epoch": 6.159659896079358, "grad_norm": 67.32751350607244, "learning_rate": 1.9414003563044401e-07, "logits": -1.2061582803726196, "logps": -87.83901977539062, "loss": 0.1417, "objective": 0.15059073269367218, "ranking_idealized": 0.543749988079071, "ranking_idealized_expo": 0.5, "ranking_simple": 0.5, "regularize": 0.08182253688573837, "step": 3260 }, { "dpo_loss": 0.6937980651855469, "epoch": 6.169107227208314, "grad_norm": 78.19000031864657, "learning_rate": 1.9333636978157363e-07, "logits": -1.1729189157485962, "logps": -88.02546691894531, "loss": 0.1467, "objective": 0.1545541137456894, "ranking_idealized": 0.5687500238418579, "ranking_idealized_expo": 0.550000011920929, "ranking_simple": 0.543749988079071, "regularize": 0.08517428487539291, "step": 3265 }, { "dpo_loss": 0.6834089756011963, "epoch": 6.17855455833727, "grad_norm": 65.86655539594295, "learning_rate": 1.9253332073700193e-07, "logits": -1.2429250478744507, "logps": -87.91304016113281, "loss": 0.1404, "objective": 0.14111605286598206, "ranking_idealized": 0.512499988079071, "ranking_idealized_expo": 0.5, "ranking_simple": 0.5, "regularize": 0.07277516275644302, "step": 3270 }, { "dpo_loss": 0.6903216242790222, "epoch": 6.188001889466226, "grad_norm": 73.07389560054459, "learning_rate": 1.9173089723821087e-07, "logits": -1.1890922784805298, "logps": -89.6094970703125, "loss": 0.1426, "objective": 0.14552678167819977, "ranking_idealized": 0.5249999761581421, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.5249999761581421, "regularize": 0.07649461925029755, "step": 3275 }, { "dpo_loss": 0.6901880502700806, "epoch": 6.197449220595182, "grad_norm": 69.12858727264873, "learning_rate": 1.9092910801987324e-07, "logits": -1.1704219579696655, "logps": -87.91764831542969, "loss": 0.147, "objective": 0.14026370644569397, "ranking_idealized": 0.5687500238418579, "ranking_idealized_expo": 0.550000011920929, "ranking_simple": 0.5562499761581421, "regularize": 0.07124490290880203, "step": 3280 }, { "dpo_loss": 0.6826928853988647, "epoch": 6.206896551724138, "grad_norm": 66.9720371973547, "learning_rate": 1.9012796180975726e-07, "logits": -1.1560781002044678, "logps": -89.125732421875, "loss": 0.144, "objective": 0.13750192523002625, "ranking_idealized": 0.550000011920929, "ranking_idealized_expo": 0.512499988079071, "ranking_simple": 0.512499988079071, "regularize": 0.06923265755176544, "step": 3285 }, { "dpo_loss": 0.684235155582428, "epoch": 6.216343882853094, "grad_norm": 78.42657030667156, "learning_rate": 1.8932746732863196e-07, "logits": -1.2505360841751099, "logps": -87.80974578857422, "loss": 0.1487, "objective": 0.15419574081897736, "ranking_idealized": 0.5625, "ranking_idealized_expo": 0.518750011920929, "ranking_simple": 0.53125, "regularize": 0.08577221632003784, "step": 3290 }, { "dpo_loss": 0.6891018152236938, "epoch": 6.22579121398205, "grad_norm": 69.4336784183768, "learning_rate": 1.8852763329017186e-07, "logits": -1.182964563369751, "logps": -87.61400604248047, "loss": 0.1426, "objective": 0.14548328518867493, "ranking_idealized": 0.5562499761581421, "ranking_idealized_expo": 0.4937500059604645, "ranking_simple": 0.5, "regularize": 0.07657310366630554, "step": 3295 }, { "dpo_loss": 0.6955987811088562, "epoch": 6.235238545111006, "grad_norm": 65.24436277654908, "learning_rate": 1.877284684008625e-07, "logits": -1.2475354671478271, "logps": -86.30474090576172, "loss": 0.1408, "objective": 0.13629359006881714, "ranking_idealized": 0.48124998807907104, "ranking_idealized_expo": 0.46875, "ranking_simple": 0.46875, "regularize": 0.0667337030172348, "step": 3300 }, { "epoch": 6.235238545111006, "eval_dpo_loss": 0.7074025273323059, "eval_logits": -1.182108998298645, "eval_logps": -93.40288543701172, "eval_loss": 0.34269240498542786, "eval_objective": 0.3404931426048279, "eval_ranking_idealized": 0.5398550629615784, "eval_ranking_idealized_expo": 0.5243270993232727, "eval_ranking_simple": 0.5196687579154968, "eval_regularize": 0.26975294947624207, "eval_runtime": 157.5057, "eval_samples_per_second": 36.761, "eval_steps_per_second": 3.067, "step": 3300 }, { "dpo_loss": 0.6913751363754272, "epoch": 6.244685876239962, "grad_norm": 70.40922944532815, "learning_rate": 1.8692998135990552e-07, "logits": -1.1527745723724365, "logps": -88.61917114257812, "loss": 0.1391, "objective": 0.14121171832084656, "ranking_idealized": 0.574999988079071, "ranking_idealized_expo": 0.5062500238418579, "ranking_simple": 0.4937500059604645, "regularize": 0.07207418978214264, "step": 3305 }, { "dpo_loss": 0.6933282613754272, "epoch": 6.254133207368918, "grad_norm": 64.18766718774896, "learning_rate": 1.8613218085912363e-07, "logits": -1.2052415609359741, "logps": -87.96755981445312, "loss": 0.1439, "objective": 0.14554929733276367, "ranking_idealized": 0.59375, "ranking_idealized_expo": 0.53125, "ranking_simple": 0.53125, "regularize": 0.07621648907661438, "step": 3310 }, { "dpo_loss": 0.6829043030738831, "epoch": 6.263580538497874, "grad_norm": 66.70699808027437, "learning_rate": 1.8533507558286666e-07, "logits": -1.1629126071929932, "logps": -85.55716705322266, "loss": 0.1414, "objective": 0.13286006450653076, "ranking_idealized": 0.5375000238418579, "ranking_idealized_expo": 0.48750001192092896, "ranking_simple": 0.4937500059604645, "regularize": 0.0645696222782135, "step": 3315 }, { "dpo_loss": 0.6907526254653931, "epoch": 6.27302786962683, "grad_norm": 64.95029785368621, "learning_rate": 1.8453867420791635e-07, "logits": -1.217167854309082, "logps": -87.27310943603516, "loss": 0.1431, "objective": 0.13572126626968384, "ranking_idealized": 0.543749988079071, "ranking_idealized_expo": 0.512499988079071, "ranking_simple": 0.518750011920929, "regularize": 0.06664600968360901, "step": 3320 }, { "dpo_loss": 0.6905287504196167, "epoch": 6.282475200755787, "grad_norm": 63.119014952755904, "learning_rate": 1.8374298540339256e-07, "logits": -1.142101526260376, "logps": -86.95668029785156, "loss": 0.1372, "objective": 0.13219018280506134, "ranking_idealized": 0.5375000238418579, "ranking_idealized_expo": 0.48124998807907104, "ranking_simple": 0.48124998807907104, "regularize": 0.06313730776309967, "step": 3325 }, { "dpo_loss": 0.6907231211662292, "epoch": 6.291922531884743, "grad_norm": 70.32560354245551, "learning_rate": 1.8294801783065828e-07, "logits": -1.243328332901001, "logps": -89.5649642944336, "loss": 0.1386, "objective": 0.1380024254322052, "ranking_idealized": 0.48750001192092896, "ranking_idealized_expo": 0.4749999940395355, "ranking_simple": 0.4749999940395355, "regularize": 0.06893011927604675, "step": 3330 }, { "dpo_loss": 0.6874068975448608, "epoch": 6.301369863013699, "grad_norm": 71.44456692823061, "learning_rate": 1.8215378014322557e-07, "logits": -1.1567237377166748, "logps": -88.77133178710938, "loss": 0.1371, "objective": 0.1324915885925293, "ranking_idealized": 0.5625, "ranking_idealized_expo": 0.53125, "ranking_simple": 0.53125, "regularize": 0.06375088542699814, "step": 3335 }, { "dpo_loss": 0.6807462573051453, "epoch": 6.310817194142655, "grad_norm": 68.66265924559801, "learning_rate": 1.8136028098666187e-07, "logits": -1.1961827278137207, "logps": -88.57237243652344, "loss": 0.1372, "objective": 0.1379082351922989, "ranking_idealized": 0.5687500238418579, "ranking_idealized_expo": 0.550000011920929, "ranking_simple": 0.543749988079071, "regularize": 0.06983359158039093, "step": 3340 }, { "dpo_loss": 0.6893335580825806, "epoch": 6.320264525271611, "grad_norm": 70.97693865652336, "learning_rate": 1.8056752899849503e-07, "logits": -1.1134297847747803, "logps": -86.894287109375, "loss": 0.1414, "objective": 0.1386771947145462, "ranking_idealized": 0.5, "ranking_idealized_expo": 0.4625000059604645, "ranking_simple": 0.45625001192092896, "regularize": 0.06974383443593979, "step": 3345 }, { "dpo_loss": 0.6851335167884827, "epoch": 6.329711856400567, "grad_norm": 67.57923187365041, "learning_rate": 1.7977553280811975e-07, "logits": -1.1563271284103394, "logps": -88.98851013183594, "loss": 0.1475, "objective": 0.1491604745388031, "ranking_idealized": 0.5249999761581421, "ranking_idealized_expo": 0.5, "ranking_simple": 0.5, "regularize": 0.08064713329076767, "step": 3350 }, { "epoch": 6.329711856400567, "eval_dpo_loss": 0.7078341841697693, "eval_logits": -1.1855790615081787, "eval_logps": -93.60319519042969, "eval_loss": 0.34008708596229553, "eval_objective": 0.3383205533027649, "eval_ranking_idealized": 0.5398550629615784, "eval_ranking_idealized_expo": 0.5243270993232727, "eval_ranking_simple": 0.5191511511802673, "eval_regularize": 0.2675371468067169, "eval_runtime": 157.0694, "eval_samples_per_second": 36.863, "eval_steps_per_second": 3.075, "step": 3350 }, { "dpo_loss": 0.686167299747467, "epoch": 6.339159187529523, "grad_norm": 69.56892331213925, "learning_rate": 1.7898430103670373e-07, "logits": -1.1535394191741943, "logps": -87.93611907958984, "loss": 0.1388, "objective": 0.13828575611114502, "ranking_idealized": 0.550000011920929, "ranking_idealized_expo": 0.5062500238418579, "ranking_simple": 0.512499988079071, "regularize": 0.06966903805732727, "step": 3355 }, { "dpo_loss": 0.6901485323905945, "epoch": 6.348606518658479, "grad_norm": 70.11794207823513, "learning_rate": 1.7819384229709355e-07, "logits": -1.162483811378479, "logps": -88.49964904785156, "loss": 0.1419, "objective": 0.14245642721652985, "ranking_idealized": 0.5, "ranking_idealized_expo": 0.48124998807907104, "ranking_simple": 0.46875, "regularize": 0.07344158738851547, "step": 3360 }, { "dpo_loss": 0.6842073798179626, "epoch": 6.358053849787435, "grad_norm": 72.89111653782912, "learning_rate": 1.7740416519372126e-07, "logits": -1.1979695558547974, "logps": -87.9147720336914, "loss": 0.1353, "objective": 0.14146491885185242, "ranking_idealized": 0.543749988079071, "ranking_idealized_expo": 0.512499988079071, "ranking_simple": 0.518750011920929, "regularize": 0.07304418087005615, "step": 3365 }, { "dpo_loss": 0.6911894083023071, "epoch": 6.367501180916391, "grad_norm": 64.23327954770339, "learning_rate": 1.7661527832251023e-07, "logits": -1.2230565547943115, "logps": -88.17019653320312, "loss": 0.142, "objective": 0.14444653689861298, "ranking_idealized": 0.48750001192092896, "ranking_idealized_expo": 0.4375, "ranking_simple": 0.4312500059604645, "regularize": 0.07532760500907898, "step": 3370 }, { "dpo_loss": 0.6906020641326904, "epoch": 6.376948512045347, "grad_norm": 68.69602710927053, "learning_rate": 1.7582719027078206e-07, "logits": -1.212783932685852, "logps": -87.7943344116211, "loss": 0.137, "objective": 0.13813506066799164, "ranking_idealized": 0.6312500238418579, "ranking_idealized_expo": 0.5874999761581421, "ranking_simple": 0.5874999761581421, "regularize": 0.06907486170530319, "step": 3375 }, { "dpo_loss": 0.6869951486587524, "epoch": 6.386395843174303, "grad_norm": 68.40435028508888, "learning_rate": 1.7503990961716303e-07, "logits": -1.1598793268203735, "logps": -86.40039825439453, "loss": 0.1404, "objective": 0.1395477056503296, "ranking_idealized": 0.518750011920929, "ranking_idealized_expo": 0.518750011920929, "ranking_simple": 0.512499988079071, "regularize": 0.07084819674491882, "step": 3380 }, { "dpo_loss": 0.6884499788284302, "epoch": 6.395843174303259, "grad_norm": 64.66069456653081, "learning_rate": 1.7425344493149025e-07, "logits": -1.162398338317871, "logps": -88.71234130859375, "loss": 0.1368, "objective": 0.13811743259429932, "ranking_idealized": 0.518750011920929, "ranking_idealized_expo": 0.5, "ranking_simple": 0.5, "regularize": 0.06927243620157242, "step": 3385 }, { "dpo_loss": 0.6859569549560547, "epoch": 6.405290505432215, "grad_norm": 66.24542313878766, "learning_rate": 1.7346780477471897e-07, "logits": -1.166072130203247, "logps": -87.08891296386719, "loss": 0.1329, "objective": 0.13712583482265472, "ranking_idealized": 0.543749988079071, "ranking_idealized_expo": 0.48750001192092896, "ranking_simple": 0.48750001192092896, "regularize": 0.06853015720844269, "step": 3390 }, { "dpo_loss": 0.6902660131454468, "epoch": 6.414737836561171, "grad_norm": 65.71480604448934, "learning_rate": 1.7268299769882905e-07, "logits": -1.1450494527816772, "logps": -87.38763427734375, "loss": 0.135, "objective": 0.136188805103302, "ranking_idealized": 0.5874999761581421, "ranking_idealized_expo": 0.574999988079071, "ranking_simple": 0.574999988079071, "regularize": 0.06716220825910568, "step": 3395 }, { "dpo_loss": 0.6831803321838379, "epoch": 6.424185167690127, "grad_norm": 74.21086801419243, "learning_rate": 1.7189903224673205e-07, "logits": -1.257408857345581, "logps": -90.4032974243164, "loss": 0.1339, "objective": 0.13736803829669952, "ranking_idealized": 0.5249999761581421, "ranking_idealized_expo": 0.518750011920929, "ranking_simple": 0.518750011920929, "regularize": 0.06904999911785126, "step": 3400 }, { "epoch": 6.424185167690127, "eval_dpo_loss": 0.7081868648529053, "eval_logits": -1.1891323328018188, "eval_logps": -93.52287292480469, "eval_loss": 0.34146103262901306, "eval_objective": 0.34015777707099915, "eval_ranking_idealized": 0.5398550629615784, "eval_ranking_idealized_expo": 0.5243270993232727, "eval_ranking_simple": 0.5212215185165405, "eval_regularize": 0.26933905482292175, "eval_runtime": 162.0893, "eval_samples_per_second": 35.721, "eval_steps_per_second": 2.98, "step": 3400 }, { "dpo_loss": 0.6947253942489624, "epoch": 6.433632498819083, "grad_norm": 63.6629594252536, "learning_rate": 1.7111591695217803e-07, "logits": -1.2772033214569092, "logps": -88.42134857177734, "loss": 0.1367, "objective": 0.13918960094451904, "ranking_idealized": 0.543749988079071, "ranking_idealized_expo": 0.5062500238418579, "ranking_simple": 0.512499988079071, "regularize": 0.06971704959869385, "step": 3405 }, { "dpo_loss": 0.6906223297119141, "epoch": 6.44307982994804, "grad_norm": 69.0498878453693, "learning_rate": 1.7033366033966273e-07, "logits": -1.1488436460494995, "logps": -87.87260437011719, "loss": 0.137, "objective": 0.1409592628479004, "ranking_idealized": 0.550000011920929, "ranking_idealized_expo": 0.5375000238418579, "ranking_simple": 0.543749988079071, "regularize": 0.07189702987670898, "step": 3410 }, { "dpo_loss": 0.6856287121772766, "epoch": 6.452527161076996, "grad_norm": 72.65038733274254, "learning_rate": 1.6955227092433511e-07, "logits": -1.1925437450408936, "logps": -88.14703369140625, "loss": 0.1418, "objective": 0.13947254419326782, "ranking_idealized": 0.5249999761581421, "ranking_idealized_expo": 0.48750001192092896, "ranking_simple": 0.5, "regularize": 0.07090966403484344, "step": 3415 }, { "dpo_loss": 0.6915432214736938, "epoch": 6.461974492205952, "grad_norm": 68.73931631536732, "learning_rate": 1.6877175721190413e-07, "logits": -1.2278176546096802, "logps": -90.1395263671875, "loss": 0.1422, "objective": 0.14660508930683136, "ranking_idealized": 0.5, "ranking_idealized_expo": 0.4437499940395355, "ranking_simple": 0.4375, "regularize": 0.07745076715946198, "step": 3420 }, { "dpo_loss": 0.6973224878311157, "epoch": 6.471421823334908, "grad_norm": 64.82032182916474, "learning_rate": 1.679921276985464e-07, "logits": -1.1448619365692139, "logps": -86.83024597167969, "loss": 0.1369, "objective": 0.1359107941389084, "ranking_idealized": 0.5375000238418579, "ranking_idealized_expo": 0.4937500059604645, "ranking_simple": 0.4937500059604645, "regularize": 0.06617854535579681, "step": 3425 }, { "dpo_loss": 0.6940586566925049, "epoch": 6.480869154463864, "grad_norm": 66.38959885778102, "learning_rate": 1.672133908708138e-07, "logits": -1.1017616987228394, "logps": -85.83534240722656, "loss": 0.1311, "objective": 0.13164441287517548, "ranking_idealized": 0.53125, "ranking_idealized_expo": 0.5, "ranking_simple": 0.4937500059604645, "regularize": 0.062238551676273346, "step": 3430 }, { "dpo_loss": 0.6849575042724609, "epoch": 6.49031648559282, "grad_norm": 62.103826503006154, "learning_rate": 1.6643555520554098e-07, "logits": -1.2360210418701172, "logps": -88.12874603271484, "loss": 0.1367, "objective": 0.13433274626731873, "ranking_idealized": 0.5874999761581421, "ranking_idealized_expo": 0.6000000238418579, "ranking_simple": 0.606249988079071, "regularize": 0.06583701074123383, "step": 3435 }, { "dpo_loss": 0.6873617172241211, "epoch": 6.499763816721776, "grad_norm": 68.82750339484012, "learning_rate": 1.6565862916975307e-07, "logits": -1.160133719444275, "logps": -86.71800231933594, "loss": 0.1419, "objective": 0.14208652079105377, "ranking_idealized": 0.5375000238418579, "ranking_idealized_expo": 0.5, "ranking_simple": 0.5062500238418579, "regularize": 0.07335034757852554, "step": 3440 }, { "dpo_loss": 0.6891940236091614, "epoch": 6.509211147850732, "grad_norm": 74.40728288667519, "learning_rate": 1.648826212205735e-07, "logits": -1.2320117950439453, "logps": -88.15089416503906, "loss": 0.14, "objective": 0.1433391571044922, "ranking_idealized": 0.5, "ranking_idealized_expo": 0.48750001192092896, "ranking_simple": 0.48750001192092896, "regularize": 0.07441975176334381, "step": 3445 }, { "dpo_loss": 0.6927613615989685, "epoch": 6.518658478979688, "grad_norm": 66.771754638497, "learning_rate": 1.6410753980513208e-07, "logits": -1.1595888137817383, "logps": -89.47431945800781, "loss": 0.1394, "objective": 0.14031726121902466, "ranking_idealized": 0.550000011920929, "ranking_idealized_expo": 0.5062500238418579, "ranking_simple": 0.5062500238418579, "regularize": 0.07104112207889557, "step": 3450 }, { "epoch": 6.518658478979688, "eval_dpo_loss": 0.7083378434181213, "eval_logits": -1.1958953142166138, "eval_logps": -94.05181884765625, "eval_loss": 0.3397524952888489, "eval_objective": 0.33791953325271606, "eval_ranking_idealized": 0.5398550629615784, "eval_ranking_idealized_expo": 0.5243270993232727, "eval_ranking_simple": 0.5186335444450378, "eval_regularize": 0.2670857906341553, "eval_runtime": 155.9657, "eval_samples_per_second": 37.124, "eval_steps_per_second": 3.097, "step": 3450 }, { "dpo_loss": 0.6850086450576782, "epoch": 6.528105810108644, "grad_norm": 70.24192103411116, "learning_rate": 1.633333933604731e-07, "logits": -1.2264412641525269, "logps": -90.35914611816406, "loss": 0.1381, "objective": 0.13573698699474335, "ranking_idealized": 0.512499988079071, "ranking_idealized_expo": 0.45625001192092896, "ranking_simple": 0.45625001192092896, "regularize": 0.06723614037036896, "step": 3455 }, { "dpo_loss": 0.6905264854431152, "epoch": 6.5375531412376, "grad_norm": 66.32091148253436, "learning_rate": 1.6256019031346301e-07, "logits": -1.188407301902771, "logps": -86.26610565185547, "loss": 0.1352, "objective": 0.1351533830165863, "ranking_idealized": 0.543749988079071, "ranking_idealized_expo": 0.512499988079071, "ranking_simple": 0.5062500238418579, "regularize": 0.06610073894262314, "step": 3460 }, { "dpo_loss": 0.6922488212585449, "epoch": 6.547000472366556, "grad_norm": 69.0551628405319, "learning_rate": 1.6178793908069938e-07, "logits": -1.2067480087280273, "logps": -89.35780334472656, "loss": 0.1331, "objective": 0.1331738531589508, "ranking_idealized": 0.5375000238418579, "ranking_idealized_expo": 0.518750011920929, "ranking_simple": 0.518750011920929, "regularize": 0.06394897401332855, "step": 3465 }, { "dpo_loss": 0.6851745843887329, "epoch": 6.556447803495512, "grad_norm": 68.33813577635021, "learning_rate": 1.6101664806841857e-07, "logits": -1.151658296585083, "logps": -88.51567077636719, "loss": 0.135, "objective": 0.13486088812351227, "ranking_idealized": 0.574999988079071, "ranking_idealized_expo": 0.5687500238418579, "ranking_simple": 0.5687500238418579, "regularize": 0.06634342670440674, "step": 3470 }, { "dpo_loss": 0.6871123909950256, "epoch": 6.565895134624468, "grad_norm": 63.68925248136598, "learning_rate": 1.60246325672405e-07, "logits": -1.2574050426483154, "logps": -88.74208068847656, "loss": 0.131, "objective": 0.13215181231498718, "ranking_idealized": 0.5562499761581421, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.5249999761581421, "regularize": 0.06344055384397507, "step": 3475 }, { "dpo_loss": 0.6891454458236694, "epoch": 6.575342465753424, "grad_norm": 65.67103831685853, "learning_rate": 1.59476980277899e-07, "logits": -1.193644404411316, "logps": -87.88640594482422, "loss": 0.1312, "objective": 0.1272941380739212, "ranking_idealized": 0.512499988079071, "ranking_idealized_expo": 0.48750001192092896, "ranking_simple": 0.4937500059604645, "regularize": 0.05837959796190262, "step": 3480 }, { "dpo_loss": 0.68227618932724, "epoch": 6.584789796882381, "grad_norm": 69.3694590300883, "learning_rate": 1.5870862025950595e-07, "logits": -1.2936718463897705, "logps": -87.17106628417969, "loss": 0.1321, "objective": 0.12874533236026764, "ranking_idealized": 0.5562499761581421, "ranking_idealized_expo": 0.5687500238418579, "ranking_simple": 0.5625, "regularize": 0.06051770970225334, "step": 3485 }, { "dpo_loss": 0.6884613037109375, "epoch": 6.594237128011336, "grad_norm": 66.43795002581034, "learning_rate": 1.579412539811053e-07, "logits": -1.232988715171814, "logps": -89.5428695678711, "loss": 0.1353, "objective": 0.13002143800258636, "ranking_idealized": 0.59375, "ranking_idealized_expo": 0.550000011920929, "ranking_simple": 0.550000011920929, "regularize": 0.061175305396318436, "step": 3490 }, { "dpo_loss": 0.6869865655899048, "epoch": 6.603684459140293, "grad_norm": 67.64239996395119, "learning_rate": 1.571748897957591e-07, "logits": -1.1515171527862549, "logps": -87.28607940673828, "loss": 0.1296, "objective": 0.12666872143745422, "ranking_idealized": 0.5562499761581421, "ranking_idealized_expo": 0.53125, "ranking_simple": 0.5249999761581421, "regularize": 0.05797005817294121, "step": 3495 }, { "dpo_loss": 0.6819984316825867, "epoch": 6.613131790269249, "grad_norm": 68.22980092850779, "learning_rate": 1.5640953604562113e-07, "logits": -1.1667726039886475, "logps": -87.58833312988281, "loss": 0.1324, "objective": 0.13020966947078705, "ranking_idealized": 0.5062500238418579, "ranking_idealized_expo": 0.4625000059604645, "ranking_simple": 0.46875, "regularize": 0.06200982257723808, "step": 3500 }, { "epoch": 6.613131790269249, "eval_dpo_loss": 0.707497775554657, "eval_logits": -1.1836313009262085, "eval_logps": -93.94660949707031, "eval_loss": 0.34014692902565, "eval_objective": 0.33894920349121094, "eval_ranking_idealized": 0.5398550629615784, "eval_ranking_idealized_expo": 0.5243270993232727, "eval_ranking_simple": 0.5191511511802673, "eval_regularize": 0.26819944381713867, "eval_runtime": 155.8735, "eval_samples_per_second": 37.145, "eval_steps_per_second": 3.099, "step": 3500 }, { "dpo_loss": 0.6908973455429077, "epoch": 6.622579121398205, "grad_norm": 72.27666665513064, "learning_rate": 1.5564520106184643e-07, "logits": -1.1951795816421509, "logps": -89.80879211425781, "loss": 0.1314, "objective": 0.12301365286111832, "ranking_idealized": 0.5249999761581421, "ranking_idealized_expo": 0.48124998807907104, "ranking_simple": 0.48750001192092896, "regularize": 0.05392392352223396, "step": 3505 }, { "dpo_loss": 0.68874591588974, "epoch": 6.632026452527161, "grad_norm": 70.12917103624659, "learning_rate": 1.5488189316450018e-07, "logits": -1.3211259841918945, "logps": -87.243896484375, "loss": 0.1322, "objective": 0.13392779231071472, "ranking_idealized": 0.574999988079071, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.5249999761581421, "regularize": 0.06505317986011505, "step": 3510 }, { "dpo_loss": 0.6867557168006897, "epoch": 6.641473783656117, "grad_norm": 67.28560339736524, "learning_rate": 1.5411962066246765e-07, "logits": -1.1426551342010498, "logps": -87.66487121582031, "loss": 0.1324, "objective": 0.12283537536859512, "ranking_idealized": 0.518750011920929, "ranking_idealized_expo": 0.4625000059604645, "ranking_simple": 0.46875, "regularize": 0.05415979027748108, "step": 3515 }, { "dpo_loss": 0.6930004954338074, "epoch": 6.650921114785073, "grad_norm": 71.83681748999514, "learning_rate": 1.533583918533631e-07, "logits": -1.1357409954071045, "logps": -89.56889343261719, "loss": 0.1353, "objective": 0.13924241065979004, "ranking_idealized": 0.6875, "ranking_idealized_expo": 0.6499999761581421, "ranking_simple": 0.6499999761581421, "regularize": 0.06994234025478363, "step": 3520 }, { "dpo_loss": 0.6901943683624268, "epoch": 6.660368445914029, "grad_norm": 63.771756125302865, "learning_rate": 1.5259821502344004e-07, "logits": -1.1956990957260132, "logps": -86.81988525390625, "loss": 0.1325, "objective": 0.13418355584144592, "ranking_idealized": 0.4437499940395355, "ranking_idealized_expo": 0.4124999940395355, "ranking_simple": 0.41874998807907104, "regularize": 0.06516411155462265, "step": 3525 }, { "dpo_loss": 0.68889981508255, "epoch": 6.669815777042985, "grad_norm": 65.86146652055191, "learning_rate": 1.5183909844750085e-07, "logits": -1.2661304473876953, "logps": -88.42591857910156, "loss": 0.1301, "objective": 0.13600081205368042, "ranking_idealized": 0.550000011920929, "ranking_idealized_expo": 0.518750011920929, "ranking_simple": 0.543749988079071, "regularize": 0.0671108216047287, "step": 3530 }, { "dpo_loss": 0.6970779895782471, "epoch": 6.679263108171941, "grad_norm": 73.55635902035519, "learning_rate": 1.510810503888064e-07, "logits": -1.1741832494735718, "logps": -89.119873046875, "loss": 0.1362, "objective": 0.13686904311180115, "ranking_idealized": 0.518750011920929, "ranking_idealized_expo": 0.48750001192092896, "ranking_simple": 0.48124998807907104, "regularize": 0.06716124713420868, "step": 3535 }, { "dpo_loss": 0.690667986869812, "epoch": 6.688710439300897, "grad_norm": 68.09799154479465, "learning_rate": 1.503240790989867e-07, "logits": -1.2058817148208618, "logps": -89.58091735839844, "loss": 0.1327, "objective": 0.13466203212738037, "ranking_idealized": 0.518750011920929, "ranking_idealized_expo": 0.5, "ranking_simple": 0.5062500238418579, "regularize": 0.06559522449970245, "step": 3540 }, { "dpo_loss": 0.6922726035118103, "epoch": 6.698157770429853, "grad_norm": 71.11909965177945, "learning_rate": 1.4956819281795038e-07, "logits": -1.1305687427520752, "logps": -87.77249145507812, "loss": 0.1278, "objective": 0.1287274956703186, "ranking_idealized": 0.518750011920929, "ranking_idealized_expo": 0.5375000238418579, "ranking_simple": 0.5249999761581421, "regularize": 0.059500228613615036, "step": 3545 }, { "dpo_loss": 0.6947690844535828, "epoch": 6.707605101558809, "grad_norm": 69.61974547667347, "learning_rate": 1.4881339977379564e-07, "logits": -1.199467420578003, "logps": -89.92427825927734, "loss": 0.1385, "objective": 0.14292028546333313, "ranking_idealized": 0.5375000238418579, "ranking_idealized_expo": 0.5062500238418579, "ranking_simple": 0.5062500238418579, "regularize": 0.07344337552785873, "step": 3550 }, { "epoch": 6.707605101558809, "eval_dpo_loss": 0.707977294921875, "eval_logits": -1.1866289377212524, "eval_logps": -93.62445831298828, "eval_loss": 0.34494805335998535, "eval_objective": 0.3436737358570099, "eval_ranking_idealized": 0.5398550629615784, "eval_ranking_idealized_expo": 0.5243270993232727, "eval_ranking_simple": 0.5201863646507263, "eval_regularize": 0.27287599444389343, "eval_runtime": 156.8242, "eval_samples_per_second": 36.92, "eval_steps_per_second": 3.08, "step": 3550 }, { "dpo_loss": 0.6931701898574829, "epoch": 6.717052432687765, "grad_norm": 70.05227121284707, "learning_rate": 1.480597081827203e-07, "logits": -1.2326997518539429, "logps": -88.47614288330078, "loss": 0.1266, "objective": 0.12746387720108032, "ranking_idealized": 0.550000011920929, "ranking_idealized_expo": 0.5062500238418579, "ranking_simple": 0.5062500238418579, "regularize": 0.05814685299992561, "step": 3555 }, { "dpo_loss": 0.6866840124130249, "epoch": 6.726499763816721, "grad_norm": 65.862462651802, "learning_rate": 1.473071262489322e-07, "logits": -1.1386873722076416, "logps": -88.5678482055664, "loss": 0.1302, "objective": 0.13396327197551727, "ranking_idealized": 0.5062500238418579, "ranking_idealized_expo": 0.4749999940395355, "ranking_simple": 0.48124998807907104, "regularize": 0.06529487669467926, "step": 3560 }, { "dpo_loss": 0.6916936635971069, "epoch": 6.7359470949456774, "grad_norm": 70.79260627546033, "learning_rate": 1.465556621645607e-07, "logits": -1.1299726963043213, "logps": -86.30937194824219, "loss": 0.1267, "objective": 0.1276959329843521, "ranking_idealized": 0.5562499761581421, "ranking_idealized_expo": 0.518750011920929, "ranking_simple": 0.5249999761581421, "regularize": 0.05852656438946724, "step": 3565 }, { "dpo_loss": 0.6887668371200562, "epoch": 6.745394426074634, "grad_norm": 69.13953631290167, "learning_rate": 1.4580532410956658e-07, "logits": -1.2006984949111938, "logps": -88.484130859375, "loss": 0.132, "objective": 0.13525152206420898, "ranking_idealized": 0.5562499761581421, "ranking_idealized_expo": 0.512499988079071, "ranking_simple": 0.518750011920929, "regularize": 0.06637485325336456, "step": 3570 }, { "dpo_loss": 0.6862061619758606, "epoch": 6.75484175720359, "grad_norm": 69.38531825951776, "learning_rate": 1.4505612025165347e-07, "logits": -1.2285568714141846, "logps": -89.47730255126953, "loss": 0.1287, "objective": 0.1284267008304596, "ranking_idealized": 0.612500011920929, "ranking_idealized_expo": 0.581250011920929, "ranking_simple": 0.581250011920929, "regularize": 0.05980608984827995, "step": 3575 }, { "dpo_loss": 0.6938058137893677, "epoch": 6.764289088332546, "grad_norm": 73.48578648094887, "learning_rate": 1.4430805874617882e-07, "logits": -1.1846771240234375, "logps": -87.36907196044922, "loss": 0.1292, "objective": 0.13662242889404297, "ranking_idealized": 0.5375000238418579, "ranking_idealized_expo": 0.550000011920929, "ranking_simple": 0.5562499761581421, "regularize": 0.06724182516336441, "step": 3580 }, { "dpo_loss": 0.6926447153091431, "epoch": 6.773736419461502, "grad_norm": 70.9810049729385, "learning_rate": 1.4356114773606515e-07, "logits": -1.260349988937378, "logps": -89.3043212890625, "loss": 0.133, "objective": 0.12815634906291962, "ranking_idealized": 0.5687500238418579, "ranking_idealized_expo": 0.550000011920929, "ranking_simple": 0.550000011920929, "regularize": 0.05889187008142471, "step": 3585 }, { "dpo_loss": 0.6904715299606323, "epoch": 6.783183750590458, "grad_norm": 68.29513245816993, "learning_rate": 1.4281539535171138e-07, "logits": -1.1519930362701416, "logps": -87.79924011230469, "loss": 0.129, "objective": 0.12700645625591278, "ranking_idealized": 0.606249988079071, "ranking_idealized_expo": 0.581250011920929, "ranking_simple": 0.574999988079071, "regularize": 0.05795930698513985, "step": 3590 }, { "dpo_loss": 0.6834983825683594, "epoch": 6.792631081719414, "grad_norm": 67.53864839379757, "learning_rate": 1.420708097109047e-07, "logits": -1.2150757312774658, "logps": -87.30172729492188, "loss": 0.1318, "objective": 0.1265515387058258, "ranking_idealized": 0.46875, "ranking_idealized_expo": 0.4625000059604645, "ranking_simple": 0.46875, "regularize": 0.058201707899570465, "step": 3595 }, { "dpo_loss": 0.6890872716903687, "epoch": 6.8020784128483704, "grad_norm": 63.489388278693234, "learning_rate": 1.4132739891873124e-07, "logits": -1.1841628551483154, "logps": -87.87871551513672, "loss": 0.1289, "objective": 0.12770512700080872, "ranking_idealized": 0.5874999761581421, "ranking_idealized_expo": 0.543749988079071, "ranking_simple": 0.5375000238418579, "regularize": 0.05879638344049454, "step": 3600 }, { "epoch": 6.8020784128483704, "eval_dpo_loss": 0.708841860294342, "eval_logits": -1.1858102083206177, "eval_logps": -93.84819793701172, "eval_loss": 0.3432690501213074, "eval_objective": 0.341215580701828, "eval_ranking_idealized": 0.5398550629615784, "eval_ranking_idealized_expo": 0.5243270993232727, "eval_ranking_simple": 0.5191511511802673, "eval_regularize": 0.27033141255378723, "eval_runtime": 156.5964, "eval_samples_per_second": 36.974, "eval_steps_per_second": 3.084, "step": 3600 }, { "dpo_loss": 0.6896633505821228, "epoch": 6.8115257439773265, "grad_norm": 66.40829611486825, "learning_rate": 1.4058517106748915e-07, "logits": -1.1672431230545044, "logps": -87.52486419677734, "loss": 0.1277, "objective": 0.1254403442144394, "ranking_idealized": 0.5249999761581421, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.5249999761581421, "regularize": 0.05647401884198189, "step": 3605 }, { "dpo_loss": 0.6861889958381653, "epoch": 6.8209730751062825, "grad_norm": 72.87149287628974, "learning_rate": 1.398441342365994e-07, "logits": -1.257767915725708, "logps": -88.41429138183594, "loss": 0.1232, "objective": 0.12373647838830948, "ranking_idealized": 0.53125, "ranking_idealized_expo": 0.48124998807907104, "ranking_simple": 0.48124998807907104, "regularize": 0.055117569863796234, "step": 3610 }, { "dpo_loss": 0.6913567781448364, "epoch": 6.8304204062352385, "grad_norm": 73.82859446285208, "learning_rate": 1.391042964925183e-07, "logits": -1.2193915843963623, "logps": -89.33489227294922, "loss": 0.127, "objective": 0.12400223314762115, "ranking_idealized": 0.5625, "ranking_idealized_expo": 0.5687500238418579, "ranking_simple": 0.5625, "regularize": 0.05486655235290527, "step": 3615 }, { "dpo_loss": 0.6929974555969238, "epoch": 6.8398677373641945, "grad_norm": 66.78663032939671, "learning_rate": 1.3836566588865e-07, "logits": -1.1925121545791626, "logps": -88.87205505371094, "loss": 0.1268, "objective": 0.12733788788318634, "ranking_idealized": 0.606249988079071, "ranking_idealized_expo": 0.5874999761581421, "ranking_simple": 0.581250011920929, "regularize": 0.058038126677274704, "step": 3620 }, { "dpo_loss": 0.6925061345100403, "epoch": 6.8493150684931505, "grad_norm": 68.91716941519066, "learning_rate": 1.3762825046525802e-07, "logits": -1.1834360361099243, "logps": -88.37583923339844, "loss": 0.1246, "objective": 0.1241520419716835, "ranking_idealized": 0.5562499761581421, "ranking_idealized_expo": 0.5062500238418579, "ranking_simple": 0.512499988079071, "regularize": 0.05490143224596977, "step": 3625 }, { "dpo_loss": 0.6956363916397095, "epoch": 6.8587623996221065, "grad_norm": 64.49188844038778, "learning_rate": 1.3689205824937855e-07, "logits": -1.1987342834472656, "logps": -86.22975158691406, "loss": 0.1271, "objective": 0.12109167873859406, "ranking_idealized": 0.48124998807907104, "ranking_idealized_expo": 0.4375, "ranking_simple": 0.4312500059604645, "regularize": 0.05152803659439087, "step": 3630 }, { "dpo_loss": 0.6963600516319275, "epoch": 6.868209730751063, "grad_norm": 70.9120124443984, "learning_rate": 1.361570972547324e-07, "logits": -1.1320016384124756, "logps": -89.20863342285156, "loss": 0.1295, "objective": 0.13553881645202637, "ranking_idealized": 0.5562499761581421, "ranking_idealized_expo": 0.512499988079071, "ranking_simple": 0.5, "regularize": 0.06590281426906586, "step": 3635 }, { "dpo_loss": 0.6899456977844238, "epoch": 6.877657061880019, "grad_norm": 67.98797208533495, "learning_rate": 1.3542337548163854e-07, "logits": -1.1944100856781006, "logps": -90.85575866699219, "loss": 0.1297, "objective": 0.1339595913887024, "ranking_idealized": 0.5062500238418579, "ranking_idealized_expo": 0.48124998807907104, "ranking_simple": 0.48124998807907104, "regularize": 0.06496501713991165, "step": 3640 }, { "dpo_loss": 0.6944170594215393, "epoch": 6.887104393008975, "grad_norm": 68.58400023807916, "learning_rate": 1.3469090091692606e-07, "logits": -1.2223891019821167, "logps": -90.43609619140625, "loss": 0.1277, "objective": 0.12617693841457367, "ranking_idealized": 0.5625, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.5249999761581421, "regularize": 0.05673524737358093, "step": 3645 }, { "dpo_loss": 0.6873482465744019, "epoch": 6.896551724137931, "grad_norm": 69.87269982347813, "learning_rate": 1.3395968153384818e-07, "logits": -1.1659064292907715, "logps": -87.1473159790039, "loss": 0.1272, "objective": 0.12711526453495026, "ranking_idealized": 0.5375000238418579, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.53125, "regularize": 0.05838043615221977, "step": 3650 }, { "epoch": 6.896551724137931, "eval_dpo_loss": 0.7080458998680115, "eval_logits": -1.1978520154953003, "eval_logps": -93.93708038330078, "eval_loss": 0.34308624267578125, "eval_objective": 0.3417006731033325, "eval_ranking_idealized": 0.5398550629615784, "eval_ranking_idealized_expo": 0.5243270993232727, "eval_ranking_simple": 0.5201863646507263, "eval_regularize": 0.2708960771560669, "eval_runtime": 155.0927, "eval_samples_per_second": 37.333, "eval_steps_per_second": 3.114, "step": 3650 }, { "dpo_loss": 0.6883994340896606, "epoch": 6.9059990552668875, "grad_norm": 66.06237659948253, "learning_rate": 1.3322972529199472e-07, "logits": -1.1440255641937256, "logps": -89.079345703125, "loss": 0.132, "objective": 0.1393580138683319, "ranking_idealized": 0.6000000238418579, "ranking_idealized_expo": 0.5375000238418579, "ranking_simple": 0.5375000238418579, "regularize": 0.07051806151866913, "step": 3655 }, { "dpo_loss": 0.6803470849990845, "epoch": 6.9154463863958435, "grad_norm": 68.89901994571194, "learning_rate": 1.3250104013720577e-07, "logits": -1.0780181884765625, "logps": -89.1418685913086, "loss": 0.1257, "objective": 0.13033224642276764, "ranking_idealized": 0.574999988079071, "ranking_idealized_expo": 0.512499988079071, "ranking_simple": 0.518750011920929, "regularize": 0.06229754164814949, "step": 3660 }, { "dpo_loss": 0.6918571591377258, "epoch": 6.9248937175247995, "grad_norm": 70.23465718773171, "learning_rate": 1.3177363400148521e-07, "logits": -1.2739157676696777, "logps": -88.82063293457031, "loss": 0.1235, "objective": 0.12518411874771118, "ranking_idealized": 0.512499988079071, "ranking_idealized_expo": 0.5062500238418579, "ranking_simple": 0.5, "regularize": 0.05599840357899666, "step": 3665 }, { "dpo_loss": 0.6917076110839844, "epoch": 6.934341048653756, "grad_norm": 71.12782015732581, "learning_rate": 1.3104751480291448e-07, "logits": -1.1866590976715088, "logps": -89.13645935058594, "loss": 0.1219, "objective": 0.12391819804906845, "ranking_idealized": 0.6000000238418579, "ranking_idealized_expo": 0.5562499761581421, "ranking_simple": 0.5625, "regularize": 0.054747432470321655, "step": 3670 }, { "dpo_loss": 0.6851991415023804, "epoch": 6.943788379782712, "grad_norm": 63.64952176646913, "learning_rate": 1.30322690445566e-07, "logits": -1.2462929487228394, "logps": -88.42264556884766, "loss": 0.1252, "objective": 0.125773623585701, "ranking_idealized": 0.4937500059604645, "ranking_idealized_expo": 0.4749999940395355, "ranking_simple": 0.4749999940395355, "regularize": 0.05725371837615967, "step": 3675 }, { "dpo_loss": 0.6953834295272827, "epoch": 6.953235710911668, "grad_norm": 66.21002613526767, "learning_rate": 1.2959916881941755e-07, "logits": -1.2305362224578857, "logps": -88.08800506591797, "loss": 0.1264, "objective": 0.12478785216808319, "ranking_idealized": 0.5249999761581421, "ranking_idealized_expo": 0.4749999940395355, "ranking_simple": 0.46875, "regularize": 0.05524950474500656, "step": 3680 }, { "dpo_loss": 0.6868912577629089, "epoch": 6.962683042040624, "grad_norm": 69.20229018358609, "learning_rate": 1.2887695780026614e-07, "logits": -1.263931393623352, "logps": -86.4261703491211, "loss": 0.1234, "objective": 0.11907543987035751, "ranking_idealized": 0.550000011920929, "ranking_idealized_expo": 0.5, "ranking_simple": 0.5, "regularize": 0.05038632079958916, "step": 3685 }, { "dpo_loss": 0.6869106888771057, "epoch": 6.97213037316958, "grad_norm": 65.75821684775042, "learning_rate": 1.2815606524964218e-07, "logits": -1.2265689373016357, "logps": -87.24744415283203, "loss": 0.1259, "objective": 0.12614509463310242, "ranking_idealized": 0.5562499761581421, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.518750011920929, "regularize": 0.057454027235507965, "step": 3690 }, { "dpo_loss": 0.6816592216491699, "epoch": 6.981577704298536, "grad_norm": 68.4662770532532, "learning_rate": 1.2743649901472446e-07, "logits": -1.1392152309417725, "logps": -87.72526550292969, "loss": 0.1278, "objective": 0.13349346816539764, "ranking_idealized": 0.4937500059604645, "ranking_idealized_expo": 0.48124998807907104, "ranking_simple": 0.48124998807907104, "regularize": 0.06532754749059677, "step": 3695 }, { "dpo_loss": 0.6968778371810913, "epoch": 6.991025035427492, "grad_norm": 68.16552756051469, "learning_rate": 1.2671826692825403e-07, "logits": -1.206505537033081, "logps": -90.31403350830078, "loss": 0.125, "objective": 0.1260785311460495, "ranking_idealized": 0.59375, "ranking_idealized_expo": 0.543749988079071, "ranking_simple": 0.543749988079071, "regularize": 0.056390754878520966, "step": 3700 }, { "epoch": 6.991025035427492, "eval_dpo_loss": 0.707888126373291, "eval_logits": -1.1951963901519775, "eval_logps": -93.96659088134766, "eval_loss": 0.34364402294158936, "eval_objective": 0.34248366951942444, "eval_ranking_idealized": 0.5398550629615784, "eval_ranking_idealized_expo": 0.5243270993232727, "eval_ranking_simple": 0.5201863646507263, "eval_regularize": 0.2716948688030243, "eval_runtime": 155.8519, "eval_samples_per_second": 37.151, "eval_steps_per_second": 3.099, "step": 3700 }, { "dpo_loss": 0.692703366279602, "epoch": 7.000472366556448, "grad_norm": 65.88227214495035, "learning_rate": 1.2600137680844928e-07, "logits": -1.3077120780944824, "logps": -89.81864166259766, "loss": 0.1237, "objective": 0.12311004102230072, "ranking_idealized": 0.5687500238418579, "ranking_idealized_expo": 0.543749988079071, "ranking_simple": 0.550000011920929, "regularize": 0.05383969470858574, "step": 3705 }, { "dpo_loss": 0.691459059715271, "epoch": 7.009919697685404, "grad_norm": 69.68883276511424, "learning_rate": 1.2528583645892088e-07, "logits": -1.3056930303573608, "logps": -87.62162780761719, "loss": 0.1187, "objective": 0.11696214973926544, "ranking_idealized": 0.581250011920929, "ranking_idealized_expo": 0.543749988079071, "ranking_simple": 0.5562499761581421, "regularize": 0.04781625419855118, "step": 3710 }, { "dpo_loss": 0.688200831413269, "epoch": 7.01936702881436, "grad_norm": 65.12006779525382, "learning_rate": 1.245716536685866e-07, "logits": -1.1382930278778076, "logps": -88.21986389160156, "loss": 0.1191, "objective": 0.12016657739877701, "ranking_idealized": 0.6187499761581421, "ranking_idealized_expo": 0.606249988079071, "ranking_simple": 0.612500011920929, "regularize": 0.051346492022275925, "step": 3715 }, { "dpo_loss": 0.6875385046005249, "epoch": 7.028814359943316, "grad_norm": 66.59560630354164, "learning_rate": 1.2385883621158694e-07, "logits": -1.1439882516860962, "logps": -88.45091247558594, "loss": 0.1226, "objective": 0.12256159633398056, "ranking_idealized": 0.5625, "ranking_idealized_expo": 0.550000011920929, "ranking_simple": 0.550000011920929, "regularize": 0.05380775406956673, "step": 3720 }, { "dpo_loss": 0.6896174550056458, "epoch": 7.038261691072272, "grad_norm": 63.44555622200776, "learning_rate": 1.2314739184720018e-07, "logits": -1.2115542888641357, "logps": -88.55656433105469, "loss": 0.1227, "objective": 0.1167091354727745, "ranking_idealized": 0.612500011920929, "ranking_idealized_expo": 0.574999988079071, "ranking_simple": 0.5625, "regularize": 0.04774738475680351, "step": 3725 }, { "dpo_loss": 0.6908005475997925, "epoch": 7.047709022201228, "grad_norm": 69.3967153206809, "learning_rate": 1.2243732831975785e-07, "logits": -1.17701256275177, "logps": -89.33656311035156, "loss": 0.1219, "objective": 0.12276466190814972, "ranking_idealized": 0.5, "ranking_idealized_expo": 0.48124998807907104, "ranking_simple": 0.48750001192092896, "regularize": 0.05368459224700928, "step": 3730 }, { "dpo_loss": 0.6931012868881226, "epoch": 7.057156353330185, "grad_norm": 68.56316233493952, "learning_rate": 1.2172865335856064e-07, "logits": -1.2081263065338135, "logps": -88.94207000732422, "loss": 0.123, "objective": 0.12437693774700165, "ranking_idealized": 0.5625, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.5249999761581421, "regularize": 0.055066801607608795, "step": 3735 }, { "dpo_loss": 0.6907673478126526, "epoch": 7.066603684459141, "grad_norm": 74.9215388811123, "learning_rate": 1.2102137467779409e-07, "logits": -1.2592813968658447, "logps": -88.12895202636719, "loss": 0.1187, "objective": 0.11529743671417236, "ranking_idealized": 0.5375000238418579, "ranking_idealized_expo": 0.5, "ranking_simple": 0.4937500059604645, "regularize": 0.046220697462558746, "step": 3740 }, { "dpo_loss": 0.6894332766532898, "epoch": 7.076051015588097, "grad_norm": 68.51635982925964, "learning_rate": 1.2031549997644498e-07, "logits": -1.2517545223236084, "logps": -89.99351501464844, "loss": 0.12, "objective": 0.12121138721704483, "ranking_idealized": 0.574999988079071, "ranking_idealized_expo": 0.5562499761581421, "ranking_simple": 0.5562499761581421, "regularize": 0.05226803943514824, "step": 3745 }, { "dpo_loss": 0.6855832934379578, "epoch": 7.085498346717053, "grad_norm": 65.23697097053963, "learning_rate": 1.1961103693821694e-07, "logits": -1.1581439971923828, "logps": -89.9802474975586, "loss": 0.1227, "objective": 0.12154228985309601, "ranking_idealized": 0.5625, "ranking_idealized_expo": 0.5062500238418579, "ranking_simple": 0.5062500238418579, "regularize": 0.05298396199941635, "step": 3750 }, { "epoch": 7.085498346717053, "eval_dpo_loss": 0.7086306214332581, "eval_logits": -1.2021671533584595, "eval_logps": -93.87805938720703, "eval_loss": 0.34044116735458374, "eval_objective": 0.3382265567779541, "eval_ranking_idealized": 0.5398550629615784, "eval_ranking_idealized_expo": 0.5243270993232727, "eval_ranking_simple": 0.5196687579154968, "eval_regularize": 0.2673634886741638, "eval_runtime": 154.6879, "eval_samples_per_second": 37.43, "eval_steps_per_second": 3.122, "step": 3750 }, { "dpo_loss": 0.6866408586502075, "epoch": 7.094945677846009, "grad_norm": 68.10588099468598, "learning_rate": 1.1890799323144749e-07, "logits": -1.2115185260772705, "logps": -88.48025512695312, "loss": 0.1226, "objective": 0.11928554624319077, "ranking_idealized": 0.5625, "ranking_idealized_expo": 0.543749988079071, "ranking_simple": 0.550000011920929, "regularize": 0.050621457397937775, "step": 3755 }, { "dpo_loss": 0.6906393766403198, "epoch": 7.104393008974965, "grad_norm": 68.72119036287717, "learning_rate": 1.1820637650902387e-07, "logits": -1.2775551080703735, "logps": -88.83606719970703, "loss": 0.1236, "objective": 0.1245572417974472, "ranking_idealized": 0.5375000238418579, "ranking_idealized_expo": 0.48750001192092896, "ranking_simple": 0.48750001192092896, "regularize": 0.0554933063685894, "step": 3760 }, { "dpo_loss": 0.688023030757904, "epoch": 7.113840340103921, "grad_norm": 68.91483050812526, "learning_rate": 1.1750619440830014e-07, "logits": -1.1680511236190796, "logps": -87.44700622558594, "loss": 0.1197, "objective": 0.11044274270534515, "ranking_idealized": 0.5687500238418579, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.53125, "regularize": 0.04164043813943863, "step": 3765 }, { "dpo_loss": 0.6897870302200317, "epoch": 7.123287671232877, "grad_norm": 67.85665764483112, "learning_rate": 1.1680745455101426e-07, "logits": -1.205358624458313, "logps": -87.68621826171875, "loss": 0.1174, "objective": 0.11713232100009918, "ranking_idealized": 0.48124998807907104, "ranking_idealized_expo": 0.4437499940395355, "ranking_simple": 0.4312500059604645, "regularize": 0.048153601586818695, "step": 3770 }, { "dpo_loss": 0.6883377432823181, "epoch": 7.132735002361833, "grad_norm": 68.01990647906555, "learning_rate": 1.1611016454320452e-07, "logits": -1.2369829416275024, "logps": -87.67942810058594, "loss": 0.1203, "objective": 0.11703640222549438, "ranking_idealized": 0.59375, "ranking_idealized_expo": 0.5687500238418579, "ranking_simple": 0.581250011920929, "regularize": 0.04820261523127556, "step": 3775 }, { "dpo_loss": 0.691364586353302, "epoch": 7.142182333490789, "grad_norm": 67.10762501083026, "learning_rate": 1.1541433197512717e-07, "logits": -1.229856014251709, "logps": -88.16793060302734, "loss": 0.1202, "objective": 0.1137702614068985, "ranking_idealized": 0.53125, "ranking_idealized_expo": 0.48750001192092896, "ranking_simple": 0.4749999940395355, "regularize": 0.04463379830121994, "step": 3780 }, { "dpo_loss": 0.6905776262283325, "epoch": 7.151629664619745, "grad_norm": 60.558726867867506, "learning_rate": 1.1471996442117374e-07, "logits": -1.2590320110321045, "logps": -89.91032409667969, "loss": 0.1163, "objective": 0.115619957447052, "ranking_idealized": 0.550000011920929, "ranking_idealized_expo": 0.512499988079071, "ranking_simple": 0.512499988079071, "regularize": 0.046562183648347855, "step": 3785 }, { "dpo_loss": 0.688174843788147, "epoch": 7.161076995748701, "grad_norm": 65.46106583390794, "learning_rate": 1.1402706943978843e-07, "logits": -1.2758677005767822, "logps": -88.68501281738281, "loss": 0.1142, "objective": 0.11551575362682343, "ranking_idealized": 0.5562499761581421, "ranking_idealized_expo": 0.5062500238418579, "ranking_simple": 0.512499988079071, "regularize": 0.04669825732707977, "step": 3790 }, { "dpo_loss": 0.6905801296234131, "epoch": 7.170524326877657, "grad_norm": 69.42634954648608, "learning_rate": 1.133356545733861e-07, "logits": -1.1488438844680786, "logps": -88.26316833496094, "loss": 0.1148, "objective": 0.11274246126413345, "ranking_idealized": 0.5, "ranking_idealized_expo": 0.5, "ranking_simple": 0.5, "regularize": 0.043684449046850204, "step": 3795 }, { "dpo_loss": 0.6877044439315796, "epoch": 7.179971658006613, "grad_norm": 79.2004419422603, "learning_rate": 1.1264572734827008e-07, "logits": -1.2306697368621826, "logps": -88.87214660644531, "loss": 0.1142, "objective": 0.11047129333019257, "ranking_idealized": 0.5249999761581421, "ranking_idealized_expo": 0.4437499940395355, "ranking_simple": 0.4375, "regularize": 0.041700832545757294, "step": 3800 }, { "epoch": 7.179971658006613, "eval_dpo_loss": 0.7083066701889038, "eval_logits": -1.1874396800994873, "eval_logps": -93.82341766357422, "eval_loss": 0.3426133394241333, "eval_objective": 0.3419845700263977, "eval_ranking_idealized": 0.5398550629615784, "eval_ranking_idealized_expo": 0.5243270993232727, "eval_ranking_simple": 0.520703911781311, "eval_regularize": 0.27115386724472046, "eval_runtime": 154.3413, "eval_samples_per_second": 37.514, "eval_steps_per_second": 3.129, "step": 3800 }, { "dpo_loss": 0.6943923234939575, "epoch": 7.189418989135569, "grad_norm": 67.50840725502906, "learning_rate": 1.1195729527454994e-07, "logits": -1.1638985872268677, "logps": -86.8692626953125, "loss": 0.1167, "objective": 0.1133107915520668, "ranking_idealized": 0.574999988079071, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.53125, "regularize": 0.04387155547738075, "step": 3805 }, { "dpo_loss": 0.6871123313903809, "epoch": 7.198866320264525, "grad_norm": 71.05284807752426, "learning_rate": 1.1127036584606012e-07, "logits": -1.169498085975647, "logps": -87.57221984863281, "loss": 0.1189, "objective": 0.11854559183120728, "ranking_idealized": 0.512499988079071, "ranking_idealized_expo": 0.46875, "ranking_simple": 0.46875, "regularize": 0.04983435943722725, "step": 3810 }, { "dpo_loss": 0.689956784248352, "epoch": 7.208313651393482, "grad_norm": 72.85395631216751, "learning_rate": 1.1058494654027806e-07, "logits": -1.177872896194458, "logps": -89.75749206542969, "loss": 0.118, "objective": 0.12170775234699249, "ranking_idealized": 0.5625, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.5249999761581421, "regularize": 0.05271206423640251, "step": 3815 }, { "dpo_loss": 0.6926616430282593, "epoch": 7.217760982522438, "grad_norm": 68.4949492318128, "learning_rate": 1.0990104481824336e-07, "logits": -1.2210584878921509, "logps": -88.03923034667969, "loss": 0.1189, "objective": 0.12480070441961288, "ranking_idealized": 0.5375000238418579, "ranking_idealized_expo": 0.46875, "ranking_simple": 0.4625000059604645, "regularize": 0.05553454905748367, "step": 3820 }, { "dpo_loss": 0.6882877349853516, "epoch": 7.227208313651394, "grad_norm": 72.17531652399471, "learning_rate": 1.0921866812447567e-07, "logits": -1.2541043758392334, "logps": -88.48805236816406, "loss": 0.1186, "objective": 0.11761631071567535, "ranking_idealized": 0.512499988079071, "ranking_idealized_expo": 0.5, "ranking_simple": 0.5, "regularize": 0.048787519335746765, "step": 3825 }, { "dpo_loss": 0.6862500905990601, "epoch": 7.23665564478035, "grad_norm": 63.810028727336274, "learning_rate": 1.0853782388689456e-07, "logits": -1.1824160814285278, "logps": -86.62850189208984, "loss": 0.1173, "objective": 0.12098387628793716, "ranking_idealized": 0.5874999761581421, "ranking_idealized_expo": 0.5625, "ranking_simple": 0.5625, "regularize": 0.05235886573791504, "step": 3830 }, { "dpo_loss": 0.6879441738128662, "epoch": 7.246102975909306, "grad_norm": 64.9312849341968, "learning_rate": 1.0785851951673805e-07, "logits": -1.1976611614227295, "logps": -87.53474426269531, "loss": 0.1151, "objective": 0.11285368353128433, "ranking_idealized": 0.574999988079071, "ranking_idealized_expo": 0.5687500238418579, "ranking_simple": 0.5625, "regularize": 0.044059254229068756, "step": 3835 }, { "dpo_loss": 0.6905585527420044, "epoch": 7.255550307038262, "grad_norm": 70.35920534332588, "learning_rate": 1.0718076240848211e-07, "logits": -1.20999014377594, "logps": -86.69548797607422, "loss": 0.1152, "objective": 0.10998617112636566, "ranking_idealized": 0.59375, "ranking_idealized_expo": 0.574999988079071, "ranking_simple": 0.574999988079071, "regularize": 0.04093032330274582, "step": 3840 }, { "dpo_loss": 0.6859630942344666, "epoch": 7.264997638167218, "grad_norm": 68.34159831161008, "learning_rate": 1.0650455993976021e-07, "logits": -1.181806206703186, "logps": -88.92317199707031, "loss": 0.1169, "objective": 0.11873508989810944, "ranking_idealized": 0.574999988079071, "ranking_idealized_expo": 0.5687500238418579, "ranking_simple": 0.5687500238418579, "regularize": 0.05013876408338547, "step": 3845 }, { "dpo_loss": 0.6961053013801575, "epoch": 7.274444969296174, "grad_norm": 73.44616251707711, "learning_rate": 1.0582991947128323e-07, "logits": -1.1553852558135986, "logps": -88.92807006835938, "loss": 0.1142, "objective": 0.11381669342517853, "ranking_idealized": 0.5562499761581421, "ranking_idealized_expo": 0.574999988079071, "ranking_simple": 0.574999988079071, "regularize": 0.0442061573266983, "step": 3850 }, { "epoch": 7.274444969296174, "eval_dpo_loss": 0.7090203762054443, "eval_logits": -1.1775156259536743, "eval_logps": -93.68949890136719, "eval_loss": 0.34542912244796753, "eval_objective": 0.34420666098594666, "eval_ranking_idealized": 0.5398550629615784, "eval_ranking_idealized_expo": 0.5243270993232727, "eval_ranking_simple": 0.5201863646507263, "eval_regularize": 0.27330461144447327, "eval_runtime": 155.6654, "eval_samples_per_second": 37.195, "eval_steps_per_second": 3.103, "step": 3850 }, { "dpo_loss": 0.6929150223731995, "epoch": 7.28389230042513, "grad_norm": 63.048191058037425, "learning_rate": 1.0515684834675884e-07, "logits": -1.17747962474823, "logps": -88.01960754394531, "loss": 0.1143, "objective": 0.11103460937738419, "ranking_idealized": 0.612500011920929, "ranking_idealized_expo": 0.574999988079071, "ranking_simple": 0.574999988079071, "regularize": 0.04174310341477394, "step": 3855 }, { "dpo_loss": 0.6904551982879639, "epoch": 7.293339631554086, "grad_norm": 66.45739287207125, "learning_rate": 1.0448535389281191e-07, "logits": -1.1651670932769775, "logps": -88.12651062011719, "loss": 0.1175, "objective": 0.11071532964706421, "ranking_idealized": 0.5249999761581421, "ranking_idealized_expo": 0.518750011920929, "ranking_simple": 0.5249999761581421, "regularize": 0.041669812053442, "step": 3860 }, { "dpo_loss": 0.6892693042755127, "epoch": 7.302786962683042, "grad_norm": 65.61604925434061, "learning_rate": 1.038154434189046e-07, "logits": -1.2562576532363892, "logps": -87.65116882324219, "loss": 0.1188, "objective": 0.11806901544332504, "ranking_idealized": 0.5625, "ranking_idealized_expo": 0.518750011920929, "ranking_simple": 0.518750011920929, "regularize": 0.04914209246635437, "step": 3865 }, { "dpo_loss": 0.6871533393859863, "epoch": 7.312234293811998, "grad_norm": 65.23941905124475, "learning_rate": 1.0314712421725707e-07, "logits": -1.237535834312439, "logps": -89.5743637084961, "loss": 0.1154, "objective": 0.1144314780831337, "ranking_idealized": 0.5249999761581421, "ranking_idealized_expo": 0.5062500238418579, "ranking_simple": 0.5062500238418579, "regularize": 0.045716144144535065, "step": 3870 }, { "dpo_loss": 0.6926724910736084, "epoch": 7.321681624940954, "grad_norm": 66.65320725431135, "learning_rate": 1.0248040356276785e-07, "logits": -1.2694696187973022, "logps": -91.21292877197266, "loss": 0.1145, "objective": 0.11743433773517609, "ranking_idealized": 0.5625, "ranking_idealized_expo": 0.5562499761581421, "ranking_simple": 0.5562499761581421, "regularize": 0.04816708713769913, "step": 3875 }, { "dpo_loss": 0.688007652759552, "epoch": 7.33112895606991, "grad_norm": 67.38572150902526, "learning_rate": 1.0181528871293452e-07, "logits": -1.252357840538025, "logps": -87.56616973876953, "loss": 0.1152, "objective": 0.11621659994125366, "ranking_idealized": 0.5874999761581421, "ranking_idealized_expo": 0.550000011920929, "ranking_simple": 0.550000011920929, "regularize": 0.047415841370821, "step": 3880 }, { "dpo_loss": 0.6883160471916199, "epoch": 7.340576287198866, "grad_norm": 69.25950898070047, "learning_rate": 1.0115178690777507e-07, "logits": -1.1671538352966309, "logps": -88.53242492675781, "loss": 0.1149, "objective": 0.11127232015132904, "ranking_idealized": 0.5249999761581421, "ranking_idealized_expo": 0.5062500238418579, "ranking_simple": 0.5062500238418579, "regularize": 0.042440708726644516, "step": 3885 }, { "dpo_loss": 0.6874035000801086, "epoch": 7.350023618327822, "grad_norm": 65.20783339989941, "learning_rate": 1.004899053697487e-07, "logits": -1.2028082609176636, "logps": -89.45445251464844, "loss": 0.1126, "objective": 0.11188634485006332, "ranking_idealized": 0.550000011920929, "ranking_idealized_expo": 0.512499988079071, "ranking_simple": 0.5, "regularize": 0.04314599931240082, "step": 3890 }, { "dpo_loss": 0.689424991607666, "epoch": 7.359470949456778, "grad_norm": 69.45795757708476, "learning_rate": 9.982965130367774e-08, "logits": -1.1444966793060303, "logps": -89.14888000488281, "loss": 0.1119, "objective": 0.11185325682163239, "ranking_idealized": 0.4937500059604645, "ranking_idealized_expo": 0.45625001192092896, "ranking_simple": 0.4625000059604645, "regularize": 0.042910750955343246, "step": 3895 }, { "dpo_loss": 0.685333251953125, "epoch": 7.368918280585735, "grad_norm": 70.7089984246962, "learning_rate": 9.917103189666864e-08, "logits": -1.2610085010528564, "logps": -89.09552764892578, "loss": 0.1128, "objective": 0.1101282611489296, "ranking_idealized": 0.6187499761581421, "ranking_idealized_expo": 0.581250011920929, "ranking_simple": 0.574999988079071, "regularize": 0.04159492999315262, "step": 3900 }, { "epoch": 7.368918280585735, "eval_dpo_loss": 0.7082622647285461, "eval_logits": -1.183833360671997, "eval_logps": -94.05205535888672, "eval_loss": 0.34172287583351135, "eval_objective": 0.34058767557144165, "eval_ranking_idealized": 0.5398550629615784, "eval_ranking_idealized_expo": 0.5243270993232727, "eval_ranking_simple": 0.5196687579154968, "eval_regularize": 0.26976147294044495, "eval_runtime": 154.5614, "eval_samples_per_second": 37.461, "eval_steps_per_second": 3.125, "step": 3900 }, { "dpo_loss": 0.6910353302955627, "epoch": 7.378365611714691, "grad_norm": 66.13991711330048, "learning_rate": 9.851405431803397e-08, "logits": -1.2042526006698608, "logps": -89.818603515625, "loss": 0.1143, "objective": 0.1198992133140564, "ranking_idealized": 0.543749988079071, "ranking_idealized_expo": 0.4749999940395355, "ranking_simple": 0.4749999940395355, "regularize": 0.05079569295048714, "step": 3905 }, { "dpo_loss": 0.6896607875823975, "epoch": 7.387812942843647, "grad_norm": 66.42398501362136, "learning_rate": 9.785872571921467e-08, "logits": -1.146799921989441, "logps": -87.97035217285156, "loss": 0.1125, "objective": 0.11261601746082306, "ranking_idealized": 0.543749988079071, "ranking_idealized_expo": 0.5062500238418579, "ranking_simple": 0.5062500238418579, "regularize": 0.043649934232234955, "step": 3910 }, { "dpo_loss": 0.6883934736251831, "epoch": 7.397260273972603, "grad_norm": 70.24223672772072, "learning_rate": 9.720505323370165e-08, "logits": -1.1669622659683228, "logps": -89.51654052734375, "loss": 0.114, "objective": 0.11565478891134262, "ranking_idealized": 0.5625, "ranking_idealized_expo": 0.53125, "ranking_simple": 0.53125, "regularize": 0.04681544750928879, "step": 3915 }, { "dpo_loss": 0.6910319328308105, "epoch": 7.406707605101559, "grad_norm": 69.94033055223287, "learning_rate": 9.655304397695843e-08, "logits": -1.22225022315979, "logps": -86.38886260986328, "loss": 0.1109, "objective": 0.10618066787719727, "ranking_idealized": 0.53125, "ranking_idealized_expo": 0.5062500238418579, "ranking_simple": 0.5, "regularize": 0.03707747161388397, "step": 3920 }, { "dpo_loss": 0.6937940716743469, "epoch": 7.416154936230515, "grad_norm": 66.97802233109535, "learning_rate": 9.590270504634396e-08, "logits": -1.1933887004852295, "logps": -89.13442993164062, "loss": 0.1159, "objective": 0.11764197051525116, "ranking_idealized": 0.53125, "ranking_idealized_expo": 0.518750011920929, "ranking_simple": 0.518750011920929, "regularize": 0.048262566328048706, "step": 3925 }, { "dpo_loss": 0.6914020776748657, "epoch": 7.425602267359471, "grad_norm": 70.59997983605382, "learning_rate": 9.52540435210348e-08, "logits": -1.1378452777862549, "logps": -89.34126281738281, "loss": 0.1146, "objective": 0.11447039991617203, "ranking_idealized": 0.5874999761581421, "ranking_idealized_expo": 0.574999988079071, "ranking_simple": 0.574999988079071, "regularize": 0.04533017799258232, "step": 3930 }, { "dpo_loss": 0.6921123266220093, "epoch": 7.435049598488427, "grad_norm": 62.12464069488986, "learning_rate": 9.460706646194843e-08, "logits": -1.2301725149154663, "logps": -86.09767150878906, "loss": 0.1145, "objective": 0.1187603622674942, "ranking_idealized": 0.518750011920929, "ranking_idealized_expo": 0.5062500238418579, "ranking_simple": 0.5, "regularize": 0.04954911768436432, "step": 3935 }, { "dpo_loss": 0.6864615678787231, "epoch": 7.444496929617383, "grad_norm": 65.02567329964197, "learning_rate": 9.396178091166623e-08, "logits": -1.2188907861709595, "logps": -89.6012954711914, "loss": 0.1129, "objective": 0.11353881657123566, "ranking_idealized": 0.6000000238418579, "ranking_idealized_expo": 0.574999988079071, "ranking_simple": 0.574999988079071, "regularize": 0.04489266127347946, "step": 3940 }, { "dpo_loss": 0.692807137966156, "epoch": 7.453944260746339, "grad_norm": 69.78518471690447, "learning_rate": 9.331819389435702e-08, "logits": -1.2514480352401733, "logps": -86.1063232421875, "loss": 0.1121, "objective": 0.11364670097827911, "ranking_idealized": 0.5687500238418579, "ranking_idealized_expo": 0.53125, "ranking_simple": 0.543749988079071, "regularize": 0.04436598718166351, "step": 3945 }, { "dpo_loss": 0.6867848038673401, "epoch": 7.463391591875295, "grad_norm": 68.38044982421731, "learning_rate": 9.267631241570051e-08, "logits": -1.1391940116882324, "logps": -89.96165466308594, "loss": 0.1158, "objective": 0.11638858169317245, "ranking_idealized": 0.5249999761581421, "ranking_idealized_expo": 0.5, "ranking_simple": 0.5, "regularize": 0.04771009460091591, "step": 3950 }, { "epoch": 7.463391591875295, "eval_dpo_loss": 0.7085540294647217, "eval_logits": -1.1875077486038208, "eval_logps": -93.92083740234375, "eval_loss": 0.3433838188648224, "eval_objective": 0.34226563572883606, "eval_ranking_idealized": 0.5398550629615784, "eval_ranking_idealized_expo": 0.5243270993232727, "eval_ranking_simple": 0.5196687579154968, "eval_regularize": 0.2714102268218994, "eval_runtime": 157.9826, "eval_samples_per_second": 36.65, "eval_steps_per_second": 3.057, "step": 3950 }, { "dpo_loss": 0.6887429356575012, "epoch": 7.472838923004251, "grad_norm": 69.82696415854126, "learning_rate": 9.203614346281083e-08, "logits": -1.178577184677124, "logps": -88.75222778320312, "loss": 0.1129, "objective": 0.11189063638448715, "ranking_idealized": 0.53125, "ranking_idealized_expo": 0.518750011920929, "ranking_simple": 0.5249999761581421, "regularize": 0.04301634058356285, "step": 3955 }, { "dpo_loss": 0.6867369413375854, "epoch": 7.482286254133207, "grad_norm": 68.98701188066258, "learning_rate": 9.139769400416066e-08, "logits": -1.2077807188034058, "logps": -87.89368438720703, "loss": 0.1137, "objective": 0.11300931125879288, "ranking_idealized": 0.5874999761581421, "ranking_idealized_expo": 0.550000011920929, "ranking_simple": 0.5562499761581421, "regularize": 0.044335611164569855, "step": 3960 }, { "dpo_loss": 0.6933126449584961, "epoch": 7.491733585262163, "grad_norm": 71.68343168413607, "learning_rate": 9.076097098950541e-08, "logits": -1.2484073638916016, "logps": -87.87511444091797, "loss": 0.112, "objective": 0.11054714769124985, "ranking_idealized": 0.5687500238418579, "ranking_idealized_expo": 0.53125, "ranking_simple": 0.53125, "regularize": 0.041215892881155014, "step": 3965 }, { "dpo_loss": 0.6919286847114563, "epoch": 7.501180916391119, "grad_norm": 65.63600778904674, "learning_rate": 9.012598134980762e-08, "logits": -1.2233575582504272, "logps": -88.73258972167969, "loss": 0.1109, "objective": 0.10980510711669922, "ranking_idealized": 0.5249999761581421, "ranking_idealized_expo": 0.53125, "ranking_simple": 0.53125, "regularize": 0.04061223939061165, "step": 3970 }, { "dpo_loss": 0.6842435002326965, "epoch": 7.510628247520076, "grad_norm": 67.39010843092791, "learning_rate": 8.949273199716124e-08, "logits": -1.1742600202560425, "logps": -88.2282485961914, "loss": 0.1161, "objective": 0.12271026521921158, "ranking_idealized": 0.4937500059604645, "ranking_idealized_expo": 0.4437499940395355, "ranking_simple": 0.4437499940395355, "regularize": 0.0542859211564064, "step": 3975 }, { "dpo_loss": 0.6895918846130371, "epoch": 7.520075578649031, "grad_norm": 66.23441397917325, "learning_rate": 8.886122982471653e-08, "logits": -1.165290355682373, "logps": -87.8960952758789, "loss": 0.1126, "objective": 0.1167561262845993, "ranking_idealized": 0.5, "ranking_idealized_expo": 0.5, "ranking_simple": 0.5, "regularize": 0.04779692739248276, "step": 3980 }, { "dpo_loss": 0.688515305519104, "epoch": 7.529522909777988, "grad_norm": 69.12404399558686, "learning_rate": 8.823148170660533e-08, "logits": -1.2020834684371948, "logps": -87.83871459960938, "loss": 0.1123, "objective": 0.11350240558385849, "ranking_idealized": 0.5562499761581421, "ranking_idealized_expo": 0.5, "ranking_simple": 0.5, "regularize": 0.04465087503194809, "step": 3985 }, { "dpo_loss": 0.6893686056137085, "epoch": 7.538970240906944, "grad_norm": 77.00746497085886, "learning_rate": 8.760349449786569e-08, "logits": -1.2218725681304932, "logps": -87.24681091308594, "loss": 0.1135, "objective": 0.11317841708660126, "ranking_idealized": 0.5625, "ranking_idealized_expo": 0.53125, "ranking_simple": 0.53125, "regularize": 0.04424155130982399, "step": 3990 }, { "dpo_loss": 0.6874690651893616, "epoch": 7.5484175720359, "grad_norm": 64.95968671329116, "learning_rate": 8.697727503436756e-08, "logits": -1.1924564838409424, "logps": -88.96747589111328, "loss": 0.1096, "objective": 0.10814033448696136, "ranking_idealized": 0.5249999761581421, "ranking_idealized_expo": 0.512499988079071, "ranking_simple": 0.512499988079071, "regularize": 0.039393432438373566, "step": 3995 }, { "dpo_loss": 0.6912730932235718, "epoch": 7.557864903164856, "grad_norm": 73.49530141426365, "learning_rate": 8.635283013273853e-08, "logits": -1.2151412963867188, "logps": -88.40718841552734, "loss": 0.113, "objective": 0.1168517917394638, "ranking_idealized": 0.5375000238418579, "ranking_idealized_expo": 0.518750011920929, "ranking_simple": 0.512499988079071, "regularize": 0.047724489122629166, "step": 4000 }, { "epoch": 7.557864903164856, "eval_dpo_loss": 0.708738386631012, "eval_logits": -1.184956669807434, "eval_logps": -93.68663787841797, "eval_loss": 0.34279587864875793, "eval_objective": 0.34106388688087463, "eval_ranking_idealized": 0.5398550629615784, "eval_ranking_idealized_expo": 0.5243270993232727, "eval_ranking_simple": 0.5196687579154968, "eval_regularize": 0.2701900601387024, "eval_runtime": 154.4859, "eval_samples_per_second": 37.479, "eval_steps_per_second": 3.126, "step": 4000 }, { "dpo_loss": 0.6873505711555481, "epoch": 7.567312234293812, "grad_norm": 68.58470344270393, "learning_rate": 8.57301665902892e-08, "logits": -1.1909992694854736, "logps": -88.84749603271484, "loss": 0.1089, "objective": 0.109657421708107, "ranking_idealized": 0.543749988079071, "ranking_idealized_expo": 0.5375000238418579, "ranking_simple": 0.5375000238418579, "regularize": 0.040922366082668304, "step": 4005 }, { "dpo_loss": 0.6915440559387207, "epoch": 7.576759565422768, "grad_norm": 66.8202796391533, "learning_rate": 8.510929118493951e-08, "logits": -1.1787294149398804, "logps": -88.94754791259766, "loss": 0.1111, "objective": 0.11381425708532333, "ranking_idealized": 0.59375, "ranking_idealized_expo": 0.5625, "ranking_simple": 0.5625, "regularize": 0.04465986043214798, "step": 4010 }, { "dpo_loss": 0.6843048334121704, "epoch": 7.586206896551724, "grad_norm": 70.49292621484116, "learning_rate": 8.449021067514483e-08, "logits": -1.1641387939453125, "logps": -88.68355560302734, "loss": 0.1125, "objective": 0.11452841758728027, "ranking_idealized": 0.6000000238418579, "ranking_idealized_expo": 0.5874999761581421, "ranking_simple": 0.5874999761581421, "regularize": 0.04609794169664383, "step": 4015 }, { "dpo_loss": 0.6912661790847778, "epoch": 7.59565422768068, "grad_norm": 67.40017733094507, "learning_rate": 8.387293179982257e-08, "logits": -1.1959151029586792, "logps": -87.34587860107422, "loss": 0.1115, "objective": 0.11274880170822144, "ranking_idealized": 0.574999988079071, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.5375000238418579, "regularize": 0.04362217336893082, "step": 4020 }, { "dpo_loss": 0.6897019147872925, "epoch": 7.605101558809636, "grad_norm": 68.09459225606703, "learning_rate": 8.32574612782787e-08, "logits": -1.1839287281036377, "logps": -91.09791564941406, "loss": 0.1061, "objective": 0.10851629823446274, "ranking_idealized": 0.6000000238418579, "ranking_idealized_expo": 0.574999988079071, "ranking_simple": 0.5562499761581421, "regularize": 0.03954610973596573, "step": 4025 }, { "dpo_loss": 0.6882971525192261, "epoch": 7.614548889938592, "grad_norm": 62.462183761915234, "learning_rate": 8.264380581013442e-08, "logits": -1.1843148469924927, "logps": -88.91985321044922, "loss": 0.1086, "objective": 0.10946284234523773, "ranking_idealized": 0.48124998807907104, "ranking_idealized_expo": 0.44999998807907104, "ranking_simple": 0.44999998807907104, "regularize": 0.040633127093315125, "step": 4030 }, { "dpo_loss": 0.6897971630096436, "epoch": 7.623996221067548, "grad_norm": 70.50481329581429, "learning_rate": 8.203197207525347e-08, "logits": -1.108965277671814, "logps": -88.89385986328125, "loss": 0.1082, "objective": 0.10758545249700546, "ranking_idealized": 0.5249999761581421, "ranking_idealized_expo": 0.46875, "ranking_simple": 0.46875, "regularize": 0.03860573098063469, "step": 4035 }, { "dpo_loss": 0.6894980072975159, "epoch": 7.633443552196504, "grad_norm": 82.68503480035113, "learning_rate": 8.142196673366936e-08, "logits": -1.1271682977676392, "logps": -87.27262115478516, "loss": 0.1094, "objective": 0.110640749335289, "ranking_idealized": 0.518750011920929, "ranking_idealized_expo": 0.46875, "ranking_simple": 0.4749999940395355, "regularize": 0.04169095307588577, "step": 4040 }, { "dpo_loss": 0.6852022409439087, "epoch": 7.64289088332546, "grad_norm": 69.13124584150115, "learning_rate": 8.081379642551301e-08, "logits": -1.071737289428711, "logps": -87.50572967529297, "loss": 0.1063, "objective": 0.10488457977771759, "ranking_idealized": 0.5874999761581421, "ranking_idealized_expo": 0.543749988079071, "ranking_simple": 0.543749988079071, "regularize": 0.036364343017339706, "step": 4045 }, { "dpo_loss": 0.6881375312805176, "epoch": 7.652338214454416, "grad_norm": 70.93720732954677, "learning_rate": 8.02074677709402e-08, "logits": -1.1176092624664307, "logps": -87.46456146240234, "loss": 0.1113, "objective": 0.10660157352685928, "ranking_idealized": 0.53125, "ranking_idealized_expo": 0.512499988079071, "ranking_simple": 0.512499988079071, "regularize": 0.03778781741857529, "step": 4050 }, { "epoch": 7.652338214454416, "eval_dpo_loss": 0.7087328433990479, "eval_logits": -1.1836637258529663, "eval_logps": -93.61712646484375, "eval_loss": 0.34341859817504883, "eval_objective": 0.34246423840522766, "eval_ranking_idealized": 0.5398550629615784, "eval_ranking_idealized_expo": 0.5243270993232727, "eval_ranking_simple": 0.5201863646507263, "eval_regularize": 0.2715909481048584, "eval_runtime": 154.5352, "eval_samples_per_second": 37.467, "eval_steps_per_second": 3.126, "step": 4050 }, { "dpo_loss": 0.6898146271705627, "epoch": 7.661785545583372, "grad_norm": 68.15988794344355, "learning_rate": 7.960298737005952e-08, "logits": -1.14591383934021, "logps": -87.33655548095703, "loss": 0.1077, "objective": 0.10561883449554443, "ranking_idealized": 0.4937500059604645, "ranking_idealized_expo": 0.48124998807907104, "ranking_simple": 0.48124998807907104, "regularize": 0.03663736954331398, "step": 4055 }, { "dpo_loss": 0.6883107423782349, "epoch": 7.671232876712329, "grad_norm": 67.6835835394026, "learning_rate": 7.900036180286102e-08, "logits": -1.1574805974960327, "logps": -87.51531982421875, "loss": 0.1049, "objective": 0.10436640679836273, "ranking_idealized": 0.543749988079071, "ranking_idealized_expo": 0.512499988079071, "ranking_simple": 0.512499988079071, "regularize": 0.03553532809019089, "step": 4060 }, { "dpo_loss": 0.6928078532218933, "epoch": 7.680680207841284, "grad_norm": 65.52857280252256, "learning_rate": 7.839959762914383e-08, "logits": -1.188384771347046, "logps": -88.98746490478516, "loss": 0.1065, "objective": 0.10643269121646881, "ranking_idealized": 0.512499988079071, "ranking_idealized_expo": 0.5062500238418579, "ranking_simple": 0.5062500238418579, "regularize": 0.037151891738176346, "step": 4065 }, { "dpo_loss": 0.6930032968521118, "epoch": 7.690127538970241, "grad_norm": 64.74313053889571, "learning_rate": 7.780070138844522e-08, "logits": -1.1631094217300415, "logps": -87.88558197021484, "loss": 0.1081, "objective": 0.11115256696939468, "ranking_idealized": 0.5562499761581421, "ranking_idealized_expo": 0.550000011920929, "ranking_simple": 0.5562499761581421, "regularize": 0.04185224324464798, "step": 4070 }, { "dpo_loss": 0.6907125115394592, "epoch": 7.699574870099197, "grad_norm": 67.13432378815105, "learning_rate": 7.720367959996954e-08, "logits": -1.2005220651626587, "logps": -89.42745208740234, "loss": 0.1088, "objective": 0.10870270431041718, "ranking_idealized": 0.5375000238418579, "ranking_idealized_expo": 0.5, "ranking_simple": 0.48750001192092896, "regularize": 0.03963145241141319, "step": 4075 }, { "dpo_loss": 0.6925803422927856, "epoch": 7.709022201228153, "grad_norm": 71.33412473182247, "learning_rate": 7.660853876251683e-08, "logits": -1.189697265625, "logps": -86.2691421508789, "loss": 0.1076, "objective": 0.1080603152513504, "ranking_idealized": 0.5625, "ranking_idealized_expo": 0.543749988079071, "ranking_simple": 0.543749988079071, "regularize": 0.03880227357149124, "step": 4080 }, { "dpo_loss": 0.6878358721733093, "epoch": 7.718469532357109, "grad_norm": 65.78903913253299, "learning_rate": 7.601528535441232e-08, "logits": -1.1525851488113403, "logps": -88.75498962402344, "loss": 0.1087, "objective": 0.11228512227535248, "ranking_idealized": 0.512499988079071, "ranking_idealized_expo": 0.48750001192092896, "ranking_simple": 0.48750001192092896, "regularize": 0.04350154846906662, "step": 4085 }, { "dpo_loss": 0.6919507384300232, "epoch": 7.727916863486065, "grad_norm": 66.3099078877917, "learning_rate": 7.542392583343583e-08, "logits": -1.0845839977264404, "logps": -87.10064697265625, "loss": 0.1089, "objective": 0.1093490943312645, "ranking_idealized": 0.48750001192092896, "ranking_idealized_expo": 0.4625000059604645, "ranking_simple": 0.4625000059604645, "regularize": 0.04015401005744934, "step": 4090 }, { "dpo_loss": 0.6882933378219604, "epoch": 7.737364194615021, "grad_norm": 71.99346678649765, "learning_rate": 7.483446663675169e-08, "logits": -1.1613986492156982, "logps": -87.78504180908203, "loss": 0.1061, "objective": 0.10825704038143158, "ranking_idealized": 0.574999988079071, "ranking_idealized_expo": 0.5625, "ranking_simple": 0.5625, "regularize": 0.03942771628499031, "step": 4095 }, { "dpo_loss": 0.6845086812973022, "epoch": 7.746811525743977, "grad_norm": 62.65200304841601, "learning_rate": 7.424691418083854e-08, "logits": -1.2002389430999756, "logps": -88.44282531738281, "loss": 0.1082, "objective": 0.11072710901498795, "ranking_idealized": 0.5, "ranking_idealized_expo": 0.4625000059604645, "ranking_simple": 0.44999998807907104, "regularize": 0.042276252061128616, "step": 4100 }, { "epoch": 7.746811525743977, "eval_dpo_loss": 0.7081242203712463, "eval_logits": -1.1851998567581177, "eval_logps": -94.00126647949219, "eval_loss": 0.34109148383140564, "eval_objective": 0.34028691053390503, "eval_ranking_idealized": 0.5398550629615784, "eval_ranking_idealized_expo": 0.5243270993232727, "eval_ranking_simple": 0.5191511511802673, "eval_regularize": 0.26947447657585144, "eval_runtime": 154.5082, "eval_samples_per_second": 37.474, "eval_steps_per_second": 3.126, "step": 4100 }, { "dpo_loss": 0.6903212666511536, "epoch": 7.756258856872933, "grad_norm": 71.55327544805871, "learning_rate": 7.366127486141918e-08, "logits": -1.1610606908798218, "logps": -87.8761978149414, "loss": 0.1044, "objective": 0.10260413587093353, "ranking_idealized": 0.550000011920929, "ranking_idealized_expo": 0.550000011920929, "ranking_simple": 0.543749988079071, "regularize": 0.033572006970644, "step": 4105 }, { "dpo_loss": 0.6889077425003052, "epoch": 7.765706188001889, "grad_norm": 65.38368309326316, "learning_rate": 7.319414514879208e-08, "logits": -1.106872797012329, "logps": -88.81092834472656, "loss": 0.1095, "objective": 0.10929863154888153, "ranking_idealized": 0.6000000238418579, "ranking_idealized_expo": 0.5687500238418579, "ranking_simple": 0.5687500238418579, "regularize": 0.04040784761309624, "step": 4110 }, { "dpo_loss": 0.6919306516647339, "epoch": 7.775153519130845, "grad_norm": 66.77104121920884, "learning_rate": 7.261196552576512e-08, "logits": -1.1112695932388306, "logps": -87.815185546875, "loss": 0.1061, "objective": 0.10447889566421509, "ranking_idealized": 0.5375000238418579, "ranking_idealized_expo": 0.5062500238418579, "ranking_simple": 0.5, "regularize": 0.035285837948322296, "step": 4115 }, { "dpo_loss": 0.6867073178291321, "epoch": 7.784600850259801, "grad_norm": 62.880099903131345, "learning_rate": 7.203171683624498e-08, "logits": -1.193832278251648, "logps": -87.20587158203125, "loss": 0.1047, "objective": 0.10617707669734955, "ranking_idealized": 0.53125, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.518750011920929, "regularize": 0.0375063456594944, "step": 4120 }, { "dpo_loss": 0.6886472105979919, "epoch": 7.794048181388757, "grad_norm": 65.15194680583734, "learning_rate": 7.14534053964504e-08, "logits": -1.178002119064331, "logps": -87.29219055175781, "loss": 0.107, "objective": 0.10785814374685287, "ranking_idealized": 0.5062500238418579, "ranking_idealized_expo": 0.4937500059604645, "ranking_simple": 0.4937500059604645, "regularize": 0.03899341821670532, "step": 4125 }, { "dpo_loss": 0.6928061842918396, "epoch": 7.8034955125177135, "grad_norm": 66.0849283350675, "learning_rate": 7.087703750151244e-08, "logits": -1.1966705322265625, "logps": -88.70189666748047, "loss": 0.1055, "objective": 0.10542736202478409, "ranking_idealized": 0.5874999761581421, "ranking_idealized_expo": 0.53125, "ranking_simple": 0.5375000238418579, "regularize": 0.03614673763513565, "step": 4130 }, { "dpo_loss": 0.6882622838020325, "epoch": 7.8129428436466695, "grad_norm": 67.7948862553432, "learning_rate": 7.030261942540602e-08, "logits": -1.1530323028564453, "logps": -88.18225860595703, "loss": 0.1057, "objective": 0.10691492259502411, "ranking_idealized": 0.48124998807907104, "ranking_idealized_expo": 0.4625000059604645, "ranking_simple": 0.46875, "regularize": 0.03808870166540146, "step": 4135 }, { "dpo_loss": 0.690109133720398, "epoch": 7.8223901747756255, "grad_norm": 67.20968655610226, "learning_rate": 6.973015742088173e-08, "logits": -1.2623380422592163, "logps": -87.86051177978516, "loss": 0.1065, "objective": 0.10408475250005722, "ranking_idealized": 0.4749999940395355, "ranking_idealized_expo": 0.45625001192092896, "ranking_simple": 0.45625001192092896, "regularize": 0.035073842853307724, "step": 4140 }, { "dpo_loss": 0.6884062886238098, "epoch": 7.831837505904582, "grad_norm": 68.68688981851791, "learning_rate": 6.915965771939725e-08, "logits": -1.1563483476638794, "logps": -88.83927917480469, "loss": 0.1037, "objective": 0.10269608348608017, "ranking_idealized": 0.6000000238418579, "ranking_idealized_expo": 0.5375000238418579, "ranking_simple": 0.53125, "regularize": 0.03385545685887337, "step": 4145 }, { "dpo_loss": 0.6895419359207153, "epoch": 7.841284837033538, "grad_norm": 72.99502346008097, "learning_rate": 6.859112653105023e-08, "logits": -1.107596516609192, "logps": -89.7536849975586, "loss": 0.1051, "objective": 0.10431935638189316, "ranking_idealized": 0.543749988079071, "ranking_idealized_expo": 0.512499988079071, "ranking_simple": 0.512499988079071, "regularize": 0.03536514565348625, "step": 4150 }, { "epoch": 7.841284837033538, "eval_dpo_loss": 0.7083097100257874, "eval_logits": -1.1848164796829224, "eval_logps": -93.8552474975586, "eval_loss": 0.3425026834011078, "eval_objective": 0.3417186439037323, "eval_ranking_idealized": 0.5398550629615784, "eval_ranking_idealized_expo": 0.5243270993232727, "eval_ranking_simple": 0.5196687579154968, "eval_regularize": 0.27088767290115356, "eval_runtime": 154.5073, "eval_samples_per_second": 37.474, "eval_steps_per_second": 3.126, "step": 4150 }, { "dpo_loss": 0.689522922039032, "epoch": 7.850732168162494, "grad_norm": 61.625780749855046, "learning_rate": 6.802457004451018e-08, "logits": -1.2146650552749634, "logps": -87.07714080810547, "loss": 0.1048, "objective": 0.10108639299869537, "ranking_idealized": 0.5874999761581421, "ranking_idealized_expo": 0.5375000238418579, "ranking_simple": 0.543749988079071, "regularize": 0.03213409334421158, "step": 4155 }, { "dpo_loss": 0.6918686032295227, "epoch": 7.86017949929145, "grad_norm": 66.69255583419884, "learning_rate": 6.74599944269512e-08, "logits": -1.219438910484314, "logps": -86.26667785644531, "loss": 0.1049, "objective": 0.10439826548099518, "ranking_idealized": 0.581250011920929, "ranking_idealized_expo": 0.53125, "ranking_simple": 0.53125, "regularize": 0.03521140664815903, "step": 4160 }, { "dpo_loss": 0.6884658932685852, "epoch": 7.8696268304204064, "grad_norm": 69.93747327295206, "learning_rate": 6.68974058239849e-08, "logits": -1.160825490951538, "logps": -87.0850601196289, "loss": 0.1037, "objective": 0.10178661346435547, "ranking_idealized": 0.5562499761581421, "ranking_idealized_expo": 0.48750001192092896, "ranking_simple": 0.48124998807907104, "regularize": 0.03294002264738083, "step": 4165 }, { "dpo_loss": 0.688989520072937, "epoch": 7.8790741615493625, "grad_norm": 66.57073575298065, "learning_rate": 6.633681035959374e-08, "logits": -1.2526596784591675, "logps": -88.16157531738281, "loss": 0.1061, "objective": 0.1027727723121643, "ranking_idealized": 0.581250011920929, "ranking_idealized_expo": 0.48750001192092896, "ranking_simple": 0.5, "regularize": 0.033873818814754486, "step": 4170 }, { "dpo_loss": 0.6920872926712036, "epoch": 7.8885214926783185, "grad_norm": 65.368918169017, "learning_rate": 6.577821413606394e-08, "logits": -1.1576217412948608, "logps": -89.33631896972656, "loss": 0.1042, "objective": 0.10328070819377899, "ranking_idealized": 0.5625, "ranking_idealized_expo": 0.550000011920929, "ranking_simple": 0.550000011920929, "regularize": 0.03407198190689087, "step": 4175 }, { "dpo_loss": 0.6866429448127747, "epoch": 7.8979688238072745, "grad_norm": 64.0686419132941, "learning_rate": 6.522162323391925e-08, "logits": -1.1338218450546265, "logps": -88.63480377197266, "loss": 0.1045, "objective": 0.1009424477815628, "ranking_idealized": 0.59375, "ranking_idealized_expo": 0.550000011920929, "ranking_simple": 0.5562499761581421, "regularize": 0.032278142869472504, "step": 4180 }, { "dpo_loss": 0.6868075132369995, "epoch": 7.9074161549362305, "grad_norm": 71.10493618106902, "learning_rate": 6.466704371185478e-08, "logits": -1.1870781183242798, "logps": -86.28584289550781, "loss": 0.1041, "objective": 0.10661537945270538, "ranking_idealized": 0.5562499761581421, "ranking_idealized_expo": 0.512499988079071, "ranking_simple": 0.512499988079071, "regularize": 0.037934623658657074, "step": 4185 }, { "dpo_loss": 0.6851555705070496, "epoch": 7.9168634860651865, "grad_norm": 64.35034498511142, "learning_rate": 6.411448160667113e-08, "logits": -1.137531042098999, "logps": -87.21583557128906, "loss": 0.1031, "objective": 0.10514400899410248, "ranking_idealized": 0.5562499761581421, "ranking_idealized_expo": 0.5375000238418579, "ranking_simple": 0.5375000238418579, "regularize": 0.036628447473049164, "step": 4190 }, { "dpo_loss": 0.6898540258407593, "epoch": 7.9263108171941425, "grad_norm": 64.62703600883805, "learning_rate": 6.356394293320854e-08, "logits": -1.154130458831787, "logps": -89.00459289550781, "loss": 0.1035, "objective": 0.10545846074819565, "ranking_idealized": 0.518750011920929, "ranking_idealized_expo": 0.4625000059604645, "ranking_simple": 0.4625000059604645, "regularize": 0.03647305816411972, "step": 4195 }, { "dpo_loss": 0.6923469305038452, "epoch": 7.935758148323099, "grad_norm": 66.02875824508733, "learning_rate": 6.301543368428141e-08, "logits": -1.0741298198699951, "logps": -87.47310638427734, "loss": 0.1047, "objective": 0.10732004791498184, "ranking_idealized": 0.512499988079071, "ranking_idealized_expo": 0.4749999940395355, "ranking_simple": 0.4749999940395355, "regularize": 0.03808536380529404, "step": 4200 }, { "epoch": 7.935758148323099, "eval_dpo_loss": 0.7085427045822144, "eval_logits": -1.187182903289795, "eval_logps": -93.66959381103516, "eval_loss": 0.34216782450675964, "eval_objective": 0.34106820821762085, "eval_ranking_idealized": 0.5398550629615784, "eval_ranking_idealized_expo": 0.5243270993232727, "eval_ranking_simple": 0.5196687579154968, "eval_regularize": 0.27021393179893494, "eval_runtime": 154.4178, "eval_samples_per_second": 37.496, "eval_steps_per_second": 3.128, "step": 4200 }, { "dpo_loss": 0.6902600526809692, "epoch": 7.945205479452055, "grad_norm": 70.3187555611494, "learning_rate": 6.246895983061315e-08, "logits": -1.2895134687423706, "logps": -87.88436889648438, "loss": 0.1009, "objective": 0.10067249834537506, "ranking_idealized": 0.6000000238418579, "ranking_idealized_expo": 0.5562499761581421, "ranking_simple": 0.5625, "regularize": 0.0316464826464653, "step": 4205 }, { "dpo_loss": 0.6903197169303894, "epoch": 7.954652810581011, "grad_norm": 64.72695206813354, "learning_rate": 6.192452732077099e-08, "logits": -1.2405836582183838, "logps": -88.14324951171875, "loss": 0.1053, "objective": 0.10551361739635468, "ranking_idealized": 0.606249988079071, "ranking_idealized_expo": 0.581250011920929, "ranking_simple": 0.581250011920929, "regularize": 0.036481648683547974, "step": 4210 }, { "dpo_loss": 0.6930989027023315, "epoch": 7.964100141709967, "grad_norm": 68.35426549454564, "learning_rate": 6.138214208110176e-08, "logits": -1.2283471822738647, "logps": -87.89575958251953, "loss": 0.1038, "objective": 0.10590711981058121, "ranking_idealized": 0.5625, "ranking_idealized_expo": 0.518750011920929, "ranking_simple": 0.5375000238418579, "regularize": 0.03659723699092865, "step": 4215 }, { "dpo_loss": 0.6872066259384155, "epoch": 7.973547472838923, "grad_norm": 74.31478828809273, "learning_rate": 6.084181001566657e-08, "logits": -1.1427218914031982, "logps": -87.60230255126953, "loss": 0.1038, "objective": 0.10263446718454361, "ranking_idealized": 0.543749988079071, "ranking_idealized_expo": 0.518750011920929, "ranking_simple": 0.5249999761581421, "regularize": 0.03391381725668907, "step": 4220 }, { "dpo_loss": 0.6878780126571655, "epoch": 7.982994803967879, "grad_norm": 66.25372718162421, "learning_rate": 6.030353700617738e-08, "logits": -1.19222891330719, "logps": -88.9227523803711, "loss": 0.1033, "objective": 0.10655965656042099, "ranking_idealized": 0.581250011920929, "ranking_idealized_expo": 0.5562499761581421, "ranking_simple": 0.5562499761581421, "regularize": 0.03777185454964638, "step": 4225 }, { "dpo_loss": 0.690626859664917, "epoch": 7.9924421350968355, "grad_norm": 73.101616411094, "learning_rate": 5.976732891193226e-08, "logits": -1.1580171585083008, "logps": -89.56224060058594, "loss": 0.1021, "objective": 0.10268416255712509, "ranking_idealized": 0.518750011920929, "ranking_idealized_expo": 0.512499988079071, "ranking_simple": 0.512499988079071, "regularize": 0.03362148255109787, "step": 4230 }, { "dpo_loss": 0.6919626593589783, "epoch": 8.00188946622579, "grad_norm": 65.50765796086647, "learning_rate": 5.9233191569752096e-08, "logits": -1.2294315099716187, "logps": -87.63737487792969, "loss": 0.1029, "objective": 0.10424093902111053, "ranking_idealized": 0.581250011920929, "ranking_idealized_expo": 0.550000011920929, "ranking_simple": 0.550000011920929, "regularize": 0.035044677555561066, "step": 4235 }, { "dpo_loss": 0.6897158622741699, "epoch": 8.011336797354748, "grad_norm": 67.31454023920725, "learning_rate": 5.870113079391673e-08, "logits": -1.1569894552230835, "logps": -88.41534423828125, "loss": 0.0972, "objective": 0.0982559472322464, "ranking_idealized": 0.4625000059604645, "ranking_idealized_expo": 0.4749999940395355, "ranking_simple": 0.46875, "regularize": 0.029284348711371422, "step": 4240 }, { "dpo_loss": 0.6918286681175232, "epoch": 8.020784128483703, "grad_norm": 65.95526469820857, "learning_rate": 5.8171152376102065e-08, "logits": -1.2049682140350342, "logps": -88.76419830322266, "loss": 0.0978, "objective": 0.09947766363620758, "ranking_idealized": 0.44999998807907104, "ranking_idealized_expo": 0.40625, "ranking_simple": 0.40625, "regularize": 0.03029480017721653, "step": 4245 }, { "dpo_loss": 0.6871441602706909, "epoch": 8.03023145961266, "grad_norm": 66.216010377231, "learning_rate": 5.764326208531661e-08, "logits": -1.1581623554229736, "logps": -89.52348327636719, "loss": 0.0985, "objective": 0.09997192770242691, "ranking_idealized": 0.4625000059604645, "ranking_idealized_expo": 0.41874998807907104, "ranking_simple": 0.41874998807907104, "regularize": 0.0312575027346611, "step": 4250 }, { "epoch": 8.03023145961266, "eval_dpo_loss": 0.7083138823509216, "eval_logits": -1.1844391822814941, "eval_logps": -93.69242095947266, "eval_loss": 0.3415719270706177, "eval_objective": 0.34033986926078796, "eval_ranking_idealized": 0.5398550629615784, "eval_ranking_idealized_expo": 0.5243270993232727, "eval_ranking_simple": 0.5196687579154968, "eval_regularize": 0.2695084512233734, "eval_runtime": 155.0337, "eval_samples_per_second": 37.347, "eval_steps_per_second": 3.115, "step": 4250 }, { "dpo_loss": 0.6859526634216309, "epoch": 8.039678790741615, "grad_norm": 66.75299577172807, "learning_rate": 5.711746566783881e-08, "logits": -1.1340796947479248, "logps": -86.99250793457031, "loss": 0.1001, "objective": 0.10409112274646759, "ranking_idealized": 0.6000000238418579, "ranking_idealized_expo": 0.59375, "ranking_simple": 0.59375, "regularize": 0.03549586609005928, "step": 4255 }, { "dpo_loss": 0.691058874130249, "epoch": 8.049126121870572, "grad_norm": 63.70150795972831, "learning_rate": 5.6593768847154585e-08, "logits": -1.2099452018737793, "logps": -88.73480224609375, "loss": 0.0999, "objective": 0.09854385256767273, "ranking_idealized": 0.5874999761581421, "ranking_idealized_expo": 0.5625, "ranking_simple": 0.5625, "regularize": 0.029437970370054245, "step": 4260 }, { "dpo_loss": 0.6891748309135437, "epoch": 8.058573452999527, "grad_norm": 65.84663674520789, "learning_rate": 5.607217732389502e-08, "logits": -1.2241264581680298, "logps": -87.21629333496094, "loss": 0.0965, "objective": 0.09340299665927887, "ranking_idealized": 0.574999988079071, "ranking_idealized_expo": 0.512499988079071, "ranking_simple": 0.512499988079071, "regularize": 0.024485522881150246, "step": 4265 }, { "dpo_loss": 0.6913408041000366, "epoch": 8.068020784128484, "grad_norm": 65.15468330480556, "learning_rate": 5.555269677577432e-08, "logits": -1.1851134300231934, "logps": -87.51316833496094, "loss": 0.0989, "objective": 0.1005776897072792, "ranking_idealized": 0.512499988079071, "ranking_idealized_expo": 0.48750001192092896, "ranking_simple": 0.48750001192092896, "regularize": 0.03144359961152077, "step": 4270 }, { "dpo_loss": 0.6871658563613892, "epoch": 8.07746811525744, "grad_norm": 64.42370950124189, "learning_rate": 5.503533285752785e-08, "logits": -1.1673667430877686, "logps": -87.55001068115234, "loss": 0.0997, "objective": 0.09894660115242004, "ranking_idealized": 0.5687500238418579, "ranking_idealized_expo": 0.550000011920929, "ranking_simple": 0.550000011920929, "regularize": 0.030230006203055382, "step": 4275 }, { "dpo_loss": 0.6906015872955322, "epoch": 8.086915446386396, "grad_norm": 65.25737645815519, "learning_rate": 5.452009120085063e-08, "logits": -1.10977303981781, "logps": -88.37261962890625, "loss": 0.1, "objective": 0.10002779960632324, "ranking_idealized": 0.5062500238418579, "ranking_idealized_expo": 0.46875, "ranking_simple": 0.46875, "regularize": 0.030967634171247482, "step": 4280 }, { "dpo_loss": 0.6894617080688477, "epoch": 8.096362777515353, "grad_norm": 66.36346720749803, "learning_rate": 5.400697741433624e-08, "logits": -1.2385480403900146, "logps": -89.33724212646484, "loss": 0.0992, "objective": 0.09789734333753586, "ranking_idealized": 0.550000011920929, "ranking_idealized_expo": 0.543749988079071, "ranking_simple": 0.543749988079071, "regularize": 0.02895117737352848, "step": 4285 }, { "dpo_loss": 0.6887727379798889, "epoch": 8.105810108644308, "grad_norm": 64.7308617932561, "learning_rate": 5.3495997083415454e-08, "logits": -1.2975224256515503, "logps": -88.44245147705078, "loss": 0.0993, "objective": 0.09873533993959427, "ranking_idealized": 0.581250011920929, "ranking_idealized_expo": 0.543749988079071, "ranking_simple": 0.5375000238418579, "regularize": 0.02985805831849575, "step": 4290 }, { "dpo_loss": 0.6871117353439331, "epoch": 8.115257439773265, "grad_norm": 71.20794837846714, "learning_rate": 5.2987155770295835e-08, "logits": -1.1390098333358765, "logps": -88.33921813964844, "loss": 0.0977, "objective": 0.09580464661121368, "ranking_idealized": 0.5375000238418579, "ranking_idealized_expo": 0.5062500238418579, "ranking_simple": 0.5062500238418579, "regularize": 0.027093475684523582, "step": 4295 }, { "dpo_loss": 0.6903670430183411, "epoch": 8.12470477090222, "grad_norm": 63.395320073665424, "learning_rate": 5.2480459013900666e-08, "logits": -1.2452596426010132, "logps": -90.17387390136719, "loss": 0.0964, "objective": 0.0957658439874649, "ranking_idealized": 0.4937500059604645, "ranking_idealized_expo": 0.4625000059604645, "ranking_simple": 0.46875, "regularize": 0.02672913670539856, "step": 4300 }, { "epoch": 8.12470477090222, "eval_dpo_loss": 0.708151638507843, "eval_logits": -1.1870557069778442, "eval_logps": -93.50247955322266, "eval_loss": 0.3422209322452545, "eval_objective": 0.3408995568752289, "eval_ranking_idealized": 0.5398550629615784, "eval_ranking_idealized_expo": 0.5243270993232727, "eval_ranking_simple": 0.5201863646507263, "eval_regularize": 0.270084410905838, "eval_runtime": 154.5816, "eval_samples_per_second": 37.456, "eval_steps_per_second": 3.125, "step": 4300 }, { "dpo_loss": 0.6924780607223511, "epoch": 8.134152102031177, "grad_norm": 65.77929031759814, "learning_rate": 5.1975912329809245e-08, "logits": -1.1215593814849854, "logps": -89.29200744628906, "loss": 0.0983, "objective": 0.09922514110803604, "ranking_idealized": 0.5375000238418579, "ranking_idealized_expo": 0.53125, "ranking_simple": 0.53125, "regularize": 0.029977332800626755, "step": 4305 }, { "dpo_loss": 0.6879432797431946, "epoch": 8.143599433160132, "grad_norm": 60.81145106842867, "learning_rate": 5.147352121019635e-08, "logits": -1.247128963470459, "logps": -86.65443420410156, "loss": 0.0958, "objective": 0.09523816406726837, "ranking_idealized": 0.48750001192092896, "ranking_idealized_expo": 0.4749999940395355, "ranking_simple": 0.4749999940395355, "regularize": 0.026443829759955406, "step": 4310 }, { "dpo_loss": 0.6936001777648926, "epoch": 8.153046764289089, "grad_norm": 71.82393154987687, "learning_rate": 5.0973291123772636e-08, "logits": -1.203482985496521, "logps": -89.65019226074219, "loss": 0.0987, "objective": 0.10044272989034653, "ranking_idealized": 0.543749988079071, "ranking_idealized_expo": 0.4937500059604645, "ranking_simple": 0.4937500059604645, "regularize": 0.031082715839147568, "step": 4315 }, { "dpo_loss": 0.6879534721374512, "epoch": 8.162494095418044, "grad_norm": 68.61775723034592, "learning_rate": 5.047522751572528e-08, "logits": -1.2061399221420288, "logps": -89.13920593261719, "loss": 0.0975, "objective": 0.09912938624620438, "ranking_idealized": 0.6000000238418579, "ranking_idealized_expo": 0.543749988079071, "ranking_simple": 0.543749988079071, "regularize": 0.03033403679728508, "step": 4320 }, { "dpo_loss": 0.6893355250358582, "epoch": 8.171941426547, "grad_norm": 64.96895658828034, "learning_rate": 5.007834013783027e-08, "logits": -1.204025387763977, "logps": -88.53910827636719, "loss": 0.097, "objective": 0.09730519354343414, "ranking_idealized": 0.581250011920929, "ranking_idealized_expo": 0.5375000238418579, "ranking_simple": 0.543749988079071, "regularize": 0.028371628373861313, "step": 4325 }, { "dpo_loss": 0.6900074481964111, "epoch": 8.181388757675956, "grad_norm": 64.5727180759984, "learning_rate": 4.958418983741744e-08, "logits": -1.227060317993164, "logps": -86.502685546875, "loss": 0.0985, "objective": 0.09863508492708206, "ranking_idealized": 0.512499988079071, "ranking_idealized_expo": 0.5062500238418579, "ranking_simple": 0.5, "regularize": 0.029634330421686172, "step": 4330 }, { "dpo_loss": 0.6896570324897766, "epoch": 8.190836088804913, "grad_norm": 67.13135125464676, "learning_rate": 4.909222113625544e-08, "logits": -1.17739999294281, "logps": -89.40184020996094, "loss": 0.0975, "objective": 0.09737617522478104, "ranking_idealized": 0.4937500059604645, "ranking_idealized_expo": 0.4625000059604645, "ranking_simple": 0.4625000059604645, "regularize": 0.028410470113158226, "step": 4335 }, { "dpo_loss": 0.6879313588142395, "epoch": 8.200283419933868, "grad_norm": 70.81753843915308, "learning_rate": 4.860243938960329e-08, "logits": -1.1501330137252808, "logps": -90.64093017578125, "loss": 0.0997, "objective": 0.09839653968811035, "ranking_idealized": 0.581250011920929, "ranking_idealized_expo": 0.581250011920929, "ranking_simple": 0.5874999761581421, "regularize": 0.029603416100144386, "step": 4340 }, { "dpo_loss": 0.6932226419448853, "epoch": 8.209730751062825, "grad_norm": 72.04751784384656, "learning_rate": 4.8114849928914013e-08, "logits": -1.2128031253814697, "logps": -89.16693115234375, "loss": 0.0979, "objective": 0.10338658094406128, "ranking_idealized": 0.574999988079071, "ranking_idealized_expo": 0.5562499761581421, "ranking_simple": 0.5625, "regularize": 0.034064311534166336, "step": 4345 }, { "dpo_loss": 0.6911360025405884, "epoch": 8.219178082191782, "grad_norm": 65.5013939361684, "learning_rate": 4.7629458061776816e-08, "logits": -1.2401503324508667, "logps": -88.55441284179688, "loss": 0.0997, "objective": 0.09972050040960312, "ranking_idealized": 0.5375000238418579, "ranking_idealized_expo": 0.512499988079071, "ranking_simple": 0.512499988079071, "regularize": 0.03060689941048622, "step": 4350 }, { "epoch": 8.219178082191782, "eval_dpo_loss": 0.7081336975097656, "eval_logits": -1.1865993738174438, "eval_logps": -93.80738067626953, "eval_loss": 0.34230393171310425, "eval_objective": 0.3408054709434509, "eval_ranking_idealized": 0.5398550629615784, "eval_ranking_idealized_expo": 0.5243270993232727, "eval_ranking_simple": 0.5186335444450378, "eval_regularize": 0.26999205350875854, "eval_runtime": 154.5279, "eval_samples_per_second": 37.469, "eval_steps_per_second": 3.126, "step": 4350 }, { "dpo_loss": 0.6863928437232971, "epoch": 8.228625413320737, "grad_norm": 69.86443537052166, "learning_rate": 4.7146269071859477e-08, "logits": -1.2031657695770264, "logps": -89.26103210449219, "loss": 0.0971, "objective": 0.09626658260822296, "ranking_idealized": 0.45625001192092896, "ranking_idealized_expo": 0.4375, "ranking_simple": 0.4375, "regularize": 0.027627307921648026, "step": 4355 }, { "dpo_loss": 0.6876174807548523, "epoch": 8.238072744449694, "grad_norm": 71.68346903393217, "learning_rate": 4.6665288218850404e-08, "logits": -1.1734378337860107, "logps": -87.59542846679688, "loss": 0.0969, "objective": 0.0959327444434166, "ranking_idealized": 0.512499988079071, "ranking_idealized_expo": 0.5, "ranking_simple": 0.5, "regularize": 0.02717098966240883, "step": 4360 }, { "dpo_loss": 0.6887752413749695, "epoch": 8.247520075578649, "grad_norm": 68.38626337967031, "learning_rate": 4.618652073840187e-08, "logits": -1.124677062034607, "logps": -88.45066833496094, "loss": 0.0978, "objective": 0.09622694551944733, "ranking_idealized": 0.574999988079071, "ranking_idealized_expo": 0.550000011920929, "ranking_simple": 0.550000011920929, "regularize": 0.027349423617124557, "step": 4365 }, { "dpo_loss": 0.6909067034721375, "epoch": 8.256967406707606, "grad_norm": 62.8555313961238, "learning_rate": 4.570997184207262e-08, "logits": -1.1489849090576172, "logps": -88.59163665771484, "loss": 0.0969, "objective": 0.09771253913640976, "ranking_idealized": 0.4937500059604645, "ranking_idealized_expo": 0.46875, "ranking_simple": 0.46875, "regularize": 0.028621861711144447, "step": 4370 }, { "dpo_loss": 0.691406786441803, "epoch": 8.266414737836561, "grad_norm": 67.83043830233812, "learning_rate": 4.523564671727134e-08, "logits": -1.27828848361969, "logps": -87.43110656738281, "loss": 0.0959, "objective": 0.09452847391366959, "ranking_idealized": 0.5562499761581421, "ranking_idealized_expo": 0.5, "ranking_simple": 0.5, "regularize": 0.02538778819143772, "step": 4375 }, { "dpo_loss": 0.6888766288757324, "epoch": 8.275862068965518, "grad_norm": 69.32541466192775, "learning_rate": 4.476355052720013e-08, "logits": -1.1250083446502686, "logps": -91.4510269165039, "loss": 0.0973, "objective": 0.0973498672246933, "ranking_idealized": 0.581250011920929, "ranking_idealized_expo": 0.543749988079071, "ranking_simple": 0.550000011920929, "regularize": 0.028462210670113564, "step": 4380 }, { "dpo_loss": 0.6925772428512573, "epoch": 8.285309400094473, "grad_norm": 72.54182169425356, "learning_rate": 4.4293688410798306e-08, "logits": -1.2165477275848389, "logps": -88.94318389892578, "loss": 0.0972, "objective": 0.09378346055746078, "ranking_idealized": 0.5562499761581421, "ranking_idealized_expo": 0.5375000238418579, "ranking_simple": 0.5375000238418579, "regularize": 0.024525735527276993, "step": 4385 }, { "dpo_loss": 0.6887747049331665, "epoch": 8.29475673122343, "grad_norm": 69.46068601996431, "learning_rate": 4.382606548268658e-08, "logits": -1.2112553119659424, "logps": -88.78561401367188, "loss": 0.0972, "objective": 0.09837064892053604, "ranking_idealized": 0.543749988079071, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.518750011920929, "regularize": 0.029493171721696854, "step": 4390 }, { "dpo_loss": 0.6883238554000854, "epoch": 8.304204062352385, "grad_norm": 63.45350469479899, "learning_rate": 4.336068683311125e-08, "logits": -1.1967496871948242, "logps": -89.25916290283203, "loss": 0.0968, "objective": 0.0968455821275711, "ranking_idealized": 0.6187499761581421, "ranking_idealized_expo": 0.581250011920929, "ranking_simple": 0.581250011920929, "regularize": 0.028013193979859352, "step": 4395 }, { "dpo_loss": 0.6897088289260864, "epoch": 8.313651393481342, "grad_norm": 64.0026031711914, "learning_rate": 4.289755752788879e-08, "logits": -1.2383521795272827, "logps": -88.59597778320312, "loss": 0.0963, "objective": 0.0949353277683258, "ranking_idealized": 0.574999988079071, "ranking_idealized_expo": 0.574999988079071, "ranking_simple": 0.5687500238418579, "regularize": 0.025964435189962387, "step": 4400 }, { "epoch": 8.313651393481342, "eval_dpo_loss": 0.7083983421325684, "eval_logits": -1.1860986948013306, "eval_logps": -93.68854522705078, "eval_loss": 0.34339407086372375, "eval_objective": 0.3419049382209778, "eval_ranking_idealized": 0.5398550629615784, "eval_ranking_idealized_expo": 0.5243270993232727, "eval_ranking_simple": 0.5201863646507263, "eval_regularize": 0.2710651159286499, "eval_runtime": 154.6301, "eval_samples_per_second": 37.444, "eval_steps_per_second": 3.124, "step": 4400 }, { "dpo_loss": 0.6894680261611938, "epoch": 8.323098724610297, "grad_norm": 64.99137183554055, "learning_rate": 4.2436682608350705e-08, "logits": -1.1680843830108643, "logps": -89.59283447265625, "loss": 0.0934, "objective": 0.09198366850614548, "ranking_idealized": 0.5874999761581421, "ranking_idealized_expo": 0.5625, "ranking_simple": 0.5562499761581421, "regularize": 0.023036859929561615, "step": 4405 }, { "dpo_loss": 0.6878527402877808, "epoch": 8.332546055739254, "grad_norm": 66.27524605172559, "learning_rate": 4.197806709128865e-08, "logits": -1.2237894535064697, "logps": -87.18922424316406, "loss": 0.0972, "objective": 0.0979805439710617, "ranking_idealized": 0.5062500238418579, "ranking_idealized_expo": 0.4749999940395355, "ranking_simple": 0.4749999940395355, "regularize": 0.029195260256528854, "step": 4410 }, { "dpo_loss": 0.6892653107643127, "epoch": 8.341993386868209, "grad_norm": 67.46944259801737, "learning_rate": 4.152171596890008e-08, "logits": -1.2152959108352661, "logps": -88.88487243652344, "loss": 0.0972, "objective": 0.09294287860393524, "ranking_idealized": 0.512499988079071, "ranking_idealized_expo": 0.4937500059604645, "ranking_simple": 0.48750001192092896, "regularize": 0.02401634305715561, "step": 4415 }, { "dpo_loss": 0.6894920468330383, "epoch": 8.351440717997166, "grad_norm": 71.97312442790279, "learning_rate": 4.106763420873349e-08, "logits": -1.1700403690338135, "logps": -87.84864807128906, "loss": 0.0979, "objective": 0.09823272377252579, "ranking_idealized": 0.6000000238418579, "ranking_idealized_expo": 0.574999988079071, "ranking_simple": 0.574999988079071, "regularize": 0.029283514246344566, "step": 4420 }, { "dpo_loss": 0.6906135678291321, "epoch": 8.360888049126121, "grad_norm": 64.88947011442576, "learning_rate": 4.061582675363459e-08, "logits": -1.1943508386611938, "logps": -86.23715209960938, "loss": 0.0952, "objective": 0.09793750196695328, "ranking_idealized": 0.574999988079071, "ranking_idealized_expo": 0.550000011920929, "ranking_simple": 0.550000011920929, "regularize": 0.028876136988401413, "step": 4425 }, { "dpo_loss": 0.6880233287811279, "epoch": 8.370335380255078, "grad_norm": 66.23358310841932, "learning_rate": 4.016629852169237e-08, "logits": -1.197622537612915, "logps": -87.19839477539062, "loss": 0.0965, "objective": 0.09400331228971481, "ranking_idealized": 0.5562499761581421, "ranking_idealized_expo": 0.5562499761581421, "ranking_simple": 0.550000011920929, "regularize": 0.025200972333550453, "step": 4430 }, { "dpo_loss": 0.6888989806175232, "epoch": 8.379782711384035, "grad_norm": 69.18798130701948, "learning_rate": 3.9719054406185806e-08, "logits": -1.2378427982330322, "logps": -87.28419494628906, "loss": 0.0973, "objective": 0.1004018634557724, "ranking_idealized": 0.574999988079071, "ranking_idealized_expo": 0.5562499761581421, "ranking_simple": 0.5562499761581421, "regularize": 0.03151196241378784, "step": 4435 }, { "dpo_loss": 0.6903999447822571, "epoch": 8.38923004251299, "grad_norm": 63.93310100734026, "learning_rate": 3.927409927553038e-08, "logits": -1.1226755380630493, "logps": -88.65518951416016, "loss": 0.096, "objective": 0.0965915396809578, "ranking_idealized": 0.5375000238418579, "ranking_idealized_expo": 0.48750001192092896, "ranking_simple": 0.48750001192092896, "regularize": 0.02755153737962246, "step": 4440 }, { "dpo_loss": 0.6903192400932312, "epoch": 8.398677373641947, "grad_norm": 70.19789859402223, "learning_rate": 3.8831437973225076e-08, "logits": -1.1719176769256592, "logps": -89.19178771972656, "loss": 0.0954, "objective": 0.09195469319820404, "ranking_idealized": 0.5562499761581421, "ranking_idealized_expo": 0.5062500238418579, "ranking_simple": 0.5062500238418579, "regularize": 0.02292276918888092, "step": 4445 }, { "dpo_loss": 0.692059338092804, "epoch": 8.408124704770902, "grad_norm": 67.97213768976081, "learning_rate": 3.8391075317799784e-08, "logits": -1.2177083492279053, "logps": -88.48794555664062, "loss": 0.0966, "objective": 0.10071361064910889, "ranking_idealized": 0.5562499761581421, "ranking_idealized_expo": 0.5562499761581421, "ranking_simple": 0.5625, "regularize": 0.03150767460465431, "step": 4450 }, { "epoch": 8.408124704770902, "eval_dpo_loss": 0.7083788514137268, "eval_logits": -1.1875168085098267, "eval_logps": -93.731201171875, "eval_loss": 0.3433586359024048, "eval_objective": 0.3419070541858673, "eval_ranking_idealized": 0.5398550629615784, "eval_ranking_idealized_expo": 0.5243270993232727, "eval_ranking_simple": 0.5186335444450378, "eval_regularize": 0.271069198846817, "eval_runtime": 154.7595, "eval_samples_per_second": 37.413, "eval_steps_per_second": 3.121, "step": 4450 }, { "dpo_loss": 0.687419056892395, "epoch": 8.417572035899859, "grad_norm": 64.10285998797039, "learning_rate": 3.795301610276269e-08, "logits": -1.2001512050628662, "logps": -85.2901611328125, "loss": 0.0944, "objective": 0.09056023508310318, "ranking_idealized": 0.581250011920929, "ranking_idealized_expo": 0.5562499761581421, "ranking_simple": 0.5562499761581421, "regularize": 0.02181832864880562, "step": 4455 }, { "dpo_loss": 0.689990758895874, "epoch": 8.427019367028814, "grad_norm": 65.93551136370232, "learning_rate": 3.7517265096548236e-08, "logits": -1.1889123916625977, "logps": -91.23262786865234, "loss": 0.0946, "objective": 0.09441934525966644, "ranking_idealized": 0.53125, "ranking_idealized_expo": 0.5375000238418579, "ranking_simple": 0.5375000238418579, "regularize": 0.02542026713490486, "step": 4460 }, { "dpo_loss": 0.6894329786300659, "epoch": 8.436466698157771, "grad_norm": 68.36833954510912, "learning_rate": 3.708382704246521e-08, "logits": -1.2022359371185303, "logps": -87.67900848388672, "loss": 0.0967, "objective": 0.0936957448720932, "ranking_idealized": 0.59375, "ranking_idealized_expo": 0.5562499761581421, "ranking_simple": 0.5562499761581421, "regularize": 0.02475244179368019, "step": 4465 }, { "dpo_loss": 0.6907721757888794, "epoch": 8.445914029286726, "grad_norm": 63.76925048241595, "learning_rate": 3.6652706658645146e-08, "logits": -1.2040332555770874, "logps": -87.47300720214844, "loss": 0.0958, "objective": 0.0988548994064331, "ranking_idealized": 0.5062500238418579, "ranking_idealized_expo": 0.48750001192092896, "ranking_simple": 0.48124998807907104, "regularize": 0.029777679592370987, "step": 4470 }, { "dpo_loss": 0.6902170777320862, "epoch": 8.455361360415683, "grad_norm": 69.38300500893175, "learning_rate": 3.6223908637990686e-08, "logits": -1.2222057580947876, "logps": -88.08490753173828, "loss": 0.0941, "objective": 0.09459289163351059, "ranking_idealized": 0.5249999761581421, "ranking_idealized_expo": 0.4937500059604645, "ranking_simple": 0.4937500059604645, "regularize": 0.025571173056960106, "step": 4475 }, { "dpo_loss": 0.6911331415176392, "epoch": 8.464808691544638, "grad_norm": 67.18588794078038, "learning_rate": 3.579743764812487e-08, "logits": -1.2379624843597412, "logps": -88.86549377441406, "loss": 0.0934, "objective": 0.0922469049692154, "ranking_idealized": 0.550000011920929, "ranking_idealized_expo": 0.5062500238418579, "ranking_simple": 0.5062500238418579, "regularize": 0.02313357964158058, "step": 4480 }, { "dpo_loss": 0.6889945864677429, "epoch": 8.474256022673595, "grad_norm": 64.37437826023299, "learning_rate": 3.537329833134001e-08, "logits": -1.2001641988754272, "logps": -89.17662048339844, "loss": 0.098, "objective": 0.09767808020114899, "ranking_idealized": 0.518750011920929, "ranking_idealized_expo": 0.4937500059604645, "ranking_simple": 0.4937500059604645, "regularize": 0.028778618201613426, "step": 4485 }, { "dpo_loss": 0.6907839179039001, "epoch": 8.48370335380255, "grad_norm": 72.47472879601983, "learning_rate": 3.495149530454747e-08, "logits": -1.2376738786697388, "logps": -89.4019775390625, "loss": 0.093, "objective": 0.09337928891181946, "ranking_idealized": 0.5062500238418579, "ranking_idealized_expo": 0.48750001192092896, "ranking_simple": 0.48750001192092896, "regularize": 0.024300888180732727, "step": 4490 }, { "dpo_loss": 0.6883755922317505, "epoch": 8.493150684931507, "grad_norm": 69.09675000652916, "learning_rate": 3.4532033159227174e-08, "logits": -1.2523037195205688, "logps": -87.72993469238281, "loss": 0.0933, "objective": 0.09463658928871155, "ranking_idealized": 0.5874999761581421, "ranking_idealized_expo": 0.574999988079071, "ranking_simple": 0.574999988079071, "regularize": 0.025799039751291275, "step": 4495 }, { "dpo_loss": 0.6915408968925476, "epoch": 8.502598016060462, "grad_norm": 74.02222044202652, "learning_rate": 3.4114916461377627e-08, "logits": -1.148749589920044, "logps": -87.29940032958984, "loss": 0.0956, "objective": 0.09479153901338577, "ranking_idealized": 0.543749988079071, "ranking_idealized_expo": 0.5062500238418579, "ranking_simple": 0.5, "regularize": 0.025637447834014893, "step": 4500 }, { "epoch": 8.502598016060462, "eval_dpo_loss": 0.7081142663955688, "eval_logits": -1.1866111755371094, "eval_logps": -93.84306335449219, "eval_loss": 0.34312811493873596, "eval_objective": 0.34156593680381775, "eval_ranking_idealized": 0.5398550629615784, "eval_ranking_idealized_expo": 0.5243270993232727, "eval_ranking_simple": 0.5186335444450378, "eval_regularize": 0.27075451612472534, "eval_runtime": 157.504, "eval_samples_per_second": 36.761, "eval_steps_per_second": 3.067, "step": 4500 }, { "dpo_loss": 0.6882936954498291, "epoch": 8.512045347189419, "grad_norm": 68.0477059878961, "learning_rate": 3.3700149751466344e-08, "logits": -1.096351981163025, "logps": -89.81024169921875, "loss": 0.096, "objective": 0.09751152992248535, "ranking_idealized": 0.53125, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.5249999761581421, "regularize": 0.028682153671979904, "step": 4505 }, { "dpo_loss": 0.6905571222305298, "epoch": 8.521492678318374, "grad_norm": 63.99219462642485, "learning_rate": 3.3287737544380385e-08, "logits": -1.2304773330688477, "logps": -87.42345428466797, "loss": 0.0949, "objective": 0.09371695667505264, "ranking_idealized": 0.5375000238418579, "ranking_idealized_expo": 0.518750011920929, "ranking_simple": 0.518750011920929, "regularize": 0.024661244824528694, "step": 4510 }, { "dpo_loss": 0.6905447244644165, "epoch": 8.530940009447331, "grad_norm": 66.08184879335815, "learning_rate": 3.287768432937721e-08, "logits": -1.169162392616272, "logps": -88.21883392333984, "loss": 0.094, "objective": 0.09295524656772614, "ranking_idealized": 0.5562499761581421, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.5249999761581421, "regularize": 0.023900777101516724, "step": 4515 }, { "dpo_loss": 0.6899552345275879, "epoch": 8.540387340576288, "grad_norm": 66.47372887760963, "learning_rate": 3.246999457003574e-08, "logits": -1.1336697340011597, "logps": -87.81645965576172, "loss": 0.0954, "objective": 0.09886420518159866, "ranking_idealized": 0.5562499761581421, "ranking_idealized_expo": 0.5375000238418579, "ranking_simple": 0.53125, "regularize": 0.029868673533201218, "step": 4520 }, { "dpo_loss": 0.6897488832473755, "epoch": 8.549834671705243, "grad_norm": 62.47832099540411, "learning_rate": 3.2064672704207765e-08, "logits": -1.2520087957382202, "logps": -89.779052734375, "loss": 0.0954, "objective": 0.09957768768072128, "ranking_idealized": 0.518750011920929, "ranking_idealized_expo": 0.5062500238418579, "ranking_simple": 0.5, "regularize": 0.030602801591157913, "step": 4525 }, { "dpo_loss": 0.6906110048294067, "epoch": 8.5592820028342, "grad_norm": 63.79845554953128, "learning_rate": 3.1661723143969783e-08, "logits": -1.1387863159179688, "logps": -87.83059692382812, "loss": 0.0939, "objective": 0.09394125640392303, "ranking_idealized": 0.5, "ranking_idealized_expo": 0.5, "ranking_simple": 0.5, "regularize": 0.02488015405833721, "step": 4530 }, { "dpo_loss": 0.6908597350120544, "epoch": 8.568729333963155, "grad_norm": 64.58466357139119, "learning_rate": 3.12611502755748e-08, "logits": -1.224230408668518, "logps": -90.1174087524414, "loss": 0.0925, "objective": 0.09125302731990814, "ranking_idealized": 0.5249999761581421, "ranking_idealized_expo": 0.48124998807907104, "ranking_simple": 0.48124998807907104, "regularize": 0.02216704748570919, "step": 4535 }, { "dpo_loss": 0.6905266642570496, "epoch": 8.578176665092112, "grad_norm": 72.54050125765384, "learning_rate": 3.086295845940473e-08, "logits": -1.2545582056045532, "logps": -90.31018829345703, "loss": 0.0953, "objective": 0.09556666761636734, "ranking_idealized": 0.48124998807907104, "ranking_idealized_expo": 0.45625001192092896, "ranking_simple": 0.45625001192092896, "regularize": 0.026513999328017235, "step": 4540 }, { "dpo_loss": 0.689853310585022, "epoch": 8.587623996221067, "grad_norm": 63.01988615424287, "learning_rate": 3.046715202992292e-08, "logits": -1.239242672920227, "logps": -87.2964096069336, "loss": 0.0962, "objective": 0.09675108641386032, "ranking_idealized": 0.550000011920929, "ranking_idealized_expo": 0.5062500238418579, "ranking_simple": 0.5062500238418579, "regularize": 0.027765760198235512, "step": 4545 }, { "dpo_loss": 0.691806435585022, "epoch": 8.597071327350024, "grad_norm": 69.93692600298262, "learning_rate": 3.007373529562676e-08, "logits": -1.1182079315185547, "logps": -88.34652709960938, "loss": 0.0928, "objective": 0.09237860143184662, "ranking_idealized": 0.5562499761581421, "ranking_idealized_expo": 0.5375000238418579, "ranking_simple": 0.53125, "regularize": 0.02319796197116375, "step": 4550 }, { "epoch": 8.597071327350024, "eval_dpo_loss": 0.7083669900894165, "eval_logits": -1.1858949661254883, "eval_logps": -93.82425689697266, "eval_loss": 0.34278836846351624, "eval_objective": 0.3414183557033539, "eval_ranking_idealized": 0.5398550629615784, "eval_ranking_idealized_expo": 0.5243270993232727, "eval_ranking_simple": 0.5186335444450378, "eval_regularize": 0.2705816626548767, "eval_runtime": 154.5947, "eval_samples_per_second": 37.453, "eval_steps_per_second": 3.124, "step": 4550 }, { "dpo_loss": 0.690921425819397, "epoch": 8.60651865847898, "grad_norm": 65.50525148475181, "learning_rate": 2.9682712539001038e-08, "logits": -1.2246023416519165, "logps": -89.01949310302734, "loss": 0.0932, "objective": 0.09233587980270386, "ranking_idealized": 0.5687500238418579, "ranking_idealized_expo": 0.53125, "ranking_simple": 0.53125, "regularize": 0.023243745788931847, "step": 4555 }, { "dpo_loss": 0.6897628307342529, "epoch": 8.615965989607936, "grad_norm": 70.33173705088662, "learning_rate": 2.9294088016471124e-08, "logits": -1.1716398000717163, "logps": -87.7445297241211, "loss": 0.0915, "objective": 0.09164030849933624, "ranking_idealized": 0.5874999761581421, "ranking_idealized_expo": 0.5562499761581421, "ranking_simple": 0.5562499761581421, "regularize": 0.02266402170062065, "step": 4560 }, { "dpo_loss": 0.6929219365119934, "epoch": 8.625413320736891, "grad_norm": 66.1921684721932, "learning_rate": 2.8907865958356926e-08, "logits": -1.2246098518371582, "logps": -88.78094482421875, "loss": 0.0953, "objective": 0.09624224156141281, "ranking_idealized": 0.5562499761581421, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.5249999761581421, "regularize": 0.0269500520080328, "step": 4565 }, { "dpo_loss": 0.6871283650398254, "epoch": 8.634860651865848, "grad_norm": 68.83329016660062, "learning_rate": 2.8524050568826452e-08, "logits": -1.2258967161178589, "logps": -88.34210205078125, "loss": 0.0952, "objective": 0.09454770386219025, "ranking_idealized": 0.5562499761581421, "ranking_idealized_expo": 0.543749988079071, "ranking_simple": 0.543749988079071, "regularize": 0.02583487331867218, "step": 4570 }, { "dpo_loss": 0.6881753206253052, "epoch": 8.644307982994803, "grad_norm": 64.84871007090842, "learning_rate": 2.8142646025850313e-08, "logits": -1.1877790689468384, "logps": -86.32475280761719, "loss": 0.0922, "objective": 0.09216133505105972, "ranking_idealized": 0.59375, "ranking_idealized_expo": 0.59375, "ranking_simple": 0.59375, "regularize": 0.023343803361058235, "step": 4575 }, { "dpo_loss": 0.6900415420532227, "epoch": 8.65375531412376, "grad_norm": 65.79624102950804, "learning_rate": 2.7763656481156195e-08, "logits": -1.1728365421295166, "logps": -88.63168334960938, "loss": 0.0932, "objective": 0.09233821928501129, "ranking_idealized": 0.5562499761581421, "ranking_idealized_expo": 0.518750011920929, "ranking_simple": 0.518750011920929, "regularize": 0.023334065452218056, "step": 4580 }, { "dpo_loss": 0.6894615292549133, "epoch": 8.663202645252715, "grad_norm": 63.83956436960819, "learning_rate": 2.7387086060183613e-08, "logits": -1.1962952613830566, "logps": -88.25006866455078, "loss": 0.095, "objective": 0.09368318319320679, "ranking_idealized": 0.512499988079071, "ranking_idealized_expo": 0.48750001192092896, "ranking_simple": 0.4937500059604645, "regularize": 0.02473703771829605, "step": 4585 }, { "dpo_loss": 0.687892735004425, "epoch": 8.672649976381672, "grad_norm": 67.74351010340281, "learning_rate": 2.701293886203912e-08, "logits": -1.1812070608139038, "logps": -89.998291015625, "loss": 0.0951, "objective": 0.0939764752984047, "ranking_idealized": 0.581250011920929, "ranking_idealized_expo": 0.5562499761581421, "ranking_simple": 0.5687500238418579, "regularize": 0.02518719993531704, "step": 4590 }, { "dpo_loss": 0.6898320317268372, "epoch": 8.682097307510627, "grad_norm": 68.21667103987487, "learning_rate": 2.664121895945151e-08, "logits": -1.098860502243042, "logps": -89.40926361083984, "loss": 0.0924, "objective": 0.09196336567401886, "ranking_idealized": 0.4937500059604645, "ranking_idealized_expo": 0.48124998807907104, "ranking_simple": 0.48124998807907104, "regularize": 0.02298017032444477, "step": 4595 }, { "dpo_loss": 0.6876360774040222, "epoch": 8.691544638639584, "grad_norm": 67.16139911459766, "learning_rate": 2.627193039872763e-08, "logits": -1.2400391101837158, "logps": -88.00350189208984, "loss": 0.0924, "objective": 0.09190518409013748, "ranking_idealized": 0.5249999761581421, "ranking_idealized_expo": 0.4749999940395355, "ranking_simple": 0.4749999940395355, "regularize": 0.023141566663980484, "step": 4600 }, { "epoch": 8.691544638639584, "eval_dpo_loss": 0.7081586122512817, "eval_logits": -1.1870571374893188, "eval_logps": -93.7706069946289, "eval_loss": 0.34183570742607117, "eval_objective": 0.3406393826007843, "eval_ranking_idealized": 0.5398550629615784, "eval_ranking_idealized_expo": 0.5243270993232727, "eval_ranking_simple": 0.5186335444450378, "eval_regularize": 0.26982349157333374, "eval_runtime": 154.9258, "eval_samples_per_second": 37.373, "eval_steps_per_second": 3.118, "step": 4600 }, { "dpo_loss": 0.689667820930481, "epoch": 8.700991969768541, "grad_norm": 66.78585103551504, "learning_rate": 2.590507719970822e-08, "logits": -1.238919734954834, "logps": -88.60655212402344, "loss": 0.0921, "objective": 0.0904478058218956, "ranking_idealized": 0.6312500238418579, "ranking_idealized_expo": 0.574999988079071, "ranking_simple": 0.5687500238418579, "regularize": 0.021481022238731384, "step": 4605 }, { "dpo_loss": 0.6921404004096985, "epoch": 8.710439300897496, "grad_norm": 64.9738940503894, "learning_rate": 2.5540663355724247e-08, "logits": -1.2821037769317627, "logps": -87.18733215332031, "loss": 0.092, "objective": 0.09340129792690277, "ranking_idealized": 0.5375000238418579, "ranking_idealized_expo": 0.48124998807907104, "ranking_simple": 0.48750001192092896, "regularize": 0.02418726123869419, "step": 4610 }, { "dpo_loss": 0.6893884539604187, "epoch": 8.719886632026453, "grad_norm": 65.1514547817796, "learning_rate": 2.517869283355345e-08, "logits": -1.2021890878677368, "logps": -88.43559265136719, "loss": 0.0924, "objective": 0.09232644736766815, "ranking_idealized": 0.518750011920929, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.53125, "regularize": 0.023387601599097252, "step": 4615 }, { "dpo_loss": 0.690017819404602, "epoch": 8.729333963155408, "grad_norm": 67.83285187738235, "learning_rate": 2.4819169573377152e-08, "logits": -1.2051613330841064, "logps": -91.49044036865234, "loss": 0.0914, "objective": 0.09051535278558731, "ranking_idealized": 0.543749988079071, "ranking_idealized_expo": 0.5, "ranking_simple": 0.5, "regularize": 0.021513575688004494, "step": 4620 }, { "dpo_loss": 0.6906439065933228, "epoch": 8.738781294284365, "grad_norm": 67.69617898227318, "learning_rate": 2.4462097488737232e-08, "logits": -1.1643754243850708, "logps": -88.56101989746094, "loss": 0.0925, "objective": 0.09072182327508926, "ranking_idealized": 0.5062500238418579, "ranking_idealized_expo": 0.46875, "ranking_simple": 0.46875, "regularize": 0.021657422184944153, "step": 4625 }, { "dpo_loss": 0.6884030103683472, "epoch": 8.74822862541332, "grad_norm": 72.77658648446189, "learning_rate": 2.410748046649366e-08, "logits": -1.2056032419204712, "logps": -88.59508514404297, "loss": 0.0937, "objective": 0.09496771544218063, "ranking_idealized": 0.512499988079071, "ranking_idealized_expo": 0.46875, "ranking_simple": 0.46875, "regularize": 0.026127416640520096, "step": 4630 }, { "dpo_loss": 0.6875420212745667, "epoch": 8.757675956542277, "grad_norm": 66.43966114497935, "learning_rate": 2.3755322366782154e-08, "logits": -1.2251002788543701, "logps": -92.06501007080078, "loss": 0.0947, "objective": 0.09579209983348846, "ranking_idealized": 0.550000011920929, "ranking_idealized_expo": 0.518750011920929, "ranking_simple": 0.518750011920929, "regularize": 0.02703789807856083, "step": 4635 }, { "dpo_loss": 0.6887904405593872, "epoch": 8.767123287671232, "grad_norm": 75.24456536798026, "learning_rate": 2.340562702297222e-08, "logits": -1.1932998895645142, "logps": -88.05471801757812, "loss": 0.0891, "objective": 0.08944825083017349, "ranking_idealized": 0.543749988079071, "ranking_idealized_expo": 0.5375000238418579, "ranking_simple": 0.53125, "regularize": 0.0205692071467638, "step": 4640 }, { "dpo_loss": 0.6870555877685547, "epoch": 8.77657061880019, "grad_norm": 72.27399298291411, "learning_rate": 2.3058398241625282e-08, "logits": -1.1654350757598877, "logps": -88.93949890136719, "loss": 0.0922, "objective": 0.09154076874256134, "ranking_idealized": 0.48750001192092896, "ranking_idealized_expo": 0.48750001192092896, "ranking_simple": 0.4749999940395355, "regularize": 0.022835206240415573, "step": 4645 }, { "dpo_loss": 0.6922885775566101, "epoch": 8.786017949929144, "grad_norm": 67.30299296912347, "learning_rate": 2.2713639802453416e-08, "logits": -1.1404081583023071, "logps": -88.39984130859375, "loss": 0.0908, "objective": 0.09168007224798203, "ranking_idealized": 0.5249999761581421, "ranking_idealized_expo": 0.46875, "ranking_simple": 0.4749999940395355, "regularize": 0.022451210767030716, "step": 4650 }, { "epoch": 8.786017949929144, "eval_dpo_loss": 0.7079064249992371, "eval_logits": -1.1871592998504639, "eval_logps": -93.74046325683594, "eval_loss": 0.34150078892707825, "eval_objective": 0.3402964472770691, "eval_ranking_idealized": 0.5398550629615784, "eval_ranking_idealized_expo": 0.5243270993232727, "eval_ranking_simple": 0.5201863646507263, "eval_regularize": 0.2695057988166809, "eval_runtime": 155.9316, "eval_samples_per_second": 37.132, "eval_steps_per_second": 3.098, "step": 4650 }, { "dpo_loss": 0.6876232624053955, "epoch": 8.795465281058101, "grad_norm": 65.15183634112627, "learning_rate": 2.2371355458278007e-08, "logits": -1.2236615419387817, "logps": -88.56582641601562, "loss": 0.0891, "objective": 0.0867016464471817, "ranking_idealized": 0.606249988079071, "ranking_idealized_expo": 0.543749988079071, "ranking_simple": 0.550000011920929, "regularize": 0.01793932355940342, "step": 4655 }, { "dpo_loss": 0.6892760396003723, "epoch": 8.804912612187056, "grad_norm": 68.8055375349853, "learning_rate": 2.2031548934989126e-08, "logits": -1.1710513830184937, "logps": -89.77734375, "loss": 0.0933, "objective": 0.09406758844852448, "ranking_idealized": 0.550000011920929, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.5249999761581421, "regularize": 0.025139978155493736, "step": 4660 }, { "dpo_loss": 0.6892111897468567, "epoch": 8.814359943316013, "grad_norm": 63.889831491213435, "learning_rate": 2.1694223931504883e-08, "logits": -1.2380783557891846, "logps": -87.5960922241211, "loss": 0.0899, "objective": 0.08907774835824966, "ranking_idealized": 0.4625000059604645, "ranking_idealized_expo": 0.4312500059604645, "ranking_simple": 0.42500001192092896, "regularize": 0.02015662007033825, "step": 4665 }, { "dpo_loss": 0.6887243986129761, "epoch": 8.823807274444968, "grad_norm": 67.27570613787965, "learning_rate": 2.1359384119731012e-08, "logits": -1.2172998189926147, "logps": -87.30493927001953, "loss": 0.0912, "objective": 0.08973757922649384, "ranking_idealized": 0.5249999761581421, "ranking_idealized_expo": 0.4937500059604645, "ranking_simple": 0.4937500059604645, "regularize": 0.02086513489484787, "step": 4670 }, { "dpo_loss": 0.6899820566177368, "epoch": 8.833254605573925, "grad_norm": 63.128167239434276, "learning_rate": 2.102703314452106e-08, "logits": -1.1707627773284912, "logps": -88.51395416259766, "loss": 0.0909, "objective": 0.0940367728471756, "ranking_idealized": 0.6000000238418579, "ranking_idealized_expo": 0.5625, "ranking_simple": 0.5687500238418579, "regularize": 0.025038564577698708, "step": 4675 }, { "dpo_loss": 0.688649594783783, "epoch": 8.84270193670288, "grad_norm": 63.2769665589679, "learning_rate": 2.069717462363679e-08, "logits": -1.16934335231781, "logps": -87.5626449584961, "loss": 0.0893, "objective": 0.08972381055355072, "ranking_idealized": 0.4749999940395355, "ranking_idealized_expo": 0.45625001192092896, "ranking_simple": 0.45625001192092896, "regularize": 0.020858842879533768, "step": 4680 }, { "dpo_loss": 0.6877025365829468, "epoch": 8.852149267831837, "grad_norm": 64.74207762173776, "learning_rate": 2.03698121477085e-08, "logits": -1.2021405696868896, "logps": -86.36474609375, "loss": 0.0891, "objective": 0.08947821706533432, "ranking_idealized": 0.5625, "ranking_idealized_expo": 0.5, "ranking_simple": 0.5062500238418579, "regularize": 0.020707957446575165, "step": 4685 }, { "dpo_loss": 0.6896892786026001, "epoch": 8.861596598960794, "grad_norm": 73.692173393817, "learning_rate": 2.0044949280196317e-08, "logits": -1.133115530014038, "logps": -86.93243408203125, "loss": 0.0912, "objective": 0.09109000116586685, "ranking_idealized": 0.53125, "ranking_idealized_expo": 0.5, "ranking_simple": 0.5, "regularize": 0.022121066227555275, "step": 4690 }, { "dpo_loss": 0.6885090470314026, "epoch": 8.87104393008975, "grad_norm": 61.78689892348044, "learning_rate": 1.9722589557351093e-08, "logits": -1.2453858852386475, "logps": -86.93772888183594, "loss": 0.0915, "objective": 0.08757088333368301, "ranking_idealized": 0.5625, "ranking_idealized_expo": 0.518750011920929, "ranking_simple": 0.518750011920929, "regularize": 0.018719974905252457, "step": 4695 }, { "dpo_loss": 0.6881387233734131, "epoch": 8.880491261218706, "grad_norm": 67.67673378303367, "learning_rate": 1.940273648817617e-08, "logits": -1.2218042612075806, "logps": -90.48210144042969, "loss": 0.0922, "objective": 0.09271275252103806, "ranking_idealized": 0.6000000238418579, "ranking_idealized_expo": 0.5625, "ranking_simple": 0.5625, "regularize": 0.023898882791399956, "step": 4700 }, { "epoch": 8.880491261218706, "eval_dpo_loss": 0.7078229784965515, "eval_logits": -1.1888352632522583, "eval_logps": -93.71256256103516, "eval_loss": 0.3418760597705841, "eval_objective": 0.3405354917049408, "eval_ranking_idealized": 0.5398550629615784, "eval_ranking_idealized_expo": 0.5243270993232727, "eval_ranking_simple": 0.5201863646507263, "eval_regularize": 0.26975318789482117, "eval_runtime": 154.247, "eval_samples_per_second": 37.537, "eval_steps_per_second": 3.131, "step": 4700 }, { "dpo_loss": 0.6872705817222595, "epoch": 8.889938592347661, "grad_norm": 66.00163288743991, "learning_rate": 1.9085393554388872e-08, "logits": -1.1716687679290771, "logps": -85.67667388916016, "loss": 0.0886, "objective": 0.09024933725595474, "ranking_idealized": 0.581250011920929, "ranking_idealized_expo": 0.5375000238418579, "ranking_simple": 0.5375000238418579, "regularize": 0.021522274240851402, "step": 4705 }, { "dpo_loss": 0.6918772459030151, "epoch": 8.899385923476618, "grad_norm": 68.31292223070808, "learning_rate": 1.8770564210382867e-08, "logits": -1.1566064357757568, "logps": -89.57164001464844, "loss": 0.0907, "objective": 0.0918797180056572, "ranking_idealized": 0.48750001192092896, "ranking_idealized_expo": 0.4437499940395355, "ranking_simple": 0.4437499940395355, "regularize": 0.022691994905471802, "step": 4710 }, { "dpo_loss": 0.6887393593788147, "epoch": 8.908833254605574, "grad_norm": 70.38766796803583, "learning_rate": 1.8458251883190557e-08, "logits": -1.0890405178070068, "logps": -86.96800231933594, "loss": 0.0921, "objective": 0.09513839334249496, "ranking_idealized": 0.4625000059604645, "ranking_idealized_expo": 0.41874998807907104, "ranking_simple": 0.42500001192092896, "regularize": 0.026264458894729614, "step": 4715 }, { "dpo_loss": 0.6884477138519287, "epoch": 8.91828058573453, "grad_norm": 69.40217443634405, "learning_rate": 1.8148459972445574e-08, "logits": -1.1955578327178955, "logps": -88.05402374267578, "loss": 0.0907, "objective": 0.08991407603025436, "ranking_idealized": 0.625, "ranking_idealized_expo": 0.59375, "ranking_simple": 0.59375, "regularize": 0.021069305017590523, "step": 4720 }, { "dpo_loss": 0.6898118257522583, "epoch": 8.927727916863486, "grad_norm": 60.67398784751232, "learning_rate": 1.7841191850345966e-08, "logits": -1.216772198677063, "logps": -86.17524719238281, "loss": 0.0912, "objective": 0.09173218905925751, "ranking_idealized": 0.5062500238418579, "ranking_idealized_expo": 0.4937500059604645, "ranking_simple": 0.4937500059604645, "regularize": 0.022751014679670334, "step": 4725 }, { "dpo_loss": 0.6895357370376587, "epoch": 8.937175247992442, "grad_norm": 63.314698923503244, "learning_rate": 1.753645086161737e-08, "logits": -1.1994549036026, "logps": -89.67224884033203, "loss": 0.0928, "objective": 0.09444586932659149, "ranking_idealized": 0.574999988079071, "ranking_idealized_expo": 0.5562499761581421, "ranking_simple": 0.5562499761581421, "regularize": 0.025492290034890175, "step": 4730 }, { "dpo_loss": 0.6899622678756714, "epoch": 8.946622579121398, "grad_norm": 69.25838644392168, "learning_rate": 1.7234240323476702e-08, "logits": -1.2246150970458984, "logps": -86.9800796508789, "loss": 0.0901, "objective": 0.0887894406914711, "ranking_idealized": 0.543749988079071, "ranking_idealized_expo": 0.4937500059604645, "ranking_simple": 0.48750001192092896, "regularize": 0.019793204963207245, "step": 4735 }, { "dpo_loss": 0.6905208826065063, "epoch": 8.956069910250354, "grad_norm": 68.50166748835065, "learning_rate": 1.6934563525596052e-08, "logits": -1.158728837966919, "logps": -88.60356140136719, "loss": 0.0911, "objective": 0.08902458846569061, "ranking_idealized": 0.53125, "ranking_idealized_expo": 0.5062500238418579, "ranking_simple": 0.5062500238418579, "regularize": 0.019972508773207664, "step": 4740 }, { "dpo_loss": 0.6890329122543335, "epoch": 8.96551724137931, "grad_norm": 67.0223995041343, "learning_rate": 1.6637423730066685e-08, "logits": -1.2964181900024414, "logps": -88.4970474243164, "loss": 0.0893, "objective": 0.0878550335764885, "ranking_idealized": 0.550000011920929, "ranking_idealized_expo": 0.5062500238418579, "ranking_simple": 0.5062500238418579, "regularize": 0.01895173452794552, "step": 4745 }, { "dpo_loss": 0.6893103718757629, "epoch": 8.974964572508267, "grad_norm": 67.04461498936297, "learning_rate": 1.634282417136376e-08, "logits": -1.1936671733856201, "logps": -88.7967758178711, "loss": 0.0895, "objective": 0.08902595937252045, "ranking_idealized": 0.5874999761581421, "ranking_idealized_expo": 0.543749988079071, "ranking_simple": 0.550000011920929, "regularize": 0.020094921812415123, "step": 4750 }, { "epoch": 8.974964572508267, "eval_dpo_loss": 0.7079932689666748, "eval_logits": -1.1886335611343384, "eval_logps": -93.7926025390625, "eval_loss": 0.34172844886779785, "eval_objective": 0.3402238190174103, "eval_ranking_idealized": 0.5398550629615784, "eval_ranking_idealized_expo": 0.5243270993232727, "eval_ranking_simple": 0.5201863646507263, "eval_regularize": 0.2694244980812073, "eval_runtime": 154.4216, "eval_samples_per_second": 37.495, "eval_steps_per_second": 3.128, "step": 4750 }, { "dpo_loss": 0.6871371269226074, "epoch": 8.984411903637223, "grad_norm": 62.54869581796573, "learning_rate": 1.6050768056311033e-08, "logits": -1.1841161251068115, "logps": -87.77922058105469, "loss": 0.0875, "objective": 0.08825816214084625, "ranking_idealized": 0.543749988079071, "ranking_idealized_expo": 0.53125, "ranking_simple": 0.53125, "regularize": 0.0195444468408823, "step": 4755 }, { "dpo_loss": 0.6882451176643372, "epoch": 8.993859234766179, "grad_norm": 69.0916030923187, "learning_rate": 1.5761258564045838e-08, "logits": -1.2951503992080688, "logps": -88.92320251464844, "loss": 0.0891, "objective": 0.08905212581157684, "ranking_idealized": 0.59375, "ranking_idealized_expo": 0.550000011920929, "ranking_simple": 0.550000011920929, "regularize": 0.020227614790201187, "step": 4760 }, { "dpo_loss": 0.6898214817047119, "epoch": 9.003306565895135, "grad_norm": 62.98625686677879, "learning_rate": 1.547429884598478e-08, "logits": -1.1931416988372803, "logps": -87.09309387207031, "loss": 0.0897, "objective": 0.09113426506519318, "ranking_idealized": 0.550000011920929, "ranking_idealized_expo": 0.53125, "ranking_simple": 0.53125, "regularize": 0.02215210348367691, "step": 4765 }, { "dpo_loss": 0.6886254549026489, "epoch": 9.01275389702409, "grad_norm": 62.10509231599403, "learning_rate": 1.5189892025789047e-08, "logits": -1.2121164798736572, "logps": -88.56450653076172, "loss": 0.0865, "objective": 0.08668197691440582, "ranking_idealized": 0.5375000238418579, "ranking_idealized_expo": 0.512499988079071, "ranking_simple": 0.512499988079071, "regularize": 0.01781943440437317, "step": 4770 }, { "dpo_loss": 0.6889467239379883, "epoch": 9.022201228153047, "grad_norm": 76.6790638644144, "learning_rate": 1.4908041199330785e-08, "logits": -1.2352068424224854, "logps": -86.6780776977539, "loss": 0.0862, "objective": 0.08484284579753876, "ranking_idealized": 0.5249999761581421, "ranking_idealized_expo": 0.48750001192092896, "ranking_simple": 0.48750001192092896, "regularize": 0.015948180109262466, "step": 4775 }, { "dpo_loss": 0.6895532608032227, "epoch": 9.031648559282003, "grad_norm": 68.78517673168774, "learning_rate": 1.4628749434659082e-08, "logits": -1.2529716491699219, "logps": -88.34037780761719, "loss": 0.0879, "objective": 0.08553078025579453, "ranking_idealized": 0.4937500059604645, "ranking_idealized_expo": 0.4749999940395355, "ranking_simple": 0.4749999940395355, "regularize": 0.01657545007765293, "step": 4780 }, { "dpo_loss": 0.6879485845565796, "epoch": 9.04109589041096, "grad_norm": 67.0168472829834, "learning_rate": 1.4352019771966707e-08, "logits": -1.2469146251678467, "logps": -87.65064239501953, "loss": 0.0876, "objective": 0.0845494493842125, "ranking_idealized": 0.5062500238418579, "ranking_idealized_expo": 0.4937500059604645, "ranking_simple": 0.48750001192092896, "regularize": 0.015754589810967445, "step": 4785 }, { "dpo_loss": 0.6894162893295288, "epoch": 9.050543221539915, "grad_norm": 66.53150175740558, "learning_rate": 1.4077855223557117e-08, "logits": -1.2380765676498413, "logps": -89.157958984375, "loss": 0.0883, "objective": 0.08610600978136063, "ranking_idealized": 0.550000011920929, "ranking_idealized_expo": 0.5375000238418579, "ranking_simple": 0.5375000238418579, "regularize": 0.017164375633001328, "step": 4790 }, { "dpo_loss": 0.6880242824554443, "epoch": 9.059990552668872, "grad_norm": 63.93016707524342, "learning_rate": 1.3806258773811475e-08, "logits": -1.180908441543579, "logps": -87.93473052978516, "loss": 0.0898, "objective": 0.09052032232284546, "ranking_idealized": 0.5249999761581421, "ranking_idealized_expo": 0.48124998807907104, "ranking_simple": 0.48124998807907104, "regularize": 0.021717887371778488, "step": 4795 }, { "dpo_loss": 0.6889215707778931, "epoch": 9.069437883797827, "grad_norm": 62.80651843785913, "learning_rate": 1.3537233379156298e-08, "logits": -1.2439289093017578, "logps": -86.09716796875, "loss": 0.0877, "objective": 0.085892453789711, "ranking_idealized": 0.59375, "ranking_idealized_expo": 0.53125, "ranking_simple": 0.5249999761581421, "regularize": 0.017000291496515274, "step": 4800 }, { "epoch": 9.069437883797827, "eval_dpo_loss": 0.7083116173744202, "eval_logits": -1.1891411542892456, "eval_logps": -93.75226593017578, "eval_loss": 0.34253185987472534, "eval_objective": 0.34146299958229065, "eval_ranking_idealized": 0.5398550629615784, "eval_ranking_idealized_expo": 0.5243270993232727, "eval_ranking_simple": 0.5196687579154968, "eval_regularize": 0.2706318497657776, "eval_runtime": 155.8804, "eval_samples_per_second": 37.144, "eval_steps_per_second": 3.099, "step": 4800 }, { "dpo_loss": 0.6885039210319519, "epoch": 9.078885214926784, "grad_norm": 66.81682953585893, "learning_rate": 1.3270781968031109e-08, "logits": -1.2414919137954712, "logps": -88.50529479980469, "loss": 0.0875, "objective": 0.09152210503816605, "ranking_idealized": 0.53125, "ranking_idealized_expo": 0.4937500059604645, "ranking_simple": 0.5062500238418579, "regularize": 0.022671718150377274, "step": 4805 }, { "dpo_loss": 0.6892110109329224, "epoch": 9.088332546055739, "grad_norm": 65.02550523359547, "learning_rate": 1.300690744085689e-08, "logits": -1.1340454816818237, "logps": -86.27638244628906, "loss": 0.0862, "objective": 0.08667820692062378, "ranking_idealized": 0.543749988079071, "ranking_idealized_expo": 0.5062500238418579, "ranking_simple": 0.5062500238418579, "regularize": 0.017757095396518707, "step": 4810 }, { "dpo_loss": 0.6907178163528442, "epoch": 9.097779877184696, "grad_norm": 68.9857610536704, "learning_rate": 1.2745612670004152e-08, "logits": -1.1740095615386963, "logps": -87.9771957397461, "loss": 0.086, "objective": 0.08404810726642609, "ranking_idealized": 0.59375, "ranking_idealized_expo": 0.550000011920929, "ranking_simple": 0.550000011920929, "regularize": 0.014976325444877148, "step": 4815 }, { "dpo_loss": 0.6885272264480591, "epoch": 9.10722720831365, "grad_norm": 63.104116776332255, "learning_rate": 1.2486900499761894e-08, "logits": -1.2118504047393799, "logps": -88.93104553222656, "loss": 0.0882, "objective": 0.08766726404428482, "ranking_idealized": 0.518750011920929, "ranking_idealized_expo": 0.4937500059604645, "ranking_simple": 0.4937500059604645, "regularize": 0.01881454512476921, "step": 4820 }, { "dpo_loss": 0.6883677840232849, "epoch": 9.116674539442608, "grad_norm": 64.89314557005927, "learning_rate": 1.223077374630646e-08, "logits": -1.1904551982879639, "logps": -89.31713104248047, "loss": 0.0865, "objective": 0.08571527153253555, "ranking_idealized": 0.6000000238418579, "ranking_idealized_expo": 0.53125, "ranking_simple": 0.53125, "regularize": 0.016878489404916763, "step": 4825 }, { "dpo_loss": 0.6899958848953247, "epoch": 9.126121870571563, "grad_norm": 63.79886310014757, "learning_rate": 1.1977235197671104e-08, "logits": -1.2142480611801147, "logps": -88.31678771972656, "loss": 0.0862, "objective": 0.08748110383749008, "ranking_idealized": 0.574999988079071, "ranking_idealized_expo": 0.5375000238418579, "ranking_simple": 0.5375000238418579, "regularize": 0.018481507897377014, "step": 4830 }, { "dpo_loss": 0.688530445098877, "epoch": 9.13556920170052, "grad_norm": 64.41359209913031, "learning_rate": 1.1726287613715441e-08, "logits": -1.1784677505493164, "logps": -87.01527404785156, "loss": 0.088, "objective": 0.08827238529920578, "ranking_idealized": 0.4937500059604645, "ranking_idealized_expo": 0.5, "ranking_simple": 0.5, "regularize": 0.019419338554143906, "step": 4835 }, { "dpo_loss": 0.6870613098144531, "epoch": 9.145016532829477, "grad_norm": 67.04834827209164, "learning_rate": 1.147793372609554e-08, "logits": -1.1923383474349976, "logps": -90.55400848388672, "loss": 0.0868, "objective": 0.0859370082616806, "ranking_idealized": 0.550000011920929, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.518750011920929, "regularize": 0.017230866476893425, "step": 4840 }, { "dpo_loss": 0.6901251673698425, "epoch": 9.154463863958432, "grad_norm": 65.08175102578413, "learning_rate": 1.1232176238234109e-08, "logits": -1.1326186656951904, "logps": -87.73515319824219, "loss": 0.0845, "objective": 0.08397001028060913, "ranking_idealized": 0.5625, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.53125, "regularize": 0.014957490377128124, "step": 4845 }, { "dpo_loss": 0.6900922060012817, "epoch": 9.163911195087389, "grad_norm": 69.47769734946932, "learning_rate": 1.0989017825291159e-08, "logits": -1.2422475814819336, "logps": -90.37779235839844, "loss": 0.0862, "objective": 0.08490969240665436, "ranking_idealized": 0.606249988079071, "ranking_idealized_expo": 0.574999988079071, "ranking_simple": 0.574999988079071, "regularize": 0.015900466591119766, "step": 4850 }, { "epoch": 9.163911195087389, "eval_dpo_loss": 0.7081762552261353, "eval_logits": -1.1894125938415527, "eval_logps": -93.8492431640625, "eval_loss": 0.34226810932159424, "eval_objective": 0.340609073638916, "eval_ranking_idealized": 0.5398550629615784, "eval_ranking_idealized_expo": 0.5243270993232727, "eval_ranking_simple": 0.520703911781311, "eval_regularize": 0.26979145407676697, "eval_runtime": 157.3259, "eval_samples_per_second": 36.803, "eval_steps_per_second": 3.07, "step": 4850 }, { "dpo_loss": 0.6883929967880249, "epoch": 9.173358526216344, "grad_norm": 67.70285222701611, "learning_rate": 1.0748461134134746e-08, "logits": -1.1807101964950562, "logps": -88.82564544677734, "loss": 0.0851, "objective": 0.08623015880584717, "ranking_idealized": 0.550000011920929, "ranking_idealized_expo": 0.53125, "ranking_simple": 0.53125, "regularize": 0.017390865832567215, "step": 4855 }, { "dpo_loss": 0.6887978911399841, "epoch": 9.1828058573453, "grad_norm": 70.304720336417, "learning_rate": 1.0510508783312221e-08, "logits": -1.1540125608444214, "logps": -88.77113342285156, "loss": 0.0862, "objective": 0.0866982564330101, "ranking_idealized": 0.53125, "ranking_idealized_expo": 0.4937500059604645, "ranking_simple": 0.5, "regularize": 0.017818449065089226, "step": 4860 }, { "dpo_loss": 0.689242959022522, "epoch": 9.192253188474256, "grad_norm": 71.42256439142606, "learning_rate": 1.0275163363021865e-08, "logits": -1.275807499885559, "logps": -87.94988250732422, "loss": 0.0878, "objective": 0.09097422659397125, "ranking_idealized": 0.5562499761581421, "ranking_idealized_expo": 0.53125, "ranking_simple": 0.5375000238418579, "regularize": 0.0220499224960804, "step": 4865 }, { "dpo_loss": 0.6900553107261658, "epoch": 9.201700519603213, "grad_norm": 63.33565072444968, "learning_rate": 1.0042427435084433e-08, "logits": -1.1935269832611084, "logps": -89.87028503417969, "loss": 0.0855, "objective": 0.08483897894620895, "ranking_idealized": 0.543749988079071, "ranking_idealized_expo": 0.518750011920929, "ranking_simple": 0.5062500238418579, "regularize": 0.01583344303071499, "step": 4870 }, { "dpo_loss": 0.6895790100097656, "epoch": 9.211147850732168, "grad_norm": 66.58911224536496, "learning_rate": 9.812303532915483e-09, "logits": -1.0958967208862305, "logps": -88.036376953125, "loss": 0.086, "objective": 0.08777010440826416, "ranking_idealized": 0.637499988079071, "ranking_idealized_expo": 0.6312500238418579, "ranking_simple": 0.6312500238418579, "regularize": 0.018812203779816628, "step": 4875 }, { "dpo_loss": 0.6897131204605103, "epoch": 9.220595181861125, "grad_norm": 67.84574166096418, "learning_rate": 9.584794161497656e-09, "logits": -1.234770655632019, "logps": -88.74861145019531, "loss": 0.0879, "objective": 0.08676841855049133, "ranking_idealized": 0.550000011920929, "ranking_idealized_expo": 0.53125, "ranking_simple": 0.53125, "regularize": 0.017797108739614487, "step": 4880 }, { "dpo_loss": 0.6893714666366577, "epoch": 9.23004251299008, "grad_norm": 65.59648173263587, "learning_rate": 9.359901797353465e-09, "logits": -1.24154794216156, "logps": -84.87870025634766, "loss": 0.0879, "objective": 0.08753690123558044, "ranking_idealized": 0.581250011920929, "ranking_idealized_expo": 0.53125, "ranking_simple": 0.5375000238418579, "regularize": 0.01859975792467594, "step": 4885 }, { "dpo_loss": 0.6893272399902344, "epoch": 9.239489844119037, "grad_norm": 70.44821865321937, "learning_rate": 9.13762888851849e-09, "logits": -1.277855634689331, "logps": -87.74569702148438, "loss": 0.0865, "objective": 0.0885370597243309, "ranking_idealized": 0.5375000238418579, "ranking_idealized_expo": 0.53125, "ranking_simple": 0.53125, "regularize": 0.019604332745075226, "step": 4890 }, { "dpo_loss": 0.6910862326622009, "epoch": 9.248937175247992, "grad_norm": 67.9554426716988, "learning_rate": 8.917977854514452e-09, "logits": -1.1983087062835693, "logps": -88.98271179199219, "loss": 0.0881, "objective": 0.08856123685836792, "ranking_idealized": 0.48750001192092896, "ranking_idealized_expo": 0.4375, "ranking_simple": 0.4375, "regularize": 0.01945260725915432, "step": 4895 }, { "dpo_loss": 0.6897703409194946, "epoch": 9.258384506376949, "grad_norm": 66.2538061963501, "learning_rate": 8.700951086323016e-09, "logits": -1.2099605798721313, "logps": -88.5633773803711, "loss": 0.0856, "objective": 0.08235849440097809, "ranking_idealized": 0.4749999940395355, "ranking_idealized_expo": 0.46875, "ranking_simple": 0.4625000059604645, "regularize": 0.013381464406847954, "step": 4900 }, { "epoch": 9.258384506376949, "eval_dpo_loss": 0.7080788016319275, "eval_logits": -1.1882998943328857, "eval_logps": -93.84528350830078, "eval_loss": 0.3417190909385681, "eval_objective": 0.3403823971748352, "eval_ranking_idealized": 0.5398550629615784, "eval_ranking_idealized_expo": 0.5243270993232727, "eval_ranking_simple": 0.5196687579154968, "eval_regularize": 0.26957446336746216, "eval_runtime": 155.2113, "eval_samples_per_second": 37.304, "eval_steps_per_second": 3.112, "step": 4900 }, { "dpo_loss": 0.6926195025444031, "epoch": 9.267831837505904, "grad_norm": 62.961062924907445, "learning_rate": 8.486550946359777e-09, "logits": -1.233040452003479, "logps": -85.98373413085938, "loss": 0.0863, "objective": 0.08583666384220123, "ranking_idealized": 0.4937500059604645, "ranking_idealized_expo": 0.4437499940395355, "ranking_simple": 0.4437499940395355, "regularize": 0.01657470501959324, "step": 4905 }, { "dpo_loss": 0.6889017820358276, "epoch": 9.27727916863486, "grad_norm": 68.99308296487503, "learning_rate": 8.274779768448482e-09, "logits": -1.1674201488494873, "logps": -86.79928588867188, "loss": 0.0858, "objective": 0.08492042124271393, "ranking_idealized": 0.5562499761581421, "ranking_idealized_expo": 0.518750011920929, "ranking_simple": 0.518750011920929, "regularize": 0.016030244529247284, "step": 4910 }, { "dpo_loss": 0.6884921789169312, "epoch": 9.286726499763816, "grad_norm": 68.69978118047693, "learning_rate": 8.065639857795791e-09, "logits": -1.1256113052368164, "logps": -87.06742095947266, "loss": 0.0876, "objective": 0.08750703185796738, "ranking_idealized": 0.5062500238418579, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.5249999761581421, "regularize": 0.01865781843662262, "step": 4915 }, { "dpo_loss": 0.6900587677955627, "epoch": 9.296173830892773, "grad_norm": 64.29991429398892, "learning_rate": 7.85913349096587e-09, "logits": -1.2107371091842651, "logps": -89.1092529296875, "loss": 0.0864, "objective": 0.08681637793779373, "ranking_idealized": 0.5687500238418579, "ranking_idealized_expo": 0.5625, "ranking_simple": 0.5562499761581421, "regularize": 0.017810503020882607, "step": 4920 }, { "dpo_loss": 0.6903694868087769, "epoch": 9.30562116202173, "grad_norm": 68.53547054793829, "learning_rate": 7.655262915855998e-09, "logits": -1.1062363386154175, "logps": -87.64759063720703, "loss": 0.0856, "objective": 0.08366571366786957, "ranking_idealized": 0.581250011920929, "ranking_idealized_expo": 0.543749988079071, "ranking_simple": 0.543749988079071, "regularize": 0.014628760516643524, "step": 4925 }, { "dpo_loss": 0.6893365979194641, "epoch": 9.315068493150685, "grad_norm": 66.0146634728705, "learning_rate": 7.454030351671748e-09, "logits": -1.1078659296035767, "logps": -90.8432388305664, "loss": 0.087, "objective": 0.08709698170423508, "ranking_idealized": 0.5874999761581421, "ranking_idealized_expo": 0.5625, "ranking_simple": 0.5687500238418579, "regularize": 0.018163317814469337, "step": 4930 }, { "dpo_loss": 0.6889593005180359, "epoch": 9.324515824279642, "grad_norm": 69.06842401030131, "learning_rate": 7.255437988903046e-09, "logits": -1.1686851978302002, "logps": -90.20281219482422, "loss": 0.0859, "objective": 0.08375723659992218, "ranking_idealized": 0.53125, "ranking_idealized_expo": 0.518750011920929, "ranking_simple": 0.518750011920929, "regularize": 0.014861305244266987, "step": 4935 }, { "dpo_loss": 0.6888924837112427, "epoch": 9.333963155408597, "grad_norm": 65.29947499443276, "learning_rate": 7.059487989300256e-09, "logits": -1.1724879741668701, "logps": -86.79241943359375, "loss": 0.0857, "objective": 0.08658678084611893, "ranking_idealized": 0.625, "ranking_idealized_expo": 0.5562499761581421, "ranking_simple": 0.5562499761581421, "regularize": 0.017697524279356003, "step": 4940 }, { "dpo_loss": 0.6898183822631836, "epoch": 9.343410486537554, "grad_norm": 64.63096002919885, "learning_rate": 6.866182485850663e-09, "logits": -1.15725576877594, "logps": -88.48844909667969, "loss": 0.0863, "objective": 0.08518817275762558, "ranking_idealized": 0.5562499761581421, "ranking_idealized_expo": 0.53125, "ranking_simple": 0.53125, "regularize": 0.016206329688429832, "step": 4945 }, { "dpo_loss": 0.6934045553207397, "epoch": 9.352857817666509, "grad_norm": 69.34382914541001, "learning_rate": 6.6755235827552215e-09, "logits": -1.1436889171600342, "logps": -89.59980773925781, "loss": 0.0883, "objective": 0.08872490376234055, "ranking_idealized": 0.4937500059604645, "ranking_idealized_expo": 0.4625000059604645, "ranking_simple": 0.4625000059604645, "regularize": 0.019384462386369705, "step": 4950 }, { "epoch": 9.352857817666509, "eval_dpo_loss": 0.7080335021018982, "eval_logits": -1.1885579824447632, "eval_logps": -93.87728118896484, "eval_loss": 0.3414333164691925, "eval_objective": 0.3400879502296448, "eval_ranking_idealized": 0.5398550629615784, "eval_ranking_idealized_expo": 0.5243270993232727, "eval_ranking_simple": 0.5201863646507263, "eval_regularize": 0.26928460597991943, "eval_runtime": 154.478, "eval_samples_per_second": 37.481, "eval_steps_per_second": 3.127, "step": 4950 }, { "dpo_loss": 0.6904705762863159, "epoch": 9.362305148795466, "grad_norm": 66.85204937019242, "learning_rate": 6.487513355405705e-09, "logits": -1.117248773574829, "logps": -88.00492858886719, "loss": 0.0857, "objective": 0.08591008931398392, "ranking_idealized": 0.5062500238418579, "ranking_idealized_expo": 0.5062500238418579, "ranking_simple": 0.5062500238418579, "regularize": 0.016863025724887848, "step": 4955 }, { "dpo_loss": 0.6898153424263, "epoch": 9.371752479924421, "grad_norm": 62.29407776791308, "learning_rate": 6.3021538503620075e-09, "logits": -1.203830599784851, "logps": -90.52922821044922, "loss": 0.0855, "objective": 0.08534902334213257, "ranking_idealized": 0.581250011920929, "ranking_idealized_expo": 0.53125, "ranking_simple": 0.53125, "regularize": 0.016367487609386444, "step": 4960 }, { "dpo_loss": 0.6877592206001282, "epoch": 9.381199811053378, "grad_norm": 62.92032475377164, "learning_rate": 6.119447085330048e-09, "logits": -1.2359322309494019, "logps": -89.5292739868164, "loss": 0.0884, "objective": 0.0865122601389885, "ranking_idealized": 0.5562499761581421, "ranking_idealized_expo": 0.518750011920929, "ranking_simple": 0.518750011920929, "regularize": 0.017736343666911125, "step": 4965 }, { "dpo_loss": 0.6872529983520508, "epoch": 9.390647142182333, "grad_norm": 69.50126731961326, "learning_rate": 5.939395049139678e-09, "logits": -1.2220096588134766, "logps": -89.81663513183594, "loss": 0.0871, "objective": 0.0878627598285675, "ranking_idealized": 0.5, "ranking_idealized_expo": 0.4937500059604645, "ranking_simple": 0.4937500059604645, "regularize": 0.019137457013130188, "step": 4970 }, { "dpo_loss": 0.689234733581543, "epoch": 9.40009447331129, "grad_norm": 67.6468971762299, "learning_rate": 5.761999701723003e-09, "logits": -1.2347389459609985, "logps": -88.4090347290039, "loss": 0.0856, "objective": 0.08394335210323334, "ranking_idealized": 0.5687500238418579, "ranking_idealized_expo": 0.543749988079071, "ranking_simple": 0.543749988079071, "regularize": 0.015019873157143593, "step": 4975 }, { "dpo_loss": 0.689188539981842, "epoch": 9.409541804440245, "grad_norm": 69.29456704780728, "learning_rate": 5.5872629740931765e-09, "logits": -1.157482624053955, "logps": -87.40362548828125, "loss": 0.0838, "objective": 0.08270417898893356, "ranking_idealized": 0.637499988079071, "ranking_idealized_expo": 0.59375, "ranking_simple": 0.59375, "regularize": 0.013785332441329956, "step": 4980 }, { "dpo_loss": 0.6888192892074585, "epoch": 9.418989135569202, "grad_norm": 64.98356997489397, "learning_rate": 5.415186768323171e-09, "logits": -1.21011483669281, "logps": -87.56295013427734, "loss": 0.0864, "objective": 0.08518704771995544, "ranking_idealized": 0.581250011920929, "ranking_idealized_expo": 0.512499988079071, "ranking_simple": 0.5062500238418579, "regularize": 0.01630512624979019, "step": 4985 }, { "dpo_loss": 0.6896821856498718, "epoch": 9.428436466698157, "grad_norm": 78.49982821743039, "learning_rate": 5.245772957525402e-09, "logits": -1.18148672580719, "logps": -88.74088287353516, "loss": 0.0861, "objective": 0.08624227344989777, "ranking_idealized": 0.543749988079071, "ranking_idealized_expo": 0.48750001192092896, "ranking_simple": 0.4937500059604645, "regularize": 0.017274051904678345, "step": 4990 }, { "dpo_loss": 0.6909292936325073, "epoch": 9.437883797827114, "grad_norm": 65.18298031190771, "learning_rate": 5.079023385830938e-09, "logits": -1.1286699771881104, "logps": -87.91972351074219, "loss": 0.0849, "objective": 0.08399278670549393, "ranking_idealized": 0.581250011920929, "ranking_idealized_expo": 0.5874999761581421, "ranking_simple": 0.581250011920929, "regularize": 0.014899860136210918, "step": 4995 }, { "dpo_loss": 0.6874872446060181, "epoch": 9.447331128956069, "grad_norm": 70.39639898119631, "learning_rate": 4.914939868369855e-09, "logits": -1.1657907962799072, "logps": -87.12093353271484, "loss": 0.0866, "objective": 0.08511563390493393, "ranking_idealized": 0.5562499761581421, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.53125, "regularize": 0.016366904601454735, "step": 5000 }, { "epoch": 9.447331128956069, "eval_dpo_loss": 0.7080661654472351, "eval_logits": -1.1880360841751099, "eval_logps": -93.85932922363281, "eval_loss": 0.3414228558540344, "eval_objective": 0.34020325541496277, "eval_ranking_idealized": 0.5398550629615784, "eval_ranking_idealized_expo": 0.5243270993232727, "eval_ranking_simple": 0.5196687579154968, "eval_regularize": 0.26939666271209717, "eval_runtime": 154.8973, "eval_samples_per_second": 37.38, "eval_steps_per_second": 3.118, "step": 5000 }, { "dpo_loss": 0.6903454661369324, "epoch": 9.456778460085026, "grad_norm": 68.84991082457346, "learning_rate": 4.753524191251052e-09, "logits": -1.2496955394744873, "logps": -87.48423767089844, "loss": 0.0846, "objective": 0.08353155106306076, "ranking_idealized": 0.5625, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.518750011920929, "regularize": 0.014497010037302971, "step": 5005 }, { "dpo_loss": 0.6901063323020935, "epoch": 9.466225791213983, "grad_norm": 66.2623062946029, "learning_rate": 4.5947781115431015e-09, "logits": -1.1387720108032227, "logps": -87.15850830078125, "loss": 0.0857, "objective": 0.08711527287960052, "ranking_idealized": 0.46875, "ranking_idealized_expo": 0.4375, "ranking_simple": 0.4437499940395355, "regularize": 0.018104637041687965, "step": 5010 }, { "dpo_loss": 0.6906918287277222, "epoch": 9.475673122342938, "grad_norm": 71.40206387464819, "learning_rate": 4.438703357255047e-09, "logits": -1.1895406246185303, "logps": -89.1908950805664, "loss": 0.0843, "objective": 0.08377130329608917, "ranking_idealized": 0.518750011920929, "ranking_idealized_expo": 0.48750001192092896, "ranking_simple": 0.4937500059604645, "regularize": 0.014702118933200836, "step": 5015 }, { "dpo_loss": 0.6910804510116577, "epoch": 9.485120453471895, "grad_norm": 63.96037085446238, "learning_rate": 4.285301627317578e-09, "logits": -1.2459213733673096, "logps": -90.33760833740234, "loss": 0.0857, "objective": 0.08324258774518967, "ranking_idealized": 0.581250011920929, "ranking_idealized_expo": 0.550000011920929, "ranking_simple": 0.5562499761581421, "regularize": 0.0141345439478755, "step": 5020 }, { "dpo_loss": 0.6904935240745544, "epoch": 9.49456778460085, "grad_norm": 65.44362349914937, "learning_rate": 4.1345745915644935e-09, "logits": -1.1675902605056763, "logps": -87.94022369384766, "loss": 0.0851, "objective": 0.08558507263660431, "ranking_idealized": 0.518750011920929, "ranking_idealized_expo": 0.4749999940395355, "ranking_simple": 0.4625000059604645, "regularize": 0.016535717993974686, "step": 5025 }, { "dpo_loss": 0.6895044445991516, "epoch": 9.504015115729807, "grad_norm": 66.31698226564012, "learning_rate": 3.986523890714605e-09, "logits": -1.2078769207000732, "logps": -90.2032241821289, "loss": 0.0854, "objective": 0.08553291857242584, "ranking_idealized": 0.45625001192092896, "ranking_idealized_expo": 0.4000000059604645, "ranking_simple": 0.4000000059604645, "regularize": 0.01658247597515583, "step": 5030 }, { "dpo_loss": 0.6903690695762634, "epoch": 9.513462446858762, "grad_norm": 67.61769988129738, "learning_rate": 3.841151136353804e-09, "logits": -1.188755750656128, "logps": -88.5759506225586, "loss": 0.0862, "objective": 0.08529649674892426, "ranking_idealized": 0.550000011920929, "ranking_idealized_expo": 0.543749988079071, "ranking_simple": 0.543749988079071, "regularize": 0.016259578987956047, "step": 5035 }, { "dpo_loss": 0.6889756917953491, "epoch": 9.522909777987719, "grad_norm": 70.645766914286, "learning_rate": 3.6984579109176072e-09, "logits": -1.246675729751587, "logps": -90.35582733154297, "loss": 0.0835, "objective": 0.08555910736322403, "ranking_idealized": 0.53125, "ranking_idealized_expo": 0.48750001192092896, "ranking_simple": 0.48750001192092896, "regularize": 0.01666153408586979, "step": 5040 }, { "dpo_loss": 0.6908430457115173, "epoch": 9.532357109116674, "grad_norm": 66.7833057778863, "learning_rate": 3.5584457676738898e-09, "logits": -1.114335298538208, "logps": -89.66558837890625, "loss": 0.0873, "objective": 0.08752139657735825, "ranking_idealized": 0.574999988079071, "ranking_idealized_expo": 0.550000011920929, "ranking_simple": 0.550000011920929, "regularize": 0.018437087535858154, "step": 5045 }, { "dpo_loss": 0.6902520060539246, "epoch": 9.541804440245631, "grad_norm": 66.89948855068245, "learning_rate": 3.421116230705928e-09, "logits": -1.195339560508728, "logps": -87.08709716796875, "loss": 0.0843, "objective": 0.08465549349784851, "ranking_idealized": 0.550000011920929, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.5249999761581421, "regularize": 0.015630286186933517, "step": 5050 }, { "epoch": 9.541804440245631, "eval_dpo_loss": 0.7080516219139099, "eval_logits": -1.1880168914794922, "eval_logps": -93.8241195678711, "eval_loss": 0.34173211455345154, "eval_objective": 0.3405408561229706, "eval_ranking_idealized": 0.5398550629615784, "eval_ranking_idealized_expo": 0.5243270993232727, "eval_ranking_simple": 0.520703911781311, "eval_regularize": 0.269735723733902, "eval_runtime": 154.4639, "eval_samples_per_second": 37.484, "eval_steps_per_second": 3.127, "step": 5050 }, { "dpo_loss": 0.6894997954368591, "epoch": 9.551251771374586, "grad_norm": 67.22054719561658, "learning_rate": 3.286470794895857e-09, "logits": -1.2233834266662598, "logps": -89.50907897949219, "loss": 0.0856, "objective": 0.08771258592605591, "ranking_idealized": 0.6000000238418579, "ranking_idealized_expo": 0.59375, "ranking_simple": 0.5874999761581421, "regularize": 0.018762610852718353, "step": 5055 }, { "dpo_loss": 0.6897779107093811, "epoch": 9.560699102503543, "grad_norm": 74.81621860274835, "learning_rate": 3.154510925908349e-09, "logits": -1.0932340621948242, "logps": -85.86308288574219, "loss": 0.0842, "objective": 0.08236806094646454, "ranking_idealized": 0.581250011920929, "ranking_idealized_expo": 0.5625, "ranking_simple": 0.5625, "regularize": 0.01339026726782322, "step": 5060 }, { "dpo_loss": 0.6881380081176758, "epoch": 9.570146433632498, "grad_norm": 68.7579606426006, "learning_rate": 3.025238060174795e-09, "logits": -1.2063125371932983, "logps": -90.78138732910156, "loss": 0.0857, "objective": 0.08496876806020737, "ranking_idealized": 0.5062500238418579, "ranking_idealized_expo": 0.4375, "ranking_simple": 0.4375, "regularize": 0.01615496352314949, "step": 5065 }, { "dpo_loss": 0.6897650957107544, "epoch": 9.579593764761455, "grad_norm": 65.67261342906198, "learning_rate": 2.898653604877538e-09, "logits": -1.17462158203125, "logps": -89.63032531738281, "loss": 0.088, "objective": 0.09223799407482147, "ranking_idealized": 0.48124998807907104, "ranking_idealized_expo": 0.48750001192092896, "ranking_simple": 0.48750001192092896, "regularize": 0.02326149307191372, "step": 5070 }, { "dpo_loss": 0.6891880631446838, "epoch": 9.58904109589041, "grad_norm": 66.21421194658043, "learning_rate": 2.7747589379345514e-09, "logits": -1.1750421524047852, "logps": -88.36298370361328, "loss": 0.0865, "objective": 0.08409323543310165, "ranking_idealized": 0.53125, "ranking_idealized_expo": 0.5, "ranking_simple": 0.5, "regularize": 0.015174436382949352, "step": 5075 }, { "dpo_loss": 0.690551221370697, "epoch": 9.598488427019367, "grad_norm": 71.18902970881412, "learning_rate": 2.653555407984509e-09, "logits": -1.1492376327514648, "logps": -88.63973236083984, "loss": 0.0851, "objective": 0.08727878332138062, "ranking_idealized": 0.53125, "ranking_idealized_expo": 0.5, "ranking_simple": 0.5, "regularize": 0.018223661929368973, "step": 5080 }, { "dpo_loss": 0.689854085445404, "epoch": 9.607935758148322, "grad_norm": 70.01820762620369, "learning_rate": 2.535044334372072e-09, "logits": -1.2707594633102417, "logps": -88.87725830078125, "loss": 0.0843, "objective": 0.08458703011274338, "ranking_idealized": 0.5, "ranking_idealized_expo": 0.48124998807907104, "ranking_simple": 0.48124998807907104, "regularize": 0.0156016256660223, "step": 5085 }, { "dpo_loss": 0.6904751062393188, "epoch": 9.61738308927728, "grad_norm": 68.87022111268584, "learning_rate": 2.4192270071335676e-09, "logits": -1.2080779075622559, "logps": -87.8804702758789, "loss": 0.0867, "objective": 0.0866638720035553, "ranking_idealized": 0.581250011920929, "ranking_idealized_expo": 0.543749988079071, "ranking_simple": 0.543749988079071, "regularize": 0.01761636883020401, "step": 5090 }, { "dpo_loss": 0.6896811723709106, "epoch": 9.626830420406236, "grad_norm": 68.19822090549287, "learning_rate": 2.3061046869828903e-09, "logits": -1.2898871898651123, "logps": -91.15736389160156, "loss": 0.0866, "objective": 0.0855109840631485, "ranking_idealized": 0.550000011920929, "ranking_idealized_expo": 0.53125, "ranking_simple": 0.5375000238418579, "regularize": 0.01654285192489624, "step": 5095 }, { "dpo_loss": 0.691383421421051, "epoch": 9.636277751535191, "grad_norm": 69.69113013678992, "learning_rate": 2.195678605297735e-09, "logits": -1.2627365589141846, "logps": -91.48534393310547, "loss": 0.0862, "objective": 0.08452753722667694, "ranking_idealized": 0.550000011920929, "ranking_idealized_expo": 0.53125, "ranking_simple": 0.53125, "regularize": 0.015389196574687958, "step": 5100 }, { "epoch": 9.636277751535191, "eval_dpo_loss": 0.7081097364425659, "eval_logits": -1.1884174346923828, "eval_logps": -93.82679748535156, "eval_loss": 0.34187451004981995, "eval_objective": 0.3404352366924286, "eval_ranking_idealized": 0.5398550629615784, "eval_ranking_idealized_expo": 0.5243270993232727, "eval_ranking_simple": 0.5196687579154968, "eval_regularize": 0.2696242928504944, "eval_runtime": 154.6575, "eval_samples_per_second": 37.438, "eval_steps_per_second": 3.123, "step": 5100 }, { "dpo_loss": 0.6893877983093262, "epoch": 9.645725082664148, "grad_norm": 68.29870104426664, "learning_rate": 2.087949964106328e-09, "logits": -1.1205192804336548, "logps": -88.82121276855469, "loss": 0.0853, "objective": 0.08397676050662994, "ranking_idealized": 0.5562499761581421, "ranking_idealized_expo": 0.518750011920929, "ranking_simple": 0.518750011920929, "regularize": 0.015037978067994118, "step": 5105 }, { "dpo_loss": 0.690714418888092, "epoch": 9.655172413793103, "grad_norm": 71.86357129119948, "learning_rate": 1.9829199360742456e-09, "logits": -1.2247989177703857, "logps": -89.22515869140625, "loss": 0.0869, "objective": 0.08558894693851471, "ranking_idealized": 0.5562499761581421, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.5249999761581421, "regularize": 0.016517508774995804, "step": 5110 }, { "dpo_loss": 0.6895348429679871, "epoch": 9.66461974492206, "grad_norm": 64.50124718922551, "learning_rate": 1.8805896644916997e-09, "logits": -1.1435085535049438, "logps": -87.88530731201172, "loss": 0.0834, "objective": 0.08410073816776276, "ranking_idealized": 0.5375000238418579, "ranking_idealized_expo": 0.518750011920929, "ranking_simple": 0.518750011920929, "regularize": 0.015147256664931774, "step": 5115 }, { "dpo_loss": 0.6883980631828308, "epoch": 9.674067076051015, "grad_norm": 66.48144330096997, "learning_rate": 1.7809602632610776e-09, "logits": -1.1729907989501953, "logps": -88.49473571777344, "loss": 0.0854, "objective": 0.08492030203342438, "ranking_idealized": 0.512499988079071, "ranking_idealized_expo": 0.5, "ranking_simple": 0.5, "regularize": 0.016080478206276894, "step": 5120 }, { "dpo_loss": 0.6892448663711548, "epoch": 9.683514407179972, "grad_norm": 66.87664115159657, "learning_rate": 1.6840328168847284e-09, "logits": -1.2348103523254395, "logps": -88.95825958251953, "loss": 0.0851, "objective": 0.08360938727855682, "ranking_idealized": 0.59375, "ranking_idealized_expo": 0.581250011920929, "ranking_simple": 0.581250011920929, "regularize": 0.014684900641441345, "step": 5125 }, { "dpo_loss": 0.6895814538002014, "epoch": 9.692961738308927, "grad_norm": 68.06390176075568, "learning_rate": 1.5898083804533059e-09, "logits": -1.1406044960021973, "logps": -88.6418228149414, "loss": 0.0858, "objective": 0.0848851129412651, "ranking_idealized": 0.5, "ranking_idealized_expo": 0.4749999940395355, "ranking_simple": 0.4749999940395355, "regularize": 0.015926973894238472, "step": 5130 }, { "dpo_loss": 0.6889828443527222, "epoch": 9.702409069437884, "grad_norm": 66.21295619445583, "learning_rate": 1.4982879796341662e-09, "logits": -1.1915266513824463, "logps": -87.96595764160156, "loss": 0.0851, "objective": 0.08489920198917389, "ranking_idealized": 0.518750011920929, "ranking_idealized_expo": 0.48750001192092896, "ranking_simple": 0.48750001192092896, "regularize": 0.016000913456082344, "step": 5135 }, { "dpo_loss": 0.6904030442237854, "epoch": 9.71185640056684, "grad_norm": 70.56807563983075, "learning_rate": 1.4094726106603504e-09, "logits": -1.2663993835449219, "logps": -88.0069808959961, "loss": 0.0841, "objective": 0.08480189740657806, "ranking_idealized": 0.518750011920929, "ranking_idealized_expo": 0.5, "ranking_simple": 0.5, "regularize": 0.015761589631438255, "step": 5140 }, { "dpo_loss": 0.6891473531723022, "epoch": 9.721303731695796, "grad_norm": 65.03693590968845, "learning_rate": 1.3233632403195083e-09, "logits": -1.1926839351654053, "logps": -89.36128997802734, "loss": 0.0865, "objective": 0.0885232537984848, "ranking_idealized": 0.5687500238418579, "ranking_idealized_expo": 0.543749988079071, "ranking_simple": 0.5375000238418579, "regularize": 0.019608518108725548, "step": 5145 }, { "dpo_loss": 0.6895589232444763, "epoch": 9.730751062824751, "grad_norm": 68.29866148384843, "learning_rate": 1.239960805943574e-09, "logits": -1.199296474456787, "logps": -87.64341735839844, "loss": 0.0851, "objective": 0.08560691028833389, "ranking_idealized": 0.5625, "ranking_idealized_expo": 0.53125, "ranking_simple": 0.5249999761581421, "regularize": 0.016651010140776634, "step": 5150 }, { "epoch": 9.730751062824751, "eval_dpo_loss": 0.7081645131111145, "eval_logits": -1.1881499290466309, "eval_logps": -93.82471466064453, "eval_loss": 0.3417753279209137, "eval_objective": 0.34053075313568115, "eval_ranking_idealized": 0.5398550629615784, "eval_ranking_idealized_expo": 0.5243270993232727, "eval_ranking_simple": 0.5191511511802673, "eval_regularize": 0.2697142958641052, "eval_runtime": 154.0822, "eval_samples_per_second": 37.577, "eval_steps_per_second": 3.135, "step": 5150 }, { "dpo_loss": 0.6904730200767517, "epoch": 9.740198393953708, "grad_norm": 66.34791283998516, "learning_rate": 1.1592662153985245e-09, "logits": -1.1837701797485352, "logps": -89.02965545654297, "loss": 0.0857, "objective": 0.08642591536045074, "ranking_idealized": 0.5562499761581421, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.518750011920929, "regularize": 0.017378611490130424, "step": 5155 }, { "dpo_loss": 0.6907302737236023, "epoch": 9.749645725082663, "grad_norm": 63.32858046981145, "learning_rate": 1.081280347074387e-09, "logits": -1.2325102090835571, "logps": -89.9759750366211, "loss": 0.0865, "objective": 0.08908991515636444, "ranking_idealized": 0.5375000238418579, "ranking_idealized_expo": 0.4937500059604645, "ranking_simple": 0.4937500059604645, "regularize": 0.020016871392726898, "step": 5160 }, { "dpo_loss": 0.6881371736526489, "epoch": 9.75909305621162, "grad_norm": 67.24496793857094, "learning_rate": 1.0060040498758583e-09, "logits": -1.274625301361084, "logps": -88.36674499511719, "loss": 0.0851, "objective": 0.08519413322210312, "ranking_idealized": 0.5625, "ranking_idealized_expo": 0.4937500059604645, "ranking_simple": 0.4937500059604645, "regularize": 0.01638040691614151, "step": 5165 }, { "dpo_loss": 0.6903113126754761, "epoch": 9.768540387340575, "grad_norm": 67.50347263114672, "learning_rate": 9.334381432128946e-10, "logits": -1.1394083499908447, "logps": -88.5200424194336, "loss": 0.086, "objective": 0.0858432799577713, "ranking_idealized": 0.518750011920929, "ranking_idealized_expo": 0.53125, "ranking_simple": 0.53125, "regularize": 0.016812153160572052, "step": 5170 }, { "dpo_loss": 0.6890208721160889, "epoch": 9.777987718469532, "grad_norm": 72.11725269729324, "learning_rate": 8.63583416991831e-10, "logits": -1.1778769493103027, "logps": -88.27137756347656, "loss": 0.0839, "objective": 0.0835142731666565, "ranking_idealized": 0.5562499761581421, "ranking_idealized_expo": 0.53125, "ranking_simple": 0.53125, "regularize": 0.01461219321936369, "step": 5175 }, { "dpo_loss": 0.6891745328903198, "epoch": 9.78743504959849, "grad_norm": 69.23279225600632, "learning_rate": 7.96440631606915e-10, "logits": -1.1942167282104492, "logps": -89.7216796875, "loss": 0.0871, "objective": 0.08584664762020111, "ranking_idealized": 0.5375000238418579, "ranking_idealized_expo": 0.5062500238418579, "ranking_simple": 0.5062500238418579, "regularize": 0.01692918874323368, "step": 5180 }, { "dpo_loss": 0.6901405453681946, "epoch": 9.796882380727444, "grad_norm": 65.88174648724309, "learning_rate": 7.320105179318414e-10, "logits": -1.2572628259658813, "logps": -89.32684326171875, "loss": 0.0849, "objective": 0.08371184766292572, "ranking_idealized": 0.53125, "ranking_idealized_expo": 0.53125, "ranking_simple": 0.53125, "regularize": 0.0146977873519063, "step": 5185 }, { "dpo_loss": 0.691258430480957, "epoch": 9.806329711856401, "grad_norm": 66.16278545830859, "learning_rate": 6.702937773119532e-10, "logits": -1.1129093170166016, "logps": -87.53398132324219, "loss": 0.0851, "objective": 0.0858643501996994, "ranking_idealized": 0.612500011920929, "ranking_idealized_expo": 0.581250011920929, "ranking_simple": 0.581250011920929, "regularize": 0.01673850044608116, "step": 5190 }, { "dpo_loss": 0.6866841316223145, "epoch": 9.815777042985356, "grad_norm": 66.81451713561216, "learning_rate": 6.112910815564698e-10, "logits": -1.1755070686340332, "logps": -91.15867614746094, "loss": 0.0856, "objective": 0.084581658244133, "ranking_idealized": 0.5625, "ranking_idealized_expo": 0.550000011920929, "ranking_simple": 0.550000011920929, "regularize": 0.01591324433684349, "step": 5195 }, { "dpo_loss": 0.6875885725021362, "epoch": 9.825224374114313, "grad_norm": 65.97696465942497, "learning_rate": 5.550030729312705e-10, "logits": -1.2532360553741455, "logps": -88.8522720336914, "loss": 0.0852, "objective": 0.08520559966564178, "ranking_idealized": 0.518750011920929, "ranking_idealized_expo": 0.48750001192092896, "ranking_simple": 0.48750001192092896, "regularize": 0.016446739435195923, "step": 5200 }, { "epoch": 9.825224374114313, "eval_dpo_loss": 0.7080548405647278, "eval_logits": -1.1885803937911987, "eval_logps": -93.82567596435547, "eval_loss": 0.34153902530670166, "eval_objective": 0.3401677906513214, "eval_ranking_idealized": 0.5398550629615784, "eval_ranking_idealized_expo": 0.5243270993232727, "eval_ranking_simple": 0.5196687579154968, "eval_regularize": 0.26936233043670654, "eval_runtime": 154.8959, "eval_samples_per_second": 37.38, "eval_steps_per_second": 3.118, "step": 5200 }, { "dpo_loss": 0.6900717616081238, "epoch": 9.834671705243268, "grad_norm": 66.45010839760512, "learning_rate": 5.014303641519002e-10, "logits": -1.2506177425384521, "logps": -88.95404815673828, "loss": 0.0867, "objective": 0.08865327388048172, "ranking_idealized": 0.5562499761581421, "ranking_idealized_expo": 0.518750011920929, "ranking_simple": 0.518750011920929, "regularize": 0.019646089524030685, "step": 5205 }, { "dpo_loss": 0.6904046535491943, "epoch": 9.844119036372225, "grad_norm": 64.94882365693898, "learning_rate": 4.505735383768527e-10, "logits": -1.1922829151153564, "logps": -88.02415466308594, "loss": 0.0894, "objective": 0.08971310406923294, "ranking_idealized": 0.48750001192092896, "ranking_idealized_expo": 0.5062500238418579, "ranking_simple": 0.5062500238418579, "regularize": 0.02067263238132, "step": 5210 }, { "dpo_loss": 0.6909619569778442, "epoch": 9.85356636750118, "grad_norm": 65.59047455604453, "learning_rate": 4.0243314920118677e-10, "logits": -1.1949775218963623, "logps": -87.81748962402344, "loss": 0.0857, "objective": 0.08444318920373917, "ranking_idealized": 0.53125, "ranking_idealized_expo": 0.5062500238418579, "ranking_simple": 0.5062500238418579, "regularize": 0.015346983447670937, "step": 5215 }, { "dpo_loss": 0.6882283687591553, "epoch": 9.863013698630137, "grad_norm": 70.42798711656708, "learning_rate": 3.5700972065066946e-10, "logits": -1.2317819595336914, "logps": -88.4919204711914, "loss": 0.0849, "objective": 0.08625314384698868, "ranking_idealized": 0.5562499761581421, "ranking_idealized_expo": 0.5, "ranking_simple": 0.5, "regularize": 0.01743030548095703, "step": 5220 }, { "dpo_loss": 0.6860737800598145, "epoch": 9.872461029759092, "grad_norm": 66.20596185239998, "learning_rate": 3.143037471758925e-10, "logits": -1.1786277294158936, "logps": -89.03819274902344, "loss": 0.0871, "objective": 0.09018915891647339, "ranking_idealized": 0.5874999761581421, "ranking_idealized_expo": 0.5687500238418579, "ranking_simple": 0.5687500238418579, "regularize": 0.021581783890724182, "step": 5225 }, { "dpo_loss": 0.6909530758857727, "epoch": 9.88190836088805, "grad_norm": 65.01306248417197, "learning_rate": 2.743156936469426e-10, "logits": -1.1898537874221802, "logps": -86.34603118896484, "loss": 0.0843, "objective": 0.08352215588092804, "ranking_idealized": 0.5687500238418579, "ranking_idealized_expo": 0.5375000238418579, "ranking_simple": 0.543749988079071, "regularize": 0.014426849782466888, "step": 5230 }, { "dpo_loss": 0.6891884803771973, "epoch": 9.891355692017004, "grad_norm": 71.66449902562967, "learning_rate": 2.370459953483783e-10, "logits": -1.1369574069976807, "logps": -86.77111053466797, "loss": 0.0852, "objective": 0.08359196782112122, "ranking_idealized": 0.606249988079071, "ranking_idealized_expo": 0.574999988079071, "ranking_simple": 0.5687500238418579, "regularize": 0.014673106372356415, "step": 5235 }, { "dpo_loss": 0.6891953349113464, "epoch": 9.900803023145961, "grad_norm": 69.17506467154475, "learning_rate": 2.024950579744833e-10, "logits": -1.2471169233322144, "logps": -89.45270538330078, "loss": 0.0848, "objective": 0.08721388876438141, "ranking_idealized": 0.5062500238418579, "ranking_idealized_expo": 0.5062500238418579, "ranking_simple": 0.5, "regularize": 0.018294354900717735, "step": 5240 }, { "dpo_loss": 0.6880627274513245, "epoch": 9.910250354274917, "grad_norm": 68.78965633618786, "learning_rate": 1.706632576248257e-10, "logits": -1.231979250907898, "logps": -89.65391540527344, "loss": 0.0869, "objective": 0.08603748679161072, "ranking_idealized": 0.5562499761581421, "ranking_idealized_expo": 0.512499988079071, "ranking_simple": 0.518750011920929, "regularize": 0.01723121479153633, "step": 5245 }, { "dpo_loss": 0.6887436509132385, "epoch": 9.919697685403873, "grad_norm": 67.12352931570797, "learning_rate": 1.4155094080017804e-10, "logits": -1.152923583984375, "logps": -88.67755889892578, "loss": 0.0873, "objective": 0.08699290454387665, "ranking_idealized": 0.574999988079071, "ranking_idealized_expo": 0.550000011920929, "ranking_simple": 0.550000011920929, "regularize": 0.018118537962436676, "step": 5250 }, { "epoch": 9.919697685403873, "eval_dpo_loss": 0.708267092704773, "eval_logits": -1.1885186433792114, "eval_logps": -93.82195281982422, "eval_loss": 0.34177881479263306, "eval_objective": 0.3403994143009186, "eval_ranking_idealized": 0.5398550629615784, "eval_ranking_idealized_expo": 0.5243270993232727, "eval_ranking_simple": 0.5196687579154968, "eval_regularize": 0.26957273483276367, "eval_runtime": 154.5383, "eval_samples_per_second": 37.466, "eval_steps_per_second": 3.125, "step": 5250 }, { "dpo_loss": 0.6899495720863342, "epoch": 9.929145016532829, "grad_norm": 66.40637873315407, "learning_rate": 1.151584243987147e-10, "logits": -1.2185667753219604, "logps": -88.12953186035156, "loss": 0.0876, "objective": 0.08826954662799835, "ranking_idealized": 0.518750011920929, "ranking_idealized_expo": 0.512499988079071, "ranking_simple": 0.512499988079071, "regularize": 0.019274592399597168, "step": 5255 }, { "dpo_loss": 0.6879016757011414, "epoch": 9.938592347661785, "grad_norm": 66.23811942100768, "learning_rate": 9.148599571262572e-11, "logits": -1.167487382888794, "logps": -89.59779357910156, "loss": 0.0873, "objective": 0.08775386959314346, "ranking_idealized": 0.5562499761581421, "ranking_idealized_expo": 0.5375000238418579, "ranking_simple": 0.53125, "regularize": 0.018963690847158432, "step": 5260 }, { "dpo_loss": 0.6897536516189575, "epoch": 9.948039678790742, "grad_norm": 70.44524154982453, "learning_rate": 7.05339124249249e-11, "logits": -1.1897327899932861, "logps": -90.16870880126953, "loss": 0.0855, "objective": 0.08374921977519989, "ranking_idealized": 0.6187499761581421, "ranking_idealized_expo": 0.5562499761581421, "ranking_simple": 0.5625, "regularize": 0.014773854985833168, "step": 5265 }, { "dpo_loss": 0.6920146942138672, "epoch": 9.957487009919697, "grad_norm": 66.76085795213623, "learning_rate": 5.2302402606702044e-11, "logits": -1.076805591583252, "logps": -91.03254699707031, "loss": 0.0876, "objective": 0.0894809290766716, "ranking_idealized": 0.5687500238418579, "ranking_idealized_expo": 0.5562499761581421, "ranking_simple": 0.5562499761581421, "regularize": 0.02027946338057518, "step": 5270 }, { "dpo_loss": 0.6896114349365234, "epoch": 9.966934341048654, "grad_norm": 70.14959939061058, "learning_rate": 3.679166471459716e-11, "logits": -1.2547842264175415, "logps": -88.06056213378906, "loss": 0.0844, "objective": 0.08376505225896835, "ranking_idealized": 0.46875, "ranking_idealized_expo": 0.4749999940395355, "ranking_simple": 0.4749999940395355, "regularize": 0.014803911559283733, "step": 5275 }, { "dpo_loss": 0.6923837661743164, "epoch": 9.97638167217761, "grad_norm": 64.43154839869437, "learning_rate": 2.4001867588663293e-11, "logits": -1.1902258396148682, "logps": -91.7777328491211, "loss": 0.0856, "objective": 0.08763132989406586, "ranking_idealized": 0.643750011920929, "ranking_idealized_expo": 0.6000000238418579, "ranking_simple": 0.6000000238418579, "regularize": 0.018392954021692276, "step": 5280 }, { "dpo_loss": 0.6896318197250366, "epoch": 9.985829003306566, "grad_norm": 65.45521151304239, "learning_rate": 1.3933150450479159e-11, "logits": -1.1867663860321045, "logps": -87.43783569335938, "loss": 0.0853, "objective": 0.0830325335264206, "ranking_idealized": 0.512499988079071, "ranking_idealized_expo": 0.46875, "ranking_simple": 0.46875, "regularize": 0.01406935416162014, "step": 5285 }, { "dpo_loss": 0.6888291239738464, "epoch": 9.995276334435522, "grad_norm": 65.46742735182674, "learning_rate": 6.585622901705834e-12, "logits": -1.1299092769622803, "logps": -89.06723022460938, "loss": 0.0866, "objective": 0.0879444032907486, "ranking_idealized": 0.5874999761581421, "ranking_idealized_expo": 0.5562499761581421, "ranking_simple": 0.5562499761581421, "regularize": 0.019061479717493057, "step": 5290 }, { "epoch": 9.995276334435522, "step": 5290, "total_flos": 0.0, "train_loss": 0.15836801591183153, "train_runtime": 49865.2618, "train_samples_per_second": 10.188, "train_steps_per_second": 0.106 } ], "logging_steps": 5, "max_steps": 5290, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": false, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }