{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.992914501653283, "eval_steps": 50, "global_step": 1056, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "dpo_loss": 0.6931471824645996, "epoch": 0.002834199338686821, "grad_norm": 88378.40404084016, "learning_rate": 4.716981132075472e-08, "logits": -1.2867579460144043, "logps": -84.34933471679688, "loss": 288.6135, "objective": 277.7129821777344, "ranking_simple": 0.5833333134651184, "regularize": 0.6931471824645996, "step": 1, "wo_beta": 5.2708282470703125 }, { "dpo_loss": 0.6890408396720886, "epoch": 0.014170996693434105, "grad_norm": 83739.33254900719, "learning_rate": 2.358490566037736e-07, "logits": -1.429946780204773, "logps": -83.76575469970703, "loss": 299.2071, "objective": 287.1035461425781, "ranking_simple": 0.4895833432674408, "regularize": 0.6890408396720886, "step": 5, "wo_beta": 7.664237976074219 }, { "dpo_loss": 0.6906710267066956, "epoch": 0.02834199338686821, "grad_norm": 87871.922788759, "learning_rate": 4.716981132075472e-07, "logits": -1.4002543687820435, "logps": -84.44983673095703, "loss": 290.579, "objective": 288.7486572265625, "ranking_simple": 0.574999988079071, "regularize": 0.6906710267066956, "step": 10, "wo_beta": 5.951157569885254 }, { "dpo_loss": 0.719412624835968, "epoch": 0.042512990080302314, "grad_norm": 104628.71690566906, "learning_rate": 7.075471698113208e-07, "logits": -1.5360398292541504, "logps": -84.26319122314453, "loss": 299.0445, "objective": 310.48016357421875, "ranking_simple": 0.5708333253860474, "regularize": 0.719412624835968, "step": 15, "wo_beta": 6.623058795928955 }, { "dpo_loss": 0.6358036398887634, "epoch": 0.05668398677373642, "grad_norm": 98064.26779815872, "learning_rate": 9.433962264150944e-07, "logits": -1.3698803186416626, "logps": -83.57228088378906, "loss": 297.1517, "objective": 276.2193908691406, "ranking_simple": 0.48750001192092896, "regularize": 0.6358036398887634, "step": 20, "wo_beta": 8.147703170776367 }, { "dpo_loss": 0.7541502714157104, "epoch": 0.07085498346717052, "grad_norm": 117575.06586226003, "learning_rate": 1.179245283018868e-06, "logits": -1.3847885131835938, "logps": -83.47090148925781, "loss": 313.7878, "objective": 312.7436218261719, "ranking_simple": 0.5708333253860474, "regularize": 0.7541502714157104, "step": 25, "wo_beta": 6.331053256988525 }, { "dpo_loss": 0.8459195494651794, "epoch": 0.08502598016060463, "grad_norm": 83498.77667145648, "learning_rate": 1.4150943396226415e-06, "logits": -1.445679783821106, "logps": -83.64388275146484, "loss": 334.7166, "objective": 370.2745361328125, "ranking_simple": 0.49166667461395264, "regularize": 0.8459195494651794, "step": 30, "wo_beta": 8.90923023223877 }, { "dpo_loss": 0.8700669407844543, "epoch": 0.09919697685403873, "grad_norm": 87674.9381300825, "learning_rate": 1.650943396226415e-06, "logits": -1.4222424030303955, "logps": -83.19346618652344, "loss": 358.5594, "objective": 387.00079345703125, "ranking_simple": 0.5416666865348816, "regularize": 0.8700669407844543, "step": 35, "wo_beta": 6.324372291564941 }, { "dpo_loss": 0.7968003749847412, "epoch": 0.11336797354747284, "grad_norm": 94029.82232454565, "learning_rate": 1.8867924528301889e-06, "logits": -1.3892769813537598, "logps": -84.3359146118164, "loss": 335.1716, "objective": 350.38433837890625, "ranking_simple": 0.5, "regularize": 0.7968003749847412, "step": 40, "wo_beta": 6.651224136352539 }, { "dpo_loss": 0.8314542770385742, "epoch": 0.12753897024090693, "grad_norm": 107131.70166266784, "learning_rate": 2.1226415094339624e-06, "logits": -1.3818415403366089, "logps": -83.095458984375, "loss": 392.6496, "objective": 352.84722900390625, "ranking_simple": 0.512499988079071, "regularize": 0.8314542770385742, "step": 45, "wo_beta": 6.450110912322998 }, { "dpo_loss": 0.9717784523963928, "epoch": 0.14170996693434104, "grad_norm": 85511.36737100256, "learning_rate": 2.358490566037736e-06, "logits": -1.4050464630126953, "logps": -84.67338562011719, "loss": 421.2896, "objective": 429.39739990234375, "ranking_simple": 0.5249999761581421, "regularize": 0.9717784523963928, "step": 50, "wo_beta": 7.381195068359375 }, { "epoch": 0.14170996693434104, "eval_dpo_loss": 0.9812284708023071, "eval_logits": -1.3988816738128662, "eval_logps": -92.03357696533203, "eval_loss": 429.86468505859375, "eval_objective": 424.89910888671875, "eval_ranking_simple": 0.5243270993232727, "eval_regularize": 0.9812284708023071, "eval_runtime": 369.9673, "eval_samples_per_second": 15.65, "eval_steps_per_second": 1.306, "eval_wo_beta": 7.807825565338135, "step": 50 }, { "dpo_loss": 1.085899829864502, "epoch": 0.15588096362777515, "grad_norm": 86740.35819020051, "learning_rate": 2.5943396226415095e-06, "logits": -1.3797937631607056, "logps": -84.61592102050781, "loss": 438.6828, "objective": 554.507080078125, "ranking_simple": 0.5083333253860474, "regularize": 1.085899829864502, "step": 55, "wo_beta": 7.282232761383057 }, { "dpo_loss": 0.881396472454071, "epoch": 0.17005196032120926, "grad_norm": 73943.20904640444, "learning_rate": 2.830188679245283e-06, "logits": -1.3378223180770874, "logps": -82.09742736816406, "loss": 396.9731, "objective": 387.7957763671875, "ranking_simple": 0.5375000238418579, "regularize": 0.881396472454071, "step": 60, "wo_beta": 6.004281997680664 }, { "dpo_loss": 0.9917481541633606, "epoch": 0.18422295701464336, "grad_norm": 67818.91421902912, "learning_rate": 3.0660377358490567e-06, "logits": -1.4089940786361694, "logps": -82.27486419677734, "loss": 421.1799, "objective": 443.5271911621094, "ranking_simple": 0.5541666746139526, "regularize": 0.9917481541633606, "step": 65, "wo_beta": 6.279632091522217 }, { "dpo_loss": 0.911385178565979, "epoch": 0.19839395370807747, "grad_norm": 65454.0541533192, "learning_rate": 3.30188679245283e-06, "logits": -1.4254525899887085, "logps": -82.29732513427734, "loss": 383.1954, "objective": 402.9763488769531, "ranking_simple": 0.550000011920929, "regularize": 0.911385178565979, "step": 70, "wo_beta": 6.741910934448242 }, { "dpo_loss": 1.0663002729415894, "epoch": 0.21256495040151158, "grad_norm": 88434.22176378245, "learning_rate": 3.5377358490566038e-06, "logits": -1.327197551727295, "logps": -82.15841674804688, "loss": 495.8337, "objective": 501.70574951171875, "ranking_simple": 0.5541666746139526, "regularize": 1.0663002729415894, "step": 75, "wo_beta": 6.144163131713867 }, { "dpo_loss": 1.0284979343414307, "epoch": 0.22673594709494568, "grad_norm": 81588.2874349178, "learning_rate": 3.7735849056603777e-06, "logits": -1.3877789974212646, "logps": -82.84284210205078, "loss": 443.4796, "objective": 458.9221496582031, "ranking_simple": 0.5833333134651184, "regularize": 1.0284979343414307, "step": 80, "wo_beta": 5.538651466369629 }, { "dpo_loss": 1.2060002088546753, "epoch": 0.2409069437883798, "grad_norm": 50570.51985294035, "learning_rate": 4.009433962264152e-06, "logits": -1.325385570526123, "logps": -85.86609649658203, "loss": 454.4117, "objective": 512.2498168945312, "ranking_simple": 0.5708333253860474, "regularize": 1.2060002088546753, "step": 85, "wo_beta": 5.5450663566589355 }, { "dpo_loss": 0.8621365427970886, "epoch": 0.25507794048181387, "grad_norm": 64042.236837982855, "learning_rate": 4.245283018867925e-06, "logits": -1.3326818943023682, "logps": -87.87580108642578, "loss": 468.0205, "objective": 382.29150390625, "ranking_simple": 0.5874999761581421, "regularize": 0.8621365427970886, "step": 90, "wo_beta": 6.040257930755615 }, { "dpo_loss": 1.0820232629776, "epoch": 0.269248937175248, "grad_norm": 73856.79865687525, "learning_rate": 4.481132075471699e-06, "logits": -1.2569290399551392, "logps": -87.48131561279297, "loss": 451.8957, "objective": 499.0147705078125, "ranking_simple": 0.5208333134651184, "regularize": 1.0820232629776, "step": 95, "wo_beta": 7.041137218475342 }, { "dpo_loss": 0.9895243048667908, "epoch": 0.2834199338686821, "grad_norm": 69854.16899039333, "learning_rate": 4.716981132075472e-06, "logits": -1.1859853267669678, "logps": -85.07958221435547, "loss": 504.7676, "objective": 428.0093688964844, "ranking_simple": 0.5541666746139526, "regularize": 0.9895243048667908, "step": 100, "wo_beta": 6.106683731079102 }, { "epoch": 0.2834199338686821, "eval_dpo_loss": 1.2757266759872437, "eval_logits": -1.2545620203018188, "eval_logps": -92.60501861572266, "eval_loss": 586.443115234375, "eval_objective": 565.0955810546875, "eval_ranking_simple": 0.533643901348114, "eval_regularize": 1.2757266759872437, "eval_runtime": 368.7119, "eval_samples_per_second": 15.703, "eval_steps_per_second": 1.31, "eval_wo_beta": 7.499707221984863, "step": 100 }, { "dpo_loss": 1.5464370250701904, "epoch": 0.2975909305621162, "grad_norm": 81924.04645083607, "learning_rate": 4.952830188679246e-06, "logits": -1.3069990873336792, "logps": -87.30802154541016, "loss": 621.3279, "objective": 697.1328125, "ranking_simple": 0.5874999761581421, "regularize": 1.5464370250701904, "step": 105, "wo_beta": 5.294255256652832 }, { "dpo_loss": 1.3242223262786865, "epoch": 0.3117619272555503, "grad_norm": 67213.20348371088, "learning_rate": 4.999781286194085e-06, "logits": -1.2264933586120605, "logps": -82.50062561035156, "loss": 590.9299, "objective": 569.9200439453125, "ranking_simple": 0.5208333134651184, "regularize": 1.3242223262786865, "step": 110, "wo_beta": 5.986863136291504 }, { "dpo_loss": 1.50898015499115, "epoch": 0.32593292394898443, "grad_norm": 66001.4469324612, "learning_rate": 4.998892826944418e-06, "logits": -1.1785295009613037, "logps": -78.3976821899414, "loss": 613.2373, "objective": 647.5435180664062, "ranking_simple": 0.5583333373069763, "regularize": 1.50898015499115, "step": 115, "wo_beta": 6.424834251403809 }, { "dpo_loss": 1.3131033182144165, "epoch": 0.3401039206424185, "grad_norm": 71675.69603134823, "learning_rate": 4.997321195347154e-06, "logits": -1.1258571147918701, "logps": -79.26954650878906, "loss": 580.6179, "objective": 548.9010009765625, "ranking_simple": 0.5625, "regularize": 1.3131033182144165, "step": 120, "wo_beta": 6.872457027435303 }, { "dpo_loss": 1.1034477949142456, "epoch": 0.35427491733585265, "grad_norm": 66579.26081621695, "learning_rate": 4.9950668210706795e-06, "logits": -1.193581223487854, "logps": -78.10294342041016, "loss": 503.3348, "objective": 437.7645568847656, "ranking_simple": 0.5583333373069763, "regularize": 1.1034477949142456, "step": 125, "wo_beta": 6.386569023132324 }, { "dpo_loss": 1.4382485151290894, "epoch": 0.3684459140292867, "grad_norm": 55084.45931854072, "learning_rate": 4.992130320438411e-06, "logits": -1.253204345703125, "logps": -73.60761260986328, "loss": 670.9436, "objective": 778.9769287109375, "ranking_simple": 0.637499988079071, "regularize": 1.4382485151290894, "step": 130, "wo_beta": 4.876318454742432 }, { "dpo_loss": 1.1328778266906738, "epoch": 0.3826169107227208, "grad_norm": 54685.712604550725, "learning_rate": 4.988512496260302e-06, "logits": -1.3233076333999634, "logps": -72.83162689208984, "loss": 579.4298, "objective": 505.5152282714844, "ranking_simple": 0.5208333134651184, "regularize": 1.1328778266906738, "step": 135, "wo_beta": 5.750979423522949 }, { "dpo_loss": 1.8627400398254395, "epoch": 0.39678790741615494, "grad_norm": 76839.6853222187, "learning_rate": 4.984214337613357e-06, "logits": -1.3366682529449463, "logps": -71.90711212158203, "loss": 685.0747, "objective": 835.1346435546875, "ranking_simple": 0.5416666865348816, "regularize": 1.8627400398254395, "step": 140, "wo_beta": 6.485233783721924 }, { "dpo_loss": 1.5502649545669556, "epoch": 0.410958904109589, "grad_norm": 66610.05959697074, "learning_rate": 4.979237019571235e-06, "logits": -1.3182580471038818, "logps": -74.45819854736328, "loss": 704.9736, "objective": 741.4519653320312, "ranking_simple": 0.6208333373069763, "regularize": 1.5502649545669556, "step": 145, "wo_beta": 5.113797187805176 }, { "dpo_loss": 1.3416301012039185, "epoch": 0.42512990080302315, "grad_norm": 57008.90120770728, "learning_rate": 4.97358190288299e-06, "logits": -1.2401268482208252, "logps": -76.36083984375, "loss": 647.489, "objective": 566.4555053710938, "ranking_simple": 0.5708333253860474, "regularize": 1.3416301012039185, "step": 150, "wo_beta": 7.6363630294799805 }, { "epoch": 0.42512990080302315, "eval_dpo_loss": 1.820163607597351, "eval_logits": -1.282083511352539, "eval_logps": -81.57704162597656, "eval_loss": 806.8840942382812, "eval_objective": 788.4058227539062, "eval_ranking_simple": 0.5367494821548462, "eval_regularize": 1.820163607597351, "eval_runtime": 370.4907, "eval_samples_per_second": 15.628, "eval_steps_per_second": 1.304, "eval_wo_beta": 7.2971673011779785, "step": 150 }, { "dpo_loss": 1.201590657234192, "epoch": 0.43930089749645723, "grad_norm": 53158.65039781518, "learning_rate": 4.967250533601059e-06, "logits": -1.2983914613723755, "logps": -76.4615707397461, "loss": 621.7593, "objective": 591.6034545898438, "ranking_simple": 0.5625, "regularize": 1.201590657234192, "step": 155, "wo_beta": 6.331362247467041 }, { "dpo_loss": 1.8209228515625, "epoch": 0.45347189418989137, "grad_norm": 58324.46781297966, "learning_rate": 4.9602446426585845e-06, "logits": -1.227155089378357, "logps": -76.60499572753906, "loss": 653.8297, "objective": 777.4675903320312, "ranking_simple": 0.5541666746139526, "regularize": 1.8209228515625, "step": 160, "wo_beta": 6.163790225982666 }, { "dpo_loss": 1.1324195861816406, "epoch": 0.46764289088332545, "grad_norm": 58174.78839479279, "learning_rate": 4.952566145396197e-06, "logits": -1.3118336200714111, "logps": -77.7535171508789, "loss": 655.7216, "objective": 464.7123107910156, "ranking_simple": 0.5958333611488342, "regularize": 1.1324195861816406, "step": 165, "wo_beta": 5.644231796264648 }, { "dpo_loss": 1.3175557851791382, "epoch": 0.4818138875767596, "grad_norm": 47074.837544146685, "learning_rate": 4.944217141038379e-06, "logits": -1.2680017948150635, "logps": -77.98702239990234, "loss": 553.8126, "objective": 550.3071899414062, "ranking_simple": 0.5375000238418579, "regularize": 1.3175557851791382, "step": 170, "wo_beta": 6.271345615386963 }, { "dpo_loss": 1.528406023979187, "epoch": 0.49598488427019366, "grad_norm": 51080.30959388274, "learning_rate": 4.935199912119558e-06, "logits": -1.1689633131027222, "logps": -78.72420501708984, "loss": 669.8391, "objective": 715.664794921875, "ranking_simple": 0.550000011920929, "regularize": 1.528406023979187, "step": 175, "wo_beta": 6.365441799163818 }, { "dpo_loss": 1.5439296960830688, "epoch": 0.5101558809636277, "grad_norm": 58721.23782221805, "learning_rate": 4.925516923860083e-06, "logits": -1.1620502471923828, "logps": -77.92072296142578, "loss": 615.0646, "objective": 768.5841674804688, "ranking_simple": 0.5166666507720947, "regularize": 1.5439296960830688, "step": 180, "wo_beta": 6.549993991851807 }, { "dpo_loss": 1.295415997505188, "epoch": 0.5243268776570619, "grad_norm": 57823.70553626485, "learning_rate": 4.9151708234922605e-06, "logits": -1.1856701374053955, "logps": -75.32164764404297, "loss": 501.8101, "objective": 566.3171997070312, "ranking_simple": 0.574999988079071, "regularize": 1.295415997505188, "step": 185, "wo_beta": 5.50905704498291 }, { "dpo_loss": 1.4275904893875122, "epoch": 0.538497874350496, "grad_norm": 71863.24536234359, "learning_rate": 4.904164439536626e-06, "logits": -1.189324140548706, "logps": -73.93627166748047, "loss": 635.339, "objective": 705.5938110351562, "ranking_simple": 0.6333333253860474, "regularize": 1.4275904893875122, "step": 190, "wo_beta": 5.4983296394348145 }, { "dpo_loss": 1.2087292671203613, "epoch": 0.5526688710439301, "grad_norm": 49718.992888304776, "learning_rate": 4.8925007810286555e-06, "logits": -1.1961599588394165, "logps": -73.1677474975586, "loss": 604.9664, "objective": 597.218994140625, "ranking_simple": 0.625, "regularize": 1.2087292671203613, "step": 195, "wo_beta": 5.489840507507324 }, { "dpo_loss": 1.2826271057128906, "epoch": 0.5668398677373642, "grad_norm": 47393.96595402916, "learning_rate": 4.880183036696123e-06, "logits": -1.1661118268966675, "logps": -72.84329986572266, "loss": 549.7892, "objective": 592.0448608398438, "ranking_simple": 0.5708333253860474, "regularize": 1.2826271057128906, "step": 200, "wo_beta": 6.384205341339111 }, { "epoch": 0.5668398677373642, "eval_dpo_loss": 1.888936996459961, "eval_logits": -1.1824791431427002, "eval_logps": -76.20477294921875, "eval_loss": 883.9945678710938, "eval_objective": 832.8786010742188, "eval_ranking_simple": 0.531573474407196, "eval_regularize": 1.888936996459961, "eval_runtime": 375.7585, "eval_samples_per_second": 15.409, "eval_steps_per_second": 1.285, "eval_wo_beta": 7.189944267272949, "step": 200 }, { "dpo_loss": 1.4287751913070679, "epoch": 0.5810108644307983, "grad_norm": 61461.28761463611, "learning_rate": 4.867214574087338e-06, "logits": -0.9886723160743713, "logps": -72.71061706542969, "loss": 664.0259, "objective": 768.5003662109375, "ranking_simple": 0.5791666507720947, "regularize": 1.4287751913070679, "step": 205, "wo_beta": 6.914271831512451 }, { "dpo_loss": 1.3722679615020752, "epoch": 0.5951818611242324, "grad_norm": 56025.34447718605, "learning_rate": 4.853598938650487e-06, "logits": -1.004089593887329, "logps": -71.4341049194336, "loss": 678.2527, "objective": 627.8658447265625, "ranking_simple": 0.6166666746139526, "regularize": 1.3722679615020752, "step": 210, "wo_beta": 5.539144992828369 }, { "dpo_loss": 1.2253851890563965, "epoch": 0.6093528578176665, "grad_norm": 48935.729292866636, "learning_rate": 4.8393398527643495e-06, "logits": -1.0572926998138428, "logps": -71.53670501708984, "loss": 542.6101, "objective": 526.205810546875, "ranking_simple": 0.5666666626930237, "regularize": 1.2253851890563965, "step": 215, "wo_beta": 6.424313545227051 }, { "dpo_loss": 1.1790845394134521, "epoch": 0.6235238545111006, "grad_norm": 50799.22260603063, "learning_rate": 4.824441214720629e-06, "logits": -1.100419521331787, "logps": -72.95840454101562, "loss": 580.133, "objective": 527.5165405273438, "ranking_simple": 0.550000011920929, "regularize": 1.1790845394134521, "step": 220, "wo_beta": 6.990549087524414 }, { "dpo_loss": 1.325684666633606, "epoch": 0.6376948512045347, "grad_norm": 43237.99565272054, "learning_rate": 4.808907097658205e-06, "logits": -1.0687224864959717, "logps": -73.86213684082031, "loss": 599.2142, "objective": 594.8961791992188, "ranking_simple": 0.5708333253860474, "regularize": 1.325684666633606, "step": 225, "wo_beta": 6.574854373931885 }, { "dpo_loss": 1.0051558017730713, "epoch": 0.6518658478979689, "grad_norm": 46348.61858741176, "learning_rate": 4.7927417484495756e-06, "logits": -0.8813047409057617, "logps": -73.6087875366211, "loss": 497.2491, "objective": 420.7375793457031, "ranking_simple": 0.6416666507720947, "regularize": 1.0051558017730713, "step": 230, "wo_beta": 4.896794319152832 }, { "dpo_loss": 1.4625457525253296, "epoch": 0.6660368445914029, "grad_norm": 49199.93289752187, "learning_rate": 4.7759495865398035e-06, "logits": -0.757663369178772, "logps": -76.35986328125, "loss": 592.7314, "objective": 588.2656860351562, "ranking_simple": 0.574999988079071, "regularize": 1.4625457525253296, "step": 235, "wo_beta": 6.098875522613525 }, { "dpo_loss": 1.234517216682434, "epoch": 0.680207841284837, "grad_norm": 57833.981683398764, "learning_rate": 4.758535202738287e-06, "logits": -0.9619929790496826, "logps": -75.74427032470703, "loss": 554.4981, "objective": 601.7564086914062, "ranking_simple": 0.5958333611488342, "regularize": 1.234517216682434, "step": 240, "wo_beta": 6.564117908477783 }, { "dpo_loss": 1.3769992589950562, "epoch": 0.6943788379782712, "grad_norm": 53444.90688249006, "learning_rate": 4.740503357963676e-06, "logits": -0.990798830986023, "logps": -74.93412780761719, "loss": 543.6711, "objective": 609.6189575195312, "ranking_simple": 0.574999988079071, "regularize": 1.3769992589950562, "step": 245, "wo_beta": 5.855550765991211 }, { "dpo_loss": 1.1388071775436401, "epoch": 0.7085498346717053, "grad_norm": 46909.61647475604, "learning_rate": 4.721858981942284e-06, "logits": -0.8007088899612427, "logps": -74.202880859375, "loss": 598.0575, "objective": 542.016357421875, "ranking_simple": 0.6166666746139526, "regularize": 1.1388071775436401, "step": 250, "wo_beta": 5.385839939117432 }, { "epoch": 0.7085498346717053, "eval_dpo_loss": 2.010594367980957, "eval_logits": -1.049426794052124, "eval_logps": -79.15605926513672, "eval_loss": 912.8013916015625, "eval_objective": 878.098388671875, "eval_ranking_simple": 0.531573474407196, "eval_regularize": 2.010594367980957, "eval_runtime": 369.3016, "eval_samples_per_second": 15.678, "eval_steps_per_second": 1.308, "eval_wo_beta": 7.141663551330566, "step": 250 }, { "dpo_loss": 1.0299155712127686, "epoch": 0.7227208313651393, "grad_norm": 34955.92659194729, "learning_rate": 4.702607171860354e-06, "logits": -1.0210615396499634, "logps": -74.22802734375, "loss": 541.174, "objective": 435.53558349609375, "ranking_simple": 0.5375000238418579, "regularize": 1.0299155712127686, "step": 255, "wo_beta": 6.036287307739258 }, { "dpo_loss": 1.2304381132125854, "epoch": 0.7368918280585735, "grad_norm": 43356.96437216858, "learning_rate": 4.682753190970533e-06, "logits": -0.9648634195327759, "logps": -76.73786163330078, "loss": 536.6063, "objective": 601.1241455078125, "ranking_simple": 0.5666666626930237, "regularize": 1.2304381132125854, "step": 260, "wo_beta": 6.554741382598877 }, { "dpo_loss": 1.1359996795654297, "epoch": 0.7510628247520076, "grad_norm": 42994.029974385, "learning_rate": 4.6623024671529555e-06, "logits": -0.904022753238678, "logps": -76.38091278076172, "loss": 493.3245, "objective": 537.385498046875, "ranking_simple": 0.6541666388511658, "regularize": 1.1359996795654297, "step": 265, "wo_beta": 5.8244123458862305 }, { "dpo_loss": 1.0946044921875, "epoch": 0.7652338214454416, "grad_norm": 47060.47697517327, "learning_rate": 4.641260591431315e-06, "logits": -0.954149067401886, "logps": -77.31507110595703, "loss": 584.1508, "objective": 473.41119384765625, "ranking_simple": 0.5375000238418579, "regularize": 1.0946044921875, "step": 270, "wo_beta": 5.909613609313965 }, { "dpo_loss": 1.2381278276443481, "epoch": 0.7794048181388757, "grad_norm": 50524.69195776784, "learning_rate": 4.61963331644433e-06, "logits": -0.8554438352584839, "logps": -76.56681060791016, "loss": 567.9371, "objective": 556.9308471679688, "ranking_simple": 0.5625, "regularize": 1.2381278276443481, "step": 275, "wo_beta": 5.9904327392578125 }, { "dpo_loss": 0.8290544748306274, "epoch": 0.7935758148323099, "grad_norm": 52340.88871917512, "learning_rate": 4.597426554873037e-06, "logits": -0.8973789811134338, "logps": -76.66759490966797, "loss": 468.9087, "objective": 375.5177307128906, "ranking_simple": 0.6000000238418579, "regularize": 0.8290544748306274, "step": 280, "wo_beta": 5.502625465393066 }, { "dpo_loss": 1.0164730548858643, "epoch": 0.807746811525744, "grad_norm": 55654.623332201154, "learning_rate": 4.574646377824316e-06, "logits": -0.8480007648468018, "logps": -78.14274597167969, "loss": 473.5023, "objective": 487.4198303222656, "ranking_simple": 0.5458333492279053, "regularize": 1.0164730548858643, "step": 285, "wo_beta": 6.672605037689209 }, { "dpo_loss": 1.0201401710510254, "epoch": 0.821917808219178, "grad_norm": 48563.30285486188, "learning_rate": 4.551299013171111e-06, "logits": -0.7914489507675171, "logps": -77.2682113647461, "loss": 444.8455, "objective": 464.7095642089844, "ranking_simple": 0.5666666626930237, "regularize": 1.0201401710510254, "step": 290, "wo_beta": 5.619294166564941 }, { "dpo_loss": 1.2212320566177368, "epoch": 0.8360888049126122, "grad_norm": 49233.92704031764, "learning_rate": 4.5273908438498e-06, "logits": -0.8363037705421448, "logps": -77.95922088623047, "loss": 599.433, "objective": 584.8712158203125, "ranking_simple": 0.5416666865348816, "regularize": 1.2212320566177368, "step": 295, "wo_beta": 6.878579139709473 }, { "dpo_loss": 0.9529095888137817, "epoch": 0.8502598016060463, "grad_norm": 38375.982543414415, "learning_rate": 4.502928406115152e-06, "logits": -0.7699640989303589, "logps": -77.4969482421875, "loss": 490.4698, "objective": 387.5968322753906, "ranking_simple": 0.625, "regularize": 0.9529095888137817, "step": 300, "wo_beta": 4.446579933166504 }, { "epoch": 0.8502598016060463, "eval_dpo_loss": 2.048867702484131, "eval_logits": -0.8242712616920471, "eval_logps": -84.34913635253906, "eval_loss": 908.3519287109375, "eval_objective": 883.9851684570312, "eval_ranking_simple": 0.5372670888900757, "eval_regularize": 2.048867702484131, "eval_runtime": 368.9144, "eval_samples_per_second": 15.695, "eval_steps_per_second": 1.309, "eval_wo_beta": 6.972108840942383, "step": 300 }, { "dpo_loss": 1.360299825668335, "epoch": 0.8644307982994804, "grad_norm": 49256.76350615258, "learning_rate": 4.477918387753388e-06, "logits": -0.6884523034095764, "logps": -79.74717712402344, "loss": 540.5464, "objective": 640.423828125, "ranking_simple": 0.625, "regularize": 1.360299825668335, "step": 305, "wo_beta": 5.2918877601623535 }, { "dpo_loss": 0.9228391051292419, "epoch": 0.8786017949929145, "grad_norm": 48740.801909647664, "learning_rate": 4.452367626253805e-06, "logits": -0.6559950709342957, "logps": -79.34391784667969, "loss": 420.4869, "objective": 424.25775146484375, "ranking_simple": 0.5916666388511658, "regularize": 0.9228391051292419, "step": 310, "wo_beta": 5.364611625671387 }, { "dpo_loss": 1.0072659254074097, "epoch": 0.8927727916863486, "grad_norm": 42307.68186238231, "learning_rate": 4.426283106939474e-06, "logits": -0.6370798945426941, "logps": -80.14945983886719, "loss": 505.2505, "objective": 464.4563903808594, "ranking_simple": 0.5541666746139526, "regularize": 1.0072659254074097, "step": 315, "wo_beta": 6.282584190368652 }, { "dpo_loss": 1.0113941431045532, "epoch": 0.9069437883797827, "grad_norm": 49217.69235824995, "learning_rate": 4.399671961057523e-06, "logits": -0.5728167295455933, "logps": -81.63493347167969, "loss": 549.8222, "objective": 494.4688415527344, "ranking_simple": 0.625, "regularize": 1.0113941431045532, "step": 320, "wo_beta": 4.408293724060059 }, { "dpo_loss": 0.9181217551231384, "epoch": 0.9211147850732169, "grad_norm": 37081.62708771448, "learning_rate": 4.372541463829524e-06, "logits": -0.6937563419342041, "logps": -80.65882873535156, "loss": 498.2751, "objective": 406.5204772949219, "ranking_simple": 0.5874999761581421, "regularize": 0.9181217551231384, "step": 325, "wo_beta": 5.4936604499816895 }, { "dpo_loss": 0.9474784135818481, "epoch": 0.9352857817666509, "grad_norm": 40839.28555485669, "learning_rate": 4.3448990324625244e-06, "logits": -0.5978609323501587, "logps": -79.6310806274414, "loss": 499.4487, "objective": 420.19354248046875, "ranking_simple": 0.5874999761581421, "regularize": 0.9474784135818481, "step": 330, "wo_beta": 5.275208473205566 }, { "dpo_loss": 0.8909139037132263, "epoch": 0.949456778460085, "grad_norm": 41412.745559313786, "learning_rate": 4.316752224121252e-06, "logits": -0.5733863711357117, "logps": -79.5725326538086, "loss": 420.0585, "objective": 373.74444580078125, "ranking_simple": 0.5958333611488342, "regularize": 0.8909139037132263, "step": 335, "wo_beta": 5.151965141296387 }, { "dpo_loss": 1.1936326026916504, "epoch": 0.9636277751535192, "grad_norm": 38902.519316501435, "learning_rate": 4.288108733862064e-06, "logits": -0.6429716944694519, "logps": -78.99500274658203, "loss": 501.9028, "objective": 553.547119140625, "ranking_simple": 0.5916666388511658, "regularize": 1.1936326026916504, "step": 340, "wo_beta": 5.893803119659424 }, { "dpo_loss": 0.8929412961006165, "epoch": 0.9777987718469532, "grad_norm": 38680.18948554596, "learning_rate": 4.2589763925291924e-06, "logits": -0.6908617615699768, "logps": -77.42713165283203, "loss": 443.8264, "objective": 408.8953552246094, "ranking_simple": 0.6666666865348816, "regularize": 0.8929412961006165, "step": 345, "wo_beta": 4.660811901092529 }, { "dpo_loss": 0.7659813761711121, "epoch": 0.9919697685403873, "grad_norm": 38727.42670719923, "learning_rate": 4.229363164613874e-06, "logits": -0.6147564649581909, "logps": -79.30657196044922, "loss": 374.0952, "objective": 361.5460510253906, "ranking_simple": 0.6791666746139526, "regularize": 0.7659813761711121, "step": 350, "wo_beta": 4.2715983390808105 }, { "epoch": 0.9919697685403873, "eval_dpo_loss": 2.082609176635742, "eval_logits": -0.7334761023521423, "eval_logps": -82.52086639404297, "eval_loss": 968.3278198242188, "eval_objective": 906.4931030273438, "eval_ranking_simple": 0.5341615080833435, "eval_regularize": 2.082609176635742, "eval_runtime": 369.2448, "eval_samples_per_second": 15.681, "eval_steps_per_second": 1.308, "eval_wo_beta": 6.938116550445557, "step": 350 }, { "dpo_loss": 0.7874710559844971, "epoch": 1.0061407652338215, "grad_norm": 21920.85892485651, "learning_rate": 4.199277146076933e-06, "logits": -0.7007929682731628, "logps": -77.06559753417969, "loss": 368.6515, "objective": 352.9415283203125, "ranking_simple": 0.6416666507720947, "regularize": 0.7874710559844971, "step": 355, "wo_beta": 4.749766826629639 }, { "dpo_loss": 0.7488501071929932, "epoch": 1.0203117619272555, "grad_norm": 30589.88972145914, "learning_rate": 4.168726562135432e-06, "logits": -0.6663420796394348, "logps": -77.70557403564453, "loss": 293.1907, "objective": 339.2917785644531, "ranking_simple": 0.637499988079071, "regularize": 0.7488501071929932, "step": 360, "wo_beta": 5.18883752822876 }, { "dpo_loss": 0.47425612807273865, "epoch": 1.0344827586206897, "grad_norm": 26429.526464050992, "learning_rate": 4.137719765013974e-06, "logits": -0.6962677240371704, "logps": -76.95773315429688, "loss": 268.2307, "objective": 244.91177368164062, "ranking_simple": 0.6333333253860474, "regularize": 0.47425612807273865, "step": 365, "wo_beta": 4.233913898468018 }, { "dpo_loss": 0.5232141017913818, "epoch": 1.0486537553141237, "grad_norm": 35534.75324777604, "learning_rate": 4.106265231661292e-06, "logits": -0.6801474094390869, "logps": -74.06124114990234, "loss": 243.6382, "objective": 228.08172607421875, "ranking_simple": 0.5958333611488342, "regularize": 0.5232141017913818, "step": 370, "wo_beta": 5.037619113922119 }, { "dpo_loss": 0.548820436000824, "epoch": 1.0628247520075578, "grad_norm": 27102.24167022124, "learning_rate": 4.074371561432731e-06, "logits": -0.7840978503227234, "logps": -75.09078216552734, "loss": 253.0529, "objective": 245.6653289794922, "ranking_simple": 0.6208333373069763, "regularize": 0.548820436000824, "step": 375, "wo_beta": 4.070898056030273 }, { "dpo_loss": 0.717989981174469, "epoch": 1.076995748700992, "grad_norm": 31679.22207192818, "learning_rate": 4.042047473739278e-06, "logits": -0.7510035037994385, "logps": -76.53946685791016, "loss": 309.9577, "objective": 332.1832275390625, "ranking_simple": 0.6875, "regularize": 0.717989981174469, "step": 380, "wo_beta": 4.098145008087158 }, { "dpo_loss": 0.5641272068023682, "epoch": 1.091166745394426, "grad_norm": 24366.847525226553, "learning_rate": 4.009301805663752e-06, "logits": -0.6073976159095764, "logps": -75.09868621826172, "loss": 265.7304, "objective": 296.70831298828125, "ranking_simple": 0.6291666626930237, "regularize": 0.5641272068023682, "step": 385, "wo_beta": 4.445924758911133 }, { "dpo_loss": 0.6449456810951233, "epoch": 1.10533774208786, "grad_norm": 23048.879310786837, "learning_rate": 3.976143509544843e-06, "logits": -0.5609559416770935, "logps": -76.08757781982422, "loss": 222.6515, "objective": 276.19744873046875, "ranking_simple": 0.6791666746139526, "regularize": 0.6449456810951233, "step": 390, "wo_beta": 4.739548683166504 }, { "dpo_loss": 0.5751555562019348, "epoch": 1.1195087387812943, "grad_norm": 31852.53024343298, "learning_rate": 3.9425816505296254e-06, "logits": -0.582244873046875, "logps": -75.46802520751953, "loss": 288.2667, "objective": 265.84442138671875, "ranking_simple": 0.6291666626930237, "regularize": 0.5751555562019348, "step": 395, "wo_beta": 4.211004734039307 }, { "dpo_loss": 0.5259193778038025, "epoch": 1.1336797354747283, "grad_norm": 28994.715243763803, "learning_rate": 3.908625404095242e-06, "logits": -0.6348000764846802, "logps": -74.51661682128906, "loss": 270.3782, "objective": 223.46231079101562, "ranking_simple": 0.6875, "regularize": 0.5259193778038025, "step": 400, "wo_beta": 3.843280553817749 }, { "epoch": 1.1336797354747283, "eval_dpo_loss": 2.1841752529144287, "eval_logits": -0.6857498288154602, "eval_logps": -79.62760925292969, "eval_loss": 980.7469482421875, "eval_objective": 943.677734375, "eval_ranking_simple": 0.531573474407196, "eval_regularize": 2.1841752529144287, "eval_runtime": 375.6251, "eval_samples_per_second": 15.414, "eval_steps_per_second": 1.286, "eval_wo_beta": 7.058492660522461, "step": 400 }, { "dpo_loss": 0.6943067908287048, "epoch": 1.1478507321681626, "grad_norm": 31867.494723376392, "learning_rate": 3.8742840535404155e-06, "logits": -0.5997341871261597, "logps": -76.28077697753906, "loss": 271.2819, "objective": 352.92449951171875, "ranking_simple": 0.5833333134651184, "regularize": 0.6943067908287048, "step": 405, "wo_beta": 5.699499607086182 }, { "dpo_loss": 0.8418512344360352, "epoch": 1.1620217288615966, "grad_norm": 29842.834539174277, "learning_rate": 3.839566987447492e-06, "logits": -0.6471078991889954, "logps": -75.29569244384766, "loss": 296.2768, "objective": 396.2966613769531, "ranking_simple": 0.6291666626930237, "regularize": 0.8418512344360352, "step": 410, "wo_beta": 6.141425132751465 }, { "dpo_loss": 0.5453721284866333, "epoch": 1.1761927255550306, "grad_norm": 27018.957385156904, "learning_rate": 3.8044836971156935e-06, "logits": -0.5671236515045166, "logps": -74.40109252929688, "loss": 273.0484, "objective": 220.66055297851562, "ranking_simple": 0.6208333373069763, "regularize": 0.5453721284866333, "step": 415, "wo_beta": 5.825514793395996 }, { "dpo_loss": 0.5288990139961243, "epoch": 1.1903637222484649, "grad_norm": 25344.093819012876, "learning_rate": 3.7690437739662928e-06, "logits": -0.5559112429618835, "logps": -75.0554428100586, "loss": 249.2432, "objective": 244.75767517089844, "ranking_simple": 0.6291666626930237, "regularize": 0.5288990139961243, "step": 420, "wo_beta": 5.291681289672852 }, { "dpo_loss": 0.803754448890686, "epoch": 1.204534718941899, "grad_norm": 29915.434557425604, "learning_rate": 3.7332569069204127e-06, "logits": -0.6134353876113892, "logps": -73.4056625366211, "loss": 277.3619, "objective": 350.0005187988281, "ranking_simple": 0.6416666507720947, "regularize": 0.803754448890686, "step": 425, "wo_beta": 4.601781368255615 }, { "dpo_loss": 0.781599760055542, "epoch": 1.2187057156353331, "grad_norm": 34073.98431226179, "learning_rate": 3.697132879750174e-06, "logits": -0.6061522364616394, "logps": -75.91817474365234, "loss": 319.8952, "objective": 359.3941650390625, "ranking_simple": 0.6541666388511658, "regularize": 0.781599760055542, "step": 430, "wo_beta": 5.290956974029541 }, { "dpo_loss": 0.5502253174781799, "epoch": 1.2328767123287672, "grad_norm": 38063.822960677266, "learning_rate": 3.6606815684039098e-06, "logits": -0.5053932666778564, "logps": -75.83305358886719, "loss": 288.8541, "objective": 226.22280883789062, "ranking_simple": 0.6708333492279053, "regularize": 0.5502253174781799, "step": 435, "wo_beta": 4.12706184387207 }, { "dpo_loss": 0.6821548342704773, "epoch": 1.2470477090222012, "grad_norm": 33366.98311460532, "learning_rate": 3.6239129383061764e-06, "logits": -0.5692261457443237, "logps": -76.28325653076172, "loss": 306.1993, "objective": 302.21856689453125, "ranking_simple": 0.6875, "regularize": 0.6821548342704773, "step": 440, "wo_beta": 4.461844444274902 }, { "dpo_loss": 0.85939621925354, "epoch": 1.2612187057156352, "grad_norm": 39164.29912052594, "learning_rate": 3.586837041633312e-06, "logits": -0.627022385597229, "logps": -76.72431945800781, "loss": 321.7045, "objective": 412.44061279296875, "ranking_simple": 0.675000011920929, "regularize": 0.85939621925354, "step": 445, "wo_beta": 4.648702621459961 }, { "dpo_loss": 0.5442861914634705, "epoch": 1.2753897024090695, "grad_norm": 30514.25628778678, "learning_rate": 3.5494640145652647e-06, "logits": -0.8091104626655579, "logps": -75.52713775634766, "loss": 260.6353, "objective": 219.17945861816406, "ranking_simple": 0.6333333253860474, "regularize": 0.5442861914634705, "step": 450, "wo_beta": 4.987157821655273 }, { "epoch": 1.2753897024090695, "eval_dpo_loss": 2.075345277786255, "eval_logits": -0.8703874945640564, "eval_logps": -79.30490112304688, "eval_loss": 933.441650390625, "eval_objective": 893.1430053710938, "eval_ranking_simple": 0.5357142686843872, "eval_regularize": 2.075345277786255, "eval_runtime": 369.7717, "eval_samples_per_second": 15.658, "eval_steps_per_second": 1.306, "eval_wo_beta": 6.9555816650390625, "step": 450 }, { "dpo_loss": 0.6076017022132874, "epoch": 1.2895606991025035, "grad_norm": 30068.494295042852, "learning_rate": 3.511804074514468e-06, "logits": -0.7256959676742554, "logps": -75.7904052734375, "loss": 274.9153, "objective": 268.743896484375, "ranking_simple": 0.5833333134651184, "regularize": 0.6076017022132874, "step": 455, "wo_beta": 4.931685924530029 }, { "dpo_loss": 0.5776770710945129, "epoch": 1.3037316957959377, "grad_norm": 33947.98253416442, "learning_rate": 3.4738675173325008e-06, "logits": -0.8186031579971313, "logps": -75.09860229492188, "loss": 298.7045, "objective": 264.06005859375, "ranking_simple": 0.612500011920929, "regularize": 0.5776770710945129, "step": 460, "wo_beta": 5.527124881744385 }, { "dpo_loss": 0.5068582892417908, "epoch": 1.3179026924893718, "grad_norm": 27985.15602475436, "learning_rate": 3.435664714495301e-06, "logits": -0.8312460780143738, "logps": -73.1260757446289, "loss": 265.522, "objective": 230.74436950683594, "ranking_simple": 0.6166666746139526, "regularize": 0.5068582892417908, "step": 465, "wo_beta": 4.814398765563965 }, { "dpo_loss": 0.6723037362098694, "epoch": 1.3320736891828058, "grad_norm": 26533.816181899983, "learning_rate": 3.397206110267713e-06, "logits": -0.8928317427635193, "logps": -72.38593292236328, "loss": 298.2945, "objective": 296.0791931152344, "ranking_simple": 0.6541666388511658, "regularize": 0.6723037362098694, "step": 470, "wo_beta": 4.333133220672607 }, { "dpo_loss": 0.5037826299667358, "epoch": 1.34624468587624, "grad_norm": 35654.842705857875, "learning_rate": 3.3585022188481247e-06, "logits": -0.7810119390487671, "logps": -74.3759994506836, "loss": 283.622, "objective": 209.9154815673828, "ranking_simple": 0.675000011920929, "regularize": 0.5037826299667358, "step": 475, "wo_beta": 4.845743656158447 }, { "dpo_loss": 0.5474976301193237, "epoch": 1.360415682569674, "grad_norm": 35159.884724394884, "learning_rate": 3.3195636214939943e-06, "logits": -0.7756078839302063, "logps": -75.58786010742188, "loss": 288.5159, "objective": 257.5296936035156, "ranking_simple": 0.612500011920929, "regularize": 0.5474976301193237, "step": 480, "wo_beta": 5.6963653564453125 }, { "dpo_loss": 0.5475578308105469, "epoch": 1.3745866792631083, "grad_norm": 45395.4629312106, "learning_rate": 3.2804009636290403e-06, "logits": -0.7321893572807312, "logps": -75.10790252685547, "loss": 256.6031, "objective": 262.57879638671875, "ranking_simple": 0.6083333492279053, "regularize": 0.5475578308105469, "step": 485, "wo_beta": 5.356605052947998 }, { "dpo_loss": 0.7748611569404602, "epoch": 1.3887576759565423, "grad_norm": 33385.64413662957, "learning_rate": 3.2410249519328848e-06, "logits": -0.7029439806938171, "logps": -77.96426391601562, "loss": 309.7623, "objective": 388.19610595703125, "ranking_simple": 0.6333333253860474, "regularize": 0.7748611569404602, "step": 490, "wo_beta": 4.442887783050537 }, { "dpo_loss": 0.5730568170547485, "epoch": 1.4029286726499763, "grad_norm": 41910.67029527795, "learning_rate": 3.201446351413958e-06, "logits": -0.7370147109031677, "logps": -78.2493667602539, "loss": 288.5006, "objective": 271.5208740234375, "ranking_simple": 0.6000000238418579, "regularize": 0.5730568170547485, "step": 495, "wo_beta": 5.084538459777832 }, { "dpo_loss": 0.6209725141525269, "epoch": 1.4170996693434104, "grad_norm": 38353.55717005552, "learning_rate": 3.1616759824664543e-06, "logits": -0.6797351837158203, "logps": -76.69928741455078, "loss": 272.6055, "objective": 298.72381591796875, "ranking_simple": 0.6333333253860474, "regularize": 0.6209725141525269, "step": 500, "wo_beta": 4.246993541717529 }, { "epoch": 1.4170996693434104, "eval_dpo_loss": 2.0877864360809326, "eval_logits": -0.8079127669334412, "eval_logps": -81.43925476074219, "eval_loss": 950.2913818359375, "eval_objective": 901.2256469726562, "eval_ranking_simple": 0.5269151329994202, "eval_regularize": 2.0877864360809326, "eval_runtime": 368.6401, "eval_samples_per_second": 15.706, "eval_steps_per_second": 1.31, "eval_wo_beta": 6.822662830352783, "step": 500 }, { "dpo_loss": 0.48541003465652466, "epoch": 1.4312706660368446, "grad_norm": 29544.984605162008, "learning_rate": 3.121724717912138e-06, "logits": -0.7053753733634949, "logps": -77.21398162841797, "loss": 216.0385, "objective": 198.67111206054688, "ranking_simple": 0.6541666388511658, "regularize": 0.48541003465652466, "step": 505, "wo_beta": 4.210334300994873 }, { "dpo_loss": 0.5575286149978638, "epoch": 1.4454416627302786, "grad_norm": 31963.434830208436, "learning_rate": 3.081603480027826e-06, "logits": -0.7207833528518677, "logps": -77.88783264160156, "loss": 271.1889, "objective": 220.0654296875, "ranking_simple": 0.6625000238418579, "regularize": 0.5575286149978638, "step": 510, "wo_beta": 4.390309810638428 }, { "dpo_loss": 0.743520200252533, "epoch": 1.4596126594237129, "grad_norm": 27959.80538905989, "learning_rate": 3.04132323755935e-06, "logits": -0.7187339067459106, "logps": -79.07169342041016, "loss": 318.8216, "objective": 342.0149841308594, "ranking_simple": 0.5625, "regularize": 0.743520200252533, "step": 515, "wo_beta": 6.24213981628418 }, { "dpo_loss": 0.47003206610679626, "epoch": 1.473783656117147, "grad_norm": 27496.616543577868, "learning_rate": 3.0008950027228035e-06, "logits": -0.6592309474945068, "logps": -76.95936584472656, "loss": 192.6094, "objective": 205.1455535888672, "ranking_simple": 0.6499999761581421, "regularize": 0.47003206610679626, "step": 520, "wo_beta": 4.843044757843018 }, { "dpo_loss": 0.36703598499298096, "epoch": 1.487954652810581, "grad_norm": 26375.77965551739, "learning_rate": 2.960329828193918e-06, "logits": -0.5630077123641968, "logps": -76.27136993408203, "loss": 240.7371, "objective": 136.03848266601562, "ranking_simple": 0.6625000238418579, "regularize": 0.36703598499298096, "step": 525, "wo_beta": 4.377591609954834 }, { "dpo_loss": 0.5440438985824585, "epoch": 1.5021256495040152, "grad_norm": 27571.64273275317, "learning_rate": 2.9196388040863695e-06, "logits": -0.7446085810661316, "logps": -78.13150787353516, "loss": 252.6264, "objective": 210.30335998535156, "ranking_simple": 0.637499988079071, "regularize": 0.5440438985824585, "step": 530, "wo_beta": 4.725710868835449 }, { "dpo_loss": 0.4591270685195923, "epoch": 1.5162966461974492, "grad_norm": 29879.356657087457, "learning_rate": 2.8788330549198512e-06, "logits": -0.7952173352241516, "logps": -76.83704376220703, "loss": 190.3222, "objective": 189.28045654296875, "ranking_simple": 0.6416666507720947, "regularize": 0.4591270685195923, "step": 535, "wo_beta": 4.304568290710449 }, { "dpo_loss": 0.7635994553565979, "epoch": 1.5304676428908834, "grad_norm": 32950.397237122976, "learning_rate": 2.8379237365787426e-06, "logits": -0.7304627299308777, "logps": -77.21965789794922, "loss": 271.3824, "objective": 302.41015625, "ranking_simple": 0.6833333373069763, "regularize": 0.7635994553565979, "step": 540, "wo_beta": 3.8116295337677 }, { "dpo_loss": 0.6426182389259338, "epoch": 1.5446386395843175, "grad_norm": 29698.866901475067, "learning_rate": 2.7969220332622004e-06, "logits": -0.8022263646125793, "logps": -78.44091796875, "loss": 268.7311, "objective": 240.541015625, "ranking_simple": 0.6958333253860474, "regularize": 0.6426182389259338, "step": 545, "wo_beta": 3.3054466247558594 }, { "dpo_loss": 0.4164026081562042, "epoch": 1.5588096362777515, "grad_norm": 22464.97408129426, "learning_rate": 2.7558391544265127e-06, "logits": -0.8392809629440308, "logps": -78.80281829833984, "loss": 201.6789, "objective": 177.04327392578125, "ranking_simple": 0.612500011920929, "regularize": 0.4164026081562042, "step": 550, "wo_beta": 4.627588272094727 }, { "epoch": 1.5588096362777515, "eval_dpo_loss": 2.06260085105896, "eval_logits": -0.861229658126831, "eval_logps": -82.88394165039062, "eval_loss": 942.4044799804688, "eval_objective": 899.7697143554688, "eval_ranking_simple": 0.5362318754196167, "eval_regularize": 2.06260085105896, "eval_runtime": 374.5541, "eval_samples_per_second": 15.458, "eval_steps_per_second": 1.29, "eval_wo_beta": 6.859459400177002, "step": 550 }, { "dpo_loss": 0.6262320280075073, "epoch": 1.5729806329711855, "grad_norm": 30721.707672058405, "learning_rate": 2.714686331720543e-06, "logits": -0.6610599756240845, "logps": -79.22738647460938, "loss": 265.0857, "objective": 308.8701171875, "ranking_simple": 0.6291666626930237, "regularize": 0.6262320280075073, "step": 555, "wo_beta": 4.726518154144287 }, { "dpo_loss": 0.49044954776763916, "epoch": 1.5871516296646198, "grad_norm": 30519.863800131625, "learning_rate": 2.6734748159151104e-06, "logits": -0.6557633876800537, "logps": -78.31670379638672, "loss": 249.957, "objective": 215.1184539794922, "ranking_simple": 0.6583333611488342, "regularize": 0.49044954776763916, "step": 560, "wo_beta": 4.984176158905029 }, { "dpo_loss": 0.49784523248672485, "epoch": 1.601322626358054, "grad_norm": 32552.21298139843, "learning_rate": 2.632215873827142e-06, "logits": -0.6383572220802307, "logps": -78.56299591064453, "loss": 242.473, "objective": 225.4641571044922, "ranking_simple": 0.6083333492279053, "regularize": 0.49784523248672485, "step": 565, "wo_beta": 5.1718597412109375 }, { "dpo_loss": 0.42994922399520874, "epoch": 1.615493623051488, "grad_norm": 34165.36275325673, "learning_rate": 2.5909207852394363e-06, "logits": -0.7592962384223938, "logps": -78.97236633300781, "loss": 189.9181, "objective": 185.12489318847656, "ranking_simple": 0.6708333492279053, "regularize": 0.42994922399520874, "step": 570, "wo_beta": 4.939838886260986 }, { "dpo_loss": 0.7586291432380676, "epoch": 1.629664619744922, "grad_norm": 35161.73370280744, "learning_rate": 2.5496008398168844e-06, "logits": -0.7024948596954346, "logps": -76.8245620727539, "loss": 314.04, "objective": 396.62518310546875, "ranking_simple": 0.6958333253860474, "regularize": 0.7586291432380676, "step": 575, "wo_beta": 4.421546936035156 }, { "dpo_loss": 0.767590343952179, "epoch": 1.643835616438356, "grad_norm": 32708.853726410838, "learning_rate": 2.508267334019988e-06, "logits": -0.6832980513572693, "logps": -76.89154815673828, "loss": 278.9917, "objective": 312.0636901855469, "ranking_simple": 0.6583333611488342, "regularize": 0.767590343952179, "step": 580, "wo_beta": 4.10227108001709 }, { "dpo_loss": 0.3702860474586487, "epoch": 1.6580066131317903, "grad_norm": 31637.02640388039, "learning_rate": 2.46693156801652e-06, "logits": -0.5593028664588928, "logps": -76.0708999633789, "loss": 198.3549, "objective": 170.7590789794922, "ranking_simple": 0.6000000238418579, "regularize": 0.3702860474586487, "step": 585, "wo_beta": 4.9678053855896 }, { "dpo_loss": 0.475857138633728, "epoch": 1.6721776098252243, "grad_norm": 23355.06704017027, "learning_rate": 2.4256048425921693e-06, "logits": -0.5719855427742004, "logps": -76.99803161621094, "loss": 231.7539, "objective": 203.09518432617188, "ranking_simple": 0.6625000238418579, "regularize": 0.475857138633728, "step": 590, "wo_beta": 4.2729363441467285 }, { "dpo_loss": 0.5022611618041992, "epoch": 1.6863486065186586, "grad_norm": 24151.29042367439, "learning_rate": 2.384298456061023e-06, "logits": -0.5644751787185669, "logps": -77.0865478515625, "loss": 221.3199, "objective": 220.84716796875, "ranking_simple": 0.6416666507720947, "regularize": 0.5022611618041992, "step": 595, "wo_beta": 5.112887382507324 }, { "dpo_loss": 0.4297979474067688, "epoch": 1.7005196032120926, "grad_norm": 31821.391833657053, "learning_rate": 2.3430237011767166e-06, "logits": -0.6274603009223938, "logps": -77.14356231689453, "loss": 190.6931, "objective": 182.52053833007812, "ranking_simple": 0.6958333253860474, "regularize": 0.4297979474067688, "step": 600, "wo_beta": 3.9301912784576416 }, { "epoch": 1.7005196032120926, "eval_dpo_loss": 2.0212671756744385, "eval_logits": -0.7143464684486389, "eval_logps": -80.58209228515625, "eval_loss": 909.0859375, "eval_objective": 874.9575805664062, "eval_ranking_simple": 0.5362318754196167, "eval_regularize": 2.0212671756744385, "eval_runtime": 368.0708, "eval_samples_per_second": 15.731, "eval_steps_per_second": 1.312, "eval_wo_beta": 6.8604817390441895, "step": 600 }, { "dpo_loss": 0.5198889374732971, "epoch": 1.7146905999055266, "grad_norm": 30026.01051762202, "learning_rate": 2.30179186204511e-06, "logits": -0.5722830891609192, "logps": -77.72653198242188, "loss": 213.3847, "objective": 228.3468780517578, "ranking_simple": 0.6291666626930237, "regularize": 0.5198889374732971, "step": 605, "wo_beta": 5.703073978424072 }, { "dpo_loss": 0.36141520738601685, "epoch": 1.7288615965989607, "grad_norm": 40011.17376243372, "learning_rate": 2.2606142110393248e-06, "logits": -0.626063883304596, "logps": -76.1588363647461, "loss": 211.2466, "objective": 149.7343292236328, "ranking_simple": 0.6791666746139526, "regularize": 0.36141520738601685, "step": 610, "wo_beta": 4.233180522918701 }, { "dpo_loss": 0.42948976159095764, "epoch": 1.743032593292395, "grad_norm": 29635.739966818786, "learning_rate": 2.2195020057179897e-06, "logits": -0.6392956972122192, "logps": -76.81178283691406, "loss": 225.1069, "objective": 180.2233428955078, "ranking_simple": 0.6166666746139526, "regularize": 0.42948976159095764, "step": 615, "wo_beta": 5.672121524810791 }, { "dpo_loss": 0.5156412720680237, "epoch": 1.7572035899858292, "grad_norm": 30208.788539603607, "learning_rate": 2.1784664857475356e-06, "logits": -0.6532320380210876, "logps": -77.48053741455078, "loss": 264.541, "objective": 242.69305419921875, "ranking_simple": 0.6666666865348816, "regularize": 0.5156412720680237, "step": 620, "wo_beta": 4.189736843109131 }, { "dpo_loss": 0.5349311828613281, "epoch": 1.7713745866792632, "grad_norm": 39380.903198687505, "learning_rate": 2.1375188698293855e-06, "logits": -0.5855329036712646, "logps": -76.99386596679688, "loss": 262.5758, "objective": 243.1309356689453, "ranking_simple": 0.6291666626930237, "regularize": 0.5349311828613281, "step": 625, "wo_beta": 4.572874546051025 }, { "dpo_loss": 0.6499994993209839, "epoch": 1.7855455833726972, "grad_norm": 33586.854859622, "learning_rate": 2.096670352632873e-06, "logits": -0.6419364809989929, "logps": -78.828857421875, "loss": 222.7364, "objective": 283.2447204589844, "ranking_simple": 0.5958333611488342, "regularize": 0.6499994993209839, "step": 630, "wo_beta": 5.4850687980651855 }, { "dpo_loss": 0.350009948015213, "epoch": 1.7997165800661312, "grad_norm": 25946.609358155, "learning_rate": 2.0559321017347286e-06, "logits": -0.7381666898727417, "logps": -77.83517456054688, "loss": 231.875, "objective": 173.66685485839844, "ranking_simple": 0.699999988079071, "regularize": 0.350009948015213, "step": 635, "wo_beta": 4.173274517059326 }, { "dpo_loss": 0.4921790361404419, "epoch": 1.8138875767595655, "grad_norm": 34774.65383490848, "learning_rate": 2.01531525456598e-06, "logits": -0.7322004437446594, "logps": -78.3251724243164, "loss": 220.5483, "objective": 208.82156372070312, "ranking_simple": 0.6541666388511658, "regularize": 0.4921790361404419, "step": 640, "wo_beta": 4.936450481414795 }, { "dpo_loss": 0.46808040142059326, "epoch": 1.8280585734529995, "grad_norm": 32970.96122285086, "learning_rate": 1.974830915367086e-06, "logits": -0.7492889165878296, "logps": -78.48296356201172, "loss": 234.5868, "objective": 203.17454528808594, "ranking_simple": 0.6708333492279053, "regularize": 0.46808040142059326, "step": 645, "wo_beta": 4.602018356323242 }, { "dpo_loss": 0.5371274352073669, "epoch": 1.8422295701464337, "grad_norm": 35743.17684775322, "learning_rate": 1.93449015215215e-06, "logits": -0.779839277267456, "logps": -78.36189270019531, "loss": 308.8635, "objective": 253.41427612304688, "ranking_simple": 0.6625000238418579, "regularize": 0.5371274352073669, "step": 650, "wo_beta": 3.7921812534332275 }, { "epoch": 1.8422295701464337, "eval_dpo_loss": 1.975651741027832, "eval_logits": -0.7771020531654358, "eval_logps": -81.49595642089844, "eval_loss": 903.3455810546875, "eval_objective": 858.396728515625, "eval_ranking_simple": 0.5341615080833435, "eval_regularize": 1.975651741027832, "eval_runtime": 368.8643, "eval_samples_per_second": 15.697, "eval_steps_per_second": 1.309, "eval_wo_beta": 6.74319314956665, "step": 650 }, { "dpo_loss": 0.47425001859664917, "epoch": 1.8564005668398678, "grad_norm": 32301.135893099767, "learning_rate": 1.8943039936830347e-06, "logits": -0.6750093698501587, "logps": -75.62101745605469, "loss": 194.2043, "objective": 181.9368133544922, "ranking_simple": 0.6291666626930237, "regularize": 0.47425001859664917, "step": 655, "wo_beta": 4.659397602081299 }, { "dpo_loss": 0.5889893770217896, "epoch": 1.8705715635333018, "grad_norm": 29198.03257069531, "learning_rate": 1.8542834264542091e-06, "logits": -0.6569967865943909, "logps": -78.80994415283203, "loss": 197.3959, "objective": 235.9475555419922, "ranking_simple": 0.6666666865348816, "regularize": 0.5889893770217896, "step": 660, "wo_beta": 5.009462356567383 }, { "dpo_loss": 0.5106583833694458, "epoch": 1.8847425602267358, "grad_norm": 35479.47574916255, "learning_rate": 1.814439391689151e-06, "logits": -0.6794141530990601, "logps": -76.54640197753906, "loss": 243.1572, "objective": 230.056640625, "ranking_simple": 0.6958333253860474, "regularize": 0.5106583833694458, "step": 665, "wo_beta": 4.178842544555664 }, { "dpo_loss": 0.4586184322834015, "epoch": 1.89891355692017, "grad_norm": 23231.111354011602, "learning_rate": 1.7747827823491253e-06, "logits": -0.6216932535171509, "logps": -75.05060577392578, "loss": 175.5592, "objective": 206.04708862304688, "ranking_simple": 0.5833333134651184, "regularize": 0.4586184322834015, "step": 670, "wo_beta": 5.662291049957275 }, { "dpo_loss": 0.4679652154445648, "epoch": 1.9130845536136043, "grad_norm": 27735.952169830267, "learning_rate": 1.7353244401551566e-06, "logits": -0.6084102392196655, "logps": -75.44328308105469, "loss": 219.5326, "objective": 205.1367645263672, "ranking_simple": 0.6875, "regularize": 0.4679652154445648, "step": 675, "wo_beta": 3.7292354106903076 }, { "dpo_loss": 0.4869479835033417, "epoch": 1.9272555503070383, "grad_norm": 34188.342920935414, "learning_rate": 1.6960751526240122e-06, "logits": -0.6308152675628662, "logps": -77.12525939941406, "loss": 209.8948, "objective": 227.87367248535156, "ranking_simple": 0.6833333373069763, "regularize": 0.4869479835033417, "step": 680, "wo_beta": 4.258009433746338 }, { "dpo_loss": 0.41189083456993103, "epoch": 1.9414265470004723, "grad_norm": 22728.798141567488, "learning_rate": 1.6570456501189996e-06, "logits": -0.5937235355377197, "logps": -75.273681640625, "loss": 158.1766, "objective": 177.9638671875, "ranking_simple": 0.5916666388511658, "regularize": 0.41189083456993103, "step": 685, "wo_beta": 5.404563903808594 }, { "dpo_loss": 0.36097389459609985, "epoch": 1.9555975436939064, "grad_norm": 28163.987666291356, "learning_rate": 1.6182466029163974e-06, "logits": -0.5778710842132568, "logps": -76.22671508789062, "loss": 197.674, "objective": 151.4131622314453, "ranking_simple": 0.6458333134651184, "regularize": 0.36097389459609985, "step": 690, "wo_beta": 4.349457263946533 }, { "dpo_loss": 0.4063621461391449, "epoch": 1.9697685403873406, "grad_norm": 32613.751926495792, "learning_rate": 1.5796886182883053e-06, "logits": -0.5166969895362854, "logps": -77.0401382446289, "loss": 183.2558, "objective": 182.5120391845703, "ranking_simple": 0.6083333492279053, "regularize": 0.4063621461391449, "step": 695, "wo_beta": 5.3592329025268555 }, { "dpo_loss": 0.3167162537574768, "epoch": 1.9839395370807746, "grad_norm": 38189.51001873364, "learning_rate": 1.541382237602721e-06, "logits": -0.4650060832500458, "logps": -77.90335845947266, "loss": 176.7641, "objective": 123.92158508300781, "ranking_simple": 0.6666666865348816, "regularize": 0.3167162537574768, "step": 700, "wo_beta": 4.42483377456665 }, { "epoch": 1.9839395370807746, "eval_dpo_loss": 1.9719160795211792, "eval_logits": -0.66416335105896, "eval_logps": -80.92808532714844, "eval_loss": 901.6802368164062, "eval_objective": 855.0221557617188, "eval_ranking_simple": 0.5398550629615784, "eval_regularize": 1.9719160795211792, "eval_runtime": 368.757, "eval_samples_per_second": 15.701, "eval_steps_per_second": 1.31, "eval_wo_beta": 6.820044040679932, "step": 700 }, { "dpo_loss": 0.41513699293136597, "epoch": 1.9981105337742089, "grad_norm": 29466.894534313185, "learning_rate": 1.5033379334416376e-06, "logits": -0.431937575340271, "logps": -77.02298736572266, "loss": 226.2701, "objective": 215.40646362304688, "ranking_simple": 0.6083333492279053, "regularize": 0.41513699293136597, "step": 705, "wo_beta": 5.689188480377197 }, { "dpo_loss": 0.28987228870391846, "epoch": 2.012281530467643, "grad_norm": 14354.448151084767, "learning_rate": 1.465566106737942e-06, "logits": -0.5705215930938721, "logps": -76.63302612304688, "loss": 100.7583, "objective": 113.46463012695312, "ranking_simple": 0.6833333373069763, "regularize": 0.28987228870391846, "step": 710, "wo_beta": 3.9176442623138428 }, { "dpo_loss": 0.16603197157382965, "epoch": 2.026452527161077, "grad_norm": 11508.708871868666, "learning_rate": 1.4280770839319073e-06, "logits": -0.563579261302948, "logps": -75.65678405761719, "loss": 70.137, "objective": 80.78282165527344, "ranking_simple": 0.7416666746139526, "regularize": 0.16603197157382965, "step": 715, "wo_beta": 3.3496031761169434 }, { "dpo_loss": 0.1282922476530075, "epoch": 2.040623523854511, "grad_norm": 11811.061459991428, "learning_rate": 1.3908811141480408e-06, "logits": -0.5584599375724792, "logps": -76.28443908691406, "loss": 51.7676, "objective": 59.46492004394531, "ranking_simple": 0.6833333373069763, "regularize": 0.1282922476530075, "step": 720, "wo_beta": 4.073063850402832 }, { "dpo_loss": 0.11519418656826019, "epoch": 2.0547945205479454, "grad_norm": 12532.838982073052, "learning_rate": 1.353988366393083e-06, "logits": -0.5916457772254944, "logps": -79.23841857910156, "loss": 65.4189, "objective": 45.14365768432617, "ranking_simple": 0.7124999761581421, "regularize": 0.11519418656826019, "step": 725, "wo_beta": 3.8109290599823 }, { "dpo_loss": 0.1648443341255188, "epoch": 2.0689655172413794, "grad_norm": 15010.96827295094, "learning_rate": 1.3174089267758983e-06, "logits": -0.6307211518287659, "logps": -76.99424743652344, "loss": 58.8548, "objective": 62.6117057800293, "ranking_simple": 0.6625000238418579, "regularize": 0.1648443341255188, "step": 730, "wo_beta": 4.767910957336426 }, { "dpo_loss": 0.23415635526180267, "epoch": 2.0831365139348135, "grad_norm": 16123.177435040037, "learning_rate": 1.2811527957500344e-06, "logits": -0.5699220299720764, "logps": -76.40848541259766, "loss": 69.1498, "objective": 94.10277557373047, "ranking_simple": 0.6291666626930237, "regularize": 0.23415635526180267, "step": 735, "wo_beta": 5.384328842163086 }, { "dpo_loss": 0.10797632485628128, "epoch": 2.0973075106282475, "grad_norm": 12855.608519396761, "learning_rate": 1.245229885379699e-06, "logits": -0.6591292023658752, "logps": -76.2997055053711, "loss": 56.1864, "objective": 34.191505432128906, "ranking_simple": 0.7083333134651184, "regularize": 0.10797632485628128, "step": 740, "wo_beta": 3.6457924842834473 }, { "dpo_loss": 0.16380687057971954, "epoch": 2.1114785073216815, "grad_norm": 10889.824074533886, "learning_rate": 1.2096500166298992e-06, "logits": -0.5935976505279541, "logps": -77.07962799072266, "loss": 70.7977, "objective": 74.69692993164062, "ranking_simple": 0.6916666626930237, "regularize": 0.16380687057971954, "step": 745, "wo_beta": 4.419310569763184 }, { "dpo_loss": 0.15039294958114624, "epoch": 2.1256495040151155, "grad_norm": 8517.081418777607, "learning_rate": 1.1744229166814889e-06, "logits": -0.6511305570602417, "logps": -76.2921371459961, "loss": 56.904, "objective": 58.568729400634766, "ranking_simple": 0.6458333134651184, "regularize": 0.15039294958114624, "step": 750, "wo_beta": 4.041059494018555 }, { "epoch": 2.1256495040151155, "eval_dpo_loss": 1.9392777681350708, "eval_logits": -0.7334077954292297, "eval_logps": -81.60860443115234, "eval_loss": 887.2379760742188, "eval_objective": 839.1236572265625, "eval_ranking_simple": 0.5388198494911194, "eval_regularize": 1.9392777681350708, "eval_runtime": 368.7916, "eval_samples_per_second": 15.7, "eval_steps_per_second": 1.31, "eval_wo_beta": 6.87873649597168, "step": 750 }, { "dpo_loss": 0.10645638406276703, "epoch": 2.13982050070855, "grad_norm": 13052.259014125502, "learning_rate": 1.1395582162718524e-06, "logits": -0.6800190806388855, "logps": -78.63035583496094, "loss": 57.8502, "objective": 35.947303771972656, "ranking_simple": 0.6833333373069763, "regularize": 0.10645638406276703, "step": 755, "wo_beta": 4.126923084259033 }, { "dpo_loss": 0.19088611006736755, "epoch": 2.153991497401984, "grad_norm": 17569.54466226025, "learning_rate": 1.1050654470619602e-06, "logits": -0.6212081909179688, "logps": -75.5756607055664, "loss": 75.6102, "objective": 78.61075592041016, "ranking_simple": 0.6708333492279053, "regularize": 0.19088611006736755, "step": 760, "wo_beta": 4.341642379760742 }, { "dpo_loss": 0.13319069147109985, "epoch": 2.168162494095418, "grad_norm": 16693.2708880337, "learning_rate": 1.0709540390305061e-06, "logits": -0.6630601286888123, "logps": -76.63800811767578, "loss": 66.0811, "objective": 50.65385437011719, "ranking_simple": 0.6833333373069763, "regularize": 0.13319069147109985, "step": 765, "wo_beta": 3.741922616958618 }, { "dpo_loss": 0.10242452472448349, "epoch": 2.182333490788852, "grad_norm": 22180.175367129727, "learning_rate": 1.0372333178958462e-06, "logits": -0.6701247692108154, "logps": -76.97808074951172, "loss": 72.9876, "objective": 40.84423828125, "ranking_simple": 0.6666666865348816, "regularize": 0.10242452472448349, "step": 770, "wo_beta": 4.013566970825195 }, { "dpo_loss": 0.21899166703224182, "epoch": 2.196504487482286, "grad_norm": 16166.28367330464, "learning_rate": 1.0039125025664392e-06, "logits": -0.69548100233078, "logps": -77.49720764160156, "loss": 85.4559, "objective": 107.94995880126953, "ranking_simple": 0.7083333134651184, "regularize": 0.21899166703224182, "step": 775, "wo_beta": 4.022635459899902 }, { "dpo_loss": 0.23950724303722382, "epoch": 2.21067548417572, "grad_norm": 23845.856987166226, "learning_rate": 9.710007026204896e-07, "logits": -0.5921620726585388, "logps": -77.82970428466797, "loss": 73.8894, "objective": 118.90029907226562, "ranking_simple": 0.625, "regularize": 0.23950724303722382, "step": 780, "wo_beta": 5.2229838371276855 }, { "dpo_loss": 0.1409159004688263, "epoch": 2.2248464808691546, "grad_norm": 23390.757076240254, "learning_rate": 9.385069158154805e-07, "logits": -0.5967235565185547, "logps": -77.1053695678711, "loss": 72.4182, "objective": 49.14412307739258, "ranking_simple": 0.6541666388511658, "regularize": 0.1409159004688263, "step": 785, "wo_beta": 4.309206485748291 }, { "dpo_loss": 0.11289669573307037, "epoch": 2.2390174775625886, "grad_norm": 19262.26547177061, "learning_rate": 9.064400256282757e-07, "logits": -0.5860327482223511, "logps": -77.5761489868164, "loss": 58.4468, "objective": 36.8985481262207, "ranking_simple": 0.7124999761581421, "regularize": 0.11289669573307037, "step": 790, "wo_beta": 3.3499815464019775 }, { "dpo_loss": 0.1393917351961136, "epoch": 2.2531884742560226, "grad_norm": 13961.149915240683, "learning_rate": 8.74808798826467e-07, "logits": -0.6526386141777039, "logps": -77.9594497680664, "loss": 49.578, "objective": 57.15465545654297, "ranking_simple": 0.699999988079071, "regularize": 0.1393917351961136, "step": 795, "wo_beta": 3.4494924545288086 }, { "dpo_loss": 0.19990867376327515, "epoch": 2.2673594709494567, "grad_norm": 19068.02255354706, "learning_rate": 8.436218830716259e-07, "logits": -0.6704553365707397, "logps": -77.96353912353516, "loss": 63.8462, "objective": 76.01747131347656, "ranking_simple": 0.6833333373069763, "regularize": 0.19990867376327515, "step": 800, "wo_beta": 4.034228324890137 }, { "epoch": 2.2673594709494567, "eval_dpo_loss": 1.9230473041534424, "eval_logits": -0.7491475343704224, "eval_logps": -81.2590560913086, "eval_loss": 877.9466552734375, "eval_objective": 832.3640747070312, "eval_ranking_simple": 0.5388198494911194, "eval_regularize": 1.9230473041534424, "eval_runtime": 368.0271, "eval_samples_per_second": 15.733, "eval_steps_per_second": 1.312, "eval_wo_beta": 6.825497150421143, "step": 800 }, { "dpo_loss": 0.19815029203891754, "epoch": 2.2815304676428907, "grad_norm": 14625.694980992721, "learning_rate": 8.1288780455512e-07, "logits": -0.6596904397010803, "logps": -76.49951934814453, "loss": 62.5605, "objective": 77.96316528320312, "ranking_simple": 0.637499988079071, "regularize": 0.19815029203891754, "step": 805, "wo_beta": 4.744214057922363 }, { "dpo_loss": 0.1642819195985794, "epoch": 2.295701464336325, "grad_norm": 16998.784498338227, "learning_rate": 7.826149656671386e-07, "logits": -0.7425891757011414, "logps": -75.827880859375, "loss": 74.8622, "objective": 69.58053588867188, "ranking_simple": 0.7124999761581421, "regularize": 0.1642819195985794, "step": 810, "wo_beta": 3.9602649211883545 }, { "dpo_loss": 0.11294615268707275, "epoch": 2.309872461029759, "grad_norm": 13662.727399988506, "learning_rate": 7.528116426995605e-07, "logits": -0.7013121843338013, "logps": -77.21855163574219, "loss": 50.3895, "objective": 50.59254455566406, "ranking_simple": 0.6666666865348816, "regularize": 0.11294615268707275, "step": 815, "wo_beta": 4.308948040008545 }, { "dpo_loss": 0.1386326104402542, "epoch": 2.324043457723193, "grad_norm": 11846.16448361833, "learning_rate": 7.234859835833022e-07, "logits": -0.625929057598114, "logps": -77.01323699951172, "loss": 54.8365, "objective": 65.72408294677734, "ranking_simple": 0.6583333611488342, "regularize": 0.1386326104402542, "step": 820, "wo_beta": 4.429469585418701 }, { "dpo_loss": 0.19329386949539185, "epoch": 2.3382144544166272, "grad_norm": 12374.930899414503, "learning_rate": 6.94646005660749e-07, "logits": -0.6363904476165771, "logps": -76.43814086914062, "loss": 70.2641, "objective": 75.29810333251953, "ranking_simple": 0.6833333373069763, "regularize": 0.19329386949539185, "step": 825, "wo_beta": 4.171545028686523 }, { "dpo_loss": 0.14528803527355194, "epoch": 2.3523854511100613, "grad_norm": 17569.089090693193, "learning_rate": 6.662995934939007e-07, "logits": -0.6522344946861267, "logps": -78.64282989501953, "loss": 49.4023, "objective": 57.81031036376953, "ranking_simple": 0.7083333134651184, "regularize": 0.14528803527355194, "step": 830, "wo_beta": 3.6162989139556885 }, { "dpo_loss": 0.1276129186153412, "epoch": 2.3665564478034957, "grad_norm": 16065.220370294603, "learning_rate": 6.384544967088063e-07, "logits": -0.6452387571334839, "logps": -77.74604034423828, "loss": 64.3901, "objective": 50.397518157958984, "ranking_simple": 0.7166666388511658, "regularize": 0.1276129186153412, "step": 835, "wo_beta": 4.223039150238037 }, { "dpo_loss": 0.10496517270803452, "epoch": 2.3807274444969297, "grad_norm": 15811.911533083141, "learning_rate": 6.111183278768956e-07, "logits": -0.5866932272911072, "logps": -77.73839569091797, "loss": 63.8202, "objective": 43.11891555786133, "ranking_simple": 0.612500011920929, "regularize": 0.10496517270803452, "step": 840, "wo_beta": 4.501134872436523 }, { "dpo_loss": 0.2035357505083084, "epoch": 2.3948984411903638, "grad_norm": 21874.361109441965, "learning_rate": 5.842985604337769e-07, "logits": -0.6392544507980347, "logps": -79.36287689208984, "loss": 65.8964, "objective": 87.10123443603516, "ranking_simple": 0.6708333492279053, "regularize": 0.2035357505083084, "step": 845, "wo_beta": 3.961372137069702 }, { "dpo_loss": 0.1189335361123085, "epoch": 2.409069437883798, "grad_norm": 13250.743799157402, "learning_rate": 5.580025266360764e-07, "logits": -0.6523115038871765, "logps": -76.91999053955078, "loss": 60.559, "objective": 56.37547302246094, "ranking_simple": 0.6499999761581421, "regularize": 0.1189335361123085, "step": 850, "wo_beta": 4.629120349884033 }, { "epoch": 2.409069437883798, "eval_dpo_loss": 1.930367350578308, "eval_logits": -0.7166435718536377, "eval_logps": -81.43138122558594, "eval_loss": 876.0620727539062, "eval_objective": 834.962890625, "eval_ranking_simple": 0.5393374562263489, "eval_regularize": 1.930367350578308, "eval_runtime": 368.7191, "eval_samples_per_second": 15.703, "eval_steps_per_second": 1.31, "eval_wo_beta": 6.89687442779541, "step": 850 }, { "dpo_loss": 0.16136877238750458, "epoch": 2.423240434577232, "grad_norm": 16354.507131335233, "learning_rate": 5.322374155568688e-07, "logits": -0.6328111290931702, "logps": -76.62676239013672, "loss": 96.6161, "objective": 63.801761627197266, "ranking_simple": 0.6791666746139526, "regularize": 0.16136877238750458, "step": 855, "wo_beta": 3.592381238937378 }, { "dpo_loss": 0.22841231524944305, "epoch": 2.4374114312706663, "grad_norm": 16096.552601321706, "learning_rate": 5.070102711202606e-07, "logits": -0.5914830565452576, "logps": -77.95539855957031, "loss": 70.7855, "objective": 97.63321685791016, "ranking_simple": 0.6583333611488342, "regularize": 0.22841231524944305, "step": 860, "wo_beta": 4.532555103302002 }, { "dpo_loss": 0.13698126375675201, "epoch": 2.4515824279641003, "grad_norm": 26580.339293279554, "learning_rate": 4.823279901756498e-07, "logits": -0.6197033524513245, "logps": -76.97139739990234, "loss": 57.5479, "objective": 63.15699005126953, "ranking_simple": 0.6499999761581421, "regularize": 0.13698126375675201, "step": 865, "wo_beta": 4.650790691375732 }, { "dpo_loss": 0.17232166230678558, "epoch": 2.4657534246575343, "grad_norm": 14902.759735907886, "learning_rate": 4.581973206121948e-07, "logits": -0.6524402499198914, "logps": -77.39944458007812, "loss": 59.8115, "objective": 72.1148452758789, "ranking_simple": 0.6666666865348816, "regularize": 0.17232166230678558, "step": 870, "wo_beta": 4.716188430786133 }, { "dpo_loss": 0.16291117668151855, "epoch": 2.4799244213509684, "grad_norm": 16443.311561175098, "learning_rate": 4.3462485951401126e-07, "logits": -0.6056000590324402, "logps": -77.26061248779297, "loss": 58.545, "objective": 65.73771667480469, "ranking_simple": 0.6958333253860474, "regularize": 0.16291117668151855, "step": 875, "wo_beta": 3.9136276245117188 }, { "dpo_loss": 0.1831001490354538, "epoch": 2.4940954180444024, "grad_norm": 17222.63460091855, "learning_rate": 4.116170513565942e-07, "logits": -0.5453128218650818, "logps": -76.7745132446289, "loss": 81.4098, "objective": 87.2974853515625, "ranking_simple": 0.6458333134651184, "regularize": 0.1831001490354538, "step": 880, "wo_beta": 5.620166301727295 }, { "dpo_loss": 0.17733284831047058, "epoch": 2.5082664147378364, "grad_norm": 18608.993512491026, "learning_rate": 3.891801862449629e-07, "logits": -0.6945806741714478, "logps": -75.74263763427734, "loss": 64.9335, "objective": 71.06294250488281, "ranking_simple": 0.6791666746139526, "regularize": 0.17733284831047058, "step": 885, "wo_beta": 4.535309791564941 }, { "dpo_loss": 0.12178336828947067, "epoch": 2.5224374114312704, "grad_norm": 19369.013842624132, "learning_rate": 3.6732039819400686e-07, "logits": -0.6358047127723694, "logps": -74.91355895996094, "loss": 68.998, "objective": 51.5380859375, "ranking_simple": 0.6875, "regularize": 0.12178336828947067, "step": 890, "wo_beta": 3.4617159366607666 }, { "dpo_loss": 0.16680613160133362, "epoch": 2.536608408124705, "grad_norm": 14873.87723458358, "learning_rate": 3.46043663451511e-07, "logits": -0.6266335844993591, "logps": -77.49481964111328, "loss": 58.2508, "objective": 74.53083038330078, "ranking_simple": 0.6499999761581421, "regularize": 0.16680613160133362, "step": 895, "wo_beta": 4.595145225524902 }, { "dpo_loss": 0.153776153922081, "epoch": 2.550779404818139, "grad_norm": 20793.25603723828, "learning_rate": 3.253557988643072e-07, "logits": -0.5867082476615906, "logps": -76.2872085571289, "loss": 61.5447, "objective": 66.78315734863281, "ranking_simple": 0.6666666865348816, "regularize": 0.153776153922081, "step": 900, "wo_beta": 4.349160194396973 }, { "epoch": 2.550779404818139, "eval_dpo_loss": 1.9454582929611206, "eval_logits": -0.7119657397270203, "eval_logps": -81.622314453125, "eval_loss": 885.504638671875, "eval_objective": 842.5767822265625, "eval_ranking_simple": 0.5414078831672668, "eval_regularize": 1.9454582929611206, "eval_runtime": 368.0518, "eval_samples_per_second": 15.731, "eval_steps_per_second": 1.312, "eval_wo_beta": 6.90536642074585, "step": 900 }, { "dpo_loss": 0.1694839894771576, "epoch": 2.564950401511573, "grad_norm": 5383.724684748024, "learning_rate": 3.052624602880064e-07, "logits": -0.6292735934257507, "logps": -75.40677642822266, "loss": 55.1556, "objective": 73.89187622070312, "ranking_simple": 0.6916666626930237, "regularize": 0.1694839894771576, "step": 905, "wo_beta": 4.002458095550537 }, { "dpo_loss": 0.2758396863937378, "epoch": 2.579121398205007, "grad_norm": 21167.29586316348, "learning_rate": 2.8576914104074425e-07, "logits": -0.6043637990951538, "logps": -79.17253875732422, "loss": 71.7501, "objective": 115.38219451904297, "ranking_simple": 0.6833333373069763, "regularize": 0.2758396863937378, "step": 910, "wo_beta": 4.724976062774658 }, { "dpo_loss": 0.1418589949607849, "epoch": 2.593292394898441, "grad_norm": 18250.664807404537, "learning_rate": 2.6688117040136463e-07, "logits": -0.7428493499755859, "logps": -78.07086944580078, "loss": 62.2011, "objective": 52.23597717285156, "ranking_simple": 0.6916666626930237, "regularize": 0.1418589949607849, "step": 915, "wo_beta": 3.8614261150360107 }, { "dpo_loss": 0.13393595814704895, "epoch": 2.6074633915918755, "grad_norm": 12767.685735140785, "learning_rate": 2.486037121524448e-07, "logits": -0.5820329785346985, "logps": -77.22747039794922, "loss": 53.2699, "objective": 55.055419921875, "ranking_simple": 0.6916666626930237, "regularize": 0.13393595814704895, "step": 920, "wo_beta": 3.9748997688293457 }, { "dpo_loss": 0.11604820191860199, "epoch": 2.6216343882853095, "grad_norm": 10406.725183590128, "learning_rate": 2.3094176316856982e-07, "logits": -0.59433913230896, "logps": -76.20643615722656, "loss": 67.2785, "objective": 43.61358642578125, "ranking_simple": 0.6416666507720947, "regularize": 0.11604820191860199, "step": 925, "wo_beta": 4.829153537750244 }, { "dpo_loss": 0.1961955726146698, "epoch": 2.6358053849787435, "grad_norm": 13596.007234051527, "learning_rate": 2.13900152050239e-07, "logits": -0.6106235384941101, "logps": -76.37940979003906, "loss": 67.3097, "objective": 87.510986328125, "ranking_simple": 0.6291666626930237, "regularize": 0.1961955726146698, "step": 930, "wo_beta": 5.22459602355957 }, { "dpo_loss": 0.12966051697731018, "epoch": 2.6499763816721775, "grad_norm": 16260.320579834099, "learning_rate": 1.9748353780377234e-07, "logits": -0.5941105484962463, "logps": -75.61875915527344, "loss": 78.4459, "objective": 43.857093811035156, "ranking_simple": 0.6875, "regularize": 0.12966051697731018, "step": 935, "wo_beta": 3.68115234375 }, { "dpo_loss": 0.11212758719921112, "epoch": 2.6641473783656116, "grad_norm": 14569.700031823939, "learning_rate": 1.8169640856758652e-07, "logits": -0.6425444483757019, "logps": -78.66395568847656, "loss": 58.8761, "objective": 53.29667282104492, "ranking_simple": 0.6708333492279053, "regularize": 0.11212758719921112, "step": 940, "wo_beta": 4.617191791534424 }, { "dpo_loss": 0.13408087193965912, "epoch": 2.678318375059046, "grad_norm": 11373.74928233542, "learning_rate": 1.6654308038518057e-07, "logits": -0.6835525631904602, "logps": -76.78800201416016, "loss": 49.8562, "objective": 51.24209213256836, "ranking_simple": 0.6625000238418579, "regularize": 0.13408087193965912, "step": 945, "wo_beta": 4.127335071563721 }, { "dpo_loss": 0.2326124608516693, "epoch": 2.69248937175248, "grad_norm": 17533.557850413275, "learning_rate": 1.5202769602517514e-07, "logits": -0.5890699625015259, "logps": -76.48961639404297, "loss": 76.2992, "objective": 107.2320785522461, "ranking_simple": 0.6916666626930237, "regularize": 0.2326124608516693, "step": 950, "wo_beta": 4.0390706062316895 }, { "epoch": 2.69248937175248, "eval_dpo_loss": 1.9408873319625854, "eval_logits": -0.733562171459198, "eval_logps": -81.01062774658203, "eval_loss": 885.0243530273438, "eval_objective": 840.861572265625, "eval_ranking_simple": 0.5414078831672668, "eval_regularize": 1.9408873319625854, "eval_runtime": 368.3562, "eval_samples_per_second": 15.718, "eval_steps_per_second": 1.311, "eval_wo_beta": 6.8752055168151855, "step": 950 }, { "dpo_loss": 0.15838518738746643, "epoch": 2.706660368445914, "grad_norm": 19909.761661048542, "learning_rate": 1.381542238487188e-07, "logits": -0.6561439633369446, "logps": -76.67708587646484, "loss": 70.9999, "objective": 63.771217346191406, "ranking_simple": 0.6791666746139526, "regularize": 0.15838518738746643, "step": 955, "wo_beta": 4.371394157409668 }, { "dpo_loss": 0.14735987782478333, "epoch": 2.720831365139348, "grad_norm": 18552.81796156679, "learning_rate": 1.2492645672457838e-07, "logits": -0.6275337934494019, "logps": -76.68091583251953, "loss": 48.8316, "objective": 56.900169372558594, "ranking_simple": 0.6583333611488342, "regularize": 0.14735987782478333, "step": 960, "wo_beta": 4.69942569732666 }, { "dpo_loss": 0.1760532110929489, "epoch": 2.735002361832782, "grad_norm": 11215.906650051957, "learning_rate": 1.1234801099220787e-07, "logits": -0.663616955280304, "logps": -76.21281433105469, "loss": 74.0274, "objective": 69.9241714477539, "ranking_simple": 0.7291666865348816, "regularize": 0.1760532110929489, "step": 965, "wo_beta": 2.9462215900421143 }, { "dpo_loss": 0.21225102245807648, "epoch": 2.7491733585262166, "grad_norm": 22811.566451460014, "learning_rate": 1.004223254730749e-07, "logits": -0.6760108470916748, "logps": -77.40128326416016, "loss": 59.5453, "objective": 79.48020935058594, "ranking_simple": 0.7124999761581421, "regularize": 0.21225102245807648, "step": 970, "wo_beta": 3.1231398582458496 }, { "dpo_loss": 0.11088516563177109, "epoch": 2.7633443552196506, "grad_norm": 16204.377117749516, "learning_rate": 8.915266053052374e-08, "logits": -0.5741701126098633, "logps": -75.76372528076172, "loss": 55.3212, "objective": 38.7469596862793, "ranking_simple": 0.6916666626930237, "regularize": 0.11088516563177109, "step": 975, "wo_beta": 4.1251749992370605 }, { "dpo_loss": 0.11932215839624405, "epoch": 2.7775153519130846, "grad_norm": 18600.253407646793, "learning_rate": 7.854209717842231e-08, "logits": -0.6992437839508057, "logps": -76.46355438232422, "loss": 61.9886, "objective": 40.65220642089844, "ranking_simple": 0.6833333373069763, "regularize": 0.11932215839624405, "step": 980, "wo_beta": 3.3738327026367188 }, { "dpo_loss": 0.13639378547668457, "epoch": 2.7916863486065187, "grad_norm": 22167.749340913193, "learning_rate": 6.859353623884569e-08, "logits": -0.5681948065757751, "logps": -73.97103118896484, "loss": 57.9846, "objective": 50.542232513427734, "ranking_simple": 0.6875, "regularize": 0.13639378547668457, "step": 985, "wo_beta": 3.965052366256714 }, { "dpo_loss": 0.13361641764640808, "epoch": 2.8058573452999527, "grad_norm": 17499.837979849613, "learning_rate": 5.930969754901844e-08, "logits": -0.6173512935638428, "logps": -77.61453247070312, "loss": 48.6088, "objective": 48.143672943115234, "ranking_simple": 0.675000011920929, "regularize": 0.13361641764640808, "step": 990, "wo_beta": 4.875253677368164 }, { "dpo_loss": 0.15786290168762207, "epoch": 2.820028341993387, "grad_norm": 20060.653124706987, "learning_rate": 5.069311921774039e-08, "logits": -0.7026129364967346, "logps": -76.29402923583984, "loss": 74.7771, "objective": 64.83280944824219, "ranking_simple": 0.6875, "regularize": 0.15786290168762207, "step": 995, "wo_beta": 4.514121055603027 }, { "dpo_loss": 0.1805247962474823, "epoch": 2.8341993386868207, "grad_norm": 9634.51813321111, "learning_rate": 4.2746156931490756e-08, "logits": -0.604320228099823, "logps": -75.02464294433594, "loss": 65.003, "objective": 79.15603637695312, "ranking_simple": 0.6916666626930237, "regularize": 0.1805247962474823, "step": 1000, "wo_beta": 3.7478036880493164 }, { "epoch": 2.8341993386868207, "eval_dpo_loss": 1.938610553741455, "eval_logits": -0.7340191602706909, "eval_logps": -80.97093200683594, "eval_loss": 883.8386840820312, "eval_objective": 839.7760009765625, "eval_ranking_simple": 0.5419254899024963, "eval_regularize": 1.938610553741455, "eval_runtime": 369.6961, "eval_samples_per_second": 15.662, "eval_steps_per_second": 1.306, "eval_wo_beta": 6.880806922912598, "step": 1000 }, { "dpo_loss": 0.09317085891962051, "epoch": 2.848370335380255, "grad_norm": 14479.932099976699, "learning_rate": 3.547098331040916e-08, "logits": -0.6243438124656677, "logps": -75.73117065429688, "loss": 60.5699, "objective": 35.96461486816406, "ranking_simple": 0.6958333253860474, "regularize": 0.09317085891962051, "step": 1005, "wo_beta": 4.23464822769165 }, { "dpo_loss": 0.15720970928668976, "epoch": 2.862541332073689, "grad_norm": 17027.37402012935, "learning_rate": 2.8869587314321324e-08, "logits": -0.673160970211029, "logps": -75.78913879394531, "loss": 48.6492, "objective": 64.71520233154297, "ranking_simple": 0.6791666746139526, "regularize": 0.15720970928668976, "step": 1010, "wo_beta": 4.332252502441406 }, { "dpo_loss": 0.11788446456193924, "epoch": 2.8767123287671232, "grad_norm": 11230.189495808421, "learning_rate": 2.2943773698977935e-08, "logits": -0.6236141324043274, "logps": -75.31798553466797, "loss": 55.6176, "objective": 39.647281646728516, "ranking_simple": 0.7166666388511658, "regularize": 0.11788446456193924, "step": 1015, "wo_beta": 3.647088050842285 }, { "dpo_loss": 0.16658252477645874, "epoch": 2.8908833254605573, "grad_norm": 12864.832924042903, "learning_rate": 1.7695162522652352e-08, "logits": -0.5761324167251587, "logps": -76.70230865478516, "loss": 62.3645, "objective": 71.47562408447266, "ranking_simple": 0.6333333253860474, "regularize": 0.16658252477645874, "step": 1020, "wo_beta": 5.046957969665527 }, { "dpo_loss": 0.12362422049045563, "epoch": 2.9050543221539913, "grad_norm": 12894.829353554518, "learning_rate": 1.3125188703233815e-08, "logits": -0.681283712387085, "logps": -76.55783081054688, "loss": 58.5196, "objective": 48.40925598144531, "ranking_simple": 0.7124999761581421, "regularize": 0.12362422049045563, "step": 1025, "wo_beta": 3.9699738025665283 }, { "dpo_loss": 0.14537376165390015, "epoch": 2.9192253188474258, "grad_norm": 14896.788895915139, "learning_rate": 9.235101625932885e-09, "logits": -0.6897163391113281, "logps": -75.82958984375, "loss": 60.8222, "objective": 57.95526123046875, "ranking_simple": 0.625, "regularize": 0.14537376165390015, "step": 1030, "wo_beta": 4.4757466316223145 }, { "dpo_loss": 0.21178776025772095, "epoch": 2.9333963155408598, "grad_norm": 19672.735817554352, "learning_rate": 6.025964801714412e-09, "logits": -0.6465732455253601, "logps": -77.34451293945312, "loss": 93.2003, "objective": 83.75788116455078, "ranking_simple": 0.6583333611488342, "regularize": 0.21178776025772095, "step": 1035, "wo_beta": 3.9459071159362793 }, { "dpo_loss": 0.16617676615715027, "epoch": 2.947567312234294, "grad_norm": 13684.295320753776, "learning_rate": 3.4986555765434415e-09, "logits": -0.6697372794151306, "logps": -77.15235137939453, "loss": 51.8377, "objective": 61.92232894897461, "ranking_simple": 0.6458333134651184, "regularize": 0.16617676615715027, "step": 1040, "wo_beta": 4.602719783782959 }, { "dpo_loss": 0.14166632294654846, "epoch": 2.961738308927728, "grad_norm": 19931.645045536377, "learning_rate": 1.6538648915270794e-09, "logits": -0.6000397801399231, "logps": -79.05880737304688, "loss": 63.6317, "objective": 56.86110305786133, "ranking_simple": 0.6833333373069763, "regularize": 0.14166632294654846, "step": 1045, "wo_beta": 5.119239330291748 }, { "dpo_loss": 0.1613662987947464, "epoch": 2.975909305621162, "grad_norm": 14762.074424664803, "learning_rate": 4.920970940180958e-10, "logits": -0.6614073514938354, "logps": -74.51754760742188, "loss": 59.5302, "objective": 55.57144546508789, "ranking_simple": 0.7041666507720947, "regularize": 0.1613662987947464, "step": 1050, "wo_beta": 3.768414258956909 }, { "epoch": 2.975909305621162, "eval_dpo_loss": 1.9391295909881592, "eval_logits": -0.7331260442733765, "eval_logps": -80.98583984375, "eval_loss": 883.8082885742188, "eval_objective": 840.0045166015625, "eval_ranking_simple": 0.5419254899024963, "eval_regularize": 1.9391295909881592, "eval_runtime": 368.7556, "eval_samples_per_second": 15.701, "eval_steps_per_second": 1.31, "eval_wo_beta": 6.885995388031006, "step": 1050 }, { "dpo_loss": 0.18576760590076447, "epoch": 2.9900803023145963, "grad_norm": 14293.757478034773, "learning_rate": 1.3669799732163314e-11, "logits": -0.6282141208648682, "logps": -75.39889526367188, "loss": 57.2359, "objective": 80.12165832519531, "ranking_simple": 0.6708333492279053, "regularize": 0.18576760590076447, "step": 1055, "wo_beta": 4.444089412689209 }, { "epoch": 2.992914501653283, "step": 1056, "total_flos": 0.0, "train_loss": 275.78432337443036, "train_runtime": 34690.4645, "train_samples_per_second": 4.393, "train_steps_per_second": 0.03 } ], "logging_steps": 5, "max_steps": 1056, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": false, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }