qwen2.5-0.5b-expo-DPO-W0-noES5-1 / trainer_state.json
hZzy's picture
Model save
9068903 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.992914501653283,
"eval_steps": 50,
"global_step": 1056,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"dpo_loss": 0.6931471824645996,
"epoch": 0.002834199338686821,
"grad_norm": 88378.40404084016,
"learning_rate": 4.716981132075472e-08,
"logits": -1.2867579460144043,
"logps": -84.34933471679688,
"loss": 288.6135,
"objective": 277.7129821777344,
"ranking_simple": 0.5833333134651184,
"regularize": 0.6931471824645996,
"step": 1,
"wo_beta": 5.2708282470703125
},
{
"dpo_loss": 0.6890408396720886,
"epoch": 0.014170996693434105,
"grad_norm": 83739.33254900719,
"learning_rate": 2.358490566037736e-07,
"logits": -1.429946780204773,
"logps": -83.76575469970703,
"loss": 299.2071,
"objective": 287.1035461425781,
"ranking_simple": 0.4895833432674408,
"regularize": 0.6890408396720886,
"step": 5,
"wo_beta": 7.664237976074219
},
{
"dpo_loss": 0.6906710267066956,
"epoch": 0.02834199338686821,
"grad_norm": 87871.922788759,
"learning_rate": 4.716981132075472e-07,
"logits": -1.4002543687820435,
"logps": -84.44983673095703,
"loss": 290.579,
"objective": 288.7486572265625,
"ranking_simple": 0.574999988079071,
"regularize": 0.6906710267066956,
"step": 10,
"wo_beta": 5.951157569885254
},
{
"dpo_loss": 0.719412624835968,
"epoch": 0.042512990080302314,
"grad_norm": 104628.71690566906,
"learning_rate": 7.075471698113208e-07,
"logits": -1.5360398292541504,
"logps": -84.26319122314453,
"loss": 299.0445,
"objective": 310.48016357421875,
"ranking_simple": 0.5708333253860474,
"regularize": 0.719412624835968,
"step": 15,
"wo_beta": 6.623058795928955
},
{
"dpo_loss": 0.6358036398887634,
"epoch": 0.05668398677373642,
"grad_norm": 98064.26779815872,
"learning_rate": 9.433962264150944e-07,
"logits": -1.3698803186416626,
"logps": -83.57228088378906,
"loss": 297.1517,
"objective": 276.2193908691406,
"ranking_simple": 0.48750001192092896,
"regularize": 0.6358036398887634,
"step": 20,
"wo_beta": 8.147703170776367
},
{
"dpo_loss": 0.7541502714157104,
"epoch": 0.07085498346717052,
"grad_norm": 117575.06586226003,
"learning_rate": 1.179245283018868e-06,
"logits": -1.3847885131835938,
"logps": -83.47090148925781,
"loss": 313.7878,
"objective": 312.7436218261719,
"ranking_simple": 0.5708333253860474,
"regularize": 0.7541502714157104,
"step": 25,
"wo_beta": 6.331053256988525
},
{
"dpo_loss": 0.8459195494651794,
"epoch": 0.08502598016060463,
"grad_norm": 83498.77667145648,
"learning_rate": 1.4150943396226415e-06,
"logits": -1.445679783821106,
"logps": -83.64388275146484,
"loss": 334.7166,
"objective": 370.2745361328125,
"ranking_simple": 0.49166667461395264,
"regularize": 0.8459195494651794,
"step": 30,
"wo_beta": 8.90923023223877
},
{
"dpo_loss": 0.8700669407844543,
"epoch": 0.09919697685403873,
"grad_norm": 87674.9381300825,
"learning_rate": 1.650943396226415e-06,
"logits": -1.4222424030303955,
"logps": -83.19346618652344,
"loss": 358.5594,
"objective": 387.00079345703125,
"ranking_simple": 0.5416666865348816,
"regularize": 0.8700669407844543,
"step": 35,
"wo_beta": 6.324372291564941
},
{
"dpo_loss": 0.7968003749847412,
"epoch": 0.11336797354747284,
"grad_norm": 94029.82232454565,
"learning_rate": 1.8867924528301889e-06,
"logits": -1.3892769813537598,
"logps": -84.3359146118164,
"loss": 335.1716,
"objective": 350.38433837890625,
"ranking_simple": 0.5,
"regularize": 0.7968003749847412,
"step": 40,
"wo_beta": 6.651224136352539
},
{
"dpo_loss": 0.8314542770385742,
"epoch": 0.12753897024090693,
"grad_norm": 107131.70166266784,
"learning_rate": 2.1226415094339624e-06,
"logits": -1.3818415403366089,
"logps": -83.095458984375,
"loss": 392.6496,
"objective": 352.84722900390625,
"ranking_simple": 0.512499988079071,
"regularize": 0.8314542770385742,
"step": 45,
"wo_beta": 6.450110912322998
},
{
"dpo_loss": 0.9717784523963928,
"epoch": 0.14170996693434104,
"grad_norm": 85511.36737100256,
"learning_rate": 2.358490566037736e-06,
"logits": -1.4050464630126953,
"logps": -84.67338562011719,
"loss": 421.2896,
"objective": 429.39739990234375,
"ranking_simple": 0.5249999761581421,
"regularize": 0.9717784523963928,
"step": 50,
"wo_beta": 7.381195068359375
},
{
"epoch": 0.14170996693434104,
"eval_dpo_loss": 0.9812284708023071,
"eval_logits": -1.3988816738128662,
"eval_logps": -92.03357696533203,
"eval_loss": 429.86468505859375,
"eval_objective": 424.89910888671875,
"eval_ranking_simple": 0.5243270993232727,
"eval_regularize": 0.9812284708023071,
"eval_runtime": 369.9673,
"eval_samples_per_second": 15.65,
"eval_steps_per_second": 1.306,
"eval_wo_beta": 7.807825565338135,
"step": 50
},
{
"dpo_loss": 1.085899829864502,
"epoch": 0.15588096362777515,
"grad_norm": 86740.35819020051,
"learning_rate": 2.5943396226415095e-06,
"logits": -1.3797937631607056,
"logps": -84.61592102050781,
"loss": 438.6828,
"objective": 554.507080078125,
"ranking_simple": 0.5083333253860474,
"regularize": 1.085899829864502,
"step": 55,
"wo_beta": 7.282232761383057
},
{
"dpo_loss": 0.881396472454071,
"epoch": 0.17005196032120926,
"grad_norm": 73943.20904640444,
"learning_rate": 2.830188679245283e-06,
"logits": -1.3378223180770874,
"logps": -82.09742736816406,
"loss": 396.9731,
"objective": 387.7957763671875,
"ranking_simple": 0.5375000238418579,
"regularize": 0.881396472454071,
"step": 60,
"wo_beta": 6.004281997680664
},
{
"dpo_loss": 0.9917481541633606,
"epoch": 0.18422295701464336,
"grad_norm": 67818.91421902912,
"learning_rate": 3.0660377358490567e-06,
"logits": -1.4089940786361694,
"logps": -82.27486419677734,
"loss": 421.1799,
"objective": 443.5271911621094,
"ranking_simple": 0.5541666746139526,
"regularize": 0.9917481541633606,
"step": 65,
"wo_beta": 6.279632091522217
},
{
"dpo_loss": 0.911385178565979,
"epoch": 0.19839395370807747,
"grad_norm": 65454.0541533192,
"learning_rate": 3.30188679245283e-06,
"logits": -1.4254525899887085,
"logps": -82.29732513427734,
"loss": 383.1954,
"objective": 402.9763488769531,
"ranking_simple": 0.550000011920929,
"regularize": 0.911385178565979,
"step": 70,
"wo_beta": 6.741910934448242
},
{
"dpo_loss": 1.0663002729415894,
"epoch": 0.21256495040151158,
"grad_norm": 88434.22176378245,
"learning_rate": 3.5377358490566038e-06,
"logits": -1.327197551727295,
"logps": -82.15841674804688,
"loss": 495.8337,
"objective": 501.70574951171875,
"ranking_simple": 0.5541666746139526,
"regularize": 1.0663002729415894,
"step": 75,
"wo_beta": 6.144163131713867
},
{
"dpo_loss": 1.0284979343414307,
"epoch": 0.22673594709494568,
"grad_norm": 81588.2874349178,
"learning_rate": 3.7735849056603777e-06,
"logits": -1.3877789974212646,
"logps": -82.84284210205078,
"loss": 443.4796,
"objective": 458.9221496582031,
"ranking_simple": 0.5833333134651184,
"regularize": 1.0284979343414307,
"step": 80,
"wo_beta": 5.538651466369629
},
{
"dpo_loss": 1.2060002088546753,
"epoch": 0.2409069437883798,
"grad_norm": 50570.51985294035,
"learning_rate": 4.009433962264152e-06,
"logits": -1.325385570526123,
"logps": -85.86609649658203,
"loss": 454.4117,
"objective": 512.2498168945312,
"ranking_simple": 0.5708333253860474,
"regularize": 1.2060002088546753,
"step": 85,
"wo_beta": 5.5450663566589355
},
{
"dpo_loss": 0.8621365427970886,
"epoch": 0.25507794048181387,
"grad_norm": 64042.236837982855,
"learning_rate": 4.245283018867925e-06,
"logits": -1.3326818943023682,
"logps": -87.87580108642578,
"loss": 468.0205,
"objective": 382.29150390625,
"ranking_simple": 0.5874999761581421,
"regularize": 0.8621365427970886,
"step": 90,
"wo_beta": 6.040257930755615
},
{
"dpo_loss": 1.0820232629776,
"epoch": 0.269248937175248,
"grad_norm": 73856.79865687525,
"learning_rate": 4.481132075471699e-06,
"logits": -1.2569290399551392,
"logps": -87.48131561279297,
"loss": 451.8957,
"objective": 499.0147705078125,
"ranking_simple": 0.5208333134651184,
"regularize": 1.0820232629776,
"step": 95,
"wo_beta": 7.041137218475342
},
{
"dpo_loss": 0.9895243048667908,
"epoch": 0.2834199338686821,
"grad_norm": 69854.16899039333,
"learning_rate": 4.716981132075472e-06,
"logits": -1.1859853267669678,
"logps": -85.07958221435547,
"loss": 504.7676,
"objective": 428.0093688964844,
"ranking_simple": 0.5541666746139526,
"regularize": 0.9895243048667908,
"step": 100,
"wo_beta": 6.106683731079102
},
{
"epoch": 0.2834199338686821,
"eval_dpo_loss": 1.2757266759872437,
"eval_logits": -1.2545620203018188,
"eval_logps": -92.60501861572266,
"eval_loss": 586.443115234375,
"eval_objective": 565.0955810546875,
"eval_ranking_simple": 0.533643901348114,
"eval_regularize": 1.2757266759872437,
"eval_runtime": 368.7119,
"eval_samples_per_second": 15.703,
"eval_steps_per_second": 1.31,
"eval_wo_beta": 7.499707221984863,
"step": 100
},
{
"dpo_loss": 1.5464370250701904,
"epoch": 0.2975909305621162,
"grad_norm": 81924.04645083607,
"learning_rate": 4.952830188679246e-06,
"logits": -1.3069990873336792,
"logps": -87.30802154541016,
"loss": 621.3279,
"objective": 697.1328125,
"ranking_simple": 0.5874999761581421,
"regularize": 1.5464370250701904,
"step": 105,
"wo_beta": 5.294255256652832
},
{
"dpo_loss": 1.3242223262786865,
"epoch": 0.3117619272555503,
"grad_norm": 67213.20348371088,
"learning_rate": 4.999781286194085e-06,
"logits": -1.2264933586120605,
"logps": -82.50062561035156,
"loss": 590.9299,
"objective": 569.9200439453125,
"ranking_simple": 0.5208333134651184,
"regularize": 1.3242223262786865,
"step": 110,
"wo_beta": 5.986863136291504
},
{
"dpo_loss": 1.50898015499115,
"epoch": 0.32593292394898443,
"grad_norm": 66001.4469324612,
"learning_rate": 4.998892826944418e-06,
"logits": -1.1785295009613037,
"logps": -78.3976821899414,
"loss": 613.2373,
"objective": 647.5435180664062,
"ranking_simple": 0.5583333373069763,
"regularize": 1.50898015499115,
"step": 115,
"wo_beta": 6.424834251403809
},
{
"dpo_loss": 1.3131033182144165,
"epoch": 0.3401039206424185,
"grad_norm": 71675.69603134823,
"learning_rate": 4.997321195347154e-06,
"logits": -1.1258571147918701,
"logps": -79.26954650878906,
"loss": 580.6179,
"objective": 548.9010009765625,
"ranking_simple": 0.5625,
"regularize": 1.3131033182144165,
"step": 120,
"wo_beta": 6.872457027435303
},
{
"dpo_loss": 1.1034477949142456,
"epoch": 0.35427491733585265,
"grad_norm": 66579.26081621695,
"learning_rate": 4.9950668210706795e-06,
"logits": -1.193581223487854,
"logps": -78.10294342041016,
"loss": 503.3348,
"objective": 437.7645568847656,
"ranking_simple": 0.5583333373069763,
"regularize": 1.1034477949142456,
"step": 125,
"wo_beta": 6.386569023132324
},
{
"dpo_loss": 1.4382485151290894,
"epoch": 0.3684459140292867,
"grad_norm": 55084.45931854072,
"learning_rate": 4.992130320438411e-06,
"logits": -1.253204345703125,
"logps": -73.60761260986328,
"loss": 670.9436,
"objective": 778.9769287109375,
"ranking_simple": 0.637499988079071,
"regularize": 1.4382485151290894,
"step": 130,
"wo_beta": 4.876318454742432
},
{
"dpo_loss": 1.1328778266906738,
"epoch": 0.3826169107227208,
"grad_norm": 54685.712604550725,
"learning_rate": 4.988512496260302e-06,
"logits": -1.3233076333999634,
"logps": -72.83162689208984,
"loss": 579.4298,
"objective": 505.5152282714844,
"ranking_simple": 0.5208333134651184,
"regularize": 1.1328778266906738,
"step": 135,
"wo_beta": 5.750979423522949
},
{
"dpo_loss": 1.8627400398254395,
"epoch": 0.39678790741615494,
"grad_norm": 76839.6853222187,
"learning_rate": 4.984214337613357e-06,
"logits": -1.3366682529449463,
"logps": -71.90711212158203,
"loss": 685.0747,
"objective": 835.1346435546875,
"ranking_simple": 0.5416666865348816,
"regularize": 1.8627400398254395,
"step": 140,
"wo_beta": 6.485233783721924
},
{
"dpo_loss": 1.5502649545669556,
"epoch": 0.410958904109589,
"grad_norm": 66610.05959697074,
"learning_rate": 4.979237019571235e-06,
"logits": -1.3182580471038818,
"logps": -74.45819854736328,
"loss": 704.9736,
"objective": 741.4519653320312,
"ranking_simple": 0.6208333373069763,
"regularize": 1.5502649545669556,
"step": 145,
"wo_beta": 5.113797187805176
},
{
"dpo_loss": 1.3416301012039185,
"epoch": 0.42512990080302315,
"grad_norm": 57008.90120770728,
"learning_rate": 4.97358190288299e-06,
"logits": -1.2401268482208252,
"logps": -76.36083984375,
"loss": 647.489,
"objective": 566.4555053710938,
"ranking_simple": 0.5708333253860474,
"regularize": 1.3416301012039185,
"step": 150,
"wo_beta": 7.6363630294799805
},
{
"epoch": 0.42512990080302315,
"eval_dpo_loss": 1.820163607597351,
"eval_logits": -1.282083511352539,
"eval_logps": -81.57704162597656,
"eval_loss": 806.8840942382812,
"eval_objective": 788.4058227539062,
"eval_ranking_simple": 0.5367494821548462,
"eval_regularize": 1.820163607597351,
"eval_runtime": 370.4907,
"eval_samples_per_second": 15.628,
"eval_steps_per_second": 1.304,
"eval_wo_beta": 7.2971673011779785,
"step": 150
},
{
"dpo_loss": 1.201590657234192,
"epoch": 0.43930089749645723,
"grad_norm": 53158.65039781518,
"learning_rate": 4.967250533601059e-06,
"logits": -1.2983914613723755,
"logps": -76.4615707397461,
"loss": 621.7593,
"objective": 591.6034545898438,
"ranking_simple": 0.5625,
"regularize": 1.201590657234192,
"step": 155,
"wo_beta": 6.331362247467041
},
{
"dpo_loss": 1.8209228515625,
"epoch": 0.45347189418989137,
"grad_norm": 58324.46781297966,
"learning_rate": 4.9602446426585845e-06,
"logits": -1.227155089378357,
"logps": -76.60499572753906,
"loss": 653.8297,
"objective": 777.4675903320312,
"ranking_simple": 0.5541666746139526,
"regularize": 1.8209228515625,
"step": 160,
"wo_beta": 6.163790225982666
},
{
"dpo_loss": 1.1324195861816406,
"epoch": 0.46764289088332545,
"grad_norm": 58174.78839479279,
"learning_rate": 4.952566145396197e-06,
"logits": -1.3118336200714111,
"logps": -77.7535171508789,
"loss": 655.7216,
"objective": 464.7123107910156,
"ranking_simple": 0.5958333611488342,
"regularize": 1.1324195861816406,
"step": 165,
"wo_beta": 5.644231796264648
},
{
"dpo_loss": 1.3175557851791382,
"epoch": 0.4818138875767596,
"grad_norm": 47074.837544146685,
"learning_rate": 4.944217141038379e-06,
"logits": -1.2680017948150635,
"logps": -77.98702239990234,
"loss": 553.8126,
"objective": 550.3071899414062,
"ranking_simple": 0.5375000238418579,
"regularize": 1.3175557851791382,
"step": 170,
"wo_beta": 6.271345615386963
},
{
"dpo_loss": 1.528406023979187,
"epoch": 0.49598488427019366,
"grad_norm": 51080.30959388274,
"learning_rate": 4.935199912119558e-06,
"logits": -1.1689633131027222,
"logps": -78.72420501708984,
"loss": 669.8391,
"objective": 715.664794921875,
"ranking_simple": 0.550000011920929,
"regularize": 1.528406023979187,
"step": 175,
"wo_beta": 6.365441799163818
},
{
"dpo_loss": 1.5439296960830688,
"epoch": 0.5101558809636277,
"grad_norm": 58721.23782221805,
"learning_rate": 4.925516923860083e-06,
"logits": -1.1620502471923828,
"logps": -77.92072296142578,
"loss": 615.0646,
"objective": 768.5841674804688,
"ranking_simple": 0.5166666507720947,
"regularize": 1.5439296960830688,
"step": 180,
"wo_beta": 6.549993991851807
},
{
"dpo_loss": 1.295415997505188,
"epoch": 0.5243268776570619,
"grad_norm": 57823.70553626485,
"learning_rate": 4.9151708234922605e-06,
"logits": -1.1856701374053955,
"logps": -75.32164764404297,
"loss": 501.8101,
"objective": 566.3171997070312,
"ranking_simple": 0.574999988079071,
"regularize": 1.295415997505188,
"step": 185,
"wo_beta": 5.50905704498291
},
{
"dpo_loss": 1.4275904893875122,
"epoch": 0.538497874350496,
"grad_norm": 71863.24536234359,
"learning_rate": 4.904164439536626e-06,
"logits": -1.189324140548706,
"logps": -73.93627166748047,
"loss": 635.339,
"objective": 705.5938110351562,
"ranking_simple": 0.6333333253860474,
"regularize": 1.4275904893875122,
"step": 190,
"wo_beta": 5.4983296394348145
},
{
"dpo_loss": 1.2087292671203613,
"epoch": 0.5526688710439301,
"grad_norm": 49718.992888304776,
"learning_rate": 4.8925007810286555e-06,
"logits": -1.1961599588394165,
"logps": -73.1677474975586,
"loss": 604.9664,
"objective": 597.218994140625,
"ranking_simple": 0.625,
"regularize": 1.2087292671203613,
"step": 195,
"wo_beta": 5.489840507507324
},
{
"dpo_loss": 1.2826271057128906,
"epoch": 0.5668398677373642,
"grad_norm": 47393.96595402916,
"learning_rate": 4.880183036696123e-06,
"logits": -1.1661118268966675,
"logps": -72.84329986572266,
"loss": 549.7892,
"objective": 592.0448608398438,
"ranking_simple": 0.5708333253860474,
"regularize": 1.2826271057128906,
"step": 200,
"wo_beta": 6.384205341339111
},
{
"epoch": 0.5668398677373642,
"eval_dpo_loss": 1.888936996459961,
"eval_logits": -1.1824791431427002,
"eval_logps": -76.20477294921875,
"eval_loss": 883.9945678710938,
"eval_objective": 832.8786010742188,
"eval_ranking_simple": 0.531573474407196,
"eval_regularize": 1.888936996459961,
"eval_runtime": 375.7585,
"eval_samples_per_second": 15.409,
"eval_steps_per_second": 1.285,
"eval_wo_beta": 7.189944267272949,
"step": 200
},
{
"dpo_loss": 1.4287751913070679,
"epoch": 0.5810108644307983,
"grad_norm": 61461.28761463611,
"learning_rate": 4.867214574087338e-06,
"logits": -0.9886723160743713,
"logps": -72.71061706542969,
"loss": 664.0259,
"objective": 768.5003662109375,
"ranking_simple": 0.5791666507720947,
"regularize": 1.4287751913070679,
"step": 205,
"wo_beta": 6.914271831512451
},
{
"dpo_loss": 1.3722679615020752,
"epoch": 0.5951818611242324,
"grad_norm": 56025.34447718605,
"learning_rate": 4.853598938650487e-06,
"logits": -1.004089593887329,
"logps": -71.4341049194336,
"loss": 678.2527,
"objective": 627.8658447265625,
"ranking_simple": 0.6166666746139526,
"regularize": 1.3722679615020752,
"step": 210,
"wo_beta": 5.539144992828369
},
{
"dpo_loss": 1.2253851890563965,
"epoch": 0.6093528578176665,
"grad_norm": 48935.729292866636,
"learning_rate": 4.8393398527643495e-06,
"logits": -1.0572926998138428,
"logps": -71.53670501708984,
"loss": 542.6101,
"objective": 526.205810546875,
"ranking_simple": 0.5666666626930237,
"regularize": 1.2253851890563965,
"step": 215,
"wo_beta": 6.424313545227051
},
{
"dpo_loss": 1.1790845394134521,
"epoch": 0.6235238545111006,
"grad_norm": 50799.22260603063,
"learning_rate": 4.824441214720629e-06,
"logits": -1.100419521331787,
"logps": -72.95840454101562,
"loss": 580.133,
"objective": 527.5165405273438,
"ranking_simple": 0.550000011920929,
"regularize": 1.1790845394134521,
"step": 220,
"wo_beta": 6.990549087524414
},
{
"dpo_loss": 1.325684666633606,
"epoch": 0.6376948512045347,
"grad_norm": 43237.99565272054,
"learning_rate": 4.808907097658205e-06,
"logits": -1.0687224864959717,
"logps": -73.86213684082031,
"loss": 599.2142,
"objective": 594.8961791992188,
"ranking_simple": 0.5708333253860474,
"regularize": 1.325684666633606,
"step": 225,
"wo_beta": 6.574854373931885
},
{
"dpo_loss": 1.0051558017730713,
"epoch": 0.6518658478979689,
"grad_norm": 46348.61858741176,
"learning_rate": 4.7927417484495756e-06,
"logits": -0.8813047409057617,
"logps": -73.6087875366211,
"loss": 497.2491,
"objective": 420.7375793457031,
"ranking_simple": 0.6416666507720947,
"regularize": 1.0051558017730713,
"step": 230,
"wo_beta": 4.896794319152832
},
{
"dpo_loss": 1.4625457525253296,
"epoch": 0.6660368445914029,
"grad_norm": 49199.93289752187,
"learning_rate": 4.7759495865398035e-06,
"logits": -0.757663369178772,
"logps": -76.35986328125,
"loss": 592.7314,
"objective": 588.2656860351562,
"ranking_simple": 0.574999988079071,
"regularize": 1.4625457525253296,
"step": 235,
"wo_beta": 6.098875522613525
},
{
"dpo_loss": 1.234517216682434,
"epoch": 0.680207841284837,
"grad_norm": 57833.981683398764,
"learning_rate": 4.758535202738287e-06,
"logits": -0.9619929790496826,
"logps": -75.74427032470703,
"loss": 554.4981,
"objective": 601.7564086914062,
"ranking_simple": 0.5958333611488342,
"regularize": 1.234517216682434,
"step": 240,
"wo_beta": 6.564117908477783
},
{
"dpo_loss": 1.3769992589950562,
"epoch": 0.6943788379782712,
"grad_norm": 53444.90688249006,
"learning_rate": 4.740503357963676e-06,
"logits": -0.990798830986023,
"logps": -74.93412780761719,
"loss": 543.6711,
"objective": 609.6189575195312,
"ranking_simple": 0.574999988079071,
"regularize": 1.3769992589950562,
"step": 245,
"wo_beta": 5.855550765991211
},
{
"dpo_loss": 1.1388071775436401,
"epoch": 0.7085498346717053,
"grad_norm": 46909.61647475604,
"learning_rate": 4.721858981942284e-06,
"logits": -0.8007088899612427,
"logps": -74.202880859375,
"loss": 598.0575,
"objective": 542.016357421875,
"ranking_simple": 0.6166666746139526,
"regularize": 1.1388071775436401,
"step": 250,
"wo_beta": 5.385839939117432
},
{
"epoch": 0.7085498346717053,
"eval_dpo_loss": 2.010594367980957,
"eval_logits": -1.049426794052124,
"eval_logps": -79.15605926513672,
"eval_loss": 912.8013916015625,
"eval_objective": 878.098388671875,
"eval_ranking_simple": 0.531573474407196,
"eval_regularize": 2.010594367980957,
"eval_runtime": 369.3016,
"eval_samples_per_second": 15.678,
"eval_steps_per_second": 1.308,
"eval_wo_beta": 7.141663551330566,
"step": 250
},
{
"dpo_loss": 1.0299155712127686,
"epoch": 0.7227208313651393,
"grad_norm": 34955.92659194729,
"learning_rate": 4.702607171860354e-06,
"logits": -1.0210615396499634,
"logps": -74.22802734375,
"loss": 541.174,
"objective": 435.53558349609375,
"ranking_simple": 0.5375000238418579,
"regularize": 1.0299155712127686,
"step": 255,
"wo_beta": 6.036287307739258
},
{
"dpo_loss": 1.2304381132125854,
"epoch": 0.7368918280585735,
"grad_norm": 43356.96437216858,
"learning_rate": 4.682753190970533e-06,
"logits": -0.9648634195327759,
"logps": -76.73786163330078,
"loss": 536.6063,
"objective": 601.1241455078125,
"ranking_simple": 0.5666666626930237,
"regularize": 1.2304381132125854,
"step": 260,
"wo_beta": 6.554741382598877
},
{
"dpo_loss": 1.1359996795654297,
"epoch": 0.7510628247520076,
"grad_norm": 42994.029974385,
"learning_rate": 4.6623024671529555e-06,
"logits": -0.904022753238678,
"logps": -76.38091278076172,
"loss": 493.3245,
"objective": 537.385498046875,
"ranking_simple": 0.6541666388511658,
"regularize": 1.1359996795654297,
"step": 265,
"wo_beta": 5.8244123458862305
},
{
"dpo_loss": 1.0946044921875,
"epoch": 0.7652338214454416,
"grad_norm": 47060.47697517327,
"learning_rate": 4.641260591431315e-06,
"logits": -0.954149067401886,
"logps": -77.31507110595703,
"loss": 584.1508,
"objective": 473.41119384765625,
"ranking_simple": 0.5375000238418579,
"regularize": 1.0946044921875,
"step": 270,
"wo_beta": 5.909613609313965
},
{
"dpo_loss": 1.2381278276443481,
"epoch": 0.7794048181388757,
"grad_norm": 50524.69195776784,
"learning_rate": 4.61963331644433e-06,
"logits": -0.8554438352584839,
"logps": -76.56681060791016,
"loss": 567.9371,
"objective": 556.9308471679688,
"ranking_simple": 0.5625,
"regularize": 1.2381278276443481,
"step": 275,
"wo_beta": 5.9904327392578125
},
{
"dpo_loss": 0.8290544748306274,
"epoch": 0.7935758148323099,
"grad_norm": 52340.88871917512,
"learning_rate": 4.597426554873037e-06,
"logits": -0.8973789811134338,
"logps": -76.66759490966797,
"loss": 468.9087,
"objective": 375.5177307128906,
"ranking_simple": 0.6000000238418579,
"regularize": 0.8290544748306274,
"step": 280,
"wo_beta": 5.502625465393066
},
{
"dpo_loss": 1.0164730548858643,
"epoch": 0.807746811525744,
"grad_norm": 55654.623332201154,
"learning_rate": 4.574646377824316e-06,
"logits": -0.8480007648468018,
"logps": -78.14274597167969,
"loss": 473.5023,
"objective": 487.4198303222656,
"ranking_simple": 0.5458333492279053,
"regularize": 1.0164730548858643,
"step": 285,
"wo_beta": 6.672605037689209
},
{
"dpo_loss": 1.0201401710510254,
"epoch": 0.821917808219178,
"grad_norm": 48563.30285486188,
"learning_rate": 4.551299013171111e-06,
"logits": -0.7914489507675171,
"logps": -77.2682113647461,
"loss": 444.8455,
"objective": 464.7095642089844,
"ranking_simple": 0.5666666626930237,
"regularize": 1.0201401710510254,
"step": 290,
"wo_beta": 5.619294166564941
},
{
"dpo_loss": 1.2212320566177368,
"epoch": 0.8360888049126122,
"grad_norm": 49233.92704031764,
"learning_rate": 4.5273908438498e-06,
"logits": -0.8363037705421448,
"logps": -77.95922088623047,
"loss": 599.433,
"objective": 584.8712158203125,
"ranking_simple": 0.5416666865348816,
"regularize": 1.2212320566177368,
"step": 295,
"wo_beta": 6.878579139709473
},
{
"dpo_loss": 0.9529095888137817,
"epoch": 0.8502598016060463,
"grad_norm": 38375.982543414415,
"learning_rate": 4.502928406115152e-06,
"logits": -0.7699640989303589,
"logps": -77.4969482421875,
"loss": 490.4698,
"objective": 387.5968322753906,
"ranking_simple": 0.625,
"regularize": 0.9529095888137817,
"step": 300,
"wo_beta": 4.446579933166504
},
{
"epoch": 0.8502598016060463,
"eval_dpo_loss": 2.048867702484131,
"eval_logits": -0.8242712616920471,
"eval_logps": -84.34913635253906,
"eval_loss": 908.3519287109375,
"eval_objective": 883.9851684570312,
"eval_ranking_simple": 0.5372670888900757,
"eval_regularize": 2.048867702484131,
"eval_runtime": 368.9144,
"eval_samples_per_second": 15.695,
"eval_steps_per_second": 1.309,
"eval_wo_beta": 6.972108840942383,
"step": 300
},
{
"dpo_loss": 1.360299825668335,
"epoch": 0.8644307982994804,
"grad_norm": 49256.76350615258,
"learning_rate": 4.477918387753388e-06,
"logits": -0.6884523034095764,
"logps": -79.74717712402344,
"loss": 540.5464,
"objective": 640.423828125,
"ranking_simple": 0.625,
"regularize": 1.360299825668335,
"step": 305,
"wo_beta": 5.2918877601623535
},
{
"dpo_loss": 0.9228391051292419,
"epoch": 0.8786017949929145,
"grad_norm": 48740.801909647664,
"learning_rate": 4.452367626253805e-06,
"logits": -0.6559950709342957,
"logps": -79.34391784667969,
"loss": 420.4869,
"objective": 424.25775146484375,
"ranking_simple": 0.5916666388511658,
"regularize": 0.9228391051292419,
"step": 310,
"wo_beta": 5.364611625671387
},
{
"dpo_loss": 1.0072659254074097,
"epoch": 0.8927727916863486,
"grad_norm": 42307.68186238231,
"learning_rate": 4.426283106939474e-06,
"logits": -0.6370798945426941,
"logps": -80.14945983886719,
"loss": 505.2505,
"objective": 464.4563903808594,
"ranking_simple": 0.5541666746139526,
"regularize": 1.0072659254074097,
"step": 315,
"wo_beta": 6.282584190368652
},
{
"dpo_loss": 1.0113941431045532,
"epoch": 0.9069437883797827,
"grad_norm": 49217.69235824995,
"learning_rate": 4.399671961057523e-06,
"logits": -0.5728167295455933,
"logps": -81.63493347167969,
"loss": 549.8222,
"objective": 494.4688415527344,
"ranking_simple": 0.625,
"regularize": 1.0113941431045532,
"step": 320,
"wo_beta": 4.408293724060059
},
{
"dpo_loss": 0.9181217551231384,
"epoch": 0.9211147850732169,
"grad_norm": 37081.62708771448,
"learning_rate": 4.372541463829524e-06,
"logits": -0.6937563419342041,
"logps": -80.65882873535156,
"loss": 498.2751,
"objective": 406.5204772949219,
"ranking_simple": 0.5874999761581421,
"regularize": 0.9181217551231384,
"step": 325,
"wo_beta": 5.4936604499816895
},
{
"dpo_loss": 0.9474784135818481,
"epoch": 0.9352857817666509,
"grad_norm": 40839.28555485669,
"learning_rate": 4.3448990324625244e-06,
"logits": -0.5978609323501587,
"logps": -79.6310806274414,
"loss": 499.4487,
"objective": 420.19354248046875,
"ranking_simple": 0.5874999761581421,
"regularize": 0.9474784135818481,
"step": 330,
"wo_beta": 5.275208473205566
},
{
"dpo_loss": 0.8909139037132263,
"epoch": 0.949456778460085,
"grad_norm": 41412.745559313786,
"learning_rate": 4.316752224121252e-06,
"logits": -0.5733863711357117,
"logps": -79.5725326538086,
"loss": 420.0585,
"objective": 373.74444580078125,
"ranking_simple": 0.5958333611488342,
"regularize": 0.8909139037132263,
"step": 335,
"wo_beta": 5.151965141296387
},
{
"dpo_loss": 1.1936326026916504,
"epoch": 0.9636277751535192,
"grad_norm": 38902.519316501435,
"learning_rate": 4.288108733862064e-06,
"logits": -0.6429716944694519,
"logps": -78.99500274658203,
"loss": 501.9028,
"objective": 553.547119140625,
"ranking_simple": 0.5916666388511658,
"regularize": 1.1936326026916504,
"step": 340,
"wo_beta": 5.893803119659424
},
{
"dpo_loss": 0.8929412961006165,
"epoch": 0.9777987718469532,
"grad_norm": 38680.18948554596,
"learning_rate": 4.2589763925291924e-06,
"logits": -0.6908617615699768,
"logps": -77.42713165283203,
"loss": 443.8264,
"objective": 408.8953552246094,
"ranking_simple": 0.6666666865348816,
"regularize": 0.8929412961006165,
"step": 345,
"wo_beta": 4.660811901092529
},
{
"dpo_loss": 0.7659813761711121,
"epoch": 0.9919697685403873,
"grad_norm": 38727.42670719923,
"learning_rate": 4.229363164613874e-06,
"logits": -0.6147564649581909,
"logps": -79.30657196044922,
"loss": 374.0952,
"objective": 361.5460510253906,
"ranking_simple": 0.6791666746139526,
"regularize": 0.7659813761711121,
"step": 350,
"wo_beta": 4.2715983390808105
},
{
"epoch": 0.9919697685403873,
"eval_dpo_loss": 2.082609176635742,
"eval_logits": -0.7334761023521423,
"eval_logps": -82.52086639404297,
"eval_loss": 968.3278198242188,
"eval_objective": 906.4931030273438,
"eval_ranking_simple": 0.5341615080833435,
"eval_regularize": 2.082609176635742,
"eval_runtime": 369.2448,
"eval_samples_per_second": 15.681,
"eval_steps_per_second": 1.308,
"eval_wo_beta": 6.938116550445557,
"step": 350
},
{
"dpo_loss": 0.7874710559844971,
"epoch": 1.0061407652338215,
"grad_norm": 21920.85892485651,
"learning_rate": 4.199277146076933e-06,
"logits": -0.7007929682731628,
"logps": -77.06559753417969,
"loss": 368.6515,
"objective": 352.9415283203125,
"ranking_simple": 0.6416666507720947,
"regularize": 0.7874710559844971,
"step": 355,
"wo_beta": 4.749766826629639
},
{
"dpo_loss": 0.7488501071929932,
"epoch": 1.0203117619272555,
"grad_norm": 30589.88972145914,
"learning_rate": 4.168726562135432e-06,
"logits": -0.6663420796394348,
"logps": -77.70557403564453,
"loss": 293.1907,
"objective": 339.2917785644531,
"ranking_simple": 0.637499988079071,
"regularize": 0.7488501071929932,
"step": 360,
"wo_beta": 5.18883752822876
},
{
"dpo_loss": 0.47425612807273865,
"epoch": 1.0344827586206897,
"grad_norm": 26429.526464050992,
"learning_rate": 4.137719765013974e-06,
"logits": -0.6962677240371704,
"logps": -76.95773315429688,
"loss": 268.2307,
"objective": 244.91177368164062,
"ranking_simple": 0.6333333253860474,
"regularize": 0.47425612807273865,
"step": 365,
"wo_beta": 4.233913898468018
},
{
"dpo_loss": 0.5232141017913818,
"epoch": 1.0486537553141237,
"grad_norm": 35534.75324777604,
"learning_rate": 4.106265231661292e-06,
"logits": -0.6801474094390869,
"logps": -74.06124114990234,
"loss": 243.6382,
"objective": 228.08172607421875,
"ranking_simple": 0.5958333611488342,
"regularize": 0.5232141017913818,
"step": 370,
"wo_beta": 5.037619113922119
},
{
"dpo_loss": 0.548820436000824,
"epoch": 1.0628247520075578,
"grad_norm": 27102.24167022124,
"learning_rate": 4.074371561432731e-06,
"logits": -0.7840978503227234,
"logps": -75.09078216552734,
"loss": 253.0529,
"objective": 245.6653289794922,
"ranking_simple": 0.6208333373069763,
"regularize": 0.548820436000824,
"step": 375,
"wo_beta": 4.070898056030273
},
{
"dpo_loss": 0.717989981174469,
"epoch": 1.076995748700992,
"grad_norm": 31679.22207192818,
"learning_rate": 4.042047473739278e-06,
"logits": -0.7510035037994385,
"logps": -76.53946685791016,
"loss": 309.9577,
"objective": 332.1832275390625,
"ranking_simple": 0.6875,
"regularize": 0.717989981174469,
"step": 380,
"wo_beta": 4.098145008087158
},
{
"dpo_loss": 0.5641272068023682,
"epoch": 1.091166745394426,
"grad_norm": 24366.847525226553,
"learning_rate": 4.009301805663752e-06,
"logits": -0.6073976159095764,
"logps": -75.09868621826172,
"loss": 265.7304,
"objective": 296.70831298828125,
"ranking_simple": 0.6291666626930237,
"regularize": 0.5641272068023682,
"step": 385,
"wo_beta": 4.445924758911133
},
{
"dpo_loss": 0.6449456810951233,
"epoch": 1.10533774208786,
"grad_norm": 23048.879310786837,
"learning_rate": 3.976143509544843e-06,
"logits": -0.5609559416770935,
"logps": -76.08757781982422,
"loss": 222.6515,
"objective": 276.19744873046875,
"ranking_simple": 0.6791666746139526,
"regularize": 0.6449456810951233,
"step": 390,
"wo_beta": 4.739548683166504
},
{
"dpo_loss": 0.5751555562019348,
"epoch": 1.1195087387812943,
"grad_norm": 31852.53024343298,
"learning_rate": 3.9425816505296254e-06,
"logits": -0.582244873046875,
"logps": -75.46802520751953,
"loss": 288.2667,
"objective": 265.84442138671875,
"ranking_simple": 0.6291666626930237,
"regularize": 0.5751555562019348,
"step": 395,
"wo_beta": 4.211004734039307
},
{
"dpo_loss": 0.5259193778038025,
"epoch": 1.1336797354747283,
"grad_norm": 28994.715243763803,
"learning_rate": 3.908625404095242e-06,
"logits": -0.6348000764846802,
"logps": -74.51661682128906,
"loss": 270.3782,
"objective": 223.46231079101562,
"ranking_simple": 0.6875,
"regularize": 0.5259193778038025,
"step": 400,
"wo_beta": 3.843280553817749
},
{
"epoch": 1.1336797354747283,
"eval_dpo_loss": 2.1841752529144287,
"eval_logits": -0.6857498288154602,
"eval_logps": -79.62760925292969,
"eval_loss": 980.7469482421875,
"eval_objective": 943.677734375,
"eval_ranking_simple": 0.531573474407196,
"eval_regularize": 2.1841752529144287,
"eval_runtime": 375.6251,
"eval_samples_per_second": 15.414,
"eval_steps_per_second": 1.286,
"eval_wo_beta": 7.058492660522461,
"step": 400
},
{
"dpo_loss": 0.6943067908287048,
"epoch": 1.1478507321681626,
"grad_norm": 31867.494723376392,
"learning_rate": 3.8742840535404155e-06,
"logits": -0.5997341871261597,
"logps": -76.28077697753906,
"loss": 271.2819,
"objective": 352.92449951171875,
"ranking_simple": 0.5833333134651184,
"regularize": 0.6943067908287048,
"step": 405,
"wo_beta": 5.699499607086182
},
{
"dpo_loss": 0.8418512344360352,
"epoch": 1.1620217288615966,
"grad_norm": 29842.834539174277,
"learning_rate": 3.839566987447492e-06,
"logits": -0.6471078991889954,
"logps": -75.29569244384766,
"loss": 296.2768,
"objective": 396.2966613769531,
"ranking_simple": 0.6291666626930237,
"regularize": 0.8418512344360352,
"step": 410,
"wo_beta": 6.141425132751465
},
{
"dpo_loss": 0.5453721284866333,
"epoch": 1.1761927255550306,
"grad_norm": 27018.957385156904,
"learning_rate": 3.8044836971156935e-06,
"logits": -0.5671236515045166,
"logps": -74.40109252929688,
"loss": 273.0484,
"objective": 220.66055297851562,
"ranking_simple": 0.6208333373069763,
"regularize": 0.5453721284866333,
"step": 415,
"wo_beta": 5.825514793395996
},
{
"dpo_loss": 0.5288990139961243,
"epoch": 1.1903637222484649,
"grad_norm": 25344.093819012876,
"learning_rate": 3.7690437739662928e-06,
"logits": -0.5559112429618835,
"logps": -75.0554428100586,
"loss": 249.2432,
"objective": 244.75767517089844,
"ranking_simple": 0.6291666626930237,
"regularize": 0.5288990139961243,
"step": 420,
"wo_beta": 5.291681289672852
},
{
"dpo_loss": 0.803754448890686,
"epoch": 1.204534718941899,
"grad_norm": 29915.434557425604,
"learning_rate": 3.7332569069204127e-06,
"logits": -0.6134353876113892,
"logps": -73.4056625366211,
"loss": 277.3619,
"objective": 350.0005187988281,
"ranking_simple": 0.6416666507720947,
"regularize": 0.803754448890686,
"step": 425,
"wo_beta": 4.601781368255615
},
{
"dpo_loss": 0.781599760055542,
"epoch": 1.2187057156353331,
"grad_norm": 34073.98431226179,
"learning_rate": 3.697132879750174e-06,
"logits": -0.6061522364616394,
"logps": -75.91817474365234,
"loss": 319.8952,
"objective": 359.3941650390625,
"ranking_simple": 0.6541666388511658,
"regularize": 0.781599760055542,
"step": 430,
"wo_beta": 5.290956974029541
},
{
"dpo_loss": 0.5502253174781799,
"epoch": 1.2328767123287672,
"grad_norm": 38063.822960677266,
"learning_rate": 3.6606815684039098e-06,
"logits": -0.5053932666778564,
"logps": -75.83305358886719,
"loss": 288.8541,
"objective": 226.22280883789062,
"ranking_simple": 0.6708333492279053,
"regularize": 0.5502253174781799,
"step": 435,
"wo_beta": 4.12706184387207
},
{
"dpo_loss": 0.6821548342704773,
"epoch": 1.2470477090222012,
"grad_norm": 33366.98311460532,
"learning_rate": 3.6239129383061764e-06,
"logits": -0.5692261457443237,
"logps": -76.28325653076172,
"loss": 306.1993,
"objective": 302.21856689453125,
"ranking_simple": 0.6875,
"regularize": 0.6821548342704773,
"step": 440,
"wo_beta": 4.461844444274902
},
{
"dpo_loss": 0.85939621925354,
"epoch": 1.2612187057156352,
"grad_norm": 39164.29912052594,
"learning_rate": 3.586837041633312e-06,
"logits": -0.627022385597229,
"logps": -76.72431945800781,
"loss": 321.7045,
"objective": 412.44061279296875,
"ranking_simple": 0.675000011920929,
"regularize": 0.85939621925354,
"step": 445,
"wo_beta": 4.648702621459961
},
{
"dpo_loss": 0.5442861914634705,
"epoch": 1.2753897024090695,
"grad_norm": 30514.25628778678,
"learning_rate": 3.5494640145652647e-06,
"logits": -0.8091104626655579,
"logps": -75.52713775634766,
"loss": 260.6353,
"objective": 219.17945861816406,
"ranking_simple": 0.6333333253860474,
"regularize": 0.5442861914634705,
"step": 450,
"wo_beta": 4.987157821655273
},
{
"epoch": 1.2753897024090695,
"eval_dpo_loss": 2.075345277786255,
"eval_logits": -0.8703874945640564,
"eval_logps": -79.30490112304688,
"eval_loss": 933.441650390625,
"eval_objective": 893.1430053710938,
"eval_ranking_simple": 0.5357142686843872,
"eval_regularize": 2.075345277786255,
"eval_runtime": 369.7717,
"eval_samples_per_second": 15.658,
"eval_steps_per_second": 1.306,
"eval_wo_beta": 6.9555816650390625,
"step": 450
},
{
"dpo_loss": 0.6076017022132874,
"epoch": 1.2895606991025035,
"grad_norm": 30068.494295042852,
"learning_rate": 3.511804074514468e-06,
"logits": -0.7256959676742554,
"logps": -75.7904052734375,
"loss": 274.9153,
"objective": 268.743896484375,
"ranking_simple": 0.5833333134651184,
"regularize": 0.6076017022132874,
"step": 455,
"wo_beta": 4.931685924530029
},
{
"dpo_loss": 0.5776770710945129,
"epoch": 1.3037316957959377,
"grad_norm": 33947.98253416442,
"learning_rate": 3.4738675173325008e-06,
"logits": -0.8186031579971313,
"logps": -75.09860229492188,
"loss": 298.7045,
"objective": 264.06005859375,
"ranking_simple": 0.612500011920929,
"regularize": 0.5776770710945129,
"step": 460,
"wo_beta": 5.527124881744385
},
{
"dpo_loss": 0.5068582892417908,
"epoch": 1.3179026924893718,
"grad_norm": 27985.15602475436,
"learning_rate": 3.435664714495301e-06,
"logits": -0.8312460780143738,
"logps": -73.1260757446289,
"loss": 265.522,
"objective": 230.74436950683594,
"ranking_simple": 0.6166666746139526,
"regularize": 0.5068582892417908,
"step": 465,
"wo_beta": 4.814398765563965
},
{
"dpo_loss": 0.6723037362098694,
"epoch": 1.3320736891828058,
"grad_norm": 26533.816181899983,
"learning_rate": 3.397206110267713e-06,
"logits": -0.8928317427635193,
"logps": -72.38593292236328,
"loss": 298.2945,
"objective": 296.0791931152344,
"ranking_simple": 0.6541666388511658,
"regularize": 0.6723037362098694,
"step": 470,
"wo_beta": 4.333133220672607
},
{
"dpo_loss": 0.5037826299667358,
"epoch": 1.34624468587624,
"grad_norm": 35654.842705857875,
"learning_rate": 3.3585022188481247e-06,
"logits": -0.7810119390487671,
"logps": -74.3759994506836,
"loss": 283.622,
"objective": 209.9154815673828,
"ranking_simple": 0.675000011920929,
"regularize": 0.5037826299667358,
"step": 475,
"wo_beta": 4.845743656158447
},
{
"dpo_loss": 0.5474976301193237,
"epoch": 1.360415682569674,
"grad_norm": 35159.884724394884,
"learning_rate": 3.3195636214939943e-06,
"logits": -0.7756078839302063,
"logps": -75.58786010742188,
"loss": 288.5159,
"objective": 257.5296936035156,
"ranking_simple": 0.612500011920929,
"regularize": 0.5474976301193237,
"step": 480,
"wo_beta": 5.6963653564453125
},
{
"dpo_loss": 0.5475578308105469,
"epoch": 1.3745866792631083,
"grad_norm": 45395.4629312106,
"learning_rate": 3.2804009636290403e-06,
"logits": -0.7321893572807312,
"logps": -75.10790252685547,
"loss": 256.6031,
"objective": 262.57879638671875,
"ranking_simple": 0.6083333492279053,
"regularize": 0.5475578308105469,
"step": 485,
"wo_beta": 5.356605052947998
},
{
"dpo_loss": 0.7748611569404602,
"epoch": 1.3887576759565423,
"grad_norm": 33385.64413662957,
"learning_rate": 3.2410249519328848e-06,
"logits": -0.7029439806938171,
"logps": -77.96426391601562,
"loss": 309.7623,
"objective": 388.19610595703125,
"ranking_simple": 0.6333333253860474,
"regularize": 0.7748611569404602,
"step": 490,
"wo_beta": 4.442887783050537
},
{
"dpo_loss": 0.5730568170547485,
"epoch": 1.4029286726499763,
"grad_norm": 41910.67029527795,
"learning_rate": 3.201446351413958e-06,
"logits": -0.7370147109031677,
"logps": -78.2493667602539,
"loss": 288.5006,
"objective": 271.5208740234375,
"ranking_simple": 0.6000000238418579,
"regularize": 0.5730568170547485,
"step": 495,
"wo_beta": 5.084538459777832
},
{
"dpo_loss": 0.6209725141525269,
"epoch": 1.4170996693434104,
"grad_norm": 38353.55717005552,
"learning_rate": 3.1616759824664543e-06,
"logits": -0.6797351837158203,
"logps": -76.69928741455078,
"loss": 272.6055,
"objective": 298.72381591796875,
"ranking_simple": 0.6333333253860474,
"regularize": 0.6209725141525269,
"step": 500,
"wo_beta": 4.246993541717529
},
{
"epoch": 1.4170996693434104,
"eval_dpo_loss": 2.0877864360809326,
"eval_logits": -0.8079127669334412,
"eval_logps": -81.43925476074219,
"eval_loss": 950.2913818359375,
"eval_objective": 901.2256469726562,
"eval_ranking_simple": 0.5269151329994202,
"eval_regularize": 2.0877864360809326,
"eval_runtime": 368.6401,
"eval_samples_per_second": 15.706,
"eval_steps_per_second": 1.31,
"eval_wo_beta": 6.822662830352783,
"step": 500
},
{
"dpo_loss": 0.48541003465652466,
"epoch": 1.4312706660368446,
"grad_norm": 29544.984605162008,
"learning_rate": 3.121724717912138e-06,
"logits": -0.7053753733634949,
"logps": -77.21398162841797,
"loss": 216.0385,
"objective": 198.67111206054688,
"ranking_simple": 0.6541666388511658,
"regularize": 0.48541003465652466,
"step": 505,
"wo_beta": 4.210334300994873
},
{
"dpo_loss": 0.5575286149978638,
"epoch": 1.4454416627302786,
"grad_norm": 31963.434830208436,
"learning_rate": 3.081603480027826e-06,
"logits": -0.7207833528518677,
"logps": -77.88783264160156,
"loss": 271.1889,
"objective": 220.0654296875,
"ranking_simple": 0.6625000238418579,
"regularize": 0.5575286149978638,
"step": 510,
"wo_beta": 4.390309810638428
},
{
"dpo_loss": 0.743520200252533,
"epoch": 1.4596126594237129,
"grad_norm": 27959.80538905989,
"learning_rate": 3.04132323755935e-06,
"logits": -0.7187339067459106,
"logps": -79.07169342041016,
"loss": 318.8216,
"objective": 342.0149841308594,
"ranking_simple": 0.5625,
"regularize": 0.743520200252533,
"step": 515,
"wo_beta": 6.24213981628418
},
{
"dpo_loss": 0.47003206610679626,
"epoch": 1.473783656117147,
"grad_norm": 27496.616543577868,
"learning_rate": 3.0008950027228035e-06,
"logits": -0.6592309474945068,
"logps": -76.95936584472656,
"loss": 192.6094,
"objective": 205.1455535888672,
"ranking_simple": 0.6499999761581421,
"regularize": 0.47003206610679626,
"step": 520,
"wo_beta": 4.843044757843018
},
{
"dpo_loss": 0.36703598499298096,
"epoch": 1.487954652810581,
"grad_norm": 26375.77965551739,
"learning_rate": 2.960329828193918e-06,
"logits": -0.5630077123641968,
"logps": -76.27136993408203,
"loss": 240.7371,
"objective": 136.03848266601562,
"ranking_simple": 0.6625000238418579,
"regularize": 0.36703598499298096,
"step": 525,
"wo_beta": 4.377591609954834
},
{
"dpo_loss": 0.5440438985824585,
"epoch": 1.5021256495040152,
"grad_norm": 27571.64273275317,
"learning_rate": 2.9196388040863695e-06,
"logits": -0.7446085810661316,
"logps": -78.13150787353516,
"loss": 252.6264,
"objective": 210.30335998535156,
"ranking_simple": 0.637499988079071,
"regularize": 0.5440438985824585,
"step": 530,
"wo_beta": 4.725710868835449
},
{
"dpo_loss": 0.4591270685195923,
"epoch": 1.5162966461974492,
"grad_norm": 29879.356657087457,
"learning_rate": 2.8788330549198512e-06,
"logits": -0.7952173352241516,
"logps": -76.83704376220703,
"loss": 190.3222,
"objective": 189.28045654296875,
"ranking_simple": 0.6416666507720947,
"regularize": 0.4591270685195923,
"step": 535,
"wo_beta": 4.304568290710449
},
{
"dpo_loss": 0.7635994553565979,
"epoch": 1.5304676428908834,
"grad_norm": 32950.397237122976,
"learning_rate": 2.8379237365787426e-06,
"logits": -0.7304627299308777,
"logps": -77.21965789794922,
"loss": 271.3824,
"objective": 302.41015625,
"ranking_simple": 0.6833333373069763,
"regularize": 0.7635994553565979,
"step": 540,
"wo_beta": 3.8116295337677
},
{
"dpo_loss": 0.6426182389259338,
"epoch": 1.5446386395843175,
"grad_norm": 29698.866901475067,
"learning_rate": 2.7969220332622004e-06,
"logits": -0.8022263646125793,
"logps": -78.44091796875,
"loss": 268.7311,
"objective": 240.541015625,
"ranking_simple": 0.6958333253860474,
"regularize": 0.6426182389259338,
"step": 545,
"wo_beta": 3.3054466247558594
},
{
"dpo_loss": 0.4164026081562042,
"epoch": 1.5588096362777515,
"grad_norm": 22464.97408129426,
"learning_rate": 2.7558391544265127e-06,
"logits": -0.8392809629440308,
"logps": -78.80281829833984,
"loss": 201.6789,
"objective": 177.04327392578125,
"ranking_simple": 0.612500011920929,
"regularize": 0.4164026081562042,
"step": 550,
"wo_beta": 4.627588272094727
},
{
"epoch": 1.5588096362777515,
"eval_dpo_loss": 2.06260085105896,
"eval_logits": -0.861229658126831,
"eval_logps": -82.88394165039062,
"eval_loss": 942.4044799804688,
"eval_objective": 899.7697143554688,
"eval_ranking_simple": 0.5362318754196167,
"eval_regularize": 2.06260085105896,
"eval_runtime": 374.5541,
"eval_samples_per_second": 15.458,
"eval_steps_per_second": 1.29,
"eval_wo_beta": 6.859459400177002,
"step": 550
},
{
"dpo_loss": 0.6262320280075073,
"epoch": 1.5729806329711855,
"grad_norm": 30721.707672058405,
"learning_rate": 2.714686331720543e-06,
"logits": -0.6610599756240845,
"logps": -79.22738647460938,
"loss": 265.0857,
"objective": 308.8701171875,
"ranking_simple": 0.6291666626930237,
"regularize": 0.6262320280075073,
"step": 555,
"wo_beta": 4.726518154144287
},
{
"dpo_loss": 0.49044954776763916,
"epoch": 1.5871516296646198,
"grad_norm": 30519.863800131625,
"learning_rate": 2.6734748159151104e-06,
"logits": -0.6557633876800537,
"logps": -78.31670379638672,
"loss": 249.957,
"objective": 215.1184539794922,
"ranking_simple": 0.6583333611488342,
"regularize": 0.49044954776763916,
"step": 560,
"wo_beta": 4.984176158905029
},
{
"dpo_loss": 0.49784523248672485,
"epoch": 1.601322626358054,
"grad_norm": 32552.21298139843,
"learning_rate": 2.632215873827142e-06,
"logits": -0.6383572220802307,
"logps": -78.56299591064453,
"loss": 242.473,
"objective": 225.4641571044922,
"ranking_simple": 0.6083333492279053,
"regularize": 0.49784523248672485,
"step": 565,
"wo_beta": 5.1718597412109375
},
{
"dpo_loss": 0.42994922399520874,
"epoch": 1.615493623051488,
"grad_norm": 34165.36275325673,
"learning_rate": 2.5909207852394363e-06,
"logits": -0.7592962384223938,
"logps": -78.97236633300781,
"loss": 189.9181,
"objective": 185.12489318847656,
"ranking_simple": 0.6708333492279053,
"regularize": 0.42994922399520874,
"step": 570,
"wo_beta": 4.939838886260986
},
{
"dpo_loss": 0.7586291432380676,
"epoch": 1.629664619744922,
"grad_norm": 35161.73370280744,
"learning_rate": 2.5496008398168844e-06,
"logits": -0.7024948596954346,
"logps": -76.8245620727539,
"loss": 314.04,
"objective": 396.62518310546875,
"ranking_simple": 0.6958333253860474,
"regularize": 0.7586291432380676,
"step": 575,
"wo_beta": 4.421546936035156
},
{
"dpo_loss": 0.767590343952179,
"epoch": 1.643835616438356,
"grad_norm": 32708.853726410838,
"learning_rate": 2.508267334019988e-06,
"logits": -0.6832980513572693,
"logps": -76.89154815673828,
"loss": 278.9917,
"objective": 312.0636901855469,
"ranking_simple": 0.6583333611488342,
"regularize": 0.767590343952179,
"step": 580,
"wo_beta": 4.10227108001709
},
{
"dpo_loss": 0.3702860474586487,
"epoch": 1.6580066131317903,
"grad_norm": 31637.02640388039,
"learning_rate": 2.46693156801652e-06,
"logits": -0.5593028664588928,
"logps": -76.0708999633789,
"loss": 198.3549,
"objective": 170.7590789794922,
"ranking_simple": 0.6000000238418579,
"regularize": 0.3702860474586487,
"step": 585,
"wo_beta": 4.9678053855896
},
{
"dpo_loss": 0.475857138633728,
"epoch": 1.6721776098252243,
"grad_norm": 23355.06704017027,
"learning_rate": 2.4256048425921693e-06,
"logits": -0.5719855427742004,
"logps": -76.99803161621094,
"loss": 231.7539,
"objective": 203.09518432617188,
"ranking_simple": 0.6625000238418579,
"regularize": 0.475857138633728,
"step": 590,
"wo_beta": 4.2729363441467285
},
{
"dpo_loss": 0.5022611618041992,
"epoch": 1.6863486065186586,
"grad_norm": 24151.29042367439,
"learning_rate": 2.384298456061023e-06,
"logits": -0.5644751787185669,
"logps": -77.0865478515625,
"loss": 221.3199,
"objective": 220.84716796875,
"ranking_simple": 0.6416666507720947,
"regularize": 0.5022611618041992,
"step": 595,
"wo_beta": 5.112887382507324
},
{
"dpo_loss": 0.4297979474067688,
"epoch": 1.7005196032120926,
"grad_norm": 31821.391833657053,
"learning_rate": 2.3430237011767166e-06,
"logits": -0.6274603009223938,
"logps": -77.14356231689453,
"loss": 190.6931,
"objective": 182.52053833007812,
"ranking_simple": 0.6958333253860474,
"regularize": 0.4297979474067688,
"step": 600,
"wo_beta": 3.9301912784576416
},
{
"epoch": 1.7005196032120926,
"eval_dpo_loss": 2.0212671756744385,
"eval_logits": -0.7143464684486389,
"eval_logps": -80.58209228515625,
"eval_loss": 909.0859375,
"eval_objective": 874.9575805664062,
"eval_ranking_simple": 0.5362318754196167,
"eval_regularize": 2.0212671756744385,
"eval_runtime": 368.0708,
"eval_samples_per_second": 15.731,
"eval_steps_per_second": 1.312,
"eval_wo_beta": 6.8604817390441895,
"step": 600
},
{
"dpo_loss": 0.5198889374732971,
"epoch": 1.7146905999055266,
"grad_norm": 30026.01051762202,
"learning_rate": 2.30179186204511e-06,
"logits": -0.5722830891609192,
"logps": -77.72653198242188,
"loss": 213.3847,
"objective": 228.3468780517578,
"ranking_simple": 0.6291666626930237,
"regularize": 0.5198889374732971,
"step": 605,
"wo_beta": 5.703073978424072
},
{
"dpo_loss": 0.36141520738601685,
"epoch": 1.7288615965989607,
"grad_norm": 40011.17376243372,
"learning_rate": 2.2606142110393248e-06,
"logits": -0.626063883304596,
"logps": -76.1588363647461,
"loss": 211.2466,
"objective": 149.7343292236328,
"ranking_simple": 0.6791666746139526,
"regularize": 0.36141520738601685,
"step": 610,
"wo_beta": 4.233180522918701
},
{
"dpo_loss": 0.42948976159095764,
"epoch": 1.743032593292395,
"grad_norm": 29635.739966818786,
"learning_rate": 2.2195020057179897e-06,
"logits": -0.6392956972122192,
"logps": -76.81178283691406,
"loss": 225.1069,
"objective": 180.2233428955078,
"ranking_simple": 0.6166666746139526,
"regularize": 0.42948976159095764,
"step": 615,
"wo_beta": 5.672121524810791
},
{
"dpo_loss": 0.5156412720680237,
"epoch": 1.7572035899858292,
"grad_norm": 30208.788539603607,
"learning_rate": 2.1784664857475356e-06,
"logits": -0.6532320380210876,
"logps": -77.48053741455078,
"loss": 264.541,
"objective": 242.69305419921875,
"ranking_simple": 0.6666666865348816,
"regularize": 0.5156412720680237,
"step": 620,
"wo_beta": 4.189736843109131
},
{
"dpo_loss": 0.5349311828613281,
"epoch": 1.7713745866792632,
"grad_norm": 39380.903198687505,
"learning_rate": 2.1375188698293855e-06,
"logits": -0.5855329036712646,
"logps": -76.99386596679688,
"loss": 262.5758,
"objective": 243.1309356689453,
"ranking_simple": 0.6291666626930237,
"regularize": 0.5349311828613281,
"step": 625,
"wo_beta": 4.572874546051025
},
{
"dpo_loss": 0.6499994993209839,
"epoch": 1.7855455833726972,
"grad_norm": 33586.854859622,
"learning_rate": 2.096670352632873e-06,
"logits": -0.6419364809989929,
"logps": -78.828857421875,
"loss": 222.7364,
"objective": 283.2447204589844,
"ranking_simple": 0.5958333611488342,
"regularize": 0.6499994993209839,
"step": 630,
"wo_beta": 5.4850687980651855
},
{
"dpo_loss": 0.350009948015213,
"epoch": 1.7997165800661312,
"grad_norm": 25946.609358155,
"learning_rate": 2.0559321017347286e-06,
"logits": -0.7381666898727417,
"logps": -77.83517456054688,
"loss": 231.875,
"objective": 173.66685485839844,
"ranking_simple": 0.699999988079071,
"regularize": 0.350009948015213,
"step": 635,
"wo_beta": 4.173274517059326
},
{
"dpo_loss": 0.4921790361404419,
"epoch": 1.8138875767595655,
"grad_norm": 34774.65383490848,
"learning_rate": 2.01531525456598e-06,
"logits": -0.7322004437446594,
"logps": -78.3251724243164,
"loss": 220.5483,
"objective": 208.82156372070312,
"ranking_simple": 0.6541666388511658,
"regularize": 0.4921790361404419,
"step": 640,
"wo_beta": 4.936450481414795
},
{
"dpo_loss": 0.46808040142059326,
"epoch": 1.8280585734529995,
"grad_norm": 32970.96122285086,
"learning_rate": 1.974830915367086e-06,
"logits": -0.7492889165878296,
"logps": -78.48296356201172,
"loss": 234.5868,
"objective": 203.17454528808594,
"ranking_simple": 0.6708333492279053,
"regularize": 0.46808040142059326,
"step": 645,
"wo_beta": 4.602018356323242
},
{
"dpo_loss": 0.5371274352073669,
"epoch": 1.8422295701464337,
"grad_norm": 35743.17684775322,
"learning_rate": 1.93449015215215e-06,
"logits": -0.779839277267456,
"logps": -78.36189270019531,
"loss": 308.8635,
"objective": 253.41427612304688,
"ranking_simple": 0.6625000238418579,
"regularize": 0.5371274352073669,
"step": 650,
"wo_beta": 3.7921812534332275
},
{
"epoch": 1.8422295701464337,
"eval_dpo_loss": 1.975651741027832,
"eval_logits": -0.7771020531654358,
"eval_logps": -81.49595642089844,
"eval_loss": 903.3455810546875,
"eval_objective": 858.396728515625,
"eval_ranking_simple": 0.5341615080833435,
"eval_regularize": 1.975651741027832,
"eval_runtime": 368.8643,
"eval_samples_per_second": 15.697,
"eval_steps_per_second": 1.309,
"eval_wo_beta": 6.74319314956665,
"step": 650
},
{
"dpo_loss": 0.47425001859664917,
"epoch": 1.8564005668398678,
"grad_norm": 32301.135893099767,
"learning_rate": 1.8943039936830347e-06,
"logits": -0.6750093698501587,
"logps": -75.62101745605469,
"loss": 194.2043,
"objective": 181.9368133544922,
"ranking_simple": 0.6291666626930237,
"regularize": 0.47425001859664917,
"step": 655,
"wo_beta": 4.659397602081299
},
{
"dpo_loss": 0.5889893770217896,
"epoch": 1.8705715635333018,
"grad_norm": 29198.03257069531,
"learning_rate": 1.8542834264542091e-06,
"logits": -0.6569967865943909,
"logps": -78.80994415283203,
"loss": 197.3959,
"objective": 235.9475555419922,
"ranking_simple": 0.6666666865348816,
"regularize": 0.5889893770217896,
"step": 660,
"wo_beta": 5.009462356567383
},
{
"dpo_loss": 0.5106583833694458,
"epoch": 1.8847425602267358,
"grad_norm": 35479.47574916255,
"learning_rate": 1.814439391689151e-06,
"logits": -0.6794141530990601,
"logps": -76.54640197753906,
"loss": 243.1572,
"objective": 230.056640625,
"ranking_simple": 0.6958333253860474,
"regularize": 0.5106583833694458,
"step": 665,
"wo_beta": 4.178842544555664
},
{
"dpo_loss": 0.4586184322834015,
"epoch": 1.89891355692017,
"grad_norm": 23231.111354011602,
"learning_rate": 1.7747827823491253e-06,
"logits": -0.6216932535171509,
"logps": -75.05060577392578,
"loss": 175.5592,
"objective": 206.04708862304688,
"ranking_simple": 0.5833333134651184,
"regularize": 0.4586184322834015,
"step": 670,
"wo_beta": 5.662291049957275
},
{
"dpo_loss": 0.4679652154445648,
"epoch": 1.9130845536136043,
"grad_norm": 27735.952169830267,
"learning_rate": 1.7353244401551566e-06,
"logits": -0.6084102392196655,
"logps": -75.44328308105469,
"loss": 219.5326,
"objective": 205.1367645263672,
"ranking_simple": 0.6875,
"regularize": 0.4679652154445648,
"step": 675,
"wo_beta": 3.7292354106903076
},
{
"dpo_loss": 0.4869479835033417,
"epoch": 1.9272555503070383,
"grad_norm": 34188.342920935414,
"learning_rate": 1.6960751526240122e-06,
"logits": -0.6308152675628662,
"logps": -77.12525939941406,
"loss": 209.8948,
"objective": 227.87367248535156,
"ranking_simple": 0.6833333373069763,
"regularize": 0.4869479835033417,
"step": 680,
"wo_beta": 4.258009433746338
},
{
"dpo_loss": 0.41189083456993103,
"epoch": 1.9414265470004723,
"grad_norm": 22728.798141567488,
"learning_rate": 1.6570456501189996e-06,
"logits": -0.5937235355377197,
"logps": -75.273681640625,
"loss": 158.1766,
"objective": 177.9638671875,
"ranking_simple": 0.5916666388511658,
"regularize": 0.41189083456993103,
"step": 685,
"wo_beta": 5.404563903808594
},
{
"dpo_loss": 0.36097389459609985,
"epoch": 1.9555975436939064,
"grad_norm": 28163.987666291356,
"learning_rate": 1.6182466029163974e-06,
"logits": -0.5778710842132568,
"logps": -76.22671508789062,
"loss": 197.674,
"objective": 151.4131622314453,
"ranking_simple": 0.6458333134651184,
"regularize": 0.36097389459609985,
"step": 690,
"wo_beta": 4.349457263946533
},
{
"dpo_loss": 0.4063621461391449,
"epoch": 1.9697685403873406,
"grad_norm": 32613.751926495792,
"learning_rate": 1.5796886182883053e-06,
"logits": -0.5166969895362854,
"logps": -77.0401382446289,
"loss": 183.2558,
"objective": 182.5120391845703,
"ranking_simple": 0.6083333492279053,
"regularize": 0.4063621461391449,
"step": 695,
"wo_beta": 5.3592329025268555
},
{
"dpo_loss": 0.3167162537574768,
"epoch": 1.9839395370807746,
"grad_norm": 38189.51001873364,
"learning_rate": 1.541382237602721e-06,
"logits": -0.4650060832500458,
"logps": -77.90335845947266,
"loss": 176.7641,
"objective": 123.92158508300781,
"ranking_simple": 0.6666666865348816,
"regularize": 0.3167162537574768,
"step": 700,
"wo_beta": 4.42483377456665
},
{
"epoch": 1.9839395370807746,
"eval_dpo_loss": 1.9719160795211792,
"eval_logits": -0.66416335105896,
"eval_logps": -80.92808532714844,
"eval_loss": 901.6802368164062,
"eval_objective": 855.0221557617188,
"eval_ranking_simple": 0.5398550629615784,
"eval_regularize": 1.9719160795211792,
"eval_runtime": 368.757,
"eval_samples_per_second": 15.701,
"eval_steps_per_second": 1.31,
"eval_wo_beta": 6.820044040679932,
"step": 700
},
{
"dpo_loss": 0.41513699293136597,
"epoch": 1.9981105337742089,
"grad_norm": 29466.894534313185,
"learning_rate": 1.5033379334416376e-06,
"logits": -0.431937575340271,
"logps": -77.02298736572266,
"loss": 226.2701,
"objective": 215.40646362304688,
"ranking_simple": 0.6083333492279053,
"regularize": 0.41513699293136597,
"step": 705,
"wo_beta": 5.689188480377197
},
{
"dpo_loss": 0.28987228870391846,
"epoch": 2.012281530467643,
"grad_norm": 14354.448151084767,
"learning_rate": 1.465566106737942e-06,
"logits": -0.5705215930938721,
"logps": -76.63302612304688,
"loss": 100.7583,
"objective": 113.46463012695312,
"ranking_simple": 0.6833333373069763,
"regularize": 0.28987228870391846,
"step": 710,
"wo_beta": 3.9176442623138428
},
{
"dpo_loss": 0.16603197157382965,
"epoch": 2.026452527161077,
"grad_norm": 11508.708871868666,
"learning_rate": 1.4280770839319073e-06,
"logits": -0.563579261302948,
"logps": -75.65678405761719,
"loss": 70.137,
"objective": 80.78282165527344,
"ranking_simple": 0.7416666746139526,
"regularize": 0.16603197157382965,
"step": 715,
"wo_beta": 3.3496031761169434
},
{
"dpo_loss": 0.1282922476530075,
"epoch": 2.040623523854511,
"grad_norm": 11811.061459991428,
"learning_rate": 1.3908811141480408e-06,
"logits": -0.5584599375724792,
"logps": -76.28443908691406,
"loss": 51.7676,
"objective": 59.46492004394531,
"ranking_simple": 0.6833333373069763,
"regularize": 0.1282922476530075,
"step": 720,
"wo_beta": 4.073063850402832
},
{
"dpo_loss": 0.11519418656826019,
"epoch": 2.0547945205479454,
"grad_norm": 12532.838982073052,
"learning_rate": 1.353988366393083e-06,
"logits": -0.5916457772254944,
"logps": -79.23841857910156,
"loss": 65.4189,
"objective": 45.14365768432617,
"ranking_simple": 0.7124999761581421,
"regularize": 0.11519418656826019,
"step": 725,
"wo_beta": 3.8109290599823
},
{
"dpo_loss": 0.1648443341255188,
"epoch": 2.0689655172413794,
"grad_norm": 15010.96827295094,
"learning_rate": 1.3174089267758983e-06,
"logits": -0.6307211518287659,
"logps": -76.99424743652344,
"loss": 58.8548,
"objective": 62.6117057800293,
"ranking_simple": 0.6625000238418579,
"regularize": 0.1648443341255188,
"step": 730,
"wo_beta": 4.767910957336426
},
{
"dpo_loss": 0.23415635526180267,
"epoch": 2.0831365139348135,
"grad_norm": 16123.177435040037,
"learning_rate": 1.2811527957500344e-06,
"logits": -0.5699220299720764,
"logps": -76.40848541259766,
"loss": 69.1498,
"objective": 94.10277557373047,
"ranking_simple": 0.6291666626930237,
"regularize": 0.23415635526180267,
"step": 735,
"wo_beta": 5.384328842163086
},
{
"dpo_loss": 0.10797632485628128,
"epoch": 2.0973075106282475,
"grad_norm": 12855.608519396761,
"learning_rate": 1.245229885379699e-06,
"logits": -0.6591292023658752,
"logps": -76.2997055053711,
"loss": 56.1864,
"objective": 34.191505432128906,
"ranking_simple": 0.7083333134651184,
"regularize": 0.10797632485628128,
"step": 740,
"wo_beta": 3.6457924842834473
},
{
"dpo_loss": 0.16380687057971954,
"epoch": 2.1114785073216815,
"grad_norm": 10889.824074533886,
"learning_rate": 1.2096500166298992e-06,
"logits": -0.5935976505279541,
"logps": -77.07962799072266,
"loss": 70.7977,
"objective": 74.69692993164062,
"ranking_simple": 0.6916666626930237,
"regularize": 0.16380687057971954,
"step": 745,
"wo_beta": 4.419310569763184
},
{
"dpo_loss": 0.15039294958114624,
"epoch": 2.1256495040151155,
"grad_norm": 8517.081418777607,
"learning_rate": 1.1744229166814889e-06,
"logits": -0.6511305570602417,
"logps": -76.2921371459961,
"loss": 56.904,
"objective": 58.568729400634766,
"ranking_simple": 0.6458333134651184,
"regularize": 0.15039294958114624,
"step": 750,
"wo_beta": 4.041059494018555
},
{
"epoch": 2.1256495040151155,
"eval_dpo_loss": 1.9392777681350708,
"eval_logits": -0.7334077954292297,
"eval_logps": -81.60860443115234,
"eval_loss": 887.2379760742188,
"eval_objective": 839.1236572265625,
"eval_ranking_simple": 0.5388198494911194,
"eval_regularize": 1.9392777681350708,
"eval_runtime": 368.7916,
"eval_samples_per_second": 15.7,
"eval_steps_per_second": 1.31,
"eval_wo_beta": 6.87873649597168,
"step": 750
},
{
"dpo_loss": 0.10645638406276703,
"epoch": 2.13982050070855,
"grad_norm": 13052.259014125502,
"learning_rate": 1.1395582162718524e-06,
"logits": -0.6800190806388855,
"logps": -78.63035583496094,
"loss": 57.8502,
"objective": 35.947303771972656,
"ranking_simple": 0.6833333373069763,
"regularize": 0.10645638406276703,
"step": 755,
"wo_beta": 4.126923084259033
},
{
"dpo_loss": 0.19088611006736755,
"epoch": 2.153991497401984,
"grad_norm": 17569.54466226025,
"learning_rate": 1.1050654470619602e-06,
"logits": -0.6212081909179688,
"logps": -75.5756607055664,
"loss": 75.6102,
"objective": 78.61075592041016,
"ranking_simple": 0.6708333492279053,
"regularize": 0.19088611006736755,
"step": 760,
"wo_beta": 4.341642379760742
},
{
"dpo_loss": 0.13319069147109985,
"epoch": 2.168162494095418,
"grad_norm": 16693.2708880337,
"learning_rate": 1.0709540390305061e-06,
"logits": -0.6630601286888123,
"logps": -76.63800811767578,
"loss": 66.0811,
"objective": 50.65385437011719,
"ranking_simple": 0.6833333373069763,
"regularize": 0.13319069147109985,
"step": 765,
"wo_beta": 3.741922616958618
},
{
"dpo_loss": 0.10242452472448349,
"epoch": 2.182333490788852,
"grad_norm": 22180.175367129727,
"learning_rate": 1.0372333178958462e-06,
"logits": -0.6701247692108154,
"logps": -76.97808074951172,
"loss": 72.9876,
"objective": 40.84423828125,
"ranking_simple": 0.6666666865348816,
"regularize": 0.10242452472448349,
"step": 770,
"wo_beta": 4.013566970825195
},
{
"dpo_loss": 0.21899166703224182,
"epoch": 2.196504487482286,
"grad_norm": 16166.28367330464,
"learning_rate": 1.0039125025664392e-06,
"logits": -0.69548100233078,
"logps": -77.49720764160156,
"loss": 85.4559,
"objective": 107.94995880126953,
"ranking_simple": 0.7083333134651184,
"regularize": 0.21899166703224182,
"step": 775,
"wo_beta": 4.022635459899902
},
{
"dpo_loss": 0.23950724303722382,
"epoch": 2.21067548417572,
"grad_norm": 23845.856987166226,
"learning_rate": 9.710007026204896e-07,
"logits": -0.5921620726585388,
"logps": -77.82970428466797,
"loss": 73.8894,
"objective": 118.90029907226562,
"ranking_simple": 0.625,
"regularize": 0.23950724303722382,
"step": 780,
"wo_beta": 5.2229838371276855
},
{
"dpo_loss": 0.1409159004688263,
"epoch": 2.2248464808691546,
"grad_norm": 23390.757076240254,
"learning_rate": 9.385069158154805e-07,
"logits": -0.5967235565185547,
"logps": -77.1053695678711,
"loss": 72.4182,
"objective": 49.14412307739258,
"ranking_simple": 0.6541666388511658,
"regularize": 0.1409159004688263,
"step": 785,
"wo_beta": 4.309206485748291
},
{
"dpo_loss": 0.11289669573307037,
"epoch": 2.2390174775625886,
"grad_norm": 19262.26547177061,
"learning_rate": 9.064400256282757e-07,
"logits": -0.5860327482223511,
"logps": -77.5761489868164,
"loss": 58.4468,
"objective": 36.8985481262207,
"ranking_simple": 0.7124999761581421,
"regularize": 0.11289669573307037,
"step": 790,
"wo_beta": 3.3499815464019775
},
{
"dpo_loss": 0.1393917351961136,
"epoch": 2.2531884742560226,
"grad_norm": 13961.149915240683,
"learning_rate": 8.74808798826467e-07,
"logits": -0.6526386141777039,
"logps": -77.9594497680664,
"loss": 49.578,
"objective": 57.15465545654297,
"ranking_simple": 0.699999988079071,
"regularize": 0.1393917351961136,
"step": 795,
"wo_beta": 3.4494924545288086
},
{
"dpo_loss": 0.19990867376327515,
"epoch": 2.2673594709494567,
"grad_norm": 19068.02255354706,
"learning_rate": 8.436218830716259e-07,
"logits": -0.6704553365707397,
"logps": -77.96353912353516,
"loss": 63.8462,
"objective": 76.01747131347656,
"ranking_simple": 0.6833333373069763,
"regularize": 0.19990867376327515,
"step": 800,
"wo_beta": 4.034228324890137
},
{
"epoch": 2.2673594709494567,
"eval_dpo_loss": 1.9230473041534424,
"eval_logits": -0.7491475343704224,
"eval_logps": -81.2590560913086,
"eval_loss": 877.9466552734375,
"eval_objective": 832.3640747070312,
"eval_ranking_simple": 0.5388198494911194,
"eval_regularize": 1.9230473041534424,
"eval_runtime": 368.0271,
"eval_samples_per_second": 15.733,
"eval_steps_per_second": 1.312,
"eval_wo_beta": 6.825497150421143,
"step": 800
},
{
"dpo_loss": 0.19815029203891754,
"epoch": 2.2815304676428907,
"grad_norm": 14625.694980992721,
"learning_rate": 8.1288780455512e-07,
"logits": -0.6596904397010803,
"logps": -76.49951934814453,
"loss": 62.5605,
"objective": 77.96316528320312,
"ranking_simple": 0.637499988079071,
"regularize": 0.19815029203891754,
"step": 805,
"wo_beta": 4.744214057922363
},
{
"dpo_loss": 0.1642819195985794,
"epoch": 2.295701464336325,
"grad_norm": 16998.784498338227,
"learning_rate": 7.826149656671386e-07,
"logits": -0.7425891757011414,
"logps": -75.827880859375,
"loss": 74.8622,
"objective": 69.58053588867188,
"ranking_simple": 0.7124999761581421,
"regularize": 0.1642819195985794,
"step": 810,
"wo_beta": 3.9602649211883545
},
{
"dpo_loss": 0.11294615268707275,
"epoch": 2.309872461029759,
"grad_norm": 13662.727399988506,
"learning_rate": 7.528116426995605e-07,
"logits": -0.7013121843338013,
"logps": -77.21855163574219,
"loss": 50.3895,
"objective": 50.59254455566406,
"ranking_simple": 0.6666666865348816,
"regularize": 0.11294615268707275,
"step": 815,
"wo_beta": 4.308948040008545
},
{
"dpo_loss": 0.1386326104402542,
"epoch": 2.324043457723193,
"grad_norm": 11846.16448361833,
"learning_rate": 7.234859835833022e-07,
"logits": -0.625929057598114,
"logps": -77.01323699951172,
"loss": 54.8365,
"objective": 65.72408294677734,
"ranking_simple": 0.6583333611488342,
"regularize": 0.1386326104402542,
"step": 820,
"wo_beta": 4.429469585418701
},
{
"dpo_loss": 0.19329386949539185,
"epoch": 2.3382144544166272,
"grad_norm": 12374.930899414503,
"learning_rate": 6.94646005660749e-07,
"logits": -0.6363904476165771,
"logps": -76.43814086914062,
"loss": 70.2641,
"objective": 75.29810333251953,
"ranking_simple": 0.6833333373069763,
"regularize": 0.19329386949539185,
"step": 825,
"wo_beta": 4.171545028686523
},
{
"dpo_loss": 0.14528803527355194,
"epoch": 2.3523854511100613,
"grad_norm": 17569.089090693193,
"learning_rate": 6.662995934939007e-07,
"logits": -0.6522344946861267,
"logps": -78.64282989501953,
"loss": 49.4023,
"objective": 57.81031036376953,
"ranking_simple": 0.7083333134651184,
"regularize": 0.14528803527355194,
"step": 830,
"wo_beta": 3.6162989139556885
},
{
"dpo_loss": 0.1276129186153412,
"epoch": 2.3665564478034957,
"grad_norm": 16065.220370294603,
"learning_rate": 6.384544967088063e-07,
"logits": -0.6452387571334839,
"logps": -77.74604034423828,
"loss": 64.3901,
"objective": 50.397518157958984,
"ranking_simple": 0.7166666388511658,
"regularize": 0.1276129186153412,
"step": 835,
"wo_beta": 4.223039150238037
},
{
"dpo_loss": 0.10496517270803452,
"epoch": 2.3807274444969297,
"grad_norm": 15811.911533083141,
"learning_rate": 6.111183278768956e-07,
"logits": -0.5866932272911072,
"logps": -77.73839569091797,
"loss": 63.8202,
"objective": 43.11891555786133,
"ranking_simple": 0.612500011920929,
"regularize": 0.10496517270803452,
"step": 840,
"wo_beta": 4.501134872436523
},
{
"dpo_loss": 0.2035357505083084,
"epoch": 2.3948984411903638,
"grad_norm": 21874.361109441965,
"learning_rate": 5.842985604337769e-07,
"logits": -0.6392544507980347,
"logps": -79.36287689208984,
"loss": 65.8964,
"objective": 87.10123443603516,
"ranking_simple": 0.6708333492279053,
"regularize": 0.2035357505083084,
"step": 845,
"wo_beta": 3.961372137069702
},
{
"dpo_loss": 0.1189335361123085,
"epoch": 2.409069437883798,
"grad_norm": 13250.743799157402,
"learning_rate": 5.580025266360764e-07,
"logits": -0.6523115038871765,
"logps": -76.91999053955078,
"loss": 60.559,
"objective": 56.37547302246094,
"ranking_simple": 0.6499999761581421,
"regularize": 0.1189335361123085,
"step": 850,
"wo_beta": 4.629120349884033
},
{
"epoch": 2.409069437883798,
"eval_dpo_loss": 1.930367350578308,
"eval_logits": -0.7166435718536377,
"eval_logps": -81.43138122558594,
"eval_loss": 876.0620727539062,
"eval_objective": 834.962890625,
"eval_ranking_simple": 0.5393374562263489,
"eval_regularize": 1.930367350578308,
"eval_runtime": 368.7191,
"eval_samples_per_second": 15.703,
"eval_steps_per_second": 1.31,
"eval_wo_beta": 6.89687442779541,
"step": 850
},
{
"dpo_loss": 0.16136877238750458,
"epoch": 2.423240434577232,
"grad_norm": 16354.507131335233,
"learning_rate": 5.322374155568688e-07,
"logits": -0.6328111290931702,
"logps": -76.62676239013672,
"loss": 96.6161,
"objective": 63.801761627197266,
"ranking_simple": 0.6791666746139526,
"regularize": 0.16136877238750458,
"step": 855,
"wo_beta": 3.592381238937378
},
{
"dpo_loss": 0.22841231524944305,
"epoch": 2.4374114312706663,
"grad_norm": 16096.552601321706,
"learning_rate": 5.070102711202606e-07,
"logits": -0.5914830565452576,
"logps": -77.95539855957031,
"loss": 70.7855,
"objective": 97.63321685791016,
"ranking_simple": 0.6583333611488342,
"regularize": 0.22841231524944305,
"step": 860,
"wo_beta": 4.532555103302002
},
{
"dpo_loss": 0.13698126375675201,
"epoch": 2.4515824279641003,
"grad_norm": 26580.339293279554,
"learning_rate": 4.823279901756498e-07,
"logits": -0.6197033524513245,
"logps": -76.97139739990234,
"loss": 57.5479,
"objective": 63.15699005126953,
"ranking_simple": 0.6499999761581421,
"regularize": 0.13698126375675201,
"step": 865,
"wo_beta": 4.650790691375732
},
{
"dpo_loss": 0.17232166230678558,
"epoch": 2.4657534246575343,
"grad_norm": 14902.759735907886,
"learning_rate": 4.581973206121948e-07,
"logits": -0.6524402499198914,
"logps": -77.39944458007812,
"loss": 59.8115,
"objective": 72.1148452758789,
"ranking_simple": 0.6666666865348816,
"regularize": 0.17232166230678558,
"step": 870,
"wo_beta": 4.716188430786133
},
{
"dpo_loss": 0.16291117668151855,
"epoch": 2.4799244213509684,
"grad_norm": 16443.311561175098,
"learning_rate": 4.3462485951401126e-07,
"logits": -0.6056000590324402,
"logps": -77.26061248779297,
"loss": 58.545,
"objective": 65.73771667480469,
"ranking_simple": 0.6958333253860474,
"regularize": 0.16291117668151855,
"step": 875,
"wo_beta": 3.9136276245117188
},
{
"dpo_loss": 0.1831001490354538,
"epoch": 2.4940954180444024,
"grad_norm": 17222.63460091855,
"learning_rate": 4.116170513565942e-07,
"logits": -0.5453128218650818,
"logps": -76.7745132446289,
"loss": 81.4098,
"objective": 87.2974853515625,
"ranking_simple": 0.6458333134651184,
"regularize": 0.1831001490354538,
"step": 880,
"wo_beta": 5.620166301727295
},
{
"dpo_loss": 0.17733284831047058,
"epoch": 2.5082664147378364,
"grad_norm": 18608.993512491026,
"learning_rate": 3.891801862449629e-07,
"logits": -0.6945806741714478,
"logps": -75.74263763427734,
"loss": 64.9335,
"objective": 71.06294250488281,
"ranking_simple": 0.6791666746139526,
"regularize": 0.17733284831047058,
"step": 885,
"wo_beta": 4.535309791564941
},
{
"dpo_loss": 0.12178336828947067,
"epoch": 2.5224374114312704,
"grad_norm": 19369.013842624132,
"learning_rate": 3.6732039819400686e-07,
"logits": -0.6358047127723694,
"logps": -74.91355895996094,
"loss": 68.998,
"objective": 51.5380859375,
"ranking_simple": 0.6875,
"regularize": 0.12178336828947067,
"step": 890,
"wo_beta": 3.4617159366607666
},
{
"dpo_loss": 0.16680613160133362,
"epoch": 2.536608408124705,
"grad_norm": 14873.87723458358,
"learning_rate": 3.46043663451511e-07,
"logits": -0.6266335844993591,
"logps": -77.49481964111328,
"loss": 58.2508,
"objective": 74.53083038330078,
"ranking_simple": 0.6499999761581421,
"regularize": 0.16680613160133362,
"step": 895,
"wo_beta": 4.595145225524902
},
{
"dpo_loss": 0.153776153922081,
"epoch": 2.550779404818139,
"grad_norm": 20793.25603723828,
"learning_rate": 3.253557988643072e-07,
"logits": -0.5867082476615906,
"logps": -76.2872085571289,
"loss": 61.5447,
"objective": 66.78315734863281,
"ranking_simple": 0.6666666865348816,
"regularize": 0.153776153922081,
"step": 900,
"wo_beta": 4.349160194396973
},
{
"epoch": 2.550779404818139,
"eval_dpo_loss": 1.9454582929611206,
"eval_logits": -0.7119657397270203,
"eval_logps": -81.622314453125,
"eval_loss": 885.504638671875,
"eval_objective": 842.5767822265625,
"eval_ranking_simple": 0.5414078831672668,
"eval_regularize": 1.9454582929611206,
"eval_runtime": 368.0518,
"eval_samples_per_second": 15.731,
"eval_steps_per_second": 1.312,
"eval_wo_beta": 6.90536642074585,
"step": 900
},
{
"dpo_loss": 0.1694839894771576,
"epoch": 2.564950401511573,
"grad_norm": 5383.724684748024,
"learning_rate": 3.052624602880064e-07,
"logits": -0.6292735934257507,
"logps": -75.40677642822266,
"loss": 55.1556,
"objective": 73.89187622070312,
"ranking_simple": 0.6916666626930237,
"regularize": 0.1694839894771576,
"step": 905,
"wo_beta": 4.002458095550537
},
{
"dpo_loss": 0.2758396863937378,
"epoch": 2.579121398205007,
"grad_norm": 21167.29586316348,
"learning_rate": 2.8576914104074425e-07,
"logits": -0.6043637990951538,
"logps": -79.17253875732422,
"loss": 71.7501,
"objective": 115.38219451904297,
"ranking_simple": 0.6833333373069763,
"regularize": 0.2758396863937378,
"step": 910,
"wo_beta": 4.724976062774658
},
{
"dpo_loss": 0.1418589949607849,
"epoch": 2.593292394898441,
"grad_norm": 18250.664807404537,
"learning_rate": 2.6688117040136463e-07,
"logits": -0.7428493499755859,
"logps": -78.07086944580078,
"loss": 62.2011,
"objective": 52.23597717285156,
"ranking_simple": 0.6916666626930237,
"regularize": 0.1418589949607849,
"step": 915,
"wo_beta": 3.8614261150360107
},
{
"dpo_loss": 0.13393595814704895,
"epoch": 2.6074633915918755,
"grad_norm": 12767.685735140785,
"learning_rate": 2.486037121524448e-07,
"logits": -0.5820329785346985,
"logps": -77.22747039794922,
"loss": 53.2699,
"objective": 55.055419921875,
"ranking_simple": 0.6916666626930237,
"regularize": 0.13393595814704895,
"step": 920,
"wo_beta": 3.9748997688293457
},
{
"dpo_loss": 0.11604820191860199,
"epoch": 2.6216343882853095,
"grad_norm": 10406.725183590128,
"learning_rate": 2.3094176316856982e-07,
"logits": -0.59433913230896,
"logps": -76.20643615722656,
"loss": 67.2785,
"objective": 43.61358642578125,
"ranking_simple": 0.6416666507720947,
"regularize": 0.11604820191860199,
"step": 925,
"wo_beta": 4.829153537750244
},
{
"dpo_loss": 0.1961955726146698,
"epoch": 2.6358053849787435,
"grad_norm": 13596.007234051527,
"learning_rate": 2.13900152050239e-07,
"logits": -0.6106235384941101,
"logps": -76.37940979003906,
"loss": 67.3097,
"objective": 87.510986328125,
"ranking_simple": 0.6291666626930237,
"regularize": 0.1961955726146698,
"step": 930,
"wo_beta": 5.22459602355957
},
{
"dpo_loss": 0.12966051697731018,
"epoch": 2.6499763816721775,
"grad_norm": 16260.320579834099,
"learning_rate": 1.9748353780377234e-07,
"logits": -0.5941105484962463,
"logps": -75.61875915527344,
"loss": 78.4459,
"objective": 43.857093811035156,
"ranking_simple": 0.6875,
"regularize": 0.12966051697731018,
"step": 935,
"wo_beta": 3.68115234375
},
{
"dpo_loss": 0.11212758719921112,
"epoch": 2.6641473783656116,
"grad_norm": 14569.700031823939,
"learning_rate": 1.8169640856758652e-07,
"logits": -0.6425444483757019,
"logps": -78.66395568847656,
"loss": 58.8761,
"objective": 53.29667282104492,
"ranking_simple": 0.6708333492279053,
"regularize": 0.11212758719921112,
"step": 940,
"wo_beta": 4.617191791534424
},
{
"dpo_loss": 0.13408087193965912,
"epoch": 2.678318375059046,
"grad_norm": 11373.74928233542,
"learning_rate": 1.6654308038518057e-07,
"logits": -0.6835525631904602,
"logps": -76.78800201416016,
"loss": 49.8562,
"objective": 51.24209213256836,
"ranking_simple": 0.6625000238418579,
"regularize": 0.13408087193965912,
"step": 945,
"wo_beta": 4.127335071563721
},
{
"dpo_loss": 0.2326124608516693,
"epoch": 2.69248937175248,
"grad_norm": 17533.557850413275,
"learning_rate": 1.5202769602517514e-07,
"logits": -0.5890699625015259,
"logps": -76.48961639404297,
"loss": 76.2992,
"objective": 107.2320785522461,
"ranking_simple": 0.6916666626930237,
"regularize": 0.2326124608516693,
"step": 950,
"wo_beta": 4.0390706062316895
},
{
"epoch": 2.69248937175248,
"eval_dpo_loss": 1.9408873319625854,
"eval_logits": -0.733562171459198,
"eval_logps": -81.01062774658203,
"eval_loss": 885.0243530273438,
"eval_objective": 840.861572265625,
"eval_ranking_simple": 0.5414078831672668,
"eval_regularize": 1.9408873319625854,
"eval_runtime": 368.3562,
"eval_samples_per_second": 15.718,
"eval_steps_per_second": 1.311,
"eval_wo_beta": 6.8752055168151855,
"step": 950
},
{
"dpo_loss": 0.15838518738746643,
"epoch": 2.706660368445914,
"grad_norm": 19909.761661048542,
"learning_rate": 1.381542238487188e-07,
"logits": -0.6561439633369446,
"logps": -76.67708587646484,
"loss": 70.9999,
"objective": 63.771217346191406,
"ranking_simple": 0.6791666746139526,
"regularize": 0.15838518738746643,
"step": 955,
"wo_beta": 4.371394157409668
},
{
"dpo_loss": 0.14735987782478333,
"epoch": 2.720831365139348,
"grad_norm": 18552.81796156679,
"learning_rate": 1.2492645672457838e-07,
"logits": -0.6275337934494019,
"logps": -76.68091583251953,
"loss": 48.8316,
"objective": 56.900169372558594,
"ranking_simple": 0.6583333611488342,
"regularize": 0.14735987782478333,
"step": 960,
"wo_beta": 4.69942569732666
},
{
"dpo_loss": 0.1760532110929489,
"epoch": 2.735002361832782,
"grad_norm": 11215.906650051957,
"learning_rate": 1.1234801099220787e-07,
"logits": -0.663616955280304,
"logps": -76.21281433105469,
"loss": 74.0274,
"objective": 69.9241714477539,
"ranking_simple": 0.7291666865348816,
"regularize": 0.1760532110929489,
"step": 965,
"wo_beta": 2.9462215900421143
},
{
"dpo_loss": 0.21225102245807648,
"epoch": 2.7491733585262166,
"grad_norm": 22811.566451460014,
"learning_rate": 1.004223254730749e-07,
"logits": -0.6760108470916748,
"logps": -77.40128326416016,
"loss": 59.5453,
"objective": 79.48020935058594,
"ranking_simple": 0.7124999761581421,
"regularize": 0.21225102245807648,
"step": 970,
"wo_beta": 3.1231398582458496
},
{
"dpo_loss": 0.11088516563177109,
"epoch": 2.7633443552196506,
"grad_norm": 16204.377117749516,
"learning_rate": 8.915266053052374e-08,
"logits": -0.5741701126098633,
"logps": -75.76372528076172,
"loss": 55.3212,
"objective": 38.7469596862793,
"ranking_simple": 0.6916666626930237,
"regularize": 0.11088516563177109,
"step": 975,
"wo_beta": 4.1251749992370605
},
{
"dpo_loss": 0.11932215839624405,
"epoch": 2.7775153519130846,
"grad_norm": 18600.253407646793,
"learning_rate": 7.854209717842231e-08,
"logits": -0.6992437839508057,
"logps": -76.46355438232422,
"loss": 61.9886,
"objective": 40.65220642089844,
"ranking_simple": 0.6833333373069763,
"regularize": 0.11932215839624405,
"step": 980,
"wo_beta": 3.3738327026367188
},
{
"dpo_loss": 0.13639378547668457,
"epoch": 2.7916863486065187,
"grad_norm": 22167.749340913193,
"learning_rate": 6.859353623884569e-08,
"logits": -0.5681948065757751,
"logps": -73.97103118896484,
"loss": 57.9846,
"objective": 50.542232513427734,
"ranking_simple": 0.6875,
"regularize": 0.13639378547668457,
"step": 985,
"wo_beta": 3.965052366256714
},
{
"dpo_loss": 0.13361641764640808,
"epoch": 2.8058573452999527,
"grad_norm": 17499.837979849613,
"learning_rate": 5.930969754901844e-08,
"logits": -0.6173512935638428,
"logps": -77.61453247070312,
"loss": 48.6088,
"objective": 48.143672943115234,
"ranking_simple": 0.675000011920929,
"regularize": 0.13361641764640808,
"step": 990,
"wo_beta": 4.875253677368164
},
{
"dpo_loss": 0.15786290168762207,
"epoch": 2.820028341993387,
"grad_norm": 20060.653124706987,
"learning_rate": 5.069311921774039e-08,
"logits": -0.7026129364967346,
"logps": -76.29402923583984,
"loss": 74.7771,
"objective": 64.83280944824219,
"ranking_simple": 0.6875,
"regularize": 0.15786290168762207,
"step": 995,
"wo_beta": 4.514121055603027
},
{
"dpo_loss": 0.1805247962474823,
"epoch": 2.8341993386868207,
"grad_norm": 9634.51813321111,
"learning_rate": 4.2746156931490756e-08,
"logits": -0.604320228099823,
"logps": -75.02464294433594,
"loss": 65.003,
"objective": 79.15603637695312,
"ranking_simple": 0.6916666626930237,
"regularize": 0.1805247962474823,
"step": 1000,
"wo_beta": 3.7478036880493164
},
{
"epoch": 2.8341993386868207,
"eval_dpo_loss": 1.938610553741455,
"eval_logits": -0.7340191602706909,
"eval_logps": -80.97093200683594,
"eval_loss": 883.8386840820312,
"eval_objective": 839.7760009765625,
"eval_ranking_simple": 0.5419254899024963,
"eval_regularize": 1.938610553741455,
"eval_runtime": 369.6961,
"eval_samples_per_second": 15.662,
"eval_steps_per_second": 1.306,
"eval_wo_beta": 6.880806922912598,
"step": 1000
},
{
"dpo_loss": 0.09317085891962051,
"epoch": 2.848370335380255,
"grad_norm": 14479.932099976699,
"learning_rate": 3.547098331040916e-08,
"logits": -0.6243438124656677,
"logps": -75.73117065429688,
"loss": 60.5699,
"objective": 35.96461486816406,
"ranking_simple": 0.6958333253860474,
"regularize": 0.09317085891962051,
"step": 1005,
"wo_beta": 4.23464822769165
},
{
"dpo_loss": 0.15720970928668976,
"epoch": 2.862541332073689,
"grad_norm": 17027.37402012935,
"learning_rate": 2.8869587314321324e-08,
"logits": -0.673160970211029,
"logps": -75.78913879394531,
"loss": 48.6492,
"objective": 64.71520233154297,
"ranking_simple": 0.6791666746139526,
"regularize": 0.15720970928668976,
"step": 1010,
"wo_beta": 4.332252502441406
},
{
"dpo_loss": 0.11788446456193924,
"epoch": 2.8767123287671232,
"grad_norm": 11230.189495808421,
"learning_rate": 2.2943773698977935e-08,
"logits": -0.6236141324043274,
"logps": -75.31798553466797,
"loss": 55.6176,
"objective": 39.647281646728516,
"ranking_simple": 0.7166666388511658,
"regularize": 0.11788446456193924,
"step": 1015,
"wo_beta": 3.647088050842285
},
{
"dpo_loss": 0.16658252477645874,
"epoch": 2.8908833254605573,
"grad_norm": 12864.832924042903,
"learning_rate": 1.7695162522652352e-08,
"logits": -0.5761324167251587,
"logps": -76.70230865478516,
"loss": 62.3645,
"objective": 71.47562408447266,
"ranking_simple": 0.6333333253860474,
"regularize": 0.16658252477645874,
"step": 1020,
"wo_beta": 5.046957969665527
},
{
"dpo_loss": 0.12362422049045563,
"epoch": 2.9050543221539913,
"grad_norm": 12894.829353554518,
"learning_rate": 1.3125188703233815e-08,
"logits": -0.681283712387085,
"logps": -76.55783081054688,
"loss": 58.5196,
"objective": 48.40925598144531,
"ranking_simple": 0.7124999761581421,
"regularize": 0.12362422049045563,
"step": 1025,
"wo_beta": 3.9699738025665283
},
{
"dpo_loss": 0.14537376165390015,
"epoch": 2.9192253188474258,
"grad_norm": 14896.788895915139,
"learning_rate": 9.235101625932885e-09,
"logits": -0.6897163391113281,
"logps": -75.82958984375,
"loss": 60.8222,
"objective": 57.95526123046875,
"ranking_simple": 0.625,
"regularize": 0.14537376165390015,
"step": 1030,
"wo_beta": 4.4757466316223145
},
{
"dpo_loss": 0.21178776025772095,
"epoch": 2.9333963155408598,
"grad_norm": 19672.735817554352,
"learning_rate": 6.025964801714412e-09,
"logits": -0.6465732455253601,
"logps": -77.34451293945312,
"loss": 93.2003,
"objective": 83.75788116455078,
"ranking_simple": 0.6583333611488342,
"regularize": 0.21178776025772095,
"step": 1035,
"wo_beta": 3.9459071159362793
},
{
"dpo_loss": 0.16617676615715027,
"epoch": 2.947567312234294,
"grad_norm": 13684.295320753776,
"learning_rate": 3.4986555765434415e-09,
"logits": -0.6697372794151306,
"logps": -77.15235137939453,
"loss": 51.8377,
"objective": 61.92232894897461,
"ranking_simple": 0.6458333134651184,
"regularize": 0.16617676615715027,
"step": 1040,
"wo_beta": 4.602719783782959
},
{
"dpo_loss": 0.14166632294654846,
"epoch": 2.961738308927728,
"grad_norm": 19931.645045536377,
"learning_rate": 1.6538648915270794e-09,
"logits": -0.6000397801399231,
"logps": -79.05880737304688,
"loss": 63.6317,
"objective": 56.86110305786133,
"ranking_simple": 0.6833333373069763,
"regularize": 0.14166632294654846,
"step": 1045,
"wo_beta": 5.119239330291748
},
{
"dpo_loss": 0.1613662987947464,
"epoch": 2.975909305621162,
"grad_norm": 14762.074424664803,
"learning_rate": 4.920970940180958e-10,
"logits": -0.6614073514938354,
"logps": -74.51754760742188,
"loss": 59.5302,
"objective": 55.57144546508789,
"ranking_simple": 0.7041666507720947,
"regularize": 0.1613662987947464,
"step": 1050,
"wo_beta": 3.768414258956909
},
{
"epoch": 2.975909305621162,
"eval_dpo_loss": 1.9391295909881592,
"eval_logits": -0.7331260442733765,
"eval_logps": -80.98583984375,
"eval_loss": 883.8082885742188,
"eval_objective": 840.0045166015625,
"eval_ranking_simple": 0.5419254899024963,
"eval_regularize": 1.9391295909881592,
"eval_runtime": 368.7556,
"eval_samples_per_second": 15.701,
"eval_steps_per_second": 1.31,
"eval_wo_beta": 6.885995388031006,
"step": 1050
},
{
"dpo_loss": 0.18576760590076447,
"epoch": 2.9900803023145963,
"grad_norm": 14293.757478034773,
"learning_rate": 1.3669799732163314e-11,
"logits": -0.6282141208648682,
"logps": -75.39889526367188,
"loss": 57.2359,
"objective": 80.12165832519531,
"ranking_simple": 0.6708333492279053,
"regularize": 0.18576760590076447,
"step": 1055,
"wo_beta": 4.444089412689209
},
{
"epoch": 2.992914501653283,
"step": 1056,
"total_flos": 0.0,
"train_loss": 275.78432337443036,
"train_runtime": 34690.4645,
"train_samples_per_second": 4.393,
"train_steps_per_second": 0.03
}
],
"logging_steps": 5,
"max_steps": 1056,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": false,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}