gemma-7b-simpo-basic-5e-5-05-v4 / trainer_state.json
silviasapora's picture
Model save
eccece5 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.986666666666667,
"eval_steps": 500,
"global_step": 315,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.047407407407407405,
"grad_norm": 342.0,
"learning_rate": 7.8125e-06,
"log_odds_chosen": -1.001197099685669,
"log_odds_ratio": -10.174017906188965,
"logps/chosen": -21.73920249938965,
"logps/rejected": -20.738176345825195,
"loss": 168.3007,
"nll_loss": 9.714922904968262,
"rewards/accuracies": 0.48124998807907104,
"rewards/chosen": -10.869601249694824,
"rewards/margins": -0.5005130171775818,
"rewards/rejected": -10.369088172912598,
"step": 5
},
{
"epoch": 0.09481481481481481,
"grad_norm": 612.0,
"learning_rate": 1.5625e-05,
"log_odds_chosen": -2.596830368041992,
"log_odds_ratio": -9.992377281188965,
"logps/chosen": -21.97031593322754,
"logps/rejected": -19.373477935791016,
"loss": 165.589,
"nll_loss": 9.406023979187012,
"rewards/accuracies": 0.46562498807907104,
"rewards/chosen": -10.98515796661377,
"rewards/margins": -1.2984188795089722,
"rewards/rejected": -9.686738967895508,
"step": 10
},
{
"epoch": 0.14222222222222222,
"grad_norm": 2400.0,
"learning_rate": 2.34375e-05,
"log_odds_chosen": 0.5056972503662109,
"log_odds_ratio": -9.307385444641113,
"logps/chosen": -38.13218307495117,
"logps/rejected": -38.638153076171875,
"loss": 152.4231,
"nll_loss": 20.461414337158203,
"rewards/accuracies": 0.518750011920929,
"rewards/chosen": -19.066091537475586,
"rewards/margins": 0.2529878616333008,
"rewards/rejected": -19.319076538085938,
"step": 15
},
{
"epoch": 0.18962962962962962,
"grad_norm": 5472.0,
"learning_rate": 3.125e-05,
"log_odds_chosen": 2.4256246089935303,
"log_odds_ratio": -5.600870609283447,
"logps/chosen": -99.3405990600586,
"logps/rejected": -101.7662353515625,
"loss": 93.2344,
"nll_loss": 69.94987487792969,
"rewards/accuracies": 0.609375,
"rewards/chosen": -49.6702995300293,
"rewards/margins": 1.212813138961792,
"rewards/rejected": -50.88311767578125,
"step": 20
},
{
"epoch": 0.23703703703703705,
"grad_norm": 1232.0,
"learning_rate": 3.90625e-05,
"log_odds_chosen": 5.067164421081543,
"log_odds_ratio": -3.7080636024475098,
"logps/chosen": -167.09085083007812,
"logps/rejected": -172.15805053710938,
"loss": 63.106,
"nll_loss": 138.32569885253906,
"rewards/accuracies": 0.659375011920929,
"rewards/chosen": -83.54542541503906,
"rewards/margins": 2.5335822105407715,
"rewards/rejected": -86.07902526855469,
"step": 25
},
{
"epoch": 0.28444444444444444,
"grad_norm": 1576.0,
"learning_rate": 4.6875e-05,
"log_odds_chosen": 3.0125393867492676,
"log_odds_ratio": -2.035407781600952,
"logps/chosen": -153.0269775390625,
"logps/rejected": -156.03952026367188,
"loss": 37.5506,
"nll_loss": 131.5561981201172,
"rewards/accuracies": 0.6656249761581421,
"rewards/chosen": -76.51348876953125,
"rewards/margins": 1.5062696933746338,
"rewards/rejected": -78.01976013183594,
"step": 30
},
{
"epoch": 0.33185185185185184,
"grad_norm": 6304.0,
"learning_rate": 4.998613757348784e-05,
"log_odds_chosen": 2.0004706382751465,
"log_odds_ratio": -2.640322208404541,
"logps/chosen": -158.06951904296875,
"logps/rejected": -160.0699920654297,
"loss": 47.7611,
"nll_loss": 138.33160400390625,
"rewards/accuracies": 0.5718749761581421,
"rewards/chosen": -79.03475952148438,
"rewards/margins": 1.0002353191375732,
"rewards/rejected": -80.03499603271484,
"step": 35
},
{
"epoch": 0.37925925925925924,
"grad_norm": 344.0,
"learning_rate": 4.990147841143462e-05,
"log_odds_chosen": 2.306946277618408,
"log_odds_ratio": -1.341448426246643,
"logps/chosen": -109.2757339477539,
"logps/rejected": -111.58267974853516,
"loss": 27.9171,
"nll_loss": 92.97138214111328,
"rewards/accuracies": 0.6625000238418579,
"rewards/chosen": -54.63786697387695,
"rewards/margins": 1.153473138809204,
"rewards/rejected": -55.79133987426758,
"step": 40
},
{
"epoch": 0.4266666666666667,
"grad_norm": 1280.0,
"learning_rate": 4.97401218720448e-05,
"log_odds_chosen": 2.0845911502838135,
"log_odds_ratio": -0.9715531468391418,
"logps/chosen": -108.42757415771484,
"logps/rejected": -110.51216125488281,
"loss": 22.2225,
"nll_loss": 94.10669708251953,
"rewards/accuracies": 0.71875,
"rewards/chosen": -54.21378707885742,
"rewards/margins": 1.0422955751419067,
"rewards/rejected": -55.256080627441406,
"step": 45
},
{
"epoch": 0.4740740740740741,
"grad_norm": 888.0,
"learning_rate": 4.9502564938797946e-05,
"log_odds_chosen": 1.7515223026275635,
"log_odds_ratio": -1.2153639793395996,
"logps/chosen": -91.28450012207031,
"logps/rejected": -93.03602600097656,
"loss": 25.9285,
"nll_loss": 83.43812561035156,
"rewards/accuracies": 0.699999988079071,
"rewards/chosen": -45.642250061035156,
"rewards/margins": 0.8757611513137817,
"rewards/rejected": -46.51801300048828,
"step": 50
},
{
"epoch": 0.5214814814814814,
"grad_norm": 338.0,
"learning_rate": 4.918953929490768e-05,
"log_odds_chosen": 1.6651471853256226,
"log_odds_ratio": -1.1954998970031738,
"logps/chosen": -116.0286636352539,
"logps/rejected": -117.69380950927734,
"loss": 25.5553,
"nll_loss": 103.76983642578125,
"rewards/accuracies": 0.6656249761581421,
"rewards/chosen": -58.01433181762695,
"rewards/margins": 0.8325735926628113,
"rewards/rejected": -58.84690475463867,
"step": 55
},
{
"epoch": 0.5688888888888889,
"grad_norm": 668.0,
"learning_rate": 4.88020090697132e-05,
"log_odds_chosen": 3.8872642517089844,
"log_odds_ratio": -1.0850141048431396,
"logps/chosen": -95.78065490722656,
"logps/rejected": -99.66791534423828,
"loss": 22.5262,
"nll_loss": 82.30366516113281,
"rewards/accuracies": 0.731249988079071,
"rewards/chosen": -47.89032745361328,
"rewards/margins": 1.9436321258544922,
"rewards/rejected": -49.83395767211914,
"step": 60
},
{
"epoch": 0.6162962962962963,
"grad_norm": 458.0,
"learning_rate": 4.834116786912897e-05,
"log_odds_chosen": 2.0383987426757812,
"log_odds_ratio": -1.4042725563049316,
"logps/chosen": -79.27400207519531,
"logps/rejected": -81.31240844726562,
"loss": 28.716,
"nll_loss": 72.04411315917969,
"rewards/accuracies": 0.65625,
"rewards/chosen": -39.637001037597656,
"rewards/margins": 1.0191993713378906,
"rewards/rejected": -40.65620422363281,
"step": 65
},
{
"epoch": 0.6637037037037037,
"grad_norm": 2784.0,
"learning_rate": 4.7808435099299045e-05,
"log_odds_chosen": 1.3019278049468994,
"log_odds_ratio": -1.2315095663070679,
"logps/chosen": -71.43753051757812,
"logps/rejected": -72.73945617675781,
"loss": 26.5387,
"nll_loss": 74.93604278564453,
"rewards/accuracies": 0.65625,
"rewards/chosen": -35.71876525878906,
"rewards/margins": 0.6509639024734497,
"rewards/rejected": -36.369728088378906,
"step": 70
},
{
"epoch": 0.7111111111111111,
"grad_norm": 564.0,
"learning_rate": 4.720545159477922e-05,
"log_odds_chosen": 2.32663893699646,
"log_odds_ratio": -1.090299367904663,
"logps/chosen": -66.29554748535156,
"logps/rejected": -68.6221923828125,
"loss": 23.741,
"nll_loss": 59.28515625,
"rewards/accuracies": 0.6937500238418579,
"rewards/chosen": -33.14777374267578,
"rewards/margins": 1.16331946849823,
"rewards/rejected": -34.31109619140625,
"step": 75
},
{
"epoch": 0.7585185185185185,
"grad_norm": 808.0,
"learning_rate": 4.653407456471222e-05,
"log_odds_chosen": 2.1821746826171875,
"log_odds_ratio": -0.9271427392959595,
"logps/chosen": -65.87654876708984,
"logps/rejected": -68.05873107910156,
"loss": 21.0796,
"nll_loss": 58.6850471496582,
"rewards/accuracies": 0.699999988079071,
"rewards/chosen": -32.93827438354492,
"rewards/margins": 1.0910873413085938,
"rewards/rejected": -34.02936553955078,
"step": 80
},
{
"epoch": 0.8059259259259259,
"grad_norm": 336.0,
"learning_rate": 4.579637187256222e-05,
"log_odds_chosen": 1.560599446296692,
"log_odds_ratio": -0.7616011500358582,
"logps/chosen": -61.8255615234375,
"logps/rejected": -63.38615798950195,
"loss": 19.4048,
"nll_loss": 54.76035690307617,
"rewards/accuracies": 0.690625011920929,
"rewards/chosen": -30.91278076171875,
"rewards/margins": 0.780299723148346,
"rewards/rejected": -31.693078994750977,
"step": 85
},
{
"epoch": 0.8533333333333334,
"grad_norm": 364.0,
"learning_rate": 4.499461566702685e-05,
"log_odds_chosen": 3.0954723358154297,
"log_odds_ratio": -0.8529545068740845,
"logps/chosen": -69.68413543701172,
"logps/rejected": -72.77960968017578,
"loss": 19.6692,
"nll_loss": 61.34803009033203,
"rewards/accuracies": 0.753125011920929,
"rewards/chosen": -34.84206771850586,
"rewards/margins": 1.5477361679077148,
"rewards/rejected": -36.38980484008789,
"step": 90
},
{
"epoch": 0.9007407407407407,
"grad_norm": 2544.0,
"learning_rate": 4.413127538374411e-05,
"log_odds_chosen": 3.0107903480529785,
"log_odds_ratio": -1.105690598487854,
"logps/chosen": -60.92426681518555,
"logps/rejected": -63.93505859375,
"loss": 22.6413,
"nll_loss": 51.06819534301758,
"rewards/accuracies": 0.7281249761581421,
"rewards/chosen": -30.462133407592773,
"rewards/margins": 1.5053951740264893,
"rewards/rejected": -31.967529296875,
"step": 95
},
{
"epoch": 0.9481481481481482,
"grad_norm": 644.0,
"learning_rate": 4.320901013934887e-05,
"log_odds_chosen": 2.8773467540740967,
"log_odds_ratio": -0.838965117931366,
"logps/chosen": -62.23920822143555,
"logps/rejected": -65.11656188964844,
"loss": 18.8463,
"nll_loss": 54.19057083129883,
"rewards/accuracies": 0.765625,
"rewards/chosen": -31.119604110717773,
"rewards/margins": 1.4386733770370483,
"rewards/rejected": -32.55828094482422,
"step": 100
},
{
"epoch": 0.9955555555555555,
"grad_norm": 820.0,
"learning_rate": 4.223066054130568e-05,
"log_odds_chosen": 1.8299286365509033,
"log_odds_ratio": -0.8006542325019836,
"logps/chosen": -57.67655563354492,
"logps/rejected": -59.50648880004883,
"loss": 19.4492,
"nll_loss": 50.32979965209961,
"rewards/accuracies": 0.746874988079071,
"rewards/chosen": -28.83827781677246,
"rewards/margins": 0.9149643182754517,
"rewards/rejected": -29.753244400024414,
"step": 105
},
{
"epoch": 1.0429629629629629,
"grad_norm": 608.0,
"learning_rate": 4.1199239938743797e-05,
"log_odds_chosen": 1.6930482387542725,
"log_odds_ratio": -0.6848193407058716,
"logps/chosen": -58.739891052246094,
"logps/rejected": -60.43293380737305,
"loss": 18.2001,
"nll_loss": 53.937278747558594,
"rewards/accuracies": 0.731249988079071,
"rewards/chosen": -29.369945526123047,
"rewards/margins": 0.8465241193771362,
"rewards/rejected": -30.216466903686523,
"step": 110
},
{
"epoch": 1.0903703703703704,
"grad_norm": 1512.0,
"learning_rate": 4.0117925141242174e-05,
"log_odds_chosen": 1.0279403924942017,
"log_odds_ratio": -0.8556658029556274,
"logps/chosen": -58.91347122192383,
"logps/rejected": -59.94141387939453,
"loss": 21.4266,
"nll_loss": 56.569854736328125,
"rewards/accuracies": 0.637499988079071,
"rewards/chosen": -29.456735610961914,
"rewards/margins": 0.5139701962471008,
"rewards/rejected": -29.970706939697266,
"step": 115
},
{
"epoch": 1.1377777777777778,
"grad_norm": 552.0,
"learning_rate": 3.899004663415084e-05,
"log_odds_chosen": 2.164902925491333,
"log_odds_ratio": -0.7987843155860901,
"logps/chosen": -52.04120635986328,
"logps/rejected": -54.20610809326172,
"loss": 18.9914,
"nll_loss": 47.23939895629883,
"rewards/accuracies": 0.7437499761581421,
"rewards/chosen": -26.02060317993164,
"rewards/margins": 1.0824514627456665,
"rewards/rejected": -27.10305404663086,
"step": 120
},
{
"epoch": 1.1851851851851851,
"grad_norm": 1552.0,
"learning_rate": 3.781907832058587e-05,
"log_odds_chosen": 2.5184638500213623,
"log_odds_ratio": -0.8748235702514648,
"logps/chosen": -45.36113739013672,
"logps/rejected": -47.87959671020508,
"loss": 19.6045,
"nll_loss": 41.12126922607422,
"rewards/accuracies": 0.734375,
"rewards/chosen": -22.68056869506836,
"rewards/margins": 1.2592319250106812,
"rewards/rejected": -23.93979835510254,
"step": 125
},
{
"epoch": 1.2325925925925927,
"grad_norm": 1152.0,
"learning_rate": 3.660862682169282e-05,
"log_odds_chosen": 3.643378496170044,
"log_odds_ratio": -0.7594578862190247,
"logps/chosen": -45.95283889770508,
"logps/rejected": -49.59621810913086,
"loss": 17.2083,
"nll_loss": 41.947303771972656,
"rewards/accuracies": 0.762499988079071,
"rewards/chosen": -22.97641944885254,
"rewards/margins": 1.821689248085022,
"rewards/rejected": -24.79810905456543,
"step": 130
},
{
"epoch": 1.28,
"grad_norm": 376.0,
"learning_rate": 3.5362420368134356e-05,
"log_odds_chosen": 3.739476442337036,
"log_odds_ratio": -0.6163658499717712,
"logps/chosen": -40.55989074707031,
"logps/rejected": -44.29936981201172,
"loss": 15.0989,
"nll_loss": 36.582767486572266,
"rewards/accuracies": 0.784375011920929,
"rewards/chosen": -20.279945373535156,
"rewards/margins": 1.869738221168518,
"rewards/rejected": -22.14968490600586,
"step": 135
},
{
"epoch": 1.3274074074074074,
"grad_norm": 700.0,
"learning_rate": 3.408429731701635e-05,
"log_odds_chosen": 3.9576961994171143,
"log_odds_ratio": -0.5536572337150574,
"logps/chosen": -46.536231994628906,
"logps/rejected": -50.49393081665039,
"loss": 13.9694,
"nll_loss": 42.133995056152344,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -23.268115997314453,
"rewards/margins": 1.9788480997085571,
"rewards/rejected": -25.246965408325195,
"step": 140
},
{
"epoch": 1.374814814814815,
"grad_norm": 612.0,
"learning_rate": 3.2778194329621104e-05,
"log_odds_chosen": 3.112581729888916,
"log_odds_ratio": -0.4531089663505554,
"logps/chosen": -41.952354431152344,
"logps/rejected": -45.06493377685547,
"loss": 13.5222,
"nll_loss": 41.113059997558594,
"rewards/accuracies": 0.8187500238418579,
"rewards/chosen": -20.976177215576172,
"rewards/margins": 1.556290864944458,
"rewards/rejected": -22.532466888427734,
"step": 145
},
{
"epoch": 1.4222222222222223,
"grad_norm": 502.0,
"learning_rate": 3.144813424636031e-05,
"log_odds_chosen": 4.257750511169434,
"log_odds_ratio": -0.71519935131073,
"logps/chosen": -36.202030181884766,
"logps/rejected": -40.45978546142578,
"loss": 15.7954,
"nll_loss": 33.97623062133789,
"rewards/accuracies": 0.793749988079071,
"rewards/chosen": -18.101015090942383,
"rewards/margins": 2.128875255584717,
"rewards/rejected": -20.22989273071289,
"step": 150
},
{
"epoch": 1.4696296296296296,
"grad_norm": 544.0,
"learning_rate": 3.0098213696293542e-05,
"log_odds_chosen": 4.735526084899902,
"log_odds_ratio": -0.8354274034500122,
"logps/chosen": -43.27934265136719,
"logps/rejected": -48.014869689941406,
"loss": 17.5477,
"nll_loss": 38.36113739013672,
"rewards/accuracies": 0.7875000238418579,
"rewards/chosen": -21.639671325683594,
"rewards/margins": 2.367762804031372,
"rewards/rejected": -24.007434844970703,
"step": 155
},
{
"epoch": 1.5170370370370372,
"grad_norm": 1040.0,
"learning_rate": 2.8732590479375165e-05,
"log_odds_chosen": 4.304583549499512,
"log_odds_ratio": -0.5599141120910645,
"logps/chosen": -36.469970703125,
"logps/rejected": -40.774559020996094,
"loss": 13.4462,
"nll_loss": 31.85152816772461,
"rewards/accuracies": 0.831250011920929,
"rewards/chosen": -18.2349853515625,
"rewards/margins": 2.152291774749756,
"rewards/rejected": -20.387279510498047,
"step": 160
},
{
"epoch": 1.5644444444444443,
"grad_norm": 468.0,
"learning_rate": 2.7355470760292956e-05,
"log_odds_chosen": 3.8765549659729004,
"log_odds_ratio": -0.5279486775398254,
"logps/chosen": -35.835731506347656,
"logps/rejected": -39.71228790283203,
"loss": 13.4231,
"nll_loss": 31.093231201171875,
"rewards/accuracies": 0.8187500238418579,
"rewards/chosen": -17.917865753173828,
"rewards/margins": 1.9382774829864502,
"rewards/rejected": -19.856143951416016,
"step": 165
},
{
"epoch": 1.6118518518518519,
"grad_norm": 181.0,
"learning_rate": 2.597109611334169e-05,
"log_odds_chosen": 3.232649326324463,
"log_odds_ratio": -0.48758283257484436,
"logps/chosen": -27.44559097290039,
"logps/rejected": -30.678241729736328,
"loss": 13.1705,
"nll_loss": 23.72698402404785,
"rewards/accuracies": 0.840624988079071,
"rewards/chosen": -13.722795486450195,
"rewards/margins": 1.6163246631622314,
"rewards/rejected": -15.339120864868164,
"step": 170
},
{
"epoch": 1.6592592592592592,
"grad_norm": 350.0,
"learning_rate": 2.458373045823404e-05,
"log_odds_chosen": 3.8613972663879395,
"log_odds_ratio": -0.5020617842674255,
"logps/chosen": -32.17850875854492,
"logps/rejected": -36.0399055480957,
"loss": 13.1753,
"nll_loss": 27.571544647216797,
"rewards/accuracies": 0.8062499761581421,
"rewards/chosen": -16.08925437927246,
"rewards/margins": 1.9306986331939697,
"rewards/rejected": -18.01995277404785,
"step": 175
},
{
"epoch": 1.7066666666666666,
"grad_norm": 294.0,
"learning_rate": 2.3197646927086697e-05,
"log_odds_chosen": 4.640763759613037,
"log_odds_ratio": -0.49695101380348206,
"logps/chosen": -37.30582046508789,
"logps/rejected": -41.94658660888672,
"loss": 12.1428,
"nll_loss": 32.45693588256836,
"rewards/accuracies": 0.8218749761581421,
"rewards/chosen": -18.652910232543945,
"rewards/margins": 2.3203818798065186,
"rewards/rejected": -20.97329330444336,
"step": 180
},
{
"epoch": 1.7540740740740741,
"grad_norm": 215.0,
"learning_rate": 2.1817114703032176e-05,
"log_odds_chosen": 3.8562660217285156,
"log_odds_ratio": -0.6583287119865417,
"logps/chosen": -50.08024215698242,
"logps/rejected": -53.93650436401367,
"loss": 15.07,
"nll_loss": 43.848018646240234,
"rewards/accuracies": 0.8062499761581421,
"rewards/chosen": -25.04012107849121,
"rewards/margins": 1.9281330108642578,
"rewards/rejected": -26.968252182006836,
"step": 185
},
{
"epoch": 1.8014814814814815,
"grad_norm": 556.0,
"learning_rate": 2.0446385870993467e-05,
"log_odds_chosen": 3.928633213043213,
"log_odds_ratio": -0.5254294872283936,
"logps/chosen": -36.410377502441406,
"logps/rejected": -40.339012145996094,
"loss": 13.1055,
"nll_loss": 31.071889877319336,
"rewards/accuracies": 0.84375,
"rewards/chosen": -18.205188751220703,
"rewards/margins": 1.9643166065216064,
"rewards/rejected": -20.169506072998047,
"step": 190
},
{
"epoch": 1.8488888888888888,
"grad_norm": 220.0,
"learning_rate": 1.9089682321121834e-05,
"log_odds_chosen": 3.9167213439941406,
"log_odds_ratio": -0.4089687764644623,
"logps/chosen": -35.08547592163086,
"logps/rejected": -39.002197265625,
"loss": 11.6004,
"nll_loss": 30.35329818725586,
"rewards/accuracies": 0.846875011920929,
"rewards/chosen": -17.54273796081543,
"rewards/margins": 1.9583606719970703,
"rewards/rejected": -19.5010986328125,
"step": 195
},
{
"epoch": 1.8962962962962964,
"grad_norm": 232.0,
"learning_rate": 1.775118274523545e-05,
"log_odds_chosen": 4.179836273193359,
"log_odds_ratio": -0.5350766181945801,
"logps/chosen": -35.59379196166992,
"logps/rejected": -39.77362823486328,
"loss": 13.1393,
"nll_loss": 30.656850814819336,
"rewards/accuracies": 0.8187500238418579,
"rewards/chosen": -17.79689598083496,
"rewards/margins": 2.0899181365966797,
"rewards/rejected": -19.88681411743164,
"step": 200
},
{
"epoch": 1.9437037037037037,
"grad_norm": 288.0,
"learning_rate": 1.643500976631037e-05,
"log_odds_chosen": 4.366988658905029,
"log_odds_ratio": -0.5492602586746216,
"logps/chosen": -30.83774757385254,
"logps/rejected": -35.2047233581543,
"loss": 13.2985,
"nll_loss": 27.005844116210938,
"rewards/accuracies": 0.8062499761581421,
"rewards/chosen": -15.41887378692627,
"rewards/margins": 2.183488368988037,
"rewards/rejected": -17.60236167907715,
"step": 205
},
{
"epoch": 1.991111111111111,
"grad_norm": 164.0,
"learning_rate": 1.514521724066537e-05,
"log_odds_chosen": 3.571192502975464,
"log_odds_ratio": -0.5615382790565491,
"logps/chosen": -26.493785858154297,
"logps/rejected": -30.064956665039062,
"loss": 14.1991,
"nll_loss": 22.7987117767334,
"rewards/accuracies": 0.778124988079071,
"rewards/chosen": -13.246892929077148,
"rewards/margins": 1.7855854034423828,
"rewards/rejected": -15.032478332519531,
"step": 210
},
{
"epoch": 2.0385185185185186,
"grad_norm": 135.0,
"learning_rate": 1.3885777771950348e-05,
"log_odds_chosen": 4.486402988433838,
"log_odds_ratio": -0.26281020045280457,
"logps/chosen": -31.758060455322266,
"logps/rejected": -36.24446487426758,
"loss": 8.5002,
"nll_loss": 28.028461456298828,
"rewards/accuracies": 0.890625,
"rewards/chosen": -15.879030227661133,
"rewards/margins": 2.243201494216919,
"rewards/rejected": -18.12223243713379,
"step": 215
},
{
"epoch": 2.0859259259259257,
"grad_norm": 199.0,
"learning_rate": 1.2660570475395683e-05,
"log_odds_chosen": 6.2985358238220215,
"log_odds_ratio": -0.22524575889110565,
"logps/chosen": -28.65276527404785,
"logps/rejected": -34.95128631591797,
"loss": 7.0508,
"nll_loss": 27.499364852905273,
"rewards/accuracies": 0.909375011920929,
"rewards/chosen": -14.326382637023926,
"rewards/margins": 3.149261474609375,
"rewards/rejected": -17.475643157958984,
"step": 220
},
{
"epoch": 2.1333333333333333,
"grad_norm": 258.0,
"learning_rate": 1.1473369030008974e-05,
"log_odds_chosen": 10.32975959777832,
"log_odds_ratio": -0.2018369436264038,
"logps/chosen": -30.228496551513672,
"logps/rejected": -40.5582389831543,
"loss": 5.0834,
"nll_loss": 28.979644775390625,
"rewards/accuracies": 0.9312499761581421,
"rewards/chosen": -15.114248275756836,
"rewards/margins": 5.164873123168945,
"rewards/rejected": -20.27911949157715,
"step": 225
},
{
"epoch": 2.180740740740741,
"grad_norm": 174.0,
"learning_rate": 1.0327830055518842e-05,
"log_odds_chosen": 12.795625686645508,
"log_odds_ratio": -0.21236321330070496,
"logps/chosen": -40.56447982788086,
"logps/rejected": -53.360107421875,
"loss": 4.6331,
"nll_loss": 37.57656478881836,
"rewards/accuracies": 0.949999988079071,
"rewards/chosen": -20.28223991394043,
"rewards/margins": 6.397812843322754,
"rewards/rejected": -26.6800537109375,
"step": 230
},
{
"epoch": 2.228148148148148,
"grad_norm": 228.0,
"learning_rate": 9.227481849865235e-06,
"log_odds_chosen": 12.043777465820312,
"log_odds_ratio": -0.23059391975402832,
"logps/chosen": -43.32316970825195,
"logps/rejected": -55.366943359375,
"loss": 5.3205,
"nll_loss": 38.94471740722656,
"rewards/accuracies": 0.9312499761581421,
"rewards/chosen": -21.661584854125977,
"rewards/margins": 6.021887302398682,
"rewards/rejected": -27.6834716796875,
"step": 235
},
{
"epoch": 2.2755555555555556,
"grad_norm": 91.5,
"learning_rate": 8.175713521924978e-06,
"log_odds_chosen": 12.533405303955078,
"log_odds_ratio": -0.12673521041870117,
"logps/chosen": -38.45610809326172,
"logps/rejected": -50.98948287963867,
"loss": 3.4403,
"nll_loss": 35.29063415527344,
"rewards/accuracies": 0.965624988079071,
"rewards/chosen": -19.22805404663086,
"rewards/margins": 6.266686916351318,
"rewards/rejected": -25.494741439819336,
"step": 240
},
{
"epoch": 2.322962962962963,
"grad_norm": 302.0,
"learning_rate": 7.1757645529443665e-06,
"log_odds_chosen": 13.506985664367676,
"log_odds_ratio": -0.22909840941429138,
"logps/chosen": -37.9549674987793,
"logps/rejected": -51.461952209472656,
"loss": 5.0087,
"nll_loss": 34.513214111328125,
"rewards/accuracies": 0.9437500238418579,
"rewards/chosen": -18.97748374938965,
"rewards/margins": 6.753490447998047,
"rewards/rejected": -25.730976104736328,
"step": 245
},
{
"epoch": 2.3703703703703702,
"grad_norm": 138.0,
"learning_rate": 6.230714818829733e-06,
"log_odds_chosen": 12.552645683288574,
"log_odds_ratio": -0.17762118577957153,
"logps/chosen": -36.93925476074219,
"logps/rejected": -49.49188995361328,
"loss": 4.2289,
"nll_loss": 33.53951644897461,
"rewards/accuracies": 0.9624999761581421,
"rewards/chosen": -18.469627380371094,
"rewards/margins": 6.276318550109863,
"rewards/rejected": -24.74594497680664,
"step": 250
},
{
"epoch": 2.417777777777778,
"grad_norm": 278.0,
"learning_rate": 5.343475104027743e-06,
"log_odds_chosen": 12.663997650146484,
"log_odds_ratio": -0.2587743103504181,
"logps/chosen": -40.36345672607422,
"logps/rejected": -53.0274543762207,
"loss": 5.5798,
"nll_loss": 36.72834014892578,
"rewards/accuracies": 0.940625011920929,
"rewards/chosen": -20.18172836303711,
"rewards/margins": 6.331998348236084,
"rewards/rejected": -26.51372718811035,
"step": 255
},
{
"epoch": 2.4651851851851854,
"grad_norm": 322.0,
"learning_rate": 4.516778136213037e-06,
"log_odds_chosen": 12.492012977600098,
"log_odds_ratio": -0.2238761931657791,
"logps/chosen": -41.58088302612305,
"logps/rejected": -54.072898864746094,
"loss": 5.0248,
"nll_loss": 37.42829132080078,
"rewards/accuracies": 0.9468749761581421,
"rewards/chosen": -20.790441513061523,
"rewards/margins": 6.246006488800049,
"rewards/rejected": -27.036449432373047,
"step": 260
},
{
"epoch": 2.5125925925925925,
"grad_norm": 262.0,
"learning_rate": 3.7531701693965554e-06,
"log_odds_chosen": 12.127960205078125,
"log_odds_ratio": -0.1539038121700287,
"logps/chosen": -42.163063049316406,
"logps/rejected": -54.2910270690918,
"loss": 3.9328,
"nll_loss": 37.33251190185547,
"rewards/accuracies": 0.9624999761581421,
"rewards/chosen": -21.081531524658203,
"rewards/margins": 6.0639801025390625,
"rewards/rejected": -27.1455135345459,
"step": 265
},
{
"epoch": 2.56,
"grad_norm": 172.0,
"learning_rate": 3.055003141378948e-06,
"log_odds_chosen": 11.486291885375977,
"log_odds_ratio": -0.21284326910972595,
"logps/chosen": -42.949806213378906,
"logps/rejected": -54.43610382080078,
"loss": 4.9346,
"nll_loss": 37.990211486816406,
"rewards/accuracies": 0.953125,
"rewards/chosen": -21.474903106689453,
"rewards/margins": 5.743145942687988,
"rewards/rejected": -27.21805191040039,
"step": 270
},
{
"epoch": 2.6074074074074076,
"grad_norm": 154.0,
"learning_rate": 2.424427429704365e-06,
"log_odds_chosen": 11.679998397827148,
"log_odds_ratio": -0.23582549393177032,
"logps/chosen": -43.870338439941406,
"logps/rejected": -55.55034255981445,
"loss": 5.3451,
"nll_loss": 38.93402862548828,
"rewards/accuracies": 0.934374988079071,
"rewards/chosen": -21.935169219970703,
"rewards/margins": 5.839999198913574,
"rewards/rejected": -27.775171279907227,
"step": 275
},
{
"epoch": 2.6548148148148147,
"grad_norm": 151.0,
"learning_rate": 1.8633852284264508e-06,
"log_odds_chosen": 11.430465698242188,
"log_odds_ratio": -0.12883998453617096,
"logps/chosen": -45.1104850769043,
"logps/rejected": -56.54095458984375,
"loss": 3.5943,
"nll_loss": 39.615169525146484,
"rewards/accuracies": 0.9593750238418579,
"rewards/chosen": -22.55524253845215,
"rewards/margins": 5.715232849121094,
"rewards/rejected": -28.270477294921875,
"step": 280
},
{
"epoch": 2.7022222222222223,
"grad_norm": 156.0,
"learning_rate": 1.3736045660864034e-06,
"log_odds_chosen": 11.030641555786133,
"log_odds_ratio": -0.21815872192382812,
"logps/chosen": -45.768348693847656,
"logps/rejected": -56.798988342285156,
"loss": 5.0783,
"nll_loss": 39.93412780761719,
"rewards/accuracies": 0.934374988079071,
"rewards/chosen": -22.884174346923828,
"rewards/margins": 5.515320777893066,
"rewards/rejected": -28.399494171142578,
"step": 285
},
{
"epoch": 2.74962962962963,
"grad_norm": 276.0,
"learning_rate": 9.565939833279192e-07,
"log_odds_chosen": 11.255643844604492,
"log_odds_ratio": -0.11287225782871246,
"logps/chosen": -45.53264236450195,
"logps/rejected": -56.788291931152344,
"loss": 3.382,
"nll_loss": 39.89075469970703,
"rewards/accuracies": 0.971875011920929,
"rewards/chosen": -22.766321182250977,
"rewards/margins": 5.627821922302246,
"rewards/rejected": -28.394145965576172,
"step": 290
},
{
"epoch": 2.797037037037037,
"grad_norm": 149.0,
"learning_rate": 6.136378865420872e-07,
"log_odds_chosen": 10.942522048950195,
"log_odds_ratio": -0.14776502549648285,
"logps/chosen": -44.723236083984375,
"logps/rejected": -55.6657600402832,
"loss": 3.9534,
"nll_loss": 39.40924072265625,
"rewards/accuracies": 0.9468749761581421,
"rewards/chosen": -22.361618041992188,
"rewards/margins": 5.471261024475098,
"rewards/rejected": -27.8328800201416,
"step": 295
},
{
"epoch": 2.8444444444444446,
"grad_norm": 282.0,
"learning_rate": 3.45792591853214e-07,
"log_odds_chosen": 11.7633695602417,
"log_odds_ratio": -0.23342649638652802,
"logps/chosen": -45.44441604614258,
"logps/rejected": -57.20778274536133,
"loss": 5.2955,
"nll_loss": 39.873775482177734,
"rewards/accuracies": 0.934374988079071,
"rewards/chosen": -22.72220802307129,
"rewards/margins": 5.88168478012085,
"rewards/rejected": -28.603891372680664,
"step": 300
},
{
"epoch": 2.891851851851852,
"grad_norm": 148.0,
"learning_rate": 1.538830716302092e-07,
"log_odds_chosen": 11.136703491210938,
"log_odds_ratio": -0.1254163384437561,
"logps/chosen": -45.35710144042969,
"logps/rejected": -56.493804931640625,
"loss": 3.5067,
"nll_loss": 40.35554885864258,
"rewards/accuracies": 0.9593750238418579,
"rewards/chosen": -22.678550720214844,
"rewards/margins": 5.568351745605469,
"rewards/rejected": -28.246902465820312,
"step": 305
},
{
"epoch": 2.9392592592592592,
"grad_norm": 155.0,
"learning_rate": 3.8500413544415025e-08,
"log_odds_chosen": 11.506017684936523,
"log_odds_ratio": -0.08677199482917786,
"logps/chosen": -46.21002960205078,
"logps/rejected": -57.71604537963867,
"loss": 2.8359,
"nll_loss": 40.85438919067383,
"rewards/accuracies": 0.965624988079071,
"rewards/chosen": -23.10501480102539,
"rewards/margins": 5.753008842468262,
"rewards/rejected": -28.858022689819336,
"step": 310
},
{
"epoch": 2.986666666666667,
"grad_norm": 175.0,
"learning_rate": 0.0,
"log_odds_chosen": 11.3948335647583,
"log_odds_ratio": -0.15954703092575073,
"logps/chosen": -45.135643005371094,
"logps/rejected": -56.53047561645508,
"loss": 4.0357,
"nll_loss": 40.42476272583008,
"rewards/accuracies": 0.953125,
"rewards/chosen": -22.567821502685547,
"rewards/margins": 5.69741678237915,
"rewards/rejected": -28.26523780822754,
"step": 315
},
{
"epoch": 2.986666666666667,
"step": 315,
"total_flos": 0.0,
"train_loss": 23.31905473678831,
"train_runtime": 7145.3824,
"train_samples_per_second": 2.834,
"train_steps_per_second": 0.044
}
],
"logging_steps": 5,
"max_steps": 315,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 100000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}