gemma-7b-borpo-noisy-5e-5-v5 / trainer_state.json
silviasapora's picture
Model save
b0d52ed verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.9765925925925925,
"eval_steps": 500,
"global_step": 315,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.047407407407407405,
"grad_norm": 1915.3714599609375,
"learning_rate": 7.8125e-06,
"log_odds_chosen": 1.65981125831604,
"log_odds_ratio": -11.16843032836914,
"logps/chosen": -22.020946502685547,
"logps/rejected": -23.68042755126953,
"loss": 320.1571,
"nll_loss": 8.666691780090332,
"rewards/accuracies": 0.534375011920929,
"rewards/chosen": -11.010473251342773,
"rewards/margins": 0.8297405242919922,
"rewards/rejected": -11.840213775634766,
"step": 5
},
{
"epoch": 0.09481481481481481,
"grad_norm": 2617.00927734375,
"learning_rate": 1.5625e-05,
"log_odds_chosen": 1.09341299533844,
"log_odds_ratio": -8.355111122131348,
"logps/chosen": -19.82636833190918,
"logps/rejected": -20.919193267822266,
"loss": 223.029,
"nll_loss": 7.8865966796875,
"rewards/accuracies": 0.528124988079071,
"rewards/chosen": -9.91318416595459,
"rewards/margins": 0.5464121699333191,
"rewards/rejected": -10.459596633911133,
"step": 10
},
{
"epoch": 0.14222222222222222,
"grad_norm": 1221.8326416015625,
"learning_rate": 2.34375e-05,
"log_odds_chosen": 4.4873456954956055,
"log_odds_ratio": -6.951984405517578,
"logps/chosen": -18.489765167236328,
"logps/rejected": -22.975828170776367,
"loss": 226.6759,
"nll_loss": 8.182887077331543,
"rewards/accuracies": 0.5687500238418579,
"rewards/chosen": -9.244882583618164,
"rewards/margins": 2.2430315017700195,
"rewards/rejected": -11.487914085388184,
"step": 15
},
{
"epoch": 0.18962962962962962,
"grad_norm": 2359.64111328125,
"learning_rate": 3.125e-05,
"log_odds_chosen": 0.6630983352661133,
"log_odds_ratio": -8.002729415893555,
"logps/chosen": -18.08315086364746,
"logps/rejected": -18.74709129333496,
"loss": 342.1809,
"nll_loss": 8.18604564666748,
"rewards/accuracies": 0.5249999761581421,
"rewards/chosen": -9.04157543182373,
"rewards/margins": 0.3319700062274933,
"rewards/rejected": -9.37354564666748,
"step": 20
},
{
"epoch": 0.23703703703703705,
"grad_norm": 2270.639892578125,
"learning_rate": 3.90625e-05,
"log_odds_chosen": 4.7319722175598145,
"log_odds_ratio": -6.239144802093506,
"logps/chosen": -15.107877731323242,
"logps/rejected": -19.83966636657715,
"loss": 59.8623,
"nll_loss": 6.319466590881348,
"rewards/accuracies": 0.543749988079071,
"rewards/chosen": -7.553938865661621,
"rewards/margins": 2.3658950328826904,
"rewards/rejected": -9.919833183288574,
"step": 25
},
{
"epoch": 0.28444444444444444,
"grad_norm": 42054.76953125,
"learning_rate": 4.6875e-05,
"log_odds_chosen": 3.2522056102752686,
"log_odds_ratio": -5.358423709869385,
"logps/chosen": -19.751956939697266,
"logps/rejected": -23.000202178955078,
"loss": 210.8639,
"nll_loss": 11.341104507446289,
"rewards/accuracies": 0.6031249761581421,
"rewards/chosen": -9.875978469848633,
"rewards/margins": 1.6241226196289062,
"rewards/rejected": -11.500101089477539,
"step": 30
},
{
"epoch": 0.33185185185185184,
"grad_norm": 1386.9453125,
"learning_rate": 4.998613757348784e-05,
"log_odds_chosen": 2.529534339904785,
"log_odds_ratio": -4.246035575866699,
"logps/chosen": -15.55242919921875,
"logps/rejected": -18.077491760253906,
"loss": 259.2666,
"nll_loss": 11.016453742980957,
"rewards/accuracies": 0.574999988079071,
"rewards/chosen": -7.776214599609375,
"rewards/margins": 1.2625317573547363,
"rewards/rejected": -9.038745880126953,
"step": 35
},
{
"epoch": 0.37925925925925924,
"grad_norm": 1015.165771484375,
"learning_rate": 4.990147841143462e-05,
"log_odds_chosen": 0.2810021936893463,
"log_odds_ratio": -0.8303823471069336,
"logps/chosen": -2.1894371509552,
"logps/rejected": -2.4453959465026855,
"loss": 75.6876,
"nll_loss": 2.393183469772339,
"rewards/accuracies": 0.590624988079071,
"rewards/chosen": -1.0947185754776,
"rewards/margins": 0.12797939777374268,
"rewards/rejected": -1.2226979732513428,
"step": 40
},
{
"epoch": 0.4266666666666667,
"grad_norm": 1208.3837890625,
"learning_rate": 4.97401218720448e-05,
"log_odds_chosen": 0.16454455256462097,
"log_odds_ratio": -1.0962440967559814,
"logps/chosen": -2.4815239906311035,
"logps/rejected": -2.6206681728363037,
"loss": 61.1886,
"nll_loss": 2.248060464859009,
"rewards/accuracies": 0.518750011920929,
"rewards/chosen": -1.2407619953155518,
"rewards/margins": 0.0695720762014389,
"rewards/rejected": -1.3103340864181519,
"step": 45
},
{
"epoch": 0.4740740740740741,
"grad_norm": 4378.82275390625,
"learning_rate": 4.9502564938797946e-05,
"log_odds_chosen": 0.6479941010475159,
"log_odds_ratio": -1.2961242198944092,
"logps/chosen": -3.2210915088653564,
"logps/rejected": -3.855274200439453,
"loss": 7.3654,
"nll_loss": 2.6344618797302246,
"rewards/accuracies": 0.565625011920929,
"rewards/chosen": -1.6105457544326782,
"rewards/margins": 0.3170911371707916,
"rewards/rejected": -1.9276371002197266,
"step": 50
},
{
"epoch": 0.5214814814814814,
"grad_norm": 1226.081787109375,
"learning_rate": 4.918953929490768e-05,
"log_odds_chosen": 1.278847098350525,
"log_odds_ratio": -2.351318836212158,
"logps/chosen": -5.5019121170043945,
"logps/rejected": -6.7642693519592285,
"loss": 69.5193,
"nll_loss": 3.3521831035614014,
"rewards/accuracies": 0.5718749761581421,
"rewards/chosen": -2.7509560585021973,
"rewards/margins": 0.6311787366867065,
"rewards/rejected": -3.3821346759796143,
"step": 55
},
{
"epoch": 0.5688888888888889,
"grad_norm": 3815.735595703125,
"learning_rate": 4.88020090697132e-05,
"log_odds_chosen": 0.8591831922531128,
"log_odds_ratio": -1.0965297222137451,
"logps/chosen": -2.902388334274292,
"logps/rejected": -3.7349720001220703,
"loss": 24.2741,
"nll_loss": 2.398529529571533,
"rewards/accuracies": 0.606249988079071,
"rewards/chosen": -1.451194167137146,
"rewards/margins": 0.41629156470298767,
"rewards/rejected": -1.8674860000610352,
"step": 60
},
{
"epoch": 0.6162962962962963,
"grad_norm": 1931.8660888671875,
"learning_rate": 4.834116786912897e-05,
"log_odds_chosen": 0.31718695163726807,
"log_odds_ratio": -1.2613378763198853,
"logps/chosen": -2.781818389892578,
"logps/rejected": -3.0610077381134033,
"loss": 81.3175,
"nll_loss": 2.3013808727264404,
"rewards/accuracies": 0.5843750238418579,
"rewards/chosen": -1.390909194946289,
"rewards/margins": 0.13959458470344543,
"rewards/rejected": -1.5305038690567017,
"step": 65
},
{
"epoch": 0.6637037037037037,
"grad_norm": 787.8285522460938,
"learning_rate": 4.7808435099299045e-05,
"log_odds_chosen": 0.5639177560806274,
"log_odds_ratio": -0.8356220126152039,
"logps/chosen": -2.251502752304077,
"logps/rejected": -2.7904326915740967,
"loss": 47.3395,
"nll_loss": 2.168375015258789,
"rewards/accuracies": 0.578125,
"rewards/chosen": -1.1257513761520386,
"rewards/margins": 0.2694648802280426,
"rewards/rejected": -1.3952163457870483,
"step": 70
},
{
"epoch": 0.7111111111111111,
"grad_norm": 2055.1884765625,
"learning_rate": 4.720545159477922e-05,
"log_odds_chosen": 1.0343811511993408,
"log_odds_ratio": -1.943116545677185,
"logps/chosen": -4.0394768714904785,
"logps/rejected": -5.053744792938232,
"loss": 15.8044,
"nll_loss": 2.568474292755127,
"rewards/accuracies": 0.6000000238418579,
"rewards/chosen": -2.0197384357452393,
"rewards/margins": 0.5071338415145874,
"rewards/rejected": -2.526872396469116,
"step": 75
},
{
"epoch": 0.7585185185185185,
"grad_norm": 3419.63525390625,
"learning_rate": 4.653407456471222e-05,
"log_odds_chosen": 1.538326621055603,
"log_odds_ratio": -2.196194648742676,
"logps/chosen": -5.185378074645996,
"logps/rejected": -6.7065582275390625,
"loss": 39.0928,
"nll_loss": 2.9203972816467285,
"rewards/accuracies": 0.581250011920929,
"rewards/chosen": -2.592689037322998,
"rewards/margins": 0.7605901956558228,
"rewards/rejected": -3.3532791137695312,
"step": 80
},
{
"epoch": 0.8059259259259259,
"grad_norm": 3812.28271484375,
"learning_rate": 4.579637187256222e-05,
"log_odds_chosen": 2.206240177154541,
"log_odds_ratio": -2.366931438446045,
"logps/chosen": -6.021973609924316,
"logps/rejected": -8.210701942443848,
"loss": 57.2016,
"nll_loss": 2.832935333251953,
"rewards/accuracies": 0.5874999761581421,
"rewards/chosen": -3.010986804962158,
"rewards/margins": 1.0943641662597656,
"rewards/rejected": -4.105350971221924,
"step": 85
},
{
"epoch": 0.8533333333333334,
"grad_norm": 2624.8759765625,
"learning_rate": 4.499461566702685e-05,
"log_odds_chosen": 0.8822873830795288,
"log_odds_ratio": -2.509356737136841,
"logps/chosen": -4.698742389678955,
"logps/rejected": -5.556033134460449,
"loss": 103.7346,
"nll_loss": 2.442578077316284,
"rewards/accuracies": 0.574999988079071,
"rewards/chosen": -2.3493711948394775,
"rewards/margins": 0.42864537239074707,
"rewards/rejected": -2.7780165672302246,
"step": 90
},
{
"epoch": 0.9007407407407407,
"grad_norm": 12379.837890625,
"learning_rate": 4.413127538374411e-05,
"log_odds_chosen": 0.29022759199142456,
"log_odds_ratio": -1.0547641515731812,
"logps/chosen": -2.508532762527466,
"logps/rejected": -2.7733073234558105,
"loss": 61.9094,
"nll_loss": 2.3397486209869385,
"rewards/accuracies": 0.5718749761581421,
"rewards/chosen": -1.254266381263733,
"rewards/margins": 0.1323871910572052,
"rewards/rejected": -1.3866536617279053,
"step": 95
},
{
"epoch": 0.9481481481481482,
"grad_norm": 2379.628173828125,
"learning_rate": 4.320901013934887e-05,
"log_odds_chosen": 0.09418745338916779,
"log_odds_ratio": -1.3399364948272705,
"logps/chosen": -2.7042553424835205,
"logps/rejected": -2.785879373550415,
"loss": 86.4669,
"nll_loss": 2.2360591888427734,
"rewards/accuracies": 0.5562499761581421,
"rewards/chosen": -1.3521276712417603,
"rewards/margins": 0.040812067687511444,
"rewards/rejected": -1.3929396867752075,
"step": 100
},
{
"epoch": 0.9955555555555555,
"grad_norm": 1130.39892578125,
"learning_rate": 4.223066054130568e-05,
"log_odds_chosen": 0.4360111653804779,
"log_odds_ratio": -0.8215211629867554,
"logps/chosen": -2.013885498046875,
"logps/rejected": -2.4152350425720215,
"loss": 57.9738,
"nll_loss": 2.038440704345703,
"rewards/accuracies": 0.6031249761581421,
"rewards/chosen": -1.0069427490234375,
"rewards/margins": 0.2006748616695404,
"rewards/rejected": -1.2076175212860107,
"step": 105
},
{
"epoch": 1.037925925925926,
"grad_norm": 3500.71533203125,
"learning_rate": 4.1199239938743797e-05,
"log_odds_chosen": 0.5743904709815979,
"log_odds_ratio": -0.8129003047943115,
"logps/chosen": -1.8548295497894287,
"logps/rejected": -2.392976760864258,
"loss": 30.7734,
"nll_loss": 1.9196120500564575,
"rewards/accuracies": 0.5769230723381042,
"rewards/chosen": -0.9274147748947144,
"rewards/margins": 0.2690735161304474,
"rewards/rejected": -1.196488380432129,
"step": 110
},
{
"epoch": 1.0853333333333333,
"grad_norm": 7732.44091796875,
"learning_rate": 4.0117925141242174e-05,
"log_odds_chosen": 0.8173832893371582,
"log_odds_ratio": -1.355067253112793,
"logps/chosen": -3.0429067611694336,
"logps/rejected": -3.800440549850464,
"loss": 32.8992,
"nll_loss": 2.336174726486206,
"rewards/accuracies": 0.653124988079071,
"rewards/chosen": -1.5214533805847168,
"rewards/margins": 0.3787666857242584,
"rewards/rejected": -1.900220274925232,
"step": 115
},
{
"epoch": 1.1327407407407408,
"grad_norm": 4230.470703125,
"learning_rate": 3.899004663415084e-05,
"log_odds_chosen": 1.6969165802001953,
"log_odds_ratio": -1.324521541595459,
"logps/chosen": -3.4614341259002686,
"logps/rejected": -5.1248674392700195,
"loss": -9.7343,
"nll_loss": 2.7742135524749756,
"rewards/accuracies": 0.6312500238418579,
"rewards/chosen": -1.7307170629501343,
"rewards/margins": 0.8317165374755859,
"rewards/rejected": -2.5624337196350098,
"step": 120
},
{
"epoch": 1.1801481481481482,
"grad_norm": 3288.0673828125,
"learning_rate": 3.781907832058587e-05,
"log_odds_chosen": 1.6559861898422241,
"log_odds_ratio": -1.8177845478057861,
"logps/chosen": -4.351998329162598,
"logps/rejected": -5.973184108734131,
"loss": 18.5195,
"nll_loss": 2.8550593852996826,
"rewards/accuracies": 0.6468750238418579,
"rewards/chosen": -2.175999164581299,
"rewards/margins": 0.8105929493904114,
"rewards/rejected": -2.9865920543670654,
"step": 125
},
{
"epoch": 1.2275555555555555,
"grad_norm": 30743.359375,
"learning_rate": 3.660862682169282e-05,
"log_odds_chosen": 0.7961785793304443,
"log_odds_ratio": -2.717101573944092,
"logps/chosen": -9.171316146850586,
"logps/rejected": -9.938522338867188,
"loss": 169.7847,
"nll_loss": 5.80072546005249,
"rewards/accuracies": 0.581250011920929,
"rewards/chosen": -4.585658073425293,
"rewards/margins": 0.38360315561294556,
"rewards/rejected": -4.969261169433594,
"step": 130
},
{
"epoch": 1.274962962962963,
"grad_norm": 33299.71484375,
"learning_rate": 3.5362420368134356e-05,
"log_odds_chosen": 2.2118756771087646,
"log_odds_ratio": -4.363597869873047,
"logps/chosen": -21.064800262451172,
"logps/rejected": -23.25614356994629,
"loss": 276.0252,
"nll_loss": 12.389029502868652,
"rewards/accuracies": 0.6156250238418579,
"rewards/chosen": -10.532400131225586,
"rewards/margins": 1.0956722497940063,
"rewards/rejected": -11.628071784973145,
"step": 135
},
{
"epoch": 1.3223703703703704,
"grad_norm": 9129.3916015625,
"learning_rate": 3.408429731701635e-05,
"log_odds_chosen": -0.8955130577087402,
"log_odds_ratio": -6.262269020080566,
"logps/chosen": -13.717962265014648,
"logps/rejected": -12.80627155303955,
"loss": 237.298,
"nll_loss": 6.492087364196777,
"rewards/accuracies": 0.5874999761581421,
"rewards/chosen": -6.858981132507324,
"rewards/margins": -0.4558447003364563,
"rewards/rejected": -6.403135776519775,
"step": 140
},
{
"epoch": 1.3697777777777778,
"grad_norm": 4406.74853515625,
"learning_rate": 3.2778194329621104e-05,
"log_odds_chosen": 2.508333206176758,
"log_odds_ratio": -1.729018211364746,
"logps/chosen": -4.257506847381592,
"logps/rejected": -6.73601770401001,
"loss": -21.3999,
"nll_loss": 2.9763360023498535,
"rewards/accuracies": 0.643750011920929,
"rewards/chosen": -2.128753423690796,
"rewards/margins": 1.2392549514770508,
"rewards/rejected": -3.368008852005005,
"step": 145
},
{
"epoch": 1.417185185185185,
"grad_norm": 3294.3505859375,
"learning_rate": 3.144813424636031e-05,
"log_odds_chosen": 0.41120272874832153,
"log_odds_ratio": -2.209031581878662,
"logps/chosen": -5.029679298400879,
"logps/rejected": -5.439136981964111,
"loss": 73.5667,
"nll_loss": 3.1203856468200684,
"rewards/accuracies": 0.5249999761581421,
"rewards/chosen": -2.5148396492004395,
"rewards/margins": 0.20472900569438934,
"rewards/rejected": -2.7195684909820557,
"step": 150
},
{
"epoch": 1.4645925925925927,
"grad_norm": 2727.369140625,
"learning_rate": 3.0098213696293542e-05,
"log_odds_chosen": 1.2927907705307007,
"log_odds_ratio": -1.953330636024475,
"logps/chosen": -5.769057273864746,
"logps/rejected": -7.043553829193115,
"loss": 66.51,
"nll_loss": 3.7798728942871094,
"rewards/accuracies": 0.59375,
"rewards/chosen": -2.884528636932373,
"rewards/margins": 0.6372483968734741,
"rewards/rejected": -3.5217769145965576,
"step": 155
},
{
"epoch": 1.512,
"grad_norm": 2540.416748046875,
"learning_rate": 2.8732590479375165e-05,
"log_odds_chosen": 1.328902006149292,
"log_odds_ratio": -1.8827491998672485,
"logps/chosen": -4.703896999359131,
"logps/rejected": -6.023054599761963,
"loss": 35.3034,
"nll_loss": 3.127842664718628,
"rewards/accuracies": 0.578125,
"rewards/chosen": -2.3519484996795654,
"rewards/margins": 0.6595786809921265,
"rewards/rejected": -3.0115272998809814,
"step": 160
},
{
"epoch": 1.5594074074074074,
"grad_norm": 2457.10205078125,
"learning_rate": 2.7355470760292956e-05,
"log_odds_chosen": 0.4289638102054596,
"log_odds_ratio": -1.7100918292999268,
"logps/chosen": -3.7828564643859863,
"logps/rejected": -4.20039701461792,
"loss": 72.3472,
"nll_loss": 2.577268123626709,
"rewards/accuracies": 0.546875,
"rewards/chosen": -1.8914282321929932,
"rewards/margins": 0.20877020061016083,
"rewards/rejected": -2.10019850730896,
"step": 165
},
{
"epoch": 1.6068148148148147,
"grad_norm": 2422.359130859375,
"learning_rate": 2.597109611334169e-05,
"log_odds_chosen": 0.3876183331012726,
"log_odds_ratio": -1.1971830129623413,
"logps/chosen": -2.719515800476074,
"logps/rejected": -3.0870518684387207,
"loss": 49.8125,
"nll_loss": 2.2820823192596436,
"rewards/accuracies": 0.5531250238418579,
"rewards/chosen": -1.359757900238037,
"rewards/margins": 0.1837681084871292,
"rewards/rejected": -1.5435259342193604,
"step": 170
},
{
"epoch": 1.6542222222222223,
"grad_norm": 601.5615234375,
"learning_rate": 2.458373045823404e-05,
"log_odds_chosen": 0.5851010680198669,
"log_odds_ratio": -0.9167743921279907,
"logps/chosen": -2.2165403366088867,
"logps/rejected": -2.7729554176330566,
"loss": 50.6215,
"nll_loss": 2.1689133644104004,
"rewards/accuracies": 0.5843750238418579,
"rewards/chosen": -1.1082701683044434,
"rewards/margins": 0.2782076299190521,
"rewards/rejected": -1.3864777088165283,
"step": 175
},
{
"epoch": 1.7016296296296296,
"grad_norm": 463.87890625,
"learning_rate": 2.3197646927086697e-05,
"log_odds_chosen": 0.9187017679214478,
"log_odds_ratio": -1.155874490737915,
"logps/chosen": -2.5220038890838623,
"logps/rejected": -3.416661024093628,
"loss": 9.5969,
"nll_loss": 2.0762057304382324,
"rewards/accuracies": 0.581250011920929,
"rewards/chosen": -1.2610019445419312,
"rewards/margins": 0.4473283886909485,
"rewards/rejected": -1.708330512046814,
"step": 180
},
{
"epoch": 1.749037037037037,
"grad_norm": 3273.33935546875,
"learning_rate": 2.1817114703032176e-05,
"log_odds_chosen": 1.7500969171524048,
"log_odds_ratio": -1.275059461593628,
"logps/chosen": -3.396794557571411,
"logps/rejected": -5.1389851570129395,
"loss": -60.3306,
"nll_loss": 2.238281726837158,
"rewards/accuracies": 0.5718749761581421,
"rewards/chosen": -1.6983972787857056,
"rewards/margins": 0.8710952997207642,
"rewards/rejected": -2.5694925785064697,
"step": 185
},
{
"epoch": 1.7964444444444445,
"grad_norm": 1209.9384765625,
"learning_rate": 2.0446385870993467e-05,
"log_odds_chosen": 0.5481420755386353,
"log_odds_ratio": -1.067756175994873,
"logps/chosen": -2.3263049125671387,
"logps/rejected": -2.851926803588867,
"loss": 32.6693,
"nll_loss": 2.037564754486084,
"rewards/accuracies": 0.5874999761581421,
"rewards/chosen": -1.1631524562835693,
"rewards/margins": 0.26281076669692993,
"rewards/rejected": -1.4259634017944336,
"step": 190
},
{
"epoch": 1.8438518518518519,
"grad_norm": 2658.087646484375,
"learning_rate": 1.9089682321121834e-05,
"log_odds_chosen": 1.4211509227752686,
"log_odds_ratio": -0.9490826725959778,
"logps/chosen": -2.6055521965026855,
"logps/rejected": -3.9891953468322754,
"loss": -18.5069,
"nll_loss": 2.2056031227111816,
"rewards/accuracies": 0.6031249761581421,
"rewards/chosen": -1.3027760982513428,
"rewards/margins": 0.6918215155601501,
"rewards/rejected": -1.9945976734161377,
"step": 195
},
{
"epoch": 1.8912592592592592,
"grad_norm": 526.1814575195312,
"learning_rate": 1.775118274523545e-05,
"log_odds_chosen": 0.7827054858207703,
"log_odds_ratio": -2.516727924346924,
"logps/chosen": -4.788647651672363,
"logps/rejected": -5.54547643661499,
"loss": 63.2911,
"nll_loss": 2.443591356277466,
"rewards/accuracies": 0.574999988079071,
"rewards/chosen": -2.3943238258361816,
"rewards/margins": 0.37841445207595825,
"rewards/rejected": -2.772738218307495,
"step": 200
},
{
"epoch": 1.9386666666666668,
"grad_norm": 1198.92138671875,
"learning_rate": 1.643500976631037e-05,
"log_odds_chosen": 0.6334174275398254,
"log_odds_ratio": -1.686605453491211,
"logps/chosen": -3.5008976459503174,
"logps/rejected": -4.093779563903809,
"loss": 57.0721,
"nll_loss": 2.26170015335083,
"rewards/accuracies": 0.565625011920929,
"rewards/chosen": -1.7504488229751587,
"rewards/margins": 0.29644104838371277,
"rewards/rejected": -2.0468897819519043,
"step": 205
},
{
"epoch": 1.986074074074074,
"grad_norm": 360.5643310546875,
"learning_rate": 1.514521724066537e-05,
"log_odds_chosen": 0.5420491099357605,
"log_odds_ratio": -1.02804696559906,
"logps/chosen": -2.0554840564727783,
"logps/rejected": -2.5570180416107178,
"loss": 26.4923,
"nll_loss": 1.8558366298675537,
"rewards/accuracies": 0.578125,
"rewards/chosen": -1.0277420282363892,
"rewards/margins": 0.25076690316200256,
"rewards/rejected": -1.2785090208053589,
"step": 210
},
{
"epoch": 2.0284444444444443,
"grad_norm": 10238.283203125,
"learning_rate": 1.3885777771950348e-05,
"log_odds_chosen": 0.5111017823219299,
"log_odds_ratio": -0.8272331953048706,
"logps/chosen": -1.7190930843353271,
"logps/rejected": -2.203624963760376,
"loss": 29.5791,
"nll_loss": 1.8271044492721558,
"rewards/accuracies": 0.5524475574493408,
"rewards/chosen": -0.8595465421676636,
"rewards/margins": 0.24226588010787964,
"rewards/rejected": -1.101812481880188,
"step": 215
},
{
"epoch": 2.075851851851852,
"grad_norm": 2326.157958984375,
"learning_rate": 1.2660570475395683e-05,
"log_odds_chosen": 0.5695599913597107,
"log_odds_ratio": -0.9038535952568054,
"logps/chosen": -1.8567044734954834,
"logps/rejected": -2.3801803588867188,
"loss": 42.0315,
"nll_loss": 1.7851667404174805,
"rewards/accuracies": 0.606249988079071,
"rewards/chosen": -0.9283522367477417,
"rewards/margins": 0.26173779368400574,
"rewards/rejected": -1.1900901794433594,
"step": 220
},
{
"epoch": 2.1232592592592594,
"grad_norm": 1981.0318603515625,
"learning_rate": 1.1473369030008974e-05,
"log_odds_chosen": 0.4965842664241791,
"log_odds_ratio": -0.9662116169929504,
"logps/chosen": -1.9954092502593994,
"logps/rejected": -2.455885648727417,
"loss": 24.2707,
"nll_loss": 1.8060328960418701,
"rewards/accuracies": 0.581250011920929,
"rewards/chosen": -0.9977046251296997,
"rewards/margins": 0.2302381992340088,
"rewards/rejected": -1.2279428243637085,
"step": 225
},
{
"epoch": 2.1706666666666665,
"grad_norm": 2371.164794921875,
"learning_rate": 1.0327830055518842e-05,
"log_odds_chosen": 0.8104255795478821,
"log_odds_ratio": -0.9122349619865417,
"logps/chosen": -1.928625464439392,
"logps/rejected": -2.700075626373291,
"loss": 0.4953,
"nll_loss": 1.8198583126068115,
"rewards/accuracies": 0.5874999761581421,
"rewards/chosen": -0.964312732219696,
"rewards/margins": 0.3857249617576599,
"rewards/rejected": -1.3500378131866455,
"step": 230
},
{
"epoch": 2.218074074074074,
"grad_norm": 510.7680969238281,
"learning_rate": 9.227481849865235e-06,
"log_odds_chosen": 0.9049872159957886,
"log_odds_ratio": -0.8861944079399109,
"logps/chosen": -2.1324591636657715,
"logps/rejected": -2.9944934844970703,
"loss": 4.731,
"nll_loss": 1.871519684791565,
"rewards/accuracies": 0.5687500238418579,
"rewards/chosen": -1.0662295818328857,
"rewards/margins": 0.4310172200202942,
"rewards/rejected": -1.4972467422485352,
"step": 235
},
{
"epoch": 2.2654814814814817,
"grad_norm": 2696.190185546875,
"learning_rate": 8.175713521924978e-06,
"log_odds_chosen": 0.4651837944984436,
"log_odds_ratio": -1.2659015655517578,
"logps/chosen": -2.480325937271118,
"logps/rejected": -2.9123668670654297,
"loss": 49.3961,
"nll_loss": 1.8576265573501587,
"rewards/accuracies": 0.5718749761581421,
"rewards/chosen": -1.240162968635559,
"rewards/margins": 0.21602031588554382,
"rewards/rejected": -1.4561834335327148,
"step": 240
},
{
"epoch": 2.3128888888888888,
"grad_norm": 1326.971435546875,
"learning_rate": 7.1757645529443665e-06,
"log_odds_chosen": 1.1274998188018799,
"log_odds_ratio": -0.9266605377197266,
"logps/chosen": -2.0514588356018066,
"logps/rejected": -3.127443790435791,
"loss": -21.5027,
"nll_loss": 1.8539222478866577,
"rewards/accuracies": 0.6187499761581421,
"rewards/chosen": -1.0257294178009033,
"rewards/margins": 0.5379923582077026,
"rewards/rejected": -1.5637218952178955,
"step": 245
},
{
"epoch": 2.3602962962962963,
"grad_norm": 4184.349609375,
"learning_rate": 6.230714818829733e-06,
"log_odds_chosen": 0.7846413850784302,
"log_odds_ratio": -0.9509506225585938,
"logps/chosen": -2.0468831062316895,
"logps/rejected": -2.7701876163482666,
"loss": 34.0259,
"nll_loss": 1.8205235004425049,
"rewards/accuracies": 0.612500011920929,
"rewards/chosen": -1.0234415531158447,
"rewards/margins": 0.36165231466293335,
"rewards/rejected": -1.3850938081741333,
"step": 250
},
{
"epoch": 2.407703703703704,
"grad_norm": 72418.359375,
"learning_rate": 5.343475104027743e-06,
"log_odds_chosen": 0.7129810452461243,
"log_odds_ratio": -1.4154326915740967,
"logps/chosen": -2.65124249458313,
"logps/rejected": -3.2982776165008545,
"loss": 19.2975,
"nll_loss": 1.873997688293457,
"rewards/accuracies": 0.6187499761581421,
"rewards/chosen": -1.325621247291565,
"rewards/margins": 0.32351773977279663,
"rewards/rejected": -1.6491388082504272,
"step": 255
},
{
"epoch": 2.455111111111111,
"grad_norm": 6894.69677734375,
"learning_rate": 4.516778136213037e-06,
"log_odds_chosen": 1.0349304676055908,
"log_odds_ratio": -1.0125768184661865,
"logps/chosen": -2.1709659099578857,
"logps/rejected": -3.1571593284606934,
"loss": 5.3407,
"nll_loss": 1.8703029155731201,
"rewards/accuracies": 0.609375,
"rewards/chosen": -1.0854829549789429,
"rewards/margins": 0.4930966794490814,
"rewards/rejected": -1.5785796642303467,
"step": 260
},
{
"epoch": 2.5025185185185186,
"grad_norm": 864.9271850585938,
"learning_rate": 3.7531701693965554e-06,
"log_odds_chosen": 0.46679940819740295,
"log_odds_ratio": -1.316929817199707,
"logps/chosen": -2.535597562789917,
"logps/rejected": -2.952941656112671,
"loss": 71.3618,
"nll_loss": 1.8557851314544678,
"rewards/accuracies": 0.590624988079071,
"rewards/chosen": -1.2677987813949585,
"rewards/margins": 0.20867201685905457,
"rewards/rejected": -1.4764708280563354,
"step": 265
},
{
"epoch": 2.549925925925926,
"grad_norm": 3560.4990234375,
"learning_rate": 3.055003141378948e-06,
"log_odds_chosen": 1.4798504114151,
"log_odds_ratio": -1.3126966953277588,
"logps/chosen": -2.62119722366333,
"logps/rejected": -4.038577079772949,
"loss": -37.0258,
"nll_loss": 1.893930435180664,
"rewards/accuracies": 0.621874988079071,
"rewards/chosen": -1.310598611831665,
"rewards/margins": 0.7086899876594543,
"rewards/rejected": -2.0192885398864746,
"step": 270
},
{
"epoch": 2.5973333333333333,
"grad_norm": 1490.2098388671875,
"learning_rate": 2.424427429704365e-06,
"log_odds_chosen": 0.9033193588256836,
"log_odds_ratio": -1.1426560878753662,
"logps/chosen": -2.5527491569519043,
"logps/rejected": -3.397388458251953,
"loss": 29.2458,
"nll_loss": 1.9291893243789673,
"rewards/accuracies": 0.6156250238418579,
"rewards/chosen": -1.2763745784759521,
"rewards/margins": 0.42231959104537964,
"rewards/rejected": -1.6986942291259766,
"step": 275
},
{
"epoch": 2.644740740740741,
"grad_norm": 10416.1259765625,
"learning_rate": 1.8633852284264508e-06,
"log_odds_chosen": 0.7765440940856934,
"log_odds_ratio": -1.2765750885009766,
"logps/chosen": -2.5416433811187744,
"logps/rejected": -3.276179552078247,
"loss": 16.2592,
"nll_loss": 1.885671615600586,
"rewards/accuracies": 0.578125,
"rewards/chosen": -1.2708216905593872,
"rewards/margins": 0.3672682046890259,
"rewards/rejected": -1.6380897760391235,
"step": 280
},
{
"epoch": 2.6921481481481484,
"grad_norm": 987.6349487304688,
"learning_rate": 1.3736045660864034e-06,
"log_odds_chosen": 1.4070631265640259,
"log_odds_ratio": -0.9240388870239258,
"logps/chosen": -2.1706430912017822,
"logps/rejected": -3.544604778289795,
"loss": -18.614,
"nll_loss": 1.9531824588775635,
"rewards/accuracies": 0.5718749761581421,
"rewards/chosen": -1.0853215456008911,
"rewards/margins": 0.6869809031486511,
"rewards/rejected": -1.7723023891448975,
"step": 285
},
{
"epoch": 2.7395555555555555,
"grad_norm": 1785.149658203125,
"learning_rate": 9.565939833279192e-07,
"log_odds_chosen": 1.1818147897720337,
"log_odds_ratio": -1.5213770866394043,
"logps/chosen": -2.8310532569885254,
"logps/rejected": -3.944000244140625,
"loss": -20.8544,
"nll_loss": 1.9278194904327393,
"rewards/accuracies": 0.621874988079071,
"rewards/chosen": -1.4155266284942627,
"rewards/margins": 0.5564736127853394,
"rewards/rejected": -1.9720001220703125,
"step": 290
},
{
"epoch": 2.786962962962963,
"grad_norm": 3843.544677734375,
"learning_rate": 6.136378865420872e-07,
"log_odds_chosen": 0.7062476277351379,
"log_odds_ratio": -1.199745535850525,
"logps/chosen": -2.419466495513916,
"logps/rejected": -3.0821032524108887,
"loss": 9.6821,
"nll_loss": 1.8187296390533447,
"rewards/accuracies": 0.5843750238418579,
"rewards/chosen": -1.209733247756958,
"rewards/margins": 0.3313182294368744,
"rewards/rejected": -1.5410516262054443,
"step": 295
},
{
"epoch": 2.83437037037037,
"grad_norm": 1985.0562744140625,
"learning_rate": 3.45792591853214e-07,
"log_odds_chosen": 1.1894285678863525,
"log_odds_ratio": -1.35343337059021,
"logps/chosen": -2.6638786792755127,
"logps/rejected": -3.7862019538879395,
"loss": 5.4621,
"nll_loss": 1.938586950302124,
"rewards/accuracies": 0.625,
"rewards/chosen": -1.3319393396377563,
"rewards/margins": 0.5611615777015686,
"rewards/rejected": -1.8931009769439697,
"step": 300
},
{
"epoch": 2.8817777777777778,
"grad_norm": 1326.342529296875,
"learning_rate": 1.538830716302092e-07,
"log_odds_chosen": 2.1142306327819824,
"log_odds_ratio": -0.9648601412773132,
"logps/chosen": -2.3394112586975098,
"logps/rejected": -4.382277011871338,
"loss": -80.5969,
"nll_loss": 1.8355882167816162,
"rewards/accuracies": 0.659375011920929,
"rewards/chosen": -1.1697056293487549,
"rewards/margins": 1.0214331150054932,
"rewards/rejected": -2.191138505935669,
"step": 305
},
{
"epoch": 2.9291851851851853,
"grad_norm": 3704.05322265625,
"learning_rate": 3.8500413544415025e-08,
"log_odds_chosen": 1.72466242313385,
"log_odds_ratio": -1.052673578262329,
"logps/chosen": -2.2251315116882324,
"logps/rejected": -3.870131731033325,
"loss": -35.5622,
"nll_loss": 1.8819787502288818,
"rewards/accuracies": 0.684374988079071,
"rewards/chosen": -1.1125657558441162,
"rewards/margins": 0.8225000500679016,
"rewards/rejected": -1.9350658655166626,
"step": 310
},
{
"epoch": 2.9765925925925925,
"grad_norm": 492.51934814453125,
"learning_rate": 0.0,
"log_odds_chosen": 1.213081955909729,
"log_odds_ratio": -1.0762965679168701,
"logps/chosen": -2.213444232940674,
"logps/rejected": -3.3574657440185547,
"loss": -9.4288,
"nll_loss": 1.8365122079849243,
"rewards/accuracies": 0.668749988079071,
"rewards/chosen": -1.106722116470337,
"rewards/margins": 0.5720106363296509,
"rewards/rejected": -1.6787328720092773,
"step": 315
},
{
"epoch": 2.9765925925925925,
"step": 315,
"total_flos": 0.0,
"train_loss": 59.38279808892144,
"train_runtime": 9551.9831,
"train_samples_per_second": 2.12,
"train_steps_per_second": 0.033
}
],
"logging_steps": 5,
"max_steps": 315,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 100000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}