zephyr-7b-dpo-lora / trainer_state.json
TrumpBiden's picture
Model save
fd8204a
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.9993222089532967,
"eval_steps": 100,
"global_step": 2904,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 1.7182130584192438e-09,
"logits/chosen": -2.4422173500061035,
"logits/rejected": -2.526975631713867,
"logps/chosen": -235.28317260742188,
"logps/rejected": -214.19320678710938,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1
},
{
"epoch": 0.01,
"learning_rate": 1.718213058419244e-08,
"logits/chosen": -2.4839749336242676,
"logits/rejected": -2.4228153228759766,
"logps/chosen": -280.0798034667969,
"logps/rejected": -230.15765380859375,
"loss": 0.6934,
"rewards/accuracies": 0.4600694477558136,
"rewards/chosen": -7.412416744045913e-05,
"rewards/margins": 0.0002167515631299466,
"rewards/rejected": -0.0002908758178818971,
"step": 10
},
{
"epoch": 0.02,
"learning_rate": 3.436426116838488e-08,
"logits/chosen": -2.414851188659668,
"logits/rejected": -2.354247570037842,
"logps/chosen": -255.57260131835938,
"logps/rejected": -226.37820434570312,
"loss": 0.6956,
"rewards/accuracies": 0.4765625,
"rewards/chosen": -0.0009197852341458201,
"rewards/margins": -0.004096911288797855,
"rewards/rejected": 0.003177126171067357,
"step": 20
},
{
"epoch": 0.03,
"learning_rate": 5.154639175257731e-08,
"logits/chosen": -2.4241952896118164,
"logits/rejected": -2.400988817214966,
"logps/chosen": -272.502197265625,
"logps/rejected": -227.431884765625,
"loss": 0.6943,
"rewards/accuracies": 0.484375,
"rewards/chosen": -0.0013401806354522705,
"rewards/margins": -0.0016240004915744066,
"rewards/rejected": 0.0002838193904608488,
"step": 30
},
{
"epoch": 0.04,
"learning_rate": 6.872852233676976e-08,
"logits/chosen": -2.4217023849487305,
"logits/rejected": -2.3694050312042236,
"logps/chosen": -249.1688995361328,
"logps/rejected": -220.63821411132812,
"loss": 0.6953,
"rewards/accuracies": 0.4625000059604645,
"rewards/chosen": -0.0008647952345199883,
"rewards/margins": -0.0035088094882667065,
"rewards/rejected": 0.002644014311954379,
"step": 40
},
{
"epoch": 0.05,
"learning_rate": 8.59106529209622e-08,
"logits/chosen": -2.457735538482666,
"logits/rejected": -2.413196563720703,
"logps/chosen": -259.66912841796875,
"logps/rejected": -220.27700805664062,
"loss": 0.694,
"rewards/accuracies": 0.484375,
"rewards/chosen": 0.0021845095325261354,
"rewards/margins": -0.0009308269363828003,
"rewards/rejected": 0.003115336410701275,
"step": 50
},
{
"epoch": 0.06,
"learning_rate": 1.0309278350515462e-07,
"logits/chosen": -2.4587912559509277,
"logits/rejected": -2.4032034873962402,
"logps/chosen": -258.9931945800781,
"logps/rejected": -228.43301391601562,
"loss": 0.6937,
"rewards/accuracies": 0.49687498807907104,
"rewards/chosen": 0.00026782663189806044,
"rewards/margins": -0.00029953793273307383,
"rewards/rejected": 0.0005673646228387952,
"step": 60
},
{
"epoch": 0.07,
"learning_rate": 1.202749140893471e-07,
"logits/chosen": -2.438084840774536,
"logits/rejected": -2.4171149730682373,
"logps/chosen": -267.3536071777344,
"logps/rejected": -210.99343872070312,
"loss": 0.6927,
"rewards/accuracies": 0.515625,
"rewards/chosen": 0.0028153976891189814,
"rewards/margins": 0.0016862023621797562,
"rewards/rejected": 0.0011291948612779379,
"step": 70
},
{
"epoch": 0.08,
"learning_rate": 1.3745704467353952e-07,
"logits/chosen": -2.449380397796631,
"logits/rejected": -2.3840742111206055,
"logps/chosen": -280.45050048828125,
"logps/rejected": -225.0115966796875,
"loss": 0.6925,
"rewards/accuracies": 0.5390625,
"rewards/chosen": 0.003211159957572818,
"rewards/margins": 0.0021671659778803587,
"rewards/rejected": 0.0010439944453537464,
"step": 80
},
{
"epoch": 0.09,
"learning_rate": 1.5463917525773197e-07,
"logits/chosen": -2.4744973182678223,
"logits/rejected": -2.3940176963806152,
"logps/chosen": -271.45843505859375,
"logps/rejected": -231.62643432617188,
"loss": 0.6919,
"rewards/accuracies": 0.520312488079071,
"rewards/chosen": 0.0016121773514896631,
"rewards/margins": 0.0033598211593925953,
"rewards/rejected": -0.0017476438079029322,
"step": 90
},
{
"epoch": 0.1,
"learning_rate": 1.718213058419244e-07,
"logits/chosen": -2.489611864089966,
"logits/rejected": -2.3956551551818848,
"logps/chosen": -264.9837646484375,
"logps/rejected": -215.3820343017578,
"loss": 0.6918,
"rewards/accuracies": 0.5,
"rewards/chosen": 0.0020506351720541716,
"rewards/margins": 0.0036049727350473404,
"rewards/rejected": -0.0015543376794084907,
"step": 100
},
{
"epoch": 0.11,
"learning_rate": 1.8900343642611682e-07,
"logits/chosen": -2.4358983039855957,
"logits/rejected": -2.3630106449127197,
"logps/chosen": -283.7451477050781,
"logps/rejected": -214.5529327392578,
"loss": 0.6887,
"rewards/accuracies": 0.557812511920929,
"rewards/chosen": 0.006171266548335552,
"rewards/margins": 0.00967160053551197,
"rewards/rejected": -0.003500334918498993,
"step": 110
},
{
"epoch": 0.12,
"learning_rate": 2.0618556701030925e-07,
"logits/chosen": -2.4519553184509277,
"logits/rejected": -2.4237570762634277,
"logps/chosen": -271.7016906738281,
"logps/rejected": -229.41964721679688,
"loss": 0.6911,
"rewards/accuracies": 0.5140625238418579,
"rewards/chosen": 0.0020948010496795177,
"rewards/margins": 0.004834444727748632,
"rewards/rejected": -0.0027396436780691147,
"step": 120
},
{
"epoch": 0.13,
"learning_rate": 2.2336769759450173e-07,
"logits/chosen": -2.448523998260498,
"logits/rejected": -2.3815808296203613,
"logps/chosen": -267.52691650390625,
"logps/rejected": -212.1881866455078,
"loss": 0.6888,
"rewards/accuracies": 0.5796874761581421,
"rewards/chosen": 0.005649151746183634,
"rewards/margins": 0.009708194993436337,
"rewards/rejected": -0.00405904371291399,
"step": 130
},
{
"epoch": 0.14,
"learning_rate": 2.405498281786942e-07,
"logits/chosen": -2.46962308883667,
"logits/rejected": -2.4132840633392334,
"logps/chosen": -274.2135009765625,
"logps/rejected": -212.12222290039062,
"loss": 0.6881,
"rewards/accuracies": 0.5718749761581421,
"rewards/chosen": 0.005823948420584202,
"rewards/margins": 0.01115177758038044,
"rewards/rejected": -0.0053278305567801,
"step": 140
},
{
"epoch": 0.15,
"learning_rate": 2.5773195876288655e-07,
"logits/chosen": -2.4161739349365234,
"logits/rejected": -2.3815319538116455,
"logps/chosen": -250.0603485107422,
"logps/rejected": -210.3913116455078,
"loss": 0.6891,
"rewards/accuracies": 0.5625,
"rewards/chosen": 0.005285581108182669,
"rewards/margins": 0.009132949635386467,
"rewards/rejected": -0.003847368760034442,
"step": 150
},
{
"epoch": 0.17,
"learning_rate": 2.7491408934707903e-07,
"logits/chosen": -2.437349557876587,
"logits/rejected": -2.374871015548706,
"logps/chosen": -267.4647521972656,
"logps/rejected": -218.4558563232422,
"loss": 0.6883,
"rewards/accuracies": 0.59375,
"rewards/chosen": 0.004300659988075495,
"rewards/margins": 0.010579666122794151,
"rewards/rejected": -0.006279005669057369,
"step": 160
},
{
"epoch": 0.18,
"learning_rate": 2.9209621993127146e-07,
"logits/chosen": -2.400257110595703,
"logits/rejected": -2.368004083633423,
"logps/chosen": -277.8876953125,
"logps/rejected": -221.78640747070312,
"loss": 0.6878,
"rewards/accuracies": 0.550000011920929,
"rewards/chosen": 0.005591097287833691,
"rewards/margins": 0.011688882485032082,
"rewards/rejected": -0.006097783800214529,
"step": 170
},
{
"epoch": 0.19,
"learning_rate": 3.0927835051546394e-07,
"logits/chosen": -2.4092414379119873,
"logits/rejected": -2.376796007156372,
"logps/chosen": -271.3955993652344,
"logps/rejected": -226.97055053710938,
"loss": 0.6844,
"rewards/accuracies": 0.598437488079071,
"rewards/chosen": 0.008772986009716988,
"rewards/margins": 0.018974503502249718,
"rewards/rejected": -0.010201516561210155,
"step": 180
},
{
"epoch": 0.2,
"learning_rate": 3.2646048109965636e-07,
"logits/chosen": -2.4419052600860596,
"logits/rejected": -2.3903238773345947,
"logps/chosen": -276.5497131347656,
"logps/rejected": -222.6400604248047,
"loss": 0.685,
"rewards/accuracies": 0.5921875238418579,
"rewards/chosen": 0.011585086584091187,
"rewards/margins": 0.017912257462739944,
"rewards/rejected": -0.006327168550342321,
"step": 190
},
{
"epoch": 0.21,
"learning_rate": 3.436426116838488e-07,
"logits/chosen": -2.419137477874756,
"logits/rejected": -2.3881921768188477,
"logps/chosen": -249.6503448486328,
"logps/rejected": -214.3955078125,
"loss": 0.6802,
"rewards/accuracies": 0.6265624761581421,
"rewards/chosen": 0.014122622087597847,
"rewards/margins": 0.027482766658067703,
"rewards/rejected": -0.01336014736443758,
"step": 200
},
{
"epoch": 0.22,
"learning_rate": 3.608247422680412e-07,
"logits/chosen": -2.4479005336761475,
"logits/rejected": -2.4040849208831787,
"logps/chosen": -270.5887145996094,
"logps/rejected": -235.2773895263672,
"loss": 0.6807,
"rewards/accuracies": 0.637499988079071,
"rewards/chosen": 0.011653213761746883,
"rewards/margins": 0.026884321123361588,
"rewards/rejected": -0.01523110456764698,
"step": 210
},
{
"epoch": 0.23,
"learning_rate": 3.7800687285223364e-07,
"logits/chosen": -2.4104199409484863,
"logits/rejected": -2.3601438999176025,
"logps/chosen": -263.31024169921875,
"logps/rejected": -219.03466796875,
"loss": 0.6787,
"rewards/accuracies": 0.6796875,
"rewards/chosen": 0.013443564996123314,
"rewards/margins": 0.03105132281780243,
"rewards/rejected": -0.017607757821679115,
"step": 220
},
{
"epoch": 0.24,
"learning_rate": 3.9518900343642607e-07,
"logits/chosen": -2.406188726425171,
"logits/rejected": -2.3706843852996826,
"logps/chosen": -273.1360778808594,
"logps/rejected": -231.5634765625,
"loss": 0.6728,
"rewards/accuracies": 0.6390625238418579,
"rewards/chosen": 0.01533445529639721,
"rewards/margins": 0.0440392941236496,
"rewards/rejected": -0.02870483696460724,
"step": 230
},
{
"epoch": 0.25,
"learning_rate": 4.123711340206185e-07,
"logits/chosen": -2.4351398944854736,
"logits/rejected": -2.393178701400757,
"logps/chosen": -271.65087890625,
"logps/rejected": -233.6895751953125,
"loss": 0.6708,
"rewards/accuracies": 0.660937488079071,
"rewards/chosen": 0.02181144617497921,
"rewards/margins": 0.04818682745099068,
"rewards/rejected": -0.026375379413366318,
"step": 240
},
{
"epoch": 0.26,
"learning_rate": 4.2955326460481097e-07,
"logits/chosen": -2.4275219440460205,
"logits/rejected": -2.455544948577881,
"logps/chosen": -264.905517578125,
"logps/rejected": -225.69180297851562,
"loss": 0.6707,
"rewards/accuracies": 0.6343749761581421,
"rewards/chosen": 0.020589668303728104,
"rewards/margins": 0.04932643473148346,
"rewards/rejected": -0.028736764565110207,
"step": 250
},
{
"epoch": 0.27,
"learning_rate": 4.4673539518900345e-07,
"logits/chosen": -2.4035658836364746,
"logits/rejected": -2.4021072387695312,
"logps/chosen": -249.67953491210938,
"logps/rejected": -204.56887817382812,
"loss": 0.6655,
"rewards/accuracies": 0.671875,
"rewards/chosen": 0.01743764989078045,
"rewards/margins": 0.06085206940770149,
"rewards/rejected": -0.043414413928985596,
"step": 260
},
{
"epoch": 0.28,
"learning_rate": 4.639175257731959e-07,
"logits/chosen": -2.4608845710754395,
"logits/rejected": -2.433506488800049,
"logps/chosen": -281.73260498046875,
"logps/rejected": -224.5501251220703,
"loss": 0.6585,
"rewards/accuracies": 0.692187488079071,
"rewards/chosen": 0.0280983354896307,
"rewards/margins": 0.07657264918088913,
"rewards/rejected": -0.04847431182861328,
"step": 270
},
{
"epoch": 0.29,
"learning_rate": 4.810996563573884e-07,
"logits/chosen": -2.4379546642303467,
"logits/rejected": -2.374706745147705,
"logps/chosen": -266.258544921875,
"logps/rejected": -217.6811981201172,
"loss": 0.6557,
"rewards/accuracies": 0.698437511920929,
"rewards/chosen": 0.02859863080084324,
"rewards/margins": 0.08380897343158722,
"rewards/rejected": -0.05521036311984062,
"step": 280
},
{
"epoch": 0.3,
"learning_rate": 4.982817869415807e-07,
"logits/chosen": -2.449183940887451,
"logits/rejected": -2.4152982234954834,
"logps/chosen": -251.42617797851562,
"logps/rejected": -206.6394805908203,
"loss": 0.6567,
"rewards/accuracies": 0.6703125238418579,
"rewards/chosen": 0.018911005929112434,
"rewards/margins": 0.08338409662246704,
"rewards/rejected": -0.06447309255599976,
"step": 290
},
{
"epoch": 0.31,
"learning_rate": 4.982778415614236e-07,
"logits/chosen": -2.431877374649048,
"logits/rejected": -2.3985190391540527,
"logps/chosen": -258.9649353027344,
"logps/rejected": -214.5379180908203,
"loss": 0.6557,
"rewards/accuracies": 0.6578124761581421,
"rewards/chosen": 0.022992964833974838,
"rewards/margins": 0.08744792640209198,
"rewards/rejected": -0.06445495784282684,
"step": 300
},
{
"epoch": 0.32,
"learning_rate": 4.963643321852277e-07,
"logits/chosen": -2.4291908740997314,
"logits/rejected": -2.3937199115753174,
"logps/chosen": -264.531005859375,
"logps/rejected": -223.732177734375,
"loss": 0.6499,
"rewards/accuracies": 0.690625011920929,
"rewards/chosen": 0.026863668113946915,
"rewards/margins": 0.10220368206501007,
"rewards/rejected": -0.07534001767635345,
"step": 310
},
{
"epoch": 0.33,
"learning_rate": 4.944508228090318e-07,
"logits/chosen": -2.4233272075653076,
"logits/rejected": -2.3791823387145996,
"logps/chosen": -268.5234375,
"logps/rejected": -215.85610961914062,
"loss": 0.6425,
"rewards/accuracies": 0.6953125,
"rewards/chosen": 0.03440080210566521,
"rewards/margins": 0.11941323429346085,
"rewards/rejected": -0.08501242101192474,
"step": 320
},
{
"epoch": 0.34,
"learning_rate": 4.925373134328357e-07,
"logits/chosen": -2.4789443016052246,
"logits/rejected": -2.4048690795898438,
"logps/chosen": -266.273681640625,
"logps/rejected": -230.77957153320312,
"loss": 0.6404,
"rewards/accuracies": 0.6781250238418579,
"rewards/chosen": 0.036819975823163986,
"rewards/margins": 0.12693271040916443,
"rewards/rejected": -0.09011274576187134,
"step": 330
},
{
"epoch": 0.35,
"learning_rate": 4.906238040566398e-07,
"logits/chosen": -2.4333298206329346,
"logits/rejected": -2.3658194541931152,
"logps/chosen": -252.1578826904297,
"logps/rejected": -221.54611206054688,
"loss": 0.6406,
"rewards/accuracies": 0.692187488079071,
"rewards/chosen": 0.02940729819238186,
"rewards/margins": 0.12888944149017334,
"rewards/rejected": -0.09948214888572693,
"step": 340
},
{
"epoch": 0.36,
"learning_rate": 4.887102946804438e-07,
"logits/chosen": -2.4523606300354004,
"logits/rejected": -2.388826847076416,
"logps/chosen": -263.3428039550781,
"logps/rejected": -218.86074829101562,
"loss": 0.6345,
"rewards/accuracies": 0.667187511920929,
"rewards/chosen": 0.022628765553236008,
"rewards/margins": 0.14625979959964752,
"rewards/rejected": -0.12363102287054062,
"step": 350
},
{
"epoch": 0.37,
"learning_rate": 4.867967853042479e-07,
"logits/chosen": -2.450970411300659,
"logits/rejected": -2.3968029022216797,
"logps/chosen": -267.2438049316406,
"logps/rejected": -222.943359375,
"loss": 0.6252,
"rewards/accuracies": 0.684374988079071,
"rewards/chosen": 0.03826092556118965,
"rewards/margins": 0.1685018539428711,
"rewards/rejected": -0.13024093210697174,
"step": 360
},
{
"epoch": 0.38,
"learning_rate": 4.84883275928052e-07,
"logits/chosen": -2.4290268421173096,
"logits/rejected": -2.417224645614624,
"logps/chosen": -266.91693115234375,
"logps/rejected": -235.718017578125,
"loss": 0.6357,
"rewards/accuracies": 0.660937488079071,
"rewards/chosen": 0.016346175223588943,
"rewards/margins": 0.1523023247718811,
"rewards/rejected": -0.13595613837242126,
"step": 370
},
{
"epoch": 0.39,
"learning_rate": 4.82969766551856e-07,
"logits/chosen": -2.412705421447754,
"logits/rejected": -2.374624013900757,
"logps/chosen": -261.72271728515625,
"logps/rejected": -229.1892547607422,
"loss": 0.635,
"rewards/accuracies": 0.6640625,
"rewards/chosen": 0.012038780376315117,
"rewards/margins": 0.15452785789966583,
"rewards/rejected": -0.14248906075954437,
"step": 380
},
{
"epoch": 0.4,
"learning_rate": 4.810562571756601e-07,
"logits/chosen": -2.453866720199585,
"logits/rejected": -2.3835487365722656,
"logps/chosen": -263.36273193359375,
"logps/rejected": -218.2621307373047,
"loss": 0.6147,
"rewards/accuracies": 0.707812488079071,
"rewards/chosen": 0.04369325190782547,
"rewards/margins": 0.19822198152542114,
"rewards/rejected": -0.15452872216701508,
"step": 390
},
{
"epoch": 0.41,
"learning_rate": 4.791427477994642e-07,
"logits/chosen": -2.446223258972168,
"logits/rejected": -2.4049808979034424,
"logps/chosen": -270.06732177734375,
"logps/rejected": -222.06930541992188,
"loss": 0.6228,
"rewards/accuracies": 0.6968749761581421,
"rewards/chosen": 0.036653708666563034,
"rewards/margins": 0.1898169219493866,
"rewards/rejected": -0.15316320955753326,
"step": 400
},
{
"epoch": 0.42,
"learning_rate": 4.772292384232682e-07,
"logits/chosen": -2.4419703483581543,
"logits/rejected": -2.409404754638672,
"logps/chosen": -261.24114990234375,
"logps/rejected": -230.6668701171875,
"loss": 0.627,
"rewards/accuracies": 0.653124988079071,
"rewards/chosen": 0.020566729828715324,
"rewards/margins": 0.1889314353466034,
"rewards/rejected": -0.1683647185564041,
"step": 410
},
{
"epoch": 0.43,
"learning_rate": 4.753157290470723e-07,
"logits/chosen": -2.4552600383758545,
"logits/rejected": -2.4268581867218018,
"logps/chosen": -276.3377990722656,
"logps/rejected": -226.77072143554688,
"loss": 0.6127,
"rewards/accuracies": 0.698437511920929,
"rewards/chosen": 0.02126963995397091,
"rewards/margins": 0.22278845310211182,
"rewards/rejected": -0.20151881873607635,
"step": 420
},
{
"epoch": 0.44,
"learning_rate": 4.7340221967087635e-07,
"logits/chosen": -2.413886308670044,
"logits/rejected": -2.3775603771209717,
"logps/chosen": -250.6868896484375,
"logps/rejected": -207.9862518310547,
"loss": 0.6152,
"rewards/accuracies": 0.682812511920929,
"rewards/chosen": 0.01281009428203106,
"rewards/margins": 0.20990212261676788,
"rewards/rejected": -0.19709204137325287,
"step": 430
},
{
"epoch": 0.45,
"learning_rate": 4.714887102946804e-07,
"logits/chosen": -2.4519877433776855,
"logits/rejected": -2.3820528984069824,
"logps/chosen": -274.48419189453125,
"logps/rejected": -226.1211395263672,
"loss": 0.6002,
"rewards/accuracies": 0.7265625,
"rewards/chosen": 0.03596062213182449,
"rewards/margins": 0.2432943880558014,
"rewards/rejected": -0.20733380317687988,
"step": 440
},
{
"epoch": 0.46,
"learning_rate": 4.6957520091848447e-07,
"logits/chosen": -2.4627573490142822,
"logits/rejected": -2.424017906188965,
"logps/chosen": -262.072509765625,
"logps/rejected": -227.0179443359375,
"loss": 0.6138,
"rewards/accuracies": 0.6640625,
"rewards/chosen": 0.0052271028980612755,
"rewards/margins": 0.22640076279640198,
"rewards/rejected": -0.22117361426353455,
"step": 450
},
{
"epoch": 0.48,
"learning_rate": 4.6766169154228853e-07,
"logits/chosen": -2.419257640838623,
"logits/rejected": -2.350435733795166,
"logps/chosen": -256.357421875,
"logps/rejected": -219.219970703125,
"loss": 0.6211,
"rewards/accuracies": 0.651562511920929,
"rewards/chosen": -0.010133005678653717,
"rewards/margins": 0.2125161588191986,
"rewards/rejected": -0.22264917194843292,
"step": 460
},
{
"epoch": 0.49,
"learning_rate": 4.657481821660926e-07,
"logits/chosen": -2.466386079788208,
"logits/rejected": -2.42856764793396,
"logps/chosen": -279.46905517578125,
"logps/rejected": -232.4190673828125,
"loss": 0.6076,
"rewards/accuracies": 0.668749988079071,
"rewards/chosen": 0.01027261558920145,
"rewards/margins": 0.24917173385620117,
"rewards/rejected": -0.2388991117477417,
"step": 470
},
{
"epoch": 0.5,
"learning_rate": 4.6383467278989666e-07,
"logits/chosen": -2.3757636547088623,
"logits/rejected": -2.4026846885681152,
"logps/chosen": -262.5502624511719,
"logps/rejected": -227.03671264648438,
"loss": 0.5955,
"rewards/accuracies": 0.7109375,
"rewards/chosen": 0.036844007670879364,
"rewards/margins": 0.2827877700328827,
"rewards/rejected": -0.24594378471374512,
"step": 480
},
{
"epoch": 0.51,
"learning_rate": 4.6192116341370067e-07,
"logits/chosen": -2.4304065704345703,
"logits/rejected": -2.3491933345794678,
"logps/chosen": -263.8543395996094,
"logps/rejected": -223.7662353515625,
"loss": 0.5863,
"rewards/accuracies": 0.706250011920929,
"rewards/chosen": 0.016529660671949387,
"rewards/margins": 0.30523234605789185,
"rewards/rejected": -0.28870272636413574,
"step": 490
},
{
"epoch": 0.52,
"learning_rate": 4.6000765403750473e-07,
"logits/chosen": -2.3874869346618652,
"logits/rejected": -2.369131088256836,
"logps/chosen": -273.19482421875,
"logps/rejected": -219.20993041992188,
"loss": 0.5999,
"rewards/accuracies": 0.7015625238418579,
"rewards/chosen": 0.016616690903902054,
"rewards/margins": 0.27095186710357666,
"rewards/rejected": -0.2543351948261261,
"step": 500
},
{
"epoch": 0.53,
"learning_rate": 4.580941446613088e-07,
"logits/chosen": -2.432959794998169,
"logits/rejected": -2.4002068042755127,
"logps/chosen": -268.3512268066406,
"logps/rejected": -213.71481323242188,
"loss": 0.5767,
"rewards/accuracies": 0.7171875238418579,
"rewards/chosen": 0.02091902121901512,
"rewards/margins": 0.3344683051109314,
"rewards/rejected": -0.3135492205619812,
"step": 510
},
{
"epoch": 0.54,
"learning_rate": 4.5618063528511285e-07,
"logits/chosen": -2.423337459564209,
"logits/rejected": -2.367553234100342,
"logps/chosen": -270.74822998046875,
"logps/rejected": -229.89859008789062,
"loss": 0.5871,
"rewards/accuracies": 0.715624988079071,
"rewards/chosen": 0.01809154823422432,
"rewards/margins": 0.32776904106140137,
"rewards/rejected": -0.30967751145362854,
"step": 520
},
{
"epoch": 0.55,
"learning_rate": 4.542671259089169e-07,
"logits/chosen": -2.407766103744507,
"logits/rejected": -2.3573992252349854,
"logps/chosen": -272.4050598144531,
"logps/rejected": -231.3518524169922,
"loss": 0.592,
"rewards/accuracies": 0.690625011920929,
"rewards/chosen": 0.009805982932448387,
"rewards/margins": 0.31897610425949097,
"rewards/rejected": -0.309170126914978,
"step": 530
},
{
"epoch": 0.56,
"learning_rate": 4.52353616532721e-07,
"logits/chosen": -2.438194990158081,
"logits/rejected": -2.3653242588043213,
"logps/chosen": -265.61004638671875,
"logps/rejected": -224.57614135742188,
"loss": 0.5941,
"rewards/accuracies": 0.692187488079071,
"rewards/chosen": -0.03684517741203308,
"rewards/margins": 0.3040197789669037,
"rewards/rejected": -0.34086498618125916,
"step": 540
},
{
"epoch": 0.57,
"learning_rate": 4.5044010715652504e-07,
"logits/chosen": -2.454868793487549,
"logits/rejected": -2.4135568141937256,
"logps/chosen": -262.34844970703125,
"logps/rejected": -231.43362426757812,
"loss": 0.598,
"rewards/accuracies": 0.6968749761581421,
"rewards/chosen": -0.0009088374790735543,
"rewards/margins": 0.2974187731742859,
"rewards/rejected": -0.29832762479782104,
"step": 550
},
{
"epoch": 0.58,
"learning_rate": 4.485265977803291e-07,
"logits/chosen": -2.378328800201416,
"logits/rejected": -2.380078077316284,
"logps/chosen": -268.5691833496094,
"logps/rejected": -231.4857940673828,
"loss": 0.5899,
"rewards/accuracies": 0.6937500238418579,
"rewards/chosen": 0.003624826669692993,
"rewards/margins": 0.3238300085067749,
"rewards/rejected": -0.3202051818370819,
"step": 560
},
{
"epoch": 0.59,
"learning_rate": 4.4661308840413316e-07,
"logits/chosen": -2.4507269859313965,
"logits/rejected": -2.4167442321777344,
"logps/chosen": -268.0180969238281,
"logps/rejected": -222.7704315185547,
"loss": 0.5739,
"rewards/accuracies": 0.7203124761581421,
"rewards/chosen": 0.004687085747718811,
"rewards/margins": 0.3652251660823822,
"rewards/rejected": -0.3605380654335022,
"step": 570
},
{
"epoch": 0.6,
"learning_rate": 4.446995790279372e-07,
"logits/chosen": -2.4484550952911377,
"logits/rejected": -2.3759899139404297,
"logps/chosen": -270.5180969238281,
"logps/rejected": -231.528564453125,
"loss": 0.5729,
"rewards/accuracies": 0.706250011920929,
"rewards/chosen": 0.012805086560547352,
"rewards/margins": 0.3726140558719635,
"rewards/rejected": -0.3598089814186096,
"step": 580
},
{
"epoch": 0.61,
"learning_rate": 4.4278606965174123e-07,
"logits/chosen": -2.441225528717041,
"logits/rejected": -2.3825039863586426,
"logps/chosen": -271.6439514160156,
"logps/rejected": -223.9091033935547,
"loss": 0.5831,
"rewards/accuracies": 0.706250011920929,
"rewards/chosen": -0.006787353660911322,
"rewards/margins": 0.36242157220840454,
"rewards/rejected": -0.3692089319229126,
"step": 590
},
{
"epoch": 0.62,
"learning_rate": 4.408725602755453e-07,
"logits/chosen": -2.4532737731933594,
"logits/rejected": -2.4243369102478027,
"logps/chosen": -266.54681396484375,
"logps/rejected": -231.792236328125,
"loss": 0.5821,
"rewards/accuracies": 0.703125,
"rewards/chosen": -0.014241993427276611,
"rewards/margins": 0.35581323504447937,
"rewards/rejected": -0.3700551986694336,
"step": 600
},
{
"epoch": 0.63,
"learning_rate": 4.3895905089934936e-07,
"logits/chosen": -2.441526412963867,
"logits/rejected": -2.4337775707244873,
"logps/chosen": -280.2122497558594,
"logps/rejected": -237.7913360595703,
"loss": 0.5681,
"rewards/accuracies": 0.7109375,
"rewards/chosen": 0.010852565988898277,
"rewards/margins": 0.39948010444641113,
"rewards/rejected": -0.3886275291442871,
"step": 610
},
{
"epoch": 0.64,
"learning_rate": 4.370455415231534e-07,
"logits/chosen": -2.4393959045410156,
"logits/rejected": -2.3926703929901123,
"logps/chosen": -257.3842468261719,
"logps/rejected": -213.9781494140625,
"loss": 0.575,
"rewards/accuracies": 0.703125,
"rewards/chosen": 8.446201536571607e-05,
"rewards/margins": 0.40542498230934143,
"rewards/rejected": -0.4053404927253723,
"step": 620
},
{
"epoch": 0.65,
"learning_rate": 4.351320321469575e-07,
"logits/chosen": -2.4491591453552246,
"logits/rejected": -2.398932933807373,
"logps/chosen": -269.54095458984375,
"logps/rejected": -223.14892578125,
"loss": 0.5835,
"rewards/accuracies": 0.703125,
"rewards/chosen": -0.04456017538905144,
"rewards/margins": 0.3685937225818634,
"rewards/rejected": -0.41315382719039917,
"step": 630
},
{
"epoch": 0.66,
"learning_rate": 4.3321852277076154e-07,
"logits/chosen": -2.4231886863708496,
"logits/rejected": -2.3941125869750977,
"logps/chosen": -278.4034729003906,
"logps/rejected": -239.0082244873047,
"loss": 0.5597,
"rewards/accuracies": 0.71875,
"rewards/chosen": -0.026434283703565598,
"rewards/margins": 0.431147962808609,
"rewards/rejected": -0.4575822949409485,
"step": 640
},
{
"epoch": 0.67,
"learning_rate": 4.313050133945656e-07,
"logits/chosen": -2.4560158252716064,
"logits/rejected": -2.3756983280181885,
"logps/chosen": -274.8756103515625,
"logps/rejected": -228.38619995117188,
"loss": 0.5685,
"rewards/accuracies": 0.721875011920929,
"rewards/chosen": -0.037291474640369415,
"rewards/margins": 0.42679017782211304,
"rewards/rejected": -0.46408161520957947,
"step": 650
},
{
"epoch": 0.68,
"learning_rate": 4.2939150401836967e-07,
"logits/chosen": -2.4025585651397705,
"logits/rejected": -2.362170934677124,
"logps/chosen": -279.28619384765625,
"logps/rejected": -235.0756378173828,
"loss": 0.5796,
"rewards/accuracies": 0.7093750238418579,
"rewards/chosen": -0.03398103266954422,
"rewards/margins": 0.38851290941238403,
"rewards/rejected": -0.42249393463134766,
"step": 660
},
{
"epoch": 0.69,
"learning_rate": 4.2747799464217373e-07,
"logits/chosen": -2.3948450088500977,
"logits/rejected": -2.380448818206787,
"logps/chosen": -265.87774658203125,
"logps/rejected": -221.30856323242188,
"loss": 0.5815,
"rewards/accuracies": 0.6890624761581421,
"rewards/chosen": -0.02471747435629368,
"rewards/margins": 0.38041889667510986,
"rewards/rejected": -0.4051364064216614,
"step": 670
},
{
"epoch": 0.7,
"learning_rate": 4.255644852659778e-07,
"logits/chosen": -2.428434371948242,
"logits/rejected": -2.3654212951660156,
"logps/chosen": -265.0726013183594,
"logps/rejected": -230.83480834960938,
"loss": 0.5846,
"rewards/accuracies": 0.6875,
"rewards/chosen": -0.058827854692935944,
"rewards/margins": 0.39134788513183594,
"rewards/rejected": -0.4501757025718689,
"step": 680
},
{
"epoch": 0.71,
"learning_rate": 4.236509758897818e-07,
"logits/chosen": -2.4110920429229736,
"logits/rejected": -2.3683831691741943,
"logps/chosen": -263.7908935546875,
"logps/rejected": -228.82852172851562,
"loss": 0.56,
"rewards/accuracies": 0.7437499761581421,
"rewards/chosen": -0.040988270193338394,
"rewards/margins": 0.4240415692329407,
"rewards/rejected": -0.4650298058986664,
"step": 690
},
{
"epoch": 0.72,
"learning_rate": 4.2173746651358586e-07,
"logits/chosen": -2.487917423248291,
"logits/rejected": -2.386542797088623,
"logps/chosen": -277.84173583984375,
"logps/rejected": -226.375732421875,
"loss": 0.5783,
"rewards/accuracies": 0.7124999761581421,
"rewards/chosen": -0.0548752136528492,
"rewards/margins": 0.41838139295578003,
"rewards/rejected": -0.4732566475868225,
"step": 700
},
{
"epoch": 0.73,
"learning_rate": 4.198239571373899e-07,
"logits/chosen": -2.4249427318573,
"logits/rejected": -2.397531270980835,
"logps/chosen": -280.85748291015625,
"logps/rejected": -239.29263305664062,
"loss": 0.5849,
"rewards/accuracies": 0.714062511920929,
"rewards/chosen": -0.060692138969898224,
"rewards/margins": 0.412163645029068,
"rewards/rejected": -0.472855806350708,
"step": 710
},
{
"epoch": 0.74,
"learning_rate": 4.17910447761194e-07,
"logits/chosen": -2.441291332244873,
"logits/rejected": -2.3523077964782715,
"logps/chosen": -252.23779296875,
"logps/rejected": -217.0404510498047,
"loss": 0.5835,
"rewards/accuracies": 0.6953125,
"rewards/chosen": -0.06024731323122978,
"rewards/margins": 0.37643498182296753,
"rewards/rejected": -0.4366822838783264,
"step": 720
},
{
"epoch": 0.75,
"learning_rate": 4.1599693838499805e-07,
"logits/chosen": -2.3433680534362793,
"logits/rejected": -2.340506076812744,
"logps/chosen": -248.30313110351562,
"logps/rejected": -215.12033081054688,
"loss": 0.5863,
"rewards/accuracies": 0.684374988079071,
"rewards/chosen": -0.08882372081279755,
"rewards/margins": 0.3825678527355194,
"rewards/rejected": -0.47139161825180054,
"step": 730
},
{
"epoch": 0.76,
"learning_rate": 4.140834290088021e-07,
"logits/chosen": -2.3932948112487793,
"logits/rejected": -2.3713467121124268,
"logps/chosen": -246.66860961914062,
"logps/rejected": -206.83572387695312,
"loss": 0.5667,
"rewards/accuracies": 0.7109375,
"rewards/chosen": -0.05001844838261604,
"rewards/margins": 0.4407244324684143,
"rewards/rejected": -0.49074286222457886,
"step": 740
},
{
"epoch": 0.77,
"learning_rate": 4.121699196326062e-07,
"logits/chosen": -2.3883261680603027,
"logits/rejected": -2.3786678314208984,
"logps/chosen": -266.27886962890625,
"logps/rejected": -225.5245819091797,
"loss": 0.5482,
"rewards/accuracies": 0.731249988079071,
"rewards/chosen": -0.022055109962821007,
"rewards/margins": 0.4830406606197357,
"rewards/rejected": -0.5050958395004272,
"step": 750
},
{
"epoch": 0.78,
"learning_rate": 4.1025641025641024e-07,
"logits/chosen": -2.4633374214172363,
"logits/rejected": -2.3861355781555176,
"logps/chosen": -269.9394836425781,
"logps/rejected": -229.15283203125,
"loss": 0.5616,
"rewards/accuracies": 0.721875011920929,
"rewards/chosen": -0.0641530454158783,
"rewards/margins": 0.4700705409049988,
"rewards/rejected": -0.5342236161231995,
"step": 760
},
{
"epoch": 0.8,
"learning_rate": 4.083429008802143e-07,
"logits/chosen": -2.394104480743408,
"logits/rejected": -2.379462718963623,
"logps/chosen": -265.15008544921875,
"logps/rejected": -216.9358367919922,
"loss": 0.5678,
"rewards/accuracies": 0.690625011920929,
"rewards/chosen": -0.05918584018945694,
"rewards/margins": 0.4691968560218811,
"rewards/rejected": -0.5283826589584351,
"step": 770
},
{
"epoch": 0.81,
"learning_rate": 4.0642939150401836e-07,
"logits/chosen": -2.439371109008789,
"logits/rejected": -2.3606810569763184,
"logps/chosen": -263.8412780761719,
"logps/rejected": -222.4286346435547,
"loss": 0.5684,
"rewards/accuracies": 0.7093750238418579,
"rewards/chosen": -0.07698424160480499,
"rewards/margins": 0.44548702239990234,
"rewards/rejected": -0.5224713087081909,
"step": 780
},
{
"epoch": 0.82,
"learning_rate": 4.0451588212782237e-07,
"logits/chosen": -2.4030070304870605,
"logits/rejected": -2.3470616340637207,
"logps/chosen": -277.69207763671875,
"logps/rejected": -241.84603881835938,
"loss": 0.5782,
"rewards/accuracies": 0.703125,
"rewards/chosen": -0.08946071565151215,
"rewards/margins": 0.44598451256752014,
"rewards/rejected": -0.5354452729225159,
"step": 790
},
{
"epoch": 0.83,
"learning_rate": 4.0260237275162643e-07,
"logits/chosen": -2.4240808486938477,
"logits/rejected": -2.379459857940674,
"logps/chosen": -267.9504699707031,
"logps/rejected": -237.7564239501953,
"loss": 0.5631,
"rewards/accuracies": 0.731249988079071,
"rewards/chosen": -0.1054706797003746,
"rewards/margins": 0.47193509340286255,
"rewards/rejected": -0.5774057507514954,
"step": 800
},
{
"epoch": 0.84,
"learning_rate": 4.006888633754305e-07,
"logits/chosen": -2.4196529388427734,
"logits/rejected": -2.378551959991455,
"logps/chosen": -265.9372253417969,
"logps/rejected": -226.904296875,
"loss": 0.5845,
"rewards/accuracies": 0.7015625238418579,
"rewards/chosen": -0.06137767434120178,
"rewards/margins": 0.43556785583496094,
"rewards/rejected": -0.4969455301761627,
"step": 810
},
{
"epoch": 0.85,
"learning_rate": 3.9877535399923456e-07,
"logits/chosen": -2.418703317642212,
"logits/rejected": -2.3797688484191895,
"logps/chosen": -272.62408447265625,
"logps/rejected": -233.73880004882812,
"loss": 0.5658,
"rewards/accuracies": 0.723437488079071,
"rewards/chosen": -0.07619120925664902,
"rewards/margins": 0.48392266035079956,
"rewards/rejected": -0.5601138472557068,
"step": 820
},
{
"epoch": 0.86,
"learning_rate": 3.968618446230386e-07,
"logits/chosen": -2.4377353191375732,
"logits/rejected": -2.3761370182037354,
"logps/chosen": -264.58331298828125,
"logps/rejected": -227.14138793945312,
"loss": 0.5727,
"rewards/accuracies": 0.692187488079071,
"rewards/chosen": -0.07739187777042389,
"rewards/margins": 0.45683830976486206,
"rewards/rejected": -0.5342302918434143,
"step": 830
},
{
"epoch": 0.87,
"learning_rate": 3.949483352468427e-07,
"logits/chosen": -2.4207470417022705,
"logits/rejected": -2.3555076122283936,
"logps/chosen": -254.27468872070312,
"logps/rejected": -230.114013671875,
"loss": 0.5806,
"rewards/accuracies": 0.6937500238418579,
"rewards/chosen": -0.09545855224132538,
"rewards/margins": 0.434993177652359,
"rewards/rejected": -0.5304517149925232,
"step": 840
},
{
"epoch": 0.88,
"learning_rate": 3.9303482587064674e-07,
"logits/chosen": -2.408648729324341,
"logits/rejected": -2.392381429672241,
"logps/chosen": -268.1350402832031,
"logps/rejected": -232.40829467773438,
"loss": 0.5628,
"rewards/accuracies": 0.71875,
"rewards/chosen": -0.10538534820079803,
"rewards/margins": 0.4874357283115387,
"rewards/rejected": -0.5928210616111755,
"step": 850
},
{
"epoch": 0.89,
"learning_rate": 3.911213164944508e-07,
"logits/chosen": -2.4571163654327393,
"logits/rejected": -2.4369616508483887,
"logps/chosen": -260.79400634765625,
"logps/rejected": -234.09521484375,
"loss": 0.5797,
"rewards/accuracies": 0.7109375,
"rewards/chosen": -0.08784712105989456,
"rewards/margins": 0.4429641664028168,
"rewards/rejected": -0.5308112502098083,
"step": 860
},
{
"epoch": 0.9,
"learning_rate": 3.8920780711825487e-07,
"logits/chosen": -2.4167487621307373,
"logits/rejected": -2.3556063175201416,
"logps/chosen": -264.9407653808594,
"logps/rejected": -234.77792358398438,
"loss": 0.5627,
"rewards/accuracies": 0.7046874761581421,
"rewards/chosen": -0.09353096783161163,
"rewards/margins": 0.47954684495925903,
"rewards/rejected": -0.5730777978897095,
"step": 870
},
{
"epoch": 0.91,
"learning_rate": 3.8729429774205893e-07,
"logits/chosen": -2.4163200855255127,
"logits/rejected": -2.3464267253875732,
"logps/chosen": -258.3912048339844,
"logps/rejected": -219.3795623779297,
"loss": 0.5831,
"rewards/accuracies": 0.706250011920929,
"rewards/chosen": -0.13110245764255524,
"rewards/margins": 0.43385767936706543,
"rewards/rejected": -0.5649601221084595,
"step": 880
},
{
"epoch": 0.92,
"learning_rate": 3.8538078836586294e-07,
"logits/chosen": -2.4255287647247314,
"logits/rejected": -2.355109453201294,
"logps/chosen": -283.6699523925781,
"logps/rejected": -235.8437957763672,
"loss": 0.5491,
"rewards/accuracies": 0.746874988079071,
"rewards/chosen": -0.07336901128292084,
"rewards/margins": 0.5329583883285522,
"rewards/rejected": -0.6063274145126343,
"step": 890
},
{
"epoch": 0.93,
"learning_rate": 3.83467278989667e-07,
"logits/chosen": -2.44077205657959,
"logits/rejected": -2.361696481704712,
"logps/chosen": -273.6830749511719,
"logps/rejected": -237.58401489257812,
"loss": 0.5821,
"rewards/accuracies": 0.684374988079071,
"rewards/chosen": -0.09832929819822311,
"rewards/margins": 0.45769548416137695,
"rewards/rejected": -0.5560247302055359,
"step": 900
},
{
"epoch": 0.94,
"learning_rate": 3.8155376961347106e-07,
"logits/chosen": -2.424898624420166,
"logits/rejected": -2.3718905448913574,
"logps/chosen": -268.20269775390625,
"logps/rejected": -231.1397247314453,
"loss": 0.5609,
"rewards/accuracies": 0.714062511920929,
"rewards/chosen": -0.0910768061876297,
"rewards/margins": 0.4992894232273102,
"rewards/rejected": -0.5903662443161011,
"step": 910
},
{
"epoch": 0.95,
"learning_rate": 3.796402602372751e-07,
"logits/chosen": -2.4363582134246826,
"logits/rejected": -2.377957582473755,
"logps/chosen": -276.6056213378906,
"logps/rejected": -233.3043975830078,
"loss": 0.5433,
"rewards/accuracies": 0.7124999761581421,
"rewards/chosen": -0.05291268229484558,
"rewards/margins": 0.5536981821060181,
"rewards/rejected": -0.606610894203186,
"step": 920
},
{
"epoch": 0.96,
"learning_rate": 3.777267508610792e-07,
"logits/chosen": -2.3768372535705566,
"logits/rejected": -2.393470287322998,
"logps/chosen": -272.34576416015625,
"logps/rejected": -224.0140838623047,
"loss": 0.5594,
"rewards/accuracies": 0.703125,
"rewards/chosen": -0.10874155908823013,
"rewards/margins": 0.5176196694374084,
"rewards/rejected": -0.6263612508773804,
"step": 930
},
{
"epoch": 0.97,
"learning_rate": 3.7581324148488325e-07,
"logits/chosen": -2.4112918376922607,
"logits/rejected": -2.388984441757202,
"logps/chosen": -273.1308898925781,
"logps/rejected": -239.2963409423828,
"loss": 0.5473,
"rewards/accuracies": 0.714062511920929,
"rewards/chosen": -0.1055203229188919,
"rewards/margins": 0.545116126537323,
"rewards/rejected": -0.6506363749504089,
"step": 940
},
{
"epoch": 0.98,
"learning_rate": 3.738997321086873e-07,
"logits/chosen": -2.3986384868621826,
"logits/rejected": -2.3761980533599854,
"logps/chosen": -268.1097106933594,
"logps/rejected": -234.03213500976562,
"loss": 0.5697,
"rewards/accuracies": 0.7015625238418579,
"rewards/chosen": -0.10402411222457886,
"rewards/margins": 0.4980190396308899,
"rewards/rejected": -0.6020431518554688,
"step": 950
},
{
"epoch": 0.99,
"learning_rate": 3.7198622273249137e-07,
"logits/chosen": -2.408790111541748,
"logits/rejected": -2.36995267868042,
"logps/chosen": -284.46514892578125,
"logps/rejected": -238.64352416992188,
"loss": 0.5648,
"rewards/accuracies": 0.7015625238418579,
"rewards/chosen": -0.10631656646728516,
"rewards/margins": 0.5066617131233215,
"rewards/rejected": -0.6129782795906067,
"step": 960
},
{
"epoch": 1.0,
"eval_logits/chosen": -2.0685439109802246,
"eval_logits/rejected": -2.015035390853882,
"eval_logps/chosen": -265.6653137207031,
"eval_logps/rejected": -230.13473510742188,
"eval_loss": 0.5547605752944946,
"eval_rewards/accuracies": 0.7114999890327454,
"eval_rewards/chosen": -0.11070162057876587,
"eval_rewards/margins": 0.5359883904457092,
"eval_rewards/rejected": -0.6466900110244751,
"eval_runtime": 1658.2887,
"eval_samples_per_second": 1.206,
"eval_steps_per_second": 0.302,
"step": 968
},
{
"epoch": 1.0,
"learning_rate": 3.7007271335629544e-07,
"logits/chosen": -2.44268798828125,
"logits/rejected": -2.3829662799835205,
"logps/chosen": -269.1377868652344,
"logps/rejected": -230.5484161376953,
"loss": 0.5465,
"rewards/accuracies": 0.7359374761581421,
"rewards/chosen": -0.06904083490371704,
"rewards/margins": 0.5532599091529846,
"rewards/rejected": -0.6223007440567017,
"step": 970
},
{
"epoch": 1.01,
"learning_rate": 3.681592039800995e-07,
"logits/chosen": -2.417346715927124,
"logits/rejected": -2.347588062286377,
"logps/chosen": -267.7151184082031,
"logps/rejected": -227.2200469970703,
"loss": 0.5439,
"rewards/accuracies": 0.7406250238418579,
"rewards/chosen": -0.10437774658203125,
"rewards/margins": 0.5515931844711304,
"rewards/rejected": -0.6559709310531616,
"step": 980
},
{
"epoch": 1.02,
"learning_rate": 3.662456946039035e-07,
"logits/chosen": -2.4241156578063965,
"logits/rejected": -2.3838791847229004,
"logps/chosen": -277.7347717285156,
"logps/rejected": -225.5718994140625,
"loss": 0.5427,
"rewards/accuracies": 0.7124999761581421,
"rewards/chosen": -0.09920702129602432,
"rewards/margins": 0.5613608360290527,
"rewards/rejected": -0.6605678796768188,
"step": 990
},
{
"epoch": 1.03,
"learning_rate": 3.6433218522770757e-07,
"logits/chosen": -2.432286024093628,
"logits/rejected": -2.350795030593872,
"logps/chosen": -280.2424621582031,
"logps/rejected": -238.3251495361328,
"loss": 0.5514,
"rewards/accuracies": 0.7203124761581421,
"rewards/chosen": -0.10347769409418106,
"rewards/margins": 0.5584946870803833,
"rewards/rejected": -0.6619724035263062,
"step": 1000
},
{
"epoch": 1.04,
"learning_rate": 3.6241867585151163e-07,
"logits/chosen": -2.402839183807373,
"logits/rejected": -2.3319945335388184,
"logps/chosen": -258.05889892578125,
"logps/rejected": -223.97216796875,
"loss": 0.5708,
"rewards/accuracies": 0.723437488079071,
"rewards/chosen": -0.13955342769622803,
"rewards/margins": 0.5002504587173462,
"rewards/rejected": -0.6398038268089294,
"step": 1010
},
{
"epoch": 1.05,
"learning_rate": 3.605051664753157e-07,
"logits/chosen": -2.428884267807007,
"logits/rejected": -2.35528826713562,
"logps/chosen": -272.50457763671875,
"logps/rejected": -231.6855010986328,
"loss": 0.5459,
"rewards/accuracies": 0.7250000238418579,
"rewards/chosen": -0.09094850718975067,
"rewards/margins": 0.5691753029823303,
"rewards/rejected": -0.6601237058639526,
"step": 1020
},
{
"epoch": 1.06,
"learning_rate": 3.5859165709911975e-07,
"logits/chosen": -2.4268717765808105,
"logits/rejected": -2.349578380584717,
"logps/chosen": -275.3159484863281,
"logps/rejected": -223.78414916992188,
"loss": 0.5457,
"rewards/accuracies": 0.7281249761581421,
"rewards/chosen": -0.07625160366296768,
"rewards/margins": 0.5891859531402588,
"rewards/rejected": -0.6654375791549683,
"step": 1030
},
{
"epoch": 1.07,
"learning_rate": 3.566781477229238e-07,
"logits/chosen": -2.3932693004608154,
"logits/rejected": -2.3574328422546387,
"logps/chosen": -256.0076599121094,
"logps/rejected": -222.4263458251953,
"loss": 0.5746,
"rewards/accuracies": 0.698437511920929,
"rewards/chosen": -0.1472007781267166,
"rewards/margins": 0.5160545706748962,
"rewards/rejected": -0.6632553339004517,
"step": 1040
},
{
"epoch": 1.08,
"learning_rate": 3.547646383467279e-07,
"logits/chosen": -2.3803439140319824,
"logits/rejected": -2.3705551624298096,
"logps/chosen": -261.7268371582031,
"logps/rejected": -223.354736328125,
"loss": 0.5494,
"rewards/accuracies": 0.715624988079071,
"rewards/chosen": -0.09807038307189941,
"rewards/margins": 0.5811691880226135,
"rewards/rejected": -0.6792395114898682,
"step": 1050
},
{
"epoch": 1.09,
"learning_rate": 3.5285112897053194e-07,
"logits/chosen": -2.4154138565063477,
"logits/rejected": -2.3943445682525635,
"logps/chosen": -280.23260498046875,
"logps/rejected": -242.3921661376953,
"loss": 0.5691,
"rewards/accuracies": 0.7203124761581421,
"rewards/chosen": -0.12113519757986069,
"rewards/margins": 0.510390043258667,
"rewards/rejected": -0.6315252184867859,
"step": 1060
},
{
"epoch": 1.11,
"learning_rate": 3.50937619594336e-07,
"logits/chosen": -2.400869131088257,
"logits/rejected": -2.3419127464294434,
"logps/chosen": -269.11322021484375,
"logps/rejected": -218.6542510986328,
"loss": 0.5401,
"rewards/accuracies": 0.7359374761581421,
"rewards/chosen": -0.11252293735742569,
"rewards/margins": 0.6148664355278015,
"rewards/rejected": -0.7273894548416138,
"step": 1070
},
{
"epoch": 1.12,
"learning_rate": 3.4902411021814007e-07,
"logits/chosen": -2.398010492324829,
"logits/rejected": -2.373257875442505,
"logps/chosen": -261.5543212890625,
"logps/rejected": -220.818603515625,
"loss": 0.5628,
"rewards/accuracies": 0.7093750238418579,
"rewards/chosen": -0.12159077823162079,
"rewards/margins": 0.5545440912246704,
"rewards/rejected": -0.6761348843574524,
"step": 1080
},
{
"epoch": 1.13,
"learning_rate": 3.4711060084194413e-07,
"logits/chosen": -2.398470401763916,
"logits/rejected": -2.3667078018188477,
"logps/chosen": -265.3479309082031,
"logps/rejected": -223.44967651367188,
"loss": 0.5444,
"rewards/accuracies": 0.739062488079071,
"rewards/chosen": -0.13141986727714539,
"rewards/margins": 0.5646113157272339,
"rewards/rejected": -0.6960310935974121,
"step": 1090
},
{
"epoch": 1.14,
"learning_rate": 3.4519709146574814e-07,
"logits/chosen": -2.400993824005127,
"logits/rejected": -2.34995698928833,
"logps/chosen": -261.8519287109375,
"logps/rejected": -225.39340209960938,
"loss": 0.5674,
"rewards/accuracies": 0.714062511920929,
"rewards/chosen": -0.14484994113445282,
"rewards/margins": 0.5336871147155762,
"rewards/rejected": -0.6785370707511902,
"step": 1100
},
{
"epoch": 1.15,
"learning_rate": 3.432835820895522e-07,
"logits/chosen": -2.4596643447875977,
"logits/rejected": -2.3810102939605713,
"logps/chosen": -277.64697265625,
"logps/rejected": -240.9037322998047,
"loss": 0.5414,
"rewards/accuracies": 0.731249988079071,
"rewards/chosen": -0.0647854283452034,
"rewards/margins": 0.6014553308486938,
"rewards/rejected": -0.6662408113479614,
"step": 1110
},
{
"epoch": 1.16,
"learning_rate": 3.4137007271335626e-07,
"logits/chosen": -2.4132676124572754,
"logits/rejected": -2.3595833778381348,
"logps/chosen": -269.6545104980469,
"logps/rejected": -220.05996704101562,
"loss": 0.5393,
"rewards/accuracies": 0.7203124761581421,
"rewards/chosen": -0.1037089005112648,
"rewards/margins": 0.624849796295166,
"rewards/rejected": -0.7285586595535278,
"step": 1120
},
{
"epoch": 1.17,
"learning_rate": 3.394565633371603e-07,
"logits/chosen": -2.4248127937316895,
"logits/rejected": -2.3464858531951904,
"logps/chosen": -280.0205078125,
"logps/rejected": -239.3312225341797,
"loss": 0.5435,
"rewards/accuracies": 0.723437488079071,
"rewards/chosen": -0.0879250168800354,
"rewards/margins": 0.6255140900611877,
"rewards/rejected": -0.7134391069412231,
"step": 1130
},
{
"epoch": 1.18,
"learning_rate": 3.375430539609644e-07,
"logits/chosen": -2.3959927558898926,
"logits/rejected": -2.3690690994262695,
"logps/chosen": -256.87310791015625,
"logps/rejected": -227.3232879638672,
"loss": 0.5591,
"rewards/accuracies": 0.7281249761581421,
"rewards/chosen": -0.12515565752983093,
"rewards/margins": 0.5448717474937439,
"rewards/rejected": -0.6700273752212524,
"step": 1140
},
{
"epoch": 1.19,
"learning_rate": 3.3562954458476845e-07,
"logits/chosen": -2.349555492401123,
"logits/rejected": -2.3227591514587402,
"logps/chosen": -253.9552459716797,
"logps/rejected": -217.4364776611328,
"loss": 0.5485,
"rewards/accuracies": 0.7203124761581421,
"rewards/chosen": -0.11552216857671738,
"rewards/margins": 0.5672619342803955,
"rewards/rejected": -0.6827840805053711,
"step": 1150
},
{
"epoch": 1.2,
"learning_rate": 3.337160352085725e-07,
"logits/chosen": -2.403165340423584,
"logits/rejected": -2.312551975250244,
"logps/chosen": -266.38970947265625,
"logps/rejected": -219.06478881835938,
"loss": 0.5408,
"rewards/accuracies": 0.7406250238418579,
"rewards/chosen": -0.11588382720947266,
"rewards/margins": 0.6109603643417358,
"rewards/rejected": -0.7268441915512085,
"step": 1160
},
{
"epoch": 1.21,
"learning_rate": 3.3180252583237657e-07,
"logits/chosen": -2.426140308380127,
"logits/rejected": -2.364060640335083,
"logps/chosen": -277.64935302734375,
"logps/rejected": -233.41964721679688,
"loss": 0.5449,
"rewards/accuracies": 0.7359374761581421,
"rewards/chosen": -0.15606358647346497,
"rewards/margins": 0.5846539735794067,
"rewards/rejected": -0.7407175302505493,
"step": 1170
},
{
"epoch": 1.22,
"learning_rate": 3.2988901645618063e-07,
"logits/chosen": -2.4718017578125,
"logits/rejected": -2.4017224311828613,
"logps/chosen": -267.055908203125,
"logps/rejected": -247.3446044921875,
"loss": 0.5608,
"rewards/accuracies": 0.706250011920929,
"rewards/chosen": -0.139754980802536,
"rewards/margins": 0.585044801235199,
"rewards/rejected": -0.7247998714447021,
"step": 1180
},
{
"epoch": 1.23,
"learning_rate": 3.279755070799847e-07,
"logits/chosen": -2.3561863899230957,
"logits/rejected": -2.336796522140503,
"logps/chosen": -260.29742431640625,
"logps/rejected": -226.8851776123047,
"loss": 0.5631,
"rewards/accuracies": 0.7046874761581421,
"rewards/chosen": -0.1206628829240799,
"rewards/margins": 0.5734033584594727,
"rewards/rejected": -0.6940661668777466,
"step": 1190
},
{
"epoch": 1.24,
"learning_rate": 3.260619977037887e-07,
"logits/chosen": -2.383582353591919,
"logits/rejected": -2.350806713104248,
"logps/chosen": -261.0791015625,
"logps/rejected": -222.1552734375,
"loss": 0.5274,
"rewards/accuracies": 0.7281249761581421,
"rewards/chosen": -0.11264900863170624,
"rewards/margins": 0.6694163084030151,
"rewards/rejected": -0.7820653915405273,
"step": 1200
},
{
"epoch": 1.25,
"learning_rate": 3.2414848832759277e-07,
"logits/chosen": -2.3713862895965576,
"logits/rejected": -2.331512928009033,
"logps/chosen": -257.71209716796875,
"logps/rejected": -222.45755004882812,
"loss": 0.5578,
"rewards/accuracies": 0.715624988079071,
"rewards/chosen": -0.14970478415489197,
"rewards/margins": 0.5587034821510315,
"rewards/rejected": -0.7084082365036011,
"step": 1210
},
{
"epoch": 1.26,
"learning_rate": 3.2223497895139683e-07,
"logits/chosen": -2.3780715465545654,
"logits/rejected": -2.3363735675811768,
"logps/chosen": -272.34039306640625,
"logps/rejected": -232.05892944335938,
"loss": 0.5362,
"rewards/accuracies": 0.734375,
"rewards/chosen": -0.11652742326259613,
"rewards/margins": 0.6240389347076416,
"rewards/rejected": -0.7405663728713989,
"step": 1220
},
{
"epoch": 1.27,
"learning_rate": 3.203214695752009e-07,
"logits/chosen": -2.4418883323669434,
"logits/rejected": -2.374427080154419,
"logps/chosen": -270.23785400390625,
"logps/rejected": -228.5772247314453,
"loss": 0.5371,
"rewards/accuracies": 0.7421875,
"rewards/chosen": -0.14697889983654022,
"rewards/margins": 0.6358888149261475,
"rewards/rejected": -0.7828676700592041,
"step": 1230
},
{
"epoch": 1.28,
"learning_rate": 3.1840796019900495e-07,
"logits/chosen": -2.321744203567505,
"logits/rejected": -2.3201723098754883,
"logps/chosen": -244.7095184326172,
"logps/rejected": -224.6121368408203,
"loss": 0.5635,
"rewards/accuracies": 0.7250000238418579,
"rewards/chosen": -0.1834283024072647,
"rewards/margins": 0.5532564520835876,
"rewards/rejected": -0.7366846799850464,
"step": 1240
},
{
"epoch": 1.29,
"learning_rate": 3.16494450822809e-07,
"logits/chosen": -2.392040967941284,
"logits/rejected": -2.3873581886291504,
"logps/chosen": -260.9786682128906,
"logps/rejected": -217.41488647460938,
"loss": 0.5285,
"rewards/accuracies": 0.7281249761581421,
"rewards/chosen": -0.10610403120517731,
"rewards/margins": 0.6539296507835388,
"rewards/rejected": -0.7600336074829102,
"step": 1250
},
{
"epoch": 1.3,
"learning_rate": 3.145809414466131e-07,
"logits/chosen": -2.4131011962890625,
"logits/rejected": -2.3864870071411133,
"logps/chosen": -268.1712951660156,
"logps/rejected": -243.2780303955078,
"loss": 0.5451,
"rewards/accuracies": 0.7015625238418579,
"rewards/chosen": -0.14184710383415222,
"rewards/margins": 0.5766977071762085,
"rewards/rejected": -0.7185447216033936,
"step": 1260
},
{
"epoch": 1.31,
"learning_rate": 3.1266743207041714e-07,
"logits/chosen": -2.4226272106170654,
"logits/rejected": -2.3552260398864746,
"logps/chosen": -275.0395812988281,
"logps/rejected": -246.27993774414062,
"loss": 0.5488,
"rewards/accuracies": 0.734375,
"rewards/chosen": -0.08415131270885468,
"rewards/margins": 0.5906020402908325,
"rewards/rejected": -0.6747534275054932,
"step": 1270
},
{
"epoch": 1.32,
"learning_rate": 3.107539226942212e-07,
"logits/chosen": -2.384654998779297,
"logits/rejected": -2.3474934101104736,
"logps/chosen": -280.5086364746094,
"logps/rejected": -249.03744506835938,
"loss": 0.5093,
"rewards/accuracies": 0.762499988079071,
"rewards/chosen": -0.09258462488651276,
"rewards/margins": 0.6895312070846558,
"rewards/rejected": -0.7821158170700073,
"step": 1280
},
{
"epoch": 1.33,
"learning_rate": 3.0884041331802526e-07,
"logits/chosen": -2.3851654529571533,
"logits/rejected": -2.3415586948394775,
"logps/chosen": -275.87249755859375,
"logps/rejected": -238.9077606201172,
"loss": 0.5182,
"rewards/accuracies": 0.731249988079071,
"rewards/chosen": -0.12925606966018677,
"rewards/margins": 0.6958507299423218,
"rewards/rejected": -0.8251067399978638,
"step": 1290
},
{
"epoch": 1.34,
"learning_rate": 3.0692690394182927e-07,
"logits/chosen": -2.347446918487549,
"logits/rejected": -2.3569579124450684,
"logps/chosen": -254.8171844482422,
"logps/rejected": -225.75048828125,
"loss": 0.5475,
"rewards/accuracies": 0.699999988079071,
"rewards/chosen": -0.12040869146585464,
"rewards/margins": 0.6625940203666687,
"rewards/rejected": -0.7830026745796204,
"step": 1300
},
{
"epoch": 1.35,
"learning_rate": 3.0501339456563334e-07,
"logits/chosen": -2.3930153846740723,
"logits/rejected": -2.3339743614196777,
"logps/chosen": -282.39208984375,
"logps/rejected": -241.4809112548828,
"loss": 0.5539,
"rewards/accuracies": 0.706250011920929,
"rewards/chosen": -0.15331654250621796,
"rewards/margins": 0.5918524861335754,
"rewards/rejected": -0.7451690435409546,
"step": 1310
},
{
"epoch": 1.36,
"learning_rate": 3.030998851894374e-07,
"logits/chosen": -2.4213199615478516,
"logits/rejected": -2.372786045074463,
"logps/chosen": -277.1365661621094,
"logps/rejected": -231.59835815429688,
"loss": 0.5408,
"rewards/accuracies": 0.7359374761581421,
"rewards/chosen": -0.11530748754739761,
"rewards/margins": 0.6329683065414429,
"rewards/rejected": -0.7482757568359375,
"step": 1320
},
{
"epoch": 1.37,
"learning_rate": 3.0118637581324146e-07,
"logits/chosen": -2.3494033813476562,
"logits/rejected": -2.2917075157165527,
"logps/chosen": -283.3105773925781,
"logps/rejected": -236.96218872070312,
"loss": 0.5337,
"rewards/accuracies": 0.754687488079071,
"rewards/chosen": -0.16250093281269073,
"rewards/margins": 0.6374029517173767,
"rewards/rejected": -0.7999038696289062,
"step": 1330
},
{
"epoch": 1.38,
"learning_rate": 2.992728664370455e-07,
"logits/chosen": -2.410579204559326,
"logits/rejected": -2.395461320877075,
"logps/chosen": -265.9371643066406,
"logps/rejected": -225.1396484375,
"loss": 0.5493,
"rewards/accuracies": 0.7281249761581421,
"rewards/chosen": -0.15022125840187073,
"rewards/margins": 0.6349747776985168,
"rewards/rejected": -0.78519606590271,
"step": 1340
},
{
"epoch": 1.39,
"learning_rate": 2.973593570608496e-07,
"logits/chosen": -2.354858636856079,
"logits/rejected": -2.333374500274658,
"logps/chosen": -267.4329528808594,
"logps/rejected": -229.37033081054688,
"loss": 0.5584,
"rewards/accuracies": 0.707812488079071,
"rewards/chosen": -0.13673868775367737,
"rewards/margins": 0.6077600121498108,
"rewards/rejected": -0.7444987297058105,
"step": 1350
},
{
"epoch": 1.4,
"learning_rate": 2.9544584768465365e-07,
"logits/chosen": -2.4165000915527344,
"logits/rejected": -2.389608860015869,
"logps/chosen": -256.08953857421875,
"logps/rejected": -234.6078643798828,
"loss": 0.5434,
"rewards/accuracies": 0.729687511920929,
"rewards/chosen": -0.11967863142490387,
"rewards/margins": 0.641726553440094,
"rewards/rejected": -0.7614051699638367,
"step": 1360
},
{
"epoch": 1.41,
"learning_rate": 2.935323383084577e-07,
"logits/chosen": -2.441561460494995,
"logits/rejected": -2.385502338409424,
"logps/chosen": -276.8068542480469,
"logps/rejected": -226.84146118164062,
"loss": 0.56,
"rewards/accuracies": 0.6953125,
"rewards/chosen": -0.18507501482963562,
"rewards/margins": 0.5834146738052368,
"rewards/rejected": -0.7684897780418396,
"step": 1370
},
{
"epoch": 1.43,
"learning_rate": 2.9161882893226177e-07,
"logits/chosen": -2.4208102226257324,
"logits/rejected": -2.371891498565674,
"logps/chosen": -267.7480163574219,
"logps/rejected": -228.93954467773438,
"loss": 0.5457,
"rewards/accuracies": 0.7281249761581421,
"rewards/chosen": -0.12237964570522308,
"rewards/margins": 0.6243287324905396,
"rewards/rejected": -0.746708333492279,
"step": 1380
},
{
"epoch": 1.44,
"learning_rate": 2.8970531955606583e-07,
"logits/chosen": -2.4296679496765137,
"logits/rejected": -2.384805202484131,
"logps/chosen": -269.50341796875,
"logps/rejected": -226.89120483398438,
"loss": 0.5158,
"rewards/accuracies": 0.75,
"rewards/chosen": -0.11908123642206192,
"rewards/margins": 0.7121685743331909,
"rewards/rejected": -0.8312498331069946,
"step": 1390
},
{
"epoch": 1.45,
"learning_rate": 2.8779181017986984e-07,
"logits/chosen": -2.3762552738189697,
"logits/rejected": -2.3503944873809814,
"logps/chosen": -265.67657470703125,
"logps/rejected": -227.09695434570312,
"loss": 0.5772,
"rewards/accuracies": 0.6781250238418579,
"rewards/chosen": -0.15753719210624695,
"rewards/margins": 0.5870577692985535,
"rewards/rejected": -0.744594931602478,
"step": 1400
},
{
"epoch": 1.46,
"learning_rate": 2.858783008036739e-07,
"logits/chosen": -2.433023452758789,
"logits/rejected": -2.39740252494812,
"logps/chosen": -254.3402862548828,
"logps/rejected": -216.28042602539062,
"loss": 0.5347,
"rewards/accuracies": 0.7265625,
"rewards/chosen": -0.11956797540187836,
"rewards/margins": 0.6502863168716431,
"rewards/rejected": -0.7698543667793274,
"step": 1410
},
{
"epoch": 1.47,
"learning_rate": 2.8396479142747797e-07,
"logits/chosen": -2.3455922603607178,
"logits/rejected": -2.367486000061035,
"logps/chosen": -271.64971923828125,
"logps/rejected": -226.45278930664062,
"loss": 0.5615,
"rewards/accuracies": 0.714062511920929,
"rewards/chosen": -0.12722182273864746,
"rewards/margins": 0.5968899726867676,
"rewards/rejected": -0.7241117358207703,
"step": 1420
},
{
"epoch": 1.48,
"learning_rate": 2.8205128205128203e-07,
"logits/chosen": -2.437638759613037,
"logits/rejected": -2.3999106884002686,
"logps/chosen": -278.81402587890625,
"logps/rejected": -233.3411407470703,
"loss": 0.5484,
"rewards/accuracies": 0.707812488079071,
"rewards/chosen": -0.16687321662902832,
"rewards/margins": 0.6460558176040649,
"rewards/rejected": -0.8129289746284485,
"step": 1430
},
{
"epoch": 1.49,
"learning_rate": 2.801377726750861e-07,
"logits/chosen": -2.4091131687164307,
"logits/rejected": -2.3717777729034424,
"logps/chosen": -259.6352844238281,
"logps/rejected": -234.7488250732422,
"loss": 0.5472,
"rewards/accuracies": 0.723437488079071,
"rewards/chosen": -0.12939931452274323,
"rewards/margins": 0.6247848272323608,
"rewards/rejected": -0.7541841268539429,
"step": 1440
},
{
"epoch": 1.5,
"learning_rate": 2.7822426329889015e-07,
"logits/chosen": -2.3630013465881348,
"logits/rejected": -2.3308510780334473,
"logps/chosen": -265.0796813964844,
"logps/rejected": -235.9908905029297,
"loss": 0.5262,
"rewards/accuracies": 0.745312511920929,
"rewards/chosen": -0.12796229124069214,
"rewards/margins": 0.6874942779541016,
"rewards/rejected": -0.8154565691947937,
"step": 1450
},
{
"epoch": 1.51,
"learning_rate": 2.763107539226942e-07,
"logits/chosen": -2.384431838989258,
"logits/rejected": -2.3531229496002197,
"logps/chosen": -273.24407958984375,
"logps/rejected": -237.3506622314453,
"loss": 0.5534,
"rewards/accuracies": 0.739062488079071,
"rewards/chosen": -0.13397890329360962,
"rewards/margins": 0.6210099458694458,
"rewards/rejected": -0.7549887895584106,
"step": 1460
},
{
"epoch": 1.52,
"learning_rate": 2.743972445464983e-07,
"logits/chosen": -2.3733363151550293,
"logits/rejected": -2.3497159481048584,
"logps/chosen": -256.4417419433594,
"logps/rejected": -218.98245239257812,
"loss": 0.5466,
"rewards/accuracies": 0.729687511920929,
"rewards/chosen": -0.10995250940322876,
"rewards/margins": 0.625704288482666,
"rewards/rejected": -0.7356568574905396,
"step": 1470
},
{
"epoch": 1.53,
"learning_rate": 2.7248373517030234e-07,
"logits/chosen": -2.391806125640869,
"logits/rejected": -2.3594701290130615,
"logps/chosen": -261.59954833984375,
"logps/rejected": -222.4551544189453,
"loss": 0.5685,
"rewards/accuracies": 0.6968749761581421,
"rewards/chosen": -0.1836286336183548,
"rewards/margins": 0.6219319105148315,
"rewards/rejected": -0.8055604696273804,
"step": 1480
},
{
"epoch": 1.54,
"learning_rate": 2.705702257941064e-07,
"logits/chosen": -2.395291805267334,
"logits/rejected": -2.335513114929199,
"logps/chosen": -271.76495361328125,
"logps/rejected": -223.66604614257812,
"loss": 0.5284,
"rewards/accuracies": 0.71875,
"rewards/chosen": -0.13636186718940735,
"rewards/margins": 0.6964784860610962,
"rewards/rejected": -0.8328403234481812,
"step": 1490
},
{
"epoch": 1.55,
"learning_rate": 2.686567164179104e-07,
"logits/chosen": -2.409865140914917,
"logits/rejected": -2.343632936477661,
"logps/chosen": -261.92218017578125,
"logps/rejected": -225.1486358642578,
"loss": 0.528,
"rewards/accuracies": 0.7578125,
"rewards/chosen": -0.10242755711078644,
"rewards/margins": 0.660239577293396,
"rewards/rejected": -0.7626670598983765,
"step": 1500
},
{
"epoch": 1.56,
"learning_rate": 2.6674320704171447e-07,
"logits/chosen": -2.3943867683410645,
"logits/rejected": -2.34450101852417,
"logps/chosen": -267.44940185546875,
"logps/rejected": -227.99658203125,
"loss": 0.539,
"rewards/accuracies": 0.7250000238418579,
"rewards/chosen": -0.15267999470233917,
"rewards/margins": 0.6513444185256958,
"rewards/rejected": -0.8040245175361633,
"step": 1510
},
{
"epoch": 1.57,
"learning_rate": 2.6482969766551853e-07,
"logits/chosen": -2.3414573669433594,
"logits/rejected": -2.3219292163848877,
"logps/chosen": -263.6014709472656,
"logps/rejected": -227.2323760986328,
"loss": 0.5145,
"rewards/accuracies": 0.7359374761581421,
"rewards/chosen": -0.12107870727777481,
"rewards/margins": 0.7130403518676758,
"rewards/rejected": -0.8341191411018372,
"step": 1520
},
{
"epoch": 1.58,
"learning_rate": 2.629161882893226e-07,
"logits/chosen": -2.4271631240844727,
"logits/rejected": -2.3703784942626953,
"logps/chosen": -279.36041259765625,
"logps/rejected": -239.1962127685547,
"loss": 0.5668,
"rewards/accuracies": 0.7171875238418579,
"rewards/chosen": -0.17160965502262115,
"rewards/margins": 0.6050060987472534,
"rewards/rejected": -0.7766157388687134,
"step": 1530
},
{
"epoch": 1.59,
"learning_rate": 2.6100267891312666e-07,
"logits/chosen": -2.3565828800201416,
"logits/rejected": -2.311810255050659,
"logps/chosen": -261.40228271484375,
"logps/rejected": -224.05148315429688,
"loss": 0.5431,
"rewards/accuracies": 0.7250000238418579,
"rewards/chosen": -0.1451948881149292,
"rewards/margins": 0.6642559170722961,
"rewards/rejected": -0.8094508051872253,
"step": 1540
},
{
"epoch": 1.6,
"learning_rate": 2.590891695369307e-07,
"logits/chosen": -2.4056944847106934,
"logits/rejected": -2.3697822093963623,
"logps/chosen": -281.52032470703125,
"logps/rejected": -240.4828338623047,
"loss": 0.5358,
"rewards/accuracies": 0.7359374761581421,
"rewards/chosen": -0.10201269388198853,
"rewards/margins": 0.6899839639663696,
"rewards/rejected": -0.7919965982437134,
"step": 1550
},
{
"epoch": 1.61,
"learning_rate": 2.571756601607348e-07,
"logits/chosen": -2.399779796600342,
"logits/rejected": -2.3576903343200684,
"logps/chosen": -281.99725341796875,
"logps/rejected": -232.83047485351562,
"loss": 0.5334,
"rewards/accuracies": 0.723437488079071,
"rewards/chosen": -0.13619406521320343,
"rewards/margins": 0.7102779150009155,
"rewards/rejected": -0.8464719653129578,
"step": 1560
},
{
"epoch": 1.62,
"learning_rate": 2.5526215078453884e-07,
"logits/chosen": -2.3670992851257324,
"logits/rejected": -2.330780506134033,
"logps/chosen": -275.53179931640625,
"logps/rejected": -241.46340942382812,
"loss": 0.551,
"rewards/accuracies": 0.729687511920929,
"rewards/chosen": -0.15719105303287506,
"rewards/margins": 0.6423493027687073,
"rewards/rejected": -0.7995403409004211,
"step": 1570
},
{
"epoch": 1.63,
"learning_rate": 2.533486414083429e-07,
"logits/chosen": -2.3753514289855957,
"logits/rejected": -2.358102798461914,
"logps/chosen": -273.9100036621094,
"logps/rejected": -226.753662109375,
"loss": 0.5245,
"rewards/accuracies": 0.7484375238418579,
"rewards/chosen": -0.18196144700050354,
"rewards/margins": 0.6733365654945374,
"rewards/rejected": -0.8552980422973633,
"step": 1580
},
{
"epoch": 1.64,
"learning_rate": 2.5143513203214697e-07,
"logits/chosen": -2.405080795288086,
"logits/rejected": -2.3308959007263184,
"logps/chosen": -261.7924499511719,
"logps/rejected": -232.6370849609375,
"loss": 0.5617,
"rewards/accuracies": 0.7046874761581421,
"rewards/chosen": -0.19414867460727692,
"rewards/margins": 0.6336308717727661,
"rewards/rejected": -0.8277795910835266,
"step": 1590
},
{
"epoch": 1.65,
"learning_rate": 2.49521622655951e-07,
"logits/chosen": -2.4004340171813965,
"logits/rejected": -2.3892195224761963,
"logps/chosen": -263.91217041015625,
"logps/rejected": -233.1723175048828,
"loss": 0.5558,
"rewards/accuracies": 0.739062488079071,
"rewards/chosen": -0.13633672893047333,
"rewards/margins": 0.6239665746688843,
"rewards/rejected": -0.7603033185005188,
"step": 1600
},
{
"epoch": 1.66,
"learning_rate": 2.4760811327975504e-07,
"logits/chosen": -2.387913465499878,
"logits/rejected": -2.3446784019470215,
"logps/chosen": -276.1678161621094,
"logps/rejected": -239.7351531982422,
"loss": 0.5395,
"rewards/accuracies": 0.721875011920929,
"rewards/chosen": -0.12410640716552734,
"rewards/margins": 0.6739387512207031,
"rewards/rejected": -0.7980451583862305,
"step": 1610
},
{
"epoch": 1.67,
"learning_rate": 2.456946039035591e-07,
"logits/chosen": -2.379734992980957,
"logits/rejected": -2.370614528656006,
"logps/chosen": -256.16510009765625,
"logps/rejected": -217.11697387695312,
"loss": 0.5126,
"rewards/accuracies": 0.746874988079071,
"rewards/chosen": -0.1286478042602539,
"rewards/margins": 0.7220448851585388,
"rewards/rejected": -0.8506927490234375,
"step": 1620
},
{
"epoch": 1.68,
"learning_rate": 2.4378109452736316e-07,
"logits/chosen": -2.348128080368042,
"logits/rejected": -2.3468122482299805,
"logps/chosen": -268.2215576171875,
"logps/rejected": -235.09970092773438,
"loss": 0.537,
"rewards/accuracies": 0.7359374761581421,
"rewards/chosen": -0.1686348021030426,
"rewards/margins": 0.6620901823043823,
"rewards/rejected": -0.8307248950004578,
"step": 1630
},
{
"epoch": 1.69,
"learning_rate": 2.418675851511672e-07,
"logits/chosen": -2.385132312774658,
"logits/rejected": -2.3292319774627686,
"logps/chosen": -258.2908630371094,
"logps/rejected": -227.2532196044922,
"loss": 0.525,
"rewards/accuracies": 0.7421875,
"rewards/chosen": -0.1993228644132614,
"rewards/margins": 0.6777428984642029,
"rewards/rejected": -0.8770657777786255,
"step": 1640
},
{
"epoch": 1.7,
"learning_rate": 2.399540757749713e-07,
"logits/chosen": -2.3949036598205566,
"logits/rejected": -2.36201810836792,
"logps/chosen": -252.385986328125,
"logps/rejected": -226.19009399414062,
"loss": 0.5471,
"rewards/accuracies": 0.7171875238418579,
"rewards/chosen": -0.12631697952747345,
"rewards/margins": 0.6516768336296082,
"rewards/rejected": -0.7779937982559204,
"step": 1650
},
{
"epoch": 1.71,
"learning_rate": 2.3804056639877535e-07,
"logits/chosen": -2.352290153503418,
"logits/rejected": -2.3201920986175537,
"logps/chosen": -269.6744689941406,
"logps/rejected": -239.7770233154297,
"loss": 0.5522,
"rewards/accuracies": 0.699999988079071,
"rewards/chosen": -0.1264854222536087,
"rewards/margins": 0.6343278884887695,
"rewards/rejected": -0.7608132362365723,
"step": 1660
},
{
"epoch": 1.72,
"learning_rate": 2.361270570225794e-07,
"logits/chosen": -2.369065523147583,
"logits/rejected": -2.3292198181152344,
"logps/chosen": -273.328857421875,
"logps/rejected": -227.1344451904297,
"loss": 0.5526,
"rewards/accuracies": 0.7093750238418579,
"rewards/chosen": -0.1670379340648651,
"rewards/margins": 0.6671660542488098,
"rewards/rejected": -0.8342038989067078,
"step": 1670
},
{
"epoch": 1.74,
"learning_rate": 2.3421354764638345e-07,
"logits/chosen": -2.330441951751709,
"logits/rejected": -2.277582883834839,
"logps/chosen": -262.3780822753906,
"logps/rejected": -222.9958038330078,
"loss": 0.5384,
"rewards/accuracies": 0.7171875238418579,
"rewards/chosen": -0.14923453330993652,
"rewards/margins": 0.680332601070404,
"rewards/rejected": -0.8295671343803406,
"step": 1680
},
{
"epoch": 1.75,
"learning_rate": 2.323000382701875e-07,
"logits/chosen": -2.358654260635376,
"logits/rejected": -2.2956321239471436,
"logps/chosen": -262.77276611328125,
"logps/rejected": -223.75985717773438,
"loss": 0.5338,
"rewards/accuracies": 0.721875011920929,
"rewards/chosen": -0.1570906639099121,
"rewards/margins": 0.6962771415710449,
"rewards/rejected": -0.853367805480957,
"step": 1690
},
{
"epoch": 1.76,
"learning_rate": 2.3038652889399157e-07,
"logits/chosen": -2.368196964263916,
"logits/rejected": -2.365723133087158,
"logps/chosen": -264.42950439453125,
"logps/rejected": -235.8118896484375,
"loss": 0.5235,
"rewards/accuracies": 0.75,
"rewards/chosen": -0.13157618045806885,
"rewards/margins": 0.6999197602272034,
"rewards/rejected": -0.8314959406852722,
"step": 1700
},
{
"epoch": 1.77,
"learning_rate": 2.2847301951779563e-07,
"logits/chosen": -2.3950960636138916,
"logits/rejected": -2.3562042713165283,
"logps/chosen": -273.6717834472656,
"logps/rejected": -232.68161010742188,
"loss": 0.5347,
"rewards/accuracies": 0.7406250238418579,
"rewards/chosen": -0.15049448609352112,
"rewards/margins": 0.7236722707748413,
"rewards/rejected": -0.8741667866706848,
"step": 1710
},
{
"epoch": 1.78,
"learning_rate": 2.265595101415997e-07,
"logits/chosen": -2.3810062408447266,
"logits/rejected": -2.3283467292785645,
"logps/chosen": -269.14801025390625,
"logps/rejected": -230.89248657226562,
"loss": 0.5464,
"rewards/accuracies": 0.7093750238418579,
"rewards/chosen": -0.15543130040168762,
"rewards/margins": 0.6771457195281982,
"rewards/rejected": -0.8325770497322083,
"step": 1720
},
{
"epoch": 1.79,
"learning_rate": 2.2464600076540373e-07,
"logits/chosen": -2.3967809677124023,
"logits/rejected": -2.319446563720703,
"logps/chosen": -279.8531188964844,
"logps/rejected": -237.2471160888672,
"loss": 0.5556,
"rewards/accuracies": 0.7203124761581421,
"rewards/chosen": -0.12391219288110733,
"rewards/margins": 0.6414698362350464,
"rewards/rejected": -0.7653820514678955,
"step": 1730
},
{
"epoch": 1.8,
"learning_rate": 2.227324913892078e-07,
"logits/chosen": -2.3728363513946533,
"logits/rejected": -2.3333539962768555,
"logps/chosen": -273.06304931640625,
"logps/rejected": -229.03561401367188,
"loss": 0.5477,
"rewards/accuracies": 0.721875011920929,
"rewards/chosen": -0.12746073305606842,
"rewards/margins": 0.6716173887252808,
"rewards/rejected": -0.799078106880188,
"step": 1740
},
{
"epoch": 1.81,
"learning_rate": 2.2081898201301186e-07,
"logits/chosen": -2.3346054553985596,
"logits/rejected": -2.3393990993499756,
"logps/chosen": -284.27886962890625,
"logps/rejected": -243.03475952148438,
"loss": 0.5597,
"rewards/accuracies": 0.734375,
"rewards/chosen": -0.14506694674491882,
"rewards/margins": 0.604491114616394,
"rewards/rejected": -0.7495580911636353,
"step": 1750
},
{
"epoch": 1.82,
"learning_rate": 2.1890547263681592e-07,
"logits/chosen": -2.3285999298095703,
"logits/rejected": -2.319593906402588,
"logps/chosen": -279.5855712890625,
"logps/rejected": -228.2317657470703,
"loss": 0.5416,
"rewards/accuracies": 0.7124999761581421,
"rewards/chosen": -0.13806693255901337,
"rewards/margins": 0.6977806091308594,
"rewards/rejected": -0.8358476758003235,
"step": 1760
},
{
"epoch": 1.83,
"learning_rate": 2.1699196326061998e-07,
"logits/chosen": -2.325159788131714,
"logits/rejected": -2.3459110260009766,
"logps/chosen": -272.990478515625,
"logps/rejected": -230.53524780273438,
"loss": 0.5335,
"rewards/accuracies": 0.7328125238418579,
"rewards/chosen": -0.13658006489276886,
"rewards/margins": 0.7168129086494446,
"rewards/rejected": -0.8533929586410522,
"step": 1770
},
{
"epoch": 1.84,
"learning_rate": 2.1507845388442402e-07,
"logits/chosen": -2.4010326862335205,
"logits/rejected": -2.3410234451293945,
"logps/chosen": -276.6645202636719,
"logps/rejected": -219.21414184570312,
"loss": 0.519,
"rewards/accuracies": 0.7640625238418579,
"rewards/chosen": -0.0858209878206253,
"rewards/margins": 0.6973799467086792,
"rewards/rejected": -0.7832010388374329,
"step": 1780
},
{
"epoch": 1.85,
"learning_rate": 2.1316494450822808e-07,
"logits/chosen": -2.3417165279388428,
"logits/rejected": -2.341611623764038,
"logps/chosen": -271.9029235839844,
"logps/rejected": -232.0885009765625,
"loss": 0.5266,
"rewards/accuracies": 0.7437499761581421,
"rewards/chosen": -0.13584092259407043,
"rewards/margins": 0.702505886554718,
"rewards/rejected": -0.8383467793464661,
"step": 1790
},
{
"epoch": 1.86,
"learning_rate": 2.1125143513203214e-07,
"logits/chosen": -2.416315793991089,
"logits/rejected": -2.3927767276763916,
"logps/chosen": -264.4131774902344,
"logps/rejected": -227.99179077148438,
"loss": 0.5479,
"rewards/accuracies": 0.7203124761581421,
"rewards/chosen": -0.12012849003076553,
"rewards/margins": 0.6565206050872803,
"rewards/rejected": -0.7766491174697876,
"step": 1800
},
{
"epoch": 1.87,
"learning_rate": 2.093379257558362e-07,
"logits/chosen": -2.3913142681121826,
"logits/rejected": -2.3399569988250732,
"logps/chosen": -271.81011962890625,
"logps/rejected": -241.50759887695312,
"loss": 0.5633,
"rewards/accuracies": 0.684374988079071,
"rewards/chosen": -0.1379830241203308,
"rewards/margins": 0.6094950437545776,
"rewards/rejected": -0.7474781274795532,
"step": 1810
},
{
"epoch": 1.88,
"learning_rate": 2.0742441637964026e-07,
"logits/chosen": -2.376094102859497,
"logits/rejected": -2.3213579654693604,
"logps/chosen": -245.985107421875,
"logps/rejected": -217.1469268798828,
"loss": 0.5383,
"rewards/accuracies": 0.71875,
"rewards/chosen": -0.16205313801765442,
"rewards/margins": 0.6546685099601746,
"rewards/rejected": -0.8167217373847961,
"step": 1820
},
{
"epoch": 1.89,
"learning_rate": 2.055109070034443e-07,
"logits/chosen": -2.4133598804473877,
"logits/rejected": -2.322434663772583,
"logps/chosen": -261.0830993652344,
"logps/rejected": -222.3695068359375,
"loss": 0.5411,
"rewards/accuracies": 0.7515624761581421,
"rewards/chosen": -0.12403901666402817,
"rewards/margins": 0.6635336875915527,
"rewards/rejected": -0.7875727415084839,
"step": 1830
},
{
"epoch": 1.9,
"learning_rate": 2.0359739762724836e-07,
"logits/chosen": -2.3970634937286377,
"logits/rejected": -2.334484577178955,
"logps/chosen": -289.25213623046875,
"logps/rejected": -241.99313354492188,
"loss": 0.4944,
"rewards/accuracies": 0.7718750238418579,
"rewards/chosen": -0.11199700832366943,
"rewards/margins": 0.7853450179100037,
"rewards/rejected": -0.8973420858383179,
"step": 1840
},
{
"epoch": 1.91,
"learning_rate": 2.0168388825105242e-07,
"logits/chosen": -2.3959927558898926,
"logits/rejected": -2.340639591217041,
"logps/chosen": -261.5354919433594,
"logps/rejected": -230.05795288085938,
"loss": 0.5513,
"rewards/accuracies": 0.690625011920929,
"rewards/chosen": -0.1715332418680191,
"rewards/margins": 0.6642467379570007,
"rewards/rejected": -0.8357800245285034,
"step": 1850
},
{
"epoch": 1.92,
"learning_rate": 1.997703788748565e-07,
"logits/chosen": -2.383922576904297,
"logits/rejected": -2.3244893550872803,
"logps/chosen": -261.2723693847656,
"logps/rejected": -237.76644897460938,
"loss": 0.5373,
"rewards/accuracies": 0.7328125238418579,
"rewards/chosen": -0.1503421664237976,
"rewards/margins": 0.6998149156570435,
"rewards/rejected": -0.8501569628715515,
"step": 1860
},
{
"epoch": 1.93,
"learning_rate": 1.9785686949866055e-07,
"logits/chosen": -2.4093589782714844,
"logits/rejected": -2.353217601776123,
"logps/chosen": -276.1336975097656,
"logps/rejected": -229.25216674804688,
"loss": 0.5199,
"rewards/accuracies": 0.7484375238418579,
"rewards/chosen": -0.17228913307189941,
"rewards/margins": 0.713442325592041,
"rewards/rejected": -0.8857313394546509,
"step": 1870
},
{
"epoch": 1.94,
"learning_rate": 1.9594336012246458e-07,
"logits/chosen": -2.3640589714050293,
"logits/rejected": -2.3493194580078125,
"logps/chosen": -263.424072265625,
"logps/rejected": -229.8025360107422,
"loss": 0.5399,
"rewards/accuracies": 0.7421875,
"rewards/chosen": -0.17658892273902893,
"rewards/margins": 0.6467570066452026,
"rewards/rejected": -0.823345959186554,
"step": 1880
},
{
"epoch": 1.95,
"learning_rate": 1.9402985074626865e-07,
"logits/chosen": -2.3777713775634766,
"logits/rejected": -2.316854238510132,
"logps/chosen": -261.4559631347656,
"logps/rejected": -227.89718627929688,
"loss": 0.5238,
"rewards/accuracies": 0.745312511920929,
"rewards/chosen": -0.1278056502342224,
"rewards/margins": 0.7158471345901489,
"rewards/rejected": -0.8436528444290161,
"step": 1890
},
{
"epoch": 1.96,
"learning_rate": 1.921163413700727e-07,
"logits/chosen": -2.392733335494995,
"logits/rejected": -2.373281240463257,
"logps/chosen": -281.6858825683594,
"logps/rejected": -244.1327362060547,
"loss": 0.5358,
"rewards/accuracies": 0.746874988079071,
"rewards/chosen": -0.11135254800319672,
"rewards/margins": 0.7201862931251526,
"rewards/rejected": -0.8315388560295105,
"step": 1900
},
{
"epoch": 1.97,
"learning_rate": 1.9020283199387677e-07,
"logits/chosen": -2.343679189682007,
"logits/rejected": -2.3387351036071777,
"logps/chosen": -268.7857666015625,
"logps/rejected": -238.4132843017578,
"loss": 0.5173,
"rewards/accuracies": 0.7484375238418579,
"rewards/chosen": -0.1281011551618576,
"rewards/margins": 0.7418644428253174,
"rewards/rejected": -0.869965672492981,
"step": 1910
},
{
"epoch": 1.98,
"learning_rate": 1.8828932261768083e-07,
"logits/chosen": -2.372236967086792,
"logits/rejected": -2.3373138904571533,
"logps/chosen": -265.18865966796875,
"logps/rejected": -227.602783203125,
"loss": 0.5272,
"rewards/accuracies": 0.7437499761581421,
"rewards/chosen": -0.13484473526477814,
"rewards/margins": 0.7227139472961426,
"rewards/rejected": -0.8575586080551147,
"step": 1920
},
{
"epoch": 1.99,
"learning_rate": 1.8637581324148487e-07,
"logits/chosen": -2.392242670059204,
"logits/rejected": -2.3611741065979004,
"logps/chosen": -268.98431396484375,
"logps/rejected": -239.24978637695312,
"loss": 0.5563,
"rewards/accuracies": 0.6875,
"rewards/chosen": -0.17363707721233368,
"rewards/margins": 0.6345449686050415,
"rewards/rejected": -0.8081819415092468,
"step": 1930
},
{
"epoch": 2.0,
"eval_logits/chosen": -2.031247615814209,
"eval_logits/rejected": -1.9774165153503418,
"eval_logps/chosen": -266.0887145996094,
"eval_logps/rejected": -232.28887939453125,
"eval_loss": 0.5312530398368835,
"eval_rewards/accuracies": 0.7315000295639038,
"eval_rewards/chosen": -0.15304329991340637,
"eval_rewards/margins": 0.709061324596405,
"eval_rewards/rejected": -0.862104594707489,
"eval_runtime": 1653.0531,
"eval_samples_per_second": 1.21,
"eval_steps_per_second": 0.302,
"step": 1936
},
{
"epoch": 2.0,
"learning_rate": 1.8446230386528893e-07,
"logits/chosen": -2.4022011756896973,
"logits/rejected": -2.324002504348755,
"logps/chosen": -260.53228759765625,
"logps/rejected": -229.572998046875,
"loss": 0.5515,
"rewards/accuracies": 0.7265625,
"rewards/chosen": -0.20086026191711426,
"rewards/margins": 0.6226423978805542,
"rewards/rejected": -0.8235027194023132,
"step": 1940
},
{
"epoch": 2.01,
"learning_rate": 1.82548794489093e-07,
"logits/chosen": -2.3827035427093506,
"logits/rejected": -2.3521687984466553,
"logps/chosen": -272.88433837890625,
"logps/rejected": -228.8638458251953,
"loss": 0.5432,
"rewards/accuracies": 0.7359374761581421,
"rewards/chosen": -0.1703009009361267,
"rewards/margins": 0.6672419309616089,
"rewards/rejected": -0.8375428318977356,
"step": 1950
},
{
"epoch": 2.02,
"learning_rate": 1.8063528511289706e-07,
"logits/chosen": -2.3884754180908203,
"logits/rejected": -2.3221449851989746,
"logps/chosen": -258.5544128417969,
"logps/rejected": -224.48971557617188,
"loss": 0.5424,
"rewards/accuracies": 0.734375,
"rewards/chosen": -0.21564142405986786,
"rewards/margins": 0.6631011366844177,
"rewards/rejected": -0.8787425756454468,
"step": 1960
},
{
"epoch": 2.03,
"learning_rate": 1.7872177573670112e-07,
"logits/chosen": -2.3919119834899902,
"logits/rejected": -2.3483376502990723,
"logps/chosen": -276.0721435546875,
"logps/rejected": -239.58154296875,
"loss": 0.5155,
"rewards/accuracies": 0.753125011920929,
"rewards/chosen": -0.13372206687927246,
"rewards/margins": 0.7485690712928772,
"rewards/rejected": -0.8822910189628601,
"step": 1970
},
{
"epoch": 2.04,
"learning_rate": 1.7680826636050515e-07,
"logits/chosen": -2.360553741455078,
"logits/rejected": -2.3577828407287598,
"logps/chosen": -257.6646728515625,
"logps/rejected": -230.3592529296875,
"loss": 0.5352,
"rewards/accuracies": 0.739062488079071,
"rewards/chosen": -0.16400420665740967,
"rewards/margins": 0.7011340856552124,
"rewards/rejected": -0.8651384115219116,
"step": 1980
},
{
"epoch": 2.06,
"learning_rate": 1.7489475698430921e-07,
"logits/chosen": -2.3634886741638184,
"logits/rejected": -2.322361469268799,
"logps/chosen": -282.5517883300781,
"logps/rejected": -233.2860870361328,
"loss": 0.5361,
"rewards/accuracies": 0.7281249761581421,
"rewards/chosen": -0.1445886641740799,
"rewards/margins": 0.752617597579956,
"rewards/rejected": -0.89720618724823,
"step": 1990
},
{
"epoch": 2.07,
"learning_rate": 1.7298124760811328e-07,
"logits/chosen": -2.3623392581939697,
"logits/rejected": -2.319822311401367,
"logps/chosen": -253.936279296875,
"logps/rejected": -234.66848754882812,
"loss": 0.5119,
"rewards/accuracies": 0.765625,
"rewards/chosen": -0.09708345681428909,
"rewards/margins": 0.7794226408004761,
"rewards/rejected": -0.8765062093734741,
"step": 2000
},
{
"epoch": 2.08,
"learning_rate": 1.7106773823191734e-07,
"logits/chosen": -2.3689372539520264,
"logits/rejected": -2.3116328716278076,
"logps/chosen": -268.030029296875,
"logps/rejected": -231.4261474609375,
"loss": 0.5711,
"rewards/accuracies": 0.723437488079071,
"rewards/chosen": -0.18213681876659393,
"rewards/margins": 0.5894214510917664,
"rewards/rejected": -0.7715582251548767,
"step": 2010
},
{
"epoch": 2.09,
"learning_rate": 1.691542288557214e-07,
"logits/chosen": -2.3579487800598145,
"logits/rejected": -2.3073954582214355,
"logps/chosen": -260.38775634765625,
"logps/rejected": -225.4844207763672,
"loss": 0.5156,
"rewards/accuracies": 0.75,
"rewards/chosen": -0.14314624667167664,
"rewards/margins": 0.7362397313117981,
"rewards/rejected": -0.8793859481811523,
"step": 2020
},
{
"epoch": 2.1,
"learning_rate": 1.6724071947952544e-07,
"logits/chosen": -2.3365800380706787,
"logits/rejected": -2.2837400436401367,
"logps/chosen": -266.9193420410156,
"logps/rejected": -238.1469268798828,
"loss": 0.5267,
"rewards/accuracies": 0.7359374761581421,
"rewards/chosen": -0.12520021200180054,
"rewards/margins": 0.7114790678024292,
"rewards/rejected": -0.8366793394088745,
"step": 2030
},
{
"epoch": 2.11,
"learning_rate": 1.653272101033295e-07,
"logits/chosen": -2.400599479675293,
"logits/rejected": -2.35431170463562,
"logps/chosen": -262.89825439453125,
"logps/rejected": -228.84310913085938,
"loss": 0.559,
"rewards/accuracies": 0.6968749761581421,
"rewards/chosen": -0.15787425637245178,
"rewards/margins": 0.6567065119743347,
"rewards/rejected": -0.8145807385444641,
"step": 2040
},
{
"epoch": 2.12,
"learning_rate": 1.6341370072713356e-07,
"logits/chosen": -2.400794267654419,
"logits/rejected": -2.3113760948181152,
"logps/chosen": -246.63400268554688,
"logps/rejected": -223.24923706054688,
"loss": 0.5255,
"rewards/accuracies": 0.7421875,
"rewards/chosen": -0.14660078287124634,
"rewards/margins": 0.7161286473274231,
"rewards/rejected": -0.8627294301986694,
"step": 2050
},
{
"epoch": 2.13,
"learning_rate": 1.6150019135093762e-07,
"logits/chosen": -2.344493865966797,
"logits/rejected": -2.3352556228637695,
"logps/chosen": -278.28839111328125,
"logps/rejected": -243.4131622314453,
"loss": 0.5521,
"rewards/accuracies": 0.729687511920929,
"rewards/chosen": -0.13938100636005402,
"rewards/margins": 0.6712489128112793,
"rewards/rejected": -0.8106300234794617,
"step": 2060
},
{
"epoch": 2.14,
"learning_rate": 1.5958668197474169e-07,
"logits/chosen": -2.3649954795837402,
"logits/rejected": -2.3345046043395996,
"logps/chosen": -286.7098083496094,
"logps/rejected": -232.02517700195312,
"loss": 0.5329,
"rewards/accuracies": 0.7265625,
"rewards/chosen": -0.10944189131259918,
"rewards/margins": 0.7143247723579407,
"rewards/rejected": -0.8237665891647339,
"step": 2070
},
{
"epoch": 2.15,
"learning_rate": 1.5767317259854572e-07,
"logits/chosen": -2.401505947113037,
"logits/rejected": -2.3687071800231934,
"logps/chosen": -252.4344940185547,
"logps/rejected": -230.4383087158203,
"loss": 0.5534,
"rewards/accuracies": 0.7265625,
"rewards/chosen": -0.16689328849315643,
"rewards/margins": 0.670124351978302,
"rewards/rejected": -0.8370175361633301,
"step": 2080
},
{
"epoch": 2.16,
"learning_rate": 1.5575966322234978e-07,
"logits/chosen": -2.3800606727600098,
"logits/rejected": -2.3494057655334473,
"logps/chosen": -277.4918212890625,
"logps/rejected": -236.21533203125,
"loss": 0.5376,
"rewards/accuracies": 0.721875011920929,
"rewards/chosen": -0.11864397674798965,
"rewards/margins": 0.7518168687820435,
"rewards/rejected": -0.8704608082771301,
"step": 2090
},
{
"epoch": 2.17,
"learning_rate": 1.5384615384615385e-07,
"logits/chosen": -2.361454963684082,
"logits/rejected": -2.327955722808838,
"logps/chosen": -283.4490051269531,
"logps/rejected": -234.3018035888672,
"loss": 0.5348,
"rewards/accuracies": 0.729687511920929,
"rewards/chosen": -0.1568828970193863,
"rewards/margins": 0.7170850038528442,
"rewards/rejected": -0.8739679455757141,
"step": 2100
},
{
"epoch": 2.18,
"learning_rate": 1.519326444699579e-07,
"logits/chosen": -2.3744759559631348,
"logits/rejected": -2.358099937438965,
"logps/chosen": -277.69195556640625,
"logps/rejected": -241.0815887451172,
"loss": 0.5268,
"rewards/accuracies": 0.7359374761581421,
"rewards/chosen": -0.12414580583572388,
"rewards/margins": 0.7003077864646912,
"rewards/rejected": -0.8244536519050598,
"step": 2110
},
{
"epoch": 2.19,
"learning_rate": 1.5001913509376197e-07,
"logits/chosen": -2.4057154655456543,
"logits/rejected": -2.3354310989379883,
"logps/chosen": -260.5264892578125,
"logps/rejected": -222.79110717773438,
"loss": 0.5231,
"rewards/accuracies": 0.7265625,
"rewards/chosen": -0.15866820514202118,
"rewards/margins": 0.7241252660751343,
"rewards/rejected": -0.8827934265136719,
"step": 2120
},
{
"epoch": 2.2,
"learning_rate": 1.4810562571756603e-07,
"logits/chosen": -2.3917877674102783,
"logits/rejected": -2.355861186981201,
"logps/chosen": -283.84320068359375,
"logps/rejected": -230.14404296875,
"loss": 0.5136,
"rewards/accuracies": 0.7515624761581421,
"rewards/chosen": -0.13140609860420227,
"rewards/margins": 0.7532661557197571,
"rewards/rejected": -0.8846722841262817,
"step": 2130
},
{
"epoch": 2.21,
"learning_rate": 1.4619211634137007e-07,
"logits/chosen": -2.387089967727661,
"logits/rejected": -2.314539670944214,
"logps/chosen": -268.9212341308594,
"logps/rejected": -232.61898803710938,
"loss": 0.4982,
"rewards/accuracies": 0.7484375238418579,
"rewards/chosen": -0.10855790227651596,
"rewards/margins": 0.8006644248962402,
"rewards/rejected": -0.9092223048210144,
"step": 2140
},
{
"epoch": 2.22,
"learning_rate": 1.4427860696517413e-07,
"logits/chosen": -2.3513126373291016,
"logits/rejected": -2.3646275997161865,
"logps/chosen": -262.60345458984375,
"logps/rejected": -245.1242218017578,
"loss": 0.5552,
"rewards/accuracies": 0.71875,
"rewards/chosen": -0.1720902919769287,
"rewards/margins": 0.6639872193336487,
"rewards/rejected": -0.8360773921012878,
"step": 2150
},
{
"epoch": 2.23,
"learning_rate": 1.423650975889782e-07,
"logits/chosen": -2.3261661529541016,
"logits/rejected": -2.308116912841797,
"logps/chosen": -271.75543212890625,
"logps/rejected": -232.4984588623047,
"loss": 0.5473,
"rewards/accuracies": 0.721875011920929,
"rewards/chosen": -0.15690350532531738,
"rewards/margins": 0.6513178944587708,
"rewards/rejected": -0.8082213401794434,
"step": 2160
},
{
"epoch": 2.24,
"learning_rate": 1.4045158821278225e-07,
"logits/chosen": -2.3861842155456543,
"logits/rejected": -2.354062080383301,
"logps/chosen": -280.88116455078125,
"logps/rejected": -232.3457794189453,
"loss": 0.5446,
"rewards/accuracies": 0.714062511920929,
"rewards/chosen": -0.15454408526420593,
"rewards/margins": 0.6919762492179871,
"rewards/rejected": -0.8465203046798706,
"step": 2170
},
{
"epoch": 2.25,
"learning_rate": 1.3853807883658632e-07,
"logits/chosen": -2.3726694583892822,
"logits/rejected": -2.3578009605407715,
"logps/chosen": -275.0099182128906,
"logps/rejected": -245.10873413085938,
"loss": 0.4978,
"rewards/accuracies": 0.78125,
"rewards/chosen": -0.08260266482830048,
"rewards/margins": 0.7924613952636719,
"rewards/rejected": -0.8750640749931335,
"step": 2180
},
{
"epoch": 2.26,
"learning_rate": 1.3662456946039035e-07,
"logits/chosen": -2.339141368865967,
"logits/rejected": -2.3254787921905518,
"logps/chosen": -258.27996826171875,
"logps/rejected": -228.74398803710938,
"loss": 0.547,
"rewards/accuracies": 0.7328125238418579,
"rewards/chosen": -0.15882189571857452,
"rewards/margins": 0.6925019025802612,
"rewards/rejected": -0.8513237833976746,
"step": 2190
},
{
"epoch": 2.27,
"learning_rate": 1.3471106008419441e-07,
"logits/chosen": -2.406780958175659,
"logits/rejected": -2.3241848945617676,
"logps/chosen": -263.73931884765625,
"logps/rejected": -224.1796417236328,
"loss": 0.5389,
"rewards/accuracies": 0.7203124761581421,
"rewards/chosen": -0.1274387389421463,
"rewards/margins": 0.7019168138504028,
"rewards/rejected": -0.8293555378913879,
"step": 2200
},
{
"epoch": 2.28,
"learning_rate": 1.3279755070799848e-07,
"logits/chosen": -2.3664703369140625,
"logits/rejected": -2.318784713745117,
"logps/chosen": -261.4429626464844,
"logps/rejected": -229.2322540283203,
"loss": 0.5361,
"rewards/accuracies": 0.7562500238418579,
"rewards/chosen": -0.13853470981121063,
"rewards/margins": 0.7004620432853699,
"rewards/rejected": -0.8389967083930969,
"step": 2210
},
{
"epoch": 2.29,
"learning_rate": 1.3088404133180254e-07,
"logits/chosen": -2.3630192279815674,
"logits/rejected": -2.3432974815368652,
"logps/chosen": -273.27850341796875,
"logps/rejected": -241.81771850585938,
"loss": 0.5111,
"rewards/accuracies": 0.768750011920929,
"rewards/chosen": -0.10639991611242294,
"rewards/margins": 0.7741705179214478,
"rewards/rejected": -0.8805704116821289,
"step": 2220
},
{
"epoch": 2.3,
"learning_rate": 1.289705319556066e-07,
"logits/chosen": -2.3936803340911865,
"logits/rejected": -2.3308169841766357,
"logps/chosen": -269.2107849121094,
"logps/rejected": -238.88394165039062,
"loss": 0.5046,
"rewards/accuracies": 0.739062488079071,
"rewards/chosen": -0.1324814110994339,
"rewards/margins": 0.8152027130126953,
"rewards/rejected": -0.947684109210968,
"step": 2230
},
{
"epoch": 2.31,
"learning_rate": 1.2705702257941064e-07,
"logits/chosen": -2.3774123191833496,
"logits/rejected": -2.312371253967285,
"logps/chosen": -247.8770751953125,
"logps/rejected": -221.15536499023438,
"loss": 0.5219,
"rewards/accuracies": 0.753125011920929,
"rewards/chosen": -0.13058343529701233,
"rewards/margins": 0.7142859697341919,
"rewards/rejected": -0.8448693156242371,
"step": 2240
},
{
"epoch": 2.32,
"learning_rate": 1.251435132032147e-07,
"logits/chosen": -2.399721622467041,
"logits/rejected": -2.319878101348877,
"logps/chosen": -299.5263671875,
"logps/rejected": -242.10964965820312,
"loss": 0.5404,
"rewards/accuracies": 0.721875011920929,
"rewards/chosen": -0.1261121928691864,
"rewards/margins": 0.7005943059921265,
"rewards/rejected": -0.8267065286636353,
"step": 2250
},
{
"epoch": 2.33,
"learning_rate": 1.2323000382701873e-07,
"logits/chosen": -2.3750648498535156,
"logits/rejected": -2.33599591255188,
"logps/chosen": -281.7725830078125,
"logps/rejected": -241.2296600341797,
"loss": 0.5367,
"rewards/accuracies": 0.731249988079071,
"rewards/chosen": -0.12657758593559265,
"rewards/margins": 0.7330743074417114,
"rewards/rejected": -0.8596519231796265,
"step": 2260
},
{
"epoch": 2.34,
"learning_rate": 1.213164944508228e-07,
"logits/chosen": -2.3655242919921875,
"logits/rejected": -2.309812068939209,
"logps/chosen": -262.09954833984375,
"logps/rejected": -234.92758178710938,
"loss": 0.551,
"rewards/accuracies": 0.7124999761581421,
"rewards/chosen": -0.13195794820785522,
"rewards/margins": 0.6896259784698486,
"rewards/rejected": -0.8215838670730591,
"step": 2270
},
{
"epoch": 2.35,
"learning_rate": 1.1940298507462686e-07,
"logits/chosen": -2.4004878997802734,
"logits/rejected": -2.3559048175811768,
"logps/chosen": -270.6529846191406,
"logps/rejected": -242.43368530273438,
"loss": 0.5288,
"rewards/accuracies": 0.754687488079071,
"rewards/chosen": -0.17092928290367126,
"rewards/margins": 0.7322403192520142,
"rewards/rejected": -0.9031696319580078,
"step": 2280
},
{
"epoch": 2.37,
"learning_rate": 1.1748947569843092e-07,
"logits/chosen": -2.2949576377868652,
"logits/rejected": -2.279416561126709,
"logps/chosen": -253.159423828125,
"logps/rejected": -220.51596069335938,
"loss": 0.5365,
"rewards/accuracies": 0.7406250238418579,
"rewards/chosen": -0.14947687089443207,
"rewards/margins": 0.7013243436813354,
"rewards/rejected": -0.8508013486862183,
"step": 2290
},
{
"epoch": 2.38,
"learning_rate": 1.1557596632223497e-07,
"logits/chosen": -2.376298427581787,
"logits/rejected": -2.301685333251953,
"logps/chosen": -269.91168212890625,
"logps/rejected": -221.9505615234375,
"loss": 0.552,
"rewards/accuracies": 0.715624988079071,
"rewards/chosen": -0.19331762194633484,
"rewards/margins": 0.7175094485282898,
"rewards/rejected": -0.9108270406723022,
"step": 2300
},
{
"epoch": 2.39,
"learning_rate": 1.1366245694603903e-07,
"logits/chosen": -2.3533215522766113,
"logits/rejected": -2.3328850269317627,
"logps/chosen": -258.6532897949219,
"logps/rejected": -218.423095703125,
"loss": 0.5498,
"rewards/accuracies": 0.723437488079071,
"rewards/chosen": -0.14851602911949158,
"rewards/margins": 0.6814883947372437,
"rewards/rejected": -0.8300043940544128,
"step": 2310
},
{
"epoch": 2.4,
"learning_rate": 1.1174894756984308e-07,
"logits/chosen": -2.3606135845184326,
"logits/rejected": -2.3032069206237793,
"logps/chosen": -252.78604125976562,
"logps/rejected": -211.8778076171875,
"loss": 0.5301,
"rewards/accuracies": 0.7328125238418579,
"rewards/chosen": -0.16221049427986145,
"rewards/margins": 0.706489086151123,
"rewards/rejected": -0.8686995506286621,
"step": 2320
},
{
"epoch": 2.41,
"learning_rate": 1.0983543819364714e-07,
"logits/chosen": -2.406503677368164,
"logits/rejected": -2.3547933101654053,
"logps/chosen": -278.7993469238281,
"logps/rejected": -237.6390380859375,
"loss": 0.5369,
"rewards/accuracies": 0.745312511920929,
"rewards/chosen": -0.1446961909532547,
"rewards/margins": 0.725204348564148,
"rewards/rejected": -0.8699005842208862,
"step": 2330
},
{
"epoch": 2.42,
"learning_rate": 1.079219288174512e-07,
"logits/chosen": -2.3510079383850098,
"logits/rejected": -2.296820640563965,
"logps/chosen": -261.72235107421875,
"logps/rejected": -243.376708984375,
"loss": 0.546,
"rewards/accuracies": 0.7203124761581421,
"rewards/chosen": -0.17369435727596283,
"rewards/margins": 0.6820122599601746,
"rewards/rejected": -0.8557065725326538,
"step": 2340
},
{
"epoch": 2.43,
"learning_rate": 1.0600841944125525e-07,
"logits/chosen": -2.408764362335205,
"logits/rejected": -2.336327314376831,
"logps/chosen": -258.2755126953125,
"logps/rejected": -228.48385620117188,
"loss": 0.5113,
"rewards/accuracies": 0.760937511920929,
"rewards/chosen": -0.15298260748386383,
"rewards/margins": 0.7763695120811462,
"rewards/rejected": -0.9293521642684937,
"step": 2350
},
{
"epoch": 2.44,
"learning_rate": 1.0409491006505931e-07,
"logits/chosen": -2.367042064666748,
"logits/rejected": -2.317692279815674,
"logps/chosen": -266.90362548828125,
"logps/rejected": -228.7984619140625,
"loss": 0.5205,
"rewards/accuracies": 0.75,
"rewards/chosen": -0.17633156478405,
"rewards/margins": 0.7394440770149231,
"rewards/rejected": -0.9157756567001343,
"step": 2360
},
{
"epoch": 2.45,
"learning_rate": 1.0218140068886336e-07,
"logits/chosen": -2.372544050216675,
"logits/rejected": -2.297020673751831,
"logps/chosen": -267.892822265625,
"logps/rejected": -229.27035522460938,
"loss": 0.5245,
"rewards/accuracies": 0.7281249761581421,
"rewards/chosen": -0.1406041383743286,
"rewards/margins": 0.7415550351142883,
"rewards/rejected": -0.8821592330932617,
"step": 2370
},
{
"epoch": 2.46,
"learning_rate": 1.0026789131266743e-07,
"logits/chosen": -2.380431890487671,
"logits/rejected": -2.3486175537109375,
"logps/chosen": -260.1377868652344,
"logps/rejected": -228.09994506835938,
"loss": 0.5274,
"rewards/accuracies": 0.734375,
"rewards/chosen": -0.16361048817634583,
"rewards/margins": 0.6924134492874146,
"rewards/rejected": -0.8560239672660828,
"step": 2380
},
{
"epoch": 2.47,
"learning_rate": 9.835438193647149e-08,
"logits/chosen": -2.364969491958618,
"logits/rejected": -2.3163743019104004,
"logps/chosen": -261.2802429199219,
"logps/rejected": -235.7176055908203,
"loss": 0.5158,
"rewards/accuracies": 0.7562500238418579,
"rewards/chosen": -0.11532945930957794,
"rewards/margins": 0.7920664548873901,
"rewards/rejected": -0.9073959589004517,
"step": 2390
},
{
"epoch": 2.48,
"learning_rate": 9.644087256027554e-08,
"logits/chosen": -2.3899219036102295,
"logits/rejected": -2.3310904502868652,
"logps/chosen": -270.76837158203125,
"logps/rejected": -242.98434448242188,
"loss": 0.5564,
"rewards/accuracies": 0.7171875238418579,
"rewards/chosen": -0.13495083153247833,
"rewards/margins": 0.7031392455101013,
"rewards/rejected": -0.8380901217460632,
"step": 2400
},
{
"epoch": 2.49,
"learning_rate": 9.45273631840796e-08,
"logits/chosen": -2.3676700592041016,
"logits/rejected": -2.2973744869232178,
"logps/chosen": -256.33416748046875,
"logps/rejected": -215.53921508789062,
"loss": 0.5391,
"rewards/accuracies": 0.721875011920929,
"rewards/chosen": -0.17805704474449158,
"rewards/margins": 0.6919922232627869,
"rewards/rejected": -0.8700492978096008,
"step": 2410
},
{
"epoch": 2.5,
"learning_rate": 9.261385380788366e-08,
"logits/chosen": -2.4081311225891113,
"logits/rejected": -2.315382480621338,
"logps/chosen": -267.7056579589844,
"logps/rejected": -226.94400024414062,
"loss": 0.5408,
"rewards/accuracies": 0.715624988079071,
"rewards/chosen": -0.17336109280586243,
"rewards/margins": 0.7087409496307373,
"rewards/rejected": -0.8821020126342773,
"step": 2420
},
{
"epoch": 2.51,
"learning_rate": 9.070034443168771e-08,
"logits/chosen": -2.3482162952423096,
"logits/rejected": -2.3113105297088623,
"logps/chosen": -263.73065185546875,
"logps/rejected": -226.03207397460938,
"loss": 0.541,
"rewards/accuracies": 0.707812488079071,
"rewards/chosen": -0.2133699208498001,
"rewards/margins": 0.7167404890060425,
"rewards/rejected": -0.9301104545593262,
"step": 2430
},
{
"epoch": 2.52,
"learning_rate": 8.878683505549177e-08,
"logits/chosen": -2.3624765872955322,
"logits/rejected": -2.3461878299713135,
"logps/chosen": -275.10125732421875,
"logps/rejected": -241.44970703125,
"loss": 0.5474,
"rewards/accuracies": 0.6953125,
"rewards/chosen": -0.16694757342338562,
"rewards/margins": 0.6941839456558228,
"rewards/rejected": -0.861131489276886,
"step": 2440
},
{
"epoch": 2.53,
"learning_rate": 8.687332567929582e-08,
"logits/chosen": -2.3443427085876465,
"logits/rejected": -2.325216054916382,
"logps/chosen": -267.3571472167969,
"logps/rejected": -219.15695190429688,
"loss": 0.5181,
"rewards/accuracies": 0.7124999761581421,
"rewards/chosen": -0.14058147370815277,
"rewards/margins": 0.7361636161804199,
"rewards/rejected": -0.8767450451850891,
"step": 2450
},
{
"epoch": 2.54,
"learning_rate": 8.495981630309988e-08,
"logits/chosen": -2.363398313522339,
"logits/rejected": -2.2995753288269043,
"logps/chosen": -262.1716613769531,
"logps/rejected": -236.1666259765625,
"loss": 0.5242,
"rewards/accuracies": 0.7437499761581421,
"rewards/chosen": -0.12495926767587662,
"rewards/margins": 0.7235976457595825,
"rewards/rejected": -0.8485569953918457,
"step": 2460
},
{
"epoch": 2.55,
"learning_rate": 8.304630692690395e-08,
"logits/chosen": -2.387195110321045,
"logits/rejected": -2.3478474617004395,
"logps/chosen": -283.22705078125,
"logps/rejected": -233.081298828125,
"loss": 0.5162,
"rewards/accuracies": 0.75,
"rewards/chosen": -0.11790215969085693,
"rewards/margins": 0.787219762802124,
"rewards/rejected": -0.9051219820976257,
"step": 2470
},
{
"epoch": 2.56,
"learning_rate": 8.1132797550708e-08,
"logits/chosen": -2.3662703037261963,
"logits/rejected": -2.3089377880096436,
"logps/chosen": -270.521728515625,
"logps/rejected": -229.98519897460938,
"loss": 0.5213,
"rewards/accuracies": 0.753125011920929,
"rewards/chosen": -0.14989617466926575,
"rewards/margins": 0.7254993915557861,
"rewards/rejected": -0.875395655632019,
"step": 2480
},
{
"epoch": 2.57,
"learning_rate": 7.921928817451206e-08,
"logits/chosen": -2.360635280609131,
"logits/rejected": -2.328989267349243,
"logps/chosen": -272.1587829589844,
"logps/rejected": -221.1785888671875,
"loss": 0.48,
"rewards/accuracies": 0.7640625238418579,
"rewards/chosen": -0.07772710919380188,
"rewards/margins": 0.8303700685501099,
"rewards/rejected": -0.9080971479415894,
"step": 2490
},
{
"epoch": 2.58,
"learning_rate": 7.73057787983161e-08,
"logits/chosen": -2.3522746562957764,
"logits/rejected": -2.3457765579223633,
"logps/chosen": -270.2672424316406,
"logps/rejected": -231.2746124267578,
"loss": 0.5063,
"rewards/accuracies": 0.7437499761581421,
"rewards/chosen": -0.11053086817264557,
"rewards/margins": 0.8113416433334351,
"rewards/rejected": -0.921872615814209,
"step": 2500
},
{
"epoch": 2.59,
"learning_rate": 7.539226942212017e-08,
"logits/chosen": -2.3699328899383545,
"logits/rejected": -2.322545289993286,
"logps/chosen": -271.3714599609375,
"logps/rejected": -247.797119140625,
"loss": 0.5434,
"rewards/accuracies": 0.7124999761581421,
"rewards/chosen": -0.14844852685928345,
"rewards/margins": 0.7081364989280701,
"rewards/rejected": -0.8565850257873535,
"step": 2510
},
{
"epoch": 2.6,
"learning_rate": 7.347876004592423e-08,
"logits/chosen": -2.4080615043640137,
"logits/rejected": -2.354224681854248,
"logps/chosen": -271.5215148925781,
"logps/rejected": -234.2542724609375,
"loss": 0.5274,
"rewards/accuracies": 0.7515624761581421,
"rewards/chosen": -0.19082528352737427,
"rewards/margins": 0.7402253746986389,
"rewards/rejected": -0.9310504794120789,
"step": 2520
},
{
"epoch": 2.61,
"learning_rate": 7.156525066972828e-08,
"logits/chosen": -2.4200048446655273,
"logits/rejected": -2.3449947834014893,
"logps/chosen": -284.7613830566406,
"logps/rejected": -238.87850952148438,
"loss": 0.5191,
"rewards/accuracies": 0.754687488079071,
"rewards/chosen": -0.12657888233661652,
"rewards/margins": 0.8030962944030762,
"rewards/rejected": -0.9296752214431763,
"step": 2530
},
{
"epoch": 2.62,
"learning_rate": 6.965174129353234e-08,
"logits/chosen": -2.356966733932495,
"logits/rejected": -2.29770827293396,
"logps/chosen": -261.1152038574219,
"logps/rejected": -240.00387573242188,
"loss": 0.5399,
"rewards/accuracies": 0.714062511920929,
"rewards/chosen": -0.16734077036380768,
"rewards/margins": 0.7260617613792419,
"rewards/rejected": -0.8934024572372437,
"step": 2540
},
{
"epoch": 2.63,
"learning_rate": 6.773823191733639e-08,
"logits/chosen": -2.3702635765075684,
"logits/rejected": -2.317229747772217,
"logps/chosen": -272.82440185546875,
"logps/rejected": -221.4154815673828,
"loss": 0.5175,
"rewards/accuracies": 0.7640625238418579,
"rewards/chosen": -0.1133556216955185,
"rewards/margins": 0.7367923259735107,
"rewards/rejected": -0.8501479029655457,
"step": 2550
},
{
"epoch": 2.64,
"learning_rate": 6.582472254114045e-08,
"logits/chosen": -2.4438915252685547,
"logits/rejected": -2.385143756866455,
"logps/chosen": -275.418212890625,
"logps/rejected": -239.0946502685547,
"loss": 0.5384,
"rewards/accuracies": 0.7203124761581421,
"rewards/chosen": -0.11291754245758057,
"rewards/margins": 0.7378134727478027,
"rewards/rejected": -0.8507310748100281,
"step": 2560
},
{
"epoch": 2.65,
"learning_rate": 6.391121316494451e-08,
"logits/chosen": -2.3825366497039795,
"logits/rejected": -2.3399150371551514,
"logps/chosen": -251.03720092773438,
"logps/rejected": -234.3162078857422,
"loss": 0.5305,
"rewards/accuracies": 0.7203124761581421,
"rewards/chosen": -0.16436532139778137,
"rewards/margins": 0.7372573614120483,
"rewards/rejected": -0.9016226530075073,
"step": 2570
},
{
"epoch": 2.66,
"learning_rate": 6.199770378874856e-08,
"logits/chosen": -2.37642502784729,
"logits/rejected": -2.3453516960144043,
"logps/chosen": -269.23577880859375,
"logps/rejected": -229.6055908203125,
"loss": 0.5202,
"rewards/accuracies": 0.75,
"rewards/chosen": -0.1360473334789276,
"rewards/margins": 0.7748234272003174,
"rewards/rejected": -0.9108708500862122,
"step": 2580
},
{
"epoch": 2.68,
"learning_rate": 6.008419441255262e-08,
"logits/chosen": -2.3883450031280518,
"logits/rejected": -2.3713505268096924,
"logps/chosen": -263.39288330078125,
"logps/rejected": -231.93930053710938,
"loss": 0.5264,
"rewards/accuracies": 0.731249988079071,
"rewards/chosen": -0.07088092714548111,
"rewards/margins": 0.7212754487991333,
"rewards/rejected": -0.792156457901001,
"step": 2590
},
{
"epoch": 2.69,
"learning_rate": 5.817068503635668e-08,
"logits/chosen": -2.3673863410949707,
"logits/rejected": -2.307626724243164,
"logps/chosen": -281.2471618652344,
"logps/rejected": -218.8713836669922,
"loss": 0.5129,
"rewards/accuracies": 0.75,
"rewards/chosen": -0.10778670012950897,
"rewards/margins": 0.7753579020500183,
"rewards/rejected": -0.8831446766853333,
"step": 2600
},
{
"epoch": 2.7,
"learning_rate": 5.6257175660160735e-08,
"logits/chosen": -2.3701910972595215,
"logits/rejected": -2.3337950706481934,
"logps/chosen": -273.3838806152344,
"logps/rejected": -228.57546997070312,
"loss": 0.5202,
"rewards/accuracies": 0.746874988079071,
"rewards/chosen": -0.14705790579319,
"rewards/margins": 0.7927185893058777,
"rewards/rejected": -0.939776599407196,
"step": 2610
},
{
"epoch": 2.71,
"learning_rate": 5.4343666283964784e-08,
"logits/chosen": -2.3455982208251953,
"logits/rejected": -2.3253159523010254,
"logps/chosen": -262.5321960449219,
"logps/rejected": -226.5032501220703,
"loss": 0.5441,
"rewards/accuracies": 0.729687511920929,
"rewards/chosen": -0.18480037152767181,
"rewards/margins": 0.7010098695755005,
"rewards/rejected": -0.8858101963996887,
"step": 2620
},
{
"epoch": 2.72,
"learning_rate": 5.243015690776884e-08,
"logits/chosen": -2.38588285446167,
"logits/rejected": -2.3365259170532227,
"logps/chosen": -264.8544616699219,
"logps/rejected": -217.23599243164062,
"loss": 0.5238,
"rewards/accuracies": 0.7265625,
"rewards/chosen": -0.13912011682987213,
"rewards/margins": 0.7289184331893921,
"rewards/rejected": -0.8680384755134583,
"step": 2630
},
{
"epoch": 2.73,
"learning_rate": 5.05166475315729e-08,
"logits/chosen": -2.3164820671081543,
"logits/rejected": -2.3066811561584473,
"logps/chosen": -250.55001831054688,
"logps/rejected": -226.17489624023438,
"loss": 0.5331,
"rewards/accuracies": 0.729687511920929,
"rewards/chosen": -0.15784478187561035,
"rewards/margins": 0.6939374804496765,
"rewards/rejected": -0.8517822027206421,
"step": 2640
},
{
"epoch": 2.74,
"learning_rate": 4.860313815537696e-08,
"logits/chosen": -2.4132914543151855,
"logits/rejected": -2.347529411315918,
"logps/chosen": -267.1165771484375,
"logps/rejected": -219.255615234375,
"loss": 0.5299,
"rewards/accuracies": 0.734375,
"rewards/chosen": -0.1091521754860878,
"rewards/margins": 0.7415350079536438,
"rewards/rejected": -0.8506871461868286,
"step": 2650
},
{
"epoch": 2.75,
"learning_rate": 4.668962877918101e-08,
"logits/chosen": -2.3676562309265137,
"logits/rejected": -2.3321216106414795,
"logps/chosen": -271.9303894042969,
"logps/rejected": -224.6049346923828,
"loss": 0.4929,
"rewards/accuracies": 0.753125011920929,
"rewards/chosen": -0.09524812549352646,
"rewards/margins": 0.8164734840393066,
"rewards/rejected": -0.9117215871810913,
"step": 2660
},
{
"epoch": 2.76,
"learning_rate": 4.477611940298507e-08,
"logits/chosen": -2.3520119190216064,
"logits/rejected": -2.338006019592285,
"logps/chosen": -260.5480041503906,
"logps/rejected": -230.41860961914062,
"loss": 0.5032,
"rewards/accuracies": 0.75,
"rewards/chosen": -0.12855994701385498,
"rewards/margins": 0.8062857389450073,
"rewards/rejected": -0.9348458051681519,
"step": 2670
},
{
"epoch": 2.77,
"learning_rate": 4.2862610026789124e-08,
"logits/chosen": -2.359654664993286,
"logits/rejected": -2.2997984886169434,
"logps/chosen": -267.7292175292969,
"logps/rejected": -227.33132934570312,
"loss": 0.538,
"rewards/accuracies": 0.7250000238418579,
"rewards/chosen": -0.14395280182361603,
"rewards/margins": 0.735295832157135,
"rewards/rejected": -0.8792486190795898,
"step": 2680
},
{
"epoch": 2.78,
"learning_rate": 4.0949100650593186e-08,
"logits/chosen": -2.4200806617736816,
"logits/rejected": -2.360294818878174,
"logps/chosen": -271.26324462890625,
"logps/rejected": -227.1554412841797,
"loss": 0.535,
"rewards/accuracies": 0.706250011920929,
"rewards/chosen": -0.16018937528133392,
"rewards/margins": 0.7067100405693054,
"rewards/rejected": -0.866899311542511,
"step": 2690
},
{
"epoch": 2.79,
"learning_rate": 3.903559127439724e-08,
"logits/chosen": -2.3925411701202393,
"logits/rejected": -2.331263780593872,
"logps/chosen": -271.6239318847656,
"logps/rejected": -231.4758758544922,
"loss": 0.5233,
"rewards/accuracies": 0.754687488079071,
"rewards/chosen": -0.17314036190509796,
"rewards/margins": 0.7330547571182251,
"rewards/rejected": -0.9061950445175171,
"step": 2700
},
{
"epoch": 2.8,
"learning_rate": 3.71220818982013e-08,
"logits/chosen": -2.4014270305633545,
"logits/rejected": -2.355578899383545,
"logps/chosen": -279.74566650390625,
"logps/rejected": -228.99746704101562,
"loss": 0.5655,
"rewards/accuracies": 0.6890624761581421,
"rewards/chosen": -0.2012714445590973,
"rewards/margins": 0.6636210680007935,
"rewards/rejected": -0.8648926019668579,
"step": 2710
},
{
"epoch": 2.81,
"learning_rate": 3.520857252200535e-08,
"logits/chosen": -2.419564723968506,
"logits/rejected": -2.3469746112823486,
"logps/chosen": -271.7889099121094,
"logps/rejected": -229.7169952392578,
"loss": 0.5384,
"rewards/accuracies": 0.7250000238418579,
"rewards/chosen": -0.1927814781665802,
"rewards/margins": 0.7116800546646118,
"rewards/rejected": -0.9044615626335144,
"step": 2720
},
{
"epoch": 2.82,
"learning_rate": 3.3295063145809414e-08,
"logits/chosen": -2.3487582206726074,
"logits/rejected": -2.311703681945801,
"logps/chosen": -273.3060302734375,
"logps/rejected": -253.8658905029297,
"loss": 0.5457,
"rewards/accuracies": 0.7281249761581421,
"rewards/chosen": -0.18712952733039856,
"rewards/margins": 0.712023138999939,
"rewards/rejected": -0.8991526365280151,
"step": 2730
},
{
"epoch": 2.83,
"learning_rate": 3.138155376961347e-08,
"logits/chosen": -2.3431172370910645,
"logits/rejected": -2.3230600357055664,
"logps/chosen": -256.7168273925781,
"logps/rejected": -227.06924438476562,
"loss": 0.525,
"rewards/accuracies": 0.734375,
"rewards/chosen": -0.14321701228618622,
"rewards/margins": 0.7300316095352173,
"rewards/rejected": -0.8732486963272095,
"step": 2740
},
{
"epoch": 2.84,
"learning_rate": 2.9468044393417525e-08,
"logits/chosen": -2.3237102031707764,
"logits/rejected": -2.318800449371338,
"logps/chosen": -260.8058776855469,
"logps/rejected": -226.2300262451172,
"loss": 0.54,
"rewards/accuracies": 0.7281249761581421,
"rewards/chosen": -0.18601644039154053,
"rewards/margins": 0.6762635111808777,
"rewards/rejected": -0.862280011177063,
"step": 2750
},
{
"epoch": 2.85,
"learning_rate": 2.755453501722158e-08,
"logits/chosen": -2.3754734992980957,
"logits/rejected": -2.337226629257202,
"logps/chosen": -268.50054931640625,
"logps/rejected": -225.01815795898438,
"loss": 0.531,
"rewards/accuracies": 0.7328125238418579,
"rewards/chosen": -0.15901225805282593,
"rewards/margins": 0.7717152237892151,
"rewards/rejected": -0.930727481842041,
"step": 2760
},
{
"epoch": 2.86,
"learning_rate": 2.564102564102564e-08,
"logits/chosen": -2.372312068939209,
"logits/rejected": -2.3167147636413574,
"logps/chosen": -265.208740234375,
"logps/rejected": -244.6002197265625,
"loss": 0.5552,
"rewards/accuracies": 0.7328125238418579,
"rewards/chosen": -0.14687782526016235,
"rewards/margins": 0.6419769525527954,
"rewards/rejected": -0.788854718208313,
"step": 2770
},
{
"epoch": 2.87,
"learning_rate": 2.3727516264829695e-08,
"logits/chosen": -2.3352761268615723,
"logits/rejected": -2.3101370334625244,
"logps/chosen": -266.26129150390625,
"logps/rejected": -228.8058624267578,
"loss": 0.5182,
"rewards/accuracies": 0.731249988079071,
"rewards/chosen": -0.1582275927066803,
"rewards/margins": 0.7328441739082336,
"rewards/rejected": -0.8910716772079468,
"step": 2780
},
{
"epoch": 2.88,
"learning_rate": 2.1814006888633754e-08,
"logits/chosen": -2.34548020362854,
"logits/rejected": -2.3345954418182373,
"logps/chosen": -268.830078125,
"logps/rejected": -234.70590209960938,
"loss": 0.5568,
"rewards/accuracies": 0.7124999761581421,
"rewards/chosen": -0.19458332657814026,
"rewards/margins": 0.6641789674758911,
"rewards/rejected": -0.8587621450424194,
"step": 2790
},
{
"epoch": 2.89,
"learning_rate": 1.990049751243781e-08,
"logits/chosen": -2.345353126525879,
"logits/rejected": -2.309774875640869,
"logps/chosen": -264.1852111816406,
"logps/rejected": -232.37881469726562,
"loss": 0.5252,
"rewards/accuracies": 0.737500011920929,
"rewards/chosen": -0.13199767470359802,
"rewards/margins": 0.7402657270431519,
"rewards/rejected": -0.8722633123397827,
"step": 2800
},
{
"epoch": 2.9,
"learning_rate": 1.7986988136241865e-08,
"logits/chosen": -2.3702383041381836,
"logits/rejected": -2.3145265579223633,
"logps/chosen": -271.8026428222656,
"logps/rejected": -234.308349609375,
"loss": 0.5342,
"rewards/accuracies": 0.7203124761581421,
"rewards/chosen": -0.18479926884174347,
"rewards/margins": 0.7022183537483215,
"rewards/rejected": -0.8870177268981934,
"step": 2810
},
{
"epoch": 2.91,
"learning_rate": 1.6073478760045924e-08,
"logits/chosen": -2.3550527095794678,
"logits/rejected": -2.3468880653381348,
"logps/chosen": -282.18310546875,
"logps/rejected": -234.3654327392578,
"loss": 0.5167,
"rewards/accuracies": 0.7281249761581421,
"rewards/chosen": -0.06430914252996445,
"rewards/margins": 0.790905237197876,
"rewards/rejected": -0.8552142977714539,
"step": 2820
},
{
"epoch": 2.92,
"learning_rate": 1.4159969383849981e-08,
"logits/chosen": -2.3620452880859375,
"logits/rejected": -2.345167398452759,
"logps/chosen": -280.26470947265625,
"logps/rejected": -235.08993530273438,
"loss": 0.5554,
"rewards/accuracies": 0.7171875238418579,
"rewards/chosen": -0.1752968728542328,
"rewards/margins": 0.6895657777786255,
"rewards/rejected": -0.8648626208305359,
"step": 2830
},
{
"epoch": 2.93,
"learning_rate": 1.2246460007654037e-08,
"logits/chosen": -2.3489487171173096,
"logits/rejected": -2.3238141536712646,
"logps/chosen": -278.1292724609375,
"logps/rejected": -240.13705444335938,
"loss": 0.5593,
"rewards/accuracies": 0.706250011920929,
"rewards/chosen": -0.15504539012908936,
"rewards/margins": 0.6761684417724609,
"rewards/rejected": -0.8312137722969055,
"step": 2840
},
{
"epoch": 2.94,
"learning_rate": 1.0332950631458094e-08,
"logits/chosen": -2.389085292816162,
"logits/rejected": -2.364729166030884,
"logps/chosen": -267.89630126953125,
"logps/rejected": -229.4080352783203,
"loss": 0.5274,
"rewards/accuracies": 0.746874988079071,
"rewards/chosen": -0.1344664990901947,
"rewards/margins": 0.7971667051315308,
"rewards/rejected": -0.9316331744194031,
"step": 2850
},
{
"epoch": 2.95,
"learning_rate": 8.419441255262151e-09,
"logits/chosen": -2.343165874481201,
"logits/rejected": -2.3094429969787598,
"logps/chosen": -260.5189514160156,
"logps/rejected": -233.33432006835938,
"loss": 0.5264,
"rewards/accuracies": 0.731249988079071,
"rewards/chosen": -0.17281007766723633,
"rewards/margins": 0.7437566518783569,
"rewards/rejected": -0.9165668487548828,
"step": 2860
},
{
"epoch": 2.96,
"learning_rate": 6.505931879066207e-09,
"logits/chosen": -2.3336236476898193,
"logits/rejected": -2.321570873260498,
"logps/chosen": -278.14080810546875,
"logps/rejected": -233.65365600585938,
"loss": 0.5251,
"rewards/accuracies": 0.7281249761581421,
"rewards/chosen": -0.09798727929592133,
"rewards/margins": 0.7966296076774597,
"rewards/rejected": -0.8946169018745422,
"step": 2870
},
{
"epoch": 2.97,
"learning_rate": 4.592422502870264e-09,
"logits/chosen": -2.3987810611724854,
"logits/rejected": -2.3644556999206543,
"logps/chosen": -280.18609619140625,
"logps/rejected": -233.4442138671875,
"loss": 0.5218,
"rewards/accuracies": 0.739062488079071,
"rewards/chosen": -0.1451808661222458,
"rewards/margins": 0.7559449076652527,
"rewards/rejected": -0.9011257886886597,
"step": 2880
},
{
"epoch": 2.98,
"learning_rate": 2.6789131266743202e-09,
"logits/chosen": -2.3656845092773438,
"logits/rejected": -2.3112010955810547,
"logps/chosen": -255.6254425048828,
"logps/rejected": -207.92041015625,
"loss": 0.5262,
"rewards/accuracies": 0.7359374761581421,
"rewards/chosen": -0.161960169672966,
"rewards/margins": 0.7441297769546509,
"rewards/rejected": -0.9060899615287781,
"step": 2890
},
{
"epoch": 3.0,
"learning_rate": 7.654037504783773e-10,
"logits/chosen": -2.371070384979248,
"logits/rejected": -2.3036046028137207,
"logps/chosen": -267.93048095703125,
"logps/rejected": -234.49667358398438,
"loss": 0.5172,
"rewards/accuracies": 0.7328125238418579,
"rewards/chosen": -0.12272216379642487,
"rewards/margins": 0.7688759565353394,
"rewards/rejected": -0.891598105430603,
"step": 2900
},
{
"epoch": 3.0,
"eval_logits/chosen": -2.020789384841919,
"eval_logits/rejected": -1.9665637016296387,
"eval_logps/chosen": -266.1182556152344,
"eval_logps/rejected": -232.68226623535156,
"eval_loss": 0.5263917446136475,
"eval_rewards/accuracies": 0.734499990940094,
"eval_rewards/chosen": -0.15599758923053741,
"eval_rewards/margins": 0.7454450726509094,
"eval_rewards/rejected": -0.9014427661895752,
"eval_runtime": 1638.9888,
"eval_samples_per_second": 1.22,
"eval_steps_per_second": 0.305,
"step": 2904
},
{
"epoch": 3.0,
"step": 2904,
"total_flos": 0.0,
"train_loss": 0.5636412144405126,
"train_runtime": 257163.3959,
"train_samples_per_second": 0.723,
"train_steps_per_second": 0.011
}
],
"logging_steps": 10,
"max_steps": 2904,
"num_train_epochs": 3,
"save_steps": 500,
"total_flos": 0.0,
"trial_name": null,
"trial_params": null
}