{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.9993222089532967, "eval_steps": 100, "global_step": 2904, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 1.7182130584192438e-09, "logits/chosen": -2.4422173500061035, "logits/rejected": -2.526975631713867, "logps/chosen": -235.28317260742188, "logps/rejected": -214.19320678710938, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1 }, { "epoch": 0.01, "learning_rate": 1.718213058419244e-08, "logits/chosen": -2.4839749336242676, "logits/rejected": -2.4228153228759766, "logps/chosen": -280.0798034667969, "logps/rejected": -230.15765380859375, "loss": 0.6934, "rewards/accuracies": 0.4600694477558136, "rewards/chosen": -7.412416744045913e-05, "rewards/margins": 0.0002167515631299466, "rewards/rejected": -0.0002908758178818971, "step": 10 }, { "epoch": 0.02, "learning_rate": 3.436426116838488e-08, "logits/chosen": -2.414851188659668, "logits/rejected": -2.354247570037842, "logps/chosen": -255.57260131835938, "logps/rejected": -226.37820434570312, "loss": 0.6956, "rewards/accuracies": 0.4765625, "rewards/chosen": -0.0009197852341458201, "rewards/margins": -0.004096911288797855, "rewards/rejected": 0.003177126171067357, "step": 20 }, { "epoch": 0.03, "learning_rate": 5.154639175257731e-08, "logits/chosen": -2.4241952896118164, "logits/rejected": -2.400988817214966, "logps/chosen": -272.502197265625, "logps/rejected": -227.431884765625, "loss": 0.6943, "rewards/accuracies": 0.484375, "rewards/chosen": -0.0013401806354522705, "rewards/margins": -0.0016240004915744066, "rewards/rejected": 0.0002838193904608488, "step": 30 }, { "epoch": 0.04, "learning_rate": 6.872852233676976e-08, "logits/chosen": -2.4217023849487305, "logits/rejected": -2.3694050312042236, "logps/chosen": -249.1688995361328, "logps/rejected": -220.63821411132812, "loss": 0.6953, "rewards/accuracies": 0.4625000059604645, "rewards/chosen": -0.0008647952345199883, "rewards/margins": -0.0035088094882667065, "rewards/rejected": 0.002644014311954379, "step": 40 }, { "epoch": 0.05, "learning_rate": 8.59106529209622e-08, "logits/chosen": -2.457735538482666, "logits/rejected": -2.413196563720703, "logps/chosen": -259.66912841796875, "logps/rejected": -220.27700805664062, "loss": 0.694, "rewards/accuracies": 0.484375, "rewards/chosen": 0.0021845095325261354, "rewards/margins": -0.0009308269363828003, "rewards/rejected": 0.003115336410701275, "step": 50 }, { "epoch": 0.06, "learning_rate": 1.0309278350515462e-07, "logits/chosen": -2.4587912559509277, "logits/rejected": -2.4032034873962402, "logps/chosen": -258.9931945800781, "logps/rejected": -228.43301391601562, "loss": 0.6937, "rewards/accuracies": 0.49687498807907104, "rewards/chosen": 0.00026782663189806044, "rewards/margins": -0.00029953793273307383, "rewards/rejected": 0.0005673646228387952, "step": 60 }, { "epoch": 0.07, "learning_rate": 1.202749140893471e-07, "logits/chosen": -2.438084840774536, "logits/rejected": -2.4171149730682373, "logps/chosen": -267.3536071777344, "logps/rejected": -210.99343872070312, "loss": 0.6927, "rewards/accuracies": 0.515625, "rewards/chosen": 0.0028153976891189814, "rewards/margins": 0.0016862023621797562, "rewards/rejected": 0.0011291948612779379, "step": 70 }, { "epoch": 0.08, "learning_rate": 1.3745704467353952e-07, "logits/chosen": -2.449380397796631, "logits/rejected": -2.3840742111206055, "logps/chosen": -280.45050048828125, "logps/rejected": -225.0115966796875, "loss": 0.6925, "rewards/accuracies": 0.5390625, "rewards/chosen": 0.003211159957572818, "rewards/margins": 0.0021671659778803587, "rewards/rejected": 0.0010439944453537464, "step": 80 }, { "epoch": 0.09, "learning_rate": 1.5463917525773197e-07, "logits/chosen": -2.4744973182678223, "logits/rejected": -2.3940176963806152, "logps/chosen": -271.45843505859375, "logps/rejected": -231.62643432617188, "loss": 0.6919, "rewards/accuracies": 0.520312488079071, "rewards/chosen": 0.0016121773514896631, "rewards/margins": 0.0033598211593925953, "rewards/rejected": -0.0017476438079029322, "step": 90 }, { "epoch": 0.1, "learning_rate": 1.718213058419244e-07, "logits/chosen": -2.489611864089966, "logits/rejected": -2.3956551551818848, "logps/chosen": -264.9837646484375, "logps/rejected": -215.3820343017578, "loss": 0.6918, "rewards/accuracies": 0.5, "rewards/chosen": 0.0020506351720541716, "rewards/margins": 0.0036049727350473404, "rewards/rejected": -0.0015543376794084907, "step": 100 }, { "epoch": 0.11, "learning_rate": 1.8900343642611682e-07, "logits/chosen": -2.4358983039855957, "logits/rejected": -2.3630106449127197, "logps/chosen": -283.7451477050781, "logps/rejected": -214.5529327392578, "loss": 0.6887, "rewards/accuracies": 0.557812511920929, "rewards/chosen": 0.006171266548335552, "rewards/margins": 0.00967160053551197, "rewards/rejected": -0.003500334918498993, "step": 110 }, { "epoch": 0.12, "learning_rate": 2.0618556701030925e-07, "logits/chosen": -2.4519553184509277, "logits/rejected": -2.4237570762634277, "logps/chosen": -271.7016906738281, "logps/rejected": -229.41964721679688, "loss": 0.6911, "rewards/accuracies": 0.5140625238418579, "rewards/chosen": 0.0020948010496795177, "rewards/margins": 0.004834444727748632, "rewards/rejected": -0.0027396436780691147, "step": 120 }, { "epoch": 0.13, "learning_rate": 2.2336769759450173e-07, "logits/chosen": -2.448523998260498, "logits/rejected": -2.3815808296203613, "logps/chosen": -267.52691650390625, "logps/rejected": -212.1881866455078, "loss": 0.6888, "rewards/accuracies": 0.5796874761581421, "rewards/chosen": 0.005649151746183634, "rewards/margins": 0.009708194993436337, "rewards/rejected": -0.00405904371291399, "step": 130 }, { "epoch": 0.14, "learning_rate": 2.405498281786942e-07, "logits/chosen": -2.46962308883667, "logits/rejected": -2.4132840633392334, "logps/chosen": -274.2135009765625, "logps/rejected": -212.12222290039062, "loss": 0.6881, "rewards/accuracies": 0.5718749761581421, "rewards/chosen": 0.005823948420584202, "rewards/margins": 0.01115177758038044, "rewards/rejected": -0.0053278305567801, "step": 140 }, { "epoch": 0.15, "learning_rate": 2.5773195876288655e-07, "logits/chosen": -2.4161739349365234, "logits/rejected": -2.3815319538116455, "logps/chosen": -250.0603485107422, "logps/rejected": -210.3913116455078, "loss": 0.6891, "rewards/accuracies": 0.5625, "rewards/chosen": 0.005285581108182669, "rewards/margins": 0.009132949635386467, "rewards/rejected": -0.003847368760034442, "step": 150 }, { "epoch": 0.17, "learning_rate": 2.7491408934707903e-07, "logits/chosen": -2.437349557876587, "logits/rejected": -2.374871015548706, "logps/chosen": -267.4647521972656, "logps/rejected": -218.4558563232422, "loss": 0.6883, "rewards/accuracies": 0.59375, "rewards/chosen": 0.004300659988075495, "rewards/margins": 0.010579666122794151, "rewards/rejected": -0.006279005669057369, "step": 160 }, { "epoch": 0.18, "learning_rate": 2.9209621993127146e-07, "logits/chosen": -2.400257110595703, "logits/rejected": -2.368004083633423, "logps/chosen": -277.8876953125, "logps/rejected": -221.78640747070312, "loss": 0.6878, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.005591097287833691, "rewards/margins": 0.011688882485032082, "rewards/rejected": -0.006097783800214529, "step": 170 }, { "epoch": 0.19, "learning_rate": 3.0927835051546394e-07, "logits/chosen": -2.4092414379119873, "logits/rejected": -2.376796007156372, "logps/chosen": -271.3955993652344, "logps/rejected": -226.97055053710938, "loss": 0.6844, "rewards/accuracies": 0.598437488079071, "rewards/chosen": 0.008772986009716988, "rewards/margins": 0.018974503502249718, "rewards/rejected": -0.010201516561210155, "step": 180 }, { "epoch": 0.2, "learning_rate": 3.2646048109965636e-07, "logits/chosen": -2.4419052600860596, "logits/rejected": -2.3903238773345947, "logps/chosen": -276.5497131347656, "logps/rejected": -222.6400604248047, "loss": 0.685, "rewards/accuracies": 0.5921875238418579, "rewards/chosen": 0.011585086584091187, "rewards/margins": 0.017912257462739944, "rewards/rejected": -0.006327168550342321, "step": 190 }, { "epoch": 0.21, "learning_rate": 3.436426116838488e-07, "logits/chosen": -2.419137477874756, "logits/rejected": -2.3881921768188477, "logps/chosen": -249.6503448486328, "logps/rejected": -214.3955078125, "loss": 0.6802, "rewards/accuracies": 0.6265624761581421, "rewards/chosen": 0.014122622087597847, "rewards/margins": 0.027482766658067703, "rewards/rejected": -0.01336014736443758, "step": 200 }, { "epoch": 0.22, "learning_rate": 3.608247422680412e-07, "logits/chosen": -2.4479005336761475, "logits/rejected": -2.4040849208831787, "logps/chosen": -270.5887145996094, "logps/rejected": -235.2773895263672, "loss": 0.6807, "rewards/accuracies": 0.637499988079071, "rewards/chosen": 0.011653213761746883, "rewards/margins": 0.026884321123361588, "rewards/rejected": -0.01523110456764698, "step": 210 }, { "epoch": 0.23, "learning_rate": 3.7800687285223364e-07, "logits/chosen": -2.4104199409484863, "logits/rejected": -2.3601438999176025, "logps/chosen": -263.31024169921875, "logps/rejected": -219.03466796875, "loss": 0.6787, "rewards/accuracies": 0.6796875, "rewards/chosen": 0.013443564996123314, "rewards/margins": 0.03105132281780243, "rewards/rejected": -0.017607757821679115, "step": 220 }, { "epoch": 0.24, "learning_rate": 3.9518900343642607e-07, "logits/chosen": -2.406188726425171, "logits/rejected": -2.3706843852996826, "logps/chosen": -273.1360778808594, "logps/rejected": -231.5634765625, "loss": 0.6728, "rewards/accuracies": 0.6390625238418579, "rewards/chosen": 0.01533445529639721, "rewards/margins": 0.0440392941236496, "rewards/rejected": -0.02870483696460724, "step": 230 }, { "epoch": 0.25, "learning_rate": 4.123711340206185e-07, "logits/chosen": -2.4351398944854736, "logits/rejected": -2.393178701400757, "logps/chosen": -271.65087890625, "logps/rejected": -233.6895751953125, "loss": 0.6708, "rewards/accuracies": 0.660937488079071, "rewards/chosen": 0.02181144617497921, "rewards/margins": 0.04818682745099068, "rewards/rejected": -0.026375379413366318, "step": 240 }, { "epoch": 0.26, "learning_rate": 4.2955326460481097e-07, "logits/chosen": -2.4275219440460205, "logits/rejected": -2.455544948577881, "logps/chosen": -264.905517578125, "logps/rejected": -225.69180297851562, "loss": 0.6707, "rewards/accuracies": 0.6343749761581421, "rewards/chosen": 0.020589668303728104, "rewards/margins": 0.04932643473148346, "rewards/rejected": -0.028736764565110207, "step": 250 }, { "epoch": 0.27, "learning_rate": 4.4673539518900345e-07, "logits/chosen": -2.4035658836364746, "logits/rejected": -2.4021072387695312, "logps/chosen": -249.67953491210938, "logps/rejected": -204.56887817382812, "loss": 0.6655, "rewards/accuracies": 0.671875, "rewards/chosen": 0.01743764989078045, "rewards/margins": 0.06085206940770149, "rewards/rejected": -0.043414413928985596, "step": 260 }, { "epoch": 0.28, "learning_rate": 4.639175257731959e-07, "logits/chosen": -2.4608845710754395, "logits/rejected": -2.433506488800049, "logps/chosen": -281.73260498046875, "logps/rejected": -224.5501251220703, "loss": 0.6585, "rewards/accuracies": 0.692187488079071, "rewards/chosen": 0.0280983354896307, "rewards/margins": 0.07657264918088913, "rewards/rejected": -0.04847431182861328, "step": 270 }, { "epoch": 0.29, "learning_rate": 4.810996563573884e-07, "logits/chosen": -2.4379546642303467, "logits/rejected": -2.374706745147705, "logps/chosen": -266.258544921875, "logps/rejected": -217.6811981201172, "loss": 0.6557, "rewards/accuracies": 0.698437511920929, "rewards/chosen": 0.02859863080084324, "rewards/margins": 0.08380897343158722, "rewards/rejected": -0.05521036311984062, "step": 280 }, { "epoch": 0.3, "learning_rate": 4.982817869415807e-07, "logits/chosen": -2.449183940887451, "logits/rejected": -2.4152982234954834, "logps/chosen": -251.42617797851562, "logps/rejected": -206.6394805908203, "loss": 0.6567, "rewards/accuracies": 0.6703125238418579, "rewards/chosen": 0.018911005929112434, "rewards/margins": 0.08338409662246704, "rewards/rejected": -0.06447309255599976, "step": 290 }, { "epoch": 0.31, "learning_rate": 4.982778415614236e-07, "logits/chosen": -2.431877374649048, "logits/rejected": -2.3985190391540527, "logps/chosen": -258.9649353027344, "logps/rejected": -214.5379180908203, "loss": 0.6557, "rewards/accuracies": 0.6578124761581421, "rewards/chosen": 0.022992964833974838, "rewards/margins": 0.08744792640209198, "rewards/rejected": -0.06445495784282684, "step": 300 }, { "epoch": 0.32, "learning_rate": 4.963643321852277e-07, "logits/chosen": -2.4291908740997314, "logits/rejected": -2.3937199115753174, "logps/chosen": -264.531005859375, "logps/rejected": -223.732177734375, "loss": 0.6499, "rewards/accuracies": 0.690625011920929, "rewards/chosen": 0.026863668113946915, "rewards/margins": 0.10220368206501007, "rewards/rejected": -0.07534001767635345, "step": 310 }, { "epoch": 0.33, "learning_rate": 4.944508228090318e-07, "logits/chosen": -2.4233272075653076, "logits/rejected": -2.3791823387145996, "logps/chosen": -268.5234375, "logps/rejected": -215.85610961914062, "loss": 0.6425, "rewards/accuracies": 0.6953125, "rewards/chosen": 0.03440080210566521, "rewards/margins": 0.11941323429346085, "rewards/rejected": -0.08501242101192474, "step": 320 }, { "epoch": 0.34, "learning_rate": 4.925373134328357e-07, "logits/chosen": -2.4789443016052246, "logits/rejected": -2.4048690795898438, "logps/chosen": -266.273681640625, "logps/rejected": -230.77957153320312, "loss": 0.6404, "rewards/accuracies": 0.6781250238418579, "rewards/chosen": 0.036819975823163986, "rewards/margins": 0.12693271040916443, "rewards/rejected": -0.09011274576187134, "step": 330 }, { "epoch": 0.35, "learning_rate": 4.906238040566398e-07, "logits/chosen": -2.4333298206329346, "logits/rejected": -2.3658194541931152, "logps/chosen": -252.1578826904297, "logps/rejected": -221.54611206054688, "loss": 0.6406, "rewards/accuracies": 0.692187488079071, "rewards/chosen": 0.02940729819238186, "rewards/margins": 0.12888944149017334, "rewards/rejected": -0.09948214888572693, "step": 340 }, { "epoch": 0.36, "learning_rate": 4.887102946804438e-07, "logits/chosen": -2.4523606300354004, "logits/rejected": -2.388826847076416, "logps/chosen": -263.3428039550781, "logps/rejected": -218.86074829101562, "loss": 0.6345, "rewards/accuracies": 0.667187511920929, "rewards/chosen": 0.022628765553236008, "rewards/margins": 0.14625979959964752, "rewards/rejected": -0.12363102287054062, "step": 350 }, { "epoch": 0.37, "learning_rate": 4.867967853042479e-07, "logits/chosen": -2.450970411300659, "logits/rejected": -2.3968029022216797, "logps/chosen": -267.2438049316406, "logps/rejected": -222.943359375, "loss": 0.6252, "rewards/accuracies": 0.684374988079071, "rewards/chosen": 0.03826092556118965, "rewards/margins": 0.1685018539428711, "rewards/rejected": -0.13024093210697174, "step": 360 }, { "epoch": 0.38, "learning_rate": 4.84883275928052e-07, "logits/chosen": -2.4290268421173096, "logits/rejected": -2.417224645614624, "logps/chosen": -266.91693115234375, "logps/rejected": -235.718017578125, "loss": 0.6357, "rewards/accuracies": 0.660937488079071, "rewards/chosen": 0.016346175223588943, "rewards/margins": 0.1523023247718811, "rewards/rejected": -0.13595613837242126, "step": 370 }, { "epoch": 0.39, "learning_rate": 4.82969766551856e-07, "logits/chosen": -2.412705421447754, "logits/rejected": -2.374624013900757, "logps/chosen": -261.72271728515625, "logps/rejected": -229.1892547607422, "loss": 0.635, "rewards/accuracies": 0.6640625, "rewards/chosen": 0.012038780376315117, "rewards/margins": 0.15452785789966583, "rewards/rejected": -0.14248906075954437, "step": 380 }, { "epoch": 0.4, "learning_rate": 4.810562571756601e-07, "logits/chosen": -2.453866720199585, "logits/rejected": -2.3835487365722656, "logps/chosen": -263.36273193359375, "logps/rejected": -218.2621307373047, "loss": 0.6147, "rewards/accuracies": 0.707812488079071, "rewards/chosen": 0.04369325190782547, "rewards/margins": 0.19822198152542114, "rewards/rejected": -0.15452872216701508, "step": 390 }, { "epoch": 0.41, "learning_rate": 4.791427477994642e-07, "logits/chosen": -2.446223258972168, "logits/rejected": -2.4049808979034424, "logps/chosen": -270.06732177734375, "logps/rejected": -222.06930541992188, "loss": 0.6228, "rewards/accuracies": 0.6968749761581421, "rewards/chosen": 0.036653708666563034, "rewards/margins": 0.1898169219493866, "rewards/rejected": -0.15316320955753326, "step": 400 }, { "epoch": 0.42, "learning_rate": 4.772292384232682e-07, "logits/chosen": -2.4419703483581543, "logits/rejected": -2.409404754638672, "logps/chosen": -261.24114990234375, "logps/rejected": -230.6668701171875, "loss": 0.627, "rewards/accuracies": 0.653124988079071, "rewards/chosen": 0.020566729828715324, "rewards/margins": 0.1889314353466034, "rewards/rejected": -0.1683647185564041, "step": 410 }, { "epoch": 0.43, "learning_rate": 4.753157290470723e-07, "logits/chosen": -2.4552600383758545, "logits/rejected": -2.4268581867218018, "logps/chosen": -276.3377990722656, "logps/rejected": -226.77072143554688, "loss": 0.6127, "rewards/accuracies": 0.698437511920929, "rewards/chosen": 0.02126963995397091, "rewards/margins": 0.22278845310211182, "rewards/rejected": -0.20151881873607635, "step": 420 }, { "epoch": 0.44, "learning_rate": 4.7340221967087635e-07, "logits/chosen": -2.413886308670044, "logits/rejected": -2.3775603771209717, "logps/chosen": -250.6868896484375, "logps/rejected": -207.9862518310547, "loss": 0.6152, "rewards/accuracies": 0.682812511920929, "rewards/chosen": 0.01281009428203106, "rewards/margins": 0.20990212261676788, "rewards/rejected": -0.19709204137325287, "step": 430 }, { "epoch": 0.45, "learning_rate": 4.714887102946804e-07, "logits/chosen": -2.4519877433776855, "logits/rejected": -2.3820528984069824, "logps/chosen": -274.48419189453125, "logps/rejected": -226.1211395263672, "loss": 0.6002, "rewards/accuracies": 0.7265625, "rewards/chosen": 0.03596062213182449, "rewards/margins": 0.2432943880558014, "rewards/rejected": -0.20733380317687988, "step": 440 }, { "epoch": 0.46, "learning_rate": 4.6957520091848447e-07, "logits/chosen": -2.4627573490142822, "logits/rejected": -2.424017906188965, "logps/chosen": -262.072509765625, "logps/rejected": -227.0179443359375, "loss": 0.6138, "rewards/accuracies": 0.6640625, "rewards/chosen": 0.0052271028980612755, "rewards/margins": 0.22640076279640198, "rewards/rejected": -0.22117361426353455, "step": 450 }, { "epoch": 0.48, "learning_rate": 4.6766169154228853e-07, "logits/chosen": -2.419257640838623, "logits/rejected": -2.350435733795166, "logps/chosen": -256.357421875, "logps/rejected": -219.219970703125, "loss": 0.6211, "rewards/accuracies": 0.651562511920929, "rewards/chosen": -0.010133005678653717, "rewards/margins": 0.2125161588191986, "rewards/rejected": -0.22264917194843292, "step": 460 }, { "epoch": 0.49, "learning_rate": 4.657481821660926e-07, "logits/chosen": -2.466386079788208, "logits/rejected": -2.42856764793396, "logps/chosen": -279.46905517578125, "logps/rejected": -232.4190673828125, "loss": 0.6076, "rewards/accuracies": 0.668749988079071, "rewards/chosen": 0.01027261558920145, "rewards/margins": 0.24917173385620117, "rewards/rejected": -0.2388991117477417, "step": 470 }, { "epoch": 0.5, "learning_rate": 4.6383467278989666e-07, "logits/chosen": -2.3757636547088623, "logits/rejected": -2.4026846885681152, "logps/chosen": -262.5502624511719, "logps/rejected": -227.03671264648438, "loss": 0.5955, "rewards/accuracies": 0.7109375, "rewards/chosen": 0.036844007670879364, "rewards/margins": 0.2827877700328827, "rewards/rejected": -0.24594378471374512, "step": 480 }, { "epoch": 0.51, "learning_rate": 4.6192116341370067e-07, "logits/chosen": -2.4304065704345703, "logits/rejected": -2.3491933345794678, "logps/chosen": -263.8543395996094, "logps/rejected": -223.7662353515625, "loss": 0.5863, "rewards/accuracies": 0.706250011920929, "rewards/chosen": 0.016529660671949387, "rewards/margins": 0.30523234605789185, "rewards/rejected": -0.28870272636413574, "step": 490 }, { "epoch": 0.52, "learning_rate": 4.6000765403750473e-07, "logits/chosen": -2.3874869346618652, "logits/rejected": -2.369131088256836, "logps/chosen": -273.19482421875, "logps/rejected": -219.20993041992188, "loss": 0.5999, "rewards/accuracies": 0.7015625238418579, "rewards/chosen": 0.016616690903902054, "rewards/margins": 0.27095186710357666, "rewards/rejected": -0.2543351948261261, "step": 500 }, { "epoch": 0.53, "learning_rate": 4.580941446613088e-07, "logits/chosen": -2.432959794998169, "logits/rejected": -2.4002068042755127, "logps/chosen": -268.3512268066406, "logps/rejected": -213.71481323242188, "loss": 0.5767, "rewards/accuracies": 0.7171875238418579, "rewards/chosen": 0.02091902121901512, "rewards/margins": 0.3344683051109314, "rewards/rejected": -0.3135492205619812, "step": 510 }, { "epoch": 0.54, "learning_rate": 4.5618063528511285e-07, "logits/chosen": -2.423337459564209, "logits/rejected": -2.367553234100342, "logps/chosen": -270.74822998046875, "logps/rejected": -229.89859008789062, "loss": 0.5871, "rewards/accuracies": 0.715624988079071, "rewards/chosen": 0.01809154823422432, "rewards/margins": 0.32776904106140137, "rewards/rejected": -0.30967751145362854, "step": 520 }, { "epoch": 0.55, "learning_rate": 4.542671259089169e-07, "logits/chosen": -2.407766103744507, "logits/rejected": -2.3573992252349854, "logps/chosen": -272.4050598144531, "logps/rejected": -231.3518524169922, "loss": 0.592, "rewards/accuracies": 0.690625011920929, "rewards/chosen": 0.009805982932448387, "rewards/margins": 0.31897610425949097, "rewards/rejected": -0.309170126914978, "step": 530 }, { "epoch": 0.56, "learning_rate": 4.52353616532721e-07, "logits/chosen": -2.438194990158081, "logits/rejected": -2.3653242588043213, "logps/chosen": -265.61004638671875, "logps/rejected": -224.57614135742188, "loss": 0.5941, "rewards/accuracies": 0.692187488079071, "rewards/chosen": -0.03684517741203308, "rewards/margins": 0.3040197789669037, "rewards/rejected": -0.34086498618125916, "step": 540 }, { "epoch": 0.57, "learning_rate": 4.5044010715652504e-07, "logits/chosen": -2.454868793487549, "logits/rejected": -2.4135568141937256, "logps/chosen": -262.34844970703125, "logps/rejected": -231.43362426757812, "loss": 0.598, "rewards/accuracies": 0.6968749761581421, "rewards/chosen": -0.0009088374790735543, "rewards/margins": 0.2974187731742859, "rewards/rejected": -0.29832762479782104, "step": 550 }, { "epoch": 0.58, "learning_rate": 4.485265977803291e-07, "logits/chosen": -2.378328800201416, "logits/rejected": -2.380078077316284, "logps/chosen": -268.5691833496094, "logps/rejected": -231.4857940673828, "loss": 0.5899, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": 0.003624826669692993, "rewards/margins": 0.3238300085067749, "rewards/rejected": -0.3202051818370819, "step": 560 }, { "epoch": 0.59, "learning_rate": 4.4661308840413316e-07, "logits/chosen": -2.4507269859313965, "logits/rejected": -2.4167442321777344, "logps/chosen": -268.0180969238281, "logps/rejected": -222.7704315185547, "loss": 0.5739, "rewards/accuracies": 0.7203124761581421, "rewards/chosen": 0.004687085747718811, "rewards/margins": 0.3652251660823822, "rewards/rejected": -0.3605380654335022, "step": 570 }, { "epoch": 0.6, "learning_rate": 4.446995790279372e-07, "logits/chosen": -2.4484550952911377, "logits/rejected": -2.3759899139404297, "logps/chosen": -270.5180969238281, "logps/rejected": -231.528564453125, "loss": 0.5729, "rewards/accuracies": 0.706250011920929, "rewards/chosen": 0.012805086560547352, "rewards/margins": 0.3726140558719635, "rewards/rejected": -0.3598089814186096, "step": 580 }, { "epoch": 0.61, "learning_rate": 4.4278606965174123e-07, "logits/chosen": -2.441225528717041, "logits/rejected": -2.3825039863586426, "logps/chosen": -271.6439514160156, "logps/rejected": -223.9091033935547, "loss": 0.5831, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -0.006787353660911322, "rewards/margins": 0.36242157220840454, "rewards/rejected": -0.3692089319229126, "step": 590 }, { "epoch": 0.62, "learning_rate": 4.408725602755453e-07, "logits/chosen": -2.4532737731933594, "logits/rejected": -2.4243369102478027, "logps/chosen": -266.54681396484375, "logps/rejected": -231.792236328125, "loss": 0.5821, "rewards/accuracies": 0.703125, "rewards/chosen": -0.014241993427276611, "rewards/margins": 0.35581323504447937, "rewards/rejected": -0.3700551986694336, "step": 600 }, { "epoch": 0.63, "learning_rate": 4.3895905089934936e-07, "logits/chosen": -2.441526412963867, "logits/rejected": -2.4337775707244873, "logps/chosen": -280.2122497558594, "logps/rejected": -237.7913360595703, "loss": 0.5681, "rewards/accuracies": 0.7109375, "rewards/chosen": 0.010852565988898277, "rewards/margins": 0.39948010444641113, "rewards/rejected": -0.3886275291442871, "step": 610 }, { "epoch": 0.64, "learning_rate": 4.370455415231534e-07, "logits/chosen": -2.4393959045410156, "logits/rejected": -2.3926703929901123, "logps/chosen": -257.3842468261719, "logps/rejected": -213.9781494140625, "loss": 0.575, "rewards/accuracies": 0.703125, "rewards/chosen": 8.446201536571607e-05, "rewards/margins": 0.40542498230934143, "rewards/rejected": -0.4053404927253723, "step": 620 }, { "epoch": 0.65, "learning_rate": 4.351320321469575e-07, "logits/chosen": -2.4491591453552246, "logits/rejected": -2.398932933807373, "logps/chosen": -269.54095458984375, "logps/rejected": -223.14892578125, "loss": 0.5835, "rewards/accuracies": 0.703125, "rewards/chosen": -0.04456017538905144, "rewards/margins": 0.3685937225818634, "rewards/rejected": -0.41315382719039917, "step": 630 }, { "epoch": 0.66, "learning_rate": 4.3321852277076154e-07, "logits/chosen": -2.4231886863708496, "logits/rejected": -2.3941125869750977, "logps/chosen": -278.4034729003906, "logps/rejected": -239.0082244873047, "loss": 0.5597, "rewards/accuracies": 0.71875, "rewards/chosen": -0.026434283703565598, "rewards/margins": 0.431147962808609, "rewards/rejected": -0.4575822949409485, "step": 640 }, { "epoch": 0.67, "learning_rate": 4.313050133945656e-07, "logits/chosen": -2.4560158252716064, "logits/rejected": -2.3756983280181885, "logps/chosen": -274.8756103515625, "logps/rejected": -228.38619995117188, "loss": 0.5685, "rewards/accuracies": 0.721875011920929, "rewards/chosen": -0.037291474640369415, "rewards/margins": 0.42679017782211304, "rewards/rejected": -0.46408161520957947, "step": 650 }, { "epoch": 0.68, "learning_rate": 4.2939150401836967e-07, "logits/chosen": -2.4025585651397705, "logits/rejected": -2.362170934677124, "logps/chosen": -279.28619384765625, "logps/rejected": -235.0756378173828, "loss": 0.5796, "rewards/accuracies": 0.7093750238418579, "rewards/chosen": -0.03398103266954422, "rewards/margins": 0.38851290941238403, "rewards/rejected": -0.42249393463134766, "step": 660 }, { "epoch": 0.69, "learning_rate": 4.2747799464217373e-07, "logits/chosen": -2.3948450088500977, "logits/rejected": -2.380448818206787, "logps/chosen": -265.87774658203125, "logps/rejected": -221.30856323242188, "loss": 0.5815, "rewards/accuracies": 0.6890624761581421, "rewards/chosen": -0.02471747435629368, "rewards/margins": 0.38041889667510986, "rewards/rejected": -0.4051364064216614, "step": 670 }, { "epoch": 0.7, "learning_rate": 4.255644852659778e-07, "logits/chosen": -2.428434371948242, "logits/rejected": -2.3654212951660156, "logps/chosen": -265.0726013183594, "logps/rejected": -230.83480834960938, "loss": 0.5846, "rewards/accuracies": 0.6875, "rewards/chosen": -0.058827854692935944, "rewards/margins": 0.39134788513183594, "rewards/rejected": -0.4501757025718689, "step": 680 }, { "epoch": 0.71, "learning_rate": 4.236509758897818e-07, "logits/chosen": -2.4110920429229736, "logits/rejected": -2.3683831691741943, "logps/chosen": -263.7908935546875, "logps/rejected": -228.82852172851562, "loss": 0.56, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -0.040988270193338394, "rewards/margins": 0.4240415692329407, "rewards/rejected": -0.4650298058986664, "step": 690 }, { "epoch": 0.72, "learning_rate": 4.2173746651358586e-07, "logits/chosen": -2.487917423248291, "logits/rejected": -2.386542797088623, "logps/chosen": -277.84173583984375, "logps/rejected": -226.375732421875, "loss": 0.5783, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.0548752136528492, "rewards/margins": 0.41838139295578003, "rewards/rejected": -0.4732566475868225, "step": 700 }, { "epoch": 0.73, "learning_rate": 4.198239571373899e-07, "logits/chosen": -2.4249427318573, "logits/rejected": -2.397531270980835, "logps/chosen": -280.85748291015625, "logps/rejected": -239.29263305664062, "loss": 0.5849, "rewards/accuracies": 0.714062511920929, "rewards/chosen": -0.060692138969898224, "rewards/margins": 0.412163645029068, "rewards/rejected": -0.472855806350708, "step": 710 }, { "epoch": 0.74, "learning_rate": 4.17910447761194e-07, "logits/chosen": -2.441291332244873, "logits/rejected": -2.3523077964782715, "logps/chosen": -252.23779296875, "logps/rejected": -217.0404510498047, "loss": 0.5835, "rewards/accuracies": 0.6953125, "rewards/chosen": -0.06024731323122978, "rewards/margins": 0.37643498182296753, "rewards/rejected": -0.4366822838783264, "step": 720 }, { "epoch": 0.75, "learning_rate": 4.1599693838499805e-07, "logits/chosen": -2.3433680534362793, "logits/rejected": -2.340506076812744, "logps/chosen": -248.30313110351562, "logps/rejected": -215.12033081054688, "loss": 0.5863, "rewards/accuracies": 0.684374988079071, "rewards/chosen": -0.08882372081279755, "rewards/margins": 0.3825678527355194, "rewards/rejected": -0.47139161825180054, "step": 730 }, { "epoch": 0.76, "learning_rate": 4.140834290088021e-07, "logits/chosen": -2.3932948112487793, "logits/rejected": -2.3713467121124268, "logps/chosen": -246.66860961914062, "logps/rejected": -206.83572387695312, "loss": 0.5667, "rewards/accuracies": 0.7109375, "rewards/chosen": -0.05001844838261604, "rewards/margins": 0.4407244324684143, "rewards/rejected": -0.49074286222457886, "step": 740 }, { "epoch": 0.77, "learning_rate": 4.121699196326062e-07, "logits/chosen": -2.3883261680603027, "logits/rejected": -2.3786678314208984, "logps/chosen": -266.27886962890625, "logps/rejected": -225.5245819091797, "loss": 0.5482, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -0.022055109962821007, "rewards/margins": 0.4830406606197357, "rewards/rejected": -0.5050958395004272, "step": 750 }, { "epoch": 0.78, "learning_rate": 4.1025641025641024e-07, "logits/chosen": -2.4633374214172363, "logits/rejected": -2.3861355781555176, "logps/chosen": -269.9394836425781, "logps/rejected": -229.15283203125, "loss": 0.5616, "rewards/accuracies": 0.721875011920929, "rewards/chosen": -0.0641530454158783, "rewards/margins": 0.4700705409049988, "rewards/rejected": -0.5342236161231995, "step": 760 }, { "epoch": 0.8, "learning_rate": 4.083429008802143e-07, "logits/chosen": -2.394104480743408, "logits/rejected": -2.379462718963623, "logps/chosen": -265.15008544921875, "logps/rejected": -216.9358367919922, "loss": 0.5678, "rewards/accuracies": 0.690625011920929, "rewards/chosen": -0.05918584018945694, "rewards/margins": 0.4691968560218811, "rewards/rejected": -0.5283826589584351, "step": 770 }, { "epoch": 0.81, "learning_rate": 4.0642939150401836e-07, "logits/chosen": -2.439371109008789, "logits/rejected": -2.3606810569763184, "logps/chosen": -263.8412780761719, "logps/rejected": -222.4286346435547, "loss": 0.5684, "rewards/accuracies": 0.7093750238418579, "rewards/chosen": -0.07698424160480499, "rewards/margins": 0.44548702239990234, "rewards/rejected": -0.5224713087081909, "step": 780 }, { "epoch": 0.82, "learning_rate": 4.0451588212782237e-07, "logits/chosen": -2.4030070304870605, "logits/rejected": -2.3470616340637207, "logps/chosen": -277.69207763671875, "logps/rejected": -241.84603881835938, "loss": 0.5782, "rewards/accuracies": 0.703125, "rewards/chosen": -0.08946071565151215, "rewards/margins": 0.44598451256752014, "rewards/rejected": -0.5354452729225159, "step": 790 }, { "epoch": 0.83, "learning_rate": 4.0260237275162643e-07, "logits/chosen": -2.4240808486938477, "logits/rejected": -2.379459857940674, "logps/chosen": -267.9504699707031, "logps/rejected": -237.7564239501953, "loss": 0.5631, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -0.1054706797003746, "rewards/margins": 0.47193509340286255, "rewards/rejected": -0.5774057507514954, "step": 800 }, { "epoch": 0.84, "learning_rate": 4.006888633754305e-07, "logits/chosen": -2.4196529388427734, "logits/rejected": -2.378551959991455, "logps/chosen": -265.9372253417969, "logps/rejected": -226.904296875, "loss": 0.5845, "rewards/accuracies": 0.7015625238418579, "rewards/chosen": -0.06137767434120178, "rewards/margins": 0.43556785583496094, "rewards/rejected": -0.4969455301761627, "step": 810 }, { "epoch": 0.85, "learning_rate": 3.9877535399923456e-07, "logits/chosen": -2.418703317642212, "logits/rejected": -2.3797688484191895, "logps/chosen": -272.62408447265625, "logps/rejected": -233.73880004882812, "loss": 0.5658, "rewards/accuracies": 0.723437488079071, "rewards/chosen": -0.07619120925664902, "rewards/margins": 0.48392266035079956, "rewards/rejected": -0.5601138472557068, "step": 820 }, { "epoch": 0.86, "learning_rate": 3.968618446230386e-07, "logits/chosen": -2.4377353191375732, "logits/rejected": -2.3761370182037354, "logps/chosen": -264.58331298828125, "logps/rejected": -227.14138793945312, "loss": 0.5727, "rewards/accuracies": 0.692187488079071, "rewards/chosen": -0.07739187777042389, "rewards/margins": 0.45683830976486206, "rewards/rejected": -0.5342302918434143, "step": 830 }, { "epoch": 0.87, "learning_rate": 3.949483352468427e-07, "logits/chosen": -2.4207470417022705, "logits/rejected": -2.3555076122283936, "logps/chosen": -254.27468872070312, "logps/rejected": -230.114013671875, "loss": 0.5806, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -0.09545855224132538, "rewards/margins": 0.434993177652359, "rewards/rejected": -0.5304517149925232, "step": 840 }, { "epoch": 0.88, "learning_rate": 3.9303482587064674e-07, "logits/chosen": -2.408648729324341, "logits/rejected": -2.392381429672241, "logps/chosen": -268.1350402832031, "logps/rejected": -232.40829467773438, "loss": 0.5628, "rewards/accuracies": 0.71875, "rewards/chosen": -0.10538534820079803, "rewards/margins": 0.4874357283115387, "rewards/rejected": -0.5928210616111755, "step": 850 }, { "epoch": 0.89, "learning_rate": 3.911213164944508e-07, "logits/chosen": -2.4571163654327393, "logits/rejected": -2.4369616508483887, "logps/chosen": -260.79400634765625, "logps/rejected": -234.09521484375, "loss": 0.5797, "rewards/accuracies": 0.7109375, "rewards/chosen": -0.08784712105989456, "rewards/margins": 0.4429641664028168, "rewards/rejected": -0.5308112502098083, "step": 860 }, { "epoch": 0.9, "learning_rate": 3.8920780711825487e-07, "logits/chosen": -2.4167487621307373, "logits/rejected": -2.3556063175201416, "logps/chosen": -264.9407653808594, "logps/rejected": -234.77792358398438, "loss": 0.5627, "rewards/accuracies": 0.7046874761581421, "rewards/chosen": -0.09353096783161163, "rewards/margins": 0.47954684495925903, "rewards/rejected": -0.5730777978897095, "step": 870 }, { "epoch": 0.91, "learning_rate": 3.8729429774205893e-07, "logits/chosen": -2.4163200855255127, "logits/rejected": -2.3464267253875732, "logps/chosen": -258.3912048339844, "logps/rejected": -219.3795623779297, "loss": 0.5831, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -0.13110245764255524, "rewards/margins": 0.43385767936706543, "rewards/rejected": -0.5649601221084595, "step": 880 }, { "epoch": 0.92, "learning_rate": 3.8538078836586294e-07, "logits/chosen": -2.4255287647247314, "logits/rejected": -2.355109453201294, "logps/chosen": -283.6699523925781, "logps/rejected": -235.8437957763672, "loss": 0.5491, "rewards/accuracies": 0.746874988079071, "rewards/chosen": -0.07336901128292084, "rewards/margins": 0.5329583883285522, "rewards/rejected": -0.6063274145126343, "step": 890 }, { "epoch": 0.93, "learning_rate": 3.83467278989667e-07, "logits/chosen": -2.44077205657959, "logits/rejected": -2.361696481704712, "logps/chosen": -273.6830749511719, "logps/rejected": -237.58401489257812, "loss": 0.5821, "rewards/accuracies": 0.684374988079071, "rewards/chosen": -0.09832929819822311, "rewards/margins": 0.45769548416137695, "rewards/rejected": -0.5560247302055359, "step": 900 }, { "epoch": 0.94, "learning_rate": 3.8155376961347106e-07, "logits/chosen": -2.424898624420166, "logits/rejected": -2.3718905448913574, "logps/chosen": -268.20269775390625, "logps/rejected": -231.1397247314453, "loss": 0.5609, "rewards/accuracies": 0.714062511920929, "rewards/chosen": -0.0910768061876297, "rewards/margins": 0.4992894232273102, "rewards/rejected": -0.5903662443161011, "step": 910 }, { "epoch": 0.95, "learning_rate": 3.796402602372751e-07, "logits/chosen": -2.4363582134246826, "logits/rejected": -2.377957582473755, "logps/chosen": -276.6056213378906, "logps/rejected": -233.3043975830078, "loss": 0.5433, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.05291268229484558, "rewards/margins": 0.5536981821060181, "rewards/rejected": -0.606610894203186, "step": 920 }, { "epoch": 0.96, "learning_rate": 3.777267508610792e-07, "logits/chosen": -2.3768372535705566, "logits/rejected": -2.393470287322998, "logps/chosen": -272.34576416015625, "logps/rejected": -224.0140838623047, "loss": 0.5594, "rewards/accuracies": 0.703125, "rewards/chosen": -0.10874155908823013, "rewards/margins": 0.5176196694374084, "rewards/rejected": -0.6263612508773804, "step": 930 }, { "epoch": 0.97, "learning_rate": 3.7581324148488325e-07, "logits/chosen": -2.4112918376922607, "logits/rejected": -2.388984441757202, "logps/chosen": -273.1308898925781, "logps/rejected": -239.2963409423828, "loss": 0.5473, "rewards/accuracies": 0.714062511920929, "rewards/chosen": -0.1055203229188919, "rewards/margins": 0.545116126537323, "rewards/rejected": -0.6506363749504089, "step": 940 }, { "epoch": 0.98, "learning_rate": 3.738997321086873e-07, "logits/chosen": -2.3986384868621826, "logits/rejected": -2.3761980533599854, "logps/chosen": -268.1097106933594, "logps/rejected": -234.03213500976562, "loss": 0.5697, "rewards/accuracies": 0.7015625238418579, "rewards/chosen": -0.10402411222457886, "rewards/margins": 0.4980190396308899, "rewards/rejected": -0.6020431518554688, "step": 950 }, { "epoch": 0.99, "learning_rate": 3.7198622273249137e-07, "logits/chosen": -2.408790111541748, "logits/rejected": -2.36995267868042, "logps/chosen": -284.46514892578125, "logps/rejected": -238.64352416992188, "loss": 0.5648, "rewards/accuracies": 0.7015625238418579, "rewards/chosen": -0.10631656646728516, "rewards/margins": 0.5066617131233215, "rewards/rejected": -0.6129782795906067, "step": 960 }, { "epoch": 1.0, "eval_logits/chosen": -2.0685439109802246, "eval_logits/rejected": -2.015035390853882, "eval_logps/chosen": -265.6653137207031, "eval_logps/rejected": -230.13473510742188, "eval_loss": 0.5547605752944946, "eval_rewards/accuracies": 0.7114999890327454, "eval_rewards/chosen": -0.11070162057876587, "eval_rewards/margins": 0.5359883904457092, "eval_rewards/rejected": -0.6466900110244751, "eval_runtime": 1658.2887, "eval_samples_per_second": 1.206, "eval_steps_per_second": 0.302, "step": 968 }, { "epoch": 1.0, "learning_rate": 3.7007271335629544e-07, "logits/chosen": -2.44268798828125, "logits/rejected": -2.3829662799835205, "logps/chosen": -269.1377868652344, "logps/rejected": -230.5484161376953, "loss": 0.5465, "rewards/accuracies": 0.7359374761581421, "rewards/chosen": -0.06904083490371704, "rewards/margins": 0.5532599091529846, "rewards/rejected": -0.6223007440567017, "step": 970 }, { "epoch": 1.01, "learning_rate": 3.681592039800995e-07, "logits/chosen": -2.417346715927124, "logits/rejected": -2.347588062286377, "logps/chosen": -267.7151184082031, "logps/rejected": -227.2200469970703, "loss": 0.5439, "rewards/accuracies": 0.7406250238418579, "rewards/chosen": -0.10437774658203125, "rewards/margins": 0.5515931844711304, "rewards/rejected": -0.6559709310531616, "step": 980 }, { "epoch": 1.02, "learning_rate": 3.662456946039035e-07, "logits/chosen": -2.4241156578063965, "logits/rejected": -2.3838791847229004, "logps/chosen": -277.7347717285156, "logps/rejected": -225.5718994140625, "loss": 0.5427, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.09920702129602432, "rewards/margins": 0.5613608360290527, "rewards/rejected": -0.6605678796768188, "step": 990 }, { "epoch": 1.03, "learning_rate": 3.6433218522770757e-07, "logits/chosen": -2.432286024093628, "logits/rejected": -2.350795030593872, "logps/chosen": -280.2424621582031, "logps/rejected": -238.3251495361328, "loss": 0.5514, "rewards/accuracies": 0.7203124761581421, "rewards/chosen": -0.10347769409418106, "rewards/margins": 0.5584946870803833, "rewards/rejected": -0.6619724035263062, "step": 1000 }, { "epoch": 1.04, "learning_rate": 3.6241867585151163e-07, "logits/chosen": -2.402839183807373, "logits/rejected": -2.3319945335388184, "logps/chosen": -258.05889892578125, "logps/rejected": -223.97216796875, "loss": 0.5708, "rewards/accuracies": 0.723437488079071, "rewards/chosen": -0.13955342769622803, "rewards/margins": 0.5002504587173462, "rewards/rejected": -0.6398038268089294, "step": 1010 }, { "epoch": 1.05, "learning_rate": 3.605051664753157e-07, "logits/chosen": -2.428884267807007, "logits/rejected": -2.35528826713562, "logps/chosen": -272.50457763671875, "logps/rejected": -231.6855010986328, "loss": 0.5459, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.09094850718975067, "rewards/margins": 0.5691753029823303, "rewards/rejected": -0.6601237058639526, "step": 1020 }, { "epoch": 1.06, "learning_rate": 3.5859165709911975e-07, "logits/chosen": -2.4268717765808105, "logits/rejected": -2.349578380584717, "logps/chosen": -275.3159484863281, "logps/rejected": -223.78414916992188, "loss": 0.5457, "rewards/accuracies": 0.7281249761581421, "rewards/chosen": -0.07625160366296768, "rewards/margins": 0.5891859531402588, "rewards/rejected": -0.6654375791549683, "step": 1030 }, { "epoch": 1.07, "learning_rate": 3.566781477229238e-07, "logits/chosen": -2.3932693004608154, "logits/rejected": -2.3574328422546387, "logps/chosen": -256.0076599121094, "logps/rejected": -222.4263458251953, "loss": 0.5746, "rewards/accuracies": 0.698437511920929, "rewards/chosen": -0.1472007781267166, "rewards/margins": 0.5160545706748962, "rewards/rejected": -0.6632553339004517, "step": 1040 }, { "epoch": 1.08, "learning_rate": 3.547646383467279e-07, "logits/chosen": -2.3803439140319824, "logits/rejected": -2.3705551624298096, "logps/chosen": -261.7268371582031, "logps/rejected": -223.354736328125, "loss": 0.5494, "rewards/accuracies": 0.715624988079071, "rewards/chosen": -0.09807038307189941, "rewards/margins": 0.5811691880226135, "rewards/rejected": -0.6792395114898682, "step": 1050 }, { "epoch": 1.09, "learning_rate": 3.5285112897053194e-07, "logits/chosen": -2.4154138565063477, "logits/rejected": -2.3943445682525635, "logps/chosen": -280.23260498046875, "logps/rejected": -242.3921661376953, "loss": 0.5691, "rewards/accuracies": 0.7203124761581421, "rewards/chosen": -0.12113519757986069, "rewards/margins": 0.510390043258667, "rewards/rejected": -0.6315252184867859, "step": 1060 }, { "epoch": 1.11, "learning_rate": 3.50937619594336e-07, "logits/chosen": -2.400869131088257, "logits/rejected": -2.3419127464294434, "logps/chosen": -269.11322021484375, "logps/rejected": -218.6542510986328, "loss": 0.5401, "rewards/accuracies": 0.7359374761581421, "rewards/chosen": -0.11252293735742569, "rewards/margins": 0.6148664355278015, "rewards/rejected": -0.7273894548416138, "step": 1070 }, { "epoch": 1.12, "learning_rate": 3.4902411021814007e-07, "logits/chosen": -2.398010492324829, "logits/rejected": -2.373257875442505, "logps/chosen": -261.5543212890625, "logps/rejected": -220.818603515625, "loss": 0.5628, "rewards/accuracies": 0.7093750238418579, "rewards/chosen": -0.12159077823162079, "rewards/margins": 0.5545440912246704, "rewards/rejected": -0.6761348843574524, "step": 1080 }, { "epoch": 1.13, "learning_rate": 3.4711060084194413e-07, "logits/chosen": -2.398470401763916, "logits/rejected": -2.3667078018188477, "logps/chosen": -265.3479309082031, "logps/rejected": -223.44967651367188, "loss": 0.5444, "rewards/accuracies": 0.739062488079071, "rewards/chosen": -0.13141986727714539, "rewards/margins": 0.5646113157272339, "rewards/rejected": -0.6960310935974121, "step": 1090 }, { "epoch": 1.14, "learning_rate": 3.4519709146574814e-07, "logits/chosen": -2.400993824005127, "logits/rejected": -2.34995698928833, "logps/chosen": -261.8519287109375, "logps/rejected": -225.39340209960938, "loss": 0.5674, "rewards/accuracies": 0.714062511920929, "rewards/chosen": -0.14484994113445282, "rewards/margins": 0.5336871147155762, "rewards/rejected": -0.6785370707511902, "step": 1100 }, { "epoch": 1.15, "learning_rate": 3.432835820895522e-07, "logits/chosen": -2.4596643447875977, "logits/rejected": -2.3810102939605713, "logps/chosen": -277.64697265625, "logps/rejected": -240.9037322998047, "loss": 0.5414, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -0.0647854283452034, "rewards/margins": 0.6014553308486938, "rewards/rejected": -0.6662408113479614, "step": 1110 }, { "epoch": 1.16, "learning_rate": 3.4137007271335626e-07, "logits/chosen": -2.4132676124572754, "logits/rejected": -2.3595833778381348, "logps/chosen": -269.6545104980469, "logps/rejected": -220.05996704101562, "loss": 0.5393, "rewards/accuracies": 0.7203124761581421, "rewards/chosen": -0.1037089005112648, "rewards/margins": 0.624849796295166, "rewards/rejected": -0.7285586595535278, "step": 1120 }, { "epoch": 1.17, "learning_rate": 3.394565633371603e-07, "logits/chosen": -2.4248127937316895, "logits/rejected": -2.3464858531951904, "logps/chosen": -280.0205078125, "logps/rejected": -239.3312225341797, "loss": 0.5435, "rewards/accuracies": 0.723437488079071, "rewards/chosen": -0.0879250168800354, "rewards/margins": 0.6255140900611877, "rewards/rejected": -0.7134391069412231, "step": 1130 }, { "epoch": 1.18, "learning_rate": 3.375430539609644e-07, "logits/chosen": -2.3959927558898926, "logits/rejected": -2.3690690994262695, "logps/chosen": -256.87310791015625, "logps/rejected": -227.3232879638672, "loss": 0.5591, "rewards/accuracies": 0.7281249761581421, "rewards/chosen": -0.12515565752983093, "rewards/margins": 0.5448717474937439, "rewards/rejected": -0.6700273752212524, "step": 1140 }, { "epoch": 1.19, "learning_rate": 3.3562954458476845e-07, "logits/chosen": -2.349555492401123, "logits/rejected": -2.3227591514587402, "logps/chosen": -253.9552459716797, "logps/rejected": -217.4364776611328, "loss": 0.5485, "rewards/accuracies": 0.7203124761581421, "rewards/chosen": -0.11552216857671738, "rewards/margins": 0.5672619342803955, "rewards/rejected": -0.6827840805053711, "step": 1150 }, { "epoch": 1.2, "learning_rate": 3.337160352085725e-07, "logits/chosen": -2.403165340423584, "logits/rejected": -2.312551975250244, "logps/chosen": -266.38970947265625, "logps/rejected": -219.06478881835938, "loss": 0.5408, "rewards/accuracies": 0.7406250238418579, "rewards/chosen": -0.11588382720947266, "rewards/margins": 0.6109603643417358, "rewards/rejected": -0.7268441915512085, "step": 1160 }, { "epoch": 1.21, "learning_rate": 3.3180252583237657e-07, "logits/chosen": -2.426140308380127, "logits/rejected": -2.364060640335083, "logps/chosen": -277.64935302734375, "logps/rejected": -233.41964721679688, "loss": 0.5449, "rewards/accuracies": 0.7359374761581421, "rewards/chosen": -0.15606358647346497, "rewards/margins": 0.5846539735794067, "rewards/rejected": -0.7407175302505493, "step": 1170 }, { "epoch": 1.22, "learning_rate": 3.2988901645618063e-07, "logits/chosen": -2.4718017578125, "logits/rejected": -2.4017224311828613, "logps/chosen": -267.055908203125, "logps/rejected": -247.3446044921875, "loss": 0.5608, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -0.139754980802536, "rewards/margins": 0.585044801235199, "rewards/rejected": -0.7247998714447021, "step": 1180 }, { "epoch": 1.23, "learning_rate": 3.279755070799847e-07, "logits/chosen": -2.3561863899230957, "logits/rejected": -2.336796522140503, "logps/chosen": -260.29742431640625, "logps/rejected": -226.8851776123047, "loss": 0.5631, "rewards/accuracies": 0.7046874761581421, "rewards/chosen": -0.1206628829240799, "rewards/margins": 0.5734033584594727, "rewards/rejected": -0.6940661668777466, "step": 1190 }, { "epoch": 1.24, "learning_rate": 3.260619977037887e-07, "logits/chosen": -2.383582353591919, "logits/rejected": -2.350806713104248, "logps/chosen": -261.0791015625, "logps/rejected": -222.1552734375, "loss": 0.5274, "rewards/accuracies": 0.7281249761581421, "rewards/chosen": -0.11264900863170624, "rewards/margins": 0.6694163084030151, "rewards/rejected": -0.7820653915405273, "step": 1200 }, { "epoch": 1.25, "learning_rate": 3.2414848832759277e-07, "logits/chosen": -2.3713862895965576, "logits/rejected": -2.331512928009033, "logps/chosen": -257.71209716796875, "logps/rejected": -222.45755004882812, "loss": 0.5578, "rewards/accuracies": 0.715624988079071, "rewards/chosen": -0.14970478415489197, "rewards/margins": 0.5587034821510315, "rewards/rejected": -0.7084082365036011, "step": 1210 }, { "epoch": 1.26, "learning_rate": 3.2223497895139683e-07, "logits/chosen": -2.3780715465545654, "logits/rejected": -2.3363735675811768, "logps/chosen": -272.34039306640625, "logps/rejected": -232.05892944335938, "loss": 0.5362, "rewards/accuracies": 0.734375, "rewards/chosen": -0.11652742326259613, "rewards/margins": 0.6240389347076416, "rewards/rejected": -0.7405663728713989, "step": 1220 }, { "epoch": 1.27, "learning_rate": 3.203214695752009e-07, "logits/chosen": -2.4418883323669434, "logits/rejected": -2.374427080154419, "logps/chosen": -270.23785400390625, "logps/rejected": -228.5772247314453, "loss": 0.5371, "rewards/accuracies": 0.7421875, "rewards/chosen": -0.14697889983654022, "rewards/margins": 0.6358888149261475, "rewards/rejected": -0.7828676700592041, "step": 1230 }, { "epoch": 1.28, "learning_rate": 3.1840796019900495e-07, "logits/chosen": -2.321744203567505, "logits/rejected": -2.3201723098754883, "logps/chosen": -244.7095184326172, "logps/rejected": -224.6121368408203, "loss": 0.5635, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.1834283024072647, "rewards/margins": 0.5532564520835876, "rewards/rejected": -0.7366846799850464, "step": 1240 }, { "epoch": 1.29, "learning_rate": 3.16494450822809e-07, "logits/chosen": -2.392040967941284, "logits/rejected": -2.3873581886291504, "logps/chosen": -260.9786682128906, "logps/rejected": -217.41488647460938, "loss": 0.5285, "rewards/accuracies": 0.7281249761581421, "rewards/chosen": -0.10610403120517731, "rewards/margins": 0.6539296507835388, "rewards/rejected": -0.7600336074829102, "step": 1250 }, { "epoch": 1.3, "learning_rate": 3.145809414466131e-07, "logits/chosen": -2.4131011962890625, "logits/rejected": -2.3864870071411133, "logps/chosen": -268.1712951660156, "logps/rejected": -243.2780303955078, "loss": 0.5451, "rewards/accuracies": 0.7015625238418579, "rewards/chosen": -0.14184710383415222, "rewards/margins": 0.5766977071762085, "rewards/rejected": -0.7185447216033936, "step": 1260 }, { "epoch": 1.31, "learning_rate": 3.1266743207041714e-07, "logits/chosen": -2.4226272106170654, "logits/rejected": -2.3552260398864746, "logps/chosen": -275.0395812988281, "logps/rejected": -246.27993774414062, "loss": 0.5488, "rewards/accuracies": 0.734375, "rewards/chosen": -0.08415131270885468, "rewards/margins": 0.5906020402908325, "rewards/rejected": -0.6747534275054932, "step": 1270 }, { "epoch": 1.32, "learning_rate": 3.107539226942212e-07, "logits/chosen": -2.384654998779297, "logits/rejected": -2.3474934101104736, "logps/chosen": -280.5086364746094, "logps/rejected": -249.03744506835938, "loss": 0.5093, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.09258462488651276, "rewards/margins": 0.6895312070846558, "rewards/rejected": -0.7821158170700073, "step": 1280 }, { "epoch": 1.33, "learning_rate": 3.0884041331802526e-07, "logits/chosen": -2.3851654529571533, "logits/rejected": -2.3415586948394775, "logps/chosen": -275.87249755859375, "logps/rejected": -238.9077606201172, "loss": 0.5182, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -0.12925606966018677, "rewards/margins": 0.6958507299423218, "rewards/rejected": -0.8251067399978638, "step": 1290 }, { "epoch": 1.34, "learning_rate": 3.0692690394182927e-07, "logits/chosen": -2.347446918487549, "logits/rejected": -2.3569579124450684, "logps/chosen": -254.8171844482422, "logps/rejected": -225.75048828125, "loss": 0.5475, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.12040869146585464, "rewards/margins": 0.6625940203666687, "rewards/rejected": -0.7830026745796204, "step": 1300 }, { "epoch": 1.35, "learning_rate": 3.0501339456563334e-07, "logits/chosen": -2.3930153846740723, "logits/rejected": -2.3339743614196777, "logps/chosen": -282.39208984375, "logps/rejected": -241.4809112548828, "loss": 0.5539, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -0.15331654250621796, "rewards/margins": 0.5918524861335754, "rewards/rejected": -0.7451690435409546, "step": 1310 }, { "epoch": 1.36, "learning_rate": 3.030998851894374e-07, "logits/chosen": -2.4213199615478516, "logits/rejected": -2.372786045074463, "logps/chosen": -277.1365661621094, "logps/rejected": -231.59835815429688, "loss": 0.5408, "rewards/accuracies": 0.7359374761581421, "rewards/chosen": -0.11530748754739761, "rewards/margins": 0.6329683065414429, "rewards/rejected": -0.7482757568359375, "step": 1320 }, { "epoch": 1.37, "learning_rate": 3.0118637581324146e-07, "logits/chosen": -2.3494033813476562, "logits/rejected": -2.2917075157165527, "logps/chosen": -283.3105773925781, "logps/rejected": -236.96218872070312, "loss": 0.5337, "rewards/accuracies": 0.754687488079071, "rewards/chosen": -0.16250093281269073, "rewards/margins": 0.6374029517173767, "rewards/rejected": -0.7999038696289062, "step": 1330 }, { "epoch": 1.38, "learning_rate": 2.992728664370455e-07, "logits/chosen": -2.410579204559326, "logits/rejected": -2.395461320877075, "logps/chosen": -265.9371643066406, "logps/rejected": -225.1396484375, "loss": 0.5493, "rewards/accuracies": 0.7281249761581421, "rewards/chosen": -0.15022125840187073, "rewards/margins": 0.6349747776985168, "rewards/rejected": -0.78519606590271, "step": 1340 }, { "epoch": 1.39, "learning_rate": 2.973593570608496e-07, "logits/chosen": -2.354858636856079, "logits/rejected": -2.333374500274658, "logps/chosen": -267.4329528808594, "logps/rejected": -229.37033081054688, "loss": 0.5584, "rewards/accuracies": 0.707812488079071, "rewards/chosen": -0.13673868775367737, "rewards/margins": 0.6077600121498108, "rewards/rejected": -0.7444987297058105, "step": 1350 }, { "epoch": 1.4, "learning_rate": 2.9544584768465365e-07, "logits/chosen": -2.4165000915527344, "logits/rejected": -2.389608860015869, "logps/chosen": -256.08953857421875, "logps/rejected": -234.6078643798828, "loss": 0.5434, "rewards/accuracies": 0.729687511920929, "rewards/chosen": -0.11967863142490387, "rewards/margins": 0.641726553440094, "rewards/rejected": -0.7614051699638367, "step": 1360 }, { "epoch": 1.41, "learning_rate": 2.935323383084577e-07, "logits/chosen": -2.441561460494995, "logits/rejected": -2.385502338409424, "logps/chosen": -276.8068542480469, "logps/rejected": -226.84146118164062, "loss": 0.56, "rewards/accuracies": 0.6953125, "rewards/chosen": -0.18507501482963562, "rewards/margins": 0.5834146738052368, "rewards/rejected": -0.7684897780418396, "step": 1370 }, { "epoch": 1.43, "learning_rate": 2.9161882893226177e-07, "logits/chosen": -2.4208102226257324, "logits/rejected": -2.371891498565674, "logps/chosen": -267.7480163574219, "logps/rejected": -228.93954467773438, "loss": 0.5457, "rewards/accuracies": 0.7281249761581421, "rewards/chosen": -0.12237964570522308, "rewards/margins": 0.6243287324905396, "rewards/rejected": -0.746708333492279, "step": 1380 }, { "epoch": 1.44, "learning_rate": 2.8970531955606583e-07, "logits/chosen": -2.4296679496765137, "logits/rejected": -2.384805202484131, "logps/chosen": -269.50341796875, "logps/rejected": -226.89120483398438, "loss": 0.5158, "rewards/accuracies": 0.75, "rewards/chosen": -0.11908123642206192, "rewards/margins": 0.7121685743331909, "rewards/rejected": -0.8312498331069946, "step": 1390 }, { "epoch": 1.45, "learning_rate": 2.8779181017986984e-07, "logits/chosen": -2.3762552738189697, "logits/rejected": -2.3503944873809814, "logps/chosen": -265.67657470703125, "logps/rejected": -227.09695434570312, "loss": 0.5772, "rewards/accuracies": 0.6781250238418579, "rewards/chosen": -0.15753719210624695, "rewards/margins": 0.5870577692985535, "rewards/rejected": -0.744594931602478, "step": 1400 }, { "epoch": 1.46, "learning_rate": 2.858783008036739e-07, "logits/chosen": -2.433023452758789, "logits/rejected": -2.39740252494812, "logps/chosen": -254.3402862548828, "logps/rejected": -216.28042602539062, "loss": 0.5347, "rewards/accuracies": 0.7265625, "rewards/chosen": -0.11956797540187836, "rewards/margins": 0.6502863168716431, "rewards/rejected": -0.7698543667793274, "step": 1410 }, { "epoch": 1.47, "learning_rate": 2.8396479142747797e-07, "logits/chosen": -2.3455922603607178, "logits/rejected": -2.367486000061035, "logps/chosen": -271.64971923828125, "logps/rejected": -226.45278930664062, "loss": 0.5615, "rewards/accuracies": 0.714062511920929, "rewards/chosen": -0.12722182273864746, "rewards/margins": 0.5968899726867676, "rewards/rejected": -0.7241117358207703, "step": 1420 }, { "epoch": 1.48, "learning_rate": 2.8205128205128203e-07, "logits/chosen": -2.437638759613037, "logits/rejected": -2.3999106884002686, "logps/chosen": -278.81402587890625, "logps/rejected": -233.3411407470703, "loss": 0.5484, "rewards/accuracies": 0.707812488079071, "rewards/chosen": -0.16687321662902832, "rewards/margins": 0.6460558176040649, "rewards/rejected": -0.8129289746284485, "step": 1430 }, { "epoch": 1.49, "learning_rate": 2.801377726750861e-07, "logits/chosen": -2.4091131687164307, "logits/rejected": -2.3717777729034424, "logps/chosen": -259.6352844238281, "logps/rejected": -234.7488250732422, "loss": 0.5472, "rewards/accuracies": 0.723437488079071, "rewards/chosen": -0.12939931452274323, "rewards/margins": 0.6247848272323608, "rewards/rejected": -0.7541841268539429, "step": 1440 }, { "epoch": 1.5, "learning_rate": 2.7822426329889015e-07, "logits/chosen": -2.3630013465881348, "logits/rejected": -2.3308510780334473, "logps/chosen": -265.0796813964844, "logps/rejected": -235.9908905029297, "loss": 0.5262, "rewards/accuracies": 0.745312511920929, "rewards/chosen": -0.12796229124069214, "rewards/margins": 0.6874942779541016, "rewards/rejected": -0.8154565691947937, "step": 1450 }, { "epoch": 1.51, "learning_rate": 2.763107539226942e-07, "logits/chosen": -2.384431838989258, "logits/rejected": -2.3531229496002197, "logps/chosen": -273.24407958984375, "logps/rejected": -237.3506622314453, "loss": 0.5534, "rewards/accuracies": 0.739062488079071, "rewards/chosen": -0.13397890329360962, "rewards/margins": 0.6210099458694458, "rewards/rejected": -0.7549887895584106, "step": 1460 }, { "epoch": 1.52, "learning_rate": 2.743972445464983e-07, "logits/chosen": -2.3733363151550293, "logits/rejected": -2.3497159481048584, "logps/chosen": -256.4417419433594, "logps/rejected": -218.98245239257812, "loss": 0.5466, "rewards/accuracies": 0.729687511920929, "rewards/chosen": -0.10995250940322876, "rewards/margins": 0.625704288482666, "rewards/rejected": -0.7356568574905396, "step": 1470 }, { "epoch": 1.53, "learning_rate": 2.7248373517030234e-07, "logits/chosen": -2.391806125640869, "logits/rejected": -2.3594701290130615, "logps/chosen": -261.59954833984375, "logps/rejected": -222.4551544189453, "loss": 0.5685, "rewards/accuracies": 0.6968749761581421, "rewards/chosen": -0.1836286336183548, "rewards/margins": 0.6219319105148315, "rewards/rejected": -0.8055604696273804, "step": 1480 }, { "epoch": 1.54, "learning_rate": 2.705702257941064e-07, "logits/chosen": -2.395291805267334, "logits/rejected": -2.335513114929199, "logps/chosen": -271.76495361328125, "logps/rejected": -223.66604614257812, "loss": 0.5284, "rewards/accuracies": 0.71875, "rewards/chosen": -0.13636186718940735, "rewards/margins": 0.6964784860610962, "rewards/rejected": -0.8328403234481812, "step": 1490 }, { "epoch": 1.55, "learning_rate": 2.686567164179104e-07, "logits/chosen": -2.409865140914917, "logits/rejected": -2.343632936477661, "logps/chosen": -261.92218017578125, "logps/rejected": -225.1486358642578, "loss": 0.528, "rewards/accuracies": 0.7578125, "rewards/chosen": -0.10242755711078644, "rewards/margins": 0.660239577293396, "rewards/rejected": -0.7626670598983765, "step": 1500 }, { "epoch": 1.56, "learning_rate": 2.6674320704171447e-07, "logits/chosen": -2.3943867683410645, "logits/rejected": -2.34450101852417, "logps/chosen": -267.44940185546875, "logps/rejected": -227.99658203125, "loss": 0.539, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.15267999470233917, "rewards/margins": 0.6513444185256958, "rewards/rejected": -0.8040245175361633, "step": 1510 }, { "epoch": 1.57, "learning_rate": 2.6482969766551853e-07, "logits/chosen": -2.3414573669433594, "logits/rejected": -2.3219292163848877, "logps/chosen": -263.6014709472656, "logps/rejected": -227.2323760986328, "loss": 0.5145, "rewards/accuracies": 0.7359374761581421, "rewards/chosen": -0.12107870727777481, "rewards/margins": 0.7130403518676758, "rewards/rejected": -0.8341191411018372, "step": 1520 }, { "epoch": 1.58, "learning_rate": 2.629161882893226e-07, "logits/chosen": -2.4271631240844727, "logits/rejected": -2.3703784942626953, "logps/chosen": -279.36041259765625, "logps/rejected": -239.1962127685547, "loss": 0.5668, "rewards/accuracies": 0.7171875238418579, "rewards/chosen": -0.17160965502262115, "rewards/margins": 0.6050060987472534, "rewards/rejected": -0.7766157388687134, "step": 1530 }, { "epoch": 1.59, "learning_rate": 2.6100267891312666e-07, "logits/chosen": -2.3565828800201416, "logits/rejected": -2.311810255050659, "logps/chosen": -261.40228271484375, "logps/rejected": -224.05148315429688, "loss": 0.5431, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.1451948881149292, "rewards/margins": 0.6642559170722961, "rewards/rejected": -0.8094508051872253, "step": 1540 }, { "epoch": 1.6, "learning_rate": 2.590891695369307e-07, "logits/chosen": -2.4056944847106934, "logits/rejected": -2.3697822093963623, "logps/chosen": -281.52032470703125, "logps/rejected": -240.4828338623047, "loss": 0.5358, "rewards/accuracies": 0.7359374761581421, "rewards/chosen": -0.10201269388198853, "rewards/margins": 0.6899839639663696, "rewards/rejected": -0.7919965982437134, "step": 1550 }, { "epoch": 1.61, "learning_rate": 2.571756601607348e-07, "logits/chosen": -2.399779796600342, "logits/rejected": -2.3576903343200684, "logps/chosen": -281.99725341796875, "logps/rejected": -232.83047485351562, "loss": 0.5334, "rewards/accuracies": 0.723437488079071, "rewards/chosen": -0.13619406521320343, "rewards/margins": 0.7102779150009155, "rewards/rejected": -0.8464719653129578, "step": 1560 }, { "epoch": 1.62, "learning_rate": 2.5526215078453884e-07, "logits/chosen": -2.3670992851257324, "logits/rejected": -2.330780506134033, "logps/chosen": -275.53179931640625, "logps/rejected": -241.46340942382812, "loss": 0.551, "rewards/accuracies": 0.729687511920929, "rewards/chosen": -0.15719105303287506, "rewards/margins": 0.6423493027687073, "rewards/rejected": -0.7995403409004211, "step": 1570 }, { "epoch": 1.63, "learning_rate": 2.533486414083429e-07, "logits/chosen": -2.3753514289855957, "logits/rejected": -2.358102798461914, "logps/chosen": -273.9100036621094, "logps/rejected": -226.753662109375, "loss": 0.5245, "rewards/accuracies": 0.7484375238418579, "rewards/chosen": -0.18196144700050354, "rewards/margins": 0.6733365654945374, "rewards/rejected": -0.8552980422973633, "step": 1580 }, { "epoch": 1.64, "learning_rate": 2.5143513203214697e-07, "logits/chosen": -2.405080795288086, "logits/rejected": -2.3308959007263184, "logps/chosen": -261.7924499511719, "logps/rejected": -232.6370849609375, "loss": 0.5617, "rewards/accuracies": 0.7046874761581421, "rewards/chosen": -0.19414867460727692, "rewards/margins": 0.6336308717727661, "rewards/rejected": -0.8277795910835266, "step": 1590 }, { "epoch": 1.65, "learning_rate": 2.49521622655951e-07, "logits/chosen": -2.4004340171813965, "logits/rejected": -2.3892195224761963, "logps/chosen": -263.91217041015625, "logps/rejected": -233.1723175048828, "loss": 0.5558, "rewards/accuracies": 0.739062488079071, "rewards/chosen": -0.13633672893047333, "rewards/margins": 0.6239665746688843, "rewards/rejected": -0.7603033185005188, "step": 1600 }, { "epoch": 1.66, "learning_rate": 2.4760811327975504e-07, "logits/chosen": -2.387913465499878, "logits/rejected": -2.3446784019470215, "logps/chosen": -276.1678161621094, "logps/rejected": -239.7351531982422, "loss": 0.5395, "rewards/accuracies": 0.721875011920929, "rewards/chosen": -0.12410640716552734, "rewards/margins": 0.6739387512207031, "rewards/rejected": -0.7980451583862305, "step": 1610 }, { "epoch": 1.67, "learning_rate": 2.456946039035591e-07, "logits/chosen": -2.379734992980957, "logits/rejected": -2.370614528656006, "logps/chosen": -256.16510009765625, "logps/rejected": -217.11697387695312, "loss": 0.5126, "rewards/accuracies": 0.746874988079071, "rewards/chosen": -0.1286478042602539, "rewards/margins": 0.7220448851585388, "rewards/rejected": -0.8506927490234375, "step": 1620 }, { "epoch": 1.68, "learning_rate": 2.4378109452736316e-07, "logits/chosen": -2.348128080368042, "logits/rejected": -2.3468122482299805, "logps/chosen": -268.2215576171875, "logps/rejected": -235.09970092773438, "loss": 0.537, "rewards/accuracies": 0.7359374761581421, "rewards/chosen": -0.1686348021030426, "rewards/margins": 0.6620901823043823, "rewards/rejected": -0.8307248950004578, "step": 1630 }, { "epoch": 1.69, "learning_rate": 2.418675851511672e-07, "logits/chosen": -2.385132312774658, "logits/rejected": -2.3292319774627686, "logps/chosen": -258.2908630371094, "logps/rejected": -227.2532196044922, "loss": 0.525, "rewards/accuracies": 0.7421875, "rewards/chosen": -0.1993228644132614, "rewards/margins": 0.6777428984642029, "rewards/rejected": -0.8770657777786255, "step": 1640 }, { "epoch": 1.7, "learning_rate": 2.399540757749713e-07, "logits/chosen": -2.3949036598205566, "logits/rejected": -2.36201810836792, "logps/chosen": -252.385986328125, "logps/rejected": -226.19009399414062, "loss": 0.5471, "rewards/accuracies": 0.7171875238418579, "rewards/chosen": -0.12631697952747345, "rewards/margins": 0.6516768336296082, "rewards/rejected": -0.7779937982559204, "step": 1650 }, { "epoch": 1.71, "learning_rate": 2.3804056639877535e-07, "logits/chosen": -2.352290153503418, "logits/rejected": -2.3201920986175537, "logps/chosen": -269.6744689941406, "logps/rejected": -239.7770233154297, "loss": 0.5522, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.1264854222536087, "rewards/margins": 0.6343278884887695, "rewards/rejected": -0.7608132362365723, "step": 1660 }, { "epoch": 1.72, "learning_rate": 2.361270570225794e-07, "logits/chosen": -2.369065523147583, "logits/rejected": -2.3292198181152344, "logps/chosen": -273.328857421875, "logps/rejected": -227.1344451904297, "loss": 0.5526, "rewards/accuracies": 0.7093750238418579, "rewards/chosen": -0.1670379340648651, "rewards/margins": 0.6671660542488098, "rewards/rejected": -0.8342038989067078, "step": 1670 }, { "epoch": 1.74, "learning_rate": 2.3421354764638345e-07, "logits/chosen": -2.330441951751709, "logits/rejected": -2.277582883834839, "logps/chosen": -262.3780822753906, "logps/rejected": -222.9958038330078, "loss": 0.5384, "rewards/accuracies": 0.7171875238418579, "rewards/chosen": -0.14923453330993652, "rewards/margins": 0.680332601070404, "rewards/rejected": -0.8295671343803406, "step": 1680 }, { "epoch": 1.75, "learning_rate": 2.323000382701875e-07, "logits/chosen": -2.358654260635376, "logits/rejected": -2.2956321239471436, "logps/chosen": -262.77276611328125, "logps/rejected": -223.75985717773438, "loss": 0.5338, "rewards/accuracies": 0.721875011920929, "rewards/chosen": -0.1570906639099121, "rewards/margins": 0.6962771415710449, "rewards/rejected": -0.853367805480957, "step": 1690 }, { "epoch": 1.76, "learning_rate": 2.3038652889399157e-07, "logits/chosen": -2.368196964263916, "logits/rejected": -2.365723133087158, "logps/chosen": -264.42950439453125, "logps/rejected": -235.8118896484375, "loss": 0.5235, "rewards/accuracies": 0.75, "rewards/chosen": -0.13157618045806885, "rewards/margins": 0.6999197602272034, "rewards/rejected": -0.8314959406852722, "step": 1700 }, { "epoch": 1.77, "learning_rate": 2.2847301951779563e-07, "logits/chosen": -2.3950960636138916, "logits/rejected": -2.3562042713165283, "logps/chosen": -273.6717834472656, "logps/rejected": -232.68161010742188, "loss": 0.5347, "rewards/accuracies": 0.7406250238418579, "rewards/chosen": -0.15049448609352112, "rewards/margins": 0.7236722707748413, "rewards/rejected": -0.8741667866706848, "step": 1710 }, { "epoch": 1.78, "learning_rate": 2.265595101415997e-07, "logits/chosen": -2.3810062408447266, "logits/rejected": -2.3283467292785645, "logps/chosen": -269.14801025390625, "logps/rejected": -230.89248657226562, "loss": 0.5464, "rewards/accuracies": 0.7093750238418579, "rewards/chosen": -0.15543130040168762, "rewards/margins": 0.6771457195281982, "rewards/rejected": -0.8325770497322083, "step": 1720 }, { "epoch": 1.79, "learning_rate": 2.2464600076540373e-07, "logits/chosen": -2.3967809677124023, "logits/rejected": -2.319446563720703, "logps/chosen": -279.8531188964844, "logps/rejected": -237.2471160888672, "loss": 0.5556, "rewards/accuracies": 0.7203124761581421, "rewards/chosen": -0.12391219288110733, "rewards/margins": 0.6414698362350464, "rewards/rejected": -0.7653820514678955, "step": 1730 }, { "epoch": 1.8, "learning_rate": 2.227324913892078e-07, "logits/chosen": -2.3728363513946533, "logits/rejected": -2.3333539962768555, "logps/chosen": -273.06304931640625, "logps/rejected": -229.03561401367188, "loss": 0.5477, "rewards/accuracies": 0.721875011920929, "rewards/chosen": -0.12746073305606842, "rewards/margins": 0.6716173887252808, "rewards/rejected": -0.799078106880188, "step": 1740 }, { "epoch": 1.81, "learning_rate": 2.2081898201301186e-07, "logits/chosen": -2.3346054553985596, "logits/rejected": -2.3393990993499756, "logps/chosen": -284.27886962890625, "logps/rejected": -243.03475952148438, "loss": 0.5597, "rewards/accuracies": 0.734375, "rewards/chosen": -0.14506694674491882, "rewards/margins": 0.604491114616394, "rewards/rejected": -0.7495580911636353, "step": 1750 }, { "epoch": 1.82, "learning_rate": 2.1890547263681592e-07, "logits/chosen": -2.3285999298095703, "logits/rejected": -2.319593906402588, "logps/chosen": -279.5855712890625, "logps/rejected": -228.2317657470703, "loss": 0.5416, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.13806693255901337, "rewards/margins": 0.6977806091308594, "rewards/rejected": -0.8358476758003235, "step": 1760 }, { "epoch": 1.83, "learning_rate": 2.1699196326061998e-07, "logits/chosen": -2.325159788131714, "logits/rejected": -2.3459110260009766, "logps/chosen": -272.990478515625, "logps/rejected": -230.53524780273438, "loss": 0.5335, "rewards/accuracies": 0.7328125238418579, "rewards/chosen": -0.13658006489276886, "rewards/margins": 0.7168129086494446, "rewards/rejected": -0.8533929586410522, "step": 1770 }, { "epoch": 1.84, "learning_rate": 2.1507845388442402e-07, "logits/chosen": -2.4010326862335205, "logits/rejected": -2.3410234451293945, "logps/chosen": -276.6645202636719, "logps/rejected": -219.21414184570312, "loss": 0.519, "rewards/accuracies": 0.7640625238418579, "rewards/chosen": -0.0858209878206253, "rewards/margins": 0.6973799467086792, "rewards/rejected": -0.7832010388374329, "step": 1780 }, { "epoch": 1.85, "learning_rate": 2.1316494450822808e-07, "logits/chosen": -2.3417165279388428, "logits/rejected": -2.341611623764038, "logps/chosen": -271.9029235839844, "logps/rejected": -232.0885009765625, "loss": 0.5266, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -0.13584092259407043, "rewards/margins": 0.702505886554718, "rewards/rejected": -0.8383467793464661, "step": 1790 }, { "epoch": 1.86, "learning_rate": 2.1125143513203214e-07, "logits/chosen": -2.416315793991089, "logits/rejected": -2.3927767276763916, "logps/chosen": -264.4131774902344, "logps/rejected": -227.99179077148438, "loss": 0.5479, "rewards/accuracies": 0.7203124761581421, "rewards/chosen": -0.12012849003076553, "rewards/margins": 0.6565206050872803, "rewards/rejected": -0.7766491174697876, "step": 1800 }, { "epoch": 1.87, "learning_rate": 2.093379257558362e-07, "logits/chosen": -2.3913142681121826, "logits/rejected": -2.3399569988250732, "logps/chosen": -271.81011962890625, "logps/rejected": -241.50759887695312, "loss": 0.5633, "rewards/accuracies": 0.684374988079071, "rewards/chosen": -0.1379830241203308, "rewards/margins": 0.6094950437545776, "rewards/rejected": -0.7474781274795532, "step": 1810 }, { "epoch": 1.88, "learning_rate": 2.0742441637964026e-07, "logits/chosen": -2.376094102859497, "logits/rejected": -2.3213579654693604, "logps/chosen": -245.985107421875, "logps/rejected": -217.1469268798828, "loss": 0.5383, "rewards/accuracies": 0.71875, "rewards/chosen": -0.16205313801765442, "rewards/margins": 0.6546685099601746, "rewards/rejected": -0.8167217373847961, "step": 1820 }, { "epoch": 1.89, "learning_rate": 2.055109070034443e-07, "logits/chosen": -2.4133598804473877, "logits/rejected": -2.322434663772583, "logps/chosen": -261.0830993652344, "logps/rejected": -222.3695068359375, "loss": 0.5411, "rewards/accuracies": 0.7515624761581421, "rewards/chosen": -0.12403901666402817, "rewards/margins": 0.6635336875915527, "rewards/rejected": -0.7875727415084839, "step": 1830 }, { "epoch": 1.9, "learning_rate": 2.0359739762724836e-07, "logits/chosen": -2.3970634937286377, "logits/rejected": -2.334484577178955, "logps/chosen": -289.25213623046875, "logps/rejected": -241.99313354492188, "loss": 0.4944, "rewards/accuracies": 0.7718750238418579, "rewards/chosen": -0.11199700832366943, "rewards/margins": 0.7853450179100037, "rewards/rejected": -0.8973420858383179, "step": 1840 }, { "epoch": 1.91, "learning_rate": 2.0168388825105242e-07, "logits/chosen": -2.3959927558898926, "logits/rejected": -2.340639591217041, "logps/chosen": -261.5354919433594, "logps/rejected": -230.05795288085938, "loss": 0.5513, "rewards/accuracies": 0.690625011920929, "rewards/chosen": -0.1715332418680191, "rewards/margins": 0.6642467379570007, "rewards/rejected": -0.8357800245285034, "step": 1850 }, { "epoch": 1.92, "learning_rate": 1.997703788748565e-07, "logits/chosen": -2.383922576904297, "logits/rejected": -2.3244893550872803, "logps/chosen": -261.2723693847656, "logps/rejected": -237.76644897460938, "loss": 0.5373, "rewards/accuracies": 0.7328125238418579, "rewards/chosen": -0.1503421664237976, "rewards/margins": 0.6998149156570435, "rewards/rejected": -0.8501569628715515, "step": 1860 }, { "epoch": 1.93, "learning_rate": 1.9785686949866055e-07, "logits/chosen": -2.4093589782714844, "logits/rejected": -2.353217601776123, "logps/chosen": -276.1336975097656, "logps/rejected": -229.25216674804688, "loss": 0.5199, "rewards/accuracies": 0.7484375238418579, "rewards/chosen": -0.17228913307189941, "rewards/margins": 0.713442325592041, "rewards/rejected": -0.8857313394546509, "step": 1870 }, { "epoch": 1.94, "learning_rate": 1.9594336012246458e-07, "logits/chosen": -2.3640589714050293, "logits/rejected": -2.3493194580078125, "logps/chosen": -263.424072265625, "logps/rejected": -229.8025360107422, "loss": 0.5399, "rewards/accuracies": 0.7421875, "rewards/chosen": -0.17658892273902893, "rewards/margins": 0.6467570066452026, "rewards/rejected": -0.823345959186554, "step": 1880 }, { "epoch": 1.95, "learning_rate": 1.9402985074626865e-07, "logits/chosen": -2.3777713775634766, "logits/rejected": -2.316854238510132, "logps/chosen": -261.4559631347656, "logps/rejected": -227.89718627929688, "loss": 0.5238, "rewards/accuracies": 0.745312511920929, "rewards/chosen": -0.1278056502342224, "rewards/margins": 0.7158471345901489, "rewards/rejected": -0.8436528444290161, "step": 1890 }, { "epoch": 1.96, "learning_rate": 1.921163413700727e-07, "logits/chosen": -2.392733335494995, "logits/rejected": -2.373281240463257, "logps/chosen": -281.6858825683594, "logps/rejected": -244.1327362060547, "loss": 0.5358, "rewards/accuracies": 0.746874988079071, "rewards/chosen": -0.11135254800319672, "rewards/margins": 0.7201862931251526, "rewards/rejected": -0.8315388560295105, "step": 1900 }, { "epoch": 1.97, "learning_rate": 1.9020283199387677e-07, "logits/chosen": -2.343679189682007, "logits/rejected": -2.3387351036071777, "logps/chosen": -268.7857666015625, "logps/rejected": -238.4132843017578, "loss": 0.5173, "rewards/accuracies": 0.7484375238418579, "rewards/chosen": -0.1281011551618576, "rewards/margins": 0.7418644428253174, "rewards/rejected": -0.869965672492981, "step": 1910 }, { "epoch": 1.98, "learning_rate": 1.8828932261768083e-07, "logits/chosen": -2.372236967086792, "logits/rejected": -2.3373138904571533, "logps/chosen": -265.18865966796875, "logps/rejected": -227.602783203125, "loss": 0.5272, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -0.13484473526477814, "rewards/margins": 0.7227139472961426, "rewards/rejected": -0.8575586080551147, "step": 1920 }, { "epoch": 1.99, "learning_rate": 1.8637581324148487e-07, "logits/chosen": -2.392242670059204, "logits/rejected": -2.3611741065979004, "logps/chosen": -268.98431396484375, "logps/rejected": -239.24978637695312, "loss": 0.5563, "rewards/accuracies": 0.6875, "rewards/chosen": -0.17363707721233368, "rewards/margins": 0.6345449686050415, "rewards/rejected": -0.8081819415092468, "step": 1930 }, { "epoch": 2.0, "eval_logits/chosen": -2.031247615814209, "eval_logits/rejected": -1.9774165153503418, "eval_logps/chosen": -266.0887145996094, "eval_logps/rejected": -232.28887939453125, "eval_loss": 0.5312530398368835, "eval_rewards/accuracies": 0.7315000295639038, "eval_rewards/chosen": -0.15304329991340637, "eval_rewards/margins": 0.709061324596405, "eval_rewards/rejected": -0.862104594707489, "eval_runtime": 1653.0531, "eval_samples_per_second": 1.21, "eval_steps_per_second": 0.302, "step": 1936 }, { "epoch": 2.0, "learning_rate": 1.8446230386528893e-07, "logits/chosen": -2.4022011756896973, "logits/rejected": -2.324002504348755, "logps/chosen": -260.53228759765625, "logps/rejected": -229.572998046875, "loss": 0.5515, "rewards/accuracies": 0.7265625, "rewards/chosen": -0.20086026191711426, "rewards/margins": 0.6226423978805542, "rewards/rejected": -0.8235027194023132, "step": 1940 }, { "epoch": 2.01, "learning_rate": 1.82548794489093e-07, "logits/chosen": -2.3827035427093506, "logits/rejected": -2.3521687984466553, "logps/chosen": -272.88433837890625, "logps/rejected": -228.8638458251953, "loss": 0.5432, "rewards/accuracies": 0.7359374761581421, "rewards/chosen": -0.1703009009361267, "rewards/margins": 0.6672419309616089, "rewards/rejected": -0.8375428318977356, "step": 1950 }, { "epoch": 2.02, "learning_rate": 1.8063528511289706e-07, "logits/chosen": -2.3884754180908203, "logits/rejected": -2.3221449851989746, "logps/chosen": -258.5544128417969, "logps/rejected": -224.48971557617188, "loss": 0.5424, "rewards/accuracies": 0.734375, "rewards/chosen": -0.21564142405986786, "rewards/margins": 0.6631011366844177, "rewards/rejected": -0.8787425756454468, "step": 1960 }, { "epoch": 2.03, "learning_rate": 1.7872177573670112e-07, "logits/chosen": -2.3919119834899902, "logits/rejected": -2.3483376502990723, "logps/chosen": -276.0721435546875, "logps/rejected": -239.58154296875, "loss": 0.5155, "rewards/accuracies": 0.753125011920929, "rewards/chosen": -0.13372206687927246, "rewards/margins": 0.7485690712928772, "rewards/rejected": -0.8822910189628601, "step": 1970 }, { "epoch": 2.04, "learning_rate": 1.7680826636050515e-07, "logits/chosen": -2.360553741455078, "logits/rejected": -2.3577828407287598, "logps/chosen": -257.6646728515625, "logps/rejected": -230.3592529296875, "loss": 0.5352, "rewards/accuracies": 0.739062488079071, "rewards/chosen": -0.16400420665740967, "rewards/margins": 0.7011340856552124, "rewards/rejected": -0.8651384115219116, "step": 1980 }, { "epoch": 2.06, "learning_rate": 1.7489475698430921e-07, "logits/chosen": -2.3634886741638184, "logits/rejected": -2.322361469268799, "logps/chosen": -282.5517883300781, "logps/rejected": -233.2860870361328, "loss": 0.5361, "rewards/accuracies": 0.7281249761581421, "rewards/chosen": -0.1445886641740799, "rewards/margins": 0.752617597579956, "rewards/rejected": -0.89720618724823, "step": 1990 }, { "epoch": 2.07, "learning_rate": 1.7298124760811328e-07, "logits/chosen": -2.3623392581939697, "logits/rejected": -2.319822311401367, "logps/chosen": -253.936279296875, "logps/rejected": -234.66848754882812, "loss": 0.5119, "rewards/accuracies": 0.765625, "rewards/chosen": -0.09708345681428909, "rewards/margins": 0.7794226408004761, "rewards/rejected": -0.8765062093734741, "step": 2000 }, { "epoch": 2.08, "learning_rate": 1.7106773823191734e-07, "logits/chosen": -2.3689372539520264, "logits/rejected": -2.3116328716278076, "logps/chosen": -268.030029296875, "logps/rejected": -231.4261474609375, "loss": 0.5711, "rewards/accuracies": 0.723437488079071, "rewards/chosen": -0.18213681876659393, "rewards/margins": 0.5894214510917664, "rewards/rejected": -0.7715582251548767, "step": 2010 }, { "epoch": 2.09, "learning_rate": 1.691542288557214e-07, "logits/chosen": -2.3579487800598145, "logits/rejected": -2.3073954582214355, "logps/chosen": -260.38775634765625, "logps/rejected": -225.4844207763672, "loss": 0.5156, "rewards/accuracies": 0.75, "rewards/chosen": -0.14314624667167664, "rewards/margins": 0.7362397313117981, "rewards/rejected": -0.8793859481811523, "step": 2020 }, { "epoch": 2.1, "learning_rate": 1.6724071947952544e-07, "logits/chosen": -2.3365800380706787, "logits/rejected": -2.2837400436401367, "logps/chosen": -266.9193420410156, "logps/rejected": -238.1469268798828, "loss": 0.5267, "rewards/accuracies": 0.7359374761581421, "rewards/chosen": -0.12520021200180054, "rewards/margins": 0.7114790678024292, "rewards/rejected": -0.8366793394088745, "step": 2030 }, { "epoch": 2.11, "learning_rate": 1.653272101033295e-07, "logits/chosen": -2.400599479675293, "logits/rejected": -2.35431170463562, "logps/chosen": -262.89825439453125, "logps/rejected": -228.84310913085938, "loss": 0.559, "rewards/accuracies": 0.6968749761581421, "rewards/chosen": -0.15787425637245178, "rewards/margins": 0.6567065119743347, "rewards/rejected": -0.8145807385444641, "step": 2040 }, { "epoch": 2.12, "learning_rate": 1.6341370072713356e-07, "logits/chosen": -2.400794267654419, "logits/rejected": -2.3113760948181152, "logps/chosen": -246.63400268554688, "logps/rejected": -223.24923706054688, "loss": 0.5255, "rewards/accuracies": 0.7421875, "rewards/chosen": -0.14660078287124634, "rewards/margins": 0.7161286473274231, "rewards/rejected": -0.8627294301986694, "step": 2050 }, { "epoch": 2.13, "learning_rate": 1.6150019135093762e-07, "logits/chosen": -2.344493865966797, "logits/rejected": -2.3352556228637695, "logps/chosen": -278.28839111328125, "logps/rejected": -243.4131622314453, "loss": 0.5521, "rewards/accuracies": 0.729687511920929, "rewards/chosen": -0.13938100636005402, "rewards/margins": 0.6712489128112793, "rewards/rejected": -0.8106300234794617, "step": 2060 }, { "epoch": 2.14, "learning_rate": 1.5958668197474169e-07, "logits/chosen": -2.3649954795837402, "logits/rejected": -2.3345046043395996, "logps/chosen": -286.7098083496094, "logps/rejected": -232.02517700195312, "loss": 0.5329, "rewards/accuracies": 0.7265625, "rewards/chosen": -0.10944189131259918, "rewards/margins": 0.7143247723579407, "rewards/rejected": -0.8237665891647339, "step": 2070 }, { "epoch": 2.15, "learning_rate": 1.5767317259854572e-07, "logits/chosen": -2.401505947113037, "logits/rejected": -2.3687071800231934, "logps/chosen": -252.4344940185547, "logps/rejected": -230.4383087158203, "loss": 0.5534, "rewards/accuracies": 0.7265625, "rewards/chosen": -0.16689328849315643, "rewards/margins": 0.670124351978302, "rewards/rejected": -0.8370175361633301, "step": 2080 }, { "epoch": 2.16, "learning_rate": 1.5575966322234978e-07, "logits/chosen": -2.3800606727600098, "logits/rejected": -2.3494057655334473, "logps/chosen": -277.4918212890625, "logps/rejected": -236.21533203125, "loss": 0.5376, "rewards/accuracies": 0.721875011920929, "rewards/chosen": -0.11864397674798965, "rewards/margins": 0.7518168687820435, "rewards/rejected": -0.8704608082771301, "step": 2090 }, { "epoch": 2.17, "learning_rate": 1.5384615384615385e-07, "logits/chosen": -2.361454963684082, "logits/rejected": -2.327955722808838, "logps/chosen": -283.4490051269531, "logps/rejected": -234.3018035888672, "loss": 0.5348, "rewards/accuracies": 0.729687511920929, "rewards/chosen": -0.1568828970193863, "rewards/margins": 0.7170850038528442, "rewards/rejected": -0.8739679455757141, "step": 2100 }, { "epoch": 2.18, "learning_rate": 1.519326444699579e-07, "logits/chosen": -2.3744759559631348, "logits/rejected": -2.358099937438965, "logps/chosen": -277.69195556640625, "logps/rejected": -241.0815887451172, "loss": 0.5268, "rewards/accuracies": 0.7359374761581421, "rewards/chosen": -0.12414580583572388, "rewards/margins": 0.7003077864646912, "rewards/rejected": -0.8244536519050598, "step": 2110 }, { "epoch": 2.19, "learning_rate": 1.5001913509376197e-07, "logits/chosen": -2.4057154655456543, "logits/rejected": -2.3354310989379883, "logps/chosen": -260.5264892578125, "logps/rejected": -222.79110717773438, "loss": 0.5231, "rewards/accuracies": 0.7265625, "rewards/chosen": -0.15866820514202118, "rewards/margins": 0.7241252660751343, "rewards/rejected": -0.8827934265136719, "step": 2120 }, { "epoch": 2.2, "learning_rate": 1.4810562571756603e-07, "logits/chosen": -2.3917877674102783, "logits/rejected": -2.355861186981201, "logps/chosen": -283.84320068359375, "logps/rejected": -230.14404296875, "loss": 0.5136, "rewards/accuracies": 0.7515624761581421, "rewards/chosen": -0.13140609860420227, "rewards/margins": 0.7532661557197571, "rewards/rejected": -0.8846722841262817, "step": 2130 }, { "epoch": 2.21, "learning_rate": 1.4619211634137007e-07, "logits/chosen": -2.387089967727661, "logits/rejected": -2.314539670944214, "logps/chosen": -268.9212341308594, "logps/rejected": -232.61898803710938, "loss": 0.4982, "rewards/accuracies": 0.7484375238418579, "rewards/chosen": -0.10855790227651596, "rewards/margins": 0.8006644248962402, "rewards/rejected": -0.9092223048210144, "step": 2140 }, { "epoch": 2.22, "learning_rate": 1.4427860696517413e-07, "logits/chosen": -2.3513126373291016, "logits/rejected": -2.3646275997161865, "logps/chosen": -262.60345458984375, "logps/rejected": -245.1242218017578, "loss": 0.5552, "rewards/accuracies": 0.71875, "rewards/chosen": -0.1720902919769287, "rewards/margins": 0.6639872193336487, "rewards/rejected": -0.8360773921012878, "step": 2150 }, { "epoch": 2.23, "learning_rate": 1.423650975889782e-07, "logits/chosen": -2.3261661529541016, "logits/rejected": -2.308116912841797, "logps/chosen": -271.75543212890625, "logps/rejected": -232.4984588623047, "loss": 0.5473, "rewards/accuracies": 0.721875011920929, "rewards/chosen": -0.15690350532531738, "rewards/margins": 0.6513178944587708, "rewards/rejected": -0.8082213401794434, "step": 2160 }, { "epoch": 2.24, "learning_rate": 1.4045158821278225e-07, "logits/chosen": -2.3861842155456543, "logits/rejected": -2.354062080383301, "logps/chosen": -280.88116455078125, "logps/rejected": -232.3457794189453, "loss": 0.5446, "rewards/accuracies": 0.714062511920929, "rewards/chosen": -0.15454408526420593, "rewards/margins": 0.6919762492179871, "rewards/rejected": -0.8465203046798706, "step": 2170 }, { "epoch": 2.25, "learning_rate": 1.3853807883658632e-07, "logits/chosen": -2.3726694583892822, "logits/rejected": -2.3578009605407715, "logps/chosen": -275.0099182128906, "logps/rejected": -245.10873413085938, "loss": 0.4978, "rewards/accuracies": 0.78125, "rewards/chosen": -0.08260266482830048, "rewards/margins": 0.7924613952636719, "rewards/rejected": -0.8750640749931335, "step": 2180 }, { "epoch": 2.26, "learning_rate": 1.3662456946039035e-07, "logits/chosen": -2.339141368865967, "logits/rejected": -2.3254787921905518, "logps/chosen": -258.27996826171875, "logps/rejected": -228.74398803710938, "loss": 0.547, "rewards/accuracies": 0.7328125238418579, "rewards/chosen": -0.15882189571857452, "rewards/margins": 0.6925019025802612, "rewards/rejected": -0.8513237833976746, "step": 2190 }, { "epoch": 2.27, "learning_rate": 1.3471106008419441e-07, "logits/chosen": -2.406780958175659, "logits/rejected": -2.3241848945617676, "logps/chosen": -263.73931884765625, "logps/rejected": -224.1796417236328, "loss": 0.5389, "rewards/accuracies": 0.7203124761581421, "rewards/chosen": -0.1274387389421463, "rewards/margins": 0.7019168138504028, "rewards/rejected": -0.8293555378913879, "step": 2200 }, { "epoch": 2.28, "learning_rate": 1.3279755070799848e-07, "logits/chosen": -2.3664703369140625, "logits/rejected": -2.318784713745117, "logps/chosen": -261.4429626464844, "logps/rejected": -229.2322540283203, "loss": 0.5361, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -0.13853470981121063, "rewards/margins": 0.7004620432853699, "rewards/rejected": -0.8389967083930969, "step": 2210 }, { "epoch": 2.29, "learning_rate": 1.3088404133180254e-07, "logits/chosen": -2.3630192279815674, "logits/rejected": -2.3432974815368652, "logps/chosen": -273.27850341796875, "logps/rejected": -241.81771850585938, "loss": 0.5111, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -0.10639991611242294, "rewards/margins": 0.7741705179214478, "rewards/rejected": -0.8805704116821289, "step": 2220 }, { "epoch": 2.3, "learning_rate": 1.289705319556066e-07, "logits/chosen": -2.3936803340911865, "logits/rejected": -2.3308169841766357, "logps/chosen": -269.2107849121094, "logps/rejected": -238.88394165039062, "loss": 0.5046, "rewards/accuracies": 0.739062488079071, "rewards/chosen": -0.1324814110994339, "rewards/margins": 0.8152027130126953, "rewards/rejected": -0.947684109210968, "step": 2230 }, { "epoch": 2.31, "learning_rate": 1.2705702257941064e-07, "logits/chosen": -2.3774123191833496, "logits/rejected": -2.312371253967285, "logps/chosen": -247.8770751953125, "logps/rejected": -221.15536499023438, "loss": 0.5219, "rewards/accuracies": 0.753125011920929, "rewards/chosen": -0.13058343529701233, "rewards/margins": 0.7142859697341919, "rewards/rejected": -0.8448693156242371, "step": 2240 }, { "epoch": 2.32, "learning_rate": 1.251435132032147e-07, "logits/chosen": -2.399721622467041, "logits/rejected": -2.319878101348877, "logps/chosen": -299.5263671875, "logps/rejected": -242.10964965820312, "loss": 0.5404, "rewards/accuracies": 0.721875011920929, "rewards/chosen": -0.1261121928691864, "rewards/margins": 0.7005943059921265, "rewards/rejected": -0.8267065286636353, "step": 2250 }, { "epoch": 2.33, "learning_rate": 1.2323000382701873e-07, "logits/chosen": -2.3750648498535156, "logits/rejected": -2.33599591255188, "logps/chosen": -281.7725830078125, "logps/rejected": -241.2296600341797, "loss": 0.5367, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -0.12657758593559265, "rewards/margins": 0.7330743074417114, "rewards/rejected": -0.8596519231796265, "step": 2260 }, { "epoch": 2.34, "learning_rate": 1.213164944508228e-07, "logits/chosen": -2.3655242919921875, "logits/rejected": -2.309812068939209, "logps/chosen": -262.09954833984375, "logps/rejected": -234.92758178710938, "loss": 0.551, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.13195794820785522, "rewards/margins": 0.6896259784698486, "rewards/rejected": -0.8215838670730591, "step": 2270 }, { "epoch": 2.35, "learning_rate": 1.1940298507462686e-07, "logits/chosen": -2.4004878997802734, "logits/rejected": -2.3559048175811768, "logps/chosen": -270.6529846191406, "logps/rejected": -242.43368530273438, "loss": 0.5288, "rewards/accuracies": 0.754687488079071, "rewards/chosen": -0.17092928290367126, "rewards/margins": 0.7322403192520142, "rewards/rejected": -0.9031696319580078, "step": 2280 }, { "epoch": 2.37, "learning_rate": 1.1748947569843092e-07, "logits/chosen": -2.2949576377868652, "logits/rejected": -2.279416561126709, "logps/chosen": -253.159423828125, "logps/rejected": -220.51596069335938, "loss": 0.5365, "rewards/accuracies": 0.7406250238418579, "rewards/chosen": -0.14947687089443207, "rewards/margins": 0.7013243436813354, "rewards/rejected": -0.8508013486862183, "step": 2290 }, { "epoch": 2.38, "learning_rate": 1.1557596632223497e-07, "logits/chosen": -2.376298427581787, "logits/rejected": -2.301685333251953, "logps/chosen": -269.91168212890625, "logps/rejected": -221.9505615234375, "loss": 0.552, "rewards/accuracies": 0.715624988079071, "rewards/chosen": -0.19331762194633484, "rewards/margins": 0.7175094485282898, "rewards/rejected": -0.9108270406723022, "step": 2300 }, { "epoch": 2.39, "learning_rate": 1.1366245694603903e-07, "logits/chosen": -2.3533215522766113, "logits/rejected": -2.3328850269317627, "logps/chosen": -258.6532897949219, "logps/rejected": -218.423095703125, "loss": 0.5498, "rewards/accuracies": 0.723437488079071, "rewards/chosen": -0.14851602911949158, "rewards/margins": 0.6814883947372437, "rewards/rejected": -0.8300043940544128, "step": 2310 }, { "epoch": 2.4, "learning_rate": 1.1174894756984308e-07, "logits/chosen": -2.3606135845184326, "logits/rejected": -2.3032069206237793, "logps/chosen": -252.78604125976562, "logps/rejected": -211.8778076171875, "loss": 0.5301, "rewards/accuracies": 0.7328125238418579, "rewards/chosen": -0.16221049427986145, "rewards/margins": 0.706489086151123, "rewards/rejected": -0.8686995506286621, "step": 2320 }, { "epoch": 2.41, "learning_rate": 1.0983543819364714e-07, "logits/chosen": -2.406503677368164, "logits/rejected": -2.3547933101654053, "logps/chosen": -278.7993469238281, "logps/rejected": -237.6390380859375, "loss": 0.5369, "rewards/accuracies": 0.745312511920929, "rewards/chosen": -0.1446961909532547, "rewards/margins": 0.725204348564148, "rewards/rejected": -0.8699005842208862, "step": 2330 }, { "epoch": 2.42, "learning_rate": 1.079219288174512e-07, "logits/chosen": -2.3510079383850098, "logits/rejected": -2.296820640563965, "logps/chosen": -261.72235107421875, "logps/rejected": -243.376708984375, "loss": 0.546, "rewards/accuracies": 0.7203124761581421, "rewards/chosen": -0.17369435727596283, "rewards/margins": 0.6820122599601746, "rewards/rejected": -0.8557065725326538, "step": 2340 }, { "epoch": 2.43, "learning_rate": 1.0600841944125525e-07, "logits/chosen": -2.408764362335205, "logits/rejected": -2.336327314376831, "logps/chosen": -258.2755126953125, "logps/rejected": -228.48385620117188, "loss": 0.5113, "rewards/accuracies": 0.760937511920929, "rewards/chosen": -0.15298260748386383, "rewards/margins": 0.7763695120811462, "rewards/rejected": -0.9293521642684937, "step": 2350 }, { "epoch": 2.44, "learning_rate": 1.0409491006505931e-07, "logits/chosen": -2.367042064666748, "logits/rejected": -2.317692279815674, "logps/chosen": -266.90362548828125, "logps/rejected": -228.7984619140625, "loss": 0.5205, "rewards/accuracies": 0.75, "rewards/chosen": -0.17633156478405, "rewards/margins": 0.7394440770149231, "rewards/rejected": -0.9157756567001343, "step": 2360 }, { "epoch": 2.45, "learning_rate": 1.0218140068886336e-07, "logits/chosen": -2.372544050216675, "logits/rejected": -2.297020673751831, "logps/chosen": -267.892822265625, "logps/rejected": -229.27035522460938, "loss": 0.5245, "rewards/accuracies": 0.7281249761581421, "rewards/chosen": -0.1406041383743286, "rewards/margins": 0.7415550351142883, "rewards/rejected": -0.8821592330932617, "step": 2370 }, { "epoch": 2.46, "learning_rate": 1.0026789131266743e-07, "logits/chosen": -2.380431890487671, "logits/rejected": -2.3486175537109375, "logps/chosen": -260.1377868652344, "logps/rejected": -228.09994506835938, "loss": 0.5274, "rewards/accuracies": 0.734375, "rewards/chosen": -0.16361048817634583, "rewards/margins": 0.6924134492874146, "rewards/rejected": -0.8560239672660828, "step": 2380 }, { "epoch": 2.47, "learning_rate": 9.835438193647149e-08, "logits/chosen": -2.364969491958618, "logits/rejected": -2.3163743019104004, "logps/chosen": -261.2802429199219, "logps/rejected": -235.7176055908203, "loss": 0.5158, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -0.11532945930957794, "rewards/margins": 0.7920664548873901, "rewards/rejected": -0.9073959589004517, "step": 2390 }, { "epoch": 2.48, "learning_rate": 9.644087256027554e-08, "logits/chosen": -2.3899219036102295, "logits/rejected": -2.3310904502868652, "logps/chosen": -270.76837158203125, "logps/rejected": -242.98434448242188, "loss": 0.5564, "rewards/accuracies": 0.7171875238418579, "rewards/chosen": -0.13495083153247833, "rewards/margins": 0.7031392455101013, "rewards/rejected": -0.8380901217460632, "step": 2400 }, { "epoch": 2.49, "learning_rate": 9.45273631840796e-08, "logits/chosen": -2.3676700592041016, "logits/rejected": -2.2973744869232178, "logps/chosen": -256.33416748046875, "logps/rejected": -215.53921508789062, "loss": 0.5391, "rewards/accuracies": 0.721875011920929, "rewards/chosen": -0.17805704474449158, "rewards/margins": 0.6919922232627869, "rewards/rejected": -0.8700492978096008, "step": 2410 }, { "epoch": 2.5, "learning_rate": 9.261385380788366e-08, "logits/chosen": -2.4081311225891113, "logits/rejected": -2.315382480621338, "logps/chosen": -267.7056579589844, "logps/rejected": -226.94400024414062, "loss": 0.5408, "rewards/accuracies": 0.715624988079071, "rewards/chosen": -0.17336109280586243, "rewards/margins": 0.7087409496307373, "rewards/rejected": -0.8821020126342773, "step": 2420 }, { "epoch": 2.51, "learning_rate": 9.070034443168771e-08, "logits/chosen": -2.3482162952423096, "logits/rejected": -2.3113105297088623, "logps/chosen": -263.73065185546875, "logps/rejected": -226.03207397460938, "loss": 0.541, "rewards/accuracies": 0.707812488079071, "rewards/chosen": -0.2133699208498001, "rewards/margins": 0.7167404890060425, "rewards/rejected": -0.9301104545593262, "step": 2430 }, { "epoch": 2.52, "learning_rate": 8.878683505549177e-08, "logits/chosen": -2.3624765872955322, "logits/rejected": -2.3461878299713135, "logps/chosen": -275.10125732421875, "logps/rejected": -241.44970703125, "loss": 0.5474, "rewards/accuracies": 0.6953125, "rewards/chosen": -0.16694757342338562, "rewards/margins": 0.6941839456558228, "rewards/rejected": -0.861131489276886, "step": 2440 }, { "epoch": 2.53, "learning_rate": 8.687332567929582e-08, "logits/chosen": -2.3443427085876465, "logits/rejected": -2.325216054916382, "logps/chosen": -267.3571472167969, "logps/rejected": -219.15695190429688, "loss": 0.5181, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.14058147370815277, "rewards/margins": 0.7361636161804199, "rewards/rejected": -0.8767450451850891, "step": 2450 }, { "epoch": 2.54, "learning_rate": 8.495981630309988e-08, "logits/chosen": -2.363398313522339, "logits/rejected": -2.2995753288269043, "logps/chosen": -262.1716613769531, "logps/rejected": -236.1666259765625, "loss": 0.5242, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -0.12495926767587662, "rewards/margins": 0.7235976457595825, "rewards/rejected": -0.8485569953918457, "step": 2460 }, { "epoch": 2.55, "learning_rate": 8.304630692690395e-08, "logits/chosen": -2.387195110321045, "logits/rejected": -2.3478474617004395, "logps/chosen": -283.22705078125, "logps/rejected": -233.081298828125, "loss": 0.5162, "rewards/accuracies": 0.75, "rewards/chosen": -0.11790215969085693, "rewards/margins": 0.787219762802124, "rewards/rejected": -0.9051219820976257, "step": 2470 }, { "epoch": 2.56, "learning_rate": 8.1132797550708e-08, "logits/chosen": -2.3662703037261963, "logits/rejected": -2.3089377880096436, "logps/chosen": -270.521728515625, "logps/rejected": -229.98519897460938, "loss": 0.5213, "rewards/accuracies": 0.753125011920929, "rewards/chosen": -0.14989617466926575, "rewards/margins": 0.7254993915557861, "rewards/rejected": -0.875395655632019, "step": 2480 }, { "epoch": 2.57, "learning_rate": 7.921928817451206e-08, "logits/chosen": -2.360635280609131, "logits/rejected": -2.328989267349243, "logps/chosen": -272.1587829589844, "logps/rejected": -221.1785888671875, "loss": 0.48, "rewards/accuracies": 0.7640625238418579, "rewards/chosen": -0.07772710919380188, "rewards/margins": 0.8303700685501099, "rewards/rejected": -0.9080971479415894, "step": 2490 }, { "epoch": 2.58, "learning_rate": 7.73057787983161e-08, "logits/chosen": -2.3522746562957764, "logits/rejected": -2.3457765579223633, "logps/chosen": -270.2672424316406, "logps/rejected": -231.2746124267578, "loss": 0.5063, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -0.11053086817264557, "rewards/margins": 0.8113416433334351, "rewards/rejected": -0.921872615814209, "step": 2500 }, { "epoch": 2.59, "learning_rate": 7.539226942212017e-08, "logits/chosen": -2.3699328899383545, "logits/rejected": -2.322545289993286, "logps/chosen": -271.3714599609375, "logps/rejected": -247.797119140625, "loss": 0.5434, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.14844852685928345, "rewards/margins": 0.7081364989280701, "rewards/rejected": -0.8565850257873535, "step": 2510 }, { "epoch": 2.6, "learning_rate": 7.347876004592423e-08, "logits/chosen": -2.4080615043640137, "logits/rejected": -2.354224681854248, "logps/chosen": -271.5215148925781, "logps/rejected": -234.2542724609375, "loss": 0.5274, "rewards/accuracies": 0.7515624761581421, "rewards/chosen": -0.19082528352737427, "rewards/margins": 0.7402253746986389, "rewards/rejected": -0.9310504794120789, "step": 2520 }, { "epoch": 2.61, "learning_rate": 7.156525066972828e-08, "logits/chosen": -2.4200048446655273, "logits/rejected": -2.3449947834014893, "logps/chosen": -284.7613830566406, "logps/rejected": -238.87850952148438, "loss": 0.5191, "rewards/accuracies": 0.754687488079071, "rewards/chosen": -0.12657888233661652, "rewards/margins": 0.8030962944030762, "rewards/rejected": -0.9296752214431763, "step": 2530 }, { "epoch": 2.62, "learning_rate": 6.965174129353234e-08, "logits/chosen": -2.356966733932495, "logits/rejected": -2.29770827293396, "logps/chosen": -261.1152038574219, "logps/rejected": -240.00387573242188, "loss": 0.5399, "rewards/accuracies": 0.714062511920929, "rewards/chosen": -0.16734077036380768, "rewards/margins": 0.7260617613792419, "rewards/rejected": -0.8934024572372437, "step": 2540 }, { "epoch": 2.63, "learning_rate": 6.773823191733639e-08, "logits/chosen": -2.3702635765075684, "logits/rejected": -2.317229747772217, "logps/chosen": -272.82440185546875, "logps/rejected": -221.4154815673828, "loss": 0.5175, "rewards/accuracies": 0.7640625238418579, "rewards/chosen": -0.1133556216955185, "rewards/margins": 0.7367923259735107, "rewards/rejected": -0.8501479029655457, "step": 2550 }, { "epoch": 2.64, "learning_rate": 6.582472254114045e-08, "logits/chosen": -2.4438915252685547, "logits/rejected": -2.385143756866455, "logps/chosen": -275.418212890625, "logps/rejected": -239.0946502685547, "loss": 0.5384, "rewards/accuracies": 0.7203124761581421, "rewards/chosen": -0.11291754245758057, "rewards/margins": 0.7378134727478027, "rewards/rejected": -0.8507310748100281, "step": 2560 }, { "epoch": 2.65, "learning_rate": 6.391121316494451e-08, "logits/chosen": -2.3825366497039795, "logits/rejected": -2.3399150371551514, "logps/chosen": -251.03720092773438, "logps/rejected": -234.3162078857422, "loss": 0.5305, "rewards/accuracies": 0.7203124761581421, "rewards/chosen": -0.16436532139778137, "rewards/margins": 0.7372573614120483, "rewards/rejected": -0.9016226530075073, "step": 2570 }, { "epoch": 2.66, "learning_rate": 6.199770378874856e-08, "logits/chosen": -2.37642502784729, "logits/rejected": -2.3453516960144043, "logps/chosen": -269.23577880859375, "logps/rejected": -229.6055908203125, "loss": 0.5202, "rewards/accuracies": 0.75, "rewards/chosen": -0.1360473334789276, "rewards/margins": 0.7748234272003174, "rewards/rejected": -0.9108708500862122, "step": 2580 }, { "epoch": 2.68, "learning_rate": 6.008419441255262e-08, "logits/chosen": -2.3883450031280518, "logits/rejected": -2.3713505268096924, "logps/chosen": -263.39288330078125, "logps/rejected": -231.93930053710938, "loss": 0.5264, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -0.07088092714548111, "rewards/margins": 0.7212754487991333, "rewards/rejected": -0.792156457901001, "step": 2590 }, { "epoch": 2.69, "learning_rate": 5.817068503635668e-08, "logits/chosen": -2.3673863410949707, "logits/rejected": -2.307626724243164, "logps/chosen": -281.2471618652344, "logps/rejected": -218.8713836669922, "loss": 0.5129, "rewards/accuracies": 0.75, "rewards/chosen": -0.10778670012950897, "rewards/margins": 0.7753579020500183, "rewards/rejected": -0.8831446766853333, "step": 2600 }, { "epoch": 2.7, "learning_rate": 5.6257175660160735e-08, "logits/chosen": -2.3701910972595215, "logits/rejected": -2.3337950706481934, "logps/chosen": -273.3838806152344, "logps/rejected": -228.57546997070312, "loss": 0.5202, "rewards/accuracies": 0.746874988079071, "rewards/chosen": -0.14705790579319, "rewards/margins": 0.7927185893058777, "rewards/rejected": -0.939776599407196, "step": 2610 }, { "epoch": 2.71, "learning_rate": 5.4343666283964784e-08, "logits/chosen": -2.3455982208251953, "logits/rejected": -2.3253159523010254, "logps/chosen": -262.5321960449219, "logps/rejected": -226.5032501220703, "loss": 0.5441, "rewards/accuracies": 0.729687511920929, "rewards/chosen": -0.18480037152767181, "rewards/margins": 0.7010098695755005, "rewards/rejected": -0.8858101963996887, "step": 2620 }, { "epoch": 2.72, "learning_rate": 5.243015690776884e-08, "logits/chosen": -2.38588285446167, "logits/rejected": -2.3365259170532227, "logps/chosen": -264.8544616699219, "logps/rejected": -217.23599243164062, "loss": 0.5238, "rewards/accuracies": 0.7265625, "rewards/chosen": -0.13912011682987213, "rewards/margins": 0.7289184331893921, "rewards/rejected": -0.8680384755134583, "step": 2630 }, { "epoch": 2.73, "learning_rate": 5.05166475315729e-08, "logits/chosen": -2.3164820671081543, "logits/rejected": -2.3066811561584473, "logps/chosen": -250.55001831054688, "logps/rejected": -226.17489624023438, "loss": 0.5331, "rewards/accuracies": 0.729687511920929, "rewards/chosen": -0.15784478187561035, "rewards/margins": 0.6939374804496765, "rewards/rejected": -0.8517822027206421, "step": 2640 }, { "epoch": 2.74, "learning_rate": 4.860313815537696e-08, "logits/chosen": -2.4132914543151855, "logits/rejected": -2.347529411315918, "logps/chosen": -267.1165771484375, "logps/rejected": -219.255615234375, "loss": 0.5299, "rewards/accuracies": 0.734375, "rewards/chosen": -0.1091521754860878, "rewards/margins": 0.7415350079536438, "rewards/rejected": -0.8506871461868286, "step": 2650 }, { "epoch": 2.75, "learning_rate": 4.668962877918101e-08, "logits/chosen": -2.3676562309265137, "logits/rejected": -2.3321216106414795, "logps/chosen": -271.9303894042969, "logps/rejected": -224.6049346923828, "loss": 0.4929, "rewards/accuracies": 0.753125011920929, "rewards/chosen": -0.09524812549352646, "rewards/margins": 0.8164734840393066, "rewards/rejected": -0.9117215871810913, "step": 2660 }, { "epoch": 2.76, "learning_rate": 4.477611940298507e-08, "logits/chosen": -2.3520119190216064, "logits/rejected": -2.338006019592285, "logps/chosen": -260.5480041503906, "logps/rejected": -230.41860961914062, "loss": 0.5032, "rewards/accuracies": 0.75, "rewards/chosen": -0.12855994701385498, "rewards/margins": 0.8062857389450073, "rewards/rejected": -0.9348458051681519, "step": 2670 }, { "epoch": 2.77, "learning_rate": 4.2862610026789124e-08, "logits/chosen": -2.359654664993286, "logits/rejected": -2.2997984886169434, "logps/chosen": -267.7292175292969, "logps/rejected": -227.33132934570312, "loss": 0.538, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.14395280182361603, "rewards/margins": 0.735295832157135, "rewards/rejected": -0.8792486190795898, "step": 2680 }, { "epoch": 2.78, "learning_rate": 4.0949100650593186e-08, "logits/chosen": -2.4200806617736816, "logits/rejected": -2.360294818878174, "logps/chosen": -271.26324462890625, "logps/rejected": -227.1554412841797, "loss": 0.535, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -0.16018937528133392, "rewards/margins": 0.7067100405693054, "rewards/rejected": -0.866899311542511, "step": 2690 }, { "epoch": 2.79, "learning_rate": 3.903559127439724e-08, "logits/chosen": -2.3925411701202393, "logits/rejected": -2.331263780593872, "logps/chosen": -271.6239318847656, "logps/rejected": -231.4758758544922, "loss": 0.5233, "rewards/accuracies": 0.754687488079071, "rewards/chosen": -0.17314036190509796, "rewards/margins": 0.7330547571182251, "rewards/rejected": -0.9061950445175171, "step": 2700 }, { "epoch": 2.8, "learning_rate": 3.71220818982013e-08, "logits/chosen": -2.4014270305633545, "logits/rejected": -2.355578899383545, "logps/chosen": -279.74566650390625, "logps/rejected": -228.99746704101562, "loss": 0.5655, "rewards/accuracies": 0.6890624761581421, "rewards/chosen": -0.2012714445590973, "rewards/margins": 0.6636210680007935, "rewards/rejected": -0.8648926019668579, "step": 2710 }, { "epoch": 2.81, "learning_rate": 3.520857252200535e-08, "logits/chosen": -2.419564723968506, "logits/rejected": -2.3469746112823486, "logps/chosen": -271.7889099121094, "logps/rejected": -229.7169952392578, "loss": 0.5384, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.1927814781665802, "rewards/margins": 0.7116800546646118, "rewards/rejected": -0.9044615626335144, "step": 2720 }, { "epoch": 2.82, "learning_rate": 3.3295063145809414e-08, "logits/chosen": -2.3487582206726074, "logits/rejected": -2.311703681945801, "logps/chosen": -273.3060302734375, "logps/rejected": -253.8658905029297, "loss": 0.5457, "rewards/accuracies": 0.7281249761581421, "rewards/chosen": -0.18712952733039856, "rewards/margins": 0.712023138999939, "rewards/rejected": -0.8991526365280151, "step": 2730 }, { "epoch": 2.83, "learning_rate": 3.138155376961347e-08, "logits/chosen": -2.3431172370910645, "logits/rejected": -2.3230600357055664, "logps/chosen": -256.7168273925781, "logps/rejected": -227.06924438476562, "loss": 0.525, "rewards/accuracies": 0.734375, "rewards/chosen": -0.14321701228618622, "rewards/margins": 0.7300316095352173, "rewards/rejected": -0.8732486963272095, "step": 2740 }, { "epoch": 2.84, "learning_rate": 2.9468044393417525e-08, "logits/chosen": -2.3237102031707764, "logits/rejected": -2.318800449371338, "logps/chosen": -260.8058776855469, "logps/rejected": -226.2300262451172, "loss": 0.54, "rewards/accuracies": 0.7281249761581421, "rewards/chosen": -0.18601644039154053, "rewards/margins": 0.6762635111808777, "rewards/rejected": -0.862280011177063, "step": 2750 }, { "epoch": 2.85, "learning_rate": 2.755453501722158e-08, "logits/chosen": -2.3754734992980957, "logits/rejected": -2.337226629257202, "logps/chosen": -268.50054931640625, "logps/rejected": -225.01815795898438, "loss": 0.531, "rewards/accuracies": 0.7328125238418579, "rewards/chosen": -0.15901225805282593, "rewards/margins": 0.7717152237892151, "rewards/rejected": -0.930727481842041, "step": 2760 }, { "epoch": 2.86, "learning_rate": 2.564102564102564e-08, "logits/chosen": -2.372312068939209, "logits/rejected": -2.3167147636413574, "logps/chosen": -265.208740234375, "logps/rejected": -244.6002197265625, "loss": 0.5552, "rewards/accuracies": 0.7328125238418579, "rewards/chosen": -0.14687782526016235, "rewards/margins": 0.6419769525527954, "rewards/rejected": -0.788854718208313, "step": 2770 }, { "epoch": 2.87, "learning_rate": 2.3727516264829695e-08, "logits/chosen": -2.3352761268615723, "logits/rejected": -2.3101370334625244, "logps/chosen": -266.26129150390625, "logps/rejected": -228.8058624267578, "loss": 0.5182, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -0.1582275927066803, "rewards/margins": 0.7328441739082336, "rewards/rejected": -0.8910716772079468, "step": 2780 }, { "epoch": 2.88, "learning_rate": 2.1814006888633754e-08, "logits/chosen": -2.34548020362854, "logits/rejected": -2.3345954418182373, "logps/chosen": -268.830078125, "logps/rejected": -234.70590209960938, "loss": 0.5568, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.19458332657814026, "rewards/margins": 0.6641789674758911, "rewards/rejected": -0.8587621450424194, "step": 2790 }, { "epoch": 2.89, "learning_rate": 1.990049751243781e-08, "logits/chosen": -2.345353126525879, "logits/rejected": -2.309774875640869, "logps/chosen": -264.1852111816406, "logps/rejected": -232.37881469726562, "loss": 0.5252, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.13199767470359802, "rewards/margins": 0.7402657270431519, "rewards/rejected": -0.8722633123397827, "step": 2800 }, { "epoch": 2.9, "learning_rate": 1.7986988136241865e-08, "logits/chosen": -2.3702383041381836, "logits/rejected": -2.3145265579223633, "logps/chosen": -271.8026428222656, "logps/rejected": -234.308349609375, "loss": 0.5342, "rewards/accuracies": 0.7203124761581421, "rewards/chosen": -0.18479926884174347, "rewards/margins": 0.7022183537483215, "rewards/rejected": -0.8870177268981934, "step": 2810 }, { "epoch": 2.91, "learning_rate": 1.6073478760045924e-08, "logits/chosen": -2.3550527095794678, "logits/rejected": -2.3468880653381348, "logps/chosen": -282.18310546875, "logps/rejected": -234.3654327392578, "loss": 0.5167, "rewards/accuracies": 0.7281249761581421, "rewards/chosen": -0.06430914252996445, "rewards/margins": 0.790905237197876, "rewards/rejected": -0.8552142977714539, "step": 2820 }, { "epoch": 2.92, "learning_rate": 1.4159969383849981e-08, "logits/chosen": -2.3620452880859375, "logits/rejected": -2.345167398452759, "logps/chosen": -280.26470947265625, "logps/rejected": -235.08993530273438, "loss": 0.5554, "rewards/accuracies": 0.7171875238418579, "rewards/chosen": -0.1752968728542328, "rewards/margins": 0.6895657777786255, "rewards/rejected": -0.8648626208305359, "step": 2830 }, { "epoch": 2.93, "learning_rate": 1.2246460007654037e-08, "logits/chosen": -2.3489487171173096, "logits/rejected": -2.3238141536712646, "logps/chosen": -278.1292724609375, "logps/rejected": -240.13705444335938, "loss": 0.5593, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -0.15504539012908936, "rewards/margins": 0.6761684417724609, "rewards/rejected": -0.8312137722969055, "step": 2840 }, { "epoch": 2.94, "learning_rate": 1.0332950631458094e-08, "logits/chosen": -2.389085292816162, "logits/rejected": -2.364729166030884, "logps/chosen": -267.89630126953125, "logps/rejected": -229.4080352783203, "loss": 0.5274, "rewards/accuracies": 0.746874988079071, "rewards/chosen": -0.1344664990901947, "rewards/margins": 0.7971667051315308, "rewards/rejected": -0.9316331744194031, "step": 2850 }, { "epoch": 2.95, "learning_rate": 8.419441255262151e-09, "logits/chosen": -2.343165874481201, "logits/rejected": -2.3094429969787598, "logps/chosen": -260.5189514160156, "logps/rejected": -233.33432006835938, "loss": 0.5264, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -0.17281007766723633, "rewards/margins": 0.7437566518783569, "rewards/rejected": -0.9165668487548828, "step": 2860 }, { "epoch": 2.96, "learning_rate": 6.505931879066207e-09, "logits/chosen": -2.3336236476898193, "logits/rejected": -2.321570873260498, "logps/chosen": -278.14080810546875, "logps/rejected": -233.65365600585938, "loss": 0.5251, "rewards/accuracies": 0.7281249761581421, "rewards/chosen": -0.09798727929592133, "rewards/margins": 0.7966296076774597, "rewards/rejected": -0.8946169018745422, "step": 2870 }, { "epoch": 2.97, "learning_rate": 4.592422502870264e-09, "logits/chosen": -2.3987810611724854, "logits/rejected": -2.3644556999206543, "logps/chosen": -280.18609619140625, "logps/rejected": -233.4442138671875, "loss": 0.5218, "rewards/accuracies": 0.739062488079071, "rewards/chosen": -0.1451808661222458, "rewards/margins": 0.7559449076652527, "rewards/rejected": -0.9011257886886597, "step": 2880 }, { "epoch": 2.98, "learning_rate": 2.6789131266743202e-09, "logits/chosen": -2.3656845092773438, "logits/rejected": -2.3112010955810547, "logps/chosen": -255.6254425048828, "logps/rejected": -207.92041015625, "loss": 0.5262, "rewards/accuracies": 0.7359374761581421, "rewards/chosen": -0.161960169672966, "rewards/margins": 0.7441297769546509, "rewards/rejected": -0.9060899615287781, "step": 2890 }, { "epoch": 3.0, "learning_rate": 7.654037504783773e-10, "logits/chosen": -2.371070384979248, "logits/rejected": -2.3036046028137207, "logps/chosen": -267.93048095703125, "logps/rejected": -234.49667358398438, "loss": 0.5172, "rewards/accuracies": 0.7328125238418579, "rewards/chosen": -0.12272216379642487, "rewards/margins": 0.7688759565353394, "rewards/rejected": -0.891598105430603, "step": 2900 }, { "epoch": 3.0, "eval_logits/chosen": -2.020789384841919, "eval_logits/rejected": -1.9665637016296387, "eval_logps/chosen": -266.1182556152344, "eval_logps/rejected": -232.68226623535156, "eval_loss": 0.5263917446136475, "eval_rewards/accuracies": 0.734499990940094, "eval_rewards/chosen": -0.15599758923053741, "eval_rewards/margins": 0.7454450726509094, "eval_rewards/rejected": -0.9014427661895752, "eval_runtime": 1638.9888, "eval_samples_per_second": 1.22, "eval_steps_per_second": 0.305, "step": 2904 }, { "epoch": 3.0, "step": 2904, "total_flos": 0.0, "train_loss": 0.5636412144405126, "train_runtime": 257163.3959, "train_samples_per_second": 0.723, "train_steps_per_second": 0.011 } ], "logging_steps": 10, "max_steps": 2904, "num_train_epochs": 3, "save_steps": 500, "total_flos": 0.0, "trial_name": null, "trial_params": null }