|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.9993222089532967, |
|
"eval_steps": 100, |
|
"global_step": 2904, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.7182130584192438e-09, |
|
"logits/chosen": -2.4422173500061035, |
|
"logits/rejected": -2.526975631713867, |
|
"logps/chosen": -235.28317260742188, |
|
"logps/rejected": -214.19320678710938, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 1.718213058419244e-08, |
|
"logits/chosen": -2.4839749336242676, |
|
"logits/rejected": -2.4228153228759766, |
|
"logps/chosen": -280.0798034667969, |
|
"logps/rejected": -230.15765380859375, |
|
"loss": 0.6934, |
|
"rewards/accuracies": 0.4600694477558136, |
|
"rewards/chosen": -7.412416744045913e-05, |
|
"rewards/margins": 0.0002167515631299466, |
|
"rewards/rejected": -0.0002908758178818971, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 3.436426116838488e-08, |
|
"logits/chosen": -2.414851188659668, |
|
"logits/rejected": -2.354247570037842, |
|
"logps/chosen": -255.57260131835938, |
|
"logps/rejected": -226.37820434570312, |
|
"loss": 0.6956, |
|
"rewards/accuracies": 0.4765625, |
|
"rewards/chosen": -0.0009197852341458201, |
|
"rewards/margins": -0.004096911288797855, |
|
"rewards/rejected": 0.003177126171067357, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 5.154639175257731e-08, |
|
"logits/chosen": -2.4241952896118164, |
|
"logits/rejected": -2.400988817214966, |
|
"logps/chosen": -272.502197265625, |
|
"logps/rejected": -227.431884765625, |
|
"loss": 0.6943, |
|
"rewards/accuracies": 0.484375, |
|
"rewards/chosen": -0.0013401806354522705, |
|
"rewards/margins": -0.0016240004915744066, |
|
"rewards/rejected": 0.0002838193904608488, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 6.872852233676976e-08, |
|
"logits/chosen": -2.4217023849487305, |
|
"logits/rejected": -2.3694050312042236, |
|
"logps/chosen": -249.1688995361328, |
|
"logps/rejected": -220.63821411132812, |
|
"loss": 0.6953, |
|
"rewards/accuracies": 0.4625000059604645, |
|
"rewards/chosen": -0.0008647952345199883, |
|
"rewards/margins": -0.0035088094882667065, |
|
"rewards/rejected": 0.002644014311954379, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 8.59106529209622e-08, |
|
"logits/chosen": -2.457735538482666, |
|
"logits/rejected": -2.413196563720703, |
|
"logps/chosen": -259.66912841796875, |
|
"logps/rejected": -220.27700805664062, |
|
"loss": 0.694, |
|
"rewards/accuracies": 0.484375, |
|
"rewards/chosen": 0.0021845095325261354, |
|
"rewards/margins": -0.0009308269363828003, |
|
"rewards/rejected": 0.003115336410701275, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 1.0309278350515462e-07, |
|
"logits/chosen": -2.4587912559509277, |
|
"logits/rejected": -2.4032034873962402, |
|
"logps/chosen": -258.9931945800781, |
|
"logps/rejected": -228.43301391601562, |
|
"loss": 0.6937, |
|
"rewards/accuracies": 0.49687498807907104, |
|
"rewards/chosen": 0.00026782663189806044, |
|
"rewards/margins": -0.00029953793273307383, |
|
"rewards/rejected": 0.0005673646228387952, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 1.202749140893471e-07, |
|
"logits/chosen": -2.438084840774536, |
|
"logits/rejected": -2.4171149730682373, |
|
"logps/chosen": -267.3536071777344, |
|
"logps/rejected": -210.99343872070312, |
|
"loss": 0.6927, |
|
"rewards/accuracies": 0.515625, |
|
"rewards/chosen": 0.0028153976891189814, |
|
"rewards/margins": 0.0016862023621797562, |
|
"rewards/rejected": 0.0011291948612779379, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 1.3745704467353952e-07, |
|
"logits/chosen": -2.449380397796631, |
|
"logits/rejected": -2.3840742111206055, |
|
"logps/chosen": -280.45050048828125, |
|
"logps/rejected": -225.0115966796875, |
|
"loss": 0.6925, |
|
"rewards/accuracies": 0.5390625, |
|
"rewards/chosen": 0.003211159957572818, |
|
"rewards/margins": 0.0021671659778803587, |
|
"rewards/rejected": 0.0010439944453537464, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 1.5463917525773197e-07, |
|
"logits/chosen": -2.4744973182678223, |
|
"logits/rejected": -2.3940176963806152, |
|
"logps/chosen": -271.45843505859375, |
|
"logps/rejected": -231.62643432617188, |
|
"loss": 0.6919, |
|
"rewards/accuracies": 0.520312488079071, |
|
"rewards/chosen": 0.0016121773514896631, |
|
"rewards/margins": 0.0033598211593925953, |
|
"rewards/rejected": -0.0017476438079029322, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 1.718213058419244e-07, |
|
"logits/chosen": -2.489611864089966, |
|
"logits/rejected": -2.3956551551818848, |
|
"logps/chosen": -264.9837646484375, |
|
"logps/rejected": -215.3820343017578, |
|
"loss": 0.6918, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.0020506351720541716, |
|
"rewards/margins": 0.0036049727350473404, |
|
"rewards/rejected": -0.0015543376794084907, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 1.8900343642611682e-07, |
|
"logits/chosen": -2.4358983039855957, |
|
"logits/rejected": -2.3630106449127197, |
|
"logps/chosen": -283.7451477050781, |
|
"logps/rejected": -214.5529327392578, |
|
"loss": 0.6887, |
|
"rewards/accuracies": 0.557812511920929, |
|
"rewards/chosen": 0.006171266548335552, |
|
"rewards/margins": 0.00967160053551197, |
|
"rewards/rejected": -0.003500334918498993, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 2.0618556701030925e-07, |
|
"logits/chosen": -2.4519553184509277, |
|
"logits/rejected": -2.4237570762634277, |
|
"logps/chosen": -271.7016906738281, |
|
"logps/rejected": -229.41964721679688, |
|
"loss": 0.6911, |
|
"rewards/accuracies": 0.5140625238418579, |
|
"rewards/chosen": 0.0020948010496795177, |
|
"rewards/margins": 0.004834444727748632, |
|
"rewards/rejected": -0.0027396436780691147, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 2.2336769759450173e-07, |
|
"logits/chosen": -2.448523998260498, |
|
"logits/rejected": -2.3815808296203613, |
|
"logps/chosen": -267.52691650390625, |
|
"logps/rejected": -212.1881866455078, |
|
"loss": 0.6888, |
|
"rewards/accuracies": 0.5796874761581421, |
|
"rewards/chosen": 0.005649151746183634, |
|
"rewards/margins": 0.009708194993436337, |
|
"rewards/rejected": -0.00405904371291399, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 2.405498281786942e-07, |
|
"logits/chosen": -2.46962308883667, |
|
"logits/rejected": -2.4132840633392334, |
|
"logps/chosen": -274.2135009765625, |
|
"logps/rejected": -212.12222290039062, |
|
"loss": 0.6881, |
|
"rewards/accuracies": 0.5718749761581421, |
|
"rewards/chosen": 0.005823948420584202, |
|
"rewards/margins": 0.01115177758038044, |
|
"rewards/rejected": -0.0053278305567801, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 2.5773195876288655e-07, |
|
"logits/chosen": -2.4161739349365234, |
|
"logits/rejected": -2.3815319538116455, |
|
"logps/chosen": -250.0603485107422, |
|
"logps/rejected": -210.3913116455078, |
|
"loss": 0.6891, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.005285581108182669, |
|
"rewards/margins": 0.009132949635386467, |
|
"rewards/rejected": -0.003847368760034442, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 2.7491408934707903e-07, |
|
"logits/chosen": -2.437349557876587, |
|
"logits/rejected": -2.374871015548706, |
|
"logps/chosen": -267.4647521972656, |
|
"logps/rejected": -218.4558563232422, |
|
"loss": 0.6883, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": 0.004300659988075495, |
|
"rewards/margins": 0.010579666122794151, |
|
"rewards/rejected": -0.006279005669057369, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 2.9209621993127146e-07, |
|
"logits/chosen": -2.400257110595703, |
|
"logits/rejected": -2.368004083633423, |
|
"logps/chosen": -277.8876953125, |
|
"logps/rejected": -221.78640747070312, |
|
"loss": 0.6878, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.005591097287833691, |
|
"rewards/margins": 0.011688882485032082, |
|
"rewards/rejected": -0.006097783800214529, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 3.0927835051546394e-07, |
|
"logits/chosen": -2.4092414379119873, |
|
"logits/rejected": -2.376796007156372, |
|
"logps/chosen": -271.3955993652344, |
|
"logps/rejected": -226.97055053710938, |
|
"loss": 0.6844, |
|
"rewards/accuracies": 0.598437488079071, |
|
"rewards/chosen": 0.008772986009716988, |
|
"rewards/margins": 0.018974503502249718, |
|
"rewards/rejected": -0.010201516561210155, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 3.2646048109965636e-07, |
|
"logits/chosen": -2.4419052600860596, |
|
"logits/rejected": -2.3903238773345947, |
|
"logps/chosen": -276.5497131347656, |
|
"logps/rejected": -222.6400604248047, |
|
"loss": 0.685, |
|
"rewards/accuracies": 0.5921875238418579, |
|
"rewards/chosen": 0.011585086584091187, |
|
"rewards/margins": 0.017912257462739944, |
|
"rewards/rejected": -0.006327168550342321, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 3.436426116838488e-07, |
|
"logits/chosen": -2.419137477874756, |
|
"logits/rejected": -2.3881921768188477, |
|
"logps/chosen": -249.6503448486328, |
|
"logps/rejected": -214.3955078125, |
|
"loss": 0.6802, |
|
"rewards/accuracies": 0.6265624761581421, |
|
"rewards/chosen": 0.014122622087597847, |
|
"rewards/margins": 0.027482766658067703, |
|
"rewards/rejected": -0.01336014736443758, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 3.608247422680412e-07, |
|
"logits/chosen": -2.4479005336761475, |
|
"logits/rejected": -2.4040849208831787, |
|
"logps/chosen": -270.5887145996094, |
|
"logps/rejected": -235.2773895263672, |
|
"loss": 0.6807, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.011653213761746883, |
|
"rewards/margins": 0.026884321123361588, |
|
"rewards/rejected": -0.01523110456764698, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 3.7800687285223364e-07, |
|
"logits/chosen": -2.4104199409484863, |
|
"logits/rejected": -2.3601438999176025, |
|
"logps/chosen": -263.31024169921875, |
|
"logps/rejected": -219.03466796875, |
|
"loss": 0.6787, |
|
"rewards/accuracies": 0.6796875, |
|
"rewards/chosen": 0.013443564996123314, |
|
"rewards/margins": 0.03105132281780243, |
|
"rewards/rejected": -0.017607757821679115, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 3.9518900343642607e-07, |
|
"logits/chosen": -2.406188726425171, |
|
"logits/rejected": -2.3706843852996826, |
|
"logps/chosen": -273.1360778808594, |
|
"logps/rejected": -231.5634765625, |
|
"loss": 0.6728, |
|
"rewards/accuracies": 0.6390625238418579, |
|
"rewards/chosen": 0.01533445529639721, |
|
"rewards/margins": 0.0440392941236496, |
|
"rewards/rejected": -0.02870483696460724, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.123711340206185e-07, |
|
"logits/chosen": -2.4351398944854736, |
|
"logits/rejected": -2.393178701400757, |
|
"logps/chosen": -271.65087890625, |
|
"logps/rejected": -233.6895751953125, |
|
"loss": 0.6708, |
|
"rewards/accuracies": 0.660937488079071, |
|
"rewards/chosen": 0.02181144617497921, |
|
"rewards/margins": 0.04818682745099068, |
|
"rewards/rejected": -0.026375379413366318, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.2955326460481097e-07, |
|
"logits/chosen": -2.4275219440460205, |
|
"logits/rejected": -2.455544948577881, |
|
"logps/chosen": -264.905517578125, |
|
"logps/rejected": -225.69180297851562, |
|
"loss": 0.6707, |
|
"rewards/accuracies": 0.6343749761581421, |
|
"rewards/chosen": 0.020589668303728104, |
|
"rewards/margins": 0.04932643473148346, |
|
"rewards/rejected": -0.028736764565110207, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.4673539518900345e-07, |
|
"logits/chosen": -2.4035658836364746, |
|
"logits/rejected": -2.4021072387695312, |
|
"logps/chosen": -249.67953491210938, |
|
"logps/rejected": -204.56887817382812, |
|
"loss": 0.6655, |
|
"rewards/accuracies": 0.671875, |
|
"rewards/chosen": 0.01743764989078045, |
|
"rewards/margins": 0.06085206940770149, |
|
"rewards/rejected": -0.043414413928985596, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.639175257731959e-07, |
|
"logits/chosen": -2.4608845710754395, |
|
"logits/rejected": -2.433506488800049, |
|
"logps/chosen": -281.73260498046875, |
|
"logps/rejected": -224.5501251220703, |
|
"loss": 0.6585, |
|
"rewards/accuracies": 0.692187488079071, |
|
"rewards/chosen": 0.0280983354896307, |
|
"rewards/margins": 0.07657264918088913, |
|
"rewards/rejected": -0.04847431182861328, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.810996563573884e-07, |
|
"logits/chosen": -2.4379546642303467, |
|
"logits/rejected": -2.374706745147705, |
|
"logps/chosen": -266.258544921875, |
|
"logps/rejected": -217.6811981201172, |
|
"loss": 0.6557, |
|
"rewards/accuracies": 0.698437511920929, |
|
"rewards/chosen": 0.02859863080084324, |
|
"rewards/margins": 0.08380897343158722, |
|
"rewards/rejected": -0.05521036311984062, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.982817869415807e-07, |
|
"logits/chosen": -2.449183940887451, |
|
"logits/rejected": -2.4152982234954834, |
|
"logps/chosen": -251.42617797851562, |
|
"logps/rejected": -206.6394805908203, |
|
"loss": 0.6567, |
|
"rewards/accuracies": 0.6703125238418579, |
|
"rewards/chosen": 0.018911005929112434, |
|
"rewards/margins": 0.08338409662246704, |
|
"rewards/rejected": -0.06447309255599976, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.982778415614236e-07, |
|
"logits/chosen": -2.431877374649048, |
|
"logits/rejected": -2.3985190391540527, |
|
"logps/chosen": -258.9649353027344, |
|
"logps/rejected": -214.5379180908203, |
|
"loss": 0.6557, |
|
"rewards/accuracies": 0.6578124761581421, |
|
"rewards/chosen": 0.022992964833974838, |
|
"rewards/margins": 0.08744792640209198, |
|
"rewards/rejected": -0.06445495784282684, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.963643321852277e-07, |
|
"logits/chosen": -2.4291908740997314, |
|
"logits/rejected": -2.3937199115753174, |
|
"logps/chosen": -264.531005859375, |
|
"logps/rejected": -223.732177734375, |
|
"loss": 0.6499, |
|
"rewards/accuracies": 0.690625011920929, |
|
"rewards/chosen": 0.026863668113946915, |
|
"rewards/margins": 0.10220368206501007, |
|
"rewards/rejected": -0.07534001767635345, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.944508228090318e-07, |
|
"logits/chosen": -2.4233272075653076, |
|
"logits/rejected": -2.3791823387145996, |
|
"logps/chosen": -268.5234375, |
|
"logps/rejected": -215.85610961914062, |
|
"loss": 0.6425, |
|
"rewards/accuracies": 0.6953125, |
|
"rewards/chosen": 0.03440080210566521, |
|
"rewards/margins": 0.11941323429346085, |
|
"rewards/rejected": -0.08501242101192474, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.925373134328357e-07, |
|
"logits/chosen": -2.4789443016052246, |
|
"logits/rejected": -2.4048690795898438, |
|
"logps/chosen": -266.273681640625, |
|
"logps/rejected": -230.77957153320312, |
|
"loss": 0.6404, |
|
"rewards/accuracies": 0.6781250238418579, |
|
"rewards/chosen": 0.036819975823163986, |
|
"rewards/margins": 0.12693271040916443, |
|
"rewards/rejected": -0.09011274576187134, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.906238040566398e-07, |
|
"logits/chosen": -2.4333298206329346, |
|
"logits/rejected": -2.3658194541931152, |
|
"logps/chosen": -252.1578826904297, |
|
"logps/rejected": -221.54611206054688, |
|
"loss": 0.6406, |
|
"rewards/accuracies": 0.692187488079071, |
|
"rewards/chosen": 0.02940729819238186, |
|
"rewards/margins": 0.12888944149017334, |
|
"rewards/rejected": -0.09948214888572693, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.887102946804438e-07, |
|
"logits/chosen": -2.4523606300354004, |
|
"logits/rejected": -2.388826847076416, |
|
"logps/chosen": -263.3428039550781, |
|
"logps/rejected": -218.86074829101562, |
|
"loss": 0.6345, |
|
"rewards/accuracies": 0.667187511920929, |
|
"rewards/chosen": 0.022628765553236008, |
|
"rewards/margins": 0.14625979959964752, |
|
"rewards/rejected": -0.12363102287054062, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.867967853042479e-07, |
|
"logits/chosen": -2.450970411300659, |
|
"logits/rejected": -2.3968029022216797, |
|
"logps/chosen": -267.2438049316406, |
|
"logps/rejected": -222.943359375, |
|
"loss": 0.6252, |
|
"rewards/accuracies": 0.684374988079071, |
|
"rewards/chosen": 0.03826092556118965, |
|
"rewards/margins": 0.1685018539428711, |
|
"rewards/rejected": -0.13024093210697174, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.84883275928052e-07, |
|
"logits/chosen": -2.4290268421173096, |
|
"logits/rejected": -2.417224645614624, |
|
"logps/chosen": -266.91693115234375, |
|
"logps/rejected": -235.718017578125, |
|
"loss": 0.6357, |
|
"rewards/accuracies": 0.660937488079071, |
|
"rewards/chosen": 0.016346175223588943, |
|
"rewards/margins": 0.1523023247718811, |
|
"rewards/rejected": -0.13595613837242126, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.82969766551856e-07, |
|
"logits/chosen": -2.412705421447754, |
|
"logits/rejected": -2.374624013900757, |
|
"logps/chosen": -261.72271728515625, |
|
"logps/rejected": -229.1892547607422, |
|
"loss": 0.635, |
|
"rewards/accuracies": 0.6640625, |
|
"rewards/chosen": 0.012038780376315117, |
|
"rewards/margins": 0.15452785789966583, |
|
"rewards/rejected": -0.14248906075954437, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.810562571756601e-07, |
|
"logits/chosen": -2.453866720199585, |
|
"logits/rejected": -2.3835487365722656, |
|
"logps/chosen": -263.36273193359375, |
|
"logps/rejected": -218.2621307373047, |
|
"loss": 0.6147, |
|
"rewards/accuracies": 0.707812488079071, |
|
"rewards/chosen": 0.04369325190782547, |
|
"rewards/margins": 0.19822198152542114, |
|
"rewards/rejected": -0.15452872216701508, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.791427477994642e-07, |
|
"logits/chosen": -2.446223258972168, |
|
"logits/rejected": -2.4049808979034424, |
|
"logps/chosen": -270.06732177734375, |
|
"logps/rejected": -222.06930541992188, |
|
"loss": 0.6228, |
|
"rewards/accuracies": 0.6968749761581421, |
|
"rewards/chosen": 0.036653708666563034, |
|
"rewards/margins": 0.1898169219493866, |
|
"rewards/rejected": -0.15316320955753326, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.772292384232682e-07, |
|
"logits/chosen": -2.4419703483581543, |
|
"logits/rejected": -2.409404754638672, |
|
"logps/chosen": -261.24114990234375, |
|
"logps/rejected": -230.6668701171875, |
|
"loss": 0.627, |
|
"rewards/accuracies": 0.653124988079071, |
|
"rewards/chosen": 0.020566729828715324, |
|
"rewards/margins": 0.1889314353466034, |
|
"rewards/rejected": -0.1683647185564041, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.753157290470723e-07, |
|
"logits/chosen": -2.4552600383758545, |
|
"logits/rejected": -2.4268581867218018, |
|
"logps/chosen": -276.3377990722656, |
|
"logps/rejected": -226.77072143554688, |
|
"loss": 0.6127, |
|
"rewards/accuracies": 0.698437511920929, |
|
"rewards/chosen": 0.02126963995397091, |
|
"rewards/margins": 0.22278845310211182, |
|
"rewards/rejected": -0.20151881873607635, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.7340221967087635e-07, |
|
"logits/chosen": -2.413886308670044, |
|
"logits/rejected": -2.3775603771209717, |
|
"logps/chosen": -250.6868896484375, |
|
"logps/rejected": -207.9862518310547, |
|
"loss": 0.6152, |
|
"rewards/accuracies": 0.682812511920929, |
|
"rewards/chosen": 0.01281009428203106, |
|
"rewards/margins": 0.20990212261676788, |
|
"rewards/rejected": -0.19709204137325287, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.714887102946804e-07, |
|
"logits/chosen": -2.4519877433776855, |
|
"logits/rejected": -2.3820528984069824, |
|
"logps/chosen": -274.48419189453125, |
|
"logps/rejected": -226.1211395263672, |
|
"loss": 0.6002, |
|
"rewards/accuracies": 0.7265625, |
|
"rewards/chosen": 0.03596062213182449, |
|
"rewards/margins": 0.2432943880558014, |
|
"rewards/rejected": -0.20733380317687988, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.6957520091848447e-07, |
|
"logits/chosen": -2.4627573490142822, |
|
"logits/rejected": -2.424017906188965, |
|
"logps/chosen": -262.072509765625, |
|
"logps/rejected": -227.0179443359375, |
|
"loss": 0.6138, |
|
"rewards/accuracies": 0.6640625, |
|
"rewards/chosen": 0.0052271028980612755, |
|
"rewards/margins": 0.22640076279640198, |
|
"rewards/rejected": -0.22117361426353455, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.6766169154228853e-07, |
|
"logits/chosen": -2.419257640838623, |
|
"logits/rejected": -2.350435733795166, |
|
"logps/chosen": -256.357421875, |
|
"logps/rejected": -219.219970703125, |
|
"loss": 0.6211, |
|
"rewards/accuracies": 0.651562511920929, |
|
"rewards/chosen": -0.010133005678653717, |
|
"rewards/margins": 0.2125161588191986, |
|
"rewards/rejected": -0.22264917194843292, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.657481821660926e-07, |
|
"logits/chosen": -2.466386079788208, |
|
"logits/rejected": -2.42856764793396, |
|
"logps/chosen": -279.46905517578125, |
|
"logps/rejected": -232.4190673828125, |
|
"loss": 0.6076, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": 0.01027261558920145, |
|
"rewards/margins": 0.24917173385620117, |
|
"rewards/rejected": -0.2388991117477417, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.6383467278989666e-07, |
|
"logits/chosen": -2.3757636547088623, |
|
"logits/rejected": -2.4026846885681152, |
|
"logps/chosen": -262.5502624511719, |
|
"logps/rejected": -227.03671264648438, |
|
"loss": 0.5955, |
|
"rewards/accuracies": 0.7109375, |
|
"rewards/chosen": 0.036844007670879364, |
|
"rewards/margins": 0.2827877700328827, |
|
"rewards/rejected": -0.24594378471374512, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.6192116341370067e-07, |
|
"logits/chosen": -2.4304065704345703, |
|
"logits/rejected": -2.3491933345794678, |
|
"logps/chosen": -263.8543395996094, |
|
"logps/rejected": -223.7662353515625, |
|
"loss": 0.5863, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": 0.016529660671949387, |
|
"rewards/margins": 0.30523234605789185, |
|
"rewards/rejected": -0.28870272636413574, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.6000765403750473e-07, |
|
"logits/chosen": -2.3874869346618652, |
|
"logits/rejected": -2.369131088256836, |
|
"logps/chosen": -273.19482421875, |
|
"logps/rejected": -219.20993041992188, |
|
"loss": 0.5999, |
|
"rewards/accuracies": 0.7015625238418579, |
|
"rewards/chosen": 0.016616690903902054, |
|
"rewards/margins": 0.27095186710357666, |
|
"rewards/rejected": -0.2543351948261261, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.580941446613088e-07, |
|
"logits/chosen": -2.432959794998169, |
|
"logits/rejected": -2.4002068042755127, |
|
"logps/chosen": -268.3512268066406, |
|
"logps/rejected": -213.71481323242188, |
|
"loss": 0.5767, |
|
"rewards/accuracies": 0.7171875238418579, |
|
"rewards/chosen": 0.02091902121901512, |
|
"rewards/margins": 0.3344683051109314, |
|
"rewards/rejected": -0.3135492205619812, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.5618063528511285e-07, |
|
"logits/chosen": -2.423337459564209, |
|
"logits/rejected": -2.367553234100342, |
|
"logps/chosen": -270.74822998046875, |
|
"logps/rejected": -229.89859008789062, |
|
"loss": 0.5871, |
|
"rewards/accuracies": 0.715624988079071, |
|
"rewards/chosen": 0.01809154823422432, |
|
"rewards/margins": 0.32776904106140137, |
|
"rewards/rejected": -0.30967751145362854, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.542671259089169e-07, |
|
"logits/chosen": -2.407766103744507, |
|
"logits/rejected": -2.3573992252349854, |
|
"logps/chosen": -272.4050598144531, |
|
"logps/rejected": -231.3518524169922, |
|
"loss": 0.592, |
|
"rewards/accuracies": 0.690625011920929, |
|
"rewards/chosen": 0.009805982932448387, |
|
"rewards/margins": 0.31897610425949097, |
|
"rewards/rejected": -0.309170126914978, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.52353616532721e-07, |
|
"logits/chosen": -2.438194990158081, |
|
"logits/rejected": -2.3653242588043213, |
|
"logps/chosen": -265.61004638671875, |
|
"logps/rejected": -224.57614135742188, |
|
"loss": 0.5941, |
|
"rewards/accuracies": 0.692187488079071, |
|
"rewards/chosen": -0.03684517741203308, |
|
"rewards/margins": 0.3040197789669037, |
|
"rewards/rejected": -0.34086498618125916, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.5044010715652504e-07, |
|
"logits/chosen": -2.454868793487549, |
|
"logits/rejected": -2.4135568141937256, |
|
"logps/chosen": -262.34844970703125, |
|
"logps/rejected": -231.43362426757812, |
|
"loss": 0.598, |
|
"rewards/accuracies": 0.6968749761581421, |
|
"rewards/chosen": -0.0009088374790735543, |
|
"rewards/margins": 0.2974187731742859, |
|
"rewards/rejected": -0.29832762479782104, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.485265977803291e-07, |
|
"logits/chosen": -2.378328800201416, |
|
"logits/rejected": -2.380078077316284, |
|
"logps/chosen": -268.5691833496094, |
|
"logps/rejected": -231.4857940673828, |
|
"loss": 0.5899, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": 0.003624826669692993, |
|
"rewards/margins": 0.3238300085067749, |
|
"rewards/rejected": -0.3202051818370819, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.4661308840413316e-07, |
|
"logits/chosen": -2.4507269859313965, |
|
"logits/rejected": -2.4167442321777344, |
|
"logps/chosen": -268.0180969238281, |
|
"logps/rejected": -222.7704315185547, |
|
"loss": 0.5739, |
|
"rewards/accuracies": 0.7203124761581421, |
|
"rewards/chosen": 0.004687085747718811, |
|
"rewards/margins": 0.3652251660823822, |
|
"rewards/rejected": -0.3605380654335022, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 4.446995790279372e-07, |
|
"logits/chosen": -2.4484550952911377, |
|
"logits/rejected": -2.3759899139404297, |
|
"logps/chosen": -270.5180969238281, |
|
"logps/rejected": -231.528564453125, |
|
"loss": 0.5729, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": 0.012805086560547352, |
|
"rewards/margins": 0.3726140558719635, |
|
"rewards/rejected": -0.3598089814186096, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 4.4278606965174123e-07, |
|
"logits/chosen": -2.441225528717041, |
|
"logits/rejected": -2.3825039863586426, |
|
"logps/chosen": -271.6439514160156, |
|
"logps/rejected": -223.9091033935547, |
|
"loss": 0.5831, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.006787353660911322, |
|
"rewards/margins": 0.36242157220840454, |
|
"rewards/rejected": -0.3692089319229126, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 4.408725602755453e-07, |
|
"logits/chosen": -2.4532737731933594, |
|
"logits/rejected": -2.4243369102478027, |
|
"logps/chosen": -266.54681396484375, |
|
"logps/rejected": -231.792236328125, |
|
"loss": 0.5821, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": -0.014241993427276611, |
|
"rewards/margins": 0.35581323504447937, |
|
"rewards/rejected": -0.3700551986694336, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 4.3895905089934936e-07, |
|
"logits/chosen": -2.441526412963867, |
|
"logits/rejected": -2.4337775707244873, |
|
"logps/chosen": -280.2122497558594, |
|
"logps/rejected": -237.7913360595703, |
|
"loss": 0.5681, |
|
"rewards/accuracies": 0.7109375, |
|
"rewards/chosen": 0.010852565988898277, |
|
"rewards/margins": 0.39948010444641113, |
|
"rewards/rejected": -0.3886275291442871, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 4.370455415231534e-07, |
|
"logits/chosen": -2.4393959045410156, |
|
"logits/rejected": -2.3926703929901123, |
|
"logps/chosen": -257.3842468261719, |
|
"logps/rejected": -213.9781494140625, |
|
"loss": 0.575, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": 8.446201536571607e-05, |
|
"rewards/margins": 0.40542498230934143, |
|
"rewards/rejected": -0.4053404927253723, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 4.351320321469575e-07, |
|
"logits/chosen": -2.4491591453552246, |
|
"logits/rejected": -2.398932933807373, |
|
"logps/chosen": -269.54095458984375, |
|
"logps/rejected": -223.14892578125, |
|
"loss": 0.5835, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": -0.04456017538905144, |
|
"rewards/margins": 0.3685937225818634, |
|
"rewards/rejected": -0.41315382719039917, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 4.3321852277076154e-07, |
|
"logits/chosen": -2.4231886863708496, |
|
"logits/rejected": -2.3941125869750977, |
|
"logps/chosen": -278.4034729003906, |
|
"logps/rejected": -239.0082244873047, |
|
"loss": 0.5597, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.026434283703565598, |
|
"rewards/margins": 0.431147962808609, |
|
"rewards/rejected": -0.4575822949409485, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 4.313050133945656e-07, |
|
"logits/chosen": -2.4560158252716064, |
|
"logits/rejected": -2.3756983280181885, |
|
"logps/chosen": -274.8756103515625, |
|
"logps/rejected": -228.38619995117188, |
|
"loss": 0.5685, |
|
"rewards/accuracies": 0.721875011920929, |
|
"rewards/chosen": -0.037291474640369415, |
|
"rewards/margins": 0.42679017782211304, |
|
"rewards/rejected": -0.46408161520957947, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 4.2939150401836967e-07, |
|
"logits/chosen": -2.4025585651397705, |
|
"logits/rejected": -2.362170934677124, |
|
"logps/chosen": -279.28619384765625, |
|
"logps/rejected": -235.0756378173828, |
|
"loss": 0.5796, |
|
"rewards/accuracies": 0.7093750238418579, |
|
"rewards/chosen": -0.03398103266954422, |
|
"rewards/margins": 0.38851290941238403, |
|
"rewards/rejected": -0.42249393463134766, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 4.2747799464217373e-07, |
|
"logits/chosen": -2.3948450088500977, |
|
"logits/rejected": -2.380448818206787, |
|
"logps/chosen": -265.87774658203125, |
|
"logps/rejected": -221.30856323242188, |
|
"loss": 0.5815, |
|
"rewards/accuracies": 0.6890624761581421, |
|
"rewards/chosen": -0.02471747435629368, |
|
"rewards/margins": 0.38041889667510986, |
|
"rewards/rejected": -0.4051364064216614, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 4.255644852659778e-07, |
|
"logits/chosen": -2.428434371948242, |
|
"logits/rejected": -2.3654212951660156, |
|
"logps/chosen": -265.0726013183594, |
|
"logps/rejected": -230.83480834960938, |
|
"loss": 0.5846, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.058827854692935944, |
|
"rewards/margins": 0.39134788513183594, |
|
"rewards/rejected": -0.4501757025718689, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 4.236509758897818e-07, |
|
"logits/chosen": -2.4110920429229736, |
|
"logits/rejected": -2.3683831691741943, |
|
"logps/chosen": -263.7908935546875, |
|
"logps/rejected": -228.82852172851562, |
|
"loss": 0.56, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.040988270193338394, |
|
"rewards/margins": 0.4240415692329407, |
|
"rewards/rejected": -0.4650298058986664, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 4.2173746651358586e-07, |
|
"logits/chosen": -2.487917423248291, |
|
"logits/rejected": -2.386542797088623, |
|
"logps/chosen": -277.84173583984375, |
|
"logps/rejected": -226.375732421875, |
|
"loss": 0.5783, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.0548752136528492, |
|
"rewards/margins": 0.41838139295578003, |
|
"rewards/rejected": -0.4732566475868225, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 4.198239571373899e-07, |
|
"logits/chosen": -2.4249427318573, |
|
"logits/rejected": -2.397531270980835, |
|
"logps/chosen": -280.85748291015625, |
|
"logps/rejected": -239.29263305664062, |
|
"loss": 0.5849, |
|
"rewards/accuracies": 0.714062511920929, |
|
"rewards/chosen": -0.060692138969898224, |
|
"rewards/margins": 0.412163645029068, |
|
"rewards/rejected": -0.472855806350708, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 4.17910447761194e-07, |
|
"logits/chosen": -2.441291332244873, |
|
"logits/rejected": -2.3523077964782715, |
|
"logps/chosen": -252.23779296875, |
|
"logps/rejected": -217.0404510498047, |
|
"loss": 0.5835, |
|
"rewards/accuracies": 0.6953125, |
|
"rewards/chosen": -0.06024731323122978, |
|
"rewards/margins": 0.37643498182296753, |
|
"rewards/rejected": -0.4366822838783264, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 4.1599693838499805e-07, |
|
"logits/chosen": -2.3433680534362793, |
|
"logits/rejected": -2.340506076812744, |
|
"logps/chosen": -248.30313110351562, |
|
"logps/rejected": -215.12033081054688, |
|
"loss": 0.5863, |
|
"rewards/accuracies": 0.684374988079071, |
|
"rewards/chosen": -0.08882372081279755, |
|
"rewards/margins": 0.3825678527355194, |
|
"rewards/rejected": -0.47139161825180054, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 4.140834290088021e-07, |
|
"logits/chosen": -2.3932948112487793, |
|
"logits/rejected": -2.3713467121124268, |
|
"logps/chosen": -246.66860961914062, |
|
"logps/rejected": -206.83572387695312, |
|
"loss": 0.5667, |
|
"rewards/accuracies": 0.7109375, |
|
"rewards/chosen": -0.05001844838261604, |
|
"rewards/margins": 0.4407244324684143, |
|
"rewards/rejected": -0.49074286222457886, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 4.121699196326062e-07, |
|
"logits/chosen": -2.3883261680603027, |
|
"logits/rejected": -2.3786678314208984, |
|
"logps/chosen": -266.27886962890625, |
|
"logps/rejected": -225.5245819091797, |
|
"loss": 0.5482, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.022055109962821007, |
|
"rewards/margins": 0.4830406606197357, |
|
"rewards/rejected": -0.5050958395004272, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 4.1025641025641024e-07, |
|
"logits/chosen": -2.4633374214172363, |
|
"logits/rejected": -2.3861355781555176, |
|
"logps/chosen": -269.9394836425781, |
|
"logps/rejected": -229.15283203125, |
|
"loss": 0.5616, |
|
"rewards/accuracies": 0.721875011920929, |
|
"rewards/chosen": -0.0641530454158783, |
|
"rewards/margins": 0.4700705409049988, |
|
"rewards/rejected": -0.5342236161231995, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 4.083429008802143e-07, |
|
"logits/chosen": -2.394104480743408, |
|
"logits/rejected": -2.379462718963623, |
|
"logps/chosen": -265.15008544921875, |
|
"logps/rejected": -216.9358367919922, |
|
"loss": 0.5678, |
|
"rewards/accuracies": 0.690625011920929, |
|
"rewards/chosen": -0.05918584018945694, |
|
"rewards/margins": 0.4691968560218811, |
|
"rewards/rejected": -0.5283826589584351, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 4.0642939150401836e-07, |
|
"logits/chosen": -2.439371109008789, |
|
"logits/rejected": -2.3606810569763184, |
|
"logps/chosen": -263.8412780761719, |
|
"logps/rejected": -222.4286346435547, |
|
"loss": 0.5684, |
|
"rewards/accuracies": 0.7093750238418579, |
|
"rewards/chosen": -0.07698424160480499, |
|
"rewards/margins": 0.44548702239990234, |
|
"rewards/rejected": -0.5224713087081909, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.0451588212782237e-07, |
|
"logits/chosen": -2.4030070304870605, |
|
"logits/rejected": -2.3470616340637207, |
|
"logps/chosen": -277.69207763671875, |
|
"logps/rejected": -241.84603881835938, |
|
"loss": 0.5782, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": -0.08946071565151215, |
|
"rewards/margins": 0.44598451256752014, |
|
"rewards/rejected": -0.5354452729225159, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.0260237275162643e-07, |
|
"logits/chosen": -2.4240808486938477, |
|
"logits/rejected": -2.379459857940674, |
|
"logps/chosen": -267.9504699707031, |
|
"logps/rejected": -237.7564239501953, |
|
"loss": 0.5631, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.1054706797003746, |
|
"rewards/margins": 0.47193509340286255, |
|
"rewards/rejected": -0.5774057507514954, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 4.006888633754305e-07, |
|
"logits/chosen": -2.4196529388427734, |
|
"logits/rejected": -2.378551959991455, |
|
"logps/chosen": -265.9372253417969, |
|
"logps/rejected": -226.904296875, |
|
"loss": 0.5845, |
|
"rewards/accuracies": 0.7015625238418579, |
|
"rewards/chosen": -0.06137767434120178, |
|
"rewards/margins": 0.43556785583496094, |
|
"rewards/rejected": -0.4969455301761627, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.9877535399923456e-07, |
|
"logits/chosen": -2.418703317642212, |
|
"logits/rejected": -2.3797688484191895, |
|
"logps/chosen": -272.62408447265625, |
|
"logps/rejected": -233.73880004882812, |
|
"loss": 0.5658, |
|
"rewards/accuracies": 0.723437488079071, |
|
"rewards/chosen": -0.07619120925664902, |
|
"rewards/margins": 0.48392266035079956, |
|
"rewards/rejected": -0.5601138472557068, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 3.968618446230386e-07, |
|
"logits/chosen": -2.4377353191375732, |
|
"logits/rejected": -2.3761370182037354, |
|
"logps/chosen": -264.58331298828125, |
|
"logps/rejected": -227.14138793945312, |
|
"loss": 0.5727, |
|
"rewards/accuracies": 0.692187488079071, |
|
"rewards/chosen": -0.07739187777042389, |
|
"rewards/margins": 0.45683830976486206, |
|
"rewards/rejected": -0.5342302918434143, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 3.949483352468427e-07, |
|
"logits/chosen": -2.4207470417022705, |
|
"logits/rejected": -2.3555076122283936, |
|
"logps/chosen": -254.27468872070312, |
|
"logps/rejected": -230.114013671875, |
|
"loss": 0.5806, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.09545855224132538, |
|
"rewards/margins": 0.434993177652359, |
|
"rewards/rejected": -0.5304517149925232, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 3.9303482587064674e-07, |
|
"logits/chosen": -2.408648729324341, |
|
"logits/rejected": -2.392381429672241, |
|
"logps/chosen": -268.1350402832031, |
|
"logps/rejected": -232.40829467773438, |
|
"loss": 0.5628, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.10538534820079803, |
|
"rewards/margins": 0.4874357283115387, |
|
"rewards/rejected": -0.5928210616111755, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 3.911213164944508e-07, |
|
"logits/chosen": -2.4571163654327393, |
|
"logits/rejected": -2.4369616508483887, |
|
"logps/chosen": -260.79400634765625, |
|
"logps/rejected": -234.09521484375, |
|
"loss": 0.5797, |
|
"rewards/accuracies": 0.7109375, |
|
"rewards/chosen": -0.08784712105989456, |
|
"rewards/margins": 0.4429641664028168, |
|
"rewards/rejected": -0.5308112502098083, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 3.8920780711825487e-07, |
|
"logits/chosen": -2.4167487621307373, |
|
"logits/rejected": -2.3556063175201416, |
|
"logps/chosen": -264.9407653808594, |
|
"logps/rejected": -234.77792358398438, |
|
"loss": 0.5627, |
|
"rewards/accuracies": 0.7046874761581421, |
|
"rewards/chosen": -0.09353096783161163, |
|
"rewards/margins": 0.47954684495925903, |
|
"rewards/rejected": -0.5730777978897095, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 3.8729429774205893e-07, |
|
"logits/chosen": -2.4163200855255127, |
|
"logits/rejected": -2.3464267253875732, |
|
"logps/chosen": -258.3912048339844, |
|
"logps/rejected": -219.3795623779297, |
|
"loss": 0.5831, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.13110245764255524, |
|
"rewards/margins": 0.43385767936706543, |
|
"rewards/rejected": -0.5649601221084595, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 3.8538078836586294e-07, |
|
"logits/chosen": -2.4255287647247314, |
|
"logits/rejected": -2.355109453201294, |
|
"logps/chosen": -283.6699523925781, |
|
"logps/rejected": -235.8437957763672, |
|
"loss": 0.5491, |
|
"rewards/accuracies": 0.746874988079071, |
|
"rewards/chosen": -0.07336901128292084, |
|
"rewards/margins": 0.5329583883285522, |
|
"rewards/rejected": -0.6063274145126343, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 3.83467278989667e-07, |
|
"logits/chosen": -2.44077205657959, |
|
"logits/rejected": -2.361696481704712, |
|
"logps/chosen": -273.6830749511719, |
|
"logps/rejected": -237.58401489257812, |
|
"loss": 0.5821, |
|
"rewards/accuracies": 0.684374988079071, |
|
"rewards/chosen": -0.09832929819822311, |
|
"rewards/margins": 0.45769548416137695, |
|
"rewards/rejected": -0.5560247302055359, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3.8155376961347106e-07, |
|
"logits/chosen": -2.424898624420166, |
|
"logits/rejected": -2.3718905448913574, |
|
"logps/chosen": -268.20269775390625, |
|
"logps/rejected": -231.1397247314453, |
|
"loss": 0.5609, |
|
"rewards/accuracies": 0.714062511920929, |
|
"rewards/chosen": -0.0910768061876297, |
|
"rewards/margins": 0.4992894232273102, |
|
"rewards/rejected": -0.5903662443161011, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 3.796402602372751e-07, |
|
"logits/chosen": -2.4363582134246826, |
|
"logits/rejected": -2.377957582473755, |
|
"logps/chosen": -276.6056213378906, |
|
"logps/rejected": -233.3043975830078, |
|
"loss": 0.5433, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.05291268229484558, |
|
"rewards/margins": 0.5536981821060181, |
|
"rewards/rejected": -0.606610894203186, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 3.777267508610792e-07, |
|
"logits/chosen": -2.3768372535705566, |
|
"logits/rejected": -2.393470287322998, |
|
"logps/chosen": -272.34576416015625, |
|
"logps/rejected": -224.0140838623047, |
|
"loss": 0.5594, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": -0.10874155908823013, |
|
"rewards/margins": 0.5176196694374084, |
|
"rewards/rejected": -0.6263612508773804, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 3.7581324148488325e-07, |
|
"logits/chosen": -2.4112918376922607, |
|
"logits/rejected": -2.388984441757202, |
|
"logps/chosen": -273.1308898925781, |
|
"logps/rejected": -239.2963409423828, |
|
"loss": 0.5473, |
|
"rewards/accuracies": 0.714062511920929, |
|
"rewards/chosen": -0.1055203229188919, |
|
"rewards/margins": 0.545116126537323, |
|
"rewards/rejected": -0.6506363749504089, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 3.738997321086873e-07, |
|
"logits/chosen": -2.3986384868621826, |
|
"logits/rejected": -2.3761980533599854, |
|
"logps/chosen": -268.1097106933594, |
|
"logps/rejected": -234.03213500976562, |
|
"loss": 0.5697, |
|
"rewards/accuracies": 0.7015625238418579, |
|
"rewards/chosen": -0.10402411222457886, |
|
"rewards/margins": 0.4980190396308899, |
|
"rewards/rejected": -0.6020431518554688, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 3.7198622273249137e-07, |
|
"logits/chosen": -2.408790111541748, |
|
"logits/rejected": -2.36995267868042, |
|
"logps/chosen": -284.46514892578125, |
|
"logps/rejected": -238.64352416992188, |
|
"loss": 0.5648, |
|
"rewards/accuracies": 0.7015625238418579, |
|
"rewards/chosen": -0.10631656646728516, |
|
"rewards/margins": 0.5066617131233215, |
|
"rewards/rejected": -0.6129782795906067, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_logits/chosen": -2.0685439109802246, |
|
"eval_logits/rejected": -2.015035390853882, |
|
"eval_logps/chosen": -265.6653137207031, |
|
"eval_logps/rejected": -230.13473510742188, |
|
"eval_loss": 0.5547605752944946, |
|
"eval_rewards/accuracies": 0.7114999890327454, |
|
"eval_rewards/chosen": -0.11070162057876587, |
|
"eval_rewards/margins": 0.5359883904457092, |
|
"eval_rewards/rejected": -0.6466900110244751, |
|
"eval_runtime": 1658.2887, |
|
"eval_samples_per_second": 1.206, |
|
"eval_steps_per_second": 0.302, |
|
"step": 968 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 3.7007271335629544e-07, |
|
"logits/chosen": -2.44268798828125, |
|
"logits/rejected": -2.3829662799835205, |
|
"logps/chosen": -269.1377868652344, |
|
"logps/rejected": -230.5484161376953, |
|
"loss": 0.5465, |
|
"rewards/accuracies": 0.7359374761581421, |
|
"rewards/chosen": -0.06904083490371704, |
|
"rewards/margins": 0.5532599091529846, |
|
"rewards/rejected": -0.6223007440567017, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 3.681592039800995e-07, |
|
"logits/chosen": -2.417346715927124, |
|
"logits/rejected": -2.347588062286377, |
|
"logps/chosen": -267.7151184082031, |
|
"logps/rejected": -227.2200469970703, |
|
"loss": 0.5439, |
|
"rewards/accuracies": 0.7406250238418579, |
|
"rewards/chosen": -0.10437774658203125, |
|
"rewards/margins": 0.5515931844711304, |
|
"rewards/rejected": -0.6559709310531616, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 3.662456946039035e-07, |
|
"logits/chosen": -2.4241156578063965, |
|
"logits/rejected": -2.3838791847229004, |
|
"logps/chosen": -277.7347717285156, |
|
"logps/rejected": -225.5718994140625, |
|
"loss": 0.5427, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.09920702129602432, |
|
"rewards/margins": 0.5613608360290527, |
|
"rewards/rejected": -0.6605678796768188, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 3.6433218522770757e-07, |
|
"logits/chosen": -2.432286024093628, |
|
"logits/rejected": -2.350795030593872, |
|
"logps/chosen": -280.2424621582031, |
|
"logps/rejected": -238.3251495361328, |
|
"loss": 0.5514, |
|
"rewards/accuracies": 0.7203124761581421, |
|
"rewards/chosen": -0.10347769409418106, |
|
"rewards/margins": 0.5584946870803833, |
|
"rewards/rejected": -0.6619724035263062, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 3.6241867585151163e-07, |
|
"logits/chosen": -2.402839183807373, |
|
"logits/rejected": -2.3319945335388184, |
|
"logps/chosen": -258.05889892578125, |
|
"logps/rejected": -223.97216796875, |
|
"loss": 0.5708, |
|
"rewards/accuracies": 0.723437488079071, |
|
"rewards/chosen": -0.13955342769622803, |
|
"rewards/margins": 0.5002504587173462, |
|
"rewards/rejected": -0.6398038268089294, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 3.605051664753157e-07, |
|
"logits/chosen": -2.428884267807007, |
|
"logits/rejected": -2.35528826713562, |
|
"logps/chosen": -272.50457763671875, |
|
"logps/rejected": -231.6855010986328, |
|
"loss": 0.5459, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.09094850718975067, |
|
"rewards/margins": 0.5691753029823303, |
|
"rewards/rejected": -0.6601237058639526, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 3.5859165709911975e-07, |
|
"logits/chosen": -2.4268717765808105, |
|
"logits/rejected": -2.349578380584717, |
|
"logps/chosen": -275.3159484863281, |
|
"logps/rejected": -223.78414916992188, |
|
"loss": 0.5457, |
|
"rewards/accuracies": 0.7281249761581421, |
|
"rewards/chosen": -0.07625160366296768, |
|
"rewards/margins": 0.5891859531402588, |
|
"rewards/rejected": -0.6654375791549683, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 3.566781477229238e-07, |
|
"logits/chosen": -2.3932693004608154, |
|
"logits/rejected": -2.3574328422546387, |
|
"logps/chosen": -256.0076599121094, |
|
"logps/rejected": -222.4263458251953, |
|
"loss": 0.5746, |
|
"rewards/accuracies": 0.698437511920929, |
|
"rewards/chosen": -0.1472007781267166, |
|
"rewards/margins": 0.5160545706748962, |
|
"rewards/rejected": -0.6632553339004517, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 3.547646383467279e-07, |
|
"logits/chosen": -2.3803439140319824, |
|
"logits/rejected": -2.3705551624298096, |
|
"logps/chosen": -261.7268371582031, |
|
"logps/rejected": -223.354736328125, |
|
"loss": 0.5494, |
|
"rewards/accuracies": 0.715624988079071, |
|
"rewards/chosen": -0.09807038307189941, |
|
"rewards/margins": 0.5811691880226135, |
|
"rewards/rejected": -0.6792395114898682, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 3.5285112897053194e-07, |
|
"logits/chosen": -2.4154138565063477, |
|
"logits/rejected": -2.3943445682525635, |
|
"logps/chosen": -280.23260498046875, |
|
"logps/rejected": -242.3921661376953, |
|
"loss": 0.5691, |
|
"rewards/accuracies": 0.7203124761581421, |
|
"rewards/chosen": -0.12113519757986069, |
|
"rewards/margins": 0.510390043258667, |
|
"rewards/rejected": -0.6315252184867859, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 3.50937619594336e-07, |
|
"logits/chosen": -2.400869131088257, |
|
"logits/rejected": -2.3419127464294434, |
|
"logps/chosen": -269.11322021484375, |
|
"logps/rejected": -218.6542510986328, |
|
"loss": 0.5401, |
|
"rewards/accuracies": 0.7359374761581421, |
|
"rewards/chosen": -0.11252293735742569, |
|
"rewards/margins": 0.6148664355278015, |
|
"rewards/rejected": -0.7273894548416138, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 3.4902411021814007e-07, |
|
"logits/chosen": -2.398010492324829, |
|
"logits/rejected": -2.373257875442505, |
|
"logps/chosen": -261.5543212890625, |
|
"logps/rejected": -220.818603515625, |
|
"loss": 0.5628, |
|
"rewards/accuracies": 0.7093750238418579, |
|
"rewards/chosen": -0.12159077823162079, |
|
"rewards/margins": 0.5545440912246704, |
|
"rewards/rejected": -0.6761348843574524, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 3.4711060084194413e-07, |
|
"logits/chosen": -2.398470401763916, |
|
"logits/rejected": -2.3667078018188477, |
|
"logps/chosen": -265.3479309082031, |
|
"logps/rejected": -223.44967651367188, |
|
"loss": 0.5444, |
|
"rewards/accuracies": 0.739062488079071, |
|
"rewards/chosen": -0.13141986727714539, |
|
"rewards/margins": 0.5646113157272339, |
|
"rewards/rejected": -0.6960310935974121, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 3.4519709146574814e-07, |
|
"logits/chosen": -2.400993824005127, |
|
"logits/rejected": -2.34995698928833, |
|
"logps/chosen": -261.8519287109375, |
|
"logps/rejected": -225.39340209960938, |
|
"loss": 0.5674, |
|
"rewards/accuracies": 0.714062511920929, |
|
"rewards/chosen": -0.14484994113445282, |
|
"rewards/margins": 0.5336871147155762, |
|
"rewards/rejected": -0.6785370707511902, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 3.432835820895522e-07, |
|
"logits/chosen": -2.4596643447875977, |
|
"logits/rejected": -2.3810102939605713, |
|
"logps/chosen": -277.64697265625, |
|
"logps/rejected": -240.9037322998047, |
|
"loss": 0.5414, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.0647854283452034, |
|
"rewards/margins": 0.6014553308486938, |
|
"rewards/rejected": -0.6662408113479614, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 3.4137007271335626e-07, |
|
"logits/chosen": -2.4132676124572754, |
|
"logits/rejected": -2.3595833778381348, |
|
"logps/chosen": -269.6545104980469, |
|
"logps/rejected": -220.05996704101562, |
|
"loss": 0.5393, |
|
"rewards/accuracies": 0.7203124761581421, |
|
"rewards/chosen": -0.1037089005112648, |
|
"rewards/margins": 0.624849796295166, |
|
"rewards/rejected": -0.7285586595535278, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 3.394565633371603e-07, |
|
"logits/chosen": -2.4248127937316895, |
|
"logits/rejected": -2.3464858531951904, |
|
"logps/chosen": -280.0205078125, |
|
"logps/rejected": -239.3312225341797, |
|
"loss": 0.5435, |
|
"rewards/accuracies": 0.723437488079071, |
|
"rewards/chosen": -0.0879250168800354, |
|
"rewards/margins": 0.6255140900611877, |
|
"rewards/rejected": -0.7134391069412231, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 3.375430539609644e-07, |
|
"logits/chosen": -2.3959927558898926, |
|
"logits/rejected": -2.3690690994262695, |
|
"logps/chosen": -256.87310791015625, |
|
"logps/rejected": -227.3232879638672, |
|
"loss": 0.5591, |
|
"rewards/accuracies": 0.7281249761581421, |
|
"rewards/chosen": -0.12515565752983093, |
|
"rewards/margins": 0.5448717474937439, |
|
"rewards/rejected": -0.6700273752212524, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 3.3562954458476845e-07, |
|
"logits/chosen": -2.349555492401123, |
|
"logits/rejected": -2.3227591514587402, |
|
"logps/chosen": -253.9552459716797, |
|
"logps/rejected": -217.4364776611328, |
|
"loss": 0.5485, |
|
"rewards/accuracies": 0.7203124761581421, |
|
"rewards/chosen": -0.11552216857671738, |
|
"rewards/margins": 0.5672619342803955, |
|
"rewards/rejected": -0.6827840805053711, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 3.337160352085725e-07, |
|
"logits/chosen": -2.403165340423584, |
|
"logits/rejected": -2.312551975250244, |
|
"logps/chosen": -266.38970947265625, |
|
"logps/rejected": -219.06478881835938, |
|
"loss": 0.5408, |
|
"rewards/accuracies": 0.7406250238418579, |
|
"rewards/chosen": -0.11588382720947266, |
|
"rewards/margins": 0.6109603643417358, |
|
"rewards/rejected": -0.7268441915512085, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 3.3180252583237657e-07, |
|
"logits/chosen": -2.426140308380127, |
|
"logits/rejected": -2.364060640335083, |
|
"logps/chosen": -277.64935302734375, |
|
"logps/rejected": -233.41964721679688, |
|
"loss": 0.5449, |
|
"rewards/accuracies": 0.7359374761581421, |
|
"rewards/chosen": -0.15606358647346497, |
|
"rewards/margins": 0.5846539735794067, |
|
"rewards/rejected": -0.7407175302505493, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 3.2988901645618063e-07, |
|
"logits/chosen": -2.4718017578125, |
|
"logits/rejected": -2.4017224311828613, |
|
"logps/chosen": -267.055908203125, |
|
"logps/rejected": -247.3446044921875, |
|
"loss": 0.5608, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.139754980802536, |
|
"rewards/margins": 0.585044801235199, |
|
"rewards/rejected": -0.7247998714447021, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 3.279755070799847e-07, |
|
"logits/chosen": -2.3561863899230957, |
|
"logits/rejected": -2.336796522140503, |
|
"logps/chosen": -260.29742431640625, |
|
"logps/rejected": -226.8851776123047, |
|
"loss": 0.5631, |
|
"rewards/accuracies": 0.7046874761581421, |
|
"rewards/chosen": -0.1206628829240799, |
|
"rewards/margins": 0.5734033584594727, |
|
"rewards/rejected": -0.6940661668777466, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 3.260619977037887e-07, |
|
"logits/chosen": -2.383582353591919, |
|
"logits/rejected": -2.350806713104248, |
|
"logps/chosen": -261.0791015625, |
|
"logps/rejected": -222.1552734375, |
|
"loss": 0.5274, |
|
"rewards/accuracies": 0.7281249761581421, |
|
"rewards/chosen": -0.11264900863170624, |
|
"rewards/margins": 0.6694163084030151, |
|
"rewards/rejected": -0.7820653915405273, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 3.2414848832759277e-07, |
|
"logits/chosen": -2.3713862895965576, |
|
"logits/rejected": -2.331512928009033, |
|
"logps/chosen": -257.71209716796875, |
|
"logps/rejected": -222.45755004882812, |
|
"loss": 0.5578, |
|
"rewards/accuracies": 0.715624988079071, |
|
"rewards/chosen": -0.14970478415489197, |
|
"rewards/margins": 0.5587034821510315, |
|
"rewards/rejected": -0.7084082365036011, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 3.2223497895139683e-07, |
|
"logits/chosen": -2.3780715465545654, |
|
"logits/rejected": -2.3363735675811768, |
|
"logps/chosen": -272.34039306640625, |
|
"logps/rejected": -232.05892944335938, |
|
"loss": 0.5362, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -0.11652742326259613, |
|
"rewards/margins": 0.6240389347076416, |
|
"rewards/rejected": -0.7405663728713989, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 3.203214695752009e-07, |
|
"logits/chosen": -2.4418883323669434, |
|
"logits/rejected": -2.374427080154419, |
|
"logps/chosen": -270.23785400390625, |
|
"logps/rejected": -228.5772247314453, |
|
"loss": 0.5371, |
|
"rewards/accuracies": 0.7421875, |
|
"rewards/chosen": -0.14697889983654022, |
|
"rewards/margins": 0.6358888149261475, |
|
"rewards/rejected": -0.7828676700592041, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 3.1840796019900495e-07, |
|
"logits/chosen": -2.321744203567505, |
|
"logits/rejected": -2.3201723098754883, |
|
"logps/chosen": -244.7095184326172, |
|
"logps/rejected": -224.6121368408203, |
|
"loss": 0.5635, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.1834283024072647, |
|
"rewards/margins": 0.5532564520835876, |
|
"rewards/rejected": -0.7366846799850464, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 3.16494450822809e-07, |
|
"logits/chosen": -2.392040967941284, |
|
"logits/rejected": -2.3873581886291504, |
|
"logps/chosen": -260.9786682128906, |
|
"logps/rejected": -217.41488647460938, |
|
"loss": 0.5285, |
|
"rewards/accuracies": 0.7281249761581421, |
|
"rewards/chosen": -0.10610403120517731, |
|
"rewards/margins": 0.6539296507835388, |
|
"rewards/rejected": -0.7600336074829102, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 3.145809414466131e-07, |
|
"logits/chosen": -2.4131011962890625, |
|
"logits/rejected": -2.3864870071411133, |
|
"logps/chosen": -268.1712951660156, |
|
"logps/rejected": -243.2780303955078, |
|
"loss": 0.5451, |
|
"rewards/accuracies": 0.7015625238418579, |
|
"rewards/chosen": -0.14184710383415222, |
|
"rewards/margins": 0.5766977071762085, |
|
"rewards/rejected": -0.7185447216033936, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 3.1266743207041714e-07, |
|
"logits/chosen": -2.4226272106170654, |
|
"logits/rejected": -2.3552260398864746, |
|
"logps/chosen": -275.0395812988281, |
|
"logps/rejected": -246.27993774414062, |
|
"loss": 0.5488, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -0.08415131270885468, |
|
"rewards/margins": 0.5906020402908325, |
|
"rewards/rejected": -0.6747534275054932, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 3.107539226942212e-07, |
|
"logits/chosen": -2.384654998779297, |
|
"logits/rejected": -2.3474934101104736, |
|
"logps/chosen": -280.5086364746094, |
|
"logps/rejected": -249.03744506835938, |
|
"loss": 0.5093, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.09258462488651276, |
|
"rewards/margins": 0.6895312070846558, |
|
"rewards/rejected": -0.7821158170700073, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 3.0884041331802526e-07, |
|
"logits/chosen": -2.3851654529571533, |
|
"logits/rejected": -2.3415586948394775, |
|
"logps/chosen": -275.87249755859375, |
|
"logps/rejected": -238.9077606201172, |
|
"loss": 0.5182, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.12925606966018677, |
|
"rewards/margins": 0.6958507299423218, |
|
"rewards/rejected": -0.8251067399978638, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 3.0692690394182927e-07, |
|
"logits/chosen": -2.347446918487549, |
|
"logits/rejected": -2.3569579124450684, |
|
"logps/chosen": -254.8171844482422, |
|
"logps/rejected": -225.75048828125, |
|
"loss": 0.5475, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.12040869146585464, |
|
"rewards/margins": 0.6625940203666687, |
|
"rewards/rejected": -0.7830026745796204, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 3.0501339456563334e-07, |
|
"logits/chosen": -2.3930153846740723, |
|
"logits/rejected": -2.3339743614196777, |
|
"logps/chosen": -282.39208984375, |
|
"logps/rejected": -241.4809112548828, |
|
"loss": 0.5539, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.15331654250621796, |
|
"rewards/margins": 0.5918524861335754, |
|
"rewards/rejected": -0.7451690435409546, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 3.030998851894374e-07, |
|
"logits/chosen": -2.4213199615478516, |
|
"logits/rejected": -2.372786045074463, |
|
"logps/chosen": -277.1365661621094, |
|
"logps/rejected": -231.59835815429688, |
|
"loss": 0.5408, |
|
"rewards/accuracies": 0.7359374761581421, |
|
"rewards/chosen": -0.11530748754739761, |
|
"rewards/margins": 0.6329683065414429, |
|
"rewards/rejected": -0.7482757568359375, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 3.0118637581324146e-07, |
|
"logits/chosen": -2.3494033813476562, |
|
"logits/rejected": -2.2917075157165527, |
|
"logps/chosen": -283.3105773925781, |
|
"logps/rejected": -236.96218872070312, |
|
"loss": 0.5337, |
|
"rewards/accuracies": 0.754687488079071, |
|
"rewards/chosen": -0.16250093281269073, |
|
"rewards/margins": 0.6374029517173767, |
|
"rewards/rejected": -0.7999038696289062, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 2.992728664370455e-07, |
|
"logits/chosen": -2.410579204559326, |
|
"logits/rejected": -2.395461320877075, |
|
"logps/chosen": -265.9371643066406, |
|
"logps/rejected": -225.1396484375, |
|
"loss": 0.5493, |
|
"rewards/accuracies": 0.7281249761581421, |
|
"rewards/chosen": -0.15022125840187073, |
|
"rewards/margins": 0.6349747776985168, |
|
"rewards/rejected": -0.78519606590271, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 2.973593570608496e-07, |
|
"logits/chosen": -2.354858636856079, |
|
"logits/rejected": -2.333374500274658, |
|
"logps/chosen": -267.4329528808594, |
|
"logps/rejected": -229.37033081054688, |
|
"loss": 0.5584, |
|
"rewards/accuracies": 0.707812488079071, |
|
"rewards/chosen": -0.13673868775367737, |
|
"rewards/margins": 0.6077600121498108, |
|
"rewards/rejected": -0.7444987297058105, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 2.9544584768465365e-07, |
|
"logits/chosen": -2.4165000915527344, |
|
"logits/rejected": -2.389608860015869, |
|
"logps/chosen": -256.08953857421875, |
|
"logps/rejected": -234.6078643798828, |
|
"loss": 0.5434, |
|
"rewards/accuracies": 0.729687511920929, |
|
"rewards/chosen": -0.11967863142490387, |
|
"rewards/margins": 0.641726553440094, |
|
"rewards/rejected": -0.7614051699638367, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 2.935323383084577e-07, |
|
"logits/chosen": -2.441561460494995, |
|
"logits/rejected": -2.385502338409424, |
|
"logps/chosen": -276.8068542480469, |
|
"logps/rejected": -226.84146118164062, |
|
"loss": 0.56, |
|
"rewards/accuracies": 0.6953125, |
|
"rewards/chosen": -0.18507501482963562, |
|
"rewards/margins": 0.5834146738052368, |
|
"rewards/rejected": -0.7684897780418396, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 2.9161882893226177e-07, |
|
"logits/chosen": -2.4208102226257324, |
|
"logits/rejected": -2.371891498565674, |
|
"logps/chosen": -267.7480163574219, |
|
"logps/rejected": -228.93954467773438, |
|
"loss": 0.5457, |
|
"rewards/accuracies": 0.7281249761581421, |
|
"rewards/chosen": -0.12237964570522308, |
|
"rewards/margins": 0.6243287324905396, |
|
"rewards/rejected": -0.746708333492279, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 2.8970531955606583e-07, |
|
"logits/chosen": -2.4296679496765137, |
|
"logits/rejected": -2.384805202484131, |
|
"logps/chosen": -269.50341796875, |
|
"logps/rejected": -226.89120483398438, |
|
"loss": 0.5158, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.11908123642206192, |
|
"rewards/margins": 0.7121685743331909, |
|
"rewards/rejected": -0.8312498331069946, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 2.8779181017986984e-07, |
|
"logits/chosen": -2.3762552738189697, |
|
"logits/rejected": -2.3503944873809814, |
|
"logps/chosen": -265.67657470703125, |
|
"logps/rejected": -227.09695434570312, |
|
"loss": 0.5772, |
|
"rewards/accuracies": 0.6781250238418579, |
|
"rewards/chosen": -0.15753719210624695, |
|
"rewards/margins": 0.5870577692985535, |
|
"rewards/rejected": -0.744594931602478, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 2.858783008036739e-07, |
|
"logits/chosen": -2.433023452758789, |
|
"logits/rejected": -2.39740252494812, |
|
"logps/chosen": -254.3402862548828, |
|
"logps/rejected": -216.28042602539062, |
|
"loss": 0.5347, |
|
"rewards/accuracies": 0.7265625, |
|
"rewards/chosen": -0.11956797540187836, |
|
"rewards/margins": 0.6502863168716431, |
|
"rewards/rejected": -0.7698543667793274, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 2.8396479142747797e-07, |
|
"logits/chosen": -2.3455922603607178, |
|
"logits/rejected": -2.367486000061035, |
|
"logps/chosen": -271.64971923828125, |
|
"logps/rejected": -226.45278930664062, |
|
"loss": 0.5615, |
|
"rewards/accuracies": 0.714062511920929, |
|
"rewards/chosen": -0.12722182273864746, |
|
"rewards/margins": 0.5968899726867676, |
|
"rewards/rejected": -0.7241117358207703, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 2.8205128205128203e-07, |
|
"logits/chosen": -2.437638759613037, |
|
"logits/rejected": -2.3999106884002686, |
|
"logps/chosen": -278.81402587890625, |
|
"logps/rejected": -233.3411407470703, |
|
"loss": 0.5484, |
|
"rewards/accuracies": 0.707812488079071, |
|
"rewards/chosen": -0.16687321662902832, |
|
"rewards/margins": 0.6460558176040649, |
|
"rewards/rejected": -0.8129289746284485, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 2.801377726750861e-07, |
|
"logits/chosen": -2.4091131687164307, |
|
"logits/rejected": -2.3717777729034424, |
|
"logps/chosen": -259.6352844238281, |
|
"logps/rejected": -234.7488250732422, |
|
"loss": 0.5472, |
|
"rewards/accuracies": 0.723437488079071, |
|
"rewards/chosen": -0.12939931452274323, |
|
"rewards/margins": 0.6247848272323608, |
|
"rewards/rejected": -0.7541841268539429, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 2.7822426329889015e-07, |
|
"logits/chosen": -2.3630013465881348, |
|
"logits/rejected": -2.3308510780334473, |
|
"logps/chosen": -265.0796813964844, |
|
"logps/rejected": -235.9908905029297, |
|
"loss": 0.5262, |
|
"rewards/accuracies": 0.745312511920929, |
|
"rewards/chosen": -0.12796229124069214, |
|
"rewards/margins": 0.6874942779541016, |
|
"rewards/rejected": -0.8154565691947937, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 2.763107539226942e-07, |
|
"logits/chosen": -2.384431838989258, |
|
"logits/rejected": -2.3531229496002197, |
|
"logps/chosen": -273.24407958984375, |
|
"logps/rejected": -237.3506622314453, |
|
"loss": 0.5534, |
|
"rewards/accuracies": 0.739062488079071, |
|
"rewards/chosen": -0.13397890329360962, |
|
"rewards/margins": 0.6210099458694458, |
|
"rewards/rejected": -0.7549887895584106, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 2.743972445464983e-07, |
|
"logits/chosen": -2.3733363151550293, |
|
"logits/rejected": -2.3497159481048584, |
|
"logps/chosen": -256.4417419433594, |
|
"logps/rejected": -218.98245239257812, |
|
"loss": 0.5466, |
|
"rewards/accuracies": 0.729687511920929, |
|
"rewards/chosen": -0.10995250940322876, |
|
"rewards/margins": 0.625704288482666, |
|
"rewards/rejected": -0.7356568574905396, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 2.7248373517030234e-07, |
|
"logits/chosen": -2.391806125640869, |
|
"logits/rejected": -2.3594701290130615, |
|
"logps/chosen": -261.59954833984375, |
|
"logps/rejected": -222.4551544189453, |
|
"loss": 0.5685, |
|
"rewards/accuracies": 0.6968749761581421, |
|
"rewards/chosen": -0.1836286336183548, |
|
"rewards/margins": 0.6219319105148315, |
|
"rewards/rejected": -0.8055604696273804, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 2.705702257941064e-07, |
|
"logits/chosen": -2.395291805267334, |
|
"logits/rejected": -2.335513114929199, |
|
"logps/chosen": -271.76495361328125, |
|
"logps/rejected": -223.66604614257812, |
|
"loss": 0.5284, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.13636186718940735, |
|
"rewards/margins": 0.6964784860610962, |
|
"rewards/rejected": -0.8328403234481812, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 2.686567164179104e-07, |
|
"logits/chosen": -2.409865140914917, |
|
"logits/rejected": -2.343632936477661, |
|
"logps/chosen": -261.92218017578125, |
|
"logps/rejected": -225.1486358642578, |
|
"loss": 0.528, |
|
"rewards/accuracies": 0.7578125, |
|
"rewards/chosen": -0.10242755711078644, |
|
"rewards/margins": 0.660239577293396, |
|
"rewards/rejected": -0.7626670598983765, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 2.6674320704171447e-07, |
|
"logits/chosen": -2.3943867683410645, |
|
"logits/rejected": -2.34450101852417, |
|
"logps/chosen": -267.44940185546875, |
|
"logps/rejected": -227.99658203125, |
|
"loss": 0.539, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.15267999470233917, |
|
"rewards/margins": 0.6513444185256958, |
|
"rewards/rejected": -0.8040245175361633, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 2.6482969766551853e-07, |
|
"logits/chosen": -2.3414573669433594, |
|
"logits/rejected": -2.3219292163848877, |
|
"logps/chosen": -263.6014709472656, |
|
"logps/rejected": -227.2323760986328, |
|
"loss": 0.5145, |
|
"rewards/accuracies": 0.7359374761581421, |
|
"rewards/chosen": -0.12107870727777481, |
|
"rewards/margins": 0.7130403518676758, |
|
"rewards/rejected": -0.8341191411018372, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 2.629161882893226e-07, |
|
"logits/chosen": -2.4271631240844727, |
|
"logits/rejected": -2.3703784942626953, |
|
"logps/chosen": -279.36041259765625, |
|
"logps/rejected": -239.1962127685547, |
|
"loss": 0.5668, |
|
"rewards/accuracies": 0.7171875238418579, |
|
"rewards/chosen": -0.17160965502262115, |
|
"rewards/margins": 0.6050060987472534, |
|
"rewards/rejected": -0.7766157388687134, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 2.6100267891312666e-07, |
|
"logits/chosen": -2.3565828800201416, |
|
"logits/rejected": -2.311810255050659, |
|
"logps/chosen": -261.40228271484375, |
|
"logps/rejected": -224.05148315429688, |
|
"loss": 0.5431, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.1451948881149292, |
|
"rewards/margins": 0.6642559170722961, |
|
"rewards/rejected": -0.8094508051872253, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 2.590891695369307e-07, |
|
"logits/chosen": -2.4056944847106934, |
|
"logits/rejected": -2.3697822093963623, |
|
"logps/chosen": -281.52032470703125, |
|
"logps/rejected": -240.4828338623047, |
|
"loss": 0.5358, |
|
"rewards/accuracies": 0.7359374761581421, |
|
"rewards/chosen": -0.10201269388198853, |
|
"rewards/margins": 0.6899839639663696, |
|
"rewards/rejected": -0.7919965982437134, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 2.571756601607348e-07, |
|
"logits/chosen": -2.399779796600342, |
|
"logits/rejected": -2.3576903343200684, |
|
"logps/chosen": -281.99725341796875, |
|
"logps/rejected": -232.83047485351562, |
|
"loss": 0.5334, |
|
"rewards/accuracies": 0.723437488079071, |
|
"rewards/chosen": -0.13619406521320343, |
|
"rewards/margins": 0.7102779150009155, |
|
"rewards/rejected": -0.8464719653129578, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 2.5526215078453884e-07, |
|
"logits/chosen": -2.3670992851257324, |
|
"logits/rejected": -2.330780506134033, |
|
"logps/chosen": -275.53179931640625, |
|
"logps/rejected": -241.46340942382812, |
|
"loss": 0.551, |
|
"rewards/accuracies": 0.729687511920929, |
|
"rewards/chosen": -0.15719105303287506, |
|
"rewards/margins": 0.6423493027687073, |
|
"rewards/rejected": -0.7995403409004211, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 2.533486414083429e-07, |
|
"logits/chosen": -2.3753514289855957, |
|
"logits/rejected": -2.358102798461914, |
|
"logps/chosen": -273.9100036621094, |
|
"logps/rejected": -226.753662109375, |
|
"loss": 0.5245, |
|
"rewards/accuracies": 0.7484375238418579, |
|
"rewards/chosen": -0.18196144700050354, |
|
"rewards/margins": 0.6733365654945374, |
|
"rewards/rejected": -0.8552980422973633, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 2.5143513203214697e-07, |
|
"logits/chosen": -2.405080795288086, |
|
"logits/rejected": -2.3308959007263184, |
|
"logps/chosen": -261.7924499511719, |
|
"logps/rejected": -232.6370849609375, |
|
"loss": 0.5617, |
|
"rewards/accuracies": 0.7046874761581421, |
|
"rewards/chosen": -0.19414867460727692, |
|
"rewards/margins": 0.6336308717727661, |
|
"rewards/rejected": -0.8277795910835266, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 2.49521622655951e-07, |
|
"logits/chosen": -2.4004340171813965, |
|
"logits/rejected": -2.3892195224761963, |
|
"logps/chosen": -263.91217041015625, |
|
"logps/rejected": -233.1723175048828, |
|
"loss": 0.5558, |
|
"rewards/accuracies": 0.739062488079071, |
|
"rewards/chosen": -0.13633672893047333, |
|
"rewards/margins": 0.6239665746688843, |
|
"rewards/rejected": -0.7603033185005188, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 2.4760811327975504e-07, |
|
"logits/chosen": -2.387913465499878, |
|
"logits/rejected": -2.3446784019470215, |
|
"logps/chosen": -276.1678161621094, |
|
"logps/rejected": -239.7351531982422, |
|
"loss": 0.5395, |
|
"rewards/accuracies": 0.721875011920929, |
|
"rewards/chosen": -0.12410640716552734, |
|
"rewards/margins": 0.6739387512207031, |
|
"rewards/rejected": -0.7980451583862305, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 2.456946039035591e-07, |
|
"logits/chosen": -2.379734992980957, |
|
"logits/rejected": -2.370614528656006, |
|
"logps/chosen": -256.16510009765625, |
|
"logps/rejected": -217.11697387695312, |
|
"loss": 0.5126, |
|
"rewards/accuracies": 0.746874988079071, |
|
"rewards/chosen": -0.1286478042602539, |
|
"rewards/margins": 0.7220448851585388, |
|
"rewards/rejected": -0.8506927490234375, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 2.4378109452736316e-07, |
|
"logits/chosen": -2.348128080368042, |
|
"logits/rejected": -2.3468122482299805, |
|
"logps/chosen": -268.2215576171875, |
|
"logps/rejected": -235.09970092773438, |
|
"loss": 0.537, |
|
"rewards/accuracies": 0.7359374761581421, |
|
"rewards/chosen": -0.1686348021030426, |
|
"rewards/margins": 0.6620901823043823, |
|
"rewards/rejected": -0.8307248950004578, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 2.418675851511672e-07, |
|
"logits/chosen": -2.385132312774658, |
|
"logits/rejected": -2.3292319774627686, |
|
"logps/chosen": -258.2908630371094, |
|
"logps/rejected": -227.2532196044922, |
|
"loss": 0.525, |
|
"rewards/accuracies": 0.7421875, |
|
"rewards/chosen": -0.1993228644132614, |
|
"rewards/margins": 0.6777428984642029, |
|
"rewards/rejected": -0.8770657777786255, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 2.399540757749713e-07, |
|
"logits/chosen": -2.3949036598205566, |
|
"logits/rejected": -2.36201810836792, |
|
"logps/chosen": -252.385986328125, |
|
"logps/rejected": -226.19009399414062, |
|
"loss": 0.5471, |
|
"rewards/accuracies": 0.7171875238418579, |
|
"rewards/chosen": -0.12631697952747345, |
|
"rewards/margins": 0.6516768336296082, |
|
"rewards/rejected": -0.7779937982559204, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 2.3804056639877535e-07, |
|
"logits/chosen": -2.352290153503418, |
|
"logits/rejected": -2.3201920986175537, |
|
"logps/chosen": -269.6744689941406, |
|
"logps/rejected": -239.7770233154297, |
|
"loss": 0.5522, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.1264854222536087, |
|
"rewards/margins": 0.6343278884887695, |
|
"rewards/rejected": -0.7608132362365723, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 2.361270570225794e-07, |
|
"logits/chosen": -2.369065523147583, |
|
"logits/rejected": -2.3292198181152344, |
|
"logps/chosen": -273.328857421875, |
|
"logps/rejected": -227.1344451904297, |
|
"loss": 0.5526, |
|
"rewards/accuracies": 0.7093750238418579, |
|
"rewards/chosen": -0.1670379340648651, |
|
"rewards/margins": 0.6671660542488098, |
|
"rewards/rejected": -0.8342038989067078, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 2.3421354764638345e-07, |
|
"logits/chosen": -2.330441951751709, |
|
"logits/rejected": -2.277582883834839, |
|
"logps/chosen": -262.3780822753906, |
|
"logps/rejected": -222.9958038330078, |
|
"loss": 0.5384, |
|
"rewards/accuracies": 0.7171875238418579, |
|
"rewards/chosen": -0.14923453330993652, |
|
"rewards/margins": 0.680332601070404, |
|
"rewards/rejected": -0.8295671343803406, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 2.323000382701875e-07, |
|
"logits/chosen": -2.358654260635376, |
|
"logits/rejected": -2.2956321239471436, |
|
"logps/chosen": -262.77276611328125, |
|
"logps/rejected": -223.75985717773438, |
|
"loss": 0.5338, |
|
"rewards/accuracies": 0.721875011920929, |
|
"rewards/chosen": -0.1570906639099121, |
|
"rewards/margins": 0.6962771415710449, |
|
"rewards/rejected": -0.853367805480957, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 2.3038652889399157e-07, |
|
"logits/chosen": -2.368196964263916, |
|
"logits/rejected": -2.365723133087158, |
|
"logps/chosen": -264.42950439453125, |
|
"logps/rejected": -235.8118896484375, |
|
"loss": 0.5235, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.13157618045806885, |
|
"rewards/margins": 0.6999197602272034, |
|
"rewards/rejected": -0.8314959406852722, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 2.2847301951779563e-07, |
|
"logits/chosen": -2.3950960636138916, |
|
"logits/rejected": -2.3562042713165283, |
|
"logps/chosen": -273.6717834472656, |
|
"logps/rejected": -232.68161010742188, |
|
"loss": 0.5347, |
|
"rewards/accuracies": 0.7406250238418579, |
|
"rewards/chosen": -0.15049448609352112, |
|
"rewards/margins": 0.7236722707748413, |
|
"rewards/rejected": -0.8741667866706848, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 2.265595101415997e-07, |
|
"logits/chosen": -2.3810062408447266, |
|
"logits/rejected": -2.3283467292785645, |
|
"logps/chosen": -269.14801025390625, |
|
"logps/rejected": -230.89248657226562, |
|
"loss": 0.5464, |
|
"rewards/accuracies": 0.7093750238418579, |
|
"rewards/chosen": -0.15543130040168762, |
|
"rewards/margins": 0.6771457195281982, |
|
"rewards/rejected": -0.8325770497322083, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 2.2464600076540373e-07, |
|
"logits/chosen": -2.3967809677124023, |
|
"logits/rejected": -2.319446563720703, |
|
"logps/chosen": -279.8531188964844, |
|
"logps/rejected": -237.2471160888672, |
|
"loss": 0.5556, |
|
"rewards/accuracies": 0.7203124761581421, |
|
"rewards/chosen": -0.12391219288110733, |
|
"rewards/margins": 0.6414698362350464, |
|
"rewards/rejected": -0.7653820514678955, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 2.227324913892078e-07, |
|
"logits/chosen": -2.3728363513946533, |
|
"logits/rejected": -2.3333539962768555, |
|
"logps/chosen": -273.06304931640625, |
|
"logps/rejected": -229.03561401367188, |
|
"loss": 0.5477, |
|
"rewards/accuracies": 0.721875011920929, |
|
"rewards/chosen": -0.12746073305606842, |
|
"rewards/margins": 0.6716173887252808, |
|
"rewards/rejected": -0.799078106880188, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 2.2081898201301186e-07, |
|
"logits/chosen": -2.3346054553985596, |
|
"logits/rejected": -2.3393990993499756, |
|
"logps/chosen": -284.27886962890625, |
|
"logps/rejected": -243.03475952148438, |
|
"loss": 0.5597, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -0.14506694674491882, |
|
"rewards/margins": 0.604491114616394, |
|
"rewards/rejected": -0.7495580911636353, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 2.1890547263681592e-07, |
|
"logits/chosen": -2.3285999298095703, |
|
"logits/rejected": -2.319593906402588, |
|
"logps/chosen": -279.5855712890625, |
|
"logps/rejected": -228.2317657470703, |
|
"loss": 0.5416, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.13806693255901337, |
|
"rewards/margins": 0.6977806091308594, |
|
"rewards/rejected": -0.8358476758003235, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 2.1699196326061998e-07, |
|
"logits/chosen": -2.325159788131714, |
|
"logits/rejected": -2.3459110260009766, |
|
"logps/chosen": -272.990478515625, |
|
"logps/rejected": -230.53524780273438, |
|
"loss": 0.5335, |
|
"rewards/accuracies": 0.7328125238418579, |
|
"rewards/chosen": -0.13658006489276886, |
|
"rewards/margins": 0.7168129086494446, |
|
"rewards/rejected": -0.8533929586410522, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 2.1507845388442402e-07, |
|
"logits/chosen": -2.4010326862335205, |
|
"logits/rejected": -2.3410234451293945, |
|
"logps/chosen": -276.6645202636719, |
|
"logps/rejected": -219.21414184570312, |
|
"loss": 0.519, |
|
"rewards/accuracies": 0.7640625238418579, |
|
"rewards/chosen": -0.0858209878206253, |
|
"rewards/margins": 0.6973799467086792, |
|
"rewards/rejected": -0.7832010388374329, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 2.1316494450822808e-07, |
|
"logits/chosen": -2.3417165279388428, |
|
"logits/rejected": -2.341611623764038, |
|
"logps/chosen": -271.9029235839844, |
|
"logps/rejected": -232.0885009765625, |
|
"loss": 0.5266, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.13584092259407043, |
|
"rewards/margins": 0.702505886554718, |
|
"rewards/rejected": -0.8383467793464661, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 2.1125143513203214e-07, |
|
"logits/chosen": -2.416315793991089, |
|
"logits/rejected": -2.3927767276763916, |
|
"logps/chosen": -264.4131774902344, |
|
"logps/rejected": -227.99179077148438, |
|
"loss": 0.5479, |
|
"rewards/accuracies": 0.7203124761581421, |
|
"rewards/chosen": -0.12012849003076553, |
|
"rewards/margins": 0.6565206050872803, |
|
"rewards/rejected": -0.7766491174697876, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 2.093379257558362e-07, |
|
"logits/chosen": -2.3913142681121826, |
|
"logits/rejected": -2.3399569988250732, |
|
"logps/chosen": -271.81011962890625, |
|
"logps/rejected": -241.50759887695312, |
|
"loss": 0.5633, |
|
"rewards/accuracies": 0.684374988079071, |
|
"rewards/chosen": -0.1379830241203308, |
|
"rewards/margins": 0.6094950437545776, |
|
"rewards/rejected": -0.7474781274795532, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 2.0742441637964026e-07, |
|
"logits/chosen": -2.376094102859497, |
|
"logits/rejected": -2.3213579654693604, |
|
"logps/chosen": -245.985107421875, |
|
"logps/rejected": -217.1469268798828, |
|
"loss": 0.5383, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.16205313801765442, |
|
"rewards/margins": 0.6546685099601746, |
|
"rewards/rejected": -0.8167217373847961, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 2.055109070034443e-07, |
|
"logits/chosen": -2.4133598804473877, |
|
"logits/rejected": -2.322434663772583, |
|
"logps/chosen": -261.0830993652344, |
|
"logps/rejected": -222.3695068359375, |
|
"loss": 0.5411, |
|
"rewards/accuracies": 0.7515624761581421, |
|
"rewards/chosen": -0.12403901666402817, |
|
"rewards/margins": 0.6635336875915527, |
|
"rewards/rejected": -0.7875727415084839, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 2.0359739762724836e-07, |
|
"logits/chosen": -2.3970634937286377, |
|
"logits/rejected": -2.334484577178955, |
|
"logps/chosen": -289.25213623046875, |
|
"logps/rejected": -241.99313354492188, |
|
"loss": 0.4944, |
|
"rewards/accuracies": 0.7718750238418579, |
|
"rewards/chosen": -0.11199700832366943, |
|
"rewards/margins": 0.7853450179100037, |
|
"rewards/rejected": -0.8973420858383179, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 2.0168388825105242e-07, |
|
"logits/chosen": -2.3959927558898926, |
|
"logits/rejected": -2.340639591217041, |
|
"logps/chosen": -261.5354919433594, |
|
"logps/rejected": -230.05795288085938, |
|
"loss": 0.5513, |
|
"rewards/accuracies": 0.690625011920929, |
|
"rewards/chosen": -0.1715332418680191, |
|
"rewards/margins": 0.6642467379570007, |
|
"rewards/rejected": -0.8357800245285034, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 1.997703788748565e-07, |
|
"logits/chosen": -2.383922576904297, |
|
"logits/rejected": -2.3244893550872803, |
|
"logps/chosen": -261.2723693847656, |
|
"logps/rejected": -237.76644897460938, |
|
"loss": 0.5373, |
|
"rewards/accuracies": 0.7328125238418579, |
|
"rewards/chosen": -0.1503421664237976, |
|
"rewards/margins": 0.6998149156570435, |
|
"rewards/rejected": -0.8501569628715515, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 1.9785686949866055e-07, |
|
"logits/chosen": -2.4093589782714844, |
|
"logits/rejected": -2.353217601776123, |
|
"logps/chosen": -276.1336975097656, |
|
"logps/rejected": -229.25216674804688, |
|
"loss": 0.5199, |
|
"rewards/accuracies": 0.7484375238418579, |
|
"rewards/chosen": -0.17228913307189941, |
|
"rewards/margins": 0.713442325592041, |
|
"rewards/rejected": -0.8857313394546509, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 1.9594336012246458e-07, |
|
"logits/chosen": -2.3640589714050293, |
|
"logits/rejected": -2.3493194580078125, |
|
"logps/chosen": -263.424072265625, |
|
"logps/rejected": -229.8025360107422, |
|
"loss": 0.5399, |
|
"rewards/accuracies": 0.7421875, |
|
"rewards/chosen": -0.17658892273902893, |
|
"rewards/margins": 0.6467570066452026, |
|
"rewards/rejected": -0.823345959186554, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 1.9402985074626865e-07, |
|
"logits/chosen": -2.3777713775634766, |
|
"logits/rejected": -2.316854238510132, |
|
"logps/chosen": -261.4559631347656, |
|
"logps/rejected": -227.89718627929688, |
|
"loss": 0.5238, |
|
"rewards/accuracies": 0.745312511920929, |
|
"rewards/chosen": -0.1278056502342224, |
|
"rewards/margins": 0.7158471345901489, |
|
"rewards/rejected": -0.8436528444290161, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 1.921163413700727e-07, |
|
"logits/chosen": -2.392733335494995, |
|
"logits/rejected": -2.373281240463257, |
|
"logps/chosen": -281.6858825683594, |
|
"logps/rejected": -244.1327362060547, |
|
"loss": 0.5358, |
|
"rewards/accuracies": 0.746874988079071, |
|
"rewards/chosen": -0.11135254800319672, |
|
"rewards/margins": 0.7201862931251526, |
|
"rewards/rejected": -0.8315388560295105, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 1.9020283199387677e-07, |
|
"logits/chosen": -2.343679189682007, |
|
"logits/rejected": -2.3387351036071777, |
|
"logps/chosen": -268.7857666015625, |
|
"logps/rejected": -238.4132843017578, |
|
"loss": 0.5173, |
|
"rewards/accuracies": 0.7484375238418579, |
|
"rewards/chosen": -0.1281011551618576, |
|
"rewards/margins": 0.7418644428253174, |
|
"rewards/rejected": -0.869965672492981, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 1.8828932261768083e-07, |
|
"logits/chosen": -2.372236967086792, |
|
"logits/rejected": -2.3373138904571533, |
|
"logps/chosen": -265.18865966796875, |
|
"logps/rejected": -227.602783203125, |
|
"loss": 0.5272, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.13484473526477814, |
|
"rewards/margins": 0.7227139472961426, |
|
"rewards/rejected": -0.8575586080551147, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 1.8637581324148487e-07, |
|
"logits/chosen": -2.392242670059204, |
|
"logits/rejected": -2.3611741065979004, |
|
"logps/chosen": -268.98431396484375, |
|
"logps/rejected": -239.24978637695312, |
|
"loss": 0.5563, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.17363707721233368, |
|
"rewards/margins": 0.6345449686050415, |
|
"rewards/rejected": -0.8081819415092468, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_logits/chosen": -2.031247615814209, |
|
"eval_logits/rejected": -1.9774165153503418, |
|
"eval_logps/chosen": -266.0887145996094, |
|
"eval_logps/rejected": -232.28887939453125, |
|
"eval_loss": 0.5312530398368835, |
|
"eval_rewards/accuracies": 0.7315000295639038, |
|
"eval_rewards/chosen": -0.15304329991340637, |
|
"eval_rewards/margins": 0.709061324596405, |
|
"eval_rewards/rejected": -0.862104594707489, |
|
"eval_runtime": 1653.0531, |
|
"eval_samples_per_second": 1.21, |
|
"eval_steps_per_second": 0.302, |
|
"step": 1936 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 1.8446230386528893e-07, |
|
"logits/chosen": -2.4022011756896973, |
|
"logits/rejected": -2.324002504348755, |
|
"logps/chosen": -260.53228759765625, |
|
"logps/rejected": -229.572998046875, |
|
"loss": 0.5515, |
|
"rewards/accuracies": 0.7265625, |
|
"rewards/chosen": -0.20086026191711426, |
|
"rewards/margins": 0.6226423978805542, |
|
"rewards/rejected": -0.8235027194023132, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 1.82548794489093e-07, |
|
"logits/chosen": -2.3827035427093506, |
|
"logits/rejected": -2.3521687984466553, |
|
"logps/chosen": -272.88433837890625, |
|
"logps/rejected": -228.8638458251953, |
|
"loss": 0.5432, |
|
"rewards/accuracies": 0.7359374761581421, |
|
"rewards/chosen": -0.1703009009361267, |
|
"rewards/margins": 0.6672419309616089, |
|
"rewards/rejected": -0.8375428318977356, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 1.8063528511289706e-07, |
|
"logits/chosen": -2.3884754180908203, |
|
"logits/rejected": -2.3221449851989746, |
|
"logps/chosen": -258.5544128417969, |
|
"logps/rejected": -224.48971557617188, |
|
"loss": 0.5424, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -0.21564142405986786, |
|
"rewards/margins": 0.6631011366844177, |
|
"rewards/rejected": -0.8787425756454468, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 1.7872177573670112e-07, |
|
"logits/chosen": -2.3919119834899902, |
|
"logits/rejected": -2.3483376502990723, |
|
"logps/chosen": -276.0721435546875, |
|
"logps/rejected": -239.58154296875, |
|
"loss": 0.5155, |
|
"rewards/accuracies": 0.753125011920929, |
|
"rewards/chosen": -0.13372206687927246, |
|
"rewards/margins": 0.7485690712928772, |
|
"rewards/rejected": -0.8822910189628601, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 1.7680826636050515e-07, |
|
"logits/chosen": -2.360553741455078, |
|
"logits/rejected": -2.3577828407287598, |
|
"logps/chosen": -257.6646728515625, |
|
"logps/rejected": -230.3592529296875, |
|
"loss": 0.5352, |
|
"rewards/accuracies": 0.739062488079071, |
|
"rewards/chosen": -0.16400420665740967, |
|
"rewards/margins": 0.7011340856552124, |
|
"rewards/rejected": -0.8651384115219116, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 1.7489475698430921e-07, |
|
"logits/chosen": -2.3634886741638184, |
|
"logits/rejected": -2.322361469268799, |
|
"logps/chosen": -282.5517883300781, |
|
"logps/rejected": -233.2860870361328, |
|
"loss": 0.5361, |
|
"rewards/accuracies": 0.7281249761581421, |
|
"rewards/chosen": -0.1445886641740799, |
|
"rewards/margins": 0.752617597579956, |
|
"rewards/rejected": -0.89720618724823, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 1.7298124760811328e-07, |
|
"logits/chosen": -2.3623392581939697, |
|
"logits/rejected": -2.319822311401367, |
|
"logps/chosen": -253.936279296875, |
|
"logps/rejected": -234.66848754882812, |
|
"loss": 0.5119, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -0.09708345681428909, |
|
"rewards/margins": 0.7794226408004761, |
|
"rewards/rejected": -0.8765062093734741, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 1.7106773823191734e-07, |
|
"logits/chosen": -2.3689372539520264, |
|
"logits/rejected": -2.3116328716278076, |
|
"logps/chosen": -268.030029296875, |
|
"logps/rejected": -231.4261474609375, |
|
"loss": 0.5711, |
|
"rewards/accuracies": 0.723437488079071, |
|
"rewards/chosen": -0.18213681876659393, |
|
"rewards/margins": 0.5894214510917664, |
|
"rewards/rejected": -0.7715582251548767, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 1.691542288557214e-07, |
|
"logits/chosen": -2.3579487800598145, |
|
"logits/rejected": -2.3073954582214355, |
|
"logps/chosen": -260.38775634765625, |
|
"logps/rejected": -225.4844207763672, |
|
"loss": 0.5156, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.14314624667167664, |
|
"rewards/margins": 0.7362397313117981, |
|
"rewards/rejected": -0.8793859481811523, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 1.6724071947952544e-07, |
|
"logits/chosen": -2.3365800380706787, |
|
"logits/rejected": -2.2837400436401367, |
|
"logps/chosen": -266.9193420410156, |
|
"logps/rejected": -238.1469268798828, |
|
"loss": 0.5267, |
|
"rewards/accuracies": 0.7359374761581421, |
|
"rewards/chosen": -0.12520021200180054, |
|
"rewards/margins": 0.7114790678024292, |
|
"rewards/rejected": -0.8366793394088745, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 1.653272101033295e-07, |
|
"logits/chosen": -2.400599479675293, |
|
"logits/rejected": -2.35431170463562, |
|
"logps/chosen": -262.89825439453125, |
|
"logps/rejected": -228.84310913085938, |
|
"loss": 0.559, |
|
"rewards/accuracies": 0.6968749761581421, |
|
"rewards/chosen": -0.15787425637245178, |
|
"rewards/margins": 0.6567065119743347, |
|
"rewards/rejected": -0.8145807385444641, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 1.6341370072713356e-07, |
|
"logits/chosen": -2.400794267654419, |
|
"logits/rejected": -2.3113760948181152, |
|
"logps/chosen": -246.63400268554688, |
|
"logps/rejected": -223.24923706054688, |
|
"loss": 0.5255, |
|
"rewards/accuracies": 0.7421875, |
|
"rewards/chosen": -0.14660078287124634, |
|
"rewards/margins": 0.7161286473274231, |
|
"rewards/rejected": -0.8627294301986694, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 1.6150019135093762e-07, |
|
"logits/chosen": -2.344493865966797, |
|
"logits/rejected": -2.3352556228637695, |
|
"logps/chosen": -278.28839111328125, |
|
"logps/rejected": -243.4131622314453, |
|
"loss": 0.5521, |
|
"rewards/accuracies": 0.729687511920929, |
|
"rewards/chosen": -0.13938100636005402, |
|
"rewards/margins": 0.6712489128112793, |
|
"rewards/rejected": -0.8106300234794617, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 1.5958668197474169e-07, |
|
"logits/chosen": -2.3649954795837402, |
|
"logits/rejected": -2.3345046043395996, |
|
"logps/chosen": -286.7098083496094, |
|
"logps/rejected": -232.02517700195312, |
|
"loss": 0.5329, |
|
"rewards/accuracies": 0.7265625, |
|
"rewards/chosen": -0.10944189131259918, |
|
"rewards/margins": 0.7143247723579407, |
|
"rewards/rejected": -0.8237665891647339, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 1.5767317259854572e-07, |
|
"logits/chosen": -2.401505947113037, |
|
"logits/rejected": -2.3687071800231934, |
|
"logps/chosen": -252.4344940185547, |
|
"logps/rejected": -230.4383087158203, |
|
"loss": 0.5534, |
|
"rewards/accuracies": 0.7265625, |
|
"rewards/chosen": -0.16689328849315643, |
|
"rewards/margins": 0.670124351978302, |
|
"rewards/rejected": -0.8370175361633301, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 1.5575966322234978e-07, |
|
"logits/chosen": -2.3800606727600098, |
|
"logits/rejected": -2.3494057655334473, |
|
"logps/chosen": -277.4918212890625, |
|
"logps/rejected": -236.21533203125, |
|
"loss": 0.5376, |
|
"rewards/accuracies": 0.721875011920929, |
|
"rewards/chosen": -0.11864397674798965, |
|
"rewards/margins": 0.7518168687820435, |
|
"rewards/rejected": -0.8704608082771301, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 1.5384615384615385e-07, |
|
"logits/chosen": -2.361454963684082, |
|
"logits/rejected": -2.327955722808838, |
|
"logps/chosen": -283.4490051269531, |
|
"logps/rejected": -234.3018035888672, |
|
"loss": 0.5348, |
|
"rewards/accuracies": 0.729687511920929, |
|
"rewards/chosen": -0.1568828970193863, |
|
"rewards/margins": 0.7170850038528442, |
|
"rewards/rejected": -0.8739679455757141, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 1.519326444699579e-07, |
|
"logits/chosen": -2.3744759559631348, |
|
"logits/rejected": -2.358099937438965, |
|
"logps/chosen": -277.69195556640625, |
|
"logps/rejected": -241.0815887451172, |
|
"loss": 0.5268, |
|
"rewards/accuracies": 0.7359374761581421, |
|
"rewards/chosen": -0.12414580583572388, |
|
"rewards/margins": 0.7003077864646912, |
|
"rewards/rejected": -0.8244536519050598, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 1.5001913509376197e-07, |
|
"logits/chosen": -2.4057154655456543, |
|
"logits/rejected": -2.3354310989379883, |
|
"logps/chosen": -260.5264892578125, |
|
"logps/rejected": -222.79110717773438, |
|
"loss": 0.5231, |
|
"rewards/accuracies": 0.7265625, |
|
"rewards/chosen": -0.15866820514202118, |
|
"rewards/margins": 0.7241252660751343, |
|
"rewards/rejected": -0.8827934265136719, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 1.4810562571756603e-07, |
|
"logits/chosen": -2.3917877674102783, |
|
"logits/rejected": -2.355861186981201, |
|
"logps/chosen": -283.84320068359375, |
|
"logps/rejected": -230.14404296875, |
|
"loss": 0.5136, |
|
"rewards/accuracies": 0.7515624761581421, |
|
"rewards/chosen": -0.13140609860420227, |
|
"rewards/margins": 0.7532661557197571, |
|
"rewards/rejected": -0.8846722841262817, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 1.4619211634137007e-07, |
|
"logits/chosen": -2.387089967727661, |
|
"logits/rejected": -2.314539670944214, |
|
"logps/chosen": -268.9212341308594, |
|
"logps/rejected": -232.61898803710938, |
|
"loss": 0.4982, |
|
"rewards/accuracies": 0.7484375238418579, |
|
"rewards/chosen": -0.10855790227651596, |
|
"rewards/margins": 0.8006644248962402, |
|
"rewards/rejected": -0.9092223048210144, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 1.4427860696517413e-07, |
|
"logits/chosen": -2.3513126373291016, |
|
"logits/rejected": -2.3646275997161865, |
|
"logps/chosen": -262.60345458984375, |
|
"logps/rejected": -245.1242218017578, |
|
"loss": 0.5552, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.1720902919769287, |
|
"rewards/margins": 0.6639872193336487, |
|
"rewards/rejected": -0.8360773921012878, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 1.423650975889782e-07, |
|
"logits/chosen": -2.3261661529541016, |
|
"logits/rejected": -2.308116912841797, |
|
"logps/chosen": -271.75543212890625, |
|
"logps/rejected": -232.4984588623047, |
|
"loss": 0.5473, |
|
"rewards/accuracies": 0.721875011920929, |
|
"rewards/chosen": -0.15690350532531738, |
|
"rewards/margins": 0.6513178944587708, |
|
"rewards/rejected": -0.8082213401794434, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 1.4045158821278225e-07, |
|
"logits/chosen": -2.3861842155456543, |
|
"logits/rejected": -2.354062080383301, |
|
"logps/chosen": -280.88116455078125, |
|
"logps/rejected": -232.3457794189453, |
|
"loss": 0.5446, |
|
"rewards/accuracies": 0.714062511920929, |
|
"rewards/chosen": -0.15454408526420593, |
|
"rewards/margins": 0.6919762492179871, |
|
"rewards/rejected": -0.8465203046798706, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 1.3853807883658632e-07, |
|
"logits/chosen": -2.3726694583892822, |
|
"logits/rejected": -2.3578009605407715, |
|
"logps/chosen": -275.0099182128906, |
|
"logps/rejected": -245.10873413085938, |
|
"loss": 0.4978, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -0.08260266482830048, |
|
"rewards/margins": 0.7924613952636719, |
|
"rewards/rejected": -0.8750640749931335, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 1.3662456946039035e-07, |
|
"logits/chosen": -2.339141368865967, |
|
"logits/rejected": -2.3254787921905518, |
|
"logps/chosen": -258.27996826171875, |
|
"logps/rejected": -228.74398803710938, |
|
"loss": 0.547, |
|
"rewards/accuracies": 0.7328125238418579, |
|
"rewards/chosen": -0.15882189571857452, |
|
"rewards/margins": 0.6925019025802612, |
|
"rewards/rejected": -0.8513237833976746, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 1.3471106008419441e-07, |
|
"logits/chosen": -2.406780958175659, |
|
"logits/rejected": -2.3241848945617676, |
|
"logps/chosen": -263.73931884765625, |
|
"logps/rejected": -224.1796417236328, |
|
"loss": 0.5389, |
|
"rewards/accuracies": 0.7203124761581421, |
|
"rewards/chosen": -0.1274387389421463, |
|
"rewards/margins": 0.7019168138504028, |
|
"rewards/rejected": -0.8293555378913879, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 1.3279755070799848e-07, |
|
"logits/chosen": -2.3664703369140625, |
|
"logits/rejected": -2.318784713745117, |
|
"logps/chosen": -261.4429626464844, |
|
"logps/rejected": -229.2322540283203, |
|
"loss": 0.5361, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.13853470981121063, |
|
"rewards/margins": 0.7004620432853699, |
|
"rewards/rejected": -0.8389967083930969, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 1.3088404133180254e-07, |
|
"logits/chosen": -2.3630192279815674, |
|
"logits/rejected": -2.3432974815368652, |
|
"logps/chosen": -273.27850341796875, |
|
"logps/rejected": -241.81771850585938, |
|
"loss": 0.5111, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -0.10639991611242294, |
|
"rewards/margins": 0.7741705179214478, |
|
"rewards/rejected": -0.8805704116821289, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 1.289705319556066e-07, |
|
"logits/chosen": -2.3936803340911865, |
|
"logits/rejected": -2.3308169841766357, |
|
"logps/chosen": -269.2107849121094, |
|
"logps/rejected": -238.88394165039062, |
|
"loss": 0.5046, |
|
"rewards/accuracies": 0.739062488079071, |
|
"rewards/chosen": -0.1324814110994339, |
|
"rewards/margins": 0.8152027130126953, |
|
"rewards/rejected": -0.947684109210968, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 1.2705702257941064e-07, |
|
"logits/chosen": -2.3774123191833496, |
|
"logits/rejected": -2.312371253967285, |
|
"logps/chosen": -247.8770751953125, |
|
"logps/rejected": -221.15536499023438, |
|
"loss": 0.5219, |
|
"rewards/accuracies": 0.753125011920929, |
|
"rewards/chosen": -0.13058343529701233, |
|
"rewards/margins": 0.7142859697341919, |
|
"rewards/rejected": -0.8448693156242371, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 1.251435132032147e-07, |
|
"logits/chosen": -2.399721622467041, |
|
"logits/rejected": -2.319878101348877, |
|
"logps/chosen": -299.5263671875, |
|
"logps/rejected": -242.10964965820312, |
|
"loss": 0.5404, |
|
"rewards/accuracies": 0.721875011920929, |
|
"rewards/chosen": -0.1261121928691864, |
|
"rewards/margins": 0.7005943059921265, |
|
"rewards/rejected": -0.8267065286636353, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 1.2323000382701873e-07, |
|
"logits/chosen": -2.3750648498535156, |
|
"logits/rejected": -2.33599591255188, |
|
"logps/chosen": -281.7725830078125, |
|
"logps/rejected": -241.2296600341797, |
|
"loss": 0.5367, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.12657758593559265, |
|
"rewards/margins": 0.7330743074417114, |
|
"rewards/rejected": -0.8596519231796265, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 1.213164944508228e-07, |
|
"logits/chosen": -2.3655242919921875, |
|
"logits/rejected": -2.309812068939209, |
|
"logps/chosen": -262.09954833984375, |
|
"logps/rejected": -234.92758178710938, |
|
"loss": 0.551, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.13195794820785522, |
|
"rewards/margins": 0.6896259784698486, |
|
"rewards/rejected": -0.8215838670730591, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 1.1940298507462686e-07, |
|
"logits/chosen": -2.4004878997802734, |
|
"logits/rejected": -2.3559048175811768, |
|
"logps/chosen": -270.6529846191406, |
|
"logps/rejected": -242.43368530273438, |
|
"loss": 0.5288, |
|
"rewards/accuracies": 0.754687488079071, |
|
"rewards/chosen": -0.17092928290367126, |
|
"rewards/margins": 0.7322403192520142, |
|
"rewards/rejected": -0.9031696319580078, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 1.1748947569843092e-07, |
|
"logits/chosen": -2.2949576377868652, |
|
"logits/rejected": -2.279416561126709, |
|
"logps/chosen": -253.159423828125, |
|
"logps/rejected": -220.51596069335938, |
|
"loss": 0.5365, |
|
"rewards/accuracies": 0.7406250238418579, |
|
"rewards/chosen": -0.14947687089443207, |
|
"rewards/margins": 0.7013243436813354, |
|
"rewards/rejected": -0.8508013486862183, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 1.1557596632223497e-07, |
|
"logits/chosen": -2.376298427581787, |
|
"logits/rejected": -2.301685333251953, |
|
"logps/chosen": -269.91168212890625, |
|
"logps/rejected": -221.9505615234375, |
|
"loss": 0.552, |
|
"rewards/accuracies": 0.715624988079071, |
|
"rewards/chosen": -0.19331762194633484, |
|
"rewards/margins": 0.7175094485282898, |
|
"rewards/rejected": -0.9108270406723022, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 1.1366245694603903e-07, |
|
"logits/chosen": -2.3533215522766113, |
|
"logits/rejected": -2.3328850269317627, |
|
"logps/chosen": -258.6532897949219, |
|
"logps/rejected": -218.423095703125, |
|
"loss": 0.5498, |
|
"rewards/accuracies": 0.723437488079071, |
|
"rewards/chosen": -0.14851602911949158, |
|
"rewards/margins": 0.6814883947372437, |
|
"rewards/rejected": -0.8300043940544128, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 1.1174894756984308e-07, |
|
"logits/chosen": -2.3606135845184326, |
|
"logits/rejected": -2.3032069206237793, |
|
"logps/chosen": -252.78604125976562, |
|
"logps/rejected": -211.8778076171875, |
|
"loss": 0.5301, |
|
"rewards/accuracies": 0.7328125238418579, |
|
"rewards/chosen": -0.16221049427986145, |
|
"rewards/margins": 0.706489086151123, |
|
"rewards/rejected": -0.8686995506286621, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 1.0983543819364714e-07, |
|
"logits/chosen": -2.406503677368164, |
|
"logits/rejected": -2.3547933101654053, |
|
"logps/chosen": -278.7993469238281, |
|
"logps/rejected": -237.6390380859375, |
|
"loss": 0.5369, |
|
"rewards/accuracies": 0.745312511920929, |
|
"rewards/chosen": -0.1446961909532547, |
|
"rewards/margins": 0.725204348564148, |
|
"rewards/rejected": -0.8699005842208862, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 1.079219288174512e-07, |
|
"logits/chosen": -2.3510079383850098, |
|
"logits/rejected": -2.296820640563965, |
|
"logps/chosen": -261.72235107421875, |
|
"logps/rejected": -243.376708984375, |
|
"loss": 0.546, |
|
"rewards/accuracies": 0.7203124761581421, |
|
"rewards/chosen": -0.17369435727596283, |
|
"rewards/margins": 0.6820122599601746, |
|
"rewards/rejected": -0.8557065725326538, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 1.0600841944125525e-07, |
|
"logits/chosen": -2.408764362335205, |
|
"logits/rejected": -2.336327314376831, |
|
"logps/chosen": -258.2755126953125, |
|
"logps/rejected": -228.48385620117188, |
|
"loss": 0.5113, |
|
"rewards/accuracies": 0.760937511920929, |
|
"rewards/chosen": -0.15298260748386383, |
|
"rewards/margins": 0.7763695120811462, |
|
"rewards/rejected": -0.9293521642684937, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 1.0409491006505931e-07, |
|
"logits/chosen": -2.367042064666748, |
|
"logits/rejected": -2.317692279815674, |
|
"logps/chosen": -266.90362548828125, |
|
"logps/rejected": -228.7984619140625, |
|
"loss": 0.5205, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.17633156478405, |
|
"rewards/margins": 0.7394440770149231, |
|
"rewards/rejected": -0.9157756567001343, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 1.0218140068886336e-07, |
|
"logits/chosen": -2.372544050216675, |
|
"logits/rejected": -2.297020673751831, |
|
"logps/chosen": -267.892822265625, |
|
"logps/rejected": -229.27035522460938, |
|
"loss": 0.5245, |
|
"rewards/accuracies": 0.7281249761581421, |
|
"rewards/chosen": -0.1406041383743286, |
|
"rewards/margins": 0.7415550351142883, |
|
"rewards/rejected": -0.8821592330932617, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 1.0026789131266743e-07, |
|
"logits/chosen": -2.380431890487671, |
|
"logits/rejected": -2.3486175537109375, |
|
"logps/chosen": -260.1377868652344, |
|
"logps/rejected": -228.09994506835938, |
|
"loss": 0.5274, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -0.16361048817634583, |
|
"rewards/margins": 0.6924134492874146, |
|
"rewards/rejected": -0.8560239672660828, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 9.835438193647149e-08, |
|
"logits/chosen": -2.364969491958618, |
|
"logits/rejected": -2.3163743019104004, |
|
"logps/chosen": -261.2802429199219, |
|
"logps/rejected": -235.7176055908203, |
|
"loss": 0.5158, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.11532945930957794, |
|
"rewards/margins": 0.7920664548873901, |
|
"rewards/rejected": -0.9073959589004517, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 9.644087256027554e-08, |
|
"logits/chosen": -2.3899219036102295, |
|
"logits/rejected": -2.3310904502868652, |
|
"logps/chosen": -270.76837158203125, |
|
"logps/rejected": -242.98434448242188, |
|
"loss": 0.5564, |
|
"rewards/accuracies": 0.7171875238418579, |
|
"rewards/chosen": -0.13495083153247833, |
|
"rewards/margins": 0.7031392455101013, |
|
"rewards/rejected": -0.8380901217460632, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 9.45273631840796e-08, |
|
"logits/chosen": -2.3676700592041016, |
|
"logits/rejected": -2.2973744869232178, |
|
"logps/chosen": -256.33416748046875, |
|
"logps/rejected": -215.53921508789062, |
|
"loss": 0.5391, |
|
"rewards/accuracies": 0.721875011920929, |
|
"rewards/chosen": -0.17805704474449158, |
|
"rewards/margins": 0.6919922232627869, |
|
"rewards/rejected": -0.8700492978096008, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 9.261385380788366e-08, |
|
"logits/chosen": -2.4081311225891113, |
|
"logits/rejected": -2.315382480621338, |
|
"logps/chosen": -267.7056579589844, |
|
"logps/rejected": -226.94400024414062, |
|
"loss": 0.5408, |
|
"rewards/accuracies": 0.715624988079071, |
|
"rewards/chosen": -0.17336109280586243, |
|
"rewards/margins": 0.7087409496307373, |
|
"rewards/rejected": -0.8821020126342773, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 9.070034443168771e-08, |
|
"logits/chosen": -2.3482162952423096, |
|
"logits/rejected": -2.3113105297088623, |
|
"logps/chosen": -263.73065185546875, |
|
"logps/rejected": -226.03207397460938, |
|
"loss": 0.541, |
|
"rewards/accuracies": 0.707812488079071, |
|
"rewards/chosen": -0.2133699208498001, |
|
"rewards/margins": 0.7167404890060425, |
|
"rewards/rejected": -0.9301104545593262, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 8.878683505549177e-08, |
|
"logits/chosen": -2.3624765872955322, |
|
"logits/rejected": -2.3461878299713135, |
|
"logps/chosen": -275.10125732421875, |
|
"logps/rejected": -241.44970703125, |
|
"loss": 0.5474, |
|
"rewards/accuracies": 0.6953125, |
|
"rewards/chosen": -0.16694757342338562, |
|
"rewards/margins": 0.6941839456558228, |
|
"rewards/rejected": -0.861131489276886, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 8.687332567929582e-08, |
|
"logits/chosen": -2.3443427085876465, |
|
"logits/rejected": -2.325216054916382, |
|
"logps/chosen": -267.3571472167969, |
|
"logps/rejected": -219.15695190429688, |
|
"loss": 0.5181, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.14058147370815277, |
|
"rewards/margins": 0.7361636161804199, |
|
"rewards/rejected": -0.8767450451850891, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 8.495981630309988e-08, |
|
"logits/chosen": -2.363398313522339, |
|
"logits/rejected": -2.2995753288269043, |
|
"logps/chosen": -262.1716613769531, |
|
"logps/rejected": -236.1666259765625, |
|
"loss": 0.5242, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.12495926767587662, |
|
"rewards/margins": 0.7235976457595825, |
|
"rewards/rejected": -0.8485569953918457, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 8.304630692690395e-08, |
|
"logits/chosen": -2.387195110321045, |
|
"logits/rejected": -2.3478474617004395, |
|
"logps/chosen": -283.22705078125, |
|
"logps/rejected": -233.081298828125, |
|
"loss": 0.5162, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.11790215969085693, |
|
"rewards/margins": 0.787219762802124, |
|
"rewards/rejected": -0.9051219820976257, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 8.1132797550708e-08, |
|
"logits/chosen": -2.3662703037261963, |
|
"logits/rejected": -2.3089377880096436, |
|
"logps/chosen": -270.521728515625, |
|
"logps/rejected": -229.98519897460938, |
|
"loss": 0.5213, |
|
"rewards/accuracies": 0.753125011920929, |
|
"rewards/chosen": -0.14989617466926575, |
|
"rewards/margins": 0.7254993915557861, |
|
"rewards/rejected": -0.875395655632019, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 7.921928817451206e-08, |
|
"logits/chosen": -2.360635280609131, |
|
"logits/rejected": -2.328989267349243, |
|
"logps/chosen": -272.1587829589844, |
|
"logps/rejected": -221.1785888671875, |
|
"loss": 0.48, |
|
"rewards/accuracies": 0.7640625238418579, |
|
"rewards/chosen": -0.07772710919380188, |
|
"rewards/margins": 0.8303700685501099, |
|
"rewards/rejected": -0.9080971479415894, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 7.73057787983161e-08, |
|
"logits/chosen": -2.3522746562957764, |
|
"logits/rejected": -2.3457765579223633, |
|
"logps/chosen": -270.2672424316406, |
|
"logps/rejected": -231.2746124267578, |
|
"loss": 0.5063, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.11053086817264557, |
|
"rewards/margins": 0.8113416433334351, |
|
"rewards/rejected": -0.921872615814209, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 7.539226942212017e-08, |
|
"logits/chosen": -2.3699328899383545, |
|
"logits/rejected": -2.322545289993286, |
|
"logps/chosen": -271.3714599609375, |
|
"logps/rejected": -247.797119140625, |
|
"loss": 0.5434, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.14844852685928345, |
|
"rewards/margins": 0.7081364989280701, |
|
"rewards/rejected": -0.8565850257873535, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 7.347876004592423e-08, |
|
"logits/chosen": -2.4080615043640137, |
|
"logits/rejected": -2.354224681854248, |
|
"logps/chosen": -271.5215148925781, |
|
"logps/rejected": -234.2542724609375, |
|
"loss": 0.5274, |
|
"rewards/accuracies": 0.7515624761581421, |
|
"rewards/chosen": -0.19082528352737427, |
|
"rewards/margins": 0.7402253746986389, |
|
"rewards/rejected": -0.9310504794120789, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 7.156525066972828e-08, |
|
"logits/chosen": -2.4200048446655273, |
|
"logits/rejected": -2.3449947834014893, |
|
"logps/chosen": -284.7613830566406, |
|
"logps/rejected": -238.87850952148438, |
|
"loss": 0.5191, |
|
"rewards/accuracies": 0.754687488079071, |
|
"rewards/chosen": -0.12657888233661652, |
|
"rewards/margins": 0.8030962944030762, |
|
"rewards/rejected": -0.9296752214431763, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 6.965174129353234e-08, |
|
"logits/chosen": -2.356966733932495, |
|
"logits/rejected": -2.29770827293396, |
|
"logps/chosen": -261.1152038574219, |
|
"logps/rejected": -240.00387573242188, |
|
"loss": 0.5399, |
|
"rewards/accuracies": 0.714062511920929, |
|
"rewards/chosen": -0.16734077036380768, |
|
"rewards/margins": 0.7260617613792419, |
|
"rewards/rejected": -0.8934024572372437, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 6.773823191733639e-08, |
|
"logits/chosen": -2.3702635765075684, |
|
"logits/rejected": -2.317229747772217, |
|
"logps/chosen": -272.82440185546875, |
|
"logps/rejected": -221.4154815673828, |
|
"loss": 0.5175, |
|
"rewards/accuracies": 0.7640625238418579, |
|
"rewards/chosen": -0.1133556216955185, |
|
"rewards/margins": 0.7367923259735107, |
|
"rewards/rejected": -0.8501479029655457, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 6.582472254114045e-08, |
|
"logits/chosen": -2.4438915252685547, |
|
"logits/rejected": -2.385143756866455, |
|
"logps/chosen": -275.418212890625, |
|
"logps/rejected": -239.0946502685547, |
|
"loss": 0.5384, |
|
"rewards/accuracies": 0.7203124761581421, |
|
"rewards/chosen": -0.11291754245758057, |
|
"rewards/margins": 0.7378134727478027, |
|
"rewards/rejected": -0.8507310748100281, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 6.391121316494451e-08, |
|
"logits/chosen": -2.3825366497039795, |
|
"logits/rejected": -2.3399150371551514, |
|
"logps/chosen": -251.03720092773438, |
|
"logps/rejected": -234.3162078857422, |
|
"loss": 0.5305, |
|
"rewards/accuracies": 0.7203124761581421, |
|
"rewards/chosen": -0.16436532139778137, |
|
"rewards/margins": 0.7372573614120483, |
|
"rewards/rejected": -0.9016226530075073, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 6.199770378874856e-08, |
|
"logits/chosen": -2.37642502784729, |
|
"logits/rejected": -2.3453516960144043, |
|
"logps/chosen": -269.23577880859375, |
|
"logps/rejected": -229.6055908203125, |
|
"loss": 0.5202, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.1360473334789276, |
|
"rewards/margins": 0.7748234272003174, |
|
"rewards/rejected": -0.9108708500862122, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 6.008419441255262e-08, |
|
"logits/chosen": -2.3883450031280518, |
|
"logits/rejected": -2.3713505268096924, |
|
"logps/chosen": -263.39288330078125, |
|
"logps/rejected": -231.93930053710938, |
|
"loss": 0.5264, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.07088092714548111, |
|
"rewards/margins": 0.7212754487991333, |
|
"rewards/rejected": -0.792156457901001, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 5.817068503635668e-08, |
|
"logits/chosen": -2.3673863410949707, |
|
"logits/rejected": -2.307626724243164, |
|
"logps/chosen": -281.2471618652344, |
|
"logps/rejected": -218.8713836669922, |
|
"loss": 0.5129, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.10778670012950897, |
|
"rewards/margins": 0.7753579020500183, |
|
"rewards/rejected": -0.8831446766853333, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 5.6257175660160735e-08, |
|
"logits/chosen": -2.3701910972595215, |
|
"logits/rejected": -2.3337950706481934, |
|
"logps/chosen": -273.3838806152344, |
|
"logps/rejected": -228.57546997070312, |
|
"loss": 0.5202, |
|
"rewards/accuracies": 0.746874988079071, |
|
"rewards/chosen": -0.14705790579319, |
|
"rewards/margins": 0.7927185893058777, |
|
"rewards/rejected": -0.939776599407196, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 5.4343666283964784e-08, |
|
"logits/chosen": -2.3455982208251953, |
|
"logits/rejected": -2.3253159523010254, |
|
"logps/chosen": -262.5321960449219, |
|
"logps/rejected": -226.5032501220703, |
|
"loss": 0.5441, |
|
"rewards/accuracies": 0.729687511920929, |
|
"rewards/chosen": -0.18480037152767181, |
|
"rewards/margins": 0.7010098695755005, |
|
"rewards/rejected": -0.8858101963996887, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 5.243015690776884e-08, |
|
"logits/chosen": -2.38588285446167, |
|
"logits/rejected": -2.3365259170532227, |
|
"logps/chosen": -264.8544616699219, |
|
"logps/rejected": -217.23599243164062, |
|
"loss": 0.5238, |
|
"rewards/accuracies": 0.7265625, |
|
"rewards/chosen": -0.13912011682987213, |
|
"rewards/margins": 0.7289184331893921, |
|
"rewards/rejected": -0.8680384755134583, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 5.05166475315729e-08, |
|
"logits/chosen": -2.3164820671081543, |
|
"logits/rejected": -2.3066811561584473, |
|
"logps/chosen": -250.55001831054688, |
|
"logps/rejected": -226.17489624023438, |
|
"loss": 0.5331, |
|
"rewards/accuracies": 0.729687511920929, |
|
"rewards/chosen": -0.15784478187561035, |
|
"rewards/margins": 0.6939374804496765, |
|
"rewards/rejected": -0.8517822027206421, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 4.860313815537696e-08, |
|
"logits/chosen": -2.4132914543151855, |
|
"logits/rejected": -2.347529411315918, |
|
"logps/chosen": -267.1165771484375, |
|
"logps/rejected": -219.255615234375, |
|
"loss": 0.5299, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -0.1091521754860878, |
|
"rewards/margins": 0.7415350079536438, |
|
"rewards/rejected": -0.8506871461868286, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 4.668962877918101e-08, |
|
"logits/chosen": -2.3676562309265137, |
|
"logits/rejected": -2.3321216106414795, |
|
"logps/chosen": -271.9303894042969, |
|
"logps/rejected": -224.6049346923828, |
|
"loss": 0.4929, |
|
"rewards/accuracies": 0.753125011920929, |
|
"rewards/chosen": -0.09524812549352646, |
|
"rewards/margins": 0.8164734840393066, |
|
"rewards/rejected": -0.9117215871810913, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 4.477611940298507e-08, |
|
"logits/chosen": -2.3520119190216064, |
|
"logits/rejected": -2.338006019592285, |
|
"logps/chosen": -260.5480041503906, |
|
"logps/rejected": -230.41860961914062, |
|
"loss": 0.5032, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.12855994701385498, |
|
"rewards/margins": 0.8062857389450073, |
|
"rewards/rejected": -0.9348458051681519, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 4.2862610026789124e-08, |
|
"logits/chosen": -2.359654664993286, |
|
"logits/rejected": -2.2997984886169434, |
|
"logps/chosen": -267.7292175292969, |
|
"logps/rejected": -227.33132934570312, |
|
"loss": 0.538, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.14395280182361603, |
|
"rewards/margins": 0.735295832157135, |
|
"rewards/rejected": -0.8792486190795898, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 4.0949100650593186e-08, |
|
"logits/chosen": -2.4200806617736816, |
|
"logits/rejected": -2.360294818878174, |
|
"logps/chosen": -271.26324462890625, |
|
"logps/rejected": -227.1554412841797, |
|
"loss": 0.535, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.16018937528133392, |
|
"rewards/margins": 0.7067100405693054, |
|
"rewards/rejected": -0.866899311542511, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 3.903559127439724e-08, |
|
"logits/chosen": -2.3925411701202393, |
|
"logits/rejected": -2.331263780593872, |
|
"logps/chosen": -271.6239318847656, |
|
"logps/rejected": -231.4758758544922, |
|
"loss": 0.5233, |
|
"rewards/accuracies": 0.754687488079071, |
|
"rewards/chosen": -0.17314036190509796, |
|
"rewards/margins": 0.7330547571182251, |
|
"rewards/rejected": -0.9061950445175171, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 3.71220818982013e-08, |
|
"logits/chosen": -2.4014270305633545, |
|
"logits/rejected": -2.355578899383545, |
|
"logps/chosen": -279.74566650390625, |
|
"logps/rejected": -228.99746704101562, |
|
"loss": 0.5655, |
|
"rewards/accuracies": 0.6890624761581421, |
|
"rewards/chosen": -0.2012714445590973, |
|
"rewards/margins": 0.6636210680007935, |
|
"rewards/rejected": -0.8648926019668579, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 3.520857252200535e-08, |
|
"logits/chosen": -2.419564723968506, |
|
"logits/rejected": -2.3469746112823486, |
|
"logps/chosen": -271.7889099121094, |
|
"logps/rejected": -229.7169952392578, |
|
"loss": 0.5384, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.1927814781665802, |
|
"rewards/margins": 0.7116800546646118, |
|
"rewards/rejected": -0.9044615626335144, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 3.3295063145809414e-08, |
|
"logits/chosen": -2.3487582206726074, |
|
"logits/rejected": -2.311703681945801, |
|
"logps/chosen": -273.3060302734375, |
|
"logps/rejected": -253.8658905029297, |
|
"loss": 0.5457, |
|
"rewards/accuracies": 0.7281249761581421, |
|
"rewards/chosen": -0.18712952733039856, |
|
"rewards/margins": 0.712023138999939, |
|
"rewards/rejected": -0.8991526365280151, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 3.138155376961347e-08, |
|
"logits/chosen": -2.3431172370910645, |
|
"logits/rejected": -2.3230600357055664, |
|
"logps/chosen": -256.7168273925781, |
|
"logps/rejected": -227.06924438476562, |
|
"loss": 0.525, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -0.14321701228618622, |
|
"rewards/margins": 0.7300316095352173, |
|
"rewards/rejected": -0.8732486963272095, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 2.9468044393417525e-08, |
|
"logits/chosen": -2.3237102031707764, |
|
"logits/rejected": -2.318800449371338, |
|
"logps/chosen": -260.8058776855469, |
|
"logps/rejected": -226.2300262451172, |
|
"loss": 0.54, |
|
"rewards/accuracies": 0.7281249761581421, |
|
"rewards/chosen": -0.18601644039154053, |
|
"rewards/margins": 0.6762635111808777, |
|
"rewards/rejected": -0.862280011177063, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 2.755453501722158e-08, |
|
"logits/chosen": -2.3754734992980957, |
|
"logits/rejected": -2.337226629257202, |
|
"logps/chosen": -268.50054931640625, |
|
"logps/rejected": -225.01815795898438, |
|
"loss": 0.531, |
|
"rewards/accuracies": 0.7328125238418579, |
|
"rewards/chosen": -0.15901225805282593, |
|
"rewards/margins": 0.7717152237892151, |
|
"rewards/rejected": -0.930727481842041, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 2.564102564102564e-08, |
|
"logits/chosen": -2.372312068939209, |
|
"logits/rejected": -2.3167147636413574, |
|
"logps/chosen": -265.208740234375, |
|
"logps/rejected": -244.6002197265625, |
|
"loss": 0.5552, |
|
"rewards/accuracies": 0.7328125238418579, |
|
"rewards/chosen": -0.14687782526016235, |
|
"rewards/margins": 0.6419769525527954, |
|
"rewards/rejected": -0.788854718208313, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 2.3727516264829695e-08, |
|
"logits/chosen": -2.3352761268615723, |
|
"logits/rejected": -2.3101370334625244, |
|
"logps/chosen": -266.26129150390625, |
|
"logps/rejected": -228.8058624267578, |
|
"loss": 0.5182, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.1582275927066803, |
|
"rewards/margins": 0.7328441739082336, |
|
"rewards/rejected": -0.8910716772079468, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 2.1814006888633754e-08, |
|
"logits/chosen": -2.34548020362854, |
|
"logits/rejected": -2.3345954418182373, |
|
"logps/chosen": -268.830078125, |
|
"logps/rejected": -234.70590209960938, |
|
"loss": 0.5568, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.19458332657814026, |
|
"rewards/margins": 0.6641789674758911, |
|
"rewards/rejected": -0.8587621450424194, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 1.990049751243781e-08, |
|
"logits/chosen": -2.345353126525879, |
|
"logits/rejected": -2.309774875640869, |
|
"logps/chosen": -264.1852111816406, |
|
"logps/rejected": -232.37881469726562, |
|
"loss": 0.5252, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.13199767470359802, |
|
"rewards/margins": 0.7402657270431519, |
|
"rewards/rejected": -0.8722633123397827, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 1.7986988136241865e-08, |
|
"logits/chosen": -2.3702383041381836, |
|
"logits/rejected": -2.3145265579223633, |
|
"logps/chosen": -271.8026428222656, |
|
"logps/rejected": -234.308349609375, |
|
"loss": 0.5342, |
|
"rewards/accuracies": 0.7203124761581421, |
|
"rewards/chosen": -0.18479926884174347, |
|
"rewards/margins": 0.7022183537483215, |
|
"rewards/rejected": -0.8870177268981934, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 1.6073478760045924e-08, |
|
"logits/chosen": -2.3550527095794678, |
|
"logits/rejected": -2.3468880653381348, |
|
"logps/chosen": -282.18310546875, |
|
"logps/rejected": -234.3654327392578, |
|
"loss": 0.5167, |
|
"rewards/accuracies": 0.7281249761581421, |
|
"rewards/chosen": -0.06430914252996445, |
|
"rewards/margins": 0.790905237197876, |
|
"rewards/rejected": -0.8552142977714539, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 1.4159969383849981e-08, |
|
"logits/chosen": -2.3620452880859375, |
|
"logits/rejected": -2.345167398452759, |
|
"logps/chosen": -280.26470947265625, |
|
"logps/rejected": -235.08993530273438, |
|
"loss": 0.5554, |
|
"rewards/accuracies": 0.7171875238418579, |
|
"rewards/chosen": -0.1752968728542328, |
|
"rewards/margins": 0.6895657777786255, |
|
"rewards/rejected": -0.8648626208305359, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 1.2246460007654037e-08, |
|
"logits/chosen": -2.3489487171173096, |
|
"logits/rejected": -2.3238141536712646, |
|
"logps/chosen": -278.1292724609375, |
|
"logps/rejected": -240.13705444335938, |
|
"loss": 0.5593, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.15504539012908936, |
|
"rewards/margins": 0.6761684417724609, |
|
"rewards/rejected": -0.8312137722969055, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 1.0332950631458094e-08, |
|
"logits/chosen": -2.389085292816162, |
|
"logits/rejected": -2.364729166030884, |
|
"logps/chosen": -267.89630126953125, |
|
"logps/rejected": -229.4080352783203, |
|
"loss": 0.5274, |
|
"rewards/accuracies": 0.746874988079071, |
|
"rewards/chosen": -0.1344664990901947, |
|
"rewards/margins": 0.7971667051315308, |
|
"rewards/rejected": -0.9316331744194031, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 8.419441255262151e-09, |
|
"logits/chosen": -2.343165874481201, |
|
"logits/rejected": -2.3094429969787598, |
|
"logps/chosen": -260.5189514160156, |
|
"logps/rejected": -233.33432006835938, |
|
"loss": 0.5264, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.17281007766723633, |
|
"rewards/margins": 0.7437566518783569, |
|
"rewards/rejected": -0.9165668487548828, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 6.505931879066207e-09, |
|
"logits/chosen": -2.3336236476898193, |
|
"logits/rejected": -2.321570873260498, |
|
"logps/chosen": -278.14080810546875, |
|
"logps/rejected": -233.65365600585938, |
|
"loss": 0.5251, |
|
"rewards/accuracies": 0.7281249761581421, |
|
"rewards/chosen": -0.09798727929592133, |
|
"rewards/margins": 0.7966296076774597, |
|
"rewards/rejected": -0.8946169018745422, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 4.592422502870264e-09, |
|
"logits/chosen": -2.3987810611724854, |
|
"logits/rejected": -2.3644556999206543, |
|
"logps/chosen": -280.18609619140625, |
|
"logps/rejected": -233.4442138671875, |
|
"loss": 0.5218, |
|
"rewards/accuracies": 0.739062488079071, |
|
"rewards/chosen": -0.1451808661222458, |
|
"rewards/margins": 0.7559449076652527, |
|
"rewards/rejected": -0.9011257886886597, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 2.6789131266743202e-09, |
|
"logits/chosen": -2.3656845092773438, |
|
"logits/rejected": -2.3112010955810547, |
|
"logps/chosen": -255.6254425048828, |
|
"logps/rejected": -207.92041015625, |
|
"loss": 0.5262, |
|
"rewards/accuracies": 0.7359374761581421, |
|
"rewards/chosen": -0.161960169672966, |
|
"rewards/margins": 0.7441297769546509, |
|
"rewards/rejected": -0.9060899615287781, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 7.654037504783773e-10, |
|
"logits/chosen": -2.371070384979248, |
|
"logits/rejected": -2.3036046028137207, |
|
"logps/chosen": -267.93048095703125, |
|
"logps/rejected": -234.49667358398438, |
|
"loss": 0.5172, |
|
"rewards/accuracies": 0.7328125238418579, |
|
"rewards/chosen": -0.12272216379642487, |
|
"rewards/margins": 0.7688759565353394, |
|
"rewards/rejected": -0.891598105430603, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_logits/chosen": -2.020789384841919, |
|
"eval_logits/rejected": -1.9665637016296387, |
|
"eval_logps/chosen": -266.1182556152344, |
|
"eval_logps/rejected": -232.68226623535156, |
|
"eval_loss": 0.5263917446136475, |
|
"eval_rewards/accuracies": 0.734499990940094, |
|
"eval_rewards/chosen": -0.15599758923053741, |
|
"eval_rewards/margins": 0.7454450726509094, |
|
"eval_rewards/rejected": -0.9014427661895752, |
|
"eval_runtime": 1638.9888, |
|
"eval_samples_per_second": 1.22, |
|
"eval_steps_per_second": 0.305, |
|
"step": 2904 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"step": 2904, |
|
"total_flos": 0.0, |
|
"train_loss": 0.5636412144405126, |
|
"train_runtime": 257163.3959, |
|
"train_samples_per_second": 0.723, |
|
"train_steps_per_second": 0.011 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 2904, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"total_flos": 0.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|