{ "best_metric": null, "best_model_checkpoint": null, "epoch": 9.0, "eval_steps": 500, "global_step": 5607, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 8.9126559714795e-10, "logits/chosen": -1.1435530185699463, "logits/rejected": -1.1284090280532837, "logps/chosen": -80.93795776367188, "logps/rejected": -82.66487884521484, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1 }, { "epoch": 0.0, "learning_rate": 1.7825311942959e-09, "logits/chosen": -1.3172465562820435, "logits/rejected": -1.3799258470535278, "logps/chosen": -76.21844482421875, "logps/rejected": -76.22967529296875, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 2 }, { "epoch": 0.0, "learning_rate": 2.67379679144385e-09, "logits/chosen": -1.2846455574035645, "logits/rejected": -1.2674322128295898, "logps/chosen": -92.08033752441406, "logps/rejected": -89.7175521850586, "loss": 0.6921, "rewards/accuracies": 0.5, "rewards/chosen": 0.0014805792598053813, "rewards/margins": 0.005074121057987213, "rewards/rejected": -0.003593540983274579, "step": 3 }, { "epoch": 0.01, "learning_rate": 3.5650623885918e-09, "logits/chosen": -1.1334583759307861, "logits/rejected": -1.1685543060302734, "logps/chosen": -65.57213592529297, "logps/rejected": -87.30081939697266, "loss": 0.6958, "rewards/accuracies": 0.25, "rewards/chosen": -0.005223465617746115, "rewards/margins": -0.006298257503658533, "rewards/rejected": 0.001074791420251131, "step": 4 }, { "epoch": 0.01, "learning_rate": 4.45632798573975e-09, "logits/chosen": -1.3780877590179443, "logits/rejected": -1.3577934503555298, "logps/chosen": -112.97721099853516, "logps/rejected": -92.511474609375, "loss": 0.695, "rewards/accuracies": 0.5, "rewards/chosen": 0.011667252518236637, "rewards/margins": 0.0011396417394280434, "rewards/rejected": 0.01052760984748602, "step": 5 }, { "epoch": 0.01, "learning_rate": 5.3475935828877e-09, "logits/chosen": -1.1994566917419434, "logits/rejected": -1.1853225231170654, "logps/chosen": -91.11537170410156, "logps/rejected": -77.11100769042969, "loss": 0.697, "rewards/accuracies": 0.25, "rewards/chosen": 0.0005447386065497994, "rewards/margins": -0.008729648776352406, "rewards/rejected": 0.009274386800825596, "step": 6 }, { "epoch": 0.01, "learning_rate": 6.2388591800356504e-09, "logits/chosen": -1.214829921722412, "logits/rejected": -1.1547781229019165, "logps/chosen": -116.8848876953125, "logps/rejected": -88.86708068847656, "loss": 0.6952, "rewards/accuracies": 0.75, "rewards/chosen": -0.013874244876205921, "rewards/margins": -0.0020969377364963293, "rewards/rejected": -0.011777305975556374, "step": 7 }, { "epoch": 0.01, "learning_rate": 7.1301247771836e-09, "logits/chosen": -1.3349050283432007, "logits/rejected": -1.3133201599121094, "logps/chosen": -92.63496398925781, "logps/rejected": -84.50946807861328, "loss": 0.6934, "rewards/accuracies": 0.75, "rewards/chosen": 0.013919067569077015, "rewards/margins": 0.01947193220257759, "rewards/rejected": -0.005552864633500576, "step": 8 }, { "epoch": 0.01, "learning_rate": 8.021390374331551e-09, "logits/chosen": -1.3035023212432861, "logits/rejected": -1.2244908809661865, "logps/chosen": -87.8225326538086, "logps/rejected": -96.7188491821289, "loss": 0.6943, "rewards/accuracies": 1.0, "rewards/chosen": 0.017471885308623314, "rewards/margins": 0.034188367426395416, "rewards/rejected": -0.016716480255126953, "step": 9 }, { "epoch": 0.02, "learning_rate": 8.9126559714795e-09, "logits/chosen": -1.2716120481491089, "logits/rejected": -1.2810933589935303, "logps/chosen": -75.09244537353516, "logps/rejected": -81.57750701904297, "loss": 0.6957, "rewards/accuracies": 0.25, "rewards/chosen": -0.02150268666446209, "rewards/margins": -0.012680627405643463, "rewards/rejected": -0.008822059258818626, "step": 10 }, { "epoch": 0.02, "learning_rate": 9.803921568627451e-09, "logits/chosen": -1.3337805271148682, "logits/rejected": -1.3374552726745605, "logps/chosen": -54.97137451171875, "logps/rejected": -77.16277313232422, "loss": 0.6864, "rewards/accuracies": 1.0, "rewards/chosen": 0.017574407160282135, "rewards/margins": 0.030472660437226295, "rewards/rejected": -0.01289825513958931, "step": 11 }, { "epoch": 0.02, "learning_rate": 1.06951871657754e-08, "logits/chosen": -1.2354915142059326, "logits/rejected": -1.2200746536254883, "logps/chosen": -78.03058624267578, "logps/rejected": -74.38961029052734, "loss": 0.6921, "rewards/accuracies": 1.0, "rewards/chosen": 0.02010498195886612, "rewards/margins": 0.021007061004638672, "rewards/rejected": -0.0009020804427564144, "step": 12 }, { "epoch": 0.02, "learning_rate": 1.1586452762923352e-08, "logits/chosen": -1.1696809530258179, "logits/rejected": -1.224806308746338, "logps/chosen": -53.55621337890625, "logps/rejected": -65.93765258789062, "loss": 0.6909, "rewards/accuracies": 0.5, "rewards/chosen": 0.004014873411506414, "rewards/margins": -0.00037641567178070545, "rewards/rejected": 0.0043912893161177635, "step": 13 }, { "epoch": 0.02, "learning_rate": 1.2477718360071301e-08, "logits/chosen": -1.177738070487976, "logits/rejected": -1.1873152256011963, "logps/chosen": -81.28230285644531, "logps/rejected": -85.71018981933594, "loss": 0.6928, "rewards/accuracies": 0.0, "rewards/chosen": -0.008526802062988281, "rewards/margins": -0.023273276165127754, "rewards/rejected": 0.014746475033462048, "step": 14 }, { "epoch": 0.02, "learning_rate": 1.336898395721925e-08, "logits/chosen": -1.131972312927246, "logits/rejected": -1.2693361043930054, "logps/chosen": -92.68744659423828, "logps/rejected": -101.66392517089844, "loss": 0.692, "rewards/accuracies": 1.0, "rewards/chosen": 0.03864727169275284, "rewards/margins": 0.026333998888731003, "rewards/rejected": 0.012313270941376686, "step": 15 }, { "epoch": 0.03, "learning_rate": 1.42602495543672e-08, "logits/chosen": -1.3204665184020996, "logits/rejected": -1.2761931419372559, "logps/chosen": -95.74635314941406, "logps/rejected": -91.4443588256836, "loss": 0.6891, "rewards/accuracies": 1.0, "rewards/chosen": 0.01901693269610405, "rewards/margins": 0.03508748859167099, "rewards/rejected": -0.01607055589556694, "step": 16 }, { "epoch": 0.03, "learning_rate": 1.5151515151515152e-08, "logits/chosen": -1.1829864978790283, "logits/rejected": -1.2121195793151855, "logps/chosen": -86.8672103881836, "logps/rejected": -77.72029113769531, "loss": 0.6946, "rewards/accuracies": 0.5, "rewards/chosen": 0.03137397766113281, "rewards/margins": 0.03347301483154297, "rewards/rejected": -0.0020990371704101562, "step": 17 }, { "epoch": 0.03, "learning_rate": 1.6042780748663103e-08, "logits/chosen": -1.1455968618392944, "logits/rejected": -1.1175220012664795, "logps/chosen": -89.77186584472656, "logps/rejected": -80.4815444946289, "loss": 0.6904, "rewards/accuracies": 0.0, "rewards/chosen": -0.02430114708840847, "rewards/margins": -0.03043375164270401, "rewards/rejected": 0.006132603157311678, "step": 18 }, { "epoch": 0.03, "learning_rate": 1.693404634581105e-08, "logits/chosen": -1.1125303506851196, "logits/rejected": -1.0411198139190674, "logps/chosen": -74.30352783203125, "logps/rejected": -74.13288879394531, "loss": 0.6939, "rewards/accuracies": 0.25, "rewards/chosen": -0.031328774988651276, "rewards/margins": -0.026041794568300247, "rewards/rejected": -0.005286979489028454, "step": 19 }, { "epoch": 0.03, "learning_rate": 1.7825311942959e-08, "logits/chosen": -1.3786427974700928, "logits/rejected": -1.3799937963485718, "logps/chosen": -120.43927764892578, "logps/rejected": -118.28070068359375, "loss": 0.6957, "rewards/accuracies": 0.25, "rewards/chosen": -0.031014632433652878, "rewards/margins": -0.018782615661621094, "rewards/rejected": -0.012232016772031784, "step": 20 }, { "epoch": 0.03, "learning_rate": 1.8716577540106948e-08, "logits/chosen": -1.072481632232666, "logits/rejected": -1.051990270614624, "logps/chosen": -73.76265716552734, "logps/rejected": -83.66194152832031, "loss": 0.6922, "rewards/accuracies": 0.5, "rewards/chosen": -0.003286171006038785, "rewards/margins": -0.0012664794921875, "rewards/rejected": -0.0020196912810206413, "step": 21 }, { "epoch": 0.04, "learning_rate": 1.9607843137254902e-08, "logits/chosen": -1.2472436428070068, "logits/rejected": -1.2484467029571533, "logps/chosen": -73.8175277709961, "logps/rejected": -82.17957305908203, "loss": 0.694, "rewards/accuracies": 0.75, "rewards/chosen": 0.00903787650167942, "rewards/margins": 0.004723548889160156, "rewards/rejected": 0.004314327612519264, "step": 22 }, { "epoch": 0.04, "learning_rate": 2.0499108734402852e-08, "logits/chosen": -1.4827728271484375, "logits/rejected": -1.438743233680725, "logps/chosen": -81.41081237792969, "logps/rejected": -114.13521575927734, "loss": 0.6919, "rewards/accuracies": 0.75, "rewards/chosen": 0.013235283084213734, "rewards/margins": 0.01791839674115181, "rewards/rejected": -0.004683113191276789, "step": 23 }, { "epoch": 0.04, "learning_rate": 2.13903743315508e-08, "logits/chosen": -1.171872854232788, "logits/rejected": -1.0910282135009766, "logps/chosen": -112.17322540283203, "logps/rejected": -94.95963287353516, "loss": 0.6966, "rewards/accuracies": 0.75, "rewards/chosen": -0.006474972702562809, "rewards/margins": 0.018526744097471237, "rewards/rejected": -0.02500171586871147, "step": 24 }, { "epoch": 0.04, "learning_rate": 2.228163992869875e-08, "logits/chosen": -1.2558165788650513, "logits/rejected": -1.2542061805725098, "logps/chosen": -81.30638122558594, "logps/rejected": -77.05133056640625, "loss": 0.6983, "rewards/accuracies": 0.0, "rewards/chosen": -0.012335395440459251, "rewards/margins": -0.027157213538885117, "rewards/rejected": 0.014821816235780716, "step": 25 }, { "epoch": 0.04, "learning_rate": 2.3172905525846704e-08, "logits/chosen": -1.0553042888641357, "logits/rejected": -1.1029515266418457, "logps/chosen": -75.61978149414062, "logps/rejected": -102.44906616210938, "loss": 0.6915, "rewards/accuracies": 0.75, "rewards/chosen": 0.014333153143525124, "rewards/margins": 0.02329235151410103, "rewards/rejected": -0.008959198370575905, "step": 26 }, { "epoch": 0.04, "learning_rate": 2.406417112299465e-08, "logits/chosen": -1.2646524906158447, "logits/rejected": -1.263723611831665, "logps/chosen": -69.84374237060547, "logps/rejected": -64.16584014892578, "loss": 0.689, "rewards/accuracies": 1.0, "rewards/chosen": 0.03945665434002876, "rewards/margins": 0.015945911407470703, "rewards/rejected": 0.02351074293255806, "step": 27 }, { "epoch": 0.04, "learning_rate": 2.4955436720142602e-08, "logits/chosen": -1.2104555368423462, "logits/rejected": -1.1757853031158447, "logps/chosen": -74.42176818847656, "logps/rejected": -86.26324462890625, "loss": 0.6905, "rewards/accuracies": 0.75, "rewards/chosen": 0.024415016174316406, "rewards/margins": 0.026796627789735794, "rewards/rejected": -0.002381610684096813, "step": 28 }, { "epoch": 0.05, "learning_rate": 2.5846702317290552e-08, "logits/chosen": -1.2878258228302002, "logits/rejected": -1.409778118133545, "logps/chosen": -72.87904357910156, "logps/rejected": -105.05842590332031, "loss": 0.6983, "rewards/accuracies": 0.25, "rewards/chosen": -0.0002737997565418482, "rewards/margins": -0.029415415599942207, "rewards/rejected": 0.029141617938876152, "step": 29 }, { "epoch": 0.05, "learning_rate": 2.67379679144385e-08, "logits/chosen": -1.0850152969360352, "logits/rejected": -1.0075057744979858, "logps/chosen": -66.09901428222656, "logps/rejected": -76.13536834716797, "loss": 0.6985, "rewards/accuracies": 0.5, "rewards/chosen": -0.004696083255112171, "rewards/margins": 0.0047134398482739925, "rewards/rejected": -0.009409523569047451, "step": 30 }, { "epoch": 0.05, "learning_rate": 2.7629233511586453e-08, "logits/chosen": -1.023692011833191, "logits/rejected": -0.9985321164131165, "logps/chosen": -78.41415405273438, "logps/rejected": -96.77940368652344, "loss": 0.6927, "rewards/accuracies": 0.25, "rewards/chosen": 0.0017423636745661497, "rewards/margins": 0.0020977025851607323, "rewards/rejected": -0.0003553388814907521, "step": 31 }, { "epoch": 0.05, "learning_rate": 2.85204991087344e-08, "logits/chosen": -1.1439048051834106, "logits/rejected": -1.19899320602417, "logps/chosen": -88.75979614257812, "logps/rejected": -94.55015563964844, "loss": 0.6981, "rewards/accuracies": 0.75, "rewards/chosen": 0.018172454088926315, "rewards/margins": 0.0387752503156662, "rewards/rejected": -0.020602799952030182, "step": 32 }, { "epoch": 0.05, "learning_rate": 2.941176470588235e-08, "logits/chosen": -1.3200393915176392, "logits/rejected": -1.2867811918258667, "logps/chosen": -92.84091186523438, "logps/rejected": -73.74278259277344, "loss": 0.6941, "rewards/accuracies": 0.75, "rewards/chosen": 0.008177567273378372, "rewards/margins": 0.01898803561925888, "rewards/rejected": -0.010810472071170807, "step": 33 }, { "epoch": 0.05, "learning_rate": 3.0303030303030305e-08, "logits/chosen": -1.2860469818115234, "logits/rejected": -1.176468849182129, "logps/chosen": -79.335205078125, "logps/rejected": -83.57208251953125, "loss": 0.6928, "rewards/accuracies": 0.5, "rewards/chosen": -0.0002735140733420849, "rewards/margins": 0.018741607666015625, "rewards/rejected": -0.019015122205018997, "step": 34 }, { "epoch": 0.06, "learning_rate": 3.119429590017825e-08, "logits/chosen": -1.3077772855758667, "logits/rejected": -1.2369978427886963, "logps/chosen": -94.12866973876953, "logps/rejected": -87.97264862060547, "loss": 0.6958, "rewards/accuracies": 0.5, "rewards/chosen": -0.022930718958377838, "rewards/margins": -0.02383594401180744, "rewards/rejected": 0.0009052273817360401, "step": 35 }, { "epoch": 0.06, "learning_rate": 3.2085561497326206e-08, "logits/chosen": -1.5053722858428955, "logits/rejected": -1.4844354391098022, "logps/chosen": -88.12223052978516, "logps/rejected": -94.58377075195312, "loss": 0.6896, "rewards/accuracies": 0.5, "rewards/chosen": 0.0020842552185058594, "rewards/margins": 0.012209415435791016, "rewards/rejected": -0.010125160217285156, "step": 36 }, { "epoch": 0.06, "learning_rate": 3.297682709447415e-08, "logits/chosen": -1.3753101825714111, "logits/rejected": -1.4570457935333252, "logps/chosen": -93.10310363769531, "logps/rejected": -114.03001403808594, "loss": 0.6922, "rewards/accuracies": 0.75, "rewards/chosen": 0.012287425808608532, "rewards/margins": 0.03860483318567276, "rewards/rejected": -0.026317404583096504, "step": 37 }, { "epoch": 0.06, "learning_rate": 3.38680926916221e-08, "logits/chosen": -1.0623289346694946, "logits/rejected": -1.0550472736358643, "logps/chosen": -85.97282409667969, "logps/rejected": -94.81553649902344, "loss": 0.6915, "rewards/accuracies": 0.5, "rewards/chosen": -0.01326742209494114, "rewards/margins": -0.0038396830204874277, "rewards/rejected": -0.00942773837596178, "step": 38 }, { "epoch": 0.06, "learning_rate": 3.475935828877005e-08, "logits/chosen": -1.1288137435913086, "logits/rejected": -1.1795384883880615, "logps/chosen": -68.82371520996094, "logps/rejected": -62.92646026611328, "loss": 0.6972, "rewards/accuracies": 0.5, "rewards/chosen": -0.01655597612261772, "rewards/margins": 0.009626006707549095, "rewards/rejected": -0.026181982830166817, "step": 39 }, { "epoch": 0.06, "learning_rate": 3.5650623885918e-08, "logits/chosen": -1.2615855932235718, "logits/rejected": -1.3178114891052246, "logps/chosen": -75.31927490234375, "logps/rejected": -70.06914520263672, "loss": 0.6913, "rewards/accuracies": 0.25, "rewards/chosen": -0.007126140408217907, "rewards/margins": -0.01494908519089222, "rewards/rejected": 0.007822942920029163, "step": 40 }, { "epoch": 0.07, "learning_rate": 3.654188948306595e-08, "logits/chosen": -1.2270495891571045, "logits/rejected": -1.2540949583053589, "logps/chosen": -97.13208770751953, "logps/rejected": -105.10755920410156, "loss": 0.6936, "rewards/accuracies": 0.0, "rewards/chosen": -0.020570946857333183, "rewards/margins": -0.03218650817871094, "rewards/rejected": 0.011615563184022903, "step": 41 }, { "epoch": 0.07, "learning_rate": 3.7433155080213896e-08, "logits/chosen": -1.099852442741394, "logits/rejected": -1.232105016708374, "logps/chosen": -88.85939025878906, "logps/rejected": -95.95564270019531, "loss": 0.6935, "rewards/accuracies": 0.5, "rewards/chosen": 0.015971947461366653, "rewards/margins": 0.0004159929230809212, "rewards/rejected": 0.015555954538285732, "step": 42 }, { "epoch": 0.07, "learning_rate": 3.832442067736185e-08, "logits/chosen": -1.3121964931488037, "logits/rejected": -1.3425496816635132, "logps/chosen": -120.95159912109375, "logps/rejected": -95.6594009399414, "loss": 0.6957, "rewards/accuracies": 0.25, "rewards/chosen": -0.016383934766054153, "rewards/margins": -0.013637255877256393, "rewards/rejected": -0.0027466770261526108, "step": 43 }, { "epoch": 0.07, "learning_rate": 3.9215686274509804e-08, "logits/chosen": -1.1658923625946045, "logits/rejected": -1.11716890335083, "logps/chosen": -79.65956115722656, "logps/rejected": -100.74508666992188, "loss": 0.6936, "rewards/accuracies": 0.5, "rewards/chosen": 9.164831135421991e-05, "rewards/margins": -0.014169787988066673, "rewards/rejected": 0.014261436648666859, "step": 44 }, { "epoch": 0.07, "learning_rate": 4.0106951871657754e-08, "logits/chosen": -1.1392419338226318, "logits/rejected": -1.1202974319458008, "logps/chosen": -103.25138854980469, "logps/rejected": -85.0802001953125, "loss": 0.6896, "rewards/accuracies": 0.5, "rewards/chosen": 0.01794738881289959, "rewards/margins": 0.017765045166015625, "rewards/rejected": 0.00018234271556138992, "step": 45 }, { "epoch": 0.07, "learning_rate": 4.0998217468805705e-08, "logits/chosen": -1.1125216484069824, "logits/rejected": -1.1562517881393433, "logps/chosen": -88.44237518310547, "logps/rejected": -82.15657043457031, "loss": 0.6945, "rewards/accuracies": 0.5, "rewards/chosen": 0.008600997738540173, "rewards/margins": -0.0014699934981763363, "rewards/rejected": 0.010070991702377796, "step": 46 }, { "epoch": 0.08, "learning_rate": 4.1889483065953655e-08, "logits/chosen": -1.4318203926086426, "logits/rejected": -1.4092633724212646, "logps/chosen": -96.57569122314453, "logps/rejected": -92.53756713867188, "loss": 0.689, "rewards/accuracies": 0.5, "rewards/chosen": 0.013475132174789906, "rewards/margins": 0.009241009131073952, "rewards/rejected": 0.0042341239750385284, "step": 47 }, { "epoch": 0.08, "learning_rate": 4.27807486631016e-08, "logits/chosen": -1.3281644582748413, "logits/rejected": -1.2621127367019653, "logps/chosen": -106.2555160522461, "logps/rejected": -62.03329849243164, "loss": 0.6884, "rewards/accuracies": 0.5, "rewards/chosen": -0.01718731038272381, "rewards/margins": 0.006827164441347122, "rewards/rejected": -0.02401447296142578, "step": 48 }, { "epoch": 0.08, "learning_rate": 4.367201426024955e-08, "logits/chosen": -0.8626635074615479, "logits/rejected": -0.9543948173522949, "logps/chosen": -107.60574340820312, "logps/rejected": -87.0836410522461, "loss": 0.6903, "rewards/accuracies": 0.0, "rewards/chosen": -0.00990066584199667, "rewards/margins": -0.016774367541074753, "rewards/rejected": 0.006873703561723232, "step": 49 }, { "epoch": 0.08, "learning_rate": 4.45632798573975e-08, "logits/chosen": -1.2819422483444214, "logits/rejected": -1.2799181938171387, "logps/chosen": -81.89445495605469, "logps/rejected": -75.72430419921875, "loss": 0.6953, "rewards/accuracies": 0.25, "rewards/chosen": -0.007124138064682484, "rewards/margins": -0.01375818345695734, "rewards/rejected": 0.006634045392274857, "step": 50 }, { "epoch": 0.08, "learning_rate": 4.545454545454545e-08, "logits/chosen": -1.1933066844940186, "logits/rejected": -1.20717191696167, "logps/chosen": -105.99375915527344, "logps/rejected": -94.26728820800781, "loss": 0.6908, "rewards/accuracies": 0.5, "rewards/chosen": 0.05494384467601776, "rewards/margins": 0.022523686289787292, "rewards/rejected": 0.03242015838623047, "step": 51 }, { "epoch": 0.08, "learning_rate": 4.634581105169341e-08, "logits/chosen": -1.120680809020996, "logits/rejected": -1.26053786277771, "logps/chosen": -64.94696807861328, "logps/rejected": -67.43417358398438, "loss": 0.6895, "rewards/accuracies": 0.75, "rewards/chosen": 0.018457412719726562, "rewards/margins": 0.03609037399291992, "rewards/rejected": -0.01763296127319336, "step": 52 }, { "epoch": 0.09, "learning_rate": 4.723707664884135e-08, "logits/chosen": -1.1840916872024536, "logits/rejected": -1.2131097316741943, "logps/chosen": -71.52344512939453, "logps/rejected": -62.33109664916992, "loss": 0.6948, "rewards/accuracies": 0.75, "rewards/chosen": 0.019170094281435013, "rewards/margins": 0.022496510297060013, "rewards/rejected": -0.0033264162484556437, "step": 53 }, { "epoch": 0.09, "learning_rate": 4.81283422459893e-08, "logits/chosen": -1.202501654624939, "logits/rejected": -1.224836826324463, "logps/chosen": -72.78712463378906, "logps/rejected": -66.86026000976562, "loss": 0.7008, "rewards/accuracies": 0.5, "rewards/chosen": 0.012299585156142712, "rewards/margins": -0.004205942153930664, "rewards/rejected": 0.01650552824139595, "step": 54 }, { "epoch": 0.09, "learning_rate": 4.901960784313725e-08, "logits/chosen": -1.2900843620300293, "logits/rejected": -1.295175313949585, "logps/chosen": -87.58349609375, "logps/rejected": -96.93292999267578, "loss": 0.691, "rewards/accuracies": 0.5, "rewards/chosen": 0.0020314217545092106, "rewards/margins": -0.009500597603619099, "rewards/rejected": 0.011532019823789597, "step": 55 }, { "epoch": 0.09, "learning_rate": 4.9910873440285203e-08, "logits/chosen": -1.2029945850372314, "logits/rejected": -1.1422687768936157, "logps/chosen": -84.79794311523438, "logps/rejected": -100.21464538574219, "loss": 0.6949, "rewards/accuracies": 0.5, "rewards/chosen": 0.006718254182487726, "rewards/margins": -0.00048532476648688316, "rewards/rejected": 0.007203578948974609, "step": 56 }, { "epoch": 0.09, "learning_rate": 5.0802139037433154e-08, "logits/chosen": -1.20074462890625, "logits/rejected": -1.2153781652450562, "logps/chosen": -79.45578002929688, "logps/rejected": -78.18204498291016, "loss": 0.697, "rewards/accuracies": 0.5, "rewards/chosen": -0.0031892764382064342, "rewards/margins": -0.0035653107333928347, "rewards/rejected": 0.00037603359669446945, "step": 57 }, { "epoch": 0.09, "learning_rate": 5.1693404634581104e-08, "logits/chosen": -1.1200675964355469, "logits/rejected": -1.1578197479248047, "logps/chosen": -75.36161804199219, "logps/rejected": -93.00215911865234, "loss": 0.6916, "rewards/accuracies": 0.75, "rewards/chosen": -0.005454349331557751, "rewards/margins": 0.011344622820615768, "rewards/rejected": -0.016798973083496094, "step": 58 }, { "epoch": 0.09, "learning_rate": 5.258467023172905e-08, "logits/chosen": -1.4972758293151855, "logits/rejected": -1.517890214920044, "logps/chosen": -73.85816192626953, "logps/rejected": -91.58338928222656, "loss": 0.6976, "rewards/accuracies": 0.75, "rewards/chosen": -0.009135151281952858, "rewards/margins": -0.0029885279946029186, "rewards/rejected": -0.006146621890366077, "step": 59 }, { "epoch": 0.1, "learning_rate": 5.3475935828877e-08, "logits/chosen": -1.1440531015396118, "logits/rejected": -1.2267813682556152, "logps/chosen": -78.04267883300781, "logps/rejected": -75.09068298339844, "loss": 0.6968, "rewards/accuracies": 0.75, "rewards/chosen": -0.0037278179079294205, "rewards/margins": 0.0017625801265239716, "rewards/rejected": -0.005490398034453392, "step": 60 }, { "epoch": 0.1, "learning_rate": 5.4367201426024956e-08, "logits/chosen": -1.3389140367507935, "logits/rejected": -1.2966046333312988, "logps/chosen": -111.0379867553711, "logps/rejected": -92.76790618896484, "loss": 0.6947, "rewards/accuracies": 0.25, "rewards/chosen": -0.009166527539491653, "rewards/margins": -0.0020053863991051912, "rewards/rejected": -0.007161140441894531, "step": 61 }, { "epoch": 0.1, "learning_rate": 5.5258467023172907e-08, "logits/chosen": -1.1153532266616821, "logits/rejected": -1.08543860912323, "logps/chosen": -73.2496337890625, "logps/rejected": -69.35696411132812, "loss": 0.6946, "rewards/accuracies": 0.5, "rewards/chosen": -0.010410403832793236, "rewards/margins": 0.0034862528555095196, "rewards/rejected": -0.013896657153964043, "step": 62 }, { "epoch": 0.1, "learning_rate": 5.614973262032086e-08, "logits/chosen": -1.1505615711212158, "logits/rejected": -1.1815423965454102, "logps/chosen": -81.41874694824219, "logps/rejected": -80.91473388671875, "loss": 0.6928, "rewards/accuracies": 1.0, "rewards/chosen": 0.0011878013610839844, "rewards/margins": 0.016660023480653763, "rewards/rejected": -0.015472222119569778, "step": 63 }, { "epoch": 0.1, "learning_rate": 5.70409982174688e-08, "logits/chosen": -1.2964575290679932, "logits/rejected": -1.2357640266418457, "logps/chosen": -76.82340240478516, "logps/rejected": -81.20909881591797, "loss": 0.6943, "rewards/accuracies": 0.25, "rewards/chosen": -0.012250232510268688, "rewards/margins": -0.016117095947265625, "rewards/rejected": 0.003866863902658224, "step": 64 }, { "epoch": 0.1, "learning_rate": 5.793226381461675e-08, "logits/chosen": -1.2380067110061646, "logits/rejected": -1.1993043422698975, "logps/chosen": -84.76339721679688, "logps/rejected": -101.11196899414062, "loss": 0.696, "rewards/accuracies": 0.25, "rewards/chosen": 0.0003474232507869601, "rewards/margins": -0.007950305938720703, "rewards/rejected": 0.008297729305922985, "step": 65 }, { "epoch": 0.11, "learning_rate": 5.88235294117647e-08, "logits/chosen": -1.3593496084213257, "logits/rejected": -1.4487707614898682, "logps/chosen": -84.22575378417969, "logps/rejected": -76.25640869140625, "loss": 0.6904, "rewards/accuracies": 0.25, "rewards/chosen": -0.008716392330825329, "rewards/margins": -0.010149955749511719, "rewards/rejected": 0.0014335631858557463, "step": 66 }, { "epoch": 0.11, "learning_rate": 5.971479500891265e-08, "logits/chosen": -1.190024733543396, "logits/rejected": -1.196122407913208, "logps/chosen": -72.794677734375, "logps/rejected": -77.54939270019531, "loss": 0.6928, "rewards/accuracies": 0.5, "rewards/chosen": 0.009924030862748623, "rewards/margins": 8.163508027791977e-05, "rewards/rejected": 0.009842395782470703, "step": 67 }, { "epoch": 0.11, "learning_rate": 6.060606060606061e-08, "logits/chosen": -1.1421548128128052, "logits/rejected": -1.1087861061096191, "logps/chosen": -97.81107330322266, "logps/rejected": -84.20929718017578, "loss": 0.6926, "rewards/accuracies": 0.25, "rewards/chosen": -0.005739974789321423, "rewards/margins": -0.01303710974752903, "rewards/rejected": 0.00729713449254632, "step": 68 }, { "epoch": 0.11, "learning_rate": 6.149732620320855e-08, "logits/chosen": -1.3280189037322998, "logits/rejected": -1.3729376792907715, "logps/chosen": -71.10903930664062, "logps/rejected": -93.05481719970703, "loss": 0.6936, "rewards/accuracies": 0.75, "rewards/chosen": 0.004936314187943935, "rewards/margins": 0.0032421117648482323, "rewards/rejected": 0.0016942024230957031, "step": 69 }, { "epoch": 0.11, "learning_rate": 6.23885918003565e-08, "logits/chosen": -1.2318353652954102, "logits/rejected": -1.2696576118469238, "logps/chosen": -87.1533203125, "logps/rejected": -79.95406341552734, "loss": 0.6946, "rewards/accuracies": 0.5, "rewards/chosen": -0.008516121655702591, "rewards/margins": -0.006702518090605736, "rewards/rejected": -0.0018136021681129932, "step": 70 }, { "epoch": 0.11, "learning_rate": 6.327985739750445e-08, "logits/chosen": -1.1851850748062134, "logits/rejected": -1.1961557865142822, "logps/chosen": -88.64912414550781, "logps/rejected": -97.94415283203125, "loss": 0.6984, "rewards/accuracies": 0.25, "rewards/chosen": -0.019295310601592064, "rewards/margins": -0.014165686443448067, "rewards/rejected": -0.00512962369248271, "step": 71 }, { "epoch": 0.12, "learning_rate": 6.417112299465241e-08, "logits/chosen": -1.3361687660217285, "logits/rejected": -1.2918274402618408, "logps/chosen": -72.51200866699219, "logps/rejected": -94.09864044189453, "loss": 0.6945, "rewards/accuracies": 0.25, "rewards/chosen": 0.002146434970200062, "rewards/margins": -0.007274341303855181, "rewards/rejected": 0.00942077673971653, "step": 72 }, { "epoch": 0.12, "learning_rate": 6.506238859180036e-08, "logits/chosen": -1.173933982849121, "logits/rejected": -1.243645191192627, "logps/chosen": -98.51423645019531, "logps/rejected": -77.10397338867188, "loss": 0.6948, "rewards/accuracies": 0.5, "rewards/chosen": 0.013683319091796875, "rewards/margins": 0.0037531850393861532, "rewards/rejected": 0.009930133819580078, "step": 73 }, { "epoch": 0.12, "learning_rate": 6.59536541889483e-08, "logits/chosen": -1.320555329322815, "logits/rejected": -1.2917400598526, "logps/chosen": -100.91671752929688, "logps/rejected": -86.16136169433594, "loss": 0.6944, "rewards/accuracies": 0.0, "rewards/chosen": -0.022674178704619408, "rewards/margins": -0.011483382433652878, "rewards/rejected": -0.01119079627096653, "step": 74 }, { "epoch": 0.12, "learning_rate": 6.684491978609626e-08, "logits/chosen": -1.1561110019683838, "logits/rejected": -1.2179300785064697, "logps/chosen": -76.69493103027344, "logps/rejected": -73.7353286743164, "loss": 0.7003, "rewards/accuracies": 0.5, "rewards/chosen": 0.000678634736686945, "rewards/margins": 0.00713272113353014, "rewards/rejected": -0.006454085931181908, "step": 75 }, { "epoch": 0.12, "learning_rate": 6.77361853832442e-08, "logits/chosen": -0.9949007034301758, "logits/rejected": -1.088796854019165, "logps/chosen": -73.66630554199219, "logps/rejected": -88.02740478515625, "loss": 0.6931, "rewards/accuracies": 0.5, "rewards/chosen": 0.016364479437470436, "rewards/margins": 0.0032363897189497948, "rewards/rejected": 0.013128089718520641, "step": 76 }, { "epoch": 0.12, "learning_rate": 6.862745098039216e-08, "logits/chosen": -1.1558034420013428, "logits/rejected": -1.074524164199829, "logps/chosen": -92.88443756103516, "logps/rejected": -97.43186950683594, "loss": 0.6871, "rewards/accuracies": 1.0, "rewards/chosen": 0.012816810049116611, "rewards/margins": 0.026808548718690872, "rewards/rejected": -0.013991737738251686, "step": 77 }, { "epoch": 0.13, "learning_rate": 6.95187165775401e-08, "logits/chosen": -1.161596417427063, "logits/rejected": -1.1057379245758057, "logps/chosen": -111.82357788085938, "logps/rejected": -101.24547576904297, "loss": 0.69, "rewards/accuracies": 0.25, "rewards/chosen": -0.016997529193758965, "rewards/margins": -0.002710532397031784, "rewards/rejected": -0.014286995865404606, "step": 78 }, { "epoch": 0.13, "learning_rate": 7.040998217468805e-08, "logits/chosen": -1.1989147663116455, "logits/rejected": -1.229402780532837, "logps/chosen": -102.84278106689453, "logps/rejected": -109.17547607421875, "loss": 0.6888, "rewards/accuracies": 0.5, "rewards/chosen": -0.0013053894508630037, "rewards/margins": -0.008565520867705345, "rewards/rejected": 0.007260132115334272, "step": 79 }, { "epoch": 0.13, "learning_rate": 7.1301247771836e-08, "logits/chosen": -1.3043824434280396, "logits/rejected": -1.3470394611358643, "logps/chosen": -101.53778076171875, "logps/rejected": -98.79399871826172, "loss": 0.7023, "rewards/accuracies": 0.25, "rewards/chosen": -0.026903916150331497, "rewards/margins": -0.04309577867388725, "rewards/rejected": 0.016191862523555756, "step": 80 }, { "epoch": 0.13, "learning_rate": 7.219251336898395e-08, "logits/chosen": -1.2793223857879639, "logits/rejected": -1.3029398918151855, "logps/chosen": -102.49290466308594, "logps/rejected": -95.17884826660156, "loss": 0.6945, "rewards/accuracies": 0.5, "rewards/chosen": -0.008845615200698376, "rewards/margins": 0.008823300711810589, "rewards/rejected": -0.017668915912508965, "step": 81 }, { "epoch": 0.13, "learning_rate": 7.30837789661319e-08, "logits/chosen": -1.2792365550994873, "logits/rejected": -1.343281626701355, "logps/chosen": -83.31664276123047, "logps/rejected": -81.52680969238281, "loss": 0.6909, "rewards/accuracies": 0.25, "rewards/chosen": 0.00019426352810114622, "rewards/margins": -0.009444141760468483, "rewards/rejected": 0.009638404473662376, "step": 82 }, { "epoch": 0.13, "learning_rate": 7.397504456327985e-08, "logits/chosen": -1.191178798675537, "logits/rejected": -1.2203766107559204, "logps/chosen": -99.72430419921875, "logps/rejected": -99.75408935546875, "loss": 0.6925, "rewards/accuracies": 0.25, "rewards/chosen": -0.013767242431640625, "rewards/margins": -0.015200234018266201, "rewards/rejected": 0.0014329918194562197, "step": 83 }, { "epoch": 0.13, "learning_rate": 7.486631016042779e-08, "logits/chosen": -1.2354995012283325, "logits/rejected": -1.320689082145691, "logps/chosen": -83.50981140136719, "logps/rejected": -93.85690307617188, "loss": 0.6841, "rewards/accuracies": 0.75, "rewards/chosen": 0.012112140655517578, "rewards/margins": 0.022345637902617455, "rewards/rejected": -0.010233497247099876, "step": 84 }, { "epoch": 0.14, "learning_rate": 7.575757575757576e-08, "logits/chosen": -1.0114192962646484, "logits/rejected": -1.1006252765655518, "logps/chosen": -61.200103759765625, "logps/rejected": -76.0113525390625, "loss": 0.6963, "rewards/accuracies": 0.5, "rewards/chosen": -0.0027909281197935343, "rewards/margins": 0.007663249969482422, "rewards/rejected": -0.010454177856445312, "step": 85 }, { "epoch": 0.14, "learning_rate": 7.66488413547237e-08, "logits/chosen": -1.3288710117340088, "logits/rejected": -1.2539799213409424, "logps/chosen": -65.80592346191406, "logps/rejected": -63.5286865234375, "loss": 0.6875, "rewards/accuracies": 1.0, "rewards/chosen": 0.0201434139162302, "rewards/margins": 0.023700810968875885, "rewards/rejected": -0.0035573961213231087, "step": 86 }, { "epoch": 0.14, "learning_rate": 7.754010695187166e-08, "logits/chosen": -1.0129657983779907, "logits/rejected": -1.0055696964263916, "logps/chosen": -51.792137145996094, "logps/rejected": -69.52151489257812, "loss": 0.6963, "rewards/accuracies": 0.0, "rewards/chosen": -0.014261627569794655, "rewards/margins": -0.006102276034653187, "rewards/rejected": -0.008159352466464043, "step": 87 }, { "epoch": 0.14, "learning_rate": 7.843137254901961e-08, "logits/chosen": -1.0171966552734375, "logits/rejected": -0.9589600563049316, "logps/chosen": -89.1555404663086, "logps/rejected": -84.46793365478516, "loss": 0.6917, "rewards/accuracies": 0.25, "rewards/chosen": -0.004412651062011719, "rewards/margins": -0.008263587020337582, "rewards/rejected": 0.0038509368896484375, "step": 88 }, { "epoch": 0.14, "learning_rate": 7.932263814616755e-08, "logits/chosen": -1.095560908317566, "logits/rejected": -1.1174005270004272, "logps/chosen": -84.72117614746094, "logps/rejected": -98.22677612304688, "loss": 0.6961, "rewards/accuracies": 0.5, "rewards/chosen": 0.00022869138047099113, "rewards/margins": 0.0016847611404955387, "rewards/rejected": -0.0014560702256858349, "step": 89 }, { "epoch": 0.14, "learning_rate": 8.021390374331551e-08, "logits/chosen": -1.3894323110580444, "logits/rejected": -1.3977967500686646, "logps/chosen": -91.18753051757812, "logps/rejected": -86.59329223632812, "loss": 0.6908, "rewards/accuracies": 0.75, "rewards/chosen": 0.036875344812870026, "rewards/margins": 0.044654086232185364, "rewards/rejected": -0.007778740022331476, "step": 90 }, { "epoch": 0.15, "learning_rate": 8.110516934046345e-08, "logits/chosen": -1.090181589126587, "logits/rejected": -1.0431842803955078, "logps/chosen": -72.19103240966797, "logps/rejected": -67.673095703125, "loss": 0.6954, "rewards/accuracies": 0.25, "rewards/chosen": 0.0002225879579782486, "rewards/margins": -0.009957600384950638, "rewards/rejected": 0.010180187411606312, "step": 91 }, { "epoch": 0.15, "learning_rate": 8.199643493761141e-08, "logits/chosen": -1.1848105192184448, "logits/rejected": -1.1699137687683105, "logps/chosen": -82.49169158935547, "logps/rejected": -78.44300079345703, "loss": 0.6897, "rewards/accuracies": 0.25, "rewards/chosen": -0.01538839377462864, "rewards/margins": -0.03306303173303604, "rewards/rejected": 0.017674636095762253, "step": 92 }, { "epoch": 0.15, "learning_rate": 8.288770053475935e-08, "logits/chosen": -1.3438172340393066, "logits/rejected": -1.2754483222961426, "logps/chosen": -78.58428192138672, "logps/rejected": -101.5166244506836, "loss": 0.6904, "rewards/accuracies": 0.5, "rewards/chosen": -0.013726996257901192, "rewards/margins": -0.02110939286649227, "rewards/rejected": 0.007382392883300781, "step": 93 }, { "epoch": 0.15, "learning_rate": 8.377896613190731e-08, "logits/chosen": -1.277827262878418, "logits/rejected": -1.3513429164886475, "logps/chosen": -65.3743896484375, "logps/rejected": -80.37142944335938, "loss": 0.6947, "rewards/accuracies": 0.5, "rewards/chosen": 0.005968189332634211, "rewards/margins": -0.0009260177612304688, "rewards/rejected": 0.006894207559525967, "step": 94 }, { "epoch": 0.15, "learning_rate": 8.467023172905525e-08, "logits/chosen": -1.2165343761444092, "logits/rejected": -1.132431983947754, "logps/chosen": -59.45248794555664, "logps/rejected": -70.6431884765625, "loss": 0.6902, "rewards/accuracies": 0.0, "rewards/chosen": -0.014494800940155983, "rewards/margins": -0.03155374526977539, "rewards/rejected": 0.017058944329619408, "step": 95 }, { "epoch": 0.15, "learning_rate": 8.55614973262032e-08, "logits/chosen": -1.2969179153442383, "logits/rejected": -1.3179165124893188, "logps/chosen": -48.20478439331055, "logps/rejected": -67.55217742919922, "loss": 0.6981, "rewards/accuracies": 0.5, "rewards/chosen": -0.010920858941972256, "rewards/margins": -0.016322661191225052, "rewards/rejected": 0.005401803180575371, "step": 96 }, { "epoch": 0.16, "learning_rate": 8.645276292335116e-08, "logits/chosen": -1.0689046382904053, "logits/rejected": -1.1024280786514282, "logps/chosen": -123.50025939941406, "logps/rejected": -101.05804443359375, "loss": 0.6934, "rewards/accuracies": 0.75, "rewards/chosen": 0.021617889404296875, "rewards/margins": 0.003183746710419655, "rewards/rejected": 0.01843414269387722, "step": 97 }, { "epoch": 0.16, "learning_rate": 8.73440285204991e-08, "logits/chosen": -1.0501301288604736, "logits/rejected": -1.1594352722167969, "logps/chosen": -81.53727722167969, "logps/rejected": -104.27888488769531, "loss": 0.6938, "rewards/accuracies": 0.0, "rewards/chosen": 0.006105995737016201, "rewards/margins": -0.017554093152284622, "rewards/rejected": 0.023660089820623398, "step": 98 }, { "epoch": 0.16, "learning_rate": 8.823529411764706e-08, "logits/chosen": -1.2999415397644043, "logits/rejected": -1.309904932975769, "logps/chosen": -68.355712890625, "logps/rejected": -83.71822357177734, "loss": 0.6954, "rewards/accuracies": 0.25, "rewards/chosen": -0.0024814605712890625, "rewards/margins": -0.011721325106918812, "rewards/rejected": 0.00923986453562975, "step": 99 }, { "epoch": 0.16, "learning_rate": 8.9126559714795e-08, "logits/chosen": -1.1235615015029907, "logits/rejected": -1.137649416923523, "logps/chosen": -94.91709899902344, "logps/rejected": -105.24687194824219, "loss": 0.6904, "rewards/accuracies": 0.5, "rewards/chosen": 0.0030929564964026213, "rewards/margins": 0.007976721972227097, "rewards/rejected": -0.004883766174316406, "step": 100 }, { "epoch": 0.16, "learning_rate": 9.001782531194294e-08, "logits/chosen": -1.1797455549240112, "logits/rejected": -1.1092474460601807, "logps/chosen": -84.97235107421875, "logps/rejected": -67.17857360839844, "loss": 0.6977, "rewards/accuracies": 0.75, "rewards/chosen": 0.017327498644590378, "rewards/margins": 0.028324604034423828, "rewards/rejected": -0.010997104458510876, "step": 101 }, { "epoch": 0.16, "learning_rate": 9.09090909090909e-08, "logits/chosen": -1.191285490989685, "logits/rejected": -1.1985759735107422, "logps/chosen": -82.893310546875, "logps/rejected": -72.94395446777344, "loss": 0.6897, "rewards/accuracies": 0.5, "rewards/chosen": -0.007577705197036266, "rewards/margins": 0.015474701300263405, "rewards/rejected": -0.023052407428622246, "step": 102 }, { "epoch": 0.17, "learning_rate": 9.180035650623885e-08, "logits/chosen": -1.247119426727295, "logits/rejected": -1.1441011428833008, "logps/chosen": -72.40908813476562, "logps/rejected": -86.26667785644531, "loss": 0.699, "rewards/accuracies": 0.25, "rewards/chosen": 0.01435003336519003, "rewards/margins": -0.0020779615733772516, "rewards/rejected": 0.016427993774414062, "step": 103 }, { "epoch": 0.17, "learning_rate": 9.269162210338682e-08, "logits/chosen": -1.253834843635559, "logits/rejected": -1.2230826616287231, "logps/chosen": -60.051551818847656, "logps/rejected": -66.02203369140625, "loss": 0.6903, "rewards/accuracies": 0.5, "rewards/chosen": 0.006067848764359951, "rewards/margins": 0.010862255468964577, "rewards/rejected": -0.004794406238943338, "step": 104 }, { "epoch": 0.17, "learning_rate": 9.358288770053476e-08, "logits/chosen": -1.2417004108428955, "logits/rejected": -1.2845187187194824, "logps/chosen": -71.45110321044922, "logps/rejected": -75.50022888183594, "loss": 0.6933, "rewards/accuracies": 0.25, "rewards/chosen": -0.0034451482351869345, "rewards/margins": -0.006827355362474918, "rewards/rejected": 0.0033822059631347656, "step": 105 }, { "epoch": 0.17, "learning_rate": 9.44741532976827e-08, "logits/chosen": -1.2560675144195557, "logits/rejected": -1.3176476955413818, "logps/chosen": -72.40788269042969, "logps/rejected": -65.07300567626953, "loss": 0.6931, "rewards/accuracies": 0.5, "rewards/chosen": 0.007790183648467064, "rewards/margins": 0.002993679838255048, "rewards/rejected": 0.004796504974365234, "step": 106 }, { "epoch": 0.17, "learning_rate": 9.536541889483066e-08, "logits/chosen": -1.268344521522522, "logits/rejected": -1.2234901189804077, "logps/chosen": -96.00572204589844, "logps/rejected": -87.59380340576172, "loss": 0.695, "rewards/accuracies": 0.5, "rewards/chosen": -0.010178660973906517, "rewards/margins": -0.0005763047374784946, "rewards/rejected": -0.00960235670208931, "step": 107 }, { "epoch": 0.17, "learning_rate": 9.62566844919786e-08, "logits/chosen": -0.9771088361740112, "logits/rejected": -1.0272163152694702, "logps/chosen": -89.22775268554688, "logps/rejected": -75.48786163330078, "loss": 0.6916, "rewards/accuracies": 0.25, "rewards/chosen": -0.006131744012236595, "rewards/margins": -0.019008636474609375, "rewards/rejected": 0.01287689246237278, "step": 108 }, { "epoch": 0.17, "learning_rate": 9.714795008912656e-08, "logits/chosen": -1.2823981046676636, "logits/rejected": -1.2501758337020874, "logps/chosen": -121.24838256835938, "logps/rejected": -106.70596313476562, "loss": 0.696, "rewards/accuracies": 0.75, "rewards/chosen": 0.013632584363222122, "rewards/margins": 0.009738731198012829, "rewards/rejected": 0.003893852699548006, "step": 109 }, { "epoch": 0.18, "learning_rate": 9.80392156862745e-08, "logits/chosen": -1.1504831314086914, "logits/rejected": -1.1097204685211182, "logps/chosen": -58.84628677368164, "logps/rejected": -70.464111328125, "loss": 0.6953, "rewards/accuracies": 0.0, "rewards/chosen": -0.01100912131369114, "rewards/margins": -0.021300887688994408, "rewards/rejected": 0.010291767306625843, "step": 110 }, { "epoch": 0.18, "learning_rate": 9.893048128342246e-08, "logits/chosen": -1.1563762426376343, "logits/rejected": -1.16436767578125, "logps/chosen": -86.93107604980469, "logps/rejected": -77.52449035644531, "loss": 0.6934, "rewards/accuracies": 0.0, "rewards/chosen": -0.00881652906537056, "rewards/margins": -0.02929544448852539, "rewards/rejected": 0.02047891542315483, "step": 111 }, { "epoch": 0.18, "learning_rate": 9.982174688057041e-08, "logits/chosen": -1.1488101482391357, "logits/rejected": -1.084565281867981, "logps/chosen": -73.11860656738281, "logps/rejected": -71.21179962158203, "loss": 0.6944, "rewards/accuracies": 0.0, "rewards/chosen": -0.018657494336366653, "rewards/margins": -0.04843597859144211, "rewards/rejected": 0.029778478667140007, "step": 112 }, { "epoch": 0.18, "learning_rate": 1.0071301247771835e-07, "logits/chosen": -1.1814026832580566, "logits/rejected": -1.1518394947052002, "logps/chosen": -73.14173889160156, "logps/rejected": -64.669921875, "loss": 0.6942, "rewards/accuracies": 0.5, "rewards/chosen": -0.015314674004912376, "rewards/margins": -0.011362170800566673, "rewards/rejected": -0.003952503204345703, "step": 113 }, { "epoch": 0.18, "learning_rate": 1.0160427807486631e-07, "logits/chosen": -1.1952779293060303, "logits/rejected": -1.2421295642852783, "logps/chosen": -79.21939086914062, "logps/rejected": -65.20329284667969, "loss": 0.6868, "rewards/accuracies": 0.5, "rewards/chosen": -0.002678298857063055, "rewards/margins": 0.008234024979174137, "rewards/rejected": -0.01091232243925333, "step": 114 }, { "epoch": 0.18, "learning_rate": 1.0249554367201425e-07, "logits/chosen": -1.3426727056503296, "logits/rejected": -1.3321638107299805, "logps/chosen": -90.40554809570312, "logps/rejected": -81.71943664550781, "loss": 0.6888, "rewards/accuracies": 1.0, "rewards/chosen": 0.0161882396787405, "rewards/margins": 0.03692302852869034, "rewards/rejected": -0.020734786987304688, "step": 115 }, { "epoch": 0.19, "learning_rate": 1.0338680926916221e-07, "logits/chosen": -1.1115459203720093, "logits/rejected": -1.146256685256958, "logps/chosen": -80.80537414550781, "logps/rejected": -65.19287109375, "loss": 0.6855, "rewards/accuracies": 1.0, "rewards/chosen": 0.02475748211145401, "rewards/margins": 0.018207073211669922, "rewards/rejected": 0.0065504079684615135, "step": 116 }, { "epoch": 0.19, "learning_rate": 1.0427807486631015e-07, "logits/chosen": -1.0816545486450195, "logits/rejected": -1.0843591690063477, "logps/chosen": -73.26416015625, "logps/rejected": -81.66397094726562, "loss": 0.6944, "rewards/accuracies": 1.0, "rewards/chosen": 0.027614975348114967, "rewards/margins": 0.03924961015582085, "rewards/rejected": -0.011634635739028454, "step": 117 }, { "epoch": 0.19, "learning_rate": 1.051693404634581e-07, "logits/chosen": -1.1614558696746826, "logits/rejected": -1.1129220724105835, "logps/chosen": -82.39322662353516, "logps/rejected": -84.50247955322266, "loss": 0.6958, "rewards/accuracies": 0.25, "rewards/chosen": -0.002945899497717619, "rewards/margins": -0.011921119876205921, "rewards/rejected": 0.008975219912827015, "step": 118 }, { "epoch": 0.19, "learning_rate": 1.0606060606060605e-07, "logits/chosen": -1.004603385925293, "logits/rejected": -1.0580523014068604, "logps/chosen": -87.60785675048828, "logps/rejected": -82.76498413085938, "loss": 0.6884, "rewards/accuracies": 0.75, "rewards/chosen": 0.002479648683220148, "rewards/margins": 0.006744003854691982, "rewards/rejected": -0.0042643556371331215, "step": 119 }, { "epoch": 0.19, "learning_rate": 1.06951871657754e-07, "logits/chosen": -1.127279281616211, "logits/rejected": -1.243972659111023, "logps/chosen": -98.51547241210938, "logps/rejected": -73.45686340332031, "loss": 0.6975, "rewards/accuracies": 0.5, "rewards/chosen": 0.006734561175107956, "rewards/margins": 0.0008148197084665298, "rewards/rejected": 0.005919742397964001, "step": 120 }, { "epoch": 0.19, "learning_rate": 1.0784313725490195e-07, "logits/chosen": -1.1682674884796143, "logits/rejected": -1.2230191230773926, "logps/chosen": -92.4317626953125, "logps/rejected": -68.97154998779297, "loss": 0.6926, "rewards/accuracies": 0.5, "rewards/chosen": -0.014802360907196999, "rewards/margins": -0.008460521697998047, "rewards/rejected": -0.006341838743537664, "step": 121 }, { "epoch": 0.2, "learning_rate": 1.0873440285204991e-07, "logits/chosen": -1.3279443979263306, "logits/rejected": -1.3496955633163452, "logps/chosen": -96.37141418457031, "logps/rejected": -83.9437255859375, "loss": 0.6916, "rewards/accuracies": 0.75, "rewards/chosen": 0.022973060607910156, "rewards/margins": 0.009210778400301933, "rewards/rejected": 0.013762283138930798, "step": 122 }, { "epoch": 0.2, "learning_rate": 1.0962566844919786e-07, "logits/chosen": -1.1670727729797363, "logits/rejected": -1.1303013563156128, "logps/chosen": -89.6489028930664, "logps/rejected": -90.10624694824219, "loss": 0.6922, "rewards/accuracies": 0.5, "rewards/chosen": 0.006630897521972656, "rewards/margins": 0.012310028076171875, "rewards/rejected": -0.005679131019860506, "step": 123 }, { "epoch": 0.2, "learning_rate": 1.1051693404634581e-07, "logits/chosen": -1.1634020805358887, "logits/rejected": -1.223006010055542, "logps/chosen": -81.78547668457031, "logps/rejected": -81.63375854492188, "loss": 0.6909, "rewards/accuracies": 0.5, "rewards/chosen": -0.011829186230897903, "rewards/margins": 0.02196311764419079, "rewards/rejected": -0.03379230573773384, "step": 124 }, { "epoch": 0.2, "learning_rate": 1.1140819964349376e-07, "logits/chosen": -1.1678276062011719, "logits/rejected": -1.1843390464782715, "logps/chosen": -73.96417236328125, "logps/rejected": -79.01814270019531, "loss": 0.6849, "rewards/accuracies": 0.75, "rewards/chosen": 0.007498741615563631, "rewards/margins": 0.01339798141270876, "rewards/rejected": -0.005899238400161266, "step": 125 }, { "epoch": 0.2, "learning_rate": 1.1229946524064171e-07, "logits/chosen": -1.1589395999908447, "logits/rejected": -1.1751755475997925, "logps/chosen": -86.82635498046875, "logps/rejected": -95.4218521118164, "loss": 0.6969, "rewards/accuracies": 0.25, "rewards/chosen": -0.020419882610440254, "rewards/margins": -0.01981792412698269, "rewards/rejected": -0.0006019594147801399, "step": 126 }, { "epoch": 0.2, "learning_rate": 1.1319073083778966e-07, "logits/chosen": -1.6151647567749023, "logits/rejected": -1.646388053894043, "logps/chosen": -67.51033782958984, "logps/rejected": -72.9522933959961, "loss": 0.6859, "rewards/accuracies": 1.0, "rewards/chosen": 0.022501755505800247, "rewards/margins": 0.024559784680604935, "rewards/rejected": -0.0020580291748046875, "step": 127 }, { "epoch": 0.21, "learning_rate": 1.140819964349376e-07, "logits/chosen": -1.1699700355529785, "logits/rejected": -1.0814133882522583, "logps/chosen": -91.83600616455078, "logps/rejected": -86.32220458984375, "loss": 0.6947, "rewards/accuracies": 0.5, "rewards/chosen": 0.0037170411087572575, "rewards/margins": -0.0029117583762854338, "rewards/rejected": 0.006628799252212048, "step": 128 }, { "epoch": 0.21, "learning_rate": 1.1497326203208556e-07, "logits/chosen": -1.2395000457763672, "logits/rejected": -1.1810842752456665, "logps/chosen": -80.84342956542969, "logps/rejected": -110.77255249023438, "loss": 0.6907, "rewards/accuracies": 0.75, "rewards/chosen": 0.03232545778155327, "rewards/margins": -0.0032841686625033617, "rewards/rejected": 0.03560962527990341, "step": 129 }, { "epoch": 0.21, "learning_rate": 1.158645276292335e-07, "logits/chosen": -1.1515140533447266, "logits/rejected": -1.248138189315796, "logps/chosen": -76.70220947265625, "logps/rejected": -62.8144416809082, "loss": 0.6872, "rewards/accuracies": 0.25, "rewards/chosen": -0.003283405676484108, "rewards/margins": -0.0019655227661132812, "rewards/rejected": -0.001317883376032114, "step": 130 }, { "epoch": 0.21, "learning_rate": 1.1675579322638146e-07, "logits/chosen": -1.4005471467971802, "logits/rejected": -1.2750332355499268, "logps/chosen": -89.06306457519531, "logps/rejected": -93.75791931152344, "loss": 0.6889, "rewards/accuracies": 1.0, "rewards/chosen": 0.015990447252988815, "rewards/margins": 0.026848316192626953, "rewards/rejected": -0.010857868939638138, "step": 131 }, { "epoch": 0.21, "learning_rate": 1.176470588235294e-07, "logits/chosen": -1.3139766454696655, "logits/rejected": -1.2992300987243652, "logps/chosen": -76.49496459960938, "logps/rejected": -103.74681091308594, "loss": 0.6981, "rewards/accuracies": 0.5, "rewards/chosen": -0.0029782294295728207, "rewards/margins": -0.02903452143073082, "rewards/rejected": 0.02605629153549671, "step": 132 }, { "epoch": 0.21, "learning_rate": 1.1853832442067736e-07, "logits/chosen": -1.0782837867736816, "logits/rejected": -1.0972421169281006, "logps/chosen": -102.93182373046875, "logps/rejected": -106.13312530517578, "loss": 0.6918, "rewards/accuracies": 0.25, "rewards/chosen": 0.003751564072445035, "rewards/margins": -0.013335609808564186, "rewards/rejected": 0.017087174579501152, "step": 133 }, { "epoch": 0.22, "learning_rate": 1.194295900178253e-07, "logits/chosen": -1.4395631551742554, "logits/rejected": -1.402905821800232, "logps/chosen": -75.32859802246094, "logps/rejected": -78.72920227050781, "loss": 0.6913, "rewards/accuracies": 0.5, "rewards/chosen": 0.001145745161920786, "rewards/margins": 0.00497551029548049, "rewards/rejected": -0.003829765133559704, "step": 134 }, { "epoch": 0.22, "learning_rate": 1.2032085561497325e-07, "logits/chosen": -1.0842761993408203, "logits/rejected": -1.086230993270874, "logps/chosen": -96.27215576171875, "logps/rejected": -92.43170928955078, "loss": 0.6918, "rewards/accuracies": 0.25, "rewards/chosen": -0.004030800424516201, "rewards/margins": -0.02459125779569149, "rewards/rejected": 0.020560454577207565, "step": 135 }, { "epoch": 0.22, "learning_rate": 1.2121212121212122e-07, "logits/chosen": -1.1672528982162476, "logits/rejected": -1.1845693588256836, "logps/chosen": -85.19649505615234, "logps/rejected": -89.12590026855469, "loss": 0.6897, "rewards/accuracies": 0.75, "rewards/chosen": 0.009124755859375, "rewards/margins": 0.01982726901769638, "rewards/rejected": -0.01070251502096653, "step": 136 }, { "epoch": 0.22, "learning_rate": 1.2210338680926916e-07, "logits/chosen": -1.1850414276123047, "logits/rejected": -1.1530171632766724, "logps/chosen": -98.41848754882812, "logps/rejected": -68.85673522949219, "loss": 0.6884, "rewards/accuracies": 0.25, "rewards/chosen": -6.999960169196129e-05, "rewards/margins": 0.005049895960837603, "rewards/rejected": -0.005119895562529564, "step": 137 }, { "epoch": 0.22, "learning_rate": 1.229946524064171e-07, "logits/chosen": -1.3228200674057007, "logits/rejected": -1.2559106349945068, "logps/chosen": -106.5719223022461, "logps/rejected": -97.08259582519531, "loss": 0.6932, "rewards/accuracies": 0.75, "rewards/chosen": -0.003446007613092661, "rewards/margins": 0.008224296383559704, "rewards/rejected": -0.011670304462313652, "step": 138 }, { "epoch": 0.22, "learning_rate": 1.2388591800356505e-07, "logits/chosen": -1.1929320096969604, "logits/rejected": -1.1778711080551147, "logps/chosen": -112.883544921875, "logps/rejected": -110.8670425415039, "loss": 0.6946, "rewards/accuracies": 0.5, "rewards/chosen": 0.00234222412109375, "rewards/margins": 0.002154732123017311, "rewards/rejected": 0.0001874919980764389, "step": 139 }, { "epoch": 0.22, "learning_rate": 1.24777183600713e-07, "logits/chosen": -1.2298780679702759, "logits/rejected": -1.208550214767456, "logps/chosen": -85.53558349609375, "logps/rejected": -96.42930603027344, "loss": 0.696, "rewards/accuracies": 0.25, "rewards/chosen": 0.014610577374696732, "rewards/margins": -0.015222837217152119, "rewards/rejected": 0.029833413660526276, "step": 140 }, { "epoch": 0.23, "learning_rate": 1.2566844919786097e-07, "logits/chosen": -1.1011197566986084, "logits/rejected": -1.103609323501587, "logps/chosen": -103.02680969238281, "logps/rejected": -103.75904846191406, "loss": 0.6966, "rewards/accuracies": 0.5, "rewards/chosen": -0.012478637509047985, "rewards/margins": 0.002953434595838189, "rewards/rejected": -0.015432070940732956, "step": 141 }, { "epoch": 0.23, "learning_rate": 1.265597147950089e-07, "logits/chosen": -1.2788375616073608, "logits/rejected": -1.321542739868164, "logps/chosen": -78.17115020751953, "logps/rejected": -81.06138610839844, "loss": 0.6854, "rewards/accuracies": 0.25, "rewards/chosen": -0.0042650229297578335, "rewards/margins": 0.014875983819365501, "rewards/rejected": -0.019141007214784622, "step": 142 }, { "epoch": 0.23, "learning_rate": 1.2745098039215685e-07, "logits/chosen": -1.1079347133636475, "logits/rejected": -1.0206882953643799, "logps/chosen": -85.5785140991211, "logps/rejected": -84.78573608398438, "loss": 0.6971, "rewards/accuracies": 0.25, "rewards/chosen": -0.04109363630414009, "rewards/margins": -0.0006904592737555504, "rewards/rejected": -0.040403176099061966, "step": 143 }, { "epoch": 0.23, "learning_rate": 1.2834224598930482e-07, "logits/chosen": -1.1014010906219482, "logits/rejected": -1.1406495571136475, "logps/chosen": -60.63249206542969, "logps/rejected": -55.480812072753906, "loss": 0.694, "rewards/accuracies": 0.0, "rewards/chosen": -0.00833749771118164, "rewards/margins": -0.022405626252293587, "rewards/rejected": 0.014068126678466797, "step": 144 }, { "epoch": 0.23, "learning_rate": 1.2923351158645277e-07, "logits/chosen": -1.2830541133880615, "logits/rejected": -1.2623753547668457, "logps/chosen": -80.148681640625, "logps/rejected": -73.31366729736328, "loss": 0.6918, "rewards/accuracies": 0.25, "rewards/chosen": 0.0032028192654252052, "rewards/margins": 0.0021616918966174126, "rewards/rejected": 0.0010411262046545744, "step": 145 }, { "epoch": 0.23, "learning_rate": 1.301247771836007e-07, "logits/chosen": -1.1449625492095947, "logits/rejected": -1.1683745384216309, "logps/chosen": -60.46405029296875, "logps/rejected": -72.65692138671875, "loss": 0.6816, "rewards/accuracies": 0.75, "rewards/chosen": 0.03136143833398819, "rewards/margins": 0.03643498569726944, "rewards/rejected": -0.00507354736328125, "step": 146 }, { "epoch": 0.24, "learning_rate": 1.3101604278074866e-07, "logits/chosen": -1.2042714357376099, "logits/rejected": -1.1847949028015137, "logps/chosen": -74.31504821777344, "logps/rejected": -75.55863952636719, "loss": 0.6895, "rewards/accuracies": 0.0, "rewards/chosen": -0.02252321131527424, "rewards/margins": -0.02265758439898491, "rewards/rejected": 0.00013437285088002682, "step": 147 }, { "epoch": 0.24, "learning_rate": 1.319073083778966e-07, "logits/chosen": -1.0576329231262207, "logits/rejected": -1.2008721828460693, "logps/chosen": -65.27671813964844, "logps/rejected": -92.62938690185547, "loss": 0.6899, "rewards/accuracies": 0.5, "rewards/chosen": -0.021604442968964577, "rewards/margins": -0.01308126375079155, "rewards/rejected": -0.008523178286850452, "step": 148 }, { "epoch": 0.24, "learning_rate": 1.3279857397504457e-07, "logits/chosen": -1.107293725013733, "logits/rejected": -1.2227246761322021, "logps/chosen": -92.2590560913086, "logps/rejected": -87.06993103027344, "loss": 0.6874, "rewards/accuracies": 0.25, "rewards/chosen": 0.006343650631606579, "rewards/margins": -0.0033267969265580177, "rewards/rejected": 0.009670448489487171, "step": 149 }, { "epoch": 0.24, "learning_rate": 1.3368983957219251e-07, "logits/chosen": -1.2669398784637451, "logits/rejected": -1.3035881519317627, "logps/chosen": -75.93667602539062, "logps/rejected": -74.04835510253906, "loss": 0.686, "rewards/accuracies": 0.5, "rewards/chosen": -0.003344536293298006, "rewards/margins": -0.011771298944950104, "rewards/rejected": 0.008426761254668236, "step": 150 }, { "epoch": 0.24, "learning_rate": 1.3458110516934046e-07, "logits/chosen": -1.2319573163986206, "logits/rejected": -1.248245358467102, "logps/chosen": -81.9886474609375, "logps/rejected": -84.21199035644531, "loss": 0.6851, "rewards/accuracies": 0.5, "rewards/chosen": -0.005156421568244696, "rewards/margins": 0.008423137478530407, "rewards/rejected": -0.013579560443758965, "step": 151 }, { "epoch": 0.24, "learning_rate": 1.354723707664884e-07, "logits/chosen": -1.2663254737854004, "logits/rejected": -1.3274998664855957, "logps/chosen": -53.74374008178711, "logps/rejected": -69.90965270996094, "loss": 0.6895, "rewards/accuracies": 0.25, "rewards/chosen": 0.01172170601785183, "rewards/margins": -0.01493387296795845, "rewards/rejected": 0.02665557898581028, "step": 152 }, { "epoch": 0.25, "learning_rate": 1.3636363636363635e-07, "logits/chosen": -1.1918727159500122, "logits/rejected": -1.2879536151885986, "logps/chosen": -68.19717407226562, "logps/rejected": -91.91706848144531, "loss": 0.6936, "rewards/accuracies": 0.25, "rewards/chosen": 0.011574840173125267, "rewards/margins": -0.014360904693603516, "rewards/rejected": 0.025935746729373932, "step": 153 }, { "epoch": 0.25, "learning_rate": 1.3725490196078432e-07, "logits/chosen": -1.110097050666809, "logits/rejected": -1.2036888599395752, "logps/chosen": -92.6609878540039, "logps/rejected": -96.52682495117188, "loss": 0.6883, "rewards/accuracies": 0.5, "rewards/chosen": 0.005514146760106087, "rewards/margins": -0.006318665109574795, "rewards/rejected": 0.011832811869680882, "step": 154 }, { "epoch": 0.25, "learning_rate": 1.3814616755793226e-07, "logits/chosen": -1.285487413406372, "logits/rejected": -1.2902686595916748, "logps/chosen": -72.99339294433594, "logps/rejected": -64.3037109375, "loss": 0.6868, "rewards/accuracies": 1.0, "rewards/chosen": 0.04228544235229492, "rewards/margins": 0.027436494827270508, "rewards/rejected": 0.014848947525024414, "step": 155 }, { "epoch": 0.25, "learning_rate": 1.390374331550802e-07, "logits/chosen": -1.1897838115692139, "logits/rejected": -1.25021493434906, "logps/chosen": -77.29078674316406, "logps/rejected": -78.77659606933594, "loss": 0.6921, "rewards/accuracies": 0.5, "rewards/chosen": 0.005116081330925226, "rewards/margins": -0.005609511863440275, "rewards/rejected": 0.010725593194365501, "step": 156 }, { "epoch": 0.25, "learning_rate": 1.3992869875222815e-07, "logits/chosen": -1.2299305200576782, "logits/rejected": -1.2583080530166626, "logps/chosen": -74.7567138671875, "logps/rejected": -77.03506469726562, "loss": 0.6913, "rewards/accuracies": 0.75, "rewards/chosen": 0.0184447281062603, "rewards/margins": 0.006376456934958696, "rewards/rejected": 0.012068272568285465, "step": 157 }, { "epoch": 0.25, "learning_rate": 1.408199643493761e-07, "logits/chosen": -1.4284183979034424, "logits/rejected": -1.4431934356689453, "logps/chosen": -73.93280029296875, "logps/rejected": -68.55723571777344, "loss": 0.6844, "rewards/accuracies": 0.75, "rewards/chosen": 0.0240675937384367, "rewards/margins": 0.00622797105461359, "rewards/rejected": 0.017839621752500534, "step": 158 }, { "epoch": 0.26, "learning_rate": 1.4171122994652406e-07, "logits/chosen": -0.9991000890731812, "logits/rejected": -1.0306744575500488, "logps/chosen": -80.68217468261719, "logps/rejected": -76.83024597167969, "loss": 0.6871, "rewards/accuracies": 0.75, "rewards/chosen": -0.0027531625237315893, "rewards/margins": 0.04543151706457138, "rewards/rejected": -0.04818468168377876, "step": 159 }, { "epoch": 0.26, "learning_rate": 1.42602495543672e-07, "logits/chosen": -1.0936920642852783, "logits/rejected": -1.1092602014541626, "logps/chosen": -82.60357666015625, "logps/rejected": -75.55970001220703, "loss": 0.6886, "rewards/accuracies": 0.5, "rewards/chosen": 0.010325146839022636, "rewards/margins": 0.004160404670983553, "rewards/rejected": 0.006164741702377796, "step": 160 }, { "epoch": 0.26, "learning_rate": 1.4349376114081995e-07, "logits/chosen": -1.2158175706863403, "logits/rejected": -1.2169979810714722, "logps/chosen": -128.269287109375, "logps/rejected": -90.16124725341797, "loss": 0.6914, "rewards/accuracies": 0.75, "rewards/chosen": 0.0017871854361146688, "rewards/margins": -0.004718400072306395, "rewards/rejected": 0.00650558527559042, "step": 161 }, { "epoch": 0.26, "learning_rate": 1.443850267379679e-07, "logits/chosen": -1.1846238374710083, "logits/rejected": -1.2020435333251953, "logps/chosen": -80.48837280273438, "logps/rejected": -78.94100952148438, "loss": 0.6914, "rewards/accuracies": 0.25, "rewards/chosen": -0.009000301361083984, "rewards/margins": -0.024142839014530182, "rewards/rejected": 0.015142536722123623, "step": 162 }, { "epoch": 0.26, "learning_rate": 1.4527629233511584e-07, "logits/chosen": -1.2966161966323853, "logits/rejected": -1.3049771785736084, "logps/chosen": -104.6605224609375, "logps/rejected": -100.4988784790039, "loss": 0.6812, "rewards/accuracies": 1.0, "rewards/chosen": 0.035912513732910156, "rewards/margins": 0.05647735670208931, "rewards/rejected": -0.020564841106534004, "step": 163 }, { "epoch": 0.26, "learning_rate": 1.461675579322638e-07, "logits/chosen": -1.2582217454910278, "logits/rejected": -1.2453134059906006, "logps/chosen": -78.0390396118164, "logps/rejected": -84.03144836425781, "loss": 0.6912, "rewards/accuracies": 0.5, "rewards/chosen": 0.010477638803422451, "rewards/margins": -0.0006960853934288025, "rewards/rejected": 0.011173725128173828, "step": 164 }, { "epoch": 0.26, "learning_rate": 1.4705882352941175e-07, "logits/chosen": -1.3116538524627686, "logits/rejected": -1.368645191192627, "logps/chosen": -90.49311828613281, "logps/rejected": -96.47917938232422, "loss": 0.6953, "rewards/accuracies": 0.5, "rewards/chosen": 0.014243889600038528, "rewards/margins": -0.001439286395907402, "rewards/rejected": 0.01568317413330078, "step": 165 }, { "epoch": 0.27, "learning_rate": 1.479500891265597e-07, "logits/chosen": -1.1102708578109741, "logits/rejected": -1.0590384006500244, "logps/chosen": -95.77940368652344, "logps/rejected": -96.39000701904297, "loss": 0.6821, "rewards/accuracies": 0.75, "rewards/chosen": 0.02114238776266575, "rewards/margins": 0.014655305072665215, "rewards/rejected": 0.006487083155661821, "step": 166 }, { "epoch": 0.27, "learning_rate": 1.4884135472370764e-07, "logits/chosen": -1.2088526487350464, "logits/rejected": -1.257607340812683, "logps/chosen": -129.15087890625, "logps/rejected": -122.53927612304688, "loss": 0.6898, "rewards/accuracies": 0.5, "rewards/chosen": 0.029584500938653946, "rewards/margins": 0.0133909210562706, "rewards/rejected": 0.016193581745028496, "step": 167 }, { "epoch": 0.27, "learning_rate": 1.4973262032085558e-07, "logits/chosen": -1.1217880249023438, "logits/rejected": -1.1830638647079468, "logps/chosen": -82.94430541992188, "logps/rejected": -77.02546691894531, "loss": 0.6972, "rewards/accuracies": 0.5, "rewards/chosen": 0.018335916101932526, "rewards/margins": 0.024320602416992188, "rewards/rejected": -0.005984686780720949, "step": 168 }, { "epoch": 0.27, "learning_rate": 1.5062388591800358e-07, "logits/chosen": -1.0542296171188354, "logits/rejected": -1.0406967401504517, "logps/chosen": -96.65447998046875, "logps/rejected": -117.56917572021484, "loss": 0.6973, "rewards/accuracies": 0.5, "rewards/chosen": 0.012562274001538754, "rewards/margins": 0.0050829872488975525, "rewards/rejected": 0.007479285821318626, "step": 169 }, { "epoch": 0.27, "learning_rate": 1.5151515151515152e-07, "logits/chosen": -1.0995701551437378, "logits/rejected": -1.186861276626587, "logps/chosen": -92.70673370361328, "logps/rejected": -95.64176940917969, "loss": 0.6934, "rewards/accuracies": 0.25, "rewards/chosen": -0.013466835021972656, "rewards/margins": -0.0192292220890522, "rewards/rejected": 0.0057623861357569695, "step": 170 }, { "epoch": 0.27, "learning_rate": 1.5240641711229947e-07, "logits/chosen": -1.1159003973007202, "logits/rejected": -1.2103300094604492, "logps/chosen": -81.66310119628906, "logps/rejected": -78.21598815917969, "loss": 0.7001, "rewards/accuracies": 0.5, "rewards/chosen": 0.022819900885224342, "rewards/margins": 0.0027945516631007195, "rewards/rejected": 0.020025350153446198, "step": 171 }, { "epoch": 0.28, "learning_rate": 1.532976827094474e-07, "logits/chosen": -1.056949496269226, "logits/rejected": -1.0529078245162964, "logps/chosen": -90.16043853759766, "logps/rejected": -96.05148315429688, "loss": 0.6974, "rewards/accuracies": 0.5, "rewards/chosen": 0.015632057562470436, "rewards/margins": -0.0013263700529932976, "rewards/rejected": 0.01695842668414116, "step": 172 }, { "epoch": 0.28, "learning_rate": 1.5418894830659536e-07, "logits/chosen": -1.3446035385131836, "logits/rejected": -1.3823251724243164, "logps/chosen": -87.43083190917969, "logps/rejected": -95.5851821899414, "loss": 0.6845, "rewards/accuracies": 1.0, "rewards/chosen": 0.04805288463830948, "rewards/margins": 0.046666622161865234, "rewards/rejected": 0.0013862615451216698, "step": 173 }, { "epoch": 0.28, "learning_rate": 1.5508021390374333e-07, "logits/chosen": -1.210251808166504, "logits/rejected": -1.1726438999176025, "logps/chosen": -55.933528900146484, "logps/rejected": -68.61833190917969, "loss": 0.7022, "rewards/accuracies": 0.25, "rewards/chosen": -0.0025814054533839226, "rewards/margins": -0.005895042791962624, "rewards/rejected": 0.0033136368729174137, "step": 174 }, { "epoch": 0.28, "learning_rate": 1.5597147950089127e-07, "logits/chosen": -1.4685124158859253, "logits/rejected": -1.3827660083770752, "logps/chosen": -85.58157348632812, "logps/rejected": -69.87470245361328, "loss": 0.6856, "rewards/accuracies": 0.5, "rewards/chosen": 0.009827231988310814, "rewards/margins": 0.012124920263886452, "rewards/rejected": -0.002297687344253063, "step": 175 }, { "epoch": 0.28, "learning_rate": 1.5686274509803921e-07, "logits/chosen": -1.3376049995422363, "logits/rejected": -1.3044078350067139, "logps/chosen": -86.311767578125, "logps/rejected": -75.01780700683594, "loss": 0.6901, "rewards/accuracies": 0.75, "rewards/chosen": 0.02645111083984375, "rewards/margins": 0.022428512573242188, "rewards/rejected": 0.00402259873226285, "step": 176 }, { "epoch": 0.28, "learning_rate": 1.5775401069518716e-07, "logits/chosen": -1.31523859500885, "logits/rejected": -1.2526075839996338, "logps/chosen": -91.53955078125, "logps/rejected": -114.09359741210938, "loss": 0.6783, "rewards/accuracies": 0.75, "rewards/chosen": 0.034424781799316406, "rewards/margins": 0.0620538704097271, "rewards/rejected": -0.02762908861041069, "step": 177 }, { "epoch": 0.29, "learning_rate": 1.586452762923351e-07, "logits/chosen": -1.0876498222351074, "logits/rejected": -1.1826117038726807, "logps/chosen": -82.99105072021484, "logps/rejected": -74.619873046875, "loss": 0.6859, "rewards/accuracies": 0.25, "rewards/chosen": -0.004779815673828125, "rewards/margins": -0.030318640172481537, "rewards/rejected": 0.02553882636129856, "step": 178 }, { "epoch": 0.29, "learning_rate": 1.5953654188948307e-07, "logits/chosen": -1.4762051105499268, "logits/rejected": -1.4248936176300049, "logps/chosen": -110.35680389404297, "logps/rejected": -94.32610321044922, "loss": 0.6858, "rewards/accuracies": 0.5, "rewards/chosen": -0.02806110493838787, "rewards/margins": -0.005148697644472122, "rewards/rejected": -0.0229124054312706, "step": 179 }, { "epoch": 0.29, "learning_rate": 1.6042780748663102e-07, "logits/chosen": -1.441422462463379, "logits/rejected": -1.3664965629577637, "logps/chosen": -94.42696380615234, "logps/rejected": -73.93683624267578, "loss": 0.6817, "rewards/accuracies": 0.75, "rewards/chosen": 0.022891998291015625, "rewards/margins": 0.012346649542450905, "rewards/rejected": 0.01054534874856472, "step": 180 }, { "epoch": 0.29, "learning_rate": 1.6131907308377896e-07, "logits/chosen": -0.9797192215919495, "logits/rejected": -1.0084091424942017, "logps/chosen": -76.98902893066406, "logps/rejected": -98.45145416259766, "loss": 0.688, "rewards/accuracies": 0.75, "rewards/chosen": 0.01589355431497097, "rewards/margins": 0.02487964741885662, "rewards/rejected": -0.008986091241240501, "step": 181 }, { "epoch": 0.29, "learning_rate": 1.622103386809269e-07, "logits/chosen": -1.1448172330856323, "logits/rejected": -1.2438055276870728, "logps/chosen": -64.28092956542969, "logps/rejected": -88.49763488769531, "loss": 0.6867, "rewards/accuracies": 0.75, "rewards/chosen": 0.002738380804657936, "rewards/margins": 0.038314249366521835, "rewards/rejected": -0.03557587042450905, "step": 182 }, { "epoch": 0.29, "learning_rate": 1.6310160427807487e-07, "logits/chosen": -1.2652349472045898, "logits/rejected": -1.2993640899658203, "logps/chosen": -72.77031707763672, "logps/rejected": -72.89250183105469, "loss": 0.6984, "rewards/accuracies": 0.25, "rewards/chosen": 0.023846054449677467, "rewards/margins": -0.019249439239501953, "rewards/rejected": 0.04309549555182457, "step": 183 }, { "epoch": 0.3, "learning_rate": 1.6399286987522282e-07, "logits/chosen": -1.1174217462539673, "logits/rejected": -1.125874400138855, "logps/chosen": -73.41419982910156, "logps/rejected": -87.02188110351562, "loss": 0.703, "rewards/accuracies": 0.25, "rewards/chosen": -0.002849388401955366, "rewards/margins": -0.02789783477783203, "rewards/rejected": 0.025048445910215378, "step": 184 }, { "epoch": 0.3, "learning_rate": 1.6488413547237076e-07, "logits/chosen": -1.5134884119033813, "logits/rejected": -1.450934648513794, "logps/chosen": -79.44047546386719, "logps/rejected": -65.8511962890625, "loss": 0.6909, "rewards/accuracies": 0.5, "rewards/chosen": 0.013629722408950329, "rewards/margins": 0.005207587033510208, "rewards/rejected": 0.008422136306762695, "step": 185 }, { "epoch": 0.3, "learning_rate": 1.657754010695187e-07, "logits/chosen": -1.0555158853530884, "logits/rejected": -1.0353024005889893, "logps/chosen": -94.71896362304688, "logps/rejected": -86.52894592285156, "loss": 0.6861, "rewards/accuracies": 0.5, "rewards/chosen": -0.019899560138583183, "rewards/margins": -0.010300257243216038, "rewards/rejected": -0.009599304758012295, "step": 186 }, { "epoch": 0.3, "learning_rate": 1.6666666666666665e-07, "logits/chosen": -1.2581530809402466, "logits/rejected": -1.3141635656356812, "logps/chosen": -112.1159439086914, "logps/rejected": -97.25033569335938, "loss": 0.6977, "rewards/accuracies": 0.5, "rewards/chosen": 0.034465789794921875, "rewards/margins": 0.04063625633716583, "rewards/rejected": -0.006170463282614946, "step": 187 }, { "epoch": 0.3, "learning_rate": 1.6755793226381462e-07, "logits/chosen": -1.2355010509490967, "logits/rejected": -1.249297857284546, "logps/chosen": -91.64498901367188, "logps/rejected": -89.41168212890625, "loss": 0.6849, "rewards/accuracies": 0.5, "rewards/chosen": 0.0010955813340842724, "rewards/margins": 0.0016227737069129944, "rewards/rejected": -0.0005271914415061474, "step": 188 }, { "epoch": 0.3, "learning_rate": 1.6844919786096256e-07, "logits/chosen": -1.3012874126434326, "logits/rejected": -1.3630125522613525, "logps/chosen": -68.51990509033203, "logps/rejected": -76.87109375, "loss": 0.6877, "rewards/accuracies": 0.5, "rewards/chosen": 0.01358041726052761, "rewards/margins": 0.016314316540956497, "rewards/rejected": -0.002733898116275668, "step": 189 }, { "epoch": 0.3, "learning_rate": 1.693404634581105e-07, "logits/chosen": -1.170872449874878, "logits/rejected": -1.1361101865768433, "logps/chosen": -70.6655044555664, "logps/rejected": -56.43096160888672, "loss": 0.6799, "rewards/accuracies": 0.5, "rewards/chosen": 0.012186527252197266, "rewards/margins": 0.007533167488873005, "rewards/rejected": 0.004653359763324261, "step": 190 }, { "epoch": 0.31, "learning_rate": 1.7023172905525845e-07, "logits/chosen": -1.0795990228652954, "logits/rejected": -1.078825831413269, "logps/chosen": -101.79287719726562, "logps/rejected": -88.37467956542969, "loss": 0.6974, "rewards/accuracies": 0.5, "rewards/chosen": -0.008512306958436966, "rewards/margins": -0.01216125674545765, "rewards/rejected": 0.0036489497870206833, "step": 191 }, { "epoch": 0.31, "learning_rate": 1.711229946524064e-07, "logits/chosen": -1.2868682146072388, "logits/rejected": -1.2432432174682617, "logps/chosen": -91.89615631103516, "logps/rejected": -83.5869369506836, "loss": 0.6833, "rewards/accuracies": 0.75, "rewards/chosen": 0.007503128610551357, "rewards/margins": 0.020963573828339577, "rewards/rejected": -0.013460446149110794, "step": 192 }, { "epoch": 0.31, "learning_rate": 1.7201426024955437e-07, "logits/chosen": -1.264981985092163, "logits/rejected": -1.2010303735733032, "logps/chosen": -95.26475524902344, "logps/rejected": -82.73414611816406, "loss": 0.6908, "rewards/accuracies": 0.5, "rewards/chosen": 0.0174986831843853, "rewards/margins": -0.0011507999151945114, "rewards/rejected": 0.01864948309957981, "step": 193 }, { "epoch": 0.31, "learning_rate": 1.729055258467023e-07, "logits/chosen": -1.2113144397735596, "logits/rejected": -0.978897213935852, "logps/chosen": -68.5088119506836, "logps/rejected": -91.72132873535156, "loss": 0.6766, "rewards/accuracies": 0.75, "rewards/chosen": 0.013660717755556107, "rewards/margins": 0.027810953557491302, "rewards/rejected": -0.014150239527225494, "step": 194 }, { "epoch": 0.31, "learning_rate": 1.7379679144385025e-07, "logits/chosen": -1.3174318075180054, "logits/rejected": -1.4233349561691284, "logps/chosen": -68.59219360351562, "logps/rejected": -74.4993667602539, "loss": 0.6817, "rewards/accuracies": 0.5, "rewards/chosen": 0.01985139772295952, "rewards/margins": -0.0039217001758515835, "rewards/rejected": 0.02377310022711754, "step": 195 }, { "epoch": 0.31, "learning_rate": 1.746880570409982e-07, "logits/chosen": -1.145938754081726, "logits/rejected": -1.253713607788086, "logps/chosen": -77.18778228759766, "logps/rejected": -89.86077117919922, "loss": 0.6892, "rewards/accuracies": 0.75, "rewards/chosen": -0.007975578308105469, "rewards/margins": -0.0084991455078125, "rewards/rejected": 0.0005235671997070312, "step": 196 }, { "epoch": 0.32, "learning_rate": 1.7557932263814614e-07, "logits/chosen": -1.2652584314346313, "logits/rejected": -1.2274739742279053, "logps/chosen": -99.92034912109375, "logps/rejected": -104.35964965820312, "loss": 0.6809, "rewards/accuracies": 0.75, "rewards/chosen": 0.03284492343664169, "rewards/margins": 0.017350386828184128, "rewards/rejected": 0.015494538471102715, "step": 197 }, { "epoch": 0.32, "learning_rate": 1.764705882352941e-07, "logits/chosen": -1.2105274200439453, "logits/rejected": -1.117488980293274, "logps/chosen": -85.86508178710938, "logps/rejected": -71.0420150756836, "loss": 0.688, "rewards/accuracies": 1.0, "rewards/chosen": 0.05680304020643234, "rewards/margins": 0.10031509399414062, "rewards/rejected": -0.04351206123828888, "step": 198 }, { "epoch": 0.32, "learning_rate": 1.7736185383244206e-07, "logits/chosen": -1.2283986806869507, "logits/rejected": -1.256447672843933, "logps/chosen": -71.98006439208984, "logps/rejected": -80.81570434570312, "loss": 0.6966, "rewards/accuracies": 0.75, "rewards/chosen": -0.008759688585996628, "rewards/margins": -0.031231118366122246, "rewards/rejected": 0.02247142791748047, "step": 199 }, { "epoch": 0.32, "learning_rate": 1.7825311942959e-07, "logits/chosen": -1.295608401298523, "logits/rejected": -1.3207356929779053, "logps/chosen": -81.70811462402344, "logps/rejected": -84.58729553222656, "loss": 0.699, "rewards/accuracies": 0.5, "rewards/chosen": 0.0452427864074707, "rewards/margins": 0.024469852447509766, "rewards/rejected": 0.020772933959960938, "step": 200 }, { "epoch": 0.32, "learning_rate": 1.7914438502673794e-07, "logits/chosen": -1.2896004915237427, "logits/rejected": -1.2928847074508667, "logps/chosen": -67.5517578125, "logps/rejected": -78.02079010009766, "loss": 0.689, "rewards/accuracies": 0.75, "rewards/chosen": 0.048406124114990234, "rewards/margins": 0.07088927924633026, "rewards/rejected": -0.022483156993985176, "step": 201 }, { "epoch": 0.32, "learning_rate": 1.800356506238859e-07, "logits/chosen": -1.0671335458755493, "logits/rejected": -1.0690761804580688, "logps/chosen": -97.25962829589844, "logps/rejected": -94.14231872558594, "loss": 0.6864, "rewards/accuracies": 0.75, "rewards/chosen": 0.0874597579240799, "rewards/margins": 0.06820917129516602, "rewards/rejected": 0.01925058290362358, "step": 202 }, { "epoch": 0.33, "learning_rate": 1.8092691622103386e-07, "logits/chosen": -1.3357504606246948, "logits/rejected": -1.4083187580108643, "logps/chosen": -69.22675323486328, "logps/rejected": -78.70281219482422, "loss": 0.6955, "rewards/accuracies": 0.5, "rewards/chosen": -0.019612502306699753, "rewards/margins": -0.02757863886654377, "rewards/rejected": 0.007966136559844017, "step": 203 }, { "epoch": 0.33, "learning_rate": 1.818181818181818e-07, "logits/chosen": -1.154530644416809, "logits/rejected": -1.1778064966201782, "logps/chosen": -87.59091186523438, "logps/rejected": -74.59123992919922, "loss": 0.6681, "rewards/accuracies": 0.5, "rewards/chosen": -0.014525031670928001, "rewards/margins": -0.011475468054413795, "rewards/rejected": -0.003049564315006137, "step": 204 }, { "epoch": 0.33, "learning_rate": 1.8270944741532975e-07, "logits/chosen": -1.2200852632522583, "logits/rejected": -1.2569987773895264, "logps/chosen": -96.88046264648438, "logps/rejected": -106.29316711425781, "loss": 0.6812, "rewards/accuracies": 1.0, "rewards/chosen": 0.07605037838220596, "rewards/margins": 0.0842885971069336, "rewards/rejected": -0.00823822058737278, "step": 205 }, { "epoch": 0.33, "learning_rate": 1.836007130124777e-07, "logits/chosen": -1.1513392925262451, "logits/rejected": -1.0660691261291504, "logps/chosen": -80.135009765625, "logps/rejected": -76.30355072021484, "loss": 0.686, "rewards/accuracies": 0.75, "rewards/chosen": 0.055201053619384766, "rewards/margins": 0.07844934612512589, "rewards/rejected": -0.02324829250574112, "step": 206 }, { "epoch": 0.33, "learning_rate": 1.8449197860962566e-07, "logits/chosen": -1.354569435119629, "logits/rejected": -1.2773513793945312, "logps/chosen": -95.84893798828125, "logps/rejected": -72.3054428100586, "loss": 0.6895, "rewards/accuracies": 0.75, "rewards/chosen": 0.03253040462732315, "rewards/margins": 0.03406982496380806, "rewards/rejected": -0.0015394208021461964, "step": 207 }, { "epoch": 0.33, "learning_rate": 1.8538324420677363e-07, "logits/chosen": -1.1984279155731201, "logits/rejected": -1.2835437059402466, "logps/chosen": -111.05964660644531, "logps/rejected": -89.39425659179688, "loss": 0.6852, "rewards/accuracies": 0.25, "rewards/chosen": 0.008302688598632812, "rewards/margins": -0.014565277844667435, "rewards/rejected": 0.022867966443300247, "step": 208 }, { "epoch": 0.34, "learning_rate": 1.8627450980392158e-07, "logits/chosen": -1.1748929023742676, "logits/rejected": -1.1685056686401367, "logps/chosen": -72.09039306640625, "logps/rejected": -86.42749786376953, "loss": 0.6857, "rewards/accuracies": 0.25, "rewards/chosen": 0.03194408491253853, "rewards/margins": -0.02014312893152237, "rewards/rejected": 0.0520872101187706, "step": 209 }, { "epoch": 0.34, "learning_rate": 1.8716577540106952e-07, "logits/chosen": -1.343977689743042, "logits/rejected": -1.3213236331939697, "logps/chosen": -88.28121948242188, "logps/rejected": -115.24087524414062, "loss": 0.7002, "rewards/accuracies": 0.75, "rewards/chosen": 0.05695114657282829, "rewards/margins": 0.029232025146484375, "rewards/rejected": 0.02771911583840847, "step": 210 }, { "epoch": 0.34, "learning_rate": 1.8805704099821746e-07, "logits/chosen": -1.2223469018936157, "logits/rejected": -1.1940040588378906, "logps/chosen": -83.94541931152344, "logps/rejected": -78.602783203125, "loss": 0.6647, "rewards/accuracies": 0.75, "rewards/chosen": 0.09268398582935333, "rewards/margins": 0.11456432938575745, "rewards/rejected": -0.021880339831113815, "step": 211 }, { "epoch": 0.34, "learning_rate": 1.889483065953654e-07, "logits/chosen": -1.275195837020874, "logits/rejected": -1.2824195623397827, "logps/chosen": -84.36309814453125, "logps/rejected": -61.45875930786133, "loss": 0.6943, "rewards/accuracies": 0.75, "rewards/chosen": -0.0012431144714355469, "rewards/margins": -0.0024353028275072575, "rewards/rejected": 0.0011921883560717106, "step": 212 }, { "epoch": 0.34, "learning_rate": 1.8983957219251338e-07, "logits/chosen": -1.1856679916381836, "logits/rejected": -1.1402510404586792, "logps/chosen": -95.24775695800781, "logps/rejected": -124.43846130371094, "loss": 0.6958, "rewards/accuracies": 0.5, "rewards/chosen": 0.04189911112189293, "rewards/margins": 0.006594468839466572, "rewards/rejected": 0.03530464321374893, "step": 213 }, { "epoch": 0.34, "learning_rate": 1.9073083778966132e-07, "logits/chosen": -1.3067560195922852, "logits/rejected": -1.2862671613693237, "logps/chosen": -66.29334259033203, "logps/rejected": -68.76512145996094, "loss": 0.6792, "rewards/accuracies": 1.0, "rewards/chosen": 0.03160810470581055, "rewards/margins": 0.04609499126672745, "rewards/rejected": -0.014486884698271751, "step": 214 }, { "epoch": 0.35, "learning_rate": 1.9162210338680927e-07, "logits/chosen": -1.1827154159545898, "logits/rejected": -1.270613431930542, "logps/chosen": -84.06794738769531, "logps/rejected": -67.85546875, "loss": 0.685, "rewards/accuracies": 0.5, "rewards/chosen": 0.03467101976275444, "rewards/margins": 0.02241525799036026, "rewards/rejected": 0.01225576363503933, "step": 215 }, { "epoch": 0.35, "learning_rate": 1.925133689839572e-07, "logits/chosen": -1.2252124547958374, "logits/rejected": -1.2743034362792969, "logps/chosen": -62.56760025024414, "logps/rejected": -104.43107604980469, "loss": 0.6895, "rewards/accuracies": 0.75, "rewards/chosen": 0.021578025072813034, "rewards/margins": 0.042513083666563034, "rewards/rejected": -0.02093506045639515, "step": 216 }, { "epoch": 0.35, "learning_rate": 1.9340463458110515e-07, "logits/chosen": -1.067476511001587, "logits/rejected": -1.0687108039855957, "logps/chosen": -76.008544921875, "logps/rejected": -88.04805755615234, "loss": 0.6923, "rewards/accuracies": 0.5, "rewards/chosen": -0.002370451809838414, "rewards/margins": -0.018574140965938568, "rewards/rejected": 0.016203688457608223, "step": 217 }, { "epoch": 0.35, "learning_rate": 1.9429590017825312e-07, "logits/chosen": -1.2216835021972656, "logits/rejected": -1.1859307289123535, "logps/chosen": -89.28216552734375, "logps/rejected": -87.74323272705078, "loss": 0.6903, "rewards/accuracies": 0.5, "rewards/chosen": 0.027384568005800247, "rewards/margins": 0.018771935254335403, "rewards/rejected": 0.008612633682787418, "step": 218 }, { "epoch": 0.35, "learning_rate": 1.9518716577540107e-07, "logits/chosen": -1.101264476776123, "logits/rejected": -1.1525955200195312, "logps/chosen": -82.58341979980469, "logps/rejected": -48.99835968017578, "loss": 0.6804, "rewards/accuracies": 0.5, "rewards/chosen": 0.015311813913285732, "rewards/margins": -0.005624962039291859, "rewards/rejected": 0.02093677595257759, "step": 219 }, { "epoch": 0.35, "learning_rate": 1.96078431372549e-07, "logits/chosen": -1.2214932441711426, "logits/rejected": -1.1190643310546875, "logps/chosen": -94.95525360107422, "logps/rejected": -108.86164855957031, "loss": 0.6786, "rewards/accuracies": 0.5, "rewards/chosen": 0.09720917046070099, "rewards/margins": 0.09963169693946838, "rewards/rejected": -0.0024225234519690275, "step": 220 }, { "epoch": 0.35, "learning_rate": 1.9696969696969696e-07, "logits/chosen": -1.1900205612182617, "logits/rejected": -1.2499765157699585, "logps/chosen": -76.85565185546875, "logps/rejected": -83.38502502441406, "loss": 0.6805, "rewards/accuracies": 1.0, "rewards/chosen": 0.06594543159008026, "rewards/margins": 0.13332080841064453, "rewards/rejected": -0.06737537682056427, "step": 221 }, { "epoch": 0.36, "learning_rate": 1.9786096256684493e-07, "logits/chosen": -1.19120192527771, "logits/rejected": -1.2155377864837646, "logps/chosen": -102.14087677001953, "logps/rejected": -80.68405151367188, "loss": 0.6878, "rewards/accuracies": 0.75, "rewards/chosen": 0.02893543429672718, "rewards/margins": 0.052164457738399506, "rewards/rejected": -0.023229029029607773, "step": 222 }, { "epoch": 0.36, "learning_rate": 1.9875222816399287e-07, "logits/chosen": -1.2805598974227905, "logits/rejected": -1.2869868278503418, "logps/chosen": -75.24971771240234, "logps/rejected": -82.32807922363281, "loss": 0.6993, "rewards/accuracies": 0.5, "rewards/chosen": 0.06184406578540802, "rewards/margins": 0.03207845985889435, "rewards/rejected": 0.029765605926513672, "step": 223 }, { "epoch": 0.36, "learning_rate": 1.9964349376114081e-07, "logits/chosen": -1.3543906211853027, "logits/rejected": -1.321485996246338, "logps/chosen": -89.03424072265625, "logps/rejected": -89.04637908935547, "loss": 0.6926, "rewards/accuracies": 1.0, "rewards/chosen": 0.10586948692798615, "rewards/margins": 0.07139110565185547, "rewards/rejected": 0.03447837755084038, "step": 224 }, { "epoch": 0.36, "learning_rate": 2.0053475935828876e-07, "logits/chosen": -1.291060209274292, "logits/rejected": -1.2525196075439453, "logps/chosen": -109.12630462646484, "logps/rejected": -113.05500030517578, "loss": 0.6905, "rewards/accuracies": 0.25, "rewards/chosen": -0.03588543087244034, "rewards/margins": -0.028558731079101562, "rewards/rejected": -0.007326698396354914, "step": 225 }, { "epoch": 0.36, "learning_rate": 2.014260249554367e-07, "logits/chosen": -1.2375459671020508, "logits/rejected": -1.1319429874420166, "logps/chosen": -87.06940460205078, "logps/rejected": -81.87649536132812, "loss": 0.6879, "rewards/accuracies": 1.0, "rewards/chosen": 0.06458874046802521, "rewards/margins": 0.07568836212158203, "rewards/rejected": -0.011099625378847122, "step": 226 }, { "epoch": 0.36, "learning_rate": 2.0231729055258467e-07, "logits/chosen": -1.2574338912963867, "logits/rejected": -1.3508352041244507, "logps/chosen": -51.461002349853516, "logps/rejected": -52.147308349609375, "loss": 0.689, "rewards/accuracies": 0.75, "rewards/chosen": 0.017365265637636185, "rewards/margins": 0.016486644744873047, "rewards/rejected": 0.0008786208927631378, "step": 227 }, { "epoch": 0.37, "learning_rate": 2.0320855614973262e-07, "logits/chosen": -1.225588321685791, "logits/rejected": -1.278594732284546, "logps/chosen": -76.51856994628906, "logps/rejected": -84.6484375, "loss": 0.6834, "rewards/accuracies": 0.5, "rewards/chosen": -0.011736584827303886, "rewards/margins": -0.013005736283957958, "rewards/rejected": 0.001269150641746819, "step": 228 }, { "epoch": 0.37, "learning_rate": 2.0409982174688056e-07, "logits/chosen": -1.2359657287597656, "logits/rejected": -1.1987488269805908, "logps/chosen": -97.26112365722656, "logps/rejected": -100.06990051269531, "loss": 0.6755, "rewards/accuracies": 0.75, "rewards/chosen": 0.0042654043063521385, "rewards/margins": 0.015516474843025208, "rewards/rejected": -0.011251069605350494, "step": 229 }, { "epoch": 0.37, "learning_rate": 2.049910873440285e-07, "logits/chosen": -1.1559699773788452, "logits/rejected": -1.2827870845794678, "logps/chosen": -89.52205657958984, "logps/rejected": -93.19667053222656, "loss": 0.6874, "rewards/accuracies": 0.75, "rewards/chosen": 0.04632082208991051, "rewards/margins": 0.03583383560180664, "rewards/rejected": 0.010486985556781292, "step": 230 }, { "epoch": 0.37, "learning_rate": 2.0588235294117645e-07, "logits/chosen": -1.2297463417053223, "logits/rejected": -1.3464181423187256, "logps/chosen": -92.26803588867188, "logps/rejected": -95.75495147705078, "loss": 0.6882, "rewards/accuracies": 1.0, "rewards/chosen": 0.03621730953454971, "rewards/margins": 0.10506764054298401, "rewards/rejected": -0.0688503310084343, "step": 231 }, { "epoch": 0.37, "learning_rate": 2.0677361853832442e-07, "logits/chosen": -1.3085564374923706, "logits/rejected": -1.1796925067901611, "logps/chosen": -80.08882904052734, "logps/rejected": -90.08299255371094, "loss": 0.6822, "rewards/accuracies": 0.5, "rewards/chosen": 0.054616548120975494, "rewards/margins": 0.015880106016993523, "rewards/rejected": 0.03873644024133682, "step": 232 }, { "epoch": 0.37, "learning_rate": 2.0766488413547236e-07, "logits/chosen": -1.3770502805709839, "logits/rejected": -1.294053077697754, "logps/chosen": -79.77741241455078, "logps/rejected": -96.57077026367188, "loss": 0.6851, "rewards/accuracies": 0.5, "rewards/chosen": 0.0904666930437088, "rewards/margins": 0.07377853244543076, "rewards/rejected": 0.016688156872987747, "step": 233 }, { "epoch": 0.38, "learning_rate": 2.085561497326203e-07, "logits/chosen": -1.0940014123916626, "logits/rejected": -1.0471937656402588, "logps/chosen": -79.23580169677734, "logps/rejected": -65.29253387451172, "loss": 0.6696, "rewards/accuracies": 1.0, "rewards/chosen": 0.0256697665899992, "rewards/margins": 0.04503040388226509, "rewards/rejected": -0.01936063915491104, "step": 234 }, { "epoch": 0.38, "learning_rate": 2.0944741532976825e-07, "logits/chosen": -1.4373263120651245, "logits/rejected": -1.3277997970581055, "logps/chosen": -86.504638671875, "logps/rejected": -94.64060974121094, "loss": 0.6859, "rewards/accuracies": 0.5, "rewards/chosen": 0.04969749599695206, "rewards/margins": 0.05911875143647194, "rewards/rejected": -0.009421253576874733, "step": 235 }, { "epoch": 0.38, "learning_rate": 2.103386809269162e-07, "logits/chosen": -1.0195074081420898, "logits/rejected": -1.0604435205459595, "logps/chosen": -73.08753967285156, "logps/rejected": -78.03620147705078, "loss": 0.6818, "rewards/accuracies": 0.25, "rewards/chosen": -0.02312946319580078, "rewards/margins": -0.03335971757769585, "rewards/rejected": 0.01023025531321764, "step": 236 }, { "epoch": 0.38, "learning_rate": 2.1122994652406416e-07, "logits/chosen": -1.3641736507415771, "logits/rejected": -1.3585805892944336, "logps/chosen": -80.21371459960938, "logps/rejected": -113.59886169433594, "loss": 0.6639, "rewards/accuracies": 1.0, "rewards/chosen": 0.043053723871707916, "rewards/margins": 0.07565870881080627, "rewards/rejected": -0.03260498121380806, "step": 237 }, { "epoch": 0.38, "learning_rate": 2.121212121212121e-07, "logits/chosen": -1.2279131412506104, "logits/rejected": -1.2762916088104248, "logps/chosen": -65.65019226074219, "logps/rejected": -71.62332153320312, "loss": 0.6835, "rewards/accuracies": 0.75, "rewards/chosen": 0.01965961419045925, "rewards/margins": 0.02842998504638672, "rewards/rejected": -0.008770370855927467, "step": 238 }, { "epoch": 0.38, "learning_rate": 2.1301247771836005e-07, "logits/chosen": -1.0042330026626587, "logits/rejected": -1.0530705451965332, "logps/chosen": -80.02039337158203, "logps/rejected": -73.7850341796875, "loss": 0.6769, "rewards/accuracies": 1.0, "rewards/chosen": 0.06413745880126953, "rewards/margins": 0.05847463756799698, "rewards/rejected": 0.005662822630256414, "step": 239 }, { "epoch": 0.39, "learning_rate": 2.13903743315508e-07, "logits/chosen": -1.196266770362854, "logits/rejected": -1.23978590965271, "logps/chosen": -86.27023315429688, "logps/rejected": -88.421875, "loss": 0.6932, "rewards/accuracies": 0.75, "rewards/chosen": -0.0023431777954101562, "rewards/margins": 0.03253936767578125, "rewards/rejected": -0.034882545471191406, "step": 240 }, { "epoch": 0.39, "learning_rate": 2.1479500891265594e-07, "logits/chosen": -1.13138747215271, "logits/rejected": -1.0820896625518799, "logps/chosen": -89.10614013671875, "logps/rejected": -95.83501434326172, "loss": 0.6848, "rewards/accuracies": 1.0, "rewards/chosen": 0.07065601646900177, "rewards/margins": 0.05399303883314133, "rewards/rejected": 0.01666298136115074, "step": 241 }, { "epoch": 0.39, "learning_rate": 2.156862745098039e-07, "logits/chosen": -1.1835781335830688, "logits/rejected": -1.2211390733718872, "logps/chosen": -105.93952941894531, "logps/rejected": -112.36106872558594, "loss": 0.6813, "rewards/accuracies": 0.75, "rewards/chosen": 0.09962844103574753, "rewards/margins": 0.11508216708898544, "rewards/rejected": -0.015453722327947617, "step": 242 }, { "epoch": 0.39, "learning_rate": 2.1657754010695188e-07, "logits/chosen": -0.9580905437469482, "logits/rejected": -0.9684072136878967, "logps/chosen": -117.10250091552734, "logps/rejected": -85.48649597167969, "loss": 0.6938, "rewards/accuracies": 0.75, "rewards/chosen": 0.06850758194923401, "rewards/margins": 0.08076611161231995, "rewards/rejected": -0.012258529663085938, "step": 243 }, { "epoch": 0.39, "learning_rate": 2.1746880570409982e-07, "logits/chosen": -1.2071748971939087, "logits/rejected": -1.1654880046844482, "logps/chosen": -114.59228515625, "logps/rejected": -108.928955078125, "loss": 0.6714, "rewards/accuracies": 0.25, "rewards/chosen": 0.06423740088939667, "rewards/margins": 0.013858222402632236, "rewards/rejected": 0.050379179418087006, "step": 244 }, { "epoch": 0.39, "learning_rate": 2.1836007130124777e-07, "logits/chosen": -1.4055743217468262, "logits/rejected": -1.4316047430038452, "logps/chosen": -83.58515930175781, "logps/rejected": -83.21244812011719, "loss": 0.6759, "rewards/accuracies": 0.75, "rewards/chosen": 0.11131229251623154, "rewards/margins": 0.12444982677698135, "rewards/rejected": -0.013137530535459518, "step": 245 }, { "epoch": 0.39, "learning_rate": 2.192513368983957e-07, "logits/chosen": -1.5405527353286743, "logits/rejected": -1.4525147676467896, "logps/chosen": -87.57505798339844, "logps/rejected": -94.3548583984375, "loss": 0.6803, "rewards/accuracies": 0.5, "rewards/chosen": -0.019840242341160774, "rewards/margins": -0.0008686091750860214, "rewards/rejected": -0.018971635028719902, "step": 246 }, { "epoch": 0.4, "learning_rate": 2.2014260249554368e-07, "logits/chosen": -1.1597777605056763, "logits/rejected": -1.0557482242584229, "logps/chosen": -87.28373718261719, "logps/rejected": -108.77473449707031, "loss": 0.6833, "rewards/accuracies": 0.5, "rewards/chosen": 0.029638197273015976, "rewards/margins": -0.017582416534423828, "rewards/rejected": 0.047220610082149506, "step": 247 }, { "epoch": 0.4, "learning_rate": 2.2103386809269163e-07, "logits/chosen": -1.150583028793335, "logits/rejected": -1.199609637260437, "logps/chosen": -69.0417709350586, "logps/rejected": -68.70387268066406, "loss": 0.6839, "rewards/accuracies": 0.5, "rewards/chosen": 0.007855510339140892, "rewards/margins": -0.016690729185938835, "rewards/rejected": 0.024546241387724876, "step": 248 }, { "epoch": 0.4, "learning_rate": 2.2192513368983957e-07, "logits/chosen": -1.2985551357269287, "logits/rejected": -1.287369728088379, "logps/chosen": -85.56787109375, "logps/rejected": -83.8221206665039, "loss": 0.6892, "rewards/accuracies": 1.0, "rewards/chosen": 0.0590452216565609, "rewards/margins": 0.06892690807580948, "rewards/rejected": -0.009881686419248581, "step": 249 }, { "epoch": 0.4, "learning_rate": 2.2281639928698751e-07, "logits/chosen": -1.2801727056503296, "logits/rejected": -1.2373180389404297, "logps/chosen": -82.9253158569336, "logps/rejected": -94.94396209716797, "loss": 0.6849, "rewards/accuracies": 0.75, "rewards/chosen": 0.07767181098461151, "rewards/margins": 0.09010696411132812, "rewards/rejected": -0.012435151264071465, "step": 250 }, { "epoch": 0.4, "learning_rate": 2.2370766488413546e-07, "logits/chosen": -1.451326847076416, "logits/rejected": -1.4487104415893555, "logps/chosen": -70.09224700927734, "logps/rejected": -81.06734466552734, "loss": 0.669, "rewards/accuracies": 0.75, "rewards/chosen": 0.06737308204174042, "rewards/margins": 0.018830107524991035, "rewards/rejected": 0.04854297637939453, "step": 251 }, { "epoch": 0.4, "learning_rate": 2.2459893048128343e-07, "logits/chosen": -1.2333167791366577, "logits/rejected": -1.2645306587219238, "logps/chosen": -106.37818908691406, "logps/rejected": -86.23303985595703, "loss": 0.6977, "rewards/accuracies": 0.5, "rewards/chosen": -0.04568672180175781, "rewards/margins": -0.08175735175609589, "rewards/rejected": 0.03607063367962837, "step": 252 }, { "epoch": 0.41, "learning_rate": 2.2549019607843137e-07, "logits/chosen": -1.0072137117385864, "logits/rejected": -1.0914777517318726, "logps/chosen": -62.423851013183594, "logps/rejected": -72.65961456298828, "loss": 0.6843, "rewards/accuracies": 0.5, "rewards/chosen": -0.0005303383804857731, "rewards/margins": -0.03809156268835068, "rewards/rejected": 0.03756122663617134, "step": 253 }, { "epoch": 0.41, "learning_rate": 2.2638146167557932e-07, "logits/chosen": -0.885297417640686, "logits/rejected": -0.91676926612854, "logps/chosen": -99.86686706542969, "logps/rejected": -91.1577377319336, "loss": 0.6999, "rewards/accuracies": 0.25, "rewards/chosen": 0.037349700927734375, "rewards/margins": -0.0669986754655838, "rewards/rejected": 0.10434836894273758, "step": 254 }, { "epoch": 0.41, "learning_rate": 2.2727272727272726e-07, "logits/chosen": -1.2678251266479492, "logits/rejected": -1.3043278455734253, "logps/chosen": -89.47602844238281, "logps/rejected": -106.13329315185547, "loss": 0.6728, "rewards/accuracies": 0.5, "rewards/chosen": 0.05332384258508682, "rewards/margins": 0.015943430364131927, "rewards/rejected": 0.0373804084956646, "step": 255 }, { "epoch": 0.41, "learning_rate": 2.281639928698752e-07, "logits/chosen": -0.9590396881103516, "logits/rejected": -1.0655536651611328, "logps/chosen": -84.81697082519531, "logps/rejected": -97.45384216308594, "loss": 0.68, "rewards/accuracies": 0.75, "rewards/chosen": 0.07383251190185547, "rewards/margins": 0.040572166442871094, "rewards/rejected": 0.033260345458984375, "step": 256 }, { "epoch": 0.41, "learning_rate": 2.2905525846702317e-07, "logits/chosen": -1.0711615085601807, "logits/rejected": -1.066248893737793, "logps/chosen": -85.9084243774414, "logps/rejected": -90.72894287109375, "loss": 0.6811, "rewards/accuracies": 0.0, "rewards/chosen": 0.013261604122817516, "rewards/margins": -0.05296192318201065, "rewards/rejected": 0.06622352451086044, "step": 257 }, { "epoch": 0.41, "learning_rate": 2.2994652406417112e-07, "logits/chosen": -1.2024062871932983, "logits/rejected": -1.1923305988311768, "logps/chosen": -85.6546630859375, "logps/rejected": -90.01422882080078, "loss": 0.6926, "rewards/accuracies": 1.0, "rewards/chosen": 0.12012015283107758, "rewards/margins": 0.1374165564775467, "rewards/rejected": -0.017296411097049713, "step": 258 }, { "epoch": 0.42, "learning_rate": 2.3083778966131906e-07, "logits/chosen": -1.3026809692382812, "logits/rejected": -1.2740707397460938, "logps/chosen": -85.69515991210938, "logps/rejected": -86.67021942138672, "loss": 0.6743, "rewards/accuracies": 0.75, "rewards/chosen": 0.05926590412855148, "rewards/margins": -0.0055942535400390625, "rewards/rejected": 0.06486015021800995, "step": 259 }, { "epoch": 0.42, "learning_rate": 2.31729055258467e-07, "logits/chosen": -1.0968396663665771, "logits/rejected": -1.1124399900436401, "logps/chosen": -109.53699493408203, "logps/rejected": -99.47398376464844, "loss": 0.6889, "rewards/accuracies": 0.75, "rewards/chosen": 0.10410556942224503, "rewards/margins": 0.055524252355098724, "rewards/rejected": 0.048581313341856, "step": 260 }, { "epoch": 0.42, "learning_rate": 2.3262032085561498e-07, "logits/chosen": -1.2599648237228394, "logits/rejected": -1.2472862005233765, "logps/chosen": -88.58628845214844, "logps/rejected": -98.78579711914062, "loss": 0.6807, "rewards/accuracies": 0.75, "rewards/chosen": 0.09660253673791885, "rewards/margins": 0.06635971367359161, "rewards/rejected": 0.03024282492697239, "step": 261 }, { "epoch": 0.42, "learning_rate": 2.3351158645276292e-07, "logits/chosen": -1.2192515134811401, "logits/rejected": -1.2429025173187256, "logps/chosen": -113.75950622558594, "logps/rejected": -97.70962524414062, "loss": 0.6724, "rewards/accuracies": 0.75, "rewards/chosen": 0.04426403343677521, "rewards/margins": 0.05276508629322052, "rewards/rejected": -0.008501052856445312, "step": 262 }, { "epoch": 0.42, "learning_rate": 2.3440285204991086e-07, "logits/chosen": -1.1820435523986816, "logits/rejected": -1.3682022094726562, "logps/chosen": -94.43738555908203, "logps/rejected": -118.38250732421875, "loss": 0.6761, "rewards/accuracies": 0.25, "rewards/chosen": 0.11442070454359055, "rewards/margins": 0.11597920209169388, "rewards/rejected": -0.0015584924258291721, "step": 263 }, { "epoch": 0.42, "learning_rate": 2.352941176470588e-07, "logits/chosen": -1.3031953573226929, "logits/rejected": -1.3824700117111206, "logps/chosen": -76.28455352783203, "logps/rejected": -111.09174346923828, "loss": 0.6683, "rewards/accuracies": 0.25, "rewards/chosen": 0.05090579763054848, "rewards/margins": 0.027386857196688652, "rewards/rejected": 0.023518944159150124, "step": 264 }, { "epoch": 0.43, "learning_rate": 2.3618538324420675e-07, "logits/chosen": -1.47735595703125, "logits/rejected": -1.4064598083496094, "logps/chosen": -91.46055603027344, "logps/rejected": -90.98507690429688, "loss": 0.6689, "rewards/accuracies": 0.5, "rewards/chosen": 0.06539182364940643, "rewards/margins": 0.05169401317834854, "rewards/rejected": 0.01369781605899334, "step": 265 }, { "epoch": 0.43, "learning_rate": 2.3707664884135472e-07, "logits/chosen": -1.2291524410247803, "logits/rejected": -1.2295732498168945, "logps/chosen": -91.46742248535156, "logps/rejected": -65.66752624511719, "loss": 0.6616, "rewards/accuracies": 1.0, "rewards/chosen": 0.1306879073381424, "rewards/margins": 0.10604839026927948, "rewards/rejected": 0.024639511480927467, "step": 266 }, { "epoch": 0.43, "learning_rate": 2.3796791443850267e-07, "logits/chosen": -1.049019694328308, "logits/rejected": -1.0370814800262451, "logps/chosen": -85.65200805664062, "logps/rejected": -127.39057159423828, "loss": 0.6918, "rewards/accuracies": 0.0, "rewards/chosen": 0.01296691969037056, "rewards/margins": -0.15692175924777985, "rewards/rejected": 0.1698886901140213, "step": 267 }, { "epoch": 0.43, "learning_rate": 2.388591800356506e-07, "logits/chosen": -1.2315694093704224, "logits/rejected": -1.1849857568740845, "logps/chosen": -75.49824523925781, "logps/rejected": -76.63864135742188, "loss": 0.6838, "rewards/accuracies": 0.5, "rewards/chosen": 0.04204292595386505, "rewards/margins": 0.009609700180590153, "rewards/rejected": 0.03243322670459747, "step": 268 }, { "epoch": 0.43, "learning_rate": 2.3975044563279855e-07, "logits/chosen": -1.0717147588729858, "logits/rejected": -1.1135541200637817, "logps/chosen": -83.387939453125, "logps/rejected": -69.6025619506836, "loss": 0.6914, "rewards/accuracies": 0.25, "rewards/chosen": 0.02753486856818199, "rewards/margins": -0.028724290430545807, "rewards/rejected": 0.0562591589987278, "step": 269 }, { "epoch": 0.43, "learning_rate": 2.406417112299465e-07, "logits/chosen": -1.1316823959350586, "logits/rejected": -1.161321997642517, "logps/chosen": -76.32443237304688, "logps/rejected": -73.42608642578125, "loss": 0.6998, "rewards/accuracies": 0.5, "rewards/chosen": 0.04347391426563263, "rewards/margins": -0.016432952135801315, "rewards/rejected": 0.059906862676143646, "step": 270 }, { "epoch": 0.43, "learning_rate": 2.415329768270945e-07, "logits/chosen": -1.2896268367767334, "logits/rejected": -1.291379451751709, "logps/chosen": -80.56018829345703, "logps/rejected": -122.67534637451172, "loss": 0.6727, "rewards/accuracies": 0.75, "rewards/chosen": 0.14456282556056976, "rewards/margins": 0.11899366229772568, "rewards/rejected": 0.02556915022432804, "step": 271 }, { "epoch": 0.44, "learning_rate": 2.4242424242424244e-07, "logits/chosen": -1.3808834552764893, "logits/rejected": -1.4008418321609497, "logps/chosen": -81.40470123291016, "logps/rejected": -71.37447357177734, "loss": 0.6794, "rewards/accuracies": 0.25, "rewards/chosen": 0.04850349575281143, "rewards/margins": -0.05490855872631073, "rewards/rejected": 0.10341206192970276, "step": 272 }, { "epoch": 0.44, "learning_rate": 2.433155080213904e-07, "logits/chosen": -1.2501654624938965, "logits/rejected": -1.1767557859420776, "logps/chosen": -87.16156005859375, "logps/rejected": -85.14739990234375, "loss": 0.6687, "rewards/accuracies": 0.75, "rewards/chosen": 0.10303746163845062, "rewards/margins": 0.05751323327422142, "rewards/rejected": 0.04552421718835831, "step": 273 }, { "epoch": 0.44, "learning_rate": 2.4420677361853833e-07, "logits/chosen": -1.3462412357330322, "logits/rejected": -1.3435797691345215, "logps/chosen": -101.33477783203125, "logps/rejected": -82.9685287475586, "loss": 0.6649, "rewards/accuracies": 0.5, "rewards/chosen": 0.05540180206298828, "rewards/margins": -0.03626727685332298, "rewards/rejected": 0.09166907519102097, "step": 274 }, { "epoch": 0.44, "learning_rate": 2.4509803921568627e-07, "logits/chosen": -1.4355266094207764, "logits/rejected": -1.392993688583374, "logps/chosen": -66.99176025390625, "logps/rejected": -71.37014770507812, "loss": 0.6819, "rewards/accuracies": 0.5, "rewards/chosen": 0.02662467770278454, "rewards/margins": 0.04171285405755043, "rewards/rejected": -0.015088176354765892, "step": 275 }, { "epoch": 0.44, "learning_rate": 2.459893048128342e-07, "logits/chosen": -1.0971918106079102, "logits/rejected": -1.0950058698654175, "logps/chosen": -113.23728942871094, "logps/rejected": -85.44772338867188, "loss": 0.668, "rewards/accuracies": 0.75, "rewards/chosen": 0.16021136939525604, "rewards/margins": 0.1308424025774002, "rewards/rejected": 0.029368974268436432, "step": 276 }, { "epoch": 0.44, "learning_rate": 2.4688057040998216e-07, "logits/chosen": -1.1685456037521362, "logits/rejected": -1.21990168094635, "logps/chosen": -79.86454010009766, "logps/rejected": -57.833099365234375, "loss": 0.6648, "rewards/accuracies": 0.75, "rewards/chosen": 0.11408253014087677, "rewards/margins": 0.05703301727771759, "rewards/rejected": 0.05704950913786888, "step": 277 }, { "epoch": 0.45, "learning_rate": 2.477718360071301e-07, "logits/chosen": -1.162726640701294, "logits/rejected": -1.2013813257217407, "logps/chosen": -76.55426788330078, "logps/rejected": -92.88990783691406, "loss": 0.6693, "rewards/accuracies": 0.75, "rewards/chosen": 0.16482925415039062, "rewards/margins": 0.10860061645507812, "rewards/rejected": 0.0562286376953125, "step": 278 }, { "epoch": 0.45, "learning_rate": 2.4866310160427805e-07, "logits/chosen": -1.1953692436218262, "logits/rejected": -1.2959855794906616, "logps/chosen": -95.178466796875, "logps/rejected": -104.57371520996094, "loss": 0.6627, "rewards/accuracies": 0.5, "rewards/chosen": 0.09443511813879013, "rewards/margins": 0.02760467678308487, "rewards/rejected": 0.06683044135570526, "step": 279 }, { "epoch": 0.45, "learning_rate": 2.49554367201426e-07, "logits/chosen": -1.2878360748291016, "logits/rejected": -1.2261579036712646, "logps/chosen": -65.76364135742188, "logps/rejected": -74.13623046875, "loss": 0.6734, "rewards/accuracies": 0.5, "rewards/chosen": 0.062433626502752304, "rewards/margins": 0.008893683552742004, "rewards/rejected": 0.0535399429500103, "step": 280 }, { "epoch": 0.45, "learning_rate": 2.5044563279857393e-07, "logits/chosen": -1.1199045181274414, "logits/rejected": -1.0260179042816162, "logps/chosen": -99.21446228027344, "logps/rejected": -110.10317993164062, "loss": 0.6631, "rewards/accuracies": 0.75, "rewards/chosen": 0.07889652252197266, "rewards/margins": 0.08441983163356781, "rewards/rejected": -0.005523303523659706, "step": 281 }, { "epoch": 0.45, "learning_rate": 2.5133689839572193e-07, "logits/chosen": -1.2737451791763306, "logits/rejected": -1.3133862018585205, "logps/chosen": -83.18331909179688, "logps/rejected": -96.2452392578125, "loss": 0.6879, "rewards/accuracies": 0.0, "rewards/chosen": 0.03858843073248863, "rewards/margins": -0.051953792572021484, "rewards/rejected": 0.09054221957921982, "step": 282 }, { "epoch": 0.45, "learning_rate": 2.522281639928699e-07, "logits/chosen": -1.218468427658081, "logits/rejected": -1.1898638010025024, "logps/chosen": -75.5515365600586, "logps/rejected": -73.00982666015625, "loss": 0.6597, "rewards/accuracies": 1.0, "rewards/chosen": 0.14914913475513458, "rewards/margins": 0.10491552203893661, "rewards/rejected": 0.04423360899090767, "step": 283 }, { "epoch": 0.46, "learning_rate": 2.531194295900178e-07, "logits/chosen": -1.0265485048294067, "logits/rejected": -1.0492814779281616, "logps/chosen": -77.20716857910156, "logps/rejected": -76.20745849609375, "loss": 0.6633, "rewards/accuracies": 0.5, "rewards/chosen": 0.25422269105911255, "rewards/margins": 0.11567412316799164, "rewards/rejected": 0.1385485678911209, "step": 284 }, { "epoch": 0.46, "learning_rate": 2.5401069518716576e-07, "logits/chosen": -1.4330224990844727, "logits/rejected": -1.5077335834503174, "logps/chosen": -96.22386169433594, "logps/rejected": -112.85494995117188, "loss": 0.691, "rewards/accuracies": 0.75, "rewards/chosen": 0.2253652662038803, "rewards/margins": 0.19391365349292755, "rewards/rejected": 0.03145160526037216, "step": 285 }, { "epoch": 0.46, "learning_rate": 2.549019607843137e-07, "logits/chosen": -1.150084376335144, "logits/rejected": -1.0905041694641113, "logps/chosen": -97.41041564941406, "logps/rejected": -98.05892181396484, "loss": 0.66, "rewards/accuracies": 1.0, "rewards/chosen": 0.10761947929859161, "rewards/margins": 0.08484935760498047, "rewards/rejected": 0.022770121693611145, "step": 286 }, { "epoch": 0.46, "learning_rate": 2.5579322638146165e-07, "logits/chosen": -1.2912417650222778, "logits/rejected": -1.329816222190857, "logps/chosen": -83.36727905273438, "logps/rejected": -68.7192153930664, "loss": 0.6517, "rewards/accuracies": 0.75, "rewards/chosen": 0.11727103590965271, "rewards/margins": 0.07184983044862747, "rewards/rejected": 0.04542122036218643, "step": 287 }, { "epoch": 0.46, "learning_rate": 2.5668449197860965e-07, "logits/chosen": -1.1300667524337769, "logits/rejected": -1.2059800624847412, "logps/chosen": -109.12397766113281, "logps/rejected": -91.6461181640625, "loss": 0.652, "rewards/accuracies": 0.75, "rewards/chosen": 0.2241353988647461, "rewards/margins": 0.09366723895072937, "rewards/rejected": 0.13046817481517792, "step": 288 }, { "epoch": 0.46, "learning_rate": 2.5757575757575754e-07, "logits/chosen": -1.2609786987304688, "logits/rejected": -1.2727422714233398, "logps/chosen": -80.6039047241211, "logps/rejected": -100.51264953613281, "loss": 0.6749, "rewards/accuracies": 0.75, "rewards/chosen": 0.16222628951072693, "rewards/margins": 0.05164431780576706, "rewards/rejected": 0.11058197915554047, "step": 289 }, { "epoch": 0.47, "learning_rate": 2.5846702317290554e-07, "logits/chosen": -1.2211748361587524, "logits/rejected": -1.2957744598388672, "logps/chosen": -81.94351196289062, "logps/rejected": -65.82157897949219, "loss": 0.6717, "rewards/accuracies": 0.5, "rewards/chosen": 0.06339158862829208, "rewards/margins": 0.019843099638819695, "rewards/rejected": 0.04354849085211754, "step": 290 }, { "epoch": 0.47, "learning_rate": 2.593582887700534e-07, "logits/chosen": -1.1203752756118774, "logits/rejected": -1.250995397567749, "logps/chosen": -71.03523254394531, "logps/rejected": -103.40093994140625, "loss": 0.6876, "rewards/accuracies": 0.5, "rewards/chosen": 0.07859501987695694, "rewards/margins": -0.11592496186494827, "rewards/rejected": 0.1945199966430664, "step": 291 }, { "epoch": 0.47, "learning_rate": 2.602495543672014e-07, "logits/chosen": -1.1564207077026367, "logits/rejected": -1.2927607297897339, "logps/chosen": -79.52952575683594, "logps/rejected": -75.32966613769531, "loss": 0.6662, "rewards/accuracies": 0.5, "rewards/chosen": 0.16619624197483063, "rewards/margins": 0.02776785008609295, "rewards/rejected": 0.13842840492725372, "step": 292 }, { "epoch": 0.47, "learning_rate": 2.6114081996434937e-07, "logits/chosen": -1.1339128017425537, "logits/rejected": -1.2188538312911987, "logps/chosen": -68.42950439453125, "logps/rejected": -67.3084487915039, "loss": 0.6508, "rewards/accuracies": 1.0, "rewards/chosen": 0.17174777388572693, "rewards/margins": 0.14832183718681335, "rewards/rejected": 0.023425960913300514, "step": 293 }, { "epoch": 0.47, "learning_rate": 2.620320855614973e-07, "logits/chosen": -1.2407300472259521, "logits/rejected": -1.147329568862915, "logps/chosen": -72.10598754882812, "logps/rejected": -74.8037109375, "loss": 0.6831, "rewards/accuracies": 0.25, "rewards/chosen": 0.06572581082582474, "rewards/margins": -0.001997658982872963, "rewards/rejected": 0.06772346049547195, "step": 294 }, { "epoch": 0.47, "learning_rate": 2.6292335115864525e-07, "logits/chosen": -0.969772458076477, "logits/rejected": -1.02313232421875, "logps/chosen": -109.25942993164062, "logps/rejected": -96.70529174804688, "loss": 0.6759, "rewards/accuracies": 0.0, "rewards/chosen": 0.103376105427742, "rewards/margins": -0.022210123017430305, "rewards/rejected": 0.12558622658252716, "step": 295 }, { "epoch": 0.48, "learning_rate": 2.638146167557932e-07, "logits/chosen": -1.3709019422531128, "logits/rejected": -1.260079264640808, "logps/chosen": -88.28411865234375, "logps/rejected": -86.83455657958984, "loss": 0.67, "rewards/accuracies": 0.5, "rewards/chosen": 0.13328608870506287, "rewards/margins": 0.020631026476621628, "rewards/rejected": 0.11265506595373154, "step": 296 }, { "epoch": 0.48, "learning_rate": 2.6470588235294114e-07, "logits/chosen": -1.2452988624572754, "logits/rejected": -1.314401626586914, "logps/chosen": -80.23633575439453, "logps/rejected": -74.17669677734375, "loss": 0.6598, "rewards/accuracies": 0.75, "rewards/chosen": 0.09177089482545853, "rewards/margins": -0.006086774170398712, "rewards/rejected": 0.09785766154527664, "step": 297 }, { "epoch": 0.48, "learning_rate": 2.6559714795008914e-07, "logits/chosen": -1.3040896654129028, "logits/rejected": -1.2823154926300049, "logps/chosen": -78.94120025634766, "logps/rejected": -89.09233856201172, "loss": 0.6608, "rewards/accuracies": 1.0, "rewards/chosen": 0.21634846925735474, "rewards/margins": 0.3118218183517456, "rewards/rejected": -0.09547338634729385, "step": 298 }, { "epoch": 0.48, "learning_rate": 2.6648841354723703e-07, "logits/chosen": -1.1973069906234741, "logits/rejected": -1.2220531702041626, "logps/chosen": -81.25874328613281, "logps/rejected": -77.93314361572266, "loss": 0.688, "rewards/accuracies": 0.75, "rewards/chosen": 0.13577900826931, "rewards/margins": 0.040225982666015625, "rewards/rejected": 0.09555301815271378, "step": 299 }, { "epoch": 0.48, "learning_rate": 2.6737967914438503e-07, "logits/chosen": -1.2609388828277588, "logits/rejected": -1.2270541191101074, "logps/chosen": -77.33185577392578, "logps/rejected": -84.50601196289062, "loss": 0.678, "rewards/accuracies": 0.75, "rewards/chosen": 0.15515795350074768, "rewards/margins": 0.08373098075389862, "rewards/rejected": 0.07142696529626846, "step": 300 }, { "epoch": 0.48, "learning_rate": 2.6827094474153297e-07, "logits/chosen": -0.9112197756767273, "logits/rejected": -0.9008002877235413, "logps/chosen": -105.34730529785156, "logps/rejected": -96.0273208618164, "loss": 0.6654, "rewards/accuracies": 1.0, "rewards/chosen": 0.13273297250270844, "rewards/margins": 0.11414032429456711, "rewards/rejected": 0.01859264448285103, "step": 301 }, { "epoch": 0.48, "learning_rate": 2.691622103386809e-07, "logits/chosen": -1.1602048873901367, "logits/rejected": -1.2361183166503906, "logps/chosen": -69.30332946777344, "logps/rejected": -59.33778762817383, "loss": 0.6815, "rewards/accuracies": 0.75, "rewards/chosen": 0.10628003627061844, "rewards/margins": 0.03762922063469887, "rewards/rejected": 0.06865081936120987, "step": 302 }, { "epoch": 0.49, "learning_rate": 2.700534759358289e-07, "logits/chosen": -1.2832103967666626, "logits/rejected": -1.2536712884902954, "logps/chosen": -87.33821868896484, "logps/rejected": -83.83084106445312, "loss": 0.6751, "rewards/accuracies": 0.75, "rewards/chosen": 0.13247795403003693, "rewards/margins": 0.062273457646369934, "rewards/rejected": 0.07020450383424759, "step": 303 }, { "epoch": 0.49, "learning_rate": 2.709447415329768e-07, "logits/chosen": -1.4619874954223633, "logits/rejected": -1.5157349109649658, "logps/chosen": -89.15220642089844, "logps/rejected": -93.2753677368164, "loss": 0.6698, "rewards/accuracies": 0.5, "rewards/chosen": 0.022847745567560196, "rewards/margins": -0.009965900331735611, "rewards/rejected": 0.03281364589929581, "step": 304 }, { "epoch": 0.49, "learning_rate": 2.718360071301248e-07, "logits/chosen": -1.135348916053772, "logits/rejected": -1.1251702308654785, "logps/chosen": -69.99653625488281, "logps/rejected": -67.7619400024414, "loss": 0.6858, "rewards/accuracies": 0.5, "rewards/chosen": 0.12758751213550568, "rewards/margins": 0.026541896164417267, "rewards/rejected": 0.10104560852050781, "step": 305 }, { "epoch": 0.49, "learning_rate": 2.727272727272727e-07, "logits/chosen": -1.2959814071655273, "logits/rejected": -1.3914355039596558, "logps/chosen": -84.02043151855469, "logps/rejected": -75.88203430175781, "loss": 0.6495, "rewards/accuracies": 0.75, "rewards/chosen": 0.09288692474365234, "rewards/margins": 0.07419758290052414, "rewards/rejected": 0.018689343705773354, "step": 306 }, { "epoch": 0.49, "learning_rate": 2.736185383244207e-07, "logits/chosen": -1.3438833951950073, "logits/rejected": -1.2410920858383179, "logps/chosen": -103.746337890625, "logps/rejected": -95.27484893798828, "loss": 0.7026, "rewards/accuracies": 0.25, "rewards/chosen": -0.032576750963926315, "rewards/margins": -0.10241146385669708, "rewards/rejected": 0.06983470916748047, "step": 307 }, { "epoch": 0.49, "learning_rate": 2.7450980392156863e-07, "logits/chosen": -1.1224687099456787, "logits/rejected": -1.2203575372695923, "logps/chosen": -100.82917785644531, "logps/rejected": -104.21343231201172, "loss": 0.6879, "rewards/accuracies": 1.0, "rewards/chosen": 0.22812862694263458, "rewards/margins": 0.18242168426513672, "rewards/rejected": 0.045706942677497864, "step": 308 }, { "epoch": 0.5, "learning_rate": 2.754010695187166e-07, "logits/chosen": -1.0806572437286377, "logits/rejected": -1.0251590013504028, "logps/chosen": -88.9085693359375, "logps/rejected": -99.7394790649414, "loss": 0.6612, "rewards/accuracies": 0.5, "rewards/chosen": 0.09342250227928162, "rewards/margins": 0.040767572820186615, "rewards/rejected": 0.0526549331843853, "step": 309 }, { "epoch": 0.5, "learning_rate": 2.762923351158645e-07, "logits/chosen": -1.1136000156402588, "logits/rejected": -1.153560996055603, "logps/chosen": -74.92514038085938, "logps/rejected": -68.93941497802734, "loss": 0.6677, "rewards/accuracies": 0.5, "rewards/chosen": 0.06848563998937607, "rewards/margins": -0.012454701587557793, "rewards/rejected": 0.08094033598899841, "step": 310 }, { "epoch": 0.5, "learning_rate": 2.7718360071301246e-07, "logits/chosen": -1.2570738792419434, "logits/rejected": -1.236356496810913, "logps/chosen": -65.06344604492188, "logps/rejected": -58.971641540527344, "loss": 0.6608, "rewards/accuracies": 1.0, "rewards/chosen": 0.07606229931116104, "rewards/margins": 0.06727390736341476, "rewards/rejected": 0.008788393810391426, "step": 311 }, { "epoch": 0.5, "learning_rate": 2.780748663101604e-07, "logits/chosen": -1.2095311880111694, "logits/rejected": -1.2186963558197021, "logps/chosen": -104.74409484863281, "logps/rejected": -83.13262939453125, "loss": 0.647, "rewards/accuracies": 1.0, "rewards/chosen": 0.31059589982032776, "rewards/margins": 0.2990642488002777, "rewards/rejected": 0.011531639844179153, "step": 312 }, { "epoch": 0.5, "learning_rate": 2.789661319073084e-07, "logits/chosen": -1.143808364868164, "logits/rejected": -1.0819718837738037, "logps/chosen": -67.19629669189453, "logps/rejected": -77.13871002197266, "loss": 0.6707, "rewards/accuracies": 1.0, "rewards/chosen": 0.25130119919776917, "rewards/margins": 0.24421121180057526, "rewards/rejected": 0.007089996710419655, "step": 313 }, { "epoch": 0.5, "learning_rate": 2.798573975044563e-07, "logits/chosen": -1.1860305070877075, "logits/rejected": -1.2017343044281006, "logps/chosen": -72.12841033935547, "logps/rejected": -87.95671081542969, "loss": 0.6534, "rewards/accuracies": 1.0, "rewards/chosen": 0.14963217079639435, "rewards/margins": 0.12384997308254242, "rewards/rejected": 0.025782205164432526, "step": 314 }, { "epoch": 0.51, "learning_rate": 2.807486631016043e-07, "logits/chosen": -1.1104300022125244, "logits/rejected": -1.1325981616973877, "logps/chosen": -75.87508392333984, "logps/rejected": -100.69789123535156, "loss": 0.6678, "rewards/accuracies": 0.5, "rewards/chosen": 0.049971774220466614, "rewards/margins": -0.07510032504796982, "rewards/rejected": 0.12507209181785583, "step": 315 }, { "epoch": 0.51, "learning_rate": 2.816399286987522e-07, "logits/chosen": -1.3556714057922363, "logits/rejected": -1.3343536853790283, "logps/chosen": -94.25479888916016, "logps/rejected": -94.40919494628906, "loss": 0.684, "rewards/accuracies": 0.75, "rewards/chosen": 0.28538981080055237, "rewards/margins": 0.2286706119775772, "rewards/rejected": 0.056719209998846054, "step": 316 }, { "epoch": 0.51, "learning_rate": 2.825311942959002e-07, "logits/chosen": -1.2946072816848755, "logits/rejected": -1.2683013677597046, "logps/chosen": -78.06330108642578, "logps/rejected": -65.232421875, "loss": 0.6474, "rewards/accuracies": 0.25, "rewards/chosen": -0.0706791877746582, "rewards/margins": -0.03911658376455307, "rewards/rejected": -0.03156261518597603, "step": 317 }, { "epoch": 0.51, "learning_rate": 2.834224598930481e-07, "logits/chosen": -1.3057444095611572, "logits/rejected": -1.3305914402008057, "logps/chosen": -67.47688293457031, "logps/rejected": -77.79869079589844, "loss": 0.6454, "rewards/accuracies": 1.0, "rewards/chosen": 0.3435845673084259, "rewards/margins": 0.2820982336997986, "rewards/rejected": 0.061486341059207916, "step": 318 }, { "epoch": 0.51, "learning_rate": 2.8431372549019607e-07, "logits/chosen": -1.2416703701019287, "logits/rejected": -1.3033418655395508, "logps/chosen": -88.55001068115234, "logps/rejected": -70.91754913330078, "loss": 0.6809, "rewards/accuracies": 1.0, "rewards/chosen": 0.21004284918308258, "rewards/margins": 0.22509776055812836, "rewards/rejected": -0.015054893679916859, "step": 319 }, { "epoch": 0.51, "learning_rate": 2.85204991087344e-07, "logits/chosen": -1.1377530097961426, "logits/rejected": -1.1824742555618286, "logps/chosen": -73.46063232421875, "logps/rejected": -100.76456451416016, "loss": 0.6489, "rewards/accuracies": 0.75, "rewards/chosen": 0.13749217987060547, "rewards/margins": 0.0714053213596344, "rewards/rejected": 0.06608686596155167, "step": 320 }, { "epoch": 0.52, "learning_rate": 2.8609625668449196e-07, "logits/chosen": -1.2777680158615112, "logits/rejected": -1.2261133193969727, "logps/chosen": -83.79942321777344, "logps/rejected": -97.80702209472656, "loss": 0.6428, "rewards/accuracies": 0.5, "rewards/chosen": 0.0702640563249588, "rewards/margins": 0.031407542526721954, "rewards/rejected": 0.03885651007294655, "step": 321 }, { "epoch": 0.52, "learning_rate": 2.869875222816399e-07, "logits/chosen": -1.3164100646972656, "logits/rejected": -1.3429471254348755, "logps/chosen": -86.61787414550781, "logps/rejected": -87.35304260253906, "loss": 0.645, "rewards/accuracies": 0.75, "rewards/chosen": 0.18511180579662323, "rewards/margins": 0.09819040447473526, "rewards/rejected": 0.08692140877246857, "step": 322 }, { "epoch": 0.52, "learning_rate": 2.878787878787879e-07, "logits/chosen": -1.2861645221710205, "logits/rejected": -1.3452929258346558, "logps/chosen": -82.19828033447266, "logps/rejected": -93.90924072265625, "loss": 0.6665, "rewards/accuracies": 0.5, "rewards/chosen": 0.13947677612304688, "rewards/margins": 0.037457745522260666, "rewards/rejected": 0.10201903432607651, "step": 323 }, { "epoch": 0.52, "learning_rate": 2.887700534759358e-07, "logits/chosen": -1.378412127494812, "logits/rejected": -1.3714053630828857, "logps/chosen": -79.28540802001953, "logps/rejected": -91.89629364013672, "loss": 0.659, "rewards/accuracies": 0.75, "rewards/chosen": 0.07279205322265625, "rewards/margins": 0.10650424659252167, "rewards/rejected": -0.03371219336986542, "step": 324 }, { "epoch": 0.52, "learning_rate": 2.896613190730838e-07, "logits/chosen": -0.9697253108024597, "logits/rejected": -0.9202805757522583, "logps/chosen": -92.09651184082031, "logps/rejected": -92.39553833007812, "loss": 0.6525, "rewards/accuracies": 0.75, "rewards/chosen": 0.1783168762922287, "rewards/margins": 0.07809047400951385, "rewards/rejected": 0.10022640228271484, "step": 325 }, { "epoch": 0.52, "learning_rate": 2.905525846702317e-07, "logits/chosen": -1.0326021909713745, "logits/rejected": -1.1439400911331177, "logps/chosen": -91.49354553222656, "logps/rejected": -96.96902465820312, "loss": 0.6611, "rewards/accuracies": 0.5, "rewards/chosen": 0.06546631455421448, "rewards/margins": 0.0043199509382247925, "rewards/rejected": 0.061146363615989685, "step": 326 }, { "epoch": 0.52, "learning_rate": 2.9144385026737967e-07, "logits/chosen": -1.3942519426345825, "logits/rejected": -1.3838871717453003, "logps/chosen": -107.79054260253906, "logps/rejected": -100.28005981445312, "loss": 0.6734, "rewards/accuracies": 0.75, "rewards/chosen": 0.06261520087718964, "rewards/margins": -0.008522801101207733, "rewards/rejected": 0.07113800197839737, "step": 327 }, { "epoch": 0.53, "learning_rate": 2.923351158645276e-07, "logits/chosen": -1.0869139432907104, "logits/rejected": -1.1671937704086304, "logps/chosen": -86.10123443603516, "logps/rejected": -65.01278686523438, "loss": 0.6586, "rewards/accuracies": 1.0, "rewards/chosen": 0.15183678269386292, "rewards/margins": 0.17026816308498383, "rewards/rejected": -0.018431376665830612, "step": 328 }, { "epoch": 0.53, "learning_rate": 2.9322638146167556e-07, "logits/chosen": -1.3306090831756592, "logits/rejected": -1.2829090356826782, "logps/chosen": -79.16413879394531, "logps/rejected": -79.23770141601562, "loss": 0.665, "rewards/accuracies": 0.5, "rewards/chosen": 0.056937411427497864, "rewards/margins": 0.04890613257884979, "rewards/rejected": 0.008031275123357773, "step": 329 }, { "epoch": 0.53, "learning_rate": 2.941176470588235e-07, "logits/chosen": -1.2391971349716187, "logits/rejected": -1.182995319366455, "logps/chosen": -86.610595703125, "logps/rejected": -89.02350616455078, "loss": 0.6531, "rewards/accuracies": 0.25, "rewards/chosen": 0.0064332978799939156, "rewards/margins": -0.06546249985694885, "rewards/rejected": 0.0718957930803299, "step": 330 }, { "epoch": 0.53, "learning_rate": 2.9500891265597145e-07, "logits/chosen": -1.4625813961029053, "logits/rejected": -1.3253648281097412, "logps/chosen": -82.2672119140625, "logps/rejected": -105.3332290649414, "loss": 0.6673, "rewards/accuracies": 0.25, "rewards/chosen": 0.058600619435310364, "rewards/margins": -0.005243297666311264, "rewards/rejected": 0.06384392082691193, "step": 331 }, { "epoch": 0.53, "learning_rate": 2.959001782531194e-07, "logits/chosen": -1.2394137382507324, "logits/rejected": -1.2723644971847534, "logps/chosen": -77.5280990600586, "logps/rejected": -77.68917083740234, "loss": 0.6611, "rewards/accuracies": 0.75, "rewards/chosen": -0.07767380774021149, "rewards/margins": -0.0977436974644661, "rewards/rejected": 0.02006988599896431, "step": 332 }, { "epoch": 0.53, "learning_rate": 2.967914438502674e-07, "logits/chosen": -1.2740813493728638, "logits/rejected": -1.25852632522583, "logps/chosen": -86.94034576416016, "logps/rejected": -92.96407318115234, "loss": 0.6438, "rewards/accuracies": 0.75, "rewards/chosen": 0.15614834427833557, "rewards/margins": 0.13373985886573792, "rewards/rejected": 0.022408489137887955, "step": 333 }, { "epoch": 0.54, "learning_rate": 2.976827094474153e-07, "logits/chosen": -1.0233662128448486, "logits/rejected": -1.0137670040130615, "logps/chosen": -73.70561981201172, "logps/rejected": -68.85248565673828, "loss": 0.6815, "rewards/accuracies": 1.0, "rewards/chosen": 0.07474251091480255, "rewards/margins": 0.10938635468482971, "rewards/rejected": -0.03464384004473686, "step": 334 }, { "epoch": 0.54, "learning_rate": 2.985739750445633e-07, "logits/chosen": -1.344076156616211, "logits/rejected": -1.356690526008606, "logps/chosen": -74.8858642578125, "logps/rejected": -87.20622253417969, "loss": 0.6723, "rewards/accuracies": 0.75, "rewards/chosen": 0.05453595891594887, "rewards/margins": 0.09962186217308044, "rewards/rejected": -0.045085906982421875, "step": 335 }, { "epoch": 0.54, "learning_rate": 2.9946524064171117e-07, "logits/chosen": -1.0446094274520874, "logits/rejected": -1.1010466814041138, "logps/chosen": -94.72644805908203, "logps/rejected": -95.78164672851562, "loss": 0.6672, "rewards/accuracies": 0.75, "rewards/chosen": 0.2673872113227844, "rewards/margins": 0.1685742437839508, "rewards/rejected": 0.09881296008825302, "step": 336 }, { "epoch": 0.54, "learning_rate": 3.0035650623885916e-07, "logits/chosen": -1.3309125900268555, "logits/rejected": -1.325650930404663, "logps/chosen": -57.150413513183594, "logps/rejected": -85.42903137207031, "loss": 0.6582, "rewards/accuracies": 1.0, "rewards/chosen": 0.1457572877407074, "rewards/margins": 0.17408093810081482, "rewards/rejected": -0.02832365222275257, "step": 337 }, { "epoch": 0.54, "learning_rate": 3.0124777183600716e-07, "logits/chosen": -1.131293773651123, "logits/rejected": -1.1094197034835815, "logps/chosen": -70.03778076171875, "logps/rejected": -114.54940795898438, "loss": 0.6316, "rewards/accuracies": 0.5, "rewards/chosen": 0.05447092279791832, "rewards/margins": 0.07138299942016602, "rewards/rejected": -0.016912082210183144, "step": 338 }, { "epoch": 0.54, "learning_rate": 3.0213903743315505e-07, "logits/chosen": -1.033536672592163, "logits/rejected": -1.0619078874588013, "logps/chosen": -104.26609802246094, "logps/rejected": -97.17399597167969, "loss": 0.6977, "rewards/accuracies": 0.75, "rewards/chosen": 0.19521158933639526, "rewards/margins": 0.09221534430980682, "rewards/rejected": 0.10299625992774963, "step": 339 }, { "epoch": 0.55, "learning_rate": 3.0303030303030305e-07, "logits/chosen": -1.4197341203689575, "logits/rejected": -1.4034355878829956, "logps/chosen": -95.6905517578125, "logps/rejected": -105.3212661743164, "loss": 0.6722, "rewards/accuracies": 0.5, "rewards/chosen": 0.05179291218519211, "rewards/margins": 0.00024356693029403687, "rewards/rejected": 0.05154934152960777, "step": 340 }, { "epoch": 0.55, "learning_rate": 3.0392156862745094e-07, "logits/chosen": -1.1902062892913818, "logits/rejected": -1.191072702407837, "logps/chosen": -62.47047424316406, "logps/rejected": -75.2205810546875, "loss": 0.6456, "rewards/accuracies": 0.75, "rewards/chosen": 0.1127171516418457, "rewards/margins": 0.1579171121120453, "rewards/rejected": -0.04519996792078018, "step": 341 }, { "epoch": 0.55, "learning_rate": 3.0481283422459894e-07, "logits/chosen": -1.2813851833343506, "logits/rejected": -1.3402589559555054, "logps/chosen": -79.7896728515625, "logps/rejected": -83.75751495361328, "loss": 0.6686, "rewards/accuracies": 0.5, "rewards/chosen": 0.039339445531368256, "rewards/margins": -0.10005035996437073, "rewards/rejected": 0.13938981294631958, "step": 342 }, { "epoch": 0.55, "learning_rate": 3.057040998217469e-07, "logits/chosen": -1.1492787599563599, "logits/rejected": -1.1995255947113037, "logps/chosen": -70.57642364501953, "logps/rejected": -81.1024398803711, "loss": 0.6584, "rewards/accuracies": 1.0, "rewards/chosen": 0.14896260201931, "rewards/margins": 0.16111516952514648, "rewards/rejected": -0.012152578681707382, "step": 343 }, { "epoch": 0.55, "learning_rate": 3.065953654188948e-07, "logits/chosen": -1.135663628578186, "logits/rejected": -1.2606675624847412, "logps/chosen": -69.75550079345703, "logps/rejected": -90.71636199951172, "loss": 0.682, "rewards/accuracies": 0.75, "rewards/chosen": 0.08117876201868057, "rewards/margins": 0.11775828152894974, "rewards/rejected": -0.03657951578497887, "step": 344 }, { "epoch": 0.55, "learning_rate": 3.0748663101604277e-07, "logits/chosen": -1.1516859531402588, "logits/rejected": -1.1780242919921875, "logps/chosen": -121.22928619384766, "logps/rejected": -77.4291763305664, "loss": 0.654, "rewards/accuracies": 0.5, "rewards/chosen": 0.12420845031738281, "rewards/margins": 0.0853872299194336, "rewards/rejected": 0.03882122039794922, "step": 345 }, { "epoch": 0.56, "learning_rate": 3.083778966131907e-07, "logits/chosen": -1.165571928024292, "logits/rejected": -1.129453420639038, "logps/chosen": -85.010498046875, "logps/rejected": -78.66081237792969, "loss": 0.6905, "rewards/accuracies": 0.5, "rewards/chosen": 0.11477203667163849, "rewards/margins": -0.020890049636363983, "rewards/rejected": 0.13566207885742188, "step": 346 }, { "epoch": 0.56, "learning_rate": 3.0926916221033866e-07, "logits/chosen": -1.1392394304275513, "logits/rejected": -1.2081985473632812, "logps/chosen": -111.85536193847656, "logps/rejected": -112.08555603027344, "loss": 0.6448, "rewards/accuracies": 0.75, "rewards/chosen": 0.12098044902086258, "rewards/margins": 0.10723160207271576, "rewards/rejected": 0.013748839497566223, "step": 347 }, { "epoch": 0.56, "learning_rate": 3.1016042780748665e-07, "logits/chosen": -1.1994439363479614, "logits/rejected": -1.2931379079818726, "logps/chosen": -87.59042358398438, "logps/rejected": -89.93650817871094, "loss": 0.637, "rewards/accuracies": 1.0, "rewards/chosen": 0.18579445779323578, "rewards/margins": 0.2058069258928299, "rewards/rejected": -0.020012473687529564, "step": 348 }, { "epoch": 0.56, "learning_rate": 3.1105169340463454e-07, "logits/chosen": -1.197122573852539, "logits/rejected": -1.2733999490737915, "logps/chosen": -96.97772216796875, "logps/rejected": -108.40806579589844, "loss": 0.6369, "rewards/accuracies": 1.0, "rewards/chosen": 0.42491811513900757, "rewards/margins": 0.33811426162719727, "rewards/rejected": 0.08680381625890732, "step": 349 }, { "epoch": 0.56, "learning_rate": 3.1194295900178254e-07, "logits/chosen": -1.389410376548767, "logits/rejected": -1.4083772897720337, "logps/chosen": -79.16168212890625, "logps/rejected": -74.05593872070312, "loss": 0.6676, "rewards/accuracies": 0.5, "rewards/chosen": 0.1258258819580078, "rewards/margins": 0.1736762970685959, "rewards/rejected": -0.04785041883587837, "step": 350 }, { "epoch": 0.56, "learning_rate": 3.1283422459893043e-07, "logits/chosen": -1.2818942070007324, "logits/rejected": -1.251924991607666, "logps/chosen": -100.95098876953125, "logps/rejected": -86.05398559570312, "loss": 0.6439, "rewards/accuracies": 1.0, "rewards/chosen": 0.21145668625831604, "rewards/margins": 0.4483541250228882, "rewards/rejected": -0.23689746856689453, "step": 351 }, { "epoch": 0.57, "learning_rate": 3.1372549019607843e-07, "logits/chosen": -1.2242491245269775, "logits/rejected": -1.2055611610412598, "logps/chosen": -93.73908996582031, "logps/rejected": -91.90907287597656, "loss": 0.6419, "rewards/accuracies": 0.75, "rewards/chosen": 0.2045745849609375, "rewards/margins": 0.07897510379552841, "rewards/rejected": 0.1255994737148285, "step": 352 }, { "epoch": 0.57, "learning_rate": 3.1461675579322637e-07, "logits/chosen": -1.1034070253372192, "logits/rejected": -1.047081708908081, "logps/chosen": -95.1728515625, "logps/rejected": -87.18228149414062, "loss": 0.6929, "rewards/accuracies": 0.5, "rewards/chosen": 0.05260887369513512, "rewards/margins": -0.03103962168097496, "rewards/rejected": 0.08364849537611008, "step": 353 }, { "epoch": 0.57, "learning_rate": 3.155080213903743e-07, "logits/chosen": -1.3019390106201172, "logits/rejected": -1.3352599143981934, "logps/chosen": -92.92581176757812, "logps/rejected": -76.58450317382812, "loss": 0.6532, "rewards/accuracies": 0.5, "rewards/chosen": 0.2540329694747925, "rewards/margins": 0.22556601464748383, "rewards/rejected": 0.028466984629631042, "step": 354 }, { "epoch": 0.57, "learning_rate": 3.1639928698752226e-07, "logits/chosen": -1.0122573375701904, "logits/rejected": -1.0527276992797852, "logps/chosen": -80.72872924804688, "logps/rejected": -92.98417663574219, "loss": 0.6332, "rewards/accuracies": 1.0, "rewards/chosen": 0.10311374813318253, "rewards/margins": 0.11633892357349396, "rewards/rejected": -0.013225173577666283, "step": 355 }, { "epoch": 0.57, "learning_rate": 3.172905525846702e-07, "logits/chosen": -1.4666850566864014, "logits/rejected": -1.3761698007583618, "logps/chosen": -82.38252258300781, "logps/rejected": -90.35575866699219, "loss": 0.6527, "rewards/accuracies": 0.75, "rewards/chosen": -0.08907909691333771, "rewards/margins": 0.219102680683136, "rewards/rejected": -0.3081817626953125, "step": 356 }, { "epoch": 0.57, "learning_rate": 3.1818181818181815e-07, "logits/chosen": -1.2391200065612793, "logits/rejected": -1.1638773679733276, "logps/chosen": -82.595703125, "logps/rejected": -92.2404556274414, "loss": 0.6453, "rewards/accuracies": 1.0, "rewards/chosen": 0.1964838057756424, "rewards/margins": 0.11183013021945953, "rewards/rejected": 0.08465366065502167, "step": 357 }, { "epoch": 0.57, "learning_rate": 3.1907308377896615e-07, "logits/chosen": -1.2415614128112793, "logits/rejected": -1.3072352409362793, "logps/chosen": -82.42324829101562, "logps/rejected": -79.86811828613281, "loss": 0.6147, "rewards/accuracies": 1.0, "rewards/chosen": 0.3321830928325653, "rewards/margins": 0.3355535566806793, "rewards/rejected": -0.003370479680597782, "step": 358 }, { "epoch": 0.58, "learning_rate": 3.1996434937611404e-07, "logits/chosen": -1.4379420280456543, "logits/rejected": -1.4619537591934204, "logps/chosen": -96.29267883300781, "logps/rejected": -99.57647705078125, "loss": 0.6722, "rewards/accuracies": 0.5, "rewards/chosen": -0.10045166313648224, "rewards/margins": -0.1403556764125824, "rewards/rejected": 0.039904020726680756, "step": 359 }, { "epoch": 0.58, "learning_rate": 3.2085561497326203e-07, "logits/chosen": -1.025461196899414, "logits/rejected": -0.9652552604675293, "logps/chosen": -66.25369262695312, "logps/rejected": -86.99246215820312, "loss": 0.6554, "rewards/accuracies": 0.75, "rewards/chosen": 0.09439974278211594, "rewards/margins": 0.05792675167322159, "rewards/rejected": 0.03647299110889435, "step": 360 }, { "epoch": 0.58, "learning_rate": 3.2174688057041e-07, "logits/chosen": -1.1521224975585938, "logits/rejected": -1.2380375862121582, "logps/chosen": -97.18840789794922, "logps/rejected": -99.73968505859375, "loss": 0.6752, "rewards/accuracies": 0.0, "rewards/chosen": -0.04388751834630966, "rewards/margins": -0.15880241990089417, "rewards/rejected": 0.1149148941040039, "step": 361 }, { "epoch": 0.58, "learning_rate": 3.226381461675579e-07, "logits/chosen": -1.208679437637329, "logits/rejected": -1.185349702835083, "logps/chosen": -72.53080749511719, "logps/rejected": -80.53642272949219, "loss": 0.661, "rewards/accuracies": 1.0, "rewards/chosen": 0.2304014265537262, "rewards/margins": 0.2845538258552551, "rewards/rejected": -0.05415239557623863, "step": 362 }, { "epoch": 0.58, "learning_rate": 3.2352941176470586e-07, "logits/chosen": -1.336916446685791, "logits/rejected": -1.3191590309143066, "logps/chosen": -94.59489440917969, "logps/rejected": -86.10430908203125, "loss": 0.6473, "rewards/accuracies": 0.75, "rewards/chosen": -0.11104259639978409, "rewards/margins": 0.09010010212659836, "rewards/rejected": -0.20114269852638245, "step": 363 }, { "epoch": 0.58, "learning_rate": 3.244206773618538e-07, "logits/chosen": -1.2220344543457031, "logits/rejected": -1.233252763748169, "logps/chosen": -68.93061828613281, "logps/rejected": -66.35883331298828, "loss": 0.6541, "rewards/accuracies": 0.75, "rewards/chosen": 0.07871419191360474, "rewards/margins": 0.13881589472293854, "rewards/rejected": -0.0601017028093338, "step": 364 }, { "epoch": 0.59, "learning_rate": 3.2531194295900175e-07, "logits/chosen": -1.2429455518722534, "logits/rejected": -1.1710143089294434, "logps/chosen": -85.29800415039062, "logps/rejected": -92.43489837646484, "loss": 0.6475, "rewards/accuracies": 0.5, "rewards/chosen": -0.0367710143327713, "rewards/margins": -0.1528114378452301, "rewards/rejected": 0.1160404235124588, "step": 365 }, { "epoch": 0.59, "learning_rate": 3.2620320855614975e-07, "logits/chosen": -1.3044629096984863, "logits/rejected": -1.3333033323287964, "logps/chosen": -68.10717010498047, "logps/rejected": -60.813785552978516, "loss": 0.6339, "rewards/accuracies": 1.0, "rewards/chosen": 0.2690240740776062, "rewards/margins": 0.3610461354255676, "rewards/rejected": -0.09202203899621964, "step": 366 }, { "epoch": 0.59, "learning_rate": 3.2709447415329764e-07, "logits/chosen": -1.2491205930709839, "logits/rejected": -1.2234408855438232, "logps/chosen": -89.05366516113281, "logps/rejected": -91.87873840332031, "loss": 0.6682, "rewards/accuracies": 0.25, "rewards/chosen": 0.10182952880859375, "rewards/margins": -0.040028758347034454, "rewards/rejected": 0.1418582946062088, "step": 367 }, { "epoch": 0.59, "learning_rate": 3.2798573975044564e-07, "logits/chosen": -1.2973251342773438, "logits/rejected": -1.288782000541687, "logps/chosen": -101.07843017578125, "logps/rejected": -106.12442016601562, "loss": 0.6715, "rewards/accuracies": 0.5, "rewards/chosen": 0.15144634246826172, "rewards/margins": -0.18129310011863708, "rewards/rejected": 0.3327394425868988, "step": 368 }, { "epoch": 0.59, "learning_rate": 3.2887700534759353e-07, "logits/chosen": -1.32097589969635, "logits/rejected": -1.3288911581039429, "logps/chosen": -83.82524108886719, "logps/rejected": -89.81623840332031, "loss": 0.6705, "rewards/accuracies": 0.25, "rewards/chosen": 0.13176709413528442, "rewards/margins": -0.039643287658691406, "rewards/rejected": 0.17141036689281464, "step": 369 }, { "epoch": 0.59, "learning_rate": 3.297682709447415e-07, "logits/chosen": -1.2945737838745117, "logits/rejected": -1.3219444751739502, "logps/chosen": -81.30706787109375, "logps/rejected": -67.80621337890625, "loss": 0.6445, "rewards/accuracies": 0.75, "rewards/chosen": 0.1486591398715973, "rewards/margins": 0.10006695240736008, "rewards/rejected": 0.04859218746423721, "step": 370 }, { "epoch": 0.6, "learning_rate": 3.3065953654188947e-07, "logits/chosen": -1.2011052370071411, "logits/rejected": -1.3189787864685059, "logps/chosen": -83.37198638916016, "logps/rejected": -86.59442901611328, "loss": 0.6368, "rewards/accuracies": 0.5, "rewards/chosen": 0.03623456880450249, "rewards/margins": 0.02062254212796688, "rewards/rejected": 0.015612030401825905, "step": 371 }, { "epoch": 0.6, "learning_rate": 3.315508021390374e-07, "logits/chosen": -1.1101040840148926, "logits/rejected": -1.2037853002548218, "logps/chosen": -90.63269805908203, "logps/rejected": -104.23745727539062, "loss": 0.6407, "rewards/accuracies": 0.5, "rewards/chosen": -0.013490297831594944, "rewards/margins": 0.019208140671253204, "rewards/rejected": -0.032698437571525574, "step": 372 }, { "epoch": 0.6, "learning_rate": 3.3244206773618536e-07, "logits/chosen": -1.4860928058624268, "logits/rejected": -1.4265408515930176, "logps/chosen": -55.30668640136719, "logps/rejected": -53.76654052734375, "loss": 0.6303, "rewards/accuracies": 0.5, "rewards/chosen": -0.069188691675663, "rewards/margins": -0.09972162544727325, "rewards/rejected": 0.03053293190896511, "step": 373 }, { "epoch": 0.6, "learning_rate": 3.333333333333333e-07, "logits/chosen": -1.3011339902877808, "logits/rejected": -1.298499345779419, "logps/chosen": -85.10960388183594, "logps/rejected": -76.63179016113281, "loss": 0.6355, "rewards/accuracies": 0.5, "rewards/chosen": 0.06693020462989807, "rewards/margins": 0.14191971719264984, "rewards/rejected": -0.07498951256275177, "step": 374 }, { "epoch": 0.6, "learning_rate": 3.342245989304813e-07, "logits/chosen": -1.3101189136505127, "logits/rejected": -1.1973093748092651, "logps/chosen": -88.72516632080078, "logps/rejected": -98.17828369140625, "loss": 0.6462, "rewards/accuracies": 0.75, "rewards/chosen": 0.3494857847690582, "rewards/margins": 0.3560778796672821, "rewards/rejected": -0.006592085584998131, "step": 375 }, { "epoch": 0.6, "learning_rate": 3.3511586452762924e-07, "logits/chosen": -1.1811562776565552, "logits/rejected": -1.166032314300537, "logps/chosen": -76.493896484375, "logps/rejected": -82.70040893554688, "loss": 0.7138, "rewards/accuracies": 0.5, "rewards/chosen": 0.057799145579338074, "rewards/margins": 0.02832164615392685, "rewards/rejected": 0.029477499425411224, "step": 376 }, { "epoch": 0.61, "learning_rate": 3.360071301247772e-07, "logits/chosen": -1.093819499015808, "logits/rejected": -1.0885708332061768, "logps/chosen": -69.27837371826172, "logps/rejected": -78.768798828125, "loss": 0.684, "rewards/accuracies": 0.5, "rewards/chosen": 0.04837637022137642, "rewards/margins": 0.1414116770029068, "rewards/rejected": -0.09303531795740128, "step": 377 }, { "epoch": 0.61, "learning_rate": 3.3689839572192513e-07, "logits/chosen": -1.0821808576583862, "logits/rejected": -1.220176100730896, "logps/chosen": -86.40784454345703, "logps/rejected": -76.07899475097656, "loss": 0.608, "rewards/accuracies": 0.75, "rewards/chosen": 0.01283950824290514, "rewards/margins": 0.0928201675415039, "rewards/rejected": -0.07998065650463104, "step": 378 }, { "epoch": 0.61, "learning_rate": 3.3778966131907307e-07, "logits/chosen": -1.3450623750686646, "logits/rejected": -1.2655870914459229, "logps/chosen": -86.39474487304688, "logps/rejected": -88.1534194946289, "loss": 0.6891, "rewards/accuracies": 0.75, "rewards/chosen": 0.19252319633960724, "rewards/margins": 0.10598678886890411, "rewards/rejected": 0.08653640747070312, "step": 379 }, { "epoch": 0.61, "learning_rate": 3.38680926916221e-07, "logits/chosen": -1.100825548171997, "logits/rejected": -1.1182647943496704, "logps/chosen": -81.43254852294922, "logps/rejected": -79.96047973632812, "loss": 0.6812, "rewards/accuracies": 1.0, "rewards/chosen": 0.0992332473397255, "rewards/margins": 0.2478349804878235, "rewards/rejected": -0.1486017256975174, "step": 380 }, { "epoch": 0.61, "learning_rate": 3.39572192513369e-07, "logits/chosen": -1.1832740306854248, "logits/rejected": -1.0460376739501953, "logps/chosen": -110.68850708007812, "logps/rejected": -83.01714324951172, "loss": 0.6388, "rewards/accuracies": 1.0, "rewards/chosen": 0.13920727372169495, "rewards/margins": 0.2141868621110916, "rewards/rejected": -0.07497958838939667, "step": 381 }, { "epoch": 0.61, "learning_rate": 3.404634581105169e-07, "logits/chosen": -1.2313740253448486, "logits/rejected": -1.2706027030944824, "logps/chosen": -67.66912078857422, "logps/rejected": -82.19145965576172, "loss": 0.6094, "rewards/accuracies": 1.0, "rewards/chosen": 0.11870632320642471, "rewards/margins": 0.3881695866584778, "rewards/rejected": -0.2694632411003113, "step": 382 }, { "epoch": 0.61, "learning_rate": 3.413547237076649e-07, "logits/chosen": -1.3749035596847534, "logits/rejected": -1.3463488817214966, "logps/chosen": -87.93421936035156, "logps/rejected": -92.43940734863281, "loss": 0.6267, "rewards/accuracies": 0.75, "rewards/chosen": -0.0707298293709755, "rewards/margins": 0.17520791292190552, "rewards/rejected": -0.24593773484230042, "step": 383 }, { "epoch": 0.62, "learning_rate": 3.422459893048128e-07, "logits/chosen": -1.3163602352142334, "logits/rejected": -1.2924959659576416, "logps/chosen": -81.60529327392578, "logps/rejected": -74.29386138916016, "loss": 0.6752, "rewards/accuracies": 0.75, "rewards/chosen": 0.12190552055835724, "rewards/margins": 0.3265104591846466, "rewards/rejected": -0.20460492372512817, "step": 384 }, { "epoch": 0.62, "learning_rate": 3.431372549019608e-07, "logits/chosen": -1.3704231977462769, "logits/rejected": -1.4214121103286743, "logps/chosen": -97.04386901855469, "logps/rejected": -94.88743591308594, "loss": 0.6235, "rewards/accuracies": 1.0, "rewards/chosen": 0.1238035261631012, "rewards/margins": 0.15635471045970917, "rewards/rejected": -0.03255119174718857, "step": 385 }, { "epoch": 0.62, "learning_rate": 3.4402852049910873e-07, "logits/chosen": -1.38521409034729, "logits/rejected": -1.3705888986587524, "logps/chosen": -71.66741943359375, "logps/rejected": -98.85029602050781, "loss": 0.616, "rewards/accuracies": 1.0, "rewards/chosen": 0.14586620032787323, "rewards/margins": 0.3557409346103668, "rewards/rejected": -0.2098747342824936, "step": 386 }, { "epoch": 0.62, "learning_rate": 3.449197860962567e-07, "logits/chosen": -1.3385391235351562, "logits/rejected": -1.2553272247314453, "logps/chosen": -94.73194885253906, "logps/rejected": -103.12027740478516, "loss": 0.6515, "rewards/accuracies": 1.0, "rewards/chosen": 0.29380762577056885, "rewards/margins": 0.25849783420562744, "rewards/rejected": 0.035309791564941406, "step": 387 }, { "epoch": 0.62, "learning_rate": 3.458110516934046e-07, "logits/chosen": -1.2103568315505981, "logits/rejected": -1.2251718044281006, "logps/chosen": -68.93494415283203, "logps/rejected": -112.6019287109375, "loss": 0.7005, "rewards/accuracies": 0.5, "rewards/chosen": 0.017379092052578926, "rewards/margins": 0.02281179279088974, "rewards/rejected": -0.0054327016696333885, "step": 388 }, { "epoch": 0.62, "learning_rate": 3.4670231729055257e-07, "logits/chosen": -1.2033101320266724, "logits/rejected": -1.172431230545044, "logps/chosen": -95.48100280761719, "logps/rejected": -90.35955810546875, "loss": 0.634, "rewards/accuracies": 0.5, "rewards/chosen": -0.012022022157907486, "rewards/margins": 0.08636283129453659, "rewards/rejected": -0.09838485717773438, "step": 389 }, { "epoch": 0.63, "learning_rate": 3.475935828877005e-07, "logits/chosen": -1.1878324747085571, "logits/rejected": -1.1600477695465088, "logps/chosen": -117.0290298461914, "logps/rejected": -121.62348937988281, "loss": 0.6994, "rewards/accuracies": 0.75, "rewards/chosen": 0.2388772964477539, "rewards/margins": 0.18195724487304688, "rewards/rejected": 0.05692005529999733, "step": 390 }, { "epoch": 0.63, "learning_rate": 3.484848484848485e-07, "logits/chosen": -1.1555097103118896, "logits/rejected": -1.1768207550048828, "logps/chosen": -65.32601928710938, "logps/rejected": -85.47814178466797, "loss": 0.6526, "rewards/accuracies": 0.5, "rewards/chosen": 0.15381231904029846, "rewards/margins": -0.003842446953058243, "rewards/rejected": 0.1576547771692276, "step": 391 }, { "epoch": 0.63, "learning_rate": 3.493761140819964e-07, "logits/chosen": -1.2543988227844238, "logits/rejected": -1.165090560913086, "logps/chosen": -75.44717407226562, "logps/rejected": -73.69781494140625, "loss": 0.6549, "rewards/accuracies": 0.5, "rewards/chosen": 0.17111997306346893, "rewards/margins": 0.2362898886203766, "rewards/rejected": -0.06516990065574646, "step": 392 }, { "epoch": 0.63, "learning_rate": 3.502673796791444e-07, "logits/chosen": -1.2911741733551025, "logits/rejected": -1.3765945434570312, "logps/chosen": -72.8716049194336, "logps/rejected": -76.04766845703125, "loss": 0.644, "rewards/accuracies": 1.0, "rewards/chosen": 0.09749536216259003, "rewards/margins": 0.11990642547607422, "rewards/rejected": -0.022411061450839043, "step": 393 }, { "epoch": 0.63, "learning_rate": 3.511586452762923e-07, "logits/chosen": -1.1218092441558838, "logits/rejected": -1.1795790195465088, "logps/chosen": -75.23165893554688, "logps/rejected": -100.90333557128906, "loss": 0.702, "rewards/accuracies": 0.75, "rewards/chosen": 0.07753906399011612, "rewards/margins": 0.23031121492385864, "rewards/rejected": -0.15277214348316193, "step": 394 }, { "epoch": 0.63, "learning_rate": 3.520499108734403e-07, "logits/chosen": -1.2529432773590088, "logits/rejected": -1.1057623624801636, "logps/chosen": -87.33377075195312, "logps/rejected": -77.89424896240234, "loss": 0.6006, "rewards/accuracies": 0.5, "rewards/chosen": 0.6743290424346924, "rewards/margins": 0.674172043800354, "rewards/rejected": 0.00015697465278208256, "step": 395 }, { "epoch": 0.64, "learning_rate": 3.529411764705882e-07, "logits/chosen": -1.3106813430786133, "logits/rejected": -1.2821837663650513, "logps/chosen": -105.39674377441406, "logps/rejected": -103.61710357666016, "loss": 0.5822, "rewards/accuracies": 0.75, "rewards/chosen": 0.3019241392612457, "rewards/margins": 0.19212952256202698, "rewards/rejected": 0.10979461669921875, "step": 396 }, { "epoch": 0.64, "learning_rate": 3.5383244206773617e-07, "logits/chosen": -1.1724590063095093, "logits/rejected": -1.095735788345337, "logps/chosen": -110.49842834472656, "logps/rejected": -98.04237365722656, "loss": 0.606, "rewards/accuracies": 1.0, "rewards/chosen": 0.21506786346435547, "rewards/margins": 0.3884463906288147, "rewards/rejected": -0.17337855696678162, "step": 397 }, { "epoch": 0.64, "learning_rate": 3.547237076648841e-07, "logits/chosen": -1.5231051445007324, "logits/rejected": -1.5002015829086304, "logps/chosen": -100.35980224609375, "logps/rejected": -96.23614501953125, "loss": 0.6705, "rewards/accuracies": 0.5, "rewards/chosen": -0.15456752479076385, "rewards/margins": -0.07866973429918289, "rewards/rejected": -0.07589778304100037, "step": 398 }, { "epoch": 0.64, "learning_rate": 3.5561497326203206e-07, "logits/chosen": -1.1538028717041016, "logits/rejected": -1.2506217956542969, "logps/chosen": -95.0109634399414, "logps/rejected": -84.59688568115234, "loss": 0.6128, "rewards/accuracies": 0.75, "rewards/chosen": 0.07230892777442932, "rewards/margins": 0.1273467093706131, "rewards/rejected": -0.05503778159618378, "step": 399 }, { "epoch": 0.64, "learning_rate": 3.5650623885918e-07, "logits/chosen": -1.2661176919937134, "logits/rejected": -1.233147144317627, "logps/chosen": -96.84146881103516, "logps/rejected": -102.0416259765625, "loss": 0.5738, "rewards/accuracies": 1.0, "rewards/chosen": 0.21052856743335724, "rewards/margins": 0.5785010457038879, "rewards/rejected": -0.3679724633693695, "step": 400 }, { "epoch": 0.64, "learning_rate": 3.57397504456328e-07, "logits/chosen": -1.133629322052002, "logits/rejected": -1.1447527408599854, "logps/chosen": -92.78630065917969, "logps/rejected": -103.00799560546875, "loss": 0.5324, "rewards/accuracies": 0.25, "rewards/chosen": 0.054760172963142395, "rewards/margins": -0.021310605108737946, "rewards/rejected": 0.07607078552246094, "step": 401 }, { "epoch": 0.65, "learning_rate": 3.582887700534759e-07, "logits/chosen": -1.2577345371246338, "logits/rejected": -1.1366872787475586, "logps/chosen": -71.67396545410156, "logps/rejected": -97.9654312133789, "loss": 0.6239, "rewards/accuracies": 0.75, "rewards/chosen": 0.5582008361816406, "rewards/margins": 0.511823832988739, "rewards/rejected": 0.04637698829174042, "step": 402 }, { "epoch": 0.65, "learning_rate": 3.591800356506239e-07, "logits/chosen": -1.2527836561203003, "logits/rejected": -1.2571171522140503, "logps/chosen": -102.89054870605469, "logps/rejected": -101.0600357055664, "loss": 0.6907, "rewards/accuracies": 0.25, "rewards/chosen": -0.1261884719133377, "rewards/margins": -0.23575058579444885, "rewards/rejected": 0.10956211388111115, "step": 403 }, { "epoch": 0.65, "learning_rate": 3.600713012477718e-07, "logits/chosen": -1.4421614408493042, "logits/rejected": -1.4135040044784546, "logps/chosen": -64.36327362060547, "logps/rejected": -71.80448150634766, "loss": 0.7028, "rewards/accuracies": 0.25, "rewards/chosen": -0.03604850918054581, "rewards/margins": -0.05248910188674927, "rewards/rejected": 0.016440588980913162, "step": 404 }, { "epoch": 0.65, "learning_rate": 3.609625668449198e-07, "logits/chosen": -1.1047401428222656, "logits/rejected": -1.0041768550872803, "logps/chosen": -77.09197998046875, "logps/rejected": -82.11797332763672, "loss": 0.6283, "rewards/accuracies": 1.0, "rewards/chosen": 0.280493825674057, "rewards/margins": 0.32525700330734253, "rewards/rejected": -0.04476318508386612, "step": 405 }, { "epoch": 0.65, "learning_rate": 3.618538324420677e-07, "logits/chosen": -1.2029763460159302, "logits/rejected": -1.2655439376831055, "logps/chosen": -79.59333801269531, "logps/rejected": -80.0947036743164, "loss": 0.621, "rewards/accuracies": 1.0, "rewards/chosen": 0.16619901359081268, "rewards/margins": 0.24877768754959106, "rewards/rejected": -0.08257866650819778, "step": 406 }, { "epoch": 0.65, "learning_rate": 3.6274509803921566e-07, "logits/chosen": -1.3102222681045532, "logits/rejected": -1.3523752689361572, "logps/chosen": -79.61971282958984, "logps/rejected": -80.59629821777344, "loss": 0.7123, "rewards/accuracies": 0.75, "rewards/chosen": -0.017102908343076706, "rewards/margins": 0.08415079116821289, "rewards/rejected": -0.1012537032365799, "step": 407 }, { "epoch": 0.65, "learning_rate": 3.636363636363636e-07, "logits/chosen": -1.329397439956665, "logits/rejected": -1.2707304954528809, "logps/chosen": -99.12361907958984, "logps/rejected": -82.02562713623047, "loss": 0.7147, "rewards/accuracies": 0.0, "rewards/chosen": -0.25493907928466797, "rewards/margins": -0.4594806730747223, "rewards/rejected": 0.20454159379005432, "step": 408 }, { "epoch": 0.66, "learning_rate": 3.6452762923351155e-07, "logits/chosen": -1.2714667320251465, "logits/rejected": -1.2921520471572876, "logps/chosen": -71.1310806274414, "logps/rejected": -68.48872375488281, "loss": 0.5946, "rewards/accuracies": 0.75, "rewards/chosen": 0.1605023443698883, "rewards/margins": 0.42172977328300476, "rewards/rejected": -0.26122742891311646, "step": 409 }, { "epoch": 0.66, "learning_rate": 3.654188948306595e-07, "logits/chosen": -1.5152530670166016, "logits/rejected": -1.5307106971740723, "logps/chosen": -98.88082885742188, "logps/rejected": -106.0846176147461, "loss": 0.5964, "rewards/accuracies": 0.75, "rewards/chosen": -0.06149701774120331, "rewards/margins": 0.42133378982543945, "rewards/rejected": -0.48283082246780396, "step": 410 }, { "epoch": 0.66, "learning_rate": 3.663101604278075e-07, "logits/chosen": -1.2989656925201416, "logits/rejected": -1.331050157546997, "logps/chosen": -90.549072265625, "logps/rejected": -105.76593017578125, "loss": 0.6343, "rewards/accuracies": 0.25, "rewards/chosen": -0.23155003786087036, "rewards/margins": -0.1511373519897461, "rewards/rejected": -0.08041267096996307, "step": 411 }, { "epoch": 0.66, "learning_rate": 3.672014260249554e-07, "logits/chosen": -1.0633184909820557, "logits/rejected": -1.1093069314956665, "logps/chosen": -78.18463134765625, "logps/rejected": -68.89715576171875, "loss": 0.6547, "rewards/accuracies": 0.75, "rewards/chosen": -0.002037622034549713, "rewards/margins": 0.14608974754810333, "rewards/rejected": -0.14812736213207245, "step": 412 }, { "epoch": 0.66, "learning_rate": 3.680926916221034e-07, "logits/chosen": -1.3019121885299683, "logits/rejected": -1.237338900566101, "logps/chosen": -71.09500122070312, "logps/rejected": -81.72965240478516, "loss": 0.6335, "rewards/accuracies": 0.75, "rewards/chosen": 0.18916082382202148, "rewards/margins": 0.1384078860282898, "rewards/rejected": 0.050752922892570496, "step": 413 }, { "epoch": 0.66, "learning_rate": 3.689839572192513e-07, "logits/chosen": -1.277834177017212, "logits/rejected": -1.2826359272003174, "logps/chosen": -66.15577697753906, "logps/rejected": -72.5907974243164, "loss": 0.706, "rewards/accuracies": 0.5, "rewards/chosen": 0.0682130828499794, "rewards/margins": 0.07889804989099503, "rewards/rejected": -0.010684968903660774, "step": 414 }, { "epoch": 0.67, "learning_rate": 3.6987522281639927e-07, "logits/chosen": -1.239984393119812, "logits/rejected": -1.2482647895812988, "logps/chosen": -80.39286804199219, "logps/rejected": -77.93550109863281, "loss": 0.6302, "rewards/accuracies": 0.5, "rewards/chosen": 0.028056718409061432, "rewards/margins": -0.08626452833414078, "rewards/rejected": 0.11432123184204102, "step": 415 }, { "epoch": 0.67, "learning_rate": 3.7076648841354726e-07, "logits/chosen": -1.0962185859680176, "logits/rejected": -1.1565070152282715, "logps/chosen": -110.23213195800781, "logps/rejected": -97.54344177246094, "loss": 0.6743, "rewards/accuracies": 1.0, "rewards/chosen": 0.16205865144729614, "rewards/margins": 0.3520887494087219, "rewards/rejected": -0.19003009796142578, "step": 416 }, { "epoch": 0.67, "learning_rate": 3.7165775401069515e-07, "logits/chosen": -1.1722536087036133, "logits/rejected": -1.2589123249053955, "logps/chosen": -82.18974304199219, "logps/rejected": -96.46875, "loss": 0.6358, "rewards/accuracies": 0.25, "rewards/chosen": -0.019259454682469368, "rewards/margins": -0.16217099130153656, "rewards/rejected": 0.14291153848171234, "step": 417 }, { "epoch": 0.67, "learning_rate": 3.7254901960784315e-07, "logits/chosen": -1.235077142715454, "logits/rejected": -1.342763066291809, "logps/chosen": -78.13264465332031, "logps/rejected": -71.77613830566406, "loss": 0.6382, "rewards/accuracies": 1.0, "rewards/chosen": -0.07785630226135254, "rewards/margins": 0.30776700377464294, "rewards/rejected": -0.3856232464313507, "step": 418 }, { "epoch": 0.67, "learning_rate": 3.7344028520499104e-07, "logits/chosen": -0.9298986196517944, "logits/rejected": -0.9937185049057007, "logps/chosen": -85.10452270507812, "logps/rejected": -75.92705535888672, "loss": 0.6695, "rewards/accuracies": 0.25, "rewards/chosen": -0.20175619423389435, "rewards/margins": -0.21358099579811096, "rewards/rejected": 0.011824799701571465, "step": 419 }, { "epoch": 0.67, "learning_rate": 3.7433155080213904e-07, "logits/chosen": -1.2044453620910645, "logits/rejected": -1.169341802597046, "logps/chosen": -75.73751831054688, "logps/rejected": -65.20587921142578, "loss": 0.6217, "rewards/accuracies": 0.5, "rewards/chosen": -0.1165313720703125, "rewards/margins": 0.02438211441040039, "rewards/rejected": -0.1409134864807129, "step": 420 }, { "epoch": 0.68, "learning_rate": 3.75222816399287e-07, "logits/chosen": -1.0531060695648193, "logits/rejected": -1.1608250141143799, "logps/chosen": -92.49887084960938, "logps/rejected": -80.49767303466797, "loss": 0.6618, "rewards/accuracies": 0.25, "rewards/chosen": -0.09879360347986221, "rewards/margins": -0.23068523406982422, "rewards/rejected": 0.1318916231393814, "step": 421 }, { "epoch": 0.68, "learning_rate": 3.761140819964349e-07, "logits/chosen": -1.084995150566101, "logits/rejected": -1.0075992345809937, "logps/chosen": -92.8894271850586, "logps/rejected": -86.93257904052734, "loss": 0.5995, "rewards/accuracies": 1.0, "rewards/chosen": 0.343304842710495, "rewards/margins": 0.4756888449192047, "rewards/rejected": -0.1323840171098709, "step": 422 }, { "epoch": 0.68, "learning_rate": 3.7700534759358287e-07, "logits/chosen": -0.9901492595672607, "logits/rejected": -1.0132852792739868, "logps/chosen": -96.18614196777344, "logps/rejected": -100.50067901611328, "loss": 0.6225, "rewards/accuracies": 0.25, "rewards/chosen": 0.02076416090130806, "rewards/margins": -0.06052512675523758, "rewards/rejected": 0.08128928393125534, "step": 423 }, { "epoch": 0.68, "learning_rate": 3.778966131907308e-07, "logits/chosen": -1.2984669208526611, "logits/rejected": -1.2945114374160767, "logps/chosen": -97.54997253417969, "logps/rejected": -99.01551055908203, "loss": 0.5635, "rewards/accuracies": 1.0, "rewards/chosen": 0.029336359351873398, "rewards/margins": 0.4102731943130493, "rewards/rejected": -0.38093680143356323, "step": 424 }, { "epoch": 0.68, "learning_rate": 3.7878787878787876e-07, "logits/chosen": -1.296586513519287, "logits/rejected": -1.3621975183486938, "logps/chosen": -63.68661117553711, "logps/rejected": -65.74790954589844, "loss": 0.6138, "rewards/accuracies": 0.75, "rewards/chosen": 0.2702306807041168, "rewards/margins": 0.534076988697052, "rewards/rejected": -0.2638463079929352, "step": 425 }, { "epoch": 0.68, "learning_rate": 3.7967914438502675e-07, "logits/chosen": -1.231501579284668, "logits/rejected": -1.154856562614441, "logps/chosen": -101.69435119628906, "logps/rejected": -89.25820922851562, "loss": 0.6063, "rewards/accuracies": 0.75, "rewards/chosen": 0.2861669659614563, "rewards/margins": 0.4406183362007141, "rewards/rejected": -0.1544513702392578, "step": 426 }, { "epoch": 0.69, "learning_rate": 3.8057040998217465e-07, "logits/chosen": -1.1180994510650635, "logits/rejected": -1.1067962646484375, "logps/chosen": -76.10966491699219, "logps/rejected": -89.00008392333984, "loss": 0.6789, "rewards/accuracies": 0.75, "rewards/chosen": 0.046874530613422394, "rewards/margins": 0.16661272943019867, "rewards/rejected": -0.11973819881677628, "step": 427 }, { "epoch": 0.69, "learning_rate": 3.8146167557932264e-07, "logits/chosen": -1.363166332244873, "logits/rejected": -1.322693943977356, "logps/chosen": -83.13065338134766, "logps/rejected": -82.67440795898438, "loss": 0.6405, "rewards/accuracies": 0.75, "rewards/chosen": 0.16470997035503387, "rewards/margins": 0.32157471776008606, "rewards/rejected": -0.156864732503891, "step": 428 }, { "epoch": 0.69, "learning_rate": 3.8235294117647053e-07, "logits/chosen": -1.310354471206665, "logits/rejected": -1.2848073244094849, "logps/chosen": -88.2528305053711, "logps/rejected": -97.04901885986328, "loss": 0.5913, "rewards/accuracies": 0.5, "rewards/chosen": 0.14081469178199768, "rewards/margins": 0.4230685234069824, "rewards/rejected": -0.28225386142730713, "step": 429 }, { "epoch": 0.69, "learning_rate": 3.8324420677361853e-07, "logits/chosen": -1.2690279483795166, "logits/rejected": -1.2807759046554565, "logps/chosen": -89.16944885253906, "logps/rejected": -83.23316192626953, "loss": 0.5713, "rewards/accuracies": 0.25, "rewards/chosen": 0.016077138483524323, "rewards/margins": 0.04096364974975586, "rewards/rejected": -0.024886513128876686, "step": 430 }, { "epoch": 0.69, "learning_rate": 3.841354723707665e-07, "logits/chosen": -1.2887147665023804, "logits/rejected": -1.280814528465271, "logps/chosen": -120.57044219970703, "logps/rejected": -81.70338439941406, "loss": 0.6032, "rewards/accuracies": 0.5, "rewards/chosen": 0.07986507564783096, "rewards/margins": 0.07214917987585068, "rewards/rejected": 0.007715891115367413, "step": 431 }, { "epoch": 0.69, "learning_rate": 3.850267379679144e-07, "logits/chosen": -1.2743338346481323, "logits/rejected": -1.327607274055481, "logps/chosen": -99.2079849243164, "logps/rejected": -106.66618347167969, "loss": 0.655, "rewards/accuracies": 1.0, "rewards/chosen": 0.1152595579624176, "rewards/margins": 0.24007073044776917, "rewards/rejected": -0.12481116503477097, "step": 432 }, { "epoch": 0.7, "learning_rate": 3.8591800356506236e-07, "logits/chosen": -1.2637909650802612, "logits/rejected": -1.3247734308242798, "logps/chosen": -92.75313568115234, "logps/rejected": -121.42718505859375, "loss": 0.725, "rewards/accuracies": 0.25, "rewards/chosen": -0.19493962824344635, "rewards/margins": -0.2441082000732422, "rewards/rejected": 0.04916858300566673, "step": 433 }, { "epoch": 0.7, "learning_rate": 3.868092691622103e-07, "logits/chosen": -1.3659807443618774, "logits/rejected": -1.4013768434524536, "logps/chosen": -65.14983367919922, "logps/rejected": -70.91036987304688, "loss": 0.5735, "rewards/accuracies": 1.0, "rewards/chosen": 0.2728325128555298, "rewards/margins": 0.5080000162124634, "rewards/rejected": -0.2351675182580948, "step": 434 }, { "epoch": 0.7, "learning_rate": 3.8770053475935825e-07, "logits/chosen": -1.2288713455200195, "logits/rejected": -1.2812716960906982, "logps/chosen": -89.29964447021484, "logps/rejected": -83.95532989501953, "loss": 0.6349, "rewards/accuracies": 0.25, "rewards/chosen": -0.029742244631052017, "rewards/margins": -0.03181428462266922, "rewards/rejected": 0.002072051167488098, "step": 435 }, { "epoch": 0.7, "learning_rate": 3.8859180035650625e-07, "logits/chosen": -1.312159538269043, "logits/rejected": -1.2309988737106323, "logps/chosen": -73.71180725097656, "logps/rejected": -79.66343688964844, "loss": 0.5641, "rewards/accuracies": 0.75, "rewards/chosen": 0.4514661133289337, "rewards/margins": 0.45228320360183716, "rewards/rejected": -0.0008171088993549347, "step": 436 }, { "epoch": 0.7, "learning_rate": 3.8948306595365414e-07, "logits/chosen": -1.286069631576538, "logits/rejected": -1.2134954929351807, "logps/chosen": -97.17425537109375, "logps/rejected": -81.6067886352539, "loss": 0.6285, "rewards/accuracies": 0.5, "rewards/chosen": 0.1706571727991104, "rewards/margins": 0.040442466735839844, "rewards/rejected": 0.13021469116210938, "step": 437 }, { "epoch": 0.7, "learning_rate": 3.9037433155080213e-07, "logits/chosen": -1.2278181314468384, "logits/rejected": -1.2416651248931885, "logps/chosen": -83.85308074951172, "logps/rejected": -82.41248321533203, "loss": 0.5662, "rewards/accuracies": 0.5, "rewards/chosen": -0.0012120248284190893, "rewards/margins": 0.1378270983695984, "rewards/rejected": -0.13903912901878357, "step": 438 }, { "epoch": 0.7, "learning_rate": 3.9126559714795e-07, "logits/chosen": -1.307107925415039, "logits/rejected": -1.2987736463546753, "logps/chosen": -84.60057067871094, "logps/rejected": -86.71941375732422, "loss": 0.6738, "rewards/accuracies": 0.75, "rewards/chosen": -0.07804012298583984, "rewards/margins": 0.04080143943428993, "rewards/rejected": -0.11884155124425888, "step": 439 }, { "epoch": 0.71, "learning_rate": 3.92156862745098e-07, "logits/chosen": -1.2865593433380127, "logits/rejected": -1.2043914794921875, "logps/chosen": -89.62144470214844, "logps/rejected": -85.88352966308594, "loss": 0.6085, "rewards/accuracies": 0.25, "rewards/chosen": -0.04045906662940979, "rewards/margins": -0.09667320549488068, "rewards/rejected": 0.056214142590761185, "step": 440 }, { "epoch": 0.71, "learning_rate": 3.9304812834224597e-07, "logits/chosen": -1.153688907623291, "logits/rejected": -1.2368963956832886, "logps/chosen": -65.02372741699219, "logps/rejected": -88.77371978759766, "loss": 0.6256, "rewards/accuracies": 1.0, "rewards/chosen": 0.20344524085521698, "rewards/margins": 0.6848279237747192, "rewards/rejected": -0.48138266801834106, "step": 441 }, { "epoch": 0.71, "learning_rate": 3.939393939393939e-07, "logits/chosen": -1.4094884395599365, "logits/rejected": -1.4219439029693604, "logps/chosen": -44.74273681640625, "logps/rejected": -56.57225036621094, "loss": 0.6507, "rewards/accuracies": 1.0, "rewards/chosen": 0.0022291187196969986, "rewards/margins": 0.09854794293642044, "rewards/rejected": -0.096318818628788, "step": 442 }, { "epoch": 0.71, "learning_rate": 3.9483065953654185e-07, "logits/chosen": -1.425800085067749, "logits/rejected": -1.3602354526519775, "logps/chosen": -65.93795013427734, "logps/rejected": -77.2974853515625, "loss": 0.6193, "rewards/accuracies": 0.75, "rewards/chosen": -0.1508585810661316, "rewards/margins": 0.0631929486989975, "rewards/rejected": -0.21405154466629028, "step": 443 }, { "epoch": 0.71, "learning_rate": 3.9572192513368985e-07, "logits/chosen": -1.198150634765625, "logits/rejected": -1.2567341327667236, "logps/chosen": -80.4825439453125, "logps/rejected": -83.21296691894531, "loss": 0.5793, "rewards/accuracies": 1.0, "rewards/chosen": 0.08257760852575302, "rewards/margins": 0.3871579170227051, "rewards/rejected": -0.30458030104637146, "step": 444 }, { "epoch": 0.71, "learning_rate": 3.9661319073083774e-07, "logits/chosen": -1.2651132345199585, "logits/rejected": -1.3281967639923096, "logps/chosen": -93.98489379882812, "logps/rejected": -116.48754119873047, "loss": 0.6354, "rewards/accuracies": 0.75, "rewards/chosen": -0.08730335533618927, "rewards/margins": -0.015226557850837708, "rewards/rejected": -0.07207679003477097, "step": 445 }, { "epoch": 0.72, "learning_rate": 3.9750445632798574e-07, "logits/chosen": -1.3390636444091797, "logits/rejected": -1.1928832530975342, "logps/chosen": -94.0273666381836, "logps/rejected": -72.57819366455078, "loss": 0.6415, "rewards/accuracies": 0.25, "rewards/chosen": 0.20120640099048615, "rewards/margins": -0.01834353804588318, "rewards/rejected": 0.21954993903636932, "step": 446 }, { "epoch": 0.72, "learning_rate": 3.9839572192513363e-07, "logits/chosen": -1.0589655637741089, "logits/rejected": -1.0946158170700073, "logps/chosen": -83.42378234863281, "logps/rejected": -116.17475891113281, "loss": 0.6579, "rewards/accuracies": 1.0, "rewards/chosen": 0.1473393440246582, "rewards/margins": 0.4214257597923279, "rewards/rejected": -0.2740863859653473, "step": 447 }, { "epoch": 0.72, "learning_rate": 3.9928698752228163e-07, "logits/chosen": -1.3913222551345825, "logits/rejected": -1.452187418937683, "logps/chosen": -98.67301940917969, "logps/rejected": -95.60882568359375, "loss": 0.5858, "rewards/accuracies": 0.75, "rewards/chosen": 0.32782667875289917, "rewards/margins": 0.39873600006103516, "rewards/rejected": -0.07090931385755539, "step": 448 }, { "epoch": 0.72, "learning_rate": 4.001782531194296e-07, "logits/chosen": -1.4081518650054932, "logits/rejected": -1.3985217809677124, "logps/chosen": -71.8021469116211, "logps/rejected": -99.19285583496094, "loss": 0.5986, "rewards/accuracies": 0.5, "rewards/chosen": -0.07844753563404083, "rewards/margins": -0.020090483129024506, "rewards/rejected": -0.05835704505443573, "step": 449 }, { "epoch": 0.72, "learning_rate": 4.010695187165775e-07, "logits/chosen": -1.290668249130249, "logits/rejected": -1.3075284957885742, "logps/chosen": -83.3858413696289, "logps/rejected": -91.45824432373047, "loss": 0.6105, "rewards/accuracies": 0.5, "rewards/chosen": 0.022474385797977448, "rewards/margins": -0.0019368194043636322, "rewards/rejected": 0.024411197751760483, "step": 450 }, { "epoch": 0.72, "learning_rate": 4.019607843137255e-07, "logits/chosen": -1.4386577606201172, "logits/rejected": -1.3995468616485596, "logps/chosen": -87.14105987548828, "logps/rejected": -82.3614273071289, "loss": 0.5944, "rewards/accuracies": 0.75, "rewards/chosen": 0.17357024550437927, "rewards/margins": 0.18619516491889954, "rewards/rejected": -0.012624932453036308, "step": 451 }, { "epoch": 0.73, "learning_rate": 4.028520499108734e-07, "logits/chosen": -1.0986660718917847, "logits/rejected": -0.9879285097122192, "logps/chosen": -84.15496063232422, "logps/rejected": -115.26528930664062, "loss": 0.5758, "rewards/accuracies": 0.5, "rewards/chosen": 0.11674938350915909, "rewards/margins": 0.00457992497831583, "rewards/rejected": 0.11216945946216583, "step": 452 }, { "epoch": 0.73, "learning_rate": 4.037433155080214e-07, "logits/chosen": -1.1762040853500366, "logits/rejected": -1.0985565185546875, "logps/chosen": -100.03556823730469, "logps/rejected": -88.69412231445312, "loss": 0.5683, "rewards/accuracies": 0.5, "rewards/chosen": 0.022872548550367355, "rewards/margins": 0.1401207000017166, "rewards/rejected": -0.11724815517663956, "step": 453 }, { "epoch": 0.73, "learning_rate": 4.0463458110516934e-07, "logits/chosen": -1.1346769332885742, "logits/rejected": -1.0895377397537231, "logps/chosen": -83.95707702636719, "logps/rejected": -75.14948272705078, "loss": 0.6249, "rewards/accuracies": 0.5, "rewards/chosen": 0.09833468496799469, "rewards/margins": 0.1491558849811554, "rewards/rejected": -0.0508212111890316, "step": 454 }, { "epoch": 0.73, "learning_rate": 4.055258467023173e-07, "logits/chosen": -1.2797484397888184, "logits/rejected": -1.2674553394317627, "logps/chosen": -77.8741455078125, "logps/rejected": -88.85834503173828, "loss": 0.5555, "rewards/accuracies": 0.75, "rewards/chosen": -0.013510417193174362, "rewards/margins": 0.07482538372278214, "rewards/rejected": -0.0883358046412468, "step": 455 }, { "epoch": 0.73, "learning_rate": 4.0641711229946523e-07, "logits/chosen": -1.3066165447235107, "logits/rejected": -1.2400354146957397, "logps/chosen": -78.11245727539062, "logps/rejected": -90.78754425048828, "loss": 0.6177, "rewards/accuracies": 0.5, "rewards/chosen": 0.14486083388328552, "rewards/margins": 0.484927773475647, "rewards/rejected": -0.34006690979003906, "step": 456 }, { "epoch": 0.73, "learning_rate": 4.073083778966132e-07, "logits/chosen": -1.2984991073608398, "logits/rejected": -1.298667073249817, "logps/chosen": -104.656982421875, "logps/rejected": -80.5057601928711, "loss": 0.6831, "rewards/accuracies": 0.5, "rewards/chosen": -0.021123699843883514, "rewards/margins": -0.13160055875778198, "rewards/rejected": 0.11047688126564026, "step": 457 }, { "epoch": 0.74, "learning_rate": 4.081996434937611e-07, "logits/chosen": -1.4500277042388916, "logits/rejected": -1.5118764638900757, "logps/chosen": -100.04800415039062, "logps/rejected": -86.5777816772461, "loss": 0.6482, "rewards/accuracies": 0.75, "rewards/chosen": -0.019234277307987213, "rewards/margins": 0.288046270608902, "rewards/rejected": -0.3072805404663086, "step": 458 }, { "epoch": 0.74, "learning_rate": 4.090909090909091e-07, "logits/chosen": -1.3442444801330566, "logits/rejected": -1.3969312906265259, "logps/chosen": -114.2316665649414, "logps/rejected": -106.92385864257812, "loss": 0.6321, "rewards/accuracies": 0.5, "rewards/chosen": -0.18799743056297302, "rewards/margins": -0.05285205319523811, "rewards/rejected": -0.1351453810930252, "step": 459 }, { "epoch": 0.74, "learning_rate": 4.09982174688057e-07, "logits/chosen": -1.203410267829895, "logits/rejected": -1.133986473083496, "logps/chosen": -95.15753936767578, "logps/rejected": -83.20284271240234, "loss": 0.5944, "rewards/accuracies": 0.5, "rewards/chosen": 0.23127174377441406, "rewards/margins": 0.298564076423645, "rewards/rejected": -0.06729232519865036, "step": 460 }, { "epoch": 0.74, "learning_rate": 4.10873440285205e-07, "logits/chosen": -1.2983790636062622, "logits/rejected": -1.2700847387313843, "logps/chosen": -88.56631469726562, "logps/rejected": -96.26264953613281, "loss": 0.6372, "rewards/accuracies": 0.5, "rewards/chosen": 0.2767552137374878, "rewards/margins": 0.15458115935325623, "rewards/rejected": 0.12217407673597336, "step": 461 }, { "epoch": 0.74, "learning_rate": 4.117647058823529e-07, "logits/chosen": -1.3383396863937378, "logits/rejected": -1.2539695501327515, "logps/chosen": -68.66460418701172, "logps/rejected": -77.72669982910156, "loss": 0.603, "rewards/accuracies": 0.5, "rewards/chosen": 0.29955366253852844, "rewards/margins": 0.4738039970397949, "rewards/rejected": -0.17425031960010529, "step": 462 }, { "epoch": 0.74, "learning_rate": 4.126559714795009e-07, "logits/chosen": -1.2988406419754028, "logits/rejected": -1.3252497911453247, "logps/chosen": -89.51847076416016, "logps/rejected": -86.02799987792969, "loss": 0.5326, "rewards/accuracies": 0.75, "rewards/chosen": 0.09732237458229065, "rewards/margins": 0.30294695496559143, "rewards/rejected": -0.20562458038330078, "step": 463 }, { "epoch": 0.74, "learning_rate": 4.1354723707664884e-07, "logits/chosen": -1.1332547664642334, "logits/rejected": -1.0900863409042358, "logps/chosen": -125.12483978271484, "logps/rejected": -90.96456146240234, "loss": 0.5653, "rewards/accuracies": 0.5, "rewards/chosen": -0.039594076573848724, "rewards/margins": 0.21002072095870972, "rewards/rejected": -0.24961480498313904, "step": 464 }, { "epoch": 0.75, "learning_rate": 4.144385026737968e-07, "logits/chosen": -1.2806475162506104, "logits/rejected": -1.2539416551589966, "logps/chosen": -82.32113647460938, "logps/rejected": -83.65682983398438, "loss": 0.5596, "rewards/accuracies": 1.0, "rewards/chosen": 0.14572773873806, "rewards/margins": 0.6775773763656616, "rewards/rejected": -0.5318496823310852, "step": 465 }, { "epoch": 0.75, "learning_rate": 4.153297682709447e-07, "logits/chosen": -1.2206542491912842, "logits/rejected": -1.2182166576385498, "logps/chosen": -72.96160125732422, "logps/rejected": -65.20282745361328, "loss": 0.6074, "rewards/accuracies": 0.75, "rewards/chosen": 0.2063087671995163, "rewards/margins": 0.09210057556629181, "rewards/rejected": 0.1142081767320633, "step": 466 }, { "epoch": 0.75, "learning_rate": 4.1622103386809267e-07, "logits/chosen": -1.063786506652832, "logits/rejected": -1.082587480545044, "logps/chosen": -89.14126586914062, "logps/rejected": -126.00691223144531, "loss": 0.6159, "rewards/accuracies": 0.75, "rewards/chosen": -0.02730713039636612, "rewards/margins": 0.5163610577583313, "rewards/rejected": -0.5436682105064392, "step": 467 }, { "epoch": 0.75, "learning_rate": 4.171122994652406e-07, "logits/chosen": -1.1326887607574463, "logits/rejected": -1.1859641075134277, "logps/chosen": -84.5179443359375, "logps/rejected": -94.02557373046875, "loss": 0.6589, "rewards/accuracies": 0.75, "rewards/chosen": 0.23464013636112213, "rewards/margins": 0.22856560349464417, "rewards/rejected": 0.006074526347219944, "step": 468 }, { "epoch": 0.75, "learning_rate": 4.180035650623886e-07, "logits/chosen": -1.3083617687225342, "logits/rejected": -1.2259063720703125, "logps/chosen": -69.37739562988281, "logps/rejected": -78.87149047851562, "loss": 0.579, "rewards/accuracies": 1.0, "rewards/chosen": 0.13563308119773865, "rewards/margins": 0.20477294921875, "rewards/rejected": -0.06913986057043076, "step": 469 }, { "epoch": 0.75, "learning_rate": 4.188948306595365e-07, "logits/chosen": -1.2209584712982178, "logits/rejected": -1.1905081272125244, "logps/chosen": -76.27886962890625, "logps/rejected": -81.78018951416016, "loss": 0.6121, "rewards/accuracies": 0.5, "rewards/chosen": 0.16167010366916656, "rewards/margins": 0.023904796689748764, "rewards/rejected": 0.1377653181552887, "step": 470 }, { "epoch": 0.76, "learning_rate": 4.197860962566845e-07, "logits/chosen": -1.3911468982696533, "logits/rejected": -1.398682713508606, "logps/chosen": -60.501556396484375, "logps/rejected": -68.39920806884766, "loss": 0.6455, "rewards/accuracies": 1.0, "rewards/chosen": 0.18640299141407013, "rewards/margins": 0.2992064654827118, "rewards/rejected": -0.11280346661806107, "step": 471 }, { "epoch": 0.76, "learning_rate": 4.206773618538324e-07, "logits/chosen": -1.116468906402588, "logits/rejected": -1.1297643184661865, "logps/chosen": -108.1712417602539, "logps/rejected": -87.00677490234375, "loss": 0.63, "rewards/accuracies": 0.75, "rewards/chosen": -0.10710355639457703, "rewards/margins": 0.3907243609428406, "rewards/rejected": -0.4978279173374176, "step": 472 }, { "epoch": 0.76, "learning_rate": 4.215686274509804e-07, "logits/chosen": -1.5154005289077759, "logits/rejected": -1.4778096675872803, "logps/chosen": -63.15167999267578, "logps/rejected": -72.02388000488281, "loss": 0.5746, "rewards/accuracies": 0.5, "rewards/chosen": 0.17897653579711914, "rewards/margins": 0.13677901029586792, "rewards/rejected": 0.042197518050670624, "step": 473 }, { "epoch": 0.76, "learning_rate": 4.2245989304812833e-07, "logits/chosen": -1.3389841318130493, "logits/rejected": -1.2838752269744873, "logps/chosen": -84.42916107177734, "logps/rejected": -90.39540100097656, "loss": 0.6403, "rewards/accuracies": 0.75, "rewards/chosen": -0.012149907648563385, "rewards/margins": 0.041347406804561615, "rewards/rejected": -0.053497314453125, "step": 474 }, { "epoch": 0.76, "learning_rate": 4.2335115864527627e-07, "logits/chosen": -1.3151636123657227, "logits/rejected": -1.2679822444915771, "logps/chosen": -87.32825469970703, "logps/rejected": -83.658203125, "loss": 0.578, "rewards/accuracies": 0.5, "rewards/chosen": 0.11957035958766937, "rewards/margins": 0.06148138269782066, "rewards/rejected": 0.05808897316455841, "step": 475 }, { "epoch": 0.76, "learning_rate": 4.242424242424242e-07, "logits/chosen": -1.190159797668457, "logits/rejected": -1.178831696510315, "logps/chosen": -95.5047607421875, "logps/rejected": -105.34327697753906, "loss": 0.5845, "rewards/accuracies": 0.5, "rewards/chosen": -0.12953606247901917, "rewards/margins": 0.1360061764717102, "rewards/rejected": -0.265542209148407, "step": 476 }, { "epoch": 0.77, "learning_rate": 4.2513368983957216e-07, "logits/chosen": -1.153006672859192, "logits/rejected": -1.1645163297653198, "logps/chosen": -87.86785125732422, "logps/rejected": -92.99889373779297, "loss": 0.6725, "rewards/accuracies": 0.75, "rewards/chosen": 0.5782763361930847, "rewards/margins": 0.47999846935272217, "rewards/rejected": 0.09827785938978195, "step": 477 }, { "epoch": 0.77, "learning_rate": 4.260249554367201e-07, "logits/chosen": -1.2326632738113403, "logits/rejected": -1.2825582027435303, "logps/chosen": -85.50484466552734, "logps/rejected": -105.1346435546875, "loss": 0.6395, "rewards/accuracies": 0.75, "rewards/chosen": 0.2969025671482086, "rewards/margins": 0.4198203682899475, "rewards/rejected": -0.12291783839464188, "step": 478 }, { "epoch": 0.77, "learning_rate": 4.269162210338681e-07, "logits/chosen": -1.3497681617736816, "logits/rejected": -1.4065210819244385, "logps/chosen": -96.04811096191406, "logps/rejected": -108.46571350097656, "loss": 0.6808, "rewards/accuracies": 0.5, "rewards/chosen": -0.006238652393221855, "rewards/margins": 0.04013318940997124, "rewards/rejected": -0.04637184366583824, "step": 479 }, { "epoch": 0.77, "learning_rate": 4.27807486631016e-07, "logits/chosen": -1.2886266708374023, "logits/rejected": -1.2083921432495117, "logps/chosen": -83.16029357910156, "logps/rejected": -68.50463104248047, "loss": 0.5767, "rewards/accuracies": 0.25, "rewards/chosen": -0.06715640425682068, "rewards/margins": 0.0225057452917099, "rewards/rejected": -0.08966217190027237, "step": 480 }, { "epoch": 0.77, "learning_rate": 4.28698752228164e-07, "logits/chosen": -1.2284700870513916, "logits/rejected": -1.117881178855896, "logps/chosen": -83.8816909790039, "logps/rejected": -99.60755157470703, "loss": 0.5359, "rewards/accuracies": 0.5, "rewards/chosen": 0.27510881423950195, "rewards/margins": 0.41564759612083435, "rewards/rejected": -0.1405387818813324, "step": 481 }, { "epoch": 0.77, "learning_rate": 4.295900178253119e-07, "logits/chosen": -1.2234052419662476, "logits/rejected": -1.2356905937194824, "logps/chosen": -86.45711517333984, "logps/rejected": -90.36863708496094, "loss": 0.6247, "rewards/accuracies": 0.5, "rewards/chosen": 0.03613615036010742, "rewards/margins": -0.32101961970329285, "rewards/rejected": 0.35715579986572266, "step": 482 }, { "epoch": 0.78, "learning_rate": 4.304812834224599e-07, "logits/chosen": -1.3215898275375366, "logits/rejected": -1.2860819101333618, "logps/chosen": -79.0875244140625, "logps/rejected": -67.99415588378906, "loss": 0.5611, "rewards/accuracies": 0.75, "rewards/chosen": -0.11069746315479279, "rewards/margins": 0.16057005524635315, "rewards/rejected": -0.27126753330230713, "step": 483 }, { "epoch": 0.78, "learning_rate": 4.313725490196078e-07, "logits/chosen": -1.3516974449157715, "logits/rejected": -1.4098927974700928, "logps/chosen": -96.32000732421875, "logps/rejected": -94.46757507324219, "loss": 0.5992, "rewards/accuracies": 0.75, "rewards/chosen": 0.020639605820178986, "rewards/margins": 0.2952066659927368, "rewards/rejected": -0.27456703782081604, "step": 484 }, { "epoch": 0.78, "learning_rate": 4.3226381461675576e-07, "logits/chosen": -1.4597803354263306, "logits/rejected": -1.3645751476287842, "logps/chosen": -74.60675811767578, "logps/rejected": -71.40281677246094, "loss": 0.5958, "rewards/accuracies": 0.25, "rewards/chosen": -0.046377092599868774, "rewards/margins": -0.06542091071605682, "rewards/rejected": 0.019043825566768646, "step": 485 }, { "epoch": 0.78, "learning_rate": 4.3315508021390376e-07, "logits/chosen": -1.166602373123169, "logits/rejected": -1.2259540557861328, "logps/chosen": -97.90666198730469, "logps/rejected": -90.58246612548828, "loss": 0.6174, "rewards/accuracies": 0.5, "rewards/chosen": -0.3365577757358551, "rewards/margins": 0.32069388031959534, "rewards/rejected": -0.6572516560554504, "step": 486 }, { "epoch": 0.78, "learning_rate": 4.3404634581105165e-07, "logits/chosen": -1.1641950607299805, "logits/rejected": -1.323347568511963, "logps/chosen": -76.68212890625, "logps/rejected": -97.8846206665039, "loss": 0.6491, "rewards/accuracies": 0.25, "rewards/chosen": -0.09745101630687714, "rewards/margins": -0.05226903408765793, "rewards/rejected": -0.0451819933950901, "step": 487 }, { "epoch": 0.78, "learning_rate": 4.3493761140819965e-07, "logits/chosen": -1.320125699043274, "logits/rejected": -1.314950942993164, "logps/chosen": -67.85660552978516, "logps/rejected": -74.83441162109375, "loss": 0.6189, "rewards/accuracies": 0.75, "rewards/chosen": 0.3528144061565399, "rewards/margins": 0.3462958335876465, "rewards/rejected": 0.006518557667732239, "step": 488 }, { "epoch": 0.78, "learning_rate": 4.358288770053476e-07, "logits/chosen": -1.3386001586914062, "logits/rejected": -1.350630760192871, "logps/chosen": -73.04529571533203, "logps/rejected": -82.00672912597656, "loss": 0.5815, "rewards/accuracies": 0.75, "rewards/chosen": -0.06825485080480576, "rewards/margins": 0.14499531686306, "rewards/rejected": -0.21325016021728516, "step": 489 }, { "epoch": 0.79, "learning_rate": 4.3672014260249554e-07, "logits/chosen": -1.1427404880523682, "logits/rejected": -1.177492380142212, "logps/chosen": -99.52444458007812, "logps/rejected": -101.83626556396484, "loss": 0.6346, "rewards/accuracies": 0.25, "rewards/chosen": -0.08892592787742615, "rewards/margins": -0.11826153099536896, "rewards/rejected": 0.029335597530007362, "step": 490 }, { "epoch": 0.79, "learning_rate": 4.376114081996435e-07, "logits/chosen": -1.274001955986023, "logits/rejected": -1.245922565460205, "logps/chosen": -87.59429168701172, "logps/rejected": -91.91871643066406, "loss": 0.6426, "rewards/accuracies": 0.25, "rewards/chosen": -0.23058490455150604, "rewards/margins": -0.1302284300327301, "rewards/rejected": -0.10035648941993713, "step": 491 }, { "epoch": 0.79, "learning_rate": 4.385026737967914e-07, "logits/chosen": -1.1912789344787598, "logits/rejected": -1.1602954864501953, "logps/chosen": -88.62810516357422, "logps/rejected": -87.14308166503906, "loss": 0.5823, "rewards/accuracies": 1.0, "rewards/chosen": 0.14948883652687073, "rewards/margins": 0.4789545238018036, "rewards/rejected": -0.32946568727493286, "step": 492 }, { "epoch": 0.79, "learning_rate": 4.3939393939393937e-07, "logits/chosen": -1.3624935150146484, "logits/rejected": -1.2808935642242432, "logps/chosen": -65.746337890625, "logps/rejected": -74.42325592041016, "loss": 0.5125, "rewards/accuracies": 1.0, "rewards/chosen": 0.2584618926048279, "rewards/margins": 0.5056008696556091, "rewards/rejected": -0.24713897705078125, "step": 493 }, { "epoch": 0.79, "learning_rate": 4.4028520499108736e-07, "logits/chosen": -1.2846308946609497, "logits/rejected": -1.4656882286071777, "logps/chosen": -77.66488647460938, "logps/rejected": -58.457427978515625, "loss": 0.61, "rewards/accuracies": 0.5, "rewards/chosen": 0.055696919560432434, "rewards/margins": 0.050402313470840454, "rewards/rejected": 0.0052946098148822784, "step": 494 }, { "epoch": 0.79, "learning_rate": 4.4117647058823526e-07, "logits/chosen": -1.5594539642333984, "logits/rejected": -1.508124589920044, "logps/chosen": -87.69598388671875, "logps/rejected": -106.2069091796875, "loss": 0.5802, "rewards/accuracies": 1.0, "rewards/chosen": 0.40858763456344604, "rewards/margins": 1.0477604866027832, "rewards/rejected": -0.6391727328300476, "step": 495 }, { "epoch": 0.8, "learning_rate": 4.4206773618538325e-07, "logits/chosen": -1.2237030267715454, "logits/rejected": -1.2619823217391968, "logps/chosen": -114.19792175292969, "logps/rejected": -105.03025817871094, "loss": 0.588, "rewards/accuracies": 0.75, "rewards/chosen": 0.3114168345928192, "rewards/margins": 0.5542125701904297, "rewards/rejected": -0.24279576539993286, "step": 496 }, { "epoch": 0.8, "learning_rate": 4.4295900178253114e-07, "logits/chosen": -1.1107803583145142, "logits/rejected": -1.1437745094299316, "logps/chosen": -79.51960754394531, "logps/rejected": -92.55284881591797, "loss": 0.5983, "rewards/accuracies": 0.5, "rewards/chosen": -0.10698748379945755, "rewards/margins": -0.014848999679088593, "rewards/rejected": -0.09213848412036896, "step": 497 }, { "epoch": 0.8, "learning_rate": 4.4385026737967914e-07, "logits/chosen": -1.3689204454421997, "logits/rejected": -1.3806201219558716, "logps/chosen": -76.36368560791016, "logps/rejected": -90.34919738769531, "loss": 0.5986, "rewards/accuracies": 0.75, "rewards/chosen": 0.33824339509010315, "rewards/margins": 0.469723641872406, "rewards/rejected": -0.13148020207881927, "step": 498 }, { "epoch": 0.8, "learning_rate": 4.447415329768271e-07, "logits/chosen": -1.270858883857727, "logits/rejected": -1.2354862689971924, "logps/chosen": -82.68109130859375, "logps/rejected": -65.69955444335938, "loss": 0.7413, "rewards/accuracies": 1.0, "rewards/chosen": 0.26749736070632935, "rewards/margins": 0.40030500292778015, "rewards/rejected": -0.1328076422214508, "step": 499 }, { "epoch": 0.8, "learning_rate": 4.4563279857397503e-07, "logits/chosen": -1.2757349014282227, "logits/rejected": -1.2853527069091797, "logps/chosen": -107.81135559082031, "logps/rejected": -104.32655334472656, "loss": 0.5196, "rewards/accuracies": 0.75, "rewards/chosen": 0.06813926249742508, "rewards/margins": 0.8135612607002258, "rewards/rejected": -0.7454220056533813, "step": 500 }, { "epoch": 0.8, "learning_rate": 4.4652406417112297e-07, "logits/chosen": -1.386798620223999, "logits/rejected": -1.4083800315856934, "logps/chosen": -83.87106323242188, "logps/rejected": -76.98448944091797, "loss": 0.6505, "rewards/accuracies": 0.75, "rewards/chosen": 0.43621501326560974, "rewards/margins": 0.3472493290901184, "rewards/rejected": 0.08896571397781372, "step": 501 }, { "epoch": 0.81, "learning_rate": 4.474153297682709e-07, "logits/chosen": -1.3509119749069214, "logits/rejected": -1.3410181999206543, "logps/chosen": -60.21222686767578, "logps/rejected": -74.73516845703125, "loss": 0.612, "rewards/accuracies": 0.75, "rewards/chosen": 0.0664929449558258, "rewards/margins": 0.11449499428272247, "rewards/rejected": -0.048002053052186966, "step": 502 }, { "epoch": 0.81, "learning_rate": 4.4830659536541886e-07, "logits/chosen": -1.105777382850647, "logits/rejected": -1.1056811809539795, "logps/chosen": -98.85588836669922, "logps/rejected": -118.79904174804688, "loss": 0.7098, "rewards/accuracies": 0.75, "rewards/chosen": 0.14117011427879333, "rewards/margins": 0.1986784040927887, "rewards/rejected": -0.057508282363414764, "step": 503 }, { "epoch": 0.81, "learning_rate": 4.4919786096256686e-07, "logits/chosen": -1.1480892896652222, "logits/rejected": -1.2372040748596191, "logps/chosen": -87.48677062988281, "logps/rejected": -94.84223937988281, "loss": 0.5946, "rewards/accuracies": 0.25, "rewards/chosen": 0.014048953540623188, "rewards/margins": -0.17580872774124146, "rewards/rejected": 0.18985766172409058, "step": 504 }, { "epoch": 0.81, "learning_rate": 4.5008912655971475e-07, "logits/chosen": -1.3914356231689453, "logits/rejected": -1.3938919305801392, "logps/chosen": -82.96173095703125, "logps/rejected": -79.52250671386719, "loss": 0.6397, "rewards/accuracies": 0.75, "rewards/chosen": -0.06953773647546768, "rewards/margins": 0.2171051800251007, "rewards/rejected": -0.2866429388523102, "step": 505 }, { "epoch": 0.81, "learning_rate": 4.5098039215686274e-07, "logits/chosen": -1.2574098110198975, "logits/rejected": -1.2677593231201172, "logps/chosen": -96.27035522460938, "logps/rejected": -87.74009704589844, "loss": 0.6185, "rewards/accuracies": 0.5, "rewards/chosen": -0.12315329909324646, "rewards/margins": 0.24885998666286469, "rewards/rejected": -0.37201327085494995, "step": 506 }, { "epoch": 0.81, "learning_rate": 4.5187165775401064e-07, "logits/chosen": -1.288081407546997, "logits/rejected": -1.1755859851837158, "logps/chosen": -55.842926025390625, "logps/rejected": -82.43338012695312, "loss": 0.7029, "rewards/accuracies": 0.5, "rewards/chosen": -0.04859896004199982, "rewards/margins": 0.24106062948703766, "rewards/rejected": -0.28965961933135986, "step": 507 }, { "epoch": 0.82, "learning_rate": 4.5276292335115863e-07, "logits/chosen": -1.1611793041229248, "logits/rejected": -1.0328072309494019, "logps/chosen": -99.04428100585938, "logps/rejected": -93.73161315917969, "loss": 0.5782, "rewards/accuracies": 0.5, "rewards/chosen": 0.11443749070167542, "rewards/margins": 0.10516633093357086, "rewards/rejected": 0.00927114486694336, "step": 508 }, { "epoch": 0.82, "learning_rate": 4.536541889483066e-07, "logits/chosen": -1.3150384426116943, "logits/rejected": -1.319748878479004, "logps/chosen": -106.81163787841797, "logps/rejected": -120.40884399414062, "loss": 0.5103, "rewards/accuracies": 1.0, "rewards/chosen": 0.24300481379032135, "rewards/margins": 1.0338271856307983, "rewards/rejected": -0.7908223867416382, "step": 509 }, { "epoch": 0.82, "learning_rate": 4.545454545454545e-07, "logits/chosen": -1.28767728805542, "logits/rejected": -1.2918866872787476, "logps/chosen": -76.27578735351562, "logps/rejected": -89.114501953125, "loss": 0.6208, "rewards/accuracies": 0.25, "rewards/chosen": -0.05444173142313957, "rewards/margins": -0.051602739840745926, "rewards/rejected": -0.002838999032974243, "step": 510 }, { "epoch": 0.82, "learning_rate": 4.5543672014260246e-07, "logits/chosen": -1.1188105344772339, "logits/rejected": -1.1183234453201294, "logps/chosen": -77.97904205322266, "logps/rejected": -84.43910217285156, "loss": 0.6107, "rewards/accuracies": 0.75, "rewards/chosen": 0.08819914609193802, "rewards/margins": 0.1966385841369629, "rewards/rejected": -0.10843944549560547, "step": 511 }, { "epoch": 0.82, "learning_rate": 4.563279857397504e-07, "logits/chosen": -1.0546026229858398, "logits/rejected": -1.16685152053833, "logps/chosen": -82.48338317871094, "logps/rejected": -92.44326782226562, "loss": 0.5062, "rewards/accuracies": 0.5, "rewards/chosen": 0.06616096198558807, "rewards/margins": 0.6463236212730408, "rewards/rejected": -0.5801626443862915, "step": 512 }, { "epoch": 0.82, "learning_rate": 4.5721925133689835e-07, "logits/chosen": -1.4381133317947388, "logits/rejected": -1.4179308414459229, "logps/chosen": -76.12765502929688, "logps/rejected": -98.17503356933594, "loss": 0.6139, "rewards/accuracies": 0.5, "rewards/chosen": -0.07124081254005432, "rewards/margins": 0.17919884622097015, "rewards/rejected": -0.2504396438598633, "step": 513 }, { "epoch": 0.83, "learning_rate": 4.5811051693404635e-07, "logits/chosen": -1.035050630569458, "logits/rejected": -1.0544079542160034, "logps/chosen": -69.63946533203125, "logps/rejected": -98.0809326171875, "loss": 0.6549, "rewards/accuracies": 0.75, "rewards/chosen": 0.09673299640417099, "rewards/margins": 0.23705987632274628, "rewards/rejected": -0.14032688736915588, "step": 514 }, { "epoch": 0.83, "learning_rate": 4.5900178253119424e-07, "logits/chosen": -1.2437968254089355, "logits/rejected": -1.3745887279510498, "logps/chosen": -67.9451904296875, "logps/rejected": -118.43597412109375, "loss": 0.5989, "rewards/accuracies": 0.75, "rewards/chosen": 0.0662335455417633, "rewards/margins": 0.12917622923851013, "rewards/rejected": -0.06294269859790802, "step": 515 }, { "epoch": 0.83, "learning_rate": 4.5989304812834224e-07, "logits/chosen": -1.3039069175720215, "logits/rejected": -1.274573802947998, "logps/chosen": -93.60668182373047, "logps/rejected": -98.05294799804688, "loss": 0.5838, "rewards/accuracies": 0.75, "rewards/chosen": -0.21373215317726135, "rewards/margins": 0.0997920036315918, "rewards/rejected": -0.31352412700653076, "step": 516 }, { "epoch": 0.83, "learning_rate": 4.6078431372549013e-07, "logits/chosen": -0.9579216241836548, "logits/rejected": -0.9002583622932434, "logps/chosen": -81.09269714355469, "logps/rejected": -103.15159606933594, "loss": 0.6325, "rewards/accuracies": 0.75, "rewards/chosen": -0.26695510745048523, "rewards/margins": 0.07634346187114716, "rewards/rejected": -0.3432985544204712, "step": 517 }, { "epoch": 0.83, "learning_rate": 4.616755793226381e-07, "logits/chosen": -1.107038974761963, "logits/rejected": -1.0528725385665894, "logps/chosen": -75.65821838378906, "logps/rejected": -87.76038360595703, "loss": 0.6774, "rewards/accuracies": 0.25, "rewards/chosen": 0.1812681257724762, "rewards/margins": -0.12633362412452698, "rewards/rejected": 0.3076017498970032, "step": 518 }, { "epoch": 0.83, "learning_rate": 4.6256684491978607e-07, "logits/chosen": -1.201785922050476, "logits/rejected": -1.1985136270523071, "logps/chosen": -80.35051727294922, "logps/rejected": -81.10623168945312, "loss": 0.5669, "rewards/accuracies": 0.5, "rewards/chosen": 0.12764224410057068, "rewards/margins": 0.30668601393699646, "rewards/rejected": -0.17904376983642578, "step": 519 }, { "epoch": 0.83, "learning_rate": 4.63458110516934e-07, "logits/chosen": -1.3224291801452637, "logits/rejected": -1.3507975339889526, "logps/chosen": -71.86321258544922, "logps/rejected": -90.33245849609375, "loss": 0.4835, "rewards/accuracies": 1.0, "rewards/chosen": 0.22387278079986572, "rewards/margins": 1.1589363813400269, "rewards/rejected": -0.9350636005401611, "step": 520 }, { "epoch": 0.84, "learning_rate": 4.6434937611408196e-07, "logits/chosen": -1.4065959453582764, "logits/rejected": -1.3828284740447998, "logps/chosen": -74.67182922363281, "logps/rejected": -83.63604736328125, "loss": 0.5226, "rewards/accuracies": 0.75, "rewards/chosen": 0.20358523726463318, "rewards/margins": 0.7563045024871826, "rewards/rejected": -0.5527192950248718, "step": 521 }, { "epoch": 0.84, "learning_rate": 4.6524064171122995e-07, "logits/chosen": -1.3299577236175537, "logits/rejected": -1.279381513595581, "logps/chosen": -111.23765563964844, "logps/rejected": -88.43760681152344, "loss": 0.5822, "rewards/accuracies": 0.5, "rewards/chosen": -0.03902149200439453, "rewards/margins": 0.11511565744876862, "rewards/rejected": -0.15413713455200195, "step": 522 }, { "epoch": 0.84, "learning_rate": 4.6613190730837784e-07, "logits/chosen": -1.1045217514038086, "logits/rejected": -1.1174486875534058, "logps/chosen": -77.10578918457031, "logps/rejected": -99.78474426269531, "loss": 0.5509, "rewards/accuracies": 1.0, "rewards/chosen": 0.13691720366477966, "rewards/margins": 0.6185317039489746, "rewards/rejected": -0.48161450028419495, "step": 523 }, { "epoch": 0.84, "learning_rate": 4.6702317290552584e-07, "logits/chosen": -1.3873976469039917, "logits/rejected": -1.359724521636963, "logps/chosen": -97.50643920898438, "logps/rejected": -103.14022827148438, "loss": 0.5671, "rewards/accuracies": 1.0, "rewards/chosen": -0.14090441167354584, "rewards/margins": 0.8121362924575806, "rewards/rejected": -0.9530407190322876, "step": 524 }, { "epoch": 0.84, "learning_rate": 4.679144385026738e-07, "logits/chosen": -1.388555884361267, "logits/rejected": -1.3609460592269897, "logps/chosen": -66.62744140625, "logps/rejected": -88.90987396240234, "loss": 0.4806, "rewards/accuracies": 1.0, "rewards/chosen": 0.1254931390285492, "rewards/margins": 1.0332814455032349, "rewards/rejected": -0.9077882766723633, "step": 525 }, { "epoch": 0.84, "learning_rate": 4.6880570409982173e-07, "logits/chosen": -1.3501313924789429, "logits/rejected": -1.3655818700790405, "logps/chosen": -71.48239135742188, "logps/rejected": -98.6288070678711, "loss": 0.6287, "rewards/accuracies": 0.75, "rewards/chosen": 0.5461432933807373, "rewards/margins": 0.8678500652313232, "rewards/rejected": -0.32170677185058594, "step": 526 }, { "epoch": 0.85, "learning_rate": 4.696969696969697e-07, "logits/chosen": -1.1225335597991943, "logits/rejected": -1.2468353509902954, "logps/chosen": -80.09088134765625, "logps/rejected": -79.40737915039062, "loss": 0.7199, "rewards/accuracies": 0.75, "rewards/chosen": -0.026747509837150574, "rewards/margins": 0.008074279874563217, "rewards/rejected": -0.03482179343700409, "step": 527 }, { "epoch": 0.85, "learning_rate": 4.705882352941176e-07, "logits/chosen": -1.2061996459960938, "logits/rejected": -1.1446453332901, "logps/chosen": -61.434844970703125, "logps/rejected": -86.4167251586914, "loss": 0.5524, "rewards/accuracies": 1.0, "rewards/chosen": 0.08185644447803497, "rewards/margins": 0.4391217529773712, "rewards/rejected": -0.35726529359817505, "step": 528 }, { "epoch": 0.85, "learning_rate": 4.714795008912656e-07, "logits/chosen": -1.2546424865722656, "logits/rejected": -1.2719806432724, "logps/chosen": -103.4587173461914, "logps/rejected": -103.05526733398438, "loss": 0.5637, "rewards/accuracies": 0.75, "rewards/chosen": 0.47841280698776245, "rewards/margins": 0.5294553637504578, "rewards/rejected": -0.05104255676269531, "step": 529 }, { "epoch": 0.85, "learning_rate": 4.723707664884135e-07, "logits/chosen": -1.197046160697937, "logits/rejected": -1.2775962352752686, "logps/chosen": -81.32250213623047, "logps/rejected": -108.00031280517578, "loss": 0.5489, "rewards/accuracies": 0.75, "rewards/chosen": 0.2760895788669586, "rewards/margins": 0.4764505624771118, "rewards/rejected": -0.200360968708992, "step": 530 }, { "epoch": 0.85, "learning_rate": 4.732620320855615e-07, "logits/chosen": -1.231143593788147, "logits/rejected": -1.2688180208206177, "logps/chosen": -74.77055358886719, "logps/rejected": -90.42788696289062, "loss": 0.6115, "rewards/accuracies": 0.75, "rewards/chosen": 0.09370565414428711, "rewards/margins": 0.414714515209198, "rewards/rejected": -0.3210088908672333, "step": 531 }, { "epoch": 0.85, "learning_rate": 4.7415329768270945e-07, "logits/chosen": -1.4162225723266602, "logits/rejected": -1.404780626296997, "logps/chosen": -79.75340270996094, "logps/rejected": -110.08251953125, "loss": 0.6176, "rewards/accuracies": 0.5, "rewards/chosen": 0.5088580846786499, "rewards/margins": 0.2608366310596466, "rewards/rejected": 0.24802151322364807, "step": 532 }, { "epoch": 0.86, "learning_rate": 4.750445632798574e-07, "logits/chosen": -1.1498849391937256, "logits/rejected": -1.187833547592163, "logps/chosen": -83.81990814208984, "logps/rejected": -82.23490905761719, "loss": 0.5914, "rewards/accuracies": 0.5, "rewards/chosen": -0.22813062369823456, "rewards/margins": -0.005987636744976044, "rewards/rejected": -0.2221429944038391, "step": 533 }, { "epoch": 0.86, "learning_rate": 4.7593582887700533e-07, "logits/chosen": -1.3782057762145996, "logits/rejected": -1.3437566757202148, "logps/chosen": -74.18921661376953, "logps/rejected": -73.58160400390625, "loss": 0.5428, "rewards/accuracies": 0.75, "rewards/chosen": -0.01146039366722107, "rewards/margins": 0.22365587949752808, "rewards/rejected": -0.23511627316474915, "step": 534 }, { "epoch": 0.86, "learning_rate": 4.768270944741533e-07, "logits/chosen": -1.072864055633545, "logits/rejected": -1.0429401397705078, "logps/chosen": -101.87779235839844, "logps/rejected": -97.3675537109375, "loss": 0.5175, "rewards/accuracies": 1.0, "rewards/chosen": 0.07676525413990021, "rewards/margins": 0.931308925151825, "rewards/rejected": -0.8545436859130859, "step": 535 }, { "epoch": 0.86, "learning_rate": 4.777183600713012e-07, "logits/chosen": -1.2275487184524536, "logits/rejected": -1.269572138786316, "logps/chosen": -85.5536117553711, "logps/rejected": -92.15901184082031, "loss": 0.5412, "rewards/accuracies": 0.75, "rewards/chosen": 0.2822950482368469, "rewards/margins": 0.10773582756519318, "rewards/rejected": 0.17455920577049255, "step": 536 }, { "epoch": 0.86, "learning_rate": 4.786096256684492e-07, "logits/chosen": -1.298561692237854, "logits/rejected": -1.2600250244140625, "logps/chosen": -85.48323822021484, "logps/rejected": -78.68019104003906, "loss": 0.6211, "rewards/accuracies": 0.75, "rewards/chosen": 0.07721023261547089, "rewards/margins": 0.15014401078224182, "rewards/rejected": -0.07293376326560974, "step": 537 }, { "epoch": 0.86, "learning_rate": 4.795008912655971e-07, "logits/chosen": -1.3352574110031128, "logits/rejected": -1.2423317432403564, "logps/chosen": -88.6736068725586, "logps/rejected": -104.8981704711914, "loss": 0.5699, "rewards/accuracies": 0.75, "rewards/chosen": 0.4905332624912262, "rewards/margins": 0.6380726099014282, "rewards/rejected": -0.14753934741020203, "step": 538 }, { "epoch": 0.87, "learning_rate": 4.803921568627451e-07, "logits/chosen": -1.2144358158111572, "logits/rejected": -1.2555122375488281, "logps/chosen": -87.49639129638672, "logps/rejected": -82.2857666015625, "loss": 0.5641, "rewards/accuracies": 0.75, "rewards/chosen": 0.036625102162361145, "rewards/margins": 0.206405371427536, "rewards/rejected": -0.16978025436401367, "step": 539 }, { "epoch": 0.87, "learning_rate": 4.81283422459893e-07, "logits/chosen": -1.3311662673950195, "logits/rejected": -1.333019733428955, "logps/chosen": -88.40093994140625, "logps/rejected": -84.13224792480469, "loss": 0.5598, "rewards/accuracies": 0.75, "rewards/chosen": 0.12027893215417862, "rewards/margins": 0.511608898639679, "rewards/rejected": -0.39132997393608093, "step": 540 }, { "epoch": 0.87, "learning_rate": 4.82174688057041e-07, "logits/chosen": -1.214052677154541, "logits/rejected": -1.1910885572433472, "logps/chosen": -75.60552978515625, "logps/rejected": -84.18815612792969, "loss": 0.6806, "rewards/accuracies": 0.5, "rewards/chosen": -0.13181447982788086, "rewards/margins": 0.07297420501708984, "rewards/rejected": -0.2047886699438095, "step": 541 }, { "epoch": 0.87, "learning_rate": 4.83065953654189e-07, "logits/chosen": -1.4014720916748047, "logits/rejected": -1.3548715114593506, "logps/chosen": -87.47981262207031, "logps/rejected": -92.57257080078125, "loss": 0.5512, "rewards/accuracies": 0.75, "rewards/chosen": 0.16412773728370667, "rewards/margins": 0.4658505618572235, "rewards/rejected": -0.30172282457351685, "step": 542 }, { "epoch": 0.87, "learning_rate": 4.839572192513369e-07, "logits/chosen": -1.30021071434021, "logits/rejected": -1.3385143280029297, "logps/chosen": -73.56980895996094, "logps/rejected": -86.39370727539062, "loss": 0.588, "rewards/accuracies": 1.0, "rewards/chosen": 0.3292170763015747, "rewards/margins": 0.5708808302879333, "rewards/rejected": -0.24166375398635864, "step": 543 }, { "epoch": 0.87, "learning_rate": 4.848484848484849e-07, "logits/chosen": -1.225258231163025, "logits/rejected": -1.2333037853240967, "logps/chosen": -96.12332153320312, "logps/rejected": -90.2834243774414, "loss": 0.5256, "rewards/accuracies": 0.75, "rewards/chosen": 0.18250027298927307, "rewards/margins": 0.40183037519454956, "rewards/rejected": -0.21933013200759888, "step": 544 }, { "epoch": 0.87, "learning_rate": 4.857397504456328e-07, "logits/chosen": -1.3764548301696777, "logits/rejected": -1.3731576204299927, "logps/chosen": -98.42424774169922, "logps/rejected": -86.71283721923828, "loss": 0.6197, "rewards/accuracies": 0.25, "rewards/chosen": 0.10954676568508148, "rewards/margins": -0.16430866718292236, "rewards/rejected": 0.27385538816452026, "step": 545 }, { "epoch": 0.88, "learning_rate": 4.866310160427808e-07, "logits/chosen": -1.5454630851745605, "logits/rejected": -1.4743194580078125, "logps/chosen": -107.26051330566406, "logps/rejected": -84.01083374023438, "loss": 0.5673, "rewards/accuracies": 0.5, "rewards/chosen": 0.28673800826072693, "rewards/margins": 0.26270198822021484, "rewards/rejected": 0.024036023765802383, "step": 546 }, { "epoch": 0.88, "learning_rate": 4.875222816399287e-07, "logits/chosen": -1.2737960815429688, "logits/rejected": -1.2832921743392944, "logps/chosen": -77.96357727050781, "logps/rejected": -88.15559387207031, "loss": 0.4448, "rewards/accuracies": 1.0, "rewards/chosen": 0.33258840441703796, "rewards/margins": 1.1377196311950684, "rewards/rejected": -0.8051311373710632, "step": 547 }, { "epoch": 0.88, "learning_rate": 4.884135472370767e-07, "logits/chosen": -1.3180967569351196, "logits/rejected": -1.2780282497406006, "logps/chosen": -96.52350616455078, "logps/rejected": -94.44822692871094, "loss": 0.4853, "rewards/accuracies": 1.0, "rewards/chosen": 0.4558217227458954, "rewards/margins": 0.961876630783081, "rewards/rejected": -0.5060548782348633, "step": 548 }, { "epoch": 0.88, "learning_rate": 4.893048128342245e-07, "logits/chosen": -1.2838759422302246, "logits/rejected": -1.318559169769287, "logps/chosen": -69.40345764160156, "logps/rejected": -68.38792419433594, "loss": 0.4806, "rewards/accuracies": 1.0, "rewards/chosen": 0.30686864256858826, "rewards/margins": 0.5407623648643494, "rewards/rejected": -0.23389369249343872, "step": 549 }, { "epoch": 0.88, "learning_rate": 4.901960784313725e-07, "logits/chosen": -1.1323721408843994, "logits/rejected": -1.198715090751648, "logps/chosen": -77.53663635253906, "logps/rejected": -86.59736633300781, "loss": 0.6526, "rewards/accuracies": 1.0, "rewards/chosen": 0.04195786267518997, "rewards/margins": 0.4667506515979767, "rewards/rejected": -0.4247927665710449, "step": 550 }, { "epoch": 0.88, "learning_rate": 4.910873440285204e-07, "logits/chosen": -1.294614315032959, "logits/rejected": -1.3211073875427246, "logps/chosen": -54.81311798095703, "logps/rejected": -58.249305725097656, "loss": 0.612, "rewards/accuracies": 1.0, "rewards/chosen": 0.08941354602575302, "rewards/margins": 0.586482048034668, "rewards/rejected": -0.49706852436065674, "step": 551 }, { "epoch": 0.89, "learning_rate": 4.919786096256684e-07, "logits/chosen": -1.0424494743347168, "logits/rejected": -1.069559931755066, "logps/chosen": -66.43762969970703, "logps/rejected": -77.0403060913086, "loss": 0.524, "rewards/accuracies": 1.0, "rewards/chosen": 0.2138873189687729, "rewards/margins": 0.795117974281311, "rewards/rejected": -0.581230640411377, "step": 552 }, { "epoch": 0.89, "learning_rate": 4.928698752228163e-07, "logits/chosen": -1.2054654359817505, "logits/rejected": -1.191928744316101, "logps/chosen": -121.63521575927734, "logps/rejected": -97.16558837890625, "loss": 0.7453, "rewards/accuracies": 0.75, "rewards/chosen": 0.17204265296459198, "rewards/margins": 0.2710127830505371, "rewards/rejected": -0.09897013008594513, "step": 553 }, { "epoch": 0.89, "learning_rate": 4.937611408199643e-07, "logits/chosen": -1.2801506519317627, "logits/rejected": -1.238020658493042, "logps/chosen": -95.95796203613281, "logps/rejected": -96.977783203125, "loss": 0.5683, "rewards/accuracies": 0.75, "rewards/chosen": 0.06383419036865234, "rewards/margins": 0.5699779987335205, "rewards/rejected": -0.5061437487602234, "step": 554 }, { "epoch": 0.89, "learning_rate": 4.946524064171122e-07, "logits/chosen": -1.4404335021972656, "logits/rejected": -1.452319622039795, "logps/chosen": -89.66966247558594, "logps/rejected": -91.53480529785156, "loss": 0.5671, "rewards/accuracies": 0.25, "rewards/chosen": -0.16168269515037537, "rewards/margins": -0.2626934051513672, "rewards/rejected": 0.10101071000099182, "step": 555 }, { "epoch": 0.89, "learning_rate": 4.955436720142602e-07, "logits/chosen": -1.5597975254058838, "logits/rejected": -1.5821168422698975, "logps/chosen": -80.59342956542969, "logps/rejected": -87.59696960449219, "loss": 0.4188, "rewards/accuracies": 1.0, "rewards/chosen": 0.21060650050640106, "rewards/margins": 1.4811652898788452, "rewards/rejected": -1.2705588340759277, "step": 556 }, { "epoch": 0.89, "learning_rate": 4.964349376114082e-07, "logits/chosen": -1.3771765232086182, "logits/rejected": -1.3455736637115479, "logps/chosen": -60.690284729003906, "logps/rejected": -78.45352172851562, "loss": 0.6013, "rewards/accuracies": 0.75, "rewards/chosen": -0.20071697235107422, "rewards/margins": 0.21986961364746094, "rewards/rejected": -0.42058658599853516, "step": 557 }, { "epoch": 0.9, "learning_rate": 4.973262032085561e-07, "logits/chosen": -1.4628134965896606, "logits/rejected": -1.364511489868164, "logps/chosen": -86.15171813964844, "logps/rejected": -101.25032043457031, "loss": 0.6716, "rewards/accuracies": 1.0, "rewards/chosen": -0.17940084636211395, "rewards/margins": 0.5485048294067383, "rewards/rejected": -0.7279056906700134, "step": 558 }, { "epoch": 0.9, "learning_rate": 4.982174688057041e-07, "logits/chosen": -1.33989679813385, "logits/rejected": -1.3297724723815918, "logps/chosen": -71.61750030517578, "logps/rejected": -86.65336608886719, "loss": 0.4819, "rewards/accuracies": 0.75, "rewards/chosen": 0.39479389786720276, "rewards/margins": 0.9868059158325195, "rewards/rejected": -0.5920120477676392, "step": 559 }, { "epoch": 0.9, "learning_rate": 4.99108734402852e-07, "logits/chosen": -1.3494040966033936, "logits/rejected": -1.4182546138763428, "logps/chosen": -74.16643524169922, "logps/rejected": -90.03691101074219, "loss": 0.4271, "rewards/accuracies": 0.75, "rewards/chosen": 0.23634515702724457, "rewards/margins": 0.9433552026748657, "rewards/rejected": -0.7070100903511047, "step": 560 }, { "epoch": 0.9, "learning_rate": 5e-07, "logits/chosen": -1.285981297492981, "logits/rejected": -1.258286714553833, "logps/chosen": -85.81561279296875, "logps/rejected": -88.25396728515625, "loss": 0.4903, "rewards/accuracies": 0.75, "rewards/chosen": 0.04454001784324646, "rewards/margins": 1.0039767026901245, "rewards/rejected": -0.9594365954399109, "step": 561 }, { "epoch": 0.9, "learning_rate": 4.999009116131589e-07, "logits/chosen": -1.120640754699707, "logits/rejected": -1.178164005279541, "logps/chosen": -67.48035430908203, "logps/rejected": -63.35462188720703, "loss": 0.6207, "rewards/accuracies": 0.5, "rewards/chosen": -0.2988602817058563, "rewards/margins": 0.04768940806388855, "rewards/rejected": -0.3465496897697449, "step": 562 }, { "epoch": 0.9, "learning_rate": 4.998018232263179e-07, "logits/chosen": -1.1995348930358887, "logits/rejected": -1.1698591709136963, "logps/chosen": -90.9691162109375, "logps/rejected": -101.98523712158203, "loss": 0.5705, "rewards/accuracies": 0.5, "rewards/chosen": -0.30659523606300354, "rewards/margins": -0.2975767254829407, "rewards/rejected": -0.009018510580062866, "step": 563 }, { "epoch": 0.91, "learning_rate": 4.997027348394768e-07, "logits/chosen": -1.227642297744751, "logits/rejected": -1.3569164276123047, "logps/chosen": -88.36021423339844, "logps/rejected": -85.75800323486328, "loss": 0.5984, "rewards/accuracies": 1.0, "rewards/chosen": 0.20611286163330078, "rewards/margins": 0.4835100471973419, "rewards/rejected": -0.27739715576171875, "step": 564 }, { "epoch": 0.91, "learning_rate": 4.996036464526357e-07, "logits/chosen": -1.4158629179000854, "logits/rejected": -1.3493050336837769, "logps/chosen": -73.33955383300781, "logps/rejected": -78.94459533691406, "loss": 0.5957, "rewards/accuracies": 0.5, "rewards/chosen": -0.36537277698516846, "rewards/margins": 0.15291184186935425, "rewards/rejected": -0.5182846188545227, "step": 565 }, { "epoch": 0.91, "learning_rate": 4.995045580657946e-07, "logits/chosen": -1.0916895866394043, "logits/rejected": -1.1476216316223145, "logps/chosen": -86.02511596679688, "logps/rejected": -115.76933288574219, "loss": 0.6424, "rewards/accuracies": 0.75, "rewards/chosen": 0.12086418271064758, "rewards/margins": 0.6599949598312378, "rewards/rejected": -0.5391308069229126, "step": 566 }, { "epoch": 0.91, "learning_rate": 4.994054696789536e-07, "logits/chosen": -1.2955822944641113, "logits/rejected": -1.3777801990509033, "logps/chosen": -80.52973175048828, "logps/rejected": -117.38594818115234, "loss": 0.5107, "rewards/accuracies": 0.5, "rewards/chosen": -0.3260672688484192, "rewards/margins": -0.2619982957839966, "rewards/rejected": -0.0640689805150032, "step": 567 }, { "epoch": 0.91, "learning_rate": 4.993063812921125e-07, "logits/chosen": -1.1383839845657349, "logits/rejected": -1.1670085191726685, "logps/chosen": -91.63558959960938, "logps/rejected": -112.77704620361328, "loss": 0.4847, "rewards/accuracies": 1.0, "rewards/chosen": 0.3510518968105316, "rewards/margins": 0.9902811646461487, "rewards/rejected": -0.6392291784286499, "step": 568 }, { "epoch": 0.91, "learning_rate": 4.992072929052714e-07, "logits/chosen": -1.2374907732009888, "logits/rejected": -1.2569301128387451, "logps/chosen": -72.45689392089844, "logps/rejected": -75.0589599609375, "loss": 0.5812, "rewards/accuracies": 0.5, "rewards/chosen": -0.5854387283325195, "rewards/margins": -0.5143527984619141, "rewards/rejected": -0.07108592987060547, "step": 569 }, { "epoch": 0.91, "learning_rate": 4.991082045184305e-07, "logits/chosen": -1.3399752378463745, "logits/rejected": -1.2377240657806396, "logps/chosen": -64.93412780761719, "logps/rejected": -101.14253234863281, "loss": 0.5135, "rewards/accuracies": 1.0, "rewards/chosen": 0.18759813904762268, "rewards/margins": 1.3550052642822266, "rewards/rejected": -1.1674072742462158, "step": 570 }, { "epoch": 0.92, "learning_rate": 4.990091161315894e-07, "logits/chosen": -1.2700810432434082, "logits/rejected": -1.336404800415039, "logps/chosen": -94.3863754272461, "logps/rejected": -107.42289733886719, "loss": 0.6091, "rewards/accuracies": 1.0, "rewards/chosen": 0.6374274492263794, "rewards/margins": 1.5782051086425781, "rewards/rejected": -0.940777599811554, "step": 571 }, { "epoch": 0.92, "learning_rate": 4.989100277447483e-07, "logits/chosen": -1.3706709146499634, "logits/rejected": -1.3005098104476929, "logps/chosen": -87.9084701538086, "logps/rejected": -95.61963653564453, "loss": 0.4605, "rewards/accuracies": 0.75, "rewards/chosen": 0.2238912731409073, "rewards/margins": 0.42033764719963074, "rewards/rejected": -0.19644641876220703, "step": 572 }, { "epoch": 0.92, "learning_rate": 4.988109393579073e-07, "logits/chosen": -0.9631893038749695, "logits/rejected": -1.0513523817062378, "logps/chosen": -92.02458190917969, "logps/rejected": -70.5799560546875, "loss": 0.5872, "rewards/accuracies": 0.75, "rewards/chosen": -0.14784622192382812, "rewards/margins": 0.40031006932258606, "rewards/rejected": -0.5481562614440918, "step": 573 }, { "epoch": 0.92, "learning_rate": 4.987118509710662e-07, "logits/chosen": -1.2372655868530273, "logits/rejected": -1.3549309968948364, "logps/chosen": -76.55875396728516, "logps/rejected": -95.23396301269531, "loss": 0.6052, "rewards/accuracies": 1.0, "rewards/chosen": 0.03763227164745331, "rewards/margins": 0.33491814136505127, "rewards/rejected": -0.29728585481643677, "step": 574 }, { "epoch": 0.92, "learning_rate": 4.986127625842251e-07, "logits/chosen": -1.4140807390213013, "logits/rejected": -1.3425477743148804, "logps/chosen": -82.0469970703125, "logps/rejected": -94.49651336669922, "loss": 0.5915, "rewards/accuracies": 0.5, "rewards/chosen": -0.055813416838645935, "rewards/margins": 0.1146366074681282, "rewards/rejected": -0.17045000195503235, "step": 575 }, { "epoch": 0.92, "learning_rate": 4.98513674197384e-07, "logits/chosen": -1.3198412656784058, "logits/rejected": -1.2619305849075317, "logps/chosen": -88.65512084960938, "logps/rejected": -97.06603240966797, "loss": 0.5274, "rewards/accuracies": 0.75, "rewards/chosen": -0.37089288234710693, "rewards/margins": 0.4976494014263153, "rewards/rejected": -0.8685423135757446, "step": 576 }, { "epoch": 0.93, "learning_rate": 4.98414585810543e-07, "logits/chosen": -1.2501246929168701, "logits/rejected": -1.2402082681655884, "logps/chosen": -76.59994506835938, "logps/rejected": -73.78067016601562, "loss": 0.5572, "rewards/accuracies": 0.75, "rewards/chosen": 0.16381186246871948, "rewards/margins": 0.4064028859138489, "rewards/rejected": -0.242590993642807, "step": 577 }, { "epoch": 0.93, "learning_rate": 4.983154974237019e-07, "logits/chosen": -1.2636576890945435, "logits/rejected": -1.3445930480957031, "logps/chosen": -83.80538177490234, "logps/rejected": -128.08084106445312, "loss": 0.5559, "rewards/accuracies": 0.75, "rewards/chosen": 0.4779362678527832, "rewards/margins": 1.111436367034912, "rewards/rejected": -0.6335000991821289, "step": 578 }, { "epoch": 0.93, "learning_rate": 4.982164090368608e-07, "logits/chosen": -1.0301737785339355, "logits/rejected": -1.0386723279953003, "logps/chosen": -78.90385437011719, "logps/rejected": -89.29969787597656, "loss": 0.4674, "rewards/accuracies": 0.75, "rewards/chosen": 0.23647700250148773, "rewards/margins": 0.30727100372314453, "rewards/rejected": -0.070794016122818, "step": 579 }, { "epoch": 0.93, "learning_rate": 4.981173206500197e-07, "logits/chosen": -1.1910960674285889, "logits/rejected": -1.2563642263412476, "logps/chosen": -76.75785827636719, "logps/rejected": -101.27616882324219, "loss": 0.5398, "rewards/accuracies": 0.75, "rewards/chosen": 0.16353550553321838, "rewards/margins": 0.8526790738105774, "rewards/rejected": -0.6891435384750366, "step": 580 }, { "epoch": 0.93, "learning_rate": 4.980182322631787e-07, "logits/chosen": -1.2392325401306152, "logits/rejected": -1.295891284942627, "logps/chosen": -85.30451965332031, "logps/rejected": -77.27715301513672, "loss": 0.544, "rewards/accuracies": 0.75, "rewards/chosen": -0.3910151422023773, "rewards/margins": 0.15746518969535828, "rewards/rejected": -0.5484803318977356, "step": 581 }, { "epoch": 0.93, "learning_rate": 4.979191438763377e-07, "logits/chosen": -1.152367353439331, "logits/rejected": -1.1962895393371582, "logps/chosen": -72.00267791748047, "logps/rejected": -93.83041381835938, "loss": 0.498, "rewards/accuracies": 0.75, "rewards/chosen": 0.2042720913887024, "rewards/margins": 0.20998209714889526, "rewards/rejected": -0.005710035562515259, "step": 582 }, { "epoch": 0.94, "learning_rate": 4.978200554894967e-07, "logits/chosen": -1.3009443283081055, "logits/rejected": -1.2929425239562988, "logps/chosen": -86.97715759277344, "logps/rejected": -91.3478012084961, "loss": 0.5083, "rewards/accuracies": 0.75, "rewards/chosen": 0.18478050827980042, "rewards/margins": 0.48371171951293945, "rewards/rejected": -0.29893121123313904, "step": 583 }, { "epoch": 0.94, "learning_rate": 4.977209671026556e-07, "logits/chosen": -1.2644720077514648, "logits/rejected": -1.2851758003234863, "logps/chosen": -71.13385772705078, "logps/rejected": -71.88487243652344, "loss": 0.5852, "rewards/accuracies": 0.75, "rewards/chosen": 0.2822767198085785, "rewards/margins": 0.4247641861438751, "rewards/rejected": -0.14248742163181305, "step": 584 }, { "epoch": 0.94, "learning_rate": 4.976218787158145e-07, "logits/chosen": -1.2103443145751953, "logits/rejected": -1.1881924867630005, "logps/chosen": -64.32135009765625, "logps/rejected": -80.29042053222656, "loss": 0.5376, "rewards/accuracies": 1.0, "rewards/chosen": 0.3851920962333679, "rewards/margins": 0.863133430480957, "rewards/rejected": -0.4779413342475891, "step": 585 }, { "epoch": 0.94, "learning_rate": 4.975227903289734e-07, "logits/chosen": -1.1768839359283447, "logits/rejected": -1.179174542427063, "logps/chosen": -59.19326400756836, "logps/rejected": -56.490089416503906, "loss": 0.4396, "rewards/accuracies": 1.0, "rewards/chosen": 0.3354972004890442, "rewards/margins": 0.5555751323699951, "rewards/rejected": -0.22007791697978973, "step": 586 }, { "epoch": 0.94, "learning_rate": 4.974237019421323e-07, "logits/chosen": -1.3507404327392578, "logits/rejected": -1.393140196800232, "logps/chosen": -76.04619598388672, "logps/rejected": -76.75607299804688, "loss": 0.5188, "rewards/accuracies": 0.75, "rewards/chosen": 0.17205534875392914, "rewards/margins": 0.7690553665161133, "rewards/rejected": -0.5970000624656677, "step": 587 }, { "epoch": 0.94, "learning_rate": 4.973246135552913e-07, "logits/chosen": -1.1391477584838867, "logits/rejected": -1.222862720489502, "logps/chosen": -100.2994155883789, "logps/rejected": -111.29771423339844, "loss": 0.5536, "rewards/accuracies": 0.75, "rewards/chosen": -0.1966579556465149, "rewards/margins": 0.5500324368476868, "rewards/rejected": -0.7466903924942017, "step": 588 }, { "epoch": 0.95, "learning_rate": 4.972255251684502e-07, "logits/chosen": -1.1883690357208252, "logits/rejected": -1.1895211935043335, "logps/chosen": -75.74125671386719, "logps/rejected": -92.78091430664062, "loss": 0.5756, "rewards/accuracies": 0.5, "rewards/chosen": -0.057890601456165314, "rewards/margins": 0.19253160059452057, "rewards/rejected": -0.2504222095012665, "step": 589 }, { "epoch": 0.95, "learning_rate": 4.971264367816091e-07, "logits/chosen": -1.33736252784729, "logits/rejected": -1.3468315601348877, "logps/chosen": -56.19721984863281, "logps/rejected": -89.37432861328125, "loss": 0.5893, "rewards/accuracies": 0.75, "rewards/chosen": 0.15139514207839966, "rewards/margins": 0.7856346368789673, "rewards/rejected": -0.6342394948005676, "step": 590 }, { "epoch": 0.95, "learning_rate": 4.970273483947681e-07, "logits/chosen": -1.25485098361969, "logits/rejected": -1.2543526887893677, "logps/chosen": -84.75244140625, "logps/rejected": -89.346923828125, "loss": 0.5758, "rewards/accuracies": 0.75, "rewards/chosen": 0.17230664193630219, "rewards/margins": 0.5664899945259094, "rewards/rejected": -0.39418336749076843, "step": 591 }, { "epoch": 0.95, "learning_rate": 4.96928260007927e-07, "logits/chosen": -1.245758295059204, "logits/rejected": -1.2876516580581665, "logps/chosen": -95.44541931152344, "logps/rejected": -98.16511535644531, "loss": 0.5025, "rewards/accuracies": 0.75, "rewards/chosen": 0.1914152204990387, "rewards/margins": 1.2667051553726196, "rewards/rejected": -1.0752899646759033, "step": 592 }, { "epoch": 0.95, "learning_rate": 4.968291716210861e-07, "logits/chosen": -1.4653078317642212, "logits/rejected": -1.4944977760314941, "logps/chosen": -77.44109344482422, "logps/rejected": -86.93052673339844, "loss": 0.6532, "rewards/accuracies": 0.75, "rewards/chosen": 0.2208063155412674, "rewards/margins": 0.4184355139732361, "rewards/rejected": -0.1976291686296463, "step": 593 }, { "epoch": 0.95, "learning_rate": 4.96730083234245e-07, "logits/chosen": -1.2143566608428955, "logits/rejected": -1.117998719215393, "logps/chosen": -117.43280792236328, "logps/rejected": -126.66216278076172, "loss": 0.5213, "rewards/accuracies": 0.75, "rewards/chosen": -0.17674694955348969, "rewards/margins": 0.4255586564540863, "rewards/rejected": -0.6023055911064148, "step": 594 }, { "epoch": 0.96, "learning_rate": 4.966309948474039e-07, "logits/chosen": -1.1985645294189453, "logits/rejected": -1.18355131149292, "logps/chosen": -88.03733825683594, "logps/rejected": -98.13749694824219, "loss": 0.614, "rewards/accuracies": 0.25, "rewards/chosen": -0.042481809854507446, "rewards/margins": 0.20142421126365662, "rewards/rejected": -0.24390602111816406, "step": 595 }, { "epoch": 0.96, "learning_rate": 4.965319064605628e-07, "logits/chosen": -1.3978559970855713, "logits/rejected": -1.4010615348815918, "logps/chosen": -105.0785903930664, "logps/rejected": -115.11529541015625, "loss": 0.5549, "rewards/accuracies": 0.75, "rewards/chosen": 0.16706030070781708, "rewards/margins": 0.5290287733078003, "rewards/rejected": -0.361968457698822, "step": 596 }, { "epoch": 0.96, "learning_rate": 4.964328180737217e-07, "logits/chosen": -1.3503137826919556, "logits/rejected": -1.3680930137634277, "logps/chosen": -105.66533660888672, "logps/rejected": -105.16546630859375, "loss": 0.4967, "rewards/accuracies": 0.75, "rewards/chosen": 0.04705962538719177, "rewards/margins": 0.7302875518798828, "rewards/rejected": -0.6832278966903687, "step": 597 }, { "epoch": 0.96, "learning_rate": 4.963337296868807e-07, "logits/chosen": -1.0336048603057861, "logits/rejected": -1.0683064460754395, "logps/chosen": -96.23306274414062, "logps/rejected": -112.42273712158203, "loss": 0.4875, "rewards/accuracies": 0.5, "rewards/chosen": -0.15882760286331177, "rewards/margins": 0.8142921924591064, "rewards/rejected": -0.9731197357177734, "step": 598 }, { "epoch": 0.96, "learning_rate": 4.962346413000396e-07, "logits/chosen": -1.227872371673584, "logits/rejected": -1.2946739196777344, "logps/chosen": -73.75743865966797, "logps/rejected": -83.07308959960938, "loss": 0.4614, "rewards/accuracies": 0.5, "rewards/chosen": 0.02288997173309326, "rewards/margins": 1.1017094850540161, "rewards/rejected": -1.0788193941116333, "step": 599 }, { "epoch": 0.96, "learning_rate": 4.961355529131985e-07, "logits/chosen": -1.2668145895004272, "logits/rejected": -1.214590072631836, "logps/chosen": -66.786865234375, "logps/rejected": -93.34317779541016, "loss": 0.472, "rewards/accuracies": 1.0, "rewards/chosen": -0.13436584174633026, "rewards/margins": 0.6218290328979492, "rewards/rejected": -0.7561948895454407, "step": 600 }, { "epoch": 0.96, "learning_rate": 4.960364645263575e-07, "logits/chosen": -1.1189699172973633, "logits/rejected": -1.1388099193572998, "logps/chosen": -78.88019561767578, "logps/rejected": -94.41413879394531, "loss": 0.598, "rewards/accuracies": 0.75, "rewards/chosen": 0.1018582284450531, "rewards/margins": 0.27284228801727295, "rewards/rejected": -0.17098407447338104, "step": 601 }, { "epoch": 0.97, "learning_rate": 4.959373761395164e-07, "logits/chosen": -1.1796256303787231, "logits/rejected": -1.3338419198989868, "logps/chosen": -115.3305435180664, "logps/rejected": -113.66812133789062, "loss": 0.5184, "rewards/accuracies": 0.75, "rewards/chosen": 0.11392012238502502, "rewards/margins": 0.6475144624710083, "rewards/rejected": -0.5335943102836609, "step": 602 }, { "epoch": 0.97, "learning_rate": 4.958382877526754e-07, "logits/chosen": -1.1492931842803955, "logits/rejected": -1.1668894290924072, "logps/chosen": -111.79397583007812, "logps/rejected": -99.64217376708984, "loss": 0.4899, "rewards/accuracies": 0.75, "rewards/chosen": -0.05117340385913849, "rewards/margins": 1.0088286399841309, "rewards/rejected": -1.0600019693374634, "step": 603 }, { "epoch": 0.97, "learning_rate": 4.957391993658343e-07, "logits/chosen": -1.369917392730713, "logits/rejected": -1.3833253383636475, "logps/chosen": -91.1497573852539, "logps/rejected": -89.06022644042969, "loss": 0.5099, "rewards/accuracies": 1.0, "rewards/chosen": -0.005936337634921074, "rewards/margins": 0.7539305686950684, "rewards/rejected": -0.7598669528961182, "step": 604 }, { "epoch": 0.97, "learning_rate": 4.956401109789933e-07, "logits/chosen": -1.218024492263794, "logits/rejected": -1.1901224851608276, "logps/chosen": -70.98052215576172, "logps/rejected": -83.63078308105469, "loss": 0.5684, "rewards/accuracies": 1.0, "rewards/chosen": 0.20148998498916626, "rewards/margins": 0.8203837871551514, "rewards/rejected": -0.6188938617706299, "step": 605 }, { "epoch": 0.97, "learning_rate": 4.955410225921522e-07, "logits/chosen": -1.0874919891357422, "logits/rejected": -1.0735647678375244, "logps/chosen": -96.29199981689453, "logps/rejected": -109.10118103027344, "loss": 0.5126, "rewards/accuracies": 0.75, "rewards/chosen": 0.012957759201526642, "rewards/margins": 0.8108606338500977, "rewards/rejected": -0.7979028820991516, "step": 606 }, { "epoch": 0.97, "learning_rate": 4.954419342053111e-07, "logits/chosen": -1.308809757232666, "logits/rejected": -1.3217300176620483, "logps/chosen": -84.76177978515625, "logps/rejected": -103.04386901855469, "loss": 0.6098, "rewards/accuracies": 0.75, "rewards/chosen": -0.03036022186279297, "rewards/margins": 0.5690226554870605, "rewards/rejected": -0.5993828177452087, "step": 607 }, { "epoch": 0.98, "learning_rate": 4.953428458184701e-07, "logits/chosen": -1.3524197340011597, "logits/rejected": -1.2285836935043335, "logps/chosen": -65.55500793457031, "logps/rejected": -73.93505859375, "loss": 0.5174, "rewards/accuracies": 0.5, "rewards/chosen": -0.008192157372832298, "rewards/margins": 0.0657425969839096, "rewards/rejected": -0.07393474876880646, "step": 608 }, { "epoch": 0.98, "learning_rate": 4.95243757431629e-07, "logits/chosen": -1.394136667251587, "logits/rejected": -1.4213712215423584, "logps/chosen": -92.72711181640625, "logps/rejected": -100.61636352539062, "loss": 0.6154, "rewards/accuracies": 0.5, "rewards/chosen": 0.04054677486419678, "rewards/margins": 0.5561317801475525, "rewards/rejected": -0.5155849456787109, "step": 609 }, { "epoch": 0.98, "learning_rate": 4.951446690447879e-07, "logits/chosen": -1.3426814079284668, "logits/rejected": -1.3937761783599854, "logps/chosen": -110.43621826171875, "logps/rejected": -112.39772033691406, "loss": 0.4923, "rewards/accuracies": 0.5, "rewards/chosen": 0.22036132216453552, "rewards/margins": 1.0566819906234741, "rewards/rejected": -0.836320698261261, "step": 610 }, { "epoch": 0.98, "learning_rate": 4.950455806579469e-07, "logits/chosen": -1.336385726928711, "logits/rejected": -1.3817124366760254, "logps/chosen": -55.61134338378906, "logps/rejected": -67.29580688476562, "loss": 0.5324, "rewards/accuracies": 0.75, "rewards/chosen": -0.4319985508918762, "rewards/margins": 0.3919748067855835, "rewards/rejected": -0.8239734172821045, "step": 611 }, { "epoch": 0.98, "learning_rate": 4.949464922711058e-07, "logits/chosen": -1.2799365520477295, "logits/rejected": -1.323362112045288, "logps/chosen": -96.78672790527344, "logps/rejected": -101.25515747070312, "loss": 0.4555, "rewards/accuracies": 0.5, "rewards/chosen": 0.012155160307884216, "rewards/margins": 0.9761806130409241, "rewards/rejected": -0.9640255570411682, "step": 612 }, { "epoch": 0.98, "learning_rate": 4.948474038842647e-07, "logits/chosen": -1.1801824569702148, "logits/rejected": -1.1360538005828857, "logps/chosen": -86.85538482666016, "logps/rejected": -112.64627838134766, "loss": 0.5193, "rewards/accuracies": 0.75, "rewards/chosen": -0.23429737985134125, "rewards/margins": 0.8688868284225464, "rewards/rejected": -1.1031842231750488, "step": 613 }, { "epoch": 0.99, "learning_rate": 4.947483154974237e-07, "logits/chosen": -1.2512667179107666, "logits/rejected": -1.3186497688293457, "logps/chosen": -87.35343933105469, "logps/rejected": -68.60981750488281, "loss": 0.5044, "rewards/accuracies": 0.5, "rewards/chosen": 0.11721611022949219, "rewards/margins": 0.06015154719352722, "rewards/rejected": 0.05706453323364258, "step": 614 }, { "epoch": 0.99, "learning_rate": 4.946492271105826e-07, "logits/chosen": -1.293550729751587, "logits/rejected": -1.2342557907104492, "logps/chosen": -98.51182556152344, "logps/rejected": -101.82608795166016, "loss": 0.4262, "rewards/accuracies": 1.0, "rewards/chosen": 0.327775776386261, "rewards/margins": 1.0352253913879395, "rewards/rejected": -0.7074496746063232, "step": 615 }, { "epoch": 0.99, "learning_rate": 4.945501387237415e-07, "logits/chosen": -1.2498924732208252, "logits/rejected": -1.210220456123352, "logps/chosen": -84.36942291259766, "logps/rejected": -113.22923278808594, "loss": 0.3929, "rewards/accuracies": 1.0, "rewards/chosen": 0.21220609545707703, "rewards/margins": 1.5308270454406738, "rewards/rejected": -1.318621039390564, "step": 616 }, { "epoch": 0.99, "learning_rate": 4.944510503369005e-07, "logits/chosen": -1.2861942052841187, "logits/rejected": -1.324744462966919, "logps/chosen": -92.80854797363281, "logps/rejected": -88.98739624023438, "loss": 0.5026, "rewards/accuracies": 0.75, "rewards/chosen": -0.303274929523468, "rewards/margins": 0.8929769992828369, "rewards/rejected": -1.1962519884109497, "step": 617 }, { "epoch": 0.99, "learning_rate": 4.943519619500594e-07, "logits/chosen": -1.2218245267868042, "logits/rejected": -1.1458569765090942, "logps/chosen": -82.04283142089844, "logps/rejected": -98.89848327636719, "loss": 0.5439, "rewards/accuracies": 0.75, "rewards/chosen": -0.23666420578956604, "rewards/margins": 1.128169059753418, "rewards/rejected": -1.3648332357406616, "step": 618 }, { "epoch": 0.99, "learning_rate": 4.942528735632184e-07, "logits/chosen": -1.181472659111023, "logits/rejected": -1.2209782600402832, "logps/chosen": -84.9579086303711, "logps/rejected": -97.18292999267578, "loss": 0.559, "rewards/accuracies": 0.75, "rewards/chosen": -0.5308420658111572, "rewards/margins": 0.18872787058353424, "rewards/rejected": -0.7195700407028198, "step": 619 }, { "epoch": 1.0, "learning_rate": 4.941537851763773e-07, "logits/chosen": -1.118780493736267, "logits/rejected": -1.217832088470459, "logps/chosen": -84.7042236328125, "logps/rejected": -59.18722152709961, "loss": 0.5081, "rewards/accuracies": 0.5, "rewards/chosen": -0.3175990879535675, "rewards/margins": 0.09654952585697174, "rewards/rejected": -0.41414862871170044, "step": 620 }, { "epoch": 1.0, "learning_rate": 4.940546967895363e-07, "logits/chosen": -1.3697404861450195, "logits/rejected": -1.3122416734695435, "logps/chosen": -80.81403350830078, "logps/rejected": -74.36764526367188, "loss": 0.4562, "rewards/accuracies": 0.75, "rewards/chosen": 0.14627933502197266, "rewards/margins": 1.3645817041397095, "rewards/rejected": -1.2183023691177368, "step": 621 }, { "epoch": 1.0, "learning_rate": 4.939556084026952e-07, "logits/chosen": -1.4454662799835205, "logits/rejected": -1.5150693655014038, "logps/chosen": -113.60740661621094, "logps/rejected": -124.25518798828125, "loss": 0.548, "rewards/accuracies": 0.75, "rewards/chosen": 0.06348437815904617, "rewards/margins": 0.7040247321128845, "rewards/rejected": -0.6405403017997742, "step": 622 }, { "epoch": 1.0, "learning_rate": 4.938565200158541e-07, "logits/chosen": -1.3987926244735718, "logits/rejected": -1.3848011493682861, "logps/chosen": -83.26103210449219, "logps/rejected": -113.33751678466797, "loss": 0.4615, "rewards/accuracies": 1.0, "rewards/chosen": 0.3207220137119293, "rewards/margins": 0.8599616885185242, "rewards/rejected": -0.5392396450042725, "step": 623 }, { "epoch": 1.0, "learning_rate": 4.93757431629013e-07, "logits/chosen": -1.1824086904525757, "logits/rejected": -1.2046773433685303, "logps/chosen": -108.81339263916016, "logps/rejected": -105.2822265625, "loss": 0.4835, "rewards/accuracies": 0.75, "rewards/chosen": 0.25489044189453125, "rewards/margins": 1.1353492736816406, "rewards/rejected": -0.8804588317871094, "step": 624 }, { "epoch": 1.0, "learning_rate": 4.93658343242172e-07, "logits/chosen": -1.4204611778259277, "logits/rejected": -1.465986967086792, "logps/chosen": -67.87345123291016, "logps/rejected": -94.70164489746094, "loss": 0.4054, "rewards/accuracies": 1.0, "rewards/chosen": 0.3485986590385437, "rewards/margins": 1.519789695739746, "rewards/rejected": -1.1711910963058472, "step": 625 }, { "epoch": 1.0, "learning_rate": 4.935592548553309e-07, "logits/chosen": -1.3747793436050415, "logits/rejected": -1.3640648126602173, "logps/chosen": -127.38607788085938, "logps/rejected": -107.25726318359375, "loss": 0.4574, "rewards/accuracies": 0.5, "rewards/chosen": 0.024099163711071014, "rewards/margins": 1.0369360446929932, "rewards/rejected": -1.0128369331359863, "step": 626 }, { "epoch": 1.01, "learning_rate": 4.934601664684898e-07, "logits/chosen": -1.4290785789489746, "logits/rejected": -1.4127848148345947, "logps/chosen": -85.94258880615234, "logps/rejected": -93.57180786132812, "loss": 0.4062, "rewards/accuracies": 0.75, "rewards/chosen": -0.08154525607824326, "rewards/margins": 0.78749680519104, "rewards/rejected": -0.8690420389175415, "step": 627 }, { "epoch": 1.01, "learning_rate": 4.933610780816487e-07, "logits/chosen": -1.1691243648529053, "logits/rejected": -1.1604645252227783, "logps/chosen": -83.20337677001953, "logps/rejected": -83.00849151611328, "loss": 0.3516, "rewards/accuracies": 1.0, "rewards/chosen": 0.2836381793022156, "rewards/margins": 0.9041110873222351, "rewards/rejected": -0.6204729080200195, "step": 628 }, { "epoch": 1.01, "learning_rate": 4.932619896948078e-07, "logits/chosen": -1.3732610940933228, "logits/rejected": -1.4009371995925903, "logps/chosen": -82.5809555053711, "logps/rejected": -114.51441192626953, "loss": 0.3926, "rewards/accuracies": 0.75, "rewards/chosen": 0.18428926169872284, "rewards/margins": 0.7399064302444458, "rewards/rejected": -0.5556171536445618, "step": 629 }, { "epoch": 1.01, "learning_rate": 4.931629013079667e-07, "logits/chosen": -1.0174378156661987, "logits/rejected": -0.9966703057289124, "logps/chosen": -77.53195190429688, "logps/rejected": -89.54998016357422, "loss": 0.5007, "rewards/accuracies": 0.75, "rewards/chosen": -0.07600125670433044, "rewards/margins": 0.30136364698410034, "rewards/rejected": -0.3773649334907532, "step": 630 }, { "epoch": 1.01, "learning_rate": 4.930638129211257e-07, "logits/chosen": -1.2752962112426758, "logits/rejected": -1.3282191753387451, "logps/chosen": -74.81553649902344, "logps/rejected": -70.57725524902344, "loss": 0.3933, "rewards/accuracies": 0.75, "rewards/chosen": -0.012401953339576721, "rewards/margins": 0.6531705856323242, "rewards/rejected": -0.6655725836753845, "step": 631 }, { "epoch": 1.01, "learning_rate": 4.929647245342846e-07, "logits/chosen": -1.1984384059906006, "logits/rejected": -1.2730480432510376, "logps/chosen": -67.83452606201172, "logps/rejected": -85.81764221191406, "loss": 0.4048, "rewards/accuracies": 1.0, "rewards/chosen": 0.1890590488910675, "rewards/margins": 1.2360830307006836, "rewards/rejected": -1.0470240116119385, "step": 632 }, { "epoch": 1.02, "learning_rate": 4.928656361474435e-07, "logits/chosen": -1.2535192966461182, "logits/rejected": -1.2146508693695068, "logps/chosen": -76.38894653320312, "logps/rejected": -83.81744384765625, "loss": 0.5053, "rewards/accuracies": 1.0, "rewards/chosen": -0.008367255330085754, "rewards/margins": 1.4612643718719482, "rewards/rejected": -1.469631552696228, "step": 633 }, { "epoch": 1.02, "learning_rate": 4.927665477606024e-07, "logits/chosen": -1.3400647640228271, "logits/rejected": -1.249745488166809, "logps/chosen": -83.00265502929688, "logps/rejected": -89.74468231201172, "loss": 0.3128, "rewards/accuracies": 1.0, "rewards/chosen": 0.0014201141893863678, "rewards/margins": 1.8063328266143799, "rewards/rejected": -1.8049126863479614, "step": 634 }, { "epoch": 1.02, "learning_rate": 4.926674593737614e-07, "logits/chosen": -1.2703955173492432, "logits/rejected": -1.246978998184204, "logps/chosen": -72.07546997070312, "logps/rejected": -92.34222412109375, "loss": 0.4488, "rewards/accuracies": 1.0, "rewards/chosen": 0.39483940601348877, "rewards/margins": 1.74713134765625, "rewards/rejected": -1.3522919416427612, "step": 635 }, { "epoch": 1.02, "learning_rate": 4.925683709869203e-07, "logits/chosen": -1.2539812326431274, "logits/rejected": -1.2792292833328247, "logps/chosen": -92.83717346191406, "logps/rejected": -88.76817321777344, "loss": 0.5116, "rewards/accuracies": 0.75, "rewards/chosen": -0.2486865222454071, "rewards/margins": 0.9229283332824707, "rewards/rejected": -1.1716147661209106, "step": 636 }, { "epoch": 1.02, "learning_rate": 4.924692826000792e-07, "logits/chosen": -1.188881278038025, "logits/rejected": -1.2930753231048584, "logps/chosen": -93.86711120605469, "logps/rejected": -114.76409912109375, "loss": 0.3959, "rewards/accuracies": 0.75, "rewards/chosen": -0.5074115991592407, "rewards/margins": 0.9505544900894165, "rewards/rejected": -1.4579660892486572, "step": 637 }, { "epoch": 1.02, "learning_rate": 4.923701942132381e-07, "logits/chosen": -1.2970342636108398, "logits/rejected": -1.3914799690246582, "logps/chosen": -71.81866455078125, "logps/rejected": -85.15555572509766, "loss": 0.3697, "rewards/accuracies": 1.0, "rewards/chosen": -0.06648483872413635, "rewards/margins": 1.2776933908462524, "rewards/rejected": -1.3441780805587769, "step": 638 }, { "epoch": 1.03, "learning_rate": 4.922711058263971e-07, "logits/chosen": -1.4044644832611084, "logits/rejected": -1.4443693161010742, "logps/chosen": -85.48546600341797, "logps/rejected": -113.81663513183594, "loss": 0.4003, "rewards/accuracies": 1.0, "rewards/chosen": -0.14920464158058167, "rewards/margins": 1.5218045711517334, "rewards/rejected": -1.6710090637207031, "step": 639 }, { "epoch": 1.03, "learning_rate": 4.92172017439556e-07, "logits/chosen": -1.400816559791565, "logits/rejected": -1.4438532590866089, "logps/chosen": -80.0377197265625, "logps/rejected": -99.41096496582031, "loss": 0.4066, "rewards/accuracies": 1.0, "rewards/chosen": 0.10934562236070633, "rewards/margins": 1.4306946992874146, "rewards/rejected": -1.3213491439819336, "step": 640 }, { "epoch": 1.03, "learning_rate": 4.920729290527151e-07, "logits/chosen": -1.279848337173462, "logits/rejected": -1.2278246879577637, "logps/chosen": -99.23127746582031, "logps/rejected": -108.05778503417969, "loss": 0.3572, "rewards/accuracies": 1.0, "rewards/chosen": -0.013964645564556122, "rewards/margins": 0.42387890815734863, "rewards/rejected": -0.4378435015678406, "step": 641 }, { "epoch": 1.03, "learning_rate": 4.91973840665874e-07, "logits/chosen": -1.0680007934570312, "logits/rejected": -1.0609631538391113, "logps/chosen": -75.86782836914062, "logps/rejected": -100.1400375366211, "loss": 0.4444, "rewards/accuracies": 0.75, "rewards/chosen": -0.4477252960205078, "rewards/margins": 0.3204069137573242, "rewards/rejected": -0.768132209777832, "step": 642 }, { "epoch": 1.03, "learning_rate": 4.918747522790329e-07, "logits/chosen": -1.1777209043502808, "logits/rejected": -1.2381162643432617, "logps/chosen": -60.29715347290039, "logps/rejected": -86.87454986572266, "loss": 0.3988, "rewards/accuracies": 1.0, "rewards/chosen": 0.20973452925682068, "rewards/margins": 1.9457286596298218, "rewards/rejected": -1.7359941005706787, "step": 643 }, { "epoch": 1.03, "learning_rate": 4.917756638921918e-07, "logits/chosen": -1.3921170234680176, "logits/rejected": -1.38982355594635, "logps/chosen": -107.87478637695312, "logps/rejected": -119.84930419921875, "loss": 0.4759, "rewards/accuracies": 0.5, "rewards/chosen": -0.18342742323875427, "rewards/margins": 1.3088815212249756, "rewards/rejected": -1.4923089742660522, "step": 644 }, { "epoch": 1.04, "learning_rate": 4.916765755053507e-07, "logits/chosen": -1.4214887619018555, "logits/rejected": -1.3831188678741455, "logps/chosen": -87.8571548461914, "logps/rejected": -115.83609008789062, "loss": 0.3721, "rewards/accuracies": 0.75, "rewards/chosen": -0.04127369821071625, "rewards/margins": 1.2303071022033691, "rewards/rejected": -1.271580696105957, "step": 645 }, { "epoch": 1.04, "learning_rate": 4.915774871185097e-07, "logits/chosen": -1.3584530353546143, "logits/rejected": -1.3286323547363281, "logps/chosen": -102.18888854980469, "logps/rejected": -116.63023376464844, "loss": 0.3835, "rewards/accuracies": 0.5, "rewards/chosen": -0.02864512801170349, "rewards/margins": 1.191394567489624, "rewards/rejected": -1.22003972530365, "step": 646 }, { "epoch": 1.04, "learning_rate": 4.914783987316686e-07, "logits/chosen": -1.2144412994384766, "logits/rejected": -1.2142349481582642, "logps/chosen": -70.0474853515625, "logps/rejected": -70.82455444335938, "loss": 0.5009, "rewards/accuracies": 0.75, "rewards/chosen": -0.4862457811832428, "rewards/margins": 0.018204249441623688, "rewards/rejected": -0.5044500827789307, "step": 647 }, { "epoch": 1.04, "learning_rate": 4.913793103448275e-07, "logits/chosen": -1.4385855197906494, "logits/rejected": -1.4576252698898315, "logps/chosen": -71.42688751220703, "logps/rejected": -107.62899780273438, "loss": 0.5393, "rewards/accuracies": 1.0, "rewards/chosen": 0.271390438079834, "rewards/margins": 2.428725481033325, "rewards/rejected": -2.157335042953491, "step": 648 }, { "epoch": 1.04, "learning_rate": 4.912802219579865e-07, "logits/chosen": -1.5344856977462769, "logits/rejected": -1.4828660488128662, "logps/chosen": -99.17671966552734, "logps/rejected": -109.19749450683594, "loss": 0.3673, "rewards/accuracies": 1.0, "rewards/chosen": -0.36937353014945984, "rewards/margins": 1.0891389846801758, "rewards/rejected": -1.4585124254226685, "step": 649 }, { "epoch": 1.04, "learning_rate": 4.911811335711454e-07, "logits/chosen": -1.361491084098816, "logits/rejected": -1.3482656478881836, "logps/chosen": -73.83683013916016, "logps/rejected": -90.90443420410156, "loss": 0.3095, "rewards/accuracies": 1.0, "rewards/chosen": 0.16256314516067505, "rewards/margins": 2.009800434112549, "rewards/rejected": -1.8472371101379395, "step": 650 }, { "epoch": 1.04, "learning_rate": 4.910820451843043e-07, "logits/chosen": -1.4432443380355835, "logits/rejected": -1.5034996271133423, "logps/chosen": -85.41169738769531, "logps/rejected": -99.66044616699219, "loss": 0.4121, "rewards/accuracies": 1.0, "rewards/chosen": 0.3063993453979492, "rewards/margins": 1.8867002725601196, "rewards/rejected": -1.5803008079528809, "step": 651 }, { "epoch": 1.05, "learning_rate": 4.909829567974634e-07, "logits/chosen": -1.2041844129562378, "logits/rejected": -1.2665297985076904, "logps/chosen": -76.08789825439453, "logps/rejected": -79.43038940429688, "loss": 0.4398, "rewards/accuracies": 0.75, "rewards/chosen": -0.3352125883102417, "rewards/margins": 0.8976166844367981, "rewards/rejected": -1.2328293323516846, "step": 652 }, { "epoch": 1.05, "learning_rate": 4.908838684106223e-07, "logits/chosen": -1.3578230142593384, "logits/rejected": -1.391003131866455, "logps/chosen": -91.78827667236328, "logps/rejected": -84.38101959228516, "loss": 0.4628, "rewards/accuracies": 0.75, "rewards/chosen": -0.6770832538604736, "rewards/margins": 0.312582403421402, "rewards/rejected": -0.9896656274795532, "step": 653 }, { "epoch": 1.05, "learning_rate": 4.907847800237812e-07, "logits/chosen": -1.421005368232727, "logits/rejected": -1.4379866123199463, "logps/chosen": -82.587890625, "logps/rejected": -108.2442626953125, "loss": 0.3911, "rewards/accuracies": 0.75, "rewards/chosen": -0.9373767375946045, "rewards/margins": 1.229305386543274, "rewards/rejected": -2.166682004928589, "step": 654 }, { "epoch": 1.05, "learning_rate": 4.906856916369401e-07, "logits/chosen": -1.4695162773132324, "logits/rejected": -1.4607484340667725, "logps/chosen": -109.91849517822266, "logps/rejected": -106.9648666381836, "loss": 0.4322, "rewards/accuracies": 0.75, "rewards/chosen": -0.7354822158813477, "rewards/margins": 1.1511354446411133, "rewards/rejected": -1.886617660522461, "step": 655 }, { "epoch": 1.05, "learning_rate": 4.905866032500991e-07, "logits/chosen": -1.3564175367355347, "logits/rejected": -1.3141993284225464, "logps/chosen": -82.68637084960938, "logps/rejected": -95.49859619140625, "loss": 0.3537, "rewards/accuracies": 1.0, "rewards/chosen": -0.15954247117042542, "rewards/margins": 1.99716055393219, "rewards/rejected": -2.156702995300293, "step": 656 }, { "epoch": 1.05, "learning_rate": 4.90487514863258e-07, "logits/chosen": -1.421112298965454, "logits/rejected": -1.484668254852295, "logps/chosen": -73.33814239501953, "logps/rejected": -107.28533172607422, "loss": 0.3578, "rewards/accuracies": 1.0, "rewards/chosen": 0.32613545656204224, "rewards/margins": 1.340680718421936, "rewards/rejected": -1.014545202255249, "step": 657 }, { "epoch": 1.06, "learning_rate": 4.903884264764169e-07, "logits/chosen": -1.3598849773406982, "logits/rejected": -1.2592506408691406, "logps/chosen": -81.93537139892578, "logps/rejected": -85.50978088378906, "loss": 0.3371, "rewards/accuracies": 1.0, "rewards/chosen": 0.09509792923927307, "rewards/margins": 1.9154412746429443, "rewards/rejected": -1.8203434944152832, "step": 658 }, { "epoch": 1.06, "learning_rate": 4.902893380895759e-07, "logits/chosen": -1.4744057655334473, "logits/rejected": -1.4016293287277222, "logps/chosen": -88.7152328491211, "logps/rejected": -93.18165588378906, "loss": 0.4269, "rewards/accuracies": 0.75, "rewards/chosen": -0.3904764652252197, "rewards/margins": 0.2449941784143448, "rewards/rejected": -0.6354706287384033, "step": 659 }, { "epoch": 1.06, "learning_rate": 4.901902497027348e-07, "logits/chosen": -1.4940810203552246, "logits/rejected": -1.533150553703308, "logps/chosen": -92.45540618896484, "logps/rejected": -104.93388366699219, "loss": 0.456, "rewards/accuracies": 0.75, "rewards/chosen": -0.16409960389137268, "rewards/margins": 1.9249449968338013, "rewards/rejected": -2.0890445709228516, "step": 660 }, { "epoch": 1.06, "learning_rate": 4.900911613158937e-07, "logits/chosen": -1.452685832977295, "logits/rejected": -1.387607455253601, "logps/chosen": -79.71768188476562, "logps/rejected": -110.98922729492188, "loss": 0.2936, "rewards/accuracies": 1.0, "rewards/chosen": 0.08903427422046661, "rewards/margins": 2.114292621612549, "rewards/rejected": -2.0252583026885986, "step": 661 }, { "epoch": 1.06, "learning_rate": 4.899920729290527e-07, "logits/chosen": -1.307018518447876, "logits/rejected": -1.2686035633087158, "logps/chosen": -80.6418685913086, "logps/rejected": -113.55203247070312, "loss": 0.4011, "rewards/accuracies": 0.75, "rewards/chosen": -0.06917839497327805, "rewards/margins": 1.9063173532485962, "rewards/rejected": -1.9754958152770996, "step": 662 }, { "epoch": 1.06, "learning_rate": 4.898929845422117e-07, "logits/chosen": -1.3047620058059692, "logits/rejected": -1.375331163406372, "logps/chosen": -93.12171173095703, "logps/rejected": -87.462158203125, "loss": 0.4294, "rewards/accuracies": 1.0, "rewards/chosen": 0.5945455431938171, "rewards/margins": 1.3139636516571045, "rewards/rejected": -0.7194180488586426, "step": 663 }, { "epoch": 1.07, "learning_rate": 4.897938961553706e-07, "logits/chosen": -1.4076361656188965, "logits/rejected": -1.4384138584136963, "logps/chosen": -68.136474609375, "logps/rejected": -91.2838134765625, "loss": 0.3755, "rewards/accuracies": 1.0, "rewards/chosen": 0.08144272863864899, "rewards/margins": 0.995979905128479, "rewards/rejected": -0.9145371913909912, "step": 664 }, { "epoch": 1.07, "learning_rate": 4.896948077685295e-07, "logits/chosen": -1.3898680210113525, "logits/rejected": -1.3664714097976685, "logps/chosen": -75.07269287109375, "logps/rejected": -97.89827728271484, "loss": 0.4304, "rewards/accuracies": 1.0, "rewards/chosen": 0.367051899433136, "rewards/margins": 0.6964758038520813, "rewards/rejected": -0.3294239044189453, "step": 665 }, { "epoch": 1.07, "learning_rate": 4.895957193816884e-07, "logits/chosen": -1.252596378326416, "logits/rejected": -1.2643063068389893, "logps/chosen": -86.65847778320312, "logps/rejected": -91.89356994628906, "loss": 0.3753, "rewards/accuracies": 1.0, "rewards/chosen": 0.4718170166015625, "rewards/margins": 1.935595989227295, "rewards/rejected": -1.4637789726257324, "step": 666 }, { "epoch": 1.07, "learning_rate": 4.894966309948474e-07, "logits/chosen": -1.3138413429260254, "logits/rejected": -1.3359873294830322, "logps/chosen": -63.763797760009766, "logps/rejected": -105.08572387695312, "loss": 0.4126, "rewards/accuracies": 0.75, "rewards/chosen": 0.3451036214828491, "rewards/margins": 1.1234796047210693, "rewards/rejected": -0.7783759832382202, "step": 667 }, { "epoch": 1.07, "learning_rate": 4.893975426080063e-07, "logits/chosen": -1.3729325532913208, "logits/rejected": -1.370099425315857, "logps/chosen": -76.80646514892578, "logps/rejected": -88.10457611083984, "loss": 0.4534, "rewards/accuracies": 0.75, "rewards/chosen": 0.47632887959480286, "rewards/margins": 1.4175050258636475, "rewards/rejected": -0.941176176071167, "step": 668 }, { "epoch": 1.07, "learning_rate": 4.892984542211653e-07, "logits/chosen": -1.577545166015625, "logits/rejected": -1.4969347715377808, "logps/chosen": -97.56498718261719, "logps/rejected": -98.31114959716797, "loss": 0.3843, "rewards/accuracies": 1.0, "rewards/chosen": 0.1538480669260025, "rewards/margins": 1.37328040599823, "rewards/rejected": -1.2194322347640991, "step": 669 }, { "epoch": 1.08, "learning_rate": 4.891993658343242e-07, "logits/chosen": -1.3017845153808594, "logits/rejected": -1.3382878303527832, "logps/chosen": -119.81527709960938, "logps/rejected": -99.70899963378906, "loss": 0.3721, "rewards/accuracies": 0.75, "rewards/chosen": -0.1655813306570053, "rewards/margins": 0.6841763854026794, "rewards/rejected": -0.8497576713562012, "step": 670 }, { "epoch": 1.08, "learning_rate": 4.891002774474831e-07, "logits/chosen": -1.4028246402740479, "logits/rejected": -1.3671778440475464, "logps/chosen": -87.83415222167969, "logps/rejected": -116.3924560546875, "loss": 0.4801, "rewards/accuracies": 1.0, "rewards/chosen": -0.7481651902198792, "rewards/margins": 2.4451420307159424, "rewards/rejected": -3.193307399749756, "step": 671 }, { "epoch": 1.08, "learning_rate": 4.890011890606421e-07, "logits/chosen": -1.4041634798049927, "logits/rejected": -1.3903625011444092, "logps/chosen": -85.5989990234375, "logps/rejected": -103.05985260009766, "loss": 0.3319, "rewards/accuracies": 1.0, "rewards/chosen": -0.04308643937110901, "rewards/margins": 1.465963363647461, "rewards/rejected": -1.5090497732162476, "step": 672 }, { "epoch": 1.08, "learning_rate": 4.88902100673801e-07, "logits/chosen": -1.2173091173171997, "logits/rejected": -1.2083362340927124, "logps/chosen": -74.22984313964844, "logps/rejected": -99.57096862792969, "loss": 0.2718, "rewards/accuracies": 1.0, "rewards/chosen": -0.19407843053340912, "rewards/margins": 1.3219690322875977, "rewards/rejected": -1.516047477722168, "step": 673 }, { "epoch": 1.08, "learning_rate": 4.888030122869599e-07, "logits/chosen": -1.3940848112106323, "logits/rejected": -1.3909956216812134, "logps/chosen": -87.79124450683594, "logps/rejected": -93.30967712402344, "loss": 0.4066, "rewards/accuracies": 0.75, "rewards/chosen": 0.3775016963481903, "rewards/margins": 2.078174114227295, "rewards/rejected": -1.7006725072860718, "step": 674 }, { "epoch": 1.08, "learning_rate": 4.887039239001188e-07, "logits/chosen": -1.1757735013961792, "logits/rejected": -1.1456665992736816, "logps/chosen": -64.52625274658203, "logps/rejected": -74.41081237792969, "loss": 0.3759, "rewards/accuracies": 0.75, "rewards/chosen": 0.26755914092063904, "rewards/margins": 0.6684736609458923, "rewards/rejected": -0.4009144902229309, "step": 675 }, { "epoch": 1.09, "learning_rate": 4.886048355132778e-07, "logits/chosen": -1.4524002075195312, "logits/rejected": -1.3943781852722168, "logps/chosen": -106.05867004394531, "logps/rejected": -111.13282775878906, "loss": 0.5199, "rewards/accuracies": 0.5, "rewards/chosen": -0.4834062457084656, "rewards/margins": 0.5546987652778625, "rewards/rejected": -1.0381050109863281, "step": 676 }, { "epoch": 1.09, "learning_rate": 4.885057471264368e-07, "logits/chosen": -1.3313775062561035, "logits/rejected": -1.295607089996338, "logps/chosen": -95.60672760009766, "logps/rejected": -90.5439453125, "loss": 0.4146, "rewards/accuracies": 0.75, "rewards/chosen": -0.24198472499847412, "rewards/margins": 0.6441235542297363, "rewards/rejected": -0.8861082792282104, "step": 677 }, { "epoch": 1.09, "learning_rate": 4.884066587395957e-07, "logits/chosen": -1.4745087623596191, "logits/rejected": -1.3941504955291748, "logps/chosen": -110.55616760253906, "logps/rejected": -112.32786560058594, "loss": 0.4338, "rewards/accuracies": 0.75, "rewards/chosen": 0.1369665265083313, "rewards/margins": 1.4547393321990967, "rewards/rejected": -1.3177727460861206, "step": 678 }, { "epoch": 1.09, "learning_rate": 4.883075703527547e-07, "logits/chosen": -1.2815626859664917, "logits/rejected": -1.2682147026062012, "logps/chosen": -94.577392578125, "logps/rejected": -87.65025329589844, "loss": 0.5877, "rewards/accuracies": 1.0, "rewards/chosen": 0.19889946281909943, "rewards/margins": 1.7685933113098145, "rewards/rejected": -1.569693922996521, "step": 679 }, { "epoch": 1.09, "learning_rate": 4.882084819659136e-07, "logits/chosen": -1.2902193069458008, "logits/rejected": -1.3339675664901733, "logps/chosen": -51.09904479980469, "logps/rejected": -75.3345947265625, "loss": 0.3769, "rewards/accuracies": 1.0, "rewards/chosen": 0.4521670341491699, "rewards/margins": 1.5573551654815674, "rewards/rejected": -1.1051881313323975, "step": 680 }, { "epoch": 1.09, "learning_rate": 4.881093935790725e-07, "logits/chosen": -1.2462666034698486, "logits/rejected": -1.2348511219024658, "logps/chosen": -94.88175201416016, "logps/rejected": -96.019287109375, "loss": 0.4197, "rewards/accuracies": 0.5, "rewards/chosen": -0.2923596203327179, "rewards/margins": 1.2718596458435059, "rewards/rejected": -1.5642192363739014, "step": 681 }, { "epoch": 1.09, "learning_rate": 4.880103051922315e-07, "logits/chosen": -1.392362356185913, "logits/rejected": -1.3957639932632446, "logps/chosen": -86.64036560058594, "logps/rejected": -114.23690795898438, "loss": 0.2885, "rewards/accuracies": 1.0, "rewards/chosen": 0.8077174425125122, "rewards/margins": 3.3931140899658203, "rewards/rejected": -2.5853967666625977, "step": 682 }, { "epoch": 1.1, "learning_rate": 4.879112168053904e-07, "logits/chosen": -1.217178463935852, "logits/rejected": -1.2532049417495728, "logps/chosen": -84.75413513183594, "logps/rejected": -88.60211181640625, "loss": 0.4016, "rewards/accuracies": 1.0, "rewards/chosen": 0.2749830484390259, "rewards/margins": 0.44355031847953796, "rewards/rejected": -0.16856727004051208, "step": 683 }, { "epoch": 1.1, "learning_rate": 4.878121284185493e-07, "logits/chosen": -1.4306700229644775, "logits/rejected": -1.4092631340026855, "logps/chosen": -91.0965576171875, "logps/rejected": -120.12495422363281, "loss": 0.3036, "rewards/accuracies": 1.0, "rewards/chosen": 0.8886934518814087, "rewards/margins": 2.7132325172424316, "rewards/rejected": -1.8245391845703125, "step": 684 }, { "epoch": 1.1, "learning_rate": 4.877130400317082e-07, "logits/chosen": -1.256531000137329, "logits/rejected": -1.2331968545913696, "logps/chosen": -98.4859390258789, "logps/rejected": -111.16522979736328, "loss": 0.3377, "rewards/accuracies": 0.75, "rewards/chosen": 0.0002240985631942749, "rewards/margins": 0.6413943767547607, "rewards/rejected": -0.64117032289505, "step": 685 }, { "epoch": 1.1, "learning_rate": 4.876139516448671e-07, "logits/chosen": -1.4218237400054932, "logits/rejected": -1.4343600273132324, "logps/chosen": -74.59205627441406, "logps/rejected": -100.2962646484375, "loss": 0.4305, "rewards/accuracies": 1.0, "rewards/chosen": 0.3101362884044647, "rewards/margins": 2.4496498107910156, "rewards/rejected": -2.1395134925842285, "step": 686 }, { "epoch": 1.1, "learning_rate": 4.875148632580262e-07, "logits/chosen": -1.176518201828003, "logits/rejected": -1.215410590171814, "logps/chosen": -98.8648681640625, "logps/rejected": -92.96239471435547, "loss": 0.5288, "rewards/accuracies": 0.25, "rewards/chosen": -0.8273013830184937, "rewards/margins": -0.33038386702537537, "rewards/rejected": -0.49691757559776306, "step": 687 }, { "epoch": 1.1, "learning_rate": 4.874157748711851e-07, "logits/chosen": -1.3909225463867188, "logits/rejected": -1.4012142419815063, "logps/chosen": -71.40349578857422, "logps/rejected": -101.88626098632812, "loss": 0.5532, "rewards/accuracies": 1.0, "rewards/chosen": 0.2450089305639267, "rewards/margins": 2.2021284103393555, "rewards/rejected": -1.9571192264556885, "step": 688 }, { "epoch": 1.11, "learning_rate": 4.87316686484344e-07, "logits/chosen": -1.3780250549316406, "logits/rejected": -1.4211817979812622, "logps/chosen": -75.49307250976562, "logps/rejected": -124.76383972167969, "loss": 0.4276, "rewards/accuracies": 1.0, "rewards/chosen": 1.0431815385818481, "rewards/margins": 3.264399528503418, "rewards/rejected": -2.2212181091308594, "step": 689 }, { "epoch": 1.11, "learning_rate": 4.87217598097503e-07, "logits/chosen": -1.3870809078216553, "logits/rejected": -1.3473308086395264, "logps/chosen": -81.65586853027344, "logps/rejected": -64.48933410644531, "loss": 0.3918, "rewards/accuracies": 1.0, "rewards/chosen": 0.19423332810401917, "rewards/margins": 0.7103424072265625, "rewards/rejected": -0.5161091089248657, "step": 690 }, { "epoch": 1.11, "learning_rate": 4.871185097106619e-07, "logits/chosen": -1.4294345378875732, "logits/rejected": -1.4277026653289795, "logps/chosen": -83.83518981933594, "logps/rejected": -105.68745422363281, "loss": 0.364, "rewards/accuracies": 1.0, "rewards/chosen": 0.7375525236129761, "rewards/margins": 2.8126306533813477, "rewards/rejected": -2.075078248977661, "step": 691 }, { "epoch": 1.11, "learning_rate": 4.870194213238208e-07, "logits/chosen": -1.3175357580184937, "logits/rejected": -1.3599967956542969, "logps/chosen": -75.39169311523438, "logps/rejected": -116.61439514160156, "loss": 0.2932, "rewards/accuracies": 1.0, "rewards/chosen": 0.8548386693000793, "rewards/margins": 2.965169906616211, "rewards/rejected": -2.1103315353393555, "step": 692 }, { "epoch": 1.11, "learning_rate": 4.869203329369798e-07, "logits/chosen": -1.2648781538009644, "logits/rejected": -1.2773479223251343, "logps/chosen": -93.60340118408203, "logps/rejected": -115.82379913330078, "loss": 0.3383, "rewards/accuracies": 0.75, "rewards/chosen": -0.385244756937027, "rewards/margins": 1.4317095279693604, "rewards/rejected": -1.816954255104065, "step": 693 }, { "epoch": 1.11, "learning_rate": 4.868212445501387e-07, "logits/chosen": -1.431262731552124, "logits/rejected": -1.3091509342193604, "logps/chosen": -76.77781677246094, "logps/rejected": -95.88572692871094, "loss": 0.5651, "rewards/accuracies": 0.75, "rewards/chosen": -0.17363770306110382, "rewards/margins": 0.6252973079681396, "rewards/rejected": -0.7989349961280823, "step": 694 }, { "epoch": 1.12, "learning_rate": 4.867221561632976e-07, "logits/chosen": -1.400439739227295, "logits/rejected": -1.2975910902023315, "logps/chosen": -108.42198181152344, "logps/rejected": -76.13269805908203, "loss": 0.4849, "rewards/accuracies": 0.75, "rewards/chosen": 0.06289122998714447, "rewards/margins": 0.8940479755401611, "rewards/rejected": -0.8311567306518555, "step": 695 }, { "epoch": 1.12, "learning_rate": 4.866230677764565e-07, "logits/chosen": -1.283737301826477, "logits/rejected": -1.26528000831604, "logps/chosen": -84.72105407714844, "logps/rejected": -93.80245971679688, "loss": 0.5532, "rewards/accuracies": 0.75, "rewards/chosen": -0.4540802240371704, "rewards/margins": 0.8983899354934692, "rewards/rejected": -1.3524702787399292, "step": 696 }, { "epoch": 1.12, "learning_rate": 4.865239793896155e-07, "logits/chosen": -1.3320884704589844, "logits/rejected": -1.3314589262008667, "logps/chosen": -95.37186431884766, "logps/rejected": -122.7088851928711, "loss": 0.4369, "rewards/accuracies": 0.75, "rewards/chosen": -0.2376115918159485, "rewards/margins": 3.0371224880218506, "rewards/rejected": -3.2747340202331543, "step": 697 }, { "epoch": 1.12, "learning_rate": 4.864248910027744e-07, "logits/chosen": -1.435495376586914, "logits/rejected": -1.4480663537979126, "logps/chosen": -82.41349792480469, "logps/rejected": -107.73414611816406, "loss": 0.4134, "rewards/accuracies": 1.0, "rewards/chosen": 0.08212928473949432, "rewards/margins": 1.9673526287078857, "rewards/rejected": -1.8852232694625854, "step": 698 }, { "epoch": 1.12, "learning_rate": 4.863258026159334e-07, "logits/chosen": -1.1918554306030273, "logits/rejected": -1.1888558864593506, "logps/chosen": -104.25653076171875, "logps/rejected": -89.11030578613281, "loss": 0.5198, "rewards/accuracies": 0.25, "rewards/chosen": -0.39893800020217896, "rewards/margins": -0.38930776715278625, "rewards/rejected": -0.009630199521780014, "step": 699 }, { "epoch": 1.12, "learning_rate": 4.862267142290924e-07, "logits/chosen": -1.2214057445526123, "logits/rejected": -1.1729865074157715, "logps/chosen": -90.03568267822266, "logps/rejected": -96.5575180053711, "loss": 0.3764, "rewards/accuracies": 1.0, "rewards/chosen": -0.17320403456687927, "rewards/margins": 1.5838011503219604, "rewards/rejected": -1.757005214691162, "step": 700 }, { "epoch": 1.13, "learning_rate": 4.861276258422513e-07, "logits/chosen": -1.2873765230178833, "logits/rejected": -1.4059195518493652, "logps/chosen": -50.244667053222656, "logps/rejected": -96.05281829833984, "loss": 0.3495, "rewards/accuracies": 1.0, "rewards/chosen": 0.19809409976005554, "rewards/margins": 2.324190855026245, "rewards/rejected": -2.1260969638824463, "step": 701 }, { "epoch": 1.13, "learning_rate": 4.860285374554102e-07, "logits/chosen": -1.367733120918274, "logits/rejected": -1.3186416625976562, "logps/chosen": -86.34362030029297, "logps/rejected": -78.70965576171875, "loss": 0.3492, "rewards/accuracies": 0.5, "rewards/chosen": -0.4689810872077942, "rewards/margins": 0.11503283679485321, "rewards/rejected": -0.5840139389038086, "step": 702 }, { "epoch": 1.13, "learning_rate": 4.859294490685692e-07, "logits/chosen": -1.2739040851593018, "logits/rejected": -1.280039668083191, "logps/chosen": -93.02059936523438, "logps/rejected": -112.9046630859375, "loss": 0.3919, "rewards/accuracies": 0.75, "rewards/chosen": -0.9253799319267273, "rewards/margins": 0.37055206298828125, "rewards/rejected": -1.2959319353103638, "step": 703 }, { "epoch": 1.13, "learning_rate": 4.858303606817281e-07, "logits/chosen": -1.3368730545043945, "logits/rejected": -1.403881549835205, "logps/chosen": -85.17813110351562, "logps/rejected": -85.52774047851562, "loss": 0.521, "rewards/accuracies": 0.75, "rewards/chosen": -0.4710594415664673, "rewards/margins": 0.19951912760734558, "rewards/rejected": -0.6705785989761353, "step": 704 }, { "epoch": 1.13, "learning_rate": 4.85731272294887e-07, "logits/chosen": -1.4163870811462402, "logits/rejected": -1.5107501745224, "logps/chosen": -69.57830810546875, "logps/rejected": -98.72921752929688, "loss": 0.3689, "rewards/accuracies": 1.0, "rewards/chosen": 0.27763184905052185, "rewards/margins": 1.5279160737991333, "rewards/rejected": -1.250284194946289, "step": 705 }, { "epoch": 1.13, "learning_rate": 4.856321839080459e-07, "logits/chosen": -1.386713981628418, "logits/rejected": -1.3277173042297363, "logps/chosen": -78.09333038330078, "logps/rejected": -109.55380249023438, "loss": 0.2667, "rewards/accuracies": 1.0, "rewards/chosen": 0.12456148862838745, "rewards/margins": 3.0154080390930176, "rewards/rejected": -2.8908462524414062, "step": 706 }, { "epoch": 1.13, "learning_rate": 4.855330955212049e-07, "logits/chosen": -1.5303404331207275, "logits/rejected": -1.4563004970550537, "logps/chosen": -80.1597900390625, "logps/rejected": -73.34475708007812, "loss": 0.3769, "rewards/accuracies": 0.75, "rewards/chosen": 0.30568239092826843, "rewards/margins": 1.245052456855774, "rewards/rejected": -0.9393701553344727, "step": 707 }, { "epoch": 1.14, "learning_rate": 4.854340071343638e-07, "logits/chosen": -1.3940812349319458, "logits/rejected": -1.4239314794540405, "logps/chosen": -79.12136840820312, "logps/rejected": -128.3715057373047, "loss": 0.3562, "rewards/accuracies": 1.0, "rewards/chosen": -0.12827616930007935, "rewards/margins": 3.0281131267547607, "rewards/rejected": -3.156388998031616, "step": 708 }, { "epoch": 1.14, "learning_rate": 4.853349187475227e-07, "logits/chosen": -1.3046540021896362, "logits/rejected": -1.2640495300292969, "logps/chosen": -82.0637435913086, "logps/rejected": -120.984619140625, "loss": 0.4317, "rewards/accuracies": 1.0, "rewards/chosen": -0.2928811013698578, "rewards/margins": 1.5113428831100464, "rewards/rejected": -1.8042240142822266, "step": 709 }, { "epoch": 1.14, "learning_rate": 4.852358303606818e-07, "logits/chosen": -1.2987786531448364, "logits/rejected": -1.2881207466125488, "logps/chosen": -73.8106460571289, "logps/rejected": -87.80175018310547, "loss": 0.3021, "rewards/accuracies": 1.0, "rewards/chosen": 0.16426116228103638, "rewards/margins": 1.7342331409454346, "rewards/rejected": -1.569972038269043, "step": 710 }, { "epoch": 1.14, "learning_rate": 4.851367419738407e-07, "logits/chosen": -1.2424931526184082, "logits/rejected": -1.2251152992248535, "logps/chosen": -70.90630340576172, "logps/rejected": -89.6545639038086, "loss": 0.3857, "rewards/accuracies": 0.75, "rewards/chosen": 0.30844318866729736, "rewards/margins": 0.7844092845916748, "rewards/rejected": -0.47596609592437744, "step": 711 }, { "epoch": 1.14, "learning_rate": 4.850376535869996e-07, "logits/chosen": -1.456213355064392, "logits/rejected": -1.458816647529602, "logps/chosen": -89.06340026855469, "logps/rejected": -99.77567291259766, "loss": 0.4287, "rewards/accuracies": 0.5, "rewards/chosen": 0.005985448136925697, "rewards/margins": 0.8525654077529907, "rewards/rejected": -0.8465799689292908, "step": 712 }, { "epoch": 1.14, "learning_rate": 4.849385652001585e-07, "logits/chosen": -1.5520946979522705, "logits/rejected": -1.5076453685760498, "logps/chosen": -78.24342346191406, "logps/rejected": -76.43972778320312, "loss": 0.3702, "rewards/accuracies": 1.0, "rewards/chosen": 0.6748777627944946, "rewards/margins": 0.9726927280426025, "rewards/rejected": -0.2978150248527527, "step": 713 }, { "epoch": 1.15, "learning_rate": 4.848394768133175e-07, "logits/chosen": -1.3569817543029785, "logits/rejected": -1.497166395187378, "logps/chosen": -64.95269775390625, "logps/rejected": -121.00613403320312, "loss": 0.3733, "rewards/accuracies": 1.0, "rewards/chosen": 0.25779610872268677, "rewards/margins": 2.6353683471679688, "rewards/rejected": -2.3775722980499268, "step": 714 }, { "epoch": 1.15, "learning_rate": 4.847403884264764e-07, "logits/chosen": -1.472232699394226, "logits/rejected": -1.4263136386871338, "logps/chosen": -80.36674499511719, "logps/rejected": -94.16943359375, "loss": 0.2439, "rewards/accuracies": 0.75, "rewards/chosen": 0.027488499879837036, "rewards/margins": 2.399116039276123, "rewards/rejected": -2.3716275691986084, "step": 715 }, { "epoch": 1.15, "learning_rate": 4.846413000396353e-07, "logits/chosen": -1.2371050119400024, "logits/rejected": -1.3675442934036255, "logps/chosen": -90.00377655029297, "logps/rejected": -112.8480224609375, "loss": 0.3803, "rewards/accuracies": 0.75, "rewards/chosen": 0.35150033235549927, "rewards/margins": 1.3933682441711426, "rewards/rejected": -1.041867971420288, "step": 716 }, { "epoch": 1.15, "learning_rate": 4.845422116527943e-07, "logits/chosen": -1.357757806777954, "logits/rejected": -1.3562750816345215, "logps/chosen": -86.84135437011719, "logps/rejected": -90.60419464111328, "loss": 0.3426, "rewards/accuracies": 1.0, "rewards/chosen": 0.2319336235523224, "rewards/margins": 2.5690128803253174, "rewards/rejected": -2.3370792865753174, "step": 717 }, { "epoch": 1.15, "learning_rate": 4.844431232659532e-07, "logits/chosen": -1.2183332443237305, "logits/rejected": -1.3116271495819092, "logps/chosen": -84.6858139038086, "logps/rejected": -136.48333740234375, "loss": 0.4054, "rewards/accuracies": 1.0, "rewards/chosen": 0.03499441593885422, "rewards/margins": 0.6446696519851685, "rewards/rejected": -0.6096751689910889, "step": 718 }, { "epoch": 1.15, "learning_rate": 4.843440348791121e-07, "logits/chosen": -1.2306610345840454, "logits/rejected": -1.2731142044067383, "logps/chosen": -83.11454772949219, "logps/rejected": -108.62286376953125, "loss": 0.36, "rewards/accuracies": 0.75, "rewards/chosen": 0.3523496687412262, "rewards/margins": 2.2580440044403076, "rewards/rejected": -1.9056943655014038, "step": 719 }, { "epoch": 1.16, "learning_rate": 4.842449464922711e-07, "logits/chosen": -1.4083362817764282, "logits/rejected": -1.3653844594955444, "logps/chosen": -76.98210144042969, "logps/rejected": -90.14613342285156, "loss": 0.3235, "rewards/accuracies": 1.0, "rewards/chosen": -0.06422042846679688, "rewards/margins": 1.6898572444915771, "rewards/rejected": -1.7540777921676636, "step": 720 }, { "epoch": 1.16, "learning_rate": 4.8414585810543e-07, "logits/chosen": -1.3067749738693237, "logits/rejected": -1.4802623987197876, "logps/chosen": -81.7290267944336, "logps/rejected": -85.93888854980469, "loss": 0.425, "rewards/accuracies": 1.0, "rewards/chosen": 0.1996994912624359, "rewards/margins": 0.5706990361213684, "rewards/rejected": -0.3709995150566101, "step": 721 }, { "epoch": 1.16, "learning_rate": 4.84046769718589e-07, "logits/chosen": -1.4746081829071045, "logits/rejected": -1.416220784187317, "logps/chosen": -86.321533203125, "logps/rejected": -86.31310272216797, "loss": 0.5195, "rewards/accuracies": 1.0, "rewards/chosen": 0.39218634366989136, "rewards/margins": 1.2685749530792236, "rewards/rejected": -0.8763886094093323, "step": 722 }, { "epoch": 1.16, "learning_rate": 4.839476813317479e-07, "logits/chosen": -1.3782109022140503, "logits/rejected": -1.3840434551239014, "logps/chosen": -83.28592681884766, "logps/rejected": -113.08039093017578, "loss": 0.4118, "rewards/accuracies": 0.25, "rewards/chosen": 0.03958795964717865, "rewards/margins": 1.1366941928863525, "rewards/rejected": -1.0971062183380127, "step": 723 }, { "epoch": 1.16, "learning_rate": 4.838485929449068e-07, "logits/chosen": -1.4512611627578735, "logits/rejected": -1.4973119497299194, "logps/chosen": -99.79325866699219, "logps/rejected": -109.58123779296875, "loss": 0.4482, "rewards/accuracies": 0.75, "rewards/chosen": -0.6222860813140869, "rewards/margins": 0.7553225755691528, "rewards/rejected": -1.3776085376739502, "step": 724 }, { "epoch": 1.16, "learning_rate": 4.837495045580658e-07, "logits/chosen": -1.3606019020080566, "logits/rejected": -1.4003849029541016, "logps/chosen": -80.1319351196289, "logps/rejected": -93.48213958740234, "loss": 0.4539, "rewards/accuracies": 0.75, "rewards/chosen": 0.08363408595323563, "rewards/margins": 1.8318675756454468, "rewards/rejected": -1.7482335567474365, "step": 725 }, { "epoch": 1.17, "learning_rate": 4.836504161712247e-07, "logits/chosen": -1.26254403591156, "logits/rejected": -1.254642367362976, "logps/chosen": -77.28530883789062, "logps/rejected": -97.5022964477539, "loss": 0.2827, "rewards/accuracies": 1.0, "rewards/chosen": 0.15811175107955933, "rewards/margins": 1.9200843572616577, "rewards/rejected": -1.7619726657867432, "step": 726 }, { "epoch": 1.17, "learning_rate": 4.835513277843836e-07, "logits/chosen": -1.1635406017303467, "logits/rejected": -1.285835862159729, "logps/chosen": -95.50721740722656, "logps/rejected": -100.75607299804688, "loss": 0.3475, "rewards/accuracies": 1.0, "rewards/chosen": -0.12072741985321045, "rewards/margins": 1.947732925415039, "rewards/rejected": -2.06846022605896, "step": 727 }, { "epoch": 1.17, "learning_rate": 4.834522393975426e-07, "logits/chosen": -1.3621854782104492, "logits/rejected": -1.3827555179595947, "logps/chosen": -81.45904541015625, "logps/rejected": -118.23136901855469, "loss": 0.2991, "rewards/accuracies": 0.75, "rewards/chosen": -1.1145493984222412, "rewards/margins": 1.8331549167633057, "rewards/rejected": -2.947704553604126, "step": 728 }, { "epoch": 1.17, "learning_rate": 4.833531510107015e-07, "logits/chosen": -1.478434681892395, "logits/rejected": -1.5006532669067383, "logps/chosen": -90.63253021240234, "logps/rejected": -95.31001281738281, "loss": 0.2861, "rewards/accuracies": 1.0, "rewards/chosen": 0.3823423385620117, "rewards/margins": 1.9535698890686035, "rewards/rejected": -1.5712274312973022, "step": 729 }, { "epoch": 1.17, "learning_rate": 4.832540626238605e-07, "logits/chosen": -1.4763373136520386, "logits/rejected": -1.5043272972106934, "logps/chosen": -87.62971496582031, "logps/rejected": -98.61248779296875, "loss": 0.3364, "rewards/accuracies": 1.0, "rewards/chosen": -0.5257824659347534, "rewards/margins": 1.0554237365722656, "rewards/rejected": -1.581206202507019, "step": 730 }, { "epoch": 1.17, "learning_rate": 4.831549742370194e-07, "logits/chosen": -1.2278738021850586, "logits/rejected": -1.2407901287078857, "logps/chosen": -81.0696792602539, "logps/rejected": -122.22711944580078, "loss": 0.2992, "rewards/accuracies": 1.0, "rewards/chosen": 0.10796594619750977, "rewards/margins": 2.862574815750122, "rewards/rejected": -2.754608631134033, "step": 731 }, { "epoch": 1.17, "learning_rate": 4.830558858501783e-07, "logits/chosen": -1.3226670026779175, "logits/rejected": -1.2967493534088135, "logps/chosen": -86.84967041015625, "logps/rejected": -93.37415313720703, "loss": 0.4527, "rewards/accuracies": 0.5, "rewards/chosen": -0.40712404251098633, "rewards/margins": 0.32398176193237305, "rewards/rejected": -0.7311058044433594, "step": 732 }, { "epoch": 1.18, "learning_rate": 4.829567974633372e-07, "logits/chosen": -1.3872661590576172, "logits/rejected": -1.375831127166748, "logps/chosen": -83.81224060058594, "logps/rejected": -114.14056396484375, "loss": 0.2801, "rewards/accuracies": 1.0, "rewards/chosen": 0.2387852817773819, "rewards/margins": 1.705719232559204, "rewards/rejected": -1.4669338464736938, "step": 733 }, { "epoch": 1.18, "learning_rate": 4.828577090764962e-07, "logits/chosen": -1.2816146612167358, "logits/rejected": -1.2391327619552612, "logps/chosen": -89.1363296508789, "logps/rejected": -115.42158508300781, "loss": 0.428, "rewards/accuracies": 0.5, "rewards/chosen": -0.3459809124469757, "rewards/margins": 1.4279472827911377, "rewards/rejected": -1.773928165435791, "step": 734 }, { "epoch": 1.18, "learning_rate": 4.827586206896552e-07, "logits/chosen": -1.1272742748260498, "logits/rejected": -1.1582305431365967, "logps/chosen": -104.31429290771484, "logps/rejected": -103.746826171875, "loss": 0.317, "rewards/accuracies": 0.75, "rewards/chosen": 0.6392549872398376, "rewards/margins": 1.7178443670272827, "rewards/rejected": -1.0785894393920898, "step": 735 }, { "epoch": 1.18, "learning_rate": 4.826595323028141e-07, "logits/chosen": -1.1992348432540894, "logits/rejected": -1.1813983917236328, "logps/chosen": -80.00708770751953, "logps/rejected": -96.01605224609375, "loss": 0.4723, "rewards/accuracies": 0.5, "rewards/chosen": -0.6706594824790955, "rewards/margins": 0.2200649231672287, "rewards/rejected": -0.8907244205474854, "step": 736 }, { "epoch": 1.18, "learning_rate": 4.82560443915973e-07, "logits/chosen": -1.4851752519607544, "logits/rejected": -1.4014997482299805, "logps/chosen": -76.55154418945312, "logps/rejected": -79.50753021240234, "loss": 0.4668, "rewards/accuracies": 0.5, "rewards/chosen": -0.16261833906173706, "rewards/margins": 0.7433182001113892, "rewards/rejected": -0.9059365391731262, "step": 737 }, { "epoch": 1.18, "learning_rate": 4.82461355529132e-07, "logits/chosen": -1.4084041118621826, "logits/rejected": -1.4284309148788452, "logps/chosen": -70.0961685180664, "logps/rejected": -94.36293029785156, "loss": 0.3101, "rewards/accuracies": 0.75, "rewards/chosen": 0.687903642654419, "rewards/margins": 0.8425151705741882, "rewards/rejected": -0.15461158752441406, "step": 738 }, { "epoch": 1.19, "learning_rate": 4.823622671422909e-07, "logits/chosen": -1.4665236473083496, "logits/rejected": -1.4735379219055176, "logps/chosen": -80.98249816894531, "logps/rejected": -108.79353332519531, "loss": 0.4748, "rewards/accuracies": 0.75, "rewards/chosen": 0.03623943775892258, "rewards/margins": 1.6640636920928955, "rewards/rejected": -1.627824306488037, "step": 739 }, { "epoch": 1.19, "learning_rate": 4.822631787554499e-07, "logits/chosen": -1.637945294380188, "logits/rejected": -1.5040910243988037, "logps/chosen": -77.62777709960938, "logps/rejected": -110.36424255371094, "loss": 0.3324, "rewards/accuracies": 1.0, "rewards/chosen": -0.11681614071130753, "rewards/margins": 1.5027096271514893, "rewards/rejected": -1.6195257902145386, "step": 740 }, { "epoch": 1.19, "learning_rate": 4.821640903686088e-07, "logits/chosen": -1.3603579998016357, "logits/rejected": -1.4150307178497314, "logps/chosen": -69.90480041503906, "logps/rejected": -119.72610473632812, "loss": 0.4112, "rewards/accuracies": 0.75, "rewards/chosen": 0.39656582474708557, "rewards/margins": 1.9443217515945435, "rewards/rejected": -1.5477559566497803, "step": 741 }, { "epoch": 1.19, "learning_rate": 4.820650019817677e-07, "logits/chosen": -1.3286076784133911, "logits/rejected": -1.3548285961151123, "logps/chosen": -106.43323516845703, "logps/rejected": -106.11851501464844, "loss": 0.3127, "rewards/accuracies": 0.75, "rewards/chosen": -0.34319305419921875, "rewards/margins": 1.3729503154754639, "rewards/rejected": -1.7161434888839722, "step": 742 }, { "epoch": 1.19, "learning_rate": 4.819659135949266e-07, "logits/chosen": -1.4347248077392578, "logits/rejected": -1.4451441764831543, "logps/chosen": -96.6881332397461, "logps/rejected": -119.82936096191406, "loss": 0.2813, "rewards/accuracies": 0.75, "rewards/chosen": 0.06824912875890732, "rewards/margins": 0.9606689214706421, "rewards/rejected": -0.8924198150634766, "step": 743 }, { "epoch": 1.19, "learning_rate": 4.818668252080855e-07, "logits/chosen": -1.213517189025879, "logits/rejected": -1.183061122894287, "logps/chosen": -70.6455078125, "logps/rejected": -121.3418197631836, "loss": 0.316, "rewards/accuracies": 0.75, "rewards/chosen": 0.22858218848705292, "rewards/margins": 1.4289774894714355, "rewards/rejected": -1.2003952264785767, "step": 744 }, { "epoch": 1.2, "learning_rate": 4.817677368212446e-07, "logits/chosen": -1.3739423751831055, "logits/rejected": -1.3826394081115723, "logps/chosen": -94.8553695678711, "logps/rejected": -111.91023254394531, "loss": 0.4128, "rewards/accuracies": 1.0, "rewards/chosen": 0.13024768233299255, "rewards/margins": 3.171170949935913, "rewards/rejected": -3.0409233570098877, "step": 745 }, { "epoch": 1.2, "learning_rate": 4.816686484344035e-07, "logits/chosen": -1.3604732751846313, "logits/rejected": -1.3502367734909058, "logps/chosen": -84.6005630493164, "logps/rejected": -127.40678405761719, "loss": 0.2939, "rewards/accuracies": 1.0, "rewards/chosen": 0.25329896807670593, "rewards/margins": 2.860900402069092, "rewards/rejected": -2.6076014041900635, "step": 746 }, { "epoch": 1.2, "learning_rate": 4.815695600475624e-07, "logits/chosen": -1.4921693801879883, "logits/rejected": -1.4455829858779907, "logps/chosen": -93.55008697509766, "logps/rejected": -117.99861907958984, "loss": 0.2904, "rewards/accuracies": 1.0, "rewards/chosen": 0.18700505793094635, "rewards/margins": 1.9049981832504272, "rewards/rejected": -1.717993140220642, "step": 747 }, { "epoch": 1.2, "learning_rate": 4.814704716607214e-07, "logits/chosen": -1.4955112934112549, "logits/rejected": -1.516878604888916, "logps/chosen": -104.51922607421875, "logps/rejected": -114.1079330444336, "loss": 0.4066, "rewards/accuracies": 1.0, "rewards/chosen": -0.31188470125198364, "rewards/margins": 2.3178582191467285, "rewards/rejected": -2.6297428607940674, "step": 748 }, { "epoch": 1.2, "learning_rate": 4.813713832738803e-07, "logits/chosen": -1.5547142028808594, "logits/rejected": -1.6513943672180176, "logps/chosen": -70.39974975585938, "logps/rejected": -100.32977294921875, "loss": 0.2914, "rewards/accuracies": 0.75, "rewards/chosen": 0.27557528018951416, "rewards/margins": 1.2797244787216187, "rewards/rejected": -1.004149317741394, "step": 749 }, { "epoch": 1.2, "learning_rate": 4.812722948870392e-07, "logits/chosen": -1.3909766674041748, "logits/rejected": -1.3985364437103271, "logps/chosen": -89.81668853759766, "logps/rejected": -110.48096466064453, "loss": 0.3856, "rewards/accuracies": 0.75, "rewards/chosen": -1.041695237159729, "rewards/margins": 0.54599928855896, "rewards/rejected": -1.587694525718689, "step": 750 }, { "epoch": 1.21, "learning_rate": 4.811732065001982e-07, "logits/chosen": -1.3678425550460815, "logits/rejected": -1.3341031074523926, "logps/chosen": -85.22631072998047, "logps/rejected": -88.07720947265625, "loss": 0.3761, "rewards/accuracies": 0.75, "rewards/chosen": -0.10841922461986542, "rewards/margins": 1.0902758836746216, "rewards/rejected": -1.1986950635910034, "step": 751 }, { "epoch": 1.21, "learning_rate": 4.810741181133571e-07, "logits/chosen": -1.3009955883026123, "logits/rejected": -1.3190722465515137, "logps/chosen": -53.975914001464844, "logps/rejected": -87.44203186035156, "loss": 0.1972, "rewards/accuracies": 1.0, "rewards/chosen": -0.13077707588672638, "rewards/margins": 2.6516196727752686, "rewards/rejected": -2.7823965549468994, "step": 752 }, { "epoch": 1.21, "learning_rate": 4.80975029726516e-07, "logits/chosen": -1.27962064743042, "logits/rejected": -1.22838294506073, "logps/chosen": -87.70652770996094, "logps/rejected": -93.4171142578125, "loss": 0.4433, "rewards/accuracies": 0.5, "rewards/chosen": 0.08696909248828888, "rewards/margins": 0.4725745618343353, "rewards/rejected": -0.3856053948402405, "step": 753 }, { "epoch": 1.21, "learning_rate": 4.808759413396749e-07, "logits/chosen": -1.3934028148651123, "logits/rejected": -1.393033742904663, "logps/chosen": -123.2108154296875, "logps/rejected": -137.65505981445312, "loss": 0.4008, "rewards/accuracies": 1.0, "rewards/chosen": -0.14924241602420807, "rewards/margins": 0.3563644587993622, "rewards/rejected": -0.5056068897247314, "step": 754 }, { "epoch": 1.21, "learning_rate": 4.807768529528339e-07, "logits/chosen": -1.3339588642120361, "logits/rejected": -1.345149278640747, "logps/chosen": -101.16690063476562, "logps/rejected": -103.3846664428711, "loss": 0.3155, "rewards/accuracies": 0.75, "rewards/chosen": -1.3908159732818604, "rewards/margins": 0.6748300194740295, "rewards/rejected": -2.065645933151245, "step": 755 }, { "epoch": 1.21, "learning_rate": 4.806777645659928e-07, "logits/chosen": -1.4270007610321045, "logits/rejected": -1.4061098098754883, "logps/chosen": -64.88216400146484, "logps/rejected": -113.3449935913086, "loss": 0.282, "rewards/accuracies": 1.0, "rewards/chosen": 0.5908172726631165, "rewards/margins": 1.6563498973846436, "rewards/rejected": -1.0655326843261719, "step": 756 }, { "epoch": 1.22, "learning_rate": 4.805786761791518e-07, "logits/chosen": -1.4576385021209717, "logits/rejected": -1.4271901845932007, "logps/chosen": -102.80131530761719, "logps/rejected": -102.6478042602539, "loss": 0.3755, "rewards/accuracies": 0.75, "rewards/chosen": -0.036965757608413696, "rewards/margins": 1.8292450904846191, "rewards/rejected": -1.8662108182907104, "step": 757 }, { "epoch": 1.22, "learning_rate": 4.804795877923108e-07, "logits/chosen": -1.4351778030395508, "logits/rejected": -1.4006785154342651, "logps/chosen": -91.8701171875, "logps/rejected": -119.11117553710938, "loss": 0.3684, "rewards/accuracies": 1.0, "rewards/chosen": -0.8073354363441467, "rewards/margins": 1.7560611963272095, "rewards/rejected": -2.563396692276001, "step": 758 }, { "epoch": 1.22, "learning_rate": 4.803804994054697e-07, "logits/chosen": -1.441806674003601, "logits/rejected": -1.3243894577026367, "logps/chosen": -91.26377868652344, "logps/rejected": -104.00052642822266, "loss": 0.3093, "rewards/accuracies": 1.0, "rewards/chosen": 0.32737839221954346, "rewards/margins": 1.348159670829773, "rewards/rejected": -1.0207812786102295, "step": 759 }, { "epoch": 1.22, "learning_rate": 4.802814110186286e-07, "logits/chosen": -1.4768427610397339, "logits/rejected": -1.5375205278396606, "logps/chosen": -92.62492370605469, "logps/rejected": -124.95454406738281, "loss": 0.39, "rewards/accuracies": 1.0, "rewards/chosen": -0.32660216093063354, "rewards/margins": 1.3024914264678955, "rewards/rejected": -1.6290936470031738, "step": 760 }, { "epoch": 1.22, "learning_rate": 4.801823226317876e-07, "logits/chosen": -1.4000916481018066, "logits/rejected": -1.3609226942062378, "logps/chosen": -104.15169525146484, "logps/rejected": -120.63566589355469, "loss": 0.5661, "rewards/accuracies": 1.0, "rewards/chosen": -2.0361316204071045, "rewards/margins": 1.9480665922164917, "rewards/rejected": -3.9841980934143066, "step": 761 }, { "epoch": 1.22, "learning_rate": 4.800832342449465e-07, "logits/chosen": -1.428407907485962, "logits/rejected": -1.4114431142807007, "logps/chosen": -56.21022415161133, "logps/rejected": -96.48658752441406, "loss": 0.3196, "rewards/accuracies": 1.0, "rewards/chosen": 0.4970060884952545, "rewards/margins": 3.394813060760498, "rewards/rejected": -2.8978071212768555, "step": 762 }, { "epoch": 1.22, "learning_rate": 4.799841458581054e-07, "logits/chosen": -1.4527724981307983, "logits/rejected": -1.387725830078125, "logps/chosen": -100.71971130371094, "logps/rejected": -115.25833129882812, "loss": 0.5031, "rewards/accuracies": 0.75, "rewards/chosen": -0.7372692227363586, "rewards/margins": 1.4218025207519531, "rewards/rejected": -2.159071922302246, "step": 763 }, { "epoch": 1.23, "learning_rate": 4.798850574712643e-07, "logits/chosen": -1.3154606819152832, "logits/rejected": -1.3589729070663452, "logps/chosen": -97.50381469726562, "logps/rejected": -115.63888549804688, "loss": 0.4125, "rewards/accuracies": 0.75, "rewards/chosen": -0.12407875806093216, "rewards/margins": 0.3485240936279297, "rewards/rejected": -0.47260284423828125, "step": 764 }, { "epoch": 1.23, "learning_rate": 4.797859690844232e-07, "logits/chosen": -1.2987507581710815, "logits/rejected": -1.298435091972351, "logps/chosen": -75.32613372802734, "logps/rejected": -96.35774230957031, "loss": 0.3798, "rewards/accuracies": 1.0, "rewards/chosen": 0.3852194845676422, "rewards/margins": 1.3482251167297363, "rewards/rejected": -0.9630056619644165, "step": 765 }, { "epoch": 1.23, "learning_rate": 4.796868806975822e-07, "logits/chosen": -1.399732232093811, "logits/rejected": -1.440435767173767, "logps/chosen": -89.42658996582031, "logps/rejected": -86.78474426269531, "loss": 0.4381, "rewards/accuracies": 1.0, "rewards/chosen": -0.1284601241350174, "rewards/margins": 1.3479983806610107, "rewards/rejected": -1.4764585494995117, "step": 766 }, { "epoch": 1.23, "learning_rate": 4.795877923107411e-07, "logits/chosen": -1.3685798645019531, "logits/rejected": -1.4169092178344727, "logps/chosen": -66.56224060058594, "logps/rejected": -79.65753173828125, "loss": 0.3493, "rewards/accuracies": 0.75, "rewards/chosen": 0.2497176229953766, "rewards/margins": 0.7261520624160767, "rewards/rejected": -0.4764344394207001, "step": 767 }, { "epoch": 1.23, "learning_rate": 4.794887039239001e-07, "logits/chosen": -1.3172869682312012, "logits/rejected": -1.3385329246520996, "logps/chosen": -73.50481414794922, "logps/rejected": -106.94889831542969, "loss": 0.2853, "rewards/accuracies": 1.0, "rewards/chosen": -1.029346227645874, "rewards/margins": 1.350346565246582, "rewards/rejected": -2.379692792892456, "step": 768 }, { "epoch": 1.23, "learning_rate": 4.793896155370591e-07, "logits/chosen": -1.2399847507476807, "logits/rejected": -1.2732670307159424, "logps/chosen": -82.31910705566406, "logps/rejected": -112.87742614746094, "loss": 0.3784, "rewards/accuracies": 1.0, "rewards/chosen": -0.09287625551223755, "rewards/margins": 2.260348320007324, "rewards/rejected": -2.353224277496338, "step": 769 }, { "epoch": 1.24, "learning_rate": 4.79290527150218e-07, "logits/chosen": -1.49796462059021, "logits/rejected": -1.391014575958252, "logps/chosen": -85.11933898925781, "logps/rejected": -112.528076171875, "loss": 0.2938, "rewards/accuracies": 1.0, "rewards/chosen": -0.12838402390480042, "rewards/margins": 3.7660458087921143, "rewards/rejected": -3.8944296836853027, "step": 770 }, { "epoch": 1.24, "learning_rate": 4.791914387633769e-07, "logits/chosen": -1.401871681213379, "logits/rejected": -1.3404828310012817, "logps/chosen": -105.64678955078125, "logps/rejected": -113.98499298095703, "loss": 0.3378, "rewards/accuracies": 1.0, "rewards/chosen": -0.8528120517730713, "rewards/margins": 0.8839074969291687, "rewards/rejected": -1.7367196083068848, "step": 771 }, { "epoch": 1.24, "learning_rate": 4.790923503765359e-07, "logits/chosen": -1.4404196739196777, "logits/rejected": -1.4589464664459229, "logps/chosen": -125.09049987792969, "logps/rejected": -104.7862777709961, "loss": 0.3652, "rewards/accuracies": 0.5, "rewards/chosen": -0.3844785988330841, "rewards/margins": 0.9573222398757935, "rewards/rejected": -1.3418008089065552, "step": 772 }, { "epoch": 1.24, "learning_rate": 4.789932619896948e-07, "logits/chosen": -1.568264126777649, "logits/rejected": -1.5459086894989014, "logps/chosen": -82.12254333496094, "logps/rejected": -100.47122192382812, "loss": 0.4974, "rewards/accuracies": 0.75, "rewards/chosen": 0.10537920892238617, "rewards/margins": 1.7193565368652344, "rewards/rejected": -1.613977313041687, "step": 773 }, { "epoch": 1.24, "learning_rate": 4.788941736028537e-07, "logits/chosen": -1.446671962738037, "logits/rejected": -1.472130537033081, "logps/chosen": -96.64337921142578, "logps/rejected": -119.39864349365234, "loss": 0.3869, "rewards/accuracies": 0.75, "rewards/chosen": -1.2387334108352661, "rewards/margins": 1.7509739398956299, "rewards/rejected": -2.9897074699401855, "step": 774 }, { "epoch": 1.24, "learning_rate": 4.787950852160126e-07, "logits/chosen": -1.2101062536239624, "logits/rejected": -1.1693065166473389, "logps/chosen": -88.53765106201172, "logps/rejected": -89.48745727539062, "loss": 0.4579, "rewards/accuracies": 0.75, "rewards/chosen": -0.019217725843191147, "rewards/margins": 1.8564460277557373, "rewards/rejected": -1.8756637573242188, "step": 775 }, { "epoch": 1.25, "learning_rate": 4.786959968291716e-07, "logits/chosen": -1.3010807037353516, "logits/rejected": -1.3451924324035645, "logps/chosen": -91.60025024414062, "logps/rejected": -106.26217651367188, "loss": 0.3164, "rewards/accuracies": 1.0, "rewards/chosen": 0.031602099537849426, "rewards/margins": 2.002230644226074, "rewards/rejected": -1.9706284999847412, "step": 776 }, { "epoch": 1.25, "learning_rate": 4.785969084423305e-07, "logits/chosen": -1.4144748449325562, "logits/rejected": -1.3029708862304688, "logps/chosen": -92.47920989990234, "logps/rejected": -105.9443130493164, "loss": 0.4098, "rewards/accuracies": 0.75, "rewards/chosen": 0.5129703879356384, "rewards/margins": 0.44194185733795166, "rewards/rejected": 0.07102852314710617, "step": 777 }, { "epoch": 1.25, "learning_rate": 4.784978200554895e-07, "logits/chosen": -1.6324808597564697, "logits/rejected": -1.603705644607544, "logps/chosen": -100.6744613647461, "logps/rejected": -106.129150390625, "loss": 0.4851, "rewards/accuracies": 0.75, "rewards/chosen": -0.9775441884994507, "rewards/margins": 1.110002040863037, "rewards/rejected": -2.0875461101531982, "step": 778 }, { "epoch": 1.25, "learning_rate": 4.783987316686484e-07, "logits/chosen": -1.4224759340286255, "logits/rejected": -1.3920093774795532, "logps/chosen": -108.40132141113281, "logps/rejected": -145.1244354248047, "loss": 0.3878, "rewards/accuracies": 1.0, "rewards/chosen": 0.19152098894119263, "rewards/margins": 3.070927381515503, "rewards/rejected": -2.879406690597534, "step": 779 }, { "epoch": 1.25, "learning_rate": 4.782996432818073e-07, "logits/chosen": -1.336031436920166, "logits/rejected": -1.286874532699585, "logps/chosen": -79.22531127929688, "logps/rejected": -83.72010803222656, "loss": 0.302, "rewards/accuracies": 1.0, "rewards/chosen": 0.5669018030166626, "rewards/margins": 1.361464500427246, "rewards/rejected": -0.7945627570152283, "step": 780 }, { "epoch": 1.25, "learning_rate": 4.782005548949663e-07, "logits/chosen": -1.3366363048553467, "logits/rejected": -1.3390461206436157, "logps/chosen": -111.70911407470703, "logps/rejected": -119.69509887695312, "loss": 0.4836, "rewards/accuracies": 0.75, "rewards/chosen": 0.3170236647129059, "rewards/margins": 1.4456250667572021, "rewards/rejected": -1.1286015510559082, "step": 781 }, { "epoch": 1.26, "learning_rate": 4.781014665081253e-07, "logits/chosen": -1.2851723432540894, "logits/rejected": -1.2348060607910156, "logps/chosen": -114.98124694824219, "logps/rejected": -146.65357971191406, "loss": 0.3465, "rewards/accuracies": 1.0, "rewards/chosen": -0.5482298135757446, "rewards/margins": 2.464938163757324, "rewards/rejected": -3.0131678581237793, "step": 782 }, { "epoch": 1.26, "learning_rate": 4.780023781212842e-07, "logits/chosen": -1.3472552299499512, "logits/rejected": -1.3802173137664795, "logps/chosen": -79.37167358398438, "logps/rejected": -90.9935302734375, "loss": 0.4883, "rewards/accuracies": 0.5, "rewards/chosen": -0.3286859691143036, "rewards/margins": 0.35907477140426636, "rewards/rejected": -0.6877607107162476, "step": 783 }, { "epoch": 1.26, "learning_rate": 4.779032897344431e-07, "logits/chosen": -1.3687454462051392, "logits/rejected": -1.3064881563186646, "logps/chosen": -67.77781677246094, "logps/rejected": -91.71720123291016, "loss": 0.5268, "rewards/accuracies": 1.0, "rewards/chosen": 0.24964535236358643, "rewards/margins": 1.000247836112976, "rewards/rejected": -0.7506026029586792, "step": 784 }, { "epoch": 1.26, "learning_rate": 4.77804201347602e-07, "logits/chosen": -1.2812796831130981, "logits/rejected": -1.290711760520935, "logps/chosen": -80.43958282470703, "logps/rejected": -94.7020263671875, "loss": 0.5236, "rewards/accuracies": 0.75, "rewards/chosen": 0.04517107084393501, "rewards/margins": 0.333005428314209, "rewards/rejected": -0.2878343462944031, "step": 785 }, { "epoch": 1.26, "learning_rate": 4.77705112960761e-07, "logits/chosen": -1.2289416790008545, "logits/rejected": -1.2923582792282104, "logps/chosen": -95.5247802734375, "logps/rejected": -114.56204986572266, "loss": 0.3423, "rewards/accuracies": 1.0, "rewards/chosen": 0.09593506157398224, "rewards/margins": 3.24476957321167, "rewards/rejected": -3.148834705352783, "step": 786 }, { "epoch": 1.26, "learning_rate": 4.776060245739199e-07, "logits/chosen": -1.3092231750488281, "logits/rejected": -1.3618532419204712, "logps/chosen": -98.99152374267578, "logps/rejected": -138.1865997314453, "loss": 0.433, "rewards/accuracies": 1.0, "rewards/chosen": 0.01213989220559597, "rewards/margins": 2.2375378608703613, "rewards/rejected": -2.225397825241089, "step": 787 }, { "epoch": 1.26, "learning_rate": 4.775069361870789e-07, "logits/chosen": -1.476326823234558, "logits/rejected": -1.5099215507507324, "logps/chosen": -86.89228820800781, "logps/rejected": -93.55575561523438, "loss": 0.2251, "rewards/accuracies": 0.5, "rewards/chosen": -0.8330244421958923, "rewards/margins": 1.0462112426757812, "rewards/rejected": -1.8792357444763184, "step": 788 }, { "epoch": 1.27, "learning_rate": 4.774078478002378e-07, "logits/chosen": -1.4523626565933228, "logits/rejected": -1.3699564933776855, "logps/chosen": -80.46049499511719, "logps/rejected": -99.6191635131836, "loss": 0.2756, "rewards/accuracies": 1.0, "rewards/chosen": -0.1960269957780838, "rewards/margins": 2.0651562213897705, "rewards/rejected": -2.261183261871338, "step": 789 }, { "epoch": 1.27, "learning_rate": 4.773087594133967e-07, "logits/chosen": -1.428037405014038, "logits/rejected": -1.3527019023895264, "logps/chosen": -96.73248291015625, "logps/rejected": -92.84661865234375, "loss": 0.3244, "rewards/accuracies": 1.0, "rewards/chosen": -0.11638011038303375, "rewards/margins": 0.6474985480308533, "rewards/rejected": -0.763878583908081, "step": 790 }, { "epoch": 1.27, "learning_rate": 4.772096710265556e-07, "logits/chosen": -1.5963654518127441, "logits/rejected": -1.5859627723693848, "logps/chosen": -85.25105285644531, "logps/rejected": -118.58192443847656, "loss": 0.3557, "rewards/accuracies": 0.75, "rewards/chosen": -0.040251344442367554, "rewards/margins": 1.8689073324203491, "rewards/rejected": -1.9091588258743286, "step": 791 }, { "epoch": 1.27, "learning_rate": 4.771105826397145e-07, "logits/chosen": -1.3078620433807373, "logits/rejected": -1.2893251180648804, "logps/chosen": -87.25511932373047, "logps/rejected": -100.44789123535156, "loss": 0.3867, "rewards/accuracies": 1.0, "rewards/chosen": -0.2798977792263031, "rewards/margins": 1.7422631978988647, "rewards/rejected": -2.0221610069274902, "step": 792 }, { "epoch": 1.27, "learning_rate": 4.770114942528736e-07, "logits/chosen": -1.4090783596038818, "logits/rejected": -1.4879786968231201, "logps/chosen": -67.404052734375, "logps/rejected": -100.92681884765625, "loss": 0.329, "rewards/accuracies": 1.0, "rewards/chosen": -0.1649092733860016, "rewards/margins": 1.3881254196166992, "rewards/rejected": -1.5530345439910889, "step": 793 }, { "epoch": 1.27, "learning_rate": 4.769124058660325e-07, "logits/chosen": -1.4320560693740845, "logits/rejected": -1.4101804494857788, "logps/chosen": -84.84634399414062, "logps/rejected": -102.83170318603516, "loss": 0.2506, "rewards/accuracies": 1.0, "rewards/chosen": 0.27835631370544434, "rewards/margins": 2.1748709678649902, "rewards/rejected": -1.8965147733688354, "step": 794 }, { "epoch": 1.28, "learning_rate": 4.7681331747919143e-07, "logits/chosen": -1.3854682445526123, "logits/rejected": -1.4578721523284912, "logps/chosen": -94.77523803710938, "logps/rejected": -81.2493667602539, "loss": 0.3842, "rewards/accuracies": 0.5, "rewards/chosen": -0.55183345079422, "rewards/margins": 0.6745489835739136, "rewards/rejected": -1.2263824939727783, "step": 795 }, { "epoch": 1.28, "learning_rate": 4.767142290923504e-07, "logits/chosen": -1.3333821296691895, "logits/rejected": -1.3415969610214233, "logps/chosen": -94.12651824951172, "logps/rejected": -84.15394592285156, "loss": 0.4578, "rewards/accuracies": 1.0, "rewards/chosen": -0.17549963295459747, "rewards/margins": 2.398294448852539, "rewards/rejected": -2.57379412651062, "step": 796 }, { "epoch": 1.28, "learning_rate": 4.766151407055093e-07, "logits/chosen": -1.2892152070999146, "logits/rejected": -1.2784297466278076, "logps/chosen": -95.19532012939453, "logps/rejected": -88.51426696777344, "loss": 0.4148, "rewards/accuracies": 0.5, "rewards/chosen": -0.4015122354030609, "rewards/margins": 0.2201913744211197, "rewards/rejected": -0.6217036247253418, "step": 797 }, { "epoch": 1.28, "learning_rate": 4.765160523186682e-07, "logits/chosen": -1.4371483325958252, "logits/rejected": -1.3843356370925903, "logps/chosen": -94.23515319824219, "logps/rejected": -100.93545532226562, "loss": 0.2513, "rewards/accuracies": 1.0, "rewards/chosen": -0.008195213973522186, "rewards/margins": 0.8663699626922607, "rewards/rejected": -0.8745651245117188, "step": 798 }, { "epoch": 1.28, "learning_rate": 4.7641696393182716e-07, "logits/chosen": -1.3883440494537354, "logits/rejected": -1.328714370727539, "logps/chosen": -91.39260864257812, "logps/rejected": -101.96568298339844, "loss": 0.3789, "rewards/accuracies": 1.0, "rewards/chosen": -0.7243200540542603, "rewards/margins": 2.4840056896209717, "rewards/rejected": -3.2083256244659424, "step": 799 }, { "epoch": 1.28, "learning_rate": 4.7631787554498607e-07, "logits/chosen": -1.4376999139785767, "logits/rejected": -1.4248183965682983, "logps/chosen": -111.62651062011719, "logps/rejected": -126.81134033203125, "loss": 0.3822, "rewards/accuracies": 1.0, "rewards/chosen": -0.5171934366226196, "rewards/margins": 2.2485544681549072, "rewards/rejected": -2.7657480239868164, "step": 800 }, { "epoch": 1.29, "learning_rate": 4.762187871581451e-07, "logits/chosen": -1.489363670349121, "logits/rejected": -1.580581784248352, "logps/chosen": -84.1202163696289, "logps/rejected": -123.14726257324219, "loss": 0.2969, "rewards/accuracies": 1.0, "rewards/chosen": -0.4649463891983032, "rewards/margins": 1.1847479343414307, "rewards/rejected": -1.6496943235397339, "step": 801 }, { "epoch": 1.29, "learning_rate": 4.76119698771304e-07, "logits/chosen": -1.3916782140731812, "logits/rejected": -1.3603460788726807, "logps/chosen": -124.96878051757812, "logps/rejected": -117.596435546875, "loss": 0.2264, "rewards/accuracies": 1.0, "rewards/chosen": -0.42084693908691406, "rewards/margins": 1.2236419916152954, "rewards/rejected": -1.6444889307022095, "step": 802 }, { "epoch": 1.29, "learning_rate": 4.760206103844629e-07, "logits/chosen": -1.5669660568237305, "logits/rejected": -1.5764132738113403, "logps/chosen": -80.19837951660156, "logps/rejected": -101.46507263183594, "loss": 0.4359, "rewards/accuracies": 1.0, "rewards/chosen": -0.21972599625587463, "rewards/margins": 1.2959332466125488, "rewards/rejected": -1.515659213066101, "step": 803 }, { "epoch": 1.29, "learning_rate": 4.7592152199762185e-07, "logits/chosen": -1.4628448486328125, "logits/rejected": -1.5344501733779907, "logps/chosen": -79.56920623779297, "logps/rejected": -119.68403625488281, "loss": 0.2247, "rewards/accuracies": 1.0, "rewards/chosen": 0.05152454972267151, "rewards/margins": 3.940316677093506, "rewards/rejected": -3.888792037963867, "step": 804 }, { "epoch": 1.29, "learning_rate": 4.7582243361078076e-07, "logits/chosen": -1.3094149827957153, "logits/rejected": -1.3560928106307983, "logps/chosen": -94.40289306640625, "logps/rejected": -127.07909393310547, "loss": 0.3588, "rewards/accuracies": 1.0, "rewards/chosen": 0.11940211802721024, "rewards/margins": 3.2833023071289062, "rewards/rejected": -3.1638998985290527, "step": 805 }, { "epoch": 1.29, "learning_rate": 4.7572334522393977e-07, "logits/chosen": -1.4151484966278076, "logits/rejected": -1.46943998336792, "logps/chosen": -81.46760559082031, "logps/rejected": -118.49510192871094, "loss": 0.4325, "rewards/accuracies": 0.75, "rewards/chosen": -0.13408049941062927, "rewards/margins": 1.3974504470825195, "rewards/rejected": -1.5315309762954712, "step": 806 }, { "epoch": 1.3, "learning_rate": 4.756242568370987e-07, "logits/chosen": -1.5905771255493164, "logits/rejected": -1.5391255617141724, "logps/chosen": -105.71222686767578, "logps/rejected": -118.84329223632812, "loss": 0.2266, "rewards/accuracies": 0.75, "rewards/chosen": -1.0375512838363647, "rewards/margins": 2.1677026748657227, "rewards/rejected": -3.205254077911377, "step": 807 }, { "epoch": 1.3, "learning_rate": 4.755251684502576e-07, "logits/chosen": -1.2241172790527344, "logits/rejected": -1.2152931690216064, "logps/chosen": -99.58531188964844, "logps/rejected": -83.66168212890625, "loss": 0.4019, "rewards/accuracies": 0.75, "rewards/chosen": -0.31274864077568054, "rewards/margins": 1.1582330465316772, "rewards/rejected": -1.4709817171096802, "step": 808 }, { "epoch": 1.3, "learning_rate": 4.7542608006341655e-07, "logits/chosen": -1.3260515928268433, "logits/rejected": -1.312002182006836, "logps/chosen": -88.94812774658203, "logps/rejected": -120.5614013671875, "loss": 0.3252, "rewards/accuracies": 0.75, "rewards/chosen": -0.9283734560012817, "rewards/margins": 0.5633834004402161, "rewards/rejected": -1.4917569160461426, "step": 809 }, { "epoch": 1.3, "learning_rate": 4.7532699167657545e-07, "logits/chosen": -1.2858850955963135, "logits/rejected": -1.2942931652069092, "logps/chosen": -96.72728729248047, "logps/rejected": -92.34442138671875, "loss": 0.3519, "rewards/accuracies": 0.75, "rewards/chosen": -0.870337724685669, "rewards/margins": 0.4562051296234131, "rewards/rejected": -1.326542854309082, "step": 810 }, { "epoch": 1.3, "learning_rate": 4.7522790328973447e-07, "logits/chosen": -1.4587318897247314, "logits/rejected": -1.4320892095565796, "logps/chosen": -85.21399688720703, "logps/rejected": -120.8559341430664, "loss": 0.4321, "rewards/accuracies": 1.0, "rewards/chosen": -0.17783355712890625, "rewards/margins": 2.835641860961914, "rewards/rejected": -3.0134754180908203, "step": 811 }, { "epoch": 1.3, "learning_rate": 4.7512881490289337e-07, "logits/chosen": -1.5556178092956543, "logits/rejected": -1.5189120769500732, "logps/chosen": -90.63643646240234, "logps/rejected": -112.4111099243164, "loss": 0.429, "rewards/accuracies": 1.0, "rewards/chosen": -1.0250928401947021, "rewards/margins": 2.35780930519104, "rewards/rejected": -3.3829023838043213, "step": 812 }, { "epoch": 1.3, "learning_rate": 4.750297265160523e-07, "logits/chosen": -1.4209907054901123, "logits/rejected": -1.3812050819396973, "logps/chosen": -97.9415054321289, "logps/rejected": -124.76395416259766, "loss": 0.2622, "rewards/accuracies": 0.75, "rewards/chosen": -0.6255830526351929, "rewards/margins": 3.5239171981811523, "rewards/rejected": -4.149499893188477, "step": 813 }, { "epoch": 1.31, "learning_rate": 4.7493063812921124e-07, "logits/chosen": -1.4316115379333496, "logits/rejected": -1.4794877767562866, "logps/chosen": -100.68930053710938, "logps/rejected": -102.78739166259766, "loss": 0.3869, "rewards/accuracies": 0.75, "rewards/chosen": -0.8867339491844177, "rewards/margins": 1.838860273361206, "rewards/rejected": -2.7255942821502686, "step": 814 }, { "epoch": 1.31, "learning_rate": 4.7483154974237015e-07, "logits/chosen": -1.349687933921814, "logits/rejected": -1.2229033708572388, "logps/chosen": -99.9083023071289, "logps/rejected": -108.62406921386719, "loss": 0.356, "rewards/accuracies": 0.25, "rewards/chosen": -0.7291482090950012, "rewards/margins": 0.8065196871757507, "rewards/rejected": -1.535667896270752, "step": 815 }, { "epoch": 1.31, "learning_rate": 4.7473246135552916e-07, "logits/chosen": -1.3574572801589966, "logits/rejected": -1.313169240951538, "logps/chosen": -70.1456298828125, "logps/rejected": -88.99125671386719, "loss": 0.3236, "rewards/accuracies": 1.0, "rewards/chosen": -0.9733759760856628, "rewards/margins": 1.976133108139038, "rewards/rejected": -2.9495091438293457, "step": 816 }, { "epoch": 1.31, "learning_rate": 4.7463337296868807e-07, "logits/chosen": -1.4330763816833496, "logits/rejected": -1.4408726692199707, "logps/chosen": -114.13641357421875, "logps/rejected": -122.54708862304688, "loss": 0.3337, "rewards/accuracies": 1.0, "rewards/chosen": -0.7275932431221008, "rewards/margins": 3.0821681022644043, "rewards/rejected": -3.8097615242004395, "step": 817 }, { "epoch": 1.31, "learning_rate": 4.7453428458184697e-07, "logits/chosen": -1.4795721769332886, "logits/rejected": -1.4657690525054932, "logps/chosen": -99.6626968383789, "logps/rejected": -104.28457641601562, "loss": 0.4687, "rewards/accuracies": 0.75, "rewards/chosen": -1.472090482711792, "rewards/margins": 1.4098808765411377, "rewards/rejected": -2.8819713592529297, "step": 818 }, { "epoch": 1.31, "learning_rate": 4.7443519619500593e-07, "logits/chosen": -1.4973992109298706, "logits/rejected": -1.5581955909729004, "logps/chosen": -104.68208312988281, "logps/rejected": -133.37879943847656, "loss": 0.4575, "rewards/accuracies": 1.0, "rewards/chosen": -1.1233278512954712, "rewards/margins": 1.7829937934875488, "rewards/rejected": -2.9063215255737305, "step": 819 }, { "epoch": 1.32, "learning_rate": 4.7433610780816484e-07, "logits/chosen": -1.4560948610305786, "logits/rejected": -1.4262773990631104, "logps/chosen": -99.50623321533203, "logps/rejected": -126.90412902832031, "loss": 0.3399, "rewards/accuracies": 0.75, "rewards/chosen": -0.45924055576324463, "rewards/margins": 3.5874392986297607, "rewards/rejected": -4.046679973602295, "step": 820 }, { "epoch": 1.32, "learning_rate": 4.742370194213238e-07, "logits/chosen": -1.4778159856796265, "logits/rejected": -1.4926472902297974, "logps/chosen": -74.88053894042969, "logps/rejected": -94.5679931640625, "loss": 0.3512, "rewards/accuracies": 1.0, "rewards/chosen": -0.6245787143707275, "rewards/margins": 0.8575637340545654, "rewards/rejected": -1.4821425676345825, "step": 821 }, { "epoch": 1.32, "learning_rate": 4.7413793103448276e-07, "logits/chosen": -1.427869439125061, "logits/rejected": -1.5102508068084717, "logps/chosen": -88.70858001708984, "logps/rejected": -106.18560791015625, "loss": 0.4032, "rewards/accuracies": 0.75, "rewards/chosen": -1.2300591468811035, "rewards/margins": 2.059913158416748, "rewards/rejected": -3.2899725437164307, "step": 822 }, { "epoch": 1.32, "learning_rate": 4.7403884264764167e-07, "logits/chosen": -1.4518811702728271, "logits/rejected": -1.4276622533798218, "logps/chosen": -98.82361602783203, "logps/rejected": -102.69120788574219, "loss": 0.2692, "rewards/accuracies": 0.75, "rewards/chosen": -0.9755848050117493, "rewards/margins": 1.6921327114105225, "rewards/rejected": -2.667717456817627, "step": 823 }, { "epoch": 1.32, "learning_rate": 4.739397542608006e-07, "logits/chosen": -1.5273962020874023, "logits/rejected": -1.518336534500122, "logps/chosen": -83.89656066894531, "logps/rejected": -120.76232147216797, "loss": 0.3174, "rewards/accuracies": 1.0, "rewards/chosen": -0.40375664830207825, "rewards/margins": 4.0516204833984375, "rewards/rejected": -4.455377101898193, "step": 824 }, { "epoch": 1.32, "learning_rate": 4.7384066587395953e-07, "logits/chosen": -1.3329288959503174, "logits/rejected": -1.3248867988586426, "logps/chosen": -92.15515899658203, "logps/rejected": -92.53300476074219, "loss": 0.3749, "rewards/accuracies": 0.5, "rewards/chosen": -0.3596939146518707, "rewards/margins": 0.8146473169326782, "rewards/rejected": -1.1743412017822266, "step": 825 }, { "epoch": 1.33, "learning_rate": 4.737415774871185e-07, "logits/chosen": -1.4141978025436401, "logits/rejected": -1.4220844507217407, "logps/chosen": -85.89578247070312, "logps/rejected": -105.0001449584961, "loss": 0.4503, "rewards/accuracies": 1.0, "rewards/chosen": -1.0263237953186035, "rewards/margins": 1.174168348312378, "rewards/rejected": -2.2004921436309814, "step": 826 }, { "epoch": 1.33, "learning_rate": 4.7364248910027745e-07, "logits/chosen": -1.2557504177093506, "logits/rejected": -1.3233401775360107, "logps/chosen": -86.15840148925781, "logps/rejected": -125.18978881835938, "loss": 0.3324, "rewards/accuracies": 0.75, "rewards/chosen": -0.7103544473648071, "rewards/margins": 1.6715140342712402, "rewards/rejected": -2.381868362426758, "step": 827 }, { "epoch": 1.33, "learning_rate": 4.7354340071343636e-07, "logits/chosen": -1.5153026580810547, "logits/rejected": -1.4926087856292725, "logps/chosen": -86.0960693359375, "logps/rejected": -102.19439697265625, "loss": 0.3415, "rewards/accuracies": 1.0, "rewards/chosen": -0.24425727128982544, "rewards/margins": 1.7412840127944946, "rewards/rejected": -1.9855413436889648, "step": 828 }, { "epoch": 1.33, "learning_rate": 4.734443123265953e-07, "logits/chosen": -1.4682657718658447, "logits/rejected": -1.5269055366516113, "logps/chosen": -89.58148193359375, "logps/rejected": -123.80720520019531, "loss": 0.2744, "rewards/accuracies": 1.0, "rewards/chosen": 0.14977970719337463, "rewards/margins": 3.2305266857147217, "rewards/rejected": -3.08074688911438, "step": 829 }, { "epoch": 1.33, "learning_rate": 4.733452239397542e-07, "logits/chosen": -1.4432027339935303, "logits/rejected": -1.4204756021499634, "logps/chosen": -90.01183319091797, "logps/rejected": -113.59738159179688, "loss": 0.3498, "rewards/accuracies": 0.75, "rewards/chosen": 0.07716560363769531, "rewards/margins": 2.3963987827301025, "rewards/rejected": -2.3192331790924072, "step": 830 }, { "epoch": 1.33, "learning_rate": 4.7324613555291313e-07, "logits/chosen": -1.361814022064209, "logits/rejected": -1.3850769996643066, "logps/chosen": -108.47561645507812, "logps/rejected": -137.21133422851562, "loss": 0.4797, "rewards/accuracies": 1.0, "rewards/chosen": -0.07563057541847229, "rewards/margins": 2.7891335487365723, "rewards/rejected": -2.8647642135620117, "step": 831 }, { "epoch": 1.34, "learning_rate": 4.7314704716607214e-07, "logits/chosen": -1.262779712677002, "logits/rejected": -1.3241057395935059, "logps/chosen": -95.19525909423828, "logps/rejected": -104.71087646484375, "loss": 0.3867, "rewards/accuracies": 1.0, "rewards/chosen": -0.34044378995895386, "rewards/margins": 0.7455928921699524, "rewards/rejected": -1.0860366821289062, "step": 832 }, { "epoch": 1.34, "learning_rate": 4.7304795877923105e-07, "logits/chosen": -1.3406627178192139, "logits/rejected": -1.2960156202316284, "logps/chosen": -99.34365844726562, "logps/rejected": -98.27256774902344, "loss": 0.3434, "rewards/accuracies": 0.75, "rewards/chosen": -0.1813381314277649, "rewards/margins": 0.7208836674690247, "rewards/rejected": -0.9022217988967896, "step": 833 }, { "epoch": 1.34, "learning_rate": 4.7294887039239e-07, "logits/chosen": -1.338902473449707, "logits/rejected": -1.4231345653533936, "logps/chosen": -98.46669006347656, "logps/rejected": -107.97233581542969, "loss": 0.3863, "rewards/accuracies": 1.0, "rewards/chosen": -0.6137487292289734, "rewards/margins": 1.6740732192993164, "rewards/rejected": -2.2878217697143555, "step": 834 }, { "epoch": 1.34, "learning_rate": 4.728497820055489e-07, "logits/chosen": -1.3430230617523193, "logits/rejected": -1.348879337310791, "logps/chosen": -88.46080017089844, "logps/rejected": -117.81168365478516, "loss": 0.2051, "rewards/accuracies": 1.0, "rewards/chosen": -0.6006889343261719, "rewards/margins": 4.595475196838379, "rewards/rejected": -5.196164608001709, "step": 835 }, { "epoch": 1.34, "learning_rate": 4.727506936187078e-07, "logits/chosen": -1.4564151763916016, "logits/rejected": -1.3803036212921143, "logps/chosen": -94.81198120117188, "logps/rejected": -110.26583862304688, "loss": 0.292, "rewards/accuracies": 1.0, "rewards/chosen": -0.01928987354040146, "rewards/margins": 2.689044713973999, "rewards/rejected": -2.7083346843719482, "step": 836 }, { "epoch": 1.34, "learning_rate": 4.7265160523186684e-07, "logits/chosen": -1.4067498445510864, "logits/rejected": -1.4680211544036865, "logps/chosen": -79.41033935546875, "logps/rejected": -118.60881042480469, "loss": 0.4621, "rewards/accuracies": 1.0, "rewards/chosen": -0.9674829840660095, "rewards/margins": 3.1892507076263428, "rewards/rejected": -4.156733512878418, "step": 837 }, { "epoch": 1.35, "learning_rate": 4.7255251684502574e-07, "logits/chosen": -1.284503698348999, "logits/rejected": -1.3900878429412842, "logps/chosen": -80.24241638183594, "logps/rejected": -128.83541870117188, "loss": 0.212, "rewards/accuracies": 1.0, "rewards/chosen": 0.18779459595680237, "rewards/margins": 2.7223052978515625, "rewards/rejected": -2.534510612487793, "step": 838 }, { "epoch": 1.35, "learning_rate": 4.724534284581847e-07, "logits/chosen": -1.1986953020095825, "logits/rejected": -1.223322868347168, "logps/chosen": -99.10812377929688, "logps/rejected": -115.06632995605469, "loss": 0.3217, "rewards/accuracies": 1.0, "rewards/chosen": -0.5027133226394653, "rewards/margins": 1.7482997179031372, "rewards/rejected": -2.2510130405426025, "step": 839 }, { "epoch": 1.35, "learning_rate": 4.723543400713436e-07, "logits/chosen": -1.1942161321640015, "logits/rejected": -1.2155028581619263, "logps/chosen": -99.63776397705078, "logps/rejected": -135.48985290527344, "loss": 0.3249, "rewards/accuracies": 0.75, "rewards/chosen": -0.9701915979385376, "rewards/margins": 1.749567985534668, "rewards/rejected": -2.719759464263916, "step": 840 }, { "epoch": 1.35, "learning_rate": 4.722552516845025e-07, "logits/chosen": -1.3764268159866333, "logits/rejected": -1.3348503112792969, "logps/chosen": -84.84028625488281, "logps/rejected": -101.60136413574219, "loss": 0.3091, "rewards/accuracies": 1.0, "rewards/chosen": 0.2655399441719055, "rewards/margins": 1.7303149700164795, "rewards/rejected": -1.4647752046585083, "step": 841 }, { "epoch": 1.35, "learning_rate": 4.721561632976615e-07, "logits/chosen": -1.3331114053726196, "logits/rejected": -1.3470219373703003, "logps/chosen": -104.82176208496094, "logps/rejected": -126.66482543945312, "loss": 0.337, "rewards/accuracies": 0.75, "rewards/chosen": -0.4801672101020813, "rewards/margins": 3.138784170150757, "rewards/rejected": -3.6189515590667725, "step": 842 }, { "epoch": 1.35, "learning_rate": 4.7205707491082044e-07, "logits/chosen": -1.4033527374267578, "logits/rejected": -1.392135739326477, "logps/chosen": -80.78723907470703, "logps/rejected": -142.7035675048828, "loss": 0.3103, "rewards/accuracies": 1.0, "rewards/chosen": 0.08001328259706497, "rewards/margins": 4.300027847290039, "rewards/rejected": -4.220014572143555, "step": 843 }, { "epoch": 1.35, "learning_rate": 4.719579865239794e-07, "logits/chosen": -1.3801264762878418, "logits/rejected": -1.4802361726760864, "logps/chosen": -93.86898803710938, "logps/rejected": -124.66912841796875, "loss": 0.3826, "rewards/accuracies": 0.5, "rewards/chosen": -0.04860353469848633, "rewards/margins": 1.5310704708099365, "rewards/rejected": -1.5796740055084229, "step": 844 }, { "epoch": 1.36, "learning_rate": 4.718588981371383e-07, "logits/chosen": -1.1968863010406494, "logits/rejected": -1.1818851232528687, "logps/chosen": -85.33483123779297, "logps/rejected": -102.30375671386719, "loss": 0.278, "rewards/accuracies": 1.0, "rewards/chosen": -0.637770414352417, "rewards/margins": 2.469513177871704, "rewards/rejected": -3.107283592224121, "step": 845 }, { "epoch": 1.36, "learning_rate": 4.717598097502972e-07, "logits/chosen": -1.434950590133667, "logits/rejected": -1.4414716958999634, "logps/chosen": -89.59851837158203, "logps/rejected": -128.78265380859375, "loss": 0.2589, "rewards/accuracies": 0.75, "rewards/chosen": -0.28020554780960083, "rewards/margins": 2.49346923828125, "rewards/rejected": -2.773674726486206, "step": 846 }, { "epoch": 1.36, "learning_rate": 4.7166072136345617e-07, "logits/chosen": -1.4361101388931274, "logits/rejected": -1.4887644052505493, "logps/chosen": -64.52383422851562, "logps/rejected": -101.86747741699219, "loss": 0.3651, "rewards/accuracies": 1.0, "rewards/chosen": -0.4031468331813812, "rewards/margins": 3.014817237854004, "rewards/rejected": -3.417963981628418, "step": 847 }, { "epoch": 1.36, "learning_rate": 4.7156163297661513e-07, "logits/chosen": -1.3596985340118408, "logits/rejected": -1.3542417287826538, "logps/chosen": -81.66444396972656, "logps/rejected": -84.52822875976562, "loss": 0.4375, "rewards/accuracies": 0.75, "rewards/chosen": -0.7115099430084229, "rewards/margins": 0.5217678546905518, "rewards/rejected": -1.2332777976989746, "step": 848 }, { "epoch": 1.36, "learning_rate": 4.714625445897741e-07, "logits/chosen": -1.4938799142837524, "logits/rejected": -1.4043649435043335, "logps/chosen": -102.88548278808594, "logps/rejected": -112.03765869140625, "loss": 0.3989, "rewards/accuracies": 0.75, "rewards/chosen": -0.9157569408416748, "rewards/margins": 0.7887274622917175, "rewards/rejected": -1.704484462738037, "step": 849 }, { "epoch": 1.36, "learning_rate": 4.71363456202933e-07, "logits/chosen": -1.4704091548919678, "logits/rejected": -1.51737642288208, "logps/chosen": -104.00901794433594, "logps/rejected": -109.41404724121094, "loss": 0.3468, "rewards/accuracies": 0.75, "rewards/chosen": -1.6444734334945679, "rewards/margins": 1.5698599815368652, "rewards/rejected": -3.2143335342407227, "step": 850 }, { "epoch": 1.37, "learning_rate": 4.712643678160919e-07, "logits/chosen": -1.4539318084716797, "logits/rejected": -1.4793615341186523, "logps/chosen": -100.28477478027344, "logps/rejected": -109.55717468261719, "loss": 0.2531, "rewards/accuracies": 1.0, "rewards/chosen": -0.1670963317155838, "rewards/margins": 1.9242877960205078, "rewards/rejected": -2.091383934020996, "step": 851 }, { "epoch": 1.37, "learning_rate": 4.7116527942925086e-07, "logits/chosen": -1.5180578231811523, "logits/rejected": -1.4551713466644287, "logps/chosen": -82.52430725097656, "logps/rejected": -100.09239959716797, "loss": 0.2784, "rewards/accuracies": 1.0, "rewards/chosen": -0.31973332166671753, "rewards/margins": 2.3931357860565186, "rewards/rejected": -2.712869167327881, "step": 852 }, { "epoch": 1.37, "learning_rate": 4.710661910424098e-07, "logits/chosen": -1.2889223098754883, "logits/rejected": -1.2503786087036133, "logps/chosen": -100.99467468261719, "logps/rejected": -102.57280731201172, "loss": 0.4632, "rewards/accuracies": 0.75, "rewards/chosen": -0.447457492351532, "rewards/margins": 1.597954273223877, "rewards/rejected": -2.0454115867614746, "step": 853 }, { "epoch": 1.37, "learning_rate": 4.709671026555688e-07, "logits/chosen": -1.4053295850753784, "logits/rejected": -1.5117361545562744, "logps/chosen": -79.9698715209961, "logps/rejected": -114.35211944580078, "loss": 0.3274, "rewards/accuracies": 0.75, "rewards/chosen": -0.36292287707328796, "rewards/margins": 1.8638670444488525, "rewards/rejected": -2.226789951324463, "step": 854 }, { "epoch": 1.37, "learning_rate": 4.708680142687277e-07, "logits/chosen": -1.518999695777893, "logits/rejected": -1.445088505744934, "logps/chosen": -100.81340026855469, "logps/rejected": -114.19766235351562, "loss": 0.4015, "rewards/accuracies": 0.75, "rewards/chosen": -2.027595043182373, "rewards/margins": 1.0249669551849365, "rewards/rejected": -3.0525619983673096, "step": 855 }, { "epoch": 1.37, "learning_rate": 4.707689258818866e-07, "logits/chosen": -1.387768268585205, "logits/rejected": -1.3432828187942505, "logps/chosen": -84.4090805053711, "logps/rejected": -107.17613220214844, "loss": 0.6037, "rewards/accuracies": 0.5, "rewards/chosen": -1.4655961990356445, "rewards/margins": 1.1840386390686035, "rewards/rejected": -2.649634838104248, "step": 856 }, { "epoch": 1.38, "learning_rate": 4.7066983749504556e-07, "logits/chosen": -1.5120022296905518, "logits/rejected": -1.5483226776123047, "logps/chosen": -107.60599517822266, "logps/rejected": -122.77335357666016, "loss": 0.3014, "rewards/accuracies": 1.0, "rewards/chosen": -1.149163842201233, "rewards/margins": 1.7211452722549438, "rewards/rejected": -2.8703091144561768, "step": 857 }, { "epoch": 1.38, "learning_rate": 4.7057074910820446e-07, "logits/chosen": -1.5409016609191895, "logits/rejected": -1.5365424156188965, "logps/chosen": -90.19618225097656, "logps/rejected": -89.18219757080078, "loss": 0.4012, "rewards/accuracies": 0.75, "rewards/chosen": -0.931050181388855, "rewards/margins": 0.15985578298568726, "rewards/rejected": -1.0909059047698975, "step": 858 }, { "epoch": 1.38, "learning_rate": 4.704716607213635e-07, "logits/chosen": -1.4588663578033447, "logits/rejected": -1.3916168212890625, "logps/chosen": -108.2625732421875, "logps/rejected": -132.9908905029297, "loss": 0.3648, "rewards/accuracies": 1.0, "rewards/chosen": -0.6725414991378784, "rewards/margins": 2.2511508464813232, "rewards/rejected": -2.923692226409912, "step": 859 }, { "epoch": 1.38, "learning_rate": 4.703725723345224e-07, "logits/chosen": -1.47183096408844, "logits/rejected": -1.4623241424560547, "logps/chosen": -86.77767944335938, "logps/rejected": -83.90248107910156, "loss": 0.4381, "rewards/accuracies": 1.0, "rewards/chosen": -0.20253248512744904, "rewards/margins": 2.0272626876831055, "rewards/rejected": -2.229794979095459, "step": 860 }, { "epoch": 1.38, "learning_rate": 4.702734839476813e-07, "logits/chosen": -1.457409143447876, "logits/rejected": -1.4948185682296753, "logps/chosen": -84.48971557617188, "logps/rejected": -74.28926086425781, "loss": 0.5545, "rewards/accuracies": 0.25, "rewards/chosen": -0.6398317217826843, "rewards/margins": -0.026532717049121857, "rewards/rejected": -0.6132989525794983, "step": 861 }, { "epoch": 1.38, "learning_rate": 4.7017439556084025e-07, "logits/chosen": -1.4775032997131348, "logits/rejected": -1.4491175413131714, "logps/chosen": -87.09159851074219, "logps/rejected": -84.67426300048828, "loss": 0.3967, "rewards/accuracies": 0.5, "rewards/chosen": -1.1899977922439575, "rewards/margins": 0.10587304830551147, "rewards/rejected": -1.2958707809448242, "step": 862 }, { "epoch": 1.39, "learning_rate": 4.7007530717399916e-07, "logits/chosen": -1.4812092781066895, "logits/rejected": -1.5229347944259644, "logps/chosen": -76.72782897949219, "logps/rejected": -141.01931762695312, "loss": 0.3109, "rewards/accuracies": 1.0, "rewards/chosen": 0.020018011331558228, "rewards/margins": 4.005414009094238, "rewards/rejected": -3.985395908355713, "step": 863 }, { "epoch": 1.39, "learning_rate": 4.699762187871581e-07, "logits/chosen": -1.2668180465698242, "logits/rejected": -1.4081354141235352, "logps/chosen": -91.11351013183594, "logps/rejected": -149.130859375, "loss": 0.3815, "rewards/accuracies": 1.0, "rewards/chosen": -0.40102118253707886, "rewards/margins": 2.5794661045074463, "rewards/rejected": -2.98048734664917, "step": 864 }, { "epoch": 1.39, "learning_rate": 4.698771304003171e-07, "logits/chosen": -1.6380810737609863, "logits/rejected": -1.5794364213943481, "logps/chosen": -91.00669860839844, "logps/rejected": -99.55502319335938, "loss": 0.249, "rewards/accuracies": 1.0, "rewards/chosen": 0.11631183326244354, "rewards/margins": 1.5324782133102417, "rewards/rejected": -1.4161663055419922, "step": 865 }, { "epoch": 1.39, "learning_rate": 4.69778042013476e-07, "logits/chosen": -1.5631719827651978, "logits/rejected": -1.5234401226043701, "logps/chosen": -93.40147399902344, "logps/rejected": -135.0677947998047, "loss": 0.322, "rewards/accuracies": 1.0, "rewards/chosen": -0.530066192150116, "rewards/margins": 3.126030206680298, "rewards/rejected": -3.6560962200164795, "step": 866 }, { "epoch": 1.39, "learning_rate": 4.6967895362663494e-07, "logits/chosen": -1.3415358066558838, "logits/rejected": -1.4452779293060303, "logps/chosen": -67.5946273803711, "logps/rejected": -98.42156219482422, "loss": 0.2048, "rewards/accuracies": 0.75, "rewards/chosen": -0.2359386533498764, "rewards/margins": 2.9021894931793213, "rewards/rejected": -3.1381282806396484, "step": 867 }, { "epoch": 1.39, "learning_rate": 4.6957986523979385e-07, "logits/chosen": -1.4457942247390747, "logits/rejected": -1.4557454586029053, "logps/chosen": -74.195068359375, "logps/rejected": -94.54876708984375, "loss": 0.3586, "rewards/accuracies": 1.0, "rewards/chosen": -0.38051164150238037, "rewards/margins": 2.1703076362609863, "rewards/rejected": -2.5508193969726562, "step": 868 }, { "epoch": 1.39, "learning_rate": 4.694807768529528e-07, "logits/chosen": -1.2388012409210205, "logits/rejected": -1.1936167478561401, "logps/chosen": -66.7254409790039, "logps/rejected": -87.50965881347656, "loss": 0.3173, "rewards/accuracies": 0.75, "rewards/chosen": -0.6898751854896545, "rewards/margins": 1.0155903100967407, "rewards/rejected": -1.70546555519104, "step": 869 }, { "epoch": 1.4, "learning_rate": 4.6938168846611177e-07, "logits/chosen": -1.4142231941223145, "logits/rejected": -1.3796236515045166, "logps/chosen": -86.62666320800781, "logps/rejected": -124.37645721435547, "loss": 0.3945, "rewards/accuracies": 1.0, "rewards/chosen": -0.9362061023712158, "rewards/margins": 2.50740122795105, "rewards/rejected": -3.4436073303222656, "step": 870 }, { "epoch": 1.4, "learning_rate": 4.692826000792707e-07, "logits/chosen": -1.451601266860962, "logits/rejected": -1.4383888244628906, "logps/chosen": -94.58566284179688, "logps/rejected": -90.20590209960938, "loss": 0.3182, "rewards/accuracies": 0.75, "rewards/chosen": -0.6215428113937378, "rewards/margins": 0.9986903667449951, "rewards/rejected": -1.6202332973480225, "step": 871 }, { "epoch": 1.4, "learning_rate": 4.6918351169242964e-07, "logits/chosen": -1.4720675945281982, "logits/rejected": -1.484776496887207, "logps/chosen": -93.87531280517578, "logps/rejected": -101.55762481689453, "loss": 0.3522, "rewards/accuracies": 0.75, "rewards/chosen": -1.0442681312561035, "rewards/margins": 1.2048218250274658, "rewards/rejected": -2.2490899562835693, "step": 872 }, { "epoch": 1.4, "learning_rate": 4.6908442330558854e-07, "logits/chosen": -1.3939129114151, "logits/rejected": -1.4021170139312744, "logps/chosen": -86.27481842041016, "logps/rejected": -115.74496459960938, "loss": 0.3233, "rewards/accuracies": 1.0, "rewards/chosen": -0.906700849533081, "rewards/margins": 1.3566888570785522, "rewards/rejected": -2.2633895874023438, "step": 873 }, { "epoch": 1.4, "learning_rate": 4.689853349187475e-07, "logits/chosen": -1.4139723777770996, "logits/rejected": -1.4083621501922607, "logps/chosen": -113.32118225097656, "logps/rejected": -113.8725357055664, "loss": 0.4618, "rewards/accuracies": 0.75, "rewards/chosen": -1.456540822982788, "rewards/margins": 1.6386935710906982, "rewards/rejected": -3.0952343940734863, "step": 874 }, { "epoch": 1.4, "learning_rate": 4.6888624653190646e-07, "logits/chosen": -1.3117401599884033, "logits/rejected": -1.3930554389953613, "logps/chosen": -113.69795227050781, "logps/rejected": -151.40768432617188, "loss": 0.4017, "rewards/accuracies": 0.5, "rewards/chosen": -1.424972653388977, "rewards/margins": 2.1442532539367676, "rewards/rejected": -3.569225788116455, "step": 875 }, { "epoch": 1.41, "learning_rate": 4.6878715814506537e-07, "logits/chosen": -1.317976951599121, "logits/rejected": -1.3304226398468018, "logps/chosen": -90.28089904785156, "logps/rejected": -96.00550079345703, "loss": 0.2934, "rewards/accuracies": 0.75, "rewards/chosen": -0.8654288649559021, "rewards/margins": 1.6907366514205933, "rewards/rejected": -2.5561654567718506, "step": 876 }, { "epoch": 1.41, "learning_rate": 4.6868806975822433e-07, "logits/chosen": -1.5312275886535645, "logits/rejected": -1.5187056064605713, "logps/chosen": -77.1458740234375, "logps/rejected": -107.45299530029297, "loss": 0.3769, "rewards/accuracies": 0.75, "rewards/chosen": 0.10662470012903214, "rewards/margins": 1.978196144104004, "rewards/rejected": -1.8715713024139404, "step": 877 }, { "epoch": 1.41, "learning_rate": 4.6858898137138324e-07, "logits/chosen": -1.434421420097351, "logits/rejected": -1.461845874786377, "logps/chosen": -103.7794418334961, "logps/rejected": -115.21441650390625, "loss": 0.4336, "rewards/accuracies": 0.75, "rewards/chosen": -1.2939445972442627, "rewards/margins": 1.128411054611206, "rewards/rejected": -2.422355890274048, "step": 878 }, { "epoch": 1.41, "learning_rate": 4.6848989298454214e-07, "logits/chosen": -1.437088966369629, "logits/rejected": -1.3501012325286865, "logps/chosen": -107.9480209350586, "logps/rejected": -109.37284088134766, "loss": 0.2885, "rewards/accuracies": 0.75, "rewards/chosen": -0.7518547177314758, "rewards/margins": 1.5608041286468506, "rewards/rejected": -2.3126587867736816, "step": 879 }, { "epoch": 1.41, "learning_rate": 4.6839080459770116e-07, "logits/chosen": -1.5032329559326172, "logits/rejected": -1.4395248889923096, "logps/chosen": -71.29470825195312, "logps/rejected": -100.636962890625, "loss": 0.3382, "rewards/accuracies": 1.0, "rewards/chosen": -0.19406695663928986, "rewards/margins": 2.7594258785247803, "rewards/rejected": -2.9534926414489746, "step": 880 }, { "epoch": 1.41, "learning_rate": 4.6829171621086006e-07, "logits/chosen": -1.4943766593933105, "logits/rejected": -1.4123082160949707, "logps/chosen": -113.8065414428711, "logps/rejected": -126.65861511230469, "loss": 0.3207, "rewards/accuracies": 1.0, "rewards/chosen": -0.3267940282821655, "rewards/margins": 2.9686355590820312, "rewards/rejected": -3.295429229736328, "step": 881 }, { "epoch": 1.42, "learning_rate": 4.68192627824019e-07, "logits/chosen": -1.5374174118041992, "logits/rejected": -1.5144572257995605, "logps/chosen": -85.26484680175781, "logps/rejected": -116.28812408447266, "loss": 0.5004, "rewards/accuracies": 1.0, "rewards/chosen": -1.1549571752548218, "rewards/margins": 2.4557230472564697, "rewards/rejected": -3.610680103302002, "step": 882 }, { "epoch": 1.42, "learning_rate": 4.6809353943717793e-07, "logits/chosen": -1.3149759769439697, "logits/rejected": -1.2872226238250732, "logps/chosen": -67.63557434082031, "logps/rejected": -100.64028930664062, "loss": 0.2515, "rewards/accuracies": 1.0, "rewards/chosen": 0.043538182973861694, "rewards/margins": 2.821659803390503, "rewards/rejected": -2.7781214714050293, "step": 883 }, { "epoch": 1.42, "learning_rate": 4.6799445105033684e-07, "logits/chosen": -1.3905415534973145, "logits/rejected": -1.2951328754425049, "logps/chosen": -98.84854125976562, "logps/rejected": -115.005126953125, "loss": 0.4011, "rewards/accuracies": 1.0, "rewards/chosen": -1.077976942062378, "rewards/margins": 1.758192539215088, "rewards/rejected": -2.836169481277466, "step": 884 }, { "epoch": 1.42, "learning_rate": 4.6789536266349585e-07, "logits/chosen": -1.3182439804077148, "logits/rejected": -1.366581678390503, "logps/chosen": -104.0657730102539, "logps/rejected": -114.08162689208984, "loss": 0.3041, "rewards/accuracies": 1.0, "rewards/chosen": -0.47874563932418823, "rewards/margins": 1.3076506853103638, "rewards/rejected": -1.7863962650299072, "step": 885 }, { "epoch": 1.42, "learning_rate": 4.6779627427665475e-07, "logits/chosen": -1.375813603401184, "logits/rejected": -1.4435150623321533, "logps/chosen": -71.88319396972656, "logps/rejected": -125.96277618408203, "loss": 0.2617, "rewards/accuracies": 1.0, "rewards/chosen": -0.5722928047180176, "rewards/margins": 3.961413860321045, "rewards/rejected": -4.5337066650390625, "step": 886 }, { "epoch": 1.42, "learning_rate": 4.676971858898137e-07, "logits/chosen": -1.3260478973388672, "logits/rejected": -1.3771055936813354, "logps/chosen": -73.5811538696289, "logps/rejected": -104.68587493896484, "loss": 0.4953, "rewards/accuracies": 0.75, "rewards/chosen": -0.9636367559432983, "rewards/margins": 0.5769127011299133, "rewards/rejected": -1.5405495166778564, "step": 887 }, { "epoch": 1.43, "learning_rate": 4.675980975029726e-07, "logits/chosen": -1.3535733222961426, "logits/rejected": -1.3486523628234863, "logps/chosen": -98.47283172607422, "logps/rejected": -106.98872375488281, "loss": 0.4608, "rewards/accuracies": 0.75, "rewards/chosen": -0.37851354479789734, "rewards/margins": 1.2820786237716675, "rewards/rejected": -1.6605923175811768, "step": 888 }, { "epoch": 1.43, "learning_rate": 4.6749900911613153e-07, "logits/chosen": -1.3645515441894531, "logits/rejected": -1.4006030559539795, "logps/chosen": -128.36959838867188, "logps/rejected": -154.19097900390625, "loss": 0.347, "rewards/accuracies": 1.0, "rewards/chosen": -0.7762003540992737, "rewards/margins": 2.6028060913085938, "rewards/rejected": -3.3790063858032227, "step": 889 }, { "epoch": 1.43, "learning_rate": 4.6739992072929054e-07, "logits/chosen": -1.5391274690628052, "logits/rejected": -1.5041810274124146, "logps/chosen": -97.58425903320312, "logps/rejected": -103.10050201416016, "loss": 0.549, "rewards/accuracies": 1.0, "rewards/chosen": -0.3001726269721985, "rewards/margins": 1.5541646480560303, "rewards/rejected": -1.8543373346328735, "step": 890 }, { "epoch": 1.43, "learning_rate": 4.6730083234244945e-07, "logits/chosen": -1.4837735891342163, "logits/rejected": -1.430382251739502, "logps/chosen": -85.65047454833984, "logps/rejected": -117.6192626953125, "loss": 0.1934, "rewards/accuracies": 1.0, "rewards/chosen": -0.5197610855102539, "rewards/margins": 4.435118198394775, "rewards/rejected": -4.954879283905029, "step": 891 }, { "epoch": 1.43, "learning_rate": 4.672017439556084e-07, "logits/chosen": -1.42747962474823, "logits/rejected": -1.4147052764892578, "logps/chosen": -90.48818969726562, "logps/rejected": -90.69126892089844, "loss": 0.2246, "rewards/accuracies": 0.75, "rewards/chosen": -0.3176918029785156, "rewards/margins": 1.1243195533752441, "rewards/rejected": -1.4420113563537598, "step": 892 }, { "epoch": 1.43, "learning_rate": 4.671026555687673e-07, "logits/chosen": -1.5101330280303955, "logits/rejected": -1.5449793338775635, "logps/chosen": -123.83343505859375, "logps/rejected": -119.34120178222656, "loss": 0.3698, "rewards/accuracies": 0.75, "rewards/chosen": -0.6401113867759705, "rewards/margins": 2.1373038291931152, "rewards/rejected": -2.7774152755737305, "step": 893 }, { "epoch": 1.43, "learning_rate": 4.670035671819262e-07, "logits/chosen": -1.3743782043457031, "logits/rejected": -1.359755516052246, "logps/chosen": -91.97722625732422, "logps/rejected": -103.02430725097656, "loss": 0.2837, "rewards/accuracies": 1.0, "rewards/chosen": -0.5685886144638062, "rewards/margins": 1.3630508184432983, "rewards/rejected": -1.9316394329071045, "step": 894 }, { "epoch": 1.44, "learning_rate": 4.6690447879508523e-07, "logits/chosen": -1.3846518993377686, "logits/rejected": -1.4043997526168823, "logps/chosen": -61.48664855957031, "logps/rejected": -117.44918823242188, "loss": 0.1898, "rewards/accuracies": 1.0, "rewards/chosen": -0.14072465896606445, "rewards/margins": 4.380633354187012, "rewards/rejected": -4.521358013153076, "step": 895 }, { "epoch": 1.44, "learning_rate": 4.6680539040824414e-07, "logits/chosen": -1.5434738397598267, "logits/rejected": -1.4383676052093506, "logps/chosen": -98.13920593261719, "logps/rejected": -105.07804870605469, "loss": 0.3408, "rewards/accuracies": 1.0, "rewards/chosen": 0.2275211662054062, "rewards/margins": 3.0952024459838867, "rewards/rejected": -2.8676812648773193, "step": 896 }, { "epoch": 1.44, "learning_rate": 4.667063020214031e-07, "logits/chosen": -1.438215732574463, "logits/rejected": -1.40582275390625, "logps/chosen": -66.88848876953125, "logps/rejected": -95.39006042480469, "loss": 0.2331, "rewards/accuracies": 1.0, "rewards/chosen": -0.5136940479278564, "rewards/margins": 2.819331645965576, "rewards/rejected": -3.3330259323120117, "step": 897 }, { "epoch": 1.44, "learning_rate": 4.66607213634562e-07, "logits/chosen": -1.4046623706817627, "logits/rejected": -1.4286394119262695, "logps/chosen": -101.06310272216797, "logps/rejected": -115.10508728027344, "loss": 0.3611, "rewards/accuracies": 0.75, "rewards/chosen": -0.3098629117012024, "rewards/margins": 1.7852933406829834, "rewards/rejected": -2.09515643119812, "step": 898 }, { "epoch": 1.44, "learning_rate": 4.665081252477209e-07, "logits/chosen": -1.2360715866088867, "logits/rejected": -1.307201623916626, "logps/chosen": -93.83660888671875, "logps/rejected": -114.37376403808594, "loss": 0.3033, "rewards/accuracies": 0.75, "rewards/chosen": -0.6931122541427612, "rewards/margins": 1.3132197856903076, "rewards/rejected": -2.0063319206237793, "step": 899 }, { "epoch": 1.44, "learning_rate": 4.6640903686087993e-07, "logits/chosen": -1.2921111583709717, "logits/rejected": -1.2395963668823242, "logps/chosen": -103.99440002441406, "logps/rejected": -94.9100341796875, "loss": 0.4323, "rewards/accuracies": 0.75, "rewards/chosen": -0.040859609842300415, "rewards/margins": 1.0148534774780273, "rewards/rejected": -1.0557130575180054, "step": 900 }, { "epoch": 1.45, "learning_rate": 4.6630994847403883e-07, "logits/chosen": -1.406374454498291, "logits/rejected": -1.4066002368927002, "logps/chosen": -71.69100952148438, "logps/rejected": -122.65707397460938, "loss": 0.3685, "rewards/accuracies": 1.0, "rewards/chosen": -0.0006952248513698578, "rewards/margins": 2.781062364578247, "rewards/rejected": -2.7817575931549072, "step": 901 }, { "epoch": 1.45, "learning_rate": 4.6621086008719774e-07, "logits/chosen": -1.4664994478225708, "logits/rejected": -1.4376964569091797, "logps/chosen": -97.77851867675781, "logps/rejected": -92.36683654785156, "loss": 0.4842, "rewards/accuracies": 1.0, "rewards/chosen": -0.5444978475570679, "rewards/margins": 1.086294174194336, "rewards/rejected": -1.6307920217514038, "step": 902 }, { "epoch": 1.45, "learning_rate": 4.661117717003567e-07, "logits/chosen": -1.3745418787002563, "logits/rejected": -1.368604063987732, "logps/chosen": -81.39274597167969, "logps/rejected": -96.85607147216797, "loss": 0.3506, "rewards/accuracies": 0.5, "rewards/chosen": -0.01571149379014969, "rewards/margins": 0.7546843886375427, "rewards/rejected": -0.770395815372467, "step": 903 }, { "epoch": 1.45, "learning_rate": 4.660126833135156e-07, "logits/chosen": -1.4186973571777344, "logits/rejected": -1.4188430309295654, "logps/chosen": -92.38774108886719, "logps/rejected": -153.6502685546875, "loss": 0.3403, "rewards/accuracies": 0.75, "rewards/chosen": -0.26283854246139526, "rewards/margins": 2.900681257247925, "rewards/rejected": -3.163519859313965, "step": 904 }, { "epoch": 1.45, "learning_rate": 4.6591359492667457e-07, "logits/chosen": -1.501907467842102, "logits/rejected": -1.4764553308486938, "logps/chosen": -83.87001037597656, "logps/rejected": -137.6434783935547, "loss": 0.4696, "rewards/accuracies": 1.0, "rewards/chosen": -0.34312400221824646, "rewards/margins": 3.8961918354034424, "rewards/rejected": -4.239315986633301, "step": 905 }, { "epoch": 1.45, "learning_rate": 4.6581450653983353e-07, "logits/chosen": -1.4294248819351196, "logits/rejected": -1.403000831604004, "logps/chosen": -89.98622131347656, "logps/rejected": -127.34136199951172, "loss": 0.3404, "rewards/accuracies": 1.0, "rewards/chosen": 0.16145972907543182, "rewards/margins": 4.4762678146362305, "rewards/rejected": -4.314807891845703, "step": 906 }, { "epoch": 1.46, "learning_rate": 4.6571541815299243e-07, "logits/chosen": -1.4761618375778198, "logits/rejected": -1.4380292892456055, "logps/chosen": -77.11106872558594, "logps/rejected": -148.62921142578125, "loss": 0.216, "rewards/accuracies": 1.0, "rewards/chosen": -0.9934589862823486, "rewards/margins": 3.18493914604187, "rewards/rejected": -4.178398132324219, "step": 907 }, { "epoch": 1.46, "learning_rate": 4.656163297661514e-07, "logits/chosen": -1.4347518682479858, "logits/rejected": -1.4876621961593628, "logps/chosen": -81.31510162353516, "logps/rejected": -120.35031127929688, "loss": 0.2697, "rewards/accuracies": 0.75, "rewards/chosen": -0.06777878850698471, "rewards/margins": 2.143982172012329, "rewards/rejected": -2.211760997772217, "step": 908 }, { "epoch": 1.46, "learning_rate": 4.655172413793103e-07, "logits/chosen": -1.3443711996078491, "logits/rejected": -1.3774003982543945, "logps/chosen": -79.68647766113281, "logps/rejected": -100.96809387207031, "loss": 0.2346, "rewards/accuracies": 1.0, "rewards/chosen": 0.4161483943462372, "rewards/margins": 3.059497833251953, "rewards/rejected": -2.6433496475219727, "step": 909 }, { "epoch": 1.46, "learning_rate": 4.6541815299246926e-07, "logits/chosen": -1.3228216171264648, "logits/rejected": -1.344215989112854, "logps/chosen": -98.95500946044922, "logps/rejected": -113.42491149902344, "loss": 0.4985, "rewards/accuracies": 0.75, "rewards/chosen": 0.6723181009292603, "rewards/margins": 1.2785285711288452, "rewards/rejected": -0.6062105298042297, "step": 910 }, { "epoch": 1.46, "learning_rate": 4.653190646056282e-07, "logits/chosen": -1.356433391571045, "logits/rejected": -1.2758326530456543, "logps/chosen": -80.32989501953125, "logps/rejected": -115.40151977539062, "loss": 0.2239, "rewards/accuracies": 1.0, "rewards/chosen": -0.07580652832984924, "rewards/margins": 4.578855514526367, "rewards/rejected": -4.654662609100342, "step": 911 }, { "epoch": 1.46, "learning_rate": 4.652199762187871e-07, "logits/chosen": -1.5282936096191406, "logits/rejected": -1.514829397201538, "logps/chosen": -86.63407135009766, "logps/rejected": -109.25668334960938, "loss": 0.1867, "rewards/accuracies": 1.0, "rewards/chosen": 0.4665224552154541, "rewards/margins": 1.9845013618469238, "rewards/rejected": -1.5179787874221802, "step": 912 }, { "epoch": 1.47, "learning_rate": 4.651208878319461e-07, "logits/chosen": -1.5002914667129517, "logits/rejected": -1.400328516960144, "logps/chosen": -116.97653198242188, "logps/rejected": -112.76649475097656, "loss": 0.2876, "rewards/accuracies": 1.0, "rewards/chosen": 0.48691269755363464, "rewards/margins": 1.9900314807891846, "rewards/rejected": -1.5031187534332275, "step": 913 }, { "epoch": 1.47, "learning_rate": 4.65021799445105e-07, "logits/chosen": -1.3893752098083496, "logits/rejected": -1.3898335695266724, "logps/chosen": -73.63349914550781, "logps/rejected": -96.27476501464844, "loss": 0.3311, "rewards/accuracies": 1.0, "rewards/chosen": -0.45642921328544617, "rewards/margins": 0.7621561884880066, "rewards/rejected": -1.2185853719711304, "step": 914 }, { "epoch": 1.47, "learning_rate": 4.6492271105826395e-07, "logits/chosen": -1.3102054595947266, "logits/rejected": -1.3869819641113281, "logps/chosen": -95.91415405273438, "logps/rejected": -108.11830139160156, "loss": 0.3124, "rewards/accuracies": 0.75, "rewards/chosen": 0.5101137757301331, "rewards/margins": 2.0352354049682617, "rewards/rejected": -1.5251215696334839, "step": 915 }, { "epoch": 1.47, "learning_rate": 4.648236226714229e-07, "logits/chosen": -1.4694966077804565, "logits/rejected": -1.3997411727905273, "logps/chosen": -77.66216278076172, "logps/rejected": -111.53197479248047, "loss": 0.3479, "rewards/accuracies": 1.0, "rewards/chosen": 0.3091737926006317, "rewards/margins": 3.4702582359313965, "rewards/rejected": -3.1610846519470215, "step": 916 }, { "epoch": 1.47, "learning_rate": 4.647245342845818e-07, "logits/chosen": -1.4848833084106445, "logits/rejected": -1.4953184127807617, "logps/chosen": -80.35623168945312, "logps/rejected": -105.97962188720703, "loss": 0.2591, "rewards/accuracies": 1.0, "rewards/chosen": -0.056652069091796875, "rewards/margins": 2.926330089569092, "rewards/rejected": -2.9829821586608887, "step": 917 }, { "epoch": 1.47, "learning_rate": 4.646254458977408e-07, "logits/chosen": -1.2830451726913452, "logits/rejected": -1.289259433746338, "logps/chosen": -86.11398315429688, "logps/rejected": -95.72228240966797, "loss": 0.34, "rewards/accuracies": 0.75, "rewards/chosen": 0.04075498506426811, "rewards/margins": 1.557837724685669, "rewards/rejected": -1.517082691192627, "step": 918 }, { "epoch": 1.48, "learning_rate": 4.645263575108997e-07, "logits/chosen": -1.3636250495910645, "logits/rejected": -1.3746397495269775, "logps/chosen": -97.27259826660156, "logps/rejected": -105.15577697753906, "loss": 0.5179, "rewards/accuracies": 0.5, "rewards/chosen": -0.4429548382759094, "rewards/margins": 0.5924179553985596, "rewards/rejected": -1.0353728532791138, "step": 919 }, { "epoch": 1.48, "learning_rate": 4.6442726912405865e-07, "logits/chosen": -1.442186713218689, "logits/rejected": -1.4289838075637817, "logps/chosen": -69.01321411132812, "logps/rejected": -110.20108032226562, "loss": 0.24, "rewards/accuracies": 1.0, "rewards/chosen": -0.039875805377960205, "rewards/margins": 4.544281482696533, "rewards/rejected": -4.584157466888428, "step": 920 }, { "epoch": 1.48, "learning_rate": 4.6432818073721755e-07, "logits/chosen": -1.2980499267578125, "logits/rejected": -1.353243350982666, "logps/chosen": -65.94352722167969, "logps/rejected": -153.7466583251953, "loss": 0.2812, "rewards/accuracies": 0.75, "rewards/chosen": -0.20981617271900177, "rewards/margins": 3.5718603134155273, "rewards/rejected": -3.781676769256592, "step": 921 }, { "epoch": 1.48, "learning_rate": 4.642290923503765e-07, "logits/chosen": -1.4613715410232544, "logits/rejected": -1.4368832111358643, "logps/chosen": -90.64324951171875, "logps/rejected": -137.30572509765625, "loss": 0.3476, "rewards/accuracies": 0.75, "rewards/chosen": -0.6795763969421387, "rewards/margins": 3.4955599308013916, "rewards/rejected": -4.175136089324951, "step": 922 }, { "epoch": 1.48, "learning_rate": 4.6413000396353547e-07, "logits/chosen": -1.3887336254119873, "logits/rejected": -1.4102400541305542, "logps/chosen": -73.77508544921875, "logps/rejected": -97.16085052490234, "loss": 0.3743, "rewards/accuracies": 0.75, "rewards/chosen": -0.11372499167919159, "rewards/margins": 1.213293194770813, "rewards/rejected": -1.327018141746521, "step": 923 }, { "epoch": 1.48, "learning_rate": 4.640309155766944e-07, "logits/chosen": -1.537772536277771, "logits/rejected": -1.4633550643920898, "logps/chosen": -97.54913330078125, "logps/rejected": -116.5672836303711, "loss": 0.2697, "rewards/accuracies": 0.75, "rewards/chosen": -0.035451143980026245, "rewards/margins": 4.348438262939453, "rewards/rejected": -4.383890151977539, "step": 924 }, { "epoch": 1.48, "learning_rate": 4.6393182718985334e-07, "logits/chosen": -1.4375576972961426, "logits/rejected": -1.2967908382415771, "logps/chosen": -108.53079223632812, "logps/rejected": -88.81774139404297, "loss": 0.3434, "rewards/accuracies": 0.75, "rewards/chosen": -0.08600884675979614, "rewards/margins": 0.5733117461204529, "rewards/rejected": -0.659320592880249, "step": 925 }, { "epoch": 1.49, "learning_rate": 4.6383273880301225e-07, "logits/chosen": -1.3495575189590454, "logits/rejected": -1.4659491777420044, "logps/chosen": -97.14730072021484, "logps/rejected": -124.65142822265625, "loss": 0.2685, "rewards/accuracies": 1.0, "rewards/chosen": -0.0404483824968338, "rewards/margins": 2.682218551635742, "rewards/rejected": -2.7226672172546387, "step": 926 }, { "epoch": 1.49, "learning_rate": 4.637336504161712e-07, "logits/chosen": -1.3588322401046753, "logits/rejected": -1.3224797248840332, "logps/chosen": -65.47914123535156, "logps/rejected": -100.37338256835938, "loss": 0.5262, "rewards/accuracies": 1.0, "rewards/chosen": 0.12276049703359604, "rewards/margins": 1.8228716850280762, "rewards/rejected": -1.7001111507415771, "step": 927 }, { "epoch": 1.49, "learning_rate": 4.6363456202933017e-07, "logits/chosen": -1.5068373680114746, "logits/rejected": -1.6560909748077393, "logps/chosen": -88.4123764038086, "logps/rejected": -149.53627014160156, "loss": 0.3686, "rewards/accuracies": 1.0, "rewards/chosen": -0.44875484704971313, "rewards/margins": 1.9377626180648804, "rewards/rejected": -2.386517286300659, "step": 928 }, { "epoch": 1.49, "learning_rate": 4.6353547364248907e-07, "logits/chosen": -1.4852499961853027, "logits/rejected": -1.3858520984649658, "logps/chosen": -98.94664001464844, "logps/rejected": -112.57235717773438, "loss": 0.2965, "rewards/accuracies": 1.0, "rewards/chosen": -0.155055433511734, "rewards/margins": 2.3171205520629883, "rewards/rejected": -2.4721760749816895, "step": 929 }, { "epoch": 1.49, "learning_rate": 4.6343638525564803e-07, "logits/chosen": -1.4822731018066406, "logits/rejected": -1.5193202495574951, "logps/chosen": -84.67195129394531, "logps/rejected": -97.95703125, "loss": 0.3186, "rewards/accuracies": 1.0, "rewards/chosen": 0.11891861259937286, "rewards/margins": 2.794870376586914, "rewards/rejected": -2.6759514808654785, "step": 930 }, { "epoch": 1.49, "learning_rate": 4.6333729686880694e-07, "logits/chosen": -1.214950680732727, "logits/rejected": -1.2424671649932861, "logps/chosen": -78.85543060302734, "logps/rejected": -117.67984771728516, "loss": 0.3866, "rewards/accuracies": 1.0, "rewards/chosen": 0.330367773771286, "rewards/margins": 2.4168190956115723, "rewards/rejected": -2.0864510536193848, "step": 931 }, { "epoch": 1.5, "learning_rate": 4.632382084819659e-07, "logits/chosen": -1.3984066247940063, "logits/rejected": -1.469548225402832, "logps/chosen": -93.10995483398438, "logps/rejected": -119.76518249511719, "loss": 0.3683, "rewards/accuracies": 1.0, "rewards/chosen": -0.4178352355957031, "rewards/margins": 3.8203067779541016, "rewards/rejected": -4.238142013549805, "step": 932 }, { "epoch": 1.5, "learning_rate": 4.6313912009512486e-07, "logits/chosen": -1.4618090391159058, "logits/rejected": -1.4438858032226562, "logps/chosen": -65.69481658935547, "logps/rejected": -107.59117126464844, "loss": 0.3207, "rewards/accuracies": 0.75, "rewards/chosen": 0.2945247292518616, "rewards/margins": 1.9162434339523315, "rewards/rejected": -1.6217188835144043, "step": 933 }, { "epoch": 1.5, "learning_rate": 4.6304003170828376e-07, "logits/chosen": -1.5085370540618896, "logits/rejected": -1.4700191020965576, "logps/chosen": -96.97554016113281, "logps/rejected": -111.40365600585938, "loss": 0.3341, "rewards/accuracies": 0.75, "rewards/chosen": -0.6325744986534119, "rewards/margins": 1.8233253955841064, "rewards/rejected": -2.455899953842163, "step": 934 }, { "epoch": 1.5, "learning_rate": 4.6294094332144267e-07, "logits/chosen": -1.4889116287231445, "logits/rejected": -1.5522984266281128, "logps/chosen": -84.9996337890625, "logps/rejected": -128.31568908691406, "loss": 0.1234, "rewards/accuracies": 1.0, "rewards/chosen": -0.18504923582077026, "rewards/margins": 2.8658769130706787, "rewards/rejected": -3.0509262084960938, "step": 935 }, { "epoch": 1.5, "learning_rate": 4.6284185493460163e-07, "logits/chosen": -1.457676649093628, "logits/rejected": -1.517907738685608, "logps/chosen": -61.2161750793457, "logps/rejected": -110.05174255371094, "loss": 0.2535, "rewards/accuracies": 1.0, "rewards/chosen": 0.44081756472587585, "rewards/margins": 3.097536325454712, "rewards/rejected": -2.6567187309265137, "step": 936 }, { "epoch": 1.5, "learning_rate": 4.627427665477606e-07, "logits/chosen": -1.43813157081604, "logits/rejected": -1.4654161930084229, "logps/chosen": -67.3921890258789, "logps/rejected": -99.4871597290039, "loss": 0.3411, "rewards/accuracies": 0.5, "rewards/chosen": -0.44057297706604004, "rewards/margins": 2.2915494441986084, "rewards/rejected": -2.7321224212646484, "step": 937 }, { "epoch": 1.51, "learning_rate": 4.6264367816091955e-07, "logits/chosen": -1.3067927360534668, "logits/rejected": -1.2871670722961426, "logps/chosen": -105.64652252197266, "logps/rejected": -126.95310974121094, "loss": 0.3251, "rewards/accuracies": 0.5, "rewards/chosen": -0.34107863903045654, "rewards/margins": 2.6164193153381348, "rewards/rejected": -2.957498073577881, "step": 938 }, { "epoch": 1.51, "learning_rate": 4.6254458977407846e-07, "logits/chosen": -1.4242613315582275, "logits/rejected": -1.4869288206100464, "logps/chosen": -87.24775695800781, "logps/rejected": -102.61793518066406, "loss": 0.46, "rewards/accuracies": 0.75, "rewards/chosen": 0.4714038372039795, "rewards/margins": 1.0430142879486084, "rewards/rejected": -0.5716104507446289, "step": 939 }, { "epoch": 1.51, "learning_rate": 4.6244550138723736e-07, "logits/chosen": -1.5490344762802124, "logits/rejected": -1.5137293338775635, "logps/chosen": -96.7865982055664, "logps/rejected": -85.44868469238281, "loss": 0.4011, "rewards/accuracies": 0.75, "rewards/chosen": -0.7109324932098389, "rewards/margins": 0.5258219242095947, "rewards/rejected": -1.2367545366287231, "step": 940 }, { "epoch": 1.51, "learning_rate": 4.623464130003963e-07, "logits/chosen": -1.3627170324325562, "logits/rejected": -1.3364777565002441, "logps/chosen": -94.70975494384766, "logps/rejected": -121.93834686279297, "loss": 0.4101, "rewards/accuracies": 1.0, "rewards/chosen": 0.049361519515514374, "rewards/margins": 2.155473232269287, "rewards/rejected": -2.106111764907837, "step": 941 }, { "epoch": 1.51, "learning_rate": 4.6224732461355523e-07, "logits/chosen": -1.5533548593521118, "logits/rejected": -1.5177409648895264, "logps/chosen": -85.73707580566406, "logps/rejected": -104.2629623413086, "loss": 0.2723, "rewards/accuracies": 0.5, "rewards/chosen": -0.33773577213287354, "rewards/margins": 0.7632884979248047, "rewards/rejected": -1.1010242700576782, "step": 942 }, { "epoch": 1.51, "learning_rate": 4.6214823622671424e-07, "logits/chosen": -1.483729362487793, "logits/rejected": -1.495360016822815, "logps/chosen": -70.50445556640625, "logps/rejected": -124.27365112304688, "loss": 0.3316, "rewards/accuracies": 0.75, "rewards/chosen": -0.3351503312587738, "rewards/margins": 3.1213865280151367, "rewards/rejected": -3.4565367698669434, "step": 943 }, { "epoch": 1.52, "learning_rate": 4.6204914783987315e-07, "logits/chosen": -1.4071054458618164, "logits/rejected": -1.5064728260040283, "logps/chosen": -81.556884765625, "logps/rejected": -101.21453857421875, "loss": 0.2191, "rewards/accuracies": 0.75, "rewards/chosen": -0.7193591594696045, "rewards/margins": 2.1385772228240967, "rewards/rejected": -2.857936382293701, "step": 944 }, { "epoch": 1.52, "learning_rate": 4.6195005945303206e-07, "logits/chosen": -1.3786580562591553, "logits/rejected": -1.4068266153335571, "logps/chosen": -91.22728729248047, "logps/rejected": -88.6808853149414, "loss": 0.4014, "rewards/accuracies": 0.75, "rewards/chosen": -0.7144365310668945, "rewards/margins": 1.931282877922058, "rewards/rejected": -2.645719528198242, "step": 945 }, { "epoch": 1.52, "learning_rate": 4.61850971066191e-07, "logits/chosen": -1.3399759531021118, "logits/rejected": -1.2982051372528076, "logps/chosen": -73.30511474609375, "logps/rejected": -103.96459197998047, "loss": 0.3539, "rewards/accuracies": 0.75, "rewards/chosen": -0.23546436429023743, "rewards/margins": 2.8628687858581543, "rewards/rejected": -3.0983331203460693, "step": 946 }, { "epoch": 1.52, "learning_rate": 4.617518826793499e-07, "logits/chosen": -1.259671688079834, "logits/rejected": -1.2187355756759644, "logps/chosen": -83.87641906738281, "logps/rejected": -96.2335205078125, "loss": 0.4012, "rewards/accuracies": 0.75, "rewards/chosen": -0.2932940721511841, "rewards/margins": 1.050594449043274, "rewards/rejected": -1.343888521194458, "step": 947 }, { "epoch": 1.52, "learning_rate": 4.6165279429250894e-07, "logits/chosen": -1.4927632808685303, "logits/rejected": -1.4773638248443604, "logps/chosen": -66.66869354248047, "logps/rejected": -106.94563293457031, "loss": 0.2935, "rewards/accuracies": 1.0, "rewards/chosen": -0.13209104537963867, "rewards/margins": 3.3130054473876953, "rewards/rejected": -3.445096969604492, "step": 948 }, { "epoch": 1.52, "learning_rate": 4.6155370590566784e-07, "logits/chosen": -1.4594792127609253, "logits/rejected": -1.480252742767334, "logps/chosen": -93.0942153930664, "logps/rejected": -115.61190032958984, "loss": 0.209, "rewards/accuracies": 1.0, "rewards/chosen": -0.24139881134033203, "rewards/margins": 0.8090328574180603, "rewards/rejected": -1.050431728363037, "step": 949 }, { "epoch": 1.52, "learning_rate": 4.6145461751882675e-07, "logits/chosen": -1.384578824043274, "logits/rejected": -1.375487208366394, "logps/chosen": -110.3106689453125, "logps/rejected": -130.86264038085938, "loss": 0.3879, "rewards/accuracies": 1.0, "rewards/chosen": -0.5186079144477844, "rewards/margins": 2.4797332286834717, "rewards/rejected": -2.9983413219451904, "step": 950 }, { "epoch": 1.53, "learning_rate": 4.613555291319857e-07, "logits/chosen": -1.523979663848877, "logits/rejected": -1.4815473556518555, "logps/chosen": -101.7852554321289, "logps/rejected": -112.23004150390625, "loss": 0.4285, "rewards/accuracies": 0.75, "rewards/chosen": -0.4888429343700409, "rewards/margins": 2.2766075134277344, "rewards/rejected": -2.7654504776000977, "step": 951 }, { "epoch": 1.53, "learning_rate": 4.612564407451446e-07, "logits/chosen": -1.4715187549591064, "logits/rejected": -1.5455567836761475, "logps/chosen": -99.2022705078125, "logps/rejected": -112.26203155517578, "loss": 0.3656, "rewards/accuracies": 1.0, "rewards/chosen": -0.3115118145942688, "rewards/margins": 2.956911087036133, "rewards/rejected": -3.268423080444336, "step": 952 }, { "epoch": 1.53, "learning_rate": 4.6115735235830363e-07, "logits/chosen": -1.3547937870025635, "logits/rejected": -1.3612223863601685, "logps/chosen": -113.89076232910156, "logps/rejected": -106.14749908447266, "loss": 0.4605, "rewards/accuracies": 0.5, "rewards/chosen": -1.281350016593933, "rewards/margins": 1.468174934387207, "rewards/rejected": -2.7495250701904297, "step": 953 }, { "epoch": 1.53, "learning_rate": 4.6105826397146254e-07, "logits/chosen": -1.3937907218933105, "logits/rejected": -1.3948326110839844, "logps/chosen": -98.12734985351562, "logps/rejected": -103.2861099243164, "loss": 0.4333, "rewards/accuracies": 0.5, "rewards/chosen": -1.4036295413970947, "rewards/margins": -0.6857084631919861, "rewards/rejected": -0.7179210782051086, "step": 954 }, { "epoch": 1.53, "learning_rate": 4.6095917558462144e-07, "logits/chosen": -1.6271742582321167, "logits/rejected": -1.4446191787719727, "logps/chosen": -110.58407592773438, "logps/rejected": -113.29644012451172, "loss": 0.3598, "rewards/accuracies": 0.75, "rewards/chosen": -0.1455158293247223, "rewards/margins": 1.3364981412887573, "rewards/rejected": -1.4820139408111572, "step": 955 }, { "epoch": 1.53, "learning_rate": 4.608600871977804e-07, "logits/chosen": -1.4119004011154175, "logits/rejected": -1.4209461212158203, "logps/chosen": -90.01239776611328, "logps/rejected": -113.68350219726562, "loss": 0.4242, "rewards/accuracies": 1.0, "rewards/chosen": -0.09090936928987503, "rewards/margins": 2.229572057723999, "rewards/rejected": -2.320481300354004, "step": 956 }, { "epoch": 1.54, "learning_rate": 4.607609988109393e-07, "logits/chosen": -1.4334943294525146, "logits/rejected": -1.4541555643081665, "logps/chosen": -84.09185791015625, "logps/rejected": -125.57305908203125, "loss": 0.3691, "rewards/accuracies": 1.0, "rewards/chosen": 0.10397548973560333, "rewards/margins": 4.778892517089844, "rewards/rejected": -4.674917221069336, "step": 957 }, { "epoch": 1.54, "learning_rate": 4.606619104240983e-07, "logits/chosen": -1.390702247619629, "logits/rejected": -1.426508903503418, "logps/chosen": -97.09410095214844, "logps/rejected": -145.97866821289062, "loss": 0.3638, "rewards/accuracies": 0.75, "rewards/chosen": -0.844190776348114, "rewards/margins": 4.024328231811523, "rewards/rejected": -4.868518829345703, "step": 958 }, { "epoch": 1.54, "learning_rate": 4.6056282203725723e-07, "logits/chosen": -1.6196069717407227, "logits/rejected": -1.6169894933700562, "logps/chosen": -94.96128845214844, "logps/rejected": -112.91299438476562, "loss": 0.4615, "rewards/accuracies": 0.75, "rewards/chosen": -0.8193411827087402, "rewards/margins": 1.3217518329620361, "rewards/rejected": -2.1410927772521973, "step": 959 }, { "epoch": 1.54, "learning_rate": 4.6046373365041614e-07, "logits/chosen": -1.168076753616333, "logits/rejected": -1.2507717609405518, "logps/chosen": -102.6574935913086, "logps/rejected": -117.6396484375, "loss": 0.2856, "rewards/accuracies": 0.5, "rewards/chosen": -0.5214269757270813, "rewards/margins": 0.38820287585258484, "rewards/rejected": -0.9096298217773438, "step": 960 }, { "epoch": 1.54, "learning_rate": 4.603646452635751e-07, "logits/chosen": -1.4920493364334106, "logits/rejected": -1.4531677961349487, "logps/chosen": -99.96607971191406, "logps/rejected": -108.853515625, "loss": 0.4082, "rewards/accuracies": 0.75, "rewards/chosen": -0.6153227090835571, "rewards/margins": 1.878732442855835, "rewards/rejected": -2.4940550327301025, "step": 961 }, { "epoch": 1.54, "learning_rate": 4.60265556876734e-07, "logits/chosen": -1.429014801979065, "logits/rejected": -1.380807876586914, "logps/chosen": -89.24000549316406, "logps/rejected": -89.98797607421875, "loss": 0.4782, "rewards/accuracies": 0.75, "rewards/chosen": -0.562167763710022, "rewards/margins": 0.8800352215766907, "rewards/rejected": -1.4422030448913574, "step": 962 }, { "epoch": 1.55, "learning_rate": 4.60166468489893e-07, "logits/chosen": -1.4058364629745483, "logits/rejected": -1.370947003364563, "logps/chosen": -59.75323486328125, "logps/rejected": -95.22298431396484, "loss": 0.3141, "rewards/accuracies": 1.0, "rewards/chosen": -0.6056379675865173, "rewards/margins": 2.2762327194213867, "rewards/rejected": -2.8818705081939697, "step": 963 }, { "epoch": 1.55, "learning_rate": 4.600673801030519e-07, "logits/chosen": -1.3904266357421875, "logits/rejected": -1.3679680824279785, "logps/chosen": -106.01644897460938, "logps/rejected": -102.35601043701172, "loss": 0.4476, "rewards/accuracies": 0.5, "rewards/chosen": -0.6941536664962769, "rewards/margins": 1.4509857892990112, "rewards/rejected": -2.145139217376709, "step": 964 }, { "epoch": 1.55, "learning_rate": 4.5996829171621083e-07, "logits/chosen": -1.452620506286621, "logits/rejected": -1.4555031061172485, "logps/chosen": -88.37944793701172, "logps/rejected": -132.0923309326172, "loss": 0.3684, "rewards/accuracies": 1.0, "rewards/chosen": -0.22034265100955963, "rewards/margins": 4.042835235595703, "rewards/rejected": -4.263177871704102, "step": 965 }, { "epoch": 1.55, "learning_rate": 4.598692033293698e-07, "logits/chosen": -1.443289041519165, "logits/rejected": -1.413745641708374, "logps/chosen": -107.01828002929688, "logps/rejected": -123.83030700683594, "loss": 0.2466, "rewards/accuracies": 0.75, "rewards/chosen": -0.19925576448440552, "rewards/margins": 0.5510685443878174, "rewards/rejected": -0.7503242492675781, "step": 966 }, { "epoch": 1.55, "learning_rate": 4.597701149425287e-07, "logits/chosen": -1.2009878158569336, "logits/rejected": -1.2387077808380127, "logps/chosen": -98.01970672607422, "logps/rejected": -121.35942077636719, "loss": 0.3574, "rewards/accuracies": 0.75, "rewards/chosen": -1.3604060411453247, "rewards/margins": 1.6319060325622559, "rewards/rejected": -2.99231219291687, "step": 967 }, { "epoch": 1.55, "learning_rate": 4.5967102655568766e-07, "logits/chosen": -1.4834645986557007, "logits/rejected": -1.4672602415084839, "logps/chosen": -68.78179931640625, "logps/rejected": -86.08404541015625, "loss": 0.2592, "rewards/accuracies": 1.0, "rewards/chosen": -0.5369505286216736, "rewards/margins": 2.483149528503418, "rewards/rejected": -3.0200998783111572, "step": 968 }, { "epoch": 1.56, "learning_rate": 4.595719381688466e-07, "logits/chosen": -1.356366753578186, "logits/rejected": -1.3418768644332886, "logps/chosen": -88.74575805664062, "logps/rejected": -148.3057403564453, "loss": 0.3358, "rewards/accuracies": 1.0, "rewards/chosen": 0.10303860902786255, "rewards/margins": 4.134504318237305, "rewards/rejected": -4.031466007232666, "step": 969 }, { "epoch": 1.56, "learning_rate": 4.594728497820055e-07, "logits/chosen": -1.3647425174713135, "logits/rejected": -1.386062741279602, "logps/chosen": -91.63006591796875, "logps/rejected": -106.4744873046875, "loss": 0.2851, "rewards/accuracies": 1.0, "rewards/chosen": -0.6428052186965942, "rewards/margins": 1.6235640048980713, "rewards/rejected": -2.266369342803955, "step": 970 }, { "epoch": 1.56, "learning_rate": 4.593737613951645e-07, "logits/chosen": -1.3042144775390625, "logits/rejected": -1.3781462907791138, "logps/chosen": -84.85135650634766, "logps/rejected": -115.61234283447266, "loss": 0.3928, "rewards/accuracies": 1.0, "rewards/chosen": 0.24445706605911255, "rewards/margins": 3.415691614151001, "rewards/rejected": -3.171234607696533, "step": 971 }, { "epoch": 1.56, "learning_rate": 4.592746730083234e-07, "logits/chosen": -1.2134361267089844, "logits/rejected": -1.3466843366622925, "logps/chosen": -78.45404052734375, "logps/rejected": -119.23548889160156, "loss": 0.317, "rewards/accuracies": 1.0, "rewards/chosen": -0.0621427483856678, "rewards/margins": 2.0400919914245605, "rewards/rejected": -2.1022346019744873, "step": 972 }, { "epoch": 1.56, "learning_rate": 4.591755846214823e-07, "logits/chosen": -1.4419981241226196, "logits/rejected": -1.4349675178527832, "logps/chosen": -90.95451354980469, "logps/rejected": -131.35958862304688, "loss": 0.3428, "rewards/accuracies": 1.0, "rewards/chosen": -0.09131927043199539, "rewards/margins": 2.6647560596466064, "rewards/rejected": -2.756075620651245, "step": 973 }, { "epoch": 1.56, "learning_rate": 4.590764962346413e-07, "logits/chosen": -1.2763333320617676, "logits/rejected": -1.3109709024429321, "logps/chosen": -108.03046417236328, "logps/rejected": -119.31826782226562, "loss": 0.2601, "rewards/accuracies": 1.0, "rewards/chosen": -0.5908733606338501, "rewards/margins": 0.7891178131103516, "rewards/rejected": -1.379991054534912, "step": 974 }, { "epoch": 1.57, "learning_rate": 4.589774078478002e-07, "logits/chosen": -1.5588736534118652, "logits/rejected": -1.598820686340332, "logps/chosen": -88.26329040527344, "logps/rejected": -136.84153747558594, "loss": 0.4334, "rewards/accuracies": 1.0, "rewards/chosen": -0.18850375711917877, "rewards/margins": 3.8607687950134277, "rewards/rejected": -4.049272537231445, "step": 975 }, { "epoch": 1.57, "learning_rate": 4.588783194609592e-07, "logits/chosen": -1.415099024772644, "logits/rejected": -1.5621082782745361, "logps/chosen": -83.30136108398438, "logps/rejected": -128.627197265625, "loss": 0.2912, "rewards/accuracies": 1.0, "rewards/chosen": -0.7063982486724854, "rewards/margins": 3.1114447116851807, "rewards/rejected": -3.817842960357666, "step": 976 }, { "epoch": 1.57, "learning_rate": 4.587792310741181e-07, "logits/chosen": -1.4861282110214233, "logits/rejected": -1.4439729452133179, "logps/chosen": -99.50328063964844, "logps/rejected": -111.69810485839844, "loss": 0.4274, "rewards/accuracies": 0.75, "rewards/chosen": -0.8899587988853455, "rewards/margins": 1.1180057525634766, "rewards/rejected": -2.0079643726348877, "step": 977 }, { "epoch": 1.57, "learning_rate": 4.58680142687277e-07, "logits/chosen": -1.4032317399978638, "logits/rejected": -1.4168193340301514, "logps/chosen": -79.800048828125, "logps/rejected": -120.53334045410156, "loss": 0.3218, "rewards/accuracies": 1.0, "rewards/chosen": -1.2418707609176636, "rewards/margins": 2.364065408706665, "rewards/rejected": -3.605936050415039, "step": 978 }, { "epoch": 1.57, "learning_rate": 4.58581054300436e-07, "logits/chosen": -1.3687342405319214, "logits/rejected": -1.4235602617263794, "logps/chosen": -96.25276184082031, "logps/rejected": -122.10169982910156, "loss": 0.3405, "rewards/accuracies": 0.75, "rewards/chosen": -0.47098851203918457, "rewards/margins": 3.1929450035095215, "rewards/rejected": -3.663933277130127, "step": 979 }, { "epoch": 1.57, "learning_rate": 4.584819659135949e-07, "logits/chosen": -1.5551085472106934, "logits/rejected": -1.5704835653305054, "logps/chosen": -90.42012023925781, "logps/rejected": -130.96365356445312, "loss": 0.3104, "rewards/accuracies": 1.0, "rewards/chosen": -0.8971171379089355, "rewards/margins": 3.2032976150512695, "rewards/rejected": -4.100414752960205, "step": 980 }, { "epoch": 1.57, "learning_rate": 4.5838287752675387e-07, "logits/chosen": -1.258089542388916, "logits/rejected": -1.3572567701339722, "logps/chosen": -100.72764587402344, "logps/rejected": -139.6934814453125, "loss": 0.293, "rewards/accuracies": 1.0, "rewards/chosen": -1.1437933444976807, "rewards/margins": 3.6742823123931885, "rewards/rejected": -4.818075656890869, "step": 981 }, { "epoch": 1.58, "learning_rate": 4.582837891399128e-07, "logits/chosen": -1.4393433332443237, "logits/rejected": -1.5022224187850952, "logps/chosen": -100.34344482421875, "logps/rejected": -93.6104507446289, "loss": 0.2943, "rewards/accuracies": 1.0, "rewards/chosen": -0.5749017596244812, "rewards/margins": 1.9625037908554077, "rewards/rejected": -2.537405490875244, "step": 982 }, { "epoch": 1.58, "learning_rate": 4.581847007530717e-07, "logits/chosen": -1.4920133352279663, "logits/rejected": -1.4392980337142944, "logps/chosen": -98.83134460449219, "logps/rejected": -119.5103759765625, "loss": 0.1781, "rewards/accuracies": 1.0, "rewards/chosen": 0.44587594270706177, "rewards/margins": 1.9595189094543457, "rewards/rejected": -1.5136430263519287, "step": 983 }, { "epoch": 1.58, "learning_rate": 4.5808561236623064e-07, "logits/chosen": -1.3878389596939087, "logits/rejected": -1.329796552658081, "logps/chosen": -112.741455078125, "logps/rejected": -135.997314453125, "loss": 0.3187, "rewards/accuracies": 0.75, "rewards/chosen": -0.3701050281524658, "rewards/margins": 4.739919662475586, "rewards/rejected": -5.110024452209473, "step": 984 }, { "epoch": 1.58, "learning_rate": 4.579865239793896e-07, "logits/chosen": -1.4800124168395996, "logits/rejected": -1.4861066341400146, "logps/chosen": -86.2996826171875, "logps/rejected": -126.10321044921875, "loss": 0.1605, "rewards/accuracies": 0.75, "rewards/chosen": -0.127766415476799, "rewards/margins": 3.2356300354003906, "rewards/rejected": -3.363396644592285, "step": 985 }, { "epoch": 1.58, "learning_rate": 4.5788743559254856e-07, "logits/chosen": -1.4616198539733887, "logits/rejected": -1.4643447399139404, "logps/chosen": -74.71321868896484, "logps/rejected": -131.0271453857422, "loss": 0.2104, "rewards/accuracies": 1.0, "rewards/chosen": -0.04787635803222656, "rewards/margins": 5.066827774047852, "rewards/rejected": -5.114704132080078, "step": 986 }, { "epoch": 1.58, "learning_rate": 4.5778834720570747e-07, "logits/chosen": -1.60130774974823, "logits/rejected": -1.6248971223831177, "logps/chosen": -80.91893005371094, "logps/rejected": -140.90005493164062, "loss": 0.3371, "rewards/accuracies": 1.0, "rewards/chosen": 0.610609233379364, "rewards/margins": 4.053956031799316, "rewards/rejected": -3.4433469772338867, "step": 987 }, { "epoch": 1.59, "learning_rate": 4.576892588188664e-07, "logits/chosen": -1.4479496479034424, "logits/rejected": -1.462864637374878, "logps/chosen": -85.21322631835938, "logps/rejected": -136.6701202392578, "loss": 0.2859, "rewards/accuracies": 1.0, "rewards/chosen": 0.45512983202934265, "rewards/margins": 3.6391189098358154, "rewards/rejected": -3.1839890480041504, "step": 988 }, { "epoch": 1.59, "learning_rate": 4.5759017043202533e-07, "logits/chosen": -1.487410068511963, "logits/rejected": -1.4980055093765259, "logps/chosen": -73.01914978027344, "logps/rejected": -82.78298950195312, "loss": 0.3762, "rewards/accuracies": 0.75, "rewards/chosen": -0.2930563986301422, "rewards/margins": 1.0506118535995483, "rewards/rejected": -1.3436682224273682, "step": 989 }, { "epoch": 1.59, "learning_rate": 4.574910820451843e-07, "logits/chosen": -1.4658799171447754, "logits/rejected": -1.4789515733718872, "logps/chosen": -88.97076416015625, "logps/rejected": -114.97970581054688, "loss": 0.3333, "rewards/accuracies": 1.0, "rewards/chosen": 0.2526715397834778, "rewards/margins": 2.98732852935791, "rewards/rejected": -2.7346572875976562, "step": 990 }, { "epoch": 1.59, "learning_rate": 4.5739199365834325e-07, "logits/chosen": -1.4229050874710083, "logits/rejected": -1.4959819316864014, "logps/chosen": -75.70956420898438, "logps/rejected": -132.67205810546875, "loss": 0.2682, "rewards/accuracies": 1.0, "rewards/chosen": -0.35063445568084717, "rewards/margins": 5.046791076660156, "rewards/rejected": -5.397425174713135, "step": 991 }, { "epoch": 1.59, "learning_rate": 4.5729290527150216e-07, "logits/chosen": -1.4041552543640137, "logits/rejected": -1.3783067464828491, "logps/chosen": -79.50238800048828, "logps/rejected": -138.04954528808594, "loss": 0.2417, "rewards/accuracies": 1.0, "rewards/chosen": -0.7246279120445251, "rewards/margins": 4.209466934204102, "rewards/rejected": -4.9340949058532715, "step": 992 }, { "epoch": 1.59, "learning_rate": 4.5719381688466107e-07, "logits/chosen": -1.435302495956421, "logits/rejected": -1.4003387689590454, "logps/chosen": -74.85476684570312, "logps/rejected": -106.66015625, "loss": 0.4571, "rewards/accuracies": 0.75, "rewards/chosen": 0.062444210052490234, "rewards/margins": 2.8713998794555664, "rewards/rejected": -2.808955669403076, "step": 993 }, { "epoch": 1.6, "learning_rate": 4.5709472849782003e-07, "logits/chosen": -1.3543423414230347, "logits/rejected": -1.394026517868042, "logps/chosen": -82.84480285644531, "logps/rejected": -95.69244384765625, "loss": 0.2889, "rewards/accuracies": 0.75, "rewards/chosen": -0.7850297093391418, "rewards/margins": 1.327632188796997, "rewards/rejected": -2.112661838531494, "step": 994 }, { "epoch": 1.6, "learning_rate": 4.56995640110979e-07, "logits/chosen": -1.4613194465637207, "logits/rejected": -1.39666748046875, "logps/chosen": -76.73270416259766, "logps/rejected": -140.2431640625, "loss": 0.428, "rewards/accuracies": 1.0, "rewards/chosen": 0.8633755445480347, "rewards/margins": 5.6245927810668945, "rewards/rejected": -4.76121711730957, "step": 995 }, { "epoch": 1.6, "learning_rate": 4.5689655172413795e-07, "logits/chosen": -1.471524715423584, "logits/rejected": -1.4095906019210815, "logps/chosen": -100.58306121826172, "logps/rejected": -141.66012573242188, "loss": 0.389, "rewards/accuracies": 0.75, "rewards/chosen": -0.08394812047481537, "rewards/margins": 2.3085222244262695, "rewards/rejected": -2.392470598220825, "step": 996 }, { "epoch": 1.6, "learning_rate": 4.5679746333729685e-07, "logits/chosen": -1.5060317516326904, "logits/rejected": -1.4719595909118652, "logps/chosen": -84.05213928222656, "logps/rejected": -137.65603637695312, "loss": 0.2451, "rewards/accuracies": 1.0, "rewards/chosen": 0.735716700553894, "rewards/margins": 3.574162721633911, "rewards/rejected": -2.8384461402893066, "step": 997 }, { "epoch": 1.6, "learning_rate": 4.5669837495045576e-07, "logits/chosen": -1.42974853515625, "logits/rejected": -1.3278484344482422, "logps/chosen": -87.77025604248047, "logps/rejected": -115.01262664794922, "loss": 0.291, "rewards/accuracies": 1.0, "rewards/chosen": -0.41628727316856384, "rewards/margins": 3.2024080753326416, "rewards/rejected": -3.6186954975128174, "step": 998 }, { "epoch": 1.6, "learning_rate": 4.565992865636147e-07, "logits/chosen": -1.3749524354934692, "logits/rejected": -1.3586478233337402, "logps/chosen": -76.98604583740234, "logps/rejected": -120.24432373046875, "loss": 0.2958, "rewards/accuracies": 1.0, "rewards/chosen": 0.298204243183136, "rewards/margins": 2.4421188831329346, "rewards/rejected": -2.1439146995544434, "step": 999 }, { "epoch": 1.61, "learning_rate": 4.565001981767737e-07, "logits/chosen": -1.36294686794281, "logits/rejected": -1.303245186805725, "logps/chosen": -72.94529724121094, "logps/rejected": -125.28617858886719, "loss": 0.2647, "rewards/accuracies": 1.0, "rewards/chosen": 0.5353199243545532, "rewards/margins": 4.001642227172852, "rewards/rejected": -3.466322422027588, "step": 1000 }, { "epoch": 1.61, "learning_rate": 4.5640110978993264e-07, "logits/chosen": -1.532013177871704, "logits/rejected": -1.4546864032745361, "logps/chosen": -91.13987731933594, "logps/rejected": -114.92549896240234, "loss": 0.3572, "rewards/accuracies": 0.75, "rewards/chosen": -0.7886568903923035, "rewards/margins": 1.5922943353652954, "rewards/rejected": -2.380951404571533, "step": 1001 }, { "epoch": 1.61, "learning_rate": 4.5630202140309155e-07, "logits/chosen": -1.2984434366226196, "logits/rejected": -1.2635446786880493, "logps/chosen": -101.37415313720703, "logps/rejected": -112.83323669433594, "loss": 0.326, "rewards/accuracies": 0.5, "rewards/chosen": -0.4112361967563629, "rewards/margins": 1.6298748254776, "rewards/rejected": -2.0411109924316406, "step": 1002 }, { "epoch": 1.61, "learning_rate": 4.5620293301625045e-07, "logits/chosen": -1.4084856510162354, "logits/rejected": -1.409132957458496, "logps/chosen": -72.82208251953125, "logps/rejected": -97.28045654296875, "loss": 0.3277, "rewards/accuracies": 1.0, "rewards/chosen": 0.09333716332912445, "rewards/margins": 1.447910189628601, "rewards/rejected": -1.3545730113983154, "step": 1003 }, { "epoch": 1.61, "learning_rate": 4.561038446294094e-07, "logits/chosen": -1.310517430305481, "logits/rejected": -1.2920944690704346, "logps/chosen": -108.4488754272461, "logps/rejected": -112.8048095703125, "loss": 0.3072, "rewards/accuracies": 0.5, "rewards/chosen": -0.3556026816368103, "rewards/margins": 0.9588367938995361, "rewards/rejected": -1.3144394159317017, "step": 1004 }, { "epoch": 1.61, "learning_rate": 4.560047562425683e-07, "logits/chosen": -1.4287546873092651, "logits/rejected": -1.437037706375122, "logps/chosen": -84.99720764160156, "logps/rejected": -121.86127471923828, "loss": 0.2149, "rewards/accuracies": 1.0, "rewards/chosen": 0.1464085578918457, "rewards/margins": 1.8218069076538086, "rewards/rejected": -1.675398349761963, "step": 1005 }, { "epoch": 1.61, "learning_rate": 4.559056678557273e-07, "logits/chosen": -1.443516492843628, "logits/rejected": -1.4030308723449707, "logps/chosen": -88.12187194824219, "logps/rejected": -108.17573547363281, "loss": 0.3085, "rewards/accuracies": 0.75, "rewards/chosen": -0.667259693145752, "rewards/margins": 3.160517930984497, "rewards/rejected": -3.82777738571167, "step": 1006 }, { "epoch": 1.62, "learning_rate": 4.5580657946888624e-07, "logits/chosen": -1.393242597579956, "logits/rejected": -1.3325488567352295, "logps/chosen": -110.41175842285156, "logps/rejected": -116.48114013671875, "loss": 0.442, "rewards/accuracies": 0.75, "rewards/chosen": -0.030962742865085602, "rewards/margins": 2.1447501182556152, "rewards/rejected": -2.175712823867798, "step": 1007 }, { "epoch": 1.62, "learning_rate": 4.5570749108204515e-07, "logits/chosen": -1.478918194770813, "logits/rejected": -1.3876875638961792, "logps/chosen": -119.7712173461914, "logps/rejected": -113.63029479980469, "loss": 0.3797, "rewards/accuracies": 0.75, "rewards/chosen": -1.0962377786636353, "rewards/margins": 2.2505481243133545, "rewards/rejected": -3.3467857837677, "step": 1008 }, { "epoch": 1.62, "learning_rate": 4.556084026952041e-07, "logits/chosen": -1.4165499210357666, "logits/rejected": -1.406442642211914, "logps/chosen": -84.16569519042969, "logps/rejected": -105.05816650390625, "loss": 0.2229, "rewards/accuracies": 1.0, "rewards/chosen": -0.06501388549804688, "rewards/margins": 1.4303061962127686, "rewards/rejected": -1.495320200920105, "step": 1009 }, { "epoch": 1.62, "learning_rate": 4.55509314308363e-07, "logits/chosen": -1.2490460872650146, "logits/rejected": -1.2305936813354492, "logps/chosen": -90.73574829101562, "logps/rejected": -82.40900421142578, "loss": 0.3891, "rewards/accuracies": 0.5, "rewards/chosen": -2.1964359283447266, "rewards/margins": 0.18889552354812622, "rewards/rejected": -2.385331630706787, "step": 1010 }, { "epoch": 1.62, "learning_rate": 4.5541022592152197e-07, "logits/chosen": -1.3748347759246826, "logits/rejected": -1.3894673585891724, "logps/chosen": -72.97128295898438, "logps/rejected": -137.94052124023438, "loss": 0.1897, "rewards/accuracies": 1.0, "rewards/chosen": -0.012136735022068024, "rewards/margins": 4.159322738647461, "rewards/rejected": -4.171459674835205, "step": 1011 }, { "epoch": 1.62, "learning_rate": 4.5531113753468093e-07, "logits/chosen": -1.637721300125122, "logits/rejected": -1.6014314889907837, "logps/chosen": -97.02202606201172, "logps/rejected": -110.29731750488281, "loss": 0.3532, "rewards/accuracies": 0.75, "rewards/chosen": 0.16981297731399536, "rewards/margins": 3.061370372772217, "rewards/rejected": -2.8915576934814453, "step": 1012 }, { "epoch": 1.63, "learning_rate": 4.5521204914783984e-07, "logits/chosen": -1.5178476572036743, "logits/rejected": -1.588550090789795, "logps/chosen": -63.49580764770508, "logps/rejected": -119.39292907714844, "loss": 0.3599, "rewards/accuracies": 0.75, "rewards/chosen": 0.2124195247888565, "rewards/margins": 2.638204574584961, "rewards/rejected": -2.4257850646972656, "step": 1013 }, { "epoch": 1.63, "learning_rate": 4.551129607609988e-07, "logits/chosen": -1.4542367458343506, "logits/rejected": -1.4799245595932007, "logps/chosen": -91.78561401367188, "logps/rejected": -125.91111755371094, "loss": 0.291, "rewards/accuracies": 0.75, "rewards/chosen": -0.038205333054065704, "rewards/margins": 2.898711919784546, "rewards/rejected": -2.936917304992676, "step": 1014 }, { "epoch": 1.63, "learning_rate": 4.550138723741577e-07, "logits/chosen": -1.4961981773376465, "logits/rejected": -1.5079925060272217, "logps/chosen": -79.53777313232422, "logps/rejected": -113.33588409423828, "loss": 0.2163, "rewards/accuracies": 0.75, "rewards/chosen": -0.9412255883216858, "rewards/margins": 2.387908935546875, "rewards/rejected": -3.329134464263916, "step": 1015 }, { "epoch": 1.63, "learning_rate": 4.5491478398731667e-07, "logits/chosen": -1.445568323135376, "logits/rejected": -1.3724085092544556, "logps/chosen": -87.11100769042969, "logps/rejected": -107.66336059570312, "loss": 0.4016, "rewards/accuracies": 1.0, "rewards/chosen": -0.32490110397338867, "rewards/margins": 2.8108139038085938, "rewards/rejected": -3.1357150077819824, "step": 1016 }, { "epoch": 1.63, "learning_rate": 4.548156956004756e-07, "logits/chosen": -1.4367178678512573, "logits/rejected": -1.3677453994750977, "logps/chosen": -88.8550796508789, "logps/rejected": -116.97498321533203, "loss": 0.2565, "rewards/accuracies": 1.0, "rewards/chosen": -0.7192248702049255, "rewards/margins": 5.035187244415283, "rewards/rejected": -5.754411697387695, "step": 1017 }, { "epoch": 1.63, "learning_rate": 4.5471660721363453e-07, "logits/chosen": -1.4269119501113892, "logits/rejected": -1.4527978897094727, "logps/chosen": -86.99848937988281, "logps/rejected": -138.4620361328125, "loss": 0.4902, "rewards/accuracies": 1.0, "rewards/chosen": -0.1984400749206543, "rewards/margins": 4.010796546936035, "rewards/rejected": -4.209236145019531, "step": 1018 }, { "epoch": 1.64, "learning_rate": 4.546175188267935e-07, "logits/chosen": -1.3719547986984253, "logits/rejected": -1.4726009368896484, "logps/chosen": -97.5919189453125, "logps/rejected": -150.20089721679688, "loss": 0.2409, "rewards/accuracies": 1.0, "rewards/chosen": -0.3679748475551605, "rewards/margins": 2.9058403968811035, "rewards/rejected": -3.273815393447876, "step": 1019 }, { "epoch": 1.64, "learning_rate": 4.545184304399524e-07, "logits/chosen": -1.5812867879867554, "logits/rejected": -1.559905767440796, "logps/chosen": -89.5531005859375, "logps/rejected": -95.82762145996094, "loss": 0.31, "rewards/accuracies": 0.75, "rewards/chosen": -0.4610322117805481, "rewards/margins": 2.7195839881896973, "rewards/rejected": -3.1806159019470215, "step": 1020 }, { "epoch": 1.64, "learning_rate": 4.544193420531113e-07, "logits/chosen": -1.208458662033081, "logits/rejected": -1.2733262777328491, "logps/chosen": -69.1107177734375, "logps/rejected": -101.6998291015625, "loss": 0.415, "rewards/accuracies": 1.0, "rewards/chosen": -0.005894392728805542, "rewards/margins": 2.5334744453430176, "rewards/rejected": -2.5393688678741455, "step": 1021 }, { "epoch": 1.64, "learning_rate": 4.543202536662703e-07, "logits/chosen": -1.2905462980270386, "logits/rejected": -1.3901782035827637, "logps/chosen": -69.9092025756836, "logps/rejected": -118.09671020507812, "loss": 0.2603, "rewards/accuracies": 1.0, "rewards/chosen": 0.08307639509439468, "rewards/margins": 3.8405025005340576, "rewards/rejected": -3.7574262619018555, "step": 1022 }, { "epoch": 1.64, "learning_rate": 4.542211652794292e-07, "logits/chosen": -1.5063741207122803, "logits/rejected": -1.5178205966949463, "logps/chosen": -89.95638275146484, "logps/rejected": -138.3484344482422, "loss": 0.3532, "rewards/accuracies": 1.0, "rewards/chosen": -0.10425510257482529, "rewards/margins": 4.110670566558838, "rewards/rejected": -4.214925765991211, "step": 1023 }, { "epoch": 1.64, "learning_rate": 4.541220768925882e-07, "logits/chosen": -1.5085582733154297, "logits/rejected": -1.5812225341796875, "logps/chosen": -84.1273193359375, "logps/rejected": -91.5704574584961, "loss": 0.2381, "rewards/accuracies": 0.75, "rewards/chosen": -0.8850663900375366, "rewards/margins": 1.9850561618804932, "rewards/rejected": -2.8701224327087402, "step": 1024 }, { "epoch": 1.65, "learning_rate": 4.540229885057471e-07, "logits/chosen": -1.422958254814148, "logits/rejected": -1.4511523246765137, "logps/chosen": -96.16607666015625, "logps/rejected": -127.87907409667969, "loss": 0.3469, "rewards/accuracies": 1.0, "rewards/chosen": -0.7912813425064087, "rewards/margins": 2.0947113037109375, "rewards/rejected": -2.8859927654266357, "step": 1025 }, { "epoch": 1.65, "learning_rate": 4.53923900118906e-07, "logits/chosen": -1.431478500366211, "logits/rejected": -1.3970046043395996, "logps/chosen": -98.1852798461914, "logps/rejected": -118.80416870117188, "loss": 0.5227, "rewards/accuracies": 0.75, "rewards/chosen": -0.6585584878921509, "rewards/margins": 0.5556396245956421, "rewards/rejected": -1.214198112487793, "step": 1026 }, { "epoch": 1.65, "learning_rate": 4.53824811732065e-07, "logits/chosen": -1.4372775554656982, "logits/rejected": -1.54835045337677, "logps/chosen": -106.04728698730469, "logps/rejected": -134.54307556152344, "loss": 0.4286, "rewards/accuracies": 0.75, "rewards/chosen": -0.72065269947052, "rewards/margins": 1.7566152811050415, "rewards/rejected": -2.4772679805755615, "step": 1027 }, { "epoch": 1.65, "learning_rate": 4.537257233452239e-07, "logits/chosen": -1.4035193920135498, "logits/rejected": -1.4397928714752197, "logps/chosen": -88.20167541503906, "logps/rejected": -132.9433135986328, "loss": 0.1244, "rewards/accuracies": 1.0, "rewards/chosen": -0.796866774559021, "rewards/margins": 3.4757883548736572, "rewards/rejected": -4.272655487060547, "step": 1028 }, { "epoch": 1.65, "learning_rate": 4.536266349583829e-07, "logits/chosen": -1.269483208656311, "logits/rejected": -1.1745611429214478, "logps/chosen": -79.4634780883789, "logps/rejected": -109.49413299560547, "loss": 0.2756, "rewards/accuracies": 1.0, "rewards/chosen": -0.6240548491477966, "rewards/margins": 1.3824043273925781, "rewards/rejected": -2.0064592361450195, "step": 1029 }, { "epoch": 1.65, "learning_rate": 4.535275465715418e-07, "logits/chosen": -1.4180091619491577, "logits/rejected": -1.4124488830566406, "logps/chosen": -88.13539123535156, "logps/rejected": -142.025390625, "loss": 0.1989, "rewards/accuracies": 1.0, "rewards/chosen": -0.1091129332780838, "rewards/margins": 4.110642910003662, "rewards/rejected": -4.21975564956665, "step": 1030 }, { "epoch": 1.65, "learning_rate": 4.534284581847007e-07, "logits/chosen": -1.4264159202575684, "logits/rejected": -1.3508621454238892, "logps/chosen": -99.21466827392578, "logps/rejected": -127.93582153320312, "loss": 0.2642, "rewards/accuracies": 1.0, "rewards/chosen": -0.3012750744819641, "rewards/margins": 4.089751243591309, "rewards/rejected": -4.391026496887207, "step": 1031 }, { "epoch": 1.66, "learning_rate": 4.533293697978597e-07, "logits/chosen": -1.4626266956329346, "logits/rejected": -1.4811855554580688, "logps/chosen": -80.46440887451172, "logps/rejected": -124.21766662597656, "loss": 0.4178, "rewards/accuracies": 1.0, "rewards/chosen": 0.02819681167602539, "rewards/margins": 4.2998456954956055, "rewards/rejected": -4.27164888381958, "step": 1032 }, { "epoch": 1.66, "learning_rate": 4.532302814110186e-07, "logits/chosen": -1.3586946725845337, "logits/rejected": -1.3178521394729614, "logps/chosen": -84.30183410644531, "logps/rejected": -113.43319702148438, "loss": 0.4027, "rewards/accuracies": 1.0, "rewards/chosen": -0.047150999307632446, "rewards/margins": 3.6903724670410156, "rewards/rejected": -3.7375235557556152, "step": 1033 }, { "epoch": 1.66, "learning_rate": 4.5313119302417757e-07, "logits/chosen": -1.518182635307312, "logits/rejected": -1.574660301208496, "logps/chosen": -116.23222351074219, "logps/rejected": -120.66960144042969, "loss": 0.2081, "rewards/accuracies": 1.0, "rewards/chosen": -0.8400304317474365, "rewards/margins": 2.0284104347229004, "rewards/rejected": -2.868440866470337, "step": 1034 }, { "epoch": 1.66, "learning_rate": 4.530321046373365e-07, "logits/chosen": -1.4003864526748657, "logits/rejected": -1.5168800354003906, "logps/chosen": -89.8826904296875, "logps/rejected": -133.73251342773438, "loss": 0.3533, "rewards/accuracies": 0.75, "rewards/chosen": -0.8977352976799011, "rewards/margins": 3.55932879447937, "rewards/rejected": -4.457064151763916, "step": 1035 }, { "epoch": 1.66, "learning_rate": 4.529330162504954e-07, "logits/chosen": -1.442535400390625, "logits/rejected": -1.3254705667495728, "logps/chosen": -72.97885131835938, "logps/rejected": -102.29803466796875, "loss": 0.2506, "rewards/accuracies": 1.0, "rewards/chosen": -0.22098512947559357, "rewards/margins": 2.9353365898132324, "rewards/rejected": -3.1563215255737305, "step": 1036 }, { "epoch": 1.66, "learning_rate": 4.528339278636544e-07, "logits/chosen": -1.3702672719955444, "logits/rejected": -1.4936965703964233, "logps/chosen": -62.087730407714844, "logps/rejected": -154.0878448486328, "loss": 0.3268, "rewards/accuracies": 1.0, "rewards/chosen": 0.31587573885917664, "rewards/margins": 7.245948791503906, "rewards/rejected": -6.930073261260986, "step": 1037 }, { "epoch": 1.67, "learning_rate": 4.527348394768133e-07, "logits/chosen": -1.4480103254318237, "logits/rejected": -1.411965250968933, "logps/chosen": -82.81105041503906, "logps/rejected": -102.310791015625, "loss": 0.3935, "rewards/accuracies": 0.75, "rewards/chosen": -0.1755250245332718, "rewards/margins": 1.6865530014038086, "rewards/rejected": -1.8620779514312744, "step": 1038 }, { "epoch": 1.67, "learning_rate": 4.5263575108997226e-07, "logits/chosen": -1.4558117389678955, "logits/rejected": -1.495263934135437, "logps/chosen": -97.67805480957031, "logps/rejected": -106.39985656738281, "loss": 0.1979, "rewards/accuracies": 1.0, "rewards/chosen": 0.482225626707077, "rewards/margins": 4.91463565826416, "rewards/rejected": -4.43241024017334, "step": 1039 }, { "epoch": 1.67, "learning_rate": 4.5253666270313117e-07, "logits/chosen": -1.5360808372497559, "logits/rejected": -1.5772955417633057, "logps/chosen": -86.48149108886719, "logps/rejected": -139.98680114746094, "loss": 0.3908, "rewards/accuracies": 1.0, "rewards/chosen": -0.23683978617191315, "rewards/margins": 3.8171653747558594, "rewards/rejected": -4.0540056228637695, "step": 1040 }, { "epoch": 1.67, "learning_rate": 4.524375743162901e-07, "logits/chosen": -1.536886215209961, "logits/rejected": -1.602403998374939, "logps/chosen": -93.90997314453125, "logps/rejected": -121.5748519897461, "loss": 0.3027, "rewards/accuracies": 1.0, "rewards/chosen": 0.40353500843048096, "rewards/margins": 3.748650312423706, "rewards/rejected": -3.3451151847839355, "step": 1041 }, { "epoch": 1.67, "learning_rate": 4.523384859294491e-07, "logits/chosen": -1.3762619495391846, "logits/rejected": -1.3634415864944458, "logps/chosen": -81.89701843261719, "logps/rejected": -155.55929565429688, "loss": 0.2559, "rewards/accuracies": 1.0, "rewards/chosen": -0.5206536650657654, "rewards/margins": 5.212940692901611, "rewards/rejected": -5.7335944175720215, "step": 1042 }, { "epoch": 1.67, "learning_rate": 4.52239397542608e-07, "logits/chosen": -1.452566146850586, "logits/rejected": -1.4392071962356567, "logps/chosen": -107.33621978759766, "logps/rejected": -116.746826171875, "loss": 0.2486, "rewards/accuracies": 0.5, "rewards/chosen": -0.5756004452705383, "rewards/margins": 1.6708297729492188, "rewards/rejected": -2.2464301586151123, "step": 1043 }, { "epoch": 1.68, "learning_rate": 4.521403091557669e-07, "logits/chosen": -1.5514459609985352, "logits/rejected": -1.47868013381958, "logps/chosen": -96.85918426513672, "logps/rejected": -128.58297729492188, "loss": 0.3398, "rewards/accuracies": 1.0, "rewards/chosen": -0.42612600326538086, "rewards/margins": 3.6576123237609863, "rewards/rejected": -4.083738327026367, "step": 1044 }, { "epoch": 1.68, "learning_rate": 4.5204122076892586e-07, "logits/chosen": -1.350089430809021, "logits/rejected": -1.439801812171936, "logps/chosen": -84.6789321899414, "logps/rejected": -134.66452026367188, "loss": 0.3003, "rewards/accuracies": 0.75, "rewards/chosen": -1.2945765256881714, "rewards/margins": 3.2298054695129395, "rewards/rejected": -4.524381637573242, "step": 1045 }, { "epoch": 1.68, "learning_rate": 4.5194213238208477e-07, "logits/chosen": -1.4133107662200928, "logits/rejected": -1.3913521766662598, "logps/chosen": -78.6299819946289, "logps/rejected": -134.9194793701172, "loss": 0.2404, "rewards/accuracies": 1.0, "rewards/chosen": -0.20943795144557953, "rewards/margins": 4.958282470703125, "rewards/rejected": -5.167720317840576, "step": 1046 }, { "epoch": 1.68, "learning_rate": 4.5184304399524373e-07, "logits/chosen": -1.3453984260559082, "logits/rejected": -1.3584482669830322, "logps/chosen": -97.62211608886719, "logps/rejected": -104.47257995605469, "loss": 0.251, "rewards/accuracies": 0.5, "rewards/chosen": -2.30606746673584, "rewards/margins": 0.358056902885437, "rewards/rejected": -2.6641244888305664, "step": 1047 }, { "epoch": 1.68, "learning_rate": 4.517439556084027e-07, "logits/chosen": -1.6222773790359497, "logits/rejected": -1.5341486930847168, "logps/chosen": -75.28490447998047, "logps/rejected": -107.370361328125, "loss": 0.2633, "rewards/accuracies": 1.0, "rewards/chosen": 0.17127564549446106, "rewards/margins": 3.4766008853912354, "rewards/rejected": -3.305325508117676, "step": 1048 }, { "epoch": 1.68, "learning_rate": 4.516448672215616e-07, "logits/chosen": -1.5295909643173218, "logits/rejected": -1.514620304107666, "logps/chosen": -93.69155883789062, "logps/rejected": -108.19889068603516, "loss": 0.2554, "rewards/accuracies": 1.0, "rewards/chosen": 0.36300355195999146, "rewards/margins": 2.3641197681427, "rewards/rejected": -2.0011165142059326, "step": 1049 }, { "epoch": 1.69, "learning_rate": 4.5154577883472056e-07, "logits/chosen": -1.283711552619934, "logits/rejected": -1.3033336400985718, "logps/chosen": -117.27137756347656, "logps/rejected": -150.84112548828125, "loss": 0.3192, "rewards/accuracies": 1.0, "rewards/chosen": 0.18366223573684692, "rewards/margins": 3.317580223083496, "rewards/rejected": -3.133917808532715, "step": 1050 }, { "epoch": 1.69, "learning_rate": 4.5144669044787946e-07, "logits/chosen": -1.3378827571868896, "logits/rejected": -1.3847885131835938, "logps/chosen": -116.90763854980469, "logps/rejected": -114.95821380615234, "loss": 0.284, "rewards/accuracies": 0.5, "rewards/chosen": -1.2768419981002808, "rewards/margins": 1.3386785984039307, "rewards/rejected": -2.615520477294922, "step": 1051 }, { "epoch": 1.69, "learning_rate": 4.513476020610384e-07, "logits/chosen": -1.3551827669143677, "logits/rejected": -1.3204442262649536, "logps/chosen": -103.71885681152344, "logps/rejected": -80.59847259521484, "loss": 0.2533, "rewards/accuracies": 0.5, "rewards/chosen": -1.6585525274276733, "rewards/margins": 0.158555269241333, "rewards/rejected": -1.817107915878296, "step": 1052 }, { "epoch": 1.69, "learning_rate": 4.512485136741974e-07, "logits/chosen": -1.430235743522644, "logits/rejected": -1.3302371501922607, "logps/chosen": -100.59490966796875, "logps/rejected": -123.57693481445312, "loss": 0.2913, "rewards/accuracies": 1.0, "rewards/chosen": -0.6153760552406311, "rewards/margins": 1.5174223184585571, "rewards/rejected": -2.132798433303833, "step": 1053 }, { "epoch": 1.69, "learning_rate": 4.511494252873563e-07, "logits/chosen": -1.262465238571167, "logits/rejected": -1.2192037105560303, "logps/chosen": -86.59632110595703, "logps/rejected": -132.90249633789062, "loss": 0.2199, "rewards/accuracies": 1.0, "rewards/chosen": -0.5873657464981079, "rewards/margins": 3.2408978939056396, "rewards/rejected": -3.828263521194458, "step": 1054 }, { "epoch": 1.69, "learning_rate": 4.5105033690051525e-07, "logits/chosen": -1.5265709161758423, "logits/rejected": -1.5036979913711548, "logps/chosen": -88.03260040283203, "logps/rejected": -95.89689636230469, "loss": 0.18, "rewards/accuracies": 0.75, "rewards/chosen": -0.35405415296554565, "rewards/margins": 0.6474675536155701, "rewards/rejected": -1.0015215873718262, "step": 1055 }, { "epoch": 1.7, "learning_rate": 4.5095124851367416e-07, "logits/chosen": -1.3549554347991943, "logits/rejected": -1.325401782989502, "logps/chosen": -92.337158203125, "logps/rejected": -135.90496826171875, "loss": 0.2754, "rewards/accuracies": 1.0, "rewards/chosen": -0.36584606766700745, "rewards/margins": 3.4960970878601074, "rewards/rejected": -3.861943006515503, "step": 1056 }, { "epoch": 1.7, "learning_rate": 4.508521601268331e-07, "logits/chosen": -1.4413080215454102, "logits/rejected": -1.4919180870056152, "logps/chosen": -106.11381530761719, "logps/rejected": -128.36026000976562, "loss": 0.2685, "rewards/accuracies": 0.75, "rewards/chosen": -0.9212981462478638, "rewards/margins": 2.254791736602783, "rewards/rejected": -3.1760897636413574, "step": 1057 }, { "epoch": 1.7, "learning_rate": 4.507530717399921e-07, "logits/chosen": -1.6301758289337158, "logits/rejected": -1.5613455772399902, "logps/chosen": -95.39458465576172, "logps/rejected": -81.304443359375, "loss": 0.295, "rewards/accuracies": 0.25, "rewards/chosen": -0.9442867636680603, "rewards/margins": -0.02147933840751648, "rewards/rejected": -0.9228074550628662, "step": 1058 }, { "epoch": 1.7, "learning_rate": 4.50653983353151e-07, "logits/chosen": -1.3225690126419067, "logits/rejected": -1.3751764297485352, "logps/chosen": -79.12251281738281, "logps/rejected": -108.90084075927734, "loss": 0.2922, "rewards/accuracies": 0.75, "rewards/chosen": -0.512040376663208, "rewards/margins": 2.5163044929504395, "rewards/rejected": -3.0283448696136475, "step": 1059 }, { "epoch": 1.7, "learning_rate": 4.5055489496630994e-07, "logits/chosen": -1.4742083549499512, "logits/rejected": -1.4695045948028564, "logps/chosen": -74.79436492919922, "logps/rejected": -96.78173828125, "loss": 0.3838, "rewards/accuracies": 1.0, "rewards/chosen": 0.04843759536743164, "rewards/margins": 1.8436591625213623, "rewards/rejected": -1.7952215671539307, "step": 1060 }, { "epoch": 1.7, "learning_rate": 4.5045580657946885e-07, "logits/chosen": -1.4232239723205566, "logits/rejected": -1.4293829202651978, "logps/chosen": -79.98274230957031, "logps/rejected": -117.50627136230469, "loss": 0.318, "rewards/accuracies": 0.75, "rewards/chosen": -0.6750813722610474, "rewards/margins": 1.3818950653076172, "rewards/rejected": -2.056976556777954, "step": 1061 }, { "epoch": 1.7, "learning_rate": 4.503567181926278e-07, "logits/chosen": -1.6783498525619507, "logits/rejected": -1.6045315265655518, "logps/chosen": -125.43904113769531, "logps/rejected": -85.48737335205078, "loss": 0.5179, "rewards/accuracies": 0.5, "rewards/chosen": -1.3787710666656494, "rewards/margins": -0.5639164447784424, "rewards/rejected": -0.814854621887207, "step": 1062 }, { "epoch": 1.71, "learning_rate": 4.5025762980578677e-07, "logits/chosen": -1.379183292388916, "logits/rejected": -1.3570401668548584, "logps/chosen": -100.41577911376953, "logps/rejected": -113.66314697265625, "loss": 0.4148, "rewards/accuracies": 0.5, "rewards/chosen": -0.2815616726875305, "rewards/margins": 1.755793571472168, "rewards/rejected": -2.0373551845550537, "step": 1063 }, { "epoch": 1.71, "learning_rate": 4.501585414189457e-07, "logits/chosen": -1.5282171964645386, "logits/rejected": -1.4297924041748047, "logps/chosen": -86.63506317138672, "logps/rejected": -107.22927856445312, "loss": 0.3617, "rewards/accuracies": 0.75, "rewards/chosen": -0.9416707754135132, "rewards/margins": 2.338803291320801, "rewards/rejected": -3.2804737091064453, "step": 1064 }, { "epoch": 1.71, "learning_rate": 4.5005945303210464e-07, "logits/chosen": -1.4039726257324219, "logits/rejected": -1.3784432411193848, "logps/chosen": -76.39154052734375, "logps/rejected": -98.84832763671875, "loss": 0.2535, "rewards/accuracies": 1.0, "rewards/chosen": -0.8477097749710083, "rewards/margins": 1.53617525100708, "rewards/rejected": -2.383884906768799, "step": 1065 }, { "epoch": 1.71, "learning_rate": 4.4996036464526354e-07, "logits/chosen": -1.3545516729354858, "logits/rejected": -1.3703863620758057, "logps/chosen": -98.21018981933594, "logps/rejected": -141.488525390625, "loss": 0.303, "rewards/accuracies": 1.0, "rewards/chosen": -0.16830329596996307, "rewards/margins": 4.347773551940918, "rewards/rejected": -4.516077041625977, "step": 1066 }, { "epoch": 1.71, "learning_rate": 4.498612762584225e-07, "logits/chosen": -1.5420668125152588, "logits/rejected": -1.4028730392456055, "logps/chosen": -104.7833251953125, "logps/rejected": -111.3916244506836, "loss": 0.2399, "rewards/accuracies": 1.0, "rewards/chosen": -1.2366269826889038, "rewards/margins": 0.6806936264038086, "rewards/rejected": -1.9173206090927124, "step": 1067 }, { "epoch": 1.71, "learning_rate": 4.497621878715814e-07, "logits/chosen": -1.4813660383224487, "logits/rejected": -1.3661673069000244, "logps/chosen": -105.32963562011719, "logps/rejected": -100.39279174804688, "loss": 0.331, "rewards/accuracies": 1.0, "rewards/chosen": 0.45572999119758606, "rewards/margins": 2.8203160762786865, "rewards/rejected": -2.364586114883423, "step": 1068 }, { "epoch": 1.72, "learning_rate": 4.4966309948474037e-07, "logits/chosen": -1.4581712484359741, "logits/rejected": -1.4627397060394287, "logps/chosen": -76.64862823486328, "logps/rejected": -90.83354187011719, "loss": 0.3431, "rewards/accuracies": 1.0, "rewards/chosen": -0.4006434381008148, "rewards/margins": 2.1035637855529785, "rewards/rejected": -2.504207134246826, "step": 1069 }, { "epoch": 1.72, "learning_rate": 4.4956401109789933e-07, "logits/chosen": -1.4435802698135376, "logits/rejected": -1.5501172542572021, "logps/chosen": -83.49974060058594, "logps/rejected": -95.44267272949219, "loss": 0.3087, "rewards/accuracies": 1.0, "rewards/chosen": -1.327453851699829, "rewards/margins": 1.3115978240966797, "rewards/rejected": -2.639051914215088, "step": 1070 }, { "epoch": 1.72, "learning_rate": 4.4946492271105824e-07, "logits/chosen": -1.3069149255752563, "logits/rejected": -1.3026816844940186, "logps/chosen": -71.71449279785156, "logps/rejected": -125.48612976074219, "loss": 0.4668, "rewards/accuracies": 0.75, "rewards/chosen": -0.8398980498313904, "rewards/margins": 3.1339049339294434, "rewards/rejected": -3.9738028049468994, "step": 1071 }, { "epoch": 1.72, "learning_rate": 4.493658343242172e-07, "logits/chosen": -1.4482446908950806, "logits/rejected": -1.4602234363555908, "logps/chosen": -82.65309143066406, "logps/rejected": -105.39080810546875, "loss": 0.2768, "rewards/accuracies": 1.0, "rewards/chosen": -0.903878390789032, "rewards/margins": 1.622797966003418, "rewards/rejected": -2.5266764163970947, "step": 1072 }, { "epoch": 1.72, "learning_rate": 4.492667459373761e-07, "logits/chosen": -1.600611925125122, "logits/rejected": -1.5449509620666504, "logps/chosen": -92.88256072998047, "logps/rejected": -121.6624984741211, "loss": 0.3896, "rewards/accuracies": 0.75, "rewards/chosen": -0.6486048102378845, "rewards/margins": 2.363903522491455, "rewards/rejected": -3.0125083923339844, "step": 1073 }, { "epoch": 1.72, "learning_rate": 4.4916765755053506e-07, "logits/chosen": -1.4748024940490723, "logits/rejected": -1.4480395317077637, "logps/chosen": -86.96412658691406, "logps/rejected": -105.46617126464844, "loss": 0.3819, "rewards/accuracies": 1.0, "rewards/chosen": -0.39156973361968994, "rewards/margins": 3.0855212211608887, "rewards/rejected": -3.477090835571289, "step": 1074 }, { "epoch": 1.73, "learning_rate": 4.49068569163694e-07, "logits/chosen": -1.464053750038147, "logits/rejected": -1.46418297290802, "logps/chosen": -80.85247802734375, "logps/rejected": -158.75973510742188, "loss": 0.3946, "rewards/accuracies": 1.0, "rewards/chosen": 0.06091576814651489, "rewards/margins": 5.31303596496582, "rewards/rejected": -5.252120018005371, "step": 1075 }, { "epoch": 1.73, "learning_rate": 4.4896948077685293e-07, "logits/chosen": -1.538384199142456, "logits/rejected": -1.5664702653884888, "logps/chosen": -114.67498779296875, "logps/rejected": -130.79258728027344, "loss": 0.4148, "rewards/accuracies": 0.75, "rewards/chosen": -1.3525482416152954, "rewards/margins": 1.1498795747756958, "rewards/rejected": -2.502427577972412, "step": 1076 }, { "epoch": 1.73, "learning_rate": 4.4887039239001184e-07, "logits/chosen": -1.5144250392913818, "logits/rejected": -1.4580241441726685, "logps/chosen": -81.31788635253906, "logps/rejected": -114.04806518554688, "loss": 0.232, "rewards/accuracies": 1.0, "rewards/chosen": -0.20634660124778748, "rewards/margins": 3.0916788578033447, "rewards/rejected": -3.298025369644165, "step": 1077 }, { "epoch": 1.73, "learning_rate": 4.487713040031708e-07, "logits/chosen": -1.3794095516204834, "logits/rejected": -1.3467565774917603, "logps/chosen": -94.88206481933594, "logps/rejected": -128.71141052246094, "loss": 0.3459, "rewards/accuracies": 0.75, "rewards/chosen": -0.6721189618110657, "rewards/margins": 2.6404361724853516, "rewards/rejected": -3.3125553131103516, "step": 1078 }, { "epoch": 1.73, "learning_rate": 4.4867221561632975e-07, "logits/chosen": -1.440730333328247, "logits/rejected": -1.5799013376235962, "logps/chosen": -68.32929229736328, "logps/rejected": -132.81112670898438, "loss": 0.2737, "rewards/accuracies": 1.0, "rewards/chosen": -0.23945817351341248, "rewards/margins": 3.8150885105133057, "rewards/rejected": -4.054546356201172, "step": 1079 }, { "epoch": 1.73, "learning_rate": 4.485731272294887e-07, "logits/chosen": -1.590339183807373, "logits/rejected": -1.6012974977493286, "logps/chosen": -83.38775634765625, "logps/rejected": -145.67654418945312, "loss": 0.3173, "rewards/accuracies": 1.0, "rewards/chosen": 0.3507019281387329, "rewards/margins": 5.332632541656494, "rewards/rejected": -4.981930732727051, "step": 1080 }, { "epoch": 1.74, "learning_rate": 4.484740388426476e-07, "logits/chosen": -1.4874387979507446, "logits/rejected": -1.4385697841644287, "logps/chosen": -94.24652099609375, "logps/rejected": -140.5498046875, "loss": 0.2392, "rewards/accuracies": 1.0, "rewards/chosen": -0.8936440348625183, "rewards/margins": 2.709390878677368, "rewards/rejected": -3.6030349731445312, "step": 1081 }, { "epoch": 1.74, "learning_rate": 4.4837495045580653e-07, "logits/chosen": -1.470646619796753, "logits/rejected": -1.4598760604858398, "logps/chosen": -86.2561264038086, "logps/rejected": -120.43228149414062, "loss": 0.2968, "rewards/accuracies": 0.75, "rewards/chosen": -0.6623983383178711, "rewards/margins": 3.943162441253662, "rewards/rejected": -4.605561256408691, "step": 1082 }, { "epoch": 1.74, "learning_rate": 4.482758620689655e-07, "logits/chosen": -1.332229495048523, "logits/rejected": -1.4012945890426636, "logps/chosen": -89.82843017578125, "logps/rejected": -164.0804443359375, "loss": 0.2837, "rewards/accuracies": 1.0, "rewards/chosen": -0.8525623679161072, "rewards/margins": 3.3464608192443848, "rewards/rejected": -4.199023246765137, "step": 1083 }, { "epoch": 1.74, "learning_rate": 4.481767736821244e-07, "logits/chosen": -1.4827896356582642, "logits/rejected": -1.4563536643981934, "logps/chosen": -75.58658599853516, "logps/rejected": -99.70442962646484, "loss": 0.5232, "rewards/accuracies": 1.0, "rewards/chosen": -0.4295731782913208, "rewards/margins": 2.659235715866089, "rewards/rejected": -3.0888092517852783, "step": 1084 }, { "epoch": 1.74, "learning_rate": 4.480776852952834e-07, "logits/chosen": -1.4456613063812256, "logits/rejected": -1.3833328485488892, "logps/chosen": -109.15028381347656, "logps/rejected": -126.31649780273438, "loss": 0.4026, "rewards/accuracies": 0.75, "rewards/chosen": -0.4069059491157532, "rewards/margins": 4.313304901123047, "rewards/rejected": -4.720211029052734, "step": 1085 }, { "epoch": 1.74, "learning_rate": 4.479785969084423e-07, "logits/chosen": -1.521033763885498, "logits/rejected": -1.5559215545654297, "logps/chosen": -70.56893920898438, "logps/rejected": -145.7672576904297, "loss": 0.2977, "rewards/accuracies": 1.0, "rewards/chosen": -0.4125838577747345, "rewards/margins": 4.364575386047363, "rewards/rejected": -4.777159690856934, "step": 1086 }, { "epoch": 1.74, "learning_rate": 4.478795085216012e-07, "logits/chosen": -1.349228858947754, "logits/rejected": -1.3225287199020386, "logps/chosen": -99.99003601074219, "logps/rejected": -121.83547973632812, "loss": 0.2852, "rewards/accuracies": 1.0, "rewards/chosen": -0.4755058288574219, "rewards/margins": 2.7556769847869873, "rewards/rejected": -3.231182813644409, "step": 1087 }, { "epoch": 1.75, "learning_rate": 4.477804201347602e-07, "logits/chosen": -1.381913185119629, "logits/rejected": -1.4258053302764893, "logps/chosen": -76.29012298583984, "logps/rejected": -96.83831787109375, "loss": 0.2796, "rewards/accuracies": 1.0, "rewards/chosen": -0.4505760371685028, "rewards/margins": 1.8614392280578613, "rewards/rejected": -2.3120152950286865, "step": 1088 }, { "epoch": 1.75, "learning_rate": 4.476813317479191e-07, "logits/chosen": -1.4700889587402344, "logits/rejected": -1.4731850624084473, "logps/chosen": -94.60760498046875, "logps/rejected": -131.19912719726562, "loss": 0.239, "rewards/accuracies": 1.0, "rewards/chosen": -0.4476810693740845, "rewards/margins": 2.7072272300720215, "rewards/rejected": -3.1549081802368164, "step": 1089 }, { "epoch": 1.75, "learning_rate": 4.475822433610781e-07, "logits/chosen": -1.578474760055542, "logits/rejected": -1.491403579711914, "logps/chosen": -69.76750946044922, "logps/rejected": -112.67369079589844, "loss": 0.1784, "rewards/accuracies": 1.0, "rewards/chosen": 0.11113031208515167, "rewards/margins": 4.865101337432861, "rewards/rejected": -4.753970623016357, "step": 1090 }, { "epoch": 1.75, "learning_rate": 4.47483154974237e-07, "logits/chosen": -1.526633620262146, "logits/rejected": -1.5314507484436035, "logps/chosen": -86.86927032470703, "logps/rejected": -106.52400970458984, "loss": 0.331, "rewards/accuracies": 0.75, "rewards/chosen": -0.5156723260879517, "rewards/margins": 1.6470139026641846, "rewards/rejected": -2.162686347961426, "step": 1091 }, { "epoch": 1.75, "learning_rate": 4.473840665873959e-07, "logits/chosen": -1.43513822555542, "logits/rejected": -1.4202666282653809, "logps/chosen": -79.26705932617188, "logps/rejected": -104.52147674560547, "loss": 0.2807, "rewards/accuracies": 0.75, "rewards/chosen": -0.08364409953355789, "rewards/margins": 3.00492000579834, "rewards/rejected": -3.088563919067383, "step": 1092 }, { "epoch": 1.75, "learning_rate": 4.472849782005549e-07, "logits/chosen": -1.3463760614395142, "logits/rejected": -1.3760218620300293, "logps/chosen": -57.15502166748047, "logps/rejected": -119.88204956054688, "loss": 0.2827, "rewards/accuracies": 1.0, "rewards/chosen": -0.16354818642139435, "rewards/margins": 3.2702715396881104, "rewards/rejected": -3.4338200092315674, "step": 1093 }, { "epoch": 1.76, "learning_rate": 4.471858898137138e-07, "logits/chosen": -1.5014312267303467, "logits/rejected": -1.4830036163330078, "logps/chosen": -107.1961669921875, "logps/rejected": -95.06756591796875, "loss": 0.2722, "rewards/accuracies": 0.75, "rewards/chosen": -0.9419006705284119, "rewards/margins": 1.2118113040924072, "rewards/rejected": -2.153712034225464, "step": 1094 }, { "epoch": 1.76, "learning_rate": 4.470868014268728e-07, "logits/chosen": -1.4704844951629639, "logits/rejected": -1.4628537893295288, "logps/chosen": -92.28192138671875, "logps/rejected": -91.93578338623047, "loss": 0.3392, "rewards/accuracies": 0.5, "rewards/chosen": -1.0040804147720337, "rewards/margins": 1.0410929918289185, "rewards/rejected": -2.0451736450195312, "step": 1095 }, { "epoch": 1.76, "learning_rate": 4.469877130400317e-07, "logits/chosen": -1.3697460889816284, "logits/rejected": -1.407015085220337, "logps/chosen": -74.2623291015625, "logps/rejected": -120.96308898925781, "loss": 0.3164, "rewards/accuracies": 1.0, "rewards/chosen": -0.13908891379833221, "rewards/margins": 3.4588427543640137, "rewards/rejected": -3.5979321002960205, "step": 1096 }, { "epoch": 1.76, "learning_rate": 4.468886246531906e-07, "logits/chosen": -1.4322913885116577, "logits/rejected": -1.4076693058013916, "logps/chosen": -93.02253723144531, "logps/rejected": -120.03521728515625, "loss": 0.4141, "rewards/accuracies": 1.0, "rewards/chosen": -1.2982065677642822, "rewards/margins": 2.507784605026245, "rewards/rejected": -3.8059911727905273, "step": 1097 }, { "epoch": 1.76, "learning_rate": 4.4678953626634957e-07, "logits/chosen": -1.3977349996566772, "logits/rejected": -1.3514240980148315, "logps/chosen": -98.68428039550781, "logps/rejected": -125.46593475341797, "loss": 0.2221, "rewards/accuracies": 1.0, "rewards/chosen": -0.6498430967330933, "rewards/margins": 3.013096809387207, "rewards/rejected": -3.6629397869110107, "step": 1098 }, { "epoch": 1.76, "learning_rate": 4.4669044787950847e-07, "logits/chosen": -1.5610103607177734, "logits/rejected": -1.5673413276672363, "logps/chosen": -130.42335510253906, "logps/rejected": -122.40464782714844, "loss": 0.2406, "rewards/accuracies": 1.0, "rewards/chosen": -1.40500009059906, "rewards/margins": 1.704103708267212, "rewards/rejected": -3.1091039180755615, "step": 1099 }, { "epoch": 1.77, "learning_rate": 4.465913594926675e-07, "logits/chosen": -1.437214970588684, "logits/rejected": -1.3925405740737915, "logps/chosen": -77.01715850830078, "logps/rejected": -119.50595092773438, "loss": 0.3254, "rewards/accuracies": 0.75, "rewards/chosen": -1.0707886219024658, "rewards/margins": 2.7880611419677734, "rewards/rejected": -3.8588497638702393, "step": 1100 }, { "epoch": 1.77, "learning_rate": 4.464922711058264e-07, "logits/chosen": -1.5650629997253418, "logits/rejected": -1.4647763967514038, "logps/chosen": -85.6757583618164, "logps/rejected": -117.74117279052734, "loss": 0.2368, "rewards/accuracies": 1.0, "rewards/chosen": -1.123727798461914, "rewards/margins": 2.451791763305664, "rewards/rejected": -3.575519561767578, "step": 1101 }, { "epoch": 1.77, "learning_rate": 4.463931827189853e-07, "logits/chosen": -1.6730221509933472, "logits/rejected": -1.6567529439926147, "logps/chosen": -121.6965103149414, "logps/rejected": -108.98847961425781, "loss": 0.2056, "rewards/accuracies": 0.75, "rewards/chosen": -0.5443521738052368, "rewards/margins": 2.574545383453369, "rewards/rejected": -3.1188974380493164, "step": 1102 }, { "epoch": 1.77, "learning_rate": 4.4629409433214426e-07, "logits/chosen": -1.2245882749557495, "logits/rejected": -1.2482190132141113, "logps/chosen": -101.35781860351562, "logps/rejected": -124.62907409667969, "loss": 0.1774, "rewards/accuracies": 1.0, "rewards/chosen": -0.7636957168579102, "rewards/margins": 2.8759989738464355, "rewards/rejected": -3.6396946907043457, "step": 1103 }, { "epoch": 1.77, "learning_rate": 4.4619500594530317e-07, "logits/chosen": -1.3786354064941406, "logits/rejected": -1.4013890027999878, "logps/chosen": -73.0827865600586, "logps/rejected": -130.68959045410156, "loss": 0.2194, "rewards/accuracies": 1.0, "rewards/chosen": -0.5975524187088013, "rewards/margins": 4.1290435791015625, "rewards/rejected": -4.726595878601074, "step": 1104 }, { "epoch": 1.77, "learning_rate": 4.460959175584622e-07, "logits/chosen": -1.4596422910690308, "logits/rejected": -1.475716471672058, "logps/chosen": -99.69544982910156, "logps/rejected": -114.70996856689453, "loss": 0.348, "rewards/accuracies": 0.75, "rewards/chosen": -0.7219473123550415, "rewards/margins": 1.53074312210083, "rewards/rejected": -2.252690315246582, "step": 1105 }, { "epoch": 1.78, "learning_rate": 4.459968291716211e-07, "logits/chosen": -1.2893527746200562, "logits/rejected": -1.3132071495056152, "logps/chosen": -82.46931457519531, "logps/rejected": -121.64779663085938, "loss": 0.2415, "rewards/accuracies": 1.0, "rewards/chosen": -1.0110448598861694, "rewards/margins": 4.144122123718262, "rewards/rejected": -5.155167102813721, "step": 1106 }, { "epoch": 1.78, "learning_rate": 4.4589774078478e-07, "logits/chosen": -1.297234058380127, "logits/rejected": -1.2998920679092407, "logps/chosen": -83.27278900146484, "logps/rejected": -108.21867370605469, "loss": 0.3584, "rewards/accuracies": 1.0, "rewards/chosen": -0.07370337843894958, "rewards/margins": 3.460516929626465, "rewards/rejected": -3.5342204570770264, "step": 1107 }, { "epoch": 1.78, "learning_rate": 4.4579865239793895e-07, "logits/chosen": -1.4301910400390625, "logits/rejected": -1.4035015106201172, "logps/chosen": -92.59555053710938, "logps/rejected": -123.40750885009766, "loss": 0.1784, "rewards/accuracies": 0.75, "rewards/chosen": -0.8181484341621399, "rewards/margins": 1.2193174362182617, "rewards/rejected": -2.037465810775757, "step": 1108 }, { "epoch": 1.78, "learning_rate": 4.4569956401109786e-07, "logits/chosen": -1.413170337677002, "logits/rejected": -1.388722538948059, "logps/chosen": -100.48464965820312, "logps/rejected": -124.29618835449219, "loss": 0.2461, "rewards/accuracies": 1.0, "rewards/chosen": -0.6696633100509644, "rewards/margins": 2.691835641860962, "rewards/rejected": -3.3614988327026367, "step": 1109 }, { "epoch": 1.78, "learning_rate": 4.456004756242568e-07, "logits/chosen": -1.4763774871826172, "logits/rejected": -1.4763110876083374, "logps/chosen": -75.666259765625, "logps/rejected": -115.72643280029297, "loss": 0.2558, "rewards/accuracies": 1.0, "rewards/chosen": -0.1455482542514801, "rewards/margins": 3.333186626434326, "rewards/rejected": -3.4787349700927734, "step": 1110 }, { "epoch": 1.78, "learning_rate": 4.455013872374158e-07, "logits/chosen": -1.5698230266571045, "logits/rejected": -1.5455045700073242, "logps/chosen": -126.57763671875, "logps/rejected": -131.6348876953125, "loss": 0.2967, "rewards/accuracies": 1.0, "rewards/chosen": -0.7747678756713867, "rewards/margins": 1.7775055170059204, "rewards/rejected": -2.5522732734680176, "step": 1111 }, { "epoch": 1.78, "learning_rate": 4.454022988505747e-07, "logits/chosen": -1.5321733951568604, "logits/rejected": -1.5652269124984741, "logps/chosen": -95.04378509521484, "logps/rejected": -126.42357635498047, "loss": 0.1919, "rewards/accuracies": 1.0, "rewards/chosen": -0.9762493371963501, "rewards/margins": 2.4454522132873535, "rewards/rejected": -3.421701669692993, "step": 1112 }, { "epoch": 1.79, "learning_rate": 4.4530321046373365e-07, "logits/chosen": -1.5584806203842163, "logits/rejected": -1.5548278093338013, "logps/chosen": -82.56033325195312, "logps/rejected": -144.74102783203125, "loss": 0.2406, "rewards/accuracies": 1.0, "rewards/chosen": -0.48829445242881775, "rewards/margins": 3.8273651599884033, "rewards/rejected": -4.315659523010254, "step": 1113 }, { "epoch": 1.79, "learning_rate": 4.4520412207689255e-07, "logits/chosen": -1.426113486289978, "logits/rejected": -1.440136194229126, "logps/chosen": -111.581298828125, "logps/rejected": -136.80992126464844, "loss": 0.2704, "rewards/accuracies": 0.5, "rewards/chosen": -1.1083316802978516, "rewards/margins": 1.2845232486724854, "rewards/rejected": -2.392854928970337, "step": 1114 }, { "epoch": 1.79, "learning_rate": 4.4510503369005146e-07, "logits/chosen": -1.4045774936676025, "logits/rejected": -1.3577958345413208, "logps/chosen": -104.32609558105469, "logps/rejected": -135.82655334472656, "loss": 0.2046, "rewards/accuracies": 1.0, "rewards/chosen": 0.26910191774368286, "rewards/margins": 4.609500408172607, "rewards/rejected": -4.34039831161499, "step": 1115 }, { "epoch": 1.79, "learning_rate": 4.4500594530321047e-07, "logits/chosen": -1.506330132484436, "logits/rejected": -1.4744391441345215, "logps/chosen": -82.18101501464844, "logps/rejected": -106.8227310180664, "loss": 0.4005, "rewards/accuracies": 1.0, "rewards/chosen": 0.5574297308921814, "rewards/margins": 4.268188953399658, "rewards/rejected": -3.710759162902832, "step": 1116 }, { "epoch": 1.79, "learning_rate": 4.449068569163694e-07, "logits/chosen": -1.369265079498291, "logits/rejected": -1.416106104850769, "logps/chosen": -100.55365753173828, "logps/rejected": -112.09442901611328, "loss": 0.2224, "rewards/accuracies": 0.5, "rewards/chosen": -2.40224552154541, "rewards/margins": 1.5883125066757202, "rewards/rejected": -3.99055814743042, "step": 1117 }, { "epoch": 1.79, "learning_rate": 4.4480776852952834e-07, "logits/chosen": -1.5501878261566162, "logits/rejected": -1.5113954544067383, "logps/chosen": -109.77569580078125, "logps/rejected": -131.84716796875, "loss": 0.3382, "rewards/accuracies": 0.75, "rewards/chosen": -1.8075101375579834, "rewards/margins": 1.9141361713409424, "rewards/rejected": -3.721646308898926, "step": 1118 }, { "epoch": 1.8, "learning_rate": 4.4470868014268725e-07, "logits/chosen": -1.4656833410263062, "logits/rejected": -1.4832531213760376, "logps/chosen": -106.02562713623047, "logps/rejected": -136.04916381835938, "loss": 0.3436, "rewards/accuracies": 1.0, "rewards/chosen": 0.14781494438648224, "rewards/margins": 4.71645975112915, "rewards/rejected": -4.5686445236206055, "step": 1119 }, { "epoch": 1.8, "learning_rate": 4.4460959175584615e-07, "logits/chosen": -1.4737012386322021, "logits/rejected": -1.5346091985702515, "logps/chosen": -101.71392822265625, "logps/rejected": -116.49826049804688, "loss": 0.1841, "rewards/accuracies": 1.0, "rewards/chosen": -0.7643362283706665, "rewards/margins": 1.8072431087493896, "rewards/rejected": -2.5715794563293457, "step": 1120 }, { "epoch": 1.8, "learning_rate": 4.4451050336900516e-07, "logits/chosen": -1.734053134918213, "logits/rejected": -1.7957476377487183, "logps/chosen": -79.16026306152344, "logps/rejected": -130.35885620117188, "loss": 0.2557, "rewards/accuracies": 1.0, "rewards/chosen": -0.31450358033180237, "rewards/margins": 5.348422050476074, "rewards/rejected": -5.662926197052002, "step": 1121 }, { "epoch": 1.8, "learning_rate": 4.4441141498216407e-07, "logits/chosen": -1.588083028793335, "logits/rejected": -1.5142014026641846, "logps/chosen": -119.87068176269531, "logps/rejected": -118.298095703125, "loss": 0.2849, "rewards/accuracies": 0.5, "rewards/chosen": -1.2999893426895142, "rewards/margins": 0.9374510645866394, "rewards/rejected": -2.237440347671509, "step": 1122 }, { "epoch": 1.8, "learning_rate": 4.4431232659532303e-07, "logits/chosen": -1.384404182434082, "logits/rejected": -1.4901758432388306, "logps/chosen": -88.91020965576172, "logps/rejected": -123.14022827148438, "loss": 0.3611, "rewards/accuracies": 0.75, "rewards/chosen": -1.0145708322525024, "rewards/margins": 2.124969244003296, "rewards/rejected": -3.139539957046509, "step": 1123 }, { "epoch": 1.8, "learning_rate": 4.4421323820848194e-07, "logits/chosen": -1.3161647319793701, "logits/rejected": -1.2584619522094727, "logps/chosen": -125.53278350830078, "logps/rejected": -118.26549530029297, "loss": 0.442, "rewards/accuracies": 1.0, "rewards/chosen": -1.0059583187103271, "rewards/margins": 3.3712921142578125, "rewards/rejected": -4.377250671386719, "step": 1124 }, { "epoch": 1.81, "learning_rate": 4.4411414982164085e-07, "logits/chosen": -1.4043500423431396, "logits/rejected": -1.3250643014907837, "logps/chosen": -89.66082763671875, "logps/rejected": -107.77639770507812, "loss": 0.3072, "rewards/accuracies": 1.0, "rewards/chosen": -0.16894254088401794, "rewards/margins": 3.3816473484039307, "rewards/rejected": -3.5505897998809814, "step": 1125 }, { "epoch": 1.81, "learning_rate": 4.440150614347998e-07, "logits/chosen": -1.3786492347717285, "logits/rejected": -1.3569953441619873, "logps/chosen": -131.62857055664062, "logps/rejected": -144.17645263671875, "loss": 0.2589, "rewards/accuracies": 0.75, "rewards/chosen": -0.5294662714004517, "rewards/margins": 2.5310564041137695, "rewards/rejected": -3.0605227947235107, "step": 1126 }, { "epoch": 1.81, "learning_rate": 4.4391597304795876e-07, "logits/chosen": -1.3096436262130737, "logits/rejected": -1.3075162172317505, "logps/chosen": -104.18170928955078, "logps/rejected": -109.2806167602539, "loss": 0.4172, "rewards/accuracies": 0.75, "rewards/chosen": -1.0973542928695679, "rewards/margins": 0.8495367765426636, "rewards/rejected": -1.946890950202942, "step": 1127 }, { "epoch": 1.81, "learning_rate": 4.438168846611177e-07, "logits/chosen": -1.5556668043136597, "logits/rejected": -1.526099681854248, "logps/chosen": -89.51115417480469, "logps/rejected": -119.6861801147461, "loss": 0.2982, "rewards/accuracies": 1.0, "rewards/chosen": 0.2201475203037262, "rewards/margins": 2.9350225925445557, "rewards/rejected": -2.7148749828338623, "step": 1128 }, { "epoch": 1.81, "learning_rate": 4.4371779627427663e-07, "logits/chosen": -1.4071581363677979, "logits/rejected": -1.3497520685195923, "logps/chosen": -83.18069458007812, "logps/rejected": -99.1688461303711, "loss": 0.3109, "rewards/accuracies": 1.0, "rewards/chosen": -0.6145305037498474, "rewards/margins": 2.515110492706299, "rewards/rejected": -3.129640817642212, "step": 1129 }, { "epoch": 1.81, "learning_rate": 4.4361870788743554e-07, "logits/chosen": -1.6019237041473389, "logits/rejected": -1.585770845413208, "logps/chosen": -93.832763671875, "logps/rejected": -140.34056091308594, "loss": 0.3235, "rewards/accuracies": 0.75, "rewards/chosen": -0.3641242980957031, "rewards/margins": 4.994027137756348, "rewards/rejected": -5.358151435852051, "step": 1130 }, { "epoch": 1.82, "learning_rate": 4.435196195005945e-07, "logits/chosen": -1.541964054107666, "logits/rejected": -1.5506433248519897, "logps/chosen": -98.80640411376953, "logps/rejected": -125.52391052246094, "loss": 0.412, "rewards/accuracies": 0.75, "rewards/chosen": -0.6522318124771118, "rewards/margins": 3.1384477615356445, "rewards/rejected": -3.790679454803467, "step": 1131 }, { "epoch": 1.82, "learning_rate": 4.4342053111375346e-07, "logits/chosen": -1.3447757959365845, "logits/rejected": -1.4185353517532349, "logps/chosen": -129.2393341064453, "logps/rejected": -111.12335205078125, "loss": 0.4856, "rewards/accuracies": 0.75, "rewards/chosen": -1.0187926292419434, "rewards/margins": 0.2227599322795868, "rewards/rejected": -1.2415525913238525, "step": 1132 }, { "epoch": 1.82, "learning_rate": 4.433214427269124e-07, "logits/chosen": -1.3354644775390625, "logits/rejected": -1.343255877494812, "logps/chosen": -89.66532897949219, "logps/rejected": -142.23681640625, "loss": 0.21, "rewards/accuracies": 1.0, "rewards/chosen": -0.41055622696876526, "rewards/margins": 4.547921180725098, "rewards/rejected": -4.958477973937988, "step": 1133 }, { "epoch": 1.82, "learning_rate": 4.432223543400713e-07, "logits/chosen": -1.4616037607192993, "logits/rejected": -1.4725106954574585, "logps/chosen": -87.1032485961914, "logps/rejected": -111.62832641601562, "loss": 0.1409, "rewards/accuracies": 1.0, "rewards/chosen": -0.14945009350776672, "rewards/margins": 3.4181478023529053, "rewards/rejected": -3.5675978660583496, "step": 1134 }, { "epoch": 1.82, "learning_rate": 4.4312326595323023e-07, "logits/chosen": -1.463274598121643, "logits/rejected": -1.4280974864959717, "logps/chosen": -103.09146118164062, "logps/rejected": -118.93177032470703, "loss": 0.2004, "rewards/accuracies": 0.75, "rewards/chosen": -0.5732893943786621, "rewards/margins": 1.032784104347229, "rewards/rejected": -1.6060736179351807, "step": 1135 }, { "epoch": 1.82, "learning_rate": 4.430241775663892e-07, "logits/chosen": -1.410856008529663, "logits/rejected": -1.511013150215149, "logps/chosen": -104.16395568847656, "logps/rejected": -124.48725128173828, "loss": 0.3189, "rewards/accuracies": 0.75, "rewards/chosen": -0.1084146574139595, "rewards/margins": 2.943117380142212, "rewards/rejected": -3.051532030105591, "step": 1136 }, { "epoch": 1.83, "learning_rate": 4.4292508917954815e-07, "logits/chosen": -1.3448143005371094, "logits/rejected": -1.364131212234497, "logps/chosen": -74.6818618774414, "logps/rejected": -106.64903259277344, "loss": 0.1943, "rewards/accuracies": 1.0, "rewards/chosen": -0.185372456908226, "rewards/margins": 1.9979119300842285, "rewards/rejected": -2.183284282684326, "step": 1137 }, { "epoch": 1.83, "learning_rate": 4.428260007927071e-07, "logits/chosen": -1.6170040369033813, "logits/rejected": -1.4395461082458496, "logps/chosen": -121.81144714355469, "logps/rejected": -112.10791015625, "loss": 0.3195, "rewards/accuracies": 1.0, "rewards/chosen": 0.08702030777931213, "rewards/margins": 2.5545389652252197, "rewards/rejected": -2.4675185680389404, "step": 1138 }, { "epoch": 1.83, "learning_rate": 4.42726912405866e-07, "logits/chosen": -1.4348466396331787, "logits/rejected": -1.3767318725585938, "logps/chosen": -87.33828735351562, "logps/rejected": -127.47589111328125, "loss": 0.2368, "rewards/accuracies": 1.0, "rewards/chosen": -0.17514675855636597, "rewards/margins": 4.8033671379089355, "rewards/rejected": -4.978513717651367, "step": 1139 }, { "epoch": 1.83, "learning_rate": 4.426278240190249e-07, "logits/chosen": -1.611411213874817, "logits/rejected": -1.6266846656799316, "logps/chosen": -89.77645874023438, "logps/rejected": -128.29222106933594, "loss": 0.3166, "rewards/accuracies": 1.0, "rewards/chosen": -0.6310341954231262, "rewards/margins": 3.7073488235473633, "rewards/rejected": -4.338383197784424, "step": 1140 }, { "epoch": 1.83, "learning_rate": 4.425287356321839e-07, "logits/chosen": -1.3666199445724487, "logits/rejected": -1.3877220153808594, "logps/chosen": -74.13837432861328, "logps/rejected": -108.85874938964844, "loss": 0.2623, "rewards/accuracies": 1.0, "rewards/chosen": -0.5633593797683716, "rewards/margins": 2.4729535579681396, "rewards/rejected": -3.036313056945801, "step": 1141 }, { "epoch": 1.83, "learning_rate": 4.4242964724534284e-07, "logits/chosen": -1.2901839017868042, "logits/rejected": -1.351101040840149, "logps/chosen": -91.44749450683594, "logps/rejected": -134.0978240966797, "loss": 0.1988, "rewards/accuracies": 1.0, "rewards/chosen": -0.1326790750026703, "rewards/margins": 3.1114661693573, "rewards/rejected": -3.244145393371582, "step": 1142 }, { "epoch": 1.83, "learning_rate": 4.423305588585018e-07, "logits/chosen": -1.5362646579742432, "logits/rejected": -1.5638731718063354, "logps/chosen": -107.24700927734375, "logps/rejected": -118.99673461914062, "loss": 0.2757, "rewards/accuracies": 0.75, "rewards/chosen": -0.27695101499557495, "rewards/margins": 1.2578264474868774, "rewards/rejected": -1.5347775220870972, "step": 1143 }, { "epoch": 1.84, "learning_rate": 4.422314704716607e-07, "logits/chosen": -1.5572936534881592, "logits/rejected": -1.5107938051223755, "logps/chosen": -87.86715698242188, "logps/rejected": -157.091552734375, "loss": 0.2193, "rewards/accuracies": 1.0, "rewards/chosen": -0.053384698927402496, "rewards/margins": 5.0714802742004395, "rewards/rejected": -5.124865531921387, "step": 1144 }, { "epoch": 1.84, "learning_rate": 4.421323820848196e-07, "logits/chosen": -1.6188743114471436, "logits/rejected": -1.6524907350540161, "logps/chosen": -115.95286560058594, "logps/rejected": -129.67098999023438, "loss": 0.3088, "rewards/accuracies": 0.75, "rewards/chosen": -0.3764764666557312, "rewards/margins": 1.1846709251403809, "rewards/rejected": -1.5611473321914673, "step": 1145 }, { "epoch": 1.84, "learning_rate": 4.420332936979786e-07, "logits/chosen": -1.4773540496826172, "logits/rejected": -1.4369218349456787, "logps/chosen": -92.95063018798828, "logps/rejected": -131.6063690185547, "loss": 0.2607, "rewards/accuracies": 1.0, "rewards/chosen": -0.1739906370639801, "rewards/margins": 2.9875521659851074, "rewards/rejected": -3.1615428924560547, "step": 1146 }, { "epoch": 1.84, "learning_rate": 4.419342053111375e-07, "logits/chosen": -1.4959989786148071, "logits/rejected": -1.5219298601150513, "logps/chosen": -113.57337951660156, "logps/rejected": -128.2231903076172, "loss": 0.2603, "rewards/accuracies": 0.75, "rewards/chosen": -1.2344778776168823, "rewards/margins": 1.5401537418365479, "rewards/rejected": -2.7746315002441406, "step": 1147 }, { "epoch": 1.84, "learning_rate": 4.4183511692429644e-07, "logits/chosen": -1.2300662994384766, "logits/rejected": -1.2637790441513062, "logps/chosen": -97.12615203857422, "logps/rejected": -157.53573608398438, "loss": 0.1482, "rewards/accuracies": 1.0, "rewards/chosen": -0.4664745330810547, "rewards/margins": 4.387078285217285, "rewards/rejected": -4.85355281829834, "step": 1148 }, { "epoch": 1.84, "learning_rate": 4.417360285374554e-07, "logits/chosen": -1.4424107074737549, "logits/rejected": -1.4952259063720703, "logps/chosen": -64.2048568725586, "logps/rejected": -137.88482666015625, "loss": 0.1871, "rewards/accuracies": 1.0, "rewards/chosen": 0.23397760093212128, "rewards/margins": 5.420592308044434, "rewards/rejected": -5.186614990234375, "step": 1149 }, { "epoch": 1.85, "learning_rate": 4.416369401506143e-07, "logits/chosen": -1.4054783582687378, "logits/rejected": -1.4213998317718506, "logps/chosen": -95.42218780517578, "logps/rejected": -118.47805786132812, "loss": 0.3047, "rewards/accuracies": 0.75, "rewards/chosen": 0.11318788677453995, "rewards/margins": 3.284956216812134, "rewards/rejected": -3.1717681884765625, "step": 1150 }, { "epoch": 1.85, "learning_rate": 4.4153785176377327e-07, "logits/chosen": -1.436145544052124, "logits/rejected": -1.408247709274292, "logps/chosen": -104.36213684082031, "logps/rejected": -110.92447662353516, "loss": 0.3663, "rewards/accuracies": 0.5, "rewards/chosen": -1.4875376224517822, "rewards/margins": 0.5683408379554749, "rewards/rejected": -2.0558786392211914, "step": 1151 }, { "epoch": 1.85, "learning_rate": 4.414387633769322e-07, "logits/chosen": -1.4633475542068481, "logits/rejected": -1.4694527387619019, "logps/chosen": -103.59783935546875, "logps/rejected": -109.9482421875, "loss": 0.3141, "rewards/accuracies": 0.75, "rewards/chosen": -0.0534193217754364, "rewards/margins": 1.4282761812210083, "rewards/rejected": -1.4816954135894775, "step": 1152 }, { "epoch": 1.85, "learning_rate": 4.4133967499009114e-07, "logits/chosen": -1.469909906387329, "logits/rejected": -1.544114351272583, "logps/chosen": -89.6146240234375, "logps/rejected": -159.44137573242188, "loss": 0.2757, "rewards/accuracies": 1.0, "rewards/chosen": -0.5161523222923279, "rewards/margins": 5.252115249633789, "rewards/rejected": -5.768267631530762, "step": 1153 }, { "epoch": 1.85, "learning_rate": 4.412405866032501e-07, "logits/chosen": -1.4402068853378296, "logits/rejected": -1.442682147026062, "logps/chosen": -84.98143768310547, "logps/rejected": -106.58256530761719, "loss": 0.246, "rewards/accuracies": 1.0, "rewards/chosen": -0.1329687088727951, "rewards/margins": 1.7999500036239624, "rewards/rejected": -1.9329187870025635, "step": 1154 }, { "epoch": 1.85, "learning_rate": 4.41141498216409e-07, "logits/chosen": -1.2598341703414917, "logits/rejected": -1.2805542945861816, "logps/chosen": -114.25552368164062, "logps/rejected": -148.3730010986328, "loss": 0.2338, "rewards/accuracies": 1.0, "rewards/chosen": -0.3386014997959137, "rewards/margins": 3.543799638748169, "rewards/rejected": -3.88240122795105, "step": 1155 }, { "epoch": 1.86, "learning_rate": 4.4104240982956796e-07, "logits/chosen": -1.4567644596099854, "logits/rejected": -1.436119556427002, "logps/chosen": -99.5984115600586, "logps/rejected": -150.86492919921875, "loss": 0.1541, "rewards/accuracies": 1.0, "rewards/chosen": -0.6485260128974915, "rewards/margins": 4.840339183807373, "rewards/rejected": -5.488865375518799, "step": 1156 }, { "epoch": 1.86, "learning_rate": 4.4094332144272687e-07, "logits/chosen": -1.5326189994812012, "logits/rejected": -1.4579757452011108, "logps/chosen": -85.36509704589844, "logps/rejected": -118.99436950683594, "loss": 0.2472, "rewards/accuracies": 1.0, "rewards/chosen": -0.22316496074199677, "rewards/margins": 3.429727077484131, "rewards/rejected": -3.6528921127319336, "step": 1157 }, { "epoch": 1.86, "learning_rate": 4.4084423305588583e-07, "logits/chosen": -1.4845690727233887, "logits/rejected": -1.5604444742202759, "logps/chosen": -90.93140411376953, "logps/rejected": -116.77921295166016, "loss": 0.435, "rewards/accuracies": 1.0, "rewards/chosen": -0.044199369847774506, "rewards/margins": 1.256335973739624, "rewards/rejected": -1.3005354404449463, "step": 1158 }, { "epoch": 1.86, "learning_rate": 4.407451446690448e-07, "logits/chosen": -1.3148713111877441, "logits/rejected": -1.3215723037719727, "logps/chosen": -90.12710571289062, "logps/rejected": -129.8048553466797, "loss": 0.3258, "rewards/accuracies": 0.75, "rewards/chosen": -0.5240349173545837, "rewards/margins": 3.599539279937744, "rewards/rejected": -4.123574256896973, "step": 1159 }, { "epoch": 1.86, "learning_rate": 4.406460562822037e-07, "logits/chosen": -1.5579198598861694, "logits/rejected": -1.5163450241088867, "logps/chosen": -81.92881774902344, "logps/rejected": -103.28079986572266, "loss": 0.3282, "rewards/accuracies": 1.0, "rewards/chosen": -0.15590152144432068, "rewards/margins": 1.3994874954223633, "rewards/rejected": -1.555389165878296, "step": 1160 }, { "epoch": 1.86, "learning_rate": 4.4054696789536266e-07, "logits/chosen": -1.308337688446045, "logits/rejected": -1.3262479305267334, "logps/chosen": -92.2761459350586, "logps/rejected": -116.37548065185547, "loss": 0.2404, "rewards/accuracies": 0.75, "rewards/chosen": -0.7341585159301758, "rewards/margins": 2.045443058013916, "rewards/rejected": -2.779601573944092, "step": 1161 }, { "epoch": 1.87, "learning_rate": 4.4044787950852156e-07, "logits/chosen": -1.3823343515396118, "logits/rejected": -1.424808382987976, "logps/chosen": -98.17412567138672, "logps/rejected": -127.60133361816406, "loss": 0.2772, "rewards/accuracies": 0.75, "rewards/chosen": -0.3343179225921631, "rewards/margins": 2.9977846145629883, "rewards/rejected": -3.3321025371551514, "step": 1162 }, { "epoch": 1.87, "learning_rate": 4.403487911216805e-07, "logits/chosen": -1.581839680671692, "logits/rejected": -1.5514025688171387, "logps/chosen": -99.49407196044922, "logps/rejected": -150.81179809570312, "loss": 0.2488, "rewards/accuracies": 1.0, "rewards/chosen": -1.2450356483459473, "rewards/margins": 4.203108787536621, "rewards/rejected": -5.44814395904541, "step": 1163 }, { "epoch": 1.87, "learning_rate": 4.402497027348395e-07, "logits/chosen": -1.5067238807678223, "logits/rejected": -1.4321720600128174, "logps/chosen": -114.78170776367188, "logps/rejected": -115.44966888427734, "loss": 0.3481, "rewards/accuracies": 1.0, "rewards/chosen": -0.8183711767196655, "rewards/margins": 2.0778887271881104, "rewards/rejected": -2.8962597846984863, "step": 1164 }, { "epoch": 1.87, "learning_rate": 4.401506143479984e-07, "logits/chosen": -1.4995510578155518, "logits/rejected": -1.4862397909164429, "logps/chosen": -91.75689697265625, "logps/rejected": -116.84862518310547, "loss": 0.2571, "rewards/accuracies": 0.75, "rewards/chosen": -0.09284163266420364, "rewards/margins": 2.978724479675293, "rewards/rejected": -3.071566104888916, "step": 1165 }, { "epoch": 1.87, "learning_rate": 4.4005152596115735e-07, "logits/chosen": -1.406922459602356, "logits/rejected": -1.3857518434524536, "logps/chosen": -106.92034149169922, "logps/rejected": -117.32227325439453, "loss": 0.2817, "rewards/accuracies": 0.75, "rewards/chosen": -1.2269392013549805, "rewards/margins": 1.7019212245941162, "rewards/rejected": -2.9288604259490967, "step": 1166 }, { "epoch": 1.87, "learning_rate": 4.3995243757431626e-07, "logits/chosen": -1.4643381834030151, "logits/rejected": -1.4687392711639404, "logps/chosen": -90.43661499023438, "logps/rejected": -104.12216186523438, "loss": 0.2751, "rewards/accuracies": 0.75, "rewards/chosen": -0.6796445250511169, "rewards/margins": 2.532625675201416, "rewards/rejected": -3.2122702598571777, "step": 1167 }, { "epoch": 1.87, "learning_rate": 4.3985334918747516e-07, "logits/chosen": -1.3387072086334229, "logits/rejected": -1.3641663789749146, "logps/chosen": -75.56430053710938, "logps/rejected": -97.12013244628906, "loss": 0.2837, "rewards/accuracies": 1.0, "rewards/chosen": -0.5694966316223145, "rewards/margins": 2.289414405822754, "rewards/rejected": -2.8589110374450684, "step": 1168 }, { "epoch": 1.88, "learning_rate": 4.397542608006342e-07, "logits/chosen": -1.5141063928604126, "logits/rejected": -1.517671823501587, "logps/chosen": -88.77940368652344, "logps/rejected": -114.4530029296875, "loss": 0.377, "rewards/accuracies": 1.0, "rewards/chosen": -0.45236361026763916, "rewards/margins": 3.090421199798584, "rewards/rejected": -3.5427846908569336, "step": 1169 }, { "epoch": 1.88, "learning_rate": 4.396551724137931e-07, "logits/chosen": -1.4767060279846191, "logits/rejected": -1.534056305885315, "logps/chosen": -90.92230224609375, "logps/rejected": -134.12405395507812, "loss": 0.2503, "rewards/accuracies": 1.0, "rewards/chosen": 0.24561309814453125, "rewards/margins": 4.372223854064941, "rewards/rejected": -4.12661075592041, "step": 1170 }, { "epoch": 1.88, "learning_rate": 4.3955608402695204e-07, "logits/chosen": -1.469968557357788, "logits/rejected": -1.4431248903274536, "logps/chosen": -89.18746948242188, "logps/rejected": -163.82220458984375, "loss": 0.3064, "rewards/accuracies": 1.0, "rewards/chosen": 0.21563082933425903, "rewards/margins": 4.211465358734131, "rewards/rejected": -3.9958345890045166, "step": 1171 }, { "epoch": 1.88, "learning_rate": 4.3945699564011095e-07, "logits/chosen": -1.3797200918197632, "logits/rejected": -1.3380579948425293, "logps/chosen": -101.0837173461914, "logps/rejected": -100.41899108886719, "loss": 0.2941, "rewards/accuracies": 0.5, "rewards/chosen": -1.1465388536453247, "rewards/margins": 0.983749508857727, "rewards/rejected": -2.1302883625030518, "step": 1172 }, { "epoch": 1.88, "learning_rate": 4.3935790725326986e-07, "logits/chosen": -1.4740022420883179, "logits/rejected": -1.4680290222167969, "logps/chosen": -92.69856262207031, "logps/rejected": -111.7843246459961, "loss": 0.3638, "rewards/accuracies": 0.5, "rewards/chosen": -1.9018385410308838, "rewards/margins": 0.5402414798736572, "rewards/rejected": -2.442080020904541, "step": 1173 }, { "epoch": 1.88, "learning_rate": 4.3925881886642887e-07, "logits/chosen": -1.492368221282959, "logits/rejected": -1.536002516746521, "logps/chosen": -96.66949462890625, "logps/rejected": -135.04945373535156, "loss": 0.2372, "rewards/accuracies": 1.0, "rewards/chosen": -0.373558908700943, "rewards/margins": 3.316476583480835, "rewards/rejected": -3.690035581588745, "step": 1174 }, { "epoch": 1.89, "learning_rate": 4.391597304795878e-07, "logits/chosen": -1.6056745052337646, "logits/rejected": -1.5846647024154663, "logps/chosen": -96.71900939941406, "logps/rejected": -144.92958068847656, "loss": 0.2513, "rewards/accuracies": 1.0, "rewards/chosen": -0.3814198672771454, "rewards/margins": 3.488560438156128, "rewards/rejected": -3.8699803352355957, "step": 1175 }, { "epoch": 1.89, "learning_rate": 4.3906064209274673e-07, "logits/chosen": -1.3526395559310913, "logits/rejected": -1.2676095962524414, "logps/chosen": -104.36775970458984, "logps/rejected": -116.3917007446289, "loss": 0.2284, "rewards/accuracies": 1.0, "rewards/chosen": -1.0459997653961182, "rewards/margins": 1.393481731414795, "rewards/rejected": -2.439481496810913, "step": 1176 }, { "epoch": 1.89, "learning_rate": 4.3896155370590564e-07, "logits/chosen": -1.3678573369979858, "logits/rejected": -1.2836650609970093, "logps/chosen": -77.66122436523438, "logps/rejected": -131.73995971679688, "loss": 0.2468, "rewards/accuracies": 1.0, "rewards/chosen": -0.6688933372497559, "rewards/margins": 2.3928449153900146, "rewards/rejected": -3.0617382526397705, "step": 1177 }, { "epoch": 1.89, "learning_rate": 4.3886246531906455e-07, "logits/chosen": -1.380350112915039, "logits/rejected": -1.3832687139511108, "logps/chosen": -94.66304016113281, "logps/rejected": -120.02565002441406, "loss": 0.2431, "rewards/accuracies": 1.0, "rewards/chosen": -0.2853204905986786, "rewards/margins": 3.1459436416625977, "rewards/rejected": -3.4312639236450195, "step": 1178 }, { "epoch": 1.89, "learning_rate": 4.3876337693222356e-07, "logits/chosen": -1.4767565727233887, "logits/rejected": -1.4750235080718994, "logps/chosen": -60.376197814941406, "logps/rejected": -127.66610717773438, "loss": 0.1602, "rewards/accuracies": 1.0, "rewards/chosen": 0.08595733344554901, "rewards/margins": 5.693400859832764, "rewards/rejected": -5.607443332672119, "step": 1179 }, { "epoch": 1.89, "learning_rate": 4.3866428854538247e-07, "logits/chosen": -1.5496578216552734, "logits/rejected": -1.5842692852020264, "logps/chosen": -82.91809844970703, "logps/rejected": -116.15814208984375, "loss": 0.354, "rewards/accuracies": 0.75, "rewards/chosen": -0.4345642328262329, "rewards/margins": 3.0495121479034424, "rewards/rejected": -3.484076499938965, "step": 1180 }, { "epoch": 1.9, "learning_rate": 4.3856520015854143e-07, "logits/chosen": -1.4776942729949951, "logits/rejected": -1.3530009984970093, "logps/chosen": -89.12120056152344, "logps/rejected": -132.88653564453125, "loss": 0.2379, "rewards/accuracies": 1.0, "rewards/chosen": -0.3749876320362091, "rewards/margins": 4.288053512573242, "rewards/rejected": -4.663041591644287, "step": 1181 }, { "epoch": 1.9, "learning_rate": 4.3846611177170033e-07, "logits/chosen": -1.582237720489502, "logits/rejected": -1.5600926876068115, "logps/chosen": -91.9615707397461, "logps/rejected": -131.00454711914062, "loss": 0.2929, "rewards/accuracies": 0.75, "rewards/chosen": -0.6899434328079224, "rewards/margins": 1.436309814453125, "rewards/rejected": -2.126253128051758, "step": 1182 }, { "epoch": 1.9, "learning_rate": 4.3836702338485924e-07, "logits/chosen": -1.2876200675964355, "logits/rejected": -1.5342828035354614, "logps/chosen": -84.59527587890625, "logps/rejected": -142.63845825195312, "loss": 0.4152, "rewards/accuracies": 1.0, "rewards/chosen": 0.6641427874565125, "rewards/margins": 5.849085807800293, "rewards/rejected": -5.184943199157715, "step": 1183 }, { "epoch": 1.9, "learning_rate": 4.3826793499801825e-07, "logits/chosen": -1.3560165166854858, "logits/rejected": -1.2364223003387451, "logps/chosen": -90.56877136230469, "logps/rejected": -119.66017150878906, "loss": 0.2816, "rewards/accuracies": 0.75, "rewards/chosen": -0.7133146524429321, "rewards/margins": 2.9192657470703125, "rewards/rejected": -3.632580280303955, "step": 1184 }, { "epoch": 1.9, "learning_rate": 4.3816884661117716e-07, "logits/chosen": -1.4050040245056152, "logits/rejected": -1.3800404071807861, "logps/chosen": -89.04590606689453, "logps/rejected": -109.23188781738281, "loss": 0.3454, "rewards/accuracies": 1.0, "rewards/chosen": -0.45159685611724854, "rewards/margins": 3.573042631149292, "rewards/rejected": -4.02463960647583, "step": 1185 }, { "epoch": 1.9, "learning_rate": 4.3806975822433607e-07, "logits/chosen": -1.356734037399292, "logits/rejected": -1.3993432521820068, "logps/chosen": -81.81169891357422, "logps/rejected": -96.60589599609375, "loss": 0.327, "rewards/accuracies": 1.0, "rewards/chosen": -0.5225321650505066, "rewards/margins": 1.9893995523452759, "rewards/rejected": -2.511931896209717, "step": 1186 }, { "epoch": 1.91, "learning_rate": 4.3797066983749503e-07, "logits/chosen": -1.3335936069488525, "logits/rejected": -1.2813397645950317, "logps/chosen": -95.46517181396484, "logps/rejected": -149.29734802246094, "loss": 0.3013, "rewards/accuracies": 0.75, "rewards/chosen": -1.1299850940704346, "rewards/margins": 3.518430709838867, "rewards/rejected": -4.648416042327881, "step": 1187 }, { "epoch": 1.91, "learning_rate": 4.3787158145065393e-07, "logits/chosen": -1.3815181255340576, "logits/rejected": -1.4207813739776611, "logps/chosen": -103.36969757080078, "logps/rejected": -113.4195556640625, "loss": 0.3333, "rewards/accuracies": 1.0, "rewards/chosen": -0.317679762840271, "rewards/margins": 2.782815933227539, "rewards/rejected": -3.1004958152770996, "step": 1188 }, { "epoch": 1.91, "learning_rate": 4.377724930638129e-07, "logits/chosen": -1.511907696723938, "logits/rejected": -1.4634897708892822, "logps/chosen": -98.75439453125, "logps/rejected": -126.3021240234375, "loss": 0.2392, "rewards/accuracies": 0.75, "rewards/chosen": -1.095939040184021, "rewards/margins": 4.268267631530762, "rewards/rejected": -5.3642072677612305, "step": 1189 }, { "epoch": 1.91, "learning_rate": 4.3767340467697185e-07, "logits/chosen": -1.5224276781082153, "logits/rejected": -1.4237174987792969, "logps/chosen": -63.86384201049805, "logps/rejected": -131.6849822998047, "loss": 0.1929, "rewards/accuracies": 1.0, "rewards/chosen": 0.8415687084197998, "rewards/margins": 7.135977268218994, "rewards/rejected": -6.294408798217773, "step": 1190 }, { "epoch": 1.91, "learning_rate": 4.3757431629013076e-07, "logits/chosen": -1.4060953855514526, "logits/rejected": -1.4652597904205322, "logps/chosen": -92.06958770751953, "logps/rejected": -133.45664978027344, "loss": 0.312, "rewards/accuracies": 0.75, "rewards/chosen": -0.5821828246116638, "rewards/margins": 3.153031349182129, "rewards/rejected": -3.7352142333984375, "step": 1191 }, { "epoch": 1.91, "learning_rate": 4.374752279032897e-07, "logits/chosen": -1.48503839969635, "logits/rejected": -1.4470348358154297, "logps/chosen": -78.3155517578125, "logps/rejected": -119.70085906982422, "loss": 0.1562, "rewards/accuracies": 1.0, "rewards/chosen": -0.5490559339523315, "rewards/margins": 4.884414196014404, "rewards/rejected": -5.433470249176025, "step": 1192 }, { "epoch": 1.91, "learning_rate": 4.3737613951644863e-07, "logits/chosen": -1.419858455657959, "logits/rejected": -1.4778980016708374, "logps/chosen": -69.89869689941406, "logps/rejected": -105.43055725097656, "loss": 0.2846, "rewards/accuracies": 1.0, "rewards/chosen": -0.38694679737091064, "rewards/margins": 3.2332277297973633, "rewards/rejected": -3.6201744079589844, "step": 1193 }, { "epoch": 1.92, "learning_rate": 4.372770511296076e-07, "logits/chosen": -1.4774479866027832, "logits/rejected": -1.5088386535644531, "logps/chosen": -97.91586303710938, "logps/rejected": -120.28801727294922, "loss": 0.1481, "rewards/accuracies": 1.0, "rewards/chosen": -1.6129424571990967, "rewards/margins": 2.5898637771606445, "rewards/rejected": -4.20280647277832, "step": 1194 }, { "epoch": 1.92, "learning_rate": 4.3717796274276655e-07, "logits/chosen": -1.2684803009033203, "logits/rejected": -1.2789084911346436, "logps/chosen": -92.2757568359375, "logps/rejected": -139.07098388671875, "loss": 0.221, "rewards/accuracies": 0.75, "rewards/chosen": -1.0744972229003906, "rewards/margins": 1.691317081451416, "rewards/rejected": -2.7658143043518066, "step": 1195 }, { "epoch": 1.92, "learning_rate": 4.3707887435592545e-07, "logits/chosen": -1.4931836128234863, "logits/rejected": -1.477477788925171, "logps/chosen": -125.53368377685547, "logps/rejected": -124.0606460571289, "loss": 0.2894, "rewards/accuracies": 0.75, "rewards/chosen": -1.3477426767349243, "rewards/margins": 1.2647098302841187, "rewards/rejected": -2.612452507019043, "step": 1196 }, { "epoch": 1.92, "learning_rate": 4.369797859690844e-07, "logits/chosen": -1.4589720964431763, "logits/rejected": -1.4705286026000977, "logps/chosen": -62.70899200439453, "logps/rejected": -95.15110778808594, "loss": 0.1515, "rewards/accuracies": 1.0, "rewards/chosen": -0.6232596635818481, "rewards/margins": 4.047474384307861, "rewards/rejected": -4.67073392868042, "step": 1197 }, { "epoch": 1.92, "learning_rate": 4.368806975822433e-07, "logits/chosen": -1.388379454612732, "logits/rejected": -1.4100706577301025, "logps/chosen": -112.4080810546875, "logps/rejected": -113.77107238769531, "loss": 0.2772, "rewards/accuracies": 0.75, "rewards/chosen": -1.108973503112793, "rewards/margins": 2.375863552093506, "rewards/rejected": -3.484837055206299, "step": 1198 }, { "epoch": 1.92, "learning_rate": 4.367816091954023e-07, "logits/chosen": -1.5462995767593384, "logits/rejected": -1.634330153465271, "logps/chosen": -110.84967803955078, "logps/rejected": -161.56552124023438, "loss": 0.2869, "rewards/accuracies": 1.0, "rewards/chosen": -0.7772539258003235, "rewards/margins": 4.820531845092773, "rewards/rejected": -5.597785949707031, "step": 1199 }, { "epoch": 1.93, "learning_rate": 4.3668252080856124e-07, "logits/chosen": -1.506972074508667, "logits/rejected": -1.5112676620483398, "logps/chosen": -85.52877807617188, "logps/rejected": -126.54415130615234, "loss": 0.2746, "rewards/accuracies": 1.0, "rewards/chosen": -0.37539368867874146, "rewards/margins": 3.2514519691467285, "rewards/rejected": -3.626845598220825, "step": 1200 }, { "epoch": 1.93, "learning_rate": 4.3658343242172015e-07, "logits/chosen": -1.465179443359375, "logits/rejected": -1.312491774559021, "logps/chosen": -115.31640625, "logps/rejected": -104.85160827636719, "loss": 0.2729, "rewards/accuracies": 0.75, "rewards/chosen": -1.4553813934326172, "rewards/margins": 0.5893468856811523, "rewards/rejected": -2.0447282791137695, "step": 1201 }, { "epoch": 1.93, "learning_rate": 4.364843440348791e-07, "logits/chosen": -1.4024361371994019, "logits/rejected": -1.3177199363708496, "logps/chosen": -89.24357604980469, "logps/rejected": -103.09367370605469, "loss": 0.2277, "rewards/accuracies": 0.75, "rewards/chosen": -0.541273295879364, "rewards/margins": 1.818574070930481, "rewards/rejected": -2.3598473072052, "step": 1202 }, { "epoch": 1.93, "learning_rate": 4.36385255648038e-07, "logits/chosen": -1.315090537071228, "logits/rejected": -1.3472480773925781, "logps/chosen": -95.97174072265625, "logps/rejected": -118.57977294921875, "loss": 0.214, "rewards/accuracies": 1.0, "rewards/chosen": -0.5579822063446045, "rewards/margins": 1.6121611595153809, "rewards/rejected": -2.1701436042785645, "step": 1203 }, { "epoch": 1.93, "learning_rate": 4.3628616726119697e-07, "logits/chosen": -1.5062263011932373, "logits/rejected": -1.5360233783721924, "logps/chosen": -99.39713287353516, "logps/rejected": -120.14836120605469, "loss": 0.2556, "rewards/accuracies": 1.0, "rewards/chosen": -0.6822240948677063, "rewards/margins": 1.3009471893310547, "rewards/rejected": -1.9831712245941162, "step": 1204 }, { "epoch": 1.93, "learning_rate": 4.3618707887435593e-07, "logits/chosen": -1.3749008178710938, "logits/rejected": -1.3927260637283325, "logps/chosen": -81.13267517089844, "logps/rejected": -136.30914306640625, "loss": 0.2492, "rewards/accuracies": 1.0, "rewards/chosen": -1.1035301685333252, "rewards/margins": 3.7183332443237305, "rewards/rejected": -4.821863651275635, "step": 1205 }, { "epoch": 1.94, "learning_rate": 4.3608799048751484e-07, "logits/chosen": -1.5020678043365479, "logits/rejected": -1.5592821836471558, "logps/chosen": -67.457763671875, "logps/rejected": -144.3207244873047, "loss": 0.2164, "rewards/accuracies": 1.0, "rewards/chosen": -0.48146894574165344, "rewards/margins": 5.109084129333496, "rewards/rejected": -5.590552806854248, "step": 1206 }, { "epoch": 1.94, "learning_rate": 4.359889021006738e-07, "logits/chosen": -1.530219316482544, "logits/rejected": -1.5946519374847412, "logps/chosen": -94.7843246459961, "logps/rejected": -160.2211456298828, "loss": 0.2677, "rewards/accuracies": 1.0, "rewards/chosen": -1.0008361339569092, "rewards/margins": 5.517521858215332, "rewards/rejected": -6.518357753753662, "step": 1207 }, { "epoch": 1.94, "learning_rate": 4.358898137138327e-07, "logits/chosen": -1.4555699825286865, "logits/rejected": -1.3856714963912964, "logps/chosen": -110.25713348388672, "logps/rejected": -133.6761474609375, "loss": 0.3717, "rewards/accuracies": 0.75, "rewards/chosen": -1.7305872440338135, "rewards/margins": 2.612600803375244, "rewards/rejected": -4.343188285827637, "step": 1208 }, { "epoch": 1.94, "learning_rate": 4.3579072532699167e-07, "logits/chosen": -1.5927643775939941, "logits/rejected": -1.5313929319381714, "logps/chosen": -102.90740966796875, "logps/rejected": -126.32066345214844, "loss": 0.2574, "rewards/accuracies": 1.0, "rewards/chosen": -0.581920862197876, "rewards/margins": 3.2381186485290527, "rewards/rejected": -3.8200392723083496, "step": 1209 }, { "epoch": 1.94, "learning_rate": 4.3569163694015057e-07, "logits/chosen": -1.4828330278396606, "logits/rejected": -1.4520206451416016, "logps/chosen": -87.55947875976562, "logps/rejected": -82.96576690673828, "loss": 0.2426, "rewards/accuracies": 0.75, "rewards/chosen": -1.3584098815917969, "rewards/margins": 0.6665577292442322, "rewards/rejected": -2.024967670440674, "step": 1210 }, { "epoch": 1.94, "learning_rate": 4.3559254855330953e-07, "logits/chosen": -1.4611014127731323, "logits/rejected": -1.4348227977752686, "logps/chosen": -62.47296905517578, "logps/rejected": -110.3625717163086, "loss": 0.2131, "rewards/accuracies": 0.75, "rewards/chosen": -0.8339658379554749, "rewards/margins": 2.628286123275757, "rewards/rejected": -3.462251901626587, "step": 1211 }, { "epoch": 1.95, "learning_rate": 4.354934601664685e-07, "logits/chosen": -1.5113213062286377, "logits/rejected": -1.5640305280685425, "logps/chosen": -113.55604553222656, "logps/rejected": -142.66705322265625, "loss": 0.3585, "rewards/accuracies": 0.75, "rewards/chosen": -0.8062220215797424, "rewards/margins": 2.268556594848633, "rewards/rejected": -3.0747785568237305, "step": 1212 }, { "epoch": 1.95, "learning_rate": 4.353943717796274e-07, "logits/chosen": -1.4309282302856445, "logits/rejected": -1.4370200634002686, "logps/chosen": -100.47332000732422, "logps/rejected": -133.22781372070312, "loss": 0.3919, "rewards/accuracies": 1.0, "rewards/chosen": -0.8284553289413452, "rewards/margins": 3.3672025203704834, "rewards/rejected": -4.195657730102539, "step": 1213 }, { "epoch": 1.95, "learning_rate": 4.3529528339278636e-07, "logits/chosen": -1.4369029998779297, "logits/rejected": -1.4550859928131104, "logps/chosen": -116.9339599609375, "logps/rejected": -141.95718383789062, "loss": 0.2986, "rewards/accuracies": 0.5, "rewards/chosen": -0.682491660118103, "rewards/margins": 4.178305625915527, "rewards/rejected": -4.86079740524292, "step": 1214 }, { "epoch": 1.95, "learning_rate": 4.3519619500594527e-07, "logits/chosen": -1.616718053817749, "logits/rejected": -1.6303207874298096, "logps/chosen": -80.79972839355469, "logps/rejected": -122.4210433959961, "loss": 0.32, "rewards/accuracies": 1.0, "rewards/chosen": 0.10389319062232971, "rewards/margins": 4.316219806671143, "rewards/rejected": -4.212326526641846, "step": 1215 }, { "epoch": 1.95, "learning_rate": 4.350971066191042e-07, "logits/chosen": -1.53462815284729, "logits/rejected": -1.5679481029510498, "logps/chosen": -105.13465881347656, "logps/rejected": -108.72409057617188, "loss": 0.256, "rewards/accuracies": 0.5, "rewards/chosen": -0.2821117341518402, "rewards/margins": 2.0878329277038574, "rewards/rejected": -2.3699445724487305, "step": 1216 }, { "epoch": 1.95, "learning_rate": 4.349980182322632e-07, "logits/chosen": -1.534961223602295, "logits/rejected": -1.487816572189331, "logps/chosen": -88.59142303466797, "logps/rejected": -139.10031127929688, "loss": 0.205, "rewards/accuracies": 1.0, "rewards/chosen": -0.6139363646507263, "rewards/margins": 5.768303394317627, "rewards/rejected": -6.38223934173584, "step": 1217 }, { "epoch": 1.96, "learning_rate": 4.348989298454221e-07, "logits/chosen": -1.6388590335845947, "logits/rejected": -1.641633152961731, "logps/chosen": -85.38260650634766, "logps/rejected": -139.23995971679688, "loss": 0.1915, "rewards/accuracies": 1.0, "rewards/chosen": 0.1994098722934723, "rewards/margins": 5.395689010620117, "rewards/rejected": -5.196279525756836, "step": 1218 }, { "epoch": 1.96, "learning_rate": 4.34799841458581e-07, "logits/chosen": -1.5533031225204468, "logits/rejected": -1.5126149654388428, "logps/chosen": -86.78959655761719, "logps/rejected": -99.01382446289062, "loss": 0.3161, "rewards/accuracies": 0.75, "rewards/chosen": -1.265673041343689, "rewards/margins": 2.393893003463745, "rewards/rejected": -3.6595659255981445, "step": 1219 }, { "epoch": 1.96, "learning_rate": 4.3470075307173996e-07, "logits/chosen": -1.330951452255249, "logits/rejected": -1.292251706123352, "logps/chosen": -73.68235778808594, "logps/rejected": -105.49578857421875, "loss": 0.3047, "rewards/accuracies": 1.0, "rewards/chosen": -0.1640346348285675, "rewards/margins": 2.724595069885254, "rewards/rejected": -2.888629913330078, "step": 1220 }, { "epoch": 1.96, "learning_rate": 4.346016646848989e-07, "logits/chosen": -1.6905272006988525, "logits/rejected": -1.6001838445663452, "logps/chosen": -79.28993225097656, "logps/rejected": -95.04454040527344, "loss": 0.0938, "rewards/accuracies": 1.0, "rewards/chosen": -0.012753300368785858, "rewards/margins": 2.346951484680176, "rewards/rejected": -2.3597049713134766, "step": 1221 }, { "epoch": 1.96, "learning_rate": 4.345025762980579e-07, "logits/chosen": -1.5398207902908325, "logits/rejected": -1.6387784481048584, "logps/chosen": -100.39404296875, "logps/rejected": -117.72673797607422, "loss": 0.3331, "rewards/accuracies": 0.75, "rewards/chosen": -0.18828639388084412, "rewards/margins": 2.2205238342285156, "rewards/rejected": -2.4088103771209717, "step": 1222 }, { "epoch": 1.96, "learning_rate": 4.344034879112168e-07, "logits/chosen": -1.6562045812606812, "logits/rejected": -1.6230638027191162, "logps/chosen": -85.01177978515625, "logps/rejected": -103.04752349853516, "loss": 0.389, "rewards/accuracies": 1.0, "rewards/chosen": -0.3874492943286896, "rewards/margins": 1.6778422594070435, "rewards/rejected": -2.0652916431427, "step": 1223 }, { "epoch": 1.96, "learning_rate": 4.343043995243757e-07, "logits/chosen": -1.2507611513137817, "logits/rejected": -1.2746949195861816, "logps/chosen": -98.89669036865234, "logps/rejected": -119.34732055664062, "loss": 0.4176, "rewards/accuracies": 0.75, "rewards/chosen": -1.4274120330810547, "rewards/margins": 0.17870986461639404, "rewards/rejected": -1.6061218976974487, "step": 1224 }, { "epoch": 1.97, "learning_rate": 4.3420531113753465e-07, "logits/chosen": -1.62452232837677, "logits/rejected": -1.6041109561920166, "logps/chosen": -105.58642578125, "logps/rejected": -117.57750701904297, "loss": 0.2538, "rewards/accuracies": 0.75, "rewards/chosen": -0.2381969541311264, "rewards/margins": 2.327038288116455, "rewards/rejected": -2.5652356147766113, "step": 1225 }, { "epoch": 1.97, "learning_rate": 4.341062227506936e-07, "logits/chosen": -1.580676794052124, "logits/rejected": -1.6270904541015625, "logps/chosen": -84.481689453125, "logps/rejected": -123.74541473388672, "loss": 0.3911, "rewards/accuracies": 0.5, "rewards/chosen": -0.6112198829650879, "rewards/margins": 2.1724674701690674, "rewards/rejected": -2.783687114715576, "step": 1226 }, { "epoch": 1.97, "learning_rate": 4.3400713436385257e-07, "logits/chosen": -1.458744764328003, "logits/rejected": -1.4648215770721436, "logps/chosen": -93.22073364257812, "logps/rejected": -145.3765411376953, "loss": 0.2707, "rewards/accuracies": 0.5, "rewards/chosen": -0.7115075588226318, "rewards/margins": 0.5977874994277954, "rewards/rejected": -1.3092951774597168, "step": 1227 }, { "epoch": 1.97, "learning_rate": 4.339080459770115e-07, "logits/chosen": -1.544055461883545, "logits/rejected": -1.4529588222503662, "logps/chosen": -88.87449645996094, "logps/rejected": -100.42530822753906, "loss": 0.277, "rewards/accuracies": 1.0, "rewards/chosen": 0.1658649444580078, "rewards/margins": 2.3713438510894775, "rewards/rejected": -2.205479145050049, "step": 1228 }, { "epoch": 1.97, "learning_rate": 4.338089575901704e-07, "logits/chosen": -1.331592321395874, "logits/rejected": -1.3773057460784912, "logps/chosen": -93.46331787109375, "logps/rejected": -120.49371337890625, "loss": 0.328, "rewards/accuracies": 0.75, "rewards/chosen": -1.0815529823303223, "rewards/margins": 2.121675968170166, "rewards/rejected": -3.203228712081909, "step": 1229 }, { "epoch": 1.97, "learning_rate": 4.3370986920332934e-07, "logits/chosen": -1.3839352130889893, "logits/rejected": -1.35260009765625, "logps/chosen": -111.54293823242188, "logps/rejected": -145.79348754882812, "loss": 0.3156, "rewards/accuracies": 1.0, "rewards/chosen": -0.8885555267333984, "rewards/margins": 3.0533628463745117, "rewards/rejected": -3.94191837310791, "step": 1230 }, { "epoch": 1.98, "learning_rate": 4.3361078081648825e-07, "logits/chosen": -1.406173825263977, "logits/rejected": -1.3496291637420654, "logps/chosen": -83.7090072631836, "logps/rejected": -130.5758056640625, "loss": 0.2577, "rewards/accuracies": 1.0, "rewards/chosen": -0.28057652711868286, "rewards/margins": 4.063328742980957, "rewards/rejected": -4.343904972076416, "step": 1231 }, { "epoch": 1.98, "learning_rate": 4.3351169242964726e-07, "logits/chosen": -1.454321026802063, "logits/rejected": -1.42905855178833, "logps/chosen": -100.27388000488281, "logps/rejected": -111.19540405273438, "loss": 0.285, "rewards/accuracies": 0.75, "rewards/chosen": -0.24707776308059692, "rewards/margins": 2.591421365737915, "rewards/rejected": -2.838499069213867, "step": 1232 }, { "epoch": 1.98, "learning_rate": 4.3341260404280617e-07, "logits/chosen": -1.4066498279571533, "logits/rejected": -1.4142098426818848, "logps/chosen": -96.74811553955078, "logps/rejected": -124.33453369140625, "loss": 0.2701, "rewards/accuracies": 1.0, "rewards/chosen": -0.17986297607421875, "rewards/margins": 2.3474373817443848, "rewards/rejected": -2.5273003578186035, "step": 1233 }, { "epoch": 1.98, "learning_rate": 4.333135156559651e-07, "logits/chosen": -1.3803982734680176, "logits/rejected": -1.3716531991958618, "logps/chosen": -71.3891830444336, "logps/rejected": -121.86614227294922, "loss": 0.3086, "rewards/accuracies": 1.0, "rewards/chosen": 0.014169782400131226, "rewards/margins": 3.4527339935302734, "rewards/rejected": -3.4385643005371094, "step": 1234 }, { "epoch": 1.98, "learning_rate": 4.3321442726912404e-07, "logits/chosen": -1.467852234840393, "logits/rejected": -1.502821922302246, "logps/chosen": -84.32377624511719, "logps/rejected": -120.20381164550781, "loss": 0.3257, "rewards/accuracies": 0.75, "rewards/chosen": -1.8148152828216553, "rewards/margins": 1.5343060493469238, "rewards/rejected": -3.349121570587158, "step": 1235 }, { "epoch": 1.98, "learning_rate": 4.3311533888228294e-07, "logits/chosen": -1.5057395696640015, "logits/rejected": -1.4680556058883667, "logps/chosen": -60.75223922729492, "logps/rejected": -116.8771743774414, "loss": 0.2273, "rewards/accuracies": 1.0, "rewards/chosen": -0.30038952827453613, "rewards/margins": 4.097169876098633, "rewards/rejected": -4.397559642791748, "step": 1236 }, { "epoch": 1.99, "learning_rate": 4.3301625049544196e-07, "logits/chosen": -1.5608302354812622, "logits/rejected": -1.5874555110931396, "logps/chosen": -67.79867553710938, "logps/rejected": -128.5220184326172, "loss": 0.2444, "rewards/accuracies": 1.0, "rewards/chosen": 0.13367435336112976, "rewards/margins": 5.033227920532227, "rewards/rejected": -4.8995537757873535, "step": 1237 }, { "epoch": 1.99, "learning_rate": 4.3291716210860086e-07, "logits/chosen": -1.5044618844985962, "logits/rejected": -1.5467292070388794, "logps/chosen": -76.59243774414062, "logps/rejected": -122.72437286376953, "loss": 0.2551, "rewards/accuracies": 1.0, "rewards/chosen": -0.15947704017162323, "rewards/margins": 3.9455676078796387, "rewards/rejected": -4.105044364929199, "step": 1238 }, { "epoch": 1.99, "learning_rate": 4.3281807372175977e-07, "logits/chosen": -1.3771075010299683, "logits/rejected": -1.4101756811141968, "logps/chosen": -118.90798950195312, "logps/rejected": -124.225341796875, "loss": 0.3909, "rewards/accuracies": 0.5, "rewards/chosen": -1.3187206983566284, "rewards/margins": 2.3943827152252197, "rewards/rejected": -3.7131035327911377, "step": 1239 }, { "epoch": 1.99, "learning_rate": 4.3271898533491873e-07, "logits/chosen": -1.5012879371643066, "logits/rejected": -1.4926526546478271, "logps/chosen": -91.9200439453125, "logps/rejected": -124.88258361816406, "loss": 0.2289, "rewards/accuracies": 0.75, "rewards/chosen": -0.9201165437698364, "rewards/margins": 1.5434389114379883, "rewards/rejected": -2.4635555744171143, "step": 1240 }, { "epoch": 1.99, "learning_rate": 4.3261989694807764e-07, "logits/chosen": -1.5616192817687988, "logits/rejected": -1.617734432220459, "logps/chosen": -99.5982437133789, "logps/rejected": -142.7492218017578, "loss": 0.2113, "rewards/accuracies": 1.0, "rewards/chosen": -0.6395364999771118, "rewards/margins": 2.485624074935913, "rewards/rejected": -3.1251606941223145, "step": 1241 }, { "epoch": 1.99, "learning_rate": 4.3252080856123665e-07, "logits/chosen": -1.5555245876312256, "logits/rejected": -1.4841028451919556, "logps/chosen": -108.14250946044922, "logps/rejected": -136.6805877685547, "loss": 0.1811, "rewards/accuracies": 1.0, "rewards/chosen": -0.2455122023820877, "rewards/margins": 4.978642463684082, "rewards/rejected": -5.224154472351074, "step": 1242 }, { "epoch": 2.0, "learning_rate": 4.3242172017439556e-07, "logits/chosen": -1.5388514995574951, "logits/rejected": -1.5513019561767578, "logps/chosen": -107.13815307617188, "logps/rejected": -106.31368255615234, "loss": 0.2627, "rewards/accuracies": 1.0, "rewards/chosen": -0.4078955054283142, "rewards/margins": 2.496858835220337, "rewards/rejected": -2.904754638671875, "step": 1243 }, { "epoch": 2.0, "learning_rate": 4.3232263178755446e-07, "logits/chosen": -1.4790377616882324, "logits/rejected": -1.4332636594772339, "logps/chosen": -101.67962646484375, "logps/rejected": -111.42196655273438, "loss": 0.3392, "rewards/accuracies": 1.0, "rewards/chosen": -1.0826828479766846, "rewards/margins": 1.8899434804916382, "rewards/rejected": -2.972626209259033, "step": 1244 }, { "epoch": 2.0, "learning_rate": 4.322235434007134e-07, "logits/chosen": -1.527238368988037, "logits/rejected": -1.5337402820587158, "logps/chosen": -119.0438232421875, "logps/rejected": -122.93560028076172, "loss": 0.2947, "rewards/accuracies": 0.75, "rewards/chosen": -0.01847878098487854, "rewards/margins": 2.083226203918457, "rewards/rejected": -2.1017048358917236, "step": 1245 }, { "epoch": 2.0, "learning_rate": 4.3212445501387233e-07, "logits/chosen": -1.484950065612793, "logits/rejected": -1.4397361278533936, "logps/chosen": -75.26400756835938, "logps/rejected": -100.62810516357422, "loss": 0.3457, "rewards/accuracies": 0.75, "rewards/chosen": -0.28235310316085815, "rewards/margins": 2.8017115592956543, "rewards/rejected": -3.0840647220611572, "step": 1246 }, { "epoch": 2.0, "learning_rate": 4.3202536662703134e-07, "logits/chosen": -1.3811062574386597, "logits/rejected": -1.4219859838485718, "logps/chosen": -81.69984436035156, "logps/rejected": -124.89698791503906, "loss": 0.1273, "rewards/accuracies": 1.0, "rewards/chosen": -0.2544810175895691, "rewards/margins": 4.803673267364502, "rewards/rejected": -5.058154106140137, "step": 1247 }, { "epoch": 2.0, "learning_rate": 4.3192627824019025e-07, "logits/chosen": -1.466765284538269, "logits/rejected": -1.4616584777832031, "logps/chosen": -93.19338989257812, "logps/rejected": -127.35762023925781, "loss": 0.1427, "rewards/accuracies": 1.0, "rewards/chosen": 0.0927833616733551, "rewards/margins": 3.1084632873535156, "rewards/rejected": -3.0156800746917725, "step": 1248 }, { "epoch": 2.0, "learning_rate": 4.3182718985334916e-07, "logits/chosen": -1.5544555187225342, "logits/rejected": -1.5313024520874023, "logps/chosen": -119.27314758300781, "logps/rejected": -139.83743286132812, "loss": 0.1927, "rewards/accuracies": 1.0, "rewards/chosen": -0.22092323005199432, "rewards/margins": 4.216795444488525, "rewards/rejected": -4.437718391418457, "step": 1249 }, { "epoch": 2.01, "learning_rate": 4.317281014665081e-07, "logits/chosen": -1.5011337995529175, "logits/rejected": -1.508955955505371, "logps/chosen": -72.34799194335938, "logps/rejected": -153.66970825195312, "loss": 0.0893, "rewards/accuracies": 1.0, "rewards/chosen": 0.15802927315235138, "rewards/margins": 5.3063530921936035, "rewards/rejected": -5.1483235359191895, "step": 1250 }, { "epoch": 2.01, "learning_rate": 4.31629013079667e-07, "logits/chosen": -1.3680545091629028, "logits/rejected": -1.3004566431045532, "logps/chosen": -115.002685546875, "logps/rejected": -113.25267028808594, "loss": 0.1608, "rewards/accuracies": 1.0, "rewards/chosen": 0.21965044736862183, "rewards/margins": 3.625779390335083, "rewards/rejected": -3.4061288833618164, "step": 1251 }, { "epoch": 2.01, "learning_rate": 4.31529924692826e-07, "logits/chosen": -1.4229012727737427, "logits/rejected": -1.4551069736480713, "logps/chosen": -67.79412078857422, "logps/rejected": -148.41510009765625, "loss": 0.1167, "rewards/accuracies": 1.0, "rewards/chosen": 0.22255849838256836, "rewards/margins": 4.245892524719238, "rewards/rejected": -4.023333549499512, "step": 1252 }, { "epoch": 2.01, "learning_rate": 4.3143083630598494e-07, "logits/chosen": -1.407235860824585, "logits/rejected": -1.4739642143249512, "logps/chosen": -87.83484649658203, "logps/rejected": -133.2175750732422, "loss": 0.2396, "rewards/accuracies": 1.0, "rewards/chosen": -1.1398205757141113, "rewards/margins": 3.8166840076446533, "rewards/rejected": -4.9565043449401855, "step": 1253 }, { "epoch": 2.01, "learning_rate": 4.3133174791914385e-07, "logits/chosen": -1.4445710182189941, "logits/rejected": -1.44650137424469, "logps/chosen": -81.70764923095703, "logps/rejected": -110.63765716552734, "loss": 0.106, "rewards/accuracies": 1.0, "rewards/chosen": -0.5410981774330139, "rewards/margins": 3.733992099761963, "rewards/rejected": -4.275090217590332, "step": 1254 }, { "epoch": 2.01, "learning_rate": 4.312326595323028e-07, "logits/chosen": -1.3969769477844238, "logits/rejected": -1.4331176280975342, "logps/chosen": -82.84854125976562, "logps/rejected": -101.40031433105469, "loss": 0.1919, "rewards/accuracies": 1.0, "rewards/chosen": -0.522128701210022, "rewards/margins": 2.6896324157714844, "rewards/rejected": -3.211760997772217, "step": 1255 }, { "epoch": 2.02, "learning_rate": 4.311335711454617e-07, "logits/chosen": -1.4795727729797363, "logits/rejected": -1.3479630947113037, "logps/chosen": -102.34103393554688, "logps/rejected": -118.60768127441406, "loss": 0.1861, "rewards/accuracies": 1.0, "rewards/chosen": -0.15835285186767578, "rewards/margins": 3.472829580307007, "rewards/rejected": -3.6311821937561035, "step": 1256 }, { "epoch": 2.02, "learning_rate": 4.310344827586206e-07, "logits/chosen": -1.5303661823272705, "logits/rejected": -1.4470382928848267, "logps/chosen": -103.25740051269531, "logps/rejected": -124.37181854248047, "loss": 0.3216, "rewards/accuracies": 1.0, "rewards/chosen": -0.3342008590698242, "rewards/margins": 3.673947811126709, "rewards/rejected": -4.008148193359375, "step": 1257 }, { "epoch": 2.02, "learning_rate": 4.3093539437177964e-07, "logits/chosen": -1.4062355756759644, "logits/rejected": -1.433314561843872, "logps/chosen": -82.02986145019531, "logps/rejected": -156.03985595703125, "loss": 0.1258, "rewards/accuracies": 1.0, "rewards/chosen": 0.26443159580230713, "rewards/margins": 7.591645240783691, "rewards/rejected": -7.327213764190674, "step": 1258 }, { "epoch": 2.02, "learning_rate": 4.3083630598493854e-07, "logits/chosen": -1.3208235502243042, "logits/rejected": -1.4586138725280762, "logps/chosen": -61.9752197265625, "logps/rejected": -106.89537811279297, "loss": 0.1496, "rewards/accuracies": 0.75, "rewards/chosen": -0.14034700393676758, "rewards/margins": 2.4393868446350098, "rewards/rejected": -2.5797338485717773, "step": 1259 }, { "epoch": 2.02, "learning_rate": 4.307372175980975e-07, "logits/chosen": -1.5099453926086426, "logits/rejected": -1.4955332279205322, "logps/chosen": -66.140869140625, "logps/rejected": -106.26004028320312, "loss": 0.1196, "rewards/accuracies": 1.0, "rewards/chosen": 0.16897733509540558, "rewards/margins": 5.1604390144348145, "rewards/rejected": -4.991461753845215, "step": 1260 }, { "epoch": 2.02, "learning_rate": 4.306381292112564e-07, "logits/chosen": -1.465770959854126, "logits/rejected": -1.4089298248291016, "logps/chosen": -99.04536437988281, "logps/rejected": -128.20281982421875, "loss": 0.1208, "rewards/accuracies": 0.75, "rewards/chosen": -0.33492976427078247, "rewards/margins": 4.085660457611084, "rewards/rejected": -4.420590400695801, "step": 1261 }, { "epoch": 2.03, "learning_rate": 4.305390408244153e-07, "logits/chosen": -1.4141796827316284, "logits/rejected": -1.4638724327087402, "logps/chosen": -97.93463134765625, "logps/rejected": -132.31378173828125, "loss": 0.0929, "rewards/accuracies": 1.0, "rewards/chosen": -1.6094722747802734, "rewards/margins": 3.4959359169006348, "rewards/rejected": -5.105408191680908, "step": 1262 }, { "epoch": 2.03, "learning_rate": 4.3043995243757433e-07, "logits/chosen": -1.493220329284668, "logits/rejected": -1.524265170097351, "logps/chosen": -81.64726257324219, "logps/rejected": -129.051513671875, "loss": 0.1432, "rewards/accuracies": 0.75, "rewards/chosen": -1.677672028541565, "rewards/margins": 2.242954730987549, "rewards/rejected": -3.920626640319824, "step": 1263 }, { "epoch": 2.03, "learning_rate": 4.3034086405073324e-07, "logits/chosen": -1.5418639183044434, "logits/rejected": -1.5842581987380981, "logps/chosen": -83.81095886230469, "logps/rejected": -98.02456665039062, "loss": 0.1422, "rewards/accuracies": 1.0, "rewards/chosen": 0.003964036703109741, "rewards/margins": 2.7386045455932617, "rewards/rejected": -2.734640598297119, "step": 1264 }, { "epoch": 2.03, "learning_rate": 4.302417756638922e-07, "logits/chosen": -1.5881929397583008, "logits/rejected": -1.6196166276931763, "logps/chosen": -88.98538208007812, "logps/rejected": -112.26771545410156, "loss": 0.2088, "rewards/accuracies": 0.75, "rewards/chosen": -0.06040230393409729, "rewards/margins": 2.9770991802215576, "rewards/rejected": -3.037501811981201, "step": 1265 }, { "epoch": 2.03, "learning_rate": 4.301426872770511e-07, "logits/chosen": -1.5618680715560913, "logits/rejected": -1.5513938665390015, "logps/chosen": -77.24048614501953, "logps/rejected": -121.65913391113281, "loss": 0.1451, "rewards/accuracies": 0.75, "rewards/chosen": -0.19279365241527557, "rewards/margins": 5.798108100891113, "rewards/rejected": -5.990901470184326, "step": 1266 }, { "epoch": 2.03, "learning_rate": 4.3004359889021e-07, "logits/chosen": -1.341871738433838, "logits/rejected": -1.3327093124389648, "logps/chosen": -83.49139404296875, "logps/rejected": -128.03341674804688, "loss": 0.0935, "rewards/accuracies": 1.0, "rewards/chosen": 0.3953245282173157, "rewards/margins": 4.485246181488037, "rewards/rejected": -4.089921951293945, "step": 1267 }, { "epoch": 2.04, "learning_rate": 4.29944510503369e-07, "logits/chosen": -1.5106936693191528, "logits/rejected": -1.5004298686981201, "logps/chosen": -88.78207397460938, "logps/rejected": -149.87152099609375, "loss": 0.0829, "rewards/accuracies": 1.0, "rewards/chosen": -0.4655628502368927, "rewards/margins": 6.004593372344971, "rewards/rejected": -6.470156192779541, "step": 1268 }, { "epoch": 2.04, "learning_rate": 4.2984542211652793e-07, "logits/chosen": -1.6028718948364258, "logits/rejected": -1.5930198431015015, "logps/chosen": -80.30803680419922, "logps/rejected": -100.2142333984375, "loss": 0.2871, "rewards/accuracies": 1.0, "rewards/chosen": -0.9147788286209106, "rewards/margins": 2.1014444828033447, "rewards/rejected": -3.016223430633545, "step": 1269 }, { "epoch": 2.04, "learning_rate": 4.297463337296869e-07, "logits/chosen": -1.4755864143371582, "logits/rejected": -1.4524469375610352, "logps/chosen": -84.04154205322266, "logps/rejected": -109.63292694091797, "loss": 0.1721, "rewards/accuracies": 1.0, "rewards/chosen": -0.31514739990234375, "rewards/margins": 3.342761278152466, "rewards/rejected": -3.6579086780548096, "step": 1270 }, { "epoch": 2.04, "learning_rate": 4.296472453428458e-07, "logits/chosen": -1.3040111064910889, "logits/rejected": -1.4470059871673584, "logps/chosen": -97.77716064453125, "logps/rejected": -135.5336456298828, "loss": 0.1121, "rewards/accuracies": 1.0, "rewards/chosen": -0.46023786067962646, "rewards/margins": 2.5814943313598633, "rewards/rejected": -3.0417323112487793, "step": 1271 }, { "epoch": 2.04, "learning_rate": 4.295481569560047e-07, "logits/chosen": -1.6371796131134033, "logits/rejected": -1.6856129169464111, "logps/chosen": -73.14612579345703, "logps/rejected": -111.6485824584961, "loss": 0.1653, "rewards/accuracies": 1.0, "rewards/chosen": -0.6499501466751099, "rewards/margins": 3.002352476119995, "rewards/rejected": -3.6523025035858154, "step": 1272 }, { "epoch": 2.04, "learning_rate": 4.2944906856916366e-07, "logits/chosen": -1.5682568550109863, "logits/rejected": -1.52036452293396, "logps/chosen": -68.01469421386719, "logps/rejected": -116.35298156738281, "loss": 0.1507, "rewards/accuracies": 1.0, "rewards/chosen": -0.010186254978179932, "rewards/margins": 5.229900360107422, "rewards/rejected": -5.240086555480957, "step": 1273 }, { "epoch": 2.04, "learning_rate": 4.293499801823226e-07, "logits/chosen": -1.3977530002593994, "logits/rejected": -1.3857927322387695, "logps/chosen": -107.8603286743164, "logps/rejected": -125.48271179199219, "loss": 0.266, "rewards/accuracies": 1.0, "rewards/chosen": -0.5130864977836609, "rewards/margins": 2.9510769844055176, "rewards/rejected": -3.4641637802124023, "step": 1274 }, { "epoch": 2.05, "learning_rate": 4.292508917954816e-07, "logits/chosen": -1.387150526046753, "logits/rejected": -1.375797986984253, "logps/chosen": -80.3109359741211, "logps/rejected": -107.121337890625, "loss": 0.2509, "rewards/accuracies": 1.0, "rewards/chosen": 0.11880046129226685, "rewards/margins": 2.9024014472961426, "rewards/rejected": -2.7836012840270996, "step": 1275 }, { "epoch": 2.05, "learning_rate": 4.291518034086405e-07, "logits/chosen": -1.60823392868042, "logits/rejected": -1.5337653160095215, "logps/chosen": -66.01141357421875, "logps/rejected": -98.07562255859375, "loss": 0.2106, "rewards/accuracies": 1.0, "rewards/chosen": -0.6017864942550659, "rewards/margins": 1.9843767881393433, "rewards/rejected": -2.586163282394409, "step": 1276 }, { "epoch": 2.05, "learning_rate": 4.290527150217994e-07, "logits/chosen": -1.4426965713500977, "logits/rejected": -1.488049864768982, "logps/chosen": -89.1236801147461, "logps/rejected": -109.98504638671875, "loss": 0.1449, "rewards/accuracies": 1.0, "rewards/chosen": -0.02813863568007946, "rewards/margins": 2.3508238792419434, "rewards/rejected": -2.378962516784668, "step": 1277 }, { "epoch": 2.05, "learning_rate": 4.2895362663495835e-07, "logits/chosen": -1.5795447826385498, "logits/rejected": -1.6237201690673828, "logps/chosen": -68.04721069335938, "logps/rejected": -111.66845703125, "loss": 0.098, "rewards/accuracies": 0.75, "rewards/chosen": -1.1038775444030762, "rewards/margins": 2.7163329124450684, "rewards/rejected": -3.8202106952667236, "step": 1278 }, { "epoch": 2.05, "learning_rate": 4.288545382481173e-07, "logits/chosen": -1.4876430034637451, "logits/rejected": -1.550061821937561, "logps/chosen": -78.9802017211914, "logps/rejected": -119.94223022460938, "loss": 0.1467, "rewards/accuracies": 0.75, "rewards/chosen": -0.671481728553772, "rewards/margins": 3.715240955352783, "rewards/rejected": -4.386722564697266, "step": 1279 }, { "epoch": 2.05, "learning_rate": 4.287554498612763e-07, "logits/chosen": -1.4919487237930298, "logits/rejected": -1.4765551090240479, "logps/chosen": -92.02061462402344, "logps/rejected": -138.22731018066406, "loss": 0.1127, "rewards/accuracies": 1.0, "rewards/chosen": -0.7657157778739929, "rewards/margins": 4.477447509765625, "rewards/rejected": -5.243163585662842, "step": 1280 }, { "epoch": 2.06, "learning_rate": 4.286563614744352e-07, "logits/chosen": -1.5616093873977661, "logits/rejected": -1.5489097833633423, "logps/chosen": -107.42478942871094, "logps/rejected": -136.08245849609375, "loss": 0.1117, "rewards/accuracies": 1.0, "rewards/chosen": -1.282527208328247, "rewards/margins": 3.766399383544922, "rewards/rejected": -5.048926830291748, "step": 1281 }, { "epoch": 2.06, "learning_rate": 4.285572730875941e-07, "logits/chosen": -1.4993157386779785, "logits/rejected": -1.5647921562194824, "logps/chosen": -90.16441345214844, "logps/rejected": -150.462646484375, "loss": 0.1052, "rewards/accuracies": 1.0, "rewards/chosen": -0.7772684693336487, "rewards/margins": 4.079909324645996, "rewards/rejected": -4.857177734375, "step": 1282 }, { "epoch": 2.06, "learning_rate": 4.2845818470075305e-07, "logits/chosen": -1.4780125617980957, "logits/rejected": -1.4558207988739014, "logps/chosen": -80.77523803710938, "logps/rejected": -141.18577575683594, "loss": 0.1572, "rewards/accuracies": 1.0, "rewards/chosen": -0.5898529887199402, "rewards/margins": 3.213545322418213, "rewards/rejected": -3.803398370742798, "step": 1283 }, { "epoch": 2.06, "learning_rate": 4.28359096313912e-07, "logits/chosen": -1.4974738359451294, "logits/rejected": -1.572270393371582, "logps/chosen": -117.81903076171875, "logps/rejected": -140.6079559326172, "loss": 0.1678, "rewards/accuracies": 1.0, "rewards/chosen": -0.7976266741752625, "rewards/margins": 3.3109230995178223, "rewards/rejected": -4.108550071716309, "step": 1284 }, { "epoch": 2.06, "learning_rate": 4.2826000792707097e-07, "logits/chosen": -1.6578664779663086, "logits/rejected": -1.6431982517242432, "logps/chosen": -82.29998016357422, "logps/rejected": -149.7062225341797, "loss": 0.0892, "rewards/accuracies": 1.0, "rewards/chosen": -1.1788790225982666, "rewards/margins": 5.040714263916016, "rewards/rejected": -6.219593524932861, "step": 1285 }, { "epoch": 2.06, "learning_rate": 4.281609195402299e-07, "logits/chosen": -1.647971510887146, "logits/rejected": -1.7033724784851074, "logps/chosen": -98.63599395751953, "logps/rejected": -138.95445251464844, "loss": 0.1032, "rewards/accuracies": 1.0, "rewards/chosen": -0.8130593299865723, "rewards/margins": 3.6318981647491455, "rewards/rejected": -4.444957733154297, "step": 1286 }, { "epoch": 2.07, "learning_rate": 4.280618311533888e-07, "logits/chosen": -1.4832065105438232, "logits/rejected": -1.4377533197402954, "logps/chosen": -90.29832458496094, "logps/rejected": -111.72748565673828, "loss": 0.114, "rewards/accuracies": 0.75, "rewards/chosen": -0.8295157551765442, "rewards/margins": 1.7472118139266968, "rewards/rejected": -2.576727867126465, "step": 1287 }, { "epoch": 2.07, "learning_rate": 4.2796274276654774e-07, "logits/chosen": -1.6706310510635376, "logits/rejected": -1.6082743406295776, "logps/chosen": -108.583984375, "logps/rejected": -155.32073974609375, "loss": 0.163, "rewards/accuracies": 1.0, "rewards/chosen": -1.1076383590698242, "rewards/margins": 4.575101852416992, "rewards/rejected": -5.682740211486816, "step": 1288 }, { "epoch": 2.07, "learning_rate": 4.2786365437970665e-07, "logits/chosen": -1.553928017616272, "logits/rejected": -1.464813470840454, "logps/chosen": -114.1164321899414, "logps/rejected": -138.7145233154297, "loss": 0.1022, "rewards/accuracies": 1.0, "rewards/chosen": -1.1080217361450195, "rewards/margins": 3.4946556091308594, "rewards/rejected": -4.602676868438721, "step": 1289 }, { "epoch": 2.07, "learning_rate": 4.277645659928656e-07, "logits/chosen": -1.6050275564193726, "logits/rejected": -1.5640316009521484, "logps/chosen": -99.86495971679688, "logps/rejected": -158.56036376953125, "loss": 0.1845, "rewards/accuracies": 1.0, "rewards/chosen": -0.66361403465271, "rewards/margins": 5.470688819885254, "rewards/rejected": -6.134303092956543, "step": 1290 }, { "epoch": 2.07, "learning_rate": 4.2766547760602457e-07, "logits/chosen": -1.4872000217437744, "logits/rejected": -1.4102535247802734, "logps/chosen": -79.47628784179688, "logps/rejected": -114.87718963623047, "loss": 0.1067, "rewards/accuracies": 1.0, "rewards/chosen": -0.42906662821769714, "rewards/margins": 3.4579083919525146, "rewards/rejected": -3.886974811553955, "step": 1291 }, { "epoch": 2.07, "learning_rate": 4.2756638921918347e-07, "logits/chosen": -1.534097671508789, "logits/rejected": -1.536102533340454, "logps/chosen": -107.53334045410156, "logps/rejected": -158.9542236328125, "loss": 0.0703, "rewards/accuracies": 1.0, "rewards/chosen": -1.1069401502609253, "rewards/margins": 4.953769683837891, "rewards/rejected": -6.060709476470947, "step": 1292 }, { "epoch": 2.08, "learning_rate": 4.2746730083234243e-07, "logits/chosen": -1.6231398582458496, "logits/rejected": -1.532738208770752, "logps/chosen": -84.05986785888672, "logps/rejected": -100.68190002441406, "loss": 0.1366, "rewards/accuracies": 1.0, "rewards/chosen": -0.3910371661186218, "rewards/margins": 2.09564471244812, "rewards/rejected": -2.4866819381713867, "step": 1293 }, { "epoch": 2.08, "learning_rate": 4.2736821244550134e-07, "logits/chosen": -1.509333610534668, "logits/rejected": -1.4769201278686523, "logps/chosen": -87.03996276855469, "logps/rejected": -117.03504943847656, "loss": 0.1587, "rewards/accuracies": 1.0, "rewards/chosen": -0.3784305453300476, "rewards/margins": 2.687852382659912, "rewards/rejected": -3.0662829875946045, "step": 1294 }, { "epoch": 2.08, "learning_rate": 4.272691240586603e-07, "logits/chosen": -1.4786738157272339, "logits/rejected": -1.4909909963607788, "logps/chosen": -91.95360565185547, "logps/rejected": -119.86714172363281, "loss": 0.171, "rewards/accuracies": 1.0, "rewards/chosen": -1.3409894704818726, "rewards/margins": 1.2484445571899414, "rewards/rejected": -2.5894339084625244, "step": 1295 }, { "epoch": 2.08, "learning_rate": 4.2717003567181926e-07, "logits/chosen": -1.5702946186065674, "logits/rejected": -1.5587682723999023, "logps/chosen": -114.6760025024414, "logps/rejected": -136.5312957763672, "loss": 0.2047, "rewards/accuracies": 1.0, "rewards/chosen": -0.44146645069122314, "rewards/margins": 4.101672172546387, "rewards/rejected": -4.54313850402832, "step": 1296 }, { "epoch": 2.08, "learning_rate": 4.2707094728497817e-07, "logits/chosen": -1.4429200887680054, "logits/rejected": -1.4038357734680176, "logps/chosen": -85.59593200683594, "logps/rejected": -104.63298034667969, "loss": 0.1582, "rewards/accuracies": 1.0, "rewards/chosen": -0.9341185092926025, "rewards/margins": 3.3808820247650146, "rewards/rejected": -4.315000534057617, "step": 1297 }, { "epoch": 2.08, "learning_rate": 4.269718588981371e-07, "logits/chosen": -1.5841290950775146, "logits/rejected": -1.6385498046875, "logps/chosen": -89.0302505493164, "logps/rejected": -145.2225799560547, "loss": 0.1178, "rewards/accuracies": 1.0, "rewards/chosen": -1.2459421157836914, "rewards/margins": 3.768739700317383, "rewards/rejected": -5.014681816101074, "step": 1298 }, { "epoch": 2.09, "learning_rate": 4.2687277051129603e-07, "logits/chosen": -1.495919942855835, "logits/rejected": -1.5673154592514038, "logps/chosen": -62.68367385864258, "logps/rejected": -147.11851501464844, "loss": 0.1061, "rewards/accuracies": 1.0, "rewards/chosen": -0.22919194400310516, "rewards/margins": 6.810031414031982, "rewards/rejected": -7.039223670959473, "step": 1299 }, { "epoch": 2.09, "learning_rate": 4.26773682124455e-07, "logits/chosen": -1.636296272277832, "logits/rejected": -1.5996400117874146, "logps/chosen": -85.95165252685547, "logps/rejected": -119.95809936523438, "loss": 0.1003, "rewards/accuracies": 1.0, "rewards/chosen": -1.2713234424591064, "rewards/margins": 4.2623114585876465, "rewards/rejected": -5.533635139465332, "step": 1300 }, { "epoch": 2.09, "learning_rate": 4.2667459373761395e-07, "logits/chosen": -1.5026570558547974, "logits/rejected": -1.4609479904174805, "logps/chosen": -82.52363586425781, "logps/rejected": -135.9720001220703, "loss": 0.0776, "rewards/accuracies": 1.0, "rewards/chosen": -0.7348797917366028, "rewards/margins": 4.531760215759277, "rewards/rejected": -5.2666401863098145, "step": 1301 }, { "epoch": 2.09, "learning_rate": 4.2657550535077286e-07, "logits/chosen": -1.491819143295288, "logits/rejected": -1.3917618989944458, "logps/chosen": -98.51071166992188, "logps/rejected": -115.27081298828125, "loss": 0.1793, "rewards/accuracies": 0.75, "rewards/chosen": -1.411280632019043, "rewards/margins": 1.1156457662582397, "rewards/rejected": -2.5269265174865723, "step": 1302 }, { "epoch": 2.09, "learning_rate": 4.264764169639318e-07, "logits/chosen": -1.605468511581421, "logits/rejected": -1.6631664037704468, "logps/chosen": -71.73342895507812, "logps/rejected": -161.34703063964844, "loss": 0.1208, "rewards/accuracies": 1.0, "rewards/chosen": -0.491355836391449, "rewards/margins": 7.22337007522583, "rewards/rejected": -7.714725971221924, "step": 1303 }, { "epoch": 2.09, "learning_rate": 4.263773285770907e-07, "logits/chosen": -1.6132770776748657, "logits/rejected": -1.5890449285507202, "logps/chosen": -102.54973602294922, "logps/rejected": -139.01644897460938, "loss": 0.2029, "rewards/accuracies": 1.0, "rewards/chosen": -1.4258519411087036, "rewards/margins": 2.742027521133423, "rewards/rejected": -4.167879104614258, "step": 1304 }, { "epoch": 2.09, "learning_rate": 4.262782401902497e-07, "logits/chosen": -1.5892622470855713, "logits/rejected": -1.6467266082763672, "logps/chosen": -101.92103576660156, "logps/rejected": -179.23594665527344, "loss": 0.0807, "rewards/accuracies": 1.0, "rewards/chosen": -0.5818711519241333, "rewards/margins": 6.830059051513672, "rewards/rejected": -7.411930084228516, "step": 1305 }, { "epoch": 2.1, "learning_rate": 4.2617915180340865e-07, "logits/chosen": -1.691285490989685, "logits/rejected": -1.570646047592163, "logps/chosen": -126.37004089355469, "logps/rejected": -130.4638214111328, "loss": 0.2313, "rewards/accuracies": 1.0, "rewards/chosen": -2.1759960651397705, "rewards/margins": 2.188406229019165, "rewards/rejected": -4.364401817321777, "step": 1306 }, { "epoch": 2.1, "learning_rate": 4.2608006341656755e-07, "logits/chosen": -1.681898832321167, "logits/rejected": -1.7083895206451416, "logps/chosen": -98.27877044677734, "logps/rejected": -124.12835693359375, "loss": 0.1044, "rewards/accuracies": 1.0, "rewards/chosen": -0.12323473393917084, "rewards/margins": 2.5677223205566406, "rewards/rejected": -2.6909570693969727, "step": 1307 }, { "epoch": 2.1, "learning_rate": 4.259809750297265e-07, "logits/chosen": -1.708587884902954, "logits/rejected": -1.7394015789031982, "logps/chosen": -112.41564178466797, "logps/rejected": -131.631103515625, "loss": 0.1044, "rewards/accuracies": 1.0, "rewards/chosen": -1.4892308712005615, "rewards/margins": 2.9743919372558594, "rewards/rejected": -4.463622570037842, "step": 1308 }, { "epoch": 2.1, "learning_rate": 4.258818866428854e-07, "logits/chosen": -1.49684476852417, "logits/rejected": -1.5309290885925293, "logps/chosen": -100.93248748779297, "logps/rejected": -152.20045471191406, "loss": 0.108, "rewards/accuracies": 1.0, "rewards/chosen": -1.3335492610931396, "rewards/margins": 3.2699379920959473, "rewards/rejected": -4.603487491607666, "step": 1309 }, { "epoch": 2.1, "learning_rate": 4.257827982560443e-07, "logits/chosen": -1.671911358833313, "logits/rejected": -1.5683895349502563, "logps/chosen": -106.39058685302734, "logps/rejected": -133.99887084960938, "loss": 0.1588, "rewards/accuracies": 1.0, "rewards/chosen": -1.1430447101593018, "rewards/margins": 4.7854156494140625, "rewards/rejected": -5.928460121154785, "step": 1310 }, { "epoch": 2.1, "learning_rate": 4.2568370986920334e-07, "logits/chosen": -1.5010700225830078, "logits/rejected": -1.5616753101348877, "logps/chosen": -96.9313735961914, "logps/rejected": -132.6835479736328, "loss": 0.1598, "rewards/accuracies": 1.0, "rewards/chosen": -1.3041895627975464, "rewards/margins": 2.4039015769958496, "rewards/rejected": -3.7080912590026855, "step": 1311 }, { "epoch": 2.11, "learning_rate": 4.2558462148236225e-07, "logits/chosen": -1.5929434299468994, "logits/rejected": -1.7059326171875, "logps/chosen": -70.94236755371094, "logps/rejected": -128.29991149902344, "loss": 0.0903, "rewards/accuracies": 1.0, "rewards/chosen": -0.6376080513000488, "rewards/margins": 5.028254985809326, "rewards/rejected": -5.665863037109375, "step": 1312 }, { "epoch": 2.11, "learning_rate": 4.254855330955212e-07, "logits/chosen": -1.5884292125701904, "logits/rejected": -1.557417869567871, "logps/chosen": -92.57377624511719, "logps/rejected": -125.71702575683594, "loss": 0.1674, "rewards/accuracies": 1.0, "rewards/chosen": -1.7722769975662231, "rewards/margins": 3.0792806148529053, "rewards/rejected": -4.851557731628418, "step": 1313 }, { "epoch": 2.11, "learning_rate": 4.253864447086801e-07, "logits/chosen": -1.4356108903884888, "logits/rejected": -1.4120104312896729, "logps/chosen": -83.44403076171875, "logps/rejected": -164.661376953125, "loss": 0.1642, "rewards/accuracies": 1.0, "rewards/chosen": -1.1125842332839966, "rewards/margins": 5.615421295166016, "rewards/rejected": -6.728005409240723, "step": 1314 }, { "epoch": 2.11, "learning_rate": 4.25287356321839e-07, "logits/chosen": -1.499813437461853, "logits/rejected": -1.523715615272522, "logps/chosen": -96.14567565917969, "logps/rejected": -143.21331787109375, "loss": 0.2052, "rewards/accuracies": 1.0, "rewards/chosen": -1.221178650856018, "rewards/margins": 4.39111328125, "rewards/rejected": -5.612292289733887, "step": 1315 }, { "epoch": 2.11, "learning_rate": 4.2518826793499803e-07, "logits/chosen": -1.6569608449935913, "logits/rejected": -1.7023682594299316, "logps/chosen": -75.20372009277344, "logps/rejected": -135.3825225830078, "loss": 0.1672, "rewards/accuracies": 1.0, "rewards/chosen": -0.04155281186103821, "rewards/margins": 5.630866527557373, "rewards/rejected": -5.672419548034668, "step": 1316 }, { "epoch": 2.11, "learning_rate": 4.2508917954815694e-07, "logits/chosen": -1.6912086009979248, "logits/rejected": -1.6092019081115723, "logps/chosen": -122.26627349853516, "logps/rejected": -125.98919677734375, "loss": 0.1749, "rewards/accuracies": 1.0, "rewards/chosen": -0.9559603929519653, "rewards/margins": 5.213711261749268, "rewards/rejected": -6.169672012329102, "step": 1317 }, { "epoch": 2.12, "learning_rate": 4.249900911613159e-07, "logits/chosen": -1.5831563472747803, "logits/rejected": -1.6024799346923828, "logps/chosen": -128.5894775390625, "logps/rejected": -148.31863403320312, "loss": 0.2064, "rewards/accuracies": 1.0, "rewards/chosen": -1.634151816368103, "rewards/margins": 3.184436321258545, "rewards/rejected": -4.8185882568359375, "step": 1318 }, { "epoch": 2.12, "learning_rate": 4.248910027744748e-07, "logits/chosen": -1.5459542274475098, "logits/rejected": -1.5303688049316406, "logps/chosen": -107.01530456542969, "logps/rejected": -172.8377685546875, "loss": 0.2366, "rewards/accuracies": 1.0, "rewards/chosen": -1.4035950899124146, "rewards/margins": 4.822530746459961, "rewards/rejected": -6.226125240325928, "step": 1319 }, { "epoch": 2.12, "learning_rate": 4.247919143876337e-07, "logits/chosen": -1.5114527940750122, "logits/rejected": -1.5106487274169922, "logps/chosen": -113.87954711914062, "logps/rejected": -150.77975463867188, "loss": 0.113, "rewards/accuracies": 1.0, "rewards/chosen": -1.2779321670532227, "rewards/margins": 4.351925849914551, "rewards/rejected": -5.629858016967773, "step": 1320 }, { "epoch": 2.12, "learning_rate": 4.246928260007927e-07, "logits/chosen": -1.5606937408447266, "logits/rejected": -1.5328292846679688, "logps/chosen": -108.00250244140625, "logps/rejected": -135.02781677246094, "loss": 0.1357, "rewards/accuracies": 1.0, "rewards/chosen": -1.3235188722610474, "rewards/margins": 2.9059031009674072, "rewards/rejected": -4.229422092437744, "step": 1321 }, { "epoch": 2.12, "learning_rate": 4.2459373761395163e-07, "logits/chosen": -1.5690838098526, "logits/rejected": -1.5560059547424316, "logps/chosen": -114.4980697631836, "logps/rejected": -148.616943359375, "loss": 0.0875, "rewards/accuracies": 1.0, "rewards/chosen": -0.988562822341919, "rewards/margins": 3.3928020000457764, "rewards/rejected": -4.381364822387695, "step": 1322 }, { "epoch": 2.12, "learning_rate": 4.244946492271106e-07, "logits/chosen": -1.6794424057006836, "logits/rejected": -1.6560338735580444, "logps/chosen": -98.9140853881836, "logps/rejected": -128.63052368164062, "loss": 0.0779, "rewards/accuracies": 1.0, "rewards/chosen": -1.119200348854065, "rewards/margins": 3.031686305999756, "rewards/rejected": -4.150886535644531, "step": 1323 }, { "epoch": 2.13, "learning_rate": 4.243955608402695e-07, "logits/chosen": -1.6823468208312988, "logits/rejected": -1.6544523239135742, "logps/chosen": -70.92399597167969, "logps/rejected": -110.52389526367188, "loss": 0.1474, "rewards/accuracies": 1.0, "rewards/chosen": -0.7853749394416809, "rewards/margins": 3.3389179706573486, "rewards/rejected": -4.124292373657227, "step": 1324 }, { "epoch": 2.13, "learning_rate": 4.242964724534284e-07, "logits/chosen": -1.5590219497680664, "logits/rejected": -1.5179688930511475, "logps/chosen": -74.6555404663086, "logps/rejected": -100.9283447265625, "loss": 0.1318, "rewards/accuracies": 1.0, "rewards/chosen": -0.44475603103637695, "rewards/margins": 2.5158450603485107, "rewards/rejected": -2.9606008529663086, "step": 1325 }, { "epoch": 2.13, "learning_rate": 4.241973840665874e-07, "logits/chosen": -1.4580512046813965, "logits/rejected": -1.4559087753295898, "logps/chosen": -101.34197235107422, "logps/rejected": -115.62162780761719, "loss": 0.1282, "rewards/accuracies": 0.5, "rewards/chosen": -1.6387038230895996, "rewards/margins": 1.962522029876709, "rewards/rejected": -3.6012260913848877, "step": 1326 }, { "epoch": 2.13, "learning_rate": 4.240982956797463e-07, "logits/chosen": -1.5342864990234375, "logits/rejected": -1.4762542247772217, "logps/chosen": -111.10257720947266, "logps/rejected": -158.4173583984375, "loss": 0.1687, "rewards/accuracies": 1.0, "rewards/chosen": -1.4809216260910034, "rewards/margins": 6.160635471343994, "rewards/rejected": -7.641557216644287, "step": 1327 }, { "epoch": 2.13, "learning_rate": 4.2399920729290523e-07, "logits/chosen": -1.7026541233062744, "logits/rejected": -1.7147884368896484, "logps/chosen": -93.839599609375, "logps/rejected": -153.12818908691406, "loss": 0.1134, "rewards/accuracies": 1.0, "rewards/chosen": -0.7958133816719055, "rewards/margins": 6.794947147369385, "rewards/rejected": -7.590760231018066, "step": 1328 }, { "epoch": 2.13, "learning_rate": 4.239001189060642e-07, "logits/chosen": -1.5107717514038086, "logits/rejected": -1.4952237606048584, "logps/chosen": -110.3841323852539, "logps/rejected": -170.31298828125, "loss": 0.1944, "rewards/accuracies": 1.0, "rewards/chosen": -1.7613985538482666, "rewards/margins": 4.644292831420898, "rewards/rejected": -6.405691623687744, "step": 1329 }, { "epoch": 2.13, "learning_rate": 4.238010305192231e-07, "logits/chosen": -1.5817116498947144, "logits/rejected": -1.4830749034881592, "logps/chosen": -73.76655578613281, "logps/rejected": -111.94356536865234, "loss": 0.0671, "rewards/accuracies": 1.0, "rewards/chosen": 0.06894490122795105, "rewards/margins": 5.265876293182373, "rewards/rejected": -5.1969313621521, "step": 1330 }, { "epoch": 2.14, "learning_rate": 4.237019421323821e-07, "logits/chosen": -1.5164767503738403, "logits/rejected": -1.6011509895324707, "logps/chosen": -93.28781127929688, "logps/rejected": -165.05960083007812, "loss": 0.1573, "rewards/accuracies": 1.0, "rewards/chosen": -0.7919735312461853, "rewards/margins": 6.57158899307251, "rewards/rejected": -7.36356258392334, "step": 1331 }, { "epoch": 2.14, "learning_rate": 4.23602853745541e-07, "logits/chosen": -1.6472721099853516, "logits/rejected": -1.6341397762298584, "logps/chosen": -108.298583984375, "logps/rejected": -110.4988021850586, "loss": 0.2449, "rewards/accuracies": 0.5, "rewards/chosen": -1.889204978942871, "rewards/margins": 0.37306874990463257, "rewards/rejected": -2.2622737884521484, "step": 1332 }, { "epoch": 2.14, "learning_rate": 4.235037653586999e-07, "logits/chosen": -1.5981478691101074, "logits/rejected": -1.6397006511688232, "logps/chosen": -93.00361633300781, "logps/rejected": -153.46026611328125, "loss": 0.0952, "rewards/accuracies": 1.0, "rewards/chosen": -0.598907470703125, "rewards/margins": 5.169729232788086, "rewards/rejected": -5.768636703491211, "step": 1333 }, { "epoch": 2.14, "learning_rate": 4.234046769718589e-07, "logits/chosen": -1.6387025117874146, "logits/rejected": -1.5589203834533691, "logps/chosen": -100.74415588378906, "logps/rejected": -129.70848083496094, "loss": 0.0555, "rewards/accuracies": 1.0, "rewards/chosen": -0.7530797719955444, "rewards/margins": 4.149608135223389, "rewards/rejected": -4.902688026428223, "step": 1334 }, { "epoch": 2.14, "learning_rate": 4.233055885850178e-07, "logits/chosen": -1.68950617313385, "logits/rejected": -1.6537532806396484, "logps/chosen": -87.95309448242188, "logps/rejected": -136.2963409423828, "loss": 0.172, "rewards/accuracies": 1.0, "rewards/chosen": -0.2102571427822113, "rewards/margins": 6.490574359893799, "rewards/rejected": -6.700831413269043, "step": 1335 }, { "epoch": 2.14, "learning_rate": 4.2320650019817675e-07, "logits/chosen": -1.5810567140579224, "logits/rejected": -1.687589168548584, "logps/chosen": -80.44093322753906, "logps/rejected": -113.61875915527344, "loss": 0.0805, "rewards/accuracies": 1.0, "rewards/chosen": -0.49664080142974854, "rewards/margins": 3.9355032444000244, "rewards/rejected": -4.4321441650390625, "step": 1336 }, { "epoch": 2.15, "learning_rate": 4.231074118113357e-07, "logits/chosen": -1.6273839473724365, "logits/rejected": -1.614226222038269, "logps/chosen": -84.90538024902344, "logps/rejected": -124.92545318603516, "loss": 0.0709, "rewards/accuracies": 1.0, "rewards/chosen": 0.009976577013731003, "rewards/margins": 4.270120143890381, "rewards/rejected": -4.260143280029297, "step": 1337 }, { "epoch": 2.15, "learning_rate": 4.230083234244946e-07, "logits/chosen": -1.5504019260406494, "logits/rejected": -1.4655872583389282, "logps/chosen": -113.11661529541016, "logps/rejected": -151.08946228027344, "loss": 0.2112, "rewards/accuracies": 1.0, "rewards/chosen": -1.1648348569869995, "rewards/margins": 3.0206949710845947, "rewards/rejected": -4.185529708862305, "step": 1338 }, { "epoch": 2.15, "learning_rate": 4.229092350376536e-07, "logits/chosen": -1.428726077079773, "logits/rejected": -1.3749570846557617, "logps/chosen": -88.1656265258789, "logps/rejected": -121.31806945800781, "loss": 0.1139, "rewards/accuracies": 1.0, "rewards/chosen": -0.4561257064342499, "rewards/margins": 3.113236427307129, "rewards/rejected": -3.569362163543701, "step": 1339 }, { "epoch": 2.15, "learning_rate": 4.228101466508125e-07, "logits/chosen": -1.45125412940979, "logits/rejected": -1.37581467628479, "logps/chosen": -117.9089584350586, "logps/rejected": -139.68336486816406, "loss": 0.1857, "rewards/accuracies": 1.0, "rewards/chosen": -1.8905314207077026, "rewards/margins": 3.9694597721099854, "rewards/rejected": -5.859991073608398, "step": 1340 }, { "epoch": 2.15, "learning_rate": 4.2271105826397144e-07, "logits/chosen": -1.6286014318466187, "logits/rejected": -1.7025301456451416, "logps/chosen": -84.77810668945312, "logps/rejected": -163.83836364746094, "loss": 0.1563, "rewards/accuracies": 1.0, "rewards/chosen": -0.2818801999092102, "rewards/margins": 7.251411437988281, "rewards/rejected": -7.533291816711426, "step": 1341 }, { "epoch": 2.15, "learning_rate": 4.226119698771304e-07, "logits/chosen": -1.5373176336288452, "logits/rejected": -1.6316320896148682, "logps/chosen": -100.63174438476562, "logps/rejected": -122.50738525390625, "loss": 0.1647, "rewards/accuracies": 1.0, "rewards/chosen": -1.4930139780044556, "rewards/margins": 1.776943564414978, "rewards/rejected": -3.2699577808380127, "step": 1342 }, { "epoch": 2.16, "learning_rate": 4.225128814902893e-07, "logits/chosen": -1.5572986602783203, "logits/rejected": -1.530056118965149, "logps/chosen": -115.38034057617188, "logps/rejected": -121.03761291503906, "loss": 0.1052, "rewards/accuracies": 1.0, "rewards/chosen": -1.1698150634765625, "rewards/margins": 2.0185537338256836, "rewards/rejected": -3.188368797302246, "step": 1343 }, { "epoch": 2.16, "learning_rate": 4.2241379310344827e-07, "logits/chosen": -1.6170669794082642, "logits/rejected": -1.69415283203125, "logps/chosen": -69.53729248046875, "logps/rejected": -144.02574157714844, "loss": 0.0835, "rewards/accuracies": 1.0, "rewards/chosen": -0.21852093935012817, "rewards/margins": 4.8164896965026855, "rewards/rejected": -5.035010814666748, "step": 1344 }, { "epoch": 2.16, "learning_rate": 4.223147047166072e-07, "logits/chosen": -1.5123857259750366, "logits/rejected": -1.5019184350967407, "logps/chosen": -111.23906707763672, "logps/rejected": -149.93643188476562, "loss": 0.2242, "rewards/accuracies": 1.0, "rewards/chosen": -0.6050783395767212, "rewards/margins": 5.282061576843262, "rewards/rejected": -5.887139797210693, "step": 1345 }, { "epoch": 2.16, "learning_rate": 4.2221561632976614e-07, "logits/chosen": -1.6199452877044678, "logits/rejected": -1.6723341941833496, "logps/chosen": -66.60102844238281, "logps/rejected": -121.03868865966797, "loss": 0.1255, "rewards/accuracies": 1.0, "rewards/chosen": 0.14879541099071503, "rewards/margins": 4.57734489440918, "rewards/rejected": -4.428549289703369, "step": 1346 }, { "epoch": 2.16, "learning_rate": 4.221165279429251e-07, "logits/chosen": -1.4633084535598755, "logits/rejected": -1.52935791015625, "logps/chosen": -99.8571548461914, "logps/rejected": -134.326416015625, "loss": 0.0789, "rewards/accuracies": 1.0, "rewards/chosen": -0.8046347498893738, "rewards/margins": 2.899245500564575, "rewards/rejected": -3.7038803100585938, "step": 1347 }, { "epoch": 2.16, "learning_rate": 4.22017439556084e-07, "logits/chosen": -1.646421194076538, "logits/rejected": -1.5971318483352661, "logps/chosen": -98.25833892822266, "logps/rejected": -145.51687622070312, "loss": 0.077, "rewards/accuracies": 0.75, "rewards/chosen": -0.6878421902656555, "rewards/margins": 5.739898681640625, "rewards/rejected": -6.427741050720215, "step": 1348 }, { "epoch": 2.17, "learning_rate": 4.2191835116924296e-07, "logits/chosen": -1.601574182510376, "logits/rejected": -1.6203757524490356, "logps/chosen": -94.61051940917969, "logps/rejected": -190.1167755126953, "loss": 0.0662, "rewards/accuracies": 1.0, "rewards/chosen": -1.6150786876678467, "rewards/margins": 7.861876487731934, "rewards/rejected": -9.47695541381836, "step": 1349 }, { "epoch": 2.17, "learning_rate": 4.2181926278240187e-07, "logits/chosen": -1.778205156326294, "logits/rejected": -1.7022258043289185, "logps/chosen": -102.38400268554688, "logps/rejected": -130.54403686523438, "loss": 0.0751, "rewards/accuracies": 1.0, "rewards/chosen": -1.063835620880127, "rewards/margins": 4.951713562011719, "rewards/rejected": -6.0155487060546875, "step": 1350 }, { "epoch": 2.17, "learning_rate": 4.2172017439556083e-07, "logits/chosen": -1.4367897510528564, "logits/rejected": -1.4633172750473022, "logps/chosen": -111.20235443115234, "logps/rejected": -157.0748291015625, "loss": 0.1627, "rewards/accuracies": 1.0, "rewards/chosen": -1.6279869079589844, "rewards/margins": 4.826987266540527, "rewards/rejected": -6.454974174499512, "step": 1351 }, { "epoch": 2.17, "learning_rate": 4.2162108600871974e-07, "logits/chosen": -1.7334613800048828, "logits/rejected": -1.5678167343139648, "logps/chosen": -114.53622436523438, "logps/rejected": -148.06655883789062, "loss": 0.1574, "rewards/accuracies": 1.0, "rewards/chosen": -0.9566946625709534, "rewards/margins": 3.5877747535705566, "rewards/rejected": -4.544469833374023, "step": 1352 }, { "epoch": 2.17, "learning_rate": 4.215219976218787e-07, "logits/chosen": -1.3929617404937744, "logits/rejected": -1.3696813583374023, "logps/chosen": -90.74357604980469, "logps/rejected": -132.06875610351562, "loss": 0.2188, "rewards/accuracies": 0.75, "rewards/chosen": -1.7109330892562866, "rewards/margins": 2.5364789962768555, "rewards/rejected": -4.247411727905273, "step": 1353 }, { "epoch": 2.17, "learning_rate": 4.2142290923503766e-07, "logits/chosen": -1.6520874500274658, "logits/rejected": -1.5476669073104858, "logps/chosen": -93.54718017578125, "logps/rejected": -111.56974792480469, "loss": 0.1573, "rewards/accuracies": 0.75, "rewards/chosen": -1.2347793579101562, "rewards/margins": 3.2729408740997314, "rewards/rejected": -4.507720470428467, "step": 1354 }, { "epoch": 2.17, "learning_rate": 4.2132382084819656e-07, "logits/chosen": -1.7001110315322876, "logits/rejected": -1.683689832687378, "logps/chosen": -130.0028076171875, "logps/rejected": -143.41941833496094, "loss": 0.1303, "rewards/accuracies": 1.0, "rewards/chosen": -2.154772996902466, "rewards/margins": 2.82552433013916, "rewards/rejected": -4.980297088623047, "step": 1355 }, { "epoch": 2.18, "learning_rate": 4.212247324613555e-07, "logits/chosen": -1.6617591381072998, "logits/rejected": -1.6486166715621948, "logps/chosen": -69.67792510986328, "logps/rejected": -133.50640869140625, "loss": 0.0995, "rewards/accuracies": 1.0, "rewards/chosen": -0.32201167941093445, "rewards/margins": 5.735640048980713, "rewards/rejected": -6.057651519775391, "step": 1356 }, { "epoch": 2.18, "learning_rate": 4.2112564407451443e-07, "logits/chosen": -1.4548852443695068, "logits/rejected": -1.4365870952606201, "logps/chosen": -93.72889709472656, "logps/rejected": -135.6934051513672, "loss": 0.0622, "rewards/accuracies": 1.0, "rewards/chosen": -1.0648747682571411, "rewards/margins": 4.195525646209717, "rewards/rejected": -5.260400772094727, "step": 1357 }, { "epoch": 2.18, "learning_rate": 4.210265556876734e-07, "logits/chosen": -1.4290966987609863, "logits/rejected": -1.601818323135376, "logps/chosen": -83.42930603027344, "logps/rejected": -147.025146484375, "loss": 0.1952, "rewards/accuracies": 1.0, "rewards/chosen": -0.8801031112670898, "rewards/margins": 3.3791656494140625, "rewards/rejected": -4.259268760681152, "step": 1358 }, { "epoch": 2.18, "learning_rate": 4.2092746730083235e-07, "logits/chosen": -1.6270151138305664, "logits/rejected": -1.6147451400756836, "logps/chosen": -107.90476989746094, "logps/rejected": -140.41925048828125, "loss": 0.0612, "rewards/accuracies": 1.0, "rewards/chosen": -1.2110953330993652, "rewards/margins": 2.666970729827881, "rewards/rejected": -3.878066062927246, "step": 1359 }, { "epoch": 2.18, "learning_rate": 4.2082837891399126e-07, "logits/chosen": -1.4868501424789429, "logits/rejected": -1.4638943672180176, "logps/chosen": -117.21194458007812, "logps/rejected": -138.11985778808594, "loss": 0.1144, "rewards/accuracies": 1.0, "rewards/chosen": -1.6306129693984985, "rewards/margins": 4.488436698913574, "rewards/rejected": -6.119049549102783, "step": 1360 }, { "epoch": 2.18, "learning_rate": 4.2072929052715016e-07, "logits/chosen": -1.581730842590332, "logits/rejected": -1.6036536693572998, "logps/chosen": -82.90255737304688, "logps/rejected": -142.49960327148438, "loss": 0.1058, "rewards/accuracies": 1.0, "rewards/chosen": -0.32142436504364014, "rewards/margins": 5.161440849304199, "rewards/rejected": -5.482865333557129, "step": 1361 }, { "epoch": 2.19, "learning_rate": 4.206302021403091e-07, "logits/chosen": -1.6582212448120117, "logits/rejected": -1.734370470046997, "logps/chosen": -98.61082458496094, "logps/rejected": -156.35775756835938, "loss": 0.0524, "rewards/accuracies": 1.0, "rewards/chosen": -0.8819622993469238, "rewards/margins": 3.191786050796509, "rewards/rejected": -4.0737481117248535, "step": 1362 }, { "epoch": 2.19, "learning_rate": 4.205311137534681e-07, "logits/chosen": -1.631563425064087, "logits/rejected": -1.652240514755249, "logps/chosen": -97.85725402832031, "logps/rejected": -141.07749938964844, "loss": 0.0752, "rewards/accuracies": 1.0, "rewards/chosen": -0.8314801454544067, "rewards/margins": 4.55560302734375, "rewards/rejected": -5.387083053588867, "step": 1363 }, { "epoch": 2.19, "learning_rate": 4.2043202536662704e-07, "logits/chosen": -1.7441623210906982, "logits/rejected": -1.7486480474472046, "logps/chosen": -106.75997161865234, "logps/rejected": -176.14306640625, "loss": 0.2211, "rewards/accuracies": 1.0, "rewards/chosen": -2.407038927078247, "rewards/margins": 5.091558456420898, "rewards/rejected": -7.498597145080566, "step": 1364 }, { "epoch": 2.19, "learning_rate": 4.2033293697978595e-07, "logits/chosen": -1.6614198684692383, "logits/rejected": -1.6448270082473755, "logps/chosen": -105.43278503417969, "logps/rejected": -165.24612426757812, "loss": 0.0672, "rewards/accuracies": 1.0, "rewards/chosen": -1.2480764389038086, "rewards/margins": 6.223727226257324, "rewards/rejected": -7.471804141998291, "step": 1365 }, { "epoch": 2.19, "learning_rate": 4.2023384859294486e-07, "logits/chosen": -1.512107253074646, "logits/rejected": -1.6087093353271484, "logps/chosen": -89.76960754394531, "logps/rejected": -187.8627166748047, "loss": 0.2553, "rewards/accuracies": 1.0, "rewards/chosen": -1.3199329376220703, "rewards/margins": 6.251929759979248, "rewards/rejected": -7.57186222076416, "step": 1366 }, { "epoch": 2.19, "learning_rate": 4.201347602061038e-07, "logits/chosen": -1.4802409410476685, "logits/rejected": -1.4524266719818115, "logps/chosen": -125.40880584716797, "logps/rejected": -145.90513610839844, "loss": 0.1283, "rewards/accuracies": 1.0, "rewards/chosen": -1.538398265838623, "rewards/margins": 3.8400521278381348, "rewards/rejected": -5.378450870513916, "step": 1367 }, { "epoch": 2.2, "learning_rate": 4.200356718192628e-07, "logits/chosen": -1.8189306259155273, "logits/rejected": -1.862351417541504, "logps/chosen": -96.68977355957031, "logps/rejected": -121.31135559082031, "loss": 0.1026, "rewards/accuracies": 0.75, "rewards/chosen": -1.1385712623596191, "rewards/margins": 1.94465970993042, "rewards/rejected": -3.083230972290039, "step": 1368 }, { "epoch": 2.2, "learning_rate": 4.1993658343242173e-07, "logits/chosen": -1.5251394510269165, "logits/rejected": -1.5063621997833252, "logps/chosen": -111.1533203125, "logps/rejected": -132.92153930664062, "loss": 0.1697, "rewards/accuracies": 1.0, "rewards/chosen": -2.482330799102783, "rewards/margins": 2.9730148315429688, "rewards/rejected": -5.455345630645752, "step": 1369 }, { "epoch": 2.2, "learning_rate": 4.1983749504558064e-07, "logits/chosen": -1.7229578495025635, "logits/rejected": -1.7039639949798584, "logps/chosen": -87.02064514160156, "logps/rejected": -140.60501098632812, "loss": 0.122, "rewards/accuracies": 1.0, "rewards/chosen": -1.7793865203857422, "rewards/margins": 4.140188217163086, "rewards/rejected": -5.919575214385986, "step": 1370 }, { "epoch": 2.2, "learning_rate": 4.1973840665873955e-07, "logits/chosen": -1.6554880142211914, "logits/rejected": -1.6555004119873047, "logps/chosen": -94.99942016601562, "logps/rejected": -143.53350830078125, "loss": 0.1106, "rewards/accuracies": 0.75, "rewards/chosen": -1.3716769218444824, "rewards/margins": 4.968088150024414, "rewards/rejected": -6.339764595031738, "step": 1371 }, { "epoch": 2.2, "learning_rate": 4.196393182718985e-07, "logits/chosen": -1.681357979774475, "logits/rejected": -1.7014377117156982, "logps/chosen": -99.43440246582031, "logps/rejected": -140.5343780517578, "loss": 0.1715, "rewards/accuracies": 0.75, "rewards/chosen": -1.3499858379364014, "rewards/margins": 3.8148181438446045, "rewards/rejected": -5.164804458618164, "step": 1372 }, { "epoch": 2.2, "learning_rate": 4.195402298850574e-07, "logits/chosen": -1.665648341178894, "logits/rejected": -1.7325962781906128, "logps/chosen": -102.22709655761719, "logps/rejected": -161.62277221679688, "loss": 0.066, "rewards/accuracies": 1.0, "rewards/chosen": -2.0520827770233154, "rewards/margins": 4.549915313720703, "rewards/rejected": -6.601997375488281, "step": 1373 }, { "epoch": 2.21, "learning_rate": 4.1944114149821643e-07, "logits/chosen": -1.755373477935791, "logits/rejected": -1.6984219551086426, "logps/chosen": -102.77658081054688, "logps/rejected": -141.46978759765625, "loss": 0.0639, "rewards/accuracies": 1.0, "rewards/chosen": -1.304837942123413, "rewards/margins": 4.596761703491211, "rewards/rejected": -5.901599884033203, "step": 1374 }, { "epoch": 2.21, "learning_rate": 4.1934205311137533e-07, "logits/chosen": -1.5537302494049072, "logits/rejected": -1.5209190845489502, "logps/chosen": -96.80744934082031, "logps/rejected": -136.14761352539062, "loss": 0.1563, "rewards/accuracies": 0.75, "rewards/chosen": -0.9905681610107422, "rewards/margins": 5.139268398284912, "rewards/rejected": -6.129836559295654, "step": 1375 }, { "epoch": 2.21, "learning_rate": 4.1924296472453424e-07, "logits/chosen": -1.5534892082214355, "logits/rejected": -1.5238069295883179, "logps/chosen": -94.27818298339844, "logps/rejected": -171.1336212158203, "loss": 0.2713, "rewards/accuracies": 1.0, "rewards/chosen": -0.9838035106658936, "rewards/margins": 8.661515235900879, "rewards/rejected": -9.645318984985352, "step": 1376 }, { "epoch": 2.21, "learning_rate": 4.191438763376932e-07, "logits/chosen": -1.57021963596344, "logits/rejected": -1.683388113975525, "logps/chosen": -84.00981140136719, "logps/rejected": -127.963134765625, "loss": 0.124, "rewards/accuracies": 1.0, "rewards/chosen": -1.3468647003173828, "rewards/margins": 3.8721721172332764, "rewards/rejected": -5.219037055969238, "step": 1377 }, { "epoch": 2.21, "learning_rate": 4.190447879508521e-07, "logits/chosen": -1.649670958518982, "logits/rejected": -1.554040789604187, "logps/chosen": -118.57820129394531, "logps/rejected": -154.29811096191406, "loss": 0.0877, "rewards/accuracies": 1.0, "rewards/chosen": -1.2263187170028687, "rewards/margins": 4.598365783691406, "rewards/rejected": -5.8246846199035645, "step": 1378 }, { "epoch": 2.21, "learning_rate": 4.189456995640111e-07, "logits/chosen": -1.5287781953811646, "logits/rejected": -1.5195878744125366, "logps/chosen": -78.37095642089844, "logps/rejected": -140.1475830078125, "loss": 0.0885, "rewards/accuracies": 1.0, "rewards/chosen": -0.7970118522644043, "rewards/margins": 5.096306800842285, "rewards/rejected": -5.8933186531066895, "step": 1379 }, { "epoch": 2.22, "learning_rate": 4.1884661117717003e-07, "logits/chosen": -1.8367159366607666, "logits/rejected": -1.797780156135559, "logps/chosen": -96.09795379638672, "logps/rejected": -142.07186889648438, "loss": 0.0981, "rewards/accuracies": 1.0, "rewards/chosen": 0.1155853271484375, "rewards/margins": 4.8792266845703125, "rewards/rejected": -4.763641834259033, "step": 1380 }, { "epoch": 2.22, "learning_rate": 4.1874752279032893e-07, "logits/chosen": -1.683313250541687, "logits/rejected": -1.6235566139221191, "logps/chosen": -122.1700439453125, "logps/rejected": -152.3981475830078, "loss": 0.1572, "rewards/accuracies": 1.0, "rewards/chosen": -2.255657196044922, "rewards/margins": 3.869410514831543, "rewards/rejected": -6.125067710876465, "step": 1381 }, { "epoch": 2.22, "learning_rate": 4.186484344034879e-07, "logits/chosen": -1.5765355825424194, "logits/rejected": -1.6029249429702759, "logps/chosen": -75.38615417480469, "logps/rejected": -148.80088806152344, "loss": 0.169, "rewards/accuracies": 1.0, "rewards/chosen": -0.42776206135749817, "rewards/margins": 7.808515548706055, "rewards/rejected": -8.23627758026123, "step": 1382 }, { "epoch": 2.22, "learning_rate": 4.185493460166468e-07, "logits/chosen": -1.6555942296981812, "logits/rejected": -1.5520952939987183, "logps/chosen": -98.8029556274414, "logps/rejected": -117.80635833740234, "loss": 0.1483, "rewards/accuracies": 1.0, "rewards/chosen": -0.1686222106218338, "rewards/margins": 4.035799026489258, "rewards/rejected": -4.204421520233154, "step": 1383 }, { "epoch": 2.22, "learning_rate": 4.184502576298058e-07, "logits/chosen": -1.6027350425720215, "logits/rejected": -1.5766801834106445, "logps/chosen": -90.74070739746094, "logps/rejected": -147.1011962890625, "loss": 0.1556, "rewards/accuracies": 1.0, "rewards/chosen": -0.7096178531646729, "rewards/margins": 4.580148696899414, "rewards/rejected": -5.289766311645508, "step": 1384 }, { "epoch": 2.22, "learning_rate": 4.183511692429647e-07, "logits/chosen": -1.5872423648834229, "logits/rejected": -1.633470892906189, "logps/chosen": -100.7939224243164, "logps/rejected": -150.63735961914062, "loss": 0.1308, "rewards/accuracies": 1.0, "rewards/chosen": -2.1119298934936523, "rewards/margins": 3.558668613433838, "rewards/rejected": -5.67059850692749, "step": 1385 }, { "epoch": 2.22, "learning_rate": 4.1825208085612363e-07, "logits/chosen": -1.5905462503433228, "logits/rejected": -1.619890809059143, "logps/chosen": -100.66637420654297, "logps/rejected": -133.8612518310547, "loss": 0.1408, "rewards/accuracies": 0.75, "rewards/chosen": -1.5988945960998535, "rewards/margins": 3.444685220718384, "rewards/rejected": -5.043579578399658, "step": 1386 }, { "epoch": 2.23, "learning_rate": 4.181529924692826e-07, "logits/chosen": -1.5711575746536255, "logits/rejected": -1.5867186784744263, "logps/chosen": -121.06836700439453, "logps/rejected": -148.47393798828125, "loss": 0.0703, "rewards/accuracies": 1.0, "rewards/chosen": -1.0618984699249268, "rewards/margins": 3.504014492034912, "rewards/rejected": -4.565913200378418, "step": 1387 }, { "epoch": 2.23, "learning_rate": 4.180539040824415e-07, "logits/chosen": -1.5405067205429077, "logits/rejected": -1.595107078552246, "logps/chosen": -107.74894714355469, "logps/rejected": -139.64031982421875, "loss": 0.163, "rewards/accuracies": 1.0, "rewards/chosen": -1.4199810028076172, "rewards/margins": 4.260577201843262, "rewards/rejected": -5.680558204650879, "step": 1388 }, { "epoch": 2.23, "learning_rate": 4.179548156956005e-07, "logits/chosen": -1.633763074874878, "logits/rejected": -1.6057320833206177, "logps/chosen": -81.29264831542969, "logps/rejected": -134.66732788085938, "loss": 0.0886, "rewards/accuracies": 1.0, "rewards/chosen": -1.0964432954788208, "rewards/margins": 5.743333339691162, "rewards/rejected": -6.839776515960693, "step": 1389 }, { "epoch": 2.23, "learning_rate": 4.178557273087594e-07, "logits/chosen": -1.6215623617172241, "logits/rejected": -1.693090558052063, "logps/chosen": -108.61741638183594, "logps/rejected": -176.4752655029297, "loss": 0.0879, "rewards/accuracies": 1.0, "rewards/chosen": -4.143627166748047, "rewards/margins": 5.481383800506592, "rewards/rejected": -9.625011444091797, "step": 1390 }, { "epoch": 2.23, "learning_rate": 4.177566389219183e-07, "logits/chosen": -1.7449121475219727, "logits/rejected": -1.7119545936584473, "logps/chosen": -89.17000579833984, "logps/rejected": -146.4475860595703, "loss": 0.1057, "rewards/accuracies": 1.0, "rewards/chosen": -1.7062932252883911, "rewards/margins": 5.982682228088379, "rewards/rejected": -7.6889753341674805, "step": 1391 }, { "epoch": 2.23, "learning_rate": 4.176575505350773e-07, "logits/chosen": -1.6247495412826538, "logits/rejected": -1.6377195119857788, "logps/chosen": -101.07286071777344, "logps/rejected": -139.48452758789062, "loss": 0.2497, "rewards/accuracies": 1.0, "rewards/chosen": -1.5493354797363281, "rewards/margins": 3.958782911300659, "rewards/rejected": -5.508118629455566, "step": 1392 }, { "epoch": 2.24, "learning_rate": 4.175584621482362e-07, "logits/chosen": -1.4251034259796143, "logits/rejected": -1.4867981672286987, "logps/chosen": -95.80135345458984, "logps/rejected": -145.24655151367188, "loss": 0.1444, "rewards/accuracies": 0.75, "rewards/chosen": -2.892636299133301, "rewards/margins": 3.496647357940674, "rewards/rejected": -6.389283657073975, "step": 1393 }, { "epoch": 2.24, "learning_rate": 4.174593737613952e-07, "logits/chosen": -1.5205965042114258, "logits/rejected": -1.600287914276123, "logps/chosen": -97.33863830566406, "logps/rejected": -143.8543243408203, "loss": 0.1227, "rewards/accuracies": 0.75, "rewards/chosen": -1.9116039276123047, "rewards/margins": 3.9765703678131104, "rewards/rejected": -5.888174057006836, "step": 1394 }, { "epoch": 2.24, "learning_rate": 4.173602853745541e-07, "logits/chosen": -1.4192677736282349, "logits/rejected": -1.4076875448226929, "logps/chosen": -91.93575286865234, "logps/rejected": -130.5423126220703, "loss": 0.1014, "rewards/accuracies": 1.0, "rewards/chosen": -1.1891921758651733, "rewards/margins": 3.532787322998047, "rewards/rejected": -4.72197961807251, "step": 1395 }, { "epoch": 2.24, "learning_rate": 4.17261196987713e-07, "logits/chosen": -1.5313304662704468, "logits/rejected": -1.5571908950805664, "logps/chosen": -95.07061767578125, "logps/rejected": -134.188232421875, "loss": 0.0989, "rewards/accuracies": 0.75, "rewards/chosen": -2.5752620697021484, "rewards/margins": 3.2661354541778564, "rewards/rejected": -5.841397285461426, "step": 1396 }, { "epoch": 2.24, "learning_rate": 4.1716210860087197e-07, "logits/chosen": -1.7214677333831787, "logits/rejected": -1.8196989297866821, "logps/chosen": -109.36639404296875, "logps/rejected": -157.03326416015625, "loss": 0.063, "rewards/accuracies": 1.0, "rewards/chosen": -1.008872628211975, "rewards/margins": 3.504770040512085, "rewards/rejected": -4.51364278793335, "step": 1397 }, { "epoch": 2.24, "learning_rate": 4.170630202140309e-07, "logits/chosen": -1.5224758386611938, "logits/rejected": -1.5698275566101074, "logps/chosen": -61.47170639038086, "logps/rejected": -197.53671264648438, "loss": 0.1511, "rewards/accuracies": 1.0, "rewards/chosen": -0.180962473154068, "rewards/margins": 9.377080917358398, "rewards/rejected": -9.558042526245117, "step": 1398 }, { "epoch": 2.25, "learning_rate": 4.169639318271898e-07, "logits/chosen": -1.7194334268569946, "logits/rejected": -1.5725449323654175, "logps/chosen": -115.42164611816406, "logps/rejected": -126.91925048828125, "loss": 0.1765, "rewards/accuracies": 1.0, "rewards/chosen": -0.8592162728309631, "rewards/margins": 4.355167388916016, "rewards/rejected": -5.214383602142334, "step": 1399 }, { "epoch": 2.25, "learning_rate": 4.168648434403488e-07, "logits/chosen": -1.621013879776001, "logits/rejected": -1.6337149143218994, "logps/chosen": -110.72764587402344, "logps/rejected": -118.30073547363281, "loss": 0.1178, "rewards/accuracies": 0.75, "rewards/chosen": -2.193861961364746, "rewards/margins": 1.8688832521438599, "rewards/rejected": -4.062745094299316, "step": 1400 }, { "epoch": 2.25, "learning_rate": 4.167657550535077e-07, "logits/chosen": -1.5210278034210205, "logits/rejected": -1.598259687423706, "logps/chosen": -100.45932006835938, "logps/rejected": -135.28759765625, "loss": 0.1723, "rewards/accuracies": 1.0, "rewards/chosen": -1.1652535200119019, "rewards/margins": 3.519408941268921, "rewards/rejected": -4.684662342071533, "step": 1401 }, { "epoch": 2.25, "learning_rate": 4.1666666666666667e-07, "logits/chosen": -1.7496660947799683, "logits/rejected": -1.799971342086792, "logps/chosen": -97.34063720703125, "logps/rejected": -124.48173522949219, "loss": 0.1302, "rewards/accuracies": 0.75, "rewards/chosen": -0.8495504856109619, "rewards/margins": 1.5857621431350708, "rewards/rejected": -2.435312509536743, "step": 1402 }, { "epoch": 2.25, "learning_rate": 4.1656757827982557e-07, "logits/chosen": -1.4571690559387207, "logits/rejected": -1.4530446529388428, "logps/chosen": -84.40528869628906, "logps/rejected": -134.15277099609375, "loss": 0.1221, "rewards/accuracies": 1.0, "rewards/chosen": -0.4465563893318176, "rewards/margins": 5.36118745803833, "rewards/rejected": -5.807743549346924, "step": 1403 }, { "epoch": 2.25, "learning_rate": 4.164684898929845e-07, "logits/chosen": -1.5063141584396362, "logits/rejected": -1.5741297006607056, "logps/chosen": -84.13290405273438, "logps/rejected": -156.12757873535156, "loss": 0.1507, "rewards/accuracies": 1.0, "rewards/chosen": -0.5198636054992676, "rewards/margins": 5.057949542999268, "rewards/rejected": -5.577813148498535, "step": 1404 }, { "epoch": 2.26, "learning_rate": 4.163694015061435e-07, "logits/chosen": -1.695873498916626, "logits/rejected": -1.6493498086929321, "logps/chosen": -114.68113708496094, "logps/rejected": -139.91079711914062, "loss": 0.1752, "rewards/accuracies": 1.0, "rewards/chosen": -0.12272852659225464, "rewards/margins": 5.852752685546875, "rewards/rejected": -5.975481033325195, "step": 1405 }, { "epoch": 2.26, "learning_rate": 4.162703131193024e-07, "logits/chosen": -1.5643233060836792, "logits/rejected": -1.5608785152435303, "logps/chosen": -88.06491088867188, "logps/rejected": -111.77442932128906, "loss": 0.073, "rewards/accuracies": 0.75, "rewards/chosen": -1.3164451122283936, "rewards/margins": 2.7446956634521484, "rewards/rejected": -4.061140537261963, "step": 1406 }, { "epoch": 2.26, "learning_rate": 4.1617122473246136e-07, "logits/chosen": -1.6841155290603638, "logits/rejected": -1.6822530031204224, "logps/chosen": -109.94157409667969, "logps/rejected": -145.05624389648438, "loss": 0.1478, "rewards/accuracies": 1.0, "rewards/chosen": -1.2020177841186523, "rewards/margins": 4.178867340087891, "rewards/rejected": -5.380885124206543, "step": 1407 }, { "epoch": 2.26, "learning_rate": 4.1607213634562027e-07, "logits/chosen": -1.532741665840149, "logits/rejected": -1.4998644590377808, "logps/chosen": -126.99593353271484, "logps/rejected": -160.82803344726562, "loss": 0.1312, "rewards/accuracies": 1.0, "rewards/chosen": -2.1753928661346436, "rewards/margins": 4.831905364990234, "rewards/rejected": -7.007298469543457, "step": 1408 }, { "epoch": 2.26, "learning_rate": 4.1597304795877917e-07, "logits/chosen": -1.7240633964538574, "logits/rejected": -1.694566011428833, "logps/chosen": -105.62854766845703, "logps/rejected": -159.24432373046875, "loss": 0.1686, "rewards/accuracies": 0.75, "rewards/chosen": -2.0441250801086426, "rewards/margins": 3.712026834487915, "rewards/rejected": -5.756152153015137, "step": 1409 }, { "epoch": 2.26, "learning_rate": 4.158739595719382e-07, "logits/chosen": -1.665038824081421, "logits/rejected": -1.6948949098587036, "logps/chosen": -113.11761474609375, "logps/rejected": -156.4816131591797, "loss": 0.1328, "rewards/accuracies": 1.0, "rewards/chosen": -2.344834327697754, "rewards/margins": 3.006448268890381, "rewards/rejected": -5.351282119750977, "step": 1410 }, { "epoch": 2.26, "learning_rate": 4.157748711850971e-07, "logits/chosen": -1.5335407257080078, "logits/rejected": -1.5148626565933228, "logps/chosen": -99.43252563476562, "logps/rejected": -125.01875305175781, "loss": 0.1707, "rewards/accuracies": 1.0, "rewards/chosen": -0.4898495376110077, "rewards/margins": 3.6444034576416016, "rewards/rejected": -4.134253025054932, "step": 1411 }, { "epoch": 2.27, "learning_rate": 4.1567578279825605e-07, "logits/chosen": -1.4480806589126587, "logits/rejected": -1.53104829788208, "logps/chosen": -94.06268310546875, "logps/rejected": -155.015380859375, "loss": 0.2428, "rewards/accuracies": 1.0, "rewards/chosen": -1.587809443473816, "rewards/margins": 5.697615623474121, "rewards/rejected": -7.285425186157227, "step": 1412 }, { "epoch": 2.27, "learning_rate": 4.1557669441141496e-07, "logits/chosen": -1.770002841949463, "logits/rejected": -1.7534785270690918, "logps/chosen": -83.01763916015625, "logps/rejected": -132.87599182128906, "loss": 0.1144, "rewards/accuracies": 1.0, "rewards/chosen": -0.35075482726097107, "rewards/margins": 3.57771372795105, "rewards/rejected": -3.928468704223633, "step": 1413 }, { "epoch": 2.27, "learning_rate": 4.1547760602457387e-07, "logits/chosen": -1.7614374160766602, "logits/rejected": -1.7426267862319946, "logps/chosen": -102.25765991210938, "logps/rejected": -160.68905639648438, "loss": 0.1015, "rewards/accuracies": 1.0, "rewards/chosen": -0.43564948439598083, "rewards/margins": 5.448760032653809, "rewards/rejected": -5.8844099044799805, "step": 1414 }, { "epoch": 2.27, "learning_rate": 4.153785176377328e-07, "logits/chosen": -1.6535499095916748, "logits/rejected": -1.721013069152832, "logps/chosen": -108.98887634277344, "logps/rejected": -138.59413146972656, "loss": 0.1496, "rewards/accuracies": 0.75, "rewards/chosen": -2.02699875831604, "rewards/margins": 1.3645784854888916, "rewards/rejected": -3.3915772438049316, "step": 1415 }, { "epoch": 2.27, "learning_rate": 4.152794292508918e-07, "logits/chosen": -1.5745267868041992, "logits/rejected": -1.6279399394989014, "logps/chosen": -72.76484680175781, "logps/rejected": -119.44187927246094, "loss": 0.1111, "rewards/accuracies": 1.0, "rewards/chosen": -0.4794832766056061, "rewards/margins": 4.1045002937316895, "rewards/rejected": -4.583983421325684, "step": 1416 }, { "epoch": 2.27, "learning_rate": 4.1518034086405074e-07, "logits/chosen": -1.5368263721466064, "logits/rejected": -1.414686679840088, "logps/chosen": -117.1923828125, "logps/rejected": -135.6180419921875, "loss": 0.0899, "rewards/accuracies": 1.0, "rewards/chosen": -0.29243677854537964, "rewards/margins": 3.7475361824035645, "rewards/rejected": -4.03997278213501, "step": 1417 }, { "epoch": 2.28, "learning_rate": 4.1508125247720965e-07, "logits/chosen": -1.74886953830719, "logits/rejected": -1.717078685760498, "logps/chosen": -87.514892578125, "logps/rejected": -124.32083129882812, "loss": 0.1136, "rewards/accuracies": 1.0, "rewards/chosen": -1.277405858039856, "rewards/margins": 4.973484039306641, "rewards/rejected": -6.250889778137207, "step": 1418 }, { "epoch": 2.28, "learning_rate": 4.1498216409036856e-07, "logits/chosen": -1.7288612127304077, "logits/rejected": -1.70843505859375, "logps/chosen": -84.53071594238281, "logps/rejected": -153.1609649658203, "loss": 0.0463, "rewards/accuracies": 1.0, "rewards/chosen": -0.19075021147727966, "rewards/margins": 5.475516319274902, "rewards/rejected": -5.666266918182373, "step": 1419 }, { "epoch": 2.28, "learning_rate": 4.148830757035275e-07, "logits/chosen": -1.6198716163635254, "logits/rejected": -1.6274473667144775, "logps/chosen": -72.00983428955078, "logps/rejected": -164.80831909179688, "loss": 0.0832, "rewards/accuracies": 1.0, "rewards/chosen": -0.4486519694328308, "rewards/margins": 6.212048053741455, "rewards/rejected": -6.660699844360352, "step": 1420 }, { "epoch": 2.28, "learning_rate": 4.147839873166865e-07, "logits/chosen": -1.6145879030227661, "logits/rejected": -1.5420849323272705, "logps/chosen": -108.18470764160156, "logps/rejected": -138.5733642578125, "loss": 0.1778, "rewards/accuracies": 1.0, "rewards/chosen": -0.508213222026825, "rewards/margins": 3.697749137878418, "rewards/rejected": -4.205962657928467, "step": 1421 }, { "epoch": 2.28, "learning_rate": 4.1468489892984544e-07, "logits/chosen": -1.624552607536316, "logits/rejected": -1.5389845371246338, "logps/chosen": -90.34996795654297, "logps/rejected": -147.56124877929688, "loss": 0.1521, "rewards/accuracies": 0.75, "rewards/chosen": -1.3849163055419922, "rewards/margins": 1.8597416877746582, "rewards/rejected": -3.2446579933166504, "step": 1422 }, { "epoch": 2.28, "learning_rate": 4.1458581054300434e-07, "logits/chosen": -1.6913083791732788, "logits/rejected": -1.578614354133606, "logps/chosen": -95.01055908203125, "logps/rejected": -144.44972229003906, "loss": 0.0748, "rewards/accuracies": 1.0, "rewards/chosen": -0.9704793691635132, "rewards/margins": 3.2797422409057617, "rewards/rejected": -4.250221252441406, "step": 1423 }, { "epoch": 2.29, "learning_rate": 4.1448672215616325e-07, "logits/chosen": -1.4310641288757324, "logits/rejected": -1.529903531074524, "logps/chosen": -72.06104278564453, "logps/rejected": -167.68148803710938, "loss": 0.1686, "rewards/accuracies": 1.0, "rewards/chosen": -1.4669650793075562, "rewards/margins": 7.871921539306641, "rewards/rejected": -9.338886260986328, "step": 1424 }, { "epoch": 2.29, "learning_rate": 4.143876337693222e-07, "logits/chosen": -1.7280652523040771, "logits/rejected": -1.7508034706115723, "logps/chosen": -87.68287658691406, "logps/rejected": -149.13851928710938, "loss": 0.1065, "rewards/accuracies": 1.0, "rewards/chosen": -2.5115649700164795, "rewards/margins": 4.6422295570373535, "rewards/rejected": -7.153794765472412, "step": 1425 }, { "epoch": 2.29, "learning_rate": 4.1428854538248117e-07, "logits/chosen": -1.707735300064087, "logits/rejected": -1.7250267267227173, "logps/chosen": -108.06665802001953, "logps/rejected": -128.3871307373047, "loss": 0.1406, "rewards/accuracies": 1.0, "rewards/chosen": -1.103027582168579, "rewards/margins": 2.5704457759857178, "rewards/rejected": -3.673473358154297, "step": 1426 }, { "epoch": 2.29, "learning_rate": 4.1418945699564013e-07, "logits/chosen": -1.6493210792541504, "logits/rejected": -1.652307152748108, "logps/chosen": -84.65473175048828, "logps/rejected": -128.85260009765625, "loss": 0.081, "rewards/accuracies": 1.0, "rewards/chosen": -1.577272653579712, "rewards/margins": 4.614554405212402, "rewards/rejected": -6.191826820373535, "step": 1427 }, { "epoch": 2.29, "learning_rate": 4.1409036860879904e-07, "logits/chosen": -1.5915358066558838, "logits/rejected": -1.674373745918274, "logps/chosen": -88.54325866699219, "logps/rejected": -140.00729370117188, "loss": 0.0925, "rewards/accuracies": 1.0, "rewards/chosen": -2.864208221435547, "rewards/margins": 3.250596046447754, "rewards/rejected": -6.114804267883301, "step": 1428 }, { "epoch": 2.29, "learning_rate": 4.1399128022195794e-07, "logits/chosen": -1.512673020362854, "logits/rejected": -1.4942142963409424, "logps/chosen": -104.28711700439453, "logps/rejected": -181.19386291503906, "loss": 0.0973, "rewards/accuracies": 1.0, "rewards/chosen": -1.8862333297729492, "rewards/margins": 7.505236625671387, "rewards/rejected": -9.391469955444336, "step": 1429 }, { "epoch": 2.3, "learning_rate": 4.138921918351169e-07, "logits/chosen": -1.8132683038711548, "logits/rejected": -1.8515625, "logps/chosen": -108.30857849121094, "logps/rejected": -155.1065216064453, "loss": 0.1316, "rewards/accuracies": 1.0, "rewards/chosen": -1.947566032409668, "rewards/margins": 3.149091958999634, "rewards/rejected": -5.096657752990723, "step": 1430 }, { "epoch": 2.3, "learning_rate": 4.1379310344827586e-07, "logits/chosen": -1.6492600440979004, "logits/rejected": -1.6190731525421143, "logps/chosen": -119.80763244628906, "logps/rejected": -151.47560119628906, "loss": 0.1341, "rewards/accuracies": 1.0, "rewards/chosen": -1.244690179824829, "rewards/margins": 5.1907854080200195, "rewards/rejected": -6.435475826263428, "step": 1431 }, { "epoch": 2.3, "learning_rate": 4.1369401506143477e-07, "logits/chosen": -1.5136934518814087, "logits/rejected": -1.4396708011627197, "logps/chosen": -111.11389923095703, "logps/rejected": -162.59791564941406, "loss": 0.1467, "rewards/accuracies": 1.0, "rewards/chosen": -1.7716938257217407, "rewards/margins": 3.139373540878296, "rewards/rejected": -4.911067485809326, "step": 1432 }, { "epoch": 2.3, "learning_rate": 4.1359492667459373e-07, "logits/chosen": -1.5370800495147705, "logits/rejected": -1.489763617515564, "logps/chosen": -84.74977111816406, "logps/rejected": -163.06277465820312, "loss": 0.0929, "rewards/accuracies": 1.0, "rewards/chosen": -1.17057204246521, "rewards/margins": 5.720549583435059, "rewards/rejected": -6.8911213874816895, "step": 1433 }, { "epoch": 2.3, "learning_rate": 4.1349583828775264e-07, "logits/chosen": -1.6201831102371216, "logits/rejected": -1.6319034099578857, "logps/chosen": -86.96894836425781, "logps/rejected": -123.63758850097656, "loss": 0.1033, "rewards/accuracies": 0.75, "rewards/chosen": -1.7245900630950928, "rewards/margins": 4.917116165161133, "rewards/rejected": -6.641706466674805, "step": 1434 }, { "epoch": 2.3, "learning_rate": 4.133967499009116e-07, "logits/chosen": -1.5964241027832031, "logits/rejected": -1.6553490161895752, "logps/chosen": -136.25421142578125, "logps/rejected": -124.70254516601562, "loss": 0.0593, "rewards/accuracies": 1.0, "rewards/chosen": -0.6843993663787842, "rewards/margins": 3.7857375144958496, "rewards/rejected": -4.470136642456055, "step": 1435 }, { "epoch": 2.3, "learning_rate": 4.132976615140705e-07, "logits/chosen": -1.5536327362060547, "logits/rejected": -1.5600950717926025, "logps/chosen": -87.00469207763672, "logps/rejected": -123.5153579711914, "loss": 0.0632, "rewards/accuracies": 1.0, "rewards/chosen": -1.023163080215454, "rewards/margins": 3.332223415374756, "rewards/rejected": -4.355386257171631, "step": 1436 }, { "epoch": 2.31, "learning_rate": 4.1319857312722946e-07, "logits/chosen": -1.5688555240631104, "logits/rejected": -1.5039918422698975, "logps/chosen": -121.16756439208984, "logps/rejected": -155.6558380126953, "loss": 0.1189, "rewards/accuracies": 0.75, "rewards/chosen": -1.8975698947906494, "rewards/margins": 3.4383363723754883, "rewards/rejected": -5.335906505584717, "step": 1437 }, { "epoch": 2.31, "learning_rate": 4.130994847403884e-07, "logits/chosen": -1.684728741645813, "logits/rejected": -1.6818584203720093, "logps/chosen": -80.01072692871094, "logps/rejected": -128.4375, "loss": 0.0533, "rewards/accuracies": 1.0, "rewards/chosen": -1.169456958770752, "rewards/margins": 4.071681022644043, "rewards/rejected": -5.241138458251953, "step": 1438 }, { "epoch": 2.31, "learning_rate": 4.1300039635354733e-07, "logits/chosen": -1.6396100521087646, "logits/rejected": -1.66339111328125, "logps/chosen": -76.57654571533203, "logps/rejected": -125.15562438964844, "loss": 0.1606, "rewards/accuracies": 0.75, "rewards/chosen": -0.7067886590957642, "rewards/margins": 4.442413330078125, "rewards/rejected": -5.149201393127441, "step": 1439 }, { "epoch": 2.31, "learning_rate": 4.129013079667063e-07, "logits/chosen": -1.7070128917694092, "logits/rejected": -1.6729650497436523, "logps/chosen": -101.57573699951172, "logps/rejected": -135.72735595703125, "loss": 0.1842, "rewards/accuracies": 1.0, "rewards/chosen": -0.8113504648208618, "rewards/margins": 3.8701703548431396, "rewards/rejected": -4.681520938873291, "step": 1440 }, { "epoch": 2.31, "learning_rate": 4.128022195798652e-07, "logits/chosen": -1.675486445426941, "logits/rejected": -1.647981882095337, "logps/chosen": -146.17349243164062, "logps/rejected": -184.56954956054688, "loss": 0.1239, "rewards/accuracies": 1.0, "rewards/chosen": -1.5168029069900513, "rewards/margins": 6.445296764373779, "rewards/rejected": -7.962100028991699, "step": 1441 }, { "epoch": 2.31, "learning_rate": 4.1270313119302416e-07, "logits/chosen": -1.679695725440979, "logits/rejected": -1.7266314029693604, "logps/chosen": -98.33006286621094, "logps/rejected": -138.36788940429688, "loss": 0.2102, "rewards/accuracies": 1.0, "rewards/chosen": -0.8881416320800781, "rewards/margins": 4.195207595825195, "rewards/rejected": -5.083349227905273, "step": 1442 }, { "epoch": 2.32, "learning_rate": 4.126040428061831e-07, "logits/chosen": -1.6798985004425049, "logits/rejected": -1.671850562095642, "logps/chosen": -70.52931213378906, "logps/rejected": -126.93759155273438, "loss": 0.1237, "rewards/accuracies": 1.0, "rewards/chosen": -1.42653489112854, "rewards/margins": 5.238668441772461, "rewards/rejected": -6.665203094482422, "step": 1443 }, { "epoch": 2.32, "learning_rate": 4.12504954419342e-07, "logits/chosen": -1.7151775360107422, "logits/rejected": -1.6514531373977661, "logps/chosen": -112.63961029052734, "logps/rejected": -111.74440002441406, "loss": 0.194, "rewards/accuracies": 1.0, "rewards/chosen": -1.2234121561050415, "rewards/margins": 2.2096331119537354, "rewards/rejected": -3.4330451488494873, "step": 1444 }, { "epoch": 2.32, "learning_rate": 4.12405866032501e-07, "logits/chosen": -1.5797570943832397, "logits/rejected": -1.5617091655731201, "logps/chosen": -88.10249328613281, "logps/rejected": -141.6862030029297, "loss": 0.1021, "rewards/accuracies": 0.75, "rewards/chosen": -0.900860071182251, "rewards/margins": 5.411687850952148, "rewards/rejected": -6.312548637390137, "step": 1445 }, { "epoch": 2.32, "learning_rate": 4.123067776456599e-07, "logits/chosen": -1.6567573547363281, "logits/rejected": -1.7103627920150757, "logps/chosen": -161.1071319580078, "logps/rejected": -195.2894287109375, "loss": 0.0972, "rewards/accuracies": 1.0, "rewards/chosen": -2.773641586303711, "rewards/margins": 2.472651481628418, "rewards/rejected": -5.246293067932129, "step": 1446 }, { "epoch": 2.32, "learning_rate": 4.1220768925881885e-07, "logits/chosen": -1.5796881914138794, "logits/rejected": -1.5277118682861328, "logps/chosen": -79.33329772949219, "logps/rejected": -145.93910217285156, "loss": 0.1282, "rewards/accuracies": 1.0, "rewards/chosen": -0.31882575154304504, "rewards/margins": 6.093873977661133, "rewards/rejected": -6.4126996994018555, "step": 1447 }, { "epoch": 2.32, "learning_rate": 4.121086008719778e-07, "logits/chosen": -1.7332220077514648, "logits/rejected": -1.7883186340332031, "logps/chosen": -94.91268920898438, "logps/rejected": -141.56381225585938, "loss": 0.0995, "rewards/accuracies": 0.75, "rewards/chosen": -1.9615980386734009, "rewards/margins": 3.266876459121704, "rewards/rejected": -5.2284746170043945, "step": 1448 }, { "epoch": 2.33, "learning_rate": 4.120095124851367e-07, "logits/chosen": -1.6668601036071777, "logits/rejected": -1.6362234354019165, "logps/chosen": -116.24885559082031, "logps/rejected": -142.1651153564453, "loss": 0.1208, "rewards/accuracies": 1.0, "rewards/chosen": -1.7617164850234985, "rewards/margins": 3.271109104156494, "rewards/rejected": -5.032825469970703, "step": 1449 }, { "epoch": 2.33, "learning_rate": 4.119104240982957e-07, "logits/chosen": -1.4589879512786865, "logits/rejected": -1.432215690612793, "logps/chosen": -101.90811920166016, "logps/rejected": -133.19284057617188, "loss": 0.1882, "rewards/accuracies": 1.0, "rewards/chosen": -1.986182689666748, "rewards/margins": 3.918524742126465, "rewards/rejected": -5.904707431793213, "step": 1450 }, { "epoch": 2.33, "learning_rate": 4.118113357114546e-07, "logits/chosen": -1.551537036895752, "logits/rejected": -1.5290002822875977, "logps/chosen": -106.24058532714844, "logps/rejected": -132.0687255859375, "loss": 0.1608, "rewards/accuracies": 0.5, "rewards/chosen": -1.8513425588607788, "rewards/margins": 2.477311372756958, "rewards/rejected": -4.328653812408447, "step": 1451 }, { "epoch": 2.33, "learning_rate": 4.117122473246135e-07, "logits/chosen": -1.5224637985229492, "logits/rejected": -1.6093188524246216, "logps/chosen": -83.37364196777344, "logps/rejected": -208.93910217285156, "loss": 0.1379, "rewards/accuracies": 1.0, "rewards/chosen": -1.077101230621338, "rewards/margins": 7.790935516357422, "rewards/rejected": -8.868036270141602, "step": 1452 }, { "epoch": 2.33, "learning_rate": 4.116131589377725e-07, "logits/chosen": -1.4874601364135742, "logits/rejected": -1.4890401363372803, "logps/chosen": -85.50657653808594, "logps/rejected": -141.89614868164062, "loss": 0.063, "rewards/accuracies": 1.0, "rewards/chosen": -0.7631651163101196, "rewards/margins": 5.255190849304199, "rewards/rejected": -6.018355846405029, "step": 1453 }, { "epoch": 2.33, "learning_rate": 4.115140705509314e-07, "logits/chosen": -1.642543911933899, "logits/rejected": -1.6349819898605347, "logps/chosen": -89.24288940429688, "logps/rejected": -109.95523071289062, "loss": 0.2564, "rewards/accuracies": 0.5, "rewards/chosen": -1.783313512802124, "rewards/margins": 2.202544689178467, "rewards/rejected": -3.9858579635620117, "step": 1454 }, { "epoch": 2.34, "learning_rate": 4.1141498216409037e-07, "logits/chosen": -1.7209001779556274, "logits/rejected": -1.7945139408111572, "logps/chosen": -95.30863189697266, "logps/rejected": -162.94439697265625, "loss": 0.1491, "rewards/accuracies": 1.0, "rewards/chosen": -1.1124944686889648, "rewards/margins": 4.865020275115967, "rewards/rejected": -5.977514743804932, "step": 1455 }, { "epoch": 2.34, "learning_rate": 4.113158937772493e-07, "logits/chosen": -1.6660478115081787, "logits/rejected": -1.663987159729004, "logps/chosen": -98.72095489501953, "logps/rejected": -167.54493713378906, "loss": 0.128, "rewards/accuracies": 1.0, "rewards/chosen": -1.956188440322876, "rewards/margins": 7.595315933227539, "rewards/rejected": -9.551504135131836, "step": 1456 }, { "epoch": 2.34, "learning_rate": 4.112168053904082e-07, "logits/chosen": -1.7016232013702393, "logits/rejected": -1.6945881843566895, "logps/chosen": -86.76763153076172, "logps/rejected": -142.9833526611328, "loss": 0.0912, "rewards/accuracies": 1.0, "rewards/chosen": -2.0745856761932373, "rewards/margins": 5.578383922576904, "rewards/rejected": -7.6529693603515625, "step": 1457 }, { "epoch": 2.34, "learning_rate": 4.111177170035672e-07, "logits/chosen": -1.3999191522598267, "logits/rejected": -1.3795396089553833, "logps/chosen": -97.30729675292969, "logps/rejected": -111.57603454589844, "loss": 0.1648, "rewards/accuracies": 0.75, "rewards/chosen": -1.916393756866455, "rewards/margins": 1.9566725492477417, "rewards/rejected": -3.8730661869049072, "step": 1458 }, { "epoch": 2.34, "learning_rate": 4.110186286167261e-07, "logits/chosen": -1.6614223718643188, "logits/rejected": -1.6390676498413086, "logps/chosen": -106.58448791503906, "logps/rejected": -117.39623260498047, "loss": 0.102, "rewards/accuracies": 1.0, "rewards/chosen": -0.403724730014801, "rewards/margins": 3.7722063064575195, "rewards/rejected": -4.175930976867676, "step": 1459 }, { "epoch": 2.34, "learning_rate": 4.1091954022988506e-07, "logits/chosen": -1.7239326238632202, "logits/rejected": -1.6126232147216797, "logps/chosen": -114.9096908569336, "logps/rejected": -142.51705932617188, "loss": 0.0814, "rewards/accuracies": 1.0, "rewards/chosen": -1.0271408557891846, "rewards/margins": 3.7078652381896973, "rewards/rejected": -4.735005855560303, "step": 1460 }, { "epoch": 2.35, "learning_rate": 4.1082045184304397e-07, "logits/chosen": -1.831423282623291, "logits/rejected": -1.7994070053100586, "logps/chosen": -124.40634155273438, "logps/rejected": -134.9991912841797, "loss": 0.1319, "rewards/accuracies": 0.75, "rewards/chosen": -2.2699074745178223, "rewards/margins": 1.617745280265808, "rewards/rejected": -3.88765287399292, "step": 1461 }, { "epoch": 2.35, "learning_rate": 4.107213634562029e-07, "logits/chosen": -1.5814533233642578, "logits/rejected": -1.5658690929412842, "logps/chosen": -85.51536560058594, "logps/rejected": -163.7235107421875, "loss": 0.159, "rewards/accuracies": 1.0, "rewards/chosen": -0.07296298444271088, "rewards/margins": 7.300739288330078, "rewards/rejected": -7.373702049255371, "step": 1462 }, { "epoch": 2.35, "learning_rate": 4.106222750693619e-07, "logits/chosen": -1.4326744079589844, "logits/rejected": -1.4111225605010986, "logps/chosen": -107.56144714355469, "logps/rejected": -161.078857421875, "loss": 0.2144, "rewards/accuracies": 1.0, "rewards/chosen": -0.5512489080429077, "rewards/margins": 4.711457252502441, "rewards/rejected": -5.262706279754639, "step": 1463 }, { "epoch": 2.35, "learning_rate": 4.105231866825208e-07, "logits/chosen": -1.6741074323654175, "logits/rejected": -1.6438548564910889, "logps/chosen": -115.98644256591797, "logps/rejected": -145.0465087890625, "loss": 0.1246, "rewards/accuracies": 1.0, "rewards/chosen": -1.5428389310836792, "rewards/margins": 5.310937404632568, "rewards/rejected": -6.853775978088379, "step": 1464 }, { "epoch": 2.35, "learning_rate": 4.1042409829567975e-07, "logits/chosen": -1.72212815284729, "logits/rejected": -1.728501796722412, "logps/chosen": -88.19808197021484, "logps/rejected": -158.05972290039062, "loss": 0.295, "rewards/accuracies": 0.75, "rewards/chosen": -0.7325023412704468, "rewards/margins": 6.568589687347412, "rewards/rejected": -7.30109167098999, "step": 1465 }, { "epoch": 2.35, "learning_rate": 4.1032500990883866e-07, "logits/chosen": -1.624548316001892, "logits/rejected": -1.6008844375610352, "logps/chosen": -82.06725311279297, "logps/rejected": -112.7343978881836, "loss": 0.1092, "rewards/accuracies": 0.75, "rewards/chosen": -0.7870576977729797, "rewards/margins": 2.94651198387146, "rewards/rejected": -3.733569860458374, "step": 1466 }, { "epoch": 2.35, "learning_rate": 4.1022592152199757e-07, "logits/chosen": -1.6599711179733276, "logits/rejected": -1.743025779724121, "logps/chosen": -72.84785461425781, "logps/rejected": -179.69009399414062, "loss": 0.1209, "rewards/accuracies": 1.0, "rewards/chosen": -0.16680295765399933, "rewards/margins": 8.748961448669434, "rewards/rejected": -8.915763854980469, "step": 1467 }, { "epoch": 2.36, "learning_rate": 4.101268331351566e-07, "logits/chosen": -1.7131197452545166, "logits/rejected": -1.700234293937683, "logps/chosen": -102.97457122802734, "logps/rejected": -181.13185119628906, "loss": 0.1033, "rewards/accuracies": 1.0, "rewards/chosen": -1.9319212436676025, "rewards/margins": 6.669839382171631, "rewards/rejected": -8.601760864257812, "step": 1468 }, { "epoch": 2.36, "learning_rate": 4.100277447483155e-07, "logits/chosen": -1.6140414476394653, "logits/rejected": -1.633556842803955, "logps/chosen": -102.66398620605469, "logps/rejected": -156.39764404296875, "loss": 0.0896, "rewards/accuracies": 1.0, "rewards/chosen": -2.1847665309906006, "rewards/margins": 4.14161491394043, "rewards/rejected": -6.326381683349609, "step": 1469 }, { "epoch": 2.36, "learning_rate": 4.099286563614744e-07, "logits/chosen": -1.7232232093811035, "logits/rejected": -1.725425124168396, "logps/chosen": -96.1461181640625, "logps/rejected": -121.1124038696289, "loss": 0.063, "rewards/accuracies": 1.0, "rewards/chosen": -0.30971431732177734, "rewards/margins": 3.893807888031006, "rewards/rejected": -4.203522682189941, "step": 1470 }, { "epoch": 2.36, "learning_rate": 4.0982956797463335e-07, "logits/chosen": -1.6902096271514893, "logits/rejected": -1.5749232769012451, "logps/chosen": -93.50181579589844, "logps/rejected": -157.57406616210938, "loss": 0.1107, "rewards/accuracies": 1.0, "rewards/chosen": -1.4387052059173584, "rewards/margins": 5.998256683349609, "rewards/rejected": -7.436962127685547, "step": 1471 }, { "epoch": 2.36, "learning_rate": 4.0973047958779226e-07, "logits/chosen": -1.8048862218856812, "logits/rejected": -1.772298812866211, "logps/chosen": -99.40337371826172, "logps/rejected": -160.56207275390625, "loss": 0.0816, "rewards/accuracies": 1.0, "rewards/chosen": -1.3581384420394897, "rewards/margins": 5.388286590576172, "rewards/rejected": -6.746425151824951, "step": 1472 }, { "epoch": 2.36, "learning_rate": 4.096313912009513e-07, "logits/chosen": -1.6528825759887695, "logits/rejected": -1.6474099159240723, "logps/chosen": -118.61327362060547, "logps/rejected": -172.26715087890625, "loss": 0.0872, "rewards/accuracies": 1.0, "rewards/chosen": -1.5727038383483887, "rewards/margins": 3.4714102745056152, "rewards/rejected": -5.044114112854004, "step": 1473 }, { "epoch": 2.37, "learning_rate": 4.095323028141102e-07, "logits/chosen": -1.633191704750061, "logits/rejected": -1.5426292419433594, "logps/chosen": -97.20809936523438, "logps/rejected": -129.29098510742188, "loss": 0.1139, "rewards/accuracies": 1.0, "rewards/chosen": -0.5058740973472595, "rewards/margins": 4.510974407196045, "rewards/rejected": -5.016848564147949, "step": 1474 }, { "epoch": 2.37, "learning_rate": 4.094332144272691e-07, "logits/chosen": -1.6834478378295898, "logits/rejected": -1.6823031902313232, "logps/chosen": -80.05238342285156, "logps/rejected": -153.88021850585938, "loss": 0.0771, "rewards/accuracies": 1.0, "rewards/chosen": -1.3101203441619873, "rewards/margins": 6.712024211883545, "rewards/rejected": -8.022144317626953, "step": 1475 }, { "epoch": 2.37, "learning_rate": 4.0933412604042805e-07, "logits/chosen": -1.536747932434082, "logits/rejected": -1.472947120666504, "logps/chosen": -100.44734954833984, "logps/rejected": -152.25790405273438, "loss": 0.1627, "rewards/accuracies": 1.0, "rewards/chosen": -1.2932469844818115, "rewards/margins": 5.839528560638428, "rewards/rejected": -7.132775783538818, "step": 1476 }, { "epoch": 2.37, "learning_rate": 4.0923503765358695e-07, "logits/chosen": -1.5412077903747559, "logits/rejected": -1.5260741710662842, "logps/chosen": -105.67164611816406, "logps/rejected": -176.1360321044922, "loss": 0.1176, "rewards/accuracies": 1.0, "rewards/chosen": -1.1501874923706055, "rewards/margins": 6.643045425415039, "rewards/rejected": -7.7932329177856445, "step": 1477 }, { "epoch": 2.37, "learning_rate": 4.091359492667459e-07, "logits/chosen": -1.7576298713684082, "logits/rejected": -1.7769145965576172, "logps/chosen": -94.69011688232422, "logps/rejected": -133.70437622070312, "loss": 0.1392, "rewards/accuracies": 1.0, "rewards/chosen": -0.726377010345459, "rewards/margins": 3.5425403118133545, "rewards/rejected": -4.268917083740234, "step": 1478 }, { "epoch": 2.37, "learning_rate": 4.0903686087990487e-07, "logits/chosen": -1.511179804801941, "logits/rejected": -1.5482220649719238, "logps/chosen": -117.68193054199219, "logps/rejected": -152.30963134765625, "loss": 0.1088, "rewards/accuracies": 1.0, "rewards/chosen": -1.6891249418258667, "rewards/margins": 4.555817604064941, "rewards/rejected": -6.2449421882629395, "step": 1479 }, { "epoch": 2.38, "learning_rate": 4.089377724930638e-07, "logits/chosen": -1.6805524826049805, "logits/rejected": -1.7040472030639648, "logps/chosen": -125.67372131347656, "logps/rejected": -134.75502014160156, "loss": 0.1451, "rewards/accuracies": 1.0, "rewards/chosen": -1.8552608489990234, "rewards/margins": 3.225457191467285, "rewards/rejected": -5.080718040466309, "step": 1480 }, { "epoch": 2.38, "learning_rate": 4.0883868410622274e-07, "logits/chosen": -1.5982657670974731, "logits/rejected": -1.601035714149475, "logps/chosen": -90.72936248779297, "logps/rejected": -137.61961364746094, "loss": 0.1915, "rewards/accuracies": 1.0, "rewards/chosen": -2.2122385501861572, "rewards/margins": 5.0202741622924805, "rewards/rejected": -7.232512950897217, "step": 1481 }, { "epoch": 2.38, "learning_rate": 4.0873959571938165e-07, "logits/chosen": -1.5307133197784424, "logits/rejected": -1.5169103145599365, "logps/chosen": -114.79058837890625, "logps/rejected": -134.68020629882812, "loss": 0.1638, "rewards/accuracies": 0.75, "rewards/chosen": -1.518075942993164, "rewards/margins": 3.731745481491089, "rewards/rejected": -5.249821662902832, "step": 1482 }, { "epoch": 2.38, "learning_rate": 4.086405073325406e-07, "logits/chosen": -1.618375539779663, "logits/rejected": -1.6335082054138184, "logps/chosen": -100.37903594970703, "logps/rejected": -151.8387451171875, "loss": 0.0829, "rewards/accuracies": 1.0, "rewards/chosen": -1.6983686685562134, "rewards/margins": 5.765535354614258, "rewards/rejected": -7.463903427124023, "step": 1483 }, { "epoch": 2.38, "learning_rate": 4.0854141894569957e-07, "logits/chosen": -1.7722855806350708, "logits/rejected": -1.7039296627044678, "logps/chosen": -113.68299865722656, "logps/rejected": -173.16778564453125, "loss": 0.1118, "rewards/accuracies": 1.0, "rewards/chosen": -0.16755028069019318, "rewards/margins": 5.9268798828125, "rewards/rejected": -6.094430446624756, "step": 1484 }, { "epoch": 2.38, "learning_rate": 4.0844233055885847e-07, "logits/chosen": -1.690535545349121, "logits/rejected": -1.601304054260254, "logps/chosen": -113.0234375, "logps/rejected": -112.406005859375, "loss": 0.0815, "rewards/accuracies": 1.0, "rewards/chosen": -1.5554728507995605, "rewards/margins": 1.6039519309997559, "rewards/rejected": -3.1594250202178955, "step": 1485 }, { "epoch": 2.39, "learning_rate": 4.0834324217201743e-07, "logits/chosen": -1.661498785018921, "logits/rejected": -1.733902931213379, "logps/chosen": -102.24685668945312, "logps/rejected": -158.42613220214844, "loss": 0.319, "rewards/accuracies": 0.75, "rewards/chosen": -1.7509816884994507, "rewards/margins": 2.810352087020874, "rewards/rejected": -4.561333656311035, "step": 1486 }, { "epoch": 2.39, "learning_rate": 4.0824415378517634e-07, "logits/chosen": -1.666690707206726, "logits/rejected": -1.5835140943527222, "logps/chosen": -109.46944427490234, "logps/rejected": -117.82136535644531, "loss": 0.1768, "rewards/accuracies": 0.75, "rewards/chosen": 0.2317575365304947, "rewards/margins": 2.9080750942230225, "rewards/rejected": -2.6763179302215576, "step": 1487 }, { "epoch": 2.39, "learning_rate": 4.081450653983353e-07, "logits/chosen": -1.6321110725402832, "logits/rejected": -1.6530343294143677, "logps/chosen": -103.3187255859375, "logps/rejected": -147.22511291503906, "loss": 0.1528, "rewards/accuracies": 0.75, "rewards/chosen": -1.9517402648925781, "rewards/margins": 3.8201441764831543, "rewards/rejected": -5.771884441375732, "step": 1488 }, { "epoch": 2.39, "learning_rate": 4.0804597701149426e-07, "logits/chosen": -1.6847548484802246, "logits/rejected": -1.6763641834259033, "logps/chosen": -133.73110961914062, "logps/rejected": -176.3251190185547, "loss": 0.1663, "rewards/accuracies": 1.0, "rewards/chosen": -2.761630058288574, "rewards/margins": 3.6182892322540283, "rewards/rejected": -6.379919528961182, "step": 1489 }, { "epoch": 2.39, "learning_rate": 4.0794688862465317e-07, "logits/chosen": -1.5901468992233276, "logits/rejected": -1.6527047157287598, "logps/chosen": -106.97029113769531, "logps/rejected": -176.55593872070312, "loss": 0.0289, "rewards/accuracies": 1.0, "rewards/chosen": -2.2518486976623535, "rewards/margins": 4.839935779571533, "rewards/rejected": -7.091784954071045, "step": 1490 }, { "epoch": 2.39, "learning_rate": 4.078478002378121e-07, "logits/chosen": -1.6851449012756348, "logits/rejected": -1.5898983478546143, "logps/chosen": -97.10938262939453, "logps/rejected": -125.79306030273438, "loss": 0.0587, "rewards/accuracies": 1.0, "rewards/chosen": -1.3101112842559814, "rewards/margins": 4.370337963104248, "rewards/rejected": -5.68044900894165, "step": 1491 }, { "epoch": 2.39, "learning_rate": 4.0774871185097103e-07, "logits/chosen": -1.6654438972473145, "logits/rejected": -1.6475812196731567, "logps/chosen": -91.38770294189453, "logps/rejected": -156.69041442871094, "loss": 0.0892, "rewards/accuracies": 1.0, "rewards/chosen": -0.9593333005905151, "rewards/margins": 7.783817291259766, "rewards/rejected": -8.743151664733887, "step": 1492 }, { "epoch": 2.4, "learning_rate": 4.0764962346413e-07, "logits/chosen": -1.764151930809021, "logits/rejected": -1.6202425956726074, "logps/chosen": -128.2392578125, "logps/rejected": -116.54849243164062, "loss": 0.1691, "rewards/accuracies": 1.0, "rewards/chosen": -1.8391079902648926, "rewards/margins": 2.0576417446136475, "rewards/rejected": -3.89674973487854, "step": 1493 }, { "epoch": 2.4, "learning_rate": 4.0755053507728895e-07, "logits/chosen": -1.6052769422531128, "logits/rejected": -1.5756547451019287, "logps/chosen": -77.28958129882812, "logps/rejected": -151.672607421875, "loss": 0.172, "rewards/accuracies": 1.0, "rewards/chosen": -0.685112476348877, "rewards/margins": 7.137723922729492, "rewards/rejected": -7.822836875915527, "step": 1494 }, { "epoch": 2.4, "learning_rate": 4.0745144669044786e-07, "logits/chosen": -1.6648147106170654, "logits/rejected": -1.687241792678833, "logps/chosen": -78.1926040649414, "logps/rejected": -118.32796478271484, "loss": 0.0779, "rewards/accuracies": 1.0, "rewards/chosen": -0.25719472765922546, "rewards/margins": 3.386625289916992, "rewards/rejected": -3.64382004737854, "step": 1495 }, { "epoch": 2.4, "learning_rate": 4.073523583036068e-07, "logits/chosen": -1.6595323085784912, "logits/rejected": -1.6373927593231201, "logps/chosen": -128.678466796875, "logps/rejected": -138.0799560546875, "loss": 0.169, "rewards/accuracies": 0.75, "rewards/chosen": -2.9421684741973877, "rewards/margins": 2.1058425903320312, "rewards/rejected": -5.04801082611084, "step": 1496 }, { "epoch": 2.4, "learning_rate": 4.072532699167657e-07, "logits/chosen": -1.495861291885376, "logits/rejected": -1.4829118251800537, "logps/chosen": -92.12309265136719, "logps/rejected": -134.8983154296875, "loss": 0.0369, "rewards/accuracies": 1.0, "rewards/chosen": -1.7952005863189697, "rewards/margins": 3.08030366897583, "rewards/rejected": -4.875504493713379, "step": 1497 }, { "epoch": 2.4, "learning_rate": 4.071541815299247e-07, "logits/chosen": -1.5180261135101318, "logits/rejected": -1.5774201154708862, "logps/chosen": -90.67567443847656, "logps/rejected": -143.63949584960938, "loss": 0.0604, "rewards/accuracies": 1.0, "rewards/chosen": -1.9251878261566162, "rewards/margins": 3.4453117847442627, "rewards/rejected": -5.370499610900879, "step": 1498 }, { "epoch": 2.41, "learning_rate": 4.070550931430836e-07, "logits/chosen": -1.7677569389343262, "logits/rejected": -1.7046844959259033, "logps/chosen": -96.19807434082031, "logps/rejected": -99.63695526123047, "loss": 0.1367, "rewards/accuracies": 0.75, "rewards/chosen": -1.2505528926849365, "rewards/margins": 2.849989414215088, "rewards/rejected": -4.100542068481445, "step": 1499 }, { "epoch": 2.41, "learning_rate": 4.0695600475624255e-07, "logits/chosen": -1.64897620677948, "logits/rejected": -1.632043719291687, "logps/chosen": -88.86354064941406, "logps/rejected": -126.76194763183594, "loss": 0.1331, "rewards/accuracies": 1.0, "rewards/chosen": -1.0777499675750732, "rewards/margins": 4.732696533203125, "rewards/rejected": -5.810446262359619, "step": 1500 }, { "epoch": 2.41, "learning_rate": 4.068569163694015e-07, "logits/chosen": -1.5667603015899658, "logits/rejected": -1.5248873233795166, "logps/chosen": -118.380615234375, "logps/rejected": -138.02365112304688, "loss": 0.074, "rewards/accuracies": 1.0, "rewards/chosen": -1.8939696550369263, "rewards/margins": 3.096045970916748, "rewards/rejected": -4.990015983581543, "step": 1501 }, { "epoch": 2.41, "learning_rate": 4.067578279825604e-07, "logits/chosen": -1.642941951751709, "logits/rejected": -1.5449742078781128, "logps/chosen": -112.85333251953125, "logps/rejected": -133.91156005859375, "loss": 0.1717, "rewards/accuracies": 0.75, "rewards/chosen": -2.91801118850708, "rewards/margins": 2.5385048389434814, "rewards/rejected": -5.456515789031982, "step": 1502 }, { "epoch": 2.41, "learning_rate": 4.066587395957193e-07, "logits/chosen": -1.5652997493743896, "logits/rejected": -1.5232667922973633, "logps/chosen": -96.29391479492188, "logps/rejected": -159.99798583984375, "loss": 0.0993, "rewards/accuracies": 1.0, "rewards/chosen": -1.9544934034347534, "rewards/margins": 5.283965110778809, "rewards/rejected": -7.238458633422852, "step": 1503 }, { "epoch": 2.41, "learning_rate": 4.065596512088783e-07, "logits/chosen": -1.6967377662658691, "logits/rejected": -1.6151225566864014, "logps/chosen": -99.17096710205078, "logps/rejected": -114.93666076660156, "loss": 0.0936, "rewards/accuracies": 1.0, "rewards/chosen": -1.2134594917297363, "rewards/margins": 2.3923895359039307, "rewards/rejected": -3.605849266052246, "step": 1504 }, { "epoch": 2.42, "learning_rate": 4.0646056282203725e-07, "logits/chosen": -1.7171893119812012, "logits/rejected": -1.7381590604782104, "logps/chosen": -100.01307678222656, "logps/rejected": -123.69398498535156, "loss": 0.0921, "rewards/accuracies": 1.0, "rewards/chosen": -1.7337982654571533, "rewards/margins": 3.064535140991211, "rewards/rejected": -4.798333168029785, "step": 1505 }, { "epoch": 2.42, "learning_rate": 4.063614744351962e-07, "logits/chosen": -1.5500264167785645, "logits/rejected": -1.5413289070129395, "logps/chosen": -78.89205169677734, "logps/rejected": -127.20889282226562, "loss": 0.142, "rewards/accuracies": 1.0, "rewards/chosen": -0.9197031855583191, "rewards/margins": 5.1773247718811035, "rewards/rejected": -6.097027778625488, "step": 1506 }, { "epoch": 2.42, "learning_rate": 4.062623860483551e-07, "logits/chosen": -1.619476079940796, "logits/rejected": -1.653322696685791, "logps/chosen": -95.01835632324219, "logps/rejected": -124.08586120605469, "loss": 0.1905, "rewards/accuracies": 1.0, "rewards/chosen": -1.3563987016677856, "rewards/margins": 2.917402982711792, "rewards/rejected": -4.273801803588867, "step": 1507 }, { "epoch": 2.42, "learning_rate": 4.06163297661514e-07, "logits/chosen": -1.720633625984192, "logits/rejected": -1.7363874912261963, "logps/chosen": -100.47887420654297, "logps/rejected": -149.72454833984375, "loss": 0.1443, "rewards/accuracies": 0.75, "rewards/chosen": -1.962073802947998, "rewards/margins": 4.306936264038086, "rewards/rejected": -6.269009590148926, "step": 1508 }, { "epoch": 2.42, "learning_rate": 4.06064209274673e-07, "logits/chosen": -1.7655538320541382, "logits/rejected": -1.66748046875, "logps/chosen": -87.0724868774414, "logps/rejected": -117.3565673828125, "loss": 0.0748, "rewards/accuracies": 1.0, "rewards/chosen": -0.24088314175605774, "rewards/margins": 4.770526885986328, "rewards/rejected": -5.011410236358643, "step": 1509 }, { "epoch": 2.42, "learning_rate": 4.0596512088783194e-07, "logits/chosen": -1.5929373502731323, "logits/rejected": -1.5808699131011963, "logps/chosen": -117.0266342163086, "logps/rejected": -132.70071411132812, "loss": 0.2016, "rewards/accuracies": 0.75, "rewards/chosen": -2.0866055488586426, "rewards/margins": 1.559330701828003, "rewards/rejected": -3.6459362506866455, "step": 1510 }, { "epoch": 2.43, "learning_rate": 4.058660325009909e-07, "logits/chosen": -1.553243637084961, "logits/rejected": -1.6054189205169678, "logps/chosen": -96.9155044555664, "logps/rejected": -144.506591796875, "loss": 0.1318, "rewards/accuracies": 0.75, "rewards/chosen": -2.3989510536193848, "rewards/margins": 3.78666090965271, "rewards/rejected": -6.185612201690674, "step": 1511 }, { "epoch": 2.43, "learning_rate": 4.057669441141498e-07, "logits/chosen": -1.7128180265426636, "logits/rejected": -1.725083351135254, "logps/chosen": -79.0287857055664, "logps/rejected": -120.95449829101562, "loss": 0.1127, "rewards/accuracies": 0.75, "rewards/chosen": -0.701920747756958, "rewards/margins": 3.174788236618042, "rewards/rejected": -3.876708984375, "step": 1512 }, { "epoch": 2.43, "learning_rate": 4.056678557273087e-07, "logits/chosen": -1.7242698669433594, "logits/rejected": -1.7452905178070068, "logps/chosen": -103.14733123779297, "logps/rejected": -149.8619384765625, "loss": 0.2453, "rewards/accuracies": 0.75, "rewards/chosen": -1.9115474224090576, "rewards/margins": 4.32144832611084, "rewards/rejected": -6.232995510101318, "step": 1513 }, { "epoch": 2.43, "learning_rate": 4.0556876734046767e-07, "logits/chosen": -1.4700727462768555, "logits/rejected": -1.5499107837677002, "logps/chosen": -82.6824951171875, "logps/rejected": -136.1915283203125, "loss": 0.1666, "rewards/accuracies": 1.0, "rewards/chosen": -0.9617751836776733, "rewards/margins": 4.227591037750244, "rewards/rejected": -5.189365863800049, "step": 1514 }, { "epoch": 2.43, "learning_rate": 4.054696789536266e-07, "logits/chosen": -1.6936482191085815, "logits/rejected": -1.635155439376831, "logps/chosen": -86.2714614868164, "logps/rejected": -153.77500915527344, "loss": 0.1891, "rewards/accuracies": 1.0, "rewards/chosen": -0.5294246673583984, "rewards/margins": 6.518670558929443, "rewards/rejected": -7.048095703125, "step": 1515 }, { "epoch": 2.43, "learning_rate": 4.053705905667856e-07, "logits/chosen": -1.6472275257110596, "logits/rejected": -1.6376121044158936, "logps/chosen": -84.46728515625, "logps/rejected": -156.71446228027344, "loss": 0.0646, "rewards/accuracies": 1.0, "rewards/chosen": -1.4644246101379395, "rewards/margins": 7.579118728637695, "rewards/rejected": -9.043543815612793, "step": 1516 }, { "epoch": 2.43, "learning_rate": 4.052715021799445e-07, "logits/chosen": -1.510190725326538, "logits/rejected": -1.5585732460021973, "logps/chosen": -98.6636734008789, "logps/rejected": -164.86106872558594, "loss": 0.1878, "rewards/accuracies": 1.0, "rewards/chosen": -0.7325698733329773, "rewards/margins": 6.539140224456787, "rewards/rejected": -7.2717108726501465, "step": 1517 }, { "epoch": 2.44, "learning_rate": 4.051724137931034e-07, "logits/chosen": -1.6351839303970337, "logits/rejected": -1.476347804069519, "logps/chosen": -123.76448059082031, "logps/rejected": -155.6278076171875, "loss": 0.0867, "rewards/accuracies": 1.0, "rewards/chosen": -0.7456703186035156, "rewards/margins": 5.353232383728027, "rewards/rejected": -6.098902702331543, "step": 1518 }, { "epoch": 2.44, "learning_rate": 4.0507332540626236e-07, "logits/chosen": -1.5301094055175781, "logits/rejected": -1.5763418674468994, "logps/chosen": -85.13336944580078, "logps/rejected": -125.96330261230469, "loss": 0.109, "rewards/accuracies": 1.0, "rewards/chosen": -0.7226637005805969, "rewards/margins": 3.7251012325286865, "rewards/rejected": -4.447764873504639, "step": 1519 }, { "epoch": 2.44, "learning_rate": 4.0497423701942127e-07, "logits/chosen": -1.6217589378356934, "logits/rejected": -1.603546142578125, "logps/chosen": -96.87796783447266, "logps/rejected": -134.1571807861328, "loss": 0.1388, "rewards/accuracies": 0.75, "rewards/chosen": -0.7026685476303101, "rewards/margins": 3.9957871437072754, "rewards/rejected": -4.698455810546875, "step": 1520 }, { "epoch": 2.44, "learning_rate": 4.048751486325803e-07, "logits/chosen": -1.5741209983825684, "logits/rejected": -1.7027390003204346, "logps/chosen": -78.73188781738281, "logps/rejected": -163.205078125, "loss": 0.0536, "rewards/accuracies": 1.0, "rewards/chosen": -0.6702752113342285, "rewards/margins": 6.014985084533691, "rewards/rejected": -6.685259819030762, "step": 1521 }, { "epoch": 2.44, "learning_rate": 4.047760602457392e-07, "logits/chosen": -1.6477409601211548, "logits/rejected": -1.5877058506011963, "logps/chosen": -104.22853088378906, "logps/rejected": -139.50656127929688, "loss": 0.0361, "rewards/accuracies": 1.0, "rewards/chosen": -1.0314207077026367, "rewards/margins": 4.046950817108154, "rewards/rejected": -5.078371524810791, "step": 1522 }, { "epoch": 2.44, "learning_rate": 4.046769718588981e-07, "logits/chosen": -1.4774354696273804, "logits/rejected": -1.472226858139038, "logps/chosen": -113.31661987304688, "logps/rejected": -162.3009796142578, "loss": 0.0844, "rewards/accuracies": 0.75, "rewards/chosen": -2.58207368850708, "rewards/margins": 4.376163482666016, "rewards/rejected": -6.958237648010254, "step": 1523 }, { "epoch": 2.45, "learning_rate": 4.0457788347205706e-07, "logits/chosen": -1.5766592025756836, "logits/rejected": -1.4725664854049683, "logps/chosen": -112.04310607910156, "logps/rejected": -110.5008316040039, "loss": 0.246, "rewards/accuracies": 0.75, "rewards/chosen": -2.258474349975586, "rewards/margins": 2.486903429031372, "rewards/rejected": -4.745377540588379, "step": 1524 }, { "epoch": 2.45, "learning_rate": 4.0447879508521596e-07, "logits/chosen": -1.6960489749908447, "logits/rejected": -1.6271402835845947, "logps/chosen": -92.46038818359375, "logps/rejected": -116.66616821289062, "loss": 0.173, "rewards/accuracies": 1.0, "rewards/chosen": -0.8856247067451477, "rewards/margins": 1.6981244087219238, "rewards/rejected": -2.5837488174438477, "step": 1525 }, { "epoch": 2.45, "learning_rate": 4.04379706698375e-07, "logits/chosen": -1.7546565532684326, "logits/rejected": -1.7680683135986328, "logps/chosen": -84.20319366455078, "logps/rejected": -147.97560119628906, "loss": 0.1453, "rewards/accuracies": 1.0, "rewards/chosen": -0.43491172790527344, "rewards/margins": 6.219766616821289, "rewards/rejected": -6.6546783447265625, "step": 1526 }, { "epoch": 2.45, "learning_rate": 4.042806183115339e-07, "logits/chosen": -1.7380907535552979, "logits/rejected": -1.7533730268478394, "logps/chosen": -85.42982482910156, "logps/rejected": -141.46109008789062, "loss": 0.1251, "rewards/accuracies": 1.0, "rewards/chosen": -0.5380926132202148, "rewards/margins": 3.7549023628234863, "rewards/rejected": -4.292995452880859, "step": 1527 }, { "epoch": 2.45, "learning_rate": 4.041815299246928e-07, "logits/chosen": -1.5744197368621826, "logits/rejected": -1.5202808380126953, "logps/chosen": -91.15573120117188, "logps/rejected": -127.35403442382812, "loss": 0.1009, "rewards/accuracies": 1.0, "rewards/chosen": -1.4105987548828125, "rewards/margins": 5.134240627288818, "rewards/rejected": -6.544839859008789, "step": 1528 }, { "epoch": 2.45, "learning_rate": 4.0408244153785175e-07, "logits/chosen": -1.639115571975708, "logits/rejected": -1.7042914628982544, "logps/chosen": -111.60499572753906, "logps/rejected": -145.43470764160156, "loss": 0.181, "rewards/accuracies": 0.75, "rewards/chosen": -1.4815019369125366, "rewards/margins": 1.939723253250122, "rewards/rejected": -3.4212253093719482, "step": 1529 }, { "epoch": 2.46, "learning_rate": 4.0398335315101066e-07, "logits/chosen": -1.4887815713882446, "logits/rejected": -1.5208137035369873, "logps/chosen": -101.59739685058594, "logps/rejected": -123.59337615966797, "loss": 0.1407, "rewards/accuracies": 1.0, "rewards/chosen": -1.792190670967102, "rewards/margins": 2.4735848903656006, "rewards/rejected": -4.265775680541992, "step": 1530 }, { "epoch": 2.46, "learning_rate": 4.0388426476416967e-07, "logits/chosen": -1.7882249355316162, "logits/rejected": -1.7644145488739014, "logps/chosen": -90.46642303466797, "logps/rejected": -140.2609405517578, "loss": 0.2106, "rewards/accuracies": 0.75, "rewards/chosen": -1.1742628812789917, "rewards/margins": 5.204369068145752, "rewards/rejected": -6.378632068634033, "step": 1531 }, { "epoch": 2.46, "learning_rate": 4.037851763773286e-07, "logits/chosen": -1.7011228799819946, "logits/rejected": -1.6157504320144653, "logps/chosen": -75.59807586669922, "logps/rejected": -139.660888671875, "loss": 0.1558, "rewards/accuracies": 1.0, "rewards/chosen": -0.05092087388038635, "rewards/margins": 6.48737907409668, "rewards/rejected": -6.538300037384033, "step": 1532 }, { "epoch": 2.46, "learning_rate": 4.036860879904875e-07, "logits/chosen": -1.6336381435394287, "logits/rejected": -1.611140489578247, "logps/chosen": -106.57609558105469, "logps/rejected": -136.16159057617188, "loss": 0.2473, "rewards/accuracies": 1.0, "rewards/chosen": -1.8734407424926758, "rewards/margins": 2.5583033561706543, "rewards/rejected": -4.431743621826172, "step": 1533 }, { "epoch": 2.46, "learning_rate": 4.0358699960364644e-07, "logits/chosen": -1.6688958406448364, "logits/rejected": -1.6682106256484985, "logps/chosen": -117.84190368652344, "logps/rejected": -139.07240295410156, "loss": 0.1183, "rewards/accuracies": 1.0, "rewards/chosen": -2.3286666870117188, "rewards/margins": 3.041973114013672, "rewards/rejected": -5.370639801025391, "step": 1534 }, { "epoch": 2.46, "learning_rate": 4.0348791121680535e-07, "logits/chosen": -1.5337060689926147, "logits/rejected": -1.5028185844421387, "logps/chosen": -83.20748901367188, "logps/rejected": -148.07830810546875, "loss": 0.1417, "rewards/accuracies": 1.0, "rewards/chosen": -0.4818728268146515, "rewards/margins": 6.372774600982666, "rewards/rejected": -6.854646682739258, "step": 1535 }, { "epoch": 2.47, "learning_rate": 4.0338882282996436e-07, "logits/chosen": -1.5931715965270996, "logits/rejected": -1.5353106260299683, "logps/chosen": -105.6378402709961, "logps/rejected": -162.3219451904297, "loss": 0.0849, "rewards/accuracies": 1.0, "rewards/chosen": -1.3020728826522827, "rewards/margins": 7.837519645690918, "rewards/rejected": -9.139593124389648, "step": 1536 }, { "epoch": 2.47, "learning_rate": 4.0328973444312327e-07, "logits/chosen": -1.6676157712936401, "logits/rejected": -1.6871238946914673, "logps/chosen": -126.96955108642578, "logps/rejected": -149.88645935058594, "loss": 0.0715, "rewards/accuracies": 1.0, "rewards/chosen": -1.9541144371032715, "rewards/margins": 4.077503681182861, "rewards/rejected": -6.031618118286133, "step": 1537 }, { "epoch": 2.47, "learning_rate": 4.031906460562822e-07, "logits/chosen": -1.756999135017395, "logits/rejected": -1.8051962852478027, "logps/chosen": -88.68598937988281, "logps/rejected": -173.6085662841797, "loss": 0.1023, "rewards/accuracies": 1.0, "rewards/chosen": -0.5411515831947327, "rewards/margins": 6.178904056549072, "rewards/rejected": -6.72005558013916, "step": 1538 }, { "epoch": 2.47, "learning_rate": 4.0309155766944114e-07, "logits/chosen": -1.6535563468933105, "logits/rejected": -1.626357913017273, "logps/chosen": -81.6829833984375, "logps/rejected": -101.99337005615234, "loss": 0.0433, "rewards/accuracies": 1.0, "rewards/chosen": -0.5963294506072998, "rewards/margins": 2.718825101852417, "rewards/rejected": -3.315154552459717, "step": 1539 }, { "epoch": 2.47, "learning_rate": 4.0299246928260004e-07, "logits/chosen": -1.7715355157852173, "logits/rejected": -1.717128038406372, "logps/chosen": -104.78874969482422, "logps/rejected": -124.2789535522461, "loss": 0.104, "rewards/accuracies": 1.0, "rewards/chosen": -1.814539909362793, "rewards/margins": 2.5212225914001465, "rewards/rejected": -4.3357625007629395, "step": 1540 }, { "epoch": 2.47, "learning_rate": 4.0289338089575895e-07, "logits/chosen": -1.700101375579834, "logits/rejected": -1.6317527294158936, "logps/chosen": -110.13068389892578, "logps/rejected": -147.58824157714844, "loss": 0.0831, "rewards/accuracies": 1.0, "rewards/chosen": -1.3511316776275635, "rewards/margins": 5.090221405029297, "rewards/rejected": -6.441352844238281, "step": 1541 }, { "epoch": 2.48, "learning_rate": 4.0279429250891796e-07, "logits/chosen": -1.9065167903900146, "logits/rejected": -1.7883718013763428, "logps/chosen": -86.64779663085938, "logps/rejected": -135.6343231201172, "loss": 0.0639, "rewards/accuracies": 1.0, "rewards/chosen": -0.2539987564086914, "rewards/margins": 6.186154365539551, "rewards/rejected": -6.440153121948242, "step": 1542 }, { "epoch": 2.48, "learning_rate": 4.0269520412207687e-07, "logits/chosen": -1.6166367530822754, "logits/rejected": -1.695024013519287, "logps/chosen": -104.8846435546875, "logps/rejected": -147.3314971923828, "loss": 0.0752, "rewards/accuracies": 1.0, "rewards/chosen": -0.7993589639663696, "rewards/margins": 5.564014434814453, "rewards/rejected": -6.363373279571533, "step": 1543 }, { "epoch": 2.48, "learning_rate": 4.0259611573523583e-07, "logits/chosen": -1.5517076253890991, "logits/rejected": -1.5266845226287842, "logps/chosen": -87.93705749511719, "logps/rejected": -170.57333374023438, "loss": 0.0518, "rewards/accuracies": 1.0, "rewards/chosen": 0.6302905082702637, "rewards/margins": 6.197275161743164, "rewards/rejected": -5.566984176635742, "step": 1544 }, { "epoch": 2.48, "learning_rate": 4.0249702734839474e-07, "logits/chosen": -1.5856419801712036, "logits/rejected": -1.6471059322357178, "logps/chosen": -97.21983337402344, "logps/rejected": -150.32290649414062, "loss": 0.1302, "rewards/accuracies": 1.0, "rewards/chosen": -0.8491491675376892, "rewards/margins": 4.633986949920654, "rewards/rejected": -5.4831366539001465, "step": 1545 }, { "epoch": 2.48, "learning_rate": 4.0239793896155364e-07, "logits/chosen": -1.6190826892852783, "logits/rejected": -1.5980753898620605, "logps/chosen": -108.26102447509766, "logps/rejected": -135.6266326904297, "loss": 0.1354, "rewards/accuracies": 1.0, "rewards/chosen": -1.9202570915222168, "rewards/margins": 3.27478289604187, "rewards/rejected": -5.195040225982666, "step": 1546 }, { "epoch": 2.48, "learning_rate": 4.0229885057471266e-07, "logits/chosen": -1.6052682399749756, "logits/rejected": -1.6060292720794678, "logps/chosen": -93.7086181640625, "logps/rejected": -164.75408935546875, "loss": 0.0449, "rewards/accuracies": 1.0, "rewards/chosen": -1.161731481552124, "rewards/margins": 5.436088562011719, "rewards/rejected": -6.597820281982422, "step": 1547 }, { "epoch": 2.48, "learning_rate": 4.0219976218787156e-07, "logits/chosen": -1.6968213319778442, "logits/rejected": -1.6516337394714355, "logps/chosen": -107.81195068359375, "logps/rejected": -149.43711853027344, "loss": 0.1159, "rewards/accuracies": 1.0, "rewards/chosen": -1.0313009023666382, "rewards/margins": 6.073184967041016, "rewards/rejected": -7.104485511779785, "step": 1548 }, { "epoch": 2.49, "learning_rate": 4.021006738010305e-07, "logits/chosen": -1.584862470626831, "logits/rejected": -1.6257168054580688, "logps/chosen": -91.61494445800781, "logps/rejected": -134.11886596679688, "loss": 0.1308, "rewards/accuracies": 0.75, "rewards/chosen": -0.05718068778514862, "rewards/margins": 3.3961164951324463, "rewards/rejected": -3.4532973766326904, "step": 1549 }, { "epoch": 2.49, "learning_rate": 4.0200158541418943e-07, "logits/chosen": -1.5913203954696655, "logits/rejected": -1.6108205318450928, "logps/chosen": -82.05184936523438, "logps/rejected": -151.38418579101562, "loss": 0.05, "rewards/accuracies": 1.0, "rewards/chosen": -1.029408574104309, "rewards/margins": 6.295265197753906, "rewards/rejected": -7.324673652648926, "step": 1550 }, { "epoch": 2.49, "learning_rate": 4.0190249702734834e-07, "logits/chosen": -1.6398818492889404, "logits/rejected": -1.6251544952392578, "logps/chosen": -71.79005432128906, "logps/rejected": -116.27836608886719, "loss": 0.1784, "rewards/accuracies": 1.0, "rewards/chosen": 0.06548377871513367, "rewards/margins": 4.368442058563232, "rewards/rejected": -4.3029584884643555, "step": 1551 }, { "epoch": 2.49, "learning_rate": 4.0180340864050735e-07, "logits/chosen": -1.6087193489074707, "logits/rejected": -1.5955302715301514, "logps/chosen": -63.87066650390625, "logps/rejected": -115.04846954345703, "loss": 0.0361, "rewards/accuracies": 1.0, "rewards/chosen": 0.12157170474529266, "rewards/margins": 6.131241798400879, "rewards/rejected": -6.009669303894043, "step": 1552 }, { "epoch": 2.49, "learning_rate": 4.0170432025366626e-07, "logits/chosen": -1.6494303941726685, "logits/rejected": -1.742544412612915, "logps/chosen": -86.73298645019531, "logps/rejected": -138.45066833496094, "loss": 0.1101, "rewards/accuracies": 1.0, "rewards/chosen": -1.009513258934021, "rewards/margins": 3.5808708667755127, "rewards/rejected": -4.590384006500244, "step": 1553 }, { "epoch": 2.49, "learning_rate": 4.016052318668252e-07, "logits/chosen": -1.726935863494873, "logits/rejected": -1.702909231185913, "logps/chosen": -94.014892578125, "logps/rejected": -174.6034393310547, "loss": 0.1032, "rewards/accuracies": 1.0, "rewards/chosen": -1.0027711391448975, "rewards/margins": 8.567899703979492, "rewards/rejected": -9.570670127868652, "step": 1554 }, { "epoch": 2.5, "learning_rate": 4.015061434799841e-07, "logits/chosen": -1.4530725479125977, "logits/rejected": -1.584232211112976, "logps/chosen": -109.99172973632812, "logps/rejected": -148.54454040527344, "loss": 0.1122, "rewards/accuracies": 0.75, "rewards/chosen": -0.4750698208808899, "rewards/margins": 3.9654531478881836, "rewards/rejected": -4.440523147583008, "step": 1555 }, { "epoch": 2.5, "learning_rate": 4.0140705509314303e-07, "logits/chosen": -1.640792965888977, "logits/rejected": -1.6850590705871582, "logps/chosen": -83.00566101074219, "logps/rejected": -139.25643920898438, "loss": 0.1624, "rewards/accuracies": 0.75, "rewards/chosen": -0.630255937576294, "rewards/margins": 4.776291370391846, "rewards/rejected": -5.406547546386719, "step": 1556 }, { "epoch": 2.5, "learning_rate": 4.01307966706302e-07, "logits/chosen": -1.7018249034881592, "logits/rejected": -1.6817395687103271, "logps/chosen": -105.99757385253906, "logps/rejected": -152.07693481445312, "loss": 0.054, "rewards/accuracies": 1.0, "rewards/chosen": -0.6234740018844604, "rewards/margins": 4.7787322998046875, "rewards/rejected": -5.402205944061279, "step": 1557 }, { "epoch": 2.5, "learning_rate": 4.0120887831946095e-07, "logits/chosen": -1.4879838228225708, "logits/rejected": -1.6117268800735474, "logps/chosen": -99.0791015625, "logps/rejected": -160.3154754638672, "loss": 0.1127, "rewards/accuracies": 1.0, "rewards/chosen": -1.1313610076904297, "rewards/margins": 4.499762058258057, "rewards/rejected": -5.631122589111328, "step": 1558 }, { "epoch": 2.5, "learning_rate": 4.011097899326199e-07, "logits/chosen": -1.5913962125778198, "logits/rejected": -1.5591298341751099, "logps/chosen": -100.78785705566406, "logps/rejected": -154.5647735595703, "loss": 0.0565, "rewards/accuracies": 1.0, "rewards/chosen": -2.278231382369995, "rewards/margins": 4.117045879364014, "rewards/rejected": -6.39527702331543, "step": 1559 }, { "epoch": 2.5, "learning_rate": 4.010107015457788e-07, "logits/chosen": -1.7632079124450684, "logits/rejected": -1.7530444860458374, "logps/chosen": -98.27313995361328, "logps/rejected": -142.61573791503906, "loss": 0.2421, "rewards/accuracies": 1.0, "rewards/chosen": -0.828830361366272, "rewards/margins": 5.477084636688232, "rewards/rejected": -6.305914402008057, "step": 1560 }, { "epoch": 2.51, "learning_rate": 4.009116131589377e-07, "logits/chosen": -1.6196459531784058, "logits/rejected": -1.6568164825439453, "logps/chosen": -95.01111602783203, "logps/rejected": -134.52291870117188, "loss": 0.1845, "rewards/accuracies": 0.75, "rewards/chosen": -1.5717241764068604, "rewards/margins": 2.791172504425049, "rewards/rejected": -4.362896919250488, "step": 1561 }, { "epoch": 2.51, "learning_rate": 4.008125247720967e-07, "logits/chosen": -1.7564078569412231, "logits/rejected": -1.62039315700531, "logps/chosen": -119.79519653320312, "logps/rejected": -136.345703125, "loss": 0.2036, "rewards/accuracies": 1.0, "rewards/chosen": -1.440758228302002, "rewards/margins": 4.721499443054199, "rewards/rejected": -6.162257671356201, "step": 1562 }, { "epoch": 2.51, "learning_rate": 4.0071343638525564e-07, "logits/chosen": -1.7026729583740234, "logits/rejected": -1.648584008216858, "logps/chosen": -91.40043640136719, "logps/rejected": -147.43435668945312, "loss": 0.0484, "rewards/accuracies": 1.0, "rewards/chosen": -0.4549716114997864, "rewards/margins": 6.302824020385742, "rewards/rejected": -6.757795810699463, "step": 1563 }, { "epoch": 2.51, "learning_rate": 4.006143479984146e-07, "logits/chosen": -1.5825023651123047, "logits/rejected": -1.497096061706543, "logps/chosen": -100.11810302734375, "logps/rejected": -157.0918426513672, "loss": 0.0966, "rewards/accuracies": 1.0, "rewards/chosen": -0.6713564395904541, "rewards/margins": 4.074806213378906, "rewards/rejected": -4.746163368225098, "step": 1564 }, { "epoch": 2.51, "learning_rate": 4.005152596115735e-07, "logits/chosen": -1.6536504030227661, "logits/rejected": -1.6612420082092285, "logps/chosen": -108.87899780273438, "logps/rejected": -137.2091064453125, "loss": 0.0652, "rewards/accuracies": 1.0, "rewards/chosen": -0.7982808947563171, "rewards/margins": 2.0841941833496094, "rewards/rejected": -2.8824751377105713, "step": 1565 }, { "epoch": 2.51, "learning_rate": 4.004161712247324e-07, "logits/chosen": -1.727595329284668, "logits/rejected": -1.6760400533676147, "logps/chosen": -92.77307891845703, "logps/rejected": -147.87680053710938, "loss": 0.1908, "rewards/accuracies": 1.0, "rewards/chosen": -0.9082643389701843, "rewards/margins": 6.341095924377441, "rewards/rejected": -7.24936056137085, "step": 1566 }, { "epoch": 2.52, "learning_rate": 4.003170828378914e-07, "logits/chosen": -1.530552625656128, "logits/rejected": -1.4957835674285889, "logps/chosen": -151.33584594726562, "logps/rejected": -131.0881805419922, "loss": 0.1157, "rewards/accuracies": 1.0, "rewards/chosen": -0.8131237030029297, "rewards/margins": 3.316512107849121, "rewards/rejected": -4.129635810852051, "step": 1567 }, { "epoch": 2.52, "learning_rate": 4.0021799445105033e-07, "logits/chosen": -1.8309082984924316, "logits/rejected": -1.774534821510315, "logps/chosen": -98.18014526367188, "logps/rejected": -150.80996704101562, "loss": 0.123, "rewards/accuracies": 1.0, "rewards/chosen": -1.214583158493042, "rewards/margins": 5.488448619842529, "rewards/rejected": -6.70303201675415, "step": 1568 }, { "epoch": 2.52, "learning_rate": 4.001189060642093e-07, "logits/chosen": -1.5470460653305054, "logits/rejected": -1.5462857484817505, "logps/chosen": -88.42259216308594, "logps/rejected": -139.3072052001953, "loss": 0.1608, "rewards/accuracies": 1.0, "rewards/chosen": -0.6979385614395142, "rewards/margins": 4.841582775115967, "rewards/rejected": -5.539521217346191, "step": 1569 }, { "epoch": 2.52, "learning_rate": 4.000198176773682e-07, "logits/chosen": -1.6583056449890137, "logits/rejected": -1.6451210975646973, "logps/chosen": -85.47271728515625, "logps/rejected": -142.15225219726562, "loss": 0.1357, "rewards/accuracies": 1.0, "rewards/chosen": -0.659433901309967, "rewards/margins": 5.212213039398193, "rewards/rejected": -5.871646881103516, "step": 1570 }, { "epoch": 2.52, "learning_rate": 3.999207292905271e-07, "logits/chosen": -1.6847831010818481, "logits/rejected": -1.674706220626831, "logps/chosen": -99.36507415771484, "logps/rejected": -184.79019165039062, "loss": 0.0757, "rewards/accuracies": 1.0, "rewards/chosen": -1.8249037265777588, "rewards/margins": 7.190060138702393, "rewards/rejected": -9.01496410369873, "step": 1571 }, { "epoch": 2.52, "learning_rate": 3.9982164090368607e-07, "logits/chosen": -1.5266740322113037, "logits/rejected": -1.620421290397644, "logps/chosen": -88.97676086425781, "logps/rejected": -176.86361694335938, "loss": 0.0823, "rewards/accuracies": 1.0, "rewards/chosen": -2.2191855907440186, "rewards/margins": 5.633896827697754, "rewards/rejected": -7.853082180023193, "step": 1572 }, { "epoch": 2.52, "learning_rate": 3.9972255251684503e-07, "logits/chosen": -1.6284303665161133, "logits/rejected": -1.575615406036377, "logps/chosen": -93.06710815429688, "logps/rejected": -181.22596740722656, "loss": 0.2806, "rewards/accuracies": 1.0, "rewards/chosen": -1.2214094400405884, "rewards/margins": 5.594162940979004, "rewards/rejected": -6.8155717849731445, "step": 1573 }, { "epoch": 2.53, "learning_rate": 3.9962346413000393e-07, "logits/chosen": -1.6996428966522217, "logits/rejected": -1.7534018754959106, "logps/chosen": -101.12648010253906, "logps/rejected": -206.93905639648438, "loss": 0.1393, "rewards/accuracies": 1.0, "rewards/chosen": -1.5073946714401245, "rewards/margins": 7.869297027587891, "rewards/rejected": -9.376691818237305, "step": 1574 }, { "epoch": 2.53, "learning_rate": 3.995243757431629e-07, "logits/chosen": -1.8097803592681885, "logits/rejected": -1.6685130596160889, "logps/chosen": -90.11126708984375, "logps/rejected": -117.04840850830078, "loss": 0.2093, "rewards/accuracies": 1.0, "rewards/chosen": -0.36917388439178467, "rewards/margins": 4.064358234405518, "rewards/rejected": -4.433532238006592, "step": 1575 }, { "epoch": 2.53, "learning_rate": 3.994252873563218e-07, "logits/chosen": -1.6094155311584473, "logits/rejected": -1.6077075004577637, "logps/chosen": -91.07878112792969, "logps/rejected": -126.92086029052734, "loss": 0.1041, "rewards/accuracies": 1.0, "rewards/chosen": -1.519827961921692, "rewards/margins": 2.472761392593384, "rewards/rejected": -3.9925894737243652, "step": 1576 }, { "epoch": 2.53, "learning_rate": 3.9932619896948076e-07, "logits/chosen": -1.5651779174804688, "logits/rejected": -1.532698392868042, "logps/chosen": -94.1179428100586, "logps/rejected": -179.32701110839844, "loss": 0.088, "rewards/accuracies": 1.0, "rewards/chosen": -1.5506263971328735, "rewards/margins": 6.633845806121826, "rewards/rejected": -8.18447208404541, "step": 1577 }, { "epoch": 2.53, "learning_rate": 3.9922711058263967e-07, "logits/chosen": -1.5924263000488281, "logits/rejected": -1.450516700744629, "logps/chosen": -104.43159484863281, "logps/rejected": -134.54637145996094, "loss": 0.1261, "rewards/accuracies": 1.0, "rewards/chosen": -2.454681396484375, "rewards/margins": 4.193905353546143, "rewards/rejected": -6.648587226867676, "step": 1578 }, { "epoch": 2.53, "learning_rate": 3.9912802219579863e-07, "logits/chosen": -1.4787063598632812, "logits/rejected": -1.5085644721984863, "logps/chosen": -71.24537658691406, "logps/rejected": -135.27700805664062, "loss": 0.1107, "rewards/accuracies": 1.0, "rewards/chosen": -1.253676176071167, "rewards/margins": 4.544859409332275, "rewards/rejected": -5.7985358238220215, "step": 1579 }, { "epoch": 2.54, "learning_rate": 3.990289338089576e-07, "logits/chosen": -1.5970096588134766, "logits/rejected": -1.558953881263733, "logps/chosen": -121.43826293945312, "logps/rejected": -143.6401824951172, "loss": 0.0659, "rewards/accuracies": 1.0, "rewards/chosen": -1.821876049041748, "rewards/margins": 3.3712503910064697, "rewards/rejected": -5.193126678466797, "step": 1580 }, { "epoch": 2.54, "learning_rate": 3.989298454221165e-07, "logits/chosen": -1.7156455516815186, "logits/rejected": -1.7856324911117554, "logps/chosen": -101.93797302246094, "logps/rejected": -155.19033813476562, "loss": 0.1442, "rewards/accuracies": 1.0, "rewards/chosen": -2.4950289726257324, "rewards/margins": 3.09779953956604, "rewards/rejected": -5.592828750610352, "step": 1581 }, { "epoch": 2.54, "learning_rate": 3.9883075703527545e-07, "logits/chosen": -1.8148295879364014, "logits/rejected": -1.6176869869232178, "logps/chosen": -110.08763885498047, "logps/rejected": -125.24375915527344, "loss": 0.0952, "rewards/accuracies": 1.0, "rewards/chosen": -1.5866113901138306, "rewards/margins": 4.323555946350098, "rewards/rejected": -5.910167217254639, "step": 1582 }, { "epoch": 2.54, "learning_rate": 3.9873166864843436e-07, "logits/chosen": -1.5242574214935303, "logits/rejected": -1.649563193321228, "logps/chosen": -66.35914611816406, "logps/rejected": -134.16854858398438, "loss": 0.0785, "rewards/accuracies": 1.0, "rewards/chosen": -0.6833280324935913, "rewards/margins": 5.45789909362793, "rewards/rejected": -6.141226768493652, "step": 1583 }, { "epoch": 2.54, "learning_rate": 3.986325802615933e-07, "logits/chosen": -1.5379817485809326, "logits/rejected": -1.5086207389831543, "logps/chosen": -78.02264404296875, "logps/rejected": -144.61300659179688, "loss": 0.0984, "rewards/accuracies": 1.0, "rewards/chosen": -0.40447044372558594, "rewards/margins": 6.240036487579346, "rewards/rejected": -6.64450740814209, "step": 1584 }, { "epoch": 2.54, "learning_rate": 3.985334918747523e-07, "logits/chosen": -1.5179221630096436, "logits/rejected": -1.6315691471099854, "logps/chosen": -90.12229919433594, "logps/rejected": -145.6233367919922, "loss": 0.3098, "rewards/accuracies": 1.0, "rewards/chosen": -1.5580155849456787, "rewards/margins": 2.5816168785095215, "rewards/rejected": -4.139632701873779, "step": 1585 }, { "epoch": 2.55, "learning_rate": 3.984344034879112e-07, "logits/chosen": -1.600441813468933, "logits/rejected": -1.620929479598999, "logps/chosen": -110.63578796386719, "logps/rejected": -170.987548828125, "loss": 0.0912, "rewards/accuracies": 1.0, "rewards/chosen": -1.124908447265625, "rewards/margins": 5.0158891677856445, "rewards/rejected": -6.140798091888428, "step": 1586 }, { "epoch": 2.55, "learning_rate": 3.9833531510107015e-07, "logits/chosen": -1.560804843902588, "logits/rejected": -1.5974311828613281, "logps/chosen": -79.38702392578125, "logps/rejected": -143.84593200683594, "loss": 0.1227, "rewards/accuracies": 1.0, "rewards/chosen": -1.782480239868164, "rewards/margins": 4.673681735992432, "rewards/rejected": -6.456161975860596, "step": 1587 }, { "epoch": 2.55, "learning_rate": 3.9823622671422905e-07, "logits/chosen": -1.4683737754821777, "logits/rejected": -1.4383587837219238, "logps/chosen": -95.92796325683594, "logps/rejected": -175.01858520507812, "loss": 0.0487, "rewards/accuracies": 1.0, "rewards/chosen": -1.3718544244766235, "rewards/margins": 7.066441535949707, "rewards/rejected": -8.438295364379883, "step": 1588 }, { "epoch": 2.55, "learning_rate": 3.98137138327388e-07, "logits/chosen": -1.7322998046875, "logits/rejected": -1.710734248161316, "logps/chosen": -100.8972396850586, "logps/rejected": -131.4492950439453, "loss": 0.0557, "rewards/accuracies": 0.75, "rewards/chosen": -3.247551441192627, "rewards/margins": 3.1347036361694336, "rewards/rejected": -6.3822550773620605, "step": 1589 }, { "epoch": 2.55, "learning_rate": 3.9803804994054697e-07, "logits/chosen": -1.7330348491668701, "logits/rejected": -1.7054024934768677, "logps/chosen": -111.55364990234375, "logps/rejected": -131.46124267578125, "loss": 0.1887, "rewards/accuracies": 1.0, "rewards/chosen": -0.4935653805732727, "rewards/margins": 4.084343433380127, "rewards/rejected": -4.577908992767334, "step": 1590 }, { "epoch": 2.55, "learning_rate": 3.979389615537059e-07, "logits/chosen": -1.8348705768585205, "logits/rejected": -1.613537311553955, "logps/chosen": -118.7841796875, "logps/rejected": -124.66348266601562, "loss": 0.1372, "rewards/accuracies": 1.0, "rewards/chosen": -2.255164623260498, "rewards/margins": 2.899994373321533, "rewards/rejected": -5.155158996582031, "step": 1591 }, { "epoch": 2.56, "learning_rate": 3.9783987316686484e-07, "logits/chosen": -1.633135437965393, "logits/rejected": -1.589152216911316, "logps/chosen": -86.32500457763672, "logps/rejected": -143.56466674804688, "loss": 0.0894, "rewards/accuracies": 1.0, "rewards/chosen": -0.27792778611183167, "rewards/margins": 5.609485626220703, "rewards/rejected": -5.887413501739502, "step": 1592 }, { "epoch": 2.56, "learning_rate": 3.9774078478002375e-07, "logits/chosen": -1.6260837316513062, "logits/rejected": -1.5902106761932373, "logps/chosen": -110.70323181152344, "logps/rejected": -133.01734924316406, "loss": 0.1557, "rewards/accuracies": 1.0, "rewards/chosen": -0.9546393156051636, "rewards/margins": 3.7209715843200684, "rewards/rejected": -4.6756110191345215, "step": 1593 }, { "epoch": 2.56, "learning_rate": 3.976416963931827e-07, "logits/chosen": -1.814721703529358, "logits/rejected": -1.793402910232544, "logps/chosen": -105.3807144165039, "logps/rejected": -129.46099853515625, "loss": 0.187, "rewards/accuracies": 0.75, "rewards/chosen": -2.3332536220550537, "rewards/margins": 2.9713120460510254, "rewards/rejected": -5.304565906524658, "step": 1594 }, { "epoch": 2.56, "learning_rate": 3.9754260800634167e-07, "logits/chosen": -1.599279761314392, "logits/rejected": -1.6641438007354736, "logps/chosen": -120.28355407714844, "logps/rejected": -186.97964477539062, "loss": 0.0938, "rewards/accuracies": 1.0, "rewards/chosen": -2.368260622024536, "rewards/margins": 6.632912635803223, "rewards/rejected": -9.001173973083496, "step": 1595 }, { "epoch": 2.56, "learning_rate": 3.9744351961950057e-07, "logits/chosen": -1.7605721950531006, "logits/rejected": -1.779759168624878, "logps/chosen": -87.25627136230469, "logps/rejected": -174.69989013671875, "loss": 0.0345, "rewards/accuracies": 1.0, "rewards/chosen": -0.6949288845062256, "rewards/margins": 7.828545570373535, "rewards/rejected": -8.523473739624023, "step": 1596 }, { "epoch": 2.56, "learning_rate": 3.9734443123265953e-07, "logits/chosen": -1.7103278636932373, "logits/rejected": -1.6767277717590332, "logps/chosen": -90.26966857910156, "logps/rejected": -139.5058135986328, "loss": 0.1485, "rewards/accuracies": 1.0, "rewards/chosen": -0.27963343262672424, "rewards/margins": 6.556266784667969, "rewards/rejected": -6.835899353027344, "step": 1597 }, { "epoch": 2.57, "learning_rate": 3.9724534284581844e-07, "logits/chosen": -1.6779993772506714, "logits/rejected": -1.6295992136001587, "logps/chosen": -112.949951171875, "logps/rejected": -138.36692810058594, "loss": 0.0847, "rewards/accuracies": 1.0, "rewards/chosen": -0.3521217405796051, "rewards/margins": 3.686206579208374, "rewards/rejected": -4.038328647613525, "step": 1598 }, { "epoch": 2.57, "learning_rate": 3.9714625445897735e-07, "logits/chosen": -1.8347086906433105, "logits/rejected": -1.7368643283843994, "logps/chosen": -117.81710815429688, "logps/rejected": -162.89442443847656, "loss": 0.2033, "rewards/accuracies": 1.0, "rewards/chosen": -1.3300954103469849, "rewards/margins": 5.813445091247559, "rewards/rejected": -7.143540382385254, "step": 1599 }, { "epoch": 2.57, "learning_rate": 3.9704716607213636e-07, "logits/chosen": -1.7208443880081177, "logits/rejected": -1.6522516012191772, "logps/chosen": -106.51007080078125, "logps/rejected": -142.56048583984375, "loss": 0.0683, "rewards/accuracies": 1.0, "rewards/chosen": -1.1910394430160522, "rewards/margins": 4.712832450866699, "rewards/rejected": -5.903872013092041, "step": 1600 }, { "epoch": 2.57, "learning_rate": 3.9694807768529527e-07, "logits/chosen": -1.5650928020477295, "logits/rejected": -1.6778311729431152, "logps/chosen": -113.66629028320312, "logps/rejected": -183.86599731445312, "loss": 0.0942, "rewards/accuracies": 1.0, "rewards/chosen": -1.6565715074539185, "rewards/margins": 6.361757278442383, "rewards/rejected": -8.018327713012695, "step": 1601 }, { "epoch": 2.57, "learning_rate": 3.968489892984542e-07, "logits/chosen": -1.7224628925323486, "logits/rejected": -1.7024729251861572, "logps/chosen": -136.17758178710938, "logps/rejected": -168.82843017578125, "loss": 0.0979, "rewards/accuracies": 0.75, "rewards/chosen": -2.0839645862579346, "rewards/margins": 3.2652587890625, "rewards/rejected": -5.349223613739014, "step": 1602 }, { "epoch": 2.57, "learning_rate": 3.9674990091161313e-07, "logits/chosen": -1.631959080696106, "logits/rejected": -1.5801258087158203, "logps/chosen": -99.63860321044922, "logps/rejected": -133.9796142578125, "loss": 0.0752, "rewards/accuracies": 1.0, "rewards/chosen": -1.2639216184616089, "rewards/margins": 3.943310022354126, "rewards/rejected": -5.207231521606445, "step": 1603 }, { "epoch": 2.57, "learning_rate": 3.9665081252477204e-07, "logits/chosen": -1.6479599475860596, "logits/rejected": -1.7490185499191284, "logps/chosen": -70.61756134033203, "logps/rejected": -124.90959167480469, "loss": 0.0889, "rewards/accuracies": 1.0, "rewards/chosen": -0.9974884986877441, "rewards/margins": 3.7811386585235596, "rewards/rejected": -4.778626918792725, "step": 1604 }, { "epoch": 2.58, "learning_rate": 3.9655172413793105e-07, "logits/chosen": -1.6024096012115479, "logits/rejected": -1.698488473892212, "logps/chosen": -79.61404418945312, "logps/rejected": -158.06600952148438, "loss": 0.0832, "rewards/accuracies": 1.0, "rewards/chosen": -0.7447934150695801, "rewards/margins": 5.869002342224121, "rewards/rejected": -6.613795280456543, "step": 1605 }, { "epoch": 2.58, "learning_rate": 3.9645263575108996e-07, "logits/chosen": -1.7745715379714966, "logits/rejected": -1.7692279815673828, "logps/chosen": -107.55937194824219, "logps/rejected": -131.77157592773438, "loss": 0.2429, "rewards/accuracies": 0.75, "rewards/chosen": -1.4012285470962524, "rewards/margins": 3.2497127056121826, "rewards/rejected": -4.650940895080566, "step": 1606 }, { "epoch": 2.58, "learning_rate": 3.963535473642489e-07, "logits/chosen": -1.7383382320404053, "logits/rejected": -1.7177146673202515, "logps/chosen": -94.09730529785156, "logps/rejected": -142.09109497070312, "loss": 0.1086, "rewards/accuracies": 1.0, "rewards/chosen": -0.6904318928718567, "rewards/margins": 4.734926223754883, "rewards/rejected": -5.425358295440674, "step": 1607 }, { "epoch": 2.58, "learning_rate": 3.962544589774078e-07, "logits/chosen": -1.6292829513549805, "logits/rejected": -1.6647508144378662, "logps/chosen": -78.71209716796875, "logps/rejected": -140.18771362304688, "loss": 0.0711, "rewards/accuracies": 1.0, "rewards/chosen": -0.3100743889808655, "rewards/margins": 4.240592956542969, "rewards/rejected": -4.550667762756348, "step": 1608 }, { "epoch": 2.58, "learning_rate": 3.9615537059056673e-07, "logits/chosen": -1.6707358360290527, "logits/rejected": -1.6981303691864014, "logps/chosen": -96.77862548828125, "logps/rejected": -148.6749267578125, "loss": 0.0509, "rewards/accuracies": 1.0, "rewards/chosen": -0.8654155731201172, "rewards/margins": 5.460432052612305, "rewards/rejected": -6.325847625732422, "step": 1609 }, { "epoch": 2.58, "learning_rate": 3.9605628220372574e-07, "logits/chosen": -1.7408658266067505, "logits/rejected": -1.7753620147705078, "logps/chosen": -87.6339111328125, "logps/rejected": -158.48348999023438, "loss": 0.0727, "rewards/accuracies": 1.0, "rewards/chosen": -0.9760258197784424, "rewards/margins": 5.855222702026367, "rewards/rejected": -6.831248760223389, "step": 1610 }, { "epoch": 2.59, "learning_rate": 3.9595719381688465e-07, "logits/chosen": -1.6654293537139893, "logits/rejected": -1.7186510562896729, "logps/chosen": -83.20941162109375, "logps/rejected": -145.1309814453125, "loss": 0.097, "rewards/accuracies": 1.0, "rewards/chosen": -0.16288454830646515, "rewards/margins": 7.108290195465088, "rewards/rejected": -7.271175384521484, "step": 1611 }, { "epoch": 2.59, "learning_rate": 3.9585810543004356e-07, "logits/chosen": -1.8012669086456299, "logits/rejected": -1.6597280502319336, "logps/chosen": -108.53759765625, "logps/rejected": -146.98770141601562, "loss": 0.0999, "rewards/accuracies": 1.0, "rewards/chosen": -1.2267045974731445, "rewards/margins": 6.570436477661133, "rewards/rejected": -7.797141075134277, "step": 1612 }, { "epoch": 2.59, "learning_rate": 3.957590170432025e-07, "logits/chosen": -1.7769131660461426, "logits/rejected": -1.8239631652832031, "logps/chosen": -105.0603256225586, "logps/rejected": -148.65687561035156, "loss": 0.1884, "rewards/accuracies": 1.0, "rewards/chosen": -0.6336904764175415, "rewards/margins": 4.147021293640137, "rewards/rejected": -4.780711650848389, "step": 1613 }, { "epoch": 2.59, "learning_rate": 3.956599286563614e-07, "logits/chosen": -1.4476923942565918, "logits/rejected": -1.4214524030685425, "logps/chosen": -84.91102600097656, "logps/rejected": -151.73471069335938, "loss": 0.0657, "rewards/accuracies": 1.0, "rewards/chosen": -1.0513105392456055, "rewards/margins": 5.665750503540039, "rewards/rejected": -6.717061519622803, "step": 1614 }, { "epoch": 2.59, "learning_rate": 3.9556084026952044e-07, "logits/chosen": -1.619481086730957, "logits/rejected": -1.5780150890350342, "logps/chosen": -142.35537719726562, "logps/rejected": -153.94061279296875, "loss": 0.0751, "rewards/accuracies": 1.0, "rewards/chosen": -2.8274528980255127, "rewards/margins": 4.383543968200684, "rewards/rejected": -7.210997104644775, "step": 1615 }, { "epoch": 2.59, "learning_rate": 3.9546175188267934e-07, "logits/chosen": -1.7960684299468994, "logits/rejected": -1.7529315948486328, "logps/chosen": -100.99296569824219, "logps/rejected": -152.49417114257812, "loss": 0.0885, "rewards/accuracies": 1.0, "rewards/chosen": -1.2951539754867554, "rewards/margins": 4.537778854370117, "rewards/rejected": -5.83293342590332, "step": 1616 }, { "epoch": 2.6, "learning_rate": 3.9536266349583825e-07, "logits/chosen": -1.6651270389556885, "logits/rejected": -1.661226749420166, "logps/chosen": -103.29608154296875, "logps/rejected": -140.9802703857422, "loss": 0.1844, "rewards/accuracies": 0.75, "rewards/chosen": -1.9721553325653076, "rewards/margins": 3.407167673110962, "rewards/rejected": -5.3793230056762695, "step": 1617 }, { "epoch": 2.6, "learning_rate": 3.952635751089972e-07, "logits/chosen": -1.732515811920166, "logits/rejected": -1.6859455108642578, "logps/chosen": -113.89671325683594, "logps/rejected": -159.95726013183594, "loss": 0.2015, "rewards/accuracies": 1.0, "rewards/chosen": -1.439382553100586, "rewards/margins": 5.094992637634277, "rewards/rejected": -6.534374713897705, "step": 1618 }, { "epoch": 2.6, "learning_rate": 3.951644867221561e-07, "logits/chosen": -1.601952075958252, "logits/rejected": -1.5908163785934448, "logps/chosen": -73.8998031616211, "logps/rejected": -127.65390014648438, "loss": 0.2232, "rewards/accuracies": 1.0, "rewards/chosen": -1.1103142499923706, "rewards/margins": 4.05513334274292, "rewards/rejected": -5.165447235107422, "step": 1619 }, { "epoch": 2.6, "learning_rate": 3.950653983353151e-07, "logits/chosen": -1.6120493412017822, "logits/rejected": -1.571523666381836, "logps/chosen": -109.36521911621094, "logps/rejected": -146.45326232910156, "loss": 0.2561, "rewards/accuracies": 1.0, "rewards/chosen": -2.514857053756714, "rewards/margins": 4.719993591308594, "rewards/rejected": -7.2348504066467285, "step": 1620 }, { "epoch": 2.6, "learning_rate": 3.9496630994847404e-07, "logits/chosen": -1.6132532358169556, "logits/rejected": -1.6651500463485718, "logps/chosen": -120.26191711425781, "logps/rejected": -191.63925170898438, "loss": 0.0814, "rewards/accuracies": 1.0, "rewards/chosen": -2.6146912574768066, "rewards/margins": 6.109132289886475, "rewards/rejected": -8.723823547363281, "step": 1621 }, { "epoch": 2.6, "learning_rate": 3.9486722156163294e-07, "logits/chosen": -1.7061753273010254, "logits/rejected": -1.670372724533081, "logps/chosen": -89.03148651123047, "logps/rejected": -149.909912109375, "loss": 0.0304, "rewards/accuracies": 1.0, "rewards/chosen": -0.9480960369110107, "rewards/margins": 5.918118000030518, "rewards/rejected": -6.866213798522949, "step": 1622 }, { "epoch": 2.61, "learning_rate": 3.947681331747919e-07, "logits/chosen": -1.4570914506912231, "logits/rejected": -1.540759563446045, "logps/chosen": -87.44071197509766, "logps/rejected": -191.045654296875, "loss": 0.0438, "rewards/accuracies": 1.0, "rewards/chosen": -1.6355862617492676, "rewards/margins": 6.893369674682617, "rewards/rejected": -8.528956413269043, "step": 1623 }, { "epoch": 2.61, "learning_rate": 3.946690447879508e-07, "logits/chosen": -1.5185577869415283, "logits/rejected": -1.449926495552063, "logps/chosen": -83.57771301269531, "logps/rejected": -137.5116729736328, "loss": 0.0589, "rewards/accuracies": 1.0, "rewards/chosen": -1.6756622791290283, "rewards/margins": 5.110085487365723, "rewards/rejected": -6.785747528076172, "step": 1624 }, { "epoch": 2.61, "learning_rate": 3.9456995640110977e-07, "logits/chosen": -1.5396792888641357, "logits/rejected": -1.5881719589233398, "logps/chosen": -121.74185943603516, "logps/rejected": -178.02171325683594, "loss": 0.0906, "rewards/accuracies": 1.0, "rewards/chosen": -2.811814308166504, "rewards/margins": 5.857349395751953, "rewards/rejected": -8.669163703918457, "step": 1625 }, { "epoch": 2.61, "learning_rate": 3.9447086801426873e-07, "logits/chosen": -1.6754686832427979, "logits/rejected": -1.6324403285980225, "logps/chosen": -100.95988464355469, "logps/rejected": -142.52105712890625, "loss": 0.2119, "rewards/accuracies": 1.0, "rewards/chosen": -1.5382416248321533, "rewards/margins": 3.7480950355529785, "rewards/rejected": -5.286336898803711, "step": 1626 }, { "epoch": 2.61, "learning_rate": 3.9437177962742764e-07, "logits/chosen": -1.6739939451217651, "logits/rejected": -1.6295630931854248, "logps/chosen": -110.15270233154297, "logps/rejected": -150.19964599609375, "loss": 0.1191, "rewards/accuracies": 1.0, "rewards/chosen": -2.1927270889282227, "rewards/margins": 4.530815601348877, "rewards/rejected": -6.7235426902771, "step": 1627 }, { "epoch": 2.61, "learning_rate": 3.942726912405866e-07, "logits/chosen": -1.6827847957611084, "logits/rejected": -1.7039899826049805, "logps/chosen": -100.11036682128906, "logps/rejected": -111.03311157226562, "loss": 0.111, "rewards/accuracies": 1.0, "rewards/chosen": -0.9681674242019653, "rewards/margins": 1.950844168663025, "rewards/rejected": -2.9190115928649902, "step": 1628 }, { "epoch": 2.61, "learning_rate": 3.941736028537455e-07, "logits/chosen": -1.7222727537155151, "logits/rejected": -1.6499079465866089, "logps/chosen": -112.16838073730469, "logps/rejected": -167.1329803466797, "loss": 0.0879, "rewards/accuracies": 1.0, "rewards/chosen": -1.204603910446167, "rewards/margins": 6.814640045166016, "rewards/rejected": -8.019244194030762, "step": 1629 }, { "epoch": 2.62, "learning_rate": 3.9407451446690446e-07, "logits/chosen": -1.5573862791061401, "logits/rejected": -1.5188207626342773, "logps/chosen": -90.4585189819336, "logps/rejected": -136.2434539794922, "loss": 0.0486, "rewards/accuracies": 1.0, "rewards/chosen": -1.6311439275741577, "rewards/margins": 2.859203338623047, "rewards/rejected": -4.490347385406494, "step": 1630 }, { "epoch": 2.62, "learning_rate": 3.939754260800634e-07, "logits/chosen": -1.667012095451355, "logits/rejected": -1.673702597618103, "logps/chosen": -109.96087646484375, "logps/rejected": -165.41712951660156, "loss": 0.0994, "rewards/accuracies": 1.0, "rewards/chosen": -3.1286392211914062, "rewards/margins": 6.292535781860352, "rewards/rejected": -9.421175003051758, "step": 1631 }, { "epoch": 2.62, "learning_rate": 3.9387633769322233e-07, "logits/chosen": -1.6863280534744263, "logits/rejected": -1.6358871459960938, "logps/chosen": -101.52421569824219, "logps/rejected": -137.13604736328125, "loss": 0.2113, "rewards/accuracies": 0.75, "rewards/chosen": -2.3501996994018555, "rewards/margins": 4.562976360321045, "rewards/rejected": -6.9131760597229, "step": 1632 }, { "epoch": 2.62, "learning_rate": 3.937772493063813e-07, "logits/chosen": -1.7107231616973877, "logits/rejected": -1.6725667715072632, "logps/chosen": -107.75257873535156, "logps/rejected": -139.10394287109375, "loss": 0.0953, "rewards/accuracies": 0.75, "rewards/chosen": -1.8070709705352783, "rewards/margins": 3.5965301990509033, "rewards/rejected": -5.403601169586182, "step": 1633 }, { "epoch": 2.62, "learning_rate": 3.936781609195402e-07, "logits/chosen": -1.7599291801452637, "logits/rejected": -1.8071138858795166, "logps/chosen": -117.89910888671875, "logps/rejected": -148.1265411376953, "loss": 0.3147, "rewards/accuracies": 1.0, "rewards/chosen": -1.7654922008514404, "rewards/margins": 3.4320321083068848, "rewards/rejected": -5.197524547576904, "step": 1634 }, { "epoch": 2.62, "learning_rate": 3.9357907253269916e-07, "logits/chosen": -1.8023946285247803, "logits/rejected": -1.7016016244888306, "logps/chosen": -109.1348876953125, "logps/rejected": -132.32748413085938, "loss": 0.0763, "rewards/accuracies": 1.0, "rewards/chosen": -1.310671329498291, "rewards/margins": 4.045626163482666, "rewards/rejected": -5.356297492980957, "step": 1635 }, { "epoch": 2.63, "learning_rate": 3.934799841458581e-07, "logits/chosen": -1.6300444602966309, "logits/rejected": -1.7276816368103027, "logps/chosen": -87.09320831298828, "logps/rejected": -163.82447814941406, "loss": 0.1155, "rewards/accuracies": 1.0, "rewards/chosen": -0.5919895172119141, "rewards/margins": 4.596068859100342, "rewards/rejected": -5.188058376312256, "step": 1636 }, { "epoch": 2.63, "learning_rate": 3.93380895759017e-07, "logits/chosen": -1.719331979751587, "logits/rejected": -1.8132953643798828, "logps/chosen": -68.77217102050781, "logps/rejected": -135.25283813476562, "loss": 0.103, "rewards/accuracies": 1.0, "rewards/chosen": -0.9111250638961792, "rewards/margins": 4.569169044494629, "rewards/rejected": -5.480294227600098, "step": 1637 }, { "epoch": 2.63, "learning_rate": 3.93281807372176e-07, "logits/chosen": -1.542251706123352, "logits/rejected": -1.569412112236023, "logps/chosen": -95.82078552246094, "logps/rejected": -145.8072967529297, "loss": 0.1139, "rewards/accuracies": 1.0, "rewards/chosen": -0.13409289717674255, "rewards/margins": 5.254614353179932, "rewards/rejected": -5.388707160949707, "step": 1638 }, { "epoch": 2.63, "learning_rate": 3.931827189853349e-07, "logits/chosen": -1.65717351436615, "logits/rejected": -1.6844940185546875, "logps/chosen": -105.9080581665039, "logps/rejected": -177.64984130859375, "loss": 0.0697, "rewards/accuracies": 1.0, "rewards/chosen": -1.2233009338378906, "rewards/margins": 6.619023323059082, "rewards/rejected": -7.842324256896973, "step": 1639 }, { "epoch": 2.63, "learning_rate": 3.9308363059849385e-07, "logits/chosen": -1.7214059829711914, "logits/rejected": -1.6457960605621338, "logps/chosen": -112.06330871582031, "logps/rejected": -136.50428771972656, "loss": 0.1834, "rewards/accuracies": 1.0, "rewards/chosen": -1.821772813796997, "rewards/margins": 4.65543794631958, "rewards/rejected": -6.4772114753723145, "step": 1640 }, { "epoch": 2.63, "learning_rate": 3.9298454221165276e-07, "logits/chosen": -1.6323341131210327, "logits/rejected": -1.721718192100525, "logps/chosen": -83.76187133789062, "logps/rejected": -139.0671844482422, "loss": 0.3235, "rewards/accuracies": 0.75, "rewards/chosen": -0.34867894649505615, "rewards/margins": 4.016147613525391, "rewards/rejected": -4.364826679229736, "step": 1641 }, { "epoch": 2.64, "learning_rate": 3.928854538248117e-07, "logits/chosen": -1.7433383464813232, "logits/rejected": -1.6081993579864502, "logps/chosen": -105.66068267822266, "logps/rejected": -129.79730224609375, "loss": 0.2014, "rewards/accuracies": 1.0, "rewards/chosen": -0.4628137946128845, "rewards/margins": 4.714909076690674, "rewards/rejected": -5.177722930908203, "step": 1642 }, { "epoch": 2.64, "learning_rate": 3.927863654379707e-07, "logits/chosen": -1.6566319465637207, "logits/rejected": -1.6039808988571167, "logps/chosen": -97.6040267944336, "logps/rejected": -179.27122497558594, "loss": 0.0803, "rewards/accuracies": 1.0, "rewards/chosen": -0.6683167815208435, "rewards/margins": 7.4173383712768555, "rewards/rejected": -8.085655212402344, "step": 1643 }, { "epoch": 2.64, "learning_rate": 3.926872770511296e-07, "logits/chosen": -1.64591646194458, "logits/rejected": -1.6679062843322754, "logps/chosen": -90.45459747314453, "logps/rejected": -126.66781616210938, "loss": 0.1631, "rewards/accuracies": 1.0, "rewards/chosen": -1.577660083770752, "rewards/margins": 4.666001796722412, "rewards/rejected": -6.243661403656006, "step": 1644 }, { "epoch": 2.64, "learning_rate": 3.925881886642885e-07, "logits/chosen": -1.5339325666427612, "logits/rejected": -1.5786104202270508, "logps/chosen": -79.47030639648438, "logps/rejected": -158.0842742919922, "loss": 0.0704, "rewards/accuracies": 1.0, "rewards/chosen": 0.11824533343315125, "rewards/margins": 6.55366325378418, "rewards/rejected": -6.435418128967285, "step": 1645 }, { "epoch": 2.64, "learning_rate": 3.9248910027744745e-07, "logits/chosen": -1.686988115310669, "logits/rejected": -1.5854737758636475, "logps/chosen": -86.6767807006836, "logps/rejected": -138.66416931152344, "loss": 0.081, "rewards/accuracies": 1.0, "rewards/chosen": -0.8170170187950134, "rewards/margins": 4.332840919494629, "rewards/rejected": -5.149857997894287, "step": 1646 }, { "epoch": 2.64, "learning_rate": 3.923900118906064e-07, "logits/chosen": -1.768094778060913, "logits/rejected": -1.7337628602981567, "logps/chosen": -66.6224136352539, "logps/rejected": -152.81776428222656, "loss": 0.1206, "rewards/accuracies": 1.0, "rewards/chosen": -1.2800143957138062, "rewards/margins": 7.653653144836426, "rewards/rejected": -8.933667182922363, "step": 1647 }, { "epoch": 2.65, "learning_rate": 3.9229092350376537e-07, "logits/chosen": -1.5408319234848022, "logits/rejected": -1.6437922716140747, "logps/chosen": -65.5640869140625, "logps/rejected": -136.1582794189453, "loss": 0.1353, "rewards/accuracies": 1.0, "rewards/chosen": -0.12592744827270508, "rewards/margins": 4.883445739746094, "rewards/rejected": -5.009373188018799, "step": 1648 }, { "epoch": 2.65, "learning_rate": 3.921918351169243e-07, "logits/chosen": -1.719387173652649, "logits/rejected": -1.730057716369629, "logps/chosen": -103.0885238647461, "logps/rejected": -122.94817352294922, "loss": 0.3114, "rewards/accuracies": 0.75, "rewards/chosen": -1.0969889163970947, "rewards/margins": 1.1272996664047241, "rewards/rejected": -2.2242887020111084, "step": 1649 }, { "epoch": 2.65, "learning_rate": 3.920927467300832e-07, "logits/chosen": -1.6782052516937256, "logits/rejected": -1.6316736936569214, "logps/chosen": -104.43545532226562, "logps/rejected": -124.58712768554688, "loss": 0.0916, "rewards/accuracies": 1.0, "rewards/chosen": -2.75673246383667, "rewards/margins": 2.613795042037964, "rewards/rejected": -5.370527267456055, "step": 1650 }, { "epoch": 2.65, "learning_rate": 3.9199365834324214e-07, "logits/chosen": -1.6421464681625366, "logits/rejected": -1.6272259950637817, "logps/chosen": -107.66386413574219, "logps/rejected": -150.99456787109375, "loss": 0.0742, "rewards/accuracies": 1.0, "rewards/chosen": -1.6330490112304688, "rewards/margins": 4.535783290863037, "rewards/rejected": -6.168832302093506, "step": 1651 }, { "epoch": 2.65, "learning_rate": 3.918945699564011e-07, "logits/chosen": -1.7215654850006104, "logits/rejected": -1.7525396347045898, "logps/chosen": -118.01673126220703, "logps/rejected": -156.47991943359375, "loss": 0.1416, "rewards/accuracies": 1.0, "rewards/chosen": -3.0125925540924072, "rewards/margins": 3.54396915435791, "rewards/rejected": -6.5565619468688965, "step": 1652 }, { "epoch": 2.65, "learning_rate": 3.9179548156956006e-07, "logits/chosen": -1.737659215927124, "logits/rejected": -1.725931167602539, "logps/chosen": -93.76824188232422, "logps/rejected": -154.07403564453125, "loss": 0.0539, "rewards/accuracies": 1.0, "rewards/chosen": -1.9895079135894775, "rewards/margins": 6.329216003417969, "rewards/rejected": -8.318724632263184, "step": 1653 }, { "epoch": 2.65, "learning_rate": 3.9169639318271897e-07, "logits/chosen": -1.5703163146972656, "logits/rejected": -1.6204674243927002, "logps/chosen": -68.430908203125, "logps/rejected": -154.0151824951172, "loss": 0.2045, "rewards/accuracies": 1.0, "rewards/chosen": -0.7096720337867737, "rewards/margins": 7.791020393371582, "rewards/rejected": -8.500692367553711, "step": 1654 }, { "epoch": 2.66, "learning_rate": 3.915973047958779e-07, "logits/chosen": -1.654592752456665, "logits/rejected": -1.6313996315002441, "logps/chosen": -102.76555633544922, "logps/rejected": -154.89437866210938, "loss": 0.049, "rewards/accuracies": 1.0, "rewards/chosen": -0.35646411776542664, "rewards/margins": 4.78443717956543, "rewards/rejected": -5.140901565551758, "step": 1655 }, { "epoch": 2.66, "learning_rate": 3.9149821640903683e-07, "logits/chosen": -1.7689440250396729, "logits/rejected": -1.6736929416656494, "logps/chosen": -86.84187316894531, "logps/rejected": -120.33537292480469, "loss": 0.1224, "rewards/accuracies": 1.0, "rewards/chosen": -0.6355079412460327, "rewards/margins": 3.3994030952453613, "rewards/rejected": -4.034911155700684, "step": 1656 }, { "epoch": 2.66, "learning_rate": 3.913991280221958e-07, "logits/chosen": -1.5503085851669312, "logits/rejected": -1.580747127532959, "logps/chosen": -83.42243957519531, "logps/rejected": -134.90499877929688, "loss": 0.0931, "rewards/accuracies": 1.0, "rewards/chosen": -1.0180339813232422, "rewards/margins": 4.432450771331787, "rewards/rejected": -5.450484752655029, "step": 1657 }, { "epoch": 2.66, "learning_rate": 3.9130003963535475e-07, "logits/chosen": -1.7036112546920776, "logits/rejected": -1.6509032249450684, "logps/chosen": -73.76993560791016, "logps/rejected": -139.8185272216797, "loss": 0.1175, "rewards/accuracies": 1.0, "rewards/chosen": -0.5025233030319214, "rewards/margins": 6.139401435852051, "rewards/rejected": -6.641924858093262, "step": 1658 }, { "epoch": 2.66, "learning_rate": 3.9120095124851366e-07, "logits/chosen": -1.7205610275268555, "logits/rejected": -1.6802823543548584, "logps/chosen": -106.723876953125, "logps/rejected": -154.71156311035156, "loss": 0.1218, "rewards/accuracies": 1.0, "rewards/chosen": -1.1316072940826416, "rewards/margins": 4.3703765869140625, "rewards/rejected": -5.501983642578125, "step": 1659 }, { "epoch": 2.66, "learning_rate": 3.9110186286167257e-07, "logits/chosen": -1.7646477222442627, "logits/rejected": -1.6980911493301392, "logps/chosen": -108.64151763916016, "logps/rejected": -150.76380920410156, "loss": 0.0624, "rewards/accuracies": 1.0, "rewards/chosen": -1.2869582176208496, "rewards/margins": 2.9378268718719482, "rewards/rejected": -4.224785327911377, "step": 1660 }, { "epoch": 2.67, "learning_rate": 3.9100277447483153e-07, "logits/chosen": -1.604628086090088, "logits/rejected": -1.5901622772216797, "logps/chosen": -127.52660369873047, "logps/rejected": -163.03546142578125, "loss": 0.1586, "rewards/accuracies": 0.75, "rewards/chosen": -1.7445952892303467, "rewards/margins": 4.948258876800537, "rewards/rejected": -6.692853927612305, "step": 1661 }, { "epoch": 2.67, "learning_rate": 3.9090368608799043e-07, "logits/chosen": -1.60129714012146, "logits/rejected": -1.5919628143310547, "logps/chosen": -102.20618438720703, "logps/rejected": -151.06893920898438, "loss": 0.1045, "rewards/accuracies": 1.0, "rewards/chosen": -0.35329896211624146, "rewards/margins": 4.015526294708252, "rewards/rejected": -4.368825435638428, "step": 1662 }, { "epoch": 2.67, "learning_rate": 3.9080459770114945e-07, "logits/chosen": -1.763448715209961, "logits/rejected": -1.712659478187561, "logps/chosen": -99.34208679199219, "logps/rejected": -130.9228057861328, "loss": 0.0889, "rewards/accuracies": 1.0, "rewards/chosen": -0.3112873136997223, "rewards/margins": 3.0040805339813232, "rewards/rejected": -3.3153679370880127, "step": 1663 }, { "epoch": 2.67, "learning_rate": 3.9070550931430835e-07, "logits/chosen": -1.66643488407135, "logits/rejected": -1.6019322872161865, "logps/chosen": -107.14747619628906, "logps/rejected": -113.06047058105469, "loss": 0.2551, "rewards/accuracies": 0.5, "rewards/chosen": -1.734014868736267, "rewards/margins": 2.1405088901519775, "rewards/rejected": -3.874523639678955, "step": 1664 }, { "epoch": 2.67, "learning_rate": 3.9060642092746726e-07, "logits/chosen": -1.453410029411316, "logits/rejected": -1.5284217596054077, "logps/chosen": -62.22781753540039, "logps/rejected": -102.10479736328125, "loss": 0.3672, "rewards/accuracies": 0.5, "rewards/chosen": -1.4065132141113281, "rewards/margins": 1.8811142444610596, "rewards/rejected": -3.2876274585723877, "step": 1665 }, { "epoch": 2.67, "learning_rate": 3.905073325406262e-07, "logits/chosen": -1.6353001594543457, "logits/rejected": -1.6529649496078491, "logps/chosen": -105.6570816040039, "logps/rejected": -144.5390625, "loss": 0.1454, "rewards/accuracies": 1.0, "rewards/chosen": -1.5073972940444946, "rewards/margins": 2.818286895751953, "rewards/rejected": -4.325684070587158, "step": 1666 }, { "epoch": 2.68, "learning_rate": 3.9040824415378513e-07, "logits/chosen": -1.5287467241287231, "logits/rejected": -1.507435917854309, "logps/chosen": -96.50396728515625, "logps/rejected": -127.56505584716797, "loss": 0.0431, "rewards/accuracies": 1.0, "rewards/chosen": -2.232048273086548, "rewards/margins": 3.746950149536133, "rewards/rejected": -5.97899866104126, "step": 1667 }, { "epoch": 2.68, "learning_rate": 3.9030915576694414e-07, "logits/chosen": -1.5859839916229248, "logits/rejected": -1.5464189052581787, "logps/chosen": -96.02423095703125, "logps/rejected": -126.1764144897461, "loss": 0.0788, "rewards/accuracies": 1.0, "rewards/chosen": -1.1699787378311157, "rewards/margins": 3.4963724613189697, "rewards/rejected": -4.666351318359375, "step": 1668 }, { "epoch": 2.68, "learning_rate": 3.9021006738010305e-07, "logits/chosen": -1.600571632385254, "logits/rejected": -1.582901954650879, "logps/chosen": -92.39420318603516, "logps/rejected": -130.1382293701172, "loss": 0.0754, "rewards/accuracies": 1.0, "rewards/chosen": -1.4282160997390747, "rewards/margins": 3.8117406368255615, "rewards/rejected": -5.239956855773926, "step": 1669 }, { "epoch": 2.68, "learning_rate": 3.9011097899326195e-07, "logits/chosen": -1.623052954673767, "logits/rejected": -1.7397581338882446, "logps/chosen": -90.87034606933594, "logps/rejected": -141.79522705078125, "loss": 0.0644, "rewards/accuracies": 1.0, "rewards/chosen": -1.7627394199371338, "rewards/margins": 3.422524929046631, "rewards/rejected": -5.185264587402344, "step": 1670 }, { "epoch": 2.68, "learning_rate": 3.900118906064209e-07, "logits/chosen": -1.4709466695785522, "logits/rejected": -1.6833181381225586, "logps/chosen": -81.34078216552734, "logps/rejected": -166.14144897460938, "loss": 0.0559, "rewards/accuracies": 1.0, "rewards/chosen": -1.602898120880127, "rewards/margins": 4.807818412780762, "rewards/rejected": -6.4107160568237305, "step": 1671 }, { "epoch": 2.68, "learning_rate": 3.899128022195798e-07, "logits/chosen": -1.5289719104766846, "logits/rejected": -1.4617083072662354, "logps/chosen": -96.2445297241211, "logps/rejected": -168.16632080078125, "loss": 0.0421, "rewards/accuracies": 1.0, "rewards/chosen": -0.8574463129043579, "rewards/margins": 7.282916069030762, "rewards/rejected": -8.140362739562988, "step": 1672 }, { "epoch": 2.69, "learning_rate": 3.8981371383273883e-07, "logits/chosen": -1.6871217489242554, "logits/rejected": -1.5598878860473633, "logps/chosen": -122.20726776123047, "logps/rejected": -152.1883544921875, "loss": 0.2009, "rewards/accuracies": 1.0, "rewards/chosen": -1.95392906665802, "rewards/margins": 3.9447951316833496, "rewards/rejected": -5.898724555969238, "step": 1673 }, { "epoch": 2.69, "learning_rate": 3.8971462544589774e-07, "logits/chosen": -1.5609062910079956, "logits/rejected": -1.6250025033950806, "logps/chosen": -62.397216796875, "logps/rejected": -127.29566955566406, "loss": 0.0648, "rewards/accuracies": 1.0, "rewards/chosen": -0.802670955657959, "rewards/margins": 4.677715301513672, "rewards/rejected": -5.480386734008789, "step": 1674 }, { "epoch": 2.69, "learning_rate": 3.8961553705905665e-07, "logits/chosen": -1.766790509223938, "logits/rejected": -1.5937047004699707, "logps/chosen": -101.36880493164062, "logps/rejected": -144.26031494140625, "loss": 0.1347, "rewards/accuracies": 1.0, "rewards/chosen": -1.9222066402435303, "rewards/margins": 4.550838470458984, "rewards/rejected": -6.4730448722839355, "step": 1675 }, { "epoch": 2.69, "learning_rate": 3.895164486722156e-07, "logits/chosen": -1.495752215385437, "logits/rejected": -1.5141485929489136, "logps/chosen": -95.90216064453125, "logps/rejected": -169.1507568359375, "loss": 0.0714, "rewards/accuracies": 1.0, "rewards/chosen": -1.0071563720703125, "rewards/margins": 6.518420219421387, "rewards/rejected": -7.525576591491699, "step": 1676 }, { "epoch": 2.69, "learning_rate": 3.894173602853745e-07, "logits/chosen": -1.6498146057128906, "logits/rejected": -1.5136171579360962, "logps/chosen": -133.72706604003906, "logps/rejected": -147.18910217285156, "loss": 0.0928, "rewards/accuracies": 1.0, "rewards/chosen": -1.643066644668579, "rewards/margins": 4.318610668182373, "rewards/rejected": -5.961677074432373, "step": 1677 }, { "epoch": 2.69, "learning_rate": 3.893182718985335e-07, "logits/chosen": -1.7261035442352295, "logits/rejected": -1.6475547552108765, "logps/chosen": -91.88957977294922, "logps/rejected": -128.22500610351562, "loss": 0.2556, "rewards/accuracies": 1.0, "rewards/chosen": -1.905906319618225, "rewards/margins": 4.8238372802734375, "rewards/rejected": -6.729743957519531, "step": 1678 }, { "epoch": 2.7, "learning_rate": 3.8921918351169243e-07, "logits/chosen": -1.538452386856079, "logits/rejected": -1.52724027633667, "logps/chosen": -99.5289535522461, "logps/rejected": -143.4537811279297, "loss": 0.0986, "rewards/accuracies": 1.0, "rewards/chosen": -2.5178170204162598, "rewards/margins": 4.421525955200195, "rewards/rejected": -6.939342498779297, "step": 1679 }, { "epoch": 2.7, "learning_rate": 3.8912009512485134e-07, "logits/chosen": -1.6561017036437988, "logits/rejected": -1.621412992477417, "logps/chosen": -113.26424407958984, "logps/rejected": -141.66610717773438, "loss": 0.2159, "rewards/accuracies": 1.0, "rewards/chosen": -1.5307321548461914, "rewards/margins": 5.143934726715088, "rewards/rejected": -6.6746673583984375, "step": 1680 }, { "epoch": 2.7, "learning_rate": 3.890210067380103e-07, "logits/chosen": -1.7042752504348755, "logits/rejected": -1.6139485836029053, "logps/chosen": -109.80953979492188, "logps/rejected": -154.41958618164062, "loss": 0.0935, "rewards/accuracies": 1.0, "rewards/chosen": -1.8617050647735596, "rewards/margins": 5.367158889770508, "rewards/rejected": -7.228863716125488, "step": 1681 }, { "epoch": 2.7, "learning_rate": 3.889219183511692e-07, "logits/chosen": -1.649864673614502, "logits/rejected": -1.688380241394043, "logps/chosen": -115.98057556152344, "logps/rejected": -138.5621337890625, "loss": 0.0919, "rewards/accuracies": 1.0, "rewards/chosen": -0.35208797454833984, "rewards/margins": 3.658139228820801, "rewards/rejected": -4.010227203369141, "step": 1682 }, { "epoch": 2.7, "learning_rate": 3.888228299643281e-07, "logits/chosen": -1.7194029092788696, "logits/rejected": -1.638472557067871, "logps/chosen": -126.67015075683594, "logps/rejected": -183.37649536132812, "loss": 0.124, "rewards/accuracies": 1.0, "rewards/chosen": -2.1979000568389893, "rewards/margins": 5.530783653259277, "rewards/rejected": -7.728683948516846, "step": 1683 }, { "epoch": 2.7, "learning_rate": 3.887237415774871e-07, "logits/chosen": -1.6808104515075684, "logits/rejected": -1.8558576107025146, "logps/chosen": -81.59571838378906, "logps/rejected": -141.98764038085938, "loss": 0.0889, "rewards/accuracies": 1.0, "rewards/chosen": -0.3140886425971985, "rewards/margins": 4.188577651977539, "rewards/rejected": -4.502665996551514, "step": 1684 }, { "epoch": 2.7, "learning_rate": 3.8862465319064603e-07, "logits/chosen": -1.5889551639556885, "logits/rejected": -1.7213510274887085, "logps/chosen": -100.0901870727539, "logps/rejected": -129.26759338378906, "loss": 0.1015, "rewards/accuracies": 1.0, "rewards/chosen": -1.4790693521499634, "rewards/margins": 2.9535489082336426, "rewards/rejected": -4.432618618011475, "step": 1685 }, { "epoch": 2.71, "learning_rate": 3.88525564803805e-07, "logits/chosen": -1.6408355236053467, "logits/rejected": -1.6618602275848389, "logps/chosen": -124.66455841064453, "logps/rejected": -138.0692901611328, "loss": 0.0444, "rewards/accuracies": 1.0, "rewards/chosen": -2.2381107807159424, "rewards/margins": 2.3944685459136963, "rewards/rejected": -4.632579326629639, "step": 1686 }, { "epoch": 2.71, "learning_rate": 3.884264764169639e-07, "logits/chosen": -1.6937484741210938, "logits/rejected": -1.676285982131958, "logps/chosen": -92.24946594238281, "logps/rejected": -126.63739013671875, "loss": 0.1181, "rewards/accuracies": 1.0, "rewards/chosen": -0.898006796836853, "rewards/margins": 4.092273712158203, "rewards/rejected": -4.9902801513671875, "step": 1687 }, { "epoch": 2.71, "learning_rate": 3.883273880301228e-07, "logits/chosen": -1.518448829650879, "logits/rejected": -1.5355606079101562, "logps/chosen": -94.283935546875, "logps/rejected": -152.62081909179688, "loss": 0.0572, "rewards/accuracies": 1.0, "rewards/chosen": -1.9616947174072266, "rewards/margins": 5.596569061279297, "rewards/rejected": -7.55826473236084, "step": 1688 }, { "epoch": 2.71, "learning_rate": 3.882282996432818e-07, "logits/chosen": -1.5508153438568115, "logits/rejected": -1.5178205966949463, "logps/chosen": -130.1893310546875, "logps/rejected": -172.04115295410156, "loss": 0.0864, "rewards/accuracies": 1.0, "rewards/chosen": -2.773143768310547, "rewards/margins": 3.7177200317382812, "rewards/rejected": -6.490863800048828, "step": 1689 }, { "epoch": 2.71, "learning_rate": 3.881292112564407e-07, "logits/chosen": -1.8021749258041382, "logits/rejected": -1.7691079378128052, "logps/chosen": -77.19468688964844, "logps/rejected": -165.72084045410156, "loss": 0.1617, "rewards/accuracies": 1.0, "rewards/chosen": 0.9005052447319031, "rewards/margins": 8.021329879760742, "rewards/rejected": -7.120824813842773, "step": 1690 }, { "epoch": 2.71, "learning_rate": 3.880301228695997e-07, "logits/chosen": -1.6311252117156982, "logits/rejected": -1.64441978931427, "logps/chosen": -100.53656768798828, "logps/rejected": -139.1445770263672, "loss": 0.1033, "rewards/accuracies": 1.0, "rewards/chosen": -0.4896967113018036, "rewards/margins": 4.9500885009765625, "rewards/rejected": -5.439785003662109, "step": 1691 }, { "epoch": 2.72, "learning_rate": 3.879310344827586e-07, "logits/chosen": -1.6958050727844238, "logits/rejected": -1.670047640800476, "logps/chosen": -92.33221435546875, "logps/rejected": -122.91683959960938, "loss": 0.2279, "rewards/accuracies": 1.0, "rewards/chosen": -1.3675527572631836, "rewards/margins": 3.019754409790039, "rewards/rejected": -4.387307167053223, "step": 1692 }, { "epoch": 2.72, "learning_rate": 3.878319460959175e-07, "logits/chosen": -1.7322396039962769, "logits/rejected": -1.6827070713043213, "logps/chosen": -93.61825561523438, "logps/rejected": -102.63182067871094, "loss": 0.2242, "rewards/accuracies": 1.0, "rewards/chosen": -1.5245802402496338, "rewards/margins": 1.9215037822723389, "rewards/rejected": -3.4460840225219727, "step": 1693 }, { "epoch": 2.72, "learning_rate": 3.877328577090765e-07, "logits/chosen": -1.6442080736160278, "logits/rejected": -1.6633580923080444, "logps/chosen": -77.29707336425781, "logps/rejected": -166.5149383544922, "loss": 0.0849, "rewards/accuracies": 1.0, "rewards/chosen": -0.8206380009651184, "rewards/margins": 6.532628059387207, "rewards/rejected": -7.35326623916626, "step": 1694 }, { "epoch": 2.72, "learning_rate": 3.876337693222354e-07, "logits/chosen": -1.5950648784637451, "logits/rejected": -1.685260534286499, "logps/chosen": -88.7186279296875, "logps/rejected": -172.9910888671875, "loss": 0.0596, "rewards/accuracies": 1.0, "rewards/chosen": -0.9035142660140991, "rewards/margins": 6.522806167602539, "rewards/rejected": -7.426320552825928, "step": 1695 }, { "epoch": 2.72, "learning_rate": 3.875346809353944e-07, "logits/chosen": -1.6567819118499756, "logits/rejected": -1.6497957706451416, "logps/chosen": -124.39613342285156, "logps/rejected": -156.3587646484375, "loss": 0.2054, "rewards/accuracies": 1.0, "rewards/chosen": -1.6777167320251465, "rewards/margins": 4.277494430541992, "rewards/rejected": -5.955211162567139, "step": 1696 }, { "epoch": 2.72, "learning_rate": 3.874355925485533e-07, "logits/chosen": -1.4963115453720093, "logits/rejected": -1.4358872175216675, "logps/chosen": -129.52008056640625, "logps/rejected": -153.43161010742188, "loss": 0.0708, "rewards/accuracies": 1.0, "rewards/chosen": -1.7815998792648315, "rewards/margins": 5.206716537475586, "rewards/rejected": -6.988316535949707, "step": 1697 }, { "epoch": 2.73, "learning_rate": 3.873365041617122e-07, "logits/chosen": -1.764592170715332, "logits/rejected": -1.7095646858215332, "logps/chosen": -96.08341979980469, "logps/rejected": -143.89962768554688, "loss": 0.1223, "rewards/accuracies": 1.0, "rewards/chosen": -0.9924697875976562, "rewards/margins": 5.921590328216553, "rewards/rejected": -6.914060592651367, "step": 1698 }, { "epoch": 2.73, "learning_rate": 3.872374157748712e-07, "logits/chosen": -1.684032917022705, "logits/rejected": -1.6870431900024414, "logps/chosen": -93.80326843261719, "logps/rejected": -157.6763153076172, "loss": 0.2543, "rewards/accuracies": 1.0, "rewards/chosen": -0.31217020750045776, "rewards/margins": 6.128981590270996, "rewards/rejected": -6.441151142120361, "step": 1699 }, { "epoch": 2.73, "learning_rate": 3.871383273880301e-07, "logits/chosen": -1.7220619916915894, "logits/rejected": -1.7787582874298096, "logps/chosen": -55.565162658691406, "logps/rejected": -136.3475799560547, "loss": 0.0886, "rewards/accuracies": 1.0, "rewards/chosen": 0.18339119851589203, "rewards/margins": 6.834646224975586, "rewards/rejected": -6.651254653930664, "step": 1700 }, { "epoch": 2.73, "learning_rate": 3.8703923900118907e-07, "logits/chosen": -1.6423935890197754, "logits/rejected": -1.5884943008422852, "logps/chosen": -105.61051177978516, "logps/rejected": -122.33573913574219, "loss": 0.1882, "rewards/accuracies": 1.0, "rewards/chosen": -1.2204642295837402, "rewards/margins": 2.960963726043701, "rewards/rejected": -4.181427955627441, "step": 1701 }, { "epoch": 2.73, "learning_rate": 3.86940150614348e-07, "logits/chosen": -1.5610558986663818, "logits/rejected": -1.5710076093673706, "logps/chosen": -91.81001281738281, "logps/rejected": -132.9902801513672, "loss": 0.2061, "rewards/accuracies": 1.0, "rewards/chosen": -1.9544978141784668, "rewards/margins": 2.82768177986145, "rewards/rejected": -4.782179832458496, "step": 1702 }, { "epoch": 2.73, "learning_rate": 3.868410622275069e-07, "logits/chosen": -1.7032796144485474, "logits/rejected": -1.7363215684890747, "logps/chosen": -110.2620849609375, "logps/rejected": -154.64883422851562, "loss": 0.1856, "rewards/accuracies": 1.0, "rewards/chosen": -1.5105397701263428, "rewards/margins": 5.057180881500244, "rewards/rejected": -6.567720890045166, "step": 1703 }, { "epoch": 2.74, "learning_rate": 3.8674197384066584e-07, "logits/chosen": -1.5895330905914307, "logits/rejected": -1.633446216583252, "logps/chosen": -114.31243133544922, "logps/rejected": -168.3142547607422, "loss": 0.0494, "rewards/accuracies": 1.0, "rewards/chosen": -3.2529311180114746, "rewards/margins": 3.871659994125366, "rewards/rejected": -7.124590873718262, "step": 1704 }, { "epoch": 2.74, "learning_rate": 3.866428854538248e-07, "logits/chosen": -1.6556551456451416, "logits/rejected": -1.563711166381836, "logps/chosen": -111.71294403076172, "logps/rejected": -148.7470703125, "loss": 0.1023, "rewards/accuracies": 1.0, "rewards/chosen": -1.3790028095245361, "rewards/margins": 5.243337631225586, "rewards/rejected": -6.622340679168701, "step": 1705 }, { "epoch": 2.74, "learning_rate": 3.8654379706698376e-07, "logits/chosen": -1.7390375137329102, "logits/rejected": -1.7449493408203125, "logps/chosen": -94.03425598144531, "logps/rejected": -149.23817443847656, "loss": 0.0395, "rewards/accuracies": 1.0, "rewards/chosen": -1.2745352983474731, "rewards/margins": 4.810523986816406, "rewards/rejected": -6.085059642791748, "step": 1706 }, { "epoch": 2.74, "learning_rate": 3.8644470868014267e-07, "logits/chosen": -1.5817800760269165, "logits/rejected": -1.6437551975250244, "logps/chosen": -99.87609100341797, "logps/rejected": -179.95062255859375, "loss": 0.0666, "rewards/accuracies": 1.0, "rewards/chosen": -1.6728297472000122, "rewards/margins": 6.5248212814331055, "rewards/rejected": -8.197650909423828, "step": 1707 }, { "epoch": 2.74, "learning_rate": 3.863456202933016e-07, "logits/chosen": -1.5573639869689941, "logits/rejected": -1.5173733234405518, "logps/chosen": -126.54251098632812, "logps/rejected": -177.5182647705078, "loss": 0.1044, "rewards/accuracies": 1.0, "rewards/chosen": -2.6056582927703857, "rewards/margins": 6.196355819702148, "rewards/rejected": -8.802013397216797, "step": 1708 }, { "epoch": 2.74, "learning_rate": 3.8624653190646054e-07, "logits/chosen": -1.667578101158142, "logits/rejected": -1.602408766746521, "logps/chosen": -113.88560485839844, "logps/rejected": -139.5880584716797, "loss": 0.0591, "rewards/accuracies": 0.75, "rewards/chosen": -3.759606122970581, "rewards/margins": 2.4520180225372314, "rewards/rejected": -6.211623668670654, "step": 1709 }, { "epoch": 2.74, "learning_rate": 3.861474435196195e-07, "logits/chosen": -1.5796071290969849, "logits/rejected": -1.5116478204727173, "logps/chosen": -96.09815979003906, "logps/rejected": -159.8899688720703, "loss": 0.0489, "rewards/accuracies": 1.0, "rewards/chosen": -1.0038764476776123, "rewards/margins": 6.6698317527771, "rewards/rejected": -7.673708915710449, "step": 1710 }, { "epoch": 2.75, "learning_rate": 3.8604835513277846e-07, "logits/chosen": -1.691290259361267, "logits/rejected": -1.672302007675171, "logps/chosen": -96.06298828125, "logps/rejected": -175.695068359375, "loss": 0.0444, "rewards/accuracies": 1.0, "rewards/chosen": -0.5973865389823914, "rewards/margins": 7.333518028259277, "rewards/rejected": -7.930904388427734, "step": 1711 }, { "epoch": 2.75, "learning_rate": 3.8594926674593736e-07, "logits/chosen": -1.6229920387268066, "logits/rejected": -1.6706695556640625, "logps/chosen": -129.38009643554688, "logps/rejected": -183.19676208496094, "loss": 0.1434, "rewards/accuracies": 1.0, "rewards/chosen": -2.332888603210449, "rewards/margins": 4.283064842224121, "rewards/rejected": -6.61595344543457, "step": 1712 }, { "epoch": 2.75, "learning_rate": 3.8585017835909627e-07, "logits/chosen": -1.5527560710906982, "logits/rejected": -1.5401363372802734, "logps/chosen": -107.97278594970703, "logps/rejected": -136.66148376464844, "loss": 0.0844, "rewards/accuracies": 1.0, "rewards/chosen": -1.9458701610565186, "rewards/margins": 5.365225315093994, "rewards/rejected": -7.311095714569092, "step": 1713 }, { "epoch": 2.75, "learning_rate": 3.8575108997225523e-07, "logits/chosen": -1.7033445835113525, "logits/rejected": -1.746883511543274, "logps/chosen": -113.42610168457031, "logps/rejected": -152.87213134765625, "loss": 0.0807, "rewards/accuracies": 1.0, "rewards/chosen": -1.8968216180801392, "rewards/margins": 3.7497458457946777, "rewards/rejected": -5.646567344665527, "step": 1714 }, { "epoch": 2.75, "learning_rate": 3.856520015854142e-07, "logits/chosen": -1.8013803958892822, "logits/rejected": -1.7154548168182373, "logps/chosen": -105.42173767089844, "logps/rejected": -144.68460083007812, "loss": 0.0458, "rewards/accuracies": 1.0, "rewards/chosen": -0.6395660042762756, "rewards/margins": 6.105227470397949, "rewards/rejected": -6.744793891906738, "step": 1715 }, { "epoch": 2.75, "learning_rate": 3.855529131985731e-07, "logits/chosen": -1.4797344207763672, "logits/rejected": -1.5067167282104492, "logps/chosen": -105.45658874511719, "logps/rejected": -159.64166259765625, "loss": 0.195, "rewards/accuracies": 1.0, "rewards/chosen": -0.855002224445343, "rewards/margins": 5.003995895385742, "rewards/rejected": -5.8589982986450195, "step": 1716 }, { "epoch": 2.76, "learning_rate": 3.8545382481173206e-07, "logits/chosen": -1.6864482164382935, "logits/rejected": -1.6967101097106934, "logps/chosen": -96.51136779785156, "logps/rejected": -127.41197967529297, "loss": 0.0869, "rewards/accuracies": 1.0, "rewards/chosen": -1.3239071369171143, "rewards/margins": 3.8827710151672363, "rewards/rejected": -5.20667839050293, "step": 1717 }, { "epoch": 2.76, "learning_rate": 3.8535473642489096e-07, "logits/chosen": -1.5164958238601685, "logits/rejected": -1.5359840393066406, "logps/chosen": -103.20779418945312, "logps/rejected": -129.1023406982422, "loss": 0.077, "rewards/accuracies": 1.0, "rewards/chosen": -1.8564060926437378, "rewards/margins": 4.21718692779541, "rewards/rejected": -6.0735931396484375, "step": 1718 }, { "epoch": 2.76, "learning_rate": 3.852556480380499e-07, "logits/chosen": -1.6447176933288574, "logits/rejected": -1.7807071208953857, "logps/chosen": -72.67943572998047, "logps/rejected": -135.22320556640625, "loss": 0.0475, "rewards/accuracies": 1.0, "rewards/chosen": -0.6120203137397766, "rewards/margins": 5.0758562088012695, "rewards/rejected": -5.687876224517822, "step": 1719 }, { "epoch": 2.76, "learning_rate": 3.8515655965120883e-07, "logits/chosen": -1.7441954612731934, "logits/rejected": -1.6575230360031128, "logps/chosen": -117.86317443847656, "logps/rejected": -157.60690307617188, "loss": 0.0598, "rewards/accuracies": 1.0, "rewards/chosen": -2.133054256439209, "rewards/margins": 5.585491180419922, "rewards/rejected": -7.718544960021973, "step": 1720 }, { "epoch": 2.76, "learning_rate": 3.850574712643678e-07, "logits/chosen": -1.6561346054077148, "logits/rejected": -1.5754518508911133, "logps/chosen": -94.59526062011719, "logps/rejected": -139.864013671875, "loss": 0.0341, "rewards/accuracies": 1.0, "rewards/chosen": -2.570056915283203, "rewards/margins": 5.326308250427246, "rewards/rejected": -7.896364688873291, "step": 1721 }, { "epoch": 2.76, "learning_rate": 3.8495838287752675e-07, "logits/chosen": -1.9106751680374146, "logits/rejected": -1.8608661890029907, "logps/chosen": -107.29853820800781, "logps/rejected": -155.08224487304688, "loss": 0.1335, "rewards/accuracies": 1.0, "rewards/chosen": -0.231898695230484, "rewards/margins": 4.3463521003723145, "rewards/rejected": -4.578250885009766, "step": 1722 }, { "epoch": 2.77, "learning_rate": 3.8485929449068566e-07, "logits/chosen": -1.8106595277786255, "logits/rejected": -1.7741972208023071, "logps/chosen": -123.20651245117188, "logps/rejected": -169.97357177734375, "loss": 0.122, "rewards/accuracies": 1.0, "rewards/chosen": -1.031836748123169, "rewards/margins": 4.454592227935791, "rewards/rejected": -5.486429214477539, "step": 1723 }, { "epoch": 2.77, "learning_rate": 3.847602061038446e-07, "logits/chosen": -1.54625403881073, "logits/rejected": -1.5438166856765747, "logps/chosen": -85.87701416015625, "logps/rejected": -156.4394989013672, "loss": 0.1544, "rewards/accuracies": 1.0, "rewards/chosen": -0.43809774518013, "rewards/margins": 7.436060428619385, "rewards/rejected": -7.874157905578613, "step": 1724 }, { "epoch": 2.77, "learning_rate": 3.846611177170035e-07, "logits/chosen": -1.6615509986877441, "logits/rejected": -1.628709077835083, "logps/chosen": -75.87600708007812, "logps/rejected": -181.4813232421875, "loss": 0.0576, "rewards/accuracies": 1.0, "rewards/chosen": -0.061121970415115356, "rewards/margins": 11.005786895751953, "rewards/rejected": -11.06690788269043, "step": 1725 }, { "epoch": 2.77, "learning_rate": 3.845620293301625e-07, "logits/chosen": -1.639305591583252, "logits/rejected": -1.5316998958587646, "logps/chosen": -106.21957397460938, "logps/rejected": -156.9407958984375, "loss": 0.1703, "rewards/accuracies": 1.0, "rewards/chosen": -1.6505632400512695, "rewards/margins": 6.693097114562988, "rewards/rejected": -8.343660354614258, "step": 1726 }, { "epoch": 2.77, "learning_rate": 3.8446294094332144e-07, "logits/chosen": -1.551335096359253, "logits/rejected": -1.5738424062728882, "logps/chosen": -88.29304504394531, "logps/rejected": -162.49609375, "loss": 0.2563, "rewards/accuracies": 1.0, "rewards/chosen": -0.515679657459259, "rewards/margins": 5.855982780456543, "rewards/rejected": -6.371662616729736, "step": 1727 }, { "epoch": 2.77, "learning_rate": 3.8436385255648035e-07, "logits/chosen": -1.5758488178253174, "logits/rejected": -1.6274272203445435, "logps/chosen": -89.54116821289062, "logps/rejected": -137.7141876220703, "loss": 0.081, "rewards/accuracies": 0.75, "rewards/chosen": -1.3884612321853638, "rewards/margins": 2.479238986968994, "rewards/rejected": -3.8677000999450684, "step": 1728 }, { "epoch": 2.78, "learning_rate": 3.842647641696393e-07, "logits/chosen": -1.672809362411499, "logits/rejected": -1.6774704456329346, "logps/chosen": -100.63175964355469, "logps/rejected": -143.12005615234375, "loss": 0.2362, "rewards/accuracies": 1.0, "rewards/chosen": -1.6171278953552246, "rewards/margins": 4.297855854034424, "rewards/rejected": -5.914984226226807, "step": 1729 }, { "epoch": 2.78, "learning_rate": 3.841656757827982e-07, "logits/chosen": -1.7278759479522705, "logits/rejected": -1.850433588027954, "logps/chosen": -76.5829086303711, "logps/rejected": -156.71005249023438, "loss": 0.0902, "rewards/accuracies": 1.0, "rewards/chosen": -0.22703400254249573, "rewards/margins": 5.895695686340332, "rewards/rejected": -6.122729778289795, "step": 1730 }, { "epoch": 2.78, "learning_rate": 3.840665873959572e-07, "logits/chosen": -1.6309189796447754, "logits/rejected": -1.7040116786956787, "logps/chosen": -68.90851593017578, "logps/rejected": -123.36503601074219, "loss": 0.0945, "rewards/accuracies": 1.0, "rewards/chosen": -0.26131296157836914, "rewards/margins": 4.654858589172363, "rewards/rejected": -4.916171550750732, "step": 1731 }, { "epoch": 2.78, "learning_rate": 3.8396749900911614e-07, "logits/chosen": -1.5577058792114258, "logits/rejected": -1.5882019996643066, "logps/chosen": -92.9170913696289, "logps/rejected": -140.0607147216797, "loss": 0.1521, "rewards/accuracies": 0.5, "rewards/chosen": -1.5169906616210938, "rewards/margins": 4.608637809753418, "rewards/rejected": -6.125628471374512, "step": 1732 }, { "epoch": 2.78, "learning_rate": 3.8386841062227504e-07, "logits/chosen": -1.7199769020080566, "logits/rejected": -1.6817606687545776, "logps/chosen": -118.91400909423828, "logps/rejected": -149.94818115234375, "loss": 0.3036, "rewards/accuracies": 0.75, "rewards/chosen": -1.3995593786239624, "rewards/margins": 3.4038994312286377, "rewards/rejected": -4.8034586906433105, "step": 1733 }, { "epoch": 2.78, "learning_rate": 3.83769322235434e-07, "logits/chosen": -1.7233734130859375, "logits/rejected": -1.685361385345459, "logps/chosen": -99.91777801513672, "logps/rejected": -150.06031799316406, "loss": 0.1402, "rewards/accuracies": 1.0, "rewards/chosen": -1.7346800565719604, "rewards/margins": 4.67967414855957, "rewards/rejected": -6.414353847503662, "step": 1734 }, { "epoch": 2.78, "learning_rate": 3.836702338485929e-07, "logits/chosen": -1.6008861064910889, "logits/rejected": -1.5808324813842773, "logps/chosen": -78.57119750976562, "logps/rejected": -135.6722412109375, "loss": 0.1447, "rewards/accuracies": 1.0, "rewards/chosen": -0.34248214960098267, "rewards/margins": 5.296980857849121, "rewards/rejected": -5.639463424682617, "step": 1735 }, { "epoch": 2.79, "learning_rate": 3.8357114546175187e-07, "logits/chosen": -1.735021948814392, "logits/rejected": -1.740487813949585, "logps/chosen": -99.78807067871094, "logps/rejected": -183.1121063232422, "loss": 0.0523, "rewards/accuracies": 1.0, "rewards/chosen": -0.1788601279258728, "rewards/margins": 7.770191192626953, "rewards/rejected": -7.949051856994629, "step": 1736 }, { "epoch": 2.79, "learning_rate": 3.8347205707491083e-07, "logits/chosen": -1.50991690158844, "logits/rejected": -1.532413125038147, "logps/chosen": -93.66630554199219, "logps/rejected": -142.24966430664062, "loss": 0.1084, "rewards/accuracies": 1.0, "rewards/chosen": -1.6525890827178955, "rewards/margins": 6.384730339050293, "rewards/rejected": -8.03731918334961, "step": 1737 }, { "epoch": 2.79, "learning_rate": 3.8337296868806974e-07, "logits/chosen": -1.6704424619674683, "logits/rejected": -1.6005035638809204, "logps/chosen": -94.4825439453125, "logps/rejected": -150.9242706298828, "loss": 0.0509, "rewards/accuracies": 1.0, "rewards/chosen": -0.3587244153022766, "rewards/margins": 4.706463813781738, "rewards/rejected": -5.065187931060791, "step": 1738 }, { "epoch": 2.79, "learning_rate": 3.832738803012287e-07, "logits/chosen": -1.788480520248413, "logits/rejected": -1.7944239377975464, "logps/chosen": -142.67686462402344, "logps/rejected": -133.83688354492188, "loss": 0.0747, "rewards/accuracies": 1.0, "rewards/chosen": -1.9966156482696533, "rewards/margins": 1.9196555614471436, "rewards/rejected": -3.9162709712982178, "step": 1739 }, { "epoch": 2.79, "learning_rate": 3.831747919143876e-07, "logits/chosen": -1.5799955129623413, "logits/rejected": -1.623917818069458, "logps/chosen": -68.50028991699219, "logps/rejected": -186.34466552734375, "loss": 0.0675, "rewards/accuracies": 1.0, "rewards/chosen": -0.7705042362213135, "rewards/margins": 8.460528373718262, "rewards/rejected": -9.231033325195312, "step": 1740 }, { "epoch": 2.79, "learning_rate": 3.830757035275465e-07, "logits/chosen": -1.5403428077697754, "logits/rejected": -1.5773061513900757, "logps/chosen": -108.82228088378906, "logps/rejected": -149.12045288085938, "loss": 0.1373, "rewards/accuracies": 1.0, "rewards/chosen": -0.8979717493057251, "rewards/margins": 4.018382549285889, "rewards/rejected": -4.916354179382324, "step": 1741 }, { "epoch": 2.8, "learning_rate": 3.829766151407055e-07, "logits/chosen": -1.7603864669799805, "logits/rejected": -1.661365032196045, "logps/chosen": -99.40763854980469, "logps/rejected": -127.48089599609375, "loss": 0.1884, "rewards/accuracies": 1.0, "rewards/chosen": -1.0572607517242432, "rewards/margins": 3.4675674438476562, "rewards/rejected": -4.5248284339904785, "step": 1742 }, { "epoch": 2.8, "learning_rate": 3.8287752675386443e-07, "logits/chosen": -1.6430116891860962, "logits/rejected": -1.6068503856658936, "logps/chosen": -74.22874450683594, "logps/rejected": -125.27676391601562, "loss": 0.0818, "rewards/accuracies": 1.0, "rewards/chosen": 1.0395439863204956, "rewards/margins": 5.395846366882324, "rewards/rejected": -4.356302738189697, "step": 1743 }, { "epoch": 2.8, "learning_rate": 3.827784383670234e-07, "logits/chosen": -1.824817419052124, "logits/rejected": -1.8087174892425537, "logps/chosen": -122.9127197265625, "logps/rejected": -184.99200439453125, "loss": 0.1434, "rewards/accuracies": 1.0, "rewards/chosen": -2.604060173034668, "rewards/margins": 3.8104066848754883, "rewards/rejected": -6.414466857910156, "step": 1744 }, { "epoch": 2.8, "learning_rate": 3.826793499801823e-07, "logits/chosen": -1.6561815738677979, "logits/rejected": -1.6514068841934204, "logps/chosen": -101.97447967529297, "logps/rejected": -167.2471923828125, "loss": 0.1004, "rewards/accuracies": 1.0, "rewards/chosen": -2.2644758224487305, "rewards/margins": 4.895671367645264, "rewards/rejected": -7.160147666931152, "step": 1745 }, { "epoch": 2.8, "learning_rate": 3.825802615933412e-07, "logits/chosen": -1.7136898040771484, "logits/rejected": -1.715869426727295, "logps/chosen": -96.61660766601562, "logps/rejected": -115.6915512084961, "loss": 0.1511, "rewards/accuracies": 1.0, "rewards/chosen": -2.109031915664673, "rewards/margins": 2.5076656341552734, "rewards/rejected": -4.616697788238525, "step": 1746 }, { "epoch": 2.8, "learning_rate": 3.824811732065002e-07, "logits/chosen": -1.6088203191757202, "logits/rejected": -1.6484349966049194, "logps/chosen": -92.48394012451172, "logps/rejected": -148.11355590820312, "loss": 0.0913, "rewards/accuracies": 1.0, "rewards/chosen": -1.6739041805267334, "rewards/margins": 5.164997100830078, "rewards/rejected": -6.838900566101074, "step": 1747 }, { "epoch": 2.81, "learning_rate": 3.823820848196591e-07, "logits/chosen": -1.6835582256317139, "logits/rejected": -1.7297847270965576, "logps/chosen": -81.65963745117188, "logps/rejected": -142.24658203125, "loss": 0.289, "rewards/accuracies": 1.0, "rewards/chosen": -0.986751139163971, "rewards/margins": 4.716616153717041, "rewards/rejected": -5.703367233276367, "step": 1748 }, { "epoch": 2.81, "learning_rate": 3.822829964328181e-07, "logits/chosen": -1.593947410583496, "logits/rejected": -1.5817863941192627, "logps/chosen": -137.06143188476562, "logps/rejected": -145.72488403320312, "loss": 0.1489, "rewards/accuracies": 1.0, "rewards/chosen": -1.3429783582687378, "rewards/margins": 3.469151496887207, "rewards/rejected": -4.812129974365234, "step": 1749 }, { "epoch": 2.81, "learning_rate": 3.82183908045977e-07, "logits/chosen": -1.6540350914001465, "logits/rejected": -1.5765328407287598, "logps/chosen": -132.54258728027344, "logps/rejected": -153.92657470703125, "loss": 0.0623, "rewards/accuracies": 0.75, "rewards/chosen": -2.8090574741363525, "rewards/margins": 3.0394363403320312, "rewards/rejected": -5.848493576049805, "step": 1750 }, { "epoch": 2.81, "learning_rate": 3.820848196591359e-07, "logits/chosen": -1.792435884475708, "logits/rejected": -1.8211042881011963, "logps/chosen": -98.53768920898438, "logps/rejected": -152.32498168945312, "loss": 0.2383, "rewards/accuracies": 1.0, "rewards/chosen": -1.8069242238998413, "rewards/margins": 4.923348426818848, "rewards/rejected": -6.7302727699279785, "step": 1751 }, { "epoch": 2.81, "learning_rate": 3.819857312722949e-07, "logits/chosen": -1.766941785812378, "logits/rejected": -1.7398006916046143, "logps/chosen": -104.31485748291016, "logps/rejected": -147.36483764648438, "loss": 0.0531, "rewards/accuracies": 1.0, "rewards/chosen": -1.5594552755355835, "rewards/margins": 5.265119552612305, "rewards/rejected": -6.8245744705200195, "step": 1752 }, { "epoch": 2.81, "learning_rate": 3.818866428854538e-07, "logits/chosen": -1.6534035205841064, "logits/rejected": -1.556052327156067, "logps/chosen": -91.64700317382812, "logps/rejected": -126.10535430908203, "loss": 0.1364, "rewards/accuracies": 0.75, "rewards/chosen": -0.3308826684951782, "rewards/margins": 2.6697590351104736, "rewards/rejected": -3.0006415843963623, "step": 1753 }, { "epoch": 2.82, "learning_rate": 3.817875544986127e-07, "logits/chosen": -1.587235927581787, "logits/rejected": -1.5724472999572754, "logps/chosen": -85.00242614746094, "logps/rejected": -128.0253448486328, "loss": 0.0913, "rewards/accuracies": 1.0, "rewards/chosen": -0.7019557952880859, "rewards/margins": 3.877869129180908, "rewards/rejected": -4.579824447631836, "step": 1754 }, { "epoch": 2.82, "learning_rate": 3.816884661117717e-07, "logits/chosen": -1.7187707424163818, "logits/rejected": -1.769547462463379, "logps/chosen": -76.54315185546875, "logps/rejected": -151.90740966796875, "loss": 0.2259, "rewards/accuracies": 0.75, "rewards/chosen": -1.3100498914718628, "rewards/margins": 6.1684136390686035, "rewards/rejected": -7.478463172912598, "step": 1755 }, { "epoch": 2.82, "learning_rate": 3.815893777249306e-07, "logits/chosen": -1.5695879459381104, "logits/rejected": -1.6588401794433594, "logps/chosen": -86.45618438720703, "logps/rejected": -131.11285400390625, "loss": 0.2785, "rewards/accuracies": 1.0, "rewards/chosen": -1.007049322128296, "rewards/margins": 3.660254955291748, "rewards/rejected": -4.667304515838623, "step": 1756 }, { "epoch": 2.82, "learning_rate": 3.814902893380896e-07, "logits/chosen": -1.6101922988891602, "logits/rejected": -1.578542947769165, "logps/chosen": -91.38618469238281, "logps/rejected": -131.7822723388672, "loss": 0.1204, "rewards/accuracies": 1.0, "rewards/chosen": -0.9607866406440735, "rewards/margins": 3.6881613731384277, "rewards/rejected": -4.648947715759277, "step": 1757 }, { "epoch": 2.82, "learning_rate": 3.813912009512485e-07, "logits/chosen": -1.8011674880981445, "logits/rejected": -1.6972105503082275, "logps/chosen": -111.57304382324219, "logps/rejected": -144.32330322265625, "loss": 0.2546, "rewards/accuracies": 1.0, "rewards/chosen": -2.2235825061798096, "rewards/margins": 4.009917736053467, "rewards/rejected": -6.233500003814697, "step": 1758 }, { "epoch": 2.82, "learning_rate": 3.812921125644074e-07, "logits/chosen": -1.5903282165527344, "logits/rejected": -1.5985771417617798, "logps/chosen": -51.76702880859375, "logps/rejected": -118.87730407714844, "loss": 0.0765, "rewards/accuracies": 1.0, "rewards/chosen": 0.11593732237815857, "rewards/margins": 5.692895889282227, "rewards/rejected": -5.576958656311035, "step": 1759 }, { "epoch": 2.83, "learning_rate": 3.811930241775664e-07, "logits/chosen": -1.5937902927398682, "logits/rejected": -1.6603013277053833, "logps/chosen": -86.8178939819336, "logps/rejected": -153.36459350585938, "loss": 0.1647, "rewards/accuracies": 1.0, "rewards/chosen": -1.2910090684890747, "rewards/margins": 5.230852127075195, "rewards/rejected": -6.5218610763549805, "step": 1760 }, { "epoch": 2.83, "learning_rate": 3.810939357907253e-07, "logits/chosen": -1.7027860879898071, "logits/rejected": -1.6819080114364624, "logps/chosen": -118.70219421386719, "logps/rejected": -144.90090942382812, "loss": 0.078, "rewards/accuracies": 0.75, "rewards/chosen": -2.6176843643188477, "rewards/margins": 2.1665663719177246, "rewards/rejected": -4.784250736236572, "step": 1761 }, { "epoch": 2.83, "learning_rate": 3.809948474038843e-07, "logits/chosen": -1.8001539707183838, "logits/rejected": -1.8497732877731323, "logps/chosen": -73.74909973144531, "logps/rejected": -163.430419921875, "loss": 0.1442, "rewards/accuracies": 1.0, "rewards/chosen": -0.6573901176452637, "rewards/margins": 8.50106143951416, "rewards/rejected": -9.158451080322266, "step": 1762 }, { "epoch": 2.83, "learning_rate": 3.808957590170432e-07, "logits/chosen": -1.753782868385315, "logits/rejected": -1.7618824243545532, "logps/chosen": -109.74986267089844, "logps/rejected": -148.46038818359375, "loss": 0.0906, "rewards/accuracies": 0.75, "rewards/chosen": -1.5627260208129883, "rewards/margins": 3.018367290496826, "rewards/rejected": -4.5810933113098145, "step": 1763 }, { "epoch": 2.83, "learning_rate": 3.807966706302021e-07, "logits/chosen": -1.6510804891586304, "logits/rejected": -1.6993743181228638, "logps/chosen": -126.80740356445312, "logps/rejected": -150.28273010253906, "loss": 0.1583, "rewards/accuracies": 0.75, "rewards/chosen": -1.643937110900879, "rewards/margins": 2.1633071899414062, "rewards/rejected": -3.8072447776794434, "step": 1764 }, { "epoch": 2.83, "learning_rate": 3.8069758224336107e-07, "logits/chosen": -1.7592511177062988, "logits/rejected": -1.69386887550354, "logps/chosen": -102.38301086425781, "logps/rejected": -142.7652587890625, "loss": 0.1877, "rewards/accuracies": 1.0, "rewards/chosen": -0.8317674994468689, "rewards/margins": 4.902235507965088, "rewards/rejected": -5.734002113342285, "step": 1765 }, { "epoch": 2.83, "learning_rate": 3.8059849385652e-07, "logits/chosen": -1.685221791267395, "logits/rejected": -1.6991008520126343, "logps/chosen": -84.57648468017578, "logps/rejected": -166.98175048828125, "loss": 0.1114, "rewards/accuracies": 1.0, "rewards/chosen": -2.5754833221435547, "rewards/margins": 6.0096435546875, "rewards/rejected": -8.585126876831055, "step": 1766 }, { "epoch": 2.84, "learning_rate": 3.8049940546967893e-07, "logits/chosen": -1.6797466278076172, "logits/rejected": -1.6582083702087402, "logps/chosen": -118.40974426269531, "logps/rejected": -143.94381713867188, "loss": 0.1118, "rewards/accuracies": 1.0, "rewards/chosen": -1.5311591625213623, "rewards/margins": 2.851269245147705, "rewards/rejected": -4.3824286460876465, "step": 1767 }, { "epoch": 2.84, "learning_rate": 3.804003170828379e-07, "logits/chosen": -1.6236010789871216, "logits/rejected": -1.5500752925872803, "logps/chosen": -74.86937713623047, "logps/rejected": -99.06585693359375, "loss": 0.1158, "rewards/accuracies": 1.0, "rewards/chosen": -0.5848981738090515, "rewards/margins": 2.9403905868530273, "rewards/rejected": -3.5252888202667236, "step": 1768 }, { "epoch": 2.84, "learning_rate": 3.803012286959968e-07, "logits/chosen": -1.5713002681732178, "logits/rejected": -1.6217745542526245, "logps/chosen": -113.93025207519531, "logps/rejected": -190.17652893066406, "loss": 0.0866, "rewards/accuracies": 1.0, "rewards/chosen": -2.879446268081665, "rewards/margins": 7.012668132781982, "rewards/rejected": -9.892114639282227, "step": 1769 }, { "epoch": 2.84, "learning_rate": 3.8020214030915576e-07, "logits/chosen": -1.4542591571807861, "logits/rejected": -1.4850891828536987, "logps/chosen": -83.11414337158203, "logps/rejected": -162.91851806640625, "loss": 0.1201, "rewards/accuracies": 1.0, "rewards/chosen": -1.0939154624938965, "rewards/margins": 6.6386237144470215, "rewards/rejected": -7.732539176940918, "step": 1770 }, { "epoch": 2.84, "learning_rate": 3.8010305192231467e-07, "logits/chosen": -1.6708972454071045, "logits/rejected": -1.7647781372070312, "logps/chosen": -97.3715591430664, "logps/rejected": -204.25997924804688, "loss": 0.2406, "rewards/accuracies": 1.0, "rewards/chosen": -2.127810478210449, "rewards/margins": 7.065168380737305, "rewards/rejected": -9.192978858947754, "step": 1771 }, { "epoch": 2.84, "learning_rate": 3.8000396353547363e-07, "logits/chosen": -1.6088831424713135, "logits/rejected": -1.645223617553711, "logps/chosen": -81.86747741699219, "logps/rejected": -178.09152221679688, "loss": 0.0553, "rewards/accuracies": 1.0, "rewards/chosen": -0.5065360069274902, "rewards/margins": 7.853977203369141, "rewards/rejected": -8.360513687133789, "step": 1772 }, { "epoch": 2.85, "learning_rate": 3.799048751486326e-07, "logits/chosen": -1.7509613037109375, "logits/rejected": -1.7273695468902588, "logps/chosen": -95.68939208984375, "logps/rejected": -126.20100402832031, "loss": 0.0886, "rewards/accuracies": 1.0, "rewards/chosen": -1.1065633296966553, "rewards/margins": 3.528517484664917, "rewards/rejected": -4.635080814361572, "step": 1773 }, { "epoch": 2.85, "learning_rate": 3.798057867617915e-07, "logits/chosen": -1.6854461431503296, "logits/rejected": -1.6701722145080566, "logps/chosen": -105.21220397949219, "logps/rejected": -152.93569946289062, "loss": 0.1845, "rewards/accuracies": 1.0, "rewards/chosen": -1.337505578994751, "rewards/margins": 4.891748905181885, "rewards/rejected": -6.229255199432373, "step": 1774 }, { "epoch": 2.85, "learning_rate": 3.7970669837495045e-07, "logits/chosen": -1.7481276988983154, "logits/rejected": -1.7513872385025024, "logps/chosen": -75.9001693725586, "logps/rejected": -120.05902099609375, "loss": 0.1417, "rewards/accuracies": 1.0, "rewards/chosen": -0.9432734251022339, "rewards/margins": 3.886230230331421, "rewards/rejected": -4.829503536224365, "step": 1775 }, { "epoch": 2.85, "learning_rate": 3.7960760998810936e-07, "logits/chosen": -1.388261079788208, "logits/rejected": -1.4001448154449463, "logps/chosen": -100.7335433959961, "logps/rejected": -162.1338348388672, "loss": 0.1805, "rewards/accuracies": 1.0, "rewards/chosen": -1.609378457069397, "rewards/margins": 6.518288612365723, "rewards/rejected": -8.127667427062988, "step": 1776 }, { "epoch": 2.85, "learning_rate": 3.795085216012683e-07, "logits/chosen": -1.66798734664917, "logits/rejected": -1.5416667461395264, "logps/chosen": -95.4244613647461, "logps/rejected": -131.13430786132812, "loss": 0.1188, "rewards/accuracies": 1.0, "rewards/chosen": -0.9205071926116943, "rewards/margins": 4.767366409301758, "rewards/rejected": -5.687872886657715, "step": 1777 }, { "epoch": 2.85, "learning_rate": 3.794094332144273e-07, "logits/chosen": -1.5265100002288818, "logits/rejected": -1.5652284622192383, "logps/chosen": -67.69842529296875, "logps/rejected": -142.65243530273438, "loss": 0.332, "rewards/accuracies": 1.0, "rewards/chosen": -0.884462833404541, "rewards/margins": 6.6511335372924805, "rewards/rejected": -7.53559684753418, "step": 1778 }, { "epoch": 2.86, "learning_rate": 3.793103448275862e-07, "logits/chosen": -1.6109681129455566, "logits/rejected": -1.6076509952545166, "logps/chosen": -81.2899169921875, "logps/rejected": -118.94900512695312, "loss": 0.2052, "rewards/accuracies": 0.75, "rewards/chosen": -0.9782448410987854, "rewards/margins": 3.7560534477233887, "rewards/rejected": -4.7342987060546875, "step": 1779 }, { "epoch": 2.86, "learning_rate": 3.7921125644074515e-07, "logits/chosen": -1.7216942310333252, "logits/rejected": -1.7283964157104492, "logps/chosen": -108.41961669921875, "logps/rejected": -154.80162048339844, "loss": 0.2486, "rewards/accuracies": 1.0, "rewards/chosen": -1.6344870328903198, "rewards/margins": 4.183652400970459, "rewards/rejected": -5.818139553070068, "step": 1780 }, { "epoch": 2.86, "learning_rate": 3.7911216805390405e-07, "logits/chosen": -1.6718034744262695, "logits/rejected": -1.691701054573059, "logps/chosen": -108.55213928222656, "logps/rejected": -165.3509979248047, "loss": 0.1214, "rewards/accuracies": 1.0, "rewards/chosen": -1.5856109857559204, "rewards/margins": 5.235071659088135, "rewards/rejected": -6.820682525634766, "step": 1781 }, { "epoch": 2.86, "learning_rate": 3.79013079667063e-07, "logits/chosen": -1.5459458827972412, "logits/rejected": -1.5469999313354492, "logps/chosen": -105.33699035644531, "logps/rejected": -184.712158203125, "loss": 0.1882, "rewards/accuracies": 1.0, "rewards/chosen": -2.427011489868164, "rewards/margins": 6.092480659484863, "rewards/rejected": -8.519493103027344, "step": 1782 }, { "epoch": 2.86, "learning_rate": 3.789139912802219e-07, "logits/chosen": -1.6517573595046997, "logits/rejected": -1.7072194814682007, "logps/chosen": -105.84122467041016, "logps/rejected": -138.7044677734375, "loss": 0.0755, "rewards/accuracies": 1.0, "rewards/chosen": -1.0288937091827393, "rewards/margins": 4.313884735107422, "rewards/rejected": -5.342778205871582, "step": 1783 }, { "epoch": 2.86, "learning_rate": 3.788149028933809e-07, "logits/chosen": -1.7199172973632812, "logits/rejected": -1.6935867071151733, "logps/chosen": -105.45834350585938, "logps/rejected": -164.40560913085938, "loss": 0.1851, "rewards/accuracies": 1.0, "rewards/chosen": -0.9580499529838562, "rewards/margins": 6.656195640563965, "rewards/rejected": -7.614245414733887, "step": 1784 }, { "epoch": 2.87, "learning_rate": 3.7871581450653984e-07, "logits/chosen": -1.8847829103469849, "logits/rejected": -1.6701092720031738, "logps/chosen": -118.7100601196289, "logps/rejected": -138.61398315429688, "loss": 0.1633, "rewards/accuracies": 0.75, "rewards/chosen": -1.6034716367721558, "rewards/margins": 3.640238046646118, "rewards/rejected": -5.243709564208984, "step": 1785 }, { "epoch": 2.87, "learning_rate": 3.7861672611969875e-07, "logits/chosen": -1.8477497100830078, "logits/rejected": -1.7315752506256104, "logps/chosen": -135.99404907226562, "logps/rejected": -122.90853881835938, "loss": 0.1248, "rewards/accuracies": 1.0, "rewards/chosen": -1.9037501811981201, "rewards/margins": 1.650345802307129, "rewards/rejected": -3.554095983505249, "step": 1786 }, { "epoch": 2.87, "learning_rate": 3.7851763773285765e-07, "logits/chosen": -1.7583935260772705, "logits/rejected": -1.8241909742355347, "logps/chosen": -94.69377899169922, "logps/rejected": -144.22958374023438, "loss": 0.0688, "rewards/accuracies": 1.0, "rewards/chosen": -1.9883744716644287, "rewards/margins": 3.6275508403778076, "rewards/rejected": -5.615924835205078, "step": 1787 }, { "epoch": 2.87, "learning_rate": 3.784185493460166e-07, "logits/chosen": -1.6794418096542358, "logits/rejected": -1.6705546379089355, "logps/chosen": -115.58597564697266, "logps/rejected": -154.1556396484375, "loss": 0.0665, "rewards/accuracies": 1.0, "rewards/chosen": -2.2622792720794678, "rewards/margins": 5.815978050231934, "rewards/rejected": -8.07825756072998, "step": 1788 }, { "epoch": 2.87, "learning_rate": 3.7831946095917557e-07, "logits/chosen": -1.6077405214309692, "logits/rejected": -1.6480562686920166, "logps/chosen": -104.37975311279297, "logps/rejected": -167.54733276367188, "loss": 0.0409, "rewards/accuracies": 1.0, "rewards/chosen": -3.3206868171691895, "rewards/margins": 5.607962608337402, "rewards/rejected": -8.92864990234375, "step": 1789 }, { "epoch": 2.87, "learning_rate": 3.7822037257233453e-07, "logits/chosen": -1.7588698863983154, "logits/rejected": -1.698569893836975, "logps/chosen": -107.63155364990234, "logps/rejected": -146.66319274902344, "loss": 0.1297, "rewards/accuracies": 1.0, "rewards/chosen": -0.5554607510566711, "rewards/margins": 6.162705898284912, "rewards/rejected": -6.718166828155518, "step": 1790 }, { "epoch": 2.87, "learning_rate": 3.7812128418549344e-07, "logits/chosen": -1.5314441919326782, "logits/rejected": -1.5583475828170776, "logps/chosen": -106.9226303100586, "logps/rejected": -159.56634521484375, "loss": 0.0762, "rewards/accuracies": 1.0, "rewards/chosen": -1.707304835319519, "rewards/margins": 4.517581462860107, "rewards/rejected": -6.224885940551758, "step": 1791 }, { "epoch": 2.88, "learning_rate": 3.7802219579865235e-07, "logits/chosen": -1.6321523189544678, "logits/rejected": -1.662232756614685, "logps/chosen": -100.90032958984375, "logps/rejected": -142.9011688232422, "loss": 0.1641, "rewards/accuracies": 1.0, "rewards/chosen": -1.7500076293945312, "rewards/margins": 3.2798030376434326, "rewards/rejected": -5.029810905456543, "step": 1792 }, { "epoch": 2.88, "learning_rate": 3.779231074118113e-07, "logits/chosen": -1.7939060926437378, "logits/rejected": -1.7558579444885254, "logps/chosen": -117.99163818359375, "logps/rejected": -197.872802734375, "loss": 0.1082, "rewards/accuracies": 1.0, "rewards/chosen": -3.3665931224823, "rewards/margins": 7.062250137329102, "rewards/rejected": -10.42884349822998, "step": 1793 }, { "epoch": 2.88, "learning_rate": 3.7782401902497026e-07, "logits/chosen": -1.657088041305542, "logits/rejected": -1.6200149059295654, "logps/chosen": -121.12468719482422, "logps/rejected": -133.60653686523438, "loss": 0.2105, "rewards/accuracies": 1.0, "rewards/chosen": -2.2447307109832764, "rewards/margins": 2.37349009513855, "rewards/rejected": -4.618220806121826, "step": 1794 }, { "epoch": 2.88, "learning_rate": 3.777249306381292e-07, "logits/chosen": -1.6224212646484375, "logits/rejected": -1.6546275615692139, "logps/chosen": -110.99869537353516, "logps/rejected": -156.47677612304688, "loss": 0.0354, "rewards/accuracies": 1.0, "rewards/chosen": -1.9481110572814941, "rewards/margins": 6.501053810119629, "rewards/rejected": -8.449165344238281, "step": 1795 }, { "epoch": 2.88, "learning_rate": 3.7762584225128813e-07, "logits/chosen": -1.6862252950668335, "logits/rejected": -1.6314691305160522, "logps/chosen": -116.3354263305664, "logps/rejected": -163.01638793945312, "loss": 0.0879, "rewards/accuracies": 1.0, "rewards/chosen": -0.4760788381099701, "rewards/margins": 6.421491622924805, "rewards/rejected": -6.897570610046387, "step": 1796 }, { "epoch": 2.88, "learning_rate": 3.7752675386444704e-07, "logits/chosen": -1.6113824844360352, "logits/rejected": -1.6036114692687988, "logps/chosen": -95.64356231689453, "logps/rejected": -121.98859405517578, "loss": 0.2034, "rewards/accuracies": 1.0, "rewards/chosen": -2.3950374126434326, "rewards/margins": 3.210289716720581, "rewards/rejected": -5.605327129364014, "step": 1797 }, { "epoch": 2.89, "learning_rate": 3.77427665477606e-07, "logits/chosen": -1.622185468673706, "logits/rejected": -1.546464443206787, "logps/chosen": -104.50080871582031, "logps/rejected": -163.09286499023438, "loss": 0.051, "rewards/accuracies": 1.0, "rewards/chosen": -0.16539974510669708, "rewards/margins": 8.776033401489258, "rewards/rejected": -8.94143295288086, "step": 1798 }, { "epoch": 2.89, "learning_rate": 3.7732857709076496e-07, "logits/chosen": -1.4777554273605347, "logits/rejected": -1.5220603942871094, "logps/chosen": -99.87274169921875, "logps/rejected": -98.11062622070312, "loss": 0.1556, "rewards/accuracies": 0.5, "rewards/chosen": -3.034170150756836, "rewards/margins": 0.34193193912506104, "rewards/rejected": -3.3761019706726074, "step": 1799 }, { "epoch": 2.89, "learning_rate": 3.772294887039239e-07, "logits/chosen": -1.7738866806030273, "logits/rejected": -1.6994258165359497, "logps/chosen": -80.92544555664062, "logps/rejected": -106.82350158691406, "loss": 0.2368, "rewards/accuracies": 1.0, "rewards/chosen": -1.1291041374206543, "rewards/margins": 4.073250770568848, "rewards/rejected": -5.20235538482666, "step": 1800 }, { "epoch": 2.89, "learning_rate": 3.771304003170828e-07, "logits/chosen": -1.5356467962265015, "logits/rejected": -1.6702868938446045, "logps/chosen": -82.96348571777344, "logps/rejected": -170.287841796875, "loss": 0.1043, "rewards/accuracies": 1.0, "rewards/chosen": -0.6384035348892212, "rewards/margins": 8.618170738220215, "rewards/rejected": -9.256574630737305, "step": 1801 }, { "epoch": 2.89, "learning_rate": 3.7703131193024173e-07, "logits/chosen": -1.6392366886138916, "logits/rejected": -1.5786510705947876, "logps/chosen": -103.74851989746094, "logps/rejected": -144.15748596191406, "loss": 0.0316, "rewards/accuracies": 1.0, "rewards/chosen": -1.4483575820922852, "rewards/margins": 4.92008113861084, "rewards/rejected": -6.368438720703125, "step": 1802 }, { "epoch": 2.89, "learning_rate": 3.769322235434007e-07, "logits/chosen": -1.677764892578125, "logits/rejected": -1.743652582168579, "logps/chosen": -92.18203735351562, "logps/rejected": -197.68563842773438, "loss": 0.1996, "rewards/accuracies": 1.0, "rewards/chosen": -0.710017204284668, "rewards/margins": 9.06793212890625, "rewards/rejected": -9.777950286865234, "step": 1803 }, { "epoch": 2.9, "learning_rate": 3.768331351565596e-07, "logits/chosen": -1.6745946407318115, "logits/rejected": -1.691635251045227, "logps/chosen": -120.13055419921875, "logps/rejected": -166.5257568359375, "loss": 0.098, "rewards/accuracies": 1.0, "rewards/chosen": -0.6820070743560791, "rewards/margins": 5.661100387573242, "rewards/rejected": -6.343107223510742, "step": 1804 }, { "epoch": 2.9, "learning_rate": 3.767340467697186e-07, "logits/chosen": -1.7669814825057983, "logits/rejected": -1.7743035554885864, "logps/chosen": -108.32340240478516, "logps/rejected": -126.83101654052734, "loss": 0.2007, "rewards/accuracies": 1.0, "rewards/chosen": -1.4231961965560913, "rewards/margins": 2.030694007873535, "rewards/rejected": -3.453890323638916, "step": 1805 }, { "epoch": 2.9, "learning_rate": 3.766349583828775e-07, "logits/chosen": -1.7148598432540894, "logits/rejected": -1.6273465156555176, "logps/chosen": -113.26337432861328, "logps/rejected": -156.97796630859375, "loss": 0.1076, "rewards/accuracies": 1.0, "rewards/chosen": -1.7466990947723389, "rewards/margins": 4.992303371429443, "rewards/rejected": -6.739002704620361, "step": 1806 }, { "epoch": 2.9, "learning_rate": 3.765358699960364e-07, "logits/chosen": -1.4694433212280273, "logits/rejected": -1.5362205505371094, "logps/chosen": -80.19744110107422, "logps/rejected": -152.014404296875, "loss": 0.0703, "rewards/accuracies": 1.0, "rewards/chosen": -0.7547639608383179, "rewards/margins": 6.60331916809082, "rewards/rejected": -7.358083248138428, "step": 1807 }, { "epoch": 2.9, "learning_rate": 3.764367816091954e-07, "logits/chosen": -1.7102206945419312, "logits/rejected": -1.732323169708252, "logps/chosen": -122.63440704345703, "logps/rejected": -157.1143341064453, "loss": 0.0685, "rewards/accuracies": 1.0, "rewards/chosen": -2.5858144760131836, "rewards/margins": 5.267463207244873, "rewards/rejected": -7.853277683258057, "step": 1808 }, { "epoch": 2.9, "learning_rate": 3.763376932223543e-07, "logits/chosen": -1.6916388273239136, "logits/rejected": -1.693162441253662, "logps/chosen": -92.6587142944336, "logps/rejected": -134.86993408203125, "loss": 0.1484, "rewards/accuracies": 1.0, "rewards/chosen": -0.8916627764701843, "rewards/margins": 3.9587302207946777, "rewards/rejected": -4.850393295288086, "step": 1809 }, { "epoch": 2.91, "learning_rate": 3.762386048355133e-07, "logits/chosen": -1.603738784790039, "logits/rejected": -1.635077714920044, "logps/chosen": -74.41727447509766, "logps/rejected": -126.33573913574219, "loss": 0.1966, "rewards/accuracies": 1.0, "rewards/chosen": -0.8321354389190674, "rewards/margins": 4.7907280921936035, "rewards/rejected": -5.622863292694092, "step": 1810 }, { "epoch": 2.91, "learning_rate": 3.761395164486722e-07, "logits/chosen": -1.7277610301971436, "logits/rejected": -1.7426552772521973, "logps/chosen": -103.33746337890625, "logps/rejected": -169.03765869140625, "loss": 0.0827, "rewards/accuracies": 1.0, "rewards/chosen": -2.020691394805908, "rewards/margins": 7.0620012283325195, "rewards/rejected": -9.08269214630127, "step": 1811 }, { "epoch": 2.91, "learning_rate": 3.760404280618311e-07, "logits/chosen": -1.732521653175354, "logits/rejected": -1.776577353477478, "logps/chosen": -99.52017974853516, "logps/rejected": -161.783203125, "loss": 0.1416, "rewards/accuracies": 1.0, "rewards/chosen": -0.5220286846160889, "rewards/margins": 7.633773326873779, "rewards/rejected": -8.155801773071289, "step": 1812 }, { "epoch": 2.91, "learning_rate": 3.759413396749901e-07, "logits/chosen": -1.6191545724868774, "logits/rejected": -1.5511865615844727, "logps/chosen": -122.78934478759766, "logps/rejected": -169.49647521972656, "loss": 0.1041, "rewards/accuracies": 1.0, "rewards/chosen": -2.176661252975464, "rewards/margins": 6.4275102615356445, "rewards/rejected": -8.604171752929688, "step": 1813 }, { "epoch": 2.91, "learning_rate": 3.75842251288149e-07, "logits/chosen": -1.8406273126602173, "logits/rejected": -1.7240077257156372, "logps/chosen": -115.03936004638672, "logps/rejected": -144.81439208984375, "loss": 0.0732, "rewards/accuracies": 1.0, "rewards/chosen": -1.1487236022949219, "rewards/margins": 4.783183574676514, "rewards/rejected": -5.9319071769714355, "step": 1814 }, { "epoch": 2.91, "learning_rate": 3.75743162901308e-07, "logits/chosen": -1.5763864517211914, "logits/rejected": -1.5711785554885864, "logps/chosen": -77.22325897216797, "logps/rejected": -126.6904525756836, "loss": 0.0703, "rewards/accuracies": 1.0, "rewards/chosen": -1.3793689012527466, "rewards/margins": 5.253995418548584, "rewards/rejected": -6.633364200592041, "step": 1815 }, { "epoch": 2.91, "learning_rate": 3.756440745144669e-07, "logits/chosen": -1.531294584274292, "logits/rejected": -1.5590920448303223, "logps/chosen": -93.43187713623047, "logps/rejected": -157.0635223388672, "loss": 0.113, "rewards/accuracies": 1.0, "rewards/chosen": -0.6699162125587463, "rewards/margins": 5.294865608215332, "rewards/rejected": -5.964782238006592, "step": 1816 }, { "epoch": 2.92, "learning_rate": 3.755449861276258e-07, "logits/chosen": -1.7886762619018555, "logits/rejected": -1.7001674175262451, "logps/chosen": -114.40238189697266, "logps/rejected": -168.64208984375, "loss": 0.1559, "rewards/accuracies": 1.0, "rewards/chosen": -0.7566229104995728, "rewards/margins": 6.680775165557861, "rewards/rejected": -7.4373979568481445, "step": 1817 }, { "epoch": 2.92, "learning_rate": 3.7544589774078477e-07, "logits/chosen": -1.7443194389343262, "logits/rejected": -1.68293035030365, "logps/chosen": -103.60906982421875, "logps/rejected": -138.01133728027344, "loss": 0.0534, "rewards/accuracies": 1.0, "rewards/chosen": -1.0838513374328613, "rewards/margins": 4.983738422393799, "rewards/rejected": -6.06758975982666, "step": 1818 }, { "epoch": 2.92, "learning_rate": 3.753468093539437e-07, "logits/chosen": -1.6506086587905884, "logits/rejected": -1.6421537399291992, "logps/chosen": -96.27152252197266, "logps/rejected": -169.48428344726562, "loss": 0.069, "rewards/accuracies": 1.0, "rewards/chosen": -1.3307204246520996, "rewards/margins": 5.7270026206970215, "rewards/rejected": -7.057723045349121, "step": 1819 }, { "epoch": 2.92, "learning_rate": 3.752477209671027e-07, "logits/chosen": -1.6307271718978882, "logits/rejected": -1.655086636543274, "logps/chosen": -86.7942123413086, "logps/rejected": -129.18511962890625, "loss": 0.1442, "rewards/accuracies": 0.75, "rewards/chosen": -1.605810284614563, "rewards/margins": 4.8568115234375, "rewards/rejected": -6.462621688842773, "step": 1820 }, { "epoch": 2.92, "learning_rate": 3.751486325802616e-07, "logits/chosen": -1.5474883317947388, "logits/rejected": -1.5981955528259277, "logps/chosen": -101.75738525390625, "logps/rejected": -160.26876831054688, "loss": 0.0981, "rewards/accuracies": 1.0, "rewards/chosen": -1.0146722793579102, "rewards/margins": 7.573986530303955, "rewards/rejected": -8.588659286499023, "step": 1821 }, { "epoch": 2.92, "learning_rate": 3.750495441934205e-07, "logits/chosen": -1.6465976238250732, "logits/rejected": -1.6424157619476318, "logps/chosen": -79.42121887207031, "logps/rejected": -136.84591674804688, "loss": 0.1349, "rewards/accuracies": 1.0, "rewards/chosen": 0.3404337167739868, "rewards/margins": 7.089875221252441, "rewards/rejected": -6.749441146850586, "step": 1822 }, { "epoch": 2.93, "learning_rate": 3.7495045580657946e-07, "logits/chosen": -1.749340295791626, "logits/rejected": -1.7375813722610474, "logps/chosen": -117.66568756103516, "logps/rejected": -121.6031265258789, "loss": 0.1332, "rewards/accuracies": 1.0, "rewards/chosen": -2.396629810333252, "rewards/margins": 1.7363895177841187, "rewards/rejected": -4.13301944732666, "step": 1823 }, { "epoch": 2.93, "learning_rate": 3.7485136741973837e-07, "logits/chosen": -1.5258640050888062, "logits/rejected": -1.5364243984222412, "logps/chosen": -97.76651763916016, "logps/rejected": -158.37901306152344, "loss": 0.1586, "rewards/accuracies": 1.0, "rewards/chosen": -2.404465436935425, "rewards/margins": 5.261653423309326, "rewards/rejected": -7.666119575500488, "step": 1824 }, { "epoch": 2.93, "learning_rate": 3.747522790328973e-07, "logits/chosen": -1.6214470863342285, "logits/rejected": -1.754777193069458, "logps/chosen": -70.96511840820312, "logps/rejected": -122.2367935180664, "loss": 0.1451, "rewards/accuracies": 0.75, "rewards/chosen": -0.4130151867866516, "rewards/margins": 1.8369543552398682, "rewards/rejected": -2.249969482421875, "step": 1825 }, { "epoch": 2.93, "learning_rate": 3.746531906460563e-07, "logits/chosen": -1.5367629528045654, "logits/rejected": -1.5843758583068848, "logps/chosen": -101.90836334228516, "logps/rejected": -170.86422729492188, "loss": 0.15, "rewards/accuracies": 1.0, "rewards/chosen": -1.6052725315093994, "rewards/margins": 3.9625608921051025, "rewards/rejected": -5.567833423614502, "step": 1826 }, { "epoch": 2.93, "learning_rate": 3.745541022592152e-07, "logits/chosen": -1.6687405109405518, "logits/rejected": -1.5846121311187744, "logps/chosen": -110.18712615966797, "logps/rejected": -141.54400634765625, "loss": 0.0643, "rewards/accuracies": 1.0, "rewards/chosen": -1.6952368021011353, "rewards/margins": 4.221129417419434, "rewards/rejected": -5.9163665771484375, "step": 1827 }, { "epoch": 2.93, "learning_rate": 3.7445501387237416e-07, "logits/chosen": -1.565414309501648, "logits/rejected": -1.597106695175171, "logps/chosen": -116.12474060058594, "logps/rejected": -152.18399047851562, "loss": 0.1885, "rewards/accuracies": 0.75, "rewards/chosen": -2.8293275833129883, "rewards/margins": 3.52860164642334, "rewards/rejected": -6.357929229736328, "step": 1828 }, { "epoch": 2.94, "learning_rate": 3.7435592548553306e-07, "logits/chosen": -1.5327577590942383, "logits/rejected": -1.519581913948059, "logps/chosen": -103.36714172363281, "logps/rejected": -164.3252410888672, "loss": 0.0963, "rewards/accuracies": 1.0, "rewards/chosen": -1.9687784910202026, "rewards/margins": 5.351982116699219, "rewards/rejected": -7.320760726928711, "step": 1829 }, { "epoch": 2.94, "learning_rate": 3.7425683709869197e-07, "logits/chosen": -1.6294317245483398, "logits/rejected": -1.6953259706497192, "logps/chosen": -103.53370666503906, "logps/rejected": -159.46585083007812, "loss": 0.1081, "rewards/accuracies": 1.0, "rewards/chosen": -1.6010355949401855, "rewards/margins": 3.8423380851745605, "rewards/rejected": -5.443373680114746, "step": 1830 }, { "epoch": 2.94, "learning_rate": 3.74157748711851e-07, "logits/chosen": -1.714198112487793, "logits/rejected": -1.8112832307815552, "logps/chosen": -107.547607421875, "logps/rejected": -173.31353759765625, "loss": 0.0647, "rewards/accuracies": 1.0, "rewards/chosen": -1.4005768299102783, "rewards/margins": 4.298473358154297, "rewards/rejected": -5.699049949645996, "step": 1831 }, { "epoch": 2.94, "learning_rate": 3.740586603250099e-07, "logits/chosen": -1.5286470651626587, "logits/rejected": -1.4846584796905518, "logps/chosen": -101.46790313720703, "logps/rejected": -181.7661590576172, "loss": 0.1304, "rewards/accuracies": 1.0, "rewards/chosen": -1.7079964876174927, "rewards/margins": 8.209821701049805, "rewards/rejected": -9.917818069458008, "step": 1832 }, { "epoch": 2.94, "learning_rate": 3.7395957193816885e-07, "logits/chosen": -1.7027385234832764, "logits/rejected": -1.6326727867126465, "logps/chosen": -94.35637664794922, "logps/rejected": -123.62086486816406, "loss": 0.1038, "rewards/accuracies": 1.0, "rewards/chosen": -1.1007566452026367, "rewards/margins": 3.4199986457824707, "rewards/rejected": -4.520754814147949, "step": 1833 }, { "epoch": 2.94, "learning_rate": 3.7386048355132776e-07, "logits/chosen": -1.676821231842041, "logits/rejected": -1.6876411437988281, "logps/chosen": -86.18657684326172, "logps/rejected": -132.56622314453125, "loss": 0.0933, "rewards/accuracies": 1.0, "rewards/chosen": -1.4426305294036865, "rewards/margins": 2.5332581996917725, "rewards/rejected": -3.975888729095459, "step": 1834 }, { "epoch": 2.95, "learning_rate": 3.7376139516448666e-07, "logits/chosen": -1.7392170429229736, "logits/rejected": -1.6444953680038452, "logps/chosen": -124.28804016113281, "logps/rejected": -160.623291015625, "loss": 0.1119, "rewards/accuracies": 1.0, "rewards/chosen": -2.2173526287078857, "rewards/margins": 5.181742191314697, "rewards/rejected": -7.39909553527832, "step": 1835 }, { "epoch": 2.95, "learning_rate": 3.736623067776457e-07, "logits/chosen": -1.5613404512405396, "logits/rejected": -1.5801303386688232, "logps/chosen": -105.79557800292969, "logps/rejected": -168.4930877685547, "loss": 0.1988, "rewards/accuracies": 1.0, "rewards/chosen": -2.643540859222412, "rewards/margins": 4.194326400756836, "rewards/rejected": -6.83786678314209, "step": 1836 }, { "epoch": 2.95, "learning_rate": 3.735632183908046e-07, "logits/chosen": -1.7099552154541016, "logits/rejected": -1.618041753768921, "logps/chosen": -99.18416595458984, "logps/rejected": -146.6209716796875, "loss": 0.2075, "rewards/accuracies": 1.0, "rewards/chosen": -0.6705077886581421, "rewards/margins": 5.2806715965271, "rewards/rejected": -5.951179504394531, "step": 1837 }, { "epoch": 2.95, "learning_rate": 3.7346413000396354e-07, "logits/chosen": -1.7018520832061768, "logits/rejected": -1.6403980255126953, "logps/chosen": -106.02590942382812, "logps/rejected": -110.58439636230469, "loss": 0.1805, "rewards/accuracies": 1.0, "rewards/chosen": -1.9982521533966064, "rewards/margins": 2.072617769241333, "rewards/rejected": -4.0708699226379395, "step": 1838 }, { "epoch": 2.95, "learning_rate": 3.7336504161712245e-07, "logits/chosen": -1.6169241666793823, "logits/rejected": -1.6344153881072998, "logps/chosen": -97.32797241210938, "logps/rejected": -165.24551391601562, "loss": 0.0744, "rewards/accuracies": 1.0, "rewards/chosen": -1.4419846534729004, "rewards/margins": 5.967092037200928, "rewards/rejected": -7.409076690673828, "step": 1839 }, { "epoch": 2.95, "learning_rate": 3.7326595323028136e-07, "logits/chosen": -1.7090115547180176, "logits/rejected": -1.647094488143921, "logps/chosen": -113.08973693847656, "logps/rejected": -174.51031494140625, "loss": 0.0652, "rewards/accuracies": 1.0, "rewards/chosen": -1.9218418598175049, "rewards/margins": 5.865823745727539, "rewards/rejected": -7.787665367126465, "step": 1840 }, { "epoch": 2.96, "learning_rate": 3.7316686484344037e-07, "logits/chosen": -1.822242259979248, "logits/rejected": -1.7936092615127563, "logps/chosen": -100.0579605102539, "logps/rejected": -107.98933410644531, "loss": 0.1689, "rewards/accuracies": 1.0, "rewards/chosen": -1.3148176670074463, "rewards/margins": 2.1842103004455566, "rewards/rejected": -3.499027967453003, "step": 1841 }, { "epoch": 2.96, "learning_rate": 3.730677764565993e-07, "logits/chosen": -1.785272240638733, "logits/rejected": -1.8058993816375732, "logps/chosen": -97.46770477294922, "logps/rejected": -163.251708984375, "loss": 0.1495, "rewards/accuracies": 1.0, "rewards/chosen": -0.7274589538574219, "rewards/margins": 6.838157653808594, "rewards/rejected": -7.565616607666016, "step": 1842 }, { "epoch": 2.96, "learning_rate": 3.7296868806975823e-07, "logits/chosen": -1.6869449615478516, "logits/rejected": -1.6811470985412598, "logps/chosen": -93.52455139160156, "logps/rejected": -158.1514129638672, "loss": 0.047, "rewards/accuracies": 1.0, "rewards/chosen": -1.242169976234436, "rewards/margins": 5.656375885009766, "rewards/rejected": -6.898545742034912, "step": 1843 }, { "epoch": 2.96, "learning_rate": 3.7286959968291714e-07, "logits/chosen": -1.5931137800216675, "logits/rejected": -1.5227012634277344, "logps/chosen": -133.9796142578125, "logps/rejected": -131.98158264160156, "loss": 0.1518, "rewards/accuracies": 1.0, "rewards/chosen": -2.447665214538574, "rewards/margins": 3.7390403747558594, "rewards/rejected": -6.186705589294434, "step": 1844 }, { "epoch": 2.96, "learning_rate": 3.7277051129607605e-07, "logits/chosen": -1.5257635116577148, "logits/rejected": -1.5521966218948364, "logps/chosen": -123.79499816894531, "logps/rejected": -154.1683807373047, "loss": 0.2141, "rewards/accuracies": 0.75, "rewards/chosen": -2.4627857208251953, "rewards/margins": 3.3797600269317627, "rewards/rejected": -5.842545986175537, "step": 1845 }, { "epoch": 2.96, "learning_rate": 3.72671422909235e-07, "logits/chosen": -1.4717140197753906, "logits/rejected": -1.7383618354797363, "logps/chosen": -78.05812072753906, "logps/rejected": -209.42141723632812, "loss": 0.1034, "rewards/accuracies": 1.0, "rewards/chosen": -1.4537895917892456, "rewards/margins": 5.240690231323242, "rewards/rejected": -6.694479942321777, "step": 1846 }, { "epoch": 2.96, "learning_rate": 3.7257233452239397e-07, "logits/chosen": -1.7382279634475708, "logits/rejected": -1.6613696813583374, "logps/chosen": -144.49481201171875, "logps/rejected": -149.93467712402344, "loss": 0.1187, "rewards/accuracies": 1.0, "rewards/chosen": -3.299633741378784, "rewards/margins": 3.491069793701172, "rewards/rejected": -6.790703773498535, "step": 1847 }, { "epoch": 2.97, "learning_rate": 3.7247324613555293e-07, "logits/chosen": -1.625678539276123, "logits/rejected": -1.6220862865447998, "logps/chosen": -101.85519409179688, "logps/rejected": -162.093017578125, "loss": 0.0957, "rewards/accuracies": 1.0, "rewards/chosen": -0.8012583255767822, "rewards/margins": 6.4540181159973145, "rewards/rejected": -7.255276203155518, "step": 1848 }, { "epoch": 2.97, "learning_rate": 3.7237415774871183e-07, "logits/chosen": -1.6036503314971924, "logits/rejected": -1.5726630687713623, "logps/chosen": -107.09656524658203, "logps/rejected": -185.03575134277344, "loss": 0.2031, "rewards/accuracies": 1.0, "rewards/chosen": -2.364143133163452, "rewards/margins": 5.4613542556762695, "rewards/rejected": -7.825497627258301, "step": 1849 }, { "epoch": 2.97, "learning_rate": 3.7227506936187074e-07, "logits/chosen": -1.684525966644287, "logits/rejected": -1.7338085174560547, "logps/chosen": -125.98589324951172, "logps/rejected": -182.1653594970703, "loss": 0.0452, "rewards/accuracies": 1.0, "rewards/chosen": -2.374876022338867, "rewards/margins": 4.519647598266602, "rewards/rejected": -6.894523620605469, "step": 1850 }, { "epoch": 2.97, "learning_rate": 3.721759809750297e-07, "logits/chosen": -1.6773960590362549, "logits/rejected": -1.6852432489395142, "logps/chosen": -106.29759216308594, "logps/rejected": -180.97177124023438, "loss": 0.0799, "rewards/accuracies": 1.0, "rewards/chosen": -2.2513556480407715, "rewards/margins": 5.7772536277771, "rewards/rejected": -8.028609275817871, "step": 1851 }, { "epoch": 2.97, "learning_rate": 3.7207689258818866e-07, "logits/chosen": -1.6329138278961182, "logits/rejected": -1.5798695087432861, "logps/chosen": -97.25318145751953, "logps/rejected": -138.33181762695312, "loss": 0.0852, "rewards/accuracies": 1.0, "rewards/chosen": -1.4794821739196777, "rewards/margins": 5.570281028747559, "rewards/rejected": -7.049763202667236, "step": 1852 }, { "epoch": 2.97, "learning_rate": 3.719778042013476e-07, "logits/chosen": -1.5611621141433716, "logits/rejected": -1.5081110000610352, "logps/chosen": -107.39796447753906, "logps/rejected": -133.10415649414062, "loss": 0.1684, "rewards/accuracies": 0.75, "rewards/chosen": -2.0686638355255127, "rewards/margins": 3.479729175567627, "rewards/rejected": -5.548393249511719, "step": 1853 }, { "epoch": 2.98, "learning_rate": 3.7187871581450653e-07, "logits/chosen": -1.5460736751556396, "logits/rejected": -1.5407496690750122, "logps/chosen": -115.52072143554688, "logps/rejected": -149.84645080566406, "loss": 0.1453, "rewards/accuracies": 1.0, "rewards/chosen": -1.8644421100616455, "rewards/margins": 4.501913070678711, "rewards/rejected": -6.366355895996094, "step": 1854 }, { "epoch": 2.98, "learning_rate": 3.7177962742766543e-07, "logits/chosen": -1.5871484279632568, "logits/rejected": -1.5384513139724731, "logps/chosen": -111.2124252319336, "logps/rejected": -156.21543884277344, "loss": 0.071, "rewards/accuracies": 1.0, "rewards/chosen": -1.4195386171340942, "rewards/margins": 5.0207414627075195, "rewards/rejected": -6.440280437469482, "step": 1855 }, { "epoch": 2.98, "learning_rate": 3.716805390408244e-07, "logits/chosen": -1.775153636932373, "logits/rejected": -1.6535683870315552, "logps/chosen": -100.07649993896484, "logps/rejected": -129.04507446289062, "loss": 0.0823, "rewards/accuracies": 1.0, "rewards/chosen": 0.029498115181922913, "rewards/margins": 4.148833274841309, "rewards/rejected": -4.119335174560547, "step": 1856 }, { "epoch": 2.98, "learning_rate": 3.7158145065398335e-07, "logits/chosen": -1.6579911708831787, "logits/rejected": -1.6191534996032715, "logps/chosen": -88.49031066894531, "logps/rejected": -123.34803771972656, "loss": 0.0678, "rewards/accuracies": 1.0, "rewards/chosen": -1.1533992290496826, "rewards/margins": 3.927980661392212, "rewards/rejected": -5.0813798904418945, "step": 1857 }, { "epoch": 2.98, "learning_rate": 3.714823622671423e-07, "logits/chosen": -1.6502625942230225, "logits/rejected": -1.6449239253997803, "logps/chosen": -88.060302734375, "logps/rejected": -115.62471008300781, "loss": 0.0718, "rewards/accuracies": 1.0, "rewards/chosen": -0.8267252445220947, "rewards/margins": 3.535227060317993, "rewards/rejected": -4.361952304840088, "step": 1858 }, { "epoch": 2.98, "learning_rate": 3.713832738803012e-07, "logits/chosen": -1.6126909255981445, "logits/rejected": -1.6365091800689697, "logps/chosen": -86.65589904785156, "logps/rejected": -137.86685180664062, "loss": 0.0781, "rewards/accuracies": 1.0, "rewards/chosen": -1.286224365234375, "rewards/margins": 3.535869598388672, "rewards/rejected": -4.822093963623047, "step": 1859 }, { "epoch": 2.99, "learning_rate": 3.7128418549346013e-07, "logits/chosen": -1.6097652912139893, "logits/rejected": -1.6258894205093384, "logps/chosen": -74.16486358642578, "logps/rejected": -122.19659423828125, "loss": 0.1459, "rewards/accuracies": 1.0, "rewards/chosen": -1.8506567478179932, "rewards/margins": 4.871476173400879, "rewards/rejected": -6.722132682800293, "step": 1860 }, { "epoch": 2.99, "learning_rate": 3.711850971066191e-07, "logits/chosen": -1.6427104473114014, "logits/rejected": -1.5989556312561035, "logps/chosen": -95.17898559570312, "logps/rejected": -144.50433349609375, "loss": 0.1138, "rewards/accuracies": 0.75, "rewards/chosen": -1.644016981124878, "rewards/margins": 4.418154716491699, "rewards/rejected": -6.062171459197998, "step": 1861 }, { "epoch": 2.99, "learning_rate": 3.7108600871977805e-07, "logits/chosen": -1.654820442199707, "logits/rejected": -1.6354401111602783, "logps/chosen": -87.06553649902344, "logps/rejected": -145.74801635742188, "loss": 0.0976, "rewards/accuracies": 1.0, "rewards/chosen": -2.06065034866333, "rewards/margins": 5.746522426605225, "rewards/rejected": -7.807172775268555, "step": 1862 }, { "epoch": 2.99, "learning_rate": 3.7098692033293695e-07, "logits/chosen": -1.904773235321045, "logits/rejected": -1.9184315204620361, "logps/chosen": -122.90592956542969, "logps/rejected": -192.02857971191406, "loss": 0.0902, "rewards/accuracies": 1.0, "rewards/chosen": -2.773087739944458, "rewards/margins": 5.274674892425537, "rewards/rejected": -8.047762870788574, "step": 1863 }, { "epoch": 2.99, "learning_rate": 3.708878319460959e-07, "logits/chosen": -1.712644100189209, "logits/rejected": -1.6661410331726074, "logps/chosen": -118.72415161132812, "logps/rejected": -142.90203857421875, "loss": 0.1228, "rewards/accuracies": 0.75, "rewards/chosen": -1.5256692171096802, "rewards/margins": 3.928725242614746, "rewards/rejected": -5.454394817352295, "step": 1864 }, { "epoch": 2.99, "learning_rate": 3.707887435592548e-07, "logits/chosen": -1.595664620399475, "logits/rejected": -1.577643871307373, "logps/chosen": -82.77230834960938, "logps/rejected": -156.54432678222656, "loss": 0.0913, "rewards/accuracies": 1.0, "rewards/chosen": -1.5633922815322876, "rewards/margins": 7.7410688400268555, "rewards/rejected": -9.304461479187012, "step": 1865 }, { "epoch": 3.0, "learning_rate": 3.706896551724138e-07, "logits/chosen": -1.680959701538086, "logits/rejected": -1.6975562572479248, "logps/chosen": -85.92871856689453, "logps/rejected": -145.62026977539062, "loss": 0.2369, "rewards/accuracies": 1.0, "rewards/chosen": -1.231262445449829, "rewards/margins": 5.183119773864746, "rewards/rejected": -6.414382457733154, "step": 1866 }, { "epoch": 3.0, "learning_rate": 3.705905667855727e-07, "logits/chosen": -1.6391139030456543, "logits/rejected": -1.6536986827850342, "logps/chosen": -103.18669891357422, "logps/rejected": -173.68785095214844, "loss": 0.2614, "rewards/accuracies": 0.75, "rewards/chosen": -3.0394108295440674, "rewards/margins": 5.660988807678223, "rewards/rejected": -8.700399398803711, "step": 1867 }, { "epoch": 3.0, "learning_rate": 3.7049147839873165e-07, "logits/chosen": -1.7592315673828125, "logits/rejected": -1.620063066482544, "logps/chosen": -130.3143310546875, "logps/rejected": -127.0625, "loss": 0.1578, "rewards/accuracies": 0.75, "rewards/chosen": -2.701396942138672, "rewards/margins": 1.934197187423706, "rewards/rejected": -4.635594367980957, "step": 1868 }, { "epoch": 3.0, "learning_rate": 3.703923900118906e-07, "logits/chosen": -1.558750033378601, "logits/rejected": -1.6264644861221313, "logps/chosen": -94.35810089111328, "logps/rejected": -133.89646911621094, "loss": 0.189, "rewards/accuracies": 1.0, "rewards/chosen": -1.9421181678771973, "rewards/margins": 3.864265203475952, "rewards/rejected": -5.8063836097717285, "step": 1869 }, { "epoch": 3.0, "learning_rate": 3.702933016250495e-07, "logits/chosen": -1.7621326446533203, "logits/rejected": -1.806652545928955, "logps/chosen": -99.29528045654297, "logps/rejected": -171.24449157714844, "loss": 0.0814, "rewards/accuracies": 1.0, "rewards/chosen": -1.0293467044830322, "rewards/margins": 6.328563690185547, "rewards/rejected": -7.35791015625, "step": 1870 }, { "epoch": 3.0, "learning_rate": 3.7019421323820847e-07, "logits/chosen": -1.5703579187393188, "logits/rejected": -1.555256962776184, "logps/chosen": -109.84393310546875, "logps/rejected": -178.0146942138672, "loss": 0.0125, "rewards/accuracies": 1.0, "rewards/chosen": -2.639150619506836, "rewards/margins": 7.076193809509277, "rewards/rejected": -9.715344429016113, "step": 1871 }, { "epoch": 3.0, "learning_rate": 3.700951248513674e-07, "logits/chosen": -1.674302577972412, "logits/rejected": -1.6335704326629639, "logps/chosen": -94.05751037597656, "logps/rejected": -139.27871704101562, "loss": 0.0594, "rewards/accuracies": 1.0, "rewards/chosen": -1.4134095907211304, "rewards/margins": 3.8592147827148438, "rewards/rejected": -5.272624492645264, "step": 1872 }, { "epoch": 3.01, "learning_rate": 3.6999603646452634e-07, "logits/chosen": -1.599224328994751, "logits/rejected": -1.6235761642456055, "logps/chosen": -88.71532440185547, "logps/rejected": -168.3770751953125, "loss": 0.0542, "rewards/accuracies": 1.0, "rewards/chosen": -1.3061755895614624, "rewards/margins": 7.155682563781738, "rewards/rejected": -8.461857795715332, "step": 1873 }, { "epoch": 3.01, "learning_rate": 3.698969480776853e-07, "logits/chosen": -1.6920950412750244, "logits/rejected": -1.7486002445220947, "logps/chosen": -105.12274932861328, "logps/rejected": -146.84295654296875, "loss": 0.0446, "rewards/accuracies": 1.0, "rewards/chosen": -1.425275206565857, "rewards/margins": 4.147624969482422, "rewards/rejected": -5.572900295257568, "step": 1874 }, { "epoch": 3.01, "learning_rate": 3.697978596908442e-07, "logits/chosen": -1.5562728643417358, "logits/rejected": -1.4507569074630737, "logps/chosen": -120.26089477539062, "logps/rejected": -153.0841522216797, "loss": 0.0216, "rewards/accuracies": 1.0, "rewards/chosen": -1.5183202028274536, "rewards/margins": 7.01490592956543, "rewards/rejected": -8.533226013183594, "step": 1875 }, { "epoch": 3.01, "learning_rate": 3.6969877130400317e-07, "logits/chosen": -1.648351788520813, "logits/rejected": -1.6494344472885132, "logps/chosen": -99.00519561767578, "logps/rejected": -154.11337280273438, "loss": 0.0513, "rewards/accuracies": 1.0, "rewards/chosen": -0.7693948149681091, "rewards/margins": 7.378775596618652, "rewards/rejected": -8.148170471191406, "step": 1876 }, { "epoch": 3.01, "learning_rate": 3.6959968291716207e-07, "logits/chosen": -1.6667989492416382, "logits/rejected": -1.5795297622680664, "logps/chosen": -109.90059661865234, "logps/rejected": -164.38848876953125, "loss": 0.0204, "rewards/accuracies": 1.0, "rewards/chosen": -1.845070481300354, "rewards/margins": 6.159598350524902, "rewards/rejected": -8.004669189453125, "step": 1877 }, { "epoch": 3.01, "learning_rate": 3.6950059453032103e-07, "logits/chosen": -1.568537712097168, "logits/rejected": -1.6190099716186523, "logps/chosen": -81.86335754394531, "logps/rejected": -141.58892822265625, "loss": 0.0298, "rewards/accuracies": 1.0, "rewards/chosen": -0.5047804713249207, "rewards/margins": 6.584012985229492, "rewards/rejected": -7.088793754577637, "step": 1878 }, { "epoch": 3.02, "learning_rate": 3.6940150614348e-07, "logits/chosen": -1.637345552444458, "logits/rejected": -1.6550883054733276, "logps/chosen": -73.62660217285156, "logps/rejected": -137.79254150390625, "loss": 0.0292, "rewards/accuracies": 1.0, "rewards/chosen": -1.406909465789795, "rewards/margins": 6.262750625610352, "rewards/rejected": -7.669660568237305, "step": 1879 }, { "epoch": 3.02, "learning_rate": 3.693024177566389e-07, "logits/chosen": -1.7395377159118652, "logits/rejected": -1.7102490663528442, "logps/chosen": -114.62059783935547, "logps/rejected": -154.3107147216797, "loss": 0.034, "rewards/accuracies": 1.0, "rewards/chosen": -1.4890029430389404, "rewards/margins": 4.7981157302856445, "rewards/rejected": -6.287118911743164, "step": 1880 }, { "epoch": 3.02, "learning_rate": 3.6920332936979786e-07, "logits/chosen": -1.5865892171859741, "logits/rejected": -1.4998939037322998, "logps/chosen": -114.85581970214844, "logps/rejected": -165.06552124023438, "loss": 0.0164, "rewards/accuracies": 1.0, "rewards/chosen": -1.196744441986084, "rewards/margins": 5.685003757476807, "rewards/rejected": -6.881748199462891, "step": 1881 }, { "epoch": 3.02, "learning_rate": 3.6910424098295677e-07, "logits/chosen": -1.7192896604537964, "logits/rejected": -1.7427091598510742, "logps/chosen": -101.73321533203125, "logps/rejected": -192.13999938964844, "loss": 0.0166, "rewards/accuracies": 1.0, "rewards/chosen": -2.506147623062134, "rewards/margins": 6.993900299072266, "rewards/rejected": -9.500048637390137, "step": 1882 }, { "epoch": 3.02, "learning_rate": 3.6900515259611567e-07, "logits/chosen": -1.5768084526062012, "logits/rejected": -1.5447890758514404, "logps/chosen": -92.79623413085938, "logps/rejected": -143.77194213867188, "loss": 0.0256, "rewards/accuracies": 1.0, "rewards/chosen": -0.6340055465698242, "rewards/margins": 6.512142181396484, "rewards/rejected": -7.146147727966309, "step": 1883 }, { "epoch": 3.02, "learning_rate": 3.689060642092747e-07, "logits/chosen": -1.6432958841323853, "logits/rejected": -1.6737364530563354, "logps/chosen": -119.97509765625, "logps/rejected": -172.07504272460938, "loss": 0.0538, "rewards/accuracies": 1.0, "rewards/chosen": -1.0013494491577148, "rewards/margins": 6.485969066619873, "rewards/rejected": -7.487318992614746, "step": 1884 }, { "epoch": 3.03, "learning_rate": 3.688069758224336e-07, "logits/chosen": -1.580551028251648, "logits/rejected": -1.4564507007598877, "logps/chosen": -122.84117126464844, "logps/rejected": -156.1721954345703, "loss": 0.035, "rewards/accuracies": 1.0, "rewards/chosen": -1.7995476722717285, "rewards/margins": 4.610548973083496, "rewards/rejected": -6.410096645355225, "step": 1885 }, { "epoch": 3.03, "learning_rate": 3.6870788743559255e-07, "logits/chosen": -1.5368965864181519, "logits/rejected": -1.5106050968170166, "logps/chosen": -81.06300354003906, "logps/rejected": -120.94497680664062, "loss": 0.0316, "rewards/accuracies": 1.0, "rewards/chosen": -0.6516866683959961, "rewards/margins": 5.176954746246338, "rewards/rejected": -5.828641414642334, "step": 1886 }, { "epoch": 3.03, "learning_rate": 3.6860879904875146e-07, "logits/chosen": -1.5713231563568115, "logits/rejected": -1.5903701782226562, "logps/chosen": -92.16338348388672, "logps/rejected": -160.01678466796875, "loss": 0.0188, "rewards/accuracies": 1.0, "rewards/chosen": -2.40602970123291, "rewards/margins": 6.362042427062988, "rewards/rejected": -8.768072128295898, "step": 1887 }, { "epoch": 3.03, "learning_rate": 3.6850971066191037e-07, "logits/chosen": -1.8924490213394165, "logits/rejected": -1.7989635467529297, "logps/chosen": -118.22969818115234, "logps/rejected": -151.23269653320312, "loss": 0.0181, "rewards/accuracies": 1.0, "rewards/chosen": -1.7644065618515015, "rewards/margins": 4.755973815917969, "rewards/rejected": -6.520380020141602, "step": 1888 }, { "epoch": 3.03, "learning_rate": 3.684106222750694e-07, "logits/chosen": -1.5663931369781494, "logits/rejected": -1.5189484357833862, "logps/chosen": -83.39598083496094, "logps/rejected": -126.71826171875, "loss": 0.0357, "rewards/accuracies": 1.0, "rewards/chosen": -1.3664265871047974, "rewards/margins": 3.816039562225342, "rewards/rejected": -5.182466506958008, "step": 1889 }, { "epoch": 3.03, "learning_rate": 3.683115338882283e-07, "logits/chosen": -1.7923333644866943, "logits/rejected": -1.8274815082550049, "logps/chosen": -108.31106567382812, "logps/rejected": -163.31524658203125, "loss": 0.0374, "rewards/accuracies": 1.0, "rewards/chosen": -1.0676989555358887, "rewards/margins": 4.607069969177246, "rewards/rejected": -5.674768447875977, "step": 1890 }, { "epoch": 3.04, "learning_rate": 3.6821244550138724e-07, "logits/chosen": -1.6667779684066772, "logits/rejected": -1.6493017673492432, "logps/chosen": -105.67617797851562, "logps/rejected": -148.6350555419922, "loss": 0.0537, "rewards/accuracies": 1.0, "rewards/chosen": -1.107917070388794, "rewards/margins": 5.460620880126953, "rewards/rejected": -6.568537712097168, "step": 1891 }, { "epoch": 3.04, "learning_rate": 3.6811335711454615e-07, "logits/chosen": -1.7100728750228882, "logits/rejected": -1.6290254592895508, "logps/chosen": -104.39954376220703, "logps/rejected": -146.69679260253906, "loss": 0.0123, "rewards/accuracies": 1.0, "rewards/chosen": -0.34755825996398926, "rewards/margins": 5.27253532409668, "rewards/rejected": -5.620093822479248, "step": 1892 }, { "epoch": 3.04, "learning_rate": 3.6801426872770506e-07, "logits/chosen": -1.6277227401733398, "logits/rejected": -1.5788722038269043, "logps/chosen": -85.88333129882812, "logps/rejected": -155.81558227539062, "loss": 0.0107, "rewards/accuracies": 1.0, "rewards/chosen": -1.7997020483016968, "rewards/margins": 5.331783294677734, "rewards/rejected": -7.1314849853515625, "step": 1893 }, { "epoch": 3.04, "learning_rate": 3.6791518034086407e-07, "logits/chosen": -1.6799894571304321, "logits/rejected": -1.692346215248108, "logps/chosen": -80.70146942138672, "logps/rejected": -136.03201293945312, "loss": 0.0187, "rewards/accuracies": 1.0, "rewards/chosen": -0.8223770260810852, "rewards/margins": 6.562894344329834, "rewards/rejected": -7.3852715492248535, "step": 1894 }, { "epoch": 3.04, "learning_rate": 3.67816091954023e-07, "logits/chosen": -1.6415435075759888, "logits/rejected": -1.5249104499816895, "logps/chosen": -103.30278015136719, "logps/rejected": -126.67869567871094, "loss": 0.094, "rewards/accuracies": 0.75, "rewards/chosen": -1.2904064655303955, "rewards/margins": 2.6864137649536133, "rewards/rejected": -3.976820230484009, "step": 1895 }, { "epoch": 3.04, "learning_rate": 3.677170035671819e-07, "logits/chosen": -1.4976716041564941, "logits/rejected": -1.5245343446731567, "logps/chosen": -81.58033752441406, "logps/rejected": -145.6389617919922, "loss": 0.0491, "rewards/accuracies": 1.0, "rewards/chosen": -2.5117979049682617, "rewards/margins": 5.154696464538574, "rewards/rejected": -7.666494369506836, "step": 1896 }, { "epoch": 3.04, "learning_rate": 3.6761791518034084e-07, "logits/chosen": -1.7200483083724976, "logits/rejected": -1.620421290397644, "logps/chosen": -99.7545166015625, "logps/rejected": -111.57674407958984, "loss": 0.0511, "rewards/accuracies": 1.0, "rewards/chosen": -0.4283807873725891, "rewards/margins": 3.263841390609741, "rewards/rejected": -3.6922223567962646, "step": 1897 }, { "epoch": 3.05, "learning_rate": 3.6751882679349975e-07, "logits/chosen": -1.6229528188705444, "logits/rejected": -1.6154208183288574, "logps/chosen": -96.90663146972656, "logps/rejected": -154.357421875, "loss": 0.0196, "rewards/accuracies": 1.0, "rewards/chosen": -1.6037845611572266, "rewards/margins": 6.531915664672852, "rewards/rejected": -8.135700225830078, "step": 1898 }, { "epoch": 3.05, "learning_rate": 3.6741973840665876e-07, "logits/chosen": -1.87437903881073, "logits/rejected": -1.8073971271514893, "logps/chosen": -110.45804595947266, "logps/rejected": -170.51812744140625, "loss": 0.0278, "rewards/accuracies": 1.0, "rewards/chosen": -1.1526765823364258, "rewards/margins": 7.192652702331543, "rewards/rejected": -8.345329284667969, "step": 1899 }, { "epoch": 3.05, "learning_rate": 3.6732065001981767e-07, "logits/chosen": -1.6587483882904053, "logits/rejected": -1.5802404880523682, "logps/chosen": -111.92886352539062, "logps/rejected": -137.61181640625, "loss": 0.0161, "rewards/accuracies": 1.0, "rewards/chosen": -1.6742111444473267, "rewards/margins": 3.9953184127807617, "rewards/rejected": -5.669529914855957, "step": 1900 }, { "epoch": 3.05, "learning_rate": 3.672215616329766e-07, "logits/chosen": -1.6973333358764648, "logits/rejected": -1.6282687187194824, "logps/chosen": -110.10041046142578, "logps/rejected": -157.08056640625, "loss": 0.0282, "rewards/accuracies": 1.0, "rewards/chosen": -2.484410047531128, "rewards/margins": 5.230213165283203, "rewards/rejected": -7.71462345123291, "step": 1901 }, { "epoch": 3.05, "learning_rate": 3.6712247324613554e-07, "logits/chosen": -1.6041487455368042, "logits/rejected": -1.7238198518753052, "logps/chosen": -95.08317565917969, "logps/rejected": -164.3568115234375, "loss": 0.0533, "rewards/accuracies": 1.0, "rewards/chosen": -1.4543869495391846, "rewards/margins": 3.182302951812744, "rewards/rejected": -4.63668966293335, "step": 1902 }, { "epoch": 3.05, "learning_rate": 3.6702338485929444e-07, "logits/chosen": -1.6339761018753052, "logits/rejected": -1.574198842048645, "logps/chosen": -82.62008666992188, "logps/rejected": -141.4514617919922, "loss": 0.0268, "rewards/accuracies": 1.0, "rewards/chosen": -1.2512325048446655, "rewards/margins": 5.288807392120361, "rewards/rejected": -6.540039539337158, "step": 1903 }, { "epoch": 3.06, "learning_rate": 3.6692429647245346e-07, "logits/chosen": -1.6735442876815796, "logits/rejected": -1.653478980064392, "logps/chosen": -103.46018981933594, "logps/rejected": -184.48081970214844, "loss": 0.0214, "rewards/accuracies": 1.0, "rewards/chosen": -2.0918941497802734, "rewards/margins": 7.652433395385742, "rewards/rejected": -9.744327545166016, "step": 1904 }, { "epoch": 3.06, "learning_rate": 3.6682520808561236e-07, "logits/chosen": -1.7127230167388916, "logits/rejected": -1.6702903509140015, "logps/chosen": -99.74884033203125, "logps/rejected": -158.19534301757812, "loss": 0.061, "rewards/accuracies": 1.0, "rewards/chosen": -2.1497256755828857, "rewards/margins": 5.801769256591797, "rewards/rejected": -7.951494216918945, "step": 1905 }, { "epoch": 3.06, "learning_rate": 3.6672611969877127e-07, "logits/chosen": -1.7548975944519043, "logits/rejected": -1.7401463985443115, "logps/chosen": -98.95782470703125, "logps/rejected": -155.8157958984375, "loss": 0.0177, "rewards/accuracies": 1.0, "rewards/chosen": -2.436131715774536, "rewards/margins": 6.115757465362549, "rewards/rejected": -8.551889419555664, "step": 1906 }, { "epoch": 3.06, "learning_rate": 3.6662703131193023e-07, "logits/chosen": -1.6533514261245728, "logits/rejected": -1.760128378868103, "logps/chosen": -118.9019546508789, "logps/rejected": -162.83978271484375, "loss": 0.0323, "rewards/accuracies": 1.0, "rewards/chosen": -2.769501209259033, "rewards/margins": 4.0259199142456055, "rewards/rejected": -6.7954206466674805, "step": 1907 }, { "epoch": 3.06, "learning_rate": 3.6652794292508914e-07, "logits/chosen": -1.7508225440979004, "logits/rejected": -1.8303509950637817, "logps/chosen": -128.1343536376953, "logps/rejected": -178.4276885986328, "loss": 0.0274, "rewards/accuracies": 1.0, "rewards/chosen": -2.702071189880371, "rewards/margins": 4.746828556060791, "rewards/rejected": -7.448899269104004, "step": 1908 }, { "epoch": 3.06, "learning_rate": 3.664288545382481e-07, "logits/chosen": -1.7263387441635132, "logits/rejected": -1.7603390216827393, "logps/chosen": -144.83370971679688, "logps/rejected": -182.39764404296875, "loss": 0.0603, "rewards/accuracies": 1.0, "rewards/chosen": -2.4596524238586426, "rewards/margins": 4.986607074737549, "rewards/rejected": -7.44625997543335, "step": 1909 }, { "epoch": 3.07, "learning_rate": 3.6632976615140706e-07, "logits/chosen": -1.8220547437667847, "logits/rejected": -1.7363783121109009, "logps/chosen": -102.36355590820312, "logps/rejected": -185.99185180664062, "loss": 0.0754, "rewards/accuracies": 1.0, "rewards/chosen": -1.8659863471984863, "rewards/margins": 8.897928237915039, "rewards/rejected": -10.763915061950684, "step": 1910 }, { "epoch": 3.07, "learning_rate": 3.6623067776456596e-07, "logits/chosen": -1.6040081977844238, "logits/rejected": -1.6430306434631348, "logps/chosen": -86.89701843261719, "logps/rejected": -132.38198852539062, "loss": 0.0105, "rewards/accuracies": 1.0, "rewards/chosen": -1.3414196968078613, "rewards/margins": 5.169520854949951, "rewards/rejected": -6.5109405517578125, "step": 1911 }, { "epoch": 3.07, "learning_rate": 3.661315893777249e-07, "logits/chosen": -1.688725471496582, "logits/rejected": -1.7015926837921143, "logps/chosen": -99.93431091308594, "logps/rejected": -181.14259338378906, "loss": 0.0479, "rewards/accuracies": 1.0, "rewards/chosen": -1.2033534049987793, "rewards/margins": 5.790416240692139, "rewards/rejected": -6.993769645690918, "step": 1912 }, { "epoch": 3.07, "learning_rate": 3.6603250099088383e-07, "logits/chosen": -1.7488212585449219, "logits/rejected": -1.795146107673645, "logps/chosen": -110.70306396484375, "logps/rejected": -145.4078369140625, "loss": 0.0159, "rewards/accuracies": 1.0, "rewards/chosen": -1.3580440282821655, "rewards/margins": 4.888909339904785, "rewards/rejected": -6.246953010559082, "step": 1913 }, { "epoch": 3.07, "learning_rate": 3.659334126040428e-07, "logits/chosen": -1.6552610397338867, "logits/rejected": -1.662558913230896, "logps/chosen": -99.43937683105469, "logps/rejected": -187.4552459716797, "loss": 0.0492, "rewards/accuracies": 1.0, "rewards/chosen": -2.2031972408294678, "rewards/margins": 6.932158946990967, "rewards/rejected": -9.135356903076172, "step": 1914 }, { "epoch": 3.07, "learning_rate": 3.6583432421720175e-07, "logits/chosen": -1.5724084377288818, "logits/rejected": -1.692056655883789, "logps/chosen": -113.00341796875, "logps/rejected": -215.00323486328125, "loss": 0.1127, "rewards/accuracies": 1.0, "rewards/chosen": -2.779017210006714, "rewards/margins": 5.822019577026367, "rewards/rejected": -8.601036071777344, "step": 1915 }, { "epoch": 3.08, "learning_rate": 3.6573523583036066e-07, "logits/chosen": -1.7610795497894287, "logits/rejected": -1.7060661315917969, "logps/chosen": -127.24043273925781, "logps/rejected": -183.40130615234375, "loss": 0.0131, "rewards/accuracies": 1.0, "rewards/chosen": -2.5935006141662598, "rewards/margins": 6.849026679992676, "rewards/rejected": -9.442526817321777, "step": 1916 }, { "epoch": 3.08, "learning_rate": 3.656361474435196e-07, "logits/chosen": -1.7431848049163818, "logits/rejected": -1.6702219247817993, "logps/chosen": -95.4879150390625, "logps/rejected": -168.80035400390625, "loss": 0.0382, "rewards/accuracies": 1.0, "rewards/chosen": -0.5997374057769775, "rewards/margins": 7.886066436767578, "rewards/rejected": -8.485803604125977, "step": 1917 }, { "epoch": 3.08, "learning_rate": 3.655370590566785e-07, "logits/chosen": -1.7646522521972656, "logits/rejected": -1.642569899559021, "logps/chosen": -103.41246032714844, "logps/rejected": -132.5789337158203, "loss": 0.0992, "rewards/accuracies": 1.0, "rewards/chosen": -0.6851685047149658, "rewards/margins": 4.765220642089844, "rewards/rejected": -5.450389385223389, "step": 1918 }, { "epoch": 3.08, "learning_rate": 3.654379706698375e-07, "logits/chosen": -1.71641206741333, "logits/rejected": -1.7225399017333984, "logps/chosen": -117.28567504882812, "logps/rejected": -180.14553833007812, "loss": 0.045, "rewards/accuracies": 1.0, "rewards/chosen": -3.6157400608062744, "rewards/margins": 5.102658748626709, "rewards/rejected": -8.718399047851562, "step": 1919 }, { "epoch": 3.08, "learning_rate": 3.6533888228299644e-07, "logits/chosen": -1.6854426860809326, "logits/rejected": -1.7167248725891113, "logps/chosen": -101.82542419433594, "logps/rejected": -177.07296752929688, "loss": 0.0369, "rewards/accuracies": 1.0, "rewards/chosen": -2.0744495391845703, "rewards/margins": 7.362520217895508, "rewards/rejected": -9.436969757080078, "step": 1920 }, { "epoch": 3.08, "learning_rate": 3.6523979389615535e-07, "logits/chosen": -1.6945387125015259, "logits/rejected": -1.7681795358657837, "logps/chosen": -131.51300048828125, "logps/rejected": -178.84646606445312, "loss": 0.0491, "rewards/accuracies": 1.0, "rewards/chosen": -3.876291036605835, "rewards/margins": 4.3888115882873535, "rewards/rejected": -8.26510238647461, "step": 1921 }, { "epoch": 3.09, "learning_rate": 3.651407055093143e-07, "logits/chosen": -1.6427534818649292, "logits/rejected": -1.6236960887908936, "logps/chosen": -95.70513916015625, "logps/rejected": -193.684326171875, "loss": 0.0189, "rewards/accuracies": 1.0, "rewards/chosen": -1.1455950736999512, "rewards/margins": 10.101436614990234, "rewards/rejected": -11.247031211853027, "step": 1922 }, { "epoch": 3.09, "learning_rate": 3.650416171224732e-07, "logits/chosen": -1.6943484544754028, "logits/rejected": -1.7048935890197754, "logps/chosen": -82.8721694946289, "logps/rejected": -149.16055297851562, "loss": 0.0573, "rewards/accuracies": 1.0, "rewards/chosen": -1.3443591594696045, "rewards/margins": 6.0919013023376465, "rewards/rejected": -7.436261177062988, "step": 1923 }, { "epoch": 3.09, "learning_rate": 3.649425287356322e-07, "logits/chosen": -1.5582890510559082, "logits/rejected": -1.5721954107284546, "logps/chosen": -116.88792419433594, "logps/rejected": -179.40170288085938, "loss": 0.0661, "rewards/accuracies": 1.0, "rewards/chosen": -2.71346378326416, "rewards/margins": 6.799563407897949, "rewards/rejected": -9.51302719116211, "step": 1924 }, { "epoch": 3.09, "learning_rate": 3.6484344034879114e-07, "logits/chosen": -1.7036488056182861, "logits/rejected": -1.7554304599761963, "logps/chosen": -103.85214233398438, "logps/rejected": -205.3756561279297, "loss": 0.027, "rewards/accuracies": 1.0, "rewards/chosen": -2.534808397293091, "rewards/margins": 7.539812088012695, "rewards/rejected": -10.074620246887207, "step": 1925 }, { "epoch": 3.09, "learning_rate": 3.6474435196195004e-07, "logits/chosen": -1.5587064027786255, "logits/rejected": -1.623528242111206, "logps/chosen": -97.46810913085938, "logps/rejected": -174.80950927734375, "loss": 0.0318, "rewards/accuracies": 1.0, "rewards/chosen": -2.5073888301849365, "rewards/margins": 7.149669647216797, "rewards/rejected": -9.657058715820312, "step": 1926 }, { "epoch": 3.09, "learning_rate": 3.64645263575109e-07, "logits/chosen": -1.5745210647583008, "logits/rejected": -1.5146420001983643, "logps/chosen": -94.89555358886719, "logps/rejected": -168.1523895263672, "loss": 0.0295, "rewards/accuracies": 1.0, "rewards/chosen": -2.169896364212036, "rewards/margins": 8.105024337768555, "rewards/rejected": -10.274920463562012, "step": 1927 }, { "epoch": 3.09, "learning_rate": 3.645461751882679e-07, "logits/chosen": -1.7285041809082031, "logits/rejected": -1.730893850326538, "logps/chosen": -118.09146118164062, "logps/rejected": -132.76002502441406, "loss": 0.043, "rewards/accuracies": 1.0, "rewards/chosen": -1.500150442123413, "rewards/margins": 3.345635414123535, "rewards/rejected": -4.845786094665527, "step": 1928 }, { "epoch": 3.1, "learning_rate": 3.6444708680142687e-07, "logits/chosen": -1.7215481996536255, "logits/rejected": -1.7563947439193726, "logps/chosen": -115.05442810058594, "logps/rejected": -214.53884887695312, "loss": 0.0161, "rewards/accuracies": 1.0, "rewards/chosen": -1.7084250450134277, "rewards/margins": 8.851237297058105, "rewards/rejected": -10.559661865234375, "step": 1929 }, { "epoch": 3.1, "learning_rate": 3.643479984145858e-07, "logits/chosen": -1.717126727104187, "logits/rejected": -1.6685242652893066, "logps/chosen": -116.08157348632812, "logps/rejected": -148.3933868408203, "loss": 0.0196, "rewards/accuracies": 1.0, "rewards/chosen": -1.075526475906372, "rewards/margins": 5.374039173126221, "rewards/rejected": -6.449565410614014, "step": 1930 }, { "epoch": 3.1, "learning_rate": 3.6424891002774474e-07, "logits/chosen": -1.665290117263794, "logits/rejected": -1.6367485523223877, "logps/chosen": -130.36013793945312, "logps/rejected": -177.5913848876953, "loss": 0.0236, "rewards/accuracies": 1.0, "rewards/chosen": -3.711521625518799, "rewards/margins": 6.114994049072266, "rewards/rejected": -9.826516151428223, "step": 1931 }, { "epoch": 3.1, "learning_rate": 3.641498216409037e-07, "logits/chosen": -1.6750272512435913, "logits/rejected": -1.644675612449646, "logps/chosen": -98.60137939453125, "logps/rejected": -120.58511352539062, "loss": 0.0379, "rewards/accuracies": 0.75, "rewards/chosen": -1.7334991693496704, "rewards/margins": 3.1803090572357178, "rewards/rejected": -4.913808345794678, "step": 1932 }, { "epoch": 3.1, "learning_rate": 3.640507332540626e-07, "logits/chosen": -1.5439561605453491, "logits/rejected": -1.6064728498458862, "logps/chosen": -116.88392639160156, "logps/rejected": -194.9021453857422, "loss": 0.0249, "rewards/accuracies": 1.0, "rewards/chosen": -3.370835065841675, "rewards/margins": 6.219663619995117, "rewards/rejected": -9.590497970581055, "step": 1933 }, { "epoch": 3.1, "learning_rate": 3.639516448672215e-07, "logits/chosen": -1.7127487659454346, "logits/rejected": -1.7000490427017212, "logps/chosen": -123.20008850097656, "logps/rejected": -201.34494018554688, "loss": 0.0377, "rewards/accuracies": 1.0, "rewards/chosen": -2.6485095024108887, "rewards/margins": 7.709593772888184, "rewards/rejected": -10.358102798461914, "step": 1934 }, { "epoch": 3.11, "learning_rate": 3.6385255648038047e-07, "logits/chosen": -1.46981680393219, "logits/rejected": -1.5378495454788208, "logps/chosen": -80.08000946044922, "logps/rejected": -135.05169677734375, "loss": 0.0383, "rewards/accuracies": 1.0, "rewards/chosen": -1.0323357582092285, "rewards/margins": 5.417774677276611, "rewards/rejected": -6.45011043548584, "step": 1935 }, { "epoch": 3.11, "learning_rate": 3.6375346809353943e-07, "logits/chosen": -1.7383027076721191, "logits/rejected": -1.7675490379333496, "logps/chosen": -124.0751953125, "logps/rejected": -205.1046600341797, "loss": 0.0206, "rewards/accuracies": 1.0, "rewards/chosen": -3.445425033569336, "rewards/margins": 5.516932487487793, "rewards/rejected": -8.962356567382812, "step": 1936 }, { "epoch": 3.11, "learning_rate": 3.636543797066984e-07, "logits/chosen": -1.8075617551803589, "logits/rejected": -1.8700815439224243, "logps/chosen": -114.66032409667969, "logps/rejected": -173.7946014404297, "loss": 0.0607, "rewards/accuracies": 1.0, "rewards/chosen": -2.7023115158081055, "rewards/margins": 4.775672912597656, "rewards/rejected": -7.47798490524292, "step": 1937 }, { "epoch": 3.11, "learning_rate": 3.635552913198573e-07, "logits/chosen": -1.8957135677337646, "logits/rejected": -1.8277348279953003, "logps/chosen": -123.88986206054688, "logps/rejected": -168.5408935546875, "loss": 0.01, "rewards/accuracies": 1.0, "rewards/chosen": -3.42256760597229, "rewards/margins": 5.346343994140625, "rewards/rejected": -8.768912315368652, "step": 1938 }, { "epoch": 3.11, "learning_rate": 3.634562029330162e-07, "logits/chosen": -1.6349012851715088, "logits/rejected": -1.6503089666366577, "logps/chosen": -84.57666778564453, "logps/rejected": -171.06443786621094, "loss": 0.0283, "rewards/accuracies": 1.0, "rewards/chosen": -0.8964742422103882, "rewards/margins": 7.850900650024414, "rewards/rejected": -8.74737548828125, "step": 1939 }, { "epoch": 3.11, "learning_rate": 3.6335711454617516e-07, "logits/chosen": -1.5477144718170166, "logits/rejected": -1.6328811645507812, "logps/chosen": -133.12603759765625, "logps/rejected": -205.72183227539062, "loss": 0.0201, "rewards/accuracies": 1.0, "rewards/chosen": -3.266352891921997, "rewards/margins": 7.226635932922363, "rewards/rejected": -10.492989540100098, "step": 1940 }, { "epoch": 3.12, "learning_rate": 3.632580261593341e-07, "logits/chosen": -1.7440824508666992, "logits/rejected": -1.8198513984680176, "logps/chosen": -90.61750793457031, "logps/rejected": -216.2113037109375, "loss": 0.042, "rewards/accuracies": 1.0, "rewards/chosen": -0.024347588419914246, "rewards/margins": 12.375530242919922, "rewards/rejected": -12.399877548217773, "step": 1941 }, { "epoch": 3.12, "learning_rate": 3.631589377724931e-07, "logits/chosen": -1.4591419696807861, "logits/rejected": -1.4938799142837524, "logps/chosen": -107.21939086914062, "logps/rejected": -197.74029541015625, "loss": 0.0267, "rewards/accuracies": 1.0, "rewards/chosen": -2.802433967590332, "rewards/margins": 8.58193302154541, "rewards/rejected": -11.384367942810059, "step": 1942 }, { "epoch": 3.12, "learning_rate": 3.63059849385652e-07, "logits/chosen": -1.6882543563842773, "logits/rejected": -1.7257355451583862, "logps/chosen": -97.52288818359375, "logps/rejected": -158.1234893798828, "loss": 0.0159, "rewards/accuracies": 1.0, "rewards/chosen": -1.0082539319992065, "rewards/margins": 5.645055294036865, "rewards/rejected": -6.653309345245361, "step": 1943 }, { "epoch": 3.12, "learning_rate": 3.629607609988109e-07, "logits/chosen": -1.7466353178024292, "logits/rejected": -1.7213224172592163, "logps/chosen": -83.61992645263672, "logps/rejected": -208.62783813476562, "loss": 0.0061, "rewards/accuracies": 1.0, "rewards/chosen": 0.03688819706439972, "rewards/margins": 7.528218746185303, "rewards/rejected": -7.491330146789551, "step": 1944 }, { "epoch": 3.12, "learning_rate": 3.6286167261196985e-07, "logits/chosen": -1.7166410684585571, "logits/rejected": -1.6794699430465698, "logps/chosen": -133.43780517578125, "logps/rejected": -174.02767944335938, "loss": 0.0229, "rewards/accuracies": 1.0, "rewards/chosen": -3.8693768978118896, "rewards/margins": 5.477082252502441, "rewards/rejected": -9.346458435058594, "step": 1945 }, { "epoch": 3.12, "learning_rate": 3.6276258422512876e-07, "logits/chosen": -1.748315453529358, "logits/rejected": -1.6389821767807007, "logps/chosen": -127.88972473144531, "logps/rejected": -150.11944580078125, "loss": 0.026, "rewards/accuracies": 1.0, "rewards/chosen": -3.0441665649414062, "rewards/margins": 4.500893592834473, "rewards/rejected": -7.545060157775879, "step": 1946 }, { "epoch": 3.13, "learning_rate": 3.626634958382878e-07, "logits/chosen": -1.6809368133544922, "logits/rejected": -1.7901078462600708, "logps/chosen": -77.57942199707031, "logps/rejected": -152.99945068359375, "loss": 0.0291, "rewards/accuracies": 1.0, "rewards/chosen": -1.8548580408096313, "rewards/margins": 4.466367244720459, "rewards/rejected": -6.321225643157959, "step": 1947 }, { "epoch": 3.13, "learning_rate": 3.625644074514467e-07, "logits/chosen": -1.7235873937606812, "logits/rejected": -1.7226086854934692, "logps/chosen": -78.4059829711914, "logps/rejected": -128.07118225097656, "loss": 0.022, "rewards/accuracies": 1.0, "rewards/chosen": -1.2847692966461182, "rewards/margins": 4.415901184082031, "rewards/rejected": -5.70067024230957, "step": 1948 }, { "epoch": 3.13, "learning_rate": 3.624653190646056e-07, "logits/chosen": -1.6852827072143555, "logits/rejected": -1.6790168285369873, "logps/chosen": -150.095703125, "logps/rejected": -216.11949157714844, "loss": 0.0327, "rewards/accuracies": 1.0, "rewards/chosen": -3.3690452575683594, "rewards/margins": 7.016724586486816, "rewards/rejected": -10.385769844055176, "step": 1949 }, { "epoch": 3.13, "learning_rate": 3.6236623067776455e-07, "logits/chosen": -1.823380947113037, "logits/rejected": -1.6570295095443726, "logps/chosen": -123.5429458618164, "logps/rejected": -162.1097412109375, "loss": 0.0224, "rewards/accuracies": 1.0, "rewards/chosen": -2.4376399517059326, "rewards/margins": 5.464521408081055, "rewards/rejected": -7.902161121368408, "step": 1950 }, { "epoch": 3.13, "learning_rate": 3.6226714229092345e-07, "logits/chosen": -1.6425719261169434, "logits/rejected": -1.6099767684936523, "logps/chosen": -141.56192016601562, "logps/rejected": -216.4085693359375, "loss": 0.0126, "rewards/accuracies": 1.0, "rewards/chosen": -5.039567470550537, "rewards/margins": 7.744546890258789, "rewards/rejected": -12.784114837646484, "step": 1951 }, { "epoch": 3.13, "learning_rate": 3.6216805390408247e-07, "logits/chosen": -1.8270808458328247, "logits/rejected": -1.8234386444091797, "logps/chosen": -126.98995971679688, "logps/rejected": -173.7485809326172, "loss": 0.0285, "rewards/accuracies": 1.0, "rewards/chosen": -2.884312152862549, "rewards/margins": 3.3831474781036377, "rewards/rejected": -6.267459869384766, "step": 1952 }, { "epoch": 3.13, "learning_rate": 3.620689655172414e-07, "logits/chosen": -1.5415632724761963, "logits/rejected": -1.6515536308288574, "logps/chosen": -111.42669677734375, "logps/rejected": -182.29058837890625, "loss": 0.0281, "rewards/accuracies": 1.0, "rewards/chosen": -3.138338088989258, "rewards/margins": 5.588000774383545, "rewards/rejected": -8.726339340209961, "step": 1953 }, { "epoch": 3.14, "learning_rate": 3.619698771304003e-07, "logits/chosen": -1.5850507020950317, "logits/rejected": -1.5958012342453003, "logps/chosen": -116.91487884521484, "logps/rejected": -195.30686950683594, "loss": 0.0084, "rewards/accuracies": 1.0, "rewards/chosen": -3.919752597808838, "rewards/margins": 7.000920295715332, "rewards/rejected": -10.920673370361328, "step": 1954 }, { "epoch": 3.14, "learning_rate": 3.6187078874355924e-07, "logits/chosen": -1.7460577487945557, "logits/rejected": -1.7089674472808838, "logps/chosen": -124.64060974121094, "logps/rejected": -185.37905883789062, "loss": 0.0116, "rewards/accuracies": 1.0, "rewards/chosen": -4.035760879516602, "rewards/margins": 6.420534133911133, "rewards/rejected": -10.456295013427734, "step": 1955 }, { "epoch": 3.14, "learning_rate": 3.6177170035671815e-07, "logits/chosen": -1.6647393703460693, "logits/rejected": -1.706511378288269, "logps/chosen": -147.89768981933594, "logps/rejected": -198.69247436523438, "loss": 0.0148, "rewards/accuracies": 1.0, "rewards/chosen": -2.0797901153564453, "rewards/margins": 5.071946144104004, "rewards/rejected": -7.151736259460449, "step": 1956 }, { "epoch": 3.14, "learning_rate": 3.6167261196987716e-07, "logits/chosen": -1.6850024461746216, "logits/rejected": -1.684111475944519, "logps/chosen": -105.60826110839844, "logps/rejected": -175.80361938476562, "loss": 0.0335, "rewards/accuracies": 1.0, "rewards/chosen": -2.4007818698883057, "rewards/margins": 7.041140079498291, "rewards/rejected": -9.441922187805176, "step": 1957 }, { "epoch": 3.14, "learning_rate": 3.6157352358303607e-07, "logits/chosen": -1.5616878271102905, "logits/rejected": -1.5326266288757324, "logps/chosen": -68.1089096069336, "logps/rejected": -115.59246826171875, "loss": 0.0404, "rewards/accuracies": 1.0, "rewards/chosen": -0.35430896282196045, "rewards/margins": 4.422353744506836, "rewards/rejected": -4.776662826538086, "step": 1958 }, { "epoch": 3.14, "learning_rate": 3.61474435196195e-07, "logits/chosen": -1.7346173524856567, "logits/rejected": -1.7499836683273315, "logps/chosen": -97.03572845458984, "logps/rejected": -184.90524291992188, "loss": 0.0171, "rewards/accuracies": 1.0, "rewards/chosen": -1.6955863237380981, "rewards/margins": 8.96204948425293, "rewards/rejected": -10.657635688781738, "step": 1959 }, { "epoch": 3.15, "learning_rate": 3.6137534680935393e-07, "logits/chosen": -1.772837519645691, "logits/rejected": -1.7603347301483154, "logps/chosen": -65.09259033203125, "logps/rejected": -137.53564453125, "loss": 0.066, "rewards/accuracies": 1.0, "rewards/chosen": -0.1786889135837555, "rewards/margins": 7.673479080200195, "rewards/rejected": -7.852168083190918, "step": 1960 }, { "epoch": 3.15, "learning_rate": 3.6127625842251284e-07, "logits/chosen": -1.5729438066482544, "logits/rejected": -1.4959604740142822, "logps/chosen": -86.2054443359375, "logps/rejected": -184.04727172851562, "loss": 0.0666, "rewards/accuracies": 1.0, "rewards/chosen": -0.12659834325313568, "rewards/margins": 10.28763198852539, "rewards/rejected": -10.414229393005371, "step": 1961 }, { "epoch": 3.15, "learning_rate": 3.6117717003567185e-07, "logits/chosen": -1.7164199352264404, "logits/rejected": -1.6199469566345215, "logps/chosen": -136.36248779296875, "logps/rejected": -187.34881591796875, "loss": 0.0096, "rewards/accuracies": 1.0, "rewards/chosen": -2.8692572116851807, "rewards/margins": 6.836688995361328, "rewards/rejected": -9.705946922302246, "step": 1962 }, { "epoch": 3.15, "learning_rate": 3.6107808164883076e-07, "logits/chosen": -1.7264463901519775, "logits/rejected": -1.685098648071289, "logps/chosen": -94.50041198730469, "logps/rejected": -181.68499755859375, "loss": 0.0166, "rewards/accuracies": 1.0, "rewards/chosen": -1.5757334232330322, "rewards/margins": 8.881714820861816, "rewards/rejected": -10.45744800567627, "step": 1963 }, { "epoch": 3.15, "learning_rate": 3.6097899326198967e-07, "logits/chosen": -1.6349058151245117, "logits/rejected": -1.7028789520263672, "logps/chosen": -102.79163360595703, "logps/rejected": -198.27064514160156, "loss": 0.0181, "rewards/accuracies": 1.0, "rewards/chosen": -2.6038267612457275, "rewards/margins": 8.8462553024292, "rewards/rejected": -11.450082778930664, "step": 1964 }, { "epoch": 3.15, "learning_rate": 3.608799048751486e-07, "logits/chosen": -1.7351312637329102, "logits/rejected": -1.723825454711914, "logps/chosen": -104.37217712402344, "logps/rejected": -152.84677124023438, "loss": 0.1197, "rewards/accuracies": 1.0, "rewards/chosen": -2.8030707836151123, "rewards/margins": 4.907454490661621, "rewards/rejected": -7.710525989532471, "step": 1965 }, { "epoch": 3.16, "learning_rate": 3.6078081648830753e-07, "logits/chosen": -1.638871192932129, "logits/rejected": -1.560049295425415, "logps/chosen": -110.66050720214844, "logps/rejected": -142.51060485839844, "loss": 0.0847, "rewards/accuracies": 1.0, "rewards/chosen": -2.7966654300689697, "rewards/margins": 4.6570281982421875, "rewards/rejected": -7.453693389892578, "step": 1966 }, { "epoch": 3.16, "learning_rate": 3.6068172810146644e-07, "logits/chosen": -1.6663594245910645, "logits/rejected": -1.719827651977539, "logps/chosen": -98.63490295410156, "logps/rejected": -183.7691650390625, "loss": 0.0283, "rewards/accuracies": 1.0, "rewards/chosen": -2.237114906311035, "rewards/margins": 6.978353500366211, "rewards/rejected": -9.215468406677246, "step": 1967 }, { "epoch": 3.16, "learning_rate": 3.6058263971462545e-07, "logits/chosen": -1.6410138607025146, "logits/rejected": -1.6698615550994873, "logps/chosen": -114.33976745605469, "logps/rejected": -172.77833557128906, "loss": 0.0244, "rewards/accuracies": 1.0, "rewards/chosen": -2.735752582550049, "rewards/margins": 5.422980308532715, "rewards/rejected": -8.158733367919922, "step": 1968 }, { "epoch": 3.16, "learning_rate": 3.6048355132778436e-07, "logits/chosen": -1.6621994972229004, "logits/rejected": -1.6747206449508667, "logps/chosen": -129.36508178710938, "logps/rejected": -202.16140747070312, "loss": 0.0203, "rewards/accuracies": 1.0, "rewards/chosen": -3.641920328140259, "rewards/margins": 5.4655938148498535, "rewards/rejected": -9.107514381408691, "step": 1969 }, { "epoch": 3.16, "learning_rate": 3.603844629409433e-07, "logits/chosen": -1.7725540399551392, "logits/rejected": -1.7223410606384277, "logps/chosen": -84.36936950683594, "logps/rejected": -165.73927307128906, "loss": 0.0281, "rewards/accuracies": 1.0, "rewards/chosen": -0.3146694302558899, "rewards/margins": 7.3898396492004395, "rewards/rejected": -7.704509735107422, "step": 1970 }, { "epoch": 3.16, "learning_rate": 3.602853745541022e-07, "logits/chosen": -1.573026180267334, "logits/rejected": -1.6656849384307861, "logps/chosen": -96.93243408203125, "logps/rejected": -174.966064453125, "loss": 0.1155, "rewards/accuracies": 1.0, "rewards/chosen": -3.5613150596618652, "rewards/margins": 6.243129730224609, "rewards/rejected": -9.804445266723633, "step": 1971 }, { "epoch": 3.17, "learning_rate": 3.6018628616726113e-07, "logits/chosen": -1.7056617736816406, "logits/rejected": -1.749609351158142, "logps/chosen": -125.49136352539062, "logps/rejected": -199.64938354492188, "loss": 0.0226, "rewards/accuracies": 1.0, "rewards/chosen": -4.734984874725342, "rewards/margins": 6.830042839050293, "rewards/rejected": -11.565028190612793, "step": 1972 }, { "epoch": 3.17, "learning_rate": 3.6008719778042015e-07, "logits/chosen": -1.6128945350646973, "logits/rejected": -1.7019072771072388, "logps/chosen": -108.23746490478516, "logps/rejected": -201.17156982421875, "loss": 0.0922, "rewards/accuracies": 1.0, "rewards/chosen": -3.24153470993042, "rewards/margins": 6.931545734405518, "rewards/rejected": -10.173080444335938, "step": 1973 }, { "epoch": 3.17, "learning_rate": 3.5998810939357905e-07, "logits/chosen": -1.805866003036499, "logits/rejected": -1.8104450702667236, "logps/chosen": -106.85108947753906, "logps/rejected": -187.06568908691406, "loss": 0.0179, "rewards/accuracies": 1.0, "rewards/chosen": -1.86531662940979, "rewards/margins": 8.228012084960938, "rewards/rejected": -10.093328475952148, "step": 1974 }, { "epoch": 3.17, "learning_rate": 3.59889021006738e-07, "logits/chosen": -1.631588101387024, "logits/rejected": -1.6011061668395996, "logps/chosen": -123.61515808105469, "logps/rejected": -213.03077697753906, "loss": 0.0505, "rewards/accuracies": 1.0, "rewards/chosen": -3.7904090881347656, "rewards/margins": 8.409296989440918, "rewards/rejected": -12.19970703125, "step": 1975 }, { "epoch": 3.17, "learning_rate": 3.597899326198969e-07, "logits/chosen": -1.624626636505127, "logits/rejected": -1.605358362197876, "logps/chosen": -117.13568878173828, "logps/rejected": -158.8237762451172, "loss": 0.0624, "rewards/accuracies": 0.75, "rewards/chosen": -3.0105838775634766, "rewards/margins": 4.174419403076172, "rewards/rejected": -7.185003280639648, "step": 1976 }, { "epoch": 3.17, "learning_rate": 3.596908442330558e-07, "logits/chosen": -1.8333104848861694, "logits/rejected": -1.8280823230743408, "logps/chosen": -108.53047943115234, "logps/rejected": -177.74520874023438, "loss": 0.0095, "rewards/accuracies": 1.0, "rewards/chosen": -1.5441758632659912, "rewards/margins": 6.97042989730835, "rewards/rejected": -8.514606475830078, "step": 1977 }, { "epoch": 3.17, "learning_rate": 3.5959175584621484e-07, "logits/chosen": -1.7583786249160767, "logits/rejected": -1.7734776735305786, "logps/chosen": -131.54200744628906, "logps/rejected": -220.95298767089844, "loss": 0.0061, "rewards/accuracies": 1.0, "rewards/chosen": -2.2204041481018066, "rewards/margins": 7.116202354431152, "rewards/rejected": -9.336606979370117, "step": 1978 }, { "epoch": 3.18, "learning_rate": 3.5949266745937375e-07, "logits/chosen": -1.510981798171997, "logits/rejected": -1.580195426940918, "logps/chosen": -71.58564758300781, "logps/rejected": -175.59471130371094, "loss": 0.0244, "rewards/accuracies": 1.0, "rewards/chosen": -1.4252915382385254, "rewards/margins": 7.644321441650391, "rewards/rejected": -9.069613456726074, "step": 1979 }, { "epoch": 3.18, "learning_rate": 3.593935790725327e-07, "logits/chosen": -1.5241928100585938, "logits/rejected": -1.5479671955108643, "logps/chosen": -95.62603759765625, "logps/rejected": -178.81036376953125, "loss": 0.0093, "rewards/accuracies": 1.0, "rewards/chosen": -1.6505924463272095, "rewards/margins": 7.531933784484863, "rewards/rejected": -9.182526588439941, "step": 1980 }, { "epoch": 3.18, "learning_rate": 3.592944906856916e-07, "logits/chosen": -1.7434297800064087, "logits/rejected": -1.7364239692687988, "logps/chosen": -100.96076202392578, "logps/rejected": -164.77230834960938, "loss": 0.0184, "rewards/accuracies": 1.0, "rewards/chosen": -2.4954776763916016, "rewards/margins": 6.1485066413879395, "rewards/rejected": -8.643983840942383, "step": 1981 }, { "epoch": 3.18, "learning_rate": 3.591954022988505e-07, "logits/chosen": -1.7545653581619263, "logits/rejected": -1.7757402658462524, "logps/chosen": -138.83709716796875, "logps/rejected": -198.08169555664062, "loss": 0.1579, "rewards/accuracies": 1.0, "rewards/chosen": -3.481916904449463, "rewards/margins": 6.665650844573975, "rewards/rejected": -10.147567749023438, "step": 1982 }, { "epoch": 3.18, "learning_rate": 3.5909631391200953e-07, "logits/chosen": -1.5850155353546143, "logits/rejected": -1.6293792724609375, "logps/chosen": -117.47493743896484, "logps/rejected": -218.18447875976562, "loss": 0.0393, "rewards/accuracies": 1.0, "rewards/chosen": -2.609663248062134, "rewards/margins": 9.055994033813477, "rewards/rejected": -11.665657997131348, "step": 1983 }, { "epoch": 3.18, "learning_rate": 3.5899722552516844e-07, "logits/chosen": -1.714773416519165, "logits/rejected": -1.8290891647338867, "logps/chosen": -86.82098388671875, "logps/rejected": -173.24423217773438, "loss": 0.0188, "rewards/accuracies": 1.0, "rewards/chosen": -2.050837278366089, "rewards/margins": 4.450195789337158, "rewards/rejected": -6.501032829284668, "step": 1984 }, { "epoch": 3.19, "learning_rate": 3.588981371383274e-07, "logits/chosen": -1.7641794681549072, "logits/rejected": -1.7192003726959229, "logps/chosen": -132.29916381835938, "logps/rejected": -158.6502685546875, "loss": 0.0265, "rewards/accuracies": 1.0, "rewards/chosen": -2.4948902130126953, "rewards/margins": 4.208545684814453, "rewards/rejected": -6.703434944152832, "step": 1985 }, { "epoch": 3.19, "learning_rate": 3.587990487514863e-07, "logits/chosen": -1.6659367084503174, "logits/rejected": -1.6605466604232788, "logps/chosen": -111.20159149169922, "logps/rejected": -207.7238006591797, "loss": 0.0537, "rewards/accuracies": 1.0, "rewards/chosen": -2.381382942199707, "rewards/margins": 7.288689136505127, "rewards/rejected": -9.670072555541992, "step": 1986 }, { "epoch": 3.19, "learning_rate": 3.586999603646452e-07, "logits/chosen": -1.7060295343399048, "logits/rejected": -1.7931212186813354, "logps/chosen": -126.69080352783203, "logps/rejected": -210.21237182617188, "loss": 0.0096, "rewards/accuracies": 1.0, "rewards/chosen": -2.326317310333252, "rewards/margins": 6.684223651885986, "rewards/rejected": -9.010540962219238, "step": 1987 }, { "epoch": 3.19, "learning_rate": 3.5860087197780417e-07, "logits/chosen": -1.7053873538970947, "logits/rejected": -1.739250898361206, "logps/chosen": -106.90412902832031, "logps/rejected": -183.924560546875, "loss": 0.0147, "rewards/accuracies": 1.0, "rewards/chosen": -2.281558036804199, "rewards/margins": 7.322775840759277, "rewards/rejected": -9.604333877563477, "step": 1988 }, { "epoch": 3.19, "learning_rate": 3.5850178359096313e-07, "logits/chosen": -1.678769826889038, "logits/rejected": -1.5989044904708862, "logps/chosen": -115.54940795898438, "logps/rejected": -163.576416015625, "loss": 0.0331, "rewards/accuracies": 1.0, "rewards/chosen": -3.7568955421447754, "rewards/margins": 5.199860572814941, "rewards/rejected": -8.956756591796875, "step": 1989 }, { "epoch": 3.19, "learning_rate": 3.584026952041221e-07, "logits/chosen": -1.6448458433151245, "logits/rejected": -1.584925889968872, "logps/chosen": -124.52859497070312, "logps/rejected": -180.59735107421875, "loss": 0.0107, "rewards/accuracies": 1.0, "rewards/chosen": -2.3303802013397217, "rewards/margins": 5.922438621520996, "rewards/rejected": -8.252819061279297, "step": 1990 }, { "epoch": 3.2, "learning_rate": 3.58303606817281e-07, "logits/chosen": -1.73057222366333, "logits/rejected": -1.7992959022521973, "logps/chosen": -99.14570617675781, "logps/rejected": -202.18441772460938, "loss": 0.0078, "rewards/accuracies": 1.0, "rewards/chosen": -1.8036044836044312, "rewards/margins": 7.1999616622924805, "rewards/rejected": -9.00356674194336, "step": 1991 }, { "epoch": 3.2, "learning_rate": 3.582045184304399e-07, "logits/chosen": -1.635919451713562, "logits/rejected": -1.7112751007080078, "logps/chosen": -92.94445037841797, "logps/rejected": -181.21163940429688, "loss": 0.0161, "rewards/accuracies": 1.0, "rewards/chosen": -1.7323309183120728, "rewards/margins": 7.687793254852295, "rewards/rejected": -9.420124053955078, "step": 1992 }, { "epoch": 3.2, "learning_rate": 3.5810543004359886e-07, "logits/chosen": -1.671657919883728, "logits/rejected": -1.6368800401687622, "logps/chosen": -136.6543426513672, "logps/rejected": -147.5520477294922, "loss": 0.0789, "rewards/accuracies": 1.0, "rewards/chosen": -4.31401252746582, "rewards/margins": 3.441311836242676, "rewards/rejected": -7.755324840545654, "step": 1993 }, { "epoch": 3.2, "learning_rate": 3.580063416567578e-07, "logits/chosen": -1.6248106956481934, "logits/rejected": -1.677144169807434, "logps/chosen": -60.86071014404297, "logps/rejected": -161.8345947265625, "loss": 0.0103, "rewards/accuracies": 1.0, "rewards/chosen": -0.7413819432258606, "rewards/margins": 8.462782859802246, "rewards/rejected": -9.2041654586792, "step": 1994 }, { "epoch": 3.2, "learning_rate": 3.579072532699168e-07, "logits/chosen": -1.7373908758163452, "logits/rejected": -1.693756341934204, "logps/chosen": -116.62831115722656, "logps/rejected": -173.82101440429688, "loss": 0.0101, "rewards/accuracies": 1.0, "rewards/chosen": -2.8449108600616455, "rewards/margins": 6.4041748046875, "rewards/rejected": -9.249085426330566, "step": 1995 }, { "epoch": 3.2, "learning_rate": 3.578081648830757e-07, "logits/chosen": -1.6260175704956055, "logits/rejected": -1.689884901046753, "logps/chosen": -99.6757583618164, "logps/rejected": -169.79754638671875, "loss": 0.027, "rewards/accuracies": 1.0, "rewards/chosen": -2.9919350147247314, "rewards/margins": 5.179461479187012, "rewards/rejected": -8.171396255493164, "step": 1996 }, { "epoch": 3.21, "learning_rate": 3.577090764962346e-07, "logits/chosen": -1.6278773546218872, "logits/rejected": -1.5755157470703125, "logps/chosen": -140.30148315429688, "logps/rejected": -187.70242309570312, "loss": 0.0111, "rewards/accuracies": 1.0, "rewards/chosen": -3.143270492553711, "rewards/margins": 7.0506672859191895, "rewards/rejected": -10.193938255310059, "step": 1997 }, { "epoch": 3.21, "learning_rate": 3.5760998810939356e-07, "logits/chosen": -1.6571062803268433, "logits/rejected": -1.7408782243728638, "logps/chosen": -119.50012969970703, "logps/rejected": -171.8124542236328, "loss": 0.014, "rewards/accuracies": 1.0, "rewards/chosen": -2.3525402545928955, "rewards/margins": 5.181175231933594, "rewards/rejected": -7.533716201782227, "step": 1998 }, { "epoch": 3.21, "learning_rate": 3.575108997225525e-07, "logits/chosen": -1.4913583993911743, "logits/rejected": -1.5393571853637695, "logps/chosen": -137.9495086669922, "logps/rejected": -205.7068328857422, "loss": 0.0182, "rewards/accuracies": 1.0, "rewards/chosen": -5.592309951782227, "rewards/margins": 5.664676666259766, "rewards/rejected": -11.256987571716309, "step": 1999 }, { "epoch": 3.21, "learning_rate": 3.574118113357115e-07, "logits/chosen": -1.6846952438354492, "logits/rejected": -1.597804069519043, "logps/chosen": -153.3061981201172, "logps/rejected": -180.0919952392578, "loss": 0.0488, "rewards/accuracies": 0.75, "rewards/chosen": -4.228933811187744, "rewards/margins": 3.2838704586029053, "rewards/rejected": -7.51280403137207, "step": 2000 }, { "epoch": 3.21, "learning_rate": 3.573127229488704e-07, "logits/chosen": -1.4437700510025024, "logits/rejected": -1.4576162099838257, "logps/chosen": -113.46170806884766, "logps/rejected": -228.89572143554688, "loss": 0.0048, "rewards/accuracies": 1.0, "rewards/chosen": -3.1835033893585205, "rewards/margins": 6.617332935333252, "rewards/rejected": -9.800836563110352, "step": 2001 }, { "epoch": 3.21, "learning_rate": 3.572136345620293e-07, "logits/chosen": -1.7463536262512207, "logits/rejected": -1.7170718908309937, "logps/chosen": -123.6358642578125, "logps/rejected": -190.8805694580078, "loss": 0.0111, "rewards/accuracies": 1.0, "rewards/chosen": -2.3949577808380127, "rewards/margins": 7.933981895446777, "rewards/rejected": -10.328940391540527, "step": 2002 }, { "epoch": 3.22, "learning_rate": 3.5711454617518825e-07, "logits/chosen": -1.539290189743042, "logits/rejected": -1.5693296194076538, "logps/chosen": -116.95975494384766, "logps/rejected": -194.82623291015625, "loss": 0.0872, "rewards/accuracies": 1.0, "rewards/chosen": -2.3267292976379395, "rewards/margins": 8.51527214050293, "rewards/rejected": -10.842000961303711, "step": 2003 }, { "epoch": 3.22, "learning_rate": 3.570154577883472e-07, "logits/chosen": -1.698521375656128, "logits/rejected": -1.6985746622085571, "logps/chosen": -92.02882385253906, "logps/rejected": -129.2461700439453, "loss": 0.0554, "rewards/accuracies": 1.0, "rewards/chosen": -2.4635863304138184, "rewards/margins": 3.5712997913360596, "rewards/rejected": -6.034885883331299, "step": 2004 }, { "epoch": 3.22, "learning_rate": 3.569163694015061e-07, "logits/chosen": -1.75165855884552, "logits/rejected": -1.7594168186187744, "logps/chosen": -126.77601623535156, "logps/rejected": -177.39732360839844, "loss": 0.0451, "rewards/accuracies": 1.0, "rewards/chosen": -2.361426591873169, "rewards/margins": 5.126023292541504, "rewards/rejected": -7.487449645996094, "step": 2005 }, { "epoch": 3.22, "learning_rate": 3.568172810146651e-07, "logits/chosen": -1.6680991649627686, "logits/rejected": -1.6535958051681519, "logps/chosen": -118.80451965332031, "logps/rejected": -203.21603393554688, "loss": 0.0261, "rewards/accuracies": 1.0, "rewards/chosen": -3.221660852432251, "rewards/margins": 7.459354400634766, "rewards/rejected": -10.681015014648438, "step": 2006 }, { "epoch": 3.22, "learning_rate": 3.56718192627824e-07, "logits/chosen": -1.7694975137710571, "logits/rejected": -1.6701669692993164, "logps/chosen": -129.56613159179688, "logps/rejected": -195.66006469726562, "loss": 0.0169, "rewards/accuracies": 1.0, "rewards/chosen": -4.583385467529297, "rewards/margins": 7.497419834136963, "rewards/rejected": -12.080804824829102, "step": 2007 }, { "epoch": 3.22, "learning_rate": 3.5661910424098294e-07, "logits/chosen": -1.7256627082824707, "logits/rejected": -1.687364101409912, "logps/chosen": -132.4951629638672, "logps/rejected": -167.18020629882812, "loss": 0.0249, "rewards/accuracies": 1.0, "rewards/chosen": -2.7311692237854004, "rewards/margins": 4.674322128295898, "rewards/rejected": -7.405491352081299, "step": 2008 }, { "epoch": 3.22, "learning_rate": 3.5652001585414185e-07, "logits/chosen": -1.6575243473052979, "logits/rejected": -1.675766944885254, "logps/chosen": -87.92431640625, "logps/rejected": -159.44053649902344, "loss": 0.0207, "rewards/accuracies": 1.0, "rewards/chosen": -0.9726520776748657, "rewards/margins": 6.444147109985352, "rewards/rejected": -7.416799545288086, "step": 2009 }, { "epoch": 3.23, "learning_rate": 3.564209274673008e-07, "logits/chosen": -1.6902023553848267, "logits/rejected": -1.6616640090942383, "logps/chosen": -123.70252227783203, "logps/rejected": -195.3353271484375, "loss": 0.0107, "rewards/accuracies": 1.0, "rewards/chosen": -2.8088459968566895, "rewards/margins": 6.850536346435547, "rewards/rejected": -9.659382820129395, "step": 2010 }, { "epoch": 3.23, "learning_rate": 3.5632183908045977e-07, "logits/chosen": -1.7516603469848633, "logits/rejected": -1.7181164026260376, "logps/chosen": -96.66256713867188, "logps/rejected": -182.65757751464844, "loss": 0.0471, "rewards/accuracies": 1.0, "rewards/chosen": -1.6014363765716553, "rewards/margins": 8.602636337280273, "rewards/rejected": -10.204072952270508, "step": 2011 }, { "epoch": 3.23, "learning_rate": 3.562227506936187e-07, "logits/chosen": -1.7618237733840942, "logits/rejected": -1.7404905557632446, "logps/chosen": -116.84169006347656, "logps/rejected": -170.95925903320312, "loss": 0.0269, "rewards/accuracies": 1.0, "rewards/chosen": -2.472209930419922, "rewards/margins": 5.421300411224365, "rewards/rejected": -7.893510341644287, "step": 2012 }, { "epoch": 3.23, "learning_rate": 3.5612366230677764e-07, "logits/chosen": -1.8653661012649536, "logits/rejected": -1.8255833387374878, "logps/chosen": -124.89971923828125, "logps/rejected": -186.58786010742188, "loss": 0.0227, "rewards/accuracies": 1.0, "rewards/chosen": -3.7147672176361084, "rewards/margins": 5.521827220916748, "rewards/rejected": -9.236594200134277, "step": 2013 }, { "epoch": 3.23, "learning_rate": 3.5602457391993654e-07, "logits/chosen": -1.733572244644165, "logits/rejected": -1.725052833557129, "logps/chosen": -105.62425231933594, "logps/rejected": -181.5944366455078, "loss": 0.0128, "rewards/accuracies": 1.0, "rewards/chosen": -2.207209825515747, "rewards/margins": 7.438213348388672, "rewards/rejected": -9.645423889160156, "step": 2014 }, { "epoch": 3.23, "learning_rate": 3.559254855330955e-07, "logits/chosen": -1.714752197265625, "logits/rejected": -1.6862417459487915, "logps/chosen": -136.73931884765625, "logps/rejected": -188.9892578125, "loss": 0.0138, "rewards/accuracies": 1.0, "rewards/chosen": -4.014031410217285, "rewards/margins": 6.40467643737793, "rewards/rejected": -10.418707847595215, "step": 2015 }, { "epoch": 3.24, "learning_rate": 3.5582639714625446e-07, "logits/chosen": -1.7709604501724243, "logits/rejected": -1.74626624584198, "logps/chosen": -123.44064331054688, "logps/rejected": -195.44447326660156, "loss": 0.012, "rewards/accuracies": 1.0, "rewards/chosen": -1.0272455215454102, "rewards/margins": 9.278097152709961, "rewards/rejected": -10.305341720581055, "step": 2016 }, { "epoch": 3.24, "learning_rate": 3.5572730875941337e-07, "logits/chosen": -1.5063961744308472, "logits/rejected": -1.6065428256988525, "logps/chosen": -84.97294616699219, "logps/rejected": -193.00779724121094, "loss": 0.0503, "rewards/accuracies": 1.0, "rewards/chosen": -1.5014123916625977, "rewards/margins": 7.171283721923828, "rewards/rejected": -8.67269515991211, "step": 2017 }, { "epoch": 3.24, "learning_rate": 3.5562822037257233e-07, "logits/chosen": -1.7142449617385864, "logits/rejected": -1.572502613067627, "logps/chosen": -103.47219848632812, "logps/rejected": -141.6439666748047, "loss": 0.027, "rewards/accuracies": 1.0, "rewards/chosen": -1.7490415573120117, "rewards/margins": 4.820174217224121, "rewards/rejected": -6.569215774536133, "step": 2018 }, { "epoch": 3.24, "learning_rate": 3.5552913198573124e-07, "logits/chosen": -1.6510255336761475, "logits/rejected": -1.6544229984283447, "logps/chosen": -93.62248992919922, "logps/rejected": -196.1938018798828, "loss": 0.0234, "rewards/accuracies": 1.0, "rewards/chosen": -1.779899001121521, "rewards/margins": 8.541215896606445, "rewards/rejected": -10.321115493774414, "step": 2019 }, { "epoch": 3.24, "learning_rate": 3.554300435988902e-07, "logits/chosen": -1.5723965167999268, "logits/rejected": -1.5600991249084473, "logps/chosen": -130.86013793945312, "logps/rejected": -167.15878295898438, "loss": 0.0271, "rewards/accuracies": 1.0, "rewards/chosen": -3.158167839050293, "rewards/margins": 4.35741662979126, "rewards/rejected": -7.5155839920043945, "step": 2020 }, { "epoch": 3.24, "learning_rate": 3.5533095521204916e-07, "logits/chosen": -1.7211904525756836, "logits/rejected": -1.6674188375473022, "logps/chosen": -103.51260375976562, "logps/rejected": -153.24549865722656, "loss": 0.0424, "rewards/accuracies": 1.0, "rewards/chosen": -2.587942361831665, "rewards/margins": 4.041879653930664, "rewards/rejected": -6.62982177734375, "step": 2021 }, { "epoch": 3.25, "learning_rate": 3.5523186682520806e-07, "logits/chosen": -1.8160046339035034, "logits/rejected": -1.8378138542175293, "logps/chosen": -110.88774108886719, "logps/rejected": -141.93838500976562, "loss": 0.0464, "rewards/accuracies": 1.0, "rewards/chosen": -1.352081537246704, "rewards/margins": 4.052038192749023, "rewards/rejected": -5.404119491577148, "step": 2022 }, { "epoch": 3.25, "learning_rate": 3.55132778438367e-07, "logits/chosen": -1.6357476711273193, "logits/rejected": -1.7729158401489258, "logps/chosen": -90.22354125976562, "logps/rejected": -144.4239959716797, "loss": 0.0248, "rewards/accuracies": 1.0, "rewards/chosen": -2.064680576324463, "rewards/margins": 5.501731872558594, "rewards/rejected": -7.566412925720215, "step": 2023 }, { "epoch": 3.25, "learning_rate": 3.5503369005152593e-07, "logits/chosen": -1.7298825979232788, "logits/rejected": -1.7840473651885986, "logps/chosen": -98.15716552734375, "logps/rejected": -202.7715301513672, "loss": 0.0146, "rewards/accuracies": 1.0, "rewards/chosen": -1.7900609970092773, "rewards/margins": 8.75582504272461, "rewards/rejected": -10.545886039733887, "step": 2024 }, { "epoch": 3.25, "learning_rate": 3.549346016646849e-07, "logits/chosen": -1.6559823751449585, "logits/rejected": -1.631261944770813, "logps/chosen": -68.30438995361328, "logps/rejected": -154.02804565429688, "loss": 0.0286, "rewards/accuracies": 1.0, "rewards/chosen": -0.9928219318389893, "rewards/margins": 7.781617164611816, "rewards/rejected": -8.774438858032227, "step": 2025 }, { "epoch": 3.25, "learning_rate": 3.5483551327784385e-07, "logits/chosen": -1.6812584400177002, "logits/rejected": -1.660265326499939, "logps/chosen": -138.43495178222656, "logps/rejected": -183.21591186523438, "loss": 0.0174, "rewards/accuracies": 1.0, "rewards/chosen": -3.618306875228882, "rewards/margins": 5.05946159362793, "rewards/rejected": -8.67776870727539, "step": 2026 }, { "epoch": 3.25, "learning_rate": 3.5473642489100276e-07, "logits/chosen": -1.690529704093933, "logits/rejected": -1.6533970832824707, "logps/chosen": -116.55496215820312, "logps/rejected": -195.35899353027344, "loss": 0.0343, "rewards/accuracies": 1.0, "rewards/chosen": -4.213809967041016, "rewards/margins": 7.62592887878418, "rewards/rejected": -11.839737892150879, "step": 2027 }, { "epoch": 3.26, "learning_rate": 3.546373365041617e-07, "logits/chosen": -1.5723062753677368, "logits/rejected": -1.6559556722640991, "logps/chosen": -84.3291015625, "logps/rejected": -188.81622314453125, "loss": 0.0078, "rewards/accuracies": 1.0, "rewards/chosen": -1.4844000339508057, "rewards/margins": 9.447090148925781, "rewards/rejected": -10.931489944458008, "step": 2028 }, { "epoch": 3.26, "learning_rate": 3.545382481173206e-07, "logits/chosen": -1.584083080291748, "logits/rejected": -1.6116832494735718, "logps/chosen": -107.50037384033203, "logps/rejected": -186.1376495361328, "loss": 0.0587, "rewards/accuracies": 1.0, "rewards/chosen": -2.5716426372528076, "rewards/margins": 7.57182502746582, "rewards/rejected": -10.143467903137207, "step": 2029 }, { "epoch": 3.26, "learning_rate": 3.5443915973047953e-07, "logits/chosen": -1.5774288177490234, "logits/rejected": -1.6623879671096802, "logps/chosen": -112.900390625, "logps/rejected": -195.7937774658203, "loss": 0.0488, "rewards/accuracies": 1.0, "rewards/chosen": -3.689795970916748, "rewards/margins": 6.9362382888793945, "rewards/rejected": -10.626033782958984, "step": 2030 }, { "epoch": 3.26, "learning_rate": 3.5434007134363854e-07, "logits/chosen": -1.645391821861267, "logits/rejected": -1.7327213287353516, "logps/chosen": -124.74850463867188, "logps/rejected": -217.0048828125, "loss": 0.022, "rewards/accuracies": 1.0, "rewards/chosen": -3.2198710441589355, "rewards/margins": 7.360556602478027, "rewards/rejected": -10.580427169799805, "step": 2031 }, { "epoch": 3.26, "learning_rate": 3.5424098295679745e-07, "logits/chosen": -1.651674509048462, "logits/rejected": -1.7258096933364868, "logps/chosen": -110.44580841064453, "logps/rejected": -179.6291961669922, "loss": 0.0171, "rewards/accuracies": 1.0, "rewards/chosen": -2.8107266426086426, "rewards/margins": 4.630771636962891, "rewards/rejected": -7.441497802734375, "step": 2032 }, { "epoch": 3.26, "learning_rate": 3.541418945699564e-07, "logits/chosen": -1.7522649765014648, "logits/rejected": -1.744690179824829, "logps/chosen": -124.28913879394531, "logps/rejected": -196.3479766845703, "loss": 0.0273, "rewards/accuracies": 1.0, "rewards/chosen": -3.483734130859375, "rewards/margins": 6.694919586181641, "rewards/rejected": -10.178653717041016, "step": 2033 }, { "epoch": 3.26, "learning_rate": 3.540428061831153e-07, "logits/chosen": -1.6912932395935059, "logits/rejected": -1.7010061740875244, "logps/chosen": -99.73137664794922, "logps/rejected": -167.2801513671875, "loss": 0.0341, "rewards/accuracies": 1.0, "rewards/chosen": -2.113145351409912, "rewards/margins": 7.037293434143066, "rewards/rejected": -9.150439262390137, "step": 2034 }, { "epoch": 3.27, "learning_rate": 3.539437177962742e-07, "logits/chosen": -1.5355944633483887, "logits/rejected": -1.6451241970062256, "logps/chosen": -73.27796173095703, "logps/rejected": -204.64828491210938, "loss": 0.024, "rewards/accuracies": 1.0, "rewards/chosen": -1.1976823806762695, "rewards/margins": 10.020912170410156, "rewards/rejected": -11.218594551086426, "step": 2035 }, { "epoch": 3.27, "learning_rate": 3.5384462940943323e-07, "logits/chosen": -1.619011402130127, "logits/rejected": -1.646573781967163, "logps/chosen": -122.8797607421875, "logps/rejected": -204.96339416503906, "loss": 0.0115, "rewards/accuracies": 1.0, "rewards/chosen": -3.8439865112304688, "rewards/margins": 6.5051703453063965, "rewards/rejected": -10.349156379699707, "step": 2036 }, { "epoch": 3.27, "learning_rate": 3.5374554102259214e-07, "logits/chosen": -1.7068995237350464, "logits/rejected": -1.7333273887634277, "logps/chosen": -121.65911865234375, "logps/rejected": -183.50997924804688, "loss": 0.0232, "rewards/accuracies": 1.0, "rewards/chosen": -2.2939860820770264, "rewards/margins": 6.232802391052246, "rewards/rejected": -8.526788711547852, "step": 2037 }, { "epoch": 3.27, "learning_rate": 3.5364645263575105e-07, "logits/chosen": -1.7073216438293457, "logits/rejected": -1.6960726976394653, "logps/chosen": -98.9232177734375, "logps/rejected": -140.88519287109375, "loss": 0.0237, "rewards/accuracies": 1.0, "rewards/chosen": -2.3781697750091553, "rewards/margins": 4.691478252410889, "rewards/rejected": -7.069647789001465, "step": 2038 }, { "epoch": 3.27, "learning_rate": 3.5354736424891e-07, "logits/chosen": -1.5573463439941406, "logits/rejected": -1.5077707767486572, "logps/chosen": -79.8752670288086, "logps/rejected": -148.6927032470703, "loss": 0.013, "rewards/accuracies": 1.0, "rewards/chosen": -1.2021527290344238, "rewards/margins": 8.196165084838867, "rewards/rejected": -9.39831829071045, "step": 2039 }, { "epoch": 3.27, "learning_rate": 3.534482758620689e-07, "logits/chosen": -1.7919373512268066, "logits/rejected": -1.7153916358947754, "logps/chosen": -96.89486694335938, "logps/rejected": -167.23121643066406, "loss": 0.023, "rewards/accuracies": 1.0, "rewards/chosen": -0.8974096179008484, "rewards/margins": 7.23689079284668, "rewards/rejected": -8.134300231933594, "step": 2040 }, { "epoch": 3.28, "learning_rate": 3.5334918747522793e-07, "logits/chosen": -1.6336089372634888, "logits/rejected": -1.7141127586364746, "logps/chosen": -131.75634765625, "logps/rejected": -195.8248291015625, "loss": 0.0259, "rewards/accuracies": 1.0, "rewards/chosen": -3.5211052894592285, "rewards/margins": 5.545111656188965, "rewards/rejected": -9.066216468811035, "step": 2041 }, { "epoch": 3.28, "learning_rate": 3.5325009908838683e-07, "logits/chosen": -1.7816723585128784, "logits/rejected": -1.6934444904327393, "logps/chosen": -138.10914611816406, "logps/rejected": -190.49066162109375, "loss": 0.0222, "rewards/accuracies": 1.0, "rewards/chosen": -3.6673169136047363, "rewards/margins": 6.743302345275879, "rewards/rejected": -10.410619735717773, "step": 2042 }, { "epoch": 3.28, "learning_rate": 3.5315101070154574e-07, "logits/chosen": -1.7043250799179077, "logits/rejected": -1.696839451789856, "logps/chosen": -125.66586303710938, "logps/rejected": -178.55821228027344, "loss": 0.0519, "rewards/accuracies": 1.0, "rewards/chosen": -2.8805248737335205, "rewards/margins": 4.60069465637207, "rewards/rejected": -7.48121976852417, "step": 2043 }, { "epoch": 3.28, "learning_rate": 3.530519223147047e-07, "logits/chosen": -1.632690191268921, "logits/rejected": -1.6675313711166382, "logps/chosen": -96.94892883300781, "logps/rejected": -198.15162658691406, "loss": 0.0148, "rewards/accuracies": 1.0, "rewards/chosen": -1.426142692565918, "rewards/margins": 7.072009086608887, "rewards/rejected": -8.498151779174805, "step": 2044 }, { "epoch": 3.28, "learning_rate": 3.529528339278636e-07, "logits/chosen": -1.8036823272705078, "logits/rejected": -1.6965363025665283, "logps/chosen": -89.57609558105469, "logps/rejected": -133.85401916503906, "loss": 0.0245, "rewards/accuracies": 1.0, "rewards/chosen": -0.8678112030029297, "rewards/margins": 5.78618860244751, "rewards/rejected": -6.654000282287598, "step": 2045 }, { "epoch": 3.28, "learning_rate": 3.528537455410226e-07, "logits/chosen": -1.5618705749511719, "logits/rejected": -1.580300211906433, "logps/chosen": -85.90895080566406, "logps/rejected": -152.3944854736328, "loss": 0.0328, "rewards/accuracies": 1.0, "rewards/chosen": -2.465034008026123, "rewards/margins": 5.579839706420898, "rewards/rejected": -8.044873237609863, "step": 2046 }, { "epoch": 3.29, "learning_rate": 3.5275465715418153e-07, "logits/chosen": -1.5931611061096191, "logits/rejected": -1.6863532066345215, "logps/chosen": -101.43183898925781, "logps/rejected": -163.30418395996094, "loss": 0.0157, "rewards/accuracies": 1.0, "rewards/chosen": -2.275968074798584, "rewards/margins": 6.54271936416626, "rewards/rejected": -8.818687438964844, "step": 2047 }, { "epoch": 3.29, "learning_rate": 3.5265556876734043e-07, "logits/chosen": -1.661088228225708, "logits/rejected": -1.6374977827072144, "logps/chosen": -118.02924346923828, "logps/rejected": -200.5345458984375, "loss": 0.0184, "rewards/accuracies": 1.0, "rewards/chosen": -3.752305030822754, "rewards/margins": 8.292831420898438, "rewards/rejected": -12.045136451721191, "step": 2048 }, { "epoch": 3.29, "learning_rate": 3.525564803804994e-07, "logits/chosen": -1.5887510776519775, "logits/rejected": -1.6357126235961914, "logps/chosen": -85.42509460449219, "logps/rejected": -175.87744140625, "loss": 0.0069, "rewards/accuracies": 1.0, "rewards/chosen": -1.4510061740875244, "rewards/margins": 7.103634357452393, "rewards/rejected": -8.554640769958496, "step": 2049 }, { "epoch": 3.29, "learning_rate": 3.524573919936583e-07, "logits/chosen": -1.7295176982879639, "logits/rejected": -1.6733283996582031, "logps/chosen": -81.71739196777344, "logps/rejected": -183.78436279296875, "loss": 0.0147, "rewards/accuracies": 1.0, "rewards/chosen": -0.04011097550392151, "rewards/margins": 10.382695198059082, "rewards/rejected": -10.422805786132812, "step": 2050 }, { "epoch": 3.29, "learning_rate": 3.5235830360681726e-07, "logits/chosen": -1.7350940704345703, "logits/rejected": -1.6993513107299805, "logps/chosen": -114.74260711669922, "logps/rejected": -195.00222778320312, "loss": 0.0134, "rewards/accuracies": 1.0, "rewards/chosen": -2.893986701965332, "rewards/margins": 8.711647987365723, "rewards/rejected": -11.605634689331055, "step": 2051 }, { "epoch": 3.29, "learning_rate": 3.522592152199762e-07, "logits/chosen": -1.8325505256652832, "logits/rejected": -1.7374728918075562, "logps/chosen": -134.5269775390625, "logps/rejected": -184.96864318847656, "loss": 0.1428, "rewards/accuracies": 1.0, "rewards/chosen": -3.158799648284912, "rewards/margins": 8.088876724243164, "rewards/rejected": -11.247675895690918, "step": 2052 }, { "epoch": 3.3, "learning_rate": 3.5216012683313513e-07, "logits/chosen": -1.6940503120422363, "logits/rejected": -1.7987241744995117, "logps/chosen": -97.38296508789062, "logps/rejected": -199.442626953125, "loss": 0.0398, "rewards/accuracies": 1.0, "rewards/chosen": -2.4544386863708496, "rewards/margins": 7.868283748626709, "rewards/rejected": -10.322722434997559, "step": 2053 }, { "epoch": 3.3, "learning_rate": 3.520610384462941e-07, "logits/chosen": -1.6412770748138428, "logits/rejected": -1.7698283195495605, "logps/chosen": -84.8344497680664, "logps/rejected": -170.0762176513672, "loss": 0.0196, "rewards/accuracies": 1.0, "rewards/chosen": -2.9975993633270264, "rewards/margins": 6.987544059753418, "rewards/rejected": -9.985143661499023, "step": 2054 }, { "epoch": 3.3, "learning_rate": 3.51961950059453e-07, "logits/chosen": -1.6144872903823853, "logits/rejected": -1.6565890312194824, "logps/chosen": -95.73251342773438, "logps/rejected": -153.00527954101562, "loss": 0.1312, "rewards/accuracies": 1.0, "rewards/chosen": -2.9153337478637695, "rewards/margins": 4.173851013183594, "rewards/rejected": -7.089184761047363, "step": 2055 }, { "epoch": 3.3, "learning_rate": 3.5186286167261195e-07, "logits/chosen": -1.5612393617630005, "logits/rejected": -1.4966504573822021, "logps/chosen": -142.116943359375, "logps/rejected": -194.21397399902344, "loss": 0.008, "rewards/accuracies": 1.0, "rewards/chosen": -5.988341808319092, "rewards/margins": 6.339868068695068, "rewards/rejected": -12.32820987701416, "step": 2056 }, { "epoch": 3.3, "learning_rate": 3.517637732857709e-07, "logits/chosen": -1.5660104751586914, "logits/rejected": -1.6873427629470825, "logps/chosen": -114.38330078125, "logps/rejected": -203.99484252929688, "loss": 0.0109, "rewards/accuracies": 1.0, "rewards/chosen": -2.250114679336548, "rewards/margins": 7.195102691650391, "rewards/rejected": -9.44521713256836, "step": 2057 }, { "epoch": 3.3, "learning_rate": 3.516646848989298e-07, "logits/chosen": -1.7787103652954102, "logits/rejected": -1.6974260807037354, "logps/chosen": -112.34767150878906, "logps/rejected": -176.05419921875, "loss": 0.0461, "rewards/accuracies": 1.0, "rewards/chosen": -3.31773042678833, "rewards/margins": 6.7927045822143555, "rewards/rejected": -10.110435485839844, "step": 2058 }, { "epoch": 3.3, "learning_rate": 3.515655965120888e-07, "logits/chosen": -1.6437506675720215, "logits/rejected": -1.699434757232666, "logps/chosen": -131.1387176513672, "logps/rejected": -206.95291137695312, "loss": 0.0159, "rewards/accuracies": 1.0, "rewards/chosen": -2.9130077362060547, "rewards/margins": 7.901460647583008, "rewards/rejected": -10.814468383789062, "step": 2059 }, { "epoch": 3.31, "learning_rate": 3.514665081252477e-07, "logits/chosen": -1.7752704620361328, "logits/rejected": -1.8196146488189697, "logps/chosen": -135.15318298339844, "logps/rejected": -199.51034545898438, "loss": 0.0149, "rewards/accuracies": 1.0, "rewards/chosen": -3.368276834487915, "rewards/margins": 6.969776153564453, "rewards/rejected": -10.338052749633789, "step": 2060 }, { "epoch": 3.31, "learning_rate": 3.5136741973840665e-07, "logits/chosen": -1.5261967182159424, "logits/rejected": -1.5570764541625977, "logps/chosen": -97.02388000488281, "logps/rejected": -178.44711303710938, "loss": 0.0241, "rewards/accuracies": 1.0, "rewards/chosen": -3.1986265182495117, "rewards/margins": 6.216625213623047, "rewards/rejected": -9.415250778198242, "step": 2061 }, { "epoch": 3.31, "learning_rate": 3.512683313515656e-07, "logits/chosen": -1.6163796186447144, "logits/rejected": -1.5798059701919556, "logps/chosen": -112.79469299316406, "logps/rejected": -176.45101928710938, "loss": 0.0138, "rewards/accuracies": 1.0, "rewards/chosen": -3.4859495162963867, "rewards/margins": 7.309941291809082, "rewards/rejected": -10.795890808105469, "step": 2062 }, { "epoch": 3.31, "learning_rate": 3.511692429647245e-07, "logits/chosen": -1.7428224086761475, "logits/rejected": -1.6137886047363281, "logps/chosen": -147.28561401367188, "logps/rejected": -182.53680419921875, "loss": 0.0816, "rewards/accuracies": 1.0, "rewards/chosen": -3.783318042755127, "rewards/margins": 4.728327751159668, "rewards/rejected": -8.511646270751953, "step": 2063 }, { "epoch": 3.31, "learning_rate": 3.5107015457788347e-07, "logits/chosen": -1.6979378461837769, "logits/rejected": -1.7455346584320068, "logps/chosen": -124.53958129882812, "logps/rejected": -198.02040100097656, "loss": 0.0431, "rewards/accuracies": 1.0, "rewards/chosen": -4.078638553619385, "rewards/margins": 5.82305908203125, "rewards/rejected": -9.901698112487793, "step": 2064 }, { "epoch": 3.31, "learning_rate": 3.509710661910424e-07, "logits/chosen": -1.5711442232131958, "logits/rejected": -1.5496519804000854, "logps/chosen": -104.2340087890625, "logps/rejected": -185.98040771484375, "loss": 0.0099, "rewards/accuracies": 1.0, "rewards/chosen": -3.1871848106384277, "rewards/margins": 7.526273250579834, "rewards/rejected": -10.713458061218262, "step": 2065 }, { "epoch": 3.32, "learning_rate": 3.5087197780420134e-07, "logits/chosen": -1.5481290817260742, "logits/rejected": -1.6790692806243896, "logps/chosen": -102.08615112304688, "logps/rejected": -188.74856567382812, "loss": 0.0132, "rewards/accuracies": 1.0, "rewards/chosen": -2.82808256149292, "rewards/margins": 6.767024993896484, "rewards/rejected": -9.595108032226562, "step": 2066 }, { "epoch": 3.32, "learning_rate": 3.507728894173603e-07, "logits/chosen": -1.4970184564590454, "logits/rejected": -1.594588279724121, "logps/chosen": -95.92796325683594, "logps/rejected": -202.95010375976562, "loss": 0.0159, "rewards/accuracies": 1.0, "rewards/chosen": -3.183837890625, "rewards/margins": 6.93454647064209, "rewards/rejected": -10.11838436126709, "step": 2067 }, { "epoch": 3.32, "learning_rate": 3.506738010305192e-07, "logits/chosen": -1.615171194076538, "logits/rejected": -1.5649290084838867, "logps/chosen": -161.8868408203125, "logps/rejected": -240.80953979492188, "loss": 0.0087, "rewards/accuracies": 1.0, "rewards/chosen": -7.24836540222168, "rewards/margins": 6.343355178833008, "rewards/rejected": -13.591720581054688, "step": 2068 }, { "epoch": 3.32, "learning_rate": 3.5057471264367817e-07, "logits/chosen": -1.659646987915039, "logits/rejected": -1.6686872243881226, "logps/chosen": -142.30947875976562, "logps/rejected": -213.208740234375, "loss": 0.0081, "rewards/accuracies": 1.0, "rewards/chosen": -4.487746238708496, "rewards/margins": 7.394679069519043, "rewards/rejected": -11.882425308227539, "step": 2069 }, { "epoch": 3.32, "learning_rate": 3.5047562425683707e-07, "logits/chosen": -1.4991090297698975, "logits/rejected": -1.5452336072921753, "logps/chosen": -146.97239685058594, "logps/rejected": -198.33534240722656, "loss": 0.0283, "rewards/accuracies": 1.0, "rewards/chosen": -4.92130708694458, "rewards/margins": 5.071410179138184, "rewards/rejected": -9.992717742919922, "step": 2070 }, { "epoch": 3.32, "learning_rate": 3.5037653586999603e-07, "logits/chosen": -1.6591300964355469, "logits/rejected": -1.6487138271331787, "logps/chosen": -104.8362045288086, "logps/rejected": -162.5994873046875, "loss": 0.0395, "rewards/accuracies": 1.0, "rewards/chosen": -3.3694262504577637, "rewards/margins": 6.500641822814941, "rewards/rejected": -9.870068550109863, "step": 2071 }, { "epoch": 3.33, "learning_rate": 3.5027744748315494e-07, "logits/chosen": -1.7570536136627197, "logits/rejected": -1.7994208335876465, "logps/chosen": -125.41072845458984, "logps/rejected": -184.57676696777344, "loss": 0.0163, "rewards/accuracies": 1.0, "rewards/chosen": -3.7761731147766113, "rewards/margins": 6.0106329917907715, "rewards/rejected": -9.7868070602417, "step": 2072 }, { "epoch": 3.33, "learning_rate": 3.501783590963139e-07, "logits/chosen": -1.6470446586608887, "logits/rejected": -1.6384143829345703, "logps/chosen": -147.060302734375, "logps/rejected": -202.508544921875, "loss": 0.0365, "rewards/accuracies": 1.0, "rewards/chosen": -3.721890449523926, "rewards/margins": 6.80593204498291, "rewards/rejected": -10.527823448181152, "step": 2073 }, { "epoch": 3.33, "learning_rate": 3.5007927070947286e-07, "logits/chosen": -1.721962809562683, "logits/rejected": -1.8580856323242188, "logps/chosen": -134.58436584472656, "logps/rejected": -221.06863403320312, "loss": 0.0175, "rewards/accuracies": 1.0, "rewards/chosen": -4.479290962219238, "rewards/margins": 6.382335186004639, "rewards/rejected": -10.861625671386719, "step": 2074 }, { "epoch": 3.33, "learning_rate": 3.4998018232263177e-07, "logits/chosen": -1.5944583415985107, "logits/rejected": -1.6397796869277954, "logps/chosen": -110.708984375, "logps/rejected": -185.36746215820312, "loss": 0.0259, "rewards/accuracies": 1.0, "rewards/chosen": -2.9464404582977295, "rewards/margins": 6.239197731018066, "rewards/rejected": -9.185638427734375, "step": 2075 }, { "epoch": 3.33, "learning_rate": 3.4988109393579067e-07, "logits/chosen": -1.6801296472549438, "logits/rejected": -1.7167810201644897, "logps/chosen": -123.9905776977539, "logps/rejected": -195.42466735839844, "loss": 0.0232, "rewards/accuracies": 1.0, "rewards/chosen": -4.2716193199157715, "rewards/margins": 7.981805324554443, "rewards/rejected": -12.253424644470215, "step": 2076 }, { "epoch": 3.33, "learning_rate": 3.4978200554894963e-07, "logits/chosen": -1.6201092004776, "logits/rejected": -1.60984468460083, "logps/chosen": -148.92037963867188, "logps/rejected": -210.60067749023438, "loss": 0.0262, "rewards/accuracies": 1.0, "rewards/chosen": -4.497320175170898, "rewards/margins": 6.2794036865234375, "rewards/rejected": -10.776723861694336, "step": 2077 }, { "epoch": 3.34, "learning_rate": 3.496829171621086e-07, "logits/chosen": -1.7650374174118042, "logits/rejected": -1.5757511854171753, "logps/chosen": -163.24435424804688, "logps/rejected": -212.4429931640625, "loss": 0.1855, "rewards/accuracies": 1.0, "rewards/chosen": -5.880949020385742, "rewards/margins": 6.788045883178711, "rewards/rejected": -12.668994903564453, "step": 2078 }, { "epoch": 3.34, "learning_rate": 3.4958382877526755e-07, "logits/chosen": -1.6514322757720947, "logits/rejected": -1.6147501468658447, "logps/chosen": -125.0506591796875, "logps/rejected": -193.29022216796875, "loss": 0.0451, "rewards/accuracies": 1.0, "rewards/chosen": -4.056339263916016, "rewards/margins": 6.6867451667785645, "rewards/rejected": -10.743084907531738, "step": 2079 }, { "epoch": 3.34, "learning_rate": 3.4948474038842646e-07, "logits/chosen": -1.6074570417404175, "logits/rejected": -1.5741498470306396, "logps/chosen": -84.68325805664062, "logps/rejected": -184.94464111328125, "loss": 0.0221, "rewards/accuracies": 1.0, "rewards/chosen": -1.3913419246673584, "rewards/margins": 9.007447242736816, "rewards/rejected": -10.398789405822754, "step": 2080 }, { "epoch": 3.34, "learning_rate": 3.4938565200158537e-07, "logits/chosen": -1.7041853666305542, "logits/rejected": -1.6737600564956665, "logps/chosen": -119.42922973632812, "logps/rejected": -149.07240295410156, "loss": 0.0247, "rewards/accuracies": 1.0, "rewards/chosen": -4.012264728546143, "rewards/margins": 3.462195634841919, "rewards/rejected": -7.474460124969482, "step": 2081 }, { "epoch": 3.34, "learning_rate": 3.492865636147443e-07, "logits/chosen": -1.4731776714324951, "logits/rejected": -1.544046401977539, "logps/chosen": -110.99830627441406, "logps/rejected": -197.74563598632812, "loss": 0.045, "rewards/accuracies": 1.0, "rewards/chosen": -3.954432964324951, "rewards/margins": 6.247214317321777, "rewards/rejected": -10.201647758483887, "step": 2082 }, { "epoch": 3.34, "learning_rate": 3.491874752279033e-07, "logits/chosen": -1.5051703453063965, "logits/rejected": -1.4734195470809937, "logps/chosen": -119.0909423828125, "logps/rejected": -207.31082153320312, "loss": 0.041, "rewards/accuracies": 1.0, "rewards/chosen": -4.827057838439941, "rewards/margins": 8.716464042663574, "rewards/rejected": -13.543521881103516, "step": 2083 }, { "epoch": 3.35, "learning_rate": 3.4908838684106224e-07, "logits/chosen": -1.633575677871704, "logits/rejected": -1.7270586490631104, "logps/chosen": -133.06439208984375, "logps/rejected": -203.20111083984375, "loss": 0.0447, "rewards/accuracies": 1.0, "rewards/chosen": -3.6094703674316406, "rewards/margins": 7.152858257293701, "rewards/rejected": -10.7623291015625, "step": 2084 }, { "epoch": 3.35, "learning_rate": 3.4898929845422115e-07, "logits/chosen": -1.7078940868377686, "logits/rejected": -1.7678699493408203, "logps/chosen": -101.42133331298828, "logps/rejected": -194.13467407226562, "loss": 0.0115, "rewards/accuracies": 1.0, "rewards/chosen": -2.351909875869751, "rewards/margins": 7.795727252960205, "rewards/rejected": -10.147636413574219, "step": 2085 }, { "epoch": 3.35, "learning_rate": 3.4889021006738006e-07, "logits/chosen": -1.5811543464660645, "logits/rejected": -1.719336986541748, "logps/chosen": -116.04046630859375, "logps/rejected": -228.47386169433594, "loss": 0.0183, "rewards/accuracies": 1.0, "rewards/chosen": -3.0201780796051025, "rewards/margins": 7.708017826080322, "rewards/rejected": -10.728196144104004, "step": 2086 }, { "epoch": 3.35, "learning_rate": 3.48791121680539e-07, "logits/chosen": -1.7652583122253418, "logits/rejected": -1.8926763534545898, "logps/chosen": -80.40861511230469, "logps/rejected": -195.84689331054688, "loss": 0.0566, "rewards/accuracies": 1.0, "rewards/chosen": -1.8227264881134033, "rewards/margins": 10.153970718383789, "rewards/rejected": -11.97669792175293, "step": 2087 }, { "epoch": 3.35, "learning_rate": 3.48692033293698e-07, "logits/chosen": -1.7359036207199097, "logits/rejected": -1.6724848747253418, "logps/chosen": -97.54639434814453, "logps/rejected": -190.151123046875, "loss": 0.01, "rewards/accuracies": 1.0, "rewards/chosen": -2.19036602973938, "rewards/margins": 8.324312210083008, "rewards/rejected": -10.514678955078125, "step": 2088 }, { "epoch": 3.35, "learning_rate": 3.4859294490685694e-07, "logits/chosen": -1.6203641891479492, "logits/rejected": -1.6291577816009521, "logps/chosen": -77.82333374023438, "logps/rejected": -155.30172729492188, "loss": 0.0202, "rewards/accuracies": 1.0, "rewards/chosen": -1.0014326572418213, "rewards/margins": 6.2169342041015625, "rewards/rejected": -7.218366622924805, "step": 2089 }, { "epoch": 3.35, "learning_rate": 3.4849385652001584e-07, "logits/chosen": -1.6725049018859863, "logits/rejected": -1.7031389474868774, "logps/chosen": -104.24005126953125, "logps/rejected": -203.46922302246094, "loss": 0.0308, "rewards/accuracies": 1.0, "rewards/chosen": -2.535928249359131, "rewards/margins": 8.983169555664062, "rewards/rejected": -11.519098281860352, "step": 2090 }, { "epoch": 3.36, "learning_rate": 3.4839476813317475e-07, "logits/chosen": -1.7080307006835938, "logits/rejected": -1.7406466007232666, "logps/chosen": -137.87539672851562, "logps/rejected": -216.49749755859375, "loss": 0.0311, "rewards/accuracies": 1.0, "rewards/chosen": -5.034592151641846, "rewards/margins": 7.497150897979736, "rewards/rejected": -12.531743049621582, "step": 2091 }, { "epoch": 3.36, "learning_rate": 3.482956797463337e-07, "logits/chosen": -1.5447242259979248, "logits/rejected": -1.5915106534957886, "logps/chosen": -99.11773681640625, "logps/rejected": -172.39376831054688, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/chosen": -2.975186347961426, "rewards/margins": 7.174497127532959, "rewards/rejected": -10.149683952331543, "step": 2092 }, { "epoch": 3.36, "learning_rate": 3.481965913594926e-07, "logits/chosen": -1.6917927265167236, "logits/rejected": -1.6393178701400757, "logps/chosen": -118.53850555419922, "logps/rejected": -191.73538208007812, "loss": 0.0461, "rewards/accuracies": 1.0, "rewards/chosen": -2.934405565261841, "rewards/margins": 8.804667472839355, "rewards/rejected": -11.739073753356934, "step": 2093 }, { "epoch": 3.36, "learning_rate": 3.4809750297265163e-07, "logits/chosen": -1.5695432424545288, "logits/rejected": -1.4879179000854492, "logps/chosen": -142.67584228515625, "logps/rejected": -213.89813232421875, "loss": 0.011, "rewards/accuracies": 1.0, "rewards/chosen": -4.528069972991943, "rewards/margins": 7.256927967071533, "rewards/rejected": -11.784997940063477, "step": 2094 }, { "epoch": 3.36, "learning_rate": 3.4799841458581054e-07, "logits/chosen": -1.6946247816085815, "logits/rejected": -1.6985349655151367, "logps/chosen": -139.14996337890625, "logps/rejected": -219.64065551757812, "loss": 0.0349, "rewards/accuracies": 1.0, "rewards/chosen": -3.2215428352355957, "rewards/margins": 7.263465404510498, "rewards/rejected": -10.485008239746094, "step": 2095 }, { "epoch": 3.36, "learning_rate": 3.4789932619896944e-07, "logits/chosen": -1.7050718069076538, "logits/rejected": -1.6117172241210938, "logps/chosen": -135.0051727294922, "logps/rejected": -194.73324584960938, "loss": 0.0419, "rewards/accuracies": 1.0, "rewards/chosen": -3.129521369934082, "rewards/margins": 7.611749649047852, "rewards/rejected": -10.741271018981934, "step": 2096 }, { "epoch": 3.37, "learning_rate": 3.478002378121284e-07, "logits/chosen": -1.8147413730621338, "logits/rejected": -1.839887261390686, "logps/chosen": -119.82239532470703, "logps/rejected": -188.00216674804688, "loss": 0.0106, "rewards/accuracies": 1.0, "rewards/chosen": -3.7494897842407227, "rewards/margins": 6.676491737365723, "rewards/rejected": -10.425981521606445, "step": 2097 }, { "epoch": 3.37, "learning_rate": 3.477011494252873e-07, "logits/chosen": -1.6380187273025513, "logits/rejected": -1.5468883514404297, "logps/chosen": -123.74588775634766, "logps/rejected": -183.67068481445312, "loss": 0.0332, "rewards/accuracies": 1.0, "rewards/chosen": -2.301665782928467, "rewards/margins": 9.571096420288086, "rewards/rejected": -11.872761726379395, "step": 2098 }, { "epoch": 3.37, "learning_rate": 3.476020610384463e-07, "logits/chosen": -1.5602937936782837, "logits/rejected": -1.6781938076019287, "logps/chosen": -75.20357513427734, "logps/rejected": -211.9456329345703, "loss": 0.0242, "rewards/accuracies": 1.0, "rewards/chosen": -1.9074466228485107, "rewards/margins": 9.166354179382324, "rewards/rejected": -11.073801040649414, "step": 2099 }, { "epoch": 3.37, "learning_rate": 3.4750297265160523e-07, "logits/chosen": -1.6872003078460693, "logits/rejected": -1.7494442462921143, "logps/chosen": -96.90390014648438, "logps/rejected": -193.1634521484375, "loss": 0.0276, "rewards/accuracies": 1.0, "rewards/chosen": -2.773188591003418, "rewards/margins": 5.21782112121582, "rewards/rejected": -7.991009712219238, "step": 2100 }, { "epoch": 3.37, "learning_rate": 3.4740388426476414e-07, "logits/chosen": -1.735888957977295, "logits/rejected": -1.77731454372406, "logps/chosen": -111.6415023803711, "logps/rejected": -202.2088623046875, "loss": 0.0161, "rewards/accuracies": 1.0, "rewards/chosen": -2.4343886375427246, "rewards/margins": 9.205720901489258, "rewards/rejected": -11.64011001586914, "step": 2101 }, { "epoch": 3.37, "learning_rate": 3.473047958779231e-07, "logits/chosen": -1.6401386260986328, "logits/rejected": -1.64959716796875, "logps/chosen": -116.19493865966797, "logps/rejected": -205.484619140625, "loss": 0.0886, "rewards/accuracies": 1.0, "rewards/chosen": -3.709129571914673, "rewards/margins": 8.487519264221191, "rewards/rejected": -12.196647644042969, "step": 2102 }, { "epoch": 3.38, "learning_rate": 3.47205707491082e-07, "logits/chosen": -1.7416654825210571, "logits/rejected": -1.7254853248596191, "logps/chosen": -120.11166381835938, "logps/rejected": -190.72552490234375, "loss": 0.0232, "rewards/accuracies": 1.0, "rewards/chosen": -1.7783774137496948, "rewards/margins": 8.362340927124023, "rewards/rejected": -10.140718460083008, "step": 2103 }, { "epoch": 3.38, "learning_rate": 3.47106619104241e-07, "logits/chosen": -1.6619935035705566, "logits/rejected": -1.6697955131530762, "logps/chosen": -137.00656127929688, "logps/rejected": -199.46624755859375, "loss": 0.0429, "rewards/accuracies": 1.0, "rewards/chosen": -3.244231939315796, "rewards/margins": 5.712350845336914, "rewards/rejected": -8.956583023071289, "step": 2104 }, { "epoch": 3.38, "learning_rate": 3.470075307173999e-07, "logits/chosen": -1.5250502824783325, "logits/rejected": -1.5787010192871094, "logps/chosen": -83.69013977050781, "logps/rejected": -198.79531860351562, "loss": 0.0483, "rewards/accuracies": 1.0, "rewards/chosen": -2.0251948833465576, "rewards/margins": 9.062454223632812, "rewards/rejected": -11.087648391723633, "step": 2105 }, { "epoch": 3.38, "learning_rate": 3.4690844233055883e-07, "logits/chosen": -1.5914771556854248, "logits/rejected": -1.5557430982589722, "logps/chosen": -98.78699493408203, "logps/rejected": -163.16671752929688, "loss": 0.0138, "rewards/accuracies": 1.0, "rewards/chosen": -1.5047211647033691, "rewards/margins": 6.872321128845215, "rewards/rejected": -8.377042770385742, "step": 2106 }, { "epoch": 3.38, "learning_rate": 3.468093539437178e-07, "logits/chosen": -1.7883110046386719, "logits/rejected": -1.8109058141708374, "logps/chosen": -104.06839752197266, "logps/rejected": -201.0550994873047, "loss": 0.0123, "rewards/accuracies": 1.0, "rewards/chosen": -2.9065914154052734, "rewards/margins": 7.728572845458984, "rewards/rejected": -10.635164260864258, "step": 2107 }, { "epoch": 3.38, "learning_rate": 3.467102655568767e-07, "logits/chosen": -1.7161736488342285, "logits/rejected": -1.7161662578582764, "logps/chosen": -111.885498046875, "logps/rejected": -184.13526916503906, "loss": 0.0154, "rewards/accuracies": 1.0, "rewards/chosen": -1.637997031211853, "rewards/margins": 7.011697769165039, "rewards/rejected": -8.64969539642334, "step": 2108 }, { "epoch": 3.39, "learning_rate": 3.466111771700356e-07, "logits/chosen": -1.6591455936431885, "logits/rejected": -1.6691287755966187, "logps/chosen": -135.85447692871094, "logps/rejected": -191.85086059570312, "loss": 0.0121, "rewards/accuracies": 1.0, "rewards/chosen": -2.9727344512939453, "rewards/margins": 5.6169891357421875, "rewards/rejected": -8.589723587036133, "step": 2109 }, { "epoch": 3.39, "learning_rate": 3.465120887831946e-07, "logits/chosen": -1.702610731124878, "logits/rejected": -1.603570818901062, "logps/chosen": -93.38331604003906, "logps/rejected": -196.10940551757812, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/chosen": -1.3597171306610107, "rewards/margins": 9.624948501586914, "rewards/rejected": -10.984665870666504, "step": 2110 }, { "epoch": 3.39, "learning_rate": 3.464130003963535e-07, "logits/chosen": -1.5169848203659058, "logits/rejected": -1.5099804401397705, "logps/chosen": -130.1094970703125, "logps/rejected": -196.49183654785156, "loss": 0.1969, "rewards/accuracies": 1.0, "rewards/chosen": -3.409634828567505, "rewards/margins": 6.069133758544922, "rewards/rejected": -9.478768348693848, "step": 2111 }, { "epoch": 3.39, "learning_rate": 3.463139120095125e-07, "logits/chosen": -1.6773943901062012, "logits/rejected": -1.6937811374664307, "logps/chosen": -87.21916198730469, "logps/rejected": -186.1132049560547, "loss": 0.0207, "rewards/accuracies": 1.0, "rewards/chosen": -1.245816707611084, "rewards/margins": 8.923694610595703, "rewards/rejected": -10.169511795043945, "step": 2112 }, { "epoch": 3.39, "learning_rate": 3.462148236226714e-07, "logits/chosen": -1.559417963027954, "logits/rejected": -1.6223130226135254, "logps/chosen": -94.95071411132812, "logps/rejected": -168.73936462402344, "loss": 0.0084, "rewards/accuracies": 1.0, "rewards/chosen": -2.944291353225708, "rewards/margins": 6.240782737731934, "rewards/rejected": -9.185073852539062, "step": 2113 }, { "epoch": 3.39, "learning_rate": 3.461157352358303e-07, "logits/chosen": -1.6678355932235718, "logits/rejected": -1.6845722198486328, "logps/chosen": -133.56356811523438, "logps/rejected": -201.32028198242188, "loss": 0.0191, "rewards/accuracies": 1.0, "rewards/chosen": -4.0608415603637695, "rewards/margins": 4.923059463500977, "rewards/rejected": -8.983901023864746, "step": 2114 }, { "epoch": 3.39, "learning_rate": 3.460166468489893e-07, "logits/chosen": -1.569079875946045, "logits/rejected": -1.6221654415130615, "logps/chosen": -132.33856201171875, "logps/rejected": -213.79824829101562, "loss": 0.0051, "rewards/accuracies": 1.0, "rewards/chosen": -2.9205539226531982, "rewards/margins": 9.98185920715332, "rewards/rejected": -12.902413368225098, "step": 2115 }, { "epoch": 3.4, "learning_rate": 3.459175584621482e-07, "logits/chosen": -1.5844814777374268, "logits/rejected": -1.6246367692947388, "logps/chosen": -105.90972137451172, "logps/rejected": -203.58642578125, "loss": 0.0239, "rewards/accuracies": 1.0, "rewards/chosen": -2.0645363330841064, "rewards/margins": 7.86533784866333, "rewards/rejected": -9.9298734664917, "step": 2116 }, { "epoch": 3.4, "learning_rate": 3.458184700753072e-07, "logits/chosen": -1.746612548828125, "logits/rejected": -1.6887919902801514, "logps/chosen": -84.11842346191406, "logps/rejected": -147.73593139648438, "loss": 0.0368, "rewards/accuracies": 1.0, "rewards/chosen": -0.5091730356216431, "rewards/margins": 5.924288749694824, "rewards/rejected": -6.4334611892700195, "step": 2117 }, { "epoch": 3.4, "learning_rate": 3.457193816884661e-07, "logits/chosen": -1.6343305110931396, "logits/rejected": -1.7002191543579102, "logps/chosen": -110.46508026123047, "logps/rejected": -196.3812713623047, "loss": 0.0324, "rewards/accuracies": 1.0, "rewards/chosen": -3.3924858570098877, "rewards/margins": 7.36801815032959, "rewards/rejected": -10.760503768920898, "step": 2118 }, { "epoch": 3.4, "learning_rate": 3.45620293301625e-07, "logits/chosen": -1.7157305479049683, "logits/rejected": -1.6764408349990845, "logps/chosen": -131.21575927734375, "logps/rejected": -170.7235107421875, "loss": 0.0214, "rewards/accuracies": 1.0, "rewards/chosen": -3.7900822162628174, "rewards/margins": 6.043600082397461, "rewards/rejected": -9.833683013916016, "step": 2119 }, { "epoch": 3.4, "learning_rate": 3.45521204914784e-07, "logits/chosen": -1.7368850708007812, "logits/rejected": -1.682816982269287, "logps/chosen": -86.04376983642578, "logps/rejected": -167.6602020263672, "loss": 0.0315, "rewards/accuracies": 1.0, "rewards/chosen": -0.5637054443359375, "rewards/margins": 8.701826095581055, "rewards/rejected": -9.265531539916992, "step": 2120 }, { "epoch": 3.4, "learning_rate": 3.454221165279429e-07, "logits/chosen": -1.7683706283569336, "logits/rejected": -1.7663694620132446, "logps/chosen": -149.86444091796875, "logps/rejected": -201.77944946289062, "loss": 0.0717, "rewards/accuracies": 1.0, "rewards/chosen": -5.168430805206299, "rewards/margins": 4.790733814239502, "rewards/rejected": -9.9591646194458, "step": 2121 }, { "epoch": 3.41, "learning_rate": 3.4532302814110187e-07, "logits/chosen": -1.4711833000183105, "logits/rejected": -1.5797113180160522, "logps/chosen": -106.86261749267578, "logps/rejected": -177.88555908203125, "loss": 0.0292, "rewards/accuracies": 1.0, "rewards/chosen": -3.182180166244507, "rewards/margins": 6.106150150299072, "rewards/rejected": -9.288330078125, "step": 2122 }, { "epoch": 3.41, "learning_rate": 3.452239397542608e-07, "logits/chosen": -1.6761380434036255, "logits/rejected": -1.6101338863372803, "logps/chosen": -130.4086151123047, "logps/rejected": -195.48348999023438, "loss": 0.0163, "rewards/accuracies": 1.0, "rewards/chosen": -3.1683037281036377, "rewards/margins": 6.8069868087768555, "rewards/rejected": -9.97529125213623, "step": 2123 }, { "epoch": 3.41, "learning_rate": 3.451248513674197e-07, "logits/chosen": -1.6642192602157593, "logits/rejected": -1.6988962888717651, "logps/chosen": -123.58120727539062, "logps/rejected": -211.19435119628906, "loss": 0.0247, "rewards/accuracies": 1.0, "rewards/chosen": -2.4430480003356934, "rewards/margins": 9.166884422302246, "rewards/rejected": -11.609932899475098, "step": 2124 }, { "epoch": 3.41, "learning_rate": 3.450257629805787e-07, "logits/chosen": -1.730371117591858, "logits/rejected": -1.6416380405426025, "logps/chosen": -97.18882751464844, "logps/rejected": -174.91757202148438, "loss": 0.014, "rewards/accuracies": 1.0, "rewards/chosen": -1.8670603036880493, "rewards/margins": 7.787100791931152, "rewards/rejected": -9.65416145324707, "step": 2125 }, { "epoch": 3.41, "learning_rate": 3.449266745937376e-07, "logits/chosen": -1.6365939378738403, "logits/rejected": -1.7273750305175781, "logps/chosen": -116.5837173461914, "logps/rejected": -201.31570434570312, "loss": 0.0378, "rewards/accuracies": 1.0, "rewards/chosen": -3.8996098041534424, "rewards/margins": 7.166316509246826, "rewards/rejected": -11.065925598144531, "step": 2126 }, { "epoch": 3.41, "learning_rate": 3.4482758620689656e-07, "logits/chosen": -1.6202144622802734, "logits/rejected": -1.5693854093551636, "logps/chosen": -142.09185791015625, "logps/rejected": -213.19493103027344, "loss": 0.0276, "rewards/accuracies": 1.0, "rewards/chosen": -3.31125545501709, "rewards/margins": 8.871237754821777, "rewards/rejected": -12.182493209838867, "step": 2127 }, { "epoch": 3.42, "learning_rate": 3.4472849782005547e-07, "logits/chosen": -1.5818853378295898, "logits/rejected": -1.6726527214050293, "logps/chosen": -140.8394012451172, "logps/rejected": -207.14654541015625, "loss": 0.0077, "rewards/accuracies": 1.0, "rewards/chosen": -5.689122200012207, "rewards/margins": 4.647453308105469, "rewards/rejected": -10.336575508117676, "step": 2128 }, { "epoch": 3.42, "learning_rate": 3.446294094332144e-07, "logits/chosen": -1.607424259185791, "logits/rejected": -1.6349148750305176, "logps/chosen": -85.53028106689453, "logps/rejected": -153.42079162597656, "loss": 0.0156, "rewards/accuracies": 1.0, "rewards/chosen": -1.7479677200317383, "rewards/margins": 6.573788642883301, "rewards/rejected": -8.321756362915039, "step": 2129 }, { "epoch": 3.42, "learning_rate": 3.445303210463734e-07, "logits/chosen": -1.6290106773376465, "logits/rejected": -1.5881041288375854, "logps/chosen": -172.427001953125, "logps/rejected": -220.6194305419922, "loss": 0.0316, "rewards/accuracies": 1.0, "rewards/chosen": -5.804256916046143, "rewards/margins": 7.604361534118652, "rewards/rejected": -13.408618927001953, "step": 2130 }, { "epoch": 3.42, "learning_rate": 3.444312326595323e-07, "logits/chosen": -1.7298390865325928, "logits/rejected": -1.7589638233184814, "logps/chosen": -93.51658630371094, "logps/rejected": -198.98550415039062, "loss": 0.0204, "rewards/accuracies": 1.0, "rewards/chosen": -1.4621872901916504, "rewards/margins": 8.678671836853027, "rewards/rejected": -10.140859603881836, "step": 2131 }, { "epoch": 3.42, "learning_rate": 3.4433214427269125e-07, "logits/chosen": -1.7806942462921143, "logits/rejected": -1.8234977722167969, "logps/chosen": -117.62294006347656, "logps/rejected": -206.7951202392578, "loss": 0.0083, "rewards/accuracies": 1.0, "rewards/chosen": -2.904381036758423, "rewards/margins": 8.252351760864258, "rewards/rejected": -11.156732559204102, "step": 2132 }, { "epoch": 3.42, "learning_rate": 3.4423305588585016e-07, "logits/chosen": -1.6317096948623657, "logits/rejected": -1.5454776287078857, "logps/chosen": -117.43798828125, "logps/rejected": -140.1502685546875, "loss": 0.0073, "rewards/accuracies": 1.0, "rewards/chosen": -2.5689504146575928, "rewards/margins": 4.75799560546875, "rewards/rejected": -7.326945781707764, "step": 2133 }, { "epoch": 3.43, "learning_rate": 3.4413396749900907e-07, "logits/chosen": -1.6589980125427246, "logits/rejected": -1.6901204586029053, "logps/chosen": -121.6462631225586, "logps/rejected": -193.60992431640625, "loss": 0.0119, "rewards/accuracies": 1.0, "rewards/chosen": -2.958437204360962, "rewards/margins": 8.330808639526367, "rewards/rejected": -11.28924560546875, "step": 2134 }, { "epoch": 3.43, "learning_rate": 3.4403487911216803e-07, "logits/chosen": -1.6623107194900513, "logits/rejected": -1.7446346282958984, "logps/chosen": -120.94762420654297, "logps/rejected": -222.643798828125, "loss": 0.1269, "rewards/accuracies": 1.0, "rewards/chosen": -3.342770576477051, "rewards/margins": 8.552261352539062, "rewards/rejected": -11.895031929016113, "step": 2135 }, { "epoch": 3.43, "learning_rate": 3.43935790725327e-07, "logits/chosen": -1.6347672939300537, "logits/rejected": -1.6703367233276367, "logps/chosen": -106.63505554199219, "logps/rejected": -202.09063720703125, "loss": 0.0761, "rewards/accuracies": 0.75, "rewards/chosen": -3.0990548133850098, "rewards/margins": 7.805667400360107, "rewards/rejected": -10.904723167419434, "step": 2136 }, { "epoch": 3.43, "learning_rate": 3.4383670233848595e-07, "logits/chosen": -1.7434719800949097, "logits/rejected": -1.8070636987686157, "logps/chosen": -110.2618637084961, "logps/rejected": -199.59222412109375, "loss": 0.111, "rewards/accuracies": 1.0, "rewards/chosen": -3.5942676067352295, "rewards/margins": 8.111776351928711, "rewards/rejected": -11.70604419708252, "step": 2137 }, { "epoch": 3.43, "learning_rate": 3.4373761395164485e-07, "logits/chosen": -1.804626226425171, "logits/rejected": -1.8199589252471924, "logps/chosen": -138.87066650390625, "logps/rejected": -195.74761962890625, "loss": 0.1292, "rewards/accuracies": 1.0, "rewards/chosen": -3.5542654991149902, "rewards/margins": 6.609280109405518, "rewards/rejected": -10.163545608520508, "step": 2138 }, { "epoch": 3.43, "learning_rate": 3.4363852556480376e-07, "logits/chosen": -1.6549772024154663, "logits/rejected": -1.621967077255249, "logps/chosen": -133.55752563476562, "logps/rejected": -192.957275390625, "loss": 0.0151, "rewards/accuracies": 1.0, "rewards/chosen": -3.4377353191375732, "rewards/margins": 5.737541198730469, "rewards/rejected": -9.175275802612305, "step": 2139 }, { "epoch": 3.43, "learning_rate": 3.435394371779627e-07, "logits/chosen": -1.7712923288345337, "logits/rejected": -1.8134711980819702, "logps/chosen": -146.56890869140625, "logps/rejected": -207.39730834960938, "loss": 0.0142, "rewards/accuracies": 1.0, "rewards/chosen": -4.399912357330322, "rewards/margins": 6.217414379119873, "rewards/rejected": -10.617326736450195, "step": 2140 }, { "epoch": 3.44, "learning_rate": 3.434403487911217e-07, "logits/chosen": -1.793640375137329, "logits/rejected": -1.7274856567382812, "logps/chosen": -115.07180786132812, "logps/rejected": -169.2913055419922, "loss": 0.1645, "rewards/accuracies": 0.75, "rewards/chosen": -2.1477694511413574, "rewards/margins": 7.205746650695801, "rewards/rejected": -9.353515625, "step": 2141 }, { "epoch": 3.44, "learning_rate": 3.4334126040428064e-07, "logits/chosen": -1.4984748363494873, "logits/rejected": -1.5911855697631836, "logps/chosen": -84.72413635253906, "logps/rejected": -171.7322998046875, "loss": 0.0192, "rewards/accuracies": 1.0, "rewards/chosen": -2.6862950325012207, "rewards/margins": 5.60990047454834, "rewards/rejected": -8.296195030212402, "step": 2142 }, { "epoch": 3.44, "learning_rate": 3.4324217201743955e-07, "logits/chosen": -1.599632978439331, "logits/rejected": -1.5431455373764038, "logps/chosen": -152.15631103515625, "logps/rejected": -175.05084228515625, "loss": 0.0987, "rewards/accuracies": 0.75, "rewards/chosen": -3.8824384212493896, "rewards/margins": 3.907371759414673, "rewards/rejected": -7.7898101806640625, "step": 2143 }, { "epoch": 3.44, "learning_rate": 3.4314308363059845e-07, "logits/chosen": -1.4776054620742798, "logits/rejected": -1.522964358329773, "logps/chosen": -99.07505798339844, "logps/rejected": -193.6692352294922, "loss": 0.1172, "rewards/accuracies": 1.0, "rewards/chosen": -1.6232134103775024, "rewards/margins": 9.079904556274414, "rewards/rejected": -10.703117370605469, "step": 2144 }, { "epoch": 3.44, "learning_rate": 3.430439952437574e-07, "logits/chosen": -1.6149190664291382, "logits/rejected": -1.6089062690734863, "logps/chosen": -111.9411849975586, "logps/rejected": -193.4600067138672, "loss": 0.0299, "rewards/accuracies": 1.0, "rewards/chosen": -1.6378111839294434, "rewards/margins": 7.838544845581055, "rewards/rejected": -9.476356506347656, "step": 2145 }, { "epoch": 3.44, "learning_rate": 3.429449068569164e-07, "logits/chosen": -1.6998226642608643, "logits/rejected": -1.805789589881897, "logps/chosen": -94.07997131347656, "logps/rejected": -194.38330078125, "loss": 0.0141, "rewards/accuracies": 1.0, "rewards/chosen": -2.320855140686035, "rewards/margins": 7.899065017700195, "rewards/rejected": -10.219921112060547, "step": 2146 }, { "epoch": 3.45, "learning_rate": 3.428458184700753e-07, "logits/chosen": -1.6409660577774048, "logits/rejected": -1.6643823385238647, "logps/chosen": -93.9280776977539, "logps/rejected": -180.39480590820312, "loss": 0.046, "rewards/accuracies": 1.0, "rewards/chosen": -0.35299554467201233, "rewards/margins": 7.9883623123168945, "rewards/rejected": -8.341358184814453, "step": 2147 }, { "epoch": 3.45, "learning_rate": 3.4274673008323424e-07, "logits/chosen": -1.6759159564971924, "logits/rejected": -1.6278804540634155, "logps/chosen": -115.24301147460938, "logps/rejected": -178.8355712890625, "loss": 0.022, "rewards/accuracies": 1.0, "rewards/chosen": -1.93955397605896, "rewards/margins": 7.745891094207764, "rewards/rejected": -9.685444831848145, "step": 2148 }, { "epoch": 3.45, "learning_rate": 3.4264764169639315e-07, "logits/chosen": -1.6501338481903076, "logits/rejected": -1.7302364110946655, "logps/chosen": -96.22069549560547, "logps/rejected": -189.64724731445312, "loss": 0.0091, "rewards/accuracies": 1.0, "rewards/chosen": -2.2895054817199707, "rewards/margins": 8.564990997314453, "rewards/rejected": -10.854496002197266, "step": 2149 }, { "epoch": 3.45, "learning_rate": 3.425485533095521e-07, "logits/chosen": -1.8039555549621582, "logits/rejected": -1.6177948713302612, "logps/chosen": -149.35165405273438, "logps/rejected": -182.60836791992188, "loss": 0.0378, "rewards/accuracies": 1.0, "rewards/chosen": -3.2662148475646973, "rewards/margins": 5.740684509277344, "rewards/rejected": -9.006898880004883, "step": 2150 }, { "epoch": 3.45, "learning_rate": 3.42449464922711e-07, "logits/chosen": -1.7074477672576904, "logits/rejected": -1.7142506837844849, "logps/chosen": -91.75123596191406, "logps/rejected": -152.29296875, "loss": 0.081, "rewards/accuracies": 1.0, "rewards/chosen": -1.9018985033035278, "rewards/margins": 6.245310306549072, "rewards/rejected": -8.147209167480469, "step": 2151 }, { "epoch": 3.45, "learning_rate": 3.4235037653586997e-07, "logits/chosen": -1.5900598764419556, "logits/rejected": -1.6221089363098145, "logps/chosen": -119.46685028076172, "logps/rejected": -194.05747985839844, "loss": 0.051, "rewards/accuracies": 0.75, "rewards/chosen": -4.081146240234375, "rewards/margins": 6.115410804748535, "rewards/rejected": -10.19655704498291, "step": 2152 }, { "epoch": 3.46, "learning_rate": 3.4225128814902893e-07, "logits/chosen": -1.8479186296463013, "logits/rejected": -1.8195433616638184, "logps/chosen": -80.700927734375, "logps/rejected": -155.3946533203125, "loss": 0.0866, "rewards/accuracies": 1.0, "rewards/chosen": -0.9450441598892212, "rewards/margins": 7.4348907470703125, "rewards/rejected": -8.379934310913086, "step": 2153 }, { "epoch": 3.46, "learning_rate": 3.4215219976218784e-07, "logits/chosen": -1.7731728553771973, "logits/rejected": -1.7697763442993164, "logps/chosen": -130.59503173828125, "logps/rejected": -223.90118408203125, "loss": 0.0658, "rewards/accuracies": 1.0, "rewards/chosen": -3.0572190284729004, "rewards/margins": 9.36357307434082, "rewards/rejected": -12.420791625976562, "step": 2154 }, { "epoch": 3.46, "learning_rate": 3.420531113753468e-07, "logits/chosen": -1.6587774753570557, "logits/rejected": -1.6017616987228394, "logps/chosen": -81.21226501464844, "logps/rejected": -160.55992126464844, "loss": 0.0046, "rewards/accuracies": 1.0, "rewards/chosen": -1.1182959079742432, "rewards/margins": 9.271583557128906, "rewards/rejected": -10.38987922668457, "step": 2155 }, { "epoch": 3.46, "learning_rate": 3.419540229885057e-07, "logits/chosen": -1.5914591550827026, "logits/rejected": -1.5598258972167969, "logps/chosen": -100.12600708007812, "logps/rejected": -184.84909057617188, "loss": 0.0255, "rewards/accuracies": 1.0, "rewards/chosen": -2.230988025665283, "rewards/margins": 7.8361358642578125, "rewards/rejected": -10.067123413085938, "step": 2156 }, { "epoch": 3.46, "learning_rate": 3.4185493460166467e-07, "logits/chosen": -1.7248643636703491, "logits/rejected": -1.703332781791687, "logps/chosen": -73.2528305053711, "logps/rejected": -125.4189682006836, "loss": 0.0233, "rewards/accuracies": 1.0, "rewards/chosen": -0.4913840591907501, "rewards/margins": 5.6052398681640625, "rewards/rejected": -6.09662389755249, "step": 2157 }, { "epoch": 3.46, "learning_rate": 3.417558462148236e-07, "logits/chosen": -1.6010689735412598, "logits/rejected": -1.6112291812896729, "logps/chosen": -99.1246109008789, "logps/rejected": -206.788330078125, "loss": 0.0264, "rewards/accuracies": 1.0, "rewards/chosen": -1.6860393285751343, "rewards/margins": 10.67763900756836, "rewards/rejected": -12.363677978515625, "step": 2158 }, { "epoch": 3.47, "learning_rate": 3.4165675782798253e-07, "logits/chosen": -1.7529363632202148, "logits/rejected": -1.6900386810302734, "logps/chosen": -150.697021484375, "logps/rejected": -179.15399169921875, "loss": 0.0159, "rewards/accuracies": 1.0, "rewards/chosen": -2.6851181983947754, "rewards/margins": 5.068181991577148, "rewards/rejected": -7.753300666809082, "step": 2159 }, { "epoch": 3.47, "learning_rate": 3.415576694411415e-07, "logits/chosen": -1.6619263887405396, "logits/rejected": -1.7629179954528809, "logps/chosen": -111.65821838378906, "logps/rejected": -182.08128356933594, "loss": 0.057, "rewards/accuracies": 1.0, "rewards/chosen": -1.945007562637329, "rewards/margins": 5.473406791687012, "rewards/rejected": -7.418414115905762, "step": 2160 }, { "epoch": 3.47, "learning_rate": 3.414585810543004e-07, "logits/chosen": -1.8316186666488647, "logits/rejected": -1.789556860923767, "logps/chosen": -77.19055938720703, "logps/rejected": -186.62420654296875, "loss": 0.0382, "rewards/accuracies": 1.0, "rewards/chosen": -1.0987797975540161, "rewards/margins": 10.91048812866211, "rewards/rejected": -12.009267807006836, "step": 2161 }, { "epoch": 3.47, "learning_rate": 3.4135949266745936e-07, "logits/chosen": -1.777234435081482, "logits/rejected": -1.779771327972412, "logps/chosen": -109.99400329589844, "logps/rejected": -183.2122802734375, "loss": 0.0765, "rewards/accuracies": 1.0, "rewards/chosen": -2.4982142448425293, "rewards/margins": 7.62421989440918, "rewards/rejected": -10.122434616088867, "step": 2162 }, { "epoch": 3.47, "learning_rate": 3.412604042806183e-07, "logits/chosen": -1.716835856437683, "logits/rejected": -1.6154719591140747, "logps/chosen": -100.1056900024414, "logps/rejected": -193.4738006591797, "loss": 0.0118, "rewards/accuracies": 1.0, "rewards/chosen": -1.131496548652649, "rewards/margins": 10.311134338378906, "rewards/rejected": -11.44262981414795, "step": 2163 }, { "epoch": 3.47, "learning_rate": 3.411613158937772e-07, "logits/chosen": -1.6234805583953857, "logits/rejected": -1.625514030456543, "logps/chosen": -108.8163070678711, "logps/rejected": -195.77316284179688, "loss": 0.0163, "rewards/accuracies": 1.0, "rewards/chosen": -2.2255983352661133, "rewards/margins": 7.656390190124512, "rewards/rejected": -9.881988525390625, "step": 2164 }, { "epoch": 3.48, "learning_rate": 3.410622275069362e-07, "logits/chosen": -1.7151618003845215, "logits/rejected": -1.685014009475708, "logps/chosen": -157.894775390625, "logps/rejected": -198.50721740722656, "loss": 0.0311, "rewards/accuracies": 1.0, "rewards/chosen": -5.278841495513916, "rewards/margins": 4.181025981903076, "rewards/rejected": -9.459867477416992, "step": 2165 }, { "epoch": 3.48, "learning_rate": 3.409631391200951e-07, "logits/chosen": -1.7651591300964355, "logits/rejected": -1.693076729774475, "logps/chosen": -145.2887420654297, "logps/rejected": -215.3388671875, "loss": 0.0331, "rewards/accuracies": 1.0, "rewards/chosen": -4.234164714813232, "rewards/margins": 7.307461738586426, "rewards/rejected": -11.5416259765625, "step": 2166 }, { "epoch": 3.48, "learning_rate": 3.4086405073325405e-07, "logits/chosen": -1.5455334186553955, "logits/rejected": -1.5793042182922363, "logps/chosen": -104.68943786621094, "logps/rejected": -178.998046875, "loss": 0.0203, "rewards/accuracies": 1.0, "rewards/chosen": -2.0828378200531006, "rewards/margins": 8.346479415893555, "rewards/rejected": -10.429317474365234, "step": 2167 }, { "epoch": 3.48, "learning_rate": 3.40764962346413e-07, "logits/chosen": -1.5963637828826904, "logits/rejected": -1.6142406463623047, "logps/chosen": -79.9174575805664, "logps/rejected": -200.72665405273438, "loss": 0.0504, "rewards/accuracies": 1.0, "rewards/chosen": -0.7019015550613403, "rewards/margins": 11.260303497314453, "rewards/rejected": -11.962204933166504, "step": 2168 }, { "epoch": 3.48, "learning_rate": 3.406658739595719e-07, "logits/chosen": -1.7945754528045654, "logits/rejected": -1.6899678707122803, "logps/chosen": -102.81196594238281, "logps/rejected": -158.63314819335938, "loss": 0.0327, "rewards/accuracies": 1.0, "rewards/chosen": -1.9191205501556396, "rewards/margins": 6.1256937980651855, "rewards/rejected": -8.044815063476562, "step": 2169 }, { "epoch": 3.48, "learning_rate": 3.405667855727309e-07, "logits/chosen": -1.601143717765808, "logits/rejected": -1.599191427230835, "logps/chosen": -93.4360580444336, "logps/rejected": -151.4473876953125, "loss": 0.038, "rewards/accuracies": 1.0, "rewards/chosen": -2.0266518592834473, "rewards/margins": 6.015458106994629, "rewards/rejected": -8.042110443115234, "step": 2170 }, { "epoch": 3.48, "learning_rate": 3.404676971858898e-07, "logits/chosen": -1.770904779434204, "logits/rejected": -1.8050644397735596, "logps/chosen": -117.44557189941406, "logps/rejected": -191.75460815429688, "loss": 0.0079, "rewards/accuracies": 1.0, "rewards/chosen": -2.874706745147705, "rewards/margins": 7.015566349029541, "rewards/rejected": -9.890273094177246, "step": 2171 }, { "epoch": 3.49, "learning_rate": 3.403686087990487e-07, "logits/chosen": -1.5838853120803833, "logits/rejected": -1.678159475326538, "logps/chosen": -85.75621795654297, "logps/rejected": -196.12625122070312, "loss": 0.0606, "rewards/accuracies": 1.0, "rewards/chosen": -1.6742981672286987, "rewards/margins": 8.555357933044434, "rewards/rejected": -10.229656219482422, "step": 2172 }, { "epoch": 3.49, "learning_rate": 3.402695204122077e-07, "logits/chosen": -1.595556616783142, "logits/rejected": -1.712563157081604, "logps/chosen": -100.57102966308594, "logps/rejected": -165.27557373046875, "loss": 0.0172, "rewards/accuracies": 1.0, "rewards/chosen": -1.1026413440704346, "rewards/margins": 5.061697959899902, "rewards/rejected": -6.164339542388916, "step": 2173 }, { "epoch": 3.49, "learning_rate": 3.401704320253666e-07, "logits/chosen": -1.6338465213775635, "logits/rejected": -1.655766487121582, "logps/chosen": -139.91232299804688, "logps/rejected": -162.613525390625, "loss": 0.0559, "rewards/accuracies": 0.75, "rewards/chosen": -3.9903364181518555, "rewards/margins": 2.745312213897705, "rewards/rejected": -6.735648155212402, "step": 2174 }, { "epoch": 3.49, "learning_rate": 3.4007134363852557e-07, "logits/chosen": -1.583310604095459, "logits/rejected": -1.509516954421997, "logps/chosen": -129.80990600585938, "logps/rejected": -156.01466369628906, "loss": 0.029, "rewards/accuracies": 1.0, "rewards/chosen": -3.5212085247039795, "rewards/margins": 4.120067119598389, "rewards/rejected": -7.641275405883789, "step": 2175 }, { "epoch": 3.49, "learning_rate": 3.399722552516845e-07, "logits/chosen": -1.7003259658813477, "logits/rejected": -1.6756852865219116, "logps/chosen": -118.69117736816406, "logps/rejected": -203.1019287109375, "loss": 0.0433, "rewards/accuracies": 1.0, "rewards/chosen": -1.8331968784332275, "rewards/margins": 7.614658355712891, "rewards/rejected": -9.447854995727539, "step": 2176 }, { "epoch": 3.49, "learning_rate": 3.398731668648434e-07, "logits/chosen": -1.5054864883422852, "logits/rejected": -1.5412368774414062, "logps/chosen": -99.67404174804688, "logps/rejected": -192.83258056640625, "loss": 0.0103, "rewards/accuracies": 1.0, "rewards/chosen": -2.4855995178222656, "rewards/margins": 9.165367126464844, "rewards/rejected": -11.65096664428711, "step": 2177 }, { "epoch": 3.5, "learning_rate": 3.397740784780024e-07, "logits/chosen": -1.7287299633026123, "logits/rejected": -1.7606894969940186, "logps/chosen": -76.34248352050781, "logps/rejected": -149.45217895507812, "loss": 0.0509, "rewards/accuracies": 1.0, "rewards/chosen": -0.8506460189819336, "rewards/margins": 6.627985954284668, "rewards/rejected": -7.478631973266602, "step": 2178 }, { "epoch": 3.5, "learning_rate": 3.396749900911613e-07, "logits/chosen": -1.6097536087036133, "logits/rejected": -1.6574169397354126, "logps/chosen": -114.58055114746094, "logps/rejected": -158.13880920410156, "loss": 0.0348, "rewards/accuracies": 1.0, "rewards/chosen": -2.5772671699523926, "rewards/margins": 5.356046676635742, "rewards/rejected": -7.933313846588135, "step": 2179 }, { "epoch": 3.5, "learning_rate": 3.395759017043202e-07, "logits/chosen": -1.6381224393844604, "logits/rejected": -1.7310000658035278, "logps/chosen": -125.16942596435547, "logps/rejected": -201.40597534179688, "loss": 0.0596, "rewards/accuracies": 1.0, "rewards/chosen": -3.0556318759918213, "rewards/margins": 6.587934494018555, "rewards/rejected": -9.643567085266113, "step": 2180 }, { "epoch": 3.5, "learning_rate": 3.3947681331747917e-07, "logits/chosen": -1.6124820709228516, "logits/rejected": -1.594954013824463, "logps/chosen": -109.06853485107422, "logps/rejected": -179.33193969726562, "loss": 0.0081, "rewards/accuracies": 1.0, "rewards/chosen": -3.205012321472168, "rewards/margins": 6.765749931335449, "rewards/rejected": -9.970762252807617, "step": 2181 }, { "epoch": 3.5, "learning_rate": 3.393777249306381e-07, "logits/chosen": -1.7686759233474731, "logits/rejected": -1.670459508895874, "logps/chosen": -133.2688751220703, "logps/rejected": -222.00338745117188, "loss": 0.063, "rewards/accuracies": 1.0, "rewards/chosen": -2.007737874984741, "rewards/margins": 9.152046203613281, "rewards/rejected": -11.159785270690918, "step": 2182 }, { "epoch": 3.5, "learning_rate": 3.392786365437971e-07, "logits/chosen": -1.6169896125793457, "logits/rejected": -1.6745312213897705, "logps/chosen": -78.66304016113281, "logps/rejected": -166.64132690429688, "loss": 0.01, "rewards/accuracies": 1.0, "rewards/chosen": -0.6870551705360413, "rewards/margins": 8.052303314208984, "rewards/rejected": -8.739358901977539, "step": 2183 }, { "epoch": 3.51, "learning_rate": 3.39179548156956e-07, "logits/chosen": -1.4757592678070068, "logits/rejected": -1.603187084197998, "logps/chosen": -112.26322174072266, "logps/rejected": -206.55282592773438, "loss": 0.0311, "rewards/accuracies": 1.0, "rewards/chosen": -4.689795970916748, "rewards/margins": 5.225701332092285, "rewards/rejected": -9.915496826171875, "step": 2184 }, { "epoch": 3.51, "learning_rate": 3.390804597701149e-07, "logits/chosen": -1.6145761013031006, "logits/rejected": -1.5781753063201904, "logps/chosen": -87.34968566894531, "logps/rejected": -176.5531768798828, "loss": 0.1058, "rewards/accuracies": 0.75, "rewards/chosen": -2.6740260124206543, "rewards/margins": 7.926778793334961, "rewards/rejected": -10.600804328918457, "step": 2185 }, { "epoch": 3.51, "learning_rate": 3.3898137138327386e-07, "logits/chosen": -1.6855660676956177, "logits/rejected": -1.730371117591858, "logps/chosen": -108.11871337890625, "logps/rejected": -203.3104248046875, "loss": 0.0153, "rewards/accuracies": 1.0, "rewards/chosen": -2.306955575942993, "rewards/margins": 9.912744522094727, "rewards/rejected": -12.21969985961914, "step": 2186 }, { "epoch": 3.51, "learning_rate": 3.3888228299643277e-07, "logits/chosen": -1.676460862159729, "logits/rejected": -1.5703682899475098, "logps/chosen": -101.76032257080078, "logps/rejected": -182.075439453125, "loss": 0.0075, "rewards/accuracies": 1.0, "rewards/chosen": -2.0002734661102295, "rewards/margins": 8.948898315429688, "rewards/rejected": -10.94917106628418, "step": 2187 }, { "epoch": 3.51, "learning_rate": 3.387831946095918e-07, "logits/chosen": -1.664189100265503, "logits/rejected": -1.7848117351531982, "logps/chosen": -116.78555297851562, "logps/rejected": -207.26173400878906, "loss": 0.0566, "rewards/accuracies": 1.0, "rewards/chosen": -3.7308411598205566, "rewards/margins": 8.269793510437012, "rewards/rejected": -12.000635147094727, "step": 2188 }, { "epoch": 3.51, "learning_rate": 3.386841062227507e-07, "logits/chosen": -1.7437033653259277, "logits/rejected": -1.7369760274887085, "logps/chosen": -136.3424072265625, "logps/rejected": -218.85086059570312, "loss": 0.0452, "rewards/accuracies": 1.0, "rewards/chosen": -3.748770236968994, "rewards/margins": 7.828930854797363, "rewards/rejected": -11.577701568603516, "step": 2189 }, { "epoch": 3.52, "learning_rate": 3.385850178359096e-07, "logits/chosen": -1.6202707290649414, "logits/rejected": -1.5667678117752075, "logps/chosen": -120.86553192138672, "logps/rejected": -196.01837158203125, "loss": 0.0532, "rewards/accuracies": 1.0, "rewards/chosen": -4.343974590301514, "rewards/margins": 7.6044921875, "rewards/rejected": -11.948466300964355, "step": 2190 }, { "epoch": 3.52, "learning_rate": 3.3848592944906856e-07, "logits/chosen": -1.5916898250579834, "logits/rejected": -1.6580067873001099, "logps/chosen": -98.63236999511719, "logps/rejected": -202.04701232910156, "loss": 0.0065, "rewards/accuracies": 1.0, "rewards/chosen": -2.987713575363159, "rewards/margins": 7.232316970825195, "rewards/rejected": -10.220030784606934, "step": 2191 }, { "epoch": 3.52, "learning_rate": 3.3838684106222746e-07, "logits/chosen": -1.618975281715393, "logits/rejected": -1.7239468097686768, "logps/chosen": -115.52989959716797, "logps/rejected": -212.44586181640625, "loss": 0.0105, "rewards/accuracies": 1.0, "rewards/chosen": -2.455399751663208, "rewards/margins": 7.932098388671875, "rewards/rejected": -10.387497901916504, "step": 2192 }, { "epoch": 3.52, "learning_rate": 3.382877526753865e-07, "logits/chosen": -1.7617734670639038, "logits/rejected": -1.6921441555023193, "logps/chosen": -125.24949645996094, "logps/rejected": -224.56398010253906, "loss": 0.0219, "rewards/accuracies": 1.0, "rewards/chosen": -3.183316230773926, "rewards/margins": 11.336982727050781, "rewards/rejected": -14.520299911499023, "step": 2193 }, { "epoch": 3.52, "learning_rate": 3.381886642885454e-07, "logits/chosen": -1.779747724533081, "logits/rejected": -1.7777910232543945, "logps/chosen": -98.57221221923828, "logps/rejected": -198.21597290039062, "loss": 0.0065, "rewards/accuracies": 1.0, "rewards/chosen": -0.7811456918716431, "rewards/margins": 10.550959587097168, "rewards/rejected": -11.33210563659668, "step": 2194 }, { "epoch": 3.52, "learning_rate": 3.380895759017043e-07, "logits/chosen": -1.68574857711792, "logits/rejected": -1.6739004850387573, "logps/chosen": -96.83192443847656, "logps/rejected": -220.6968994140625, "loss": 0.008, "rewards/accuracies": 1.0, "rewards/chosen": -2.274838447570801, "rewards/margins": 11.156787872314453, "rewards/rejected": -13.431626319885254, "step": 2195 }, { "epoch": 3.52, "learning_rate": 3.3799048751486325e-07, "logits/chosen": -1.6576963663101196, "logits/rejected": -1.623194694519043, "logps/chosen": -94.11833953857422, "logps/rejected": -185.60928344726562, "loss": 0.0281, "rewards/accuracies": 1.0, "rewards/chosen": -1.6137702465057373, "rewards/margins": 7.65414571762085, "rewards/rejected": -9.267916679382324, "step": 2196 }, { "epoch": 3.53, "learning_rate": 3.3789139912802216e-07, "logits/chosen": -1.6625865697860718, "logits/rejected": -1.682747483253479, "logps/chosen": -112.37965393066406, "logps/rejected": -187.8845977783203, "loss": 0.0071, "rewards/accuracies": 1.0, "rewards/chosen": -5.299849987030029, "rewards/margins": 6.7868804931640625, "rewards/rejected": -12.08673095703125, "step": 2197 }, { "epoch": 3.53, "learning_rate": 3.377923107411811e-07, "logits/chosen": -1.5759074687957764, "logits/rejected": -1.5447046756744385, "logps/chosen": -136.9354248046875, "logps/rejected": -204.7698974609375, "loss": 0.0405, "rewards/accuracies": 1.0, "rewards/chosen": -3.918977737426758, "rewards/margins": 6.905411720275879, "rewards/rejected": -10.824389457702637, "step": 2198 }, { "epoch": 3.53, "learning_rate": 3.376932223543401e-07, "logits/chosen": -1.5186904668807983, "logits/rejected": -1.5501371622085571, "logps/chosen": -113.7569580078125, "logps/rejected": -191.30023193359375, "loss": 0.0268, "rewards/accuracies": 1.0, "rewards/chosen": -2.732248544692993, "rewards/margins": 7.999310493469238, "rewards/rejected": -10.731559753417969, "step": 2199 }, { "epoch": 3.53, "learning_rate": 3.37594133967499e-07, "logits/chosen": -1.6720471382141113, "logits/rejected": -1.6990073919296265, "logps/chosen": -95.16914367675781, "logps/rejected": -185.11512756347656, "loss": 0.0234, "rewards/accuracies": 1.0, "rewards/chosen": -2.4308176040649414, "rewards/margins": 7.85296630859375, "rewards/rejected": -10.283783912658691, "step": 2200 }, { "epoch": 3.53, "learning_rate": 3.3749504558065794e-07, "logits/chosen": -1.5515761375427246, "logits/rejected": -1.7411341667175293, "logps/chosen": -119.11207580566406, "logps/rejected": -235.58932495117188, "loss": 0.0147, "rewards/accuracies": 1.0, "rewards/chosen": -4.843124866485596, "rewards/margins": 8.674072265625, "rewards/rejected": -13.517196655273438, "step": 2201 }, { "epoch": 3.53, "learning_rate": 3.3739595719381685e-07, "logits/chosen": -1.733351707458496, "logits/rejected": -1.7337539196014404, "logps/chosen": -97.0371322631836, "logps/rejected": -170.8935546875, "loss": 0.0263, "rewards/accuracies": 1.0, "rewards/chosen": -1.75014066696167, "rewards/margins": 7.784721374511719, "rewards/rejected": -9.534862518310547, "step": 2202 }, { "epoch": 3.54, "learning_rate": 3.372968688069758e-07, "logits/chosen": -1.5356403589248657, "logits/rejected": -1.596405267715454, "logps/chosen": -132.21670532226562, "logps/rejected": -191.58277893066406, "loss": 0.0285, "rewards/accuracies": 1.0, "rewards/chosen": -4.619069576263428, "rewards/margins": 5.685756683349609, "rewards/rejected": -10.304825782775879, "step": 2203 }, { "epoch": 3.54, "learning_rate": 3.3719778042013477e-07, "logits/chosen": -1.8015656471252441, "logits/rejected": -1.842349886894226, "logps/chosen": -143.68746948242188, "logps/rejected": -231.2112274169922, "loss": 0.053, "rewards/accuracies": 1.0, "rewards/chosen": -4.13079309463501, "rewards/margins": 8.553049087524414, "rewards/rejected": -12.683842658996582, "step": 2204 }, { "epoch": 3.54, "learning_rate": 3.370986920332937e-07, "logits/chosen": -1.589627981185913, "logits/rejected": -1.561662197113037, "logps/chosen": -93.67778778076172, "logps/rejected": -176.78030395507812, "loss": 0.0119, "rewards/accuracies": 1.0, "rewards/chosen": -2.328676223754883, "rewards/margins": 6.6245317459106445, "rewards/rejected": -8.953207969665527, "step": 2205 }, { "epoch": 3.54, "learning_rate": 3.3699960364645264e-07, "logits/chosen": -1.4915751218795776, "logits/rejected": -1.5112242698669434, "logps/chosen": -120.44245910644531, "logps/rejected": -231.91175842285156, "loss": 0.0118, "rewards/accuracies": 1.0, "rewards/chosen": -3.7680115699768066, "rewards/margins": 8.288870811462402, "rewards/rejected": -12.056882858276367, "step": 2206 }, { "epoch": 3.54, "learning_rate": 3.3690051525961154e-07, "logits/chosen": -1.7280364036560059, "logits/rejected": -1.8369883298873901, "logps/chosen": -88.6279067993164, "logps/rejected": -156.9542694091797, "loss": 0.028, "rewards/accuracies": 1.0, "rewards/chosen": -2.675112247467041, "rewards/margins": 6.126389026641846, "rewards/rejected": -8.801501274108887, "step": 2207 }, { "epoch": 3.54, "learning_rate": 3.368014268727705e-07, "logits/chosen": -1.5656670331954956, "logits/rejected": -1.603361964225769, "logps/chosen": -99.00666046142578, "logps/rejected": -164.24673461914062, "loss": 0.0273, "rewards/accuracies": 1.0, "rewards/chosen": -2.722357749938965, "rewards/margins": 5.524351119995117, "rewards/rejected": -8.246708869934082, "step": 2208 }, { "epoch": 3.55, "learning_rate": 3.3670233848592946e-07, "logits/chosen": -1.7547831535339355, "logits/rejected": -1.7362349033355713, "logps/chosen": -123.79874420166016, "logps/rejected": -180.7222900390625, "loss": 0.0517, "rewards/accuracies": 1.0, "rewards/chosen": -2.750986337661743, "rewards/margins": 5.894593238830566, "rewards/rejected": -8.645580291748047, "step": 2209 }, { "epoch": 3.55, "learning_rate": 3.3660325009908837e-07, "logits/chosen": -1.5473390817642212, "logits/rejected": -1.5781065225601196, "logps/chosen": -119.60375213623047, "logps/rejected": -187.90890502929688, "loss": 0.018, "rewards/accuracies": 1.0, "rewards/chosen": -3.1592600345611572, "rewards/margins": 7.436932563781738, "rewards/rejected": -10.596192359924316, "step": 2210 }, { "epoch": 3.55, "learning_rate": 3.3650416171224733e-07, "logits/chosen": -1.6235580444335938, "logits/rejected": -1.616828441619873, "logps/chosen": -99.59192657470703, "logps/rejected": -171.66177368164062, "loss": 0.0472, "rewards/accuracies": 1.0, "rewards/chosen": -2.604792356491089, "rewards/margins": 6.3724565505981445, "rewards/rejected": -8.977249145507812, "step": 2211 }, { "epoch": 3.55, "learning_rate": 3.3640507332540624e-07, "logits/chosen": -1.6397091150283813, "logits/rejected": -1.5779242515563965, "logps/chosen": -116.86198425292969, "logps/rejected": -172.9431610107422, "loss": 0.0096, "rewards/accuracies": 1.0, "rewards/chosen": -3.9910216331481934, "rewards/margins": 6.268888473510742, "rewards/rejected": -10.259909629821777, "step": 2212 }, { "epoch": 3.55, "learning_rate": 3.363059849385652e-07, "logits/chosen": -1.5800119638442993, "logits/rejected": -1.5377910137176514, "logps/chosen": -141.0878448486328, "logps/rejected": -213.0290069580078, "loss": 0.0297, "rewards/accuracies": 1.0, "rewards/chosen": -3.608948230743408, "rewards/margins": 8.761387825012207, "rewards/rejected": -12.370336532592773, "step": 2213 }, { "epoch": 3.55, "learning_rate": 3.362068965517241e-07, "logits/chosen": -1.6727968454360962, "logits/rejected": -1.7212371826171875, "logps/chosen": -94.72151947021484, "logps/rejected": -177.04563903808594, "loss": 0.0062, "rewards/accuracies": 1.0, "rewards/chosen": -2.2064785957336426, "rewards/margins": 7.931061744689941, "rewards/rejected": -10.137540817260742, "step": 2214 }, { "epoch": 3.56, "learning_rate": 3.3610780816488306e-07, "logits/chosen": -1.7735649347305298, "logits/rejected": -1.820259690284729, "logps/chosen": -117.69949340820312, "logps/rejected": -202.99038696289062, "loss": 0.1164, "rewards/accuracies": 1.0, "rewards/chosen": -4.507907390594482, "rewards/margins": 6.491986274719238, "rewards/rejected": -10.999894142150879, "step": 2215 }, { "epoch": 3.56, "learning_rate": 3.36008719778042e-07, "logits/chosen": -1.6771914958953857, "logits/rejected": -1.6688172817230225, "logps/chosen": -117.81047058105469, "logps/rejected": -174.03167724609375, "loss": 0.0709, "rewards/accuracies": 1.0, "rewards/chosen": -4.7743401527404785, "rewards/margins": 6.387953281402588, "rewards/rejected": -11.16229248046875, "step": 2216 }, { "epoch": 3.56, "learning_rate": 3.3590963139120093e-07, "logits/chosen": -1.6570510864257812, "logits/rejected": -1.7413548231124878, "logps/chosen": -88.157958984375, "logps/rejected": -215.14117431640625, "loss": 0.0357, "rewards/accuracies": 1.0, "rewards/chosen": -1.99481999874115, "rewards/margins": 9.930700302124023, "rewards/rejected": -11.925519943237305, "step": 2217 }, { "epoch": 3.56, "learning_rate": 3.3581054300435984e-07, "logits/chosen": -1.7173500061035156, "logits/rejected": -1.690964698791504, "logps/chosen": -138.04469299316406, "logps/rejected": -204.36044311523438, "loss": 0.0147, "rewards/accuracies": 1.0, "rewards/chosen": -3.5717313289642334, "rewards/margins": 7.983512878417969, "rewards/rejected": -11.555244445800781, "step": 2218 }, { "epoch": 3.56, "learning_rate": 3.357114546175188e-07, "logits/chosen": -1.6061077117919922, "logits/rejected": -1.6889772415161133, "logps/chosen": -98.9211654663086, "logps/rejected": -181.8084259033203, "loss": 0.0151, "rewards/accuracies": 1.0, "rewards/chosen": -2.8415679931640625, "rewards/margins": 6.557866096496582, "rewards/rejected": -9.399434089660645, "step": 2219 }, { "epoch": 3.56, "learning_rate": 3.3561236623067776e-07, "logits/chosen": -1.533785343170166, "logits/rejected": -1.6268900632858276, "logps/chosen": -114.76138305664062, "logps/rejected": -169.86744689941406, "loss": 0.0556, "rewards/accuracies": 1.0, "rewards/chosen": -3.4756481647491455, "rewards/margins": 5.946053504943848, "rewards/rejected": -9.421701431274414, "step": 2220 }, { "epoch": 3.57, "learning_rate": 3.355132778438367e-07, "logits/chosen": -1.7191898822784424, "logits/rejected": -1.7779878377914429, "logps/chosen": -116.95243072509766, "logps/rejected": -203.89085388183594, "loss": 0.0415, "rewards/accuracies": 1.0, "rewards/chosen": -3.502088785171509, "rewards/margins": 7.74766731262207, "rewards/rejected": -11.249754905700684, "step": 2221 }, { "epoch": 3.57, "learning_rate": 3.354141894569956e-07, "logits/chosen": -1.7274562120437622, "logits/rejected": -1.7726225852966309, "logps/chosen": -129.01365661621094, "logps/rejected": -201.13844299316406, "loss": 0.0245, "rewards/accuracies": 1.0, "rewards/chosen": -5.553653240203857, "rewards/margins": 5.236922264099121, "rewards/rejected": -10.790575981140137, "step": 2222 }, { "epoch": 3.57, "learning_rate": 3.3531510107015453e-07, "logits/chosen": -1.6511423587799072, "logits/rejected": -1.6641439199447632, "logps/chosen": -111.41622161865234, "logps/rejected": -189.50990295410156, "loss": 0.0397, "rewards/accuracies": 1.0, "rewards/chosen": -1.651482343673706, "rewards/margins": 8.005288124084473, "rewards/rejected": -9.656770706176758, "step": 2223 }, { "epoch": 3.57, "learning_rate": 3.352160126833135e-07, "logits/chosen": -1.6874476671218872, "logits/rejected": -1.5531210899353027, "logps/chosen": -153.70712280273438, "logps/rejected": -221.50775146484375, "loss": 0.0251, "rewards/accuracies": 1.0, "rewards/chosen": -5.06813907623291, "rewards/margins": 8.35676383972168, "rewards/rejected": -13.424901962280273, "step": 2224 }, { "epoch": 3.57, "learning_rate": 3.3511692429647245e-07, "logits/chosen": -1.6181848049163818, "logits/rejected": -1.5689749717712402, "logps/chosen": -119.84063720703125, "logps/rejected": -175.09182739257812, "loss": 0.0284, "rewards/accuracies": 1.0, "rewards/chosen": -2.7988457679748535, "rewards/margins": 7.491373538970947, "rewards/rejected": -10.2902193069458, "step": 2225 }, { "epoch": 3.57, "learning_rate": 3.350178359096314e-07, "logits/chosen": -1.5885916948318481, "logits/rejected": -1.5322649478912354, "logps/chosen": -90.09568786621094, "logps/rejected": -179.17556762695312, "loss": 0.0063, "rewards/accuracies": 1.0, "rewards/chosen": -1.4758234024047852, "rewards/margins": 8.35731029510498, "rewards/rejected": -9.833133697509766, "step": 2226 }, { "epoch": 3.57, "learning_rate": 3.349187475227903e-07, "logits/chosen": -1.6924326419830322, "logits/rejected": -1.6814393997192383, "logps/chosen": -99.2391357421875, "logps/rejected": -131.8724822998047, "loss": 0.0894, "rewards/accuracies": 1.0, "rewards/chosen": -1.8938478231430054, "rewards/margins": 3.782175064086914, "rewards/rejected": -5.676023006439209, "step": 2227 }, { "epoch": 3.58, "learning_rate": 3.348196591359492e-07, "logits/chosen": -1.709230661392212, "logits/rejected": -1.7801133394241333, "logps/chosen": -126.95674896240234, "logps/rejected": -191.69265747070312, "loss": 0.0662, "rewards/accuracies": 1.0, "rewards/chosen": -3.022183656692505, "rewards/margins": 6.344205379486084, "rewards/rejected": -9.366389274597168, "step": 2228 }, { "epoch": 3.58, "learning_rate": 3.347205707491082e-07, "logits/chosen": -1.4913146495819092, "logits/rejected": -1.4165433645248413, "logps/chosen": -138.1383056640625, "logps/rejected": -209.15576171875, "loss": 0.0086, "rewards/accuracies": 1.0, "rewards/chosen": -2.7259693145751953, "rewards/margins": 9.120784759521484, "rewards/rejected": -11.846753120422363, "step": 2229 }, { "epoch": 3.58, "learning_rate": 3.3462148236226714e-07, "logits/chosen": -1.560220718383789, "logits/rejected": -1.5830823183059692, "logps/chosen": -111.1532974243164, "logps/rejected": -198.36456298828125, "loss": 0.0211, "rewards/accuracies": 1.0, "rewards/chosen": -1.7562687397003174, "rewards/margins": 8.696372032165527, "rewards/rejected": -10.452640533447266, "step": 2230 }, { "epoch": 3.58, "learning_rate": 3.345223939754261e-07, "logits/chosen": -1.6603989601135254, "logits/rejected": -1.7132729291915894, "logps/chosen": -139.78591918945312, "logps/rejected": -170.65179443359375, "loss": 0.012, "rewards/accuracies": 1.0, "rewards/chosen": -2.6671664714813232, "rewards/margins": 3.617896318435669, "rewards/rejected": -6.285062789916992, "step": 2231 }, { "epoch": 3.58, "learning_rate": 3.34423305588585e-07, "logits/chosen": -1.7766631841659546, "logits/rejected": -1.7438440322875977, "logps/chosen": -103.82136535644531, "logps/rejected": -197.41551208496094, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": -1.6042543649673462, "rewards/margins": 8.577926635742188, "rewards/rejected": -10.182181358337402, "step": 2232 }, { "epoch": 3.58, "learning_rate": 3.343242172017439e-07, "logits/chosen": -1.6675878763198853, "logits/rejected": -1.6285542249679565, "logps/chosen": -130.81967163085938, "logps/rejected": -189.7350311279297, "loss": 0.0098, "rewards/accuracies": 1.0, "rewards/chosen": -3.1990835666656494, "rewards/margins": 6.658381462097168, "rewards/rejected": -9.857464790344238, "step": 2233 }, { "epoch": 3.59, "learning_rate": 3.342251288149029e-07, "logits/chosen": -1.5268471240997314, "logits/rejected": -1.6294515132904053, "logps/chosen": -127.32374572753906, "logps/rejected": -232.93873596191406, "loss": 0.0118, "rewards/accuracies": 1.0, "rewards/chosen": -4.314704418182373, "rewards/margins": 8.128697395324707, "rewards/rejected": -12.443401336669922, "step": 2234 }, { "epoch": 3.59, "learning_rate": 3.341260404280618e-07, "logits/chosen": -1.7119054794311523, "logits/rejected": -1.6640980243682861, "logps/chosen": -111.43547821044922, "logps/rejected": -157.20758056640625, "loss": 0.0282, "rewards/accuracies": 1.0, "rewards/chosen": -3.134040594100952, "rewards/margins": 6.149867534637451, "rewards/rejected": -9.283907890319824, "step": 2235 }, { "epoch": 3.59, "learning_rate": 3.340269520412208e-07, "logits/chosen": -1.5928608179092407, "logits/rejected": -1.5748846530914307, "logps/chosen": -159.46542358398438, "logps/rejected": -208.7386016845703, "loss": 0.0322, "rewards/accuracies": 1.0, "rewards/chosen": -5.980015754699707, "rewards/margins": 5.318713665008545, "rewards/rejected": -11.29872989654541, "step": 2236 }, { "epoch": 3.59, "learning_rate": 3.339278636543797e-07, "logits/chosen": -1.5073680877685547, "logits/rejected": -1.5554091930389404, "logps/chosen": -119.78199768066406, "logps/rejected": -179.97445678710938, "loss": 0.0192, "rewards/accuracies": 1.0, "rewards/chosen": -3.7307143211364746, "rewards/margins": 6.590792655944824, "rewards/rejected": -10.32150650024414, "step": 2237 }, { "epoch": 3.59, "learning_rate": 3.338287752675386e-07, "logits/chosen": -1.6201958656311035, "logits/rejected": -1.6870075464248657, "logps/chosen": -118.17042541503906, "logps/rejected": -188.1943359375, "loss": 0.033, "rewards/accuracies": 1.0, "rewards/chosen": -3.3671867847442627, "rewards/margins": 6.514526844024658, "rewards/rejected": -9.8817138671875, "step": 2238 }, { "epoch": 3.59, "learning_rate": 3.3372968688069757e-07, "logits/chosen": -1.6522932052612305, "logits/rejected": -1.6521302461624146, "logps/chosen": -117.29963684082031, "logps/rejected": -162.43057250976562, "loss": 0.0217, "rewards/accuracies": 1.0, "rewards/chosen": -3.402805805206299, "rewards/margins": 6.133418083190918, "rewards/rejected": -9.536224365234375, "step": 2239 }, { "epoch": 3.6, "learning_rate": 3.336305984938565e-07, "logits/chosen": -1.8291852474212646, "logits/rejected": -1.8003087043762207, "logps/chosen": -99.58392333984375, "logps/rejected": -214.86419677734375, "loss": 0.0065, "rewards/accuracies": 1.0, "rewards/chosen": -1.9135282039642334, "rewards/margins": 11.624619483947754, "rewards/rejected": -13.538147926330566, "step": 2240 }, { "epoch": 3.6, "learning_rate": 3.335315101070155e-07, "logits/chosen": -1.5295642614364624, "logits/rejected": -1.5127875804901123, "logps/chosen": -128.41290283203125, "logps/rejected": -181.0443115234375, "loss": 0.013, "rewards/accuracies": 1.0, "rewards/chosen": -3.075294017791748, "rewards/margins": 6.431306838989258, "rewards/rejected": -9.506601333618164, "step": 2241 }, { "epoch": 3.6, "learning_rate": 3.334324217201744e-07, "logits/chosen": -1.8065533638000488, "logits/rejected": -1.7797086238861084, "logps/chosen": -132.897216796875, "logps/rejected": -226.3004913330078, "loss": 0.104, "rewards/accuracies": 1.0, "rewards/chosen": -3.293903350830078, "rewards/margins": 9.431037902832031, "rewards/rejected": -12.72494125366211, "step": 2242 }, { "epoch": 3.6, "learning_rate": 3.333333333333333e-07, "logits/chosen": -1.7383289337158203, "logits/rejected": -1.7263422012329102, "logps/chosen": -116.19664001464844, "logps/rejected": -207.2869110107422, "loss": 0.0391, "rewards/accuracies": 1.0, "rewards/chosen": -1.7998377084732056, "rewards/margins": 10.29233169555664, "rewards/rejected": -12.092168807983398, "step": 2243 }, { "epoch": 3.6, "learning_rate": 3.3323424494649226e-07, "logits/chosen": -1.4913913011550903, "logits/rejected": -1.5624334812164307, "logps/chosen": -101.63388061523438, "logps/rejected": -189.13377380371094, "loss": 0.0125, "rewards/accuracies": 1.0, "rewards/chosen": -3.644965171813965, "rewards/margins": 8.185795783996582, "rewards/rejected": -11.830760955810547, "step": 2244 }, { "epoch": 3.6, "learning_rate": 3.3313515655965117e-07, "logits/chosen": -1.646781325340271, "logits/rejected": -1.6731444597244263, "logps/chosen": -112.21378326416016, "logps/rejected": -220.83624267578125, "loss": 0.0365, "rewards/accuracies": 1.0, "rewards/chosen": -3.8901472091674805, "rewards/margins": 9.389264106750488, "rewards/rejected": -13.279411315917969, "step": 2245 }, { "epoch": 3.61, "learning_rate": 3.330360681728102e-07, "logits/chosen": -1.7876710891723633, "logits/rejected": -1.8417720794677734, "logps/chosen": -130.73916625976562, "logps/rejected": -213.50161743164062, "loss": 0.1048, "rewards/accuracies": 1.0, "rewards/chosen": -4.2430739402771, "rewards/margins": 7.426568984985352, "rewards/rejected": -11.669642448425293, "step": 2246 }, { "epoch": 3.61, "learning_rate": 3.329369797859691e-07, "logits/chosen": -1.8185949325561523, "logits/rejected": -1.741937279701233, "logps/chosen": -110.82194519042969, "logps/rejected": -183.70513916015625, "loss": 0.019, "rewards/accuracies": 1.0, "rewards/chosen": -1.9620469808578491, "rewards/margins": 8.994590759277344, "rewards/rejected": -10.956637382507324, "step": 2247 }, { "epoch": 3.61, "learning_rate": 3.32837891399128e-07, "logits/chosen": -1.6111469268798828, "logits/rejected": -1.5694628953933716, "logps/chosen": -121.46887969970703, "logps/rejected": -185.1714630126953, "loss": 0.0117, "rewards/accuracies": 1.0, "rewards/chosen": -2.657587766647339, "rewards/margins": 7.651818752288818, "rewards/rejected": -10.309406280517578, "step": 2248 }, { "epoch": 3.61, "learning_rate": 3.3273880301228695e-07, "logits/chosen": -1.6857022047042847, "logits/rejected": -1.6532087326049805, "logps/chosen": -125.72998046875, "logps/rejected": -168.48797607421875, "loss": 0.0076, "rewards/accuracies": 1.0, "rewards/chosen": -3.4643027782440186, "rewards/margins": 5.060924053192139, "rewards/rejected": -8.525227546691895, "step": 2249 }, { "epoch": 3.61, "learning_rate": 3.3263971462544586e-07, "logits/chosen": -1.6912109851837158, "logits/rejected": -1.6285903453826904, "logps/chosen": -117.2468032836914, "logps/rejected": -162.56112670898438, "loss": 0.0187, "rewards/accuracies": 1.0, "rewards/chosen": -3.3808088302612305, "rewards/margins": 5.63465690612793, "rewards/rejected": -9.01546573638916, "step": 2250 }, { "epoch": 3.61, "learning_rate": 3.3254062623860477e-07, "logits/chosen": -1.6466959714889526, "logits/rejected": -1.692476511001587, "logps/chosen": -117.78041076660156, "logps/rejected": -179.09754943847656, "loss": 0.0434, "rewards/accuracies": 1.0, "rewards/chosen": -2.7616307735443115, "rewards/margins": 5.663517951965332, "rewards/rejected": -8.425148963928223, "step": 2251 }, { "epoch": 3.61, "learning_rate": 3.324415378517638e-07, "logits/chosen": -1.559828519821167, "logits/rejected": -1.6599076986312866, "logps/chosen": -105.68682098388672, "logps/rejected": -194.6924591064453, "loss": 0.0305, "rewards/accuracies": 1.0, "rewards/chosen": -2.4081475734710693, "rewards/margins": 7.569893836975098, "rewards/rejected": -9.97804069519043, "step": 2252 }, { "epoch": 3.62, "learning_rate": 3.323424494649227e-07, "logits/chosen": -1.7765926122665405, "logits/rejected": -1.7600340843200684, "logps/chosen": -95.71693420410156, "logps/rejected": -213.9748992919922, "loss": 0.0145, "rewards/accuracies": 1.0, "rewards/chosen": -2.0061025619506836, "rewards/margins": 10.594067573547363, "rewards/rejected": -12.600170135498047, "step": 2253 }, { "epoch": 3.62, "learning_rate": 3.3224336107808165e-07, "logits/chosen": -1.6996288299560547, "logits/rejected": -1.7322828769683838, "logps/chosen": -137.75784301757812, "logps/rejected": -186.53627014160156, "loss": 0.0183, "rewards/accuracies": 1.0, "rewards/chosen": -3.0883371829986572, "rewards/margins": 6.453249931335449, "rewards/rejected": -9.541586875915527, "step": 2254 }, { "epoch": 3.62, "learning_rate": 3.3214427269124055e-07, "logits/chosen": -1.4822311401367188, "logits/rejected": -1.4938368797302246, "logps/chosen": -106.28611755371094, "logps/rejected": -193.651611328125, "loss": 0.068, "rewards/accuracies": 1.0, "rewards/chosen": -2.1427273750305176, "rewards/margins": 8.771964073181152, "rewards/rejected": -10.914691925048828, "step": 2255 }, { "epoch": 3.62, "learning_rate": 3.3204518430439946e-07, "logits/chosen": -1.6558715105056763, "logits/rejected": -1.737040638923645, "logps/chosen": -102.8006820678711, "logps/rejected": -187.3025360107422, "loss": 0.0212, "rewards/accuracies": 1.0, "rewards/chosen": -3.4277501106262207, "rewards/margins": 6.512253761291504, "rewards/rejected": -9.940003395080566, "step": 2256 }, { "epoch": 3.62, "learning_rate": 3.3194609591755847e-07, "logits/chosen": -1.6246564388275146, "logits/rejected": -1.6515123844146729, "logps/chosen": -157.03048706054688, "logps/rejected": -227.7128143310547, "loss": 0.023, "rewards/accuracies": 1.0, "rewards/chosen": -4.307823657989502, "rewards/margins": 8.545865058898926, "rewards/rejected": -12.85368824005127, "step": 2257 }, { "epoch": 3.62, "learning_rate": 3.318470075307174e-07, "logits/chosen": -1.52339768409729, "logits/rejected": -1.5662024021148682, "logps/chosen": -106.58567810058594, "logps/rejected": -207.32452392578125, "loss": 0.007, "rewards/accuracies": 1.0, "rewards/chosen": -3.545586585998535, "rewards/margins": 9.422029495239258, "rewards/rejected": -12.967616081237793, "step": 2258 }, { "epoch": 3.63, "learning_rate": 3.3174791914387634e-07, "logits/chosen": -1.69525146484375, "logits/rejected": -1.6240246295928955, "logps/chosen": -111.90753173828125, "logps/rejected": -142.46334838867188, "loss": 0.0567, "rewards/accuracies": 1.0, "rewards/chosen": -2.4758071899414062, "rewards/margins": 5.237011909484863, "rewards/rejected": -7.712818622589111, "step": 2259 }, { "epoch": 3.63, "learning_rate": 3.3164883075703525e-07, "logits/chosen": -1.6090526580810547, "logits/rejected": -1.6389533281326294, "logps/chosen": -112.5648193359375, "logps/rejected": -190.21910095214844, "loss": 0.0047, "rewards/accuracies": 1.0, "rewards/chosen": -2.3296592235565186, "rewards/margins": 7.295497894287109, "rewards/rejected": -9.625157356262207, "step": 2260 }, { "epoch": 3.63, "learning_rate": 3.3154974237019415e-07, "logits/chosen": -1.6576104164123535, "logits/rejected": -1.5804264545440674, "logps/chosen": -131.62606811523438, "logps/rejected": -161.01837158203125, "loss": 0.0618, "rewards/accuracies": 1.0, "rewards/chosen": -3.2749502658843994, "rewards/margins": 6.147180557250977, "rewards/rejected": -9.422130584716797, "step": 2261 }, { "epoch": 3.63, "learning_rate": 3.3145065398335317e-07, "logits/chosen": -1.590188980102539, "logits/rejected": -1.5429344177246094, "logps/chosen": -120.29436492919922, "logps/rejected": -179.65237426757812, "loss": 0.0151, "rewards/accuracies": 1.0, "rewards/chosen": -3.312166213989258, "rewards/margins": 6.210004806518555, "rewards/rejected": -9.522171020507812, "step": 2262 }, { "epoch": 3.63, "learning_rate": 3.3135156559651207e-07, "logits/chosen": -1.6708226203918457, "logits/rejected": -1.6554505825042725, "logps/chosen": -122.57577514648438, "logps/rejected": -201.19924926757812, "loss": 0.0406, "rewards/accuracies": 1.0, "rewards/chosen": -2.784712553024292, "rewards/margins": 8.511367797851562, "rewards/rejected": -11.296079635620117, "step": 2263 }, { "epoch": 3.63, "learning_rate": 3.3125247720967103e-07, "logits/chosen": -1.6910431385040283, "logits/rejected": -1.6773285865783691, "logps/chosen": -155.87718200683594, "logps/rejected": -242.3700714111328, "loss": 0.0712, "rewards/accuracies": 1.0, "rewards/chosen": -5.475171089172363, "rewards/margins": 7.212989807128906, "rewards/rejected": -12.68816089630127, "step": 2264 }, { "epoch": 3.64, "learning_rate": 3.3115338882282994e-07, "logits/chosen": -1.8337926864624023, "logits/rejected": -1.8796274662017822, "logps/chosen": -94.67475891113281, "logps/rejected": -220.69610595703125, "loss": 0.0088, "rewards/accuracies": 1.0, "rewards/chosen": -2.0445568561553955, "rewards/margins": 11.898097038269043, "rewards/rejected": -13.94265365600586, "step": 2265 }, { "epoch": 3.64, "learning_rate": 3.3105430043598885e-07, "logits/chosen": -1.6743953227996826, "logits/rejected": -1.6979258060455322, "logps/chosen": -104.16490173339844, "logps/rejected": -144.90725708007812, "loss": 0.0677, "rewards/accuracies": 1.0, "rewards/chosen": -3.1384482383728027, "rewards/margins": 4.224229335784912, "rewards/rejected": -7.362677574157715, "step": 2266 }, { "epoch": 3.64, "learning_rate": 3.3095521204914786e-07, "logits/chosen": -1.5063830614089966, "logits/rejected": -1.5557621717453003, "logps/chosen": -81.17405700683594, "logps/rejected": -172.343505859375, "loss": 0.0561, "rewards/accuracies": 0.75, "rewards/chosen": -2.417820930480957, "rewards/margins": 7.09390926361084, "rewards/rejected": -9.511731147766113, "step": 2267 }, { "epoch": 3.64, "learning_rate": 3.3085612366230677e-07, "logits/chosen": -1.5921920537948608, "logits/rejected": -1.6130906343460083, "logps/chosen": -129.16793823242188, "logps/rejected": -191.0594482421875, "loss": 0.0084, "rewards/accuracies": 1.0, "rewards/chosen": -3.917766809463501, "rewards/margins": 6.288747787475586, "rewards/rejected": -10.206514358520508, "step": 2268 }, { "epoch": 3.64, "learning_rate": 3.307570352754657e-07, "logits/chosen": -1.450277328491211, "logits/rejected": -1.6031291484832764, "logps/chosen": -101.74950408935547, "logps/rejected": -244.2327423095703, "loss": 0.0153, "rewards/accuracies": 1.0, "rewards/chosen": -3.2815794944763184, "rewards/margins": 8.815677642822266, "rewards/rejected": -12.097256660461426, "step": 2269 }, { "epoch": 3.64, "learning_rate": 3.3065794688862463e-07, "logits/chosen": -1.6818299293518066, "logits/rejected": -1.6519874334335327, "logps/chosen": -95.14582061767578, "logps/rejected": -181.36199951171875, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -1.6170029640197754, "rewards/margins": 9.817673683166504, "rewards/rejected": -11.434676170349121, "step": 2270 }, { "epoch": 3.65, "learning_rate": 3.3055885850178354e-07, "logits/chosen": -1.5903297662734985, "logits/rejected": -1.63703191280365, "logps/chosen": -120.62324523925781, "logps/rejected": -194.5399169921875, "loss": 0.0333, "rewards/accuracies": 1.0, "rewards/chosen": -4.434066295623779, "rewards/margins": 6.3973612785339355, "rewards/rejected": -10.831428527832031, "step": 2271 }, { "epoch": 3.65, "learning_rate": 3.3045977011494255e-07, "logits/chosen": -1.6517356634140015, "logits/rejected": -1.7616267204284668, "logps/chosen": -118.96813201904297, "logps/rejected": -231.98248291015625, "loss": 0.0254, "rewards/accuracies": 1.0, "rewards/chosen": -3.4106855392456055, "rewards/margins": 9.353771209716797, "rewards/rejected": -12.764455795288086, "step": 2272 }, { "epoch": 3.65, "learning_rate": 3.3036068172810146e-07, "logits/chosen": -1.747129201889038, "logits/rejected": -1.6903430223464966, "logps/chosen": -147.5254364013672, "logps/rejected": -197.29803466796875, "loss": 0.0734, "rewards/accuracies": 1.0, "rewards/chosen": -2.9193177223205566, "rewards/margins": 7.916950225830078, "rewards/rejected": -10.836268424987793, "step": 2273 }, { "epoch": 3.65, "learning_rate": 3.302615933412604e-07, "logits/chosen": -1.4064290523529053, "logits/rejected": -1.4918147325515747, "logps/chosen": -108.15161895751953, "logps/rejected": -197.32269287109375, "loss": 0.0166, "rewards/accuracies": 1.0, "rewards/chosen": -2.2438278198242188, "rewards/margins": 8.106475830078125, "rewards/rejected": -10.350303649902344, "step": 2274 }, { "epoch": 3.65, "learning_rate": 3.301625049544193e-07, "logits/chosen": -1.8094675540924072, "logits/rejected": -1.7576035261154175, "logps/chosen": -134.0812530517578, "logps/rejected": -151.92526245117188, "loss": 0.0437, "rewards/accuracies": 1.0, "rewards/chosen": -3.8651912212371826, "rewards/margins": 3.761399269104004, "rewards/rejected": -7.626590728759766, "step": 2275 }, { "epoch": 3.65, "learning_rate": 3.3006341656757823e-07, "logits/chosen": -1.651664137840271, "logits/rejected": -1.6258695125579834, "logps/chosen": -138.14804077148438, "logps/rejected": -235.55526733398438, "loss": 0.0986, "rewards/accuracies": 1.0, "rewards/chosen": -4.085735321044922, "rewards/margins": 9.14148998260498, "rewards/rejected": -13.227226257324219, "step": 2276 }, { "epoch": 3.65, "learning_rate": 3.299643281807372e-07, "logits/chosen": -1.853964924812317, "logits/rejected": -1.6839885711669922, "logps/chosen": -142.66424560546875, "logps/rejected": -221.62692260742188, "loss": 0.0058, "rewards/accuracies": 1.0, "rewards/chosen": -3.8068461418151855, "rewards/margins": 10.608430862426758, "rewards/rejected": -14.415277481079102, "step": 2277 }, { "epoch": 3.66, "learning_rate": 3.2986523979389615e-07, "logits/chosen": -1.6637663841247559, "logits/rejected": -1.7623672485351562, "logps/chosen": -107.79478454589844, "logps/rejected": -235.3152313232422, "loss": 0.0183, "rewards/accuracies": 1.0, "rewards/chosen": -2.951864719390869, "rewards/margins": 11.422028541564941, "rewards/rejected": -14.373893737792969, "step": 2278 }, { "epoch": 3.66, "learning_rate": 3.297661514070551e-07, "logits/chosen": -1.6593060493469238, "logits/rejected": -1.6115483045578003, "logps/chosen": -164.79518127441406, "logps/rejected": -188.31771850585938, "loss": 0.0587, "rewards/accuracies": 1.0, "rewards/chosen": -5.276126384735107, "rewards/margins": 5.092874526977539, "rewards/rejected": -10.369000434875488, "step": 2279 }, { "epoch": 3.66, "learning_rate": 3.29667063020214e-07, "logits/chosen": -1.6324045658111572, "logits/rejected": -1.5869152545928955, "logps/chosen": -113.5484848022461, "logps/rejected": -168.6986083984375, "loss": 0.0978, "rewards/accuracies": 1.0, "rewards/chosen": -4.136707782745361, "rewards/margins": 5.263484954833984, "rewards/rejected": -9.400193214416504, "step": 2280 }, { "epoch": 3.66, "learning_rate": 3.295679746333729e-07, "logits/chosen": -1.819186806678772, "logits/rejected": -1.7145427465438843, "logps/chosen": -127.93392181396484, "logps/rejected": -188.18165588378906, "loss": 0.0123, "rewards/accuracies": 1.0, "rewards/chosen": -1.8150625228881836, "rewards/margins": 8.453293800354004, "rewards/rejected": -10.268356323242188, "step": 2281 }, { "epoch": 3.66, "learning_rate": 3.294688862465319e-07, "logits/chosen": -1.799998164176941, "logits/rejected": -1.7497378587722778, "logps/chosen": -117.31425476074219, "logps/rejected": -199.05392456054688, "loss": 0.0255, "rewards/accuracies": 1.0, "rewards/chosen": -3.5268406867980957, "rewards/margins": 8.482148170471191, "rewards/rejected": -12.008987426757812, "step": 2282 }, { "epoch": 3.66, "learning_rate": 3.2936979785969084e-07, "logits/chosen": -1.7611639499664307, "logits/rejected": -1.6965222358703613, "logps/chosen": -135.53115844726562, "logps/rejected": -242.65768432617188, "loss": 0.1409, "rewards/accuracies": 1.0, "rewards/chosen": -3.491403818130493, "rewards/margins": 9.956501007080078, "rewards/rejected": -13.447904586791992, "step": 2283 }, { "epoch": 3.67, "learning_rate": 3.292707094728498e-07, "logits/chosen": -1.9025449752807617, "logits/rejected": -1.8628513813018799, "logps/chosen": -82.82514953613281, "logps/rejected": -191.51644897460938, "loss": 0.0164, "rewards/accuracies": 1.0, "rewards/chosen": -0.40048977732658386, "rewards/margins": 11.050196647644043, "rewards/rejected": -11.45068645477295, "step": 2284 }, { "epoch": 3.67, "learning_rate": 3.291716210860087e-07, "logits/chosen": -1.7823518514633179, "logits/rejected": -1.6091548204421997, "logps/chosen": -149.95021057128906, "logps/rejected": -186.4630584716797, "loss": 0.0696, "rewards/accuracies": 1.0, "rewards/chosen": -5.614593505859375, "rewards/margins": 5.507284164428711, "rewards/rejected": -11.121877670288086, "step": 2285 }, { "epoch": 3.67, "learning_rate": 3.290725326991676e-07, "logits/chosen": -1.5468422174453735, "logits/rejected": -1.511552333831787, "logps/chosen": -126.0462646484375, "logps/rejected": -197.65701293945312, "loss": 0.0259, "rewards/accuracies": 1.0, "rewards/chosen": -4.5411200523376465, "rewards/margins": 6.903808116912842, "rewards/rejected": -11.444928169250488, "step": 2286 }, { "epoch": 3.67, "learning_rate": 3.289734443123266e-07, "logits/chosen": -1.5872944593429565, "logits/rejected": -1.6591380834579468, "logps/chosen": -108.10122680664062, "logps/rejected": -216.50051879882812, "loss": 0.0067, "rewards/accuracies": 1.0, "rewards/chosen": -2.340151309967041, "rewards/margins": 10.292902946472168, "rewards/rejected": -12.633054733276367, "step": 2287 }, { "epoch": 3.67, "learning_rate": 3.2887435592548554e-07, "logits/chosen": -1.6974329948425293, "logits/rejected": -1.7717838287353516, "logps/chosen": -111.73988342285156, "logps/rejected": -168.21075439453125, "loss": 0.0173, "rewards/accuracies": 1.0, "rewards/chosen": -2.5773797035217285, "rewards/margins": 4.821328639984131, "rewards/rejected": -7.398708343505859, "step": 2288 }, { "epoch": 3.67, "learning_rate": 3.2877526753864444e-07, "logits/chosen": -1.8239482641220093, "logits/rejected": -1.8261752128601074, "logps/chosen": -135.1286163330078, "logps/rejected": -223.80990600585938, "loss": 0.0303, "rewards/accuracies": 1.0, "rewards/chosen": -3.992556571960449, "rewards/margins": 8.393324851989746, "rewards/rejected": -12.385881423950195, "step": 2289 }, { "epoch": 3.68, "learning_rate": 3.286761791518034e-07, "logits/chosen": -1.7461308240890503, "logits/rejected": -1.6807665824890137, "logps/chosen": -120.10870361328125, "logps/rejected": -182.51837158203125, "loss": 0.04, "rewards/accuracies": 1.0, "rewards/chosen": -3.2903270721435547, "rewards/margins": 6.979238510131836, "rewards/rejected": -10.269564628601074, "step": 2290 }, { "epoch": 3.68, "learning_rate": 3.285770907649623e-07, "logits/chosen": -1.7494211196899414, "logits/rejected": -1.6254916191101074, "logps/chosen": -93.9747543334961, "logps/rejected": -175.0828857421875, "loss": 0.0135, "rewards/accuracies": 1.0, "rewards/chosen": -0.22830913960933685, "rewards/margins": 9.647789001464844, "rewards/rejected": -9.876097679138184, "step": 2291 }, { "epoch": 3.68, "learning_rate": 3.2847800237812127e-07, "logits/chosen": -1.586530327796936, "logits/rejected": -1.547570824623108, "logps/chosen": -128.0208740234375, "logps/rejected": -180.78468322753906, "loss": 0.0065, "rewards/accuracies": 1.0, "rewards/chosen": -3.1164228916168213, "rewards/margins": 6.2070393562316895, "rewards/rejected": -9.32346248626709, "step": 2292 }, { "epoch": 3.68, "learning_rate": 3.2837891399128023e-07, "logits/chosen": -1.773360252380371, "logits/rejected": -1.787628173828125, "logps/chosen": -134.08721923828125, "logps/rejected": -180.96107482910156, "loss": 0.0097, "rewards/accuracies": 1.0, "rewards/chosen": -3.66825008392334, "rewards/margins": 5.4873127937316895, "rewards/rejected": -9.155562400817871, "step": 2293 }, { "epoch": 3.68, "learning_rate": 3.2827982560443914e-07, "logits/chosen": -1.6673506498336792, "logits/rejected": -1.4731078147888184, "logps/chosen": -118.15168762207031, "logps/rejected": -180.10006713867188, "loss": 0.0205, "rewards/accuracies": 1.0, "rewards/chosen": -1.9660184383392334, "rewards/margins": 7.075699806213379, "rewards/rejected": -9.041719436645508, "step": 2294 }, { "epoch": 3.68, "learning_rate": 3.281807372175981e-07, "logits/chosen": -1.6660858392715454, "logits/rejected": -1.7462584972381592, "logps/chosen": -123.79605865478516, "logps/rejected": -204.95999145507812, "loss": 0.0347, "rewards/accuracies": 1.0, "rewards/chosen": -2.447722911834717, "rewards/margins": 8.043126106262207, "rewards/rejected": -10.490849494934082, "step": 2295 }, { "epoch": 3.69, "learning_rate": 3.28081648830757e-07, "logits/chosen": -1.7386506795883179, "logits/rejected": -1.7775291204452515, "logps/chosen": -161.58148193359375, "logps/rejected": -240.81842041015625, "loss": 0.0442, "rewards/accuracies": 1.0, "rewards/chosen": -6.1474785804748535, "rewards/margins": 7.108044624328613, "rewards/rejected": -13.255523681640625, "step": 2296 }, { "epoch": 3.69, "learning_rate": 3.2798256044391596e-07, "logits/chosen": -1.6366751194000244, "logits/rejected": -1.6886268854141235, "logps/chosen": -117.91315460205078, "logps/rejected": -226.99070739746094, "loss": 0.0267, "rewards/accuracies": 1.0, "rewards/chosen": -4.180783748626709, "rewards/margins": 8.522754669189453, "rewards/rejected": -12.70353889465332, "step": 2297 }, { "epoch": 3.69, "learning_rate": 3.2788347205707487e-07, "logits/chosen": -1.714130163192749, "logits/rejected": -1.8303064107894897, "logps/chosen": -110.7461929321289, "logps/rejected": -198.17526245117188, "loss": 0.195, "rewards/accuracies": 1.0, "rewards/chosen": -1.9007807970046997, "rewards/margins": 7.292250633239746, "rewards/rejected": -9.193031311035156, "step": 2298 }, { "epoch": 3.69, "learning_rate": 3.2778438367023383e-07, "logits/chosen": -1.532355546951294, "logits/rejected": -1.548356056213379, "logps/chosen": -87.28816223144531, "logps/rejected": -174.44683837890625, "loss": 0.0337, "rewards/accuracies": 1.0, "rewards/chosen": -2.7970457077026367, "rewards/margins": 7.539129257202148, "rewards/rejected": -10.336174964904785, "step": 2299 }, { "epoch": 3.69, "learning_rate": 3.276852952833928e-07, "logits/chosen": -1.5283081531524658, "logits/rejected": -1.6062949895858765, "logps/chosen": -85.52682495117188, "logps/rejected": -214.8189239501953, "loss": 0.0069, "rewards/accuracies": 1.0, "rewards/chosen": -1.180251121520996, "rewards/margins": 9.345355987548828, "rewards/rejected": -10.525606155395508, "step": 2300 }, { "epoch": 3.69, "learning_rate": 3.275862068965517e-07, "logits/chosen": -1.594812273979187, "logits/rejected": -1.5928547382354736, "logps/chosen": -127.20219421386719, "logps/rejected": -210.6011962890625, "loss": 0.0095, "rewards/accuracies": 1.0, "rewards/chosen": -3.8816609382629395, "rewards/margins": 8.342187881469727, "rewards/rejected": -12.223848342895508, "step": 2301 }, { "epoch": 3.7, "learning_rate": 3.2748711850971066e-07, "logits/chosen": -1.6104925870895386, "logits/rejected": -1.6324795484542847, "logps/chosen": -87.98396301269531, "logps/rejected": -168.87997436523438, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/chosen": -1.2816630601882935, "rewards/margins": 7.584209442138672, "rewards/rejected": -8.865872383117676, "step": 2302 }, { "epoch": 3.7, "learning_rate": 3.2738803012286956e-07, "logits/chosen": -1.8444292545318604, "logits/rejected": -1.828599214553833, "logps/chosen": -121.47574615478516, "logps/rejected": -187.11708068847656, "loss": 0.0073, "rewards/accuracies": 1.0, "rewards/chosen": -3.043778419494629, "rewards/margins": 7.6925048828125, "rewards/rejected": -10.736283302307129, "step": 2303 }, { "epoch": 3.7, "learning_rate": 3.272889417360285e-07, "logits/chosen": -1.6889960765838623, "logits/rejected": -1.655486822128296, "logps/chosen": -128.1184844970703, "logps/rejected": -197.08477783203125, "loss": 0.029, "rewards/accuracies": 1.0, "rewards/chosen": -4.1832966804504395, "rewards/margins": 8.031497955322266, "rewards/rejected": -12.214795112609863, "step": 2304 }, { "epoch": 3.7, "learning_rate": 3.271898533491875e-07, "logits/chosen": -1.6603795289993286, "logits/rejected": -1.6515603065490723, "logps/chosen": -137.33851623535156, "logps/rejected": -192.21627807617188, "loss": 0.0518, "rewards/accuracies": 1.0, "rewards/chosen": -3.764671802520752, "rewards/margins": 7.3756608963012695, "rewards/rejected": -11.14033317565918, "step": 2305 }, { "epoch": 3.7, "learning_rate": 3.270907649623464e-07, "logits/chosen": -1.6981981992721558, "logits/rejected": -1.7338566780090332, "logps/chosen": -129.8831787109375, "logps/rejected": -206.4558868408203, "loss": 0.023, "rewards/accuracies": 1.0, "rewards/chosen": -4.410679340362549, "rewards/margins": 5.309022903442383, "rewards/rejected": -9.71970272064209, "step": 2306 }, { "epoch": 3.7, "learning_rate": 3.2699167657550535e-07, "logits/chosen": -1.7845847606658936, "logits/rejected": -1.7620651721954346, "logps/chosen": -98.66643524169922, "logps/rejected": -210.71356201171875, "loss": 0.0238, "rewards/accuracies": 1.0, "rewards/chosen": -2.308903694152832, "rewards/margins": 11.332841873168945, "rewards/rejected": -13.641746520996094, "step": 2307 }, { "epoch": 3.7, "learning_rate": 3.2689258818866426e-07, "logits/chosen": -1.6928433179855347, "logits/rejected": -1.788573145866394, "logps/chosen": -103.06317138671875, "logps/rejected": -209.43600463867188, "loss": 0.0104, "rewards/accuracies": 1.0, "rewards/chosen": -2.329413414001465, "rewards/margins": 9.216878890991211, "rewards/rejected": -11.546292304992676, "step": 2308 }, { "epoch": 3.71, "learning_rate": 3.267934998018232e-07, "logits/chosen": -1.7074053287506104, "logits/rejected": -1.7633161544799805, "logps/chosen": -112.15960693359375, "logps/rejected": -237.8001708984375, "loss": 0.0499, "rewards/accuracies": 1.0, "rewards/chosen": -3.1866791248321533, "rewards/margins": 8.852712631225586, "rewards/rejected": -12.03939151763916, "step": 2309 }, { "epoch": 3.71, "learning_rate": 3.266944114149822e-07, "logits/chosen": -1.6660215854644775, "logits/rejected": -1.6516250371932983, "logps/chosen": -135.48291015625, "logps/rejected": -227.9437255859375, "loss": 0.0053, "rewards/accuracies": 1.0, "rewards/chosen": -3.86887788772583, "rewards/margins": 9.827211380004883, "rewards/rejected": -13.696089744567871, "step": 2310 }, { "epoch": 3.71, "learning_rate": 3.265953230281411e-07, "logits/chosen": -1.5858216285705566, "logits/rejected": -1.658750295639038, "logps/chosen": -112.45283508300781, "logps/rejected": -191.83660888671875, "loss": 0.0177, "rewards/accuracies": 1.0, "rewards/chosen": -1.0501890182495117, "rewards/margins": 8.776137351989746, "rewards/rejected": -9.826326370239258, "step": 2311 }, { "epoch": 3.71, "learning_rate": 3.2649623464130004e-07, "logits/chosen": -1.5980432033538818, "logits/rejected": -1.601485252380371, "logps/chosen": -140.13108825683594, "logps/rejected": -220.96124267578125, "loss": 0.0046, "rewards/accuracies": 1.0, "rewards/chosen": -4.156899452209473, "rewards/margins": 7.748292922973633, "rewards/rejected": -11.905192375183105, "step": 2312 }, { "epoch": 3.71, "learning_rate": 3.2639714625445895e-07, "logits/chosen": -1.7252349853515625, "logits/rejected": -1.8121204376220703, "logps/chosen": -149.87677001953125, "logps/rejected": -214.54954528808594, "loss": 0.0134, "rewards/accuracies": 1.0, "rewards/chosen": -5.47411584854126, "rewards/margins": 7.312758445739746, "rewards/rejected": -12.786874771118164, "step": 2313 }, { "epoch": 3.71, "learning_rate": 3.2629805786761786e-07, "logits/chosen": -1.6578776836395264, "logits/rejected": -1.5784997940063477, "logps/chosen": -134.31613159179688, "logps/rejected": -183.14366149902344, "loss": 0.012, "rewards/accuracies": 1.0, "rewards/chosen": -3.6023056507110596, "rewards/margins": 7.38063907623291, "rewards/rejected": -10.982945442199707, "step": 2314 }, { "epoch": 3.72, "learning_rate": 3.2619896948077687e-07, "logits/chosen": -1.7434546947479248, "logits/rejected": -1.677778720855713, "logps/chosen": -145.8841552734375, "logps/rejected": -226.1613311767578, "loss": 0.0063, "rewards/accuracies": 1.0, "rewards/chosen": -5.107231616973877, "rewards/margins": 8.385185241699219, "rewards/rejected": -13.492416381835938, "step": 2315 }, { "epoch": 3.72, "learning_rate": 3.260998810939358e-07, "logits/chosen": -1.6497191190719604, "logits/rejected": -1.6381596326828003, "logps/chosen": -101.49665069580078, "logps/rejected": -187.69161987304688, "loss": 0.0731, "rewards/accuracies": 1.0, "rewards/chosen": -2.0963196754455566, "rewards/margins": 7.026187896728516, "rewards/rejected": -9.12250804901123, "step": 2316 }, { "epoch": 3.72, "learning_rate": 3.2600079270709474e-07, "logits/chosen": -1.652151346206665, "logits/rejected": -1.6550602912902832, "logps/chosen": -114.57373046875, "logps/rejected": -200.62557983398438, "loss": 0.0258, "rewards/accuracies": 1.0, "rewards/chosen": -3.6586036682128906, "rewards/margins": 8.001544952392578, "rewards/rejected": -11.660148620605469, "step": 2317 }, { "epoch": 3.72, "learning_rate": 3.2590170432025364e-07, "logits/chosen": -1.6246683597564697, "logits/rejected": -1.5807461738586426, "logps/chosen": -98.36683654785156, "logps/rejected": -167.97842407226562, "loss": 0.0286, "rewards/accuracies": 1.0, "rewards/chosen": -1.9241061210632324, "rewards/margins": 6.956235885620117, "rewards/rejected": -8.880341529846191, "step": 2318 }, { "epoch": 3.72, "learning_rate": 3.2580261593341255e-07, "logits/chosen": -1.5350431203842163, "logits/rejected": -1.5612502098083496, "logps/chosen": -113.13674926757812, "logps/rejected": -195.98410034179688, "loss": 0.0109, "rewards/accuracies": 1.0, "rewards/chosen": -4.065746307373047, "rewards/margins": 5.726138114929199, "rewards/rejected": -9.791884422302246, "step": 2319 }, { "epoch": 3.72, "learning_rate": 3.2570352754657156e-07, "logits/chosen": -1.5567461252212524, "logits/rejected": -1.6080724000930786, "logps/chosen": -101.8628158569336, "logps/rejected": -161.1023406982422, "loss": 0.0129, "rewards/accuracies": 1.0, "rewards/chosen": -2.7515063285827637, "rewards/margins": 6.31574010848999, "rewards/rejected": -9.067246437072754, "step": 2320 }, { "epoch": 3.73, "learning_rate": 3.2560443915973047e-07, "logits/chosen": -1.7068179845809937, "logits/rejected": -1.7468098402023315, "logps/chosen": -119.6235122680664, "logps/rejected": -190.67669677734375, "loss": 0.0057, "rewards/accuracies": 1.0, "rewards/chosen": -3.3555235862731934, "rewards/margins": 8.19668960571289, "rewards/rejected": -11.552212715148926, "step": 2321 }, { "epoch": 3.73, "learning_rate": 3.255053507728894e-07, "logits/chosen": -1.6337318420410156, "logits/rejected": -1.7015316486358643, "logps/chosen": -114.07081604003906, "logps/rejected": -207.4059600830078, "loss": 0.0138, "rewards/accuracies": 1.0, "rewards/chosen": -3.1083133220672607, "rewards/margins": 8.78349494934082, "rewards/rejected": -11.891809463500977, "step": 2322 }, { "epoch": 3.73, "learning_rate": 3.2540626238604833e-07, "logits/chosen": -1.8247907161712646, "logits/rejected": -1.716069221496582, "logps/chosen": -124.78273010253906, "logps/rejected": -208.68226623535156, "loss": 0.0066, "rewards/accuracies": 1.0, "rewards/chosen": -3.1644246578216553, "rewards/margins": 10.699517250061035, "rewards/rejected": -13.863941192626953, "step": 2323 }, { "epoch": 3.73, "learning_rate": 3.2530717399920724e-07, "logits/chosen": -1.5708287954330444, "logits/rejected": -1.6379966735839844, "logps/chosen": -157.41062927246094, "logps/rejected": -191.54852294921875, "loss": 0.0937, "rewards/accuracies": 0.75, "rewards/chosen": -6.938906192779541, "rewards/margins": 3.676086664199829, "rewards/rejected": -10.61499309539795, "step": 2324 }, { "epoch": 3.73, "learning_rate": 3.2520808561236625e-07, "logits/chosen": -1.6069053411483765, "logits/rejected": -1.6728436946868896, "logps/chosen": -117.98716735839844, "logps/rejected": -201.4302520751953, "loss": 0.0237, "rewards/accuracies": 1.0, "rewards/chosen": -3.329711675643921, "rewards/margins": 7.865008354187012, "rewards/rejected": -11.194720268249512, "step": 2325 }, { "epoch": 3.73, "learning_rate": 3.2510899722552516e-07, "logits/chosen": -1.5425291061401367, "logits/rejected": -1.484020471572876, "logps/chosen": -124.76675415039062, "logps/rejected": -212.63079833984375, "loss": 0.0163, "rewards/accuracies": 1.0, "rewards/chosen": -3.8208587169647217, "rewards/margins": 8.365591049194336, "rewards/rejected": -12.18644905090332, "step": 2326 }, { "epoch": 3.74, "learning_rate": 3.2500990883868407e-07, "logits/chosen": -1.5872973203659058, "logits/rejected": -1.5947365760803223, "logps/chosen": -140.48973083496094, "logps/rejected": -178.7237548828125, "loss": 0.1116, "rewards/accuracies": 1.0, "rewards/chosen": -4.980966567993164, "rewards/margins": 5.018335342407227, "rewards/rejected": -9.999300956726074, "step": 2327 }, { "epoch": 3.74, "learning_rate": 3.2491082045184303e-07, "logits/chosen": -1.6542290449142456, "logits/rejected": -1.6306202411651611, "logps/chosen": -119.72705078125, "logps/rejected": -170.83331298828125, "loss": 0.0138, "rewards/accuracies": 1.0, "rewards/chosen": -3.1729466915130615, "rewards/margins": 6.113062858581543, "rewards/rejected": -9.2860107421875, "step": 2328 }, { "epoch": 3.74, "learning_rate": 3.2481173206500193e-07, "logits/chosen": -1.6668219566345215, "logits/rejected": -1.6958229541778564, "logps/chosen": -94.63520812988281, "logps/rejected": -160.02536010742188, "loss": 0.1087, "rewards/accuracies": 1.0, "rewards/chosen": -1.857431173324585, "rewards/margins": 6.320005416870117, "rewards/rejected": -8.177435874938965, "step": 2329 }, { "epoch": 3.74, "learning_rate": 3.2471264367816095e-07, "logits/chosen": -1.610055923461914, "logits/rejected": -1.6626219749450684, "logps/chosen": -134.24740600585938, "logps/rejected": -247.36093139648438, "loss": 0.0241, "rewards/accuracies": 1.0, "rewards/chosen": -4.773460865020752, "rewards/margins": 10.339125633239746, "rewards/rejected": -15.11258602142334, "step": 2330 }, { "epoch": 3.74, "learning_rate": 3.2461355529131985e-07, "logits/chosen": -1.6237703561782837, "logits/rejected": -1.675407886505127, "logps/chosen": -112.81748962402344, "logps/rejected": -183.80679321289062, "loss": 0.0055, "rewards/accuracies": 1.0, "rewards/chosen": -3.484699249267578, "rewards/margins": 4.704216957092285, "rewards/rejected": -8.188915252685547, "step": 2331 }, { "epoch": 3.74, "learning_rate": 3.2451446690447876e-07, "logits/chosen": -1.8585542440414429, "logits/rejected": -1.794982671737671, "logps/chosen": -112.9943618774414, "logps/rejected": -195.46054077148438, "loss": 0.0243, "rewards/accuracies": 1.0, "rewards/chosen": -2.6111416816711426, "rewards/margins": 7.873619079589844, "rewards/rejected": -10.484761238098145, "step": 2332 }, { "epoch": 3.74, "learning_rate": 3.244153785176377e-07, "logits/chosen": -1.6421979665756226, "logits/rejected": -1.751068353652954, "logps/chosen": -111.4361572265625, "logps/rejected": -242.04322814941406, "loss": 0.0039, "rewards/accuracies": 1.0, "rewards/chosen": -2.3762691020965576, "rewards/margins": 9.660629272460938, "rewards/rejected": -12.036897659301758, "step": 2333 }, { "epoch": 3.75, "learning_rate": 3.2431629013079663e-07, "logits/chosen": -1.7615444660186768, "logits/rejected": -1.7758668661117554, "logps/chosen": -86.07138061523438, "logps/rejected": -232.09561157226562, "loss": 0.0266, "rewards/accuracies": 1.0, "rewards/chosen": -2.0530905723571777, "rewards/margins": 12.606305122375488, "rewards/rejected": -14.659395217895508, "step": 2334 }, { "epoch": 3.75, "learning_rate": 3.2421720174395564e-07, "logits/chosen": -1.6577832698822021, "logits/rejected": -1.6700713634490967, "logps/chosen": -104.39529418945312, "logps/rejected": -190.57630920410156, "loss": 0.0593, "rewards/accuracies": 1.0, "rewards/chosen": -2.4315361976623535, "rewards/margins": 7.798834800720215, "rewards/rejected": -10.230371475219727, "step": 2335 }, { "epoch": 3.75, "learning_rate": 3.2411811335711455e-07, "logits/chosen": -1.7692406177520752, "logits/rejected": -1.74700129032135, "logps/chosen": -124.5422592163086, "logps/rejected": -182.0872802734375, "loss": 0.023, "rewards/accuracies": 1.0, "rewards/chosen": -4.151364326477051, "rewards/margins": 5.880071640014648, "rewards/rejected": -10.0314359664917, "step": 2336 }, { "epoch": 3.75, "learning_rate": 3.2401902497027345e-07, "logits/chosen": -1.7526332139968872, "logits/rejected": -1.7095459699630737, "logps/chosen": -117.37751770019531, "logps/rejected": -168.01138305664062, "loss": 0.0578, "rewards/accuracies": 1.0, "rewards/chosen": -2.846921920776367, "rewards/margins": 5.821063995361328, "rewards/rejected": -8.667986869812012, "step": 2337 }, { "epoch": 3.75, "learning_rate": 3.239199365834324e-07, "logits/chosen": -1.5605442523956299, "logits/rejected": -1.5163935422897339, "logps/chosen": -134.75376892089844, "logps/rejected": -220.61788940429688, "loss": 0.0047, "rewards/accuracies": 1.0, "rewards/chosen": -3.277048110961914, "rewards/margins": 8.188934326171875, "rewards/rejected": -11.465982437133789, "step": 2338 }, { "epoch": 3.75, "learning_rate": 3.238208481965913e-07, "logits/chosen": -1.803006649017334, "logits/rejected": -1.8004748821258545, "logps/chosen": -111.09736633300781, "logps/rejected": -191.33941650390625, "loss": 0.0259, "rewards/accuracies": 1.0, "rewards/chosen": -2.6571481227874756, "rewards/margins": 6.831193923950195, "rewards/rejected": -9.48834228515625, "step": 2339 }, { "epoch": 3.76, "learning_rate": 3.237217598097503e-07, "logits/chosen": -1.6906901597976685, "logits/rejected": -1.7215460538864136, "logps/chosen": -122.63764953613281, "logps/rejected": -232.069580078125, "loss": 0.0186, "rewards/accuracies": 1.0, "rewards/chosen": -2.855917453765869, "rewards/margins": 10.589788436889648, "rewards/rejected": -13.44570541381836, "step": 2340 }, { "epoch": 3.76, "learning_rate": 3.2362267142290924e-07, "logits/chosen": -1.758908987045288, "logits/rejected": -1.7656493186950684, "logps/chosen": -120.91925048828125, "logps/rejected": -209.1841583251953, "loss": 0.007, "rewards/accuracies": 1.0, "rewards/chosen": -2.3603930473327637, "rewards/margins": 8.657021522521973, "rewards/rejected": -11.017414093017578, "step": 2341 }, { "epoch": 3.76, "learning_rate": 3.2352358303606815e-07, "logits/chosen": -1.6986446380615234, "logits/rejected": -1.7664356231689453, "logps/chosen": -143.6707000732422, "logps/rejected": -224.37762451171875, "loss": 0.1141, "rewards/accuracies": 0.75, "rewards/chosen": -5.645977973937988, "rewards/margins": 5.843169212341309, "rewards/rejected": -11.489147186279297, "step": 2342 }, { "epoch": 3.76, "learning_rate": 3.234244946492271e-07, "logits/chosen": -1.698897123336792, "logits/rejected": -1.7202891111373901, "logps/chosen": -112.34027099609375, "logps/rejected": -222.67843627929688, "loss": 0.0246, "rewards/accuracies": 1.0, "rewards/chosen": -3.553938388824463, "rewards/margins": 10.97614574432373, "rewards/rejected": -14.530084609985352, "step": 2343 }, { "epoch": 3.76, "learning_rate": 3.23325406262386e-07, "logits/chosen": -1.8231227397918701, "logits/rejected": -1.698907494544983, "logps/chosen": -145.13267517089844, "logps/rejected": -189.7211151123047, "loss": 0.0598, "rewards/accuracies": 1.0, "rewards/chosen": -3.2401304244995117, "rewards/margins": 7.391724586486816, "rewards/rejected": -10.631855010986328, "step": 2344 }, { "epoch": 3.76, "learning_rate": 3.2322631787554497e-07, "logits/chosen": -1.6546614170074463, "logits/rejected": -1.6445395946502686, "logps/chosen": -156.4005889892578, "logps/rejected": -213.60418701171875, "loss": 0.0889, "rewards/accuracies": 1.0, "rewards/chosen": -4.281661033630371, "rewards/margins": 6.893617630004883, "rewards/rejected": -11.175278663635254, "step": 2345 }, { "epoch": 3.77, "learning_rate": 3.2312722948870393e-07, "logits/chosen": -1.6953094005584717, "logits/rejected": -1.758908987045288, "logps/chosen": -119.35540771484375, "logps/rejected": -227.37368774414062, "loss": 0.0366, "rewards/accuracies": 1.0, "rewards/chosen": -4.35043478012085, "rewards/margins": 8.784727096557617, "rewards/rejected": -13.135162353515625, "step": 2346 }, { "epoch": 3.77, "learning_rate": 3.2302814110186284e-07, "logits/chosen": -1.7744874954223633, "logits/rejected": -1.7590372562408447, "logps/chosen": -122.46107482910156, "logps/rejected": -183.22793579101562, "loss": 0.069, "rewards/accuracies": 1.0, "rewards/chosen": -2.192976713180542, "rewards/margins": 8.055132865905762, "rewards/rejected": -10.248109817504883, "step": 2347 }, { "epoch": 3.77, "learning_rate": 3.229290527150218e-07, "logits/chosen": -1.6820406913757324, "logits/rejected": -1.6836700439453125, "logps/chosen": -101.22765350341797, "logps/rejected": -187.42156982421875, "loss": 0.0483, "rewards/accuracies": 1.0, "rewards/chosen": -2.75233793258667, "rewards/margins": 7.89308500289917, "rewards/rejected": -10.645423889160156, "step": 2348 }, { "epoch": 3.77, "learning_rate": 3.228299643281807e-07, "logits/chosen": -1.5699925422668457, "logits/rejected": -1.5580295324325562, "logps/chosen": -111.20899200439453, "logps/rejected": -191.59292602539062, "loss": 0.0054, "rewards/accuracies": 1.0, "rewards/chosen": -3.48868989944458, "rewards/margins": 5.292327404022217, "rewards/rejected": -8.781017303466797, "step": 2349 }, { "epoch": 3.77, "learning_rate": 3.2273087594133967e-07, "logits/chosen": -1.5493375062942505, "logits/rejected": -1.673671007156372, "logps/chosen": -111.25311279296875, "logps/rejected": -224.96951293945312, "loss": 0.059, "rewards/accuracies": 1.0, "rewards/chosen": -3.7368862628936768, "rewards/margins": 8.661574363708496, "rewards/rejected": -12.398460388183594, "step": 2350 }, { "epoch": 3.77, "learning_rate": 3.226317875544986e-07, "logits/chosen": -1.6881983280181885, "logits/rejected": -1.6678149700164795, "logps/chosen": -142.19288635253906, "logps/rejected": -218.66885375976562, "loss": 0.0355, "rewards/accuracies": 1.0, "rewards/chosen": -4.889679431915283, "rewards/margins": 7.520153999328613, "rewards/rejected": -12.409832954406738, "step": 2351 }, { "epoch": 3.78, "learning_rate": 3.2253269916765753e-07, "logits/chosen": -1.6784650087356567, "logits/rejected": -1.6658210754394531, "logps/chosen": -100.40452575683594, "logps/rejected": -190.6077117919922, "loss": 0.0677, "rewards/accuracies": 1.0, "rewards/chosen": -2.958136558532715, "rewards/margins": 8.885456085205078, "rewards/rejected": -11.843591690063477, "step": 2352 }, { "epoch": 3.78, "learning_rate": 3.224336107808165e-07, "logits/chosen": -1.7221271991729736, "logits/rejected": -1.7038224935531616, "logps/chosen": -140.16152954101562, "logps/rejected": -236.28660583496094, "loss": 0.0189, "rewards/accuracies": 1.0, "rewards/chosen": -4.551783084869385, "rewards/margins": 9.807587623596191, "rewards/rejected": -14.359370231628418, "step": 2353 }, { "epoch": 3.78, "learning_rate": 3.223345223939754e-07, "logits/chosen": -1.620375633239746, "logits/rejected": -1.6621818542480469, "logps/chosen": -146.3239288330078, "logps/rejected": -211.66464233398438, "loss": 0.0346, "rewards/accuracies": 1.0, "rewards/chosen": -5.47210693359375, "rewards/margins": 6.051090240478516, "rewards/rejected": -11.523197174072266, "step": 2354 }, { "epoch": 3.78, "learning_rate": 3.2223543400713436e-07, "logits/chosen": -1.57815420627594, "logits/rejected": -1.5759004354476929, "logps/chosen": -112.56422424316406, "logps/rejected": -195.11215209960938, "loss": 0.0324, "rewards/accuracies": 1.0, "rewards/chosen": -3.9592342376708984, "rewards/margins": 6.746616840362549, "rewards/rejected": -10.705850601196289, "step": 2355 }, { "epoch": 3.78, "learning_rate": 3.221363456202933e-07, "logits/chosen": -1.7194838523864746, "logits/rejected": -1.7013369798660278, "logps/chosen": -128.4029998779297, "logps/rejected": -209.1298065185547, "loss": 0.014, "rewards/accuracies": 1.0, "rewards/chosen": -4.851563930511475, "rewards/margins": 6.782806396484375, "rewards/rejected": -11.634370803833008, "step": 2356 }, { "epoch": 3.78, "learning_rate": 3.220372572334522e-07, "logits/chosen": -1.5145419836044312, "logits/rejected": -1.5702106952667236, "logps/chosen": -101.69506072998047, "logps/rejected": -219.94424438476562, "loss": 0.0245, "rewards/accuracies": 1.0, "rewards/chosen": -2.3172786235809326, "rewards/margins": 8.498668670654297, "rewards/rejected": -10.815947532653809, "step": 2357 }, { "epoch": 3.78, "learning_rate": 3.219381688466112e-07, "logits/chosen": -1.6025053262710571, "logits/rejected": -1.546595573425293, "logps/chosen": -138.9676513671875, "logps/rejected": -217.17454528808594, "loss": 0.0383, "rewards/accuracies": 1.0, "rewards/chosen": -4.587996482849121, "rewards/margins": 8.27702808380127, "rewards/rejected": -12.86502456665039, "step": 2358 }, { "epoch": 3.79, "learning_rate": 3.218390804597701e-07, "logits/chosen": -1.6256418228149414, "logits/rejected": -1.726883053779602, "logps/chosen": -103.54171752929688, "logps/rejected": -241.27059936523438, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": -3.119443893432617, "rewards/margins": 10.963056564331055, "rewards/rejected": -14.082500457763672, "step": 2359 }, { "epoch": 3.79, "learning_rate": 3.21739992072929e-07, "logits/chosen": -1.4867441654205322, "logits/rejected": -1.4229871034622192, "logps/chosen": -117.802490234375, "logps/rejected": -173.11228942871094, "loss": 0.0657, "rewards/accuracies": 1.0, "rewards/chosen": -4.639409065246582, "rewards/margins": 6.203106880187988, "rewards/rejected": -10.84251594543457, "step": 2360 }, { "epoch": 3.79, "learning_rate": 3.2164090368608796e-07, "logits/chosen": -1.6992586851119995, "logits/rejected": -1.718916654586792, "logps/chosen": -136.71060180664062, "logps/rejected": -229.361083984375, "loss": 0.0133, "rewards/accuracies": 1.0, "rewards/chosen": -4.53383207321167, "rewards/margins": 8.447216987609863, "rewards/rejected": -12.981049537658691, "step": 2361 }, { "epoch": 3.79, "learning_rate": 3.215418152992469e-07, "logits/chosen": -1.5216670036315918, "logits/rejected": -1.5689162015914917, "logps/chosen": -120.87297058105469, "logps/rejected": -182.3489227294922, "loss": 0.0194, "rewards/accuracies": 1.0, "rewards/chosen": -2.8417654037475586, "rewards/margins": 6.106678009033203, "rewards/rejected": -8.948444366455078, "step": 2362 }, { "epoch": 3.79, "learning_rate": 3.214427269124059e-07, "logits/chosen": -1.6336767673492432, "logits/rejected": -1.6985676288604736, "logps/chosen": -121.23904418945312, "logps/rejected": -194.4447479248047, "loss": 0.0107, "rewards/accuracies": 1.0, "rewards/chosen": -2.633866548538208, "rewards/margins": 7.228724002838135, "rewards/rejected": -9.862590789794922, "step": 2363 }, { "epoch": 3.79, "learning_rate": 3.213436385255648e-07, "logits/chosen": -1.7223989963531494, "logits/rejected": -1.7175618410110474, "logps/chosen": -145.15274047851562, "logps/rejected": -217.1385498046875, "loss": 0.0078, "rewards/accuracies": 1.0, "rewards/chosen": -6.208574295043945, "rewards/margins": 6.679978847503662, "rewards/rejected": -12.888551712036133, "step": 2364 }, { "epoch": 3.8, "learning_rate": 3.212445501387237e-07, "logits/chosen": -1.4615838527679443, "logits/rejected": -1.5103819370269775, "logps/chosen": -134.48748779296875, "logps/rejected": -164.2128448486328, "loss": 0.0233, "rewards/accuracies": 1.0, "rewards/chosen": -3.3462653160095215, "rewards/margins": 3.9434120655059814, "rewards/rejected": -7.289677619934082, "step": 2365 }, { "epoch": 3.8, "learning_rate": 3.2114546175188265e-07, "logits/chosen": -1.710462212562561, "logits/rejected": -1.6803940534591675, "logps/chosen": -119.16873168945312, "logps/rejected": -168.99615478515625, "loss": 0.041, "rewards/accuracies": 1.0, "rewards/chosen": -3.70656156539917, "rewards/margins": 4.804478645324707, "rewards/rejected": -8.511040687561035, "step": 2366 }, { "epoch": 3.8, "learning_rate": 3.210463733650416e-07, "logits/chosen": -1.8582797050476074, "logits/rejected": -1.869641661643982, "logps/chosen": -112.74285888671875, "logps/rejected": -196.01907348632812, "loss": 0.0167, "rewards/accuracies": 1.0, "rewards/chosen": -3.1690802574157715, "rewards/margins": 5.020543575286865, "rewards/rejected": -8.189623832702637, "step": 2367 }, { "epoch": 3.8, "learning_rate": 3.2094728497820057e-07, "logits/chosen": -1.6420475244522095, "logits/rejected": -1.6821105480194092, "logps/chosen": -101.19828796386719, "logps/rejected": -136.92135620117188, "loss": 0.0792, "rewards/accuracies": 0.75, "rewards/chosen": -3.9937186241149902, "rewards/margins": 2.7031593322753906, "rewards/rejected": -6.696877479553223, "step": 2368 }, { "epoch": 3.8, "learning_rate": 3.208481965913595e-07, "logits/chosen": -1.6301823854446411, "logits/rejected": -1.7212028503417969, "logps/chosen": -111.843505859375, "logps/rejected": -192.34494018554688, "loss": 0.0137, "rewards/accuracies": 1.0, "rewards/chosen": -1.7747948169708252, "rewards/margins": 9.053443908691406, "rewards/rejected": -10.828239440917969, "step": 2369 }, { "epoch": 3.8, "learning_rate": 3.207491082045184e-07, "logits/chosen": -1.6988943815231323, "logits/rejected": -1.6619665622711182, "logps/chosen": -116.53617858886719, "logps/rejected": -160.88766479492188, "loss": 0.0052, "rewards/accuracies": 1.0, "rewards/chosen": -4.622095584869385, "rewards/margins": 4.9193315505981445, "rewards/rejected": -9.541427612304688, "step": 2370 }, { "epoch": 3.81, "learning_rate": 3.2065001981767734e-07, "logits/chosen": -1.588718056678772, "logits/rejected": -1.6597381830215454, "logps/chosen": -119.9001235961914, "logps/rejected": -257.44061279296875, "loss": 0.0123, "rewards/accuracies": 1.0, "rewards/chosen": -3.1289985179901123, "rewards/margins": 8.635900497436523, "rewards/rejected": -11.764899253845215, "step": 2371 }, { "epoch": 3.81, "learning_rate": 3.205509314308363e-07, "logits/chosen": -1.683728814125061, "logits/rejected": -1.7181702852249146, "logps/chosen": -114.10076141357422, "logps/rejected": -177.61656188964844, "loss": 0.0364, "rewards/accuracies": 1.0, "rewards/chosen": -4.449475288391113, "rewards/margins": 5.39668607711792, "rewards/rejected": -9.846160888671875, "step": 2372 }, { "epoch": 3.81, "learning_rate": 3.2045184304399526e-07, "logits/chosen": -1.6036990880966187, "logits/rejected": -1.5912317037582397, "logps/chosen": -111.8170394897461, "logps/rejected": -181.03515625, "loss": 0.0071, "rewards/accuracies": 1.0, "rewards/chosen": -1.137857437133789, "rewards/margins": 8.951726913452148, "rewards/rejected": -10.089585304260254, "step": 2373 }, { "epoch": 3.81, "learning_rate": 3.2035275465715417e-07, "logits/chosen": -1.5701097249984741, "logits/rejected": -1.6142956018447876, "logps/chosen": -108.41219329833984, "logps/rejected": -192.8760986328125, "loss": 0.0276, "rewards/accuracies": 1.0, "rewards/chosen": -2.445307970046997, "rewards/margins": 8.14690113067627, "rewards/rejected": -10.592208862304688, "step": 2374 }, { "epoch": 3.81, "learning_rate": 3.202536662703131e-07, "logits/chosen": -1.700121521949768, "logits/rejected": -1.7189815044403076, "logps/chosen": -105.03556060791016, "logps/rejected": -184.64761352539062, "loss": 0.0627, "rewards/accuracies": 1.0, "rewards/chosen": -3.426473379135132, "rewards/margins": 8.022259712219238, "rewards/rejected": -11.44873332977295, "step": 2375 }, { "epoch": 3.81, "learning_rate": 3.2015457788347204e-07, "logits/chosen": -1.6585737466812134, "logits/rejected": -1.6547126770019531, "logps/chosen": -146.72642517089844, "logps/rejected": -190.4581298828125, "loss": 0.0299, "rewards/accuracies": 1.0, "rewards/chosen": -4.476183891296387, "rewards/margins": 6.1036906242370605, "rewards/rejected": -10.579874038696289, "step": 2376 }, { "epoch": 3.82, "learning_rate": 3.2005548949663094e-07, "logits/chosen": -1.567196249961853, "logits/rejected": -1.6953275203704834, "logps/chosen": -120.19379425048828, "logps/rejected": -219.40170288085938, "loss": 0.0147, "rewards/accuracies": 1.0, "rewards/chosen": -3.877488613128662, "rewards/margins": 8.766547203063965, "rewards/rejected": -12.644036293029785, "step": 2377 }, { "epoch": 3.82, "learning_rate": 3.1995640110978996e-07, "logits/chosen": -1.5845746994018555, "logits/rejected": -1.6153608560562134, "logps/chosen": -104.49258422851562, "logps/rejected": -210.97772216796875, "loss": 0.0313, "rewards/accuracies": 1.0, "rewards/chosen": -3.224562883377075, "rewards/margins": 7.893970489501953, "rewards/rejected": -11.118534088134766, "step": 2378 }, { "epoch": 3.82, "learning_rate": 3.1985731272294886e-07, "logits/chosen": -1.601705551147461, "logits/rejected": -1.6573858261108398, "logps/chosen": -129.14215087890625, "logps/rejected": -196.4967041015625, "loss": 0.0076, "rewards/accuracies": 1.0, "rewards/chosen": -4.214423656463623, "rewards/margins": 7.137455940246582, "rewards/rejected": -11.351880073547363, "step": 2379 }, { "epoch": 3.82, "learning_rate": 3.1975822433610777e-07, "logits/chosen": -1.604797601699829, "logits/rejected": -1.6751773357391357, "logps/chosen": -97.13069152832031, "logps/rejected": -149.95175170898438, "loss": 0.0314, "rewards/accuracies": 1.0, "rewards/chosen": -3.6037068367004395, "rewards/margins": 3.871598243713379, "rewards/rejected": -7.475305557250977, "step": 2380 }, { "epoch": 3.82, "learning_rate": 3.1965913594926673e-07, "logits/chosen": -1.6576404571533203, "logits/rejected": -1.625542163848877, "logps/chosen": -105.68020629882812, "logps/rejected": -158.76905822753906, "loss": 0.0741, "rewards/accuracies": 1.0, "rewards/chosen": -2.7899646759033203, "rewards/margins": 5.787967205047607, "rewards/rejected": -8.577932357788086, "step": 2381 }, { "epoch": 3.82, "learning_rate": 3.1956004756242564e-07, "logits/chosen": -1.6971545219421387, "logits/rejected": -1.6002506017684937, "logps/chosen": -133.5108184814453, "logps/rejected": -222.41064453125, "loss": 0.0187, "rewards/accuracies": 1.0, "rewards/chosen": -5.55681037902832, "rewards/margins": 7.985811710357666, "rewards/rejected": -13.542622566223145, "step": 2382 }, { "epoch": 3.83, "learning_rate": 3.1946095917558465e-07, "logits/chosen": -1.6375170946121216, "logits/rejected": -1.7156740427017212, "logps/chosen": -113.85963439941406, "logps/rejected": -235.859619140625, "loss": 0.0172, "rewards/accuracies": 1.0, "rewards/chosen": -2.086759328842163, "rewards/margins": 9.886980056762695, "rewards/rejected": -11.973739624023438, "step": 2383 }, { "epoch": 3.83, "learning_rate": 3.1936187078874356e-07, "logits/chosen": -1.6829006671905518, "logits/rejected": -1.5552351474761963, "logps/chosen": -140.96824645996094, "logps/rejected": -206.91104125976562, "loss": 0.0108, "rewards/accuracies": 1.0, "rewards/chosen": -2.9780287742614746, "rewards/margins": 9.050844192504883, "rewards/rejected": -12.028873443603516, "step": 2384 }, { "epoch": 3.83, "learning_rate": 3.1926278240190246e-07, "logits/chosen": -1.724962830543518, "logits/rejected": -1.739586353302002, "logps/chosen": -100.47004699707031, "logps/rejected": -179.01663208007812, "loss": 0.0087, "rewards/accuracies": 1.0, "rewards/chosen": -2.5453970432281494, "rewards/margins": 7.040555000305176, "rewards/rejected": -9.585952758789062, "step": 2385 }, { "epoch": 3.83, "learning_rate": 3.191636940150614e-07, "logits/chosen": -1.701669692993164, "logits/rejected": -1.74045729637146, "logps/chosen": -129.35650634765625, "logps/rejected": -188.23065185546875, "loss": 0.0567, "rewards/accuracies": 1.0, "rewards/chosen": -4.787810325622559, "rewards/margins": 6.5041728019714355, "rewards/rejected": -11.291983604431152, "step": 2386 }, { "epoch": 3.83, "learning_rate": 3.1906460562822033e-07, "logits/chosen": -1.563277244567871, "logits/rejected": -1.5770463943481445, "logps/chosen": -85.0738296508789, "logps/rejected": -186.9622802734375, "loss": 0.0425, "rewards/accuracies": 1.0, "rewards/chosen": -0.8521764278411865, "rewards/margins": 9.412817001342773, "rewards/rejected": -10.264993667602539, "step": 2387 }, { "epoch": 3.83, "learning_rate": 3.1896551724137934e-07, "logits/chosen": -1.712384819984436, "logits/rejected": -1.623170256614685, "logps/chosen": -129.46151733398438, "logps/rejected": -197.38265991210938, "loss": 0.0256, "rewards/accuracies": 1.0, "rewards/chosen": -5.055704593658447, "rewards/margins": 7.686938285827637, "rewards/rejected": -12.742643356323242, "step": 2388 }, { "epoch": 3.83, "learning_rate": 3.1886642885453825e-07, "logits/chosen": -1.5608283281326294, "logits/rejected": -1.583633542060852, "logps/chosen": -132.79287719726562, "logps/rejected": -207.98831176757812, "loss": 0.0126, "rewards/accuracies": 1.0, "rewards/chosen": -3.2530317306518555, "rewards/margins": 7.404496669769287, "rewards/rejected": -10.657527923583984, "step": 2389 }, { "epoch": 3.84, "learning_rate": 3.1876734046769716e-07, "logits/chosen": -1.6924899816513062, "logits/rejected": -1.6144723892211914, "logps/chosen": -167.56300354003906, "logps/rejected": -210.65643310546875, "loss": 0.092, "rewards/accuracies": 1.0, "rewards/chosen": -4.864516258239746, "rewards/margins": 7.58135986328125, "rewards/rejected": -12.445877075195312, "step": 2390 }, { "epoch": 3.84, "learning_rate": 3.186682520808561e-07, "logits/chosen": -1.5679552555084229, "logits/rejected": -1.5654226541519165, "logps/chosen": -121.96119689941406, "logps/rejected": -176.71514892578125, "loss": 0.0501, "rewards/accuracies": 1.0, "rewards/chosen": -4.647735118865967, "rewards/margins": 5.510859966278076, "rewards/rejected": -10.158595085144043, "step": 2391 }, { "epoch": 3.84, "learning_rate": 3.18569163694015e-07, "logits/chosen": -1.6476850509643555, "logits/rejected": -1.5574887990951538, "logps/chosen": -96.47245025634766, "logps/rejected": -156.91429138183594, "loss": 0.0999, "rewards/accuracies": 1.0, "rewards/chosen": -1.9922966957092285, "rewards/margins": 7.601269721984863, "rewards/rejected": -9.593565940856934, "step": 2392 }, { "epoch": 3.84, "learning_rate": 3.18470075307174e-07, "logits/chosen": -1.51857328414917, "logits/rejected": -1.5557715892791748, "logps/chosen": -91.85493469238281, "logps/rejected": -196.0801544189453, "loss": 0.0111, "rewards/accuracies": 1.0, "rewards/chosen": -2.68640398979187, "rewards/margins": 9.26175308227539, "rewards/rejected": -11.948156356811523, "step": 2393 }, { "epoch": 3.84, "learning_rate": 3.1837098692033294e-07, "logits/chosen": -1.6385716199874878, "logits/rejected": -1.7108683586120605, "logps/chosen": -122.09945678710938, "logps/rejected": -212.04885864257812, "loss": 0.0128, "rewards/accuracies": 1.0, "rewards/chosen": -2.8243677616119385, "rewards/margins": 7.172274589538574, "rewards/rejected": -9.996642112731934, "step": 2394 }, { "epoch": 3.84, "learning_rate": 3.1827189853349185e-07, "logits/chosen": -1.7196998596191406, "logits/rejected": -1.7281506061553955, "logps/chosen": -97.26560974121094, "logps/rejected": -155.4130859375, "loss": 0.0284, "rewards/accuracies": 1.0, "rewards/chosen": -3.6287782192230225, "rewards/margins": 5.586073875427246, "rewards/rejected": -9.214852333068848, "step": 2395 }, { "epoch": 3.85, "learning_rate": 3.181728101466508e-07, "logits/chosen": -1.6173921823501587, "logits/rejected": -1.5268261432647705, "logps/chosen": -132.0015869140625, "logps/rejected": -200.84890747070312, "loss": 0.0241, "rewards/accuracies": 1.0, "rewards/chosen": -5.302964210510254, "rewards/margins": 8.486019134521484, "rewards/rejected": -13.788982391357422, "step": 2396 }, { "epoch": 3.85, "learning_rate": 3.180737217598097e-07, "logits/chosen": -1.5480588674545288, "logits/rejected": -1.5868220329284668, "logps/chosen": -91.56251525878906, "logps/rejected": -177.05752563476562, "loss": 0.0135, "rewards/accuracies": 1.0, "rewards/chosen": -1.7758076190948486, "rewards/margins": 6.836430072784424, "rewards/rejected": -8.612237930297852, "step": 2397 }, { "epoch": 3.85, "learning_rate": 3.179746333729686e-07, "logits/chosen": -1.6000757217407227, "logits/rejected": -1.7243598699569702, "logps/chosen": -112.73287963867188, "logps/rejected": -164.40501403808594, "loss": 0.0503, "rewards/accuracies": 1.0, "rewards/chosen": -4.013267517089844, "rewards/margins": 3.5765650272369385, "rewards/rejected": -7.589832305908203, "step": 2398 }, { "epoch": 3.85, "learning_rate": 3.1787554498612764e-07, "logits/chosen": -1.6200958490371704, "logits/rejected": -1.592132568359375, "logps/chosen": -101.67574310302734, "logps/rejected": -186.0350341796875, "loss": 0.0064, "rewards/accuracies": 1.0, "rewards/chosen": -3.1768546104431152, "rewards/margins": 8.273351669311523, "rewards/rejected": -11.450206756591797, "step": 2399 }, { "epoch": 3.85, "learning_rate": 3.1777645659928654e-07, "logits/chosen": -1.6390913724899292, "logits/rejected": -1.641045331954956, "logps/chosen": -95.40827178955078, "logps/rejected": -186.0070343017578, "loss": 0.0106, "rewards/accuracies": 1.0, "rewards/chosen": -2.262139320373535, "rewards/margins": 8.439342498779297, "rewards/rejected": -10.701482772827148, "step": 2400 }, { "epoch": 3.85, "learning_rate": 3.176773682124455e-07, "logits/chosen": -1.6030129194259644, "logits/rejected": -1.672055721282959, "logps/chosen": -83.38811492919922, "logps/rejected": -187.17803955078125, "loss": 0.0754, "rewards/accuracies": 1.0, "rewards/chosen": -1.2446540594100952, "rewards/margins": 7.308122634887695, "rewards/rejected": -8.552777290344238, "step": 2401 }, { "epoch": 3.86, "learning_rate": 3.175782798256044e-07, "logits/chosen": -1.5027625560760498, "logits/rejected": -1.535874843597412, "logps/chosen": -106.12118530273438, "logps/rejected": -182.5994873046875, "loss": 0.0165, "rewards/accuracies": 1.0, "rewards/chosen": -3.196977138519287, "rewards/margins": 8.487594604492188, "rewards/rejected": -11.684571266174316, "step": 2402 }, { "epoch": 3.86, "learning_rate": 3.174791914387633e-07, "logits/chosen": -1.6047086715698242, "logits/rejected": -1.6685631275177002, "logps/chosen": -110.91162109375, "logps/rejected": -212.10720825195312, "loss": 0.0426, "rewards/accuracies": 1.0, "rewards/chosen": -2.8099205493927, "rewards/margins": 7.41293478012085, "rewards/rejected": -10.222855567932129, "step": 2403 }, { "epoch": 3.86, "learning_rate": 3.1738010305192233e-07, "logits/chosen": -1.7531527280807495, "logits/rejected": -1.7570133209228516, "logps/chosen": -140.868408203125, "logps/rejected": -181.33047485351562, "loss": 0.0328, "rewards/accuracies": 1.0, "rewards/chosen": -4.019111156463623, "rewards/margins": 3.6669156551361084, "rewards/rejected": -7.6860270500183105, "step": 2404 }, { "epoch": 3.86, "learning_rate": 3.1728101466508124e-07, "logits/chosen": -1.7270870208740234, "logits/rejected": -1.601033329963684, "logps/chosen": -180.95741271972656, "logps/rejected": -210.2803497314453, "loss": 0.0659, "rewards/accuracies": 1.0, "rewards/chosen": -5.073790550231934, "rewards/margins": 5.729896545410156, "rewards/rejected": -10.80368709564209, "step": 2405 }, { "epoch": 3.86, "learning_rate": 3.171819262782402e-07, "logits/chosen": -1.7300307750701904, "logits/rejected": -1.6788334846496582, "logps/chosen": -128.97142028808594, "logps/rejected": -178.462158203125, "loss": 0.0354, "rewards/accuracies": 1.0, "rewards/chosen": -3.3873605728149414, "rewards/margins": 4.844751358032227, "rewards/rejected": -8.232111930847168, "step": 2406 }, { "epoch": 3.86, "learning_rate": 3.170828378913991e-07, "logits/chosen": -1.6013938188552856, "logits/rejected": -1.6272945404052734, "logps/chosen": -91.98778533935547, "logps/rejected": -185.08042907714844, "loss": 0.0252, "rewards/accuracies": 1.0, "rewards/chosen": -2.304780960083008, "rewards/margins": 8.771812438964844, "rewards/rejected": -11.076593399047852, "step": 2407 }, { "epoch": 3.87, "learning_rate": 3.16983749504558e-07, "logits/chosen": -1.7244064807891846, "logits/rejected": -1.61904776096344, "logps/chosen": -113.40894317626953, "logps/rejected": -199.55490112304688, "loss": 0.0157, "rewards/accuracies": 1.0, "rewards/chosen": -1.7658300399780273, "rewards/margins": 11.38885498046875, "rewards/rejected": -13.154685020446777, "step": 2408 }, { "epoch": 3.87, "learning_rate": 3.16884661117717e-07, "logits/chosen": -1.6555628776550293, "logits/rejected": -1.6733663082122803, "logps/chosen": -101.77973937988281, "logps/rejected": -181.6728057861328, "loss": 0.0581, "rewards/accuracies": 1.0, "rewards/chosen": -2.8746888637542725, "rewards/margins": 6.1852335929870605, "rewards/rejected": -9.059922218322754, "step": 2409 }, { "epoch": 3.87, "learning_rate": 3.1678557273087593e-07, "logits/chosen": -1.7198377847671509, "logits/rejected": -1.7709424495697021, "logps/chosen": -138.9071807861328, "logps/rejected": -242.89303588867188, "loss": 0.0199, "rewards/accuracies": 1.0, "rewards/chosen": -5.182053089141846, "rewards/margins": 10.256742477416992, "rewards/rejected": -15.43879508972168, "step": 2410 }, { "epoch": 3.87, "learning_rate": 3.166864843440349e-07, "logits/chosen": -1.7640382051467896, "logits/rejected": -1.6901030540466309, "logps/chosen": -107.20628356933594, "logps/rejected": -206.52774047851562, "loss": 0.0302, "rewards/accuracies": 1.0, "rewards/chosen": -3.213637113571167, "rewards/margins": 7.750899314880371, "rewards/rejected": -10.964536666870117, "step": 2411 }, { "epoch": 3.87, "learning_rate": 3.165873959571938e-07, "logits/chosen": -1.7535653114318848, "logits/rejected": -1.7580926418304443, "logps/chosen": -138.08677673339844, "logps/rejected": -189.32632446289062, "loss": 0.0344, "rewards/accuracies": 1.0, "rewards/chosen": -3.3030920028686523, "rewards/margins": 5.568613052368164, "rewards/rejected": -8.871705055236816, "step": 2412 }, { "epoch": 3.87, "learning_rate": 3.164883075703527e-07, "logits/chosen": -1.5582382678985596, "logits/rejected": -1.52957022190094, "logps/chosen": -84.17105865478516, "logps/rejected": -192.63201904296875, "loss": 0.0098, "rewards/accuracies": 1.0, "rewards/chosen": -2.3845877647399902, "rewards/margins": 7.85618782043457, "rewards/rejected": -10.240775108337402, "step": 2413 }, { "epoch": 3.87, "learning_rate": 3.163892191835117e-07, "logits/chosen": -1.5962769985198975, "logits/rejected": -1.613531231880188, "logps/chosen": -98.41378784179688, "logps/rejected": -196.9261932373047, "loss": 0.0411, "rewards/accuracies": 1.0, "rewards/chosen": -2.0163331031799316, "rewards/margins": 9.025720596313477, "rewards/rejected": -11.04205322265625, "step": 2414 }, { "epoch": 3.88, "learning_rate": 3.162901307966706e-07, "logits/chosen": -1.6582759618759155, "logits/rejected": -1.5894577503204346, "logps/chosen": -111.92469787597656, "logps/rejected": -187.90162658691406, "loss": 0.0361, "rewards/accuracies": 0.75, "rewards/chosen": -3.4613871574401855, "rewards/margins": 7.6016998291015625, "rewards/rejected": -11.06308650970459, "step": 2415 }, { "epoch": 3.88, "learning_rate": 3.161910424098296e-07, "logits/chosen": -1.8397626876831055, "logits/rejected": -1.8217613697052002, "logps/chosen": -91.76139068603516, "logps/rejected": -151.5057373046875, "loss": 0.0482, "rewards/accuracies": 1.0, "rewards/chosen": -1.956761360168457, "rewards/margins": 5.563185214996338, "rewards/rejected": -7.519946575164795, "step": 2416 }, { "epoch": 3.88, "learning_rate": 3.160919540229885e-07, "logits/chosen": -1.671904444694519, "logits/rejected": -1.6388635635375977, "logps/chosen": -125.98857879638672, "logps/rejected": -209.8167724609375, "loss": 0.0362, "rewards/accuracies": 1.0, "rewards/chosen": -4.444940090179443, "rewards/margins": 8.21549129486084, "rewards/rejected": -12.660431861877441, "step": 2417 }, { "epoch": 3.88, "learning_rate": 3.159928656361474e-07, "logits/chosen": -1.7634985446929932, "logits/rejected": -1.747597098350525, "logps/chosen": -155.8622589111328, "logps/rejected": -228.66717529296875, "loss": 0.0584, "rewards/accuracies": 1.0, "rewards/chosen": -4.420461654663086, "rewards/margins": 7.944371700286865, "rewards/rejected": -12.36483383178711, "step": 2418 }, { "epoch": 3.88, "learning_rate": 3.1589377724930635e-07, "logits/chosen": -1.8033969402313232, "logits/rejected": -1.7551662921905518, "logps/chosen": -125.44935607910156, "logps/rejected": -194.17868041992188, "loss": 0.0111, "rewards/accuracies": 1.0, "rewards/chosen": -3.5067336559295654, "rewards/margins": 8.15127182006836, "rewards/rejected": -11.658005714416504, "step": 2419 }, { "epoch": 3.88, "learning_rate": 3.157946888624653e-07, "logits/chosen": -1.543353796005249, "logits/rejected": -1.6781220436096191, "logps/chosen": -101.99571228027344, "logps/rejected": -217.25936889648438, "loss": 0.075, "rewards/accuracies": 1.0, "rewards/chosen": -2.4381635189056396, "rewards/margins": 7.805092811584473, "rewards/rejected": -10.243256568908691, "step": 2420 }, { "epoch": 3.89, "learning_rate": 3.156956004756243e-07, "logits/chosen": -1.7035565376281738, "logits/rejected": -1.5942926406860352, "logps/chosen": -109.42706298828125, "logps/rejected": -146.5576629638672, "loss": 0.0362, "rewards/accuracies": 1.0, "rewards/chosen": -2.7436060905456543, "rewards/margins": 5.860665321350098, "rewards/rejected": -8.60427188873291, "step": 2421 }, { "epoch": 3.89, "learning_rate": 3.155965120887832e-07, "logits/chosen": -1.6925337314605713, "logits/rejected": -1.6763392686843872, "logps/chosen": -132.4579620361328, "logps/rejected": -146.0277862548828, "loss": 0.1227, "rewards/accuracies": 0.75, "rewards/chosen": -3.8759727478027344, "rewards/margins": 2.685601234436035, "rewards/rejected": -6.5615739822387695, "step": 2422 }, { "epoch": 3.89, "learning_rate": 3.154974237019421e-07, "logits/chosen": -1.609245777130127, "logits/rejected": -1.595859169960022, "logps/chosen": -116.06646728515625, "logps/rejected": -199.58837890625, "loss": 0.0106, "rewards/accuracies": 1.0, "rewards/chosen": -2.944540500640869, "rewards/margins": 7.1418609619140625, "rewards/rejected": -10.08640193939209, "step": 2423 }, { "epoch": 3.89, "learning_rate": 3.1539833531510105e-07, "logits/chosen": -1.7667548656463623, "logits/rejected": -1.790874719619751, "logps/chosen": -104.67604064941406, "logps/rejected": -163.30999755859375, "loss": 0.0082, "rewards/accuracies": 1.0, "rewards/chosen": -2.7231812477111816, "rewards/margins": 4.874203681945801, "rewards/rejected": -7.597385406494141, "step": 2424 }, { "epoch": 3.89, "learning_rate": 3.1529924692826e-07, "logits/chosen": -1.6431548595428467, "logits/rejected": -1.65325927734375, "logps/chosen": -123.75814819335938, "logps/rejected": -229.10060119628906, "loss": 0.0102, "rewards/accuracies": 1.0, "rewards/chosen": -2.299292802810669, "rewards/margins": 11.158443450927734, "rewards/rejected": -13.457735061645508, "step": 2425 }, { "epoch": 3.89, "learning_rate": 3.1520015854141897e-07, "logits/chosen": -1.630470633506775, "logits/rejected": -1.7551894187927246, "logps/chosen": -129.8077850341797, "logps/rejected": -240.2442626953125, "loss": 0.0412, "rewards/accuracies": 1.0, "rewards/chosen": -4.040369987487793, "rewards/margins": 8.132010459899902, "rewards/rejected": -12.172380447387695, "step": 2426 }, { "epoch": 3.9, "learning_rate": 3.151010701545779e-07, "logits/chosen": -1.7194890975952148, "logits/rejected": -1.8156936168670654, "logps/chosen": -125.35464477539062, "logps/rejected": -221.14886474609375, "loss": 0.0072, "rewards/accuracies": 1.0, "rewards/chosen": -4.378241539001465, "rewards/margins": 7.97416353225708, "rewards/rejected": -12.352405548095703, "step": 2427 }, { "epoch": 3.9, "learning_rate": 3.150019817677368e-07, "logits/chosen": -1.606913685798645, "logits/rejected": -1.6412755250930786, "logps/chosen": -88.26358032226562, "logps/rejected": -188.1614227294922, "loss": 0.027, "rewards/accuracies": 1.0, "rewards/chosen": -2.7785019874572754, "rewards/margins": 9.391172409057617, "rewards/rejected": -12.16967487335205, "step": 2428 }, { "epoch": 3.9, "learning_rate": 3.1490289338089574e-07, "logits/chosen": -1.6635327339172363, "logits/rejected": -1.6889121532440186, "logps/chosen": -119.38616943359375, "logps/rejected": -192.55941772460938, "loss": 0.0822, "rewards/accuracies": 1.0, "rewards/chosen": -3.971400737762451, "rewards/margins": 7.428398609161377, "rewards/rejected": -11.399799346923828, "step": 2429 }, { "epoch": 3.9, "learning_rate": 3.148038049940547e-07, "logits/chosen": -1.522049903869629, "logits/rejected": -1.67695951461792, "logps/chosen": -149.18856811523438, "logps/rejected": -261.0108642578125, "loss": 0.0364, "rewards/accuracies": 1.0, "rewards/chosen": -5.789420127868652, "rewards/margins": 9.74087905883789, "rewards/rejected": -15.530299186706543, "step": 2430 }, { "epoch": 3.9, "learning_rate": 3.147047166072136e-07, "logits/chosen": -1.5759978294372559, "logits/rejected": -1.5609081983566284, "logps/chosen": -92.87998962402344, "logps/rejected": -138.20327758789062, "loss": 0.0457, "rewards/accuracies": 1.0, "rewards/chosen": -1.9554953575134277, "rewards/margins": 4.999082088470459, "rewards/rejected": -6.954577445983887, "step": 2431 }, { "epoch": 3.9, "learning_rate": 3.1460562822037257e-07, "logits/chosen": -1.6905136108398438, "logits/rejected": -1.6488957405090332, "logps/chosen": -99.2413558959961, "logps/rejected": -142.98231506347656, "loss": 0.0346, "rewards/accuracies": 1.0, "rewards/chosen": -2.011648416519165, "rewards/margins": 4.788084983825684, "rewards/rejected": -6.7997331619262695, "step": 2432 }, { "epoch": 3.91, "learning_rate": 3.145065398335315e-07, "logits/chosen": -1.65487802028656, "logits/rejected": -1.7152732610702515, "logps/chosen": -150.5223388671875, "logps/rejected": -248.24354553222656, "loss": 0.016, "rewards/accuracies": 1.0, "rewards/chosen": -4.168529033660889, "rewards/margins": 7.358080863952637, "rewards/rejected": -11.526610374450684, "step": 2433 }, { "epoch": 3.91, "learning_rate": 3.1440745144669043e-07, "logits/chosen": -1.6715703010559082, "logits/rejected": -1.6871179342269897, "logps/chosen": -102.06703186035156, "logps/rejected": -209.58905029296875, "loss": 0.0306, "rewards/accuracies": 1.0, "rewards/chosen": -0.3578950762748718, "rewards/margins": 12.237202644348145, "rewards/rejected": -12.595097541809082, "step": 2434 }, { "epoch": 3.91, "learning_rate": 3.143083630598494e-07, "logits/chosen": -1.440708041191101, "logits/rejected": -1.5244152545928955, "logps/chosen": -119.58341979980469, "logps/rejected": -207.9298095703125, "loss": 0.0624, "rewards/accuracies": 1.0, "rewards/chosen": -3.5864901542663574, "rewards/margins": 6.190683364868164, "rewards/rejected": -9.777173042297363, "step": 2435 }, { "epoch": 3.91, "learning_rate": 3.142092746730083e-07, "logits/chosen": -1.6872646808624268, "logits/rejected": -1.6220782995224, "logps/chosen": -110.09986877441406, "logps/rejected": -186.00332641601562, "loss": 0.0428, "rewards/accuracies": 1.0, "rewards/chosen": -3.355742931365967, "rewards/margins": 9.229270935058594, "rewards/rejected": -12.585014343261719, "step": 2436 }, { "epoch": 3.91, "learning_rate": 3.1411018628616726e-07, "logits/chosen": -1.753859043121338, "logits/rejected": -1.7516181468963623, "logps/chosen": -133.18873596191406, "logps/rejected": -204.59027099609375, "loss": 0.0334, "rewards/accuracies": 1.0, "rewards/chosen": -4.848236560821533, "rewards/margins": 6.3958258628845215, "rewards/rejected": -11.244062423706055, "step": 2437 }, { "epoch": 3.91, "learning_rate": 3.1401109789932617e-07, "logits/chosen": -1.6625372171401978, "logits/rejected": -1.650441288948059, "logps/chosen": -110.43911743164062, "logps/rejected": -194.53773498535156, "loss": 0.0161, "rewards/accuracies": 1.0, "rewards/chosen": -2.168846845626831, "rewards/margins": 8.868406295776367, "rewards/rejected": -11.037252426147461, "step": 2438 }, { "epoch": 3.91, "learning_rate": 3.1391200951248513e-07, "logits/chosen": -1.4720935821533203, "logits/rejected": -1.5541760921478271, "logps/chosen": -89.5753173828125, "logps/rejected": -195.80728149414062, "loss": 0.012, "rewards/accuracies": 1.0, "rewards/chosen": -2.00138258934021, "rewards/margins": 9.80927562713623, "rewards/rejected": -11.81065845489502, "step": 2439 }, { "epoch": 3.92, "learning_rate": 3.1381292112564403e-07, "logits/chosen": -1.6719281673431396, "logits/rejected": -1.6534889936447144, "logps/chosen": -116.26509857177734, "logps/rejected": -188.3282470703125, "loss": 0.1016, "rewards/accuracies": 1.0, "rewards/chosen": -3.4362869262695312, "rewards/margins": 7.416840553283691, "rewards/rejected": -10.853127479553223, "step": 2440 }, { "epoch": 3.92, "learning_rate": 3.13713832738803e-07, "logits/chosen": -1.7264108657836914, "logits/rejected": -1.6836025714874268, "logps/chosen": -129.27008056640625, "logps/rejected": -184.9966583251953, "loss": 0.0151, "rewards/accuracies": 1.0, "rewards/chosen": -3.797067165374756, "rewards/margins": 5.004114627838135, "rewards/rejected": -8.80118179321289, "step": 2441 }, { "epoch": 3.92, "learning_rate": 3.1361474435196195e-07, "logits/chosen": -1.7945678234100342, "logits/rejected": -1.785756230354309, "logps/chosen": -109.13490295410156, "logps/rejected": -221.2981414794922, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": -2.3065757751464844, "rewards/margins": 10.880393981933594, "rewards/rejected": -13.186969757080078, "step": 2442 }, { "epoch": 3.92, "learning_rate": 3.1351565596512086e-07, "logits/chosen": -1.708480715751648, "logits/rejected": -1.664196252822876, "logps/chosen": -149.47067260742188, "logps/rejected": -216.32342529296875, "loss": 0.0064, "rewards/accuracies": 1.0, "rewards/chosen": -4.7134270668029785, "rewards/margins": 6.618880271911621, "rewards/rejected": -11.332306861877441, "step": 2443 }, { "epoch": 3.92, "learning_rate": 3.134165675782798e-07, "logits/chosen": -1.6031821966171265, "logits/rejected": -1.552596926689148, "logps/chosen": -135.0286865234375, "logps/rejected": -192.23204040527344, "loss": 0.0144, "rewards/accuracies": 1.0, "rewards/chosen": -3.3780651092529297, "rewards/margins": 7.481261253356934, "rewards/rejected": -10.859326362609863, "step": 2444 }, { "epoch": 3.92, "learning_rate": 3.1331747919143873e-07, "logits/chosen": -1.6226836442947388, "logits/rejected": -1.6078803539276123, "logps/chosen": -132.01902770996094, "logps/rejected": -198.5802001953125, "loss": 0.0109, "rewards/accuracies": 1.0, "rewards/chosen": -4.22147798538208, "rewards/margins": 6.882156848907471, "rewards/rejected": -11.10363483428955, "step": 2445 }, { "epoch": 3.93, "learning_rate": 3.132183908045977e-07, "logits/chosen": -1.6077642440795898, "logits/rejected": -1.8075286149978638, "logps/chosen": -98.47418212890625, "logps/rejected": -247.69964599609375, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -3.258549451828003, "rewards/margins": 10.925354957580566, "rewards/rejected": -14.183904647827148, "step": 2446 }, { "epoch": 3.93, "learning_rate": 3.1311930241775665e-07, "logits/chosen": -1.5728838443756104, "logits/rejected": -1.4860873222351074, "logps/chosen": -124.60508728027344, "logps/rejected": -193.56524658203125, "loss": 0.0059, "rewards/accuracies": 1.0, "rewards/chosen": -2.9942266941070557, "rewards/margins": 8.412568092346191, "rewards/rejected": -11.406794548034668, "step": 2447 }, { "epoch": 3.93, "learning_rate": 3.1302021403091555e-07, "logits/chosen": -1.6562812328338623, "logits/rejected": -1.6881718635559082, "logps/chosen": -113.64115142822266, "logps/rejected": -195.18594360351562, "loss": 0.0063, "rewards/accuracies": 1.0, "rewards/chosen": -3.3704047203063965, "rewards/margins": 7.333744049072266, "rewards/rejected": -10.70414924621582, "step": 2448 }, { "epoch": 3.93, "learning_rate": 3.129211256440745e-07, "logits/chosen": -1.5184731483459473, "logits/rejected": -1.5802011489868164, "logps/chosen": -76.17317962646484, "logps/rejected": -223.9153594970703, "loss": 0.0054, "rewards/accuracies": 1.0, "rewards/chosen": -1.3385045528411865, "rewards/margins": 11.42319107055664, "rewards/rejected": -12.761695861816406, "step": 2449 }, { "epoch": 3.93, "learning_rate": 3.128220372572334e-07, "logits/chosen": -1.6177641153335571, "logits/rejected": -1.5687041282653809, "logps/chosen": -89.2684326171875, "logps/rejected": -168.19163513183594, "loss": 0.0103, "rewards/accuracies": 1.0, "rewards/chosen": -2.2813916206359863, "rewards/margins": 7.792001724243164, "rewards/rejected": -10.073392868041992, "step": 2450 }, { "epoch": 3.93, "learning_rate": 3.127229488703924e-07, "logits/chosen": -1.6622493267059326, "logits/rejected": -1.7670118808746338, "logps/chosen": -111.56812286376953, "logps/rejected": -242.7255859375, "loss": 0.0914, "rewards/accuracies": 1.0, "rewards/chosen": -3.558161735534668, "rewards/margins": 9.724294662475586, "rewards/rejected": -13.282455444335938, "step": 2451 }, { "epoch": 3.94, "learning_rate": 3.1262386048355134e-07, "logits/chosen": -1.6927717924118042, "logits/rejected": -1.6943306922912598, "logps/chosen": -116.58832550048828, "logps/rejected": -190.60525512695312, "loss": 0.0085, "rewards/accuracies": 1.0, "rewards/chosen": -2.0453872680664062, "rewards/margins": 7.068394660949707, "rewards/rejected": -9.113781929016113, "step": 2452 }, { "epoch": 3.94, "learning_rate": 3.1252477209671025e-07, "logits/chosen": -1.5557587146759033, "logits/rejected": -1.5377393960952759, "logps/chosen": -92.35574340820312, "logps/rejected": -169.23231506347656, "loss": 0.0462, "rewards/accuracies": 1.0, "rewards/chosen": -2.37526273727417, "rewards/margins": 8.486651420593262, "rewards/rejected": -10.861913681030273, "step": 2453 }, { "epoch": 3.94, "learning_rate": 3.124256837098692e-07, "logits/chosen": -1.7196012735366821, "logits/rejected": -1.7672115564346313, "logps/chosen": -88.59476470947266, "logps/rejected": -226.37001037597656, "loss": 0.0233, "rewards/accuracies": 1.0, "rewards/chosen": -2.635707378387451, "rewards/margins": 12.937529563903809, "rewards/rejected": -15.573236465454102, "step": 2454 }, { "epoch": 3.94, "learning_rate": 3.123265953230281e-07, "logits/chosen": -1.7803711891174316, "logits/rejected": -1.7831135988235474, "logps/chosen": -105.15370178222656, "logps/rejected": -233.0884246826172, "loss": 0.0153, "rewards/accuracies": 1.0, "rewards/chosen": -2.3229150772094727, "rewards/margins": 12.12285041809082, "rewards/rejected": -14.44576644897461, "step": 2455 }, { "epoch": 3.94, "learning_rate": 3.1222750693618707e-07, "logits/chosen": -1.6060926914215088, "logits/rejected": -1.6118249893188477, "logps/chosen": -116.92308044433594, "logps/rejected": -154.93817138671875, "loss": 0.0306, "rewards/accuracies": 1.0, "rewards/chosen": -4.118552207946777, "rewards/margins": 4.979379177093506, "rewards/rejected": -9.097931861877441, "step": 2456 }, { "epoch": 3.94, "learning_rate": 3.1212841854934603e-07, "logits/chosen": -1.7049835920333862, "logits/rejected": -1.7385197877883911, "logps/chosen": -114.18585205078125, "logps/rejected": -205.79281616210938, "loss": 0.041, "rewards/accuracies": 1.0, "rewards/chosen": -3.688788414001465, "rewards/margins": 8.060035705566406, "rewards/rejected": -11.748824119567871, "step": 2457 }, { "epoch": 3.95, "learning_rate": 3.1202933016250494e-07, "logits/chosen": -1.6754369735717773, "logits/rejected": -1.7190947532653809, "logps/chosen": -95.128173828125, "logps/rejected": -186.01669311523438, "loss": 0.0819, "rewards/accuracies": 1.0, "rewards/chosen": -2.3451223373413086, "rewards/margins": 7.96246337890625, "rewards/rejected": -10.307586669921875, "step": 2458 }, { "epoch": 3.95, "learning_rate": 3.119302417756639e-07, "logits/chosen": -1.6589767932891846, "logits/rejected": -1.679428219795227, "logps/chosen": -125.38641357421875, "logps/rejected": -165.5476837158203, "loss": 0.0373, "rewards/accuracies": 1.0, "rewards/chosen": -3.9961771965026855, "rewards/margins": 5.228609085083008, "rewards/rejected": -9.224785804748535, "step": 2459 }, { "epoch": 3.95, "learning_rate": 3.118311533888228e-07, "logits/chosen": -1.6189472675323486, "logits/rejected": -1.5783114433288574, "logps/chosen": -120.13316345214844, "logps/rejected": -213.81002807617188, "loss": 0.0046, "rewards/accuracies": 1.0, "rewards/chosen": -4.497579574584961, "rewards/margins": 8.854448318481445, "rewards/rejected": -13.352027893066406, "step": 2460 }, { "epoch": 3.95, "learning_rate": 3.117320650019817e-07, "logits/chosen": -1.8136708736419678, "logits/rejected": -1.8605544567108154, "logps/chosen": -134.48548889160156, "logps/rejected": -229.24208068847656, "loss": 0.0135, "rewards/accuracies": 1.0, "rewards/chosen": -3.3923707008361816, "rewards/margins": 9.32573127746582, "rewards/rejected": -12.71810245513916, "step": 2461 }, { "epoch": 3.95, "learning_rate": 3.116329766151407e-07, "logits/chosen": -1.6419748067855835, "logits/rejected": -1.7006802558898926, "logps/chosen": -101.52938079833984, "logps/rejected": -216.19224548339844, "loss": 0.0058, "rewards/accuracies": 1.0, "rewards/chosen": -2.6780807971954346, "rewards/margins": 10.293095588684082, "rewards/rejected": -12.971175193786621, "step": 2462 }, { "epoch": 3.95, "learning_rate": 3.1153388822829963e-07, "logits/chosen": -1.6243489980697632, "logits/rejected": -1.7193981409072876, "logps/chosen": -78.58547973632812, "logps/rejected": -191.18389892578125, "loss": 0.0364, "rewards/accuracies": 1.0, "rewards/chosen": -1.1677660942077637, "rewards/margins": 10.378124237060547, "rewards/rejected": -11.545890808105469, "step": 2463 }, { "epoch": 3.96, "learning_rate": 3.1143479984145854e-07, "logits/chosen": -1.73092520236969, "logits/rejected": -1.646431803703308, "logps/chosen": -127.85552215576172, "logps/rejected": -212.22242736816406, "loss": 0.0138, "rewards/accuracies": 1.0, "rewards/chosen": -2.8785150051116943, "rewards/margins": 10.299612045288086, "rewards/rejected": -13.17812728881836, "step": 2464 }, { "epoch": 3.96, "learning_rate": 3.113357114546175e-07, "logits/chosen": -1.534735918045044, "logits/rejected": -1.514582872390747, "logps/chosen": -110.49726867675781, "logps/rejected": -209.98178100585938, "loss": 0.0713, "rewards/accuracies": 1.0, "rewards/chosen": -2.5886037349700928, "rewards/margins": 9.025491714477539, "rewards/rejected": -11.614095687866211, "step": 2465 }, { "epoch": 3.96, "learning_rate": 3.112366230677764e-07, "logits/chosen": -1.5980184078216553, "logits/rejected": -1.7133891582489014, "logps/chosen": -112.80120849609375, "logps/rejected": -195.59133911132812, "loss": 0.0411, "rewards/accuracies": 1.0, "rewards/chosen": -3.245931386947632, "rewards/margins": 6.2734270095825195, "rewards/rejected": -9.51935863494873, "step": 2466 }, { "epoch": 3.96, "learning_rate": 3.111375346809354e-07, "logits/chosen": -1.7193608283996582, "logits/rejected": -1.6247200965881348, "logps/chosen": -125.89959716796875, "logps/rejected": -194.40093994140625, "loss": 0.0373, "rewards/accuracies": 1.0, "rewards/chosen": -2.3265254497528076, "rewards/margins": 8.288331985473633, "rewards/rejected": -10.614856719970703, "step": 2467 }, { "epoch": 3.96, "learning_rate": 3.110384462940943e-07, "logits/chosen": -1.5964524745941162, "logits/rejected": -1.5829105377197266, "logps/chosen": -160.55581665039062, "logps/rejected": -263.0168762207031, "loss": 0.0086, "rewards/accuracies": 1.0, "rewards/chosen": -3.3400487899780273, "rewards/margins": 10.445079803466797, "rewards/rejected": -13.785127639770508, "step": 2468 }, { "epoch": 3.96, "learning_rate": 3.1093935790725323e-07, "logits/chosen": -1.7840162515640259, "logits/rejected": -1.729468822479248, "logps/chosen": -157.90675354003906, "logps/rejected": -183.84255981445312, "loss": 0.0327, "rewards/accuracies": 1.0, "rewards/chosen": -3.7088396549224854, "rewards/margins": 5.1570916175842285, "rewards/rejected": -8.865931510925293, "step": 2469 }, { "epoch": 3.96, "learning_rate": 3.108402695204122e-07, "logits/chosen": -1.7639405727386475, "logits/rejected": -1.6769871711730957, "logps/chosen": -120.47565460205078, "logps/rejected": -197.19239807128906, "loss": 0.0072, "rewards/accuracies": 1.0, "rewards/chosen": -3.1731252670288086, "rewards/margins": 10.451326370239258, "rewards/rejected": -13.624451637268066, "step": 2470 }, { "epoch": 3.97, "learning_rate": 3.107411811335711e-07, "logits/chosen": -1.7406063079833984, "logits/rejected": -1.876851201057434, "logps/chosen": -101.6218490600586, "logps/rejected": -201.0334930419922, "loss": 0.0041, "rewards/accuracies": 1.0, "rewards/chosen": -2.444594383239746, "rewards/margins": 8.630941390991211, "rewards/rejected": -11.07553482055664, "step": 2471 }, { "epoch": 3.97, "learning_rate": 3.106420927467301e-07, "logits/chosen": -1.5825328826904297, "logits/rejected": -1.559051275253296, "logps/chosen": -121.26351165771484, "logps/rejected": -162.42938232421875, "loss": 0.1091, "rewards/accuracies": 0.75, "rewards/chosen": -2.874772548675537, "rewards/margins": 4.640717506408691, "rewards/rejected": -7.5154900550842285, "step": 2472 }, { "epoch": 3.97, "learning_rate": 3.10543004359889e-07, "logits/chosen": -1.6133747100830078, "logits/rejected": -1.5922369956970215, "logps/chosen": -106.62471008300781, "logps/rejected": -200.8203887939453, "loss": 0.0144, "rewards/accuracies": 1.0, "rewards/chosen": -3.5301971435546875, "rewards/margins": 7.611550807952881, "rewards/rejected": -11.141748428344727, "step": 2473 }, { "epoch": 3.97, "learning_rate": 3.104439159730479e-07, "logits/chosen": -1.8177268505096436, "logits/rejected": -1.7803781032562256, "logps/chosen": -115.55880737304688, "logps/rejected": -197.11056518554688, "loss": 0.0068, "rewards/accuracies": 1.0, "rewards/chosen": -3.6015684604644775, "rewards/margins": 9.475107192993164, "rewards/rejected": -13.076675415039062, "step": 2474 }, { "epoch": 3.97, "learning_rate": 3.103448275862069e-07, "logits/chosen": -1.5184355974197388, "logits/rejected": -1.6421277523040771, "logps/chosen": -95.86965942382812, "logps/rejected": -188.05699157714844, "loss": 0.0071, "rewards/accuracies": 1.0, "rewards/chosen": -3.6089060306549072, "rewards/margins": 6.7908430099487305, "rewards/rejected": -10.399748802185059, "step": 2475 }, { "epoch": 3.97, "learning_rate": 3.102457391993658e-07, "logits/chosen": -1.582395076751709, "logits/rejected": -1.5192985534667969, "logps/chosen": -117.06130981445312, "logps/rejected": -197.36050415039062, "loss": 0.0176, "rewards/accuracies": 1.0, "rewards/chosen": -3.6889541149139404, "rewards/margins": 9.835277557373047, "rewards/rejected": -13.52423095703125, "step": 2476 }, { "epoch": 3.98, "learning_rate": 3.101466508125248e-07, "logits/chosen": -1.6509050130844116, "logits/rejected": -1.5921218395233154, "logps/chosen": -179.4667510986328, "logps/rejected": -206.12167358398438, "loss": 0.0655, "rewards/accuracies": 0.75, "rewards/chosen": -6.0866594314575195, "rewards/margins": 5.619956970214844, "rewards/rejected": -11.706616401672363, "step": 2477 }, { "epoch": 3.98, "learning_rate": 3.100475624256837e-07, "logits/chosen": -1.6941401958465576, "logits/rejected": -1.6584988832473755, "logps/chosen": -139.13841247558594, "logps/rejected": -207.24456787109375, "loss": 0.0447, "rewards/accuracies": 1.0, "rewards/chosen": -5.4464640617370605, "rewards/margins": 7.151640892028809, "rewards/rejected": -12.598104476928711, "step": 2478 }, { "epoch": 3.98, "learning_rate": 3.099484740388426e-07, "logits/chosen": -1.7294784784317017, "logits/rejected": -1.7791050672531128, "logps/chosen": -118.61955261230469, "logps/rejected": -187.35745239257812, "loss": 0.0157, "rewards/accuracies": 1.0, "rewards/chosen": -2.437647581100464, "rewards/margins": 6.815385341644287, "rewards/rejected": -9.253032684326172, "step": 2479 }, { "epoch": 3.98, "learning_rate": 3.098493856520016e-07, "logits/chosen": -1.7609450817108154, "logits/rejected": -1.7257410287857056, "logps/chosen": -129.36959838867188, "logps/rejected": -175.30084228515625, "loss": 0.0153, "rewards/accuracies": 1.0, "rewards/chosen": -2.990910768508911, "rewards/margins": 6.790219783782959, "rewards/rejected": -9.78113079071045, "step": 2480 }, { "epoch": 3.98, "learning_rate": 3.097502972651605e-07, "logits/chosen": -1.6195313930511475, "logits/rejected": -1.5821988582611084, "logps/chosen": -82.0247802734375, "logps/rejected": -174.9648895263672, "loss": 0.0049, "rewards/accuracies": 1.0, "rewards/chosen": -1.8905493021011353, "rewards/margins": 9.528448104858398, "rewards/rejected": -11.418996810913086, "step": 2481 }, { "epoch": 3.98, "learning_rate": 3.0965120887831944e-07, "logits/chosen": -1.475446105003357, "logits/rejected": -1.5017861127853394, "logps/chosen": -114.30125427246094, "logps/rejected": -196.71163940429688, "loss": 0.0217, "rewards/accuracies": 1.0, "rewards/chosen": -3.1271605491638184, "rewards/margins": 7.872254371643066, "rewards/rejected": -10.999414443969727, "step": 2482 }, { "epoch": 3.99, "learning_rate": 3.095521204914784e-07, "logits/chosen": -1.646718978881836, "logits/rejected": -1.640291690826416, "logps/chosen": -152.05514526367188, "logps/rejected": -179.84446716308594, "loss": 0.1014, "rewards/accuracies": 0.75, "rewards/chosen": -5.914675712585449, "rewards/margins": 3.899415969848633, "rewards/rejected": -9.814091682434082, "step": 2483 }, { "epoch": 3.99, "learning_rate": 3.094530321046373e-07, "logits/chosen": -1.6995790004730225, "logits/rejected": -1.7138745784759521, "logps/chosen": -102.91109466552734, "logps/rejected": -197.96458435058594, "loss": 0.1074, "rewards/accuracies": 1.0, "rewards/chosen": -3.323108434677124, "rewards/margins": 8.290371894836426, "rewards/rejected": -11.613479614257812, "step": 2484 }, { "epoch": 3.99, "learning_rate": 3.0935394371779627e-07, "logits/chosen": -1.5202114582061768, "logits/rejected": -1.5061296224594116, "logps/chosen": -114.546142578125, "logps/rejected": -204.1097412109375, "loss": 0.0412, "rewards/accuracies": 1.0, "rewards/chosen": -3.161726236343384, "rewards/margins": 9.21014404296875, "rewards/rejected": -12.371870040893555, "step": 2485 }, { "epoch": 3.99, "learning_rate": 3.092548553309552e-07, "logits/chosen": -1.584567666053772, "logits/rejected": -1.721338152885437, "logps/chosen": -106.00804138183594, "logps/rejected": -220.09518432617188, "loss": 0.0107, "rewards/accuracies": 1.0, "rewards/chosen": -2.6973540782928467, "rewards/margins": 9.451199531555176, "rewards/rejected": -12.148552894592285, "step": 2486 }, { "epoch": 3.99, "learning_rate": 3.0915576694411414e-07, "logits/chosen": -1.6046621799468994, "logits/rejected": -1.5530221462249756, "logps/chosen": -84.0755844116211, "logps/rejected": -132.17782592773438, "loss": 0.0174, "rewards/accuracies": 1.0, "rewards/chosen": -2.125699043273926, "rewards/margins": 5.624811172485352, "rewards/rejected": -7.7505106925964355, "step": 2487 }, { "epoch": 3.99, "learning_rate": 3.090566785572731e-07, "logits/chosen": -1.7677592039108276, "logits/rejected": -1.6597754955291748, "logps/chosen": -123.61154174804688, "logps/rejected": -169.79759216308594, "loss": 0.0144, "rewards/accuracies": 1.0, "rewards/chosen": -3.175506591796875, "rewards/margins": 6.205968379974365, "rewards/rejected": -9.381475448608398, "step": 2488 }, { "epoch": 4.0, "learning_rate": 3.08957590170432e-07, "logits/chosen": -1.6724743843078613, "logits/rejected": -1.678788661956787, "logps/chosen": -100.08451080322266, "logps/rejected": -156.104736328125, "loss": 0.0094, "rewards/accuracies": 1.0, "rewards/chosen": -2.8735365867614746, "rewards/margins": 5.581778526306152, "rewards/rejected": -8.455313682556152, "step": 2489 }, { "epoch": 4.0, "learning_rate": 3.0885850178359096e-07, "logits/chosen": -1.762566328048706, "logits/rejected": -1.6823458671569824, "logps/chosen": -133.17758178710938, "logps/rejected": -243.6854248046875, "loss": 0.0189, "rewards/accuracies": 1.0, "rewards/chosen": -4.439713478088379, "rewards/margins": 10.402105331420898, "rewards/rejected": -14.841818809509277, "step": 2490 }, { "epoch": 4.0, "learning_rate": 3.0875941339674987e-07, "logits/chosen": -1.453844666481018, "logits/rejected": -1.6031880378723145, "logps/chosen": -95.93086242675781, "logps/rejected": -197.94921875, "loss": 0.0051, "rewards/accuracies": 1.0, "rewards/chosen": -3.2596800327301025, "rewards/margins": 8.086135864257812, "rewards/rejected": -11.345815658569336, "step": 2491 }, { "epoch": 4.0, "learning_rate": 3.0866032500990883e-07, "logits/chosen": -1.6306859254837036, "logits/rejected": -1.6718180179595947, "logps/chosen": -112.61123657226562, "logps/rejected": -188.1778564453125, "loss": 0.0144, "rewards/accuracies": 1.0, "rewards/chosen": -2.692707061767578, "rewards/margins": 7.12523078918457, "rewards/rejected": -9.817937850952148, "step": 2492 }, { "epoch": 4.0, "learning_rate": 3.085612366230678e-07, "logits/chosen": -1.72189199924469, "logits/rejected": -1.7278311252593994, "logps/chosen": -111.62344360351562, "logps/rejected": -198.2715606689453, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -1.5825027227401733, "rewards/margins": 8.565530776977539, "rewards/rejected": -10.14803409576416, "step": 2493 }, { "epoch": 4.0, "learning_rate": 3.084621482362267e-07, "logits/chosen": -1.66962730884552, "logits/rejected": -1.698232650756836, "logps/chosen": -107.85733795166016, "logps/rejected": -216.23165893554688, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -2.213002920150757, "rewards/margins": 9.97922134399414, "rewards/rejected": -12.19222354888916, "step": 2494 }, { "epoch": 4.0, "learning_rate": 3.0836305984938566e-07, "logits/chosen": -1.5406492948532104, "logits/rejected": -1.5379347801208496, "logps/chosen": -127.5925521850586, "logps/rejected": -200.87403869628906, "loss": 0.0039, "rewards/accuracies": 1.0, "rewards/chosen": -3.7536542415618896, "rewards/margins": 8.005014419555664, "rewards/rejected": -11.758668899536133, "step": 2495 }, { "epoch": 4.01, "learning_rate": 3.0826397146254456e-07, "logits/chosen": -1.7422574758529663, "logits/rejected": -1.69615638256073, "logps/chosen": -136.8681640625, "logps/rejected": -218.6101837158203, "loss": 0.0045, "rewards/accuracies": 1.0, "rewards/chosen": -4.528923511505127, "rewards/margins": 10.528447151184082, "rewards/rejected": -15.057371139526367, "step": 2496 }, { "epoch": 4.01, "learning_rate": 3.081648830757035e-07, "logits/chosen": -1.842346429824829, "logits/rejected": -1.825121521949768, "logps/chosen": -145.908203125, "logps/rejected": -221.43333435058594, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -4.2306928634643555, "rewards/margins": 8.66795825958252, "rewards/rejected": -12.898651123046875, "step": 2497 }, { "epoch": 4.01, "learning_rate": 3.080657946888625e-07, "logits/chosen": -1.5246798992156982, "logits/rejected": -1.5532732009887695, "logps/chosen": -106.9865951538086, "logps/rejected": -211.9129638671875, "loss": 0.0053, "rewards/accuracies": 1.0, "rewards/chosen": -2.9941115379333496, "rewards/margins": 8.991926193237305, "rewards/rejected": -11.986037254333496, "step": 2498 }, { "epoch": 4.01, "learning_rate": 3.079667063020214e-07, "logits/chosen": -1.6735752820968628, "logits/rejected": -1.6546440124511719, "logps/chosen": -138.49008178710938, "logps/rejected": -231.6451416015625, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -3.962188243865967, "rewards/margins": 8.148418426513672, "rewards/rejected": -12.110607147216797, "step": 2499 }, { "epoch": 4.01, "learning_rate": 3.0786761791518035e-07, "logits/chosen": -1.8160932064056396, "logits/rejected": -1.8021754026412964, "logps/chosen": -149.5265350341797, "logps/rejected": -199.94308471679688, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -4.741860866546631, "rewards/margins": 7.704150199890137, "rewards/rejected": -12.44601058959961, "step": 2500 }, { "epoch": 4.01, "learning_rate": 3.0776852952833926e-07, "logits/chosen": -1.6432619094848633, "logits/rejected": -1.6990611553192139, "logps/chosen": -111.1497802734375, "logps/rejected": -227.98007202148438, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": -1.8291747570037842, "rewards/margins": 10.840398788452148, "rewards/rejected": -12.669573783874512, "step": 2501 }, { "epoch": 4.02, "learning_rate": 3.0766944114149816e-07, "logits/chosen": -1.659531831741333, "logits/rejected": -1.6324594020843506, "logps/chosen": -116.5029525756836, "logps/rejected": -190.52862548828125, "loss": 0.0078, "rewards/accuracies": 1.0, "rewards/chosen": -2.9654483795166016, "rewards/margins": 8.976431846618652, "rewards/rejected": -11.941880226135254, "step": 2502 }, { "epoch": 4.02, "learning_rate": 3.075703527546571e-07, "logits/chosen": -1.5678319931030273, "logits/rejected": -1.6212395429611206, "logps/chosen": -127.81037902832031, "logps/rejected": -216.714599609375, "loss": 0.012, "rewards/accuracies": 1.0, "rewards/chosen": -4.440348148345947, "rewards/margins": 9.187931060791016, "rewards/rejected": -13.628279685974121, "step": 2503 }, { "epoch": 4.02, "learning_rate": 3.074712643678161e-07, "logits/chosen": -1.5374960899353027, "logits/rejected": -1.654158353805542, "logps/chosen": -93.89268493652344, "logps/rejected": -195.35914611816406, "loss": 0.0059, "rewards/accuracies": 1.0, "rewards/chosen": -3.523164987564087, "rewards/margins": 7.022997856140137, "rewards/rejected": -10.546163558959961, "step": 2504 }, { "epoch": 4.02, "learning_rate": 3.0737217598097504e-07, "logits/chosen": -1.6651911735534668, "logits/rejected": -1.695858359336853, "logps/chosen": -147.72576904296875, "logps/rejected": -205.76388549804688, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": -5.914487838745117, "rewards/margins": 5.898893356323242, "rewards/rejected": -11.81338119506836, "step": 2505 }, { "epoch": 4.02, "learning_rate": 3.0727308759413395e-07, "logits/chosen": -1.6496708393096924, "logits/rejected": -1.627266764640808, "logps/chosen": -122.88634490966797, "logps/rejected": -203.31570434570312, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -2.353158473968506, "rewards/margins": 7.399735927581787, "rewards/rejected": -9.752894401550293, "step": 2506 }, { "epoch": 4.02, "learning_rate": 3.0717399920729286e-07, "logits/chosen": -1.657367467880249, "logits/rejected": -1.6772427558898926, "logps/chosen": -141.00572204589844, "logps/rejected": -207.79397583007812, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -5.019824028015137, "rewards/margins": 7.1086812019348145, "rewards/rejected": -12.128504753112793, "step": 2507 }, { "epoch": 4.03, "learning_rate": 3.070749108204518e-07, "logits/chosen": -1.492790699005127, "logits/rejected": -1.6124016046524048, "logps/chosen": -84.41968536376953, "logps/rejected": -184.6190643310547, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -2.242992401123047, "rewards/margins": 6.907172203063965, "rewards/rejected": -9.150163650512695, "step": 2508 }, { "epoch": 4.03, "learning_rate": 3.069758224336108e-07, "logits/chosen": -1.6366609334945679, "logits/rejected": -1.5938897132873535, "logps/chosen": -120.18865966796875, "logps/rejected": -209.8963623046875, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -2.2974164485931396, "rewards/margins": 7.387589931488037, "rewards/rejected": -9.685007095336914, "step": 2509 }, { "epoch": 4.03, "learning_rate": 3.0687673404676974e-07, "logits/chosen": -1.6669700145721436, "logits/rejected": -1.693543791770935, "logps/chosen": -117.47119903564453, "logps/rejected": -219.45687866210938, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -4.7207865715026855, "rewards/margins": 9.309667587280273, "rewards/rejected": -14.030454635620117, "step": 2510 }, { "epoch": 4.03, "learning_rate": 3.0677764565992864e-07, "logits/chosen": -1.686862826347351, "logits/rejected": -1.8006712198257446, "logps/chosen": -108.82182312011719, "logps/rejected": -232.6983184814453, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -3.0155394077301025, "rewards/margins": 11.246267318725586, "rewards/rejected": -14.26180648803711, "step": 2511 }, { "epoch": 4.03, "learning_rate": 3.0667855727308755e-07, "logits/chosen": -1.659718632698059, "logits/rejected": -1.6852625608444214, "logps/chosen": -117.36539459228516, "logps/rejected": -228.60592651367188, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -2.8146252632141113, "rewards/margins": 9.800826072692871, "rewards/rejected": -12.61545181274414, "step": 2512 }, { "epoch": 4.03, "learning_rate": 3.065794688862465e-07, "logits/chosen": -1.6858441829681396, "logits/rejected": -1.6066285371780396, "logps/chosen": -141.23336791992188, "logps/rejected": -219.733154296875, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -4.473121166229248, "rewards/margins": 9.888525009155273, "rewards/rejected": -14.361644744873047, "step": 2513 }, { "epoch": 4.04, "learning_rate": 3.0648038049940547e-07, "logits/chosen": -1.6965941190719604, "logits/rejected": -1.67086660861969, "logps/chosen": -114.09213256835938, "logps/rejected": -214.55226135253906, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -3.771657705307007, "rewards/margins": 11.059677124023438, "rewards/rejected": -14.831335067749023, "step": 2514 }, { "epoch": 4.04, "learning_rate": 3.0638129211256443e-07, "logits/chosen": -1.6366122961044312, "logits/rejected": -1.6601407527923584, "logps/chosen": -169.2470245361328, "logps/rejected": -258.90533447265625, "loss": 0.0053, "rewards/accuracies": 1.0, "rewards/chosen": -7.989646911621094, "rewards/margins": 9.243654251098633, "rewards/rejected": -17.233301162719727, "step": 2515 }, { "epoch": 4.04, "learning_rate": 3.0628220372572333e-07, "logits/chosen": -1.6648001670837402, "logits/rejected": -1.6920074224472046, "logps/chosen": -119.31245422363281, "logps/rejected": -224.4051513671875, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -3.9948253631591797, "rewards/margins": 9.78790283203125, "rewards/rejected": -13.78272819519043, "step": 2516 }, { "epoch": 4.04, "learning_rate": 3.0618311533888224e-07, "logits/chosen": -1.6737489700317383, "logits/rejected": -1.7008857727050781, "logps/chosen": -142.32736206054688, "logps/rejected": -208.51507568359375, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -4.365001678466797, "rewards/margins": 7.777826309204102, "rewards/rejected": -12.142827987670898, "step": 2517 }, { "epoch": 4.04, "learning_rate": 3.060840269520412e-07, "logits/chosen": -1.5795458555221558, "logits/rejected": -1.6330132484436035, "logps/chosen": -127.78146362304688, "logps/rejected": -208.53094482421875, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -2.783404588699341, "rewards/margins": 7.175398826599121, "rewards/rejected": -9.958803176879883, "step": 2518 }, { "epoch": 4.04, "learning_rate": 3.0598493856520016e-07, "logits/chosen": -1.5915993452072144, "logits/rejected": -1.5115429162979126, "logps/chosen": -94.33334350585938, "logps/rejected": -171.54515075683594, "loss": 0.0048, "rewards/accuracies": 1.0, "rewards/chosen": -1.5520060062408447, "rewards/margins": 8.156268119812012, "rewards/rejected": -9.708274841308594, "step": 2519 }, { "epoch": 4.04, "learning_rate": 3.058858501783591e-07, "logits/chosen": -1.5666792392730713, "logits/rejected": -1.5621589422225952, "logps/chosen": -111.59307861328125, "logps/rejected": -227.45492553710938, "loss": 0.0289, "rewards/accuracies": 1.0, "rewards/chosen": -4.135239601135254, "rewards/margins": 9.754109382629395, "rewards/rejected": -13.889348983764648, "step": 2520 }, { "epoch": 4.05, "learning_rate": 3.0578676179151803e-07, "logits/chosen": -1.6642191410064697, "logits/rejected": -1.6133276224136353, "logps/chosen": -137.95394897460938, "logps/rejected": -217.50267028808594, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -5.247903347015381, "rewards/margins": 8.80392074584961, "rewards/rejected": -14.051824569702148, "step": 2521 }, { "epoch": 4.05, "learning_rate": 3.0568767340467693e-07, "logits/chosen": -1.551016092300415, "logits/rejected": -1.655708909034729, "logps/chosen": -144.3777313232422, "logps/rejected": -258.6844787597656, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -7.179811000823975, "rewards/margins": 9.069574356079102, "rewards/rejected": -16.249385833740234, "step": 2522 }, { "epoch": 4.05, "learning_rate": 3.055885850178359e-07, "logits/chosen": -1.7133549451828003, "logits/rejected": -1.6551358699798584, "logps/chosen": -68.69253540039062, "logps/rejected": -158.48635864257812, "loss": 0.0105, "rewards/accuracies": 1.0, "rewards/chosen": -0.8848367929458618, "rewards/margins": 9.9399995803833, "rewards/rejected": -10.824836730957031, "step": 2523 }, { "epoch": 4.05, "learning_rate": 3.054894966309948e-07, "logits/chosen": -1.6241545677185059, "logits/rejected": -1.6490187644958496, "logps/chosen": -118.51455688476562, "logps/rejected": -221.5877685546875, "loss": 0.0095, "rewards/accuracies": 1.0, "rewards/chosen": -3.2495946884155273, "rewards/margins": 9.885830879211426, "rewards/rejected": -13.13542652130127, "step": 2524 }, { "epoch": 4.05, "learning_rate": 3.053904082441538e-07, "logits/chosen": -1.5949119329452515, "logits/rejected": -1.5709125995635986, "logps/chosen": -152.59283447265625, "logps/rejected": -259.5760803222656, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -4.79520320892334, "rewards/margins": 9.761048316955566, "rewards/rejected": -14.556251525878906, "step": 2525 }, { "epoch": 4.05, "learning_rate": 3.052913198573127e-07, "logits/chosen": -1.6660544872283936, "logits/rejected": -1.730514645576477, "logps/chosen": -119.88259887695312, "logps/rejected": -216.29183959960938, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -3.487823486328125, "rewards/margins": 9.465367317199707, "rewards/rejected": -12.953190803527832, "step": 2526 }, { "epoch": 4.06, "learning_rate": 3.0519223147047163e-07, "logits/chosen": -1.7136340141296387, "logits/rejected": -1.679704189300537, "logps/chosen": -132.6939697265625, "logps/rejected": -217.55227661132812, "loss": 0.0063, "rewards/accuracies": 1.0, "rewards/chosen": -5.650230407714844, "rewards/margins": 8.351633071899414, "rewards/rejected": -14.001862525939941, "step": 2527 }, { "epoch": 4.06, "learning_rate": 3.050931430836306e-07, "logits/chosen": -1.5165984630584717, "logits/rejected": -1.5513417720794678, "logps/chosen": -135.3675994873047, "logps/rejected": -242.787109375, "loss": 0.0191, "rewards/accuracies": 1.0, "rewards/chosen": -5.577577590942383, "rewards/margins": 9.986881256103516, "rewards/rejected": -15.564458847045898, "step": 2528 }, { "epoch": 4.06, "learning_rate": 3.049940546967895e-07, "logits/chosen": -1.6378387212753296, "logits/rejected": -1.6323111057281494, "logps/chosen": -143.7834930419922, "logps/rejected": -266.67132568359375, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -3.5051000118255615, "rewards/margins": 11.801870346069336, "rewards/rejected": -15.306970596313477, "step": 2529 }, { "epoch": 4.06, "learning_rate": 3.048949663099485e-07, "logits/chosen": -1.5396989583969116, "logits/rejected": -1.5699775218963623, "logps/chosen": -104.88870239257812, "logps/rejected": -198.44334411621094, "loss": 0.0043, "rewards/accuracies": 1.0, "rewards/chosen": -3.284257650375366, "rewards/margins": 7.72358512878418, "rewards/rejected": -11.007843017578125, "step": 2530 }, { "epoch": 4.06, "learning_rate": 3.047958779231074e-07, "logits/chosen": -1.7101026773452759, "logits/rejected": -1.7011549472808838, "logps/chosen": -150.27099609375, "logps/rejected": -228.33152770996094, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -5.2168498039245605, "rewards/margins": 7.95550537109375, "rewards/rejected": -13.172356605529785, "step": 2531 }, { "epoch": 4.06, "learning_rate": 3.046967895362663e-07, "logits/chosen": -1.6548919677734375, "logits/rejected": -1.6244680881500244, "logps/chosen": -157.78302001953125, "logps/rejected": -287.2731018066406, "loss": 0.0073, "rewards/accuracies": 1.0, "rewards/chosen": -5.519103050231934, "rewards/margins": 12.98355484008789, "rewards/rejected": -18.50265884399414, "step": 2532 }, { "epoch": 4.07, "learning_rate": 3.045977011494253e-07, "logits/chosen": -1.5498991012573242, "logits/rejected": -1.624609351158142, "logps/chosen": -133.81573486328125, "logps/rejected": -278.434814453125, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -2.9922564029693604, "rewards/margins": 14.223941802978516, "rewards/rejected": -17.216197967529297, "step": 2533 }, { "epoch": 4.07, "learning_rate": 3.044986127625842e-07, "logits/chosen": -1.77744722366333, "logits/rejected": -1.6740620136260986, "logps/chosen": -131.01593017578125, "logps/rejected": -225.44760131835938, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -4.649707794189453, "rewards/margins": 10.70853042602539, "rewards/rejected": -15.358236312866211, "step": 2534 }, { "epoch": 4.07, "learning_rate": 3.0439952437574315e-07, "logits/chosen": -1.4686803817749023, "logits/rejected": -1.5744339227676392, "logps/chosen": -82.48048400878906, "logps/rejected": -192.45944213867188, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -2.492063522338867, "rewards/margins": 9.67403793334961, "rewards/rejected": -12.166101455688477, "step": 2535 }, { "epoch": 4.07, "learning_rate": 3.043004359889021e-07, "logits/chosen": -1.5441677570343018, "logits/rejected": -1.5157268047332764, "logps/chosen": -123.55418395996094, "logps/rejected": -191.66433715820312, "loss": 0.0043, "rewards/accuracies": 1.0, "rewards/chosen": -4.622587203979492, "rewards/margins": 7.4192705154418945, "rewards/rejected": -12.041857719421387, "step": 2536 }, { "epoch": 4.07, "learning_rate": 3.04201347602061e-07, "logits/chosen": -1.6571245193481445, "logits/rejected": -1.6640400886535645, "logps/chosen": -138.4080352783203, "logps/rejected": -201.17044067382812, "loss": 0.0304, "rewards/accuracies": 1.0, "rewards/chosen": -5.673321723937988, "rewards/margins": 6.3547892570495605, "rewards/rejected": -12.028111457824707, "step": 2537 }, { "epoch": 4.07, "learning_rate": 3.0410225921521997e-07, "logits/chosen": -1.8054652214050293, "logits/rejected": -1.6736958026885986, "logps/chosen": -126.06246948242188, "logps/rejected": -232.80490112304688, "loss": 0.011, "rewards/accuracies": 1.0, "rewards/chosen": -3.5187931060791016, "rewards/margins": 10.52789306640625, "rewards/rejected": -14.046686172485352, "step": 2538 }, { "epoch": 4.08, "learning_rate": 3.040031708283789e-07, "logits/chosen": -1.6055035591125488, "logits/rejected": -1.6108424663543701, "logps/chosen": -159.64442443847656, "logps/rejected": -233.33419799804688, "loss": 0.0083, "rewards/accuracies": 1.0, "rewards/chosen": -6.387627601623535, "rewards/margins": 6.720761775970459, "rewards/rejected": -13.108388900756836, "step": 2539 }, { "epoch": 4.08, "learning_rate": 3.039040824415378e-07, "logits/chosen": -1.5428630113601685, "logits/rejected": -1.5103716850280762, "logps/chosen": -163.2515106201172, "logps/rejected": -216.48638916015625, "loss": 0.0045, "rewards/accuracies": 1.0, "rewards/chosen": -7.328344345092773, "rewards/margins": 6.713250160217285, "rewards/rejected": -14.041594505310059, "step": 2540 }, { "epoch": 4.08, "learning_rate": 3.038049940546968e-07, "logits/chosen": -1.7772529125213623, "logits/rejected": -1.683284878730774, "logps/chosen": -148.89048767089844, "logps/rejected": -216.6741943359375, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -4.621286392211914, "rewards/margins": 8.243154525756836, "rewards/rejected": -12.86444091796875, "step": 2541 }, { "epoch": 4.08, "learning_rate": 3.037059056678557e-07, "logits/chosen": -1.5589646100997925, "logits/rejected": -1.4745087623596191, "logps/chosen": -138.81468200683594, "logps/rejected": -225.5457305908203, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -3.7542495727539062, "rewards/margins": 8.419733047485352, "rewards/rejected": -12.173982620239258, "step": 2542 }, { "epoch": 4.08, "learning_rate": 3.0360681728101467e-07, "logits/chosen": -1.6079130172729492, "logits/rejected": -1.6610912084579468, "logps/chosen": -130.51315307617188, "logps/rejected": -242.71090698242188, "loss": 0.0247, "rewards/accuracies": 1.0, "rewards/chosen": -4.121193885803223, "rewards/margins": 10.760823249816895, "rewards/rejected": -14.882017135620117, "step": 2543 }, { "epoch": 4.08, "learning_rate": 3.0350772889417357e-07, "logits/chosen": -1.6654467582702637, "logits/rejected": -1.7516627311706543, "logps/chosen": -153.14715576171875, "logps/rejected": -258.56219482421875, "loss": 0.0229, "rewards/accuracies": 1.0, "rewards/chosen": -5.577592372894287, "rewards/margins": 10.29360580444336, "rewards/rejected": -15.871198654174805, "step": 2544 }, { "epoch": 4.09, "learning_rate": 3.034086405073325e-07, "logits/chosen": -1.5852079391479492, "logits/rejected": -1.5281107425689697, "logps/chosen": -128.21902465820312, "logps/rejected": -233.83935546875, "loss": 0.0091, "rewards/accuracies": 1.0, "rewards/chosen": -3.8453893661499023, "rewards/margins": 11.467109680175781, "rewards/rejected": -15.312499046325684, "step": 2545 }, { "epoch": 4.09, "learning_rate": 3.033095521204915e-07, "logits/chosen": -1.6739469766616821, "logits/rejected": -1.6677422523498535, "logps/chosen": -132.0171356201172, "logps/rejected": -239.90591430664062, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -4.345278739929199, "rewards/margins": 10.095596313476562, "rewards/rejected": -14.440876007080078, "step": 2546 }, { "epoch": 4.09, "learning_rate": 3.032104637336504e-07, "logits/chosen": -1.6212173700332642, "logits/rejected": -1.5505297183990479, "logps/chosen": -120.74028778076172, "logps/rejected": -227.01953125, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -3.5874392986297607, "rewards/margins": 10.901639938354492, "rewards/rejected": -14.489079475402832, "step": 2547 }, { "epoch": 4.09, "learning_rate": 3.0311137534680936e-07, "logits/chosen": -1.5746961832046509, "logits/rejected": -1.7024953365325928, "logps/chosen": -115.47480773925781, "logps/rejected": -196.77029418945312, "loss": 0.0077, "rewards/accuracies": 1.0, "rewards/chosen": -2.676499366760254, "rewards/margins": 7.817974090576172, "rewards/rejected": -10.494474411010742, "step": 2548 }, { "epoch": 4.09, "learning_rate": 3.0301228695996827e-07, "logits/chosen": -1.5974124670028687, "logits/rejected": -1.5515084266662598, "logps/chosen": -152.17413330078125, "logps/rejected": -230.42422485351562, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -6.098540306091309, "rewards/margins": 9.398162841796875, "rewards/rejected": -15.496703147888184, "step": 2549 }, { "epoch": 4.09, "learning_rate": 3.0291319857312717e-07, "logits/chosen": -1.6033787727355957, "logits/rejected": -1.6426105499267578, "logps/chosen": -121.32392883300781, "logps/rejected": -262.5384521484375, "loss": 0.0076, "rewards/accuracies": 1.0, "rewards/chosen": -3.8127214908599854, "rewards/margins": 13.885164260864258, "rewards/rejected": -17.697885513305664, "step": 2550 }, { "epoch": 4.09, "learning_rate": 3.028141101862862e-07, "logits/chosen": -1.6033382415771484, "logits/rejected": -1.685429334640503, "logps/chosen": -126.35247802734375, "logps/rejected": -243.22544860839844, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -4.649021625518799, "rewards/margins": 11.712108612060547, "rewards/rejected": -16.361129760742188, "step": 2551 }, { "epoch": 4.1, "learning_rate": 3.027150217994451e-07, "logits/chosen": -1.6666128635406494, "logits/rejected": -1.738156795501709, "logps/chosen": -122.02116394042969, "logps/rejected": -259.8206787109375, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -4.637908935546875, "rewards/margins": 13.970306396484375, "rewards/rejected": -18.60821533203125, "step": 2552 }, { "epoch": 4.1, "learning_rate": 3.0261593341260405e-07, "logits/chosen": -1.649677038192749, "logits/rejected": -1.67061448097229, "logps/chosen": -110.00238037109375, "logps/rejected": -278.99560546875, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -2.8164026737213135, "rewards/margins": 14.028656959533691, "rewards/rejected": -16.845060348510742, "step": 2553 }, { "epoch": 4.1, "learning_rate": 3.0251684502576296e-07, "logits/chosen": -1.6582057476043701, "logits/rejected": -1.6588548421859741, "logps/chosen": -122.07439422607422, "logps/rejected": -181.16204833984375, "loss": 0.0042, "rewards/accuracies": 1.0, "rewards/chosen": -4.212009906768799, "rewards/margins": 6.352017879486084, "rewards/rejected": -10.564027786254883, "step": 2554 }, { "epoch": 4.1, "learning_rate": 3.0241775663892187e-07, "logits/chosen": -1.634881615638733, "logits/rejected": -1.5738052129745483, "logps/chosen": -147.9556884765625, "logps/rejected": -210.35430908203125, "loss": 0.046, "rewards/accuracies": 1.0, "rewards/chosen": -4.146516799926758, "rewards/margins": 8.395234107971191, "rewards/rejected": -12.54175090789795, "step": 2555 }, { "epoch": 4.1, "learning_rate": 3.023186682520809e-07, "logits/chosen": -1.6197564601898193, "logits/rejected": -1.67807137966156, "logps/chosen": -117.95576477050781, "logps/rejected": -230.28277587890625, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -4.085933685302734, "rewards/margins": 9.766239166259766, "rewards/rejected": -13.852171897888184, "step": 2556 }, { "epoch": 4.1, "learning_rate": 3.022195798652398e-07, "logits/chosen": -1.6173584461212158, "logits/rejected": -1.5965694189071655, "logps/chosen": -187.7185516357422, "logps/rejected": -282.83953857421875, "loss": 0.0093, "rewards/accuracies": 1.0, "rewards/chosen": -6.631080627441406, "rewards/margins": 10.573331832885742, "rewards/rejected": -17.20441246032715, "step": 2557 }, { "epoch": 4.11, "learning_rate": 3.0212049147839875e-07, "logits/chosen": -1.6408036947250366, "logits/rejected": -1.7023413181304932, "logps/chosen": -114.13460540771484, "logps/rejected": -250.00772094726562, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -3.787585973739624, "rewards/margins": 11.493175506591797, "rewards/rejected": -15.28076171875, "step": 2558 }, { "epoch": 4.11, "learning_rate": 3.0202140309155765e-07, "logits/chosen": -1.668805718421936, "logits/rejected": -1.6198583841323853, "logps/chosen": -131.968505859375, "logps/rejected": -209.96658325195312, "loss": 0.0122, "rewards/accuracies": 1.0, "rewards/chosen": -4.04791259765625, "rewards/margins": 9.493781089782715, "rewards/rejected": -13.541693687438965, "step": 2559 }, { "epoch": 4.11, "learning_rate": 3.0192231470471656e-07, "logits/chosen": -1.6645163297653198, "logits/rejected": -1.6628834009170532, "logps/chosen": -147.61532592773438, "logps/rejected": -259.4848937988281, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": -5.373167991638184, "rewards/margins": 11.232403755187988, "rewards/rejected": -16.605571746826172, "step": 2560 }, { "epoch": 4.11, "learning_rate": 3.0182322631787557e-07, "logits/chosen": -1.5379629135131836, "logits/rejected": -1.588653326034546, "logps/chosen": -122.39176940917969, "logps/rejected": -205.7824249267578, "loss": 0.0097, "rewards/accuracies": 1.0, "rewards/chosen": -4.185626983642578, "rewards/margins": 7.4109649658203125, "rewards/rejected": -11.59659194946289, "step": 2561 }, { "epoch": 4.11, "learning_rate": 3.017241379310345e-07, "logits/chosen": -1.5906161069869995, "logits/rejected": -1.5917664766311646, "logps/chosen": -126.44559478759766, "logps/rejected": -187.01210021972656, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/chosen": -4.393872261047363, "rewards/margins": 7.085519313812256, "rewards/rejected": -11.479392051696777, "step": 2562 }, { "epoch": 4.11, "learning_rate": 3.0162504954419344e-07, "logits/chosen": -1.789567470550537, "logits/rejected": -1.7433922290802002, "logps/chosen": -134.07528686523438, "logps/rejected": -246.52505493164062, "loss": 0.0079, "rewards/accuracies": 1.0, "rewards/chosen": -4.736633777618408, "rewards/margins": 10.419671058654785, "rewards/rejected": -15.156304359436035, "step": 2563 }, { "epoch": 4.12, "learning_rate": 3.0152596115735234e-07, "logits/chosen": -1.5609800815582275, "logits/rejected": -1.6218181848526, "logps/chosen": -134.80758666992188, "logps/rejected": -254.07171630859375, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -5.735422611236572, "rewards/margins": 9.650691986083984, "rewards/rejected": -15.386113166809082, "step": 2564 }, { "epoch": 4.12, "learning_rate": 3.0142687277051125e-07, "logits/chosen": -1.7480415105819702, "logits/rejected": -1.7085357904434204, "logps/chosen": -103.86094665527344, "logps/rejected": -210.26858520507812, "loss": 0.011, "rewards/accuracies": 1.0, "rewards/chosen": -3.009929895401001, "rewards/margins": 10.821799278259277, "rewards/rejected": -13.831729888916016, "step": 2565 }, { "epoch": 4.12, "learning_rate": 3.013277843836702e-07, "logits/chosen": -1.7534701824188232, "logits/rejected": -1.7408908605575562, "logps/chosen": -142.44049072265625, "logps/rejected": -248.9718017578125, "loss": 0.0084, "rewards/accuracies": 1.0, "rewards/chosen": -2.976529598236084, "rewards/margins": 10.983325004577637, "rewards/rejected": -13.959854125976562, "step": 2566 }, { "epoch": 4.12, "learning_rate": 3.0122869599682917e-07, "logits/chosen": -1.7440170049667358, "logits/rejected": -1.8171173334121704, "logps/chosen": -104.28142547607422, "logps/rejected": -216.24508666992188, "loss": 0.0164, "rewards/accuracies": 1.0, "rewards/chosen": -4.2942423820495605, "rewards/margins": 10.07826042175293, "rewards/rejected": -14.372503280639648, "step": 2567 }, { "epoch": 4.12, "learning_rate": 3.0112960760998813e-07, "logits/chosen": -1.7844855785369873, "logits/rejected": -1.6897716522216797, "logps/chosen": -159.11456298828125, "logps/rejected": -208.8516387939453, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -4.382807731628418, "rewards/margins": 8.046588897705078, "rewards/rejected": -12.429396629333496, "step": 2568 }, { "epoch": 4.12, "learning_rate": 3.0103051922314704e-07, "logits/chosen": -1.479152798652649, "logits/rejected": -1.5450966358184814, "logps/chosen": -117.27633666992188, "logps/rejected": -198.5849609375, "loss": 0.0098, "rewards/accuracies": 1.0, "rewards/chosen": -4.834822177886963, "rewards/margins": 6.878600120544434, "rewards/rejected": -11.713421821594238, "step": 2569 }, { "epoch": 4.13, "learning_rate": 3.0093143083630594e-07, "logits/chosen": -1.7441937923431396, "logits/rejected": -1.7946717739105225, "logps/chosen": -140.7435302734375, "logps/rejected": -257.0694580078125, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -5.195028781890869, "rewards/margins": 10.498964309692383, "rewards/rejected": -15.693994522094727, "step": 2570 }, { "epoch": 4.13, "learning_rate": 3.008323424494649e-07, "logits/chosen": -1.685577154159546, "logits/rejected": -1.693581461906433, "logps/chosen": -149.2362518310547, "logps/rejected": -243.56814575195312, "loss": 0.006, "rewards/accuracies": 1.0, "rewards/chosen": -7.435965538024902, "rewards/margins": 8.835838317871094, "rewards/rejected": -16.27180290222168, "step": 2571 }, { "epoch": 4.13, "learning_rate": 3.0073325406262386e-07, "logits/chosen": -1.717738389968872, "logits/rejected": -1.6944881677627563, "logps/chosen": -141.2256622314453, "logps/rejected": -200.75100708007812, "loss": 0.0165, "rewards/accuracies": 1.0, "rewards/chosen": -5.498356819152832, "rewards/margins": 6.390169143676758, "rewards/rejected": -11.88852596282959, "step": 2572 }, { "epoch": 4.13, "learning_rate": 3.0063416567578277e-07, "logits/chosen": -1.6185858249664307, "logits/rejected": -1.7014288902282715, "logps/chosen": -161.23318481445312, "logps/rejected": -283.177734375, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -6.844430923461914, "rewards/margins": 10.8836669921875, "rewards/rejected": -17.728097915649414, "step": 2573 }, { "epoch": 4.13, "learning_rate": 3.0053507728894173e-07, "logits/chosen": -1.5734784603118896, "logits/rejected": -1.5832760334014893, "logps/chosen": -123.55335235595703, "logps/rejected": -213.60935974121094, "loss": 0.0047, "rewards/accuracies": 1.0, "rewards/chosen": -4.444847583770752, "rewards/margins": 7.660919189453125, "rewards/rejected": -12.105767250061035, "step": 2574 }, { "epoch": 4.13, "learning_rate": 3.0043598890210064e-07, "logits/chosen": -1.56904137134552, "logits/rejected": -1.6897714138031006, "logps/chosen": -118.52400207519531, "logps/rejected": -236.43255615234375, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -4.912258625030518, "rewards/margins": 8.800154685974121, "rewards/rejected": -13.712413787841797, "step": 2575 }, { "epoch": 4.13, "learning_rate": 3.003369005152596e-07, "logits/chosen": -1.5309813022613525, "logits/rejected": -1.566010594367981, "logps/chosen": -102.79072570800781, "logps/rejected": -212.0646209716797, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -3.0056135654449463, "rewards/margins": 10.745924949645996, "rewards/rejected": -13.75153923034668, "step": 2576 }, { "epoch": 4.14, "learning_rate": 3.0023781212841856e-07, "logits/chosen": -1.6338467597961426, "logits/rejected": -1.6406478881835938, "logps/chosen": -133.89312744140625, "logps/rejected": -248.25576782226562, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": -4.517811298370361, "rewards/margins": 11.816007614135742, "rewards/rejected": -16.333818435668945, "step": 2577 }, { "epoch": 4.14, "learning_rate": 3.0013872374157746e-07, "logits/chosen": -1.4842942953109741, "logits/rejected": -1.4959633350372314, "logps/chosen": -86.3606948852539, "logps/rejected": -172.97286987304688, "loss": 0.0055, "rewards/accuracies": 1.0, "rewards/chosen": -2.5633327960968018, "rewards/margins": 9.036480903625488, "rewards/rejected": -11.599813461303711, "step": 2578 }, { "epoch": 4.14, "learning_rate": 3.000396353547364e-07, "logits/chosen": -1.5501203536987305, "logits/rejected": -1.5455594062805176, "logps/chosen": -126.81220245361328, "logps/rejected": -241.97940063476562, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -4.425923824310303, "rewards/margins": 10.954816818237305, "rewards/rejected": -15.380741119384766, "step": 2579 }, { "epoch": 4.14, "learning_rate": 2.9994054696789533e-07, "logits/chosen": -1.6996294260025024, "logits/rejected": -1.6044483184814453, "logps/chosen": -164.78091430664062, "logps/rejected": -219.7285919189453, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/chosen": -6.516902923583984, "rewards/margins": 6.464900970458984, "rewards/rejected": -12.981803894042969, "step": 2580 }, { "epoch": 4.14, "learning_rate": 2.998414585810543e-07, "logits/chosen": -1.5503407716751099, "logits/rejected": -1.5917922258377075, "logps/chosen": -103.84812927246094, "logps/rejected": -227.26223754882812, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -4.013312339782715, "rewards/margins": 10.86255168914795, "rewards/rejected": -14.875863075256348, "step": 2581 }, { "epoch": 4.14, "learning_rate": 2.997423701942132e-07, "logits/chosen": -1.5587021112442017, "logits/rejected": -1.6382226943969727, "logps/chosen": -113.75021362304688, "logps/rejected": -195.59405517578125, "loss": 0.0051, "rewards/accuracies": 1.0, "rewards/chosen": -4.2157111167907715, "rewards/margins": 7.25003719329834, "rewards/rejected": -11.46574878692627, "step": 2582 }, { "epoch": 4.15, "learning_rate": 2.9964328180737216e-07, "logits/chosen": -1.6200093030929565, "logits/rejected": -1.560253381729126, "logps/chosen": -127.14789581298828, "logps/rejected": -207.91180419921875, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/chosen": -3.836456298828125, "rewards/margins": 9.17413330078125, "rewards/rejected": -13.010589599609375, "step": 2583 }, { "epoch": 4.15, "learning_rate": 2.995441934205311e-07, "logits/chosen": -1.454539179801941, "logits/rejected": -1.5354617834091187, "logps/chosen": -117.69158935546875, "logps/rejected": -240.65093994140625, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -4.230213165283203, "rewards/margins": 11.456697463989258, "rewards/rejected": -15.686910629272461, "step": 2584 }, { "epoch": 4.15, "learning_rate": 2.9944510503369e-07, "logits/chosen": -1.5963188409805298, "logits/rejected": -1.7004926204681396, "logps/chosen": -97.3895263671875, "logps/rejected": -225.81631469726562, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -3.391481399536133, "rewards/margins": 9.83773422241211, "rewards/rejected": -13.229215621948242, "step": 2585 }, { "epoch": 4.15, "learning_rate": 2.99346016646849e-07, "logits/chosen": -1.742470383644104, "logits/rejected": -1.6578419208526611, "logps/chosen": -168.33013916015625, "logps/rejected": -247.6545867919922, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -5.170949935913086, "rewards/margins": 10.295636177062988, "rewards/rejected": -15.466585159301758, "step": 2586 }, { "epoch": 4.15, "learning_rate": 2.992469282600079e-07, "logits/chosen": -1.524503231048584, "logits/rejected": -1.5889770984649658, "logps/chosen": -132.502197265625, "logps/rejected": -225.08746337890625, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -5.928086757659912, "rewards/margins": 8.00860595703125, "rewards/rejected": -13.936694145202637, "step": 2587 }, { "epoch": 4.15, "learning_rate": 2.9914783987316685e-07, "logits/chosen": -1.4620314836502075, "logits/rejected": -1.5217745304107666, "logps/chosen": -122.49758911132812, "logps/rejected": -239.345458984375, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -3.9297432899475098, "rewards/margins": 10.555765151977539, "rewards/rejected": -14.48550796508789, "step": 2588 }, { "epoch": 4.16, "learning_rate": 2.990487514863258e-07, "logits/chosen": -1.5210789442062378, "logits/rejected": -1.50363028049469, "logps/chosen": -140.1033172607422, "logps/rejected": -264.50567626953125, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/chosen": -6.304030418395996, "rewards/margins": 12.517658233642578, "rewards/rejected": -18.82168960571289, "step": 2589 }, { "epoch": 4.16, "learning_rate": 2.989496630994847e-07, "logits/chosen": -1.472170352935791, "logits/rejected": -1.5807462930679321, "logps/chosen": -143.32870483398438, "logps/rejected": -237.32826232910156, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -5.140076637268066, "rewards/margins": 8.737728118896484, "rewards/rejected": -13.87780475616455, "step": 2590 }, { "epoch": 4.16, "learning_rate": 2.988505747126437e-07, "logits/chosen": -1.571116328239441, "logits/rejected": -1.5420236587524414, "logps/chosen": -163.24951171875, "logps/rejected": -241.69207763671875, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -6.0404887199401855, "rewards/margins": 9.048639297485352, "rewards/rejected": -15.089127540588379, "step": 2591 }, { "epoch": 4.16, "learning_rate": 2.987514863258026e-07, "logits/chosen": -1.679246425628662, "logits/rejected": -1.574311375617981, "logps/chosen": -127.45072174072266, "logps/rejected": -221.72891235351562, "loss": 0.0056, "rewards/accuracies": 1.0, "rewards/chosen": -3.7748565673828125, "rewards/margins": 9.633292198181152, "rewards/rejected": -13.408148765563965, "step": 2592 }, { "epoch": 4.16, "learning_rate": 2.9865239793896154e-07, "logits/chosen": -1.7165629863739014, "logits/rejected": -1.619974136352539, "logps/chosen": -116.19605255126953, "logps/rejected": -204.73265075683594, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -3.4122250080108643, "rewards/margins": 11.371054649353027, "rewards/rejected": -14.783280372619629, "step": 2593 }, { "epoch": 4.16, "learning_rate": 2.985533095521205e-07, "logits/chosen": -1.7190942764282227, "logits/rejected": -1.6454085111618042, "logps/chosen": -168.05853271484375, "logps/rejected": -259.9736633300781, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -5.330255031585693, "rewards/margins": 10.43486213684082, "rewards/rejected": -15.765117645263672, "step": 2594 }, { "epoch": 4.17, "learning_rate": 2.984542211652794e-07, "logits/chosen": -1.5647408962249756, "logits/rejected": -1.561293125152588, "logps/chosen": -119.53518676757812, "logps/rejected": -240.32757568359375, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -4.154478549957275, "rewards/margins": 11.267559051513672, "rewards/rejected": -15.422038078308105, "step": 2595 }, { "epoch": 4.17, "learning_rate": 2.9835513277843837e-07, "logits/chosen": -1.5635526180267334, "logits/rejected": -1.5776234865188599, "logps/chosen": -164.2215576171875, "logps/rejected": -240.31536865234375, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -6.346957206726074, "rewards/margins": 7.822229385375977, "rewards/rejected": -14.16918659210205, "step": 2596 }, { "epoch": 4.17, "learning_rate": 2.982560443915973e-07, "logits/chosen": -1.662605881690979, "logits/rejected": -1.648646593093872, "logps/chosen": -122.19505310058594, "logps/rejected": -172.97988891601562, "loss": 0.0041, "rewards/accuracies": 1.0, "rewards/chosen": -3.8155975341796875, "rewards/margins": 5.74720573425293, "rewards/rejected": -9.562803268432617, "step": 2597 }, { "epoch": 4.17, "learning_rate": 2.9815695600475624e-07, "logits/chosen": -1.5425796508789062, "logits/rejected": -1.5020132064819336, "logps/chosen": -148.14537048339844, "logps/rejected": -241.3771514892578, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -6.6546406745910645, "rewards/margins": 9.21835994720459, "rewards/rejected": -15.873001098632812, "step": 2598 }, { "epoch": 4.17, "learning_rate": 2.980578676179152e-07, "logits/chosen": -1.7624741792678833, "logits/rejected": -1.7617411613464355, "logps/chosen": -140.3846435546875, "logps/rejected": -229.33656311035156, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -3.7605934143066406, "rewards/margins": 10.07968807220459, "rewards/rejected": -13.84028148651123, "step": 2599 }, { "epoch": 4.17, "learning_rate": 2.979587792310741e-07, "logits/chosen": -1.6464412212371826, "logits/rejected": -1.6403224468231201, "logps/chosen": -120.69850158691406, "logps/rejected": -237.12161254882812, "loss": 0.116, "rewards/accuracies": 1.0, "rewards/chosen": -4.063785552978516, "rewards/margins": 10.305391311645508, "rewards/rejected": -14.369176864624023, "step": 2600 }, { "epoch": 4.17, "learning_rate": 2.9785969084423306e-07, "logits/chosen": -1.5858052968978882, "logits/rejected": -1.5251717567443848, "logps/chosen": -144.09173583984375, "logps/rejected": -228.1300811767578, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": -5.2009968757629395, "rewards/margins": 9.794378280639648, "rewards/rejected": -14.995375633239746, "step": 2601 }, { "epoch": 4.18, "learning_rate": 2.9776060245739197e-07, "logits/chosen": -1.604098916053772, "logits/rejected": -1.636525273323059, "logps/chosen": -102.31597900390625, "logps/rejected": -198.59017944335938, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": -2.367818832397461, "rewards/margins": 10.49850082397461, "rewards/rejected": -12.86631965637207, "step": 2602 }, { "epoch": 4.18, "learning_rate": 2.976615140705509e-07, "logits/chosen": -1.543642520904541, "logits/rejected": -1.6041741371154785, "logps/chosen": -129.02154541015625, "logps/rejected": -228.4657745361328, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": -2.8073549270629883, "rewards/margins": 10.307844161987305, "rewards/rejected": -13.11520004272461, "step": 2603 }, { "epoch": 4.18, "learning_rate": 2.975624256837099e-07, "logits/chosen": -1.4940179586410522, "logits/rejected": -1.5145294666290283, "logps/chosen": -173.17755126953125, "logps/rejected": -246.92498779296875, "loss": 0.0181, "rewards/accuracies": 1.0, "rewards/chosen": -7.378036975860596, "rewards/margins": 7.123447895050049, "rewards/rejected": -14.501483917236328, "step": 2604 }, { "epoch": 4.18, "learning_rate": 2.974633372968688e-07, "logits/chosen": -1.712327241897583, "logits/rejected": -1.733378291130066, "logps/chosen": -105.77377319335938, "logps/rejected": -227.8582763671875, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -3.1166789531707764, "rewards/margins": 11.488521575927734, "rewards/rejected": -14.605198860168457, "step": 2605 }, { "epoch": 4.18, "learning_rate": 2.973642489100277e-07, "logits/chosen": -1.5718108415603638, "logits/rejected": -1.5769075155258179, "logps/chosen": -128.98143005371094, "logps/rejected": -208.38546752929688, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -4.580320358276367, "rewards/margins": 7.900373935699463, "rewards/rejected": -12.480694770812988, "step": 2606 }, { "epoch": 4.18, "learning_rate": 2.9726516052318666e-07, "logits/chosen": -1.5935933589935303, "logits/rejected": -1.6050814390182495, "logps/chosen": -126.69969177246094, "logps/rejected": -220.27598571777344, "loss": 0.0147, "rewards/accuracies": 1.0, "rewards/chosen": -3.5989503860473633, "rewards/margins": 10.732498168945312, "rewards/rejected": -14.33144760131836, "step": 2607 }, { "epoch": 4.19, "learning_rate": 2.9716607213634557e-07, "logits/chosen": -1.6414810419082642, "logits/rejected": -1.768264889717102, "logps/chosen": -141.17935180664062, "logps/rejected": -279.7181091308594, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -4.9601593017578125, "rewards/margins": 12.411981582641602, "rewards/rejected": -17.372140884399414, "step": 2608 }, { "epoch": 4.19, "learning_rate": 2.970669837495046e-07, "logits/chosen": -1.7081685066223145, "logits/rejected": -1.631910800933838, "logps/chosen": -161.190185546875, "logps/rejected": -223.7712860107422, "loss": 0.0079, "rewards/accuracies": 1.0, "rewards/chosen": -5.197986125946045, "rewards/margins": 8.058090209960938, "rewards/rejected": -13.25607681274414, "step": 2609 }, { "epoch": 4.19, "learning_rate": 2.969678953626635e-07, "logits/chosen": -1.671266794204712, "logits/rejected": -1.652588129043579, "logps/chosen": -107.98770141601562, "logps/rejected": -214.56912231445312, "loss": 0.02, "rewards/accuracies": 1.0, "rewards/chosen": -4.11541748046875, "rewards/margins": 9.896196365356445, "rewards/rejected": -14.011613845825195, "step": 2610 }, { "epoch": 4.19, "learning_rate": 2.968688069758224e-07, "logits/chosen": -1.39606773853302, "logits/rejected": -1.3900809288024902, "logps/chosen": -125.17507934570312, "logps/rejected": -218.57386779785156, "loss": 0.0107, "rewards/accuracies": 1.0, "rewards/chosen": -3.4314990043640137, "rewards/margins": 9.602810859680176, "rewards/rejected": -13.034310340881348, "step": 2611 }, { "epoch": 4.19, "learning_rate": 2.9676971858898135e-07, "logits/chosen": -1.6833913326263428, "logits/rejected": -1.681294560432434, "logps/chosen": -105.56851959228516, "logps/rejected": -217.75962829589844, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": -2.327134609222412, "rewards/margins": 10.370965003967285, "rewards/rejected": -12.698100090026855, "step": 2612 }, { "epoch": 4.19, "learning_rate": 2.9667063020214026e-07, "logits/chosen": -1.5989315509796143, "logits/rejected": -1.4526102542877197, "logps/chosen": -164.85728454589844, "logps/rejected": -224.6356964111328, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/chosen": -5.553280830383301, "rewards/margins": 8.810578346252441, "rewards/rejected": -14.363859176635742, "step": 2613 }, { "epoch": 4.2, "learning_rate": 2.965715418152993e-07, "logits/chosen": -1.6716057062149048, "logits/rejected": -1.6490294933319092, "logps/chosen": -128.25872802734375, "logps/rejected": -217.73248291015625, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -7.069178104400635, "rewards/margins": 8.339736938476562, "rewards/rejected": -15.408914566040039, "step": 2614 }, { "epoch": 4.2, "learning_rate": 2.964724534284582e-07, "logits/chosen": -1.4590779542922974, "logits/rejected": -1.5756938457489014, "logps/chosen": -129.34506225585938, "logps/rejected": -227.45455932617188, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -4.6346306800842285, "rewards/margins": 8.044358253479004, "rewards/rejected": -12.67898941040039, "step": 2615 }, { "epoch": 4.2, "learning_rate": 2.963733650416171e-07, "logits/chosen": -1.6908906698226929, "logits/rejected": -1.766894817352295, "logps/chosen": -113.42901611328125, "logps/rejected": -248.98570251464844, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -4.78451681137085, "rewards/margins": 10.98194694519043, "rewards/rejected": -15.766463279724121, "step": 2616 }, { "epoch": 4.2, "learning_rate": 2.9627427665477605e-07, "logits/chosen": -1.600558876991272, "logits/rejected": -1.5202388763427734, "logps/chosen": -146.3251190185547, "logps/rejected": -227.97462463378906, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -5.558346748352051, "rewards/margins": 8.172496795654297, "rewards/rejected": -13.730842590332031, "step": 2617 }, { "epoch": 4.2, "learning_rate": 2.9617518826793495e-07, "logits/chosen": -1.592308759689331, "logits/rejected": -1.6360368728637695, "logps/chosen": -120.90931701660156, "logps/rejected": -184.81846618652344, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -4.654166221618652, "rewards/margins": 6.997293472290039, "rewards/rejected": -11.651460647583008, "step": 2618 }, { "epoch": 4.2, "learning_rate": 2.9607609988109397e-07, "logits/chosen": -1.6177361011505127, "logits/rejected": -1.6399503946304321, "logps/chosen": -214.5901641845703, "logps/rejected": -306.0205078125, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -10.040821075439453, "rewards/margins": 8.940839767456055, "rewards/rejected": -18.98166275024414, "step": 2619 }, { "epoch": 4.21, "learning_rate": 2.959770114942529e-07, "logits/chosen": -1.6378214359283447, "logits/rejected": -1.6378552913665771, "logps/chosen": -126.52810668945312, "logps/rejected": -203.5341339111328, "loss": 0.0046, "rewards/accuracies": 1.0, "rewards/chosen": -3.536712884902954, "rewards/margins": 8.249670028686523, "rewards/rejected": -11.786382675170898, "step": 2620 }, { "epoch": 4.21, "learning_rate": 2.958779231074118e-07, "logits/chosen": -1.5713030099868774, "logits/rejected": -1.5706346035003662, "logps/chosen": -117.23066711425781, "logps/rejected": -251.73605346679688, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -4.104074001312256, "rewards/margins": 13.040127754211426, "rewards/rejected": -17.144201278686523, "step": 2621 }, { "epoch": 4.21, "learning_rate": 2.9577883472057074e-07, "logits/chosen": -1.5413975715637207, "logits/rejected": -1.5913618803024292, "logps/chosen": -183.6466827392578, "logps/rejected": -268.5638122558594, "loss": 0.0041, "rewards/accuracies": 1.0, "rewards/chosen": -7.479085445404053, "rewards/margins": 6.674108505249023, "rewards/rejected": -14.153193473815918, "step": 2622 }, { "epoch": 4.21, "learning_rate": 2.9567974633372965e-07, "logits/chosen": -1.6313267946243286, "logits/rejected": -1.6778851747512817, "logps/chosen": -102.08729553222656, "logps/rejected": -215.1317138671875, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -3.39048433303833, "rewards/margins": 9.797775268554688, "rewards/rejected": -13.18825912475586, "step": 2623 }, { "epoch": 4.21, "learning_rate": 2.9558065794688866e-07, "logits/chosen": -1.6312676668167114, "logits/rejected": -1.629957914352417, "logps/chosen": -145.45391845703125, "logps/rejected": -226.21234130859375, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -5.25387716293335, "rewards/margins": 8.332586288452148, "rewards/rejected": -13.586463928222656, "step": 2624 }, { "epoch": 4.21, "learning_rate": 2.9548156956004757e-07, "logits/chosen": -1.5536497831344604, "logits/rejected": -1.6098591089248657, "logps/chosen": -114.69577026367188, "logps/rejected": -271.6200256347656, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -3.895942449569702, "rewards/margins": 12.759693145751953, "rewards/rejected": -16.655635833740234, "step": 2625 }, { "epoch": 4.22, "learning_rate": 2.953824811732065e-07, "logits/chosen": -1.6172168254852295, "logits/rejected": -1.5654453039169312, "logps/chosen": -114.69781494140625, "logps/rejected": -217.96771240234375, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -3.6442770957946777, "rewards/margins": 11.275846481323242, "rewards/rejected": -14.920122146606445, "step": 2626 }, { "epoch": 4.22, "learning_rate": 2.9528339278636543e-07, "logits/chosen": -1.6785448789596558, "logits/rejected": -1.7810044288635254, "logps/chosen": -139.0729217529297, "logps/rejected": -254.35446166992188, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -5.176102638244629, "rewards/margins": 10.639910697937012, "rewards/rejected": -15.816012382507324, "step": 2627 }, { "epoch": 4.22, "learning_rate": 2.9518430439952434e-07, "logits/chosen": -1.5462268590927124, "logits/rejected": -1.58240807056427, "logps/chosen": -100.69804382324219, "logps/rejected": -193.3378143310547, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -2.55835223197937, "rewards/margins": 10.003695487976074, "rewards/rejected": -12.562047004699707, "step": 2628 }, { "epoch": 4.22, "learning_rate": 2.950852160126833e-07, "logits/chosen": -1.6230099201202393, "logits/rejected": -1.5950812101364136, "logps/chosen": -141.2525177001953, "logps/rejected": -268.20867919921875, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": -4.783052444458008, "rewards/margins": 11.954780578613281, "rewards/rejected": -16.73783302307129, "step": 2629 }, { "epoch": 4.22, "learning_rate": 2.9498612762584226e-07, "logits/chosen": -1.4799995422363281, "logits/rejected": -1.551307201385498, "logps/chosen": -121.23284912109375, "logps/rejected": -212.97344970703125, "loss": 0.004, "rewards/accuracies": 1.0, "rewards/chosen": -3.5722405910491943, "rewards/margins": 8.336201667785645, "rewards/rejected": -11.908441543579102, "step": 2630 }, { "epoch": 4.22, "learning_rate": 2.9488703923900117e-07, "logits/chosen": -1.402214765548706, "logits/rejected": -1.4962234497070312, "logps/chosen": -103.02789306640625, "logps/rejected": -214.31927490234375, "loss": 0.0082, "rewards/accuracies": 1.0, "rewards/chosen": -3.5429563522338867, "rewards/margins": 9.565313339233398, "rewards/rejected": -13.108268737792969, "step": 2631 }, { "epoch": 4.22, "learning_rate": 2.9478795085216013e-07, "logits/chosen": -1.5335835218429565, "logits/rejected": -1.4797885417938232, "logps/chosen": -134.79937744140625, "logps/rejected": -231.7010040283203, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": -4.693737983703613, "rewards/margins": 9.09432601928711, "rewards/rejected": -13.788064956665039, "step": 2632 }, { "epoch": 4.23, "learning_rate": 2.9468886246531903e-07, "logits/chosen": -1.4911197423934937, "logits/rejected": -1.5144861936569214, "logps/chosen": -139.337646484375, "logps/rejected": -223.69522094726562, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -4.8994245529174805, "rewards/margins": 8.586593627929688, "rewards/rejected": -13.486018180847168, "step": 2633 }, { "epoch": 4.23, "learning_rate": 2.94589774078478e-07, "logits/chosen": -1.4754489660263062, "logits/rejected": -1.5762310028076172, "logps/chosen": -145.02239990234375, "logps/rejected": -256.0067443847656, "loss": 0.0504, "rewards/accuracies": 1.0, "rewards/chosen": -5.152637004852295, "rewards/margins": 9.32961368560791, "rewards/rejected": -14.482251167297363, "step": 2634 }, { "epoch": 4.23, "learning_rate": 2.9449068569163695e-07, "logits/chosen": -1.3841404914855957, "logits/rejected": -1.5288054943084717, "logps/chosen": -120.25679779052734, "logps/rejected": -245.59136962890625, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -3.515930414199829, "rewards/margins": 9.74242115020752, "rewards/rejected": -13.258352279663086, "step": 2635 }, { "epoch": 4.23, "learning_rate": 2.9439159730479586e-07, "logits/chosen": -1.516653060913086, "logits/rejected": -1.5398778915405273, "logps/chosen": -156.48106384277344, "logps/rejected": -294.3954772949219, "loss": 0.0047, "rewards/accuracies": 1.0, "rewards/chosen": -6.712270736694336, "rewards/margins": 11.332633972167969, "rewards/rejected": -18.044906616210938, "step": 2636 }, { "epoch": 4.23, "learning_rate": 2.942925089179548e-07, "logits/chosen": -1.5660320520401, "logits/rejected": -1.6014647483825684, "logps/chosen": -116.04778289794922, "logps/rejected": -222.39625549316406, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": -3.5275158882141113, "rewards/margins": 9.572059631347656, "rewards/rejected": -13.099575996398926, "step": 2637 }, { "epoch": 4.23, "learning_rate": 2.941934205311137e-07, "logits/chosen": -1.4754986763000488, "logits/rejected": -1.4315857887268066, "logps/chosen": -145.16091918945312, "logps/rejected": -218.84410095214844, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -5.126506805419922, "rewards/margins": 8.765851974487305, "rewards/rejected": -13.892358779907227, "step": 2638 }, { "epoch": 4.24, "learning_rate": 2.940943321442727e-07, "logits/chosen": -1.6263965368270874, "logits/rejected": -1.635613203048706, "logps/chosen": -99.49462890625, "logps/rejected": -173.45802307128906, "loss": 0.0116, "rewards/accuracies": 1.0, "rewards/chosen": -3.7284655570983887, "rewards/margins": 7.282393455505371, "rewards/rejected": -11.010858535766602, "step": 2639 }, { "epoch": 4.24, "learning_rate": 2.9399524375743165e-07, "logits/chosen": -1.5491924285888672, "logits/rejected": -1.636959433555603, "logps/chosen": -157.48828125, "logps/rejected": -249.24038696289062, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -5.92471981048584, "rewards/margins": 8.82001781463623, "rewards/rejected": -14.74473762512207, "step": 2640 }, { "epoch": 4.24, "learning_rate": 2.9389615537059055e-07, "logits/chosen": -1.456390380859375, "logits/rejected": -1.373331904411316, "logps/chosen": -95.78446197509766, "logps/rejected": -246.6943817138672, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -2.001149892807007, "rewards/margins": 13.160087585449219, "rewards/rejected": -15.161237716674805, "step": 2641 }, { "epoch": 4.24, "learning_rate": 2.937970669837495e-07, "logits/chosen": -1.4955196380615234, "logits/rejected": -1.5138293504714966, "logps/chosen": -123.02764129638672, "logps/rejected": -233.41293334960938, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": -4.5924577713012695, "rewards/margins": 10.42428207397461, "rewards/rejected": -15.016739845275879, "step": 2642 }, { "epoch": 4.24, "learning_rate": 2.936979785969084e-07, "logits/chosen": -1.6056244373321533, "logits/rejected": -1.5511646270751953, "logps/chosen": -128.62457275390625, "logps/rejected": -218.11398315429688, "loss": 0.0053, "rewards/accuracies": 1.0, "rewards/chosen": -3.9791293144226074, "rewards/margins": 9.845979690551758, "rewards/rejected": -13.825109481811523, "step": 2643 }, { "epoch": 4.24, "learning_rate": 2.935988902100673e-07, "logits/chosen": -1.7495043277740479, "logits/rejected": -1.7163230180740356, "logps/chosen": -148.57826232910156, "logps/rejected": -216.97166442871094, "loss": 0.0086, "rewards/accuracies": 1.0, "rewards/chosen": -5.505868434906006, "rewards/margins": 7.896707057952881, "rewards/rejected": -13.402575492858887, "step": 2644 }, { "epoch": 4.25, "learning_rate": 2.934998018232263e-07, "logits/chosen": -1.5667736530303955, "logits/rejected": -1.420120120048523, "logps/chosen": -136.36166381835938, "logps/rejected": -195.2672576904297, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": -2.5667595863342285, "rewards/margins": 8.515351295471191, "rewards/rejected": -11.082110404968262, "step": 2645 }, { "epoch": 4.25, "learning_rate": 2.9340071343638525e-07, "logits/chosen": -1.5338914394378662, "logits/rejected": -1.452678918838501, "logps/chosen": -129.51490783691406, "logps/rejected": -164.97705078125, "loss": 0.0074, "rewards/accuracies": 1.0, "rewards/chosen": -4.491064071655273, "rewards/margins": 5.408830642700195, "rewards/rejected": -9.899894714355469, "step": 2646 }, { "epoch": 4.25, "learning_rate": 2.933016250495442e-07, "logits/chosen": -1.5446820259094238, "logits/rejected": -1.6063745021820068, "logps/chosen": -155.09190368652344, "logps/rejected": -240.429931640625, "loss": 0.0188, "rewards/accuracies": 1.0, "rewards/chosen": -6.7944655418396, "rewards/margins": 6.286945343017578, "rewards/rejected": -13.08141040802002, "step": 2647 }, { "epoch": 4.25, "learning_rate": 2.932025366627031e-07, "logits/chosen": -1.551790714263916, "logits/rejected": -1.574445366859436, "logps/chosen": -131.30039978027344, "logps/rejected": -252.68038940429688, "loss": 0.0093, "rewards/accuracies": 1.0, "rewards/chosen": -4.639746189117432, "rewards/margins": 11.976330757141113, "rewards/rejected": -16.616077423095703, "step": 2648 }, { "epoch": 4.25, "learning_rate": 2.93103448275862e-07, "logits/chosen": -1.4791717529296875, "logits/rejected": -1.5024423599243164, "logps/chosen": -141.19644165039062, "logps/rejected": -222.9124755859375, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -5.0868330001831055, "rewards/margins": 10.219091415405273, "rewards/rejected": -15.305925369262695, "step": 2649 }, { "epoch": 4.25, "learning_rate": 2.93004359889021e-07, "logits/chosen": -1.52309250831604, "logits/rejected": -1.6297481060028076, "logps/chosen": -137.26234436035156, "logps/rejected": -244.86785888671875, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": -5.194770812988281, "rewards/margins": 8.577580451965332, "rewards/rejected": -13.77235221862793, "step": 2650 }, { "epoch": 4.26, "learning_rate": 2.9290527150217994e-07, "logits/chosen": -1.6286351680755615, "logits/rejected": -1.6019634008407593, "logps/chosen": -112.03473663330078, "logps/rejected": -215.87948608398438, "loss": 0.0891, "rewards/accuracies": 1.0, "rewards/chosen": -3.6193580627441406, "rewards/margins": 10.22908878326416, "rewards/rejected": -13.8484468460083, "step": 2651 }, { "epoch": 4.26, "learning_rate": 2.928061831153389e-07, "logits/chosen": -1.4716320037841797, "logits/rejected": -1.4713270664215088, "logps/chosen": -133.9058837890625, "logps/rejected": -208.60305786132812, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -4.601775646209717, "rewards/margins": 9.07995891571045, "rewards/rejected": -13.681734085083008, "step": 2652 }, { "epoch": 4.26, "learning_rate": 2.927070947284978e-07, "logits/chosen": -1.5490361452102661, "logits/rejected": -1.5907597541809082, "logps/chosen": -113.91609954833984, "logps/rejected": -196.2628631591797, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -3.902942657470703, "rewards/margins": 7.6152167320251465, "rewards/rejected": -11.518158912658691, "step": 2653 }, { "epoch": 4.26, "learning_rate": 2.926080063416567e-07, "logits/chosen": -1.6606957912445068, "logits/rejected": -1.67069673538208, "logps/chosen": -138.542236328125, "logps/rejected": -228.2842254638672, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -5.081847667694092, "rewards/margins": 8.938965797424316, "rewards/rejected": -14.020814895629883, "step": 2654 }, { "epoch": 4.26, "learning_rate": 2.9250891795481567e-07, "logits/chosen": -1.7181811332702637, "logits/rejected": -1.7411872148513794, "logps/chosen": -94.76527404785156, "logps/rejected": -194.21701049804688, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -3.1614432334899902, "rewards/margins": 9.150632858276367, "rewards/rejected": -12.312076568603516, "step": 2655 }, { "epoch": 4.26, "learning_rate": 2.9240982956797463e-07, "logits/chosen": -1.6080694198608398, "logits/rejected": -1.6008875370025635, "logps/chosen": -138.1259307861328, "logps/rejected": -229.15032958984375, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -4.360541820526123, "rewards/margins": 8.205428123474121, "rewards/rejected": -12.565970420837402, "step": 2656 }, { "epoch": 4.26, "learning_rate": 2.923107411811336e-07, "logits/chosen": -1.5129817724227905, "logits/rejected": -1.6830817461013794, "logps/chosen": -132.26953125, "logps/rejected": -238.1708526611328, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -4.526209354400635, "rewards/margins": 9.413996696472168, "rewards/rejected": -13.940205574035645, "step": 2657 }, { "epoch": 4.27, "learning_rate": 2.922116527942925e-07, "logits/chosen": -1.6018728017807007, "logits/rejected": -1.7031590938568115, "logps/chosen": -100.26914978027344, "logps/rejected": -240.414794921875, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -3.7641754150390625, "rewards/margins": 11.135849952697754, "rewards/rejected": -14.900023460388184, "step": 2658 }, { "epoch": 4.27, "learning_rate": 2.921125644074514e-07, "logits/chosen": -1.649741291999817, "logits/rejected": -1.6461081504821777, "logps/chosen": -184.20306396484375, "logps/rejected": -252.39208984375, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -7.027582168579102, "rewards/margins": 7.404205799102783, "rewards/rejected": -14.431788444519043, "step": 2659 }, { "epoch": 4.27, "learning_rate": 2.9201347602061036e-07, "logits/chosen": -1.4926331043243408, "logits/rejected": -1.5130314826965332, "logps/chosen": -138.96786499023438, "logps/rejected": -208.95726013183594, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": -5.880455017089844, "rewards/margins": 6.160386085510254, "rewards/rejected": -12.040841102600098, "step": 2660 }, { "epoch": 4.27, "learning_rate": 2.919143876337693e-07, "logits/chosen": -1.600313425064087, "logits/rejected": -1.5796160697937012, "logps/chosen": -119.24134826660156, "logps/rejected": -262.1336364746094, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -4.269399642944336, "rewards/margins": 13.468500137329102, "rewards/rejected": -17.737899780273438, "step": 2661 }, { "epoch": 4.27, "learning_rate": 2.918152992469283e-07, "logits/chosen": -1.609178066253662, "logits/rejected": -1.552290439605713, "logps/chosen": -130.94107055664062, "logps/rejected": -240.32562255859375, "loss": 0.0046, "rewards/accuracies": 1.0, "rewards/chosen": -3.87797212600708, "rewards/margins": 11.946358680725098, "rewards/rejected": -15.82433032989502, "step": 2662 }, { "epoch": 4.27, "learning_rate": 2.917162108600872e-07, "logits/chosen": -1.5622427463531494, "logits/rejected": -1.5343575477600098, "logps/chosen": -132.42083740234375, "logps/rejected": -228.65069580078125, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -4.777279853820801, "rewards/margins": 9.582925796508789, "rewards/rejected": -14.360206604003906, "step": 2663 }, { "epoch": 4.28, "learning_rate": 2.916171224732461e-07, "logits/chosen": -1.6382904052734375, "logits/rejected": -1.6905285120010376, "logps/chosen": -91.34497833251953, "logps/rejected": -185.57675170898438, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": -3.5845413208007812, "rewards/margins": 8.893877029418945, "rewards/rejected": -12.478418350219727, "step": 2664 }, { "epoch": 4.28, "learning_rate": 2.9151803408640506e-07, "logits/chosen": -1.6895217895507812, "logits/rejected": -1.6123977899551392, "logps/chosen": -121.86882019042969, "logps/rejected": -232.92303466796875, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": -3.559264898300171, "rewards/margins": 11.711341857910156, "rewards/rejected": -15.270607948303223, "step": 2665 }, { "epoch": 4.28, "learning_rate": 2.9141894569956396e-07, "logits/chosen": -1.761647343635559, "logits/rejected": -1.6982779502868652, "logps/chosen": -151.42587280273438, "logps/rejected": -246.1985626220703, "loss": 0.0099, "rewards/accuracies": 1.0, "rewards/chosen": -4.919920444488525, "rewards/margins": 10.464000701904297, "rewards/rejected": -15.383922576904297, "step": 2666 }, { "epoch": 4.28, "learning_rate": 2.91319857312723e-07, "logits/chosen": -1.484360933303833, "logits/rejected": -1.5196475982666016, "logps/chosen": -109.69369506835938, "logps/rejected": -205.6442413330078, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/chosen": -3.9167914390563965, "rewards/margins": 8.419023513793945, "rewards/rejected": -12.3358154296875, "step": 2667 }, { "epoch": 4.28, "learning_rate": 2.912207689258819e-07, "logits/chosen": -1.5144636631011963, "logits/rejected": -1.4715849161148071, "logps/chosen": -135.67945861816406, "logps/rejected": -212.42593383789062, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/chosen": -5.233280181884766, "rewards/margins": 8.244329452514648, "rewards/rejected": -13.477609634399414, "step": 2668 }, { "epoch": 4.28, "learning_rate": 2.911216805390408e-07, "logits/chosen": -1.4240036010742188, "logits/rejected": -1.5557358264923096, "logps/chosen": -148.37576293945312, "logps/rejected": -259.8450622558594, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -5.360055923461914, "rewards/margins": 9.257190704345703, "rewards/rejected": -14.6172456741333, "step": 2669 }, { "epoch": 4.29, "learning_rate": 2.9102259215219975e-07, "logits/chosen": -1.5485775470733643, "logits/rejected": -1.6267142295837402, "logps/chosen": -129.58853149414062, "logps/rejected": -246.10208129882812, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -3.617799758911133, "rewards/margins": 7.321616172790527, "rewards/rejected": -10.939414978027344, "step": 2670 }, { "epoch": 4.29, "learning_rate": 2.9092350376535866e-07, "logits/chosen": -1.755169153213501, "logits/rejected": -1.7670153379440308, "logps/chosen": -109.71975708007812, "logps/rejected": -269.9612121582031, "loss": 0.007, "rewards/accuracies": 1.0, "rewards/chosen": -2.9194440841674805, "rewards/margins": 14.892406463623047, "rewards/rejected": -17.81184959411621, "step": 2671 }, { "epoch": 4.29, "learning_rate": 2.9082441537851767e-07, "logits/chosen": -1.4579756259918213, "logits/rejected": -1.4883406162261963, "logps/chosen": -164.52708435058594, "logps/rejected": -243.89007568359375, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -4.527739524841309, "rewards/margins": 9.14558219909668, "rewards/rejected": -13.673321723937988, "step": 2672 }, { "epoch": 4.29, "learning_rate": 2.907253269916766e-07, "logits/chosen": -1.534324049949646, "logits/rejected": -1.5034745931625366, "logps/chosen": -135.1552276611328, "logps/rejected": -257.4395751953125, "loss": 0.0114, "rewards/accuracies": 1.0, "rewards/chosen": -3.7654848098754883, "rewards/margins": 13.033981323242188, "rewards/rejected": -16.79946517944336, "step": 2673 }, { "epoch": 4.29, "learning_rate": 2.906262386048355e-07, "logits/chosen": -1.5323336124420166, "logits/rejected": -1.512731909751892, "logps/chosen": -128.73117065429688, "logps/rejected": -254.79498291015625, "loss": 0.0075, "rewards/accuracies": 1.0, "rewards/chosen": -4.788881778717041, "rewards/margins": 12.37465763092041, "rewards/rejected": -17.16353988647461, "step": 2674 }, { "epoch": 4.29, "learning_rate": 2.9052715021799444e-07, "logits/chosen": -1.7603925466537476, "logits/rejected": -1.6577562093734741, "logps/chosen": -123.3717041015625, "logps/rejected": -151.15402221679688, "loss": 0.0124, "rewards/accuracies": 1.0, "rewards/chosen": -1.863552451133728, "rewards/margins": 7.311705112457275, "rewards/rejected": -9.175257682800293, "step": 2675 }, { "epoch": 4.3, "learning_rate": 2.9042806183115335e-07, "logits/chosen": -1.587792158126831, "logits/rejected": -1.5856435298919678, "logps/chosen": -118.47705841064453, "logps/rejected": -229.99237060546875, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/chosen": -3.7816171646118164, "rewards/margins": 9.666367530822754, "rewards/rejected": -13.44798469543457, "step": 2676 }, { "epoch": 4.3, "learning_rate": 2.903289734443123e-07, "logits/chosen": -1.7034218311309814, "logits/rejected": -1.71919846534729, "logps/chosen": -109.50798797607422, "logps/rejected": -209.8534698486328, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -2.9680557250976562, "rewards/margins": 10.340243339538574, "rewards/rejected": -13.308300018310547, "step": 2677 }, { "epoch": 4.3, "learning_rate": 2.9022988505747127e-07, "logits/chosen": -1.6507941484451294, "logits/rejected": -1.6460776329040527, "logps/chosen": -137.9029998779297, "logps/rejected": -254.79286193847656, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -4.6089887619018555, "rewards/margins": 11.018564224243164, "rewards/rejected": -15.627553939819336, "step": 2678 }, { "epoch": 4.3, "learning_rate": 2.901307966706302e-07, "logits/chosen": -1.5858436822891235, "logits/rejected": -1.6932224035263062, "logps/chosen": -127.51107025146484, "logps/rejected": -242.91448974609375, "loss": 0.0042, "rewards/accuracies": 1.0, "rewards/chosen": -6.389698028564453, "rewards/margins": 9.750028610229492, "rewards/rejected": -16.139726638793945, "step": 2679 }, { "epoch": 4.3, "learning_rate": 2.9003170828378914e-07, "logits/chosen": -1.5720397233963013, "logits/rejected": -1.5701583623886108, "logps/chosen": -121.60831451416016, "logps/rejected": -254.4521942138672, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -5.35711669921875, "rewards/margins": 11.709243774414062, "rewards/rejected": -17.066360473632812, "step": 2680 }, { "epoch": 4.3, "learning_rate": 2.8993261989694804e-07, "logits/chosen": -1.5037003755569458, "logits/rejected": -1.3988037109375, "logps/chosen": -146.1622314453125, "logps/rejected": -215.40614318847656, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/chosen": -4.389657974243164, "rewards/margins": 10.063405990600586, "rewards/rejected": -14.453062057495117, "step": 2681 }, { "epoch": 4.3, "learning_rate": 2.8983353151010695e-07, "logits/chosen": -1.532509684562683, "logits/rejected": -1.5747215747833252, "logps/chosen": -128.59323120117188, "logps/rejected": -233.13461303710938, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": -5.497270107269287, "rewards/margins": 10.917905807495117, "rewards/rejected": -16.415176391601562, "step": 2682 }, { "epoch": 4.31, "learning_rate": 2.8973444312326596e-07, "logits/chosen": -1.641840934753418, "logits/rejected": -1.6387147903442383, "logps/chosen": -120.13775634765625, "logps/rejected": -244.57659912109375, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -3.3169946670532227, "rewards/margins": 13.085742950439453, "rewards/rejected": -16.402738571166992, "step": 2683 }, { "epoch": 4.31, "learning_rate": 2.8963535473642487e-07, "logits/chosen": -1.5707632303237915, "logits/rejected": -1.5770695209503174, "logps/chosen": -120.60309600830078, "logps/rejected": -213.22705078125, "loss": 0.0499, "rewards/accuracies": 1.0, "rewards/chosen": -3.438572883605957, "rewards/margins": 9.825907707214355, "rewards/rejected": -13.264481544494629, "step": 2684 }, { "epoch": 4.31, "learning_rate": 2.8953626634958383e-07, "logits/chosen": -1.4449576139450073, "logits/rejected": -1.464577555656433, "logps/chosen": -125.71534729003906, "logps/rejected": -268.1496887207031, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": -4.71002721786499, "rewards/margins": 11.611729621887207, "rewards/rejected": -16.321758270263672, "step": 2685 }, { "epoch": 4.31, "learning_rate": 2.8943717796274274e-07, "logits/chosen": -1.7275912761688232, "logits/rejected": -1.7570582628250122, "logps/chosen": -140.91827392578125, "logps/rejected": -243.6029052734375, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -5.284562110900879, "rewards/margins": 8.819369316101074, "rewards/rejected": -14.103931427001953, "step": 2686 }, { "epoch": 4.31, "learning_rate": 2.8933808957590164e-07, "logits/chosen": -1.4731565713882446, "logits/rejected": -1.5452535152435303, "logps/chosen": -127.87910461425781, "logps/rejected": -200.79429626464844, "loss": 0.018, "rewards/accuracies": 1.0, "rewards/chosen": -4.215612411499023, "rewards/margins": 6.443398475646973, "rewards/rejected": -10.659010887145996, "step": 2687 }, { "epoch": 4.31, "learning_rate": 2.8923900118906066e-07, "logits/chosen": -1.526278018951416, "logits/rejected": -1.6020742654800415, "logps/chosen": -140.69908142089844, "logps/rejected": -270.2707824707031, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -7.121748447418213, "rewards/margins": 11.946529388427734, "rewards/rejected": -19.06827735900879, "step": 2688 }, { "epoch": 4.32, "learning_rate": 2.8913991280221956e-07, "logits/chosen": -1.6483566761016846, "logits/rejected": -1.6944273710250854, "logps/chosen": -133.17637634277344, "logps/rejected": -231.50006103515625, "loss": 0.0039, "rewards/accuracies": 1.0, "rewards/chosen": -4.576644420623779, "rewards/margins": 9.614761352539062, "rewards/rejected": -14.191404342651367, "step": 2689 }, { "epoch": 4.32, "learning_rate": 2.890408244153785e-07, "logits/chosen": -1.6106691360473633, "logits/rejected": -1.6255688667297363, "logps/chosen": -126.68218231201172, "logps/rejected": -179.02589416503906, "loss": 0.008, "rewards/accuracies": 1.0, "rewards/chosen": -3.230224847793579, "rewards/margins": 7.629091739654541, "rewards/rejected": -10.8593168258667, "step": 2690 }, { "epoch": 4.32, "learning_rate": 2.8894173602853743e-07, "logits/chosen": -1.6046873331069946, "logits/rejected": -1.6035925149917603, "logps/chosen": -139.38291931152344, "logps/rejected": -230.37042236328125, "loss": 0.0076, "rewards/accuracies": 1.0, "rewards/chosen": -3.214730739593506, "rewards/margins": 9.895200729370117, "rewards/rejected": -13.109931945800781, "step": 2691 }, { "epoch": 4.32, "learning_rate": 2.8884264764169634e-07, "logits/chosen": -1.5859174728393555, "logits/rejected": -1.4609119892120361, "logps/chosen": -181.66009521484375, "logps/rejected": -233.13478088378906, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/chosen": -6.891443729400635, "rewards/margins": 6.592597961425781, "rewards/rejected": -13.484041213989258, "step": 2692 }, { "epoch": 4.32, "learning_rate": 2.8874355925485535e-07, "logits/chosen": -1.6162762641906738, "logits/rejected": -1.4732496738433838, "logps/chosen": -148.00950622558594, "logps/rejected": -233.1619415283203, "loss": 0.004, "rewards/accuracies": 1.0, "rewards/chosen": -4.830533981323242, "rewards/margins": 10.208687782287598, "rewards/rejected": -15.039223670959473, "step": 2693 }, { "epoch": 4.32, "learning_rate": 2.8864447086801426e-07, "logits/chosen": -1.6242276430130005, "logits/rejected": -1.814460277557373, "logps/chosen": -104.81797790527344, "logps/rejected": -196.026123046875, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -3.424649238586426, "rewards/margins": 7.702243328094482, "rewards/rejected": -11.12689208984375, "step": 2694 }, { "epoch": 4.33, "learning_rate": 2.885453824811732e-07, "logits/chosen": -1.5530041456222534, "logits/rejected": -1.4485989809036255, "logps/chosen": -161.65785217285156, "logps/rejected": -230.1293182373047, "loss": 0.0105, "rewards/accuracies": 1.0, "rewards/chosen": -4.401159763336182, "rewards/margins": 9.867533683776855, "rewards/rejected": -14.268693923950195, "step": 2695 }, { "epoch": 4.33, "learning_rate": 2.884462940943321e-07, "logits/chosen": -1.5990560054779053, "logits/rejected": -1.5892136096954346, "logps/chosen": -75.73429107666016, "logps/rejected": -174.43191528320312, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -1.1946336030960083, "rewards/margins": 9.81296443939209, "rewards/rejected": -11.007597923278809, "step": 2696 }, { "epoch": 4.33, "learning_rate": 2.8834720570749103e-07, "logits/chosen": -1.5873782634735107, "logits/rejected": -1.5689404010772705, "logps/chosen": -147.67227172851562, "logps/rejected": -242.33389282226562, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/chosen": -4.765568256378174, "rewards/margins": 11.271584510803223, "rewards/rejected": -16.037153244018555, "step": 2697 }, { "epoch": 4.33, "learning_rate": 2.8824811732065004e-07, "logits/chosen": -1.5447468757629395, "logits/rejected": -1.6158127784729004, "logps/chosen": -147.7467498779297, "logps/rejected": -248.13890075683594, "loss": 0.0274, "rewards/accuracies": 1.0, "rewards/chosen": -4.734414577484131, "rewards/margins": 9.923940658569336, "rewards/rejected": -14.658355712890625, "step": 2698 }, { "epoch": 4.33, "learning_rate": 2.8814902893380895e-07, "logits/chosen": -1.6217620372772217, "logits/rejected": -1.6358007192611694, "logps/chosen": -119.26307678222656, "logps/rejected": -193.7361602783203, "loss": 0.0062, "rewards/accuracies": 1.0, "rewards/chosen": -3.936152935028076, "rewards/margins": 6.994050979614258, "rewards/rejected": -10.930203437805176, "step": 2699 }, { "epoch": 4.33, "learning_rate": 2.880499405469679e-07, "logits/chosen": -1.7457430362701416, "logits/rejected": -1.7495160102844238, "logps/chosen": -92.64984130859375, "logps/rejected": -185.75360107421875, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -2.1108267307281494, "rewards/margins": 8.952719688415527, "rewards/rejected": -11.063547134399414, "step": 2700 }, { "epoch": 4.34, "learning_rate": 2.879508521601268e-07, "logits/chosen": -1.689592957496643, "logits/rejected": -1.6231684684753418, "logps/chosen": -167.611083984375, "logps/rejected": -251.2779998779297, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -6.3698248863220215, "rewards/margins": 9.618505477905273, "rewards/rejected": -15.988329887390137, "step": 2701 }, { "epoch": 4.34, "learning_rate": 2.878517637732857e-07, "logits/chosen": -1.5911628007888794, "logits/rejected": -1.6211574077606201, "logps/chosen": -123.07876586914062, "logps/rejected": -247.23683166503906, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -4.087737560272217, "rewards/margins": 10.925151824951172, "rewards/rejected": -15.012889862060547, "step": 2702 }, { "epoch": 4.34, "learning_rate": 2.8775267538644473e-07, "logits/chosen": -1.7623565196990967, "logits/rejected": -1.7019540071487427, "logps/chosen": -115.10385131835938, "logps/rejected": -217.34971618652344, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -3.661360025405884, "rewards/margins": 10.460493087768555, "rewards/rejected": -14.12185287475586, "step": 2703 }, { "epoch": 4.34, "learning_rate": 2.8765358699960364e-07, "logits/chosen": -1.4666502475738525, "logits/rejected": -1.465287446975708, "logps/chosen": -130.3800506591797, "logps/rejected": -219.73854064941406, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -5.500557899475098, "rewards/margins": 9.050395011901855, "rewards/rejected": -14.55095386505127, "step": 2704 }, { "epoch": 4.34, "learning_rate": 2.875544986127626e-07, "logits/chosen": -1.509737491607666, "logits/rejected": -1.516382098197937, "logps/chosen": -125.42378234863281, "logps/rejected": -219.41705322265625, "loss": 0.0049, "rewards/accuracies": 1.0, "rewards/chosen": -4.027798652648926, "rewards/margins": 9.60063362121582, "rewards/rejected": -13.62843132019043, "step": 2705 }, { "epoch": 4.34, "learning_rate": 2.874554102259215e-07, "logits/chosen": -1.7992517948150635, "logits/rejected": -1.6944160461425781, "logps/chosen": -109.88286590576172, "logps/rejected": -209.97201538085938, "loss": 0.0039, "rewards/accuracies": 1.0, "rewards/chosen": -3.0165963172912598, "rewards/margins": 11.080083847045898, "rewards/rejected": -14.096678733825684, "step": 2706 }, { "epoch": 4.35, "learning_rate": 2.873563218390804e-07, "logits/chosen": -1.5950607061386108, "logits/rejected": -1.6680513620376587, "logps/chosen": -129.33627319335938, "logps/rejected": -234.19615173339844, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -3.1628258228302, "rewards/margins": 10.332077026367188, "rewards/rejected": -13.494901657104492, "step": 2707 }, { "epoch": 4.35, "learning_rate": 2.872572334522394e-07, "logits/chosen": -1.6945271492004395, "logits/rejected": -1.7065246105194092, "logps/chosen": -150.19375610351562, "logps/rejected": -226.61611938476562, "loss": 0.0041, "rewards/accuracies": 1.0, "rewards/chosen": -3.7568464279174805, "rewards/margins": 9.798002243041992, "rewards/rejected": -13.554847717285156, "step": 2708 }, { "epoch": 4.35, "learning_rate": 2.8715814506539833e-07, "logits/chosen": -1.7407609224319458, "logits/rejected": -1.6673810482025146, "logps/chosen": -171.8336181640625, "logps/rejected": -236.2877960205078, "loss": 0.0078, "rewards/accuracies": 1.0, "rewards/chosen": -5.680035591125488, "rewards/margins": 9.95743179321289, "rewards/rejected": -15.637468338012695, "step": 2709 }, { "epoch": 4.35, "learning_rate": 2.870590566785573e-07, "logits/chosen": -1.5881246328353882, "logits/rejected": -1.6654787063598633, "logps/chosen": -120.13629150390625, "logps/rejected": -242.16006469726562, "loss": 0.036, "rewards/accuracies": 1.0, "rewards/chosen": -4.322103500366211, "rewards/margins": 9.560227394104004, "rewards/rejected": -13.882329940795898, "step": 2710 }, { "epoch": 4.35, "learning_rate": 2.869599682917162e-07, "logits/chosen": -1.5644659996032715, "logits/rejected": -1.5354502201080322, "logps/chosen": -176.0826416015625, "logps/rejected": -255.49224853515625, "loss": 0.0045, "rewards/accuracies": 1.0, "rewards/chosen": -7.151700019836426, "rewards/margins": 8.524535179138184, "rewards/rejected": -15.67623519897461, "step": 2711 }, { "epoch": 4.35, "learning_rate": 2.868608799048751e-07, "logits/chosen": -1.4831297397613525, "logits/rejected": -1.5800228118896484, "logps/chosen": -125.56117248535156, "logps/rejected": -268.6870422363281, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": -3.863163471221924, "rewards/margins": 12.56589126586914, "rewards/rejected": -16.429054260253906, "step": 2712 }, { "epoch": 4.35, "learning_rate": 2.8676179151803407e-07, "logits/chosen": -1.7188048362731934, "logits/rejected": -1.8612301349639893, "logps/chosen": -113.70161437988281, "logps/rejected": -264.95050048828125, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -4.367321968078613, "rewards/margins": 13.113808631896973, "rewards/rejected": -17.481130599975586, "step": 2713 }, { "epoch": 4.36, "learning_rate": 2.8666270313119303e-07, "logits/chosen": -1.57486093044281, "logits/rejected": -1.533722162246704, "logps/chosen": -110.47323608398438, "logps/rejected": -252.98753356933594, "loss": 0.0056, "rewards/accuracies": 1.0, "rewards/chosen": -3.601809501647949, "rewards/margins": 13.707547187805176, "rewards/rejected": -17.309356689453125, "step": 2714 }, { "epoch": 4.36, "learning_rate": 2.8656361474435193e-07, "logits/chosen": -1.54884934425354, "logits/rejected": -1.5179437398910522, "logps/chosen": -117.19139099121094, "logps/rejected": -229.16049194335938, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": -4.040502548217773, "rewards/margins": 10.593888282775879, "rewards/rejected": -14.634389877319336, "step": 2715 }, { "epoch": 4.36, "learning_rate": 2.864645263575109e-07, "logits/chosen": -1.3936482667922974, "logits/rejected": -1.3749257326126099, "logps/chosen": -137.77694702148438, "logps/rejected": -229.060791015625, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -3.234468698501587, "rewards/margins": 9.033712387084961, "rewards/rejected": -12.268180847167969, "step": 2716 }, { "epoch": 4.36, "learning_rate": 2.863654379706698e-07, "logits/chosen": -1.5513296127319336, "logits/rejected": -1.5265400409698486, "logps/chosen": -121.88670349121094, "logps/rejected": -242.40025329589844, "loss": 0.0073, "rewards/accuracies": 1.0, "rewards/chosen": -5.923501014709473, "rewards/margins": 10.814271926879883, "rewards/rejected": -16.737773895263672, "step": 2717 }, { "epoch": 4.36, "learning_rate": 2.8626634958382876e-07, "logits/chosen": -1.5112066268920898, "logits/rejected": -1.5811978578567505, "logps/chosen": -145.32713317871094, "logps/rejected": -279.7803955078125, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -5.312545299530029, "rewards/margins": 13.130022048950195, "rewards/rejected": -18.442567825317383, "step": 2718 }, { "epoch": 4.36, "learning_rate": 2.861672611969877e-07, "logits/chosen": -1.43589186668396, "logits/rejected": -1.5404096841812134, "logps/chosen": -105.24784851074219, "logps/rejected": -215.0416717529297, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -3.716059446334839, "rewards/margins": 10.233918190002441, "rewards/rejected": -13.94997787475586, "step": 2719 }, { "epoch": 4.37, "learning_rate": 2.8606817281014663e-07, "logits/chosen": -1.604697585105896, "logits/rejected": -1.574940800666809, "logps/chosen": -141.23324584960938, "logps/rejected": -254.27012634277344, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -4.838027477264404, "rewards/margins": 9.745918273925781, "rewards/rejected": -14.583945274353027, "step": 2720 }, { "epoch": 4.37, "learning_rate": 2.859690844233056e-07, "logits/chosen": -1.6449010372161865, "logits/rejected": -1.6311697959899902, "logps/chosen": -144.22784423828125, "logps/rejected": -230.12008666992188, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -4.7527947425842285, "rewards/margins": 8.402633666992188, "rewards/rejected": -13.155428886413574, "step": 2721 }, { "epoch": 4.37, "learning_rate": 2.858699960364645e-07, "logits/chosen": -1.820873737335205, "logits/rejected": -1.7669110298156738, "logps/chosen": -102.68189239501953, "logps/rejected": -216.33917236328125, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -3.39007830619812, "rewards/margins": 10.833419799804688, "rewards/rejected": -14.223499298095703, "step": 2722 }, { "epoch": 4.37, "learning_rate": 2.8577090764962345e-07, "logits/chosen": -1.5361204147338867, "logits/rejected": -1.6005632877349854, "logps/chosen": -145.8508758544922, "logps/rejected": -254.4680633544922, "loss": 0.011, "rewards/accuracies": 1.0, "rewards/chosen": -5.688516616821289, "rewards/margins": 8.850040435791016, "rewards/rejected": -14.538557052612305, "step": 2723 }, { "epoch": 4.37, "learning_rate": 2.856718192627824e-07, "logits/chosen": -1.5622220039367676, "logits/rejected": -1.5475040674209595, "logps/chosen": -131.94325256347656, "logps/rejected": -197.24072265625, "loss": 0.037, "rewards/accuracies": 1.0, "rewards/chosen": -3.540065288543701, "rewards/margins": 8.749982833862305, "rewards/rejected": -12.290048599243164, "step": 2724 }, { "epoch": 4.37, "learning_rate": 2.855727308759413e-07, "logits/chosen": -1.6205891370773315, "logits/rejected": -1.672995686531067, "logps/chosen": -129.09080505371094, "logps/rejected": -222.75900268554688, "loss": 0.0079, "rewards/accuracies": 1.0, "rewards/chosen": -3.210233211517334, "rewards/margins": 9.428805351257324, "rewards/rejected": -12.639039039611816, "step": 2725 }, { "epoch": 4.38, "learning_rate": 2.854736424891003e-07, "logits/chosen": -1.4986212253570557, "logits/rejected": -1.488057255744934, "logps/chosen": -172.02178955078125, "logps/rejected": -246.6441650390625, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -7.190101623535156, "rewards/margins": 7.754901885986328, "rewards/rejected": -14.945003509521484, "step": 2726 }, { "epoch": 4.38, "learning_rate": 2.853745541022592e-07, "logits/chosen": -1.6370593309402466, "logits/rejected": -1.5723285675048828, "logps/chosen": -162.03515625, "logps/rejected": -211.7146453857422, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -5.955228328704834, "rewards/margins": 7.85566520690918, "rewards/rejected": -13.810894012451172, "step": 2727 }, { "epoch": 4.38, "learning_rate": 2.8527546571541815e-07, "logits/chosen": -1.4537222385406494, "logits/rejected": -1.5885872840881348, "logps/chosen": -135.88449096679688, "logps/rejected": -263.41925048828125, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -4.1324381828308105, "rewards/margins": 11.385358810424805, "rewards/rejected": -15.517796516418457, "step": 2728 }, { "epoch": 4.38, "learning_rate": 2.8517637732857705e-07, "logits/chosen": -1.5613166093826294, "logits/rejected": -1.5919588804244995, "logps/chosen": -101.88641357421875, "logps/rejected": -179.1001739501953, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -4.185787677764893, "rewards/margins": 8.363441467285156, "rewards/rejected": -12.549229621887207, "step": 2729 }, { "epoch": 4.38, "learning_rate": 2.85077288941736e-07, "logits/chosen": -1.6459912061691284, "logits/rejected": -1.6963763236999512, "logps/chosen": -121.8833236694336, "logps/rejected": -254.6198272705078, "loss": 0.0049, "rewards/accuracies": 1.0, "rewards/chosen": -4.575231552124023, "rewards/margins": 10.258979797363281, "rewards/rejected": -14.834212303161621, "step": 2730 }, { "epoch": 4.38, "learning_rate": 2.8497820055489497e-07, "logits/chosen": -1.4920313358306885, "logits/rejected": -1.5534296035766602, "logps/chosen": -136.25750732421875, "logps/rejected": -225.75994873046875, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -4.56334114074707, "rewards/margins": 8.823334693908691, "rewards/rejected": -13.386674880981445, "step": 2731 }, { "epoch": 4.39, "learning_rate": 2.848791121680539e-07, "logits/chosen": -1.5881024599075317, "logits/rejected": -1.555364727973938, "logps/chosen": -124.42652893066406, "logps/rejected": -225.83660888671875, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -2.8523130416870117, "rewards/margins": 11.03785228729248, "rewards/rejected": -13.890165328979492, "step": 2732 }, { "epoch": 4.39, "learning_rate": 2.8478002378121284e-07, "logits/chosen": -1.5217175483703613, "logits/rejected": -1.4797708988189697, "logps/chosen": -144.7264404296875, "logps/rejected": -249.1586151123047, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -6.470658779144287, "rewards/margins": 11.141939163208008, "rewards/rejected": -17.612598419189453, "step": 2733 }, { "epoch": 4.39, "learning_rate": 2.8468093539437175e-07, "logits/chosen": -1.5542359352111816, "logits/rejected": -1.586277723312378, "logps/chosen": -150.18081665039062, "logps/rejected": -232.5347137451172, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/chosen": -5.495151996612549, "rewards/margins": 9.450002670288086, "rewards/rejected": -14.945154190063477, "step": 2734 }, { "epoch": 4.39, "learning_rate": 2.845818470075307e-07, "logits/chosen": -1.7437036037445068, "logits/rejected": -1.8106427192687988, "logps/chosen": -127.38957977294922, "logps/rejected": -236.47662353515625, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -5.075035572052002, "rewards/margins": 10.382805824279785, "rewards/rejected": -15.457841873168945, "step": 2735 }, { "epoch": 4.39, "learning_rate": 2.8448275862068967e-07, "logits/chosen": -1.5717424154281616, "logits/rejected": -1.5086891651153564, "logps/chosen": -127.60755920410156, "logps/rejected": -240.640380859375, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -4.413602828979492, "rewards/margins": 12.253816604614258, "rewards/rejected": -16.66741943359375, "step": 2736 }, { "epoch": 4.39, "learning_rate": 2.8438367023384857e-07, "logits/chosen": -1.7324081659317017, "logits/rejected": -1.6997135877609253, "logps/chosen": -156.7826690673828, "logps/rejected": -249.06898498535156, "loss": 0.0266, "rewards/accuracies": 1.0, "rewards/chosen": -4.938941478729248, "rewards/margins": 9.959733963012695, "rewards/rejected": -14.898675918579102, "step": 2737 }, { "epoch": 4.39, "learning_rate": 2.8428458184700753e-07, "logits/chosen": -1.5725185871124268, "logits/rejected": -1.603454828262329, "logps/chosen": -94.85633850097656, "logps/rejected": -222.597412109375, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -3.2955291271209717, "rewards/margins": 12.444089889526367, "rewards/rejected": -15.739618301391602, "step": 2738 }, { "epoch": 4.4, "learning_rate": 2.8418549346016644e-07, "logits/chosen": -1.5346653461456299, "logits/rejected": -1.4956259727478027, "logps/chosen": -201.06982421875, "logps/rejected": -221.88934326171875, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -6.505808353424072, "rewards/margins": 8.380111694335938, "rewards/rejected": -14.885919570922852, "step": 2739 }, { "epoch": 4.4, "learning_rate": 2.840864050733254e-07, "logits/chosen": -1.5518558025360107, "logits/rejected": -1.5157350301742554, "logps/chosen": -159.24163818359375, "logps/rejected": -228.64549255371094, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -5.71225643157959, "rewards/margins": 7.522473335266113, "rewards/rejected": -13.234729766845703, "step": 2740 }, { "epoch": 4.4, "learning_rate": 2.8398731668648436e-07, "logits/chosen": -1.533934235572815, "logits/rejected": -1.554236888885498, "logps/chosen": -142.6758575439453, "logps/rejected": -250.65208435058594, "loss": 0.0045, "rewards/accuracies": 1.0, "rewards/chosen": -6.23792839050293, "rewards/margins": 10.542596817016602, "rewards/rejected": -16.78052520751953, "step": 2741 }, { "epoch": 4.4, "learning_rate": 2.8388822829964327e-07, "logits/chosen": -1.6511311531066895, "logits/rejected": -1.665867567062378, "logps/chosen": -135.06382751464844, "logps/rejected": -261.1358337402344, "loss": 0.0039, "rewards/accuracies": 1.0, "rewards/chosen": -5.100952625274658, "rewards/margins": 11.429085731506348, "rewards/rejected": -16.53003692626953, "step": 2742 }, { "epoch": 4.4, "learning_rate": 2.837891399128022e-07, "logits/chosen": -1.6143620014190674, "logits/rejected": -1.603949785232544, "logps/chosen": -147.03831481933594, "logps/rejected": -248.8500213623047, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -6.380857467651367, "rewards/margins": 10.758564949035645, "rewards/rejected": -17.139421463012695, "step": 2743 }, { "epoch": 4.4, "learning_rate": 2.8369005152596113e-07, "logits/chosen": -1.4233711957931519, "logits/rejected": -1.5262531042099, "logps/chosen": -106.28994750976562, "logps/rejected": -229.238525390625, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -3.5503110885620117, "rewards/margins": 10.036842346191406, "rewards/rejected": -13.587154388427734, "step": 2744 }, { "epoch": 4.41, "learning_rate": 2.8359096313912004e-07, "logits/chosen": -1.5898771286010742, "logits/rejected": -1.5718083381652832, "logps/chosen": -125.01554107666016, "logps/rejected": -222.83021545410156, "loss": 0.0055, "rewards/accuracies": 1.0, "rewards/chosen": -4.853241920471191, "rewards/margins": 9.736324310302734, "rewards/rejected": -14.589567184448242, "step": 2745 }, { "epoch": 4.41, "learning_rate": 2.8349187475227905e-07, "logits/chosen": -1.6329039335250854, "logits/rejected": -1.6574586629867554, "logps/chosen": -95.35833740234375, "logps/rejected": -217.6109619140625, "loss": 0.0068, "rewards/accuracies": 1.0, "rewards/chosen": -2.5097174644470215, "rewards/margins": 11.907732963562012, "rewards/rejected": -14.417449951171875, "step": 2746 }, { "epoch": 4.41, "learning_rate": 2.8339278636543796e-07, "logits/chosen": -1.4765872955322266, "logits/rejected": -1.6432390213012695, "logps/chosen": -118.42501068115234, "logps/rejected": -247.189208984375, "loss": 0.0306, "rewards/accuracies": 1.0, "rewards/chosen": -4.513876914978027, "rewards/margins": 10.640029907226562, "rewards/rejected": -15.153907775878906, "step": 2747 }, { "epoch": 4.41, "learning_rate": 2.8329369797859687e-07, "logits/chosen": -1.4298268556594849, "logits/rejected": -1.4288899898529053, "logps/chosen": -151.6299285888672, "logps/rejected": -235.86044311523438, "loss": 0.0117, "rewards/accuracies": 1.0, "rewards/chosen": -6.147488594055176, "rewards/margins": 9.905120849609375, "rewards/rejected": -16.052610397338867, "step": 2748 }, { "epoch": 4.41, "learning_rate": 2.831946095917558e-07, "logits/chosen": -1.6230149269104004, "logits/rejected": -1.6415226459503174, "logps/chosen": -154.929931640625, "logps/rejected": -254.8118133544922, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -5.146159648895264, "rewards/margins": 10.277194023132324, "rewards/rejected": -15.423354148864746, "step": 2749 }, { "epoch": 4.41, "learning_rate": 2.8309552120491473e-07, "logits/chosen": -1.5679807662963867, "logits/rejected": -1.5540771484375, "logps/chosen": -118.28207397460938, "logps/rejected": -211.56150817871094, "loss": 0.0065, "rewards/accuracies": 1.0, "rewards/chosen": -4.544943809509277, "rewards/margins": 9.553079605102539, "rewards/rejected": -14.098023414611816, "step": 2750 }, { "epoch": 4.42, "learning_rate": 2.8299643281807374e-07, "logits/chosen": -1.5485495328903198, "logits/rejected": -1.5943037271499634, "logps/chosen": -120.41741943359375, "logps/rejected": -180.13356018066406, "loss": 0.0055, "rewards/accuracies": 1.0, "rewards/chosen": -4.025323390960693, "rewards/margins": 5.908749580383301, "rewards/rejected": -9.934072494506836, "step": 2751 }, { "epoch": 4.42, "learning_rate": 2.8289734443123265e-07, "logits/chosen": -1.611480712890625, "logits/rejected": -1.6532461643218994, "logps/chosen": -115.31661987304688, "logps/rejected": -247.1338653564453, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -3.2026615142822266, "rewards/margins": 12.903923034667969, "rewards/rejected": -16.106582641601562, "step": 2752 }, { "epoch": 4.42, "learning_rate": 2.8279825604439156e-07, "logits/chosen": -1.5620131492614746, "logits/rejected": -1.6490687131881714, "logps/chosen": -79.9268569946289, "logps/rejected": -223.0098876953125, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -2.410628318786621, "rewards/margins": 12.070353507995605, "rewards/rejected": -14.480981826782227, "step": 2753 }, { "epoch": 4.42, "learning_rate": 2.826991676575505e-07, "logits/chosen": -1.4730966091156006, "logits/rejected": -1.5045579671859741, "logps/chosen": -144.82215881347656, "logps/rejected": -273.5934753417969, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -5.425799369812012, "rewards/margins": 12.714338302612305, "rewards/rejected": -18.14013671875, "step": 2754 }, { "epoch": 4.42, "learning_rate": 2.826000792707094e-07, "logits/chosen": -1.6694135665893555, "logits/rejected": -1.6621085405349731, "logps/chosen": -131.1252899169922, "logps/rejected": -217.86700439453125, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -4.115210056304932, "rewards/margins": 9.40616226196289, "rewards/rejected": -13.521371841430664, "step": 2755 }, { "epoch": 4.42, "learning_rate": 2.8250099088386844e-07, "logits/chosen": -1.5563735961914062, "logits/rejected": -1.5341675281524658, "logps/chosen": -161.56570434570312, "logps/rejected": -253.91421508789062, "loss": 0.0045, "rewards/accuracies": 1.0, "rewards/chosen": -5.335036754608154, "rewards/margins": 10.296276092529297, "rewards/rejected": -15.63131332397461, "step": 2756 }, { "epoch": 4.43, "learning_rate": 2.8240190249702734e-07, "logits/chosen": -1.5389516353607178, "logits/rejected": -1.6099638938903809, "logps/chosen": -91.54885864257812, "logps/rejected": -168.38998413085938, "loss": 0.0049, "rewards/accuracies": 1.0, "rewards/chosen": -3.0507402420043945, "rewards/margins": 6.907531261444092, "rewards/rejected": -9.958271026611328, "step": 2757 }, { "epoch": 4.43, "learning_rate": 2.8230281411018625e-07, "logits/chosen": -1.571253776550293, "logits/rejected": -1.5348806381225586, "logps/chosen": -107.9016342163086, "logps/rejected": -233.93258666992188, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -4.745862007141113, "rewards/margins": 11.6283540725708, "rewards/rejected": -16.374216079711914, "step": 2758 }, { "epoch": 4.43, "learning_rate": 2.822037257233452e-07, "logits/chosen": -1.495646595954895, "logits/rejected": -1.5194401741027832, "logps/chosen": -95.109619140625, "logps/rejected": -205.4697265625, "loss": 0.0048, "rewards/accuracies": 1.0, "rewards/chosen": -2.696382999420166, "rewards/margins": 10.568206787109375, "rewards/rejected": -13.2645902633667, "step": 2759 }, { "epoch": 4.43, "learning_rate": 2.821046373365041e-07, "logits/chosen": -1.6217894554138184, "logits/rejected": -1.6155112981796265, "logps/chosen": -144.34454345703125, "logps/rejected": -241.9854736328125, "loss": 0.0281, "rewards/accuracies": 1.0, "rewards/chosen": -5.128584861755371, "rewards/margins": 10.77570629119873, "rewards/rejected": -15.904291152954102, "step": 2760 }, { "epoch": 4.43, "learning_rate": 2.8200554894966313e-07, "logits/chosen": -1.5358809232711792, "logits/rejected": -1.530653476715088, "logps/chosen": -175.5430908203125, "logps/rejected": -251.4400634765625, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -7.741790294647217, "rewards/margins": 8.747354507446289, "rewards/rejected": -16.489145278930664, "step": 2761 }, { "epoch": 4.43, "learning_rate": 2.8190646056282204e-07, "logits/chosen": -1.5708673000335693, "logits/rejected": -1.5588667392730713, "logps/chosen": -101.2484130859375, "logps/rejected": -161.5988006591797, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": -2.8774006366729736, "rewards/margins": 5.936879634857178, "rewards/rejected": -8.81428050994873, "step": 2762 }, { "epoch": 4.43, "learning_rate": 2.8180737217598094e-07, "logits/chosen": -1.5750653743743896, "logits/rejected": -1.7654157876968384, "logps/chosen": -86.88893127441406, "logps/rejected": -180.5362091064453, "loss": 0.0129, "rewards/accuracies": 1.0, "rewards/chosen": -2.4166572093963623, "rewards/margins": 8.01063346862793, "rewards/rejected": -10.427290916442871, "step": 2763 }, { "epoch": 4.44, "learning_rate": 2.817082837891399e-07, "logits/chosen": -1.4746253490447998, "logits/rejected": -1.5653737783432007, "logps/chosen": -132.5467529296875, "logps/rejected": -225.18115234375, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -4.933967590332031, "rewards/margins": 8.254783630371094, "rewards/rejected": -13.188752174377441, "step": 2764 }, { "epoch": 4.44, "learning_rate": 2.816091954022988e-07, "logits/chosen": -1.4548503160476685, "logits/rejected": -1.5111383199691772, "logps/chosen": -110.25473022460938, "logps/rejected": -215.64654541015625, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": -4.585642337799072, "rewards/margins": 9.538105010986328, "rewards/rejected": -14.123748779296875, "step": 2765 }, { "epoch": 4.44, "learning_rate": 2.815101070154578e-07, "logits/chosen": -1.4512884616851807, "logits/rejected": -1.4752033948898315, "logps/chosen": -172.13363647460938, "logps/rejected": -273.2087707519531, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -8.668320655822754, "rewards/margins": 9.66629695892334, "rewards/rejected": -18.334617614746094, "step": 2766 }, { "epoch": 4.44, "learning_rate": 2.8141101862861673e-07, "logits/chosen": -1.7221190929412842, "logits/rejected": -1.7068355083465576, "logps/chosen": -178.95968627929688, "logps/rejected": -262.5660095214844, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -7.734452247619629, "rewards/margins": 9.113062858581543, "rewards/rejected": -16.847515106201172, "step": 2767 }, { "epoch": 4.44, "learning_rate": 2.8131193024177564e-07, "logits/chosen": -1.625832200050354, "logits/rejected": -1.6556293964385986, "logps/chosen": -106.03610229492188, "logps/rejected": -256.46746826171875, "loss": 0.0153, "rewards/accuracies": 1.0, "rewards/chosen": -3.392554998397827, "rewards/margins": 13.48036003112793, "rewards/rejected": -16.872915267944336, "step": 2768 }, { "epoch": 4.44, "learning_rate": 2.812128418549346e-07, "logits/chosen": -1.7671726942062378, "logits/rejected": -1.7746357917785645, "logps/chosen": -140.02615356445312, "logps/rejected": -258.77069091796875, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -5.159049034118652, "rewards/margins": 11.598835945129395, "rewards/rejected": -16.757884979248047, "step": 2769 }, { "epoch": 4.45, "learning_rate": 2.811137534680935e-07, "logits/chosen": -1.6480728387832642, "logits/rejected": -1.6215770244598389, "logps/chosen": -168.7593994140625, "logps/rejected": -234.2295379638672, "loss": 0.0049, "rewards/accuracies": 1.0, "rewards/chosen": -6.406893253326416, "rewards/margins": 8.226319313049316, "rewards/rejected": -14.63321304321289, "step": 2770 }, { "epoch": 4.45, "learning_rate": 2.8101466508125246e-07, "logits/chosen": -1.6111595630645752, "logits/rejected": -1.6228020191192627, "logps/chosen": -123.53425598144531, "logps/rejected": -208.87274169921875, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -3.9654159545898438, "rewards/margins": 8.618648529052734, "rewards/rejected": -12.584064483642578, "step": 2771 }, { "epoch": 4.45, "learning_rate": 2.809155766944114e-07, "logits/chosen": -1.6037147045135498, "logits/rejected": -1.5263954401016235, "logps/chosen": -154.48353576660156, "logps/rejected": -235.903564453125, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -5.21937370300293, "rewards/margins": 8.961101531982422, "rewards/rejected": -14.180475234985352, "step": 2772 }, { "epoch": 4.45, "learning_rate": 2.8081648830757033e-07, "logits/chosen": -1.701793909072876, "logits/rejected": -1.7092843055725098, "logps/chosen": -135.17901611328125, "logps/rejected": -234.3446044921875, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -5.532351493835449, "rewards/margins": 10.55919361114502, "rewards/rejected": -16.09154510498047, "step": 2773 }, { "epoch": 4.45, "learning_rate": 2.807173999207293e-07, "logits/chosen": -1.7039819955825806, "logits/rejected": -1.682356834411621, "logps/chosen": -150.87261962890625, "logps/rejected": -263.63232421875, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -5.312885284423828, "rewards/margins": 10.717589378356934, "rewards/rejected": -16.030475616455078, "step": 2774 }, { "epoch": 4.45, "learning_rate": 2.806183115338882e-07, "logits/chosen": -1.4944771528244019, "logits/rejected": -1.4120906591415405, "logps/chosen": -136.0952911376953, "logps/rejected": -193.75282287597656, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -4.084843635559082, "rewards/margins": 7.954451560974121, "rewards/rejected": -12.03929615020752, "step": 2775 }, { "epoch": 4.46, "learning_rate": 2.8051922314704716e-07, "logits/chosen": -1.5095566511154175, "logits/rejected": -1.5478435754776, "logps/chosen": -142.15118408203125, "logps/rejected": -233.05300903320312, "loss": 0.0074, "rewards/accuracies": 1.0, "rewards/chosen": -3.958883285522461, "rewards/margins": 10.36723804473877, "rewards/rejected": -14.32612133026123, "step": 2776 }, { "epoch": 4.46, "learning_rate": 2.804201347602061e-07, "logits/chosen": -1.7724047899246216, "logits/rejected": -1.7530958652496338, "logps/chosen": -169.1248321533203, "logps/rejected": -238.17800903320312, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -7.316110134124756, "rewards/margins": 9.206033706665039, "rewards/rejected": -16.522144317626953, "step": 2777 }, { "epoch": 4.46, "learning_rate": 2.80321046373365e-07, "logits/chosen": -1.6659913063049316, "logits/rejected": -1.5619697570800781, "logps/chosen": -171.40823364257812, "logps/rejected": -232.71527099609375, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -6.649877548217773, "rewards/margins": 8.75882339477539, "rewards/rejected": -15.408700942993164, "step": 2778 }, { "epoch": 4.46, "learning_rate": 2.80221957986524e-07, "logits/chosen": -1.6000263690948486, "logits/rejected": -1.5953128337860107, "logps/chosen": -71.24349975585938, "logps/rejected": -174.62022399902344, "loss": 0.0146, "rewards/accuracies": 1.0, "rewards/chosen": -1.678483247756958, "rewards/margins": 10.27414608001709, "rewards/rejected": -11.952629089355469, "step": 2779 }, { "epoch": 4.46, "learning_rate": 2.801228695996829e-07, "logits/chosen": -1.621415376663208, "logits/rejected": -1.6565399169921875, "logps/chosen": -108.86508178710938, "logps/rejected": -225.0075225830078, "loss": 0.0369, "rewards/accuracies": 1.0, "rewards/chosen": -2.623643159866333, "rewards/margins": 11.878439903259277, "rewards/rejected": -14.502083778381348, "step": 2780 }, { "epoch": 4.46, "learning_rate": 2.8002378121284185e-07, "logits/chosen": -1.5521348714828491, "logits/rejected": -1.5540755987167358, "logps/chosen": -122.08818817138672, "logps/rejected": -209.15716552734375, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -4.168209075927734, "rewards/margins": 9.919797897338867, "rewards/rejected": -14.088006973266602, "step": 2781 }, { "epoch": 4.47, "learning_rate": 2.799246928260008e-07, "logits/chosen": -1.485342264175415, "logits/rejected": -1.5466617345809937, "logps/chosen": -119.37955474853516, "logps/rejected": -229.3961181640625, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -4.773891448974609, "rewards/margins": 9.49170970916748, "rewards/rejected": -14.26560115814209, "step": 2782 }, { "epoch": 4.47, "learning_rate": 2.798256044391597e-07, "logits/chosen": -1.7207412719726562, "logits/rejected": -1.7234901189804077, "logps/chosen": -112.4862060546875, "logps/rejected": -250.169189453125, "loss": 0.0385, "rewards/accuracies": 1.0, "rewards/chosen": -3.4771955013275146, "rewards/margins": 13.739461898803711, "rewards/rejected": -17.216655731201172, "step": 2783 }, { "epoch": 4.47, "learning_rate": 2.797265160523187e-07, "logits/chosen": -1.5213991403579712, "logits/rejected": -1.4766119718551636, "logps/chosen": -132.43447875976562, "logps/rejected": -239.79600524902344, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -4.070225715637207, "rewards/margins": 12.394888877868652, "rewards/rejected": -16.46511459350586, "step": 2784 }, { "epoch": 4.47, "learning_rate": 2.796274276654776e-07, "logits/chosen": -1.503800630569458, "logits/rejected": -1.4523800611495972, "logps/chosen": -136.9077606201172, "logps/rejected": -193.6627197265625, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -3.877272129058838, "rewards/margins": 7.413882732391357, "rewards/rejected": -11.291154861450195, "step": 2785 }, { "epoch": 4.47, "learning_rate": 2.795283392786365e-07, "logits/chosen": -1.669974684715271, "logits/rejected": -1.6851468086242676, "logps/chosen": -148.18927001953125, "logps/rejected": -280.16302490234375, "loss": 0.0042, "rewards/accuracies": 1.0, "rewards/chosen": -6.377048015594482, "rewards/margins": 11.962980270385742, "rewards/rejected": -18.340028762817383, "step": 2786 }, { "epoch": 4.47, "learning_rate": 2.794292508917955e-07, "logits/chosen": -1.5625427961349487, "logits/rejected": -1.616051435470581, "logps/chosen": -129.7542266845703, "logps/rejected": -241.31643676757812, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -3.4089882373809814, "rewards/margins": 9.094388961791992, "rewards/rejected": -12.503376960754395, "step": 2787 }, { "epoch": 4.48, "learning_rate": 2.793301625049544e-07, "logits/chosen": -1.5150152444839478, "logits/rejected": -1.5254220962524414, "logps/chosen": -126.76512145996094, "logps/rejected": -200.1992950439453, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -5.936099052429199, "rewards/margins": 6.220221042633057, "rewards/rejected": -12.156320571899414, "step": 2788 }, { "epoch": 4.48, "learning_rate": 2.7923107411811337e-07, "logits/chosen": -1.6040432453155518, "logits/rejected": -1.5642261505126953, "logps/chosen": -133.92800903320312, "logps/rejected": -247.1510467529297, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -4.699306964874268, "rewards/margins": 11.202713966369629, "rewards/rejected": -15.902021408081055, "step": 2789 }, { "epoch": 4.48, "learning_rate": 2.791319857312723e-07, "logits/chosen": -1.6042938232421875, "logits/rejected": -1.5755186080932617, "logps/chosen": -129.53164672851562, "logps/rejected": -227.7854461669922, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -4.354366779327393, "rewards/margins": 8.602972984313965, "rewards/rejected": -12.957340240478516, "step": 2790 }, { "epoch": 4.48, "learning_rate": 2.790328973444312e-07, "logits/chosen": -1.451268196105957, "logits/rejected": -1.488668441772461, "logps/chosen": -188.19393920898438, "logps/rejected": -242.9617919921875, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -7.885913848876953, "rewards/margins": 7.035870552062988, "rewards/rejected": -14.921785354614258, "step": 2791 }, { "epoch": 4.48, "learning_rate": 2.7893380895759014e-07, "logits/chosen": -1.5272724628448486, "logits/rejected": -1.5402376651763916, "logps/chosen": -98.49087524414062, "logps/rejected": -179.5891876220703, "loss": 0.0065, "rewards/accuracies": 1.0, "rewards/chosen": -3.8125195503234863, "rewards/margins": 8.231917381286621, "rewards/rejected": -12.044437408447266, "step": 2792 }, { "epoch": 4.48, "learning_rate": 2.788347205707491e-07, "logits/chosen": -1.6101946830749512, "logits/rejected": -1.5522842407226562, "logps/chosen": -138.28851318359375, "logps/rejected": -246.4190673828125, "loss": 0.007, "rewards/accuracies": 1.0, "rewards/chosen": -5.4755353927612305, "rewards/margins": 11.168808937072754, "rewards/rejected": -16.644344329833984, "step": 2793 }, { "epoch": 4.48, "learning_rate": 2.7873563218390806e-07, "logits/chosen": -1.4968459606170654, "logits/rejected": -1.5565507411956787, "logps/chosen": -93.6102294921875, "logps/rejected": -239.70826721191406, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/chosen": -1.5057034492492676, "rewards/margins": 13.784570693969727, "rewards/rejected": -15.290273666381836, "step": 2794 }, { "epoch": 4.49, "learning_rate": 2.7863654379706697e-07, "logits/chosen": -1.6463429927825928, "logits/rejected": -1.6407725811004639, "logps/chosen": -111.93770599365234, "logps/rejected": -216.40350341796875, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -2.293461799621582, "rewards/margins": 10.810312271118164, "rewards/rejected": -13.10377311706543, "step": 2795 }, { "epoch": 4.49, "learning_rate": 2.785374554102259e-07, "logits/chosen": -1.4864040613174438, "logits/rejected": -1.5323628187179565, "logps/chosen": -81.9390640258789, "logps/rejected": -202.75511169433594, "loss": 0.008, "rewards/accuracies": 1.0, "rewards/chosen": -3.584714412689209, "rewards/margins": 10.030757904052734, "rewards/rejected": -13.615471839904785, "step": 2796 }, { "epoch": 4.49, "learning_rate": 2.7843836702338484e-07, "logits/chosen": -1.541719913482666, "logits/rejected": -1.476633071899414, "logps/chosen": -150.97898864746094, "logps/rejected": -239.44508361816406, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -4.847156047821045, "rewards/margins": 9.204046249389648, "rewards/rejected": -14.051202774047852, "step": 2797 }, { "epoch": 4.49, "learning_rate": 2.783392786365438e-07, "logits/chosen": -1.5616563558578491, "logits/rejected": -1.5403021574020386, "logps/chosen": -141.3931884765625, "logps/rejected": -218.9105224609375, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/chosen": -5.778682708740234, "rewards/margins": 8.907339096069336, "rewards/rejected": -14.68602180480957, "step": 2798 }, { "epoch": 4.49, "learning_rate": 2.7824019024970275e-07, "logits/chosen": -1.640052318572998, "logits/rejected": -1.658229947090149, "logps/chosen": -129.95126342773438, "logps/rejected": -240.1207733154297, "loss": 0.0062, "rewards/accuracies": 1.0, "rewards/chosen": -4.86325740814209, "rewards/margins": 10.251483917236328, "rewards/rejected": -15.114742279052734, "step": 2799 }, { "epoch": 4.49, "learning_rate": 2.7814110186286166e-07, "logits/chosen": -1.6252448558807373, "logits/rejected": -1.5091874599456787, "logps/chosen": -131.91546630859375, "logps/rejected": -251.16970825195312, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -4.578756809234619, "rewards/margins": 12.612921714782715, "rewards/rejected": -17.191679000854492, "step": 2800 }, { "epoch": 4.5, "learning_rate": 2.7804201347602057e-07, "logits/chosen": -1.652876853942871, "logits/rejected": -1.6234314441680908, "logps/chosen": -162.2783203125, "logps/rejected": -242.21786499023438, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -6.6338372230529785, "rewards/margins": 9.332765579223633, "rewards/rejected": -15.96660327911377, "step": 2801 }, { "epoch": 4.5, "learning_rate": 2.7794292508917953e-07, "logits/chosen": -1.5144388675689697, "logits/rejected": -1.5487178564071655, "logps/chosen": -115.75148010253906, "logps/rejected": -250.61029052734375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -3.7632505893707275, "rewards/margins": 11.381921768188477, "rewards/rejected": -15.145173072814941, "step": 2802 }, { "epoch": 4.5, "learning_rate": 2.778438367023385e-07, "logits/chosen": -1.5801445245742798, "logits/rejected": -1.5290191173553467, "logps/chosen": -138.8501434326172, "logps/rejected": -245.97216796875, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -5.890955924987793, "rewards/margins": 10.224698066711426, "rewards/rejected": -16.11565399169922, "step": 2803 }, { "epoch": 4.5, "learning_rate": 2.7774474831549745e-07, "logits/chosen": -1.6285772323608398, "logits/rejected": -1.6636097431182861, "logps/chosen": -113.93743896484375, "logps/rejected": -285.4978942871094, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -4.404966354370117, "rewards/margins": 14.759344100952148, "rewards/rejected": -19.164310455322266, "step": 2804 }, { "epoch": 4.5, "learning_rate": 2.7764565992865635e-07, "logits/chosen": -1.502935528755188, "logits/rejected": -1.5023832321166992, "logps/chosen": -143.6563720703125, "logps/rejected": -228.884521484375, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": -4.117403030395508, "rewards/margins": 9.1888427734375, "rewards/rejected": -13.306245803833008, "step": 2805 }, { "epoch": 4.5, "learning_rate": 2.7754657154181526e-07, "logits/chosen": -1.75993013381958, "logits/rejected": -1.778637170791626, "logps/chosen": -145.81350708007812, "logps/rejected": -296.07513427734375, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -5.111900806427002, "rewards/margins": 11.174005508422852, "rewards/rejected": -16.285905838012695, "step": 2806 }, { "epoch": 4.51, "learning_rate": 2.774474831549742e-07, "logits/chosen": -1.5163995027542114, "logits/rejected": -1.5313801765441895, "logps/chosen": -134.6122283935547, "logps/rejected": -243.01937866210938, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": -5.483554840087891, "rewards/margins": 8.564491271972656, "rewards/rejected": -14.048046112060547, "step": 2807 }, { "epoch": 4.51, "learning_rate": 2.7734839476813313e-07, "logits/chosen": -1.680015206336975, "logits/rejected": -1.6355396509170532, "logps/chosen": -132.52325439453125, "logps/rejected": -249.52288818359375, "loss": 0.0058, "rewards/accuracies": 1.0, "rewards/chosen": -5.545040607452393, "rewards/margins": 11.7825288772583, "rewards/rejected": -17.32756996154785, "step": 2808 }, { "epoch": 4.51, "learning_rate": 2.7724930638129214e-07, "logits/chosen": -1.527362585067749, "logits/rejected": -1.527822494506836, "logps/chosen": -135.01019287109375, "logps/rejected": -238.34754943847656, "loss": 0.0039, "rewards/accuracies": 1.0, "rewards/chosen": -4.903086185455322, "rewards/margins": 10.757129669189453, "rewards/rejected": -15.660216331481934, "step": 2809 }, { "epoch": 4.51, "learning_rate": 2.7715021799445105e-07, "logits/chosen": -1.6305440664291382, "logits/rejected": -1.62916100025177, "logps/chosen": -130.04013061523438, "logps/rejected": -232.4702911376953, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -4.968426704406738, "rewards/margins": 10.743631362915039, "rewards/rejected": -15.712057113647461, "step": 2810 }, { "epoch": 4.51, "learning_rate": 2.7705112960760995e-07, "logits/chosen": -1.4590213298797607, "logits/rejected": -1.4841420650482178, "logps/chosen": -134.41177368164062, "logps/rejected": -241.6845703125, "loss": 0.0053, "rewards/accuracies": 1.0, "rewards/chosen": -4.550762176513672, "rewards/margins": 10.249332427978516, "rewards/rejected": -14.800093650817871, "step": 2811 }, { "epoch": 4.51, "learning_rate": 2.769520412207689e-07, "logits/chosen": -1.4618349075317383, "logits/rejected": -1.4450814723968506, "logps/chosen": -171.9166717529297, "logps/rejected": -247.81298828125, "loss": 0.0064, "rewards/accuracies": 1.0, "rewards/chosen": -6.798979759216309, "rewards/margins": 7.979863166809082, "rewards/rejected": -14.77884292602539, "step": 2812 }, { "epoch": 4.52, "learning_rate": 2.768529528339278e-07, "logits/chosen": -1.5947637557983398, "logits/rejected": -1.5830657482147217, "logps/chosen": -163.08323669433594, "logps/rejected": -274.16064453125, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -6.5760178565979, "rewards/margins": 10.463120460510254, "rewards/rejected": -17.039138793945312, "step": 2813 }, { "epoch": 4.52, "learning_rate": 2.7675386444708683e-07, "logits/chosen": -1.6015257835388184, "logits/rejected": -1.5990798473358154, "logps/chosen": -117.4588623046875, "logps/rejected": -206.17495727539062, "loss": 0.0046, "rewards/accuracies": 1.0, "rewards/chosen": -4.0037994384765625, "rewards/margins": 8.453954696655273, "rewards/rejected": -12.457754135131836, "step": 2814 }, { "epoch": 4.52, "learning_rate": 2.7665477606024574e-07, "logits/chosen": -1.6255697011947632, "logits/rejected": -1.6712092161178589, "logps/chosen": -153.6128692626953, "logps/rejected": -238.37274169921875, "loss": 0.0064, "rewards/accuracies": 1.0, "rewards/chosen": -5.809083938598633, "rewards/margins": 8.362279891967773, "rewards/rejected": -14.171364784240723, "step": 2815 }, { "epoch": 4.52, "learning_rate": 2.7655568767340465e-07, "logits/chosen": -1.6855837106704712, "logits/rejected": -1.6779553890228271, "logps/chosen": -155.612060546875, "logps/rejected": -228.32540893554688, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": -5.5503058433532715, "rewards/margins": 8.333115577697754, "rewards/rejected": -13.883420944213867, "step": 2816 }, { "epoch": 4.52, "learning_rate": 2.764565992865636e-07, "logits/chosen": -1.5686019659042358, "logits/rejected": -1.6002357006072998, "logps/chosen": -113.88412475585938, "logps/rejected": -247.1538543701172, "loss": 0.0045, "rewards/accuracies": 1.0, "rewards/chosen": -4.359396934509277, "rewards/margins": 11.560690879821777, "rewards/rejected": -15.920087814331055, "step": 2817 }, { "epoch": 4.52, "learning_rate": 2.763575108997225e-07, "logits/chosen": -1.5269384384155273, "logits/rejected": -1.5655933618545532, "logps/chosen": -115.63810729980469, "logps/rejected": -225.92860412597656, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -3.826902151107788, "rewards/margins": 10.578222274780273, "rewards/rejected": -14.405123710632324, "step": 2818 }, { "epoch": 4.52, "learning_rate": 2.762584225128815e-07, "logits/chosen": -1.639686107635498, "logits/rejected": -1.691918134689331, "logps/chosen": -106.64309692382812, "logps/rejected": -190.20960998535156, "loss": 0.008, "rewards/accuracies": 1.0, "rewards/chosen": -3.8653557300567627, "rewards/margins": 7.256318092346191, "rewards/rejected": -11.121673583984375, "step": 2819 }, { "epoch": 4.53, "learning_rate": 2.7615933412604043e-07, "logits/chosen": -1.5898807048797607, "logits/rejected": -1.5956050157546997, "logps/chosen": -82.0503158569336, "logps/rejected": -174.1080780029297, "loss": 0.0084, "rewards/accuracies": 1.0, "rewards/chosen": -2.678748846054077, "rewards/margins": 9.388021469116211, "rewards/rejected": -12.06676959991455, "step": 2820 }, { "epoch": 4.53, "learning_rate": 2.7606024573919934e-07, "logits/chosen": -1.8119322061538696, "logits/rejected": -1.7621175050735474, "logps/chosen": -149.87088012695312, "logps/rejected": -241.07542419433594, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -6.261667728424072, "rewards/margins": 10.34338092803955, "rewards/rejected": -16.60504913330078, "step": 2821 }, { "epoch": 4.53, "learning_rate": 2.759611573523583e-07, "logits/chosen": -1.72573983669281, "logits/rejected": -1.7325003147125244, "logps/chosen": -120.32891082763672, "logps/rejected": -229.6142578125, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -3.298008441925049, "rewards/margins": 10.989982604980469, "rewards/rejected": -14.28799057006836, "step": 2822 }, { "epoch": 4.53, "learning_rate": 2.758620689655172e-07, "logits/chosen": -1.5940457582473755, "logits/rejected": -1.5923705101013184, "logps/chosen": -133.24681091308594, "logps/rejected": -237.94903564453125, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -2.884000778198242, "rewards/margins": 11.673494338989258, "rewards/rejected": -14.5574951171875, "step": 2823 }, { "epoch": 4.53, "learning_rate": 2.7576298057867617e-07, "logits/chosen": -1.578063726425171, "logits/rejected": -1.578145980834961, "logps/chosen": -151.49685668945312, "logps/rejected": -299.7729187011719, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -5.873994827270508, "rewards/margins": 13.433101654052734, "rewards/rejected": -19.307096481323242, "step": 2824 }, { "epoch": 4.53, "learning_rate": 2.7566389219183513e-07, "logits/chosen": -1.5385832786560059, "logits/rejected": -1.5514661073684692, "logps/chosen": -134.41766357421875, "logps/rejected": -225.20236206054688, "loss": 0.004, "rewards/accuracies": 1.0, "rewards/chosen": -4.342463493347168, "rewards/margins": 9.45561408996582, "rewards/rejected": -13.798076629638672, "step": 2825 }, { "epoch": 4.54, "learning_rate": 2.7556480380499403e-07, "logits/chosen": -1.4231773614883423, "logits/rejected": -1.5198866128921509, "logps/chosen": -128.70538330078125, "logps/rejected": -241.09848022460938, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -5.243760585784912, "rewards/margins": 10.570501327514648, "rewards/rejected": -15.814261436462402, "step": 2826 }, { "epoch": 4.54, "learning_rate": 2.75465715418153e-07, "logits/chosen": -1.5884253978729248, "logits/rejected": -1.5305211544036865, "logps/chosen": -137.478271484375, "logps/rejected": -242.0609893798828, "loss": 0.0117, "rewards/accuracies": 1.0, "rewards/chosen": -4.5672197341918945, "rewards/margins": 11.298229217529297, "rewards/rejected": -15.865448951721191, "step": 2827 }, { "epoch": 4.54, "learning_rate": 2.753666270313119e-07, "logits/chosen": -1.6935991048812866, "logits/rejected": -1.6324217319488525, "logps/chosen": -160.7823944091797, "logps/rejected": -268.3804016113281, "loss": 0.0094, "rewards/accuracies": 1.0, "rewards/chosen": -6.118954181671143, "rewards/margins": 10.718652725219727, "rewards/rejected": -16.837608337402344, "step": 2828 }, { "epoch": 4.54, "learning_rate": 2.752675386444708e-07, "logits/chosen": -1.4552879333496094, "logits/rejected": -1.3670326471328735, "logps/chosen": -145.2235870361328, "logps/rejected": -207.7515869140625, "loss": 0.0646, "rewards/accuracies": 1.0, "rewards/chosen": -5.231058120727539, "rewards/margins": 8.504594802856445, "rewards/rejected": -13.7356538772583, "step": 2829 }, { "epoch": 4.54, "learning_rate": 2.751684502576298e-07, "logits/chosen": -1.5197253227233887, "logits/rejected": -1.5053484439849854, "logps/chosen": -138.58673095703125, "logps/rejected": -217.93016052246094, "loss": 0.0064, "rewards/accuracies": 1.0, "rewards/chosen": -4.293963432312012, "rewards/margins": 8.544078826904297, "rewards/rejected": -12.838041305541992, "step": 2830 }, { "epoch": 4.54, "learning_rate": 2.750693618707887e-07, "logits/chosen": -1.445472002029419, "logits/rejected": -1.414696455001831, "logps/chosen": -152.7145538330078, "logps/rejected": -229.15115356445312, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -4.331068992614746, "rewards/margins": 9.836455345153809, "rewards/rejected": -14.167524337768555, "step": 2831 }, { "epoch": 4.55, "learning_rate": 2.749702734839477e-07, "logits/chosen": -1.4078130722045898, "logits/rejected": -1.39902663230896, "logps/chosen": -142.72589111328125, "logps/rejected": -253.47982788085938, "loss": 0.0195, "rewards/accuracies": 1.0, "rewards/chosen": -5.298447132110596, "rewards/margins": 11.785785675048828, "rewards/rejected": -17.084232330322266, "step": 2832 }, { "epoch": 4.55, "learning_rate": 2.748711850971066e-07, "logits/chosen": -1.6970798969268799, "logits/rejected": -1.746498942375183, "logps/chosen": -77.19947052001953, "logps/rejected": -186.76528930664062, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -1.3104373216629028, "rewards/margins": 11.600809097290039, "rewards/rejected": -12.911247253417969, "step": 2833 }, { "epoch": 4.55, "learning_rate": 2.747720967102655e-07, "logits/chosen": -1.5782599449157715, "logits/rejected": -1.6015715599060059, "logps/chosen": -140.51531982421875, "logps/rejected": -267.2781066894531, "loss": 0.0176, "rewards/accuracies": 1.0, "rewards/chosen": -5.273142337799072, "rewards/margins": 12.333765983581543, "rewards/rejected": -17.606908798217773, "step": 2834 }, { "epoch": 4.55, "learning_rate": 2.746730083234245e-07, "logits/chosen": -1.5432519912719727, "logits/rejected": -1.5531351566314697, "logps/chosen": -156.93751525878906, "logps/rejected": -274.94085693359375, "loss": 0.0109, "rewards/accuracies": 1.0, "rewards/chosen": -6.438264846801758, "rewards/margins": 12.945955276489258, "rewards/rejected": -19.384220123291016, "step": 2835 }, { "epoch": 4.55, "learning_rate": 2.745739199365834e-07, "logits/chosen": -1.6199326515197754, "logits/rejected": -1.6426987648010254, "logps/chosen": -109.1934814453125, "logps/rejected": -228.87843322753906, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -3.3846726417541504, "rewards/margins": 11.644591331481934, "rewards/rejected": -15.029263496398926, "step": 2836 }, { "epoch": 4.55, "learning_rate": 2.744748315497424e-07, "logits/chosen": -1.504603385925293, "logits/rejected": -1.5654747486114502, "logps/chosen": -109.61658477783203, "logps/rejected": -218.8448028564453, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": -3.993145704269409, "rewards/margins": 10.182534217834473, "rewards/rejected": -14.175680160522461, "step": 2837 }, { "epoch": 4.56, "learning_rate": 2.743757431629013e-07, "logits/chosen": -1.4566230773925781, "logits/rejected": -1.4841477870941162, "logps/chosen": -133.1646270751953, "logps/rejected": -249.279052734375, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -5.156158924102783, "rewards/margins": 10.0572509765625, "rewards/rejected": -15.213409423828125, "step": 2838 }, { "epoch": 4.56, "learning_rate": 2.742766547760602e-07, "logits/chosen": -1.4440183639526367, "logits/rejected": -1.5208971500396729, "logps/chosen": -174.27093505859375, "logps/rejected": -248.71119689941406, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -8.084232330322266, "rewards/margins": 8.9021635055542, "rewards/rejected": -16.98639678955078, "step": 2839 }, { "epoch": 4.56, "learning_rate": 2.741775663892192e-07, "logits/chosen": -1.5303318500518799, "logits/rejected": -1.55264413356781, "logps/chosen": -139.18399047851562, "logps/rejected": -280.086669921875, "loss": 0.064, "rewards/accuracies": 1.0, "rewards/chosen": -4.774323463439941, "rewards/margins": 12.022619247436523, "rewards/rejected": -16.79694175720215, "step": 2840 }, { "epoch": 4.56, "learning_rate": 2.740784780023781e-07, "logits/chosen": -1.4367369413375854, "logits/rejected": -1.3810492753982544, "logps/chosen": -102.03660583496094, "logps/rejected": -197.992919921875, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -2.2760202884674072, "rewards/margins": 10.389532089233398, "rewards/rejected": -12.665552139282227, "step": 2841 }, { "epoch": 4.56, "learning_rate": 2.7397938961553707e-07, "logits/chosen": -1.5954349040985107, "logits/rejected": -1.542466640472412, "logps/chosen": -125.31034851074219, "logps/rejected": -214.36813354492188, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -2.8865115642547607, "rewards/margins": 9.81609058380127, "rewards/rejected": -12.70260238647461, "step": 2842 }, { "epoch": 4.56, "learning_rate": 2.73880301228696e-07, "logits/chosen": -1.4792134761810303, "logits/rejected": -1.4543198347091675, "logps/chosen": -102.7669677734375, "logps/rejected": -193.1697998046875, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -2.6447434425354004, "rewards/margins": 9.630353927612305, "rewards/rejected": -12.275097846984863, "step": 2843 }, { "epoch": 4.57, "learning_rate": 2.737812128418549e-07, "logits/chosen": -1.7056212425231934, "logits/rejected": -1.6779396533966064, "logps/chosen": -135.36651611328125, "logps/rejected": -292.4979248046875, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -3.8712847232818604, "rewards/margins": 16.85468101501465, "rewards/rejected": -20.72596549987793, "step": 2844 }, { "epoch": 4.57, "learning_rate": 2.736821244550139e-07, "logits/chosen": -1.7758382558822632, "logits/rejected": -1.6221508979797363, "logps/chosen": -146.53912353515625, "logps/rejected": -251.81283569335938, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -4.665487766265869, "rewards/margins": 11.406951904296875, "rewards/rejected": -16.07244110107422, "step": 2845 }, { "epoch": 4.57, "learning_rate": 2.735830360681728e-07, "logits/chosen": -1.602111577987671, "logits/rejected": -1.5715088844299316, "logps/chosen": -145.44622802734375, "logps/rejected": -295.92645263671875, "loss": 0.1383, "rewards/accuracies": 1.0, "rewards/chosen": -6.5456085205078125, "rewards/margins": 13.601818084716797, "rewards/rejected": -20.14742660522461, "step": 2846 }, { "epoch": 4.57, "learning_rate": 2.7348394768133176e-07, "logits/chosen": -1.419093132019043, "logits/rejected": -1.4504579305648804, "logps/chosen": -112.45354461669922, "logps/rejected": -257.2138671875, "loss": 0.007, "rewards/accuracies": 1.0, "rewards/chosen": -2.76430082321167, "rewards/margins": 13.21423053741455, "rewards/rejected": -15.978532791137695, "step": 2847 }, { "epoch": 4.57, "learning_rate": 2.7338485929449067e-07, "logits/chosen": -1.6715598106384277, "logits/rejected": -1.6332874298095703, "logps/chosen": -120.75337219238281, "logps/rejected": -242.3410186767578, "loss": 0.0057, "rewards/accuracies": 1.0, "rewards/chosen": -2.915994644165039, "rewards/margins": 12.85696792602539, "rewards/rejected": -15.77296257019043, "step": 2848 }, { "epoch": 4.57, "learning_rate": 2.732857709076496e-07, "logits/chosen": -1.5937896966934204, "logits/rejected": -1.5393452644348145, "logps/chosen": -140.9891815185547, "logps/rejected": -214.26995849609375, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -4.082625389099121, "rewards/margins": 8.829866409301758, "rewards/rejected": -12.912492752075195, "step": 2849 }, { "epoch": 4.57, "learning_rate": 2.7318668252080854e-07, "logits/chosen": -1.6953908205032349, "logits/rejected": -1.6876412630081177, "logps/chosen": -119.43608093261719, "logps/rejected": -244.83712768554688, "loss": 0.0077, "rewards/accuracies": 1.0, "rewards/chosen": -4.935963153839111, "rewards/margins": 11.491640090942383, "rewards/rejected": -16.42760467529297, "step": 2850 }, { "epoch": 4.58, "learning_rate": 2.730875941339675e-07, "logits/chosen": -1.5645134449005127, "logits/rejected": -1.5784215927124023, "logps/chosen": -181.3961181640625, "logps/rejected": -233.08697509765625, "loss": 0.0056, "rewards/accuracies": 1.0, "rewards/chosen": -7.7351531982421875, "rewards/margins": 6.526765823364258, "rewards/rejected": -14.261919021606445, "step": 2851 }, { "epoch": 4.58, "learning_rate": 2.7298850574712646e-07, "logits/chosen": -1.7069848775863647, "logits/rejected": -1.7591235637664795, "logps/chosen": -140.8182373046875, "logps/rejected": -246.55023193359375, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -4.655781269073486, "rewards/margins": 10.435622215270996, "rewards/rejected": -15.091403007507324, "step": 2852 }, { "epoch": 4.58, "learning_rate": 2.7288941736028536e-07, "logits/chosen": -1.6918538808822632, "logits/rejected": -1.64956796169281, "logps/chosen": -131.4624786376953, "logps/rejected": -240.11093139648438, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -4.597045421600342, "rewards/margins": 10.032846450805664, "rewards/rejected": -14.629892349243164, "step": 2853 }, { "epoch": 4.58, "learning_rate": 2.7279032897344427e-07, "logits/chosen": -1.5912437438964844, "logits/rejected": -1.7224540710449219, "logps/chosen": -122.45218658447266, "logps/rejected": -244.23648071289062, "loss": 0.0068, "rewards/accuracies": 1.0, "rewards/chosen": -4.209161758422852, "rewards/margins": 9.854090690612793, "rewards/rejected": -14.063251495361328, "step": 2854 }, { "epoch": 4.58, "learning_rate": 2.7269124058660323e-07, "logits/chosen": -1.5638470649719238, "logits/rejected": -1.6249359846115112, "logps/chosen": -171.281982421875, "logps/rejected": -249.23193359375, "loss": 0.0234, "rewards/accuracies": 1.0, "rewards/chosen": -6.5090508460998535, "rewards/margins": 7.132864952087402, "rewards/rejected": -13.641915321350098, "step": 2855 }, { "epoch": 4.58, "learning_rate": 2.725921521997622e-07, "logits/chosen": -1.694502353668213, "logits/rejected": -1.57957124710083, "logps/chosen": -171.33291625976562, "logps/rejected": -241.36090087890625, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -6.534945011138916, "rewards/margins": 8.704036712646484, "rewards/rejected": -15.238981246948242, "step": 2856 }, { "epoch": 4.59, "learning_rate": 2.724930638129211e-07, "logits/chosen": -1.6113861799240112, "logits/rejected": -1.5304720401763916, "logps/chosen": -123.95651245117188, "logps/rejected": -224.29103088378906, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -4.831267356872559, "rewards/margins": 10.141507148742676, "rewards/rejected": -14.972774505615234, "step": 2857 }, { "epoch": 4.59, "learning_rate": 2.7239397542608006e-07, "logits/chosen": -1.5323429107666016, "logits/rejected": -1.5122230052947998, "logps/chosen": -110.03152465820312, "logps/rejected": -184.61599731445312, "loss": 0.0156, "rewards/accuracies": 1.0, "rewards/chosen": -4.055251121520996, "rewards/margins": 7.018614292144775, "rewards/rejected": -11.07386589050293, "step": 2858 }, { "epoch": 4.59, "learning_rate": 2.7229488703923896e-07, "logits/chosen": -1.5625205039978027, "logits/rejected": -1.5995848178863525, "logps/chosen": -130.38983154296875, "logps/rejected": -244.61831665039062, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -5.221988677978516, "rewards/margins": 9.722090721130371, "rewards/rejected": -14.944079399108887, "step": 2859 }, { "epoch": 4.59, "learning_rate": 2.721957986523979e-07, "logits/chosen": -1.459826111793518, "logits/rejected": -1.4898276329040527, "logps/chosen": -178.95028686523438, "logps/rejected": -288.26678466796875, "loss": 0.0174, "rewards/accuracies": 1.0, "rewards/chosen": -6.899082660675049, "rewards/margins": 11.026535987854004, "rewards/rejected": -17.925621032714844, "step": 2860 }, { "epoch": 4.59, "learning_rate": 2.720967102655569e-07, "logits/chosen": -1.5481373071670532, "logits/rejected": -1.521531343460083, "logps/chosen": -108.42938232421875, "logps/rejected": -202.45663452148438, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -4.783082962036133, "rewards/margins": 9.706971168518066, "rewards/rejected": -14.490053176879883, "step": 2861 }, { "epoch": 4.59, "learning_rate": 2.719976218787158e-07, "logits/chosen": -1.535512924194336, "logits/rejected": -1.5648725032806396, "logps/chosen": -115.46162414550781, "logps/rejected": -214.9276885986328, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -4.809177875518799, "rewards/margins": 9.185504913330078, "rewards/rejected": -13.994682312011719, "step": 2862 }, { "epoch": 4.6, "learning_rate": 2.7189853349187475e-07, "logits/chosen": -1.5587040185928345, "logits/rejected": -1.577810525894165, "logps/chosen": -116.78153991699219, "logps/rejected": -230.13900756835938, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -4.005915641784668, "rewards/margins": 10.214391708374023, "rewards/rejected": -14.220308303833008, "step": 2863 }, { "epoch": 4.6, "learning_rate": 2.7179944510503366e-07, "logits/chosen": -1.769890308380127, "logits/rejected": -1.6159653663635254, "logps/chosen": -130.84564208984375, "logps/rejected": -215.83831787109375, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -4.756534576416016, "rewards/margins": 10.184228897094727, "rewards/rejected": -14.940763473510742, "step": 2864 }, { "epoch": 4.6, "learning_rate": 2.717003567181926e-07, "logits/chosen": -1.5526883602142334, "logits/rejected": -1.544137954711914, "logps/chosen": -150.59523010253906, "logps/rejected": -220.68824768066406, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -5.624057292938232, "rewards/margins": 8.075841903686523, "rewards/rejected": -13.699898719787598, "step": 2865 }, { "epoch": 4.6, "learning_rate": 2.716012683313516e-07, "logits/chosen": -1.6180590391159058, "logits/rejected": -1.5881212949752808, "logps/chosen": -143.15939331054688, "logps/rejected": -254.510498046875, "loss": 0.0056, "rewards/accuracies": 1.0, "rewards/chosen": -5.065999984741211, "rewards/margins": 11.864163398742676, "rewards/rejected": -16.930164337158203, "step": 2866 }, { "epoch": 4.6, "learning_rate": 2.715021799445105e-07, "logits/chosen": -1.5364744663238525, "logits/rejected": -1.4912023544311523, "logps/chosen": -173.7873077392578, "logps/rejected": -269.84869384765625, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -7.672622203826904, "rewards/margins": 11.081594467163086, "rewards/rejected": -18.75421714782715, "step": 2867 }, { "epoch": 4.6, "learning_rate": 2.7140309155766944e-07, "logits/chosen": -1.6385951042175293, "logits/rejected": -1.6474331617355347, "logps/chosen": -117.94232177734375, "logps/rejected": -233.2362518310547, "loss": 0.0123, "rewards/accuracies": 1.0, "rewards/chosen": -3.4060778617858887, "rewards/margins": 12.000121116638184, "rewards/rejected": -15.406198501586914, "step": 2868 }, { "epoch": 4.61, "learning_rate": 2.7130400317082835e-07, "logits/chosen": -1.5424784421920776, "logits/rejected": -1.4660046100616455, "logps/chosen": -145.44784545898438, "logps/rejected": -222.13406372070312, "loss": 0.0074, "rewards/accuracies": 1.0, "rewards/chosen": -5.257870674133301, "rewards/margins": 9.107823371887207, "rewards/rejected": -14.365694046020508, "step": 2869 }, { "epoch": 4.61, "learning_rate": 2.712049147839873e-07, "logits/chosen": -1.6024067401885986, "logits/rejected": -1.5794553756713867, "logps/chosen": -142.2131805419922, "logps/rejected": -242.68667602539062, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -5.568624019622803, "rewards/margins": 10.841888427734375, "rewards/rejected": -16.410512924194336, "step": 2870 }, { "epoch": 4.61, "learning_rate": 2.711058263971462e-07, "logits/chosen": -1.594876766204834, "logits/rejected": -1.6474511623382568, "logps/chosen": -143.87496948242188, "logps/rejected": -234.07223510742188, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -7.096208572387695, "rewards/margins": 8.206635475158691, "rewards/rejected": -15.302844047546387, "step": 2871 }, { "epoch": 4.61, "learning_rate": 2.710067380103052e-07, "logits/chosen": -1.6903713941574097, "logits/rejected": -1.6525278091430664, "logps/chosen": -161.66152954101562, "logps/rejected": -231.22991943359375, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -6.024187088012695, "rewards/margins": 8.099905014038086, "rewards/rejected": -14.124092102050781, "step": 2872 }, { "epoch": 4.61, "learning_rate": 2.7090764962346414e-07, "logits/chosen": -1.5043197870254517, "logits/rejected": -1.4559921026229858, "logps/chosen": -83.07469940185547, "logps/rejected": -178.99398803710938, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -1.0453598499298096, "rewards/margins": 9.669290542602539, "rewards/rejected": -10.714651107788086, "step": 2873 }, { "epoch": 4.61, "learning_rate": 2.7080856123662304e-07, "logits/chosen": -1.5894557237625122, "logits/rejected": -1.5098575353622437, "logps/chosen": -146.45852661132812, "logps/rejected": -226.5121612548828, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -5.6977667808532715, "rewards/margins": 8.523452758789062, "rewards/rejected": -14.221220016479492, "step": 2874 }, { "epoch": 4.61, "learning_rate": 2.70709472849782e-07, "logits/chosen": -1.5694338083267212, "logits/rejected": -1.5304968357086182, "logps/chosen": -144.4697723388672, "logps/rejected": -228.8848419189453, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": -6.247193813323975, "rewards/margins": 9.654844284057617, "rewards/rejected": -15.90203857421875, "step": 2875 }, { "epoch": 4.62, "learning_rate": 2.706103844629409e-07, "logits/chosen": -1.6360493898391724, "logits/rejected": -1.6244531869888306, "logps/chosen": -139.91123962402344, "logps/rejected": -242.9866943359375, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/chosen": -5.894350051879883, "rewards/margins": 10.541926383972168, "rewards/rejected": -16.436277389526367, "step": 2876 }, { "epoch": 4.62, "learning_rate": 2.7051129607609987e-07, "logits/chosen": -1.5513970851898193, "logits/rejected": -1.5425575971603394, "logps/chosen": -133.17799377441406, "logps/rejected": -226.77452087402344, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -3.3998589515686035, "rewards/margins": 10.763910293579102, "rewards/rejected": -14.163768768310547, "step": 2877 }, { "epoch": 4.62, "learning_rate": 2.7041220768925883e-07, "logits/chosen": -1.4514422416687012, "logits/rejected": -1.461829423904419, "logps/chosen": -142.50318908691406, "logps/rejected": -257.77740478515625, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -7.008760929107666, "rewards/margins": 10.264333724975586, "rewards/rejected": -17.273094177246094, "step": 2878 }, { "epoch": 4.62, "learning_rate": 2.7031311930241774e-07, "logits/chosen": -1.6614940166473389, "logits/rejected": -1.5810109376907349, "logps/chosen": -142.23460388183594, "logps/rejected": -185.27137756347656, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -3.3465774059295654, "rewards/margins": 8.810511589050293, "rewards/rejected": -12.157089233398438, "step": 2879 }, { "epoch": 4.62, "learning_rate": 2.702140309155767e-07, "logits/chosen": -1.8086720705032349, "logits/rejected": -1.8149009943008423, "logps/chosen": -100.29417419433594, "logps/rejected": -245.81729125976562, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -2.788410186767578, "rewards/margins": 14.36094856262207, "rewards/rejected": -17.14935874938965, "step": 2880 }, { "epoch": 4.62, "learning_rate": 2.701149425287356e-07, "logits/chosen": -1.5031431913375854, "logits/rejected": -1.559556245803833, "logps/chosen": -149.8198699951172, "logps/rejected": -262.16864013671875, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -5.330438137054443, "rewards/margins": 11.175887107849121, "rewards/rejected": -16.506324768066406, "step": 2881 }, { "epoch": 4.63, "learning_rate": 2.7001585414189456e-07, "logits/chosen": -1.5649210214614868, "logits/rejected": -1.5573844909667969, "logps/chosen": -108.39559173583984, "logps/rejected": -210.0255126953125, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -4.747928619384766, "rewards/margins": 9.190079689025879, "rewards/rejected": -13.938007354736328, "step": 2882 }, { "epoch": 4.63, "learning_rate": 2.699167657550535e-07, "logits/chosen": -1.4670464992523193, "logits/rejected": -1.5230388641357422, "logps/chosen": -135.4256134033203, "logps/rejected": -212.1370849609375, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": -5.514664173126221, "rewards/margins": 6.9630608558654785, "rewards/rejected": -12.477725982666016, "step": 2883 }, { "epoch": 4.63, "learning_rate": 2.6981767736821243e-07, "logits/chosen": -1.6632089614868164, "logits/rejected": -1.7093381881713867, "logps/chosen": -139.55984497070312, "logps/rejected": -249.17184448242188, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -6.729571342468262, "rewards/margins": 10.636819839477539, "rewards/rejected": -17.366392135620117, "step": 2884 }, { "epoch": 4.63, "learning_rate": 2.697185889813714e-07, "logits/chosen": -1.6220649480819702, "logits/rejected": -1.6062796115875244, "logps/chosen": -192.724853515625, "logps/rejected": -291.74371337890625, "loss": 0.0053, "rewards/accuracies": 1.0, "rewards/chosen": -9.636268615722656, "rewards/margins": 11.107978820800781, "rewards/rejected": -20.744247436523438, "step": 2885 }, { "epoch": 4.63, "learning_rate": 2.696195005945303e-07, "logits/chosen": -1.5526129007339478, "logits/rejected": -1.4173314571380615, "logps/chosen": -175.43580627441406, "logps/rejected": -253.78579711914062, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -7.003754615783691, "rewards/margins": 9.556163787841797, "rewards/rejected": -16.559917449951172, "step": 2886 }, { "epoch": 4.63, "learning_rate": 2.6952041220768926e-07, "logits/chosen": -1.5534238815307617, "logits/rejected": -1.5591456890106201, "logps/chosen": -149.20518493652344, "logps/rejected": -273.7069091796875, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -7.393659591674805, "rewards/margins": 12.216948509216309, "rewards/rejected": -19.610607147216797, "step": 2887 }, { "epoch": 4.64, "learning_rate": 2.694213238208482e-07, "logits/chosen": -1.7168524265289307, "logits/rejected": -1.7563542127609253, "logps/chosen": -124.13566589355469, "logps/rejected": -207.54307556152344, "loss": 0.0086, "rewards/accuracies": 1.0, "rewards/chosen": -4.677646636962891, "rewards/margins": 7.961723804473877, "rewards/rejected": -12.639370918273926, "step": 2888 }, { "epoch": 4.64, "learning_rate": 2.693222354340071e-07, "logits/chosen": -1.6165814399719238, "logits/rejected": -1.6294846534729004, "logps/chosen": -148.03492736816406, "logps/rejected": -214.65997314453125, "loss": 0.0049, "rewards/accuracies": 1.0, "rewards/chosen": -5.220637321472168, "rewards/margins": 7.983526229858398, "rewards/rejected": -13.20416259765625, "step": 2889 }, { "epoch": 4.64, "learning_rate": 2.6922314704716603e-07, "logits/chosen": -1.6697485446929932, "logits/rejected": -1.656581997871399, "logps/chosen": -128.30319213867188, "logps/rejected": -248.2773895263672, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -5.145974159240723, "rewards/margins": 11.773134231567383, "rewards/rejected": -16.919109344482422, "step": 2890 }, { "epoch": 4.64, "learning_rate": 2.69124058660325e-07, "logits/chosen": -1.5598927736282349, "logits/rejected": -1.5349242687225342, "logps/chosen": -156.8033447265625, "logps/rejected": -224.45506286621094, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -6.480134963989258, "rewards/margins": 7.624922752380371, "rewards/rejected": -14.105056762695312, "step": 2891 }, { "epoch": 4.64, "learning_rate": 2.690249702734839e-07, "logits/chosen": -1.573132038116455, "logits/rejected": -1.5361766815185547, "logps/chosen": -163.15284729003906, "logps/rejected": -250.19525146484375, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -5.774960041046143, "rewards/margins": 11.149232864379883, "rewards/rejected": -16.924192428588867, "step": 2892 }, { "epoch": 4.64, "learning_rate": 2.689258818866429e-07, "logits/chosen": -1.6583707332611084, "logits/rejected": -1.5696437358856201, "logps/chosen": -129.41468811035156, "logps/rejected": -206.2461395263672, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": -3.004560708999634, "rewards/margins": 10.203919410705566, "rewards/rejected": -13.208480834960938, "step": 2893 }, { "epoch": 4.65, "learning_rate": 2.688267934998018e-07, "logits/chosen": -1.6241576671600342, "logits/rejected": -1.6229395866394043, "logps/chosen": -182.19021606445312, "logps/rejected": -275.8990478515625, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -10.00061321258545, "rewards/margins": 8.604902267456055, "rewards/rejected": -18.605514526367188, "step": 2894 }, { "epoch": 4.65, "learning_rate": 2.687277051129607e-07, "logits/chosen": -1.6811754703521729, "logits/rejected": -1.692021369934082, "logps/chosen": -159.83892822265625, "logps/rejected": -274.3501281738281, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -6.545848369598389, "rewards/margins": 10.21853256225586, "rewards/rejected": -16.764381408691406, "step": 2895 }, { "epoch": 4.65, "learning_rate": 2.686286167261197e-07, "logits/chosen": -1.6802849769592285, "logits/rejected": -1.609713077545166, "logps/chosen": -190.84652709960938, "logps/rejected": -255.67349243164062, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -6.907395362854004, "rewards/margins": 8.721273422241211, "rewards/rejected": -15.628667831420898, "step": 2896 }, { "epoch": 4.65, "learning_rate": 2.685295283392786e-07, "logits/chosen": -1.6797481775283813, "logits/rejected": -1.695957899093628, "logps/chosen": -140.5084991455078, "logps/rejected": -244.77548217773438, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": -6.539989471435547, "rewards/margins": 7.949967384338379, "rewards/rejected": -14.489956855773926, "step": 2897 }, { "epoch": 4.65, "learning_rate": 2.684304399524376e-07, "logits/chosen": -1.602318286895752, "logits/rejected": -1.5384528636932373, "logps/chosen": -174.68133544921875, "logps/rejected": -252.36404418945312, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -5.19322395324707, "rewards/margins": 10.038524627685547, "rewards/rejected": -15.231748580932617, "step": 2898 }, { "epoch": 4.65, "learning_rate": 2.683313515655965e-07, "logits/chosen": -1.7264543771743774, "logits/rejected": -1.7745482921600342, "logps/chosen": -164.20077514648438, "logps/rejected": -275.601318359375, "loss": 0.0297, "rewards/accuracies": 1.0, "rewards/chosen": -4.307490348815918, "rewards/margins": 11.073887825012207, "rewards/rejected": -15.381378173828125, "step": 2899 }, { "epoch": 4.65, "learning_rate": 2.682322631787554e-07, "logits/chosen": -1.4478037357330322, "logits/rejected": -1.4360638856887817, "logps/chosen": -134.5330352783203, "logps/rejected": -243.22271728515625, "loss": 0.0194, "rewards/accuracies": 1.0, "rewards/chosen": -4.858177185058594, "rewards/margins": 10.760713577270508, "rewards/rejected": -15.618890762329102, "step": 2900 }, { "epoch": 4.66, "learning_rate": 2.681331747919144e-07, "logits/chosen": -1.4796507358551025, "logits/rejected": -1.5287935733795166, "logps/chosen": -152.68247985839844, "logps/rejected": -261.4158630371094, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -6.208851337432861, "rewards/margins": 11.347524642944336, "rewards/rejected": -17.55637550354004, "step": 2901 }, { "epoch": 4.66, "learning_rate": 2.680340864050733e-07, "logits/chosen": -1.4666680097579956, "logits/rejected": -1.5345869064331055, "logps/chosen": -112.3341064453125, "logps/rejected": -213.49148559570312, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -2.441270351409912, "rewards/margins": 10.046318054199219, "rewards/rejected": -12.487588882446289, "step": 2902 }, { "epoch": 4.66, "learning_rate": 2.679349980182323e-07, "logits/chosen": -1.5045793056488037, "logits/rejected": -1.527762532234192, "logps/chosen": -116.43006896972656, "logps/rejected": -226.27032470703125, "loss": 0.0111, "rewards/accuracies": 1.0, "rewards/chosen": -3.89715576171875, "rewards/margins": 10.26179313659668, "rewards/rejected": -14.158949851989746, "step": 2903 }, { "epoch": 4.66, "learning_rate": 2.678359096313912e-07, "logits/chosen": -1.523429274559021, "logits/rejected": -1.551692247390747, "logps/chosen": -147.62579345703125, "logps/rejected": -227.86009216308594, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -5.701258659362793, "rewards/margins": 8.856009483337402, "rewards/rejected": -14.557268142700195, "step": 2904 }, { "epoch": 4.66, "learning_rate": 2.677368212445501e-07, "logits/chosen": -1.5607854127883911, "logits/rejected": -1.4940048456192017, "logps/chosen": -132.0556640625, "logps/rejected": -235.2646484375, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -4.4093828201293945, "rewards/margins": 10.782269477844238, "rewards/rejected": -15.19165325164795, "step": 2905 }, { "epoch": 4.66, "learning_rate": 2.6763773285770907e-07, "logits/chosen": -1.5931551456451416, "logits/rejected": -1.5749748945236206, "logps/chosen": -150.5115966796875, "logps/rejected": -248.45108032226562, "loss": 0.0059, "rewards/accuracies": 1.0, "rewards/chosen": -4.414941787719727, "rewards/margins": 11.708484649658203, "rewards/rejected": -16.123428344726562, "step": 2906 }, { "epoch": 4.67, "learning_rate": 2.67538644470868e-07, "logits/chosen": -1.6313732862472534, "logits/rejected": -1.565626859664917, "logps/chosen": -129.89486694335938, "logps/rejected": -234.93191528320312, "loss": 0.0046, "rewards/accuracies": 1.0, "rewards/chosen": -5.043497085571289, "rewards/margins": 10.83903694152832, "rewards/rejected": -15.88253402709961, "step": 2907 }, { "epoch": 4.67, "learning_rate": 2.67439556084027e-07, "logits/chosen": -1.709572434425354, "logits/rejected": -1.6631548404693604, "logps/chosen": -108.17478942871094, "logps/rejected": -178.11448669433594, "loss": 0.0093, "rewards/accuracies": 1.0, "rewards/chosen": -3.400628089904785, "rewards/margins": 8.231270790100098, "rewards/rejected": -11.631897926330566, "step": 2908 }, { "epoch": 4.67, "learning_rate": 2.673404676971859e-07, "logits/chosen": -1.6117336750030518, "logits/rejected": -1.6395901441574097, "logps/chosen": -108.18408966064453, "logps/rejected": -208.12350463867188, "loss": 0.0043, "rewards/accuracies": 1.0, "rewards/chosen": -3.2281837463378906, "rewards/margins": 10.018380165100098, "rewards/rejected": -13.246563911437988, "step": 2909 }, { "epoch": 4.67, "learning_rate": 2.672413793103448e-07, "logits/chosen": -1.6330645084381104, "logits/rejected": -1.645434856414795, "logps/chosen": -200.32723999023438, "logps/rejected": -242.7725830078125, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": -9.177993774414062, "rewards/margins": 6.051130294799805, "rewards/rejected": -15.2291259765625, "step": 2910 }, { "epoch": 4.67, "learning_rate": 2.6714229092350376e-07, "logits/chosen": -1.6660782098770142, "logits/rejected": -1.5866376161575317, "logps/chosen": -153.9830780029297, "logps/rejected": -225.43341064453125, "loss": 0.0109, "rewards/accuracies": 1.0, "rewards/chosen": -5.0449676513671875, "rewards/margins": 9.111007690429688, "rewards/rejected": -14.155975341796875, "step": 2911 }, { "epoch": 4.67, "learning_rate": 2.6704320253666267e-07, "logits/chosen": -1.6977410316467285, "logits/rejected": -1.6392161846160889, "logps/chosen": -200.16798400878906, "logps/rejected": -280.4103698730469, "loss": 0.0116, "rewards/accuracies": 1.0, "rewards/chosen": -6.172115325927734, "rewards/margins": 11.860982894897461, "rewards/rejected": -18.033098220825195, "step": 2912 }, { "epoch": 4.68, "learning_rate": 2.6694411414982163e-07, "logits/chosen": -1.4373106956481934, "logits/rejected": -1.4683008193969727, "logps/chosen": -106.68769836425781, "logps/rejected": -212.5522003173828, "loss": 0.0126, "rewards/accuracies": 1.0, "rewards/chosen": -4.390641689300537, "rewards/margins": 9.783185005187988, "rewards/rejected": -14.173827171325684, "step": 2913 }, { "epoch": 4.68, "learning_rate": 2.668450257629806e-07, "logits/chosen": -1.5258934497833252, "logits/rejected": -1.587598443031311, "logps/chosen": -112.08844757080078, "logps/rejected": -196.68472290039062, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -3.026933431625366, "rewards/margins": 7.787805557250977, "rewards/rejected": -10.814739227294922, "step": 2914 }, { "epoch": 4.68, "learning_rate": 2.667459373761395e-07, "logits/chosen": -1.7174715995788574, "logits/rejected": -1.608305811882019, "logps/chosen": -143.92864990234375, "logps/rejected": -265.87652587890625, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -5.048043251037598, "rewards/margins": 12.241436004638672, "rewards/rejected": -17.289478302001953, "step": 2915 }, { "epoch": 4.68, "learning_rate": 2.6664684898929845e-07, "logits/chosen": -1.4923722743988037, "logits/rejected": -1.517388939857483, "logps/chosen": -155.4786376953125, "logps/rejected": -259.5655517578125, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -7.3448381423950195, "rewards/margins": 10.912919998168945, "rewards/rejected": -18.25775909423828, "step": 2916 }, { "epoch": 4.68, "learning_rate": 2.6654776060245736e-07, "logits/chosen": -1.6469858884811401, "logits/rejected": -1.6828852891921997, "logps/chosen": -125.11383056640625, "logps/rejected": -289.50384521484375, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -3.5388028621673584, "rewards/margins": 15.57254409790039, "rewards/rejected": -19.111347198486328, "step": 2917 }, { "epoch": 4.68, "learning_rate": 2.664486722156163e-07, "logits/chosen": -1.71649169921875, "logits/rejected": -1.7645237445831299, "logps/chosen": -90.726806640625, "logps/rejected": -192.79583740234375, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -2.869795560836792, "rewards/margins": 9.187906265258789, "rewards/rejected": -12.05770206451416, "step": 2918 }, { "epoch": 4.69, "learning_rate": 2.663495838287753e-07, "logits/chosen": -1.629927635192871, "logits/rejected": -1.5398938655853271, "logps/chosen": -146.66632080078125, "logps/rejected": -240.93734741210938, "loss": 0.0059, "rewards/accuracies": 1.0, "rewards/chosen": -5.837042808532715, "rewards/margins": 10.826396942138672, "rewards/rejected": -16.663440704345703, "step": 2919 }, { "epoch": 4.69, "learning_rate": 2.662504954419342e-07, "logits/chosen": -1.5755157470703125, "logits/rejected": -1.5550447702407837, "logps/chosen": -149.80189514160156, "logps/rejected": -263.40863037109375, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -4.717446327209473, "rewards/margins": 10.858149528503418, "rewards/rejected": -15.57559585571289, "step": 2920 }, { "epoch": 4.69, "learning_rate": 2.6615140705509315e-07, "logits/chosen": -1.5270607471466064, "logits/rejected": -1.576663851737976, "logps/chosen": -134.2341766357422, "logps/rejected": -244.19442749023438, "loss": 0.0094, "rewards/accuracies": 1.0, "rewards/chosen": -6.020873546600342, "rewards/margins": 9.274447441101074, "rewards/rejected": -15.295321464538574, "step": 2921 }, { "epoch": 4.69, "learning_rate": 2.6605231866825205e-07, "logits/chosen": -1.6992183923721313, "logits/rejected": -1.6777079105377197, "logps/chosen": -109.28311157226562, "logps/rejected": -192.36102294921875, "loss": 0.0041, "rewards/accuracies": 1.0, "rewards/chosen": -3.4933342933654785, "rewards/margins": 7.887777328491211, "rewards/rejected": -11.381111145019531, "step": 2922 }, { "epoch": 4.69, "learning_rate": 2.65953230281411e-07, "logits/chosen": -1.664992332458496, "logits/rejected": -1.7715034484863281, "logps/chosen": -166.52658081054688, "logps/rejected": -287.16522216796875, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -6.854942798614502, "rewards/margins": 10.135269165039062, "rewards/rejected": -16.99021339416504, "step": 2923 }, { "epoch": 4.69, "learning_rate": 2.6585414189456997e-07, "logits/chosen": -1.5022164583206177, "logits/rejected": -1.5000247955322266, "logps/chosen": -124.38109588623047, "logps/rejected": -245.6664276123047, "loss": 0.0042, "rewards/accuracies": 1.0, "rewards/chosen": -4.711190223693848, "rewards/margins": 11.398712158203125, "rewards/rejected": -16.10990333557129, "step": 2924 }, { "epoch": 4.7, "learning_rate": 2.657550535077289e-07, "logits/chosen": -1.6681286096572876, "logits/rejected": -1.6769405603408813, "logps/chosen": -165.5919647216797, "logps/rejected": -297.0609130859375, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -4.970783233642578, "rewards/margins": 14.066560745239258, "rewards/rejected": -19.03734588623047, "step": 2925 }, { "epoch": 4.7, "learning_rate": 2.6565596512088784e-07, "logits/chosen": -1.56667959690094, "logits/rejected": -1.6233766078948975, "logps/chosen": -142.14007568359375, "logps/rejected": -277.90521240234375, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -4.964111804962158, "rewards/margins": 12.068734169006348, "rewards/rejected": -17.03284454345703, "step": 2926 }, { "epoch": 4.7, "learning_rate": 2.6555687673404675e-07, "logits/chosen": -1.6209644079208374, "logits/rejected": -1.625811219215393, "logps/chosen": -107.24798583984375, "logps/rejected": -247.08096313476562, "loss": 0.0083, "rewards/accuracies": 1.0, "rewards/chosen": -4.125896453857422, "rewards/margins": 11.901782989501953, "rewards/rejected": -16.027681350708008, "step": 2927 }, { "epoch": 4.7, "learning_rate": 2.6545778834720565e-07, "logits/chosen": -1.5354163646697998, "logits/rejected": -1.5703887939453125, "logps/chosen": -148.10104370117188, "logps/rejected": -250.90032958984375, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -5.14177131652832, "rewards/margins": 9.716143608093262, "rewards/rejected": -14.857914924621582, "step": 2928 }, { "epoch": 4.7, "learning_rate": 2.6535869996036467e-07, "logits/chosen": -1.4321351051330566, "logits/rejected": -1.4843884706497192, "logps/chosen": -109.00495910644531, "logps/rejected": -232.0357208251953, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -4.6745710372924805, "rewards/margins": 11.093461990356445, "rewards/rejected": -15.768033981323242, "step": 2929 }, { "epoch": 4.7, "learning_rate": 2.6525961157352357e-07, "logits/chosen": -1.562753677368164, "logits/rejected": -1.6928423643112183, "logps/chosen": -124.12287902832031, "logps/rejected": -205.25027465820312, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": -6.199832916259766, "rewards/margins": 5.243329048156738, "rewards/rejected": -11.443161964416504, "step": 2930 }, { "epoch": 4.7, "learning_rate": 2.6516052318668253e-07, "logits/chosen": -1.681947112083435, "logits/rejected": -1.5355989933013916, "logps/chosen": -159.88783264160156, "logps/rejected": -242.93641662597656, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -5.009876251220703, "rewards/margins": 10.836859703063965, "rewards/rejected": -15.846735000610352, "step": 2931 }, { "epoch": 4.71, "learning_rate": 2.6506143479984144e-07, "logits/chosen": -1.6634950637817383, "logits/rejected": -1.7535076141357422, "logps/chosen": -117.1104507446289, "logps/rejected": -251.49745178222656, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -2.6129586696624756, "rewards/margins": 12.792705535888672, "rewards/rejected": -15.405664443969727, "step": 2932 }, { "epoch": 4.71, "learning_rate": 2.6496234641300035e-07, "logits/chosen": -1.5390058755874634, "logits/rejected": -1.5790014266967773, "logps/chosen": -128.06729125976562, "logps/rejected": -260.2852783203125, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -3.911489486694336, "rewards/margins": 11.941197395324707, "rewards/rejected": -15.85268783569336, "step": 2933 }, { "epoch": 4.71, "learning_rate": 2.648632580261593e-07, "logits/chosen": -1.6585723161697388, "logits/rejected": -1.7001410722732544, "logps/chosen": -96.82504272460938, "logps/rejected": -256.3796691894531, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -2.8496479988098145, "rewards/margins": 14.918493270874023, "rewards/rejected": -17.76814079284668, "step": 2934 }, { "epoch": 4.71, "learning_rate": 2.6476416963931827e-07, "logits/chosen": -1.634920358657837, "logits/rejected": -1.530321478843689, "logps/chosen": -163.0467987060547, "logps/rejected": -232.03384399414062, "loss": 0.0057, "rewards/accuracies": 1.0, "rewards/chosen": -6.0554680824279785, "rewards/margins": 7.967438220977783, "rewards/rejected": -14.022905349731445, "step": 2935 }, { "epoch": 4.71, "learning_rate": 2.646650812524772e-07, "logits/chosen": -1.5610246658325195, "logits/rejected": -1.5466043949127197, "logps/chosen": -140.00904846191406, "logps/rejected": -248.87921142578125, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -5.378490447998047, "rewards/margins": 11.078807830810547, "rewards/rejected": -16.457298278808594, "step": 2936 }, { "epoch": 4.71, "learning_rate": 2.6456599286563613e-07, "logits/chosen": -1.5065592527389526, "logits/rejected": -1.5753366947174072, "logps/chosen": -116.34737396240234, "logps/rejected": -233.07809448242188, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -4.8005051612854, "rewards/margins": 10.016885757446289, "rewards/rejected": -14.817390441894531, "step": 2937 }, { "epoch": 4.72, "learning_rate": 2.6446690447879504e-07, "logits/chosen": -1.5162394046783447, "logits/rejected": -1.5769097805023193, "logps/chosen": -131.25196838378906, "logps/rejected": -261.5663757324219, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": -5.10664176940918, "rewards/margins": 10.131183624267578, "rewards/rejected": -15.237825393676758, "step": 2938 }, { "epoch": 4.72, "learning_rate": 2.64367816091954e-07, "logits/chosen": -1.4345641136169434, "logits/rejected": -1.496512532234192, "logps/chosen": -78.43336486816406, "logps/rejected": -202.10061645507812, "loss": 0.0062, "rewards/accuracies": 1.0, "rewards/chosen": -2.0575406551361084, "rewards/margins": 11.824251174926758, "rewards/rejected": -13.881792068481445, "step": 2939 }, { "epoch": 4.72, "learning_rate": 2.6426872770511296e-07, "logits/chosen": -1.6593408584594727, "logits/rejected": -1.756556749343872, "logps/chosen": -135.34695434570312, "logps/rejected": -281.85272216796875, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -6.294543743133545, "rewards/margins": 12.589704513549805, "rewards/rejected": -18.884246826171875, "step": 2940 }, { "epoch": 4.72, "learning_rate": 2.641696393182719e-07, "logits/chosen": -1.724341630935669, "logits/rejected": -1.7301373481750488, "logps/chosen": -128.88929748535156, "logps/rejected": -220.2422332763672, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -3.602444648742676, "rewards/margins": 10.830321311950684, "rewards/rejected": -14.43276596069336, "step": 2941 }, { "epoch": 4.72, "learning_rate": 2.640705509314308e-07, "logits/chosen": -1.5199843645095825, "logits/rejected": -1.509350061416626, "logps/chosen": -100.95677947998047, "logps/rejected": -186.08816528320312, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": -3.161935329437256, "rewards/margins": 9.148808479309082, "rewards/rejected": -12.31074333190918, "step": 2942 }, { "epoch": 4.72, "learning_rate": 2.6397146254458973e-07, "logits/chosen": -1.6404558420181274, "logits/rejected": -1.640014410018921, "logps/chosen": -168.42388916015625, "logps/rejected": -275.60650634765625, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -8.199614524841309, "rewards/margins": 10.895272254943848, "rewards/rejected": -19.094886779785156, "step": 2943 }, { "epoch": 4.73, "learning_rate": 2.638723741577487e-07, "logits/chosen": -1.6340628862380981, "logits/rejected": -1.7327852249145508, "logps/chosen": -91.6138916015625, "logps/rejected": -293.92767333984375, "loss": 0.0056, "rewards/accuracies": 1.0, "rewards/chosen": -3.09183669090271, "rewards/margins": 16.92431640625, "rewards/rejected": -20.01615333557129, "step": 2944 }, { "epoch": 4.73, "learning_rate": 2.6377328577090765e-07, "logits/chosen": -1.6631691455841064, "logits/rejected": -1.741714358329773, "logps/chosen": -150.75234985351562, "logps/rejected": -267.5704345703125, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -4.6404314041137695, "rewards/margins": 11.15585708618164, "rewards/rejected": -15.796287536621094, "step": 2945 }, { "epoch": 4.73, "learning_rate": 2.636741973840666e-07, "logits/chosen": -1.481337547302246, "logits/rejected": -1.5540251731872559, "logps/chosen": -150.5426788330078, "logps/rejected": -297.3229675292969, "loss": 0.0081, "rewards/accuracies": 1.0, "rewards/chosen": -8.031569480895996, "rewards/margins": 12.178277969360352, "rewards/rejected": -20.209848403930664, "step": 2946 }, { "epoch": 4.73, "learning_rate": 2.635751089972255e-07, "logits/chosen": -1.6638230085372925, "logits/rejected": -1.6340886354446411, "logps/chosen": -166.31642150878906, "logps/rejected": -262.2658996582031, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -6.866975784301758, "rewards/margins": 10.196449279785156, "rewards/rejected": -17.063425064086914, "step": 2947 }, { "epoch": 4.73, "learning_rate": 2.634760206103844e-07, "logits/chosen": -1.6198116540908813, "logits/rejected": -1.7256009578704834, "logps/chosen": -115.16364288330078, "logps/rejected": -240.25595092773438, "loss": 0.0696, "rewards/accuracies": 1.0, "rewards/chosen": -2.8089957237243652, "rewards/margins": 11.751544952392578, "rewards/rejected": -14.560540199279785, "step": 2948 }, { "epoch": 4.73, "learning_rate": 2.633769322235434e-07, "logits/chosen": -1.6263103485107422, "logits/rejected": -1.6700454950332642, "logps/chosen": -134.1862335205078, "logps/rejected": -244.60009765625, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -4.5912861824035645, "rewards/margins": 11.207610130310059, "rewards/rejected": -15.798895835876465, "step": 2949 }, { "epoch": 4.74, "learning_rate": 2.632778438367023e-07, "logits/chosen": -1.6107457876205444, "logits/rejected": -1.5127252340316772, "logps/chosen": -149.2644500732422, "logps/rejected": -246.7620391845703, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -5.013493061065674, "rewards/margins": 10.808847427368164, "rewards/rejected": -15.822341918945312, "step": 2950 }, { "epoch": 4.74, "learning_rate": 2.631787554498613e-07, "logits/chosen": -1.6662752628326416, "logits/rejected": -1.6638222932815552, "logps/chosen": -140.41502380371094, "logps/rejected": -264.69879150390625, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -2.3868589401245117, "rewards/margins": 14.510576248168945, "rewards/rejected": -16.897436141967773, "step": 2951 }, { "epoch": 4.74, "learning_rate": 2.630796670630202e-07, "logits/chosen": -1.6421881914138794, "logits/rejected": -1.6611055135726929, "logps/chosen": -165.55694580078125, "logps/rejected": -294.27813720703125, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -6.808432579040527, "rewards/margins": 12.045783042907715, "rewards/rejected": -18.854215621948242, "step": 2952 }, { "epoch": 4.74, "learning_rate": 2.629805786761791e-07, "logits/chosen": -1.513806700706482, "logits/rejected": -1.5880364179611206, "logps/chosen": -131.44705200195312, "logps/rejected": -268.2743225097656, "loss": 0.0059, "rewards/accuracies": 1.0, "rewards/chosen": -6.388998985290527, "rewards/margins": 11.766688346862793, "rewards/rejected": -18.15568733215332, "step": 2953 }, { "epoch": 4.74, "learning_rate": 2.628814902893381e-07, "logits/chosen": -1.6824309825897217, "logits/rejected": -1.6920301914215088, "logps/chosen": -109.23954010009766, "logps/rejected": -204.2901611328125, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -3.154960870742798, "rewards/margins": 10.380378723144531, "rewards/rejected": -13.53533935546875, "step": 2954 }, { "epoch": 4.74, "learning_rate": 2.62782401902497e-07, "logits/chosen": -1.5776376724243164, "logits/rejected": -1.6075630187988281, "logps/chosen": -139.78448486328125, "logps/rejected": -292.4078063964844, "loss": 0.0804, "rewards/accuracies": 1.0, "rewards/chosen": -4.433243751525879, "rewards/margins": 15.481950759887695, "rewards/rejected": -19.915193557739258, "step": 2955 }, { "epoch": 4.74, "learning_rate": 2.62683313515656e-07, "logits/chosen": -1.5523757934570312, "logits/rejected": -1.5167943239212036, "logps/chosen": -139.92617797851562, "logps/rejected": -234.29151916503906, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -6.011055946350098, "rewards/margins": 9.119413375854492, "rewards/rejected": -15.130468368530273, "step": 2956 }, { "epoch": 4.75, "learning_rate": 2.625842251288149e-07, "logits/chosen": -1.5878188610076904, "logits/rejected": -1.6367127895355225, "logps/chosen": -154.1046142578125, "logps/rejected": -263.8914794921875, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": -7.735781192779541, "rewards/margins": 10.246749877929688, "rewards/rejected": -17.98253059387207, "step": 2957 }, { "epoch": 4.75, "learning_rate": 2.624851367419738e-07, "logits/chosen": -1.533967137336731, "logits/rejected": -1.580559253692627, "logps/chosen": -135.9903564453125, "logps/rejected": -261.99066162109375, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -3.8494205474853516, "rewards/margins": 12.925979614257812, "rewards/rejected": -16.775400161743164, "step": 2958 }, { "epoch": 4.75, "learning_rate": 2.6238604835513277e-07, "logits/chosen": -1.5072414875030518, "logits/rejected": -1.5882620811462402, "logps/chosen": -115.62643432617188, "logps/rejected": -230.82611083984375, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -3.8355181217193604, "rewards/margins": 10.628847122192383, "rewards/rejected": -14.464365005493164, "step": 2959 }, { "epoch": 4.75, "learning_rate": 2.622869599682917e-07, "logits/chosen": -1.8272441625595093, "logits/rejected": -1.7548272609710693, "logps/chosen": -122.36483764648438, "logps/rejected": -245.96002197265625, "loss": 0.0046, "rewards/accuracies": 1.0, "rewards/chosen": -3.6312496662139893, "rewards/margins": 13.059724807739258, "rewards/rejected": -16.690975189208984, "step": 2960 }, { "epoch": 4.75, "learning_rate": 2.6218787158145064e-07, "logits/chosen": -1.64153254032135, "logits/rejected": -1.686498999595642, "logps/chosen": -115.31649017333984, "logps/rejected": -223.97317504882812, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -2.8216562271118164, "rewards/margins": 11.755496978759766, "rewards/rejected": -14.577152252197266, "step": 2961 }, { "epoch": 4.75, "learning_rate": 2.620887831946096e-07, "logits/chosen": -1.560295820236206, "logits/rejected": -1.5732100009918213, "logps/chosen": -137.75436401367188, "logps/rejected": -205.2888641357422, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -6.02023458480835, "rewards/margins": 6.271869659423828, "rewards/rejected": -12.292104721069336, "step": 2962 }, { "epoch": 4.76, "learning_rate": 2.619896948077685e-07, "logits/chosen": -1.5232536792755127, "logits/rejected": -1.5038464069366455, "logps/chosen": -149.11981201171875, "logps/rejected": -255.08160400390625, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -5.597732067108154, "rewards/margins": 12.18422794342041, "rewards/rejected": -17.781959533691406, "step": 2963 }, { "epoch": 4.76, "learning_rate": 2.6189060642092746e-07, "logits/chosen": -1.4766042232513428, "logits/rejected": -1.4662771224975586, "logps/chosen": -143.96310424804688, "logps/rejected": -264.7278747558594, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -5.392165184020996, "rewards/margins": 13.039006233215332, "rewards/rejected": -18.431171417236328, "step": 2964 }, { "epoch": 4.76, "learning_rate": 2.6179151803408637e-07, "logits/chosen": -1.5099155902862549, "logits/rejected": -1.4369797706604004, "logps/chosen": -137.8896484375, "logps/rejected": -193.15420532226562, "loss": 0.0372, "rewards/accuracies": 1.0, "rewards/chosen": -5.0526580810546875, "rewards/margins": 7.801795959472656, "rewards/rejected": -12.854454040527344, "step": 2965 }, { "epoch": 4.76, "learning_rate": 2.6169242964724533e-07, "logits/chosen": -1.532375454902649, "logits/rejected": -1.5290477275848389, "logps/chosen": -123.12327575683594, "logps/rejected": -212.09608459472656, "loss": 0.0039, "rewards/accuracies": 1.0, "rewards/chosen": -3.7397208213806152, "rewards/margins": 8.42689323425293, "rewards/rejected": -12.166614532470703, "step": 2966 }, { "epoch": 4.76, "learning_rate": 2.615933412604043e-07, "logits/chosen": -1.6975924968719482, "logits/rejected": -1.7444579601287842, "logps/chosen": -125.30848693847656, "logps/rejected": -260.2738037109375, "loss": 0.0156, "rewards/accuracies": 1.0, "rewards/chosen": -4.951406478881836, "rewards/margins": 11.768741607666016, "rewards/rejected": -16.72014808654785, "step": 2967 }, { "epoch": 4.76, "learning_rate": 2.614942528735632e-07, "logits/chosen": -1.5837938785552979, "logits/rejected": -1.6247901916503906, "logps/chosen": -147.67364501953125, "logps/rejected": -252.96434020996094, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -5.573224067687988, "rewards/margins": 11.018562316894531, "rewards/rejected": -16.591787338256836, "step": 2968 }, { "epoch": 4.77, "learning_rate": 2.6139516448672216e-07, "logits/chosen": -1.4666013717651367, "logits/rejected": -1.503180742263794, "logps/chosen": -158.0015106201172, "logps/rejected": -280.8604736328125, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -7.912051200866699, "rewards/margins": 11.598762512207031, "rewards/rejected": -19.510814666748047, "step": 2969 }, { "epoch": 4.77, "learning_rate": 2.6129607609988106e-07, "logits/chosen": -1.5692720413208008, "logits/rejected": -1.536184310913086, "logps/chosen": -144.45391845703125, "logps/rejected": -239.8967742919922, "loss": 0.0056, "rewards/accuracies": 1.0, "rewards/chosen": -5.885148525238037, "rewards/margins": 9.141096115112305, "rewards/rejected": -15.0262451171875, "step": 2970 }, { "epoch": 4.77, "learning_rate": 2.6119698771303997e-07, "logits/chosen": -1.5661554336547852, "logits/rejected": -1.4158103466033936, "logps/chosen": -127.92787170410156, "logps/rejected": -180.58016967773438, "loss": 0.01, "rewards/accuracies": 1.0, "rewards/chosen": -3.7636351585388184, "rewards/margins": 8.442623138427734, "rewards/rejected": -12.206257820129395, "step": 2971 }, { "epoch": 4.77, "learning_rate": 2.61097899326199e-07, "logits/chosen": -1.5915119647979736, "logits/rejected": -1.5848171710968018, "logps/chosen": -102.35797119140625, "logps/rejected": -237.24099731445312, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -3.321559190750122, "rewards/margins": 13.124653816223145, "rewards/rejected": -16.44621467590332, "step": 2972 }, { "epoch": 4.77, "learning_rate": 2.609988109393579e-07, "logits/chosen": -1.6077306270599365, "logits/rejected": -1.5940797328948975, "logps/chosen": -144.78851318359375, "logps/rejected": -262.662841796875, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -5.638704299926758, "rewards/margins": 10.334847450256348, "rewards/rejected": -15.973552703857422, "step": 2973 }, { "epoch": 4.77, "learning_rate": 2.6089972255251685e-07, "logits/chosen": -1.488654375076294, "logits/rejected": -1.5726031064987183, "logps/chosen": -149.5167694091797, "logps/rejected": -266.619140625, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -6.937635898590088, "rewards/margins": 10.097967147827148, "rewards/rejected": -17.035602569580078, "step": 2974 }, { "epoch": 4.78, "learning_rate": 2.6080063416567576e-07, "logits/chosen": -1.532511591911316, "logits/rejected": -1.5266168117523193, "logps/chosen": -149.94171142578125, "logps/rejected": -258.4335021972656, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -6.099495887756348, "rewards/margins": 11.029693603515625, "rewards/rejected": -17.12919044494629, "step": 2975 }, { "epoch": 4.78, "learning_rate": 2.6070154577883466e-07, "logits/chosen": -1.648294448852539, "logits/rejected": -1.6365549564361572, "logps/chosen": -149.1060333251953, "logps/rejected": -266.92181396484375, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -4.991412162780762, "rewards/margins": 11.124512672424316, "rewards/rejected": -16.115924835205078, "step": 2976 }, { "epoch": 4.78, "learning_rate": 2.606024573919937e-07, "logits/chosen": -1.5770782232284546, "logits/rejected": -1.6465632915496826, "logps/chosen": -94.91999053955078, "logps/rejected": -186.91290283203125, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -2.1803793907165527, "rewards/margins": 8.724727630615234, "rewards/rejected": -10.905107498168945, "step": 2977 }, { "epoch": 4.78, "learning_rate": 2.605033690051526e-07, "logits/chosen": -1.5647876262664795, "logits/rejected": -1.6584553718566895, "logps/chosen": -150.70741271972656, "logps/rejected": -279.16900634765625, "loss": 0.0117, "rewards/accuracies": 1.0, "rewards/chosen": -6.360967636108398, "rewards/margins": 9.65416145324707, "rewards/rejected": -16.01512908935547, "step": 2978 }, { "epoch": 4.78, "learning_rate": 2.6040428061831154e-07, "logits/chosen": -1.657009482383728, "logits/rejected": -1.6330184936523438, "logps/chosen": -137.5081787109375, "logps/rejected": -258.7147521972656, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -5.467405319213867, "rewards/margins": 11.928424835205078, "rewards/rejected": -17.395828247070312, "step": 2979 }, { "epoch": 4.78, "learning_rate": 2.6030519223147045e-07, "logits/chosen": -1.4449386596679688, "logits/rejected": -1.4652687311172485, "logps/chosen": -135.45867919921875, "logps/rejected": -243.10995483398438, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -5.1092448234558105, "rewards/margins": 9.865264892578125, "rewards/rejected": -14.974510192871094, "step": 2980 }, { "epoch": 4.78, "learning_rate": 2.6020610384462936e-07, "logits/chosen": -1.5515271425247192, "logits/rejected": -1.6211881637573242, "logps/chosen": -161.5952606201172, "logps/rejected": -304.0499267578125, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -7.367401599884033, "rewards/margins": 12.173785209655762, "rewards/rejected": -19.541187286376953, "step": 2981 }, { "epoch": 4.79, "learning_rate": 2.6010701545778837e-07, "logits/chosen": -1.6486141681671143, "logits/rejected": -1.6421289443969727, "logps/chosen": -112.67147827148438, "logps/rejected": -235.7434539794922, "loss": 0.0093, "rewards/accuracies": 1.0, "rewards/chosen": -4.381955146789551, "rewards/margins": 12.286215782165527, "rewards/rejected": -16.668170928955078, "step": 2982 }, { "epoch": 4.79, "learning_rate": 2.600079270709473e-07, "logits/chosen": -1.7242636680603027, "logits/rejected": -1.617112636566162, "logps/chosen": -180.59048461914062, "logps/rejected": -255.0449676513672, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -4.2308454513549805, "rewards/margins": 11.062152862548828, "rewards/rejected": -15.292998313903809, "step": 2983 }, { "epoch": 4.79, "learning_rate": 2.5990883868410624e-07, "logits/chosen": -1.515058994293213, "logits/rejected": -1.5850697755813599, "logps/chosen": -163.87640380859375, "logps/rejected": -280.37091064453125, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -7.012068271636963, "rewards/margins": 10.353845596313477, "rewards/rejected": -17.365915298461914, "step": 2984 }, { "epoch": 4.79, "learning_rate": 2.5980975029726514e-07, "logits/chosen": -1.6814583539962769, "logits/rejected": -1.6520367860794067, "logps/chosen": -142.67295837402344, "logps/rejected": -211.09759521484375, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -5.44337272644043, "rewards/margins": 8.3430814743042, "rewards/rejected": -13.786453247070312, "step": 2985 }, { "epoch": 4.79, "learning_rate": 2.5971066191042405e-07, "logits/chosen": -1.4846441745758057, "logits/rejected": -1.4906529188156128, "logps/chosen": -133.39564514160156, "logps/rejected": -235.34683227539062, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -5.352825164794922, "rewards/margins": 9.272780418395996, "rewards/rejected": -14.625605583190918, "step": 2986 }, { "epoch": 4.79, "learning_rate": 2.5961157352358306e-07, "logits/chosen": -1.691097378730774, "logits/rejected": -1.6358839273452759, "logps/chosen": -125.97467041015625, "logps/rejected": -244.2049560546875, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -4.223735809326172, "rewards/margins": 11.826028823852539, "rewards/rejected": -16.049762725830078, "step": 2987 }, { "epoch": 4.8, "learning_rate": 2.5951248513674197e-07, "logits/chosen": -1.5661427974700928, "logits/rejected": -1.6407376527786255, "logps/chosen": -92.1572036743164, "logps/rejected": -202.39401245117188, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -3.8801915645599365, "rewards/margins": 8.6819429397583, "rewards/rejected": -12.562134742736816, "step": 2988 }, { "epoch": 4.8, "learning_rate": 2.5941339674990093e-07, "logits/chosen": -1.5693097114562988, "logits/rejected": -1.4564285278320312, "logps/chosen": -166.66729736328125, "logps/rejected": -248.87783813476562, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -7.3753461837768555, "rewards/margins": 9.809085845947266, "rewards/rejected": -17.184432983398438, "step": 2989 }, { "epoch": 4.8, "learning_rate": 2.5931430836305984e-07, "logits/chosen": -1.5660454034805298, "logits/rejected": -1.6023552417755127, "logps/chosen": -131.79315185546875, "logps/rejected": -283.03082275390625, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -4.028189659118652, "rewards/margins": 13.342792510986328, "rewards/rejected": -17.370981216430664, "step": 2990 }, { "epoch": 4.8, "learning_rate": 2.5921521997621874e-07, "logits/chosen": -1.718446969985962, "logits/rejected": -1.7605522871017456, "logps/chosen": -136.79595947265625, "logps/rejected": -235.0123291015625, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -5.764071464538574, "rewards/margins": 9.382318496704102, "rewards/rejected": -15.146389961242676, "step": 2991 }, { "epoch": 4.8, "learning_rate": 2.5911613158937775e-07, "logits/chosen": -1.6609740257263184, "logits/rejected": -1.645427942276001, "logps/chosen": -163.3021697998047, "logps/rejected": -208.85342407226562, "loss": 0.0492, "rewards/accuracies": 1.0, "rewards/chosen": -7.850152969360352, "rewards/margins": 5.094761371612549, "rewards/rejected": -12.944914817810059, "step": 2992 }, { "epoch": 4.8, "learning_rate": 2.5901704320253666e-07, "logits/chosen": -1.4926533699035645, "logits/rejected": -1.5649020671844482, "logps/chosen": -165.16346740722656, "logps/rejected": -264.6737060546875, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -7.645424842834473, "rewards/margins": 8.680688858032227, "rewards/rejected": -16.326114654541016, "step": 2993 }, { "epoch": 4.81, "learning_rate": 2.589179548156956e-07, "logits/chosen": -1.5406733751296997, "logits/rejected": -1.4640238285064697, "logps/chosen": -179.0723876953125, "logps/rejected": -264.95758056640625, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -8.898601531982422, "rewards/margins": 9.88935661315918, "rewards/rejected": -18.7879581451416, "step": 2994 }, { "epoch": 4.81, "learning_rate": 2.5881886642885453e-07, "logits/chosen": -1.5794203281402588, "logits/rejected": -1.549370288848877, "logps/chosen": -145.34156799316406, "logps/rejected": -268.919677734375, "loss": 0.0083, "rewards/accuracies": 1.0, "rewards/chosen": -5.2300238609313965, "rewards/margins": 13.306482315063477, "rewards/rejected": -18.53650665283203, "step": 2995 }, { "epoch": 4.81, "learning_rate": 2.5871977804201344e-07, "logits/chosen": -1.5180045366287231, "logits/rejected": -1.5880374908447266, "logps/chosen": -124.54895782470703, "logps/rejected": -241.7191925048828, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -4.959848403930664, "rewards/margins": 11.811300277709961, "rewards/rejected": -16.771148681640625, "step": 2996 }, { "epoch": 4.81, "learning_rate": 2.586206896551724e-07, "logits/chosen": -1.5037211179733276, "logits/rejected": -1.5100979804992676, "logps/chosen": -177.17922973632812, "logps/rejected": -279.1216735839844, "loss": 0.0051, "rewards/accuracies": 1.0, "rewards/chosen": -9.742660522460938, "rewards/margins": 8.819931983947754, "rewards/rejected": -18.562593460083008, "step": 2997 }, { "epoch": 4.81, "learning_rate": 2.5852160126833135e-07, "logits/chosen": -1.5121715068817139, "logits/rejected": -1.4797178506851196, "logps/chosen": -184.1899871826172, "logps/rejected": -294.52752685546875, "loss": 0.0106, "rewards/accuracies": 1.0, "rewards/chosen": -6.440917491912842, "rewards/margins": 11.784178733825684, "rewards/rejected": -18.225095748901367, "step": 2998 }, { "epoch": 4.81, "learning_rate": 2.5842251288149026e-07, "logits/chosen": -1.557069182395935, "logits/rejected": -1.627794623374939, "logps/chosen": -179.11473083496094, "logps/rejected": -280.16876220703125, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -8.144424438476562, "rewards/margins": 9.128866195678711, "rewards/rejected": -17.273290634155273, "step": 2999 }, { "epoch": 4.82, "learning_rate": 2.583234244946492e-07, "logits/chosen": -1.7662169933319092, "logits/rejected": -1.7248973846435547, "logps/chosen": -156.29934692382812, "logps/rejected": -284.9810791015625, "loss": 0.0083, "rewards/accuracies": 1.0, "rewards/chosen": -5.899590969085693, "rewards/margins": 14.615553855895996, "rewards/rejected": -20.51514434814453, "step": 3000 }, { "epoch": 4.82, "learning_rate": 2.5822433610780813e-07, "logits/chosen": -1.5423063039779663, "logits/rejected": -1.5523961782455444, "logps/chosen": -120.17752075195312, "logps/rejected": -212.08482360839844, "loss": 0.012, "rewards/accuracies": 1.0, "rewards/chosen": -4.666022300720215, "rewards/margins": 9.611339569091797, "rewards/rejected": -14.277361869812012, "step": 3001 }, { "epoch": 4.82, "learning_rate": 2.581252477209671e-07, "logits/chosen": -1.759448528289795, "logits/rejected": -1.5822601318359375, "logps/chosen": -204.325439453125, "logps/rejected": -261.5441589355469, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -7.24658203125, "rewards/margins": 10.5662202835083, "rewards/rejected": -17.812803268432617, "step": 3002 }, { "epoch": 4.82, "learning_rate": 2.5802615933412605e-07, "logits/chosen": -1.4918349981307983, "logits/rejected": -1.5515334606170654, "logps/chosen": -139.88677978515625, "logps/rejected": -287.52178955078125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -5.982763290405273, "rewards/margins": 13.850737571716309, "rewards/rejected": -19.8335018157959, "step": 3003 }, { "epoch": 4.82, "learning_rate": 2.5792707094728495e-07, "logits/chosen": -1.6262210607528687, "logits/rejected": -1.5808684825897217, "logps/chosen": -138.77532958984375, "logps/rejected": -196.3770751953125, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": -6.33355712890625, "rewards/margins": 6.742087364196777, "rewards/rejected": -13.075643539428711, "step": 3004 }, { "epoch": 4.82, "learning_rate": 2.578279825604439e-07, "logits/chosen": -1.5124845504760742, "logits/rejected": -1.5999141931533813, "logps/chosen": -87.28600311279297, "logps/rejected": -279.16680908203125, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -2.015707492828369, "rewards/margins": 17.88571548461914, "rewards/rejected": -19.90142250061035, "step": 3005 }, { "epoch": 4.83, "learning_rate": 2.577288941736028e-07, "logits/chosen": -1.452108383178711, "logits/rejected": -1.4366297721862793, "logps/chosen": -118.63349914550781, "logps/rejected": -219.49671936035156, "loss": 0.0081, "rewards/accuracies": 1.0, "rewards/chosen": -4.10701322555542, "rewards/margins": 10.924370765686035, "rewards/rejected": -15.031383514404297, "step": 3006 }, { "epoch": 4.83, "learning_rate": 2.576298057867618e-07, "logits/chosen": -1.5253705978393555, "logits/rejected": -1.5792113542556763, "logps/chosen": -162.13113403320312, "logps/rejected": -274.82452392578125, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -7.34441614151001, "rewards/margins": 9.571182250976562, "rewards/rejected": -16.915599822998047, "step": 3007 }, { "epoch": 4.83, "learning_rate": 2.5753071739992074e-07, "logits/chosen": -1.659886121749878, "logits/rejected": -1.7633187770843506, "logps/chosen": -169.87677001953125, "logps/rejected": -271.030029296875, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -6.8277907371521, "rewards/margins": 9.594008445739746, "rewards/rejected": -16.421798706054688, "step": 3008 }, { "epoch": 4.83, "learning_rate": 2.5743162901307965e-07, "logits/chosen": -1.517781376838684, "logits/rejected": -1.5624313354492188, "logps/chosen": -144.9323272705078, "logps/rejected": -307.3636169433594, "loss": 0.0049, "rewards/accuracies": 1.0, "rewards/chosen": -5.9975786209106445, "rewards/margins": 14.590618133544922, "rewards/rejected": -20.588197708129883, "step": 3009 }, { "epoch": 4.83, "learning_rate": 2.573325406262386e-07, "logits/chosen": -1.6240814924240112, "logits/rejected": -1.6325764656066895, "logps/chosen": -132.98953247070312, "logps/rejected": -242.91131591796875, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -4.834216117858887, "rewards/margins": 11.271158218383789, "rewards/rejected": -16.105375289916992, "step": 3010 }, { "epoch": 4.83, "learning_rate": 2.572334522393975e-07, "logits/chosen": -1.4851702451705933, "logits/rejected": -1.554124355316162, "logps/chosen": -173.875732421875, "logps/rejected": -307.4058837890625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -7.630066871643066, "rewards/margins": 11.896696090698242, "rewards/rejected": -19.526763916015625, "step": 3011 }, { "epoch": 4.83, "learning_rate": 2.571343638525565e-07, "logits/chosen": -1.7178688049316406, "logits/rejected": -1.5797784328460693, "logps/chosen": -135.68399047851562, "logps/rejected": -201.8084716796875, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -4.931974411010742, "rewards/margins": 9.028636932373047, "rewards/rejected": -13.960610389709473, "step": 3012 }, { "epoch": 4.84, "learning_rate": 2.570352754657154e-07, "logits/chosen": -1.5271825790405273, "logits/rejected": -1.5239899158477783, "logps/chosen": -133.82327270507812, "logps/rejected": -218.52940368652344, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": -4.884002685546875, "rewards/margins": 9.102221488952637, "rewards/rejected": -13.986224174499512, "step": 3013 }, { "epoch": 4.84, "learning_rate": 2.5693618707887434e-07, "logits/chosen": -1.7121046781539917, "logits/rejected": -1.6954052448272705, "logps/chosen": -143.54342651367188, "logps/rejected": -242.92987060546875, "loss": 0.0186, "rewards/accuracies": 1.0, "rewards/chosen": -4.22504997253418, "rewards/margins": 12.094991683959961, "rewards/rejected": -16.320043563842773, "step": 3014 }, { "epoch": 4.84, "learning_rate": 2.568370986920333e-07, "logits/chosen": -1.4397704601287842, "logits/rejected": -1.4956470727920532, "logps/chosen": -97.32813262939453, "logps/rejected": -176.48704528808594, "loss": 0.0081, "rewards/accuracies": 1.0, "rewards/chosen": -2.939892292022705, "rewards/margins": 7.171698570251465, "rewards/rejected": -10.111590385437012, "step": 3015 }, { "epoch": 4.84, "learning_rate": 2.567380103051922e-07, "logits/chosen": -1.3748772144317627, "logits/rejected": -1.395900011062622, "logps/chosen": -109.3741226196289, "logps/rejected": -209.11856079101562, "loss": 0.0598, "rewards/accuracies": 1.0, "rewards/chosen": -4.669735431671143, "rewards/margins": 10.947754859924316, "rewards/rejected": -15.617490768432617, "step": 3016 }, { "epoch": 4.84, "learning_rate": 2.5663892191835117e-07, "logits/chosen": -1.6579629182815552, "logits/rejected": -1.578538417816162, "logps/chosen": -122.98030090332031, "logps/rejected": -205.38848876953125, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -4.343793869018555, "rewards/margins": 9.223575592041016, "rewards/rejected": -13.56736946105957, "step": 3017 }, { "epoch": 4.84, "learning_rate": 2.5653983353151007e-07, "logits/chosen": -1.463202953338623, "logits/rejected": -1.5099128484725952, "logps/chosen": -138.76492309570312, "logps/rejected": -275.33782958984375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -5.227639198303223, "rewards/margins": 13.36764907836914, "rewards/rejected": -18.595287322998047, "step": 3018 }, { "epoch": 4.85, "learning_rate": 2.5644074514466903e-07, "logits/chosen": -1.5126879215240479, "logits/rejected": -1.449783205986023, "logps/chosen": -192.20120239257812, "logps/rejected": -288.2878112792969, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/chosen": -9.051506996154785, "rewards/margins": 11.412683486938477, "rewards/rejected": -20.464189529418945, "step": 3019 }, { "epoch": 4.85, "learning_rate": 2.56341656757828e-07, "logits/chosen": -1.810093641281128, "logits/rejected": -1.6125891208648682, "logps/chosen": -206.1083221435547, "logps/rejected": -256.6120300292969, "loss": 0.0096, "rewards/accuracies": 1.0, "rewards/chosen": -8.839030265808105, "rewards/margins": 8.256784439086914, "rewards/rejected": -17.095815658569336, "step": 3020 }, { "epoch": 4.85, "learning_rate": 2.562425683709869e-07, "logits/chosen": -1.6111949682235718, "logits/rejected": -1.557801365852356, "logps/chosen": -137.77828979492188, "logps/rejected": -282.5586853027344, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -5.428106784820557, "rewards/margins": 14.518948554992676, "rewards/rejected": -19.94705581665039, "step": 3021 }, { "epoch": 4.85, "learning_rate": 2.5614347998414586e-07, "logits/chosen": -1.4028807878494263, "logits/rejected": -1.4797894954681396, "logps/chosen": -160.17153930664062, "logps/rejected": -229.8134765625, "loss": 0.0074, "rewards/accuracies": 1.0, "rewards/chosen": -6.1658453941345215, "rewards/margins": 7.4520649909973145, "rewards/rejected": -13.617910385131836, "step": 3022 }, { "epoch": 4.85, "learning_rate": 2.5604439159730477e-07, "logits/chosen": -1.4407119750976562, "logits/rejected": -1.5702636241912842, "logps/chosen": -109.42483520507812, "logps/rejected": -258.8568115234375, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -4.915988922119141, "rewards/margins": 11.449865341186523, "rewards/rejected": -16.365854263305664, "step": 3023 }, { "epoch": 4.85, "learning_rate": 2.559453032104637e-07, "logits/chosen": -1.6092844009399414, "logits/rejected": -1.4708192348480225, "logps/chosen": -143.40869140625, "logps/rejected": -225.58863830566406, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -5.504721164703369, "rewards/margins": 10.053157806396484, "rewards/rejected": -15.557880401611328, "step": 3024 }, { "epoch": 4.86, "learning_rate": 2.558462148236227e-07, "logits/chosen": -1.6969234943389893, "logits/rejected": -1.6762382984161377, "logps/chosen": -184.93289184570312, "logps/rejected": -270.7356872558594, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -7.311636924743652, "rewards/margins": 9.189972877502441, "rewards/rejected": -16.501609802246094, "step": 3025 }, { "epoch": 4.86, "learning_rate": 2.557471264367816e-07, "logits/chosen": -1.5995945930480957, "logits/rejected": -1.6474636793136597, "logps/chosen": -154.37522888183594, "logps/rejected": -291.111083984375, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -7.891350746154785, "rewards/margins": 11.481247901916504, "rewards/rejected": -19.372596740722656, "step": 3026 }, { "epoch": 4.86, "learning_rate": 2.5564803804994055e-07, "logits/chosen": -1.6266865730285645, "logits/rejected": -1.6713911294937134, "logps/chosen": -110.50544738769531, "logps/rejected": -230.7946319580078, "loss": 0.0707, "rewards/accuracies": 1.0, "rewards/chosen": -4.195006370544434, "rewards/margins": 9.61620044708252, "rewards/rejected": -13.811206817626953, "step": 3027 }, { "epoch": 4.86, "learning_rate": 2.5554894966309946e-07, "logits/chosen": -1.6201668977737427, "logits/rejected": -1.685059905052185, "logps/chosen": -154.9766845703125, "logps/rejected": -238.64572143554688, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -8.651875495910645, "rewards/margins": 5.77121639251709, "rewards/rejected": -14.423091888427734, "step": 3028 }, { "epoch": 4.86, "learning_rate": 2.554498612762584e-07, "logits/chosen": -1.4411693811416626, "logits/rejected": -1.4504756927490234, "logps/chosen": -140.91592407226562, "logps/rejected": -243.12066650390625, "loss": 0.0226, "rewards/accuracies": 1.0, "rewards/chosen": -4.666123390197754, "rewards/margins": 11.04104995727539, "rewards/rejected": -15.707172393798828, "step": 3029 }, { "epoch": 4.86, "learning_rate": 2.553507728894174e-07, "logits/chosen": -1.5778255462646484, "logits/rejected": -1.510769248008728, "logps/chosen": -174.35948181152344, "logps/rejected": -249.70950317382812, "loss": 0.0136, "rewards/accuracies": 1.0, "rewards/chosen": -7.522197246551514, "rewards/margins": 9.520999908447266, "rewards/rejected": -17.043197631835938, "step": 3030 }, { "epoch": 4.87, "learning_rate": 2.552516845025763e-07, "logits/chosen": -1.5168817043304443, "logits/rejected": -1.5514177083969116, "logps/chosen": -133.1212615966797, "logps/rejected": -287.5615234375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -6.5749921798706055, "rewards/margins": 11.86282730102539, "rewards/rejected": -18.437820434570312, "step": 3031 }, { "epoch": 4.87, "learning_rate": 2.551525961157352e-07, "logits/chosen": -1.6411187648773193, "logits/rejected": -1.613901138305664, "logps/chosen": -192.70660400390625, "logps/rejected": -277.80035400390625, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -8.916500091552734, "rewards/margins": 9.471851348876953, "rewards/rejected": -18.388355255126953, "step": 3032 }, { "epoch": 4.87, "learning_rate": 2.5505350772889415e-07, "logits/chosen": -1.4554873704910278, "logits/rejected": -1.569277048110962, "logps/chosen": -140.38502502441406, "logps/rejected": -277.5528259277344, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -5.462531089782715, "rewards/margins": 11.665081977844238, "rewards/rejected": -17.127614974975586, "step": 3033 }, { "epoch": 4.87, "learning_rate": 2.5495441934205306e-07, "logits/chosen": -1.4279760122299194, "logits/rejected": -1.499062418937683, "logps/chosen": -112.95838928222656, "logps/rejected": -252.52569580078125, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -5.1023759841918945, "rewards/margins": 13.043456077575684, "rewards/rejected": -18.145832061767578, "step": 3034 }, { "epoch": 4.87, "learning_rate": 2.5485533095521207e-07, "logits/chosen": -1.3998849391937256, "logits/rejected": -1.474881887435913, "logps/chosen": -158.05711364746094, "logps/rejected": -280.638671875, "loss": 0.0043, "rewards/accuracies": 1.0, "rewards/chosen": -8.19196891784668, "rewards/margins": 10.856045722961426, "rewards/rejected": -19.048015594482422, "step": 3035 }, { "epoch": 4.87, "learning_rate": 2.54756242568371e-07, "logits/chosen": -1.5152959823608398, "logits/rejected": -1.4274942874908447, "logps/chosen": -159.01747131347656, "logps/rejected": -252.18788146972656, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -6.751352310180664, "rewards/margins": 8.909814834594727, "rewards/rejected": -15.66116714477539, "step": 3036 }, { "epoch": 4.87, "learning_rate": 2.546571541815299e-07, "logits/chosen": -1.5985699892044067, "logits/rejected": -1.4947700500488281, "logps/chosen": -137.47235107421875, "logps/rejected": -236.36874389648438, "loss": 0.0084, "rewards/accuracies": 1.0, "rewards/chosen": -6.44924783706665, "rewards/margins": 10.36996841430664, "rewards/rejected": -16.819215774536133, "step": 3037 }, { "epoch": 4.88, "learning_rate": 2.5455806579468885e-07, "logits/chosen": -1.6482042074203491, "logits/rejected": -1.6252140998840332, "logps/chosen": -174.86434936523438, "logps/rejected": -286.18310546875, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -6.444029808044434, "rewards/margins": 12.65528678894043, "rewards/rejected": -19.09931755065918, "step": 3038 }, { "epoch": 4.88, "learning_rate": 2.5445897740784775e-07, "logits/chosen": -1.4293930530548096, "logits/rejected": -1.5777881145477295, "logps/chosen": -109.97760772705078, "logps/rejected": -268.6188049316406, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -5.18352746963501, "rewards/margins": 11.854950904846191, "rewards/rejected": -17.03847885131836, "step": 3039 }, { "epoch": 4.88, "learning_rate": 2.5435988902100676e-07, "logits/chosen": -1.6549938917160034, "logits/rejected": -1.610329031944275, "logps/chosen": -168.6908416748047, "logps/rejected": -251.5948944091797, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -6.269668102264404, "rewards/margins": 10.21218204498291, "rewards/rejected": -16.481849670410156, "step": 3040 }, { "epoch": 4.88, "learning_rate": 2.5426080063416567e-07, "logits/chosen": -1.6161553859710693, "logits/rejected": -1.6414794921875, "logps/chosen": -142.24404907226562, "logps/rejected": -251.42674255371094, "loss": 0.011, "rewards/accuracies": 1.0, "rewards/chosen": -6.786394119262695, "rewards/margins": 11.211397171020508, "rewards/rejected": -17.997791290283203, "step": 3041 }, { "epoch": 4.88, "learning_rate": 2.541617122473246e-07, "logits/chosen": -1.543824553489685, "logits/rejected": -1.6203733682632446, "logps/chosen": -126.67439270019531, "logps/rejected": -256.6875305175781, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -5.456245422363281, "rewards/margins": 11.644412994384766, "rewards/rejected": -17.100658416748047, "step": 3042 }, { "epoch": 4.88, "learning_rate": 2.5406262386048354e-07, "logits/chosen": -1.6935137510299683, "logits/rejected": -1.623835563659668, "logps/chosen": -130.92575073242188, "logps/rejected": -236.81829833984375, "loss": 0.0063, "rewards/accuracies": 1.0, "rewards/chosen": -5.713956356048584, "rewards/margins": 10.850776672363281, "rewards/rejected": -16.564733505249023, "step": 3043 }, { "epoch": 4.89, "learning_rate": 2.5396353547364245e-07, "logits/chosen": -1.5924901962280273, "logits/rejected": -1.7246167659759521, "logps/chosen": -99.78510284423828, "logps/rejected": -239.112060546875, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -3.228536605834961, "rewards/margins": 11.721044540405273, "rewards/rejected": -14.949579238891602, "step": 3044 }, { "epoch": 4.89, "learning_rate": 2.5386444708680146e-07, "logits/chosen": -1.4147179126739502, "logits/rejected": -1.442866325378418, "logps/chosen": -130.70849609375, "logps/rejected": -265.4307861328125, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -5.769624710083008, "rewards/margins": 13.195096969604492, "rewards/rejected": -18.9647216796875, "step": 3045 }, { "epoch": 4.89, "learning_rate": 2.5376535869996036e-07, "logits/chosen": -1.7117120027542114, "logits/rejected": -1.5665923357009888, "logps/chosen": -181.68759155273438, "logps/rejected": -293.5534362792969, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -6.962737083435059, "rewards/margins": 13.143957138061523, "rewards/rejected": -20.106693267822266, "step": 3046 }, { "epoch": 4.89, "learning_rate": 2.5366627031311927e-07, "logits/chosen": -1.6603600978851318, "logits/rejected": -1.5208940505981445, "logps/chosen": -176.27255249023438, "logps/rejected": -254.94200134277344, "loss": 0.0082, "rewards/accuracies": 1.0, "rewards/chosen": -7.637332439422607, "rewards/margins": 10.188783645629883, "rewards/rejected": -17.826114654541016, "step": 3047 }, { "epoch": 4.89, "learning_rate": 2.5356718192627823e-07, "logits/chosen": -1.7850474119186401, "logits/rejected": -1.6328308582305908, "logps/chosen": -148.95382690429688, "logps/rejected": -254.66424560546875, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -6.718789100646973, "rewards/margins": 11.145008087158203, "rewards/rejected": -17.863798141479492, "step": 3048 }, { "epoch": 4.89, "learning_rate": 2.5346809353943714e-07, "logits/chosen": -1.6109039783477783, "logits/rejected": -1.5951831340789795, "logps/chosen": -166.17770385742188, "logps/rejected": -286.60345458984375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.492368698120117, "rewards/margins": 9.838651657104492, "rewards/rejected": -18.33102035522461, "step": 3049 }, { "epoch": 4.9, "learning_rate": 2.5336900515259615e-07, "logits/chosen": -1.6459803581237793, "logits/rejected": -1.6789069175720215, "logps/chosen": -148.41807556152344, "logps/rejected": -291.2695007324219, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -5.048157691955566, "rewards/margins": 13.181796073913574, "rewards/rejected": -18.22995376586914, "step": 3050 }, { "epoch": 4.9, "learning_rate": 2.5326991676575506e-07, "logits/chosen": -1.4894603490829468, "logits/rejected": -1.5766639709472656, "logps/chosen": -112.33875274658203, "logps/rejected": -255.47564697265625, "loss": 0.0144, "rewards/accuracies": 1.0, "rewards/chosen": -4.537836074829102, "rewards/margins": 12.534717559814453, "rewards/rejected": -17.072551727294922, "step": 3051 }, { "epoch": 4.9, "learning_rate": 2.5317082837891396e-07, "logits/chosen": -1.3636729717254639, "logits/rejected": -1.4332247972488403, "logps/chosen": -123.3023910522461, "logps/rejected": -199.00674438476562, "loss": 0.0212, "rewards/accuracies": 1.0, "rewards/chosen": -6.502621650695801, "rewards/margins": 6.467041969299316, "rewards/rejected": -12.969663619995117, "step": 3052 }, { "epoch": 4.9, "learning_rate": 2.530717399920729e-07, "logits/chosen": -1.7122979164123535, "logits/rejected": -1.6508651971817017, "logps/chosen": -207.53216552734375, "logps/rejected": -333.4276123046875, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -9.89658260345459, "rewards/margins": 14.763897895812988, "rewards/rejected": -24.66048240661621, "step": 3053 }, { "epoch": 4.9, "learning_rate": 2.5297265160523183e-07, "logits/chosen": -1.7131376266479492, "logits/rejected": -1.6878223419189453, "logps/chosen": -187.15902709960938, "logps/rejected": -286.279296875, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -8.35421371459961, "rewards/margins": 10.467395782470703, "rewards/rejected": -18.821609497070312, "step": 3054 }, { "epoch": 4.9, "learning_rate": 2.5287356321839084e-07, "logits/chosen": -1.4956250190734863, "logits/rejected": -1.5285588502883911, "logps/chosen": -134.54701232910156, "logps/rejected": -308.81707763671875, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -5.661291122436523, "rewards/margins": 16.509952545166016, "rewards/rejected": -22.171241760253906, "step": 3055 }, { "epoch": 4.91, "learning_rate": 2.5277447483154975e-07, "logits/chosen": -1.5224061012268066, "logits/rejected": -1.4577035903930664, "logps/chosen": -159.84732055664062, "logps/rejected": -223.0663299560547, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/chosen": -4.8549113273620605, "rewards/margins": 7.827574729919434, "rewards/rejected": -12.682486534118652, "step": 3056 }, { "epoch": 4.91, "learning_rate": 2.5267538644470866e-07, "logits/chosen": -1.6337649822235107, "logits/rejected": -1.6779203414916992, "logps/chosen": -142.1373291015625, "logps/rejected": -237.81472778320312, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/chosen": -5.467741966247559, "rewards/margins": 9.072274208068848, "rewards/rejected": -14.540017127990723, "step": 3057 }, { "epoch": 4.91, "learning_rate": 2.525762980578676e-07, "logits/chosen": -1.5525791645050049, "logits/rejected": -1.5684441328048706, "logps/chosen": -143.92800903320312, "logps/rejected": -277.7154541015625, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": -6.978017330169678, "rewards/margins": 13.036785125732422, "rewards/rejected": -20.014802932739258, "step": 3058 }, { "epoch": 4.91, "learning_rate": 2.524772096710265e-07, "logits/chosen": -1.4951804876327515, "logits/rejected": -1.4670066833496094, "logps/chosen": -157.13235473632812, "logps/rejected": -253.3062744140625, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -7.798226833343506, "rewards/margins": 9.68120288848877, "rewards/rejected": -17.47943115234375, "step": 3059 }, { "epoch": 4.91, "learning_rate": 2.523781212841855e-07, "logits/chosen": -1.5410572290420532, "logits/rejected": -1.6684672832489014, "logps/chosen": -158.89663696289062, "logps/rejected": -267.79168701171875, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": -8.571890830993652, "rewards/margins": 9.328311920166016, "rewards/rejected": -17.900203704833984, "step": 3060 }, { "epoch": 4.91, "learning_rate": 2.5227903289734444e-07, "logits/chosen": -1.4299260377883911, "logits/rejected": -1.4791194200515747, "logps/chosen": -153.26637268066406, "logps/rejected": -262.45172119140625, "loss": 0.0057, "rewards/accuracies": 1.0, "rewards/chosen": -7.322373867034912, "rewards/margins": 10.137031555175781, "rewards/rejected": -17.45940589904785, "step": 3061 }, { "epoch": 4.91, "learning_rate": 2.5217994451050335e-07, "logits/chosen": -1.5761810541152954, "logits/rejected": -1.6085141897201538, "logps/chosen": -152.52017211914062, "logps/rejected": -250.35157775878906, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -5.0363569259643555, "rewards/margins": 10.279949188232422, "rewards/rejected": -15.316306114196777, "step": 3062 }, { "epoch": 4.92, "learning_rate": 2.520808561236623e-07, "logits/chosen": -1.7657877206802368, "logits/rejected": -1.623388648033142, "logps/chosen": -146.90713500976562, "logps/rejected": -253.6886444091797, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -4.065025329589844, "rewards/margins": 13.297927856445312, "rewards/rejected": -17.362953186035156, "step": 3063 }, { "epoch": 4.92, "learning_rate": 2.519817677368212e-07, "logits/chosen": -1.69261634349823, "logits/rejected": -1.6406782865524292, "logps/chosen": -117.64510345458984, "logps/rejected": -219.14857482910156, "loss": 0.0597, "rewards/accuracies": 1.0, "rewards/chosen": -4.527890205383301, "rewards/margins": 9.896730422973633, "rewards/rejected": -14.42462158203125, "step": 3064 }, { "epoch": 4.92, "learning_rate": 2.518826793499802e-07, "logits/chosen": -1.7124931812286377, "logits/rejected": -1.6435210704803467, "logps/chosen": -129.6770782470703, "logps/rejected": -241.09988403320312, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -4.890984535217285, "rewards/margins": 11.25416374206543, "rewards/rejected": -16.1451473236084, "step": 3065 }, { "epoch": 4.92, "learning_rate": 2.5178359096313914e-07, "logits/chosen": -1.517417311668396, "logits/rejected": -1.5557595491409302, "logps/chosen": -142.91761779785156, "logps/rejected": -242.11032104492188, "loss": 0.0061, "rewards/accuracies": 1.0, "rewards/chosen": -7.149068832397461, "rewards/margins": 9.622203826904297, "rewards/rejected": -16.771272659301758, "step": 3066 }, { "epoch": 4.92, "learning_rate": 2.5168450257629804e-07, "logits/chosen": -1.6447687149047852, "logits/rejected": -1.5586367845535278, "logps/chosen": -136.85098266601562, "logps/rejected": -191.77786254882812, "loss": 0.0083, "rewards/accuracies": 1.0, "rewards/chosen": -4.014993667602539, "rewards/margins": 7.356808662414551, "rewards/rejected": -11.37180233001709, "step": 3067 }, { "epoch": 4.92, "learning_rate": 2.51585414189457e-07, "logits/chosen": -1.4543451070785522, "logits/rejected": -1.4207231998443604, "logps/chosen": -144.015869140625, "logps/rejected": -258.7178039550781, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -4.880503177642822, "rewards/margins": 11.478341102600098, "rewards/rejected": -16.358844757080078, "step": 3068 }, { "epoch": 4.93, "learning_rate": 2.514863258026159e-07, "logits/chosen": -1.8979496955871582, "logits/rejected": -1.7937335968017578, "logps/chosen": -98.01948547363281, "logps/rejected": -261.7539367675781, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -1.608144760131836, "rewards/margins": 17.445890426635742, "rewards/rejected": -19.054035186767578, "step": 3069 }, { "epoch": 4.93, "learning_rate": 2.513872374157748e-07, "logits/chosen": -1.692335844039917, "logits/rejected": -1.6906415224075317, "logps/chosen": -140.7911834716797, "logps/rejected": -231.30050659179688, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -5.028026103973389, "rewards/margins": 11.04559326171875, "rewards/rejected": -16.073619842529297, "step": 3070 }, { "epoch": 4.93, "learning_rate": 2.5128814902893383e-07, "logits/chosen": -1.5596023797988892, "logits/rejected": -1.5552492141723633, "logps/chosen": -156.7205810546875, "logps/rejected": -276.81292724609375, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -6.1753339767456055, "rewards/margins": 11.322607040405273, "rewards/rejected": -17.497940063476562, "step": 3071 }, { "epoch": 4.93, "learning_rate": 2.5118906064209274e-07, "logits/chosen": -1.6315504312515259, "logits/rejected": -1.6874337196350098, "logps/chosen": -156.35267639160156, "logps/rejected": -287.9909973144531, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": -7.325174331665039, "rewards/margins": 12.316265106201172, "rewards/rejected": -19.641437530517578, "step": 3072 }, { "epoch": 4.93, "learning_rate": 2.510899722552517e-07, "logits/chosen": -1.615752100944519, "logits/rejected": -1.6367475986480713, "logps/chosen": -117.86549377441406, "logps/rejected": -248.46163940429688, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -2.403353214263916, "rewards/margins": 12.886713027954102, "rewards/rejected": -15.29006576538086, "step": 3073 }, { "epoch": 4.93, "learning_rate": 2.509908838684106e-07, "logits/chosen": -1.4133230447769165, "logits/rejected": -1.5059024095535278, "logps/chosen": -133.52183532714844, "logps/rejected": -250.16741943359375, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": -5.208264350891113, "rewards/margins": 11.906686782836914, "rewards/rejected": -17.114952087402344, "step": 3074 }, { "epoch": 4.94, "learning_rate": 2.508917954815695e-07, "logits/chosen": -1.4344881772994995, "logits/rejected": -1.4670958518981934, "logps/chosen": -136.4660186767578, "logps/rejected": -248.94219970703125, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -5.163681507110596, "rewards/margins": 10.29470443725586, "rewards/rejected": -15.458386421203613, "step": 3075 }, { "epoch": 4.94, "learning_rate": 2.5079270709472847e-07, "logits/chosen": -1.5844206809997559, "logits/rejected": -1.6508249044418335, "logps/chosen": -107.33158874511719, "logps/rejected": -238.4326171875, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -3.270458221435547, "rewards/margins": 12.17056655883789, "rewards/rejected": -15.441024780273438, "step": 3076 }, { "epoch": 4.94, "learning_rate": 2.5069361870788743e-07, "logits/chosen": -1.5883867740631104, "logits/rejected": -1.587373971939087, "logps/chosen": -132.30503845214844, "logps/rejected": -240.78802490234375, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -4.3818864822387695, "rewards/margins": 9.410202980041504, "rewards/rejected": -13.792089462280273, "step": 3077 }, { "epoch": 4.94, "learning_rate": 2.505945303210464e-07, "logits/chosen": -1.528652310371399, "logits/rejected": -1.479478120803833, "logps/chosen": -172.88731384277344, "logps/rejected": -252.09652709960938, "loss": 0.0118, "rewards/accuracies": 1.0, "rewards/chosen": -6.779089450836182, "rewards/margins": 10.40493392944336, "rewards/rejected": -17.184022903442383, "step": 3078 }, { "epoch": 4.94, "learning_rate": 2.504954419342053e-07, "logits/chosen": -1.53297758102417, "logits/rejected": -1.6328545808792114, "logps/chosen": -175.34901428222656, "logps/rejected": -298.1333312988281, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -7.061513900756836, "rewards/margins": 11.571731567382812, "rewards/rejected": -18.63324546813965, "step": 3079 }, { "epoch": 4.94, "learning_rate": 2.503963535473642e-07, "logits/chosen": -1.4776356220245361, "logits/rejected": -1.5513346195220947, "logps/chosen": -140.17054748535156, "logps/rejected": -319.95904541015625, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -6.457078456878662, "rewards/margins": 16.09861946105957, "rewards/rejected": -22.55569839477539, "step": 3080 }, { "epoch": 4.95, "learning_rate": 2.5029726516052316e-07, "logits/chosen": -1.6960495710372925, "logits/rejected": -1.7201521396636963, "logps/chosen": -128.60205078125, "logps/rejected": -244.00936889648438, "loss": 0.0043, "rewards/accuracies": 1.0, "rewards/chosen": -3.88749361038208, "rewards/margins": 10.736011505126953, "rewards/rejected": -14.623505592346191, "step": 3081 }, { "epoch": 4.95, "learning_rate": 2.501981767736821e-07, "logits/chosen": -1.6174514293670654, "logits/rejected": -1.5619838237762451, "logps/chosen": -160.61322021484375, "logps/rejected": -235.08087158203125, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -5.998156547546387, "rewards/margins": 9.564531326293945, "rewards/rejected": -15.562688827514648, "step": 3082 }, { "epoch": 4.95, "learning_rate": 2.500990883868411e-07, "logits/chosen": -1.606747031211853, "logits/rejected": -1.6287868022918701, "logps/chosen": -118.26628112792969, "logps/rejected": -237.82022094726562, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": -3.9723987579345703, "rewards/margins": 12.131949424743652, "rewards/rejected": -16.104347229003906, "step": 3083 }, { "epoch": 4.95, "learning_rate": 2.5e-07, "logits/chosen": -1.5745983123779297, "logits/rejected": -1.6349263191223145, "logps/chosen": -145.8449249267578, "logps/rejected": -267.9464111328125, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/chosen": -5.040528774261475, "rewards/margins": 11.05945110321045, "rewards/rejected": -16.099979400634766, "step": 3084 }, { "epoch": 4.95, "learning_rate": 2.4990091161315895e-07, "logits/chosen": -1.521039366722107, "logits/rejected": -1.5049598217010498, "logps/chosen": -160.49920654296875, "logps/rejected": -269.1514587402344, "loss": 0.0039, "rewards/accuracies": 1.0, "rewards/chosen": -7.744072914123535, "rewards/margins": 10.94176197052002, "rewards/rejected": -18.685834884643555, "step": 3085 }, { "epoch": 4.95, "learning_rate": 2.4980182322631786e-07, "logits/chosen": -1.4584065675735474, "logits/rejected": -1.5422462224960327, "logps/chosen": -155.54901123046875, "logps/rejected": -269.0795593261719, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -7.046666145324707, "rewards/margins": 10.023353576660156, "rewards/rejected": -17.070018768310547, "step": 3086 }, { "epoch": 4.96, "learning_rate": 2.497027348394768e-07, "logits/chosen": -1.7263514995574951, "logits/rejected": -1.721433162689209, "logps/chosen": -165.44935607910156, "logps/rejected": -292.9507141113281, "loss": 0.004, "rewards/accuracies": 1.0, "rewards/chosen": -6.401544094085693, "rewards/margins": 11.778488159179688, "rewards/rejected": -18.18003273010254, "step": 3087 }, { "epoch": 4.96, "learning_rate": 2.496036464526357e-07, "logits/chosen": -1.4228614568710327, "logits/rejected": -1.4179209470748901, "logps/chosen": -168.57325744628906, "logps/rejected": -268.0875244140625, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -6.321219444274902, "rewards/margins": 10.325368881225586, "rewards/rejected": -16.646589279174805, "step": 3088 }, { "epoch": 4.96, "learning_rate": 2.495045580657947e-07, "logits/chosen": -1.5707318782806396, "logits/rejected": -1.5840909481048584, "logps/chosen": -129.78872680664062, "logps/rejected": -246.52664184570312, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -5.227991104125977, "rewards/margins": 10.597474098205566, "rewards/rejected": -15.825465202331543, "step": 3089 }, { "epoch": 4.96, "learning_rate": 2.4940546967895364e-07, "logits/chosen": -1.8096868991851807, "logits/rejected": -1.7173882722854614, "logps/chosen": -104.82066345214844, "logps/rejected": -268.82269287109375, "loss": 0.0059, "rewards/accuracies": 1.0, "rewards/chosen": -2.754993438720703, "rewards/margins": 15.436769485473633, "rewards/rejected": -18.191761016845703, "step": 3090 }, { "epoch": 4.96, "learning_rate": 2.4930638129211255e-07, "logits/chosen": -1.6890531778335571, "logits/rejected": -1.6280899047851562, "logps/chosen": -141.97451782226562, "logps/rejected": -248.74961853027344, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -6.2841033935546875, "rewards/margins": 10.982544898986816, "rewards/rejected": -17.266647338867188, "step": 3091 }, { "epoch": 4.96, "learning_rate": 2.492072929052715e-07, "logits/chosen": -1.4639781713485718, "logits/rejected": -1.542650580406189, "logps/chosen": -139.55494689941406, "logps/rejected": -258.54522705078125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.171555995941162, "rewards/margins": 10.85981559753418, "rewards/rejected": -17.0313720703125, "step": 3092 }, { "epoch": 4.96, "learning_rate": 2.491082045184304e-07, "logits/chosen": -1.4393049478530884, "logits/rejected": -1.4394692182540894, "logps/chosen": -109.1115951538086, "logps/rejected": -237.92742919921875, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -3.2762861251831055, "rewards/margins": 12.328411102294922, "rewards/rejected": -15.604696273803711, "step": 3093 }, { "epoch": 4.97, "learning_rate": 2.490091161315894e-07, "logits/chosen": -1.5235254764556885, "logits/rejected": -1.5486503839492798, "logps/chosen": -140.0886993408203, "logps/rejected": -262.8493957519531, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -5.771365642547607, "rewards/margins": 11.111190795898438, "rewards/rejected": -16.882556915283203, "step": 3094 }, { "epoch": 4.97, "learning_rate": 2.4891002774474833e-07, "logits/chosen": -1.472780466079712, "logits/rejected": -1.5682305097579956, "logps/chosen": -120.42237854003906, "logps/rejected": -265.95587158203125, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -6.262857437133789, "rewards/margins": 12.360076904296875, "rewards/rejected": -18.62293243408203, "step": 3095 }, { "epoch": 4.97, "learning_rate": 2.4881093935790724e-07, "logits/chosen": -1.5604878664016724, "logits/rejected": -1.561241626739502, "logps/chosen": -153.3413848876953, "logps/rejected": -270.0909729003906, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": -6.463719367980957, "rewards/margins": 10.881559371948242, "rewards/rejected": -17.345279693603516, "step": 3096 }, { "epoch": 4.97, "learning_rate": 2.4871185097106615e-07, "logits/chosen": -1.713822603225708, "logits/rejected": -1.672666072845459, "logps/chosen": -195.22149658203125, "logps/rejected": -301.16943359375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -9.990966796875, "rewards/margins": 10.110091209411621, "rewards/rejected": -20.101058959960938, "step": 3097 }, { "epoch": 4.97, "learning_rate": 2.486127625842251e-07, "logits/chosen": -1.3799023628234863, "logits/rejected": -1.4090087413787842, "logps/chosen": -103.22047424316406, "logps/rejected": -253.007568359375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -2.3250179290771484, "rewards/margins": 14.3345365524292, "rewards/rejected": -16.65955352783203, "step": 3098 }, { "epoch": 4.97, "learning_rate": 2.4851367419738407e-07, "logits/chosen": -1.4867222309112549, "logits/rejected": -1.5101947784423828, "logps/chosen": -170.1656036376953, "logps/rejected": -292.2098388671875, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": -8.335787773132324, "rewards/margins": 11.195863723754883, "rewards/rejected": -19.53165054321289, "step": 3099 }, { "epoch": 4.98, "learning_rate": 2.4841458581054303e-07, "logits/chosen": -1.5421479940414429, "logits/rejected": -1.5743883848190308, "logps/chosen": -127.24839782714844, "logps/rejected": -266.1440124511719, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -5.1333537101745605, "rewards/margins": 12.162609100341797, "rewards/rejected": -17.295961380004883, "step": 3100 }, { "epoch": 4.98, "learning_rate": 2.4831549742370193e-07, "logits/chosen": -1.5263954401016235, "logits/rejected": -1.527968406677246, "logps/chosen": -176.54725646972656, "logps/rejected": -288.5918884277344, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -7.533895492553711, "rewards/margins": 11.561506271362305, "rewards/rejected": -19.095401763916016, "step": 3101 }, { "epoch": 4.98, "learning_rate": 2.4821640903686084e-07, "logits/chosen": -1.4578834772109985, "logits/rejected": -1.5096769332885742, "logps/chosen": -144.4181365966797, "logps/rejected": -247.67747497558594, "loss": 0.0069, "rewards/accuracies": 1.0, "rewards/chosen": -6.759390830993652, "rewards/margins": 10.894268035888672, "rewards/rejected": -17.65365982055664, "step": 3102 }, { "epoch": 4.98, "learning_rate": 2.481173206500198e-07, "logits/chosen": -1.5571353435516357, "logits/rejected": -1.5526766777038574, "logps/chosen": -98.32770538330078, "logps/rejected": -214.333740234375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -3.3344192504882812, "rewards/margins": 11.841728210449219, "rewards/rejected": -15.1761474609375, "step": 3103 }, { "epoch": 4.98, "learning_rate": 2.4801823226317876e-07, "logits/chosen": -1.5838717222213745, "logits/rejected": -1.5972464084625244, "logps/chosen": -125.76941680908203, "logps/rejected": -217.18421936035156, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -3.7811689376831055, "rewards/margins": 9.681500434875488, "rewards/rejected": -13.462669372558594, "step": 3104 }, { "epoch": 4.98, "learning_rate": 2.479191438763377e-07, "logits/chosen": -1.554275631904602, "logits/rejected": -1.5663264989852905, "logps/chosen": -133.95654296875, "logps/rejected": -276.4942321777344, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -4.099637031555176, "rewards/margins": 15.120672225952148, "rewards/rejected": -19.22031021118164, "step": 3105 }, { "epoch": 4.99, "learning_rate": 2.4782005548949663e-07, "logits/chosen": -1.4723037481307983, "logits/rejected": -1.4768097400665283, "logps/chosen": -127.9981689453125, "logps/rejected": -222.2070770263672, "loss": 0.0267, "rewards/accuracies": 1.0, "rewards/chosen": -6.291740894317627, "rewards/margins": 9.419509887695312, "rewards/rejected": -15.711250305175781, "step": 3106 }, { "epoch": 4.99, "learning_rate": 2.4772096710265553e-07, "logits/chosen": -1.4645708799362183, "logits/rejected": -1.3986704349517822, "logps/chosen": -138.61245727539062, "logps/rejected": -245.0240478515625, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/chosen": -5.251101016998291, "rewards/margins": 7.6906843185424805, "rewards/rejected": -12.941784858703613, "step": 3107 }, { "epoch": 4.99, "learning_rate": 2.476218787158145e-07, "logits/chosen": -1.483332633972168, "logits/rejected": -1.5682251453399658, "logps/chosen": -150.23435974121094, "logps/rejected": -256.00970458984375, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -6.571688175201416, "rewards/margins": 10.647275924682617, "rewards/rejected": -17.218965530395508, "step": 3108 }, { "epoch": 4.99, "learning_rate": 2.4752279032897345e-07, "logits/chosen": -1.5320026874542236, "logits/rejected": -1.5542941093444824, "logps/chosen": -143.69407653808594, "logps/rejected": -227.49298095703125, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -6.0731611251831055, "rewards/margins": 8.607951164245605, "rewards/rejected": -14.681112289428711, "step": 3109 }, { "epoch": 4.99, "learning_rate": 2.4742370194213236e-07, "logits/chosen": -1.537348985671997, "logits/rejected": -1.5065922737121582, "logps/chosen": -128.8089141845703, "logps/rejected": -243.07215881347656, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -3.816633462905884, "rewards/margins": 12.699992179870605, "rewards/rejected": -16.516626358032227, "step": 3110 }, { "epoch": 4.99, "learning_rate": 2.473246135552913e-07, "logits/chosen": -1.612801432609558, "logits/rejected": -1.576140284538269, "logps/chosen": -156.64932250976562, "logps/rejected": -247.78526306152344, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.467748641967773, "rewards/margins": 10.777105331420898, "rewards/rejected": -17.244853973388672, "step": 3111 }, { "epoch": 5.0, "learning_rate": 2.4722552516845023e-07, "logits/chosen": -1.402331829071045, "logits/rejected": -1.4307838678359985, "logps/chosen": -185.59922790527344, "logps/rejected": -286.432861328125, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -10.13956356048584, "rewards/margins": 10.205191612243652, "rewards/rejected": -20.344755172729492, "step": 3112 }, { "epoch": 5.0, "learning_rate": 2.471264367816092e-07, "logits/chosen": -1.5006393194198608, "logits/rejected": -1.5229551792144775, "logps/chosen": -149.61106872558594, "logps/rejected": -214.18585205078125, "loss": 0.004, "rewards/accuracies": 1.0, "rewards/chosen": -5.988677501678467, "rewards/margins": 7.156899929046631, "rewards/rejected": -13.145576477050781, "step": 3113 }, { "epoch": 5.0, "learning_rate": 2.4702734839476815e-07, "logits/chosen": -1.6205846071243286, "logits/rejected": -1.659084677696228, "logps/chosen": -144.57650756835938, "logps/rejected": -253.15553283691406, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -5.137938022613525, "rewards/margins": 12.348478317260742, "rewards/rejected": -17.486417770385742, "step": 3114 }, { "epoch": 5.0, "learning_rate": 2.4692826000792705e-07, "logits/chosen": -1.7013664245605469, "logits/rejected": -1.7275958061218262, "logps/chosen": -157.40179443359375, "logps/rejected": -309.5972900390625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -5.570490837097168, "rewards/margins": 14.25887680053711, "rewards/rejected": -19.829368591308594, "step": 3115 }, { "epoch": 5.0, "learning_rate": 2.46829171621086e-07, "logits/chosen": -1.6073191165924072, "logits/rejected": -1.6952643394470215, "logps/chosen": -169.89649963378906, "logps/rejected": -324.5791320800781, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -8.033249855041504, "rewards/margins": 13.300962448120117, "rewards/rejected": -21.334213256835938, "step": 3116 }, { "epoch": 5.0, "learning_rate": 2.467300832342449e-07, "logits/chosen": -1.5690288543701172, "logits/rejected": -1.5801321268081665, "logps/chosen": -134.87632751464844, "logps/rejected": -260.5616455078125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -4.494507789611816, "rewards/margins": 12.489862442016602, "rewards/rejected": -16.984371185302734, "step": 3117 }, { "epoch": 5.0, "learning_rate": 2.466309948474039e-07, "logits/chosen": -1.6771221160888672, "logits/rejected": -1.586949110031128, "logps/chosen": -149.57559204101562, "logps/rejected": -286.3363037109375, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -5.784968852996826, "rewards/margins": 13.636580467224121, "rewards/rejected": -19.421550750732422, "step": 3118 }, { "epoch": 5.01, "learning_rate": 2.4653190646056284e-07, "logits/chosen": -1.5849571228027344, "logits/rejected": -1.4959657192230225, "logps/chosen": -126.88955688476562, "logps/rejected": -252.347900390625, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -5.608795642852783, "rewards/margins": 12.170234680175781, "rewards/rejected": -17.779029846191406, "step": 3119 }, { "epoch": 5.01, "learning_rate": 2.4643281807372175e-07, "logits/chosen": -1.4078638553619385, "logits/rejected": -1.3200838565826416, "logps/chosen": -149.1171875, "logps/rejected": -215.20152282714844, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -4.48276948928833, "rewards/margins": 10.235958099365234, "rewards/rejected": -14.718727111816406, "step": 3120 }, { "epoch": 5.01, "learning_rate": 2.463337296868807e-07, "logits/chosen": -1.5108827352523804, "logits/rejected": -1.590162992477417, "logps/chosen": -187.14810180664062, "logps/rejected": -311.34747314453125, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -7.1807403564453125, "rewards/margins": 12.703447341918945, "rewards/rejected": -19.88418960571289, "step": 3121 }, { "epoch": 5.01, "learning_rate": 2.462346413000396e-07, "logits/chosen": -1.5147552490234375, "logits/rejected": -1.5789105892181396, "logps/chosen": -162.84564208984375, "logps/rejected": -312.63934326171875, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -9.183581352233887, "rewards/margins": 13.094157218933105, "rewards/rejected": -22.277738571166992, "step": 3122 }, { "epoch": 5.01, "learning_rate": 2.4613555291319857e-07, "logits/chosen": -1.7397079467773438, "logits/rejected": -1.6673675775527954, "logps/chosen": -164.10562133789062, "logps/rejected": -250.80450439453125, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -6.105806350708008, "rewards/margins": 9.521023750305176, "rewards/rejected": -15.6268310546875, "step": 3123 }, { "epoch": 5.01, "learning_rate": 2.4603646452635753e-07, "logits/chosen": -1.3734591007232666, "logits/rejected": -1.4460797309875488, "logps/chosen": -103.94938659667969, "logps/rejected": -255.7218780517578, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.6443698406219482, "rewards/margins": 13.684560775756836, "rewards/rejected": -17.32893180847168, "step": 3124 }, { "epoch": 5.02, "learning_rate": 2.4593737613951644e-07, "logits/chosen": -1.668335199356079, "logits/rejected": -1.5911206007003784, "logps/chosen": -155.06602478027344, "logps/rejected": -239.8800048828125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -4.271856307983398, "rewards/margins": 11.419143676757812, "rewards/rejected": -15.690999984741211, "step": 3125 }, { "epoch": 5.02, "learning_rate": 2.4583828775267535e-07, "logits/chosen": -1.3719030618667603, "logits/rejected": -1.3534754514694214, "logps/chosen": -94.68092346191406, "logps/rejected": -205.46533203125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -3.412700653076172, "rewards/margins": 10.408016204833984, "rewards/rejected": -13.820716857910156, "step": 3126 }, { "epoch": 5.02, "learning_rate": 2.457391993658343e-07, "logits/chosen": -1.429132103919983, "logits/rejected": -1.3759117126464844, "logps/chosen": -134.21786499023438, "logps/rejected": -209.40074157714844, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -4.532031536102295, "rewards/margins": 9.023947715759277, "rewards/rejected": -13.55597972869873, "step": 3127 }, { "epoch": 5.02, "learning_rate": 2.4564011097899327e-07, "logits/chosen": -1.574343204498291, "logits/rejected": -1.5807675123214722, "logps/chosen": -96.25482177734375, "logps/rejected": -245.55543518066406, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -1.575493574142456, "rewards/margins": 14.908943176269531, "rewards/rejected": -16.484437942504883, "step": 3128 }, { "epoch": 5.02, "learning_rate": 2.4554102259215217e-07, "logits/chosen": -1.4758803844451904, "logits/rejected": -1.503057837486267, "logps/chosen": -155.5498809814453, "logps/rejected": -272.6719970703125, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -5.049112796783447, "rewards/margins": 12.91306209564209, "rewards/rejected": -17.962173461914062, "step": 3129 }, { "epoch": 5.02, "learning_rate": 2.4544193420531113e-07, "logits/chosen": -1.5561389923095703, "logits/rejected": -1.5662267208099365, "logps/chosen": -173.9281005859375, "logps/rejected": -288.9751281738281, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -6.914882183074951, "rewards/margins": 11.669995307922363, "rewards/rejected": -18.584877014160156, "step": 3130 }, { "epoch": 5.03, "learning_rate": 2.4534284581847004e-07, "logits/chosen": -1.4983558654785156, "logits/rejected": -1.3058159351348877, "logps/chosen": -116.93765258789062, "logps/rejected": -221.72483825683594, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -4.662283897399902, "rewards/margins": 11.42985725402832, "rewards/rejected": -16.09214210510254, "step": 3131 }, { "epoch": 5.03, "learning_rate": 2.45243757431629e-07, "logits/chosen": -1.4575530290603638, "logits/rejected": -1.4805736541748047, "logps/chosen": -105.72708129882812, "logps/rejected": -241.04641723632812, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -3.6687145233154297, "rewards/margins": 13.232151985168457, "rewards/rejected": -16.90086555480957, "step": 3132 }, { "epoch": 5.03, "learning_rate": 2.4514466904478796e-07, "logits/chosen": -1.5933864116668701, "logits/rejected": -1.6280900239944458, "logps/chosen": -173.43997192382812, "logps/rejected": -277.2003479003906, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -5.874416351318359, "rewards/margins": 11.494421005249023, "rewards/rejected": -17.368839263916016, "step": 3133 }, { "epoch": 5.03, "learning_rate": 2.4504558065794687e-07, "logits/chosen": -1.6775513887405396, "logits/rejected": -1.6595275402069092, "logps/chosen": -124.96160125732422, "logps/rejected": -248.89614868164062, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -5.017516613006592, "rewards/margins": 12.928281784057617, "rewards/rejected": -17.945798873901367, "step": 3134 }, { "epoch": 5.03, "learning_rate": 2.449464922711058e-07, "logits/chosen": -1.4349302053451538, "logits/rejected": -1.412517786026001, "logps/chosen": -145.3555145263672, "logps/rejected": -252.66746520996094, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -4.50351095199585, "rewards/margins": 12.763410568237305, "rewards/rejected": -17.266921997070312, "step": 3135 }, { "epoch": 5.03, "learning_rate": 2.4484740388426473e-07, "logits/chosen": -1.5984992980957031, "logits/rejected": -1.5627403259277344, "logps/chosen": -101.12865447998047, "logps/rejected": -226.9209442138672, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -3.545706033706665, "rewards/margins": 13.011831283569336, "rewards/rejected": -16.557537078857422, "step": 3136 }, { "epoch": 5.04, "learning_rate": 2.447483154974237e-07, "logits/chosen": -1.5194011926651, "logits/rejected": -1.4146113395690918, "logps/chosen": -154.14649963378906, "logps/rejected": -244.73233032226562, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -5.4118757247924805, "rewards/margins": 10.063342094421387, "rewards/rejected": -15.475218772888184, "step": 3137 }, { "epoch": 5.04, "learning_rate": 2.4464922711058265e-07, "logits/chosen": -1.675168514251709, "logits/rejected": -1.678998351097107, "logps/chosen": -119.29895782470703, "logps/rejected": -259.0997314453125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.4724926948547363, "rewards/margins": 12.605355262756348, "rewards/rejected": -16.077848434448242, "step": 3138 }, { "epoch": 5.04, "learning_rate": 2.4455013872374156e-07, "logits/chosen": -1.3847206830978394, "logits/rejected": -1.410377025604248, "logps/chosen": -148.40048217773438, "logps/rejected": -215.83322143554688, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -6.791936874389648, "rewards/margins": 8.232172012329102, "rewards/rejected": -15.02410888671875, "step": 3139 }, { "epoch": 5.04, "learning_rate": 2.444510503369005e-07, "logits/chosen": -1.5075887441635132, "logits/rejected": -1.5566884279251099, "logps/chosen": -110.22516632080078, "logps/rejected": -267.6278991699219, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -3.38134765625, "rewards/margins": 14.499710083007812, "rewards/rejected": -17.881057739257812, "step": 3140 }, { "epoch": 5.04, "learning_rate": 2.443519619500594e-07, "logits/chosen": -1.561842918395996, "logits/rejected": -1.6149367094039917, "logps/chosen": -141.79708862304688, "logps/rejected": -279.7761535644531, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.632045745849609, "rewards/margins": 11.58454704284668, "rewards/rejected": -18.21659278869629, "step": 3141 }, { "epoch": 5.04, "learning_rate": 2.442528735632184e-07, "logits/chosen": -1.5129773616790771, "logits/rejected": -1.5186468362808228, "logps/chosen": -156.6436004638672, "logps/rejected": -237.1956329345703, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -6.661927223205566, "rewards/margins": 9.545805931091309, "rewards/rejected": -16.207731246948242, "step": 3142 }, { "epoch": 5.04, "learning_rate": 2.4415378517637734e-07, "logits/chosen": -1.4838292598724365, "logits/rejected": -1.5774509906768799, "logps/chosen": -154.13494873046875, "logps/rejected": -323.2926330566406, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -6.614153861999512, "rewards/margins": 15.523143768310547, "rewards/rejected": -22.137298583984375, "step": 3143 }, { "epoch": 5.05, "learning_rate": 2.4405469678953625e-07, "logits/chosen": -1.5540493726730347, "logits/rejected": -1.5812885761260986, "logps/chosen": -133.6961669921875, "logps/rejected": -245.48904418945312, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -4.887031555175781, "rewards/margins": 12.682815551757812, "rewards/rejected": -17.569847106933594, "step": 3144 }, { "epoch": 5.05, "learning_rate": 2.439556084026952e-07, "logits/chosen": -1.524163842201233, "logits/rejected": -1.5160088539123535, "logps/chosen": -142.4969482421875, "logps/rejected": -287.4452819824219, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -5.722354412078857, "rewards/margins": 12.59227466583252, "rewards/rejected": -18.31462860107422, "step": 3145 }, { "epoch": 5.05, "learning_rate": 2.438565200158541e-07, "logits/chosen": -1.6073378324508667, "logits/rejected": -1.6624040603637695, "logps/chosen": -177.421875, "logps/rejected": -341.3675842285156, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.220351219177246, "rewards/margins": 14.256845474243164, "rewards/rejected": -21.477197647094727, "step": 3146 }, { "epoch": 5.05, "learning_rate": 2.437574316290131e-07, "logits/chosen": -1.534429669380188, "logits/rejected": -1.4994834661483765, "logps/chosen": -172.43939208984375, "logps/rejected": -264.2120666503906, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.800379276275635, "rewards/margins": 10.000265121459961, "rewards/rejected": -16.800643920898438, "step": 3147 }, { "epoch": 5.05, "learning_rate": 2.43658343242172e-07, "logits/chosen": -1.4677658081054688, "logits/rejected": -1.421427607536316, "logps/chosen": -163.04380798339844, "logps/rejected": -293.80548095703125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -7.840329170227051, "rewards/margins": 11.317394256591797, "rewards/rejected": -19.157724380493164, "step": 3148 }, { "epoch": 5.05, "learning_rate": 2.4355925485533094e-07, "logits/chosen": -1.824153184890747, "logits/rejected": -1.8157682418823242, "logps/chosen": -125.73027038574219, "logps/rejected": -284.9013977050781, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.916707515716553, "rewards/margins": 14.998092651367188, "rewards/rejected": -19.9148006439209, "step": 3149 }, { "epoch": 5.06, "learning_rate": 2.434601664684899e-07, "logits/chosen": -1.4629919528961182, "logits/rejected": -1.646169662475586, "logps/chosen": -135.14364624023438, "logps/rejected": -273.2564697265625, "loss": 0.0351, "rewards/accuracies": 1.0, "rewards/chosen": -6.158356189727783, "rewards/margins": 10.67152214050293, "rewards/rejected": -16.829879760742188, "step": 3150 }, { "epoch": 5.06, "learning_rate": 2.433610780816488e-07, "logits/chosen": -1.498049259185791, "logits/rejected": -1.4637588262557983, "logps/chosen": -166.65927124023438, "logps/rejected": -306.7194519042969, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -6.751962184906006, "rewards/margins": 13.092897415161133, "rewards/rejected": -19.844858169555664, "step": 3151 }, { "epoch": 5.06, "learning_rate": 2.4326198969480777e-07, "logits/chosen": -1.6863857507705688, "logits/rejected": -1.7126166820526123, "logps/chosen": -128.91854858398438, "logps/rejected": -256.70367431640625, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -4.685506343841553, "rewards/margins": 12.960786819458008, "rewards/rejected": -17.646291732788086, "step": 3152 }, { "epoch": 5.06, "learning_rate": 2.431629013079667e-07, "logits/chosen": -1.6440143585205078, "logits/rejected": -1.7044243812561035, "logps/chosen": -142.89927673339844, "logps/rejected": -303.29522705078125, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -5.680419921875, "rewards/margins": 13.770174980163574, "rewards/rejected": -19.45059585571289, "step": 3153 }, { "epoch": 5.06, "learning_rate": 2.4306381292112564e-07, "logits/chosen": -1.5221821069717407, "logits/rejected": -1.5835154056549072, "logps/chosen": -120.89131164550781, "logps/rejected": -238.28048706054688, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -5.144491195678711, "rewards/margins": 11.279291152954102, "rewards/rejected": -16.423782348632812, "step": 3154 }, { "epoch": 5.06, "learning_rate": 2.429647245342846e-07, "logits/chosen": -1.554871678352356, "logits/rejected": -1.6169588565826416, "logps/chosen": -130.958740234375, "logps/rejected": -251.32794189453125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -4.95173978805542, "rewards/margins": 11.645973205566406, "rewards/rejected": -16.597713470458984, "step": 3155 }, { "epoch": 5.07, "learning_rate": 2.428656361474435e-07, "logits/chosen": -1.6710537672042847, "logits/rejected": -1.720177173614502, "logps/chosen": -166.33872985839844, "logps/rejected": -282.7784118652344, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -4.695423126220703, "rewards/margins": 11.024748802185059, "rewards/rejected": -15.720171928405762, "step": 3156 }, { "epoch": 5.07, "learning_rate": 2.4276654776060246e-07, "logits/chosen": -1.653134822845459, "logits/rejected": -1.6333341598510742, "logps/chosen": -141.50177001953125, "logps/rejected": -273.343017578125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.305607318878174, "rewards/margins": 13.651803970336914, "rewards/rejected": -18.95741081237793, "step": 3157 }, { "epoch": 5.07, "learning_rate": 2.4266745937376137e-07, "logits/chosen": -1.4675874710083008, "logits/rejected": -1.5357871055603027, "logps/chosen": -145.43722534179688, "logps/rejected": -272.02142333984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.341757774353027, "rewards/margins": 10.81241226196289, "rewards/rejected": -17.154170989990234, "step": 3158 }, { "epoch": 5.07, "learning_rate": 2.4256837098692033e-07, "logits/chosen": -1.4958487749099731, "logits/rejected": -1.5321637392044067, "logps/chosen": -139.0830535888672, "logps/rejected": -249.35081481933594, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -6.514171600341797, "rewards/margins": 10.439618110656738, "rewards/rejected": -16.95379066467285, "step": 3159 }, { "epoch": 5.07, "learning_rate": 2.4246928260007924e-07, "logits/chosen": -1.5937190055847168, "logits/rejected": -1.5868417024612427, "logps/chosen": -112.6384506225586, "logps/rejected": -240.93423461914062, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.328816890716553, "rewards/margins": 11.782520294189453, "rewards/rejected": -16.111337661743164, "step": 3160 }, { "epoch": 5.07, "learning_rate": 2.423701942132382e-07, "logits/chosen": -1.6132572889328003, "logits/rejected": -1.5710232257843018, "logps/chosen": -167.9349822998047, "logps/rejected": -280.2166748046875, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -7.472358703613281, "rewards/margins": 12.534078598022461, "rewards/rejected": -20.006437301635742, "step": 3161 }, { "epoch": 5.08, "learning_rate": 2.4227110582639716e-07, "logits/chosen": -1.548827886581421, "logits/rejected": -1.486708164215088, "logps/chosen": -129.19781494140625, "logps/rejected": -243.1239013671875, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -5.169085502624512, "rewards/margins": 11.44007396697998, "rewards/rejected": -16.609159469604492, "step": 3162 }, { "epoch": 5.08, "learning_rate": 2.4217201743955606e-07, "logits/chosen": -1.4925241470336914, "logits/rejected": -1.4897024631500244, "logps/chosen": -171.30148315429688, "logps/rejected": -314.03851318359375, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -7.23686408996582, "rewards/margins": 15.026387214660645, "rewards/rejected": -22.26325225830078, "step": 3163 }, { "epoch": 5.08, "learning_rate": 2.42072929052715e-07, "logits/chosen": -1.6479902267456055, "logits/rejected": -1.5054610967636108, "logps/chosen": -151.65875244140625, "logps/rejected": -258.36505126953125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -5.161971092224121, "rewards/margins": 13.31981372833252, "rewards/rejected": -18.48178482055664, "step": 3164 }, { "epoch": 5.08, "learning_rate": 2.4197384066587393e-07, "logits/chosen": -1.6587005853652954, "logits/rejected": -1.6956088542938232, "logps/chosen": -125.80198669433594, "logps/rejected": -257.9600524902344, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.259902000427246, "rewards/margins": 13.361173629760742, "rewards/rejected": -17.621074676513672, "step": 3165 }, { "epoch": 5.08, "learning_rate": 2.418747522790329e-07, "logits/chosen": -1.5346665382385254, "logits/rejected": -1.5596517324447632, "logps/chosen": -200.36203002929688, "logps/rejected": -317.67291259765625, "loss": 0.0087, "rewards/accuracies": 1.0, "rewards/chosen": -9.520515441894531, "rewards/margins": 10.853381156921387, "rewards/rejected": -20.3738956451416, "step": 3166 }, { "epoch": 5.08, "learning_rate": 2.417756638921918e-07, "logits/chosen": -1.469420075416565, "logits/rejected": -1.4556242227554321, "logps/chosen": -131.2755584716797, "logps/rejected": -303.41162109375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -4.084958076477051, "rewards/margins": 13.508155822753906, "rewards/rejected": -17.59311294555664, "step": 3167 }, { "epoch": 5.09, "learning_rate": 2.4167657550535076e-07, "logits/chosen": -1.4631619453430176, "logits/rejected": -1.5311259031295776, "logps/chosen": -145.0003662109375, "logps/rejected": -245.78659057617188, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -6.472144603729248, "rewards/margins": 9.785760879516602, "rewards/rejected": -16.257904052734375, "step": 3168 }, { "epoch": 5.09, "learning_rate": 2.415774871185097e-07, "logits/chosen": -1.625868558883667, "logits/rejected": -1.655190110206604, "logps/chosen": -119.47862243652344, "logps/rejected": -231.51852416992188, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -3.8015151023864746, "rewards/margins": 11.319512367248535, "rewards/rejected": -15.121026992797852, "step": 3169 }, { "epoch": 5.09, "learning_rate": 2.414783987316686e-07, "logits/chosen": -1.4148235321044922, "logits/rejected": -1.5776946544647217, "logps/chosen": -112.34758758544922, "logps/rejected": -229.85975646972656, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -4.333334445953369, "rewards/margins": 9.78282642364502, "rewards/rejected": -14.116161346435547, "step": 3170 }, { "epoch": 5.09, "learning_rate": 2.413793103448276e-07, "logits/chosen": -1.6155585050582886, "logits/rejected": -1.5806667804718018, "logps/chosen": -170.17526245117188, "logps/rejected": -273.2729797363281, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -6.624184608459473, "rewards/margins": 11.053913116455078, "rewards/rejected": -17.678096771240234, "step": 3171 }, { "epoch": 5.09, "learning_rate": 2.412802219579865e-07, "logits/chosen": -1.6264030933380127, "logits/rejected": -1.5830748081207275, "logps/chosen": -146.99046325683594, "logps/rejected": -300.71966552734375, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -5.727875232696533, "rewards/margins": 15.240476608276367, "rewards/rejected": -20.968353271484375, "step": 3172 }, { "epoch": 5.09, "learning_rate": 2.4118113357114545e-07, "logits/chosen": -1.4874294996261597, "logits/rejected": -1.6085609197616577, "logps/chosen": -158.98423767089844, "logps/rejected": -268.0056457519531, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -7.468179225921631, "rewards/margins": 10.704697608947754, "rewards/rejected": -18.172876358032227, "step": 3173 }, { "epoch": 5.09, "learning_rate": 2.410820451843044e-07, "logits/chosen": -1.5545432567596436, "logits/rejected": -1.6085259914398193, "logps/chosen": -108.6632308959961, "logps/rejected": -287.2519226074219, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.9405882358551025, "rewards/margins": 15.286620140075684, "rewards/rejected": -19.227209091186523, "step": 3174 }, { "epoch": 5.1, "learning_rate": 2.409829567974633e-07, "logits/chosen": -1.6072496175765991, "logits/rejected": -1.5307139158248901, "logps/chosen": -168.88690185546875, "logps/rejected": -244.57989501953125, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -6.488372802734375, "rewards/margins": 8.880040168762207, "rewards/rejected": -15.368412971496582, "step": 3175 }, { "epoch": 5.1, "learning_rate": 2.408838684106223e-07, "logits/chosen": -1.3584026098251343, "logits/rejected": -1.3968713283538818, "logps/chosen": -149.29000854492188, "logps/rejected": -283.7102355957031, "loss": 0.0099, "rewards/accuracies": 1.0, "rewards/chosen": -7.038351058959961, "rewards/margins": 13.755119323730469, "rewards/rejected": -20.79347038269043, "step": 3176 }, { "epoch": 5.1, "learning_rate": 2.407847800237812e-07, "logits/chosen": -1.6401853561401367, "logits/rejected": -1.5146300792694092, "logps/chosen": -199.17617797851562, "logps/rejected": -282.709716796875, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -7.502864360809326, "rewards/margins": 10.344626426696777, "rewards/rejected": -17.847490310668945, "step": 3177 }, { "epoch": 5.1, "learning_rate": 2.4068569163694014e-07, "logits/chosen": -1.6261147260665894, "logits/rejected": -1.6783071756362915, "logps/chosen": -195.93594360351562, "logps/rejected": -296.75115966796875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -6.813990592956543, "rewards/margins": 9.98478889465332, "rewards/rejected": -16.798778533935547, "step": 3178 }, { "epoch": 5.1, "learning_rate": 2.405866032500991e-07, "logits/chosen": -1.5799000263214111, "logits/rejected": -1.5825241804122925, "logps/chosen": -116.3267593383789, "logps/rejected": -216.80186462402344, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.04426383972168, "rewards/margins": 9.95551872253418, "rewards/rejected": -13.999783515930176, "step": 3179 }, { "epoch": 5.1, "learning_rate": 2.40487514863258e-07, "logits/chosen": -1.7790751457214355, "logits/rejected": -1.7287636995315552, "logps/chosen": -162.95701599121094, "logps/rejected": -292.09710693359375, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -7.054365158081055, "rewards/margins": 14.684898376464844, "rewards/rejected": -21.73926544189453, "step": 3180 }, { "epoch": 5.11, "learning_rate": 2.4038842647641697e-07, "logits/chosen": -1.5564777851104736, "logits/rejected": -1.4983683824539185, "logps/chosen": -153.91122436523438, "logps/rejected": -304.90020751953125, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -5.46242094039917, "rewards/margins": 13.035486221313477, "rewards/rejected": -18.497905731201172, "step": 3181 }, { "epoch": 5.11, "learning_rate": 2.402893380895759e-07, "logits/chosen": -1.5475441217422485, "logits/rejected": -1.5356594324111938, "logps/chosen": -147.68112182617188, "logps/rejected": -248.908935546875, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -7.369869232177734, "rewards/margins": 9.40334415435791, "rewards/rejected": -16.773212432861328, "step": 3182 }, { "epoch": 5.11, "learning_rate": 2.4019024970273484e-07, "logits/chosen": -1.5567400455474854, "logits/rejected": -1.5901281833648682, "logps/chosen": -165.19375610351562, "logps/rejected": -299.53887939453125, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -7.687152862548828, "rewards/margins": 11.854867935180664, "rewards/rejected": -19.542020797729492, "step": 3183 }, { "epoch": 5.11, "learning_rate": 2.400911613158938e-07, "logits/chosen": -1.5024412870407104, "logits/rejected": -1.550827980041504, "logps/chosen": -143.9988555908203, "logps/rejected": -267.4727783203125, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -5.880146026611328, "rewards/margins": 11.272326469421387, "rewards/rejected": -17.15247344970703, "step": 3184 }, { "epoch": 5.11, "learning_rate": 2.399920729290527e-07, "logits/chosen": -1.5089281797409058, "logits/rejected": -1.4820448160171509, "logps/chosen": -104.75028228759766, "logps/rejected": -213.5843505859375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -3.3720955848693848, "rewards/margins": 10.765411376953125, "rewards/rejected": -14.137507438659668, "step": 3185 }, { "epoch": 5.11, "learning_rate": 2.398929845422116e-07, "logits/chosen": -1.5917717218399048, "logits/rejected": -1.5431129932403564, "logps/chosen": -97.7753677368164, "logps/rejected": -204.78872680664062, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -2.330921173095703, "rewards/margins": 11.401549339294434, "rewards/rejected": -13.732471466064453, "step": 3186 }, { "epoch": 5.12, "learning_rate": 2.3979389615537057e-07, "logits/chosen": -1.54288649559021, "logits/rejected": -1.52110755443573, "logps/chosen": -131.3324737548828, "logps/rejected": -243.46820068359375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -3.9985809326171875, "rewards/margins": 11.441302299499512, "rewards/rejected": -15.439882278442383, "step": 3187 }, { "epoch": 5.12, "learning_rate": 2.3969480776852953e-07, "logits/chosen": -1.559395432472229, "logits/rejected": -1.570784330368042, "logps/chosen": -137.42257690429688, "logps/rejected": -250.772216796875, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -5.311646938323975, "rewards/margins": 11.314321517944336, "rewards/rejected": -16.62596893310547, "step": 3188 }, { "epoch": 5.12, "learning_rate": 2.3959571938168843e-07, "logits/chosen": -1.4227893352508545, "logits/rejected": -1.4221336841583252, "logps/chosen": -161.3375244140625, "logps/rejected": -251.66964721679688, "loss": 0.0054, "rewards/accuracies": 1.0, "rewards/chosen": -8.020692825317383, "rewards/margins": 9.012049674987793, "rewards/rejected": -17.03274154663086, "step": 3189 }, { "epoch": 5.12, "learning_rate": 2.394966309948474e-07, "logits/chosen": -1.6116386651992798, "logits/rejected": -1.6298054456710815, "logps/chosen": -136.89144897460938, "logps/rejected": -299.12750244140625, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -4.895498275756836, "rewards/margins": 15.831296920776367, "rewards/rejected": -20.726795196533203, "step": 3190 }, { "epoch": 5.12, "learning_rate": 2.393975426080063e-07, "logits/chosen": -1.624500036239624, "logits/rejected": -1.7062586545944214, "logps/chosen": -158.8345947265625, "logps/rejected": -289.94500732421875, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -6.207781791687012, "rewards/margins": 12.679265975952148, "rewards/rejected": -18.887046813964844, "step": 3191 }, { "epoch": 5.12, "learning_rate": 2.3929845422116526e-07, "logits/chosen": -1.6003546714782715, "logits/rejected": -1.613721489906311, "logps/chosen": -154.72366333007812, "logps/rejected": -287.6579895019531, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -6.323756217956543, "rewards/margins": 13.252193450927734, "rewards/rejected": -19.575950622558594, "step": 3192 }, { "epoch": 5.13, "learning_rate": 2.391993658343242e-07, "logits/chosen": -1.550842046737671, "logits/rejected": -1.6385729312896729, "logps/chosen": -100.74845886230469, "logps/rejected": -266.0341796875, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -2.272961139678955, "rewards/margins": 14.451205253601074, "rewards/rejected": -16.724166870117188, "step": 3193 }, { "epoch": 5.13, "learning_rate": 2.3910027744748313e-07, "logits/chosen": -1.4961446523666382, "logits/rejected": -1.4985783100128174, "logps/chosen": -145.59654235839844, "logps/rejected": -288.4313049316406, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -6.940141677856445, "rewards/margins": 14.661964416503906, "rewards/rejected": -21.60210609436035, "step": 3194 }, { "epoch": 5.13, "learning_rate": 2.390011890606421e-07, "logits/chosen": -1.419656753540039, "logits/rejected": -1.4520695209503174, "logps/chosen": -123.42374420166016, "logps/rejected": -256.1627197265625, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -3.6006455421447754, "rewards/margins": 13.859405517578125, "rewards/rejected": -17.460050582885742, "step": 3195 }, { "epoch": 5.13, "learning_rate": 2.38902100673801e-07, "logits/chosen": -1.5750820636749268, "logits/rejected": -1.584234595298767, "logps/chosen": -126.82999420166016, "logps/rejected": -229.6516571044922, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -4.250291347503662, "rewards/margins": 10.041524887084961, "rewards/rejected": -14.291815757751465, "step": 3196 }, { "epoch": 5.13, "learning_rate": 2.3880301228695995e-07, "logits/chosen": -1.646533727645874, "logits/rejected": -1.6222277879714966, "logps/chosen": -158.57931518554688, "logps/rejected": -277.1866455078125, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -7.46047306060791, "rewards/margins": 12.738115310668945, "rewards/rejected": -20.19858741760254, "step": 3197 }, { "epoch": 5.13, "learning_rate": 2.387039239001189e-07, "logits/chosen": -1.5066620111465454, "logits/rejected": -1.5057357549667358, "logps/chosen": -148.2268829345703, "logps/rejected": -294.7750244140625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -7.161953449249268, "rewards/margins": 14.586830139160156, "rewards/rejected": -21.748783111572266, "step": 3198 }, { "epoch": 5.13, "learning_rate": 2.386048355132778e-07, "logits/chosen": -1.6287343502044678, "logits/rejected": -1.6164053678512573, "logps/chosen": -164.13414001464844, "logps/rejected": -249.60653686523438, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -6.410094738006592, "rewards/margins": 10.765108108520508, "rewards/rejected": -17.17520523071289, "step": 3199 }, { "epoch": 5.14, "learning_rate": 2.385057471264368e-07, "logits/chosen": -1.4799693822860718, "logits/rejected": -1.5283031463623047, "logps/chosen": -146.6845703125, "logps/rejected": -269.7674560546875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.749364852905273, "rewards/margins": 11.234390258789062, "rewards/rejected": -16.983755111694336, "step": 3200 }, { "epoch": 5.14, "learning_rate": 2.3840665873959571e-07, "logits/chosen": -1.5180721282958984, "logits/rejected": -1.5383292436599731, "logps/chosen": -140.6108856201172, "logps/rejected": -295.345458984375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.321572303771973, "rewards/margins": 13.186603546142578, "rewards/rejected": -18.508174896240234, "step": 3201 }, { "epoch": 5.14, "learning_rate": 2.3830757035275465e-07, "logits/chosen": -1.6529362201690674, "logits/rejected": -1.6601307392120361, "logps/chosen": -145.5390625, "logps/rejected": -277.162841796875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.232569694519043, "rewards/margins": 13.778791427612305, "rewards/rejected": -19.01136016845703, "step": 3202 }, { "epoch": 5.14, "learning_rate": 2.3820848196591358e-07, "logits/chosen": -1.4580551385879517, "logits/rejected": -1.5134916305541992, "logps/chosen": -106.58941650390625, "logps/rejected": -242.60333251953125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -3.5802881717681885, "rewards/margins": 10.833939552307129, "rewards/rejected": -14.414228439331055, "step": 3203 }, { "epoch": 5.14, "learning_rate": 2.3810939357907254e-07, "logits/chosen": -1.5074307918548584, "logits/rejected": -1.5751277208328247, "logps/chosen": -147.79502868652344, "logps/rejected": -249.8145751953125, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -5.987096786499023, "rewards/margins": 10.71384334564209, "rewards/rejected": -16.700939178466797, "step": 3204 }, { "epoch": 5.14, "learning_rate": 2.3801030519223145e-07, "logits/chosen": -1.5912379026412964, "logits/rejected": -1.605468988418579, "logps/chosen": -127.42286682128906, "logps/rejected": -254.30679321289062, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -4.925098896026611, "rewards/margins": 12.603862762451172, "rewards/rejected": -17.528961181640625, "step": 3205 }, { "epoch": 5.15, "learning_rate": 2.3791121680539038e-07, "logits/chosen": -1.432645320892334, "logits/rejected": -1.4523851871490479, "logps/chosen": -142.0499267578125, "logps/rejected": -309.50653076171875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -6.040424823760986, "rewards/margins": 14.45701789855957, "rewards/rejected": -20.4974422454834, "step": 3206 }, { "epoch": 5.15, "learning_rate": 2.3781212841854934e-07, "logits/chosen": -1.5784976482391357, "logits/rejected": -1.5892794132232666, "logps/chosen": -84.11479949951172, "logps/rejected": -244.76019287109375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -1.9428043365478516, "rewards/margins": 15.518158912658691, "rewards/rejected": -17.460962295532227, "step": 3207 }, { "epoch": 5.15, "learning_rate": 2.3771304003170827e-07, "logits/chosen": -1.5341260433197021, "logits/rejected": -1.5225542783737183, "logps/chosen": -169.13433837890625, "logps/rejected": -288.4042663574219, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -6.703637599945068, "rewards/margins": 12.756125450134277, "rewards/rejected": -19.459762573242188, "step": 3208 }, { "epoch": 5.15, "learning_rate": 2.3761395164486723e-07, "logits/chosen": -1.478395938873291, "logits/rejected": -1.5561151504516602, "logps/chosen": -131.5529022216797, "logps/rejected": -290.9166564941406, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -3.7935383319854736, "rewards/margins": 15.696136474609375, "rewards/rejected": -19.489673614501953, "step": 3209 }, { "epoch": 5.15, "learning_rate": 2.3751486325802614e-07, "logits/chosen": -1.643330454826355, "logits/rejected": -1.7135322093963623, "logps/chosen": -144.00119018554688, "logps/rejected": -237.15234375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -5.963705062866211, "rewards/margins": 10.236708641052246, "rewards/rejected": -16.200414657592773, "step": 3210 }, { "epoch": 5.15, "learning_rate": 2.3741577487118507e-07, "logits/chosen": -1.4808939695358276, "logits/rejected": -1.4823921918869019, "logps/chosen": -152.72988891601562, "logps/rejected": -240.95889282226562, "loss": 0.007, "rewards/accuracies": 1.0, "rewards/chosen": -6.707036972045898, "rewards/margins": 9.11284351348877, "rewards/rejected": -15.819881439208984, "step": 3211 }, { "epoch": 5.16, "learning_rate": 2.3731668648434403e-07, "logits/chosen": -1.6992356777191162, "logits/rejected": -1.7742265462875366, "logps/chosen": -107.19584655761719, "logps/rejected": -208.1270751953125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -2.9430060386657715, "rewards/margins": 10.682143211364746, "rewards/rejected": -13.625149726867676, "step": 3212 }, { "epoch": 5.16, "learning_rate": 2.3721759809750297e-07, "logits/chosen": -1.4266363382339478, "logits/rejected": -1.4660193920135498, "logps/chosen": -150.39674377441406, "logps/rejected": -326.59552001953125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.167367935180664, "rewards/margins": 17.413372039794922, "rewards/rejected": -23.58074188232422, "step": 3213 }, { "epoch": 5.16, "learning_rate": 2.371185097106619e-07, "logits/chosen": -1.6089842319488525, "logits/rejected": -1.5871546268463135, "logps/chosen": -114.25248718261719, "logps/rejected": -219.93336486816406, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.5139923095703125, "rewards/margins": 11.068422317504883, "rewards/rejected": -14.582414627075195, "step": 3214 }, { "epoch": 5.16, "learning_rate": 2.3701942132382083e-07, "logits/chosen": -1.4985086917877197, "logits/rejected": -1.544895052909851, "logps/chosen": -203.76171875, "logps/rejected": -313.0286865234375, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -9.600740432739258, "rewards/margins": 10.468510627746582, "rewards/rejected": -20.069250106811523, "step": 3215 }, { "epoch": 5.16, "learning_rate": 2.3692033293697977e-07, "logits/chosen": -1.5833156108856201, "logits/rejected": -1.6952776908874512, "logps/chosen": -115.45599365234375, "logps/rejected": -243.67604064941406, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.594620704650879, "rewards/margins": 11.863938331604004, "rewards/rejected": -17.458559036254883, "step": 3216 }, { "epoch": 5.16, "learning_rate": 2.3682124455013873e-07, "logits/chosen": -1.4744211435317993, "logits/rejected": -1.4402027130126953, "logps/chosen": -122.39469909667969, "logps/rejected": -213.79937744140625, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -3.751378059387207, "rewards/margins": 10.528996467590332, "rewards/rejected": -14.280374526977539, "step": 3217 }, { "epoch": 5.17, "learning_rate": 2.3672215616329766e-07, "logits/chosen": -1.4203299283981323, "logits/rejected": -1.5032073259353638, "logps/chosen": -117.93722534179688, "logps/rejected": -296.6234130859375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -5.513025283813477, "rewards/margins": 15.677573204040527, "rewards/rejected": -21.190597534179688, "step": 3218 }, { "epoch": 5.17, "learning_rate": 2.3662306777645657e-07, "logits/chosen": -1.5693271160125732, "logits/rejected": -1.5620808601379395, "logps/chosen": -161.18385314941406, "logps/rejected": -257.6756591796875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -6.115326881408691, "rewards/margins": 10.601192474365234, "rewards/rejected": -16.71651840209961, "step": 3219 }, { "epoch": 5.17, "learning_rate": 2.3652397938961553e-07, "logits/chosen": -1.5126863718032837, "logits/rejected": -1.5029302835464478, "logps/chosen": -132.8124237060547, "logps/rejected": -271.66888427734375, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -5.430583477020264, "rewards/margins": 13.23404598236084, "rewards/rejected": -18.664630889892578, "step": 3220 }, { "epoch": 5.17, "learning_rate": 2.3642489100277446e-07, "logits/chosen": -1.591031551361084, "logits/rejected": -1.5691275596618652, "logps/chosen": -119.74256896972656, "logps/rejected": -261.3751220703125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.776947975158691, "rewards/margins": 14.37158489227295, "rewards/rejected": -19.14853286743164, "step": 3221 }, { "epoch": 5.17, "learning_rate": 2.3632580261593342e-07, "logits/chosen": -1.4939556121826172, "logits/rejected": -1.4509109258651733, "logps/chosen": -126.8677978515625, "logps/rejected": -199.62594604492188, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -4.6359381675720215, "rewards/margins": 7.349055767059326, "rewards/rejected": -11.984993934631348, "step": 3222 }, { "epoch": 5.17, "learning_rate": 2.3622671422909235e-07, "logits/chosen": -1.5984020233154297, "logits/rejected": -1.5163812637329102, "logps/chosen": -205.803466796875, "logps/rejected": -278.2143859863281, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -9.173091888427734, "rewards/margins": 9.750670433044434, "rewards/rejected": -18.923763275146484, "step": 3223 }, { "epoch": 5.17, "learning_rate": 2.3612762584225126e-07, "logits/chosen": -1.5635510683059692, "logits/rejected": -1.5466606616973877, "logps/chosen": -132.71099853515625, "logps/rejected": -317.65966796875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -4.270294666290283, "rewards/margins": 17.413549423217773, "rewards/rejected": -21.6838436126709, "step": 3224 }, { "epoch": 5.18, "learning_rate": 2.3602853745541022e-07, "logits/chosen": -1.6111106872558594, "logits/rejected": -1.6414375305175781, "logps/chosen": -124.83035278320312, "logps/rejected": -252.02389526367188, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -4.641130447387695, "rewards/margins": 13.044724464416504, "rewards/rejected": -17.685855865478516, "step": 3225 }, { "epoch": 5.18, "learning_rate": 2.3592944906856915e-07, "logits/chosen": -1.4124680757522583, "logits/rejected": -1.4615193605422974, "logps/chosen": -167.5802459716797, "logps/rejected": -281.37481689453125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.54211950302124, "rewards/margins": 11.974138259887695, "rewards/rejected": -18.516258239746094, "step": 3226 }, { "epoch": 5.18, "learning_rate": 2.3583036068172809e-07, "logits/chosen": -1.5986031293869019, "logits/rejected": -1.6610945463180542, "logps/chosen": -141.59658813476562, "logps/rejected": -285.4009704589844, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.51434326171875, "rewards/margins": 14.436666488647461, "rewards/rejected": -18.95100975036621, "step": 3227 }, { "epoch": 5.18, "learning_rate": 2.3573127229488705e-07, "logits/chosen": -1.4598937034606934, "logits/rejected": -1.487381935119629, "logps/chosen": -182.51158142089844, "logps/rejected": -309.7664794921875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -8.192888259887695, "rewards/margins": 11.046586990356445, "rewards/rejected": -19.23947525024414, "step": 3228 }, { "epoch": 5.18, "learning_rate": 2.3563218390804595e-07, "logits/chosen": -1.6894848346710205, "logits/rejected": -1.6161319017410278, "logps/chosen": -169.37271118164062, "logps/rejected": -266.62249755859375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -6.891951084136963, "rewards/margins": 11.862787246704102, "rewards/rejected": -18.754737854003906, "step": 3229 }, { "epoch": 5.18, "learning_rate": 2.355330955212049e-07, "logits/chosen": -1.5411924123764038, "logits/rejected": -1.6398751735687256, "logps/chosen": -165.51388549804688, "logps/rejected": -267.7999267578125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -7.744457244873047, "rewards/margins": 10.326869010925293, "rewards/rejected": -18.071327209472656, "step": 3230 }, { "epoch": 5.19, "learning_rate": 2.3543400713436385e-07, "logits/chosen": -1.5037921667099, "logits/rejected": -1.5333102941513062, "logps/chosen": -149.53155517578125, "logps/rejected": -303.3439025878906, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -6.761889934539795, "rewards/margins": 13.212581634521484, "rewards/rejected": -19.974472045898438, "step": 3231 }, { "epoch": 5.19, "learning_rate": 2.3533491874752278e-07, "logits/chosen": -1.675283670425415, "logits/rejected": -1.6726597547531128, "logps/chosen": -108.67935180664062, "logps/rejected": -214.2249298095703, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -2.9023025035858154, "rewards/margins": 11.04350757598877, "rewards/rejected": -13.945810317993164, "step": 3232 }, { "epoch": 5.19, "learning_rate": 2.3523583036068174e-07, "logits/chosen": -1.4788655042648315, "logits/rejected": -1.407546043395996, "logps/chosen": -163.31475830078125, "logps/rejected": -283.85540771484375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.411073684692383, "rewards/margins": 11.93958854675293, "rewards/rejected": -18.350662231445312, "step": 3233 }, { "epoch": 5.19, "learning_rate": 2.3513674197384065e-07, "logits/chosen": -1.5954835414886475, "logits/rejected": -1.5625483989715576, "logps/chosen": -134.38287353515625, "logps/rejected": -241.89967346191406, "loss": 0.0097, "rewards/accuracies": 1.0, "rewards/chosen": -6.399716377258301, "rewards/margins": 9.627535820007324, "rewards/rejected": -16.027252197265625, "step": 3234 }, { "epoch": 5.19, "learning_rate": 2.3503765358699958e-07, "logits/chosen": -1.6475552320480347, "logits/rejected": -1.6721241474151611, "logps/chosen": -176.6290740966797, "logps/rejected": -307.8644104003906, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -7.776479244232178, "rewards/margins": 12.946535110473633, "rewards/rejected": -20.72301483154297, "step": 3235 }, { "epoch": 5.19, "learning_rate": 2.3493856520015854e-07, "logits/chosen": -1.3998464345932007, "logits/rejected": -1.5255343914031982, "logps/chosen": -152.8020477294922, "logps/rejected": -280.48681640625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -6.6975202560424805, "rewards/margins": 11.225238800048828, "rewards/rejected": -17.922760009765625, "step": 3236 }, { "epoch": 5.2, "learning_rate": 2.3483947681331747e-07, "logits/chosen": -1.594093918800354, "logits/rejected": -1.7372207641601562, "logps/chosen": -108.70181274414062, "logps/rejected": -230.80320739746094, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -4.338521480560303, "rewards/margins": 11.556549072265625, "rewards/rejected": -15.895071029663086, "step": 3237 }, { "epoch": 5.2, "learning_rate": 2.347403884264764e-07, "logits/chosen": -1.476464033126831, "logits/rejected": -1.5167592763900757, "logps/chosen": -186.84690856933594, "logps/rejected": -275.9740295410156, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -9.133797645568848, "rewards/margins": 9.393763542175293, "rewards/rejected": -18.52756118774414, "step": 3238 }, { "epoch": 5.2, "learning_rate": 2.3464130003963534e-07, "logits/chosen": -1.7026022672653198, "logits/rejected": -1.6584250926971436, "logps/chosen": -160.26150512695312, "logps/rejected": -283.57635498046875, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -4.77739143371582, "rewards/margins": 13.933647155761719, "rewards/rejected": -18.71103858947754, "step": 3239 }, { "epoch": 5.2, "learning_rate": 2.3454221165279427e-07, "logits/chosen": -1.472076177597046, "logits/rejected": -1.4352281093597412, "logps/chosen": -167.53164672851562, "logps/rejected": -320.10174560546875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.890683650970459, "rewards/margins": 14.155416488647461, "rewards/rejected": -22.046100616455078, "step": 3240 }, { "epoch": 5.2, "learning_rate": 2.3444312326595323e-07, "logits/chosen": -1.5412604808807373, "logits/rejected": -1.453109860420227, "logps/chosen": -185.27133178710938, "logps/rejected": -314.72479248046875, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -9.390541076660156, "rewards/margins": 13.547540664672852, "rewards/rejected": -22.938081741333008, "step": 3241 }, { "epoch": 5.2, "learning_rate": 2.3434403487911216e-07, "logits/chosen": -1.6711206436157227, "logits/rejected": -1.6967322826385498, "logps/chosen": -148.71234130859375, "logps/rejected": -305.2937316894531, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.829867362976074, "rewards/margins": 13.141830444335938, "rewards/rejected": -19.971698760986328, "step": 3242 }, { "epoch": 5.21, "learning_rate": 2.3424494649227107e-07, "logits/chosen": -1.63736891746521, "logits/rejected": -1.6895978450775146, "logps/chosen": -155.7957000732422, "logps/rejected": -304.1769714355469, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -8.213360786437988, "rewards/margins": 14.222009658813477, "rewards/rejected": -22.43536949157715, "step": 3243 }, { "epoch": 5.21, "learning_rate": 2.3414585810543003e-07, "logits/chosen": -1.5346466302871704, "logits/rejected": -1.4475499391555786, "logps/chosen": -156.03607177734375, "logps/rejected": -272.4963684082031, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -6.2712812423706055, "rewards/margins": 11.385296821594238, "rewards/rejected": -17.656578063964844, "step": 3244 }, { "epoch": 5.21, "learning_rate": 2.3404676971858896e-07, "logits/chosen": -1.5307530164718628, "logits/rejected": -1.6006977558135986, "logps/chosen": -132.73867797851562, "logps/rejected": -228.21435546875, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -5.018735885620117, "rewards/margins": 8.67677116394043, "rewards/rejected": -13.695507049560547, "step": 3245 }, { "epoch": 5.21, "learning_rate": 2.3394768133174792e-07, "logits/chosen": -1.3633646965026855, "logits/rejected": -1.4136271476745605, "logps/chosen": -132.14773559570312, "logps/rejected": -255.9661865234375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -6.652203559875488, "rewards/margins": 10.587190628051758, "rewards/rejected": -17.239395141601562, "step": 3246 }, { "epoch": 5.21, "learning_rate": 2.3384859294490686e-07, "logits/chosen": -1.5207713842391968, "logits/rejected": -1.5364586114883423, "logps/chosen": -125.44482421875, "logps/rejected": -233.26730346679688, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -5.150960922241211, "rewards/margins": 9.541621208190918, "rewards/rejected": -14.692583084106445, "step": 3247 }, { "epoch": 5.21, "learning_rate": 2.3374950455806576e-07, "logits/chosen": -1.5898964405059814, "logits/rejected": -1.6817491054534912, "logps/chosen": -130.79428100585938, "logps/rejected": -329.93597412109375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.431640625, "rewards/margins": 16.04582405090332, "rewards/rejected": -22.477462768554688, "step": 3248 }, { "epoch": 5.22, "learning_rate": 2.3365041617122472e-07, "logits/chosen": -1.7000527381896973, "logits/rejected": -1.6941851377487183, "logps/chosen": -140.56613159179688, "logps/rejected": -214.4590606689453, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -4.589258193969727, "rewards/margins": 9.570714950561523, "rewards/rejected": -14.159974098205566, "step": 3249 }, { "epoch": 5.22, "learning_rate": 2.3355132778438366e-07, "logits/chosen": -1.5193397998809814, "logits/rejected": -1.5657575130462646, "logps/chosen": -185.35009765625, "logps/rejected": -344.8037414550781, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -9.23292350769043, "rewards/margins": 15.275766372680664, "rewards/rejected": -24.508689880371094, "step": 3250 }, { "epoch": 5.22, "learning_rate": 2.3345223939754262e-07, "logits/chosen": -1.5152623653411865, "logits/rejected": -1.607582926750183, "logps/chosen": -165.06732177734375, "logps/rejected": -300.64239501953125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -9.504597663879395, "rewards/margins": 12.122743606567383, "rewards/rejected": -21.627342224121094, "step": 3251 }, { "epoch": 5.22, "learning_rate": 2.3335315101070155e-07, "logits/chosen": -1.5840299129486084, "logits/rejected": -1.6357758045196533, "logps/chosen": -127.7243881225586, "logps/rejected": -240.23248291015625, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -5.148174285888672, "rewards/margins": 10.319061279296875, "rewards/rejected": -15.467235565185547, "step": 3252 }, { "epoch": 5.22, "learning_rate": 2.3325406262386046e-07, "logits/chosen": -1.5520362854003906, "logits/rejected": -1.5606131553649902, "logps/chosen": -146.765380859375, "logps/rejected": -264.03167724609375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.288313865661621, "rewards/margins": 11.18571662902832, "rewards/rejected": -16.474029541015625, "step": 3253 }, { "epoch": 5.22, "learning_rate": 2.3315497423701942e-07, "logits/chosen": -1.418746829032898, "logits/rejected": -1.3575607538223267, "logps/chosen": -156.94210815429688, "logps/rejected": -277.6868896484375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.608823299407959, "rewards/margins": 12.016447067260742, "rewards/rejected": -19.62527084350586, "step": 3254 }, { "epoch": 5.22, "learning_rate": 2.3305588585017835e-07, "logits/chosen": -1.7951960563659668, "logits/rejected": -1.7447463274002075, "logps/chosen": -178.4369354248047, "logps/rejected": -295.24310302734375, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -5.495842456817627, "rewards/margins": 12.618724822998047, "rewards/rejected": -18.114566802978516, "step": 3255 }, { "epoch": 5.23, "learning_rate": 2.3295679746333728e-07, "logits/chosen": -1.7019855976104736, "logits/rejected": -1.6987783908843994, "logps/chosen": -137.5904083251953, "logps/rejected": -266.77313232421875, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -4.693784713745117, "rewards/margins": 13.427701950073242, "rewards/rejected": -18.12148666381836, "step": 3256 }, { "epoch": 5.23, "learning_rate": 2.3285770907649622e-07, "logits/chosen": -1.5488239526748657, "logits/rejected": -1.5518592596054077, "logps/chosen": -161.21530151367188, "logps/rejected": -262.62506103515625, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -6.895386219024658, "rewards/margins": 11.002251625061035, "rewards/rejected": -17.89763641357422, "step": 3257 }, { "epoch": 5.23, "learning_rate": 2.3275862068965515e-07, "logits/chosen": -1.467268466949463, "logits/rejected": -1.4266059398651123, "logps/chosen": -167.54747009277344, "logps/rejected": -259.21722412109375, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -6.52095365524292, "rewards/margins": 10.596217155456543, "rewards/rejected": -17.117170333862305, "step": 3258 }, { "epoch": 5.23, "learning_rate": 2.326595323028141e-07, "logits/chosen": -1.352489948272705, "logits/rejected": -1.4461692571640015, "logps/chosen": -150.4564666748047, "logps/rejected": -260.1969909667969, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -6.805381774902344, "rewards/margins": 8.863666534423828, "rewards/rejected": -15.669048309326172, "step": 3259 }, { "epoch": 5.23, "learning_rate": 2.3256044391597304e-07, "logits/chosen": -1.535402774810791, "logits/rejected": -1.5595383644104004, "logps/chosen": -118.28038024902344, "logps/rejected": -230.89389038085938, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": -4.7228240966796875, "rewards/margins": 10.820110321044922, "rewards/rejected": -15.54293441772461, "step": 3260 }, { "epoch": 5.23, "learning_rate": 2.3246135552913198e-07, "logits/chosen": -1.5762367248535156, "logits/rejected": -1.5843279361724854, "logps/chosen": -178.55323791503906, "logps/rejected": -325.1698913574219, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -7.951789379119873, "rewards/margins": 13.694095611572266, "rewards/rejected": -21.645885467529297, "step": 3261 }, { "epoch": 5.24, "learning_rate": 2.323622671422909e-07, "logits/chosen": -1.4012043476104736, "logits/rejected": -1.6242129802703857, "logps/chosen": -112.91183471679688, "logps/rejected": -305.6136169433594, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -3.708033800125122, "rewards/margins": 13.885345458984375, "rewards/rejected": -17.593379974365234, "step": 3262 }, { "epoch": 5.24, "learning_rate": 2.3226317875544984e-07, "logits/chosen": -1.7395079135894775, "logits/rejected": -1.6762055158615112, "logps/chosen": -124.63433837890625, "logps/rejected": -242.65408325195312, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -4.523141384124756, "rewards/margins": 12.231035232543945, "rewards/rejected": -16.75417709350586, "step": 3263 }, { "epoch": 5.24, "learning_rate": 2.3216409036860878e-07, "logits/chosen": -1.611687183380127, "logits/rejected": -1.5990633964538574, "logps/chosen": -154.560546875, "logps/rejected": -262.45916748046875, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -6.212143898010254, "rewards/margins": 10.791460990905762, "rewards/rejected": -17.003604888916016, "step": 3264 }, { "epoch": 5.24, "learning_rate": 2.3206500198176774e-07, "logits/chosen": -1.494474172592163, "logits/rejected": -1.493428349494934, "logps/chosen": -182.43023681640625, "logps/rejected": -310.861328125, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -8.06014347076416, "rewards/margins": 11.876724243164062, "rewards/rejected": -19.93686866760254, "step": 3265 }, { "epoch": 5.24, "learning_rate": 2.3196591359492667e-07, "logits/chosen": -1.4659819602966309, "logits/rejected": -1.4336625337600708, "logps/chosen": -120.85904693603516, "logps/rejected": -222.517578125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -4.353159427642822, "rewards/margins": 10.461543083190918, "rewards/rejected": -14.814702987670898, "step": 3266 }, { "epoch": 5.24, "learning_rate": 2.318668252080856e-07, "logits/chosen": -1.7432674169540405, "logits/rejected": -1.617167592048645, "logps/chosen": -130.8292236328125, "logps/rejected": -244.19369506835938, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -1.7796995639801025, "rewards/margins": 14.482709884643555, "rewards/rejected": -16.262409210205078, "step": 3267 }, { "epoch": 5.25, "learning_rate": 2.3176773682124454e-07, "logits/chosen": -1.6015013456344604, "logits/rejected": -1.5804815292358398, "logps/chosen": -110.40737915039062, "logps/rejected": -279.0521240234375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -4.332005023956299, "rewards/margins": 15.928768157958984, "rewards/rejected": -20.260772705078125, "step": 3268 }, { "epoch": 5.25, "learning_rate": 2.3166864843440347e-07, "logits/chosen": -1.5537872314453125, "logits/rejected": -1.5661128759384155, "logps/chosen": -180.87454223632812, "logps/rejected": -294.96343994140625, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -8.87488842010498, "rewards/margins": 11.79098129272461, "rewards/rejected": -20.665868759155273, "step": 3269 }, { "epoch": 5.25, "learning_rate": 2.3156956004756243e-07, "logits/chosen": -1.4591339826583862, "logits/rejected": -1.4485584497451782, "logps/chosen": -162.53550720214844, "logps/rejected": -280.2856140136719, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -7.929163455963135, "rewards/margins": 10.754241943359375, "rewards/rejected": -18.68340492248535, "step": 3270 }, { "epoch": 5.25, "learning_rate": 2.3147047166072134e-07, "logits/chosen": -1.390628457069397, "logits/rejected": -1.3830832242965698, "logps/chosen": -157.52175903320312, "logps/rejected": -244.1839599609375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -6.661186218261719, "rewards/margins": 9.477179527282715, "rewards/rejected": -16.138364791870117, "step": 3271 }, { "epoch": 5.25, "learning_rate": 2.313713832738803e-07, "logits/chosen": -1.5687041282653809, "logits/rejected": -1.5763561725616455, "logps/chosen": -173.70741271972656, "logps/rejected": -299.7364807128906, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -8.241046905517578, "rewards/margins": 12.588224411010742, "rewards/rejected": -20.829273223876953, "step": 3272 }, { "epoch": 5.25, "learning_rate": 2.3127229488703923e-07, "logits/chosen": -1.5640531778335571, "logits/rejected": -1.5544629096984863, "logps/chosen": -110.02836608886719, "logps/rejected": -266.82366943359375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -3.2608039379119873, "rewards/margins": 14.549251556396484, "rewards/rejected": -17.810054779052734, "step": 3273 }, { "epoch": 5.26, "learning_rate": 2.3117320650019816e-07, "logits/chosen": -1.558677077293396, "logits/rejected": -1.617550015449524, "logps/chosen": -112.341552734375, "logps/rejected": -240.3922576904297, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -4.679274559020996, "rewards/margins": 12.593742370605469, "rewards/rejected": -17.27301788330078, "step": 3274 }, { "epoch": 5.26, "learning_rate": 2.3107411811335712e-07, "logits/chosen": -1.719848394393921, "logits/rejected": -1.7204039096832275, "logps/chosen": -124.46070861816406, "logps/rejected": -249.36705017089844, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -4.4637322425842285, "rewards/margins": 12.323979377746582, "rewards/rejected": -16.78771209716797, "step": 3275 }, { "epoch": 5.26, "learning_rate": 2.3097502972651603e-07, "logits/chosen": -1.5134086608886719, "logits/rejected": -1.4789170026779175, "logps/chosen": -203.6083984375, "logps/rejected": -327.28729248046875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.652441024780273, "rewards/margins": 13.799589157104492, "rewards/rejected": -21.452028274536133, "step": 3276 }, { "epoch": 5.26, "learning_rate": 2.3087594133967496e-07, "logits/chosen": -1.5415900945663452, "logits/rejected": -1.581908941268921, "logps/chosen": -188.88739013671875, "logps/rejected": -316.2205505371094, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.665101051330566, "rewards/margins": 13.70997428894043, "rewards/rejected": -20.37507438659668, "step": 3277 }, { "epoch": 5.26, "learning_rate": 2.3077685295283392e-07, "logits/chosen": -1.400320053100586, "logits/rejected": -1.4474568367004395, "logps/chosen": -176.33660888671875, "logps/rejected": -345.6932373046875, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": -8.29930305480957, "rewards/margins": 17.116024017333984, "rewards/rejected": -25.415327072143555, "step": 3278 }, { "epoch": 5.26, "learning_rate": 2.3067776456599286e-07, "logits/chosen": -1.5643625259399414, "logits/rejected": -1.5363796949386597, "logps/chosen": -122.2890396118164, "logps/rejected": -227.96897888183594, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.6713435649871826, "rewards/margins": 10.427351951599121, "rewards/rejected": -14.098694801330566, "step": 3279 }, { "epoch": 5.26, "learning_rate": 2.3057867617915181e-07, "logits/chosen": -1.6216700077056885, "logits/rejected": -1.6071879863739014, "logps/chosen": -162.56187438964844, "logps/rejected": -268.1200256347656, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -7.406813621520996, "rewards/margins": 11.757715225219727, "rewards/rejected": -19.16452980041504, "step": 3280 }, { "epoch": 5.27, "learning_rate": 2.3047958779231072e-07, "logits/chosen": -1.5049550533294678, "logits/rejected": -1.495296835899353, "logps/chosen": -151.47744750976562, "logps/rejected": -236.39984130859375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.343379020690918, "rewards/margins": 10.43460464477539, "rewards/rejected": -15.777982711791992, "step": 3281 }, { "epoch": 5.27, "learning_rate": 2.3038049940546966e-07, "logits/chosen": -1.5928688049316406, "logits/rejected": -1.6381444931030273, "logps/chosen": -136.7577667236328, "logps/rejected": -288.31103515625, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -5.79954719543457, "rewards/margins": 14.656883239746094, "rewards/rejected": -20.456430435180664, "step": 3282 }, { "epoch": 5.27, "learning_rate": 2.3028141101862861e-07, "logits/chosen": -1.490057349205017, "logits/rejected": -1.4614628553390503, "logps/chosen": -175.01210021972656, "logps/rejected": -289.3257141113281, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -9.845943450927734, "rewards/margins": 10.557852745056152, "rewards/rejected": -20.40379524230957, "step": 3283 }, { "epoch": 5.27, "learning_rate": 2.3018232263178755e-07, "logits/chosen": -1.465447187423706, "logits/rejected": -1.5683213472366333, "logps/chosen": -190.9565887451172, "logps/rejected": -349.9540710449219, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -8.824747085571289, "rewards/margins": 14.203786849975586, "rewards/rejected": -23.028533935546875, "step": 3284 }, { "epoch": 5.27, "learning_rate": 2.300832342449465e-07, "logits/chosen": -1.5052553415298462, "logits/rejected": -1.6110550165176392, "logps/chosen": -166.7972869873047, "logps/rejected": -287.06744384765625, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -8.260326385498047, "rewards/margins": 9.954858779907227, "rewards/rejected": -18.215185165405273, "step": 3285 }, { "epoch": 5.27, "learning_rate": 2.2998414585810541e-07, "logits/chosen": -1.7910115718841553, "logits/rejected": -1.7335350513458252, "logps/chosen": -130.99227905273438, "logps/rejected": -274.59332275390625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.3881120681762695, "rewards/margins": 15.901726722717285, "rewards/rejected": -19.289838790893555, "step": 3286 }, { "epoch": 5.28, "learning_rate": 2.2988505747126435e-07, "logits/chosen": -1.38985276222229, "logits/rejected": -1.3994165658950806, "logps/chosen": -148.76405334472656, "logps/rejected": -247.40203857421875, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -6.58608341217041, "rewards/margins": 10.773872375488281, "rewards/rejected": -17.359956741333008, "step": 3287 }, { "epoch": 5.28, "learning_rate": 2.297859690844233e-07, "logits/chosen": -1.48069167137146, "logits/rejected": -1.5046736001968384, "logps/chosen": -182.3841552734375, "logps/rejected": -284.04144287109375, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -9.171463966369629, "rewards/margins": 9.217697143554688, "rewards/rejected": -18.389162063598633, "step": 3288 }, { "epoch": 5.28, "learning_rate": 2.2968688069758224e-07, "logits/chosen": -1.4783912897109985, "logits/rejected": -1.5058367252349854, "logps/chosen": -161.37045288085938, "logps/rejected": -275.2249755859375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -6.754576683044434, "rewards/margins": 10.274942398071289, "rewards/rejected": -17.02952003479004, "step": 3289 }, { "epoch": 5.28, "learning_rate": 2.2958779231074115e-07, "logits/chosen": -1.4675097465515137, "logits/rejected": -1.4226959943771362, "logps/chosen": -129.98219299316406, "logps/rejected": -293.51397705078125, "loss": 0.0112, "rewards/accuracies": 1.0, "rewards/chosen": -4.891878128051758, "rewards/margins": 16.738704681396484, "rewards/rejected": -21.63058090209961, "step": 3290 }, { "epoch": 5.28, "learning_rate": 2.294887039239001e-07, "logits/chosen": -1.6181025505065918, "logits/rejected": -1.6557501554489136, "logps/chosen": -128.70790100097656, "logps/rejected": -269.06646728515625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.766424655914307, "rewards/margins": 12.692357063293457, "rewards/rejected": -17.458782196044922, "step": 3291 }, { "epoch": 5.28, "learning_rate": 2.2938961553705904e-07, "logits/chosen": -1.40916109085083, "logits/rejected": -1.4052858352661133, "logps/chosen": -168.75111389160156, "logps/rejected": -291.683349609375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -8.780033111572266, "rewards/margins": 11.931514739990234, "rewards/rejected": -20.7115478515625, "step": 3292 }, { "epoch": 5.29, "learning_rate": 2.29290527150218e-07, "logits/chosen": -1.6158010959625244, "logits/rejected": -1.530199646949768, "logps/chosen": -202.82101440429688, "logps/rejected": -296.4802551269531, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -10.339792251586914, "rewards/margins": 9.82237434387207, "rewards/rejected": -20.162166595458984, "step": 3293 }, { "epoch": 5.29, "learning_rate": 2.2919143876337693e-07, "logits/chosen": -1.493280291557312, "logits/rejected": -1.451041579246521, "logps/chosen": -163.53591918945312, "logps/rejected": -268.64324951171875, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -8.058167457580566, "rewards/margins": 10.893571853637695, "rewards/rejected": -18.951740264892578, "step": 3294 }, { "epoch": 5.29, "learning_rate": 2.2909235037653584e-07, "logits/chosen": -1.4288734197616577, "logits/rejected": -1.4758449792861938, "logps/chosen": -130.56301879882812, "logps/rejected": -293.0477294921875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.947011470794678, "rewards/margins": 15.635238647460938, "rewards/rejected": -20.58224868774414, "step": 3295 }, { "epoch": 5.29, "learning_rate": 2.289932619896948e-07, "logits/chosen": -1.3653998374938965, "logits/rejected": -1.3507616519927979, "logps/chosen": -113.7938003540039, "logps/rejected": -212.2369384765625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -3.329197406768799, "rewards/margins": 10.41024398803711, "rewards/rejected": -13.739441871643066, "step": 3296 }, { "epoch": 5.29, "learning_rate": 2.2889417360285373e-07, "logits/chosen": -1.4972128868103027, "logits/rejected": -1.5469497442245483, "logps/chosen": -116.20575714111328, "logps/rejected": -253.90692138671875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -4.379452705383301, "rewards/margins": 13.03973388671875, "rewards/rejected": -17.419187545776367, "step": 3297 }, { "epoch": 5.29, "learning_rate": 2.2879508521601267e-07, "logits/chosen": -1.5097559690475464, "logits/rejected": -1.5235953330993652, "logps/chosen": -132.9793701171875, "logps/rejected": -266.6614685058594, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.667145252227783, "rewards/margins": 12.524120330810547, "rewards/rejected": -17.191265106201172, "step": 3298 }, { "epoch": 5.3, "learning_rate": 2.2869599682917163e-07, "logits/chosen": -1.48417329788208, "logits/rejected": -1.510541558265686, "logps/chosen": -118.65602111816406, "logps/rejected": -273.9678955078125, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -5.223136901855469, "rewards/margins": 13.71442699432373, "rewards/rejected": -18.937564849853516, "step": 3299 }, { "epoch": 5.3, "learning_rate": 2.2859690844233053e-07, "logits/chosen": -1.5556857585906982, "logits/rejected": -1.5986441373825073, "logps/chosen": -148.67242431640625, "logps/rejected": -272.2933044433594, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -6.443939685821533, "rewards/margins": 11.895475387573242, "rewards/rejected": -18.339414596557617, "step": 3300 }, { "epoch": 5.3, "learning_rate": 2.284978200554895e-07, "logits/chosen": -1.5792648792266846, "logits/rejected": -1.5814098119735718, "logps/chosen": -186.92202758789062, "logps/rejected": -259.39453125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -8.63750171661377, "rewards/margins": 8.415603637695312, "rewards/rejected": -17.0531063079834, "step": 3301 }, { "epoch": 5.3, "learning_rate": 2.2839873166864843e-07, "logits/chosen": -1.500571370124817, "logits/rejected": -1.6020872592926025, "logps/chosen": -161.57269287109375, "logps/rejected": -272.75360107421875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.336462020874023, "rewards/margins": 8.724156379699707, "rewards/rejected": -16.060617446899414, "step": 3302 }, { "epoch": 5.3, "learning_rate": 2.2829964328180736e-07, "logits/chosen": -1.4412658214569092, "logits/rejected": -1.4683027267456055, "logps/chosen": -155.39122009277344, "logps/rejected": -253.5810546875, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -6.24833869934082, "rewards/margins": 9.23845386505127, "rewards/rejected": -15.486793518066406, "step": 3303 }, { "epoch": 5.3, "learning_rate": 2.2820055489496632e-07, "logits/chosen": -1.4774980545043945, "logits/rejected": -1.5155560970306396, "logps/chosen": -173.79515075683594, "logps/rejected": -273.3815002441406, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -8.15631103515625, "rewards/margins": 10.717728614807129, "rewards/rejected": -18.874038696289062, "step": 3304 }, { "epoch": 5.3, "learning_rate": 2.2810146650812523e-07, "logits/chosen": -1.6362597942352295, "logits/rejected": -1.7329810857772827, "logps/chosen": -136.3558807373047, "logps/rejected": -292.86810302734375, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -5.342553615570068, "rewards/margins": 15.292057037353516, "rewards/rejected": -20.63460922241211, "step": 3305 }, { "epoch": 5.31, "learning_rate": 2.2800237812128416e-07, "logits/chosen": -1.347977638244629, "logits/rejected": -1.3817092180252075, "logps/chosen": -137.6536865234375, "logps/rejected": -270.13775634765625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.370110034942627, "rewards/margins": 12.63882827758789, "rewards/rejected": -20.00893783569336, "step": 3306 }, { "epoch": 5.31, "learning_rate": 2.2790328973444312e-07, "logits/chosen": -1.4882045984268188, "logits/rejected": -1.5241202116012573, "logps/chosen": -156.85360717773438, "logps/rejected": -248.65489196777344, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -6.829144477844238, "rewards/margins": 9.813946723937988, "rewards/rejected": -16.643091201782227, "step": 3307 }, { "epoch": 5.31, "learning_rate": 2.2780420134760205e-07, "logits/chosen": -1.5645530223846436, "logits/rejected": -1.551581621170044, "logps/chosen": -143.82723999023438, "logps/rejected": -332.50396728515625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.892045021057129, "rewards/margins": 15.889039039611816, "rewards/rejected": -23.781082153320312, "step": 3308 }, { "epoch": 5.31, "learning_rate": 2.2770511296076099e-07, "logits/chosen": -1.5151004791259766, "logits/rejected": -1.5553202629089355, "logps/chosen": -153.21485900878906, "logps/rejected": -297.355224609375, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -6.613072395324707, "rewards/margins": 12.98960018157959, "rewards/rejected": -19.60267448425293, "step": 3309 }, { "epoch": 5.31, "learning_rate": 2.2760602457391992e-07, "logits/chosen": -1.5750819444656372, "logits/rejected": -1.5507875680923462, "logps/chosen": -178.33319091796875, "logps/rejected": -288.76751708984375, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -6.701860427856445, "rewards/margins": 12.18509292602539, "rewards/rejected": -18.88695526123047, "step": 3310 }, { "epoch": 5.31, "learning_rate": 2.2750693618707885e-07, "logits/chosen": -1.577398419380188, "logits/rejected": -1.6510770320892334, "logps/chosen": -143.89920043945312, "logps/rejected": -282.8096923828125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -6.01813268661499, "rewards/margins": 12.017876625061035, "rewards/rejected": -18.036008834838867, "step": 3311 }, { "epoch": 5.32, "learning_rate": 2.274078478002378e-07, "logits/chosen": -1.4647969007492065, "logits/rejected": -1.3879203796386719, "logps/chosen": -145.91766357421875, "logps/rejected": -298.03466796875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.840178489685059, "rewards/margins": 14.308417320251465, "rewards/rejected": -20.148595809936523, "step": 3312 }, { "epoch": 5.32, "learning_rate": 2.2730875941339675e-07, "logits/chosen": -1.6105878353118896, "logits/rejected": -1.4899859428405762, "logps/chosen": -164.84814453125, "logps/rejected": -248.66580200195312, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -5.969851016998291, "rewards/margins": 9.671026229858398, "rewards/rejected": -15.640876770019531, "step": 3313 }, { "epoch": 5.32, "learning_rate": 2.2720967102655565e-07, "logits/chosen": -1.5811818838119507, "logits/rejected": -1.5612133741378784, "logps/chosen": -126.66638946533203, "logps/rejected": -225.18374633789062, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -3.562431812286377, "rewards/margins": 9.958690643310547, "rewards/rejected": -13.521121978759766, "step": 3314 }, { "epoch": 5.32, "learning_rate": 2.271105826397146e-07, "logits/chosen": -1.5350260734558105, "logits/rejected": -1.6077206134796143, "logps/chosen": -140.8316650390625, "logps/rejected": -253.14756774902344, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -5.808531761169434, "rewards/margins": 10.061297416687012, "rewards/rejected": -15.869829177856445, "step": 3315 }, { "epoch": 5.32, "learning_rate": 2.2701149425287355e-07, "logits/chosen": -1.4854471683502197, "logits/rejected": -1.525792121887207, "logps/chosen": -120.77369689941406, "logps/rejected": -205.78564453125, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -4.336245059967041, "rewards/margins": 9.537599563598633, "rewards/rejected": -13.873845100402832, "step": 3316 }, { "epoch": 5.32, "learning_rate": 2.269124058660325e-07, "logits/chosen": -1.4358478784561157, "logits/rejected": -1.447367787361145, "logps/chosen": -103.92425537109375, "logps/rejected": -211.43685913085938, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -4.216926097869873, "rewards/margins": 10.131386756896973, "rewards/rejected": -14.348312377929688, "step": 3317 }, { "epoch": 5.33, "learning_rate": 2.2681331747919144e-07, "logits/chosen": -1.545941948890686, "logits/rejected": -1.5655145645141602, "logps/chosen": -157.71240234375, "logps/rejected": -271.8997802734375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.9868927001953125, "rewards/margins": 10.784299850463867, "rewards/rejected": -17.771190643310547, "step": 3318 }, { "epoch": 5.33, "learning_rate": 2.2671422909235035e-07, "logits/chosen": -1.6806466579437256, "logits/rejected": -1.6708253622055054, "logps/chosen": -87.8006591796875, "logps/rejected": -243.98867797851562, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -1.2052860260009766, "rewards/margins": 15.323360443115234, "rewards/rejected": -16.52864646911621, "step": 3319 }, { "epoch": 5.33, "learning_rate": 2.266151407055093e-07, "logits/chosen": -1.5885224342346191, "logits/rejected": -1.6605273485183716, "logps/chosen": -158.06655883789062, "logps/rejected": -338.20062255859375, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -6.112833023071289, "rewards/margins": 16.720754623413086, "rewards/rejected": -22.833587646484375, "step": 3320 }, { "epoch": 5.33, "learning_rate": 2.2651605231866824e-07, "logits/chosen": -1.7238097190856934, "logits/rejected": -1.7660444974899292, "logps/chosen": -116.2686767578125, "logps/rejected": -262.0578918457031, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.963369369506836, "rewards/margins": 12.433982849121094, "rewards/rejected": -17.39735221862793, "step": 3321 }, { "epoch": 5.33, "learning_rate": 2.264169639318272e-07, "logits/chosen": -1.6300324201583862, "logits/rejected": -1.4522960186004639, "logps/chosen": -135.105712890625, "logps/rejected": -253.45008850097656, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -5.38510799407959, "rewards/margins": 12.70014762878418, "rewards/rejected": -18.085254669189453, "step": 3322 }, { "epoch": 5.33, "learning_rate": 2.2631787554498613e-07, "logits/chosen": -1.5349348783493042, "logits/rejected": -1.4331884384155273, "logps/chosen": -149.43663024902344, "logps/rejected": -251.2551727294922, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -7.056065559387207, "rewards/margins": 10.902688980102539, "rewards/rejected": -17.958755493164062, "step": 3323 }, { "epoch": 5.34, "learning_rate": 2.2621878715814504e-07, "logits/chosen": -1.3630272150039673, "logits/rejected": -1.4950703382492065, "logps/chosen": -111.91983795166016, "logps/rejected": -217.37673950195312, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -5.111433506011963, "rewards/margins": 8.157143592834473, "rewards/rejected": -13.268577575683594, "step": 3324 }, { "epoch": 5.34, "learning_rate": 2.26119698771304e-07, "logits/chosen": -1.632229208946228, "logits/rejected": -1.62516188621521, "logps/chosen": -192.82925415039062, "logps/rejected": -296.07208251953125, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -8.844401359558105, "rewards/margins": 10.255441665649414, "rewards/rejected": -19.099842071533203, "step": 3325 }, { "epoch": 5.34, "learning_rate": 2.2602061038446293e-07, "logits/chosen": -1.453197717666626, "logits/rejected": -1.4036836624145508, "logps/chosen": -120.9522933959961, "logps/rejected": -197.45651245117188, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -2.7538535594940186, "rewards/margins": 10.840166091918945, "rewards/rejected": -13.594019889831543, "step": 3326 }, { "epoch": 5.34, "learning_rate": 2.2592152199762187e-07, "logits/chosen": -1.5256881713867188, "logits/rejected": -1.5507906675338745, "logps/chosen": -180.89517211914062, "logps/rejected": -300.45220947265625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.152420043945312, "rewards/margins": 12.529157638549805, "rewards/rejected": -20.681577682495117, "step": 3327 }, { "epoch": 5.34, "learning_rate": 2.258224336107808e-07, "logits/chosen": -1.5061918497085571, "logits/rejected": -1.462681770324707, "logps/chosen": -138.4984130859375, "logps/rejected": -234.9783935546875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.132136344909668, "rewards/margins": 10.839895248413086, "rewards/rejected": -16.97203254699707, "step": 3328 }, { "epoch": 5.34, "learning_rate": 2.2572334522393973e-07, "logits/chosen": -1.6498697996139526, "logits/rejected": -1.6011707782745361, "logps/chosen": -154.8911590576172, "logps/rejected": -286.8303527832031, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.219493865966797, "rewards/margins": 13.911413192749023, "rewards/rejected": -20.13090705871582, "step": 3329 }, { "epoch": 5.35, "learning_rate": 2.256242568370987e-07, "logits/chosen": -1.5793354511260986, "logits/rejected": -1.532224178314209, "logps/chosen": -171.15084838867188, "logps/rejected": -269.3011779785156, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -7.45323371887207, "rewards/margins": 11.509300231933594, "rewards/rejected": -18.962533950805664, "step": 3330 }, { "epoch": 5.35, "learning_rate": 2.2552516845025762e-07, "logits/chosen": -1.5102906227111816, "logits/rejected": -1.5350855588912964, "logps/chosen": -136.7614288330078, "logps/rejected": -304.9424743652344, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -4.676676273345947, "rewards/margins": 15.80157470703125, "rewards/rejected": -20.478252410888672, "step": 3331 }, { "epoch": 5.35, "learning_rate": 2.2542608006341656e-07, "logits/chosen": -1.4463659524917603, "logits/rejected": -1.5209327936172485, "logps/chosen": -152.08547973632812, "logps/rejected": -315.5830993652344, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -6.7533159255981445, "rewards/margins": 15.747629165649414, "rewards/rejected": -22.500946044921875, "step": 3332 }, { "epoch": 5.35, "learning_rate": 2.253269916765755e-07, "logits/chosen": -1.4326118230819702, "logits/rejected": -1.605055332183838, "logps/chosen": -161.41836547851562, "logps/rejected": -299.6729431152344, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -7.34447717666626, "rewards/margins": 11.521932601928711, "rewards/rejected": -18.866409301757812, "step": 3333 }, { "epoch": 5.35, "learning_rate": 2.2522790328973442e-07, "logits/chosen": -1.535721778869629, "logits/rejected": -1.5774413347244263, "logps/chosen": -132.92913818359375, "logps/rejected": -272.3597717285156, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -5.416486740112305, "rewards/margins": 11.906148910522461, "rewards/rejected": -17.322635650634766, "step": 3334 }, { "epoch": 5.35, "learning_rate": 2.2512881490289338e-07, "logits/chosen": -1.570178508758545, "logits/rejected": -1.5657563209533691, "logps/chosen": -172.29144287109375, "logps/rejected": -257.20306396484375, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -7.493678092956543, "rewards/margins": 9.694555282592773, "rewards/rejected": -17.188232421875, "step": 3335 }, { "epoch": 5.35, "learning_rate": 2.2502972651605232e-07, "logits/chosen": -1.4509042501449585, "logits/rejected": -1.4092247486114502, "logps/chosen": -153.49996948242188, "logps/rejected": -304.203857421875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.7667236328125, "rewards/margins": 14.15746784210205, "rewards/rejected": -19.924190521240234, "step": 3336 }, { "epoch": 5.36, "learning_rate": 2.2493063812921125e-07, "logits/chosen": -1.5317697525024414, "logits/rejected": -1.4868426322937012, "logps/chosen": -129.99703979492188, "logps/rejected": -251.9051513671875, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -5.795592308044434, "rewards/margins": 11.148192405700684, "rewards/rejected": -16.943784713745117, "step": 3337 }, { "epoch": 5.36, "learning_rate": 2.2483154974237018e-07, "logits/chosen": -1.457454800605774, "logits/rejected": -1.4756230115890503, "logps/chosen": -123.6118392944336, "logps/rejected": -240.3975830078125, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -4.904645919799805, "rewards/margins": 12.0093355178833, "rewards/rejected": -16.913982391357422, "step": 3338 }, { "epoch": 5.36, "learning_rate": 2.2473246135552912e-07, "logits/chosen": -1.5437859296798706, "logits/rejected": -1.5696723461151123, "logps/chosen": -152.26199340820312, "logps/rejected": -246.85086059570312, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -4.5242486000061035, "rewards/margins": 9.850549697875977, "rewards/rejected": -14.374797821044922, "step": 3339 }, { "epoch": 5.36, "learning_rate": 2.2463337296868805e-07, "logits/chosen": -1.3714799880981445, "logits/rejected": -1.3958139419555664, "logps/chosen": -184.04798889160156, "logps/rejected": -267.7174072265625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.70742654800415, "rewards/margins": 10.010367393493652, "rewards/rejected": -17.71779441833496, "step": 3340 }, { "epoch": 5.36, "learning_rate": 2.24534284581847e-07, "logits/chosen": -1.5415242910385132, "logits/rejected": -1.6627039909362793, "logps/chosen": -139.56666564941406, "logps/rejected": -238.7611083984375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -6.5672101974487305, "rewards/margins": 8.37545394897461, "rewards/rejected": -14.942663192749023, "step": 3341 }, { "epoch": 5.36, "learning_rate": 2.2443519619500592e-07, "logits/chosen": -1.5591256618499756, "logits/rejected": -1.5583850145339966, "logps/chosen": -144.3818359375, "logps/rejected": -284.83392333984375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.635990619659424, "rewards/margins": 12.929494857788086, "rewards/rejected": -19.56548500061035, "step": 3342 }, { "epoch": 5.37, "learning_rate": 2.2433610780816488e-07, "logits/chosen": -1.592528223991394, "logits/rejected": -1.6109066009521484, "logps/chosen": -139.62033081054688, "logps/rejected": -276.4849548339844, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -6.375699043273926, "rewards/margins": 14.04610824584961, "rewards/rejected": -20.421810150146484, "step": 3343 }, { "epoch": 5.37, "learning_rate": 2.242370194213238e-07, "logits/chosen": -1.5474252700805664, "logits/rejected": -1.449514627456665, "logps/chosen": -135.19134521484375, "logps/rejected": -255.0718994140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.3810648918151855, "rewards/margins": 13.859562873840332, "rewards/rejected": -18.24062728881836, "step": 3344 }, { "epoch": 5.37, "learning_rate": 2.2413793103448274e-07, "logits/chosen": -1.6842137575149536, "logits/rejected": -1.5948195457458496, "logps/chosen": -186.7830810546875, "logps/rejected": -284.5519104003906, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.71574592590332, "rewards/margins": 12.599804878234863, "rewards/rejected": -20.315549850463867, "step": 3345 }, { "epoch": 5.37, "learning_rate": 2.240388426476417e-07, "logits/chosen": -1.698282241821289, "logits/rejected": -1.620221495628357, "logps/chosen": -108.21202087402344, "logps/rejected": -239.95144653320312, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.161227226257324, "rewards/margins": 14.208375930786133, "rewards/rejected": -16.36960220336914, "step": 3346 }, { "epoch": 5.37, "learning_rate": 2.239397542608006e-07, "logits/chosen": -1.5395092964172363, "logits/rejected": -1.532173752784729, "logps/chosen": -155.18951416015625, "logps/rejected": -272.2019348144531, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -6.746330738067627, "rewards/margins": 11.576675415039062, "rewards/rejected": -18.32300567626953, "step": 3347 }, { "epoch": 5.37, "learning_rate": 2.2384066587395954e-07, "logits/chosen": -1.3876769542694092, "logits/rejected": -1.449293851852417, "logps/chosen": -125.07403564453125, "logps/rejected": -278.3925476074219, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -5.222963809967041, "rewards/margins": 14.931577682495117, "rewards/rejected": -20.154542922973633, "step": 3348 }, { "epoch": 5.38, "learning_rate": 2.237415774871185e-07, "logits/chosen": -1.5666778087615967, "logits/rejected": -1.711693525314331, "logps/chosen": -117.53593444824219, "logps/rejected": -291.260986328125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -4.911811828613281, "rewards/margins": 14.627108573913574, "rewards/rejected": -19.53891944885254, "step": 3349 }, { "epoch": 5.38, "learning_rate": 2.2364248910027744e-07, "logits/chosen": -1.5801458358764648, "logits/rejected": -1.568089485168457, "logps/chosen": -134.3922119140625, "logps/rejected": -234.49005126953125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.527836322784424, "rewards/margins": 9.966581344604492, "rewards/rejected": -15.494417190551758, "step": 3350 }, { "epoch": 5.38, "learning_rate": 2.235434007134364e-07, "logits/chosen": -1.5634105205535889, "logits/rejected": -1.5351967811584473, "logps/chosen": -162.6669464111328, "logps/rejected": -270.06829833984375, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -6.583613395690918, "rewards/margins": 9.931751251220703, "rewards/rejected": -16.515365600585938, "step": 3351 }, { "epoch": 5.38, "learning_rate": 2.234443123265953e-07, "logits/chosen": -1.4940297603607178, "logits/rejected": -1.4565410614013672, "logps/chosen": -152.0593719482422, "logps/rejected": -296.40203857421875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -7.479157447814941, "rewards/margins": 14.992426872253418, "rewards/rejected": -22.47158432006836, "step": 3352 }, { "epoch": 5.38, "learning_rate": 2.2334522393975424e-07, "logits/chosen": -1.7206963300704956, "logits/rejected": -1.6227232217788696, "logps/chosen": -114.64385223388672, "logps/rejected": -263.2828369140625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.159984588623047, "rewards/margins": 16.36581802368164, "rewards/rejected": -19.525802612304688, "step": 3353 }, { "epoch": 5.38, "learning_rate": 2.232461355529132e-07, "logits/chosen": -1.463364601135254, "logits/rejected": -1.5254769325256348, "logps/chosen": -131.56166076660156, "logps/rejected": -277.1054992675781, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -4.043187141418457, "rewards/margins": 15.634376525878906, "rewards/rejected": -19.67756462097168, "step": 3354 }, { "epoch": 5.39, "learning_rate": 2.2314704716607213e-07, "logits/chosen": -1.4703330993652344, "logits/rejected": -1.524598240852356, "logps/chosen": -139.65371704101562, "logps/rejected": -243.3048095703125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -6.039375305175781, "rewards/margins": 10.573387145996094, "rewards/rejected": -16.612762451171875, "step": 3355 }, { "epoch": 5.39, "learning_rate": 2.230479587792311e-07, "logits/chosen": -1.6625964641571045, "logits/rejected": -1.7274631261825562, "logps/chosen": -116.28390502929688, "logps/rejected": -272.6272277832031, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -4.50934362411499, "rewards/margins": 13.706849098205566, "rewards/rejected": -18.21619415283203, "step": 3356 }, { "epoch": 5.39, "learning_rate": 2.2294887039239e-07, "logits/chosen": -1.6670743227005005, "logits/rejected": -1.5316647291183472, "logps/chosen": -139.34652709960938, "logps/rejected": -230.95863342285156, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.874160289764404, "rewards/margins": 11.127359390258789, "rewards/rejected": -17.00151824951172, "step": 3357 }, { "epoch": 5.39, "learning_rate": 2.2284978200554893e-07, "logits/chosen": -1.5382909774780273, "logits/rejected": -1.529942274093628, "logps/chosen": -200.63597106933594, "logps/rejected": -310.177978515625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -9.582544326782227, "rewards/margins": 11.499267578125, "rewards/rejected": -21.081811904907227, "step": 3358 }, { "epoch": 5.39, "learning_rate": 2.227506936187079e-07, "logits/chosen": -1.546960473060608, "logits/rejected": -1.5958765745162964, "logps/chosen": -113.99314880371094, "logps/rejected": -274.0147705078125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.952789783477783, "rewards/margins": 15.033554077148438, "rewards/rejected": -18.986343383789062, "step": 3359 }, { "epoch": 5.39, "learning_rate": 2.2265160523186682e-07, "logits/chosen": -1.4956220388412476, "logits/rejected": -1.5152796506881714, "logps/chosen": -118.70171356201172, "logps/rejected": -261.7405090332031, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -3.6020045280456543, "rewards/margins": 13.936685562133789, "rewards/rejected": -17.5386905670166, "step": 3360 }, { "epoch": 5.39, "learning_rate": 2.2255251684502573e-07, "logits/chosen": -1.526985764503479, "logits/rejected": -1.555748462677002, "logps/chosen": -133.33059692382812, "logps/rejected": -251.51255798339844, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.9240264892578125, "rewards/margins": 11.591777801513672, "rewards/rejected": -15.515804290771484, "step": 3361 }, { "epoch": 5.4, "learning_rate": 2.224534284581847e-07, "logits/chosen": -1.614539623260498, "logits/rejected": -1.5123636722564697, "logps/chosen": -118.60287475585938, "logps/rejected": -211.76608276367188, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.677467346191406, "rewards/margins": 9.764123916625977, "rewards/rejected": -14.441591262817383, "step": 3362 }, { "epoch": 5.4, "learning_rate": 2.2235434007134362e-07, "logits/chosen": -1.566989779472351, "logits/rejected": -1.6183035373687744, "logps/chosen": -134.1474609375, "logps/rejected": -233.4180908203125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -5.1238274574279785, "rewards/margins": 9.82984733581543, "rewards/rejected": -14.95367431640625, "step": 3363 }, { "epoch": 5.4, "learning_rate": 2.2225525168450258e-07, "logits/chosen": -1.4607737064361572, "logits/rejected": -1.4881620407104492, "logps/chosen": -179.74595642089844, "logps/rejected": -278.3017578125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -7.919157981872559, "rewards/margins": 10.873269081115723, "rewards/rejected": -18.79242706298828, "step": 3364 }, { "epoch": 5.4, "learning_rate": 2.2215616329766152e-07, "logits/chosen": -1.6124848127365112, "logits/rejected": -1.5343328714370728, "logps/chosen": -121.9565200805664, "logps/rejected": -217.14706420898438, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -3.396571636199951, "rewards/margins": 11.568666458129883, "rewards/rejected": -14.965237617492676, "step": 3365 }, { "epoch": 5.4, "learning_rate": 2.2205707491082042e-07, "logits/chosen": -1.52131187915802, "logits/rejected": -1.5269343852996826, "logps/chosen": -127.14079284667969, "logps/rejected": -254.12628173828125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -3.8454363346099854, "rewards/margins": 12.024837493896484, "rewards/rejected": -15.870275497436523, "step": 3366 }, { "epoch": 5.4, "learning_rate": 2.2195798652397938e-07, "logits/chosen": -1.460913896560669, "logits/rejected": -1.490901231765747, "logps/chosen": -169.38668823242188, "logps/rejected": -343.76806640625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.101324081420898, "rewards/margins": 15.524415016174316, "rewards/rejected": -23.6257381439209, "step": 3367 }, { "epoch": 5.41, "learning_rate": 2.2185889813713832e-07, "logits/chosen": -1.4364376068115234, "logits/rejected": -1.5023207664489746, "logps/chosen": -183.91632080078125, "logps/rejected": -288.2415771484375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.191518783569336, "rewards/margins": 11.386292457580566, "rewards/rejected": -19.57781219482422, "step": 3368 }, { "epoch": 5.41, "learning_rate": 2.2175980975029725e-07, "logits/chosen": -1.3790723085403442, "logits/rejected": -1.3920319080352783, "logps/chosen": -102.01871490478516, "logps/rejected": -176.1423797607422, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -2.574155330657959, "rewards/margins": 9.471451759338379, "rewards/rejected": -12.045607566833496, "step": 3369 }, { "epoch": 5.41, "learning_rate": 2.216607213634562e-07, "logits/chosen": -1.7128570079803467, "logits/rejected": -1.7457858324050903, "logps/chosen": -173.56561279296875, "logps/rejected": -302.0688171386719, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -7.877683162689209, "rewards/margins": 11.865898132324219, "rewards/rejected": -19.743579864501953, "step": 3370 }, { "epoch": 5.41, "learning_rate": 2.2156163297661512e-07, "logits/chosen": -1.5667210817337036, "logits/rejected": -1.5284806489944458, "logps/chosen": -147.80776977539062, "logps/rejected": -247.23788452148438, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -6.01568603515625, "rewards/margins": 10.655046463012695, "rewards/rejected": -16.670734405517578, "step": 3371 }, { "epoch": 5.41, "learning_rate": 2.2146254458977408e-07, "logits/chosen": -1.5296783447265625, "logits/rejected": -1.5212018489837646, "logps/chosen": -149.3108673095703, "logps/rejected": -262.4295654296875, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -6.820113182067871, "rewards/margins": 11.58735466003418, "rewards/rejected": -18.407466888427734, "step": 3372 }, { "epoch": 5.41, "learning_rate": 2.21363456202933e-07, "logits/chosen": -1.7003076076507568, "logits/rejected": -1.7299786806106567, "logps/chosen": -130.5372772216797, "logps/rejected": -283.408203125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -5.891129970550537, "rewards/margins": 14.459957122802734, "rewards/rejected": -20.35108757019043, "step": 3373 }, { "epoch": 5.42, "learning_rate": 2.2126436781609194e-07, "logits/chosen": -1.4702881574630737, "logits/rejected": -1.5102828741073608, "logps/chosen": -106.86442565917969, "logps/rejected": -254.77456665039062, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -4.383804798126221, "rewards/margins": 13.272889137268066, "rewards/rejected": -17.656692504882812, "step": 3374 }, { "epoch": 5.42, "learning_rate": 2.211652794292509e-07, "logits/chosen": -1.532680869102478, "logits/rejected": -1.4243810176849365, "logps/chosen": -155.78079223632812, "logps/rejected": -237.3238525390625, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -5.9549431800842285, "rewards/margins": 10.20772933959961, "rewards/rejected": -16.162673950195312, "step": 3375 }, { "epoch": 5.42, "learning_rate": 2.210661910424098e-07, "logits/chosen": -1.5506293773651123, "logits/rejected": -1.6120247840881348, "logps/chosen": -150.59664916992188, "logps/rejected": -238.9454345703125, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -6.4619221687316895, "rewards/margins": 9.693292617797852, "rewards/rejected": -16.15521240234375, "step": 3376 }, { "epoch": 5.42, "learning_rate": 2.2096710265556874e-07, "logits/chosen": -1.7855098247528076, "logits/rejected": -1.7643678188323975, "logps/chosen": -144.03543090820312, "logps/rejected": -274.50439453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.230337142944336, "rewards/margins": 13.725303649902344, "rewards/rejected": -18.95564079284668, "step": 3377 }, { "epoch": 5.42, "learning_rate": 2.208680142687277e-07, "logits/chosen": -1.5035109519958496, "logits/rejected": -1.5759987831115723, "logps/chosen": -171.74893188476562, "logps/rejected": -282.68402099609375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -9.193686485290527, "rewards/margins": 10.870843887329102, "rewards/rejected": -20.064529418945312, "step": 3378 }, { "epoch": 5.42, "learning_rate": 2.2076892588188663e-07, "logits/chosen": -1.6473997831344604, "logits/rejected": -1.6807217597961426, "logps/chosen": -119.12995910644531, "logps/rejected": -251.85171508789062, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -4.423239707946777, "rewards/margins": 12.765886306762695, "rewards/rejected": -17.189125061035156, "step": 3379 }, { "epoch": 5.43, "learning_rate": 2.2066983749504557e-07, "logits/chosen": -1.4059085845947266, "logits/rejected": -1.4500528573989868, "logps/chosen": -150.53305053710938, "logps/rejected": -272.1643371582031, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.726877212524414, "rewards/margins": 11.987201690673828, "rewards/rejected": -19.714078903198242, "step": 3380 }, { "epoch": 5.43, "learning_rate": 2.205707491082045e-07, "logits/chosen": -1.5820019245147705, "logits/rejected": -1.513116717338562, "logps/chosen": -145.5394744873047, "logps/rejected": -277.168212890625, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -4.813573837280273, "rewards/margins": 13.872626304626465, "rewards/rejected": -18.686199188232422, "step": 3381 }, { "epoch": 5.43, "learning_rate": 2.2047166072136343e-07, "logits/chosen": -1.5477066040039062, "logits/rejected": -1.5952719449996948, "logps/chosen": -167.15444946289062, "logps/rejected": -288.15570068359375, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -7.680694580078125, "rewards/margins": 11.664008140563965, "rewards/rejected": -19.344701766967773, "step": 3382 }, { "epoch": 5.43, "learning_rate": 2.203725723345224e-07, "logits/chosen": -1.4422056674957275, "logits/rejected": -1.5392370223999023, "logps/chosen": -135.5591278076172, "logps/rejected": -247.91094970703125, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -5.057971954345703, "rewards/margins": 10.212977409362793, "rewards/rejected": -15.27094841003418, "step": 3383 }, { "epoch": 5.43, "learning_rate": 2.2027348394768133e-07, "logits/chosen": -1.5374940633773804, "logits/rejected": -1.5760115385055542, "logps/chosen": -101.42708587646484, "logps/rejected": -266.1417236328125, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -3.863496780395508, "rewards/margins": 14.370357513427734, "rewards/rejected": -18.233856201171875, "step": 3384 }, { "epoch": 5.43, "learning_rate": 2.2017439556084026e-07, "logits/chosen": -1.435502290725708, "logits/rejected": -1.3635164499282837, "logps/chosen": -165.69476318359375, "logps/rejected": -265.2212219238281, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -7.772959232330322, "rewards/margins": 10.885974884033203, "rewards/rejected": -18.658935546875, "step": 3385 }, { "epoch": 5.43, "learning_rate": 2.200753071739992e-07, "logits/chosen": -1.5999939441680908, "logits/rejected": -1.6163662672042847, "logps/chosen": -138.12277221679688, "logps/rejected": -288.14154052734375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -5.978157997131348, "rewards/margins": 14.016080856323242, "rewards/rejected": -19.994237899780273, "step": 3386 }, { "epoch": 5.44, "learning_rate": 2.1997621878715813e-07, "logits/chosen": -1.5740634202957153, "logits/rejected": -1.4953091144561768, "logps/chosen": -147.4095001220703, "logps/rejected": -285.9583435058594, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.825496673583984, "rewards/margins": 13.267680168151855, "rewards/rejected": -19.093175888061523, "step": 3387 }, { "epoch": 5.44, "learning_rate": 2.198771304003171e-07, "logits/chosen": -1.474001169204712, "logits/rejected": -1.532721996307373, "logps/chosen": -150.21087646484375, "logps/rejected": -290.87530517578125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -5.657354831695557, "rewards/margins": 14.362232208251953, "rewards/rejected": -20.01958656311035, "step": 3388 }, { "epoch": 5.44, "learning_rate": 2.1977804201347602e-07, "logits/chosen": -1.3892403841018677, "logits/rejected": -1.4526026248931885, "logps/chosen": -138.7139892578125, "logps/rejected": -288.3580627441406, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -5.710198879241943, "rewards/margins": 14.914694786071777, "rewards/rejected": -20.624893188476562, "step": 3389 }, { "epoch": 5.44, "learning_rate": 2.1967895362663493e-07, "logits/chosen": -1.4090297222137451, "logits/rejected": -1.4123625755310059, "logps/chosen": -151.28958129882812, "logps/rejected": -292.968017578125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -7.033416748046875, "rewards/margins": 13.450350761413574, "rewards/rejected": -20.483768463134766, "step": 3390 }, { "epoch": 5.44, "learning_rate": 2.195798652397939e-07, "logits/chosen": -1.6812785863876343, "logits/rejected": -1.715536117553711, "logps/chosen": -125.8580322265625, "logps/rejected": -242.32733154296875, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -4.532454490661621, "rewards/margins": 10.984025955200195, "rewards/rejected": -15.5164794921875, "step": 3391 }, { "epoch": 5.44, "learning_rate": 2.1948077685295282e-07, "logits/chosen": -1.5406553745269775, "logits/rejected": -1.5125868320465088, "logps/chosen": -150.23541259765625, "logps/rejected": -261.8678894042969, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.892866611480713, "rewards/margins": 11.752792358398438, "rewards/rejected": -16.645660400390625, "step": 3392 }, { "epoch": 5.45, "learning_rate": 2.1938168846611178e-07, "logits/chosen": -1.5411978960037231, "logits/rejected": -1.4925578832626343, "logps/chosen": -209.87864685058594, "logps/rejected": -335.8096618652344, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/chosen": -9.847379684448242, "rewards/margins": 13.121818542480469, "rewards/rejected": -22.96919822692871, "step": 3393 }, { "epoch": 5.45, "learning_rate": 2.1928260007927071e-07, "logits/chosen": -1.47858726978302, "logits/rejected": -1.4731569290161133, "logps/chosen": -172.7291717529297, "logps/rejected": -258.4764099121094, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -7.767223834991455, "rewards/margins": 9.968217849731445, "rewards/rejected": -17.735441207885742, "step": 3394 }, { "epoch": 5.45, "learning_rate": 2.1918351169242962e-07, "logits/chosen": -1.4767916202545166, "logits/rejected": -1.5431993007659912, "logps/chosen": -162.16171264648438, "logps/rejected": -347.032958984375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -8.341720581054688, "rewards/margins": 14.830513000488281, "rewards/rejected": -23.17223358154297, "step": 3395 }, { "epoch": 5.45, "learning_rate": 2.1908442330558858e-07, "logits/chosen": -1.5759713649749756, "logits/rejected": -1.590254306793213, "logps/chosen": -228.2081298828125, "logps/rejected": -327.6415710449219, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -9.776154518127441, "rewards/margins": 10.774214744567871, "rewards/rejected": -20.55036735534668, "step": 3396 }, { "epoch": 5.45, "learning_rate": 2.1898533491874751e-07, "logits/chosen": -1.4583839178085327, "logits/rejected": -1.531585931777954, "logps/chosen": -159.20938110351562, "logps/rejected": -274.0878601074219, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.6368913650512695, "rewards/margins": 11.348533630371094, "rewards/rejected": -17.985424041748047, "step": 3397 }, { "epoch": 5.45, "learning_rate": 2.1888624653190645e-07, "logits/chosen": -1.6541659832000732, "logits/rejected": -1.6589959859848022, "logps/chosen": -125.59602355957031, "logps/rejected": -249.15585327148438, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -5.201717376708984, "rewards/margins": 12.439949035644531, "rewards/rejected": -17.641666412353516, "step": 3398 }, { "epoch": 5.46, "learning_rate": 2.1878715814506538e-07, "logits/chosen": -1.5422694683074951, "logits/rejected": -1.558213710784912, "logps/chosen": -98.25377655029297, "logps/rejected": -233.5749969482422, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.1022322177886963, "rewards/margins": 14.079442024230957, "rewards/rejected": -17.18167495727539, "step": 3399 }, { "epoch": 5.46, "learning_rate": 2.1868806975822431e-07, "logits/chosen": -1.5851092338562012, "logits/rejected": -1.5383484363555908, "logps/chosen": -117.46080017089844, "logps/rejected": -273.9396057128906, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -3.9035251140594482, "rewards/margins": 14.74166202545166, "rewards/rejected": -18.645187377929688, "step": 3400 }, { "epoch": 5.46, "learning_rate": 2.1858898137138327e-07, "logits/chosen": -1.4096810817718506, "logits/rejected": -1.4691529273986816, "logps/chosen": -188.3643798828125, "logps/rejected": -331.3023681640625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -9.140613555908203, "rewards/margins": 13.731876373291016, "rewards/rejected": -22.87248992919922, "step": 3401 }, { "epoch": 5.46, "learning_rate": 2.184898929845422e-07, "logits/chosen": -1.6868270635604858, "logits/rejected": -1.5930402278900146, "logps/chosen": -161.64739990234375, "logps/rejected": -291.76275634765625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.854515075683594, "rewards/margins": 15.682470321655273, "rewards/rejected": -21.536985397338867, "step": 3402 }, { "epoch": 5.46, "learning_rate": 2.1839080459770114e-07, "logits/chosen": -1.3915770053863525, "logits/rejected": -1.3953169584274292, "logps/chosen": -97.4369125366211, "logps/rejected": -196.7766571044922, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -4.489727020263672, "rewards/margins": 9.68229866027832, "rewards/rejected": -14.172026634216309, "step": 3403 }, { "epoch": 5.46, "learning_rate": 2.1829171621086007e-07, "logits/chosen": -1.530609130859375, "logits/rejected": -1.564410924911499, "logps/chosen": -197.47984313964844, "logps/rejected": -284.98614501953125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -9.315011024475098, "rewards/margins": 9.376608848571777, "rewards/rejected": -18.691619873046875, "step": 3404 }, { "epoch": 5.47, "learning_rate": 2.18192627824019e-07, "logits/chosen": -1.5130506753921509, "logits/rejected": -1.541816234588623, "logps/chosen": -169.7334747314453, "logps/rejected": -274.76068115234375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -8.375558853149414, "rewards/margins": 10.44033432006836, "rewards/rejected": -18.815893173217773, "step": 3405 }, { "epoch": 5.47, "learning_rate": 2.1809353943717797e-07, "logits/chosen": -1.4647538661956787, "logits/rejected": -1.5671467781066895, "logps/chosen": -118.93898010253906, "logps/rejected": -298.5596923828125, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -5.821059226989746, "rewards/margins": 14.004740715026855, "rewards/rejected": -19.825801849365234, "step": 3406 }, { "epoch": 5.47, "learning_rate": 2.179944510503369e-07, "logits/chosen": -1.4423624277114868, "logits/rejected": -1.4711577892303467, "logps/chosen": -145.74716186523438, "logps/rejected": -269.322998046875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -6.104604721069336, "rewards/margins": 10.80618667602539, "rewards/rejected": -16.910791397094727, "step": 3407 }, { "epoch": 5.47, "learning_rate": 2.1789536266349583e-07, "logits/chosen": -1.4702683687210083, "logits/rejected": -1.4496498107910156, "logps/chosen": -127.46072387695312, "logps/rejected": -237.20611572265625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.806929588317871, "rewards/margins": 12.100058555603027, "rewards/rejected": -16.9069881439209, "step": 3408 }, { "epoch": 5.47, "learning_rate": 2.1779627427665477e-07, "logits/chosen": -1.4614458084106445, "logits/rejected": -1.4422098398208618, "logps/chosen": -150.91543579101562, "logps/rejected": -260.36627197265625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.6605634689331055, "rewards/margins": 10.555765151977539, "rewards/rejected": -17.216327667236328, "step": 3409 }, { "epoch": 5.47, "learning_rate": 2.176971858898137e-07, "logits/chosen": -1.4418106079101562, "logits/rejected": -1.4834110736846924, "logps/chosen": -162.11143493652344, "logps/rejected": -253.22183227539062, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.522298812866211, "rewards/margins": 8.737313270568848, "rewards/rejected": -17.259613037109375, "step": 3410 }, { "epoch": 5.48, "learning_rate": 2.1759809750297263e-07, "logits/chosen": -1.3720133304595947, "logits/rejected": -1.4534249305725098, "logps/chosen": -148.52078247070312, "logps/rejected": -326.36737060546875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.168240070343018, "rewards/margins": 16.55620574951172, "rewards/rejected": -22.724445343017578, "step": 3411 }, { "epoch": 5.48, "learning_rate": 2.174990091161316e-07, "logits/chosen": -1.443222999572754, "logits/rejected": -1.4689271450042725, "logps/chosen": -117.4993896484375, "logps/rejected": -213.51210021972656, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -4.965090274810791, "rewards/margins": 10.409432411193848, "rewards/rejected": -15.37452220916748, "step": 3412 }, { "epoch": 5.48, "learning_rate": 2.173999207292905e-07, "logits/chosen": -1.5441243648529053, "logits/rejected": -1.501905083656311, "logps/chosen": -133.42288208007812, "logps/rejected": -249.25625610351562, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -4.117092132568359, "rewards/margins": 13.081611633300781, "rewards/rejected": -17.19870376586914, "step": 3413 }, { "epoch": 5.48, "learning_rate": 2.1730083234244946e-07, "logits/chosen": -1.524592638015747, "logits/rejected": -1.4863003492355347, "logps/chosen": -102.32524108886719, "logps/rejected": -273.52764892578125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -3.379734516143799, "rewards/margins": 15.495105743408203, "rewards/rejected": -18.874839782714844, "step": 3414 }, { "epoch": 5.48, "learning_rate": 2.172017439556084e-07, "logits/chosen": -1.3711071014404297, "logits/rejected": -1.5472300052642822, "logps/chosen": -155.7696533203125, "logps/rejected": -295.6031799316406, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -7.725275993347168, "rewards/margins": 12.450275421142578, "rewards/rejected": -20.17555046081543, "step": 3415 }, { "epoch": 5.48, "learning_rate": 2.1710265556876733e-07, "logits/chosen": -1.3435040712356567, "logits/rejected": -1.3740893602371216, "logps/chosen": -107.17675018310547, "logps/rejected": -239.00540161132812, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.672761917114258, "rewards/margins": 11.973121643066406, "rewards/rejected": -16.64588165283203, "step": 3416 }, { "epoch": 5.48, "learning_rate": 2.1700356718192629e-07, "logits/chosen": -1.3675215244293213, "logits/rejected": -1.3907420635223389, "logps/chosen": -145.81846618652344, "logps/rejected": -292.1810302734375, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -6.917076110839844, "rewards/margins": 14.353343963623047, "rewards/rejected": -21.270421981811523, "step": 3417 }, { "epoch": 5.49, "learning_rate": 2.169044787950852e-07, "logits/chosen": -1.454232931137085, "logits/rejected": -1.4496525526046753, "logps/chosen": -192.88731384277344, "logps/rejected": -301.1120910644531, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -9.94334602355957, "rewards/margins": 9.59316635131836, "rewards/rejected": -19.53651237487793, "step": 3418 }, { "epoch": 5.49, "learning_rate": 2.1680539040824413e-07, "logits/chosen": -1.4443333148956299, "logits/rejected": -1.5949188470840454, "logps/chosen": -163.4739990234375, "logps/rejected": -267.27728271484375, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -7.709090232849121, "rewards/margins": 8.831838607788086, "rewards/rejected": -16.540929794311523, "step": 3419 }, { "epoch": 5.49, "learning_rate": 2.1670630202140309e-07, "logits/chosen": -1.4827373027801514, "logits/rejected": -1.494735598564148, "logps/chosen": -114.93074035644531, "logps/rejected": -264.31060791015625, "loss": 0.0059, "rewards/accuracies": 1.0, "rewards/chosen": -4.187319755554199, "rewards/margins": 13.390203475952148, "rewards/rejected": -17.577524185180664, "step": 3420 }, { "epoch": 5.49, "learning_rate": 2.1660721363456202e-07, "logits/chosen": -1.5603173971176147, "logits/rejected": -1.5269737243652344, "logps/chosen": -126.33602905273438, "logps/rejected": -245.9889373779297, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -5.3714752197265625, "rewards/margins": 11.540634155273438, "rewards/rejected": -16.912109375, "step": 3421 }, { "epoch": 5.49, "learning_rate": 2.1650812524772098e-07, "logits/chosen": -1.3887673616409302, "logits/rejected": -1.4665441513061523, "logps/chosen": -154.67999267578125, "logps/rejected": -293.91424560546875, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -6.805274486541748, "rewards/margins": 13.488457679748535, "rewards/rejected": -20.293731689453125, "step": 3422 }, { "epoch": 5.49, "learning_rate": 2.1640903686087989e-07, "logits/chosen": -1.5458590984344482, "logits/rejected": -1.56844162940979, "logps/chosen": -158.26779174804688, "logps/rejected": -304.8733215332031, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -7.423677444458008, "rewards/margins": 14.188054084777832, "rewards/rejected": -21.611732482910156, "step": 3423 }, { "epoch": 5.5, "learning_rate": 2.1630994847403882e-07, "logits/chosen": -1.4092642068862915, "logits/rejected": -1.566103219985962, "logps/chosen": -139.1046600341797, "logps/rejected": -322.3624572753906, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -6.243317604064941, "rewards/margins": 14.497196197509766, "rewards/rejected": -20.74051284790039, "step": 3424 }, { "epoch": 5.5, "learning_rate": 2.1621086008719778e-07, "logits/chosen": -1.4919686317443848, "logits/rejected": -1.5348799228668213, "logps/chosen": -135.2732391357422, "logps/rejected": -253.70298767089844, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -6.664247512817383, "rewards/margins": 11.335546493530273, "rewards/rejected": -17.999794006347656, "step": 3425 }, { "epoch": 5.5, "learning_rate": 2.161117717003567e-07, "logits/chosen": -1.484498381614685, "logits/rejected": -1.5397371053695679, "logps/chosen": -141.9759063720703, "logps/rejected": -303.1224365234375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -6.133430004119873, "rewards/margins": 14.223925590515137, "rewards/rejected": -20.35735511779785, "step": 3426 }, { "epoch": 5.5, "learning_rate": 2.1601268331351567e-07, "logits/chosen": -1.4503648281097412, "logits/rejected": -1.4295748472213745, "logps/chosen": -160.02755737304688, "logps/rejected": -266.31207275390625, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -6.710765838623047, "rewards/margins": 11.482879638671875, "rewards/rejected": -18.193645477294922, "step": 3427 }, { "epoch": 5.5, "learning_rate": 2.1591359492667458e-07, "logits/chosen": -1.5500283241271973, "logits/rejected": -1.6177650690078735, "logps/chosen": -143.53424072265625, "logps/rejected": -283.0515441894531, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.169302463531494, "rewards/margins": 13.649847984313965, "rewards/rejected": -19.819149017333984, "step": 3428 }, { "epoch": 5.5, "learning_rate": 2.158145065398335e-07, "logits/chosen": -1.4650323390960693, "logits/rejected": -1.4889445304870605, "logps/chosen": -169.26470947265625, "logps/rejected": -296.1282958984375, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -6.847884178161621, "rewards/margins": 12.830238342285156, "rewards/rejected": -19.67812156677246, "step": 3429 }, { "epoch": 5.51, "learning_rate": 2.1571541815299247e-07, "logits/chosen": -1.5286608934402466, "logits/rejected": -1.6418735980987549, "logps/chosen": -188.9846649169922, "logps/rejected": -297.3423156738281, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -8.809849739074707, "rewards/margins": 9.70708179473877, "rewards/rejected": -18.51693344116211, "step": 3430 }, { "epoch": 5.51, "learning_rate": 2.156163297661514e-07, "logits/chosen": -1.4271926879882812, "logits/rejected": -1.3851617574691772, "logps/chosen": -161.28384399414062, "logps/rejected": -250.90151977539062, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.6663994789123535, "rewards/margins": 12.147462844848633, "rewards/rejected": -16.813861846923828, "step": 3431 }, { "epoch": 5.51, "learning_rate": 2.155172413793103e-07, "logits/chosen": -1.7137434482574463, "logits/rejected": -1.7398827075958252, "logps/chosen": -157.59165954589844, "logps/rejected": -268.9953308105469, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -6.546286582946777, "rewards/margins": 11.670187950134277, "rewards/rejected": -18.216474533081055, "step": 3432 }, { "epoch": 5.51, "learning_rate": 2.1541815299246927e-07, "logits/chosen": -1.534991979598999, "logits/rejected": -1.4968020915985107, "logps/chosen": -105.90400695800781, "logps/rejected": -226.97796630859375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -3.785414695739746, "rewards/margins": 12.468313217163086, "rewards/rejected": -16.25372886657715, "step": 3433 }, { "epoch": 5.51, "learning_rate": 2.153190646056282e-07, "logits/chosen": -1.4106501340866089, "logits/rejected": -1.4255609512329102, "logps/chosen": -176.66073608398438, "logps/rejected": -274.36572265625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -8.97189712524414, "rewards/margins": 10.777088165283203, "rewards/rejected": -19.748985290527344, "step": 3434 }, { "epoch": 5.51, "learning_rate": 2.1521997621878716e-07, "logits/chosen": -1.7300175428390503, "logits/rejected": -1.6854029893875122, "logps/chosen": -128.77200317382812, "logps/rejected": -273.62225341796875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.551894187927246, "rewards/margins": 14.43295669555664, "rewards/rejected": -19.984851837158203, "step": 3435 }, { "epoch": 5.52, "learning_rate": 2.151208878319461e-07, "logits/chosen": -1.4227174520492554, "logits/rejected": -1.4231430292129517, "logps/chosen": -144.3121337890625, "logps/rejected": -311.919189453125, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -5.6403489112854, "rewards/margins": 16.390661239624023, "rewards/rejected": -22.031009674072266, "step": 3436 }, { "epoch": 5.52, "learning_rate": 2.15021799445105e-07, "logits/chosen": -1.440816879272461, "logits/rejected": -1.4553241729736328, "logps/chosen": -114.87913513183594, "logps/rejected": -227.59336853027344, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.729645252227783, "rewards/margins": 11.623116493225098, "rewards/rejected": -15.352761268615723, "step": 3437 }, { "epoch": 5.52, "learning_rate": 2.1492271105826396e-07, "logits/chosen": -1.4858475923538208, "logits/rejected": -1.4217169284820557, "logps/chosen": -209.4885711669922, "logps/rejected": -283.1610412597656, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.446978569030762, "rewards/margins": 9.776723861694336, "rewards/rejected": -18.223703384399414, "step": 3438 }, { "epoch": 5.52, "learning_rate": 2.148236226714229e-07, "logits/chosen": -1.5462822914123535, "logits/rejected": -1.4937914609909058, "logps/chosen": -163.20535278320312, "logps/rejected": -295.1076354980469, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -6.032345771789551, "rewards/margins": 12.072263717651367, "rewards/rejected": -18.1046085357666, "step": 3439 }, { "epoch": 5.52, "learning_rate": 2.1472453428458183e-07, "logits/chosen": -1.561997413635254, "logits/rejected": -1.5506079196929932, "logps/chosen": -162.46746826171875, "logps/rejected": -277.029296875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.701944351196289, "rewards/margins": 12.85633659362793, "rewards/rejected": -19.55828094482422, "step": 3440 }, { "epoch": 5.52, "learning_rate": 2.146254458977408e-07, "logits/chosen": -1.5318663120269775, "logits/rejected": -1.5665340423583984, "logps/chosen": -160.5145263671875, "logps/rejected": -339.3807373046875, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -7.1625776290893555, "rewards/margins": 16.76846694946289, "rewards/rejected": -23.931041717529297, "step": 3441 }, { "epoch": 5.52, "learning_rate": 2.145263575108997e-07, "logits/chosen": -1.5023705959320068, "logits/rejected": -1.4268518686294556, "logps/chosen": -231.8016815185547, "logps/rejected": -264.9580383300781, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -8.721844673156738, "rewards/margins": 8.513740539550781, "rewards/rejected": -17.235586166381836, "step": 3442 }, { "epoch": 5.53, "learning_rate": 2.1442726912405866e-07, "logits/chosen": -1.5287936925888062, "logits/rejected": -1.4619197845458984, "logps/chosen": -182.6705780029297, "logps/rejected": -293.1990966796875, "loss": 0.0056, "rewards/accuracies": 1.0, "rewards/chosen": -8.571776390075684, "rewards/margins": 12.777178764343262, "rewards/rejected": -21.348955154418945, "step": 3443 }, { "epoch": 5.53, "learning_rate": 2.143281807372176e-07, "logits/chosen": -1.4586869478225708, "logits/rejected": -1.497658371925354, "logps/chosen": -149.40089416503906, "logps/rejected": -286.95098876953125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -6.645305633544922, "rewards/margins": 13.105929374694824, "rewards/rejected": -19.75123405456543, "step": 3444 }, { "epoch": 5.53, "learning_rate": 2.1422909235037652e-07, "logits/chosen": -1.6375882625579834, "logits/rejected": -1.662219524383545, "logps/chosen": -119.6922607421875, "logps/rejected": -257.5961608886719, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -4.347027778625488, "rewards/margins": 13.006986618041992, "rewards/rejected": -17.354015350341797, "step": 3445 }, { "epoch": 5.53, "learning_rate": 2.1413000396353548e-07, "logits/chosen": -1.3750144243240356, "logits/rejected": -1.518882393836975, "logps/chosen": -113.49573516845703, "logps/rejected": -245.63491821289062, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -5.284154891967773, "rewards/margins": 10.61517333984375, "rewards/rejected": -15.899328231811523, "step": 3446 }, { "epoch": 5.53, "learning_rate": 2.140309155766944e-07, "logits/chosen": -1.420674443244934, "logits/rejected": -1.4245469570159912, "logps/chosen": -146.1182861328125, "logps/rejected": -256.2110595703125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.880023002624512, "rewards/margins": 10.483912467956543, "rewards/rejected": -17.363935470581055, "step": 3447 }, { "epoch": 5.53, "learning_rate": 2.1393182718985332e-07, "logits/chosen": -1.4224376678466797, "logits/rejected": -1.4667400121688843, "logps/chosen": -157.93873596191406, "logps/rejected": -319.5220642089844, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": -7.863391876220703, "rewards/margins": 14.624561309814453, "rewards/rejected": -22.487951278686523, "step": 3448 }, { "epoch": 5.54, "learning_rate": 2.1383273880301228e-07, "logits/chosen": -1.5504735708236694, "logits/rejected": -1.6136361360549927, "logps/chosen": -126.59565734863281, "logps/rejected": -279.4231262207031, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.845088958740234, "rewards/margins": 12.714363098144531, "rewards/rejected": -18.559452056884766, "step": 3449 }, { "epoch": 5.54, "learning_rate": 2.1373365041617122e-07, "logits/chosen": -1.475582480430603, "logits/rejected": -1.4415696859359741, "logps/chosen": -133.4188995361328, "logps/rejected": -290.6744079589844, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -4.813361644744873, "rewards/margins": 16.151216506958008, "rewards/rejected": -20.964580535888672, "step": 3450 }, { "epoch": 5.54, "learning_rate": 2.1363456202933015e-07, "logits/chosen": -1.4117300510406494, "logits/rejected": -1.483047604560852, "logps/chosen": -152.42173767089844, "logps/rejected": -312.80426025390625, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -6.858860015869141, "rewards/margins": 15.626880645751953, "rewards/rejected": -22.485740661621094, "step": 3451 }, { "epoch": 5.54, "learning_rate": 2.1353547364248908e-07, "logits/chosen": -1.4515260457992554, "logits/rejected": -1.4625074863433838, "logps/chosen": -136.45709228515625, "logps/rejected": -269.1407470703125, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -5.452543258666992, "rewards/margins": 12.845173835754395, "rewards/rejected": -18.29771614074707, "step": 3452 }, { "epoch": 5.54, "learning_rate": 2.1343638525564802e-07, "logits/chosen": -1.5901306867599487, "logits/rejected": -1.6348779201507568, "logps/chosen": -230.0763397216797, "logps/rejected": -355.6812744140625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -11.871137619018555, "rewards/margins": 10.222611427307129, "rewards/rejected": -22.09375, "step": 3453 }, { "epoch": 5.54, "learning_rate": 2.1333729686880698e-07, "logits/chosen": -1.4135034084320068, "logits/rejected": -1.4534860849380493, "logps/chosen": -115.4327392578125, "logps/rejected": -229.9302978515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.28120756149292, "rewards/margins": 10.383345603942871, "rewards/rejected": -15.664552688598633, "step": 3454 }, { "epoch": 5.55, "learning_rate": 2.132382084819659e-07, "logits/chosen": -1.545456886291504, "logits/rejected": -1.5549765825271606, "logps/chosen": -154.99961853027344, "logps/rejected": -304.4394836425781, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -8.052010536193848, "rewards/margins": 12.902388572692871, "rewards/rejected": -20.95439910888672, "step": 3455 }, { "epoch": 5.55, "learning_rate": 2.1313912009512484e-07, "logits/chosen": -1.513218641281128, "logits/rejected": -1.4968972206115723, "logps/chosen": -169.7577667236328, "logps/rejected": -305.21429443359375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -6.250058174133301, "rewards/margins": 14.359582901000977, "rewards/rejected": -20.60964012145996, "step": 3456 }, { "epoch": 5.55, "learning_rate": 2.1304003170828378e-07, "logits/chosen": -1.4491702318191528, "logits/rejected": -1.4875330924987793, "logps/chosen": -135.46507263183594, "logps/rejected": -266.0128173828125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -4.584984302520752, "rewards/margins": 12.297099113464355, "rewards/rejected": -16.882083892822266, "step": 3457 }, { "epoch": 5.55, "learning_rate": 2.129409433214427e-07, "logits/chosen": -1.4234471321105957, "logits/rejected": -1.5615057945251465, "logps/chosen": -165.02728271484375, "logps/rejected": -291.1048278808594, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -8.628448486328125, "rewards/margins": 10.694999694824219, "rewards/rejected": -19.323448181152344, "step": 3458 }, { "epoch": 5.55, "learning_rate": 2.1284185493460167e-07, "logits/chosen": -1.5023926496505737, "logits/rejected": -1.5145243406295776, "logps/chosen": -166.86395263671875, "logps/rejected": -322.5313720703125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.474535942077637, "rewards/margins": 13.365673065185547, "rewards/rejected": -21.840208053588867, "step": 3459 }, { "epoch": 5.55, "learning_rate": 2.127427665477606e-07, "logits/chosen": -1.4995983839035034, "logits/rejected": -1.6318519115447998, "logps/chosen": -147.55908203125, "logps/rejected": -263.0432434082031, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.746371746063232, "rewards/margins": 9.30437183380127, "rewards/rejected": -17.050743103027344, "step": 3460 }, { "epoch": 5.56, "learning_rate": 2.126436781609195e-07, "logits/chosen": -1.4209668636322021, "logits/rejected": -1.4657387733459473, "logps/chosen": -199.90377807617188, "logps/rejected": -326.2291259765625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -9.882694244384766, "rewards/margins": 12.913928031921387, "rewards/rejected": -22.79662322998047, "step": 3461 }, { "epoch": 5.56, "learning_rate": 2.1254458977407847e-07, "logits/chosen": -1.510312557220459, "logits/rejected": -1.526473045349121, "logps/chosen": -104.86387634277344, "logps/rejected": -236.08953857421875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -4.114480495452881, "rewards/margins": 12.498226165771484, "rewards/rejected": -16.612707138061523, "step": 3462 }, { "epoch": 5.56, "learning_rate": 2.124455013872374e-07, "logits/chosen": -1.6254152059555054, "logits/rejected": -1.5807161331176758, "logps/chosen": -172.88888549804688, "logps/rejected": -278.616943359375, "loss": 0.0075, "rewards/accuracies": 1.0, "rewards/chosen": -6.6392130851745605, "rewards/margins": 12.036291122436523, "rewards/rejected": -18.67550277709961, "step": 3463 }, { "epoch": 5.56, "learning_rate": 2.1234641300039636e-07, "logits/chosen": -1.580838918685913, "logits/rejected": -1.449442744255066, "logps/chosen": -167.49075317382812, "logps/rejected": -256.189453125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -7.328787803649902, "rewards/margins": 10.541651725769043, "rewards/rejected": -17.870439529418945, "step": 3464 }, { "epoch": 5.56, "learning_rate": 2.122473246135553e-07, "logits/chosen": -1.467173457145691, "logits/rejected": -1.42405366897583, "logps/chosen": -128.48675537109375, "logps/rejected": -219.92221069335938, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -4.125523567199707, "rewards/margins": 10.216520309448242, "rewards/rejected": -14.34204387664795, "step": 3465 }, { "epoch": 5.56, "learning_rate": 2.121482362267142e-07, "logits/chosen": -1.536937952041626, "logits/rejected": -1.5816075801849365, "logps/chosen": -120.34854125976562, "logps/rejected": -264.47314453125, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": -4.967096328735352, "rewards/margins": 13.329809188842773, "rewards/rejected": -18.296905517578125, "step": 3466 }, { "epoch": 5.57, "learning_rate": 2.1204914783987316e-07, "logits/chosen": -1.5219027996063232, "logits/rejected": -1.5078930854797363, "logps/chosen": -163.04747009277344, "logps/rejected": -305.27471923828125, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -7.313136100769043, "rewards/margins": 13.732112884521484, "rewards/rejected": -21.045249938964844, "step": 3467 }, { "epoch": 5.57, "learning_rate": 2.119500594530321e-07, "logits/chosen": -1.588708758354187, "logits/rejected": -1.538028359413147, "logps/chosen": -175.19378662109375, "logps/rejected": -268.5478820800781, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.663297653198242, "rewards/margins": 9.261184692382812, "rewards/rejected": -17.924482345581055, "step": 3468 }, { "epoch": 5.57, "learning_rate": 2.1185097106619106e-07, "logits/chosen": -1.5705335140228271, "logits/rejected": -1.5216724872589111, "logps/chosen": -165.31146240234375, "logps/rejected": -282.43084716796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.5694098472595215, "rewards/margins": 12.201087951660156, "rewards/rejected": -18.770496368408203, "step": 3469 }, { "epoch": 5.57, "learning_rate": 2.1175188267934996e-07, "logits/chosen": -1.480363368988037, "logits/rejected": -1.4656383991241455, "logps/chosen": -190.98443603515625, "logps/rejected": -305.0986328125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -9.11679458618164, "rewards/margins": 12.317111015319824, "rewards/rejected": -21.43390464782715, "step": 3470 }, { "epoch": 5.57, "learning_rate": 2.116527942925089e-07, "logits/chosen": -1.619957685470581, "logits/rejected": -1.546367883682251, "logps/chosen": -153.62782287597656, "logps/rejected": -268.7706298828125, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -6.356766700744629, "rewards/margins": 12.042521476745605, "rewards/rejected": -18.399288177490234, "step": 3471 }, { "epoch": 5.57, "learning_rate": 2.1155370590566785e-07, "logits/chosen": -1.6157139539718628, "logits/rejected": -1.5353957414627075, "logps/chosen": -129.51522827148438, "logps/rejected": -270.95965576171875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.2077975273132324, "rewards/margins": 14.3507661819458, "rewards/rejected": -17.558563232421875, "step": 3472 }, { "epoch": 5.57, "learning_rate": 2.114546175188268e-07, "logits/chosen": -1.411192536354065, "logits/rejected": -1.4063540697097778, "logps/chosen": -172.2589111328125, "logps/rejected": -272.8524475097656, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -7.989190101623535, "rewards/margins": 10.653397560119629, "rewards/rejected": -18.642587661743164, "step": 3473 }, { "epoch": 5.58, "learning_rate": 2.1135552913198572e-07, "logits/chosen": -1.598246693611145, "logits/rejected": -1.4570465087890625, "logps/chosen": -151.0003204345703, "logps/rejected": -236.34561157226562, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -6.301996231079102, "rewards/margins": 10.25024127960205, "rewards/rejected": -16.55223846435547, "step": 3474 }, { "epoch": 5.58, "learning_rate": 2.1125644074514465e-07, "logits/chosen": -1.6418222188949585, "logits/rejected": -1.6030536890029907, "logps/chosen": -101.15557098388672, "logps/rejected": -223.5531463623047, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -3.030444622039795, "rewards/margins": 13.175436019897461, "rewards/rejected": -16.205881118774414, "step": 3475 }, { "epoch": 5.58, "learning_rate": 2.111573523583036e-07, "logits/chosen": -1.572303056716919, "logits/rejected": -1.4535349607467651, "logps/chosen": -183.78753662109375, "logps/rejected": -268.50311279296875, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -8.888599395751953, "rewards/margins": 10.309011459350586, "rewards/rejected": -19.19761085510254, "step": 3476 }, { "epoch": 5.58, "learning_rate": 2.1105826397146255e-07, "logits/chosen": -1.3945050239562988, "logits/rejected": -1.413754940032959, "logps/chosen": -154.4713134765625, "logps/rejected": -276.90478515625, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -8.282293319702148, "rewards/margins": 10.809566497802734, "rewards/rejected": -19.091859817504883, "step": 3477 }, { "epoch": 5.58, "learning_rate": 2.1095917558462148e-07, "logits/chosen": -1.4268770217895508, "logits/rejected": -1.377671241760254, "logps/chosen": -149.87142944335938, "logps/rejected": -231.5007781982422, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -6.529449462890625, "rewards/margins": 10.307394027709961, "rewards/rejected": -16.836843490600586, "step": 3478 }, { "epoch": 5.58, "learning_rate": 2.1086008719778041e-07, "logits/chosen": -1.524371862411499, "logits/rejected": -1.5602476596832275, "logps/chosen": -153.4820098876953, "logps/rejected": -267.6202697753906, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.067951202392578, "rewards/margins": 9.18103313446045, "rewards/rejected": -17.24898338317871, "step": 3479 }, { "epoch": 5.59, "learning_rate": 2.1076099881093935e-07, "logits/chosen": -1.5817943811416626, "logits/rejected": -1.513641595840454, "logps/chosen": -177.39862060546875, "logps/rejected": -269.17144775390625, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -7.720231056213379, "rewards/margins": 11.374757766723633, "rewards/rejected": -19.094989776611328, "step": 3480 }, { "epoch": 5.59, "learning_rate": 2.1066191042409828e-07, "logits/chosen": -1.506537675857544, "logits/rejected": -1.5210570096969604, "logps/chosen": -141.04534912109375, "logps/rejected": -242.78671264648438, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -5.220089912414551, "rewards/margins": 9.612577438354492, "rewards/rejected": -14.83266830444336, "step": 3481 }, { "epoch": 5.59, "learning_rate": 2.1056282203725721e-07, "logits/chosen": -1.555442452430725, "logits/rejected": -1.6271581649780273, "logps/chosen": -144.35475158691406, "logps/rejected": -296.9488830566406, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -5.988823890686035, "rewards/margins": 14.041683197021484, "rewards/rejected": -20.030508041381836, "step": 3482 }, { "epoch": 5.59, "learning_rate": 2.1046373365041617e-07, "logits/chosen": -1.6058940887451172, "logits/rejected": -1.6205238103866577, "logps/chosen": -147.2259063720703, "logps/rejected": -277.75469970703125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.05565071105957, "rewards/margins": 14.026470184326172, "rewards/rejected": -20.082120895385742, "step": 3483 }, { "epoch": 5.59, "learning_rate": 2.1036464526357508e-07, "logits/chosen": -1.467818021774292, "logits/rejected": -1.5783430337905884, "logps/chosen": -159.65179443359375, "logps/rejected": -281.60931396484375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -8.15143871307373, "rewards/margins": 11.02839183807373, "rewards/rejected": -19.179832458496094, "step": 3484 }, { "epoch": 5.59, "learning_rate": 2.1026555687673404e-07, "logits/chosen": -1.5809314250946045, "logits/rejected": -1.5349500179290771, "logps/chosen": -151.61505126953125, "logps/rejected": -244.9976348876953, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -5.910013198852539, "rewards/margins": 11.858074188232422, "rewards/rejected": -17.76808738708496, "step": 3485 }, { "epoch": 5.6, "learning_rate": 2.1016646848989297e-07, "logits/chosen": -1.4671688079833984, "logits/rejected": -1.4165728092193604, "logps/chosen": -161.30531311035156, "logps/rejected": -294.05560302734375, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -7.514042377471924, "rewards/margins": 12.113029479980469, "rewards/rejected": -19.627071380615234, "step": 3486 }, { "epoch": 5.6, "learning_rate": 2.100673801030519e-07, "logits/chosen": -1.4841006994247437, "logits/rejected": -1.4929653406143188, "logps/chosen": -147.3690185546875, "logps/rejected": -285.8001708984375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -6.313442230224609, "rewards/margins": 12.737858772277832, "rewards/rejected": -19.051300048828125, "step": 3487 }, { "epoch": 5.6, "learning_rate": 2.0996829171621087e-07, "logits/chosen": -1.4345812797546387, "logits/rejected": -1.5191748142242432, "logps/chosen": -161.46340942382812, "logps/rejected": -255.54258728027344, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -6.819353103637695, "rewards/margins": 9.715951919555664, "rewards/rejected": -16.53530502319336, "step": 3488 }, { "epoch": 5.6, "learning_rate": 2.0986920332936977e-07, "logits/chosen": -1.441930890083313, "logits/rejected": -1.4702404737472534, "logps/chosen": -170.82797241210938, "logps/rejected": -305.4510498046875, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -7.844261646270752, "rewards/margins": 11.98133659362793, "rewards/rejected": -19.825599670410156, "step": 3489 }, { "epoch": 5.6, "learning_rate": 2.097701149425287e-07, "logits/chosen": -1.537670612335205, "logits/rejected": -1.4510598182678223, "logps/chosen": -163.748779296875, "logps/rejected": -318.516845703125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -6.953666687011719, "rewards/margins": 14.541647911071777, "rewards/rejected": -21.495315551757812, "step": 3490 }, { "epoch": 5.6, "learning_rate": 2.0967102655568767e-07, "logits/chosen": -1.622631549835205, "logits/rejected": -1.6390390396118164, "logps/chosen": -141.270263671875, "logps/rejected": -254.4559326171875, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -4.802359580993652, "rewards/margins": 11.843047142028809, "rewards/rejected": -16.64540672302246, "step": 3491 }, { "epoch": 5.61, "learning_rate": 2.095719381688466e-07, "logits/chosen": -1.449061393737793, "logits/rejected": -1.5777435302734375, "logps/chosen": -128.67315673828125, "logps/rejected": -268.0546875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -6.2907233238220215, "rewards/margins": 11.80470085144043, "rewards/rejected": -18.09542465209961, "step": 3492 }, { "epoch": 5.61, "learning_rate": 2.0947284978200556e-07, "logits/chosen": -1.5823407173156738, "logits/rejected": -1.550660490989685, "logps/chosen": -146.87701416015625, "logps/rejected": -268.1518859863281, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -6.771166801452637, "rewards/margins": 12.822327613830566, "rewards/rejected": -19.593494415283203, "step": 3493 }, { "epoch": 5.61, "learning_rate": 2.0937376139516447e-07, "logits/chosen": -1.622446060180664, "logits/rejected": -1.6589446067810059, "logps/chosen": -86.09461212158203, "logps/rejected": -205.93997192382812, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -2.3844799995422363, "rewards/margins": 11.903199195861816, "rewards/rejected": -14.287679672241211, "step": 3494 }, { "epoch": 5.61, "learning_rate": 2.092746730083234e-07, "logits/chosen": -1.5718985795974731, "logits/rejected": -1.599583387374878, "logps/chosen": -170.6344757080078, "logps/rejected": -314.8659973144531, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -8.866886138916016, "rewards/margins": 12.410504341125488, "rewards/rejected": -21.277389526367188, "step": 3495 }, { "epoch": 5.61, "learning_rate": 2.0917558462148236e-07, "logits/chosen": -1.3862360715866089, "logits/rejected": -1.3888206481933594, "logps/chosen": -169.1226043701172, "logps/rejected": -307.270263671875, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -7.87095832824707, "rewards/margins": 14.130465507507324, "rewards/rejected": -22.001422882080078, "step": 3496 }, { "epoch": 5.61, "learning_rate": 2.090764962346413e-07, "logits/chosen": -1.4592256546020508, "logits/rejected": -1.5071525573730469, "logps/chosen": -178.7654266357422, "logps/rejected": -272.4892578125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.426994323730469, "rewards/margins": 10.035850524902344, "rewards/rejected": -18.462844848632812, "step": 3497 }, { "epoch": 5.61, "learning_rate": 2.0897740784780025e-07, "logits/chosen": -1.4971421957015991, "logits/rejected": -1.5510032176971436, "logps/chosen": -171.96389770507812, "logps/rejected": -257.8359375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -8.277139663696289, "rewards/margins": 8.066900253295898, "rewards/rejected": -16.344039916992188, "step": 3498 }, { "epoch": 5.62, "learning_rate": 2.0887831946095916e-07, "logits/chosen": -1.6488255262374878, "logits/rejected": -1.6917957067489624, "logps/chosen": -128.809814453125, "logps/rejected": -297.54876708984375, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -5.061434745788574, "rewards/margins": 15.731962203979492, "rewards/rejected": -20.79339599609375, "step": 3499 }, { "epoch": 5.62, "learning_rate": 2.087792310741181e-07, "logits/chosen": -1.657825231552124, "logits/rejected": -1.7020796537399292, "logps/chosen": -128.25462341308594, "logps/rejected": -291.19573974609375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -5.090444564819336, "rewards/margins": 14.861099243164062, "rewards/rejected": -19.9515438079834, "step": 3500 }, { "epoch": 5.62, "learning_rate": 2.0868014268727705e-07, "logits/chosen": -1.5991452932357788, "logits/rejected": -1.637681007385254, "logps/chosen": -113.05462646484375, "logps/rejected": -255.38168334960938, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -4.677969932556152, "rewards/margins": 14.873306274414062, "rewards/rejected": -19.5512752532959, "step": 3501 }, { "epoch": 5.62, "learning_rate": 2.0858105430043599e-07, "logits/chosen": -1.6034021377563477, "logits/rejected": -1.5931682586669922, "logps/chosen": -156.61996459960938, "logps/rejected": -280.9715270996094, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -6.966739654541016, "rewards/margins": 12.928670883178711, "rewards/rejected": -19.895410537719727, "step": 3502 }, { "epoch": 5.62, "learning_rate": 2.084819659135949e-07, "logits/chosen": -1.457820177078247, "logits/rejected": -1.4800223112106323, "logps/chosen": -121.35319519042969, "logps/rejected": -276.4391174316406, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -5.449268817901611, "rewards/margins": 15.169504165649414, "rewards/rejected": -20.618772506713867, "step": 3503 }, { "epoch": 5.62, "learning_rate": 2.0838287752675385e-07, "logits/chosen": -1.5091395378112793, "logits/rejected": -1.5735511779785156, "logps/chosen": -142.26956176757812, "logps/rejected": -306.8866271972656, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.00269889831543, "rewards/margins": 14.74393081665039, "rewards/rejected": -20.74662971496582, "step": 3504 }, { "epoch": 5.63, "learning_rate": 2.0828378913991279e-07, "logits/chosen": -1.378591537475586, "logits/rejected": -1.3285752534866333, "logps/chosen": -144.63565063476562, "logps/rejected": -275.3168029785156, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.038185119628906, "rewards/margins": 12.782560348510742, "rewards/rejected": -19.82074546813965, "step": 3505 }, { "epoch": 5.63, "learning_rate": 2.0818470075307175e-07, "logits/chosen": -1.5825040340423584, "logits/rejected": -1.580885410308838, "logps/chosen": -142.89547729492188, "logps/rejected": -253.44285583496094, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -6.151176452636719, "rewards/margins": 11.703563690185547, "rewards/rejected": -17.854740142822266, "step": 3506 }, { "epoch": 5.63, "learning_rate": 2.0808561236623068e-07, "logits/chosen": -1.4873921871185303, "logits/rejected": -1.5253534317016602, "logps/chosen": -143.24313354492188, "logps/rejected": -292.96026611328125, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -5.160441875457764, "rewards/margins": 13.942279815673828, "rewards/rejected": -19.10272216796875, "step": 3507 }, { "epoch": 5.63, "learning_rate": 2.0798652397938959e-07, "logits/chosen": -1.5862098932266235, "logits/rejected": -1.6957588195800781, "logps/chosen": -176.39666748046875, "logps/rejected": -360.6898193359375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -7.487279415130615, "rewards/margins": 15.480062484741211, "rewards/rejected": -22.967342376708984, "step": 3508 }, { "epoch": 5.63, "learning_rate": 2.0788743559254855e-07, "logits/chosen": -1.5357627868652344, "logits/rejected": -1.5881835222244263, "logps/chosen": -128.56817626953125, "logps/rejected": -267.74334716796875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -5.929163455963135, "rewards/margins": 12.324718475341797, "rewards/rejected": -18.253883361816406, "step": 3509 }, { "epoch": 5.63, "learning_rate": 2.0778834720570748e-07, "logits/chosen": -1.6373027563095093, "logits/rejected": -1.5733379125595093, "logps/chosen": -176.00518798828125, "logps/rejected": -265.058349609375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -7.064338684082031, "rewards/margins": 10.770269393920898, "rewards/rejected": -17.83460807800293, "step": 3510 }, { "epoch": 5.64, "learning_rate": 2.076892588188664e-07, "logits/chosen": -1.5358843803405762, "logits/rejected": -1.4863566160202026, "logps/chosen": -166.96798706054688, "logps/rejected": -229.59681701660156, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -6.249853134155273, "rewards/margins": 9.163719177246094, "rewards/rejected": -15.413572311401367, "step": 3511 }, { "epoch": 5.64, "learning_rate": 2.0759017043202537e-07, "logits/chosen": -1.497086763381958, "logits/rejected": -1.5779889822006226, "logps/chosen": -169.27078247070312, "logps/rejected": -297.73388671875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.500387191772461, "rewards/margins": 12.184248924255371, "rewards/rejected": -20.68463706970215, "step": 3512 }, { "epoch": 5.64, "learning_rate": 2.0749108204518428e-07, "logits/chosen": -1.445956826210022, "logits/rejected": -1.4788063764572144, "logps/chosen": -161.59393310546875, "logps/rejected": -267.75799560546875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.866689682006836, "rewards/margins": 10.481036186218262, "rewards/rejected": -19.34772491455078, "step": 3513 }, { "epoch": 5.64, "learning_rate": 2.0739199365834324e-07, "logits/chosen": -1.5390739440917969, "logits/rejected": -1.5803804397583008, "logps/chosen": -155.83084106445312, "logps/rejected": -246.75985717773438, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -7.244839191436768, "rewards/margins": 9.831026077270508, "rewards/rejected": -17.075864791870117, "step": 3514 }, { "epoch": 5.64, "learning_rate": 2.0729290527150217e-07, "logits/chosen": -1.5421901941299438, "logits/rejected": -1.5358848571777344, "logps/chosen": -141.08447265625, "logps/rejected": -272.2650451660156, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -5.407374858856201, "rewards/margins": 12.155393600463867, "rewards/rejected": -17.562768936157227, "step": 3515 }, { "epoch": 5.64, "learning_rate": 2.071938168846611e-07, "logits/chosen": -1.6914560794830322, "logits/rejected": -1.6321383714675903, "logps/chosen": -138.86932373046875, "logps/rejected": -233.4820098876953, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -4.154097080230713, "rewards/margins": 11.60377025604248, "rewards/rejected": -15.757867813110352, "step": 3516 }, { "epoch": 5.65, "learning_rate": 2.0709472849782007e-07, "logits/chosen": -1.65492582321167, "logits/rejected": -1.6905113458633423, "logps/chosen": -139.6899871826172, "logps/rejected": -260.28607177734375, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -6.599881649017334, "rewards/margins": 12.029863357543945, "rewards/rejected": -18.629745483398438, "step": 3517 }, { "epoch": 5.65, "learning_rate": 2.0699564011097897e-07, "logits/chosen": -1.6711056232452393, "logits/rejected": -1.6952325105667114, "logps/chosen": -163.0201416015625, "logps/rejected": -262.00250244140625, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/chosen": -7.483802795410156, "rewards/margins": 8.725163459777832, "rewards/rejected": -16.208967208862305, "step": 3518 }, { "epoch": 5.65, "learning_rate": 2.0689655172413793e-07, "logits/chosen": -1.4953594207763672, "logits/rejected": -1.4675028324127197, "logps/chosen": -204.3321990966797, "logps/rejected": -318.9854431152344, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -10.17468547821045, "rewards/margins": 12.923084259033203, "rewards/rejected": -23.097766876220703, "step": 3519 }, { "epoch": 5.65, "learning_rate": 2.0679746333729687e-07, "logits/chosen": -1.7349953651428223, "logits/rejected": -1.6611180305480957, "logps/chosen": -137.07705688476562, "logps/rejected": -262.3623046875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.503540515899658, "rewards/margins": 14.570038795471191, "rewards/rejected": -19.073579788208008, "step": 3520 }, { "epoch": 5.65, "learning_rate": 2.066983749504558e-07, "logits/chosen": -1.4682340621948242, "logits/rejected": -1.4893825054168701, "logps/chosen": -140.5184326171875, "logps/rejected": -271.0100402832031, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -4.596077919006348, "rewards/margins": 13.537010192871094, "rewards/rejected": -18.133087158203125, "step": 3521 }, { "epoch": 5.65, "learning_rate": 2.0659928656361473e-07, "logits/chosen": -1.505333662033081, "logits/rejected": -1.4636342525482178, "logps/chosen": -196.52276611328125, "logps/rejected": -318.2760009765625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -10.679302215576172, "rewards/margins": 11.629175186157227, "rewards/rejected": -22.30847930908203, "step": 3522 }, { "epoch": 5.65, "learning_rate": 2.0650019817677366e-07, "logits/chosen": -1.37777841091156, "logits/rejected": -1.4127016067504883, "logps/chosen": -99.77792358398438, "logps/rejected": -234.60333251953125, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -3.4538702964782715, "rewards/margins": 11.31854248046875, "rewards/rejected": -14.772412300109863, "step": 3523 }, { "epoch": 5.66, "learning_rate": 2.064011097899326e-07, "logits/chosen": -1.631746768951416, "logits/rejected": -1.5850729942321777, "logps/chosen": -141.7529296875, "logps/rejected": -258.9488830566406, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.887248992919922, "rewards/margins": 11.788074493408203, "rewards/rejected": -18.675325393676758, "step": 3524 }, { "epoch": 5.66, "learning_rate": 2.0630202140309156e-07, "logits/chosen": -1.5112628936767578, "logits/rejected": -1.6130366325378418, "logps/chosen": -154.795654296875, "logps/rejected": -316.2657470703125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.618958473205566, "rewards/margins": 13.794021606445312, "rewards/rejected": -20.412979125976562, "step": 3525 }, { "epoch": 5.66, "learning_rate": 2.062029330162505e-07, "logits/chosen": -1.6076500415802002, "logits/rejected": -1.5037975311279297, "logps/chosen": -154.53564453125, "logps/rejected": -241.74484252929688, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -6.068292617797852, "rewards/margins": 10.463736534118652, "rewards/rejected": -16.53203010559082, "step": 3526 }, { "epoch": 5.66, "learning_rate": 2.0610384462940942e-07, "logits/chosen": -1.461061716079712, "logits/rejected": -1.525355339050293, "logps/chosen": -155.22055053710938, "logps/rejected": -325.6090087890625, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -7.9952239990234375, "rewards/margins": 14.975289344787598, "rewards/rejected": -22.97051429748535, "step": 3527 }, { "epoch": 5.66, "learning_rate": 2.0600475624256836e-07, "logits/chosen": -1.7046408653259277, "logits/rejected": -1.6654388904571533, "logps/chosen": -144.3090362548828, "logps/rejected": -273.2762145996094, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -4.3456645011901855, "rewards/margins": 13.871570587158203, "rewards/rejected": -18.217233657836914, "step": 3528 }, { "epoch": 5.66, "learning_rate": 2.059056678557273e-07, "logits/chosen": -1.5195531845092773, "logits/rejected": -1.5716814994812012, "logps/chosen": -154.99191284179688, "logps/rejected": -286.3543395996094, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.913886070251465, "rewards/margins": 11.889086723327637, "rewards/rejected": -19.802974700927734, "step": 3529 }, { "epoch": 5.67, "learning_rate": 2.0580657946888625e-07, "logits/chosen": -1.6514383554458618, "logits/rejected": -1.5940759181976318, "logps/chosen": -149.56546020507812, "logps/rejected": -269.6033630371094, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -7.9260735511779785, "rewards/margins": 12.607810020446777, "rewards/rejected": -20.533884048461914, "step": 3530 }, { "epoch": 5.67, "learning_rate": 2.0570749108204518e-07, "logits/chosen": -1.796550989151001, "logits/rejected": -1.7530274391174316, "logps/chosen": -125.2088851928711, "logps/rejected": -291.35943603515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.228306770324707, "rewards/margins": 16.907188415527344, "rewards/rejected": -21.135496139526367, "step": 3531 }, { "epoch": 5.67, "learning_rate": 2.056084026952041e-07, "logits/chosen": -1.4930200576782227, "logits/rejected": -1.5153943300247192, "logps/chosen": -124.77980041503906, "logps/rejected": -284.3247375488281, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.6466524600982666, "rewards/margins": 15.05573844909668, "rewards/rejected": -18.702390670776367, "step": 3532 }, { "epoch": 5.67, "learning_rate": 2.0550931430836305e-07, "logits/chosen": -1.4678759574890137, "logits/rejected": -1.4246290922164917, "logps/chosen": -170.02784729003906, "logps/rejected": -297.3772277832031, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -7.877201557159424, "rewards/margins": 13.11614990234375, "rewards/rejected": -20.993350982666016, "step": 3533 }, { "epoch": 5.67, "learning_rate": 2.0541022592152198e-07, "logits/chosen": -1.5578639507293701, "logits/rejected": -1.4947177171707153, "logps/chosen": -165.95391845703125, "logps/rejected": -293.1918640136719, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.643244743347168, "rewards/margins": 13.28227424621582, "rewards/rejected": -20.925518035888672, "step": 3534 }, { "epoch": 5.67, "learning_rate": 2.0531113753468094e-07, "logits/chosen": -1.3211886882781982, "logits/rejected": -1.3503947257995605, "logps/chosen": -160.87661743164062, "logps/rejected": -283.2664794921875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -8.231307029724121, "rewards/margins": 12.0439453125, "rewards/rejected": -20.275251388549805, "step": 3535 }, { "epoch": 5.68, "learning_rate": 2.0521204914783988e-07, "logits/chosen": -1.5581319332122803, "logits/rejected": -1.4653578996658325, "logps/chosen": -200.51364135742188, "logps/rejected": -304.32989501953125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -9.42978286743164, "rewards/margins": 13.421276092529297, "rewards/rejected": -22.851058959960938, "step": 3536 }, { "epoch": 5.68, "learning_rate": 2.0511296076099878e-07, "logits/chosen": -1.618722915649414, "logits/rejected": -1.517574667930603, "logps/chosen": -185.39898681640625, "logps/rejected": -268.9796447753906, "loss": 0.0092, "rewards/accuracies": 1.0, "rewards/chosen": -8.109620094299316, "rewards/margins": 9.910978317260742, "rewards/rejected": -18.020599365234375, "step": 3537 }, { "epoch": 5.68, "learning_rate": 2.0501387237415774e-07, "logits/chosen": -1.4455080032348633, "logits/rejected": -1.4987713098526, "logps/chosen": -118.1763687133789, "logps/rejected": -295.4964599609375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.477498531341553, "rewards/margins": 13.800302505493164, "rewards/rejected": -19.277801513671875, "step": 3538 }, { "epoch": 5.68, "learning_rate": 2.0491478398731668e-07, "logits/chosen": -1.4148435592651367, "logits/rejected": -1.5270719528198242, "logps/chosen": -178.26426696777344, "logps/rejected": -302.45404052734375, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -8.34427547454834, "rewards/margins": 11.370718002319336, "rewards/rejected": -19.71499252319336, "step": 3539 }, { "epoch": 5.68, "learning_rate": 2.0481569560047564e-07, "logits/chosen": -1.7418153285980225, "logits/rejected": -1.7663133144378662, "logps/chosen": -133.5062255859375, "logps/rejected": -294.9941711425781, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -4.562310695648193, "rewards/margins": 14.592042922973633, "rewards/rejected": -19.154354095458984, "step": 3540 }, { "epoch": 5.68, "learning_rate": 2.0471660721363454e-07, "logits/chosen": -1.4859404563903809, "logits/rejected": -1.4931477308273315, "logps/chosen": -155.93174743652344, "logps/rejected": -336.06915283203125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.504201889038086, "rewards/margins": 15.88814640045166, "rewards/rejected": -21.392349243164062, "step": 3541 }, { "epoch": 5.69, "learning_rate": 2.0461751882679348e-07, "logits/chosen": -1.682798981666565, "logits/rejected": -1.7489967346191406, "logps/chosen": -149.91802978515625, "logps/rejected": -301.7533874511719, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -6.797520637512207, "rewards/margins": 14.831779479980469, "rewards/rejected": -21.62929916381836, "step": 3542 }, { "epoch": 5.69, "learning_rate": 2.0451843043995244e-07, "logits/chosen": -1.6320478916168213, "logits/rejected": -1.7101695537567139, "logps/chosen": -131.40391540527344, "logps/rejected": -258.38043212890625, "loss": 0.0213, "rewards/accuracies": 1.0, "rewards/chosen": -6.378139495849609, "rewards/margins": 10.418229103088379, "rewards/rejected": -16.796369552612305, "step": 3543 }, { "epoch": 5.69, "learning_rate": 2.0441934205311137e-07, "logits/chosen": -1.4227283000946045, "logits/rejected": -1.4669923782348633, "logps/chosen": -138.54624938964844, "logps/rejected": -262.70660400390625, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -6.704582214355469, "rewards/margins": 11.912574768066406, "rewards/rejected": -18.617156982421875, "step": 3544 }, { "epoch": 5.69, "learning_rate": 2.043202536662703e-07, "logits/chosen": -1.4847792387008667, "logits/rejected": -1.5021198987960815, "logps/chosen": -139.29275512695312, "logps/rejected": -274.85467529296875, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -5.649136066436768, "rewards/margins": 12.453357696533203, "rewards/rejected": -18.102493286132812, "step": 3545 }, { "epoch": 5.69, "learning_rate": 2.0422116527942924e-07, "logits/chosen": -1.5052356719970703, "logits/rejected": -1.5876795053482056, "logps/chosen": -179.66453552246094, "logps/rejected": -342.5696716308594, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.686628341674805, "rewards/margins": 15.56988525390625, "rewards/rejected": -24.256515502929688, "step": 3546 }, { "epoch": 5.69, "learning_rate": 2.0412207689258817e-07, "logits/chosen": -1.6352779865264893, "logits/rejected": -1.648259162902832, "logps/chosen": -131.49856567382812, "logps/rejected": -241.78872680664062, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -5.069862365722656, "rewards/margins": 9.054788589477539, "rewards/rejected": -14.124650955200195, "step": 3547 }, { "epoch": 5.7, "learning_rate": 2.0402298850574713e-07, "logits/chosen": -1.5092244148254395, "logits/rejected": -1.491392970085144, "logps/chosen": -183.5706787109375, "logps/rejected": -337.9241943359375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.956581115722656, "rewards/margins": 15.629890441894531, "rewards/rejected": -24.58647346496582, "step": 3548 }, { "epoch": 5.7, "learning_rate": 2.0392390011890606e-07, "logits/chosen": -1.5024819374084473, "logits/rejected": -1.5552245378494263, "logps/chosen": -174.82614135742188, "logps/rejected": -292.9357604980469, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.744539260864258, "rewards/margins": 11.926436424255371, "rewards/rejected": -20.670974731445312, "step": 3549 }, { "epoch": 5.7, "learning_rate": 2.03824811732065e-07, "logits/chosen": -1.511600136756897, "logits/rejected": -1.4875361919403076, "logps/chosen": -143.91891479492188, "logps/rejected": -229.28689575195312, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -5.312620162963867, "rewards/margins": 10.142881393432617, "rewards/rejected": -15.455500602722168, "step": 3550 }, { "epoch": 5.7, "learning_rate": 2.0372572334522393e-07, "logits/chosen": -1.5917384624481201, "logits/rejected": -1.5776280164718628, "logps/chosen": -167.51870727539062, "logps/rejected": -301.0332946777344, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.939592361450195, "rewards/margins": 12.207817077636719, "rewards/rejected": -21.14740753173828, "step": 3551 }, { "epoch": 5.7, "learning_rate": 2.0362663495838286e-07, "logits/chosen": -1.4983164072036743, "logits/rejected": -1.5237936973571777, "logps/chosen": -182.20559692382812, "logps/rejected": -276.1044921875, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -8.436517715454102, "rewards/margins": 9.365097045898438, "rewards/rejected": -17.80161476135254, "step": 3552 }, { "epoch": 5.7, "learning_rate": 2.035275465715418e-07, "logits/chosen": -1.561061978340149, "logits/rejected": -1.7600593566894531, "logps/chosen": -133.84555053710938, "logps/rejected": -296.4178466796875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.517006874084473, "rewards/margins": 13.70553970336914, "rewards/rejected": -19.222545623779297, "step": 3553 }, { "epoch": 5.7, "learning_rate": 2.0342845818470076e-07, "logits/chosen": -1.5922131538391113, "logits/rejected": -1.557830810546875, "logps/chosen": -131.55929565429688, "logps/rejected": -245.2205047607422, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -4.651397228240967, "rewards/margins": 13.379343032836914, "rewards/rejected": -18.03074073791504, "step": 3554 }, { "epoch": 5.71, "learning_rate": 2.0332936979785966e-07, "logits/chosen": -1.6778231859207153, "logits/rejected": -1.6266074180603027, "logps/chosen": -157.13607788085938, "logps/rejected": -244.11932373046875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -6.587542533874512, "rewards/margins": 11.425036430358887, "rewards/rejected": -18.0125789642334, "step": 3555 }, { "epoch": 5.71, "learning_rate": 2.0323028141101862e-07, "logits/chosen": -1.4737540483474731, "logits/rejected": -1.456387996673584, "logps/chosen": -151.18467712402344, "logps/rejected": -244.69520568847656, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -6.683345317840576, "rewards/margins": 10.568519592285156, "rewards/rejected": -17.25186538696289, "step": 3556 }, { "epoch": 5.71, "learning_rate": 2.0313119302417756e-07, "logits/chosen": -1.5176188945770264, "logits/rejected": -1.5028632879257202, "logps/chosen": -130.75633239746094, "logps/rejected": -246.44522094726562, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.476804256439209, "rewards/margins": 12.84494400024414, "rewards/rejected": -18.321746826171875, "step": 3557 }, { "epoch": 5.71, "learning_rate": 2.030321046373365e-07, "logits/chosen": -1.426805019378662, "logits/rejected": -1.3686909675598145, "logps/chosen": -209.47300720214844, "logps/rejected": -308.56365966796875, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -11.321763038635254, "rewards/margins": 11.223033905029297, "rewards/rejected": -22.544795989990234, "step": 3558 }, { "epoch": 5.71, "learning_rate": 2.0293301625049545e-07, "logits/chosen": -1.4640378952026367, "logits/rejected": -1.4723306894302368, "logps/chosen": -159.8562469482422, "logps/rejected": -295.4896545410156, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.02308177947998, "rewards/margins": 12.408321380615234, "rewards/rejected": -20.43140411376953, "step": 3559 }, { "epoch": 5.71, "learning_rate": 2.0283392786365436e-07, "logits/chosen": -1.5524799823760986, "logits/rejected": -1.5302510261535645, "logps/chosen": -133.75259399414062, "logps/rejected": -239.54159545898438, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -4.304057598114014, "rewards/margins": 12.450199127197266, "rewards/rejected": -16.754257202148438, "step": 3560 }, { "epoch": 5.72, "learning_rate": 2.027348394768133e-07, "logits/chosen": -1.521172285079956, "logits/rejected": -1.488037347793579, "logps/chosen": -159.90667724609375, "logps/rejected": -288.56658935546875, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -7.059093475341797, "rewards/margins": 12.773754119873047, "rewards/rejected": -19.832847595214844, "step": 3561 }, { "epoch": 5.72, "learning_rate": 2.0263575108997225e-07, "logits/chosen": -1.5032345056533813, "logits/rejected": -1.551843285560608, "logps/chosen": -191.45289611816406, "logps/rejected": -288.1676330566406, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -9.208745956420898, "rewards/margins": 10.742368698120117, "rewards/rejected": -19.951114654541016, "step": 3562 }, { "epoch": 5.72, "learning_rate": 2.0253666270313118e-07, "logits/chosen": -1.448955774307251, "logits/rejected": -1.506296157836914, "logps/chosen": -203.14108276367188, "logps/rejected": -345.15936279296875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -10.56435775756836, "rewards/margins": 13.397268295288086, "rewards/rejected": -23.961627960205078, "step": 3563 }, { "epoch": 5.72, "learning_rate": 2.0243757431629014e-07, "logits/chosen": -1.6489144563674927, "logits/rejected": -1.580488920211792, "logps/chosen": -143.65866088867188, "logps/rejected": -266.9210510253906, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -4.636787414550781, "rewards/margins": 11.612483024597168, "rewards/rejected": -16.249271392822266, "step": 3564 }, { "epoch": 5.72, "learning_rate": 2.0233848592944905e-07, "logits/chosen": -1.566999077796936, "logits/rejected": -1.6450825929641724, "logps/chosen": -120.28274536132812, "logps/rejected": -287.6656799316406, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -5.104822158813477, "rewards/margins": 14.038522720336914, "rewards/rejected": -19.143346786499023, "step": 3565 }, { "epoch": 5.72, "learning_rate": 2.0223939754260798e-07, "logits/chosen": -1.7184793949127197, "logits/rejected": -1.5953909158706665, "logps/chosen": -149.8682098388672, "logps/rejected": -301.1786193847656, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.588178634643555, "rewards/margins": 15.959988594055176, "rewards/rejected": -22.548168182373047, "step": 3566 }, { "epoch": 5.73, "learning_rate": 2.0214030915576694e-07, "logits/chosen": -1.4647021293640137, "logits/rejected": -1.4719187021255493, "logps/chosen": -123.21796417236328, "logps/rejected": -296.1027526855469, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.9264678955078125, "rewards/margins": 14.839374542236328, "rewards/rejected": -19.765840530395508, "step": 3567 }, { "epoch": 5.73, "learning_rate": 2.0204122076892588e-07, "logits/chosen": -1.4788576364517212, "logits/rejected": -1.4537010192871094, "logps/chosen": -156.90538024902344, "logps/rejected": -242.5503387451172, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -6.611600875854492, "rewards/margins": 10.983524322509766, "rewards/rejected": -17.595125198364258, "step": 3568 }, { "epoch": 5.73, "learning_rate": 2.0194213238208483e-07, "logits/chosen": -1.5989553928375244, "logits/rejected": -1.6376492977142334, "logps/chosen": -140.93154907226562, "logps/rejected": -272.79827880859375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -5.4215826988220215, "rewards/margins": 13.255082130432129, "rewards/rejected": -18.676664352416992, "step": 3569 }, { "epoch": 5.73, "learning_rate": 2.0184304399524374e-07, "logits/chosen": -1.5587729215621948, "logits/rejected": -1.4989063739776611, "logps/chosen": -152.70803833007812, "logps/rejected": -228.9523468017578, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -6.8674845695495605, "rewards/margins": 9.062484741210938, "rewards/rejected": -15.929969787597656, "step": 3570 }, { "epoch": 5.73, "learning_rate": 2.0174395560840267e-07, "logits/chosen": -1.6701334714889526, "logits/rejected": -1.6112020015716553, "logps/chosen": -154.34176635742188, "logps/rejected": -262.3481750488281, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -7.533101558685303, "rewards/margins": 11.809993743896484, "rewards/rejected": -19.343093872070312, "step": 3571 }, { "epoch": 5.73, "learning_rate": 2.0164486722156163e-07, "logits/chosen": -1.393770456314087, "logits/rejected": -1.3855311870574951, "logps/chosen": -90.99893188476562, "logps/rejected": -216.751220703125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -3.2704715728759766, "rewards/margins": 11.286284446716309, "rewards/rejected": -14.556756019592285, "step": 3572 }, { "epoch": 5.74, "learning_rate": 2.0154577883472057e-07, "logits/chosen": -1.6137773990631104, "logits/rejected": -1.5613099336624146, "logps/chosen": -143.21726989746094, "logps/rejected": -284.9044189453125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.160017967224121, "rewards/margins": 13.069473266601562, "rewards/rejected": -19.2294921875, "step": 3573 }, { "epoch": 5.74, "learning_rate": 2.0144669044787947e-07, "logits/chosen": -1.4302728176116943, "logits/rejected": -1.4839001893997192, "logps/chosen": -125.02999114990234, "logps/rejected": -260.8192443847656, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -6.321097373962402, "rewards/margins": 12.836235046386719, "rewards/rejected": -19.157331466674805, "step": 3574 }, { "epoch": 5.74, "learning_rate": 2.0134760206103843e-07, "logits/chosen": -1.527632713317871, "logits/rejected": -1.4656946659088135, "logps/chosen": -156.17684936523438, "logps/rejected": -244.519287109375, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -6.164624214172363, "rewards/margins": 11.137093544006348, "rewards/rejected": -17.30171775817871, "step": 3575 }, { "epoch": 5.74, "learning_rate": 2.0124851367419737e-07, "logits/chosen": -1.6314806938171387, "logits/rejected": -1.5746362209320068, "logps/chosen": -169.90769958496094, "logps/rejected": -289.0606994628906, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -8.0435791015625, "rewards/margins": 13.864806175231934, "rewards/rejected": -21.90838623046875, "step": 3576 }, { "epoch": 5.74, "learning_rate": 2.0114942528735633e-07, "logits/chosen": -1.4886690378189087, "logits/rejected": -1.40162992477417, "logps/chosen": -172.57791137695312, "logps/rejected": -285.21942138671875, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -8.035948753356934, "rewards/margins": 12.426420211791992, "rewards/rejected": -20.46236801147461, "step": 3577 }, { "epoch": 5.74, "learning_rate": 2.0105033690051526e-07, "logits/chosen": -1.5235354900360107, "logits/rejected": -1.5458662509918213, "logps/chosen": -154.47250366210938, "logps/rejected": -296.79498291015625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.3569416999816895, "rewards/margins": 13.459123611450195, "rewards/rejected": -19.816064834594727, "step": 3578 }, { "epoch": 5.74, "learning_rate": 2.0095124851367417e-07, "logits/chosen": -1.5485100746154785, "logits/rejected": -1.5467251539230347, "logps/chosen": -165.16693115234375, "logps/rejected": -258.6487731933594, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -7.763125419616699, "rewards/margins": 10.396384239196777, "rewards/rejected": -18.159509658813477, "step": 3579 }, { "epoch": 5.75, "learning_rate": 2.0085216012683313e-07, "logits/chosen": -1.4398720264434814, "logits/rejected": -1.4736427068710327, "logps/chosen": -115.2834701538086, "logps/rejected": -223.43472290039062, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -4.911824703216553, "rewards/margins": 10.584640502929688, "rewards/rejected": -15.496465682983398, "step": 3580 }, { "epoch": 5.75, "learning_rate": 2.0075307173999206e-07, "logits/chosen": -1.3699381351470947, "logits/rejected": -1.3879597187042236, "logps/chosen": -129.5419464111328, "logps/rejected": -257.3197021484375, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -3.5684921741485596, "rewards/margins": 13.051033973693848, "rewards/rejected": -16.619525909423828, "step": 3581 }, { "epoch": 5.75, "learning_rate": 2.00653983353151e-07, "logits/chosen": -1.3750466108322144, "logits/rejected": -1.3528680801391602, "logps/chosen": -170.72450256347656, "logps/rejected": -265.4868469238281, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -9.602771759033203, "rewards/margins": 9.813447952270508, "rewards/rejected": -19.41621971130371, "step": 3582 }, { "epoch": 5.75, "learning_rate": 2.0055489496630995e-07, "logits/chosen": -1.434492588043213, "logits/rejected": -1.5546616315841675, "logps/chosen": -147.2637939453125, "logps/rejected": -249.18231201171875, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -6.574392318725586, "rewards/margins": 9.400993347167969, "rewards/rejected": -15.975385665893555, "step": 3583 }, { "epoch": 5.75, "learning_rate": 2.0045580657946886e-07, "logits/chosen": -1.3554716110229492, "logits/rejected": -1.402051568031311, "logps/chosen": -97.26850891113281, "logps/rejected": -176.31997680664062, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -4.11417293548584, "rewards/margins": 7.249265670776367, "rewards/rejected": -11.363438606262207, "step": 3584 }, { "epoch": 5.75, "learning_rate": 2.0035671819262782e-07, "logits/chosen": -1.5690315961837769, "logits/rejected": -1.5253278017044067, "logps/chosen": -111.10417175292969, "logps/rejected": -216.83543395996094, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -4.784228324890137, "rewards/margins": 10.298759460449219, "rewards/rejected": -15.082986831665039, "step": 3585 }, { "epoch": 5.76, "learning_rate": 2.0025762980578675e-07, "logits/chosen": -1.5699938535690308, "logits/rejected": -1.663338303565979, "logps/chosen": -157.43161010742188, "logps/rejected": -290.7130432128906, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.756935119628906, "rewards/margins": 14.020074844360352, "rewards/rejected": -19.777009963989258, "step": 3586 }, { "epoch": 5.76, "learning_rate": 2.001585414189457e-07, "logits/chosen": -1.4872403144836426, "logits/rejected": -1.4863290786743164, "logps/chosen": -145.29481506347656, "logps/rejected": -290.0386962890625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.069206237792969, "rewards/margins": 12.496539115905762, "rewards/rejected": -17.565746307373047, "step": 3587 }, { "epoch": 5.76, "learning_rate": 2.0005945303210465e-07, "logits/chosen": -1.5600666999816895, "logits/rejected": -1.621584415435791, "logps/chosen": -157.46954345703125, "logps/rejected": -285.50616455078125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -6.9017181396484375, "rewards/margins": 12.87321662902832, "rewards/rejected": -19.774934768676758, "step": 3588 }, { "epoch": 5.76, "learning_rate": 1.9996036464526355e-07, "logits/chosen": -1.4330542087554932, "logits/rejected": -1.4750866889953613, "logps/chosen": -139.40362548828125, "logps/rejected": -251.46456909179688, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.9308011531829834, "rewards/margins": 14.118204116821289, "rewards/rejected": -18.04900550842285, "step": 3589 }, { "epoch": 5.76, "learning_rate": 1.9986127625842251e-07, "logits/chosen": -1.4769009351730347, "logits/rejected": -1.4740387201309204, "logps/chosen": -171.4402618408203, "logps/rejected": -332.0131530761719, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -7.355722427368164, "rewards/margins": 15.581747055053711, "rewards/rejected": -22.937469482421875, "step": 3590 }, { "epoch": 5.76, "learning_rate": 1.9976218787158145e-07, "logits/chosen": -1.3216586112976074, "logits/rejected": -1.3692569732666016, "logps/chosen": -147.22802734375, "logps/rejected": -273.540771484375, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -5.8810834884643555, "rewards/margins": 12.605966567993164, "rewards/rejected": -18.487049102783203, "step": 3591 }, { "epoch": 5.77, "learning_rate": 1.9966309948474038e-07, "logits/chosen": -1.4093987941741943, "logits/rejected": -1.4458301067352295, "logps/chosen": -118.55331420898438, "logps/rejected": -260.40362548828125, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -4.255422592163086, "rewards/margins": 13.546026229858398, "rewards/rejected": -17.801448822021484, "step": 3592 }, { "epoch": 5.77, "learning_rate": 1.9956401109789931e-07, "logits/chosen": -1.6516507863998413, "logits/rejected": -1.542681336402893, "logps/chosen": -129.18846130371094, "logps/rejected": -242.06375122070312, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -5.248739242553711, "rewards/margins": 13.694120407104492, "rewards/rejected": -18.942859649658203, "step": 3593 }, { "epoch": 5.77, "learning_rate": 1.9946492271105825e-07, "logits/chosen": -1.3624695539474487, "logits/rejected": -1.46234130859375, "logps/chosen": -190.02490234375, "logps/rejected": -313.30657958984375, "loss": 0.0056, "rewards/accuracies": 1.0, "rewards/chosen": -10.15498161315918, "rewards/margins": 10.451542854309082, "rewards/rejected": -20.606525421142578, "step": 3594 }, { "epoch": 5.77, "learning_rate": 1.9936583432421718e-07, "logits/chosen": -1.4925742149353027, "logits/rejected": -1.4886584281921387, "logps/chosen": -114.5026626586914, "logps/rejected": -253.30343627929688, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -4.153810501098633, "rewards/margins": 13.177797317504883, "rewards/rejected": -17.331607818603516, "step": 3595 }, { "epoch": 5.77, "learning_rate": 1.9926674593737614e-07, "logits/chosen": -1.6086325645446777, "logits/rejected": -1.5818456411361694, "logps/chosen": -158.46426391601562, "logps/rejected": -276.2382507324219, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -6.729948997497559, "rewards/margins": 12.530540466308594, "rewards/rejected": -19.260488510131836, "step": 3596 }, { "epoch": 5.77, "learning_rate": 1.9916765755053507e-07, "logits/chosen": -1.6947975158691406, "logits/rejected": -1.6810553073883057, "logps/chosen": -162.68609619140625, "logps/rejected": -284.0870056152344, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.401661396026611, "rewards/margins": 12.530789375305176, "rewards/rejected": -17.932451248168945, "step": 3597 }, { "epoch": 5.78, "learning_rate": 1.99068569163694e-07, "logits/chosen": -1.5132685899734497, "logits/rejected": -1.4850399494171143, "logps/chosen": -169.82498168945312, "logps/rejected": -252.5393524169922, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.1982526779174805, "rewards/margins": 9.479042053222656, "rewards/rejected": -16.67729377746582, "step": 3598 }, { "epoch": 5.78, "learning_rate": 1.9896948077685294e-07, "logits/chosen": -1.6742961406707764, "logits/rejected": -1.6924819946289062, "logps/chosen": -152.41909790039062, "logps/rejected": -282.9547424316406, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.786500930786133, "rewards/margins": 12.395954132080078, "rewards/rejected": -20.18245506286621, "step": 3599 }, { "epoch": 5.78, "learning_rate": 1.9887039239001187e-07, "logits/chosen": -1.5375986099243164, "logits/rejected": -1.4762905836105347, "logps/chosen": -169.60302734375, "logps/rejected": -293.0714111328125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -6.97305154800415, "rewards/margins": 12.60361099243164, "rewards/rejected": -19.576662063598633, "step": 3600 }, { "epoch": 5.78, "learning_rate": 1.9877130400317083e-07, "logits/chosen": -1.5752952098846436, "logits/rejected": -1.5008842945098877, "logps/chosen": -167.08627319335938, "logps/rejected": -307.57672119140625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.409596920013428, "rewards/margins": 15.092230796813965, "rewards/rejected": -21.501827239990234, "step": 3601 }, { "epoch": 5.78, "learning_rate": 1.9867221561632977e-07, "logits/chosen": -1.5248076915740967, "logits/rejected": -1.5527385473251343, "logps/chosen": -149.1284942626953, "logps/rejected": -289.356201171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.969615936279297, "rewards/margins": 11.953279495239258, "rewards/rejected": -18.922895431518555, "step": 3602 }, { "epoch": 5.78, "learning_rate": 1.9857312722948867e-07, "logits/chosen": -1.551891565322876, "logits/rejected": -1.6286375522613525, "logps/chosen": -105.75352478027344, "logps/rejected": -249.41807556152344, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -4.346821308135986, "rewards/margins": 11.970518112182617, "rewards/rejected": -16.317338943481445, "step": 3603 }, { "epoch": 5.78, "learning_rate": 1.9847403884264763e-07, "logits/chosen": -1.411435842514038, "logits/rejected": -1.3770455121994019, "logps/chosen": -119.30325317382812, "logps/rejected": -247.80270385742188, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -4.738180637359619, "rewards/margins": 13.475936889648438, "rewards/rejected": -18.21411895751953, "step": 3604 }, { "epoch": 5.79, "learning_rate": 1.9837495045580657e-07, "logits/chosen": -1.4738895893096924, "logits/rejected": -1.5098522901535034, "logps/chosen": -99.53736877441406, "logps/rejected": -299.56439208984375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.749724864959717, "rewards/margins": 17.422269821166992, "rewards/rejected": -21.171993255615234, "step": 3605 }, { "epoch": 5.79, "learning_rate": 1.9827586206896553e-07, "logits/chosen": -1.5424823760986328, "logits/rejected": -1.6310869455337524, "logps/chosen": -140.87510681152344, "logps/rejected": -335.4617614746094, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.027637004852295, "rewards/margins": 16.25531578063965, "rewards/rejected": -22.2829532623291, "step": 3606 }, { "epoch": 5.79, "learning_rate": 1.9817677368212446e-07, "logits/chosen": -1.4546661376953125, "logits/rejected": -1.4626210927963257, "logps/chosen": -133.47866821289062, "logps/rejected": -208.24957275390625, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -6.985434532165527, "rewards/margins": 7.077291488647461, "rewards/rejected": -14.062725067138672, "step": 3607 }, { "epoch": 5.79, "learning_rate": 1.9807768529528337e-07, "logits/chosen": -1.678086519241333, "logits/rejected": -1.7727010250091553, "logps/chosen": -126.27224731445312, "logps/rejected": -266.5982360839844, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -5.389190673828125, "rewards/margins": 11.338318824768066, "rewards/rejected": -16.727510452270508, "step": 3608 }, { "epoch": 5.79, "learning_rate": 1.9797859690844233e-07, "logits/chosen": -1.4443756341934204, "logits/rejected": -1.4090099334716797, "logps/chosen": -144.97027587890625, "logps/rejected": -251.01841735839844, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -6.2147979736328125, "rewards/margins": 12.241732597351074, "rewards/rejected": -18.45652961730957, "step": 3609 }, { "epoch": 5.79, "learning_rate": 1.9787950852160126e-07, "logits/chosen": -1.4543309211730957, "logits/rejected": -1.4037752151489258, "logps/chosen": -167.0087890625, "logps/rejected": -271.51153564453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.51118278503418, "rewards/margins": 10.784629821777344, "rewards/rejected": -19.295812606811523, "step": 3610 }, { "epoch": 5.8, "learning_rate": 1.9778042013476022e-07, "logits/chosen": -1.5626275539398193, "logits/rejected": -1.539595127105713, "logps/chosen": -151.12904357910156, "logps/rejected": -303.9042663574219, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -7.0249552726745605, "rewards/margins": 15.471617698669434, "rewards/rejected": -22.496572494506836, "step": 3611 }, { "epoch": 5.8, "learning_rate": 1.9768133174791913e-07, "logits/chosen": -1.5154834985733032, "logits/rejected": -1.525287389755249, "logps/chosen": -132.39305114746094, "logps/rejected": -232.89024353027344, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -4.786574840545654, "rewards/margins": 11.740911483764648, "rewards/rejected": -16.52748680114746, "step": 3612 }, { "epoch": 5.8, "learning_rate": 1.9758224336107806e-07, "logits/chosen": -1.495429277420044, "logits/rejected": -1.5334311723709106, "logps/chosen": -133.42051696777344, "logps/rejected": -297.0221862792969, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": -4.421781063079834, "rewards/margins": 16.473379135131836, "rewards/rejected": -20.895160675048828, "step": 3613 }, { "epoch": 5.8, "learning_rate": 1.9748315497423702e-07, "logits/chosen": -1.494077205657959, "logits/rejected": -1.473947286605835, "logps/chosen": -173.30856323242188, "logps/rejected": -254.0756072998047, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -5.575965881347656, "rewards/margins": 9.75881576538086, "rewards/rejected": -15.334782600402832, "step": 3614 }, { "epoch": 5.8, "learning_rate": 1.9738406658739595e-07, "logits/chosen": -1.507444143295288, "logits/rejected": -1.4891562461853027, "logps/chosen": -119.53681945800781, "logps/rejected": -268.8651428222656, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -2.380403995513916, "rewards/margins": 12.298852920532227, "rewards/rejected": -14.6792573928833, "step": 3615 }, { "epoch": 5.8, "learning_rate": 1.9728497820055489e-07, "logits/chosen": -1.4732081890106201, "logits/rejected": -1.4758204221725464, "logps/chosen": -149.52557373046875, "logps/rejected": -294.8367919921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.379432678222656, "rewards/margins": 15.880510330200195, "rewards/rejected": -22.25994110107422, "step": 3616 }, { "epoch": 5.81, "learning_rate": 1.9718588981371382e-07, "logits/chosen": -1.6422940492630005, "logits/rejected": -1.5283619165420532, "logps/chosen": -149.61883544921875, "logps/rejected": -244.26026916503906, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -6.03014612197876, "rewards/margins": 10.754767417907715, "rewards/rejected": -16.784914016723633, "step": 3617 }, { "epoch": 5.81, "learning_rate": 1.9708680142687275e-07, "logits/chosen": -1.6985548734664917, "logits/rejected": -1.6211954355239868, "logps/chosen": -146.80267333984375, "logps/rejected": -214.2506866455078, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -5.056077480316162, "rewards/margins": 9.950145721435547, "rewards/rejected": -15.00622272491455, "step": 3618 }, { "epoch": 5.81, "learning_rate": 1.969877130400317e-07, "logits/chosen": -1.5220632553100586, "logits/rejected": -1.5187170505523682, "logps/chosen": -132.83258056640625, "logps/rejected": -265.8354187011719, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.797036170959473, "rewards/margins": 13.89424991607666, "rewards/rejected": -18.691286087036133, "step": 3619 }, { "epoch": 5.81, "learning_rate": 1.9688862465319064e-07, "logits/chosen": -1.4496593475341797, "logits/rejected": -1.6414544582366943, "logps/chosen": -132.57351684570312, "logps/rejected": -276.1432189941406, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -6.7112555503845215, "rewards/margins": 12.481143951416016, "rewards/rejected": -19.192399978637695, "step": 3620 }, { "epoch": 5.81, "learning_rate": 1.9678953626634958e-07, "logits/chosen": -1.602809190750122, "logits/rejected": -1.5953152179718018, "logps/chosen": -141.86465454101562, "logps/rejected": -280.22393798828125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.632419109344482, "rewards/margins": 12.905250549316406, "rewards/rejected": -17.537670135498047, "step": 3621 }, { "epoch": 5.81, "learning_rate": 1.966904478795085e-07, "logits/chosen": -1.7487375736236572, "logits/rejected": -1.7852073907852173, "logps/chosen": -143.2958221435547, "logps/rejected": -298.2388916015625, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -5.60183572769165, "rewards/margins": 15.003372192382812, "rewards/rejected": -20.605207443237305, "step": 3622 }, { "epoch": 5.82, "learning_rate": 1.9659135949266744e-07, "logits/chosen": -1.5541441440582275, "logits/rejected": -1.5739750862121582, "logps/chosen": -148.06402587890625, "logps/rejected": -284.72100830078125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -5.945123672485352, "rewards/margins": 11.871484756469727, "rewards/rejected": -17.816608428955078, "step": 3623 }, { "epoch": 5.82, "learning_rate": 1.9649227110582638e-07, "logits/chosen": -1.610907793045044, "logits/rejected": -1.553051233291626, "logps/chosen": -125.56085205078125, "logps/rejected": -265.0923156738281, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.031960964202881, "rewards/margins": 15.170647621154785, "rewards/rejected": -19.202608108520508, "step": 3624 }, { "epoch": 5.82, "learning_rate": 1.9639318271898534e-07, "logits/chosen": -1.6155335903167725, "logits/rejected": -1.5425313711166382, "logps/chosen": -137.10391235351562, "logps/rejected": -272.3021240234375, "loss": 0.0117, "rewards/accuracies": 1.0, "rewards/chosen": -5.423020362854004, "rewards/margins": 13.866828918457031, "rewards/rejected": -19.28984832763672, "step": 3625 }, { "epoch": 5.82, "learning_rate": 1.9629409433214424e-07, "logits/chosen": -1.5763707160949707, "logits/rejected": -1.6264281272888184, "logps/chosen": -173.49237060546875, "logps/rejected": -329.8870544433594, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -7.510921001434326, "rewards/margins": 15.485235214233398, "rewards/rejected": -22.996156692504883, "step": 3626 }, { "epoch": 5.82, "learning_rate": 1.961950059453032e-07, "logits/chosen": -1.566690444946289, "logits/rejected": -1.5727636814117432, "logps/chosen": -159.57525634765625, "logps/rejected": -280.1453552246094, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -5.94136905670166, "rewards/margins": 12.805856704711914, "rewards/rejected": -18.74722671508789, "step": 3627 }, { "epoch": 5.82, "learning_rate": 1.9609591755846214e-07, "logits/chosen": -1.448369026184082, "logits/rejected": -1.4360079765319824, "logps/chosen": -146.82530212402344, "logps/rejected": -259.76483154296875, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -6.333584785461426, "rewards/margins": 11.439241409301758, "rewards/rejected": -17.772825241088867, "step": 3628 }, { "epoch": 5.83, "learning_rate": 1.9599682917162107e-07, "logits/chosen": -1.416291356086731, "logits/rejected": -1.5277870893478394, "logps/chosen": -137.57135009765625, "logps/rejected": -278.9080505371094, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -7.08071756362915, "rewards/margins": 11.568477630615234, "rewards/rejected": -18.64919662475586, "step": 3629 }, { "epoch": 5.83, "learning_rate": 1.9589774078478003e-07, "logits/chosen": -1.5619186162948608, "logits/rejected": -1.5831950902938843, "logps/chosen": -133.29714965820312, "logps/rejected": -243.6216278076172, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -5.843698024749756, "rewards/margins": 11.363057136535645, "rewards/rejected": -17.206754684448242, "step": 3630 }, { "epoch": 5.83, "learning_rate": 1.9579865239793894e-07, "logits/chosen": -1.5447971820831299, "logits/rejected": -1.5462377071380615, "logps/chosen": -201.35948181152344, "logps/rejected": -301.2703857421875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -9.162944793701172, "rewards/margins": 12.206296920776367, "rewards/rejected": -21.36924171447754, "step": 3631 }, { "epoch": 5.83, "learning_rate": 1.956995640110979e-07, "logits/chosen": -1.4950284957885742, "logits/rejected": -1.4623119831085205, "logps/chosen": -180.0721435546875, "logps/rejected": -306.3988342285156, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -9.075178146362305, "rewards/margins": 13.221765518188477, "rewards/rejected": -22.29694366455078, "step": 3632 }, { "epoch": 5.83, "learning_rate": 1.9560047562425683e-07, "logits/chosen": -1.5311946868896484, "logits/rejected": -1.535646915435791, "logps/chosen": -118.03160858154297, "logps/rejected": -251.7478485107422, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -4.309359073638916, "rewards/margins": 13.63560676574707, "rewards/rejected": -17.944965362548828, "step": 3633 }, { "epoch": 5.83, "learning_rate": 1.9550138723741576e-07, "logits/chosen": -1.4413695335388184, "logits/rejected": -1.496425986289978, "logps/chosen": -118.56185913085938, "logps/rejected": -259.22149658203125, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -5.958490371704102, "rewards/margins": 11.972829818725586, "rewards/rejected": -17.931320190429688, "step": 3634 }, { "epoch": 5.83, "learning_rate": 1.9540229885057472e-07, "logits/chosen": -1.647402048110962, "logits/rejected": -1.6106317043304443, "logps/chosen": -169.7397918701172, "logps/rejected": -284.4171447753906, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -6.414337635040283, "rewards/margins": 11.571904182434082, "rewards/rejected": -17.986242294311523, "step": 3635 }, { "epoch": 5.84, "learning_rate": 1.9530321046373363e-07, "logits/chosen": -1.547605276107788, "logits/rejected": -1.4955909252166748, "logps/chosen": -149.25592041015625, "logps/rejected": -274.1378173828125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.790032386779785, "rewards/margins": 14.382854461669922, "rewards/rejected": -20.172887802124023, "step": 3636 }, { "epoch": 5.84, "learning_rate": 1.9520412207689256e-07, "logits/chosen": -1.4827961921691895, "logits/rejected": -1.5378875732421875, "logps/chosen": -151.05050659179688, "logps/rejected": -268.6923522949219, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -7.258273124694824, "rewards/margins": 12.2664794921875, "rewards/rejected": -19.524751663208008, "step": 3637 }, { "epoch": 5.84, "learning_rate": 1.9510503369005152e-07, "logits/chosen": -1.5171899795532227, "logits/rejected": -1.4746599197387695, "logps/chosen": -124.65941619873047, "logps/rejected": -221.1080322265625, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -5.475859642028809, "rewards/margins": 9.939740180969238, "rewards/rejected": -15.41559886932373, "step": 3638 }, { "epoch": 5.84, "learning_rate": 1.9500594530321046e-07, "logits/chosen": -1.435643196105957, "logits/rejected": -1.4645709991455078, "logps/chosen": -186.7717742919922, "logps/rejected": -279.43536376953125, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -8.460721015930176, "rewards/margins": 11.204614639282227, "rewards/rejected": -19.665334701538086, "step": 3639 }, { "epoch": 5.84, "learning_rate": 1.9490685691636942e-07, "logits/chosen": -1.5706042051315308, "logits/rejected": -1.5340220928192139, "logps/chosen": -154.95777893066406, "logps/rejected": -301.5893249511719, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -7.094677925109863, "rewards/margins": 14.367963790893555, "rewards/rejected": -21.462642669677734, "step": 3640 }, { "epoch": 5.84, "learning_rate": 1.9480776852952832e-07, "logits/chosen": -1.5330665111541748, "logits/rejected": -1.5439238548278809, "logps/chosen": -143.248046875, "logps/rejected": -311.9872741699219, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -6.508798599243164, "rewards/margins": 15.64439582824707, "rewards/rejected": -22.153194427490234, "step": 3641 }, { "epoch": 5.85, "learning_rate": 1.9470868014268726e-07, "logits/chosen": -1.4373807907104492, "logits/rejected": -1.450153112411499, "logps/chosen": -164.4884490966797, "logps/rejected": -312.8091735839844, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.459832191467285, "rewards/margins": 13.822973251342773, "rewards/rejected": -21.282806396484375, "step": 3642 }, { "epoch": 5.85, "learning_rate": 1.9460959175584622e-07, "logits/chosen": -1.4590200185775757, "logits/rejected": -1.49994695186615, "logps/chosen": -95.1679458618164, "logps/rejected": -213.40316772460938, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -4.255184650421143, "rewards/margins": 10.410913467407227, "rewards/rejected": -14.666098594665527, "step": 3643 }, { "epoch": 5.85, "learning_rate": 1.9451050336900515e-07, "logits/chosen": -1.4650955200195312, "logits/rejected": -1.5128268003463745, "logps/chosen": -136.38409423828125, "logps/rejected": -273.6250305175781, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -5.704941749572754, "rewards/margins": 13.200605392456055, "rewards/rejected": -18.905546188354492, "step": 3644 }, { "epoch": 5.85, "learning_rate": 1.9441141498216406e-07, "logits/chosen": -1.4618892669677734, "logits/rejected": -1.44507896900177, "logps/chosen": -161.7688446044922, "logps/rejected": -305.83111572265625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.389949321746826, "rewards/margins": 13.65894889831543, "rewards/rejected": -21.04889678955078, "step": 3645 }, { "epoch": 5.85, "learning_rate": 1.9431232659532302e-07, "logits/chosen": -1.5288586616516113, "logits/rejected": -1.511027216911316, "logps/chosen": -152.18124389648438, "logps/rejected": -247.59426879882812, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -7.090428829193115, "rewards/margins": 10.249648094177246, "rewards/rejected": -17.340076446533203, "step": 3646 }, { "epoch": 5.85, "learning_rate": 1.9421323820848195e-07, "logits/chosen": -1.5149869918823242, "logits/rejected": -1.5595722198486328, "logps/chosen": -137.8832550048828, "logps/rejected": -250.62841796875, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -5.474795818328857, "rewards/margins": 10.082210540771484, "rewards/rejected": -15.557004928588867, "step": 3647 }, { "epoch": 5.86, "learning_rate": 1.941141498216409e-07, "logits/chosen": -1.517293930053711, "logits/rejected": -1.6003823280334473, "logps/chosen": -95.16696166992188, "logps/rejected": -228.02438354492188, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -5.010128021240234, "rewards/margins": 12.445441246032715, "rewards/rejected": -17.455570220947266, "step": 3648 }, { "epoch": 5.86, "learning_rate": 1.9401506143479984e-07, "logits/chosen": -1.5968083143234253, "logits/rejected": -1.6257691383361816, "logps/chosen": -166.9114227294922, "logps/rejected": -249.9268035888672, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -6.440021514892578, "rewards/margins": 9.112191200256348, "rewards/rejected": -15.552213668823242, "step": 3649 }, { "epoch": 5.86, "learning_rate": 1.9391597304795875e-07, "logits/chosen": -1.4860155582427979, "logits/rejected": -1.4595417976379395, "logps/chosen": -172.56219482421875, "logps/rejected": -299.3880615234375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -6.659389495849609, "rewards/margins": 13.330337524414062, "rewards/rejected": -19.989727020263672, "step": 3650 }, { "epoch": 5.86, "learning_rate": 1.938168846611177e-07, "logits/chosen": -1.610443115234375, "logits/rejected": -1.5963492393493652, "logps/chosen": -213.02243041992188, "logps/rejected": -296.08905029296875, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -10.291985511779785, "rewards/margins": 10.119683265686035, "rewards/rejected": -20.411670684814453, "step": 3651 }, { "epoch": 5.86, "learning_rate": 1.9371779627427664e-07, "logits/chosen": -1.4166561365127563, "logits/rejected": -1.424612045288086, "logps/chosen": -164.54742431640625, "logps/rejected": -253.05850219726562, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.7008256912231445, "rewards/margins": 10.451868057250977, "rewards/rejected": -15.152694702148438, "step": 3652 }, { "epoch": 5.86, "learning_rate": 1.936187078874356e-07, "logits/chosen": -1.494945764541626, "logits/rejected": -1.4699807167053223, "logps/chosen": -113.80967712402344, "logps/rejected": -236.53164672851562, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -2.9342713356018066, "rewards/margins": 12.6946439743042, "rewards/rejected": -15.628915786743164, "step": 3653 }, { "epoch": 5.87, "learning_rate": 1.9351961950059454e-07, "logits/chosen": -1.5003175735473633, "logits/rejected": -1.5379233360290527, "logps/chosen": -133.9944610595703, "logps/rejected": -280.30853271484375, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -5.822778701782227, "rewards/margins": 13.631128311157227, "rewards/rejected": -19.453907012939453, "step": 3654 }, { "epoch": 5.87, "learning_rate": 1.9342053111375344e-07, "logits/chosen": -1.4672565460205078, "logits/rejected": -1.4937043190002441, "logps/chosen": -106.48299407958984, "logps/rejected": -238.42361450195312, "loss": 0.0039, "rewards/accuracies": 1.0, "rewards/chosen": -4.253215312957764, "rewards/margins": 12.468277931213379, "rewards/rejected": -16.721492767333984, "step": 3655 }, { "epoch": 5.87, "learning_rate": 1.933214427269124e-07, "logits/chosen": -1.408714771270752, "logits/rejected": -1.3810824155807495, "logps/chosen": -119.79273223876953, "logps/rejected": -220.61209106445312, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -3.9259769916534424, "rewards/margins": 11.27649211883545, "rewards/rejected": -15.202468872070312, "step": 3656 }, { "epoch": 5.87, "learning_rate": 1.9322235434007134e-07, "logits/chosen": -1.6185362339019775, "logits/rejected": -1.5941388607025146, "logps/chosen": -144.82681274414062, "logps/rejected": -278.14581298828125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -4.164248466491699, "rewards/margins": 14.599987030029297, "rewards/rejected": -18.764236450195312, "step": 3657 }, { "epoch": 5.87, "learning_rate": 1.9312326595323027e-07, "logits/chosen": -1.4566502571105957, "logits/rejected": -1.5016978979110718, "logps/chosen": -123.39091491699219, "logps/rejected": -259.015869140625, "loss": 0.0146, "rewards/accuracies": 1.0, "rewards/chosen": -3.765225410461426, "rewards/margins": 11.899807929992676, "rewards/rejected": -15.665034294128418, "step": 3658 }, { "epoch": 5.87, "learning_rate": 1.9302417756638923e-07, "logits/chosen": -1.5461838245391846, "logits/rejected": -1.5293848514556885, "logps/chosen": -161.35906982421875, "logps/rejected": -276.5901184082031, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.117895126342773, "rewards/margins": 11.426451683044434, "rewards/rejected": -18.54434585571289, "step": 3659 }, { "epoch": 5.87, "learning_rate": 1.9292508917954814e-07, "logits/chosen": -1.4827799797058105, "logits/rejected": -1.5103849172592163, "logps/chosen": -117.5271987915039, "logps/rejected": -210.06454467773438, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.808136463165283, "rewards/margins": 9.750738143920898, "rewards/rejected": -13.55887508392334, "step": 3660 }, { "epoch": 5.88, "learning_rate": 1.928260007927071e-07, "logits/chosen": -1.718045711517334, "logits/rejected": -1.7224940061569214, "logps/chosen": -150.13800048828125, "logps/rejected": -280.9176330566406, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -6.734304428100586, "rewards/margins": 12.752914428710938, "rewards/rejected": -19.487220764160156, "step": 3661 }, { "epoch": 5.88, "learning_rate": 1.9272691240586603e-07, "logits/chosen": -1.4959279298782349, "logits/rejected": -1.5190191268920898, "logps/chosen": -126.38053131103516, "logps/rejected": -247.49526977539062, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.780041217803955, "rewards/margins": 11.220012664794922, "rewards/rejected": -16.00005340576172, "step": 3662 }, { "epoch": 5.88, "learning_rate": 1.9262782401902496e-07, "logits/chosen": -1.5342084169387817, "logits/rejected": -1.5700995922088623, "logps/chosen": -152.32943725585938, "logps/rejected": -258.2336120605469, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -7.056337356567383, "rewards/margins": 9.165003776550293, "rewards/rejected": -16.221342086791992, "step": 3663 }, { "epoch": 5.88, "learning_rate": 1.925287356321839e-07, "logits/chosen": -1.5052680969238281, "logits/rejected": -1.4243559837341309, "logps/chosen": -163.4620819091797, "logps/rejected": -302.07861328125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.449522495269775, "rewards/margins": 14.556560516357422, "rewards/rejected": -20.006084442138672, "step": 3664 }, { "epoch": 5.88, "learning_rate": 1.9242964724534283e-07, "logits/chosen": -1.5509437322616577, "logits/rejected": -1.5314981937408447, "logps/chosen": -165.66082763671875, "logps/rejected": -253.30783081054688, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -7.470024585723877, "rewards/margins": 10.727234840393066, "rewards/rejected": -18.1972599029541, "step": 3665 }, { "epoch": 5.88, "learning_rate": 1.9233055885850176e-07, "logits/chosen": -1.3682401180267334, "logits/rejected": -1.3855705261230469, "logps/chosen": -125.54808807373047, "logps/rejected": -240.92471313476562, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -6.1726155281066895, "rewards/margins": 10.08011245727539, "rewards/rejected": -16.252729415893555, "step": 3666 }, { "epoch": 5.89, "learning_rate": 1.9223147047166072e-07, "logits/chosen": -1.5226454734802246, "logits/rejected": -1.6500730514526367, "logps/chosen": -98.17153930664062, "logps/rejected": -284.37322998046875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.7687339782714844, "rewards/margins": 17.142288208007812, "rewards/rejected": -20.911022186279297, "step": 3667 }, { "epoch": 5.89, "learning_rate": 1.9213238208481965e-07, "logits/chosen": -1.5499145984649658, "logits/rejected": -1.5094082355499268, "logps/chosen": -136.2602996826172, "logps/rejected": -241.84165954589844, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -4.06522274017334, "rewards/margins": 11.144933700561523, "rewards/rejected": -15.21015739440918, "step": 3668 }, { "epoch": 5.89, "learning_rate": 1.920332936979786e-07, "logits/chosen": -1.6371996402740479, "logits/rejected": -1.5926542282104492, "logps/chosen": -160.00906372070312, "logps/rejected": -304.69354248046875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.27596664428711, "rewards/margins": 13.391912460327148, "rewards/rejected": -21.667877197265625, "step": 3669 }, { "epoch": 5.89, "learning_rate": 1.9193420531113752e-07, "logits/chosen": -1.6718637943267822, "logits/rejected": -1.6634516716003418, "logps/chosen": -135.7847900390625, "logps/rejected": -252.88002014160156, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -6.889223098754883, "rewards/margins": 11.005735397338867, "rewards/rejected": -17.89495849609375, "step": 3670 }, { "epoch": 5.89, "learning_rate": 1.9183511692429645e-07, "logits/chosen": -1.7969212532043457, "logits/rejected": -1.824304461479187, "logps/chosen": -126.70066833496094, "logps/rejected": -228.4222412109375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.722947835922241, "rewards/margins": 10.630352020263672, "rewards/rejected": -14.353300094604492, "step": 3671 }, { "epoch": 5.89, "learning_rate": 1.9173602853745541e-07, "logits/chosen": -1.4347548484802246, "logits/rejected": -1.4165340662002563, "logps/chosen": -145.5503692626953, "logps/rejected": -241.5057830810547, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -6.195026397705078, "rewards/margins": 10.51089859008789, "rewards/rejected": -16.70592498779297, "step": 3672 }, { "epoch": 5.9, "learning_rate": 1.9163694015061435e-07, "logits/chosen": -1.5520538091659546, "logits/rejected": -1.6382880210876465, "logps/chosen": -86.64100646972656, "logps/rejected": -248.48851013183594, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.385003089904785, "rewards/margins": 14.836771011352539, "rewards/rejected": -17.221773147583008, "step": 3673 }, { "epoch": 5.9, "learning_rate": 1.9153785176377325e-07, "logits/chosen": -1.4207098484039307, "logits/rejected": -1.5153276920318604, "logps/chosen": -119.36077880859375, "logps/rejected": -229.57427978515625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.647098541259766, "rewards/margins": 9.655750274658203, "rewards/rejected": -14.302848815917969, "step": 3674 }, { "epoch": 5.9, "learning_rate": 1.9143876337693221e-07, "logits/chosen": -1.6695656776428223, "logits/rejected": -1.6403167247772217, "logps/chosen": -120.75398254394531, "logps/rejected": -219.77392578125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -4.281494617462158, "rewards/margins": 10.195404052734375, "rewards/rejected": -14.476898193359375, "step": 3675 }, { "epoch": 5.9, "learning_rate": 1.9133967499009115e-07, "logits/chosen": -1.6315786838531494, "logits/rejected": -1.559051752090454, "logps/chosen": -141.34146118164062, "logps/rejected": -296.9576416015625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.48838996887207, "rewards/margins": 16.799072265625, "rewards/rejected": -22.28746223449707, "step": 3676 }, { "epoch": 5.9, "learning_rate": 1.912405866032501e-07, "logits/chosen": -1.480517864227295, "logits/rejected": -1.4801418781280518, "logps/chosen": -130.97279357910156, "logps/rejected": -255.55825805664062, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.76218843460083, "rewards/margins": 12.43912124633789, "rewards/rejected": -18.201309204101562, "step": 3677 }, { "epoch": 5.9, "learning_rate": 1.9114149821640904e-07, "logits/chosen": -1.534497857093811, "logits/rejected": -1.3863554000854492, "logps/chosen": -140.65335083007812, "logps/rejected": -246.45889282226562, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -4.218197345733643, "rewards/margins": 13.02921199798584, "rewards/rejected": -17.24740982055664, "step": 3678 }, { "epoch": 5.91, "learning_rate": 1.9104240982956795e-07, "logits/chosen": -1.6438411474227905, "logits/rejected": -1.5998400449752808, "logps/chosen": -171.29721069335938, "logps/rejected": -342.979736328125, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -7.31832218170166, "rewards/margins": 17.253503799438477, "rewards/rejected": -24.571826934814453, "step": 3679 }, { "epoch": 5.91, "learning_rate": 1.909433214427269e-07, "logits/chosen": -1.4763121604919434, "logits/rejected": -1.4671536684036255, "logps/chosen": -125.75518798828125, "logps/rejected": -302.0433349609375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -5.461373329162598, "rewards/margins": 15.335487365722656, "rewards/rejected": -20.796859741210938, "step": 3680 }, { "epoch": 5.91, "learning_rate": 1.9084423305588584e-07, "logits/chosen": -1.5870596170425415, "logits/rejected": -1.6089247465133667, "logps/chosen": -130.16603088378906, "logps/rejected": -265.1653137207031, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.111386299133301, "rewards/margins": 12.33742904663086, "rewards/rejected": -18.448814392089844, "step": 3681 }, { "epoch": 5.91, "learning_rate": 1.907451446690448e-07, "logits/chosen": -1.5631225109100342, "logits/rejected": -1.4993486404418945, "logps/chosen": -164.56576538085938, "logps/rejected": -276.1475830078125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.375594139099121, "rewards/margins": 12.361285209655762, "rewards/rejected": -19.736879348754883, "step": 3682 }, { "epoch": 5.91, "learning_rate": 1.906460562822037e-07, "logits/chosen": -1.4620574712753296, "logits/rejected": -1.5107136964797974, "logps/chosen": -144.11215209960938, "logps/rejected": -261.0520935058594, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -5.4463043212890625, "rewards/margins": 10.59670639038086, "rewards/rejected": -16.043010711669922, "step": 3683 }, { "epoch": 5.91, "learning_rate": 1.9054696789536264e-07, "logits/chosen": -1.5434565544128418, "logits/rejected": -1.5131157636642456, "logps/chosen": -162.0755615234375, "logps/rejected": -280.32537841796875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.071712493896484, "rewards/margins": 9.292452812194824, "rewards/rejected": -17.364164352416992, "step": 3684 }, { "epoch": 5.91, "learning_rate": 1.904478795085216e-07, "logits/chosen": -1.5239708423614502, "logits/rejected": -1.6967506408691406, "logps/chosen": -70.85276794433594, "logps/rejected": -293.915771484375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -1.4649735689163208, "rewards/margins": 16.974592208862305, "rewards/rejected": -18.43956756591797, "step": 3685 }, { "epoch": 5.92, "learning_rate": 1.9034879112168053e-07, "logits/chosen": -1.4462329149246216, "logits/rejected": -1.5220086574554443, "logps/chosen": -135.77420043945312, "logps/rejected": -254.41748046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.89843225479126, "rewards/margins": 13.211665153503418, "rewards/rejected": -18.110097885131836, "step": 3686 }, { "epoch": 5.92, "learning_rate": 1.9024970273483947e-07, "logits/chosen": -1.4700506925582886, "logits/rejected": -1.5044233798980713, "logps/chosen": -117.62751007080078, "logps/rejected": -211.74563598632812, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -5.7619452476501465, "rewards/margins": 8.892574310302734, "rewards/rejected": -14.654519081115723, "step": 3687 }, { "epoch": 5.92, "learning_rate": 1.901506143479984e-07, "logits/chosen": -1.5958352088928223, "logits/rejected": -1.6465210914611816, "logps/chosen": -104.88279724121094, "logps/rejected": -243.6307373046875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.036955833435059, "rewards/margins": 11.254576683044434, "rewards/rejected": -15.291532516479492, "step": 3688 }, { "epoch": 5.92, "learning_rate": 1.9005152596115733e-07, "logits/chosen": -1.5979094505310059, "logits/rejected": -1.6012042760849, "logps/chosen": -128.72796630859375, "logps/rejected": -266.0513916015625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -4.283300399780273, "rewards/margins": 13.259130477905273, "rewards/rejected": -17.542430877685547, "step": 3689 }, { "epoch": 5.92, "learning_rate": 1.899524375743163e-07, "logits/chosen": -1.3955384492874146, "logits/rejected": -1.4877187013626099, "logps/chosen": -146.68392944335938, "logps/rejected": -308.4697265625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.643572807312012, "rewards/margins": 13.967140197753906, "rewards/rejected": -21.610713958740234, "step": 3690 }, { "epoch": 5.92, "learning_rate": 1.8985334918747523e-07, "logits/chosen": -1.5422056913375854, "logits/rejected": -1.5991989374160767, "logps/chosen": -152.45521545410156, "logps/rejected": -292.18634033203125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -7.650496482849121, "rewards/margins": 12.444392204284668, "rewards/rejected": -20.09488868713379, "step": 3691 }, { "epoch": 5.93, "learning_rate": 1.8975426080063416e-07, "logits/chosen": -1.3720598220825195, "logits/rejected": -1.3632543087005615, "logps/chosen": -152.57733154296875, "logps/rejected": -301.2712707519531, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.584931373596191, "rewards/margins": 14.315500259399414, "rewards/rejected": -20.900432586669922, "step": 3692 }, { "epoch": 5.93, "learning_rate": 1.896551724137931e-07, "logits/chosen": -1.5509374141693115, "logits/rejected": -1.6073428392410278, "logps/chosen": -133.9862060546875, "logps/rejected": -251.84368896484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.5873799324035645, "rewards/margins": 11.93701457977295, "rewards/rejected": -17.524394989013672, "step": 3693 }, { "epoch": 5.93, "learning_rate": 1.8955608402695203e-07, "logits/chosen": -1.4050536155700684, "logits/rejected": -1.5354934930801392, "logps/chosen": -138.06346130371094, "logps/rejected": -300.16064453125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -6.3258538246154785, "rewards/margins": 13.810397148132324, "rewards/rejected": -20.13625144958496, "step": 3694 }, { "epoch": 5.93, "learning_rate": 1.8945699564011096e-07, "logits/chosen": -1.4550800323486328, "logits/rejected": -1.3998072147369385, "logps/chosen": -143.1812286376953, "logps/rejected": -234.50949096679688, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -5.388744831085205, "rewards/margins": 10.256027221679688, "rewards/rejected": -15.644771575927734, "step": 3695 }, { "epoch": 5.93, "learning_rate": 1.8935790725326992e-07, "logits/chosen": -1.4175169467926025, "logits/rejected": -1.393728256225586, "logps/chosen": -192.8778839111328, "logps/rejected": -348.37835693359375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -9.646796226501465, "rewards/margins": 15.695621490478516, "rewards/rejected": -25.342418670654297, "step": 3696 }, { "epoch": 5.93, "learning_rate": 1.8925881886642883e-07, "logits/chosen": -1.768151044845581, "logits/rejected": -1.7857685089111328, "logps/chosen": -162.59762573242188, "logps/rejected": -300.15850830078125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -8.62718677520752, "rewards/margins": 14.317544937133789, "rewards/rejected": -22.944730758666992, "step": 3697 }, { "epoch": 5.94, "learning_rate": 1.8915973047958779e-07, "logits/chosen": -1.513951301574707, "logits/rejected": -1.6251705884933472, "logps/chosen": -162.78033447265625, "logps/rejected": -295.7638854980469, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -8.491299629211426, "rewards/margins": 11.151388168334961, "rewards/rejected": -19.642688751220703, "step": 3698 }, { "epoch": 5.94, "learning_rate": 1.8906064209274672e-07, "logits/chosen": -1.6087629795074463, "logits/rejected": -1.6428990364074707, "logps/chosen": -188.2069854736328, "logps/rejected": -284.1829528808594, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -9.806699752807617, "rewards/margins": 9.564703941345215, "rewards/rejected": -19.37140464782715, "step": 3699 }, { "epoch": 5.94, "learning_rate": 1.8896155370590565e-07, "logits/chosen": -1.6553736925125122, "logits/rejected": -1.729343295097351, "logps/chosen": -113.10751342773438, "logps/rejected": -262.05474853515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.107659339904785, "rewards/margins": 12.38688850402832, "rewards/rejected": -17.49454689025879, "step": 3700 }, { "epoch": 5.94, "learning_rate": 1.888624653190646e-07, "logits/chosen": -1.5415149927139282, "logits/rejected": -1.4571938514709473, "logps/chosen": -130.62001037597656, "logps/rejected": -203.699951171875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -4.10054349899292, "rewards/margins": 9.422486305236816, "rewards/rejected": -13.523029327392578, "step": 3701 }, { "epoch": 5.94, "learning_rate": 1.8876337693222352e-07, "logits/chosen": -1.4082000255584717, "logits/rejected": -1.4240295886993408, "logps/chosen": -161.91326904296875, "logps/rejected": -336.9073486328125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.498258590698242, "rewards/margins": 13.28285026550293, "rewards/rejected": -21.781108856201172, "step": 3702 }, { "epoch": 5.94, "learning_rate": 1.8866428854538248e-07, "logits/chosen": -1.522631049156189, "logits/rejected": -1.4284687042236328, "logps/chosen": -173.02447509765625, "logps/rejected": -318.6395568847656, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.221850395202637, "rewards/margins": 15.330917358398438, "rewards/rejected": -22.55276870727539, "step": 3703 }, { "epoch": 5.95, "learning_rate": 1.885652001585414e-07, "logits/chosen": -1.666113257408142, "logits/rejected": -1.6611957550048828, "logps/chosen": -127.99026489257812, "logps/rejected": -303.2962951660156, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.5711822509765625, "rewards/margins": 16.306671142578125, "rewards/rejected": -21.877853393554688, "step": 3704 }, { "epoch": 5.95, "learning_rate": 1.8846611177170035e-07, "logits/chosen": -1.4214556217193604, "logits/rejected": -1.4787708520889282, "logps/chosen": -105.6366958618164, "logps/rejected": -219.59039306640625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.811261177062988, "rewards/margins": 10.935802459716797, "rewards/rejected": -15.747063636779785, "step": 3705 }, { "epoch": 5.95, "learning_rate": 1.883670233848593e-07, "logits/chosen": -1.4125428199768066, "logits/rejected": -1.4424316883087158, "logps/chosen": -157.37298583984375, "logps/rejected": -254.4681854248047, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.305351257324219, "rewards/margins": 10.276167869567871, "rewards/rejected": -18.581520080566406, "step": 3706 }, { "epoch": 5.95, "learning_rate": 1.882679349980182e-07, "logits/chosen": -1.4438930749893188, "logits/rejected": -1.4940712451934814, "logps/chosen": -191.05059814453125, "logps/rejected": -313.15069580078125, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -10.298832893371582, "rewards/margins": 10.93721866607666, "rewards/rejected": -21.236053466796875, "step": 3707 }, { "epoch": 5.95, "learning_rate": 1.8816884661117715e-07, "logits/chosen": -1.570009469985962, "logits/rejected": -1.6258060932159424, "logps/chosen": -149.80404663085938, "logps/rejected": -272.12823486328125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.608193397521973, "rewards/margins": 12.463358879089355, "rewards/rejected": -19.071552276611328, "step": 3708 }, { "epoch": 5.95, "learning_rate": 1.880697582243361e-07, "logits/chosen": -1.5564316511154175, "logits/rejected": -1.5848227739334106, "logps/chosen": -189.29261779785156, "logps/rejected": -337.5676574707031, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -9.573407173156738, "rewards/margins": 14.258111000061035, "rewards/rejected": -23.83151626586914, "step": 3709 }, { "epoch": 5.96, "learning_rate": 1.8797066983749504e-07, "logits/chosen": -1.4031949043273926, "logits/rejected": -1.4441360235214233, "logps/chosen": -135.97903442382812, "logps/rejected": -287.9762878417969, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -7.494509220123291, "rewards/margins": 13.647260665893555, "rewards/rejected": -21.141769409179688, "step": 3710 }, { "epoch": 5.96, "learning_rate": 1.87871581450654e-07, "logits/chosen": -1.4672036170959473, "logits/rejected": -1.4632892608642578, "logps/chosen": -132.6333770751953, "logps/rejected": -283.39324951171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.481267929077148, "rewards/margins": 14.675756454467773, "rewards/rejected": -20.15702247619629, "step": 3711 }, { "epoch": 5.96, "learning_rate": 1.877724930638129e-07, "logits/chosen": -1.4450477361679077, "logits/rejected": -1.4160045385360718, "logps/chosen": -164.26168823242188, "logps/rejected": -286.7809143066406, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -7.212534427642822, "rewards/margins": 11.673108100891113, "rewards/rejected": -18.885643005371094, "step": 3712 }, { "epoch": 5.96, "learning_rate": 1.8767340467697184e-07, "logits/chosen": -1.6563515663146973, "logits/rejected": -1.5344302654266357, "logps/chosen": -189.4044189453125, "logps/rejected": -272.1693420410156, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -8.401962280273438, "rewards/margins": 9.75802993774414, "rewards/rejected": -18.159992218017578, "step": 3713 }, { "epoch": 5.96, "learning_rate": 1.875743162901308e-07, "logits/chosen": -1.5040847063064575, "logits/rejected": -1.5975149869918823, "logps/chosen": -70.62644958496094, "logps/rejected": -211.5277099609375, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -0.9613145589828491, "rewards/margins": 13.232431411743164, "rewards/rejected": -14.193745613098145, "step": 3714 }, { "epoch": 5.96, "learning_rate": 1.8747522790328973e-07, "logits/chosen": -1.7836558818817139, "logits/rejected": -1.7876358032226562, "logps/chosen": -140.72097778320312, "logps/rejected": -275.6371154785156, "loss": 0.0073, "rewards/accuracies": 1.0, "rewards/chosen": -6.797496795654297, "rewards/margins": 12.669290542602539, "rewards/rejected": -19.466787338256836, "step": 3715 }, { "epoch": 5.96, "learning_rate": 1.8737613951644864e-07, "logits/chosen": -1.3768072128295898, "logits/rejected": -1.465212345123291, "logps/chosen": -120.96431732177734, "logps/rejected": -258.19769287109375, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -4.034911632537842, "rewards/margins": 13.804450988769531, "rewards/rejected": -17.83936309814453, "step": 3716 }, { "epoch": 5.97, "learning_rate": 1.872770511296076e-07, "logits/chosen": -1.611348271369934, "logits/rejected": -1.667280912399292, "logps/chosen": -136.48165893554688, "logps/rejected": -288.8954772949219, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -6.08405876159668, "rewards/margins": 13.650394439697266, "rewards/rejected": -19.734453201293945, "step": 3717 }, { "epoch": 5.97, "learning_rate": 1.8717796274276653e-07, "logits/chosen": -1.481740117073059, "logits/rejected": -1.5134330987930298, "logps/chosen": -158.48883056640625, "logps/rejected": -293.87384033203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.975423336029053, "rewards/margins": 13.202312469482422, "rewards/rejected": -21.177734375, "step": 3718 }, { "epoch": 5.97, "learning_rate": 1.870788743559255e-07, "logits/chosen": -1.4430646896362305, "logits/rejected": -1.4120030403137207, "logps/chosen": -150.5356903076172, "logps/rejected": -245.22471618652344, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -5.788219451904297, "rewards/margins": 10.5567045211792, "rewards/rejected": -16.344924926757812, "step": 3719 }, { "epoch": 5.97, "learning_rate": 1.8697978596908442e-07, "logits/chosen": -1.7909808158874512, "logits/rejected": -1.6334832906723022, "logps/chosen": -137.17445373535156, "logps/rejected": -226.52706909179688, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -3.884033441543579, "rewards/margins": 11.848114967346191, "rewards/rejected": -15.732149124145508, "step": 3720 }, { "epoch": 5.97, "learning_rate": 1.8688069758224333e-07, "logits/chosen": -1.4211233854293823, "logits/rejected": -1.3814936876296997, "logps/chosen": -160.5302734375, "logps/rejected": -254.46755981445312, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -7.037004470825195, "rewards/margins": 10.84640121459961, "rewards/rejected": -17.883405685424805, "step": 3721 }, { "epoch": 5.97, "learning_rate": 1.867816091954023e-07, "logits/chosen": -1.7150602340698242, "logits/rejected": -1.7019472122192383, "logps/chosen": -92.93966674804688, "logps/rejected": -230.4935302734375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.899407386779785, "rewards/margins": 13.690093040466309, "rewards/rejected": -16.589500427246094, "step": 3722 }, { "epoch": 5.98, "learning_rate": 1.8668252080856122e-07, "logits/chosen": -1.502805233001709, "logits/rejected": -1.478081226348877, "logps/chosen": -189.28366088867188, "logps/rejected": -265.9151916503906, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -8.784960746765137, "rewards/margins": 9.927692413330078, "rewards/rejected": -18.71265411376953, "step": 3723 }, { "epoch": 5.98, "learning_rate": 1.8658343242172018e-07, "logits/chosen": -1.3867504596710205, "logits/rejected": -1.4207792282104492, "logps/chosen": -157.76779174804688, "logps/rejected": -289.5389709472656, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -6.68863582611084, "rewards/margins": 13.308408737182617, "rewards/rejected": -19.99704360961914, "step": 3724 }, { "epoch": 5.98, "learning_rate": 1.8648434403487912e-07, "logits/chosen": -1.5385690927505493, "logits/rejected": -1.6557109355926514, "logps/chosen": -131.52041625976562, "logps/rejected": -232.51535034179688, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -4.508660316467285, "rewards/margins": 9.426185607910156, "rewards/rejected": -13.934846878051758, "step": 3725 }, { "epoch": 5.98, "learning_rate": 1.8638525564803802e-07, "logits/chosen": -1.5688905715942383, "logits/rejected": -1.5362516641616821, "logps/chosen": -133.5271453857422, "logps/rejected": -264.12646484375, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -3.7925689220428467, "rewards/margins": 13.396677017211914, "rewards/rejected": -17.189245223999023, "step": 3726 }, { "epoch": 5.98, "learning_rate": 1.8628616726119698e-07, "logits/chosen": -1.4343740940093994, "logits/rejected": -1.400930643081665, "logps/chosen": -132.9620819091797, "logps/rejected": -227.93939208984375, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -5.864546298980713, "rewards/margins": 10.637269973754883, "rewards/rejected": -16.501815795898438, "step": 3727 }, { "epoch": 5.98, "learning_rate": 1.8618707887435592e-07, "logits/chosen": -1.3522822856903076, "logits/rejected": -1.3627744913101196, "logps/chosen": -138.448974609375, "logps/rejected": -287.5068359375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.474243640899658, "rewards/margins": 14.87516975402832, "rewards/rejected": -21.349414825439453, "step": 3728 }, { "epoch": 5.99, "learning_rate": 1.8608799048751485e-07, "logits/chosen": -1.4551266431808472, "logits/rejected": -1.5144580602645874, "logps/chosen": -114.57350158691406, "logps/rejected": -240.2957305908203, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.49277400970459, "rewards/margins": 12.307703018188477, "rewards/rejected": -16.80047607421875, "step": 3729 }, { "epoch": 5.99, "learning_rate": 1.859889021006738e-07, "logits/chosen": -1.5020544528961182, "logits/rejected": -1.59376060962677, "logps/chosen": -114.01325225830078, "logps/rejected": -259.8193359375, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -4.326720714569092, "rewards/margins": 12.606695175170898, "rewards/rejected": -16.93341636657715, "step": 3730 }, { "epoch": 5.99, "learning_rate": 1.8588981371383272e-07, "logits/chosen": -1.5785584449768066, "logits/rejected": -1.6342432498931885, "logps/chosen": -106.4214096069336, "logps/rejected": -249.5433807373047, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.555849313735962, "rewards/margins": 14.302323341369629, "rewards/rejected": -17.858173370361328, "step": 3731 }, { "epoch": 5.99, "learning_rate": 1.8579072532699168e-07, "logits/chosen": -1.6108336448669434, "logits/rejected": -1.5623013973236084, "logps/chosen": -180.12229919433594, "logps/rejected": -306.132080078125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -9.040301322937012, "rewards/margins": 13.10683822631836, "rewards/rejected": -22.147140502929688, "step": 3732 }, { "epoch": 5.99, "learning_rate": 1.856916369401506e-07, "logits/chosen": -1.4907872676849365, "logits/rejected": -1.4656487703323364, "logps/chosen": -159.18826293945312, "logps/rejected": -247.97947692871094, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -7.089438438415527, "rewards/margins": 9.549057006835938, "rewards/rejected": -16.63849639892578, "step": 3733 }, { "epoch": 5.99, "learning_rate": 1.8559254855330954e-07, "logits/chosen": -1.4108601808547974, "logits/rejected": -1.500807762145996, "logps/chosen": -174.00119018554688, "logps/rejected": -308.46392822265625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.421490669250488, "rewards/margins": 12.244840621948242, "rewards/rejected": -20.666330337524414, "step": 3734 }, { "epoch": 6.0, "learning_rate": 1.8549346016646848e-07, "logits/chosen": -1.6357122659683228, "logits/rejected": -1.6525511741638184, "logps/chosen": -130.7125701904297, "logps/rejected": -286.3031005859375, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -4.685513973236084, "rewards/margins": 15.708707809448242, "rewards/rejected": -20.394222259521484, "step": 3735 }, { "epoch": 6.0, "learning_rate": 1.853943717796274e-07, "logits/chosen": -1.631971836090088, "logits/rejected": -1.62613046169281, "logps/chosen": -99.45304107666016, "logps/rejected": -230.4906005859375, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -2.7393112182617188, "rewards/margins": 13.323470115661621, "rewards/rejected": -16.062782287597656, "step": 3736 }, { "epoch": 6.0, "learning_rate": 1.8529528339278634e-07, "logits/chosen": -1.47713041305542, "logits/rejected": -1.5451579093933105, "logps/chosen": -101.16068267822266, "logps/rejected": -283.6063232421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.585237503051758, "rewards/margins": 16.273935317993164, "rewards/rejected": -19.859172821044922, "step": 3737 }, { "epoch": 6.0, "learning_rate": 1.851961950059453e-07, "logits/chosen": -1.330141544342041, "logits/rejected": -1.283247709274292, "logps/chosen": -111.42694854736328, "logps/rejected": -242.72128295898438, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -4.7566046714782715, "rewards/margins": 12.802552223205566, "rewards/rejected": -17.55915641784668, "step": 3738 }, { "epoch": 6.0, "learning_rate": 1.8509710661910424e-07, "logits/chosen": -1.5390558242797852, "logits/rejected": -1.5887528657913208, "logps/chosen": -117.5335693359375, "logps/rejected": -225.11244201660156, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.619745254516602, "rewards/margins": 10.816580772399902, "rewards/rejected": -15.436326026916504, "step": 3739 }, { "epoch": 6.0, "learning_rate": 1.8499801823226317e-07, "logits/chosen": -1.416367769241333, "logits/rejected": -1.4028477668762207, "logps/chosen": -173.96469116210938, "logps/rejected": -277.3204040527344, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.947136878967285, "rewards/margins": 11.761013984680176, "rewards/rejected": -19.708148956298828, "step": 3740 }, { "epoch": 6.0, "learning_rate": 1.848989298454221e-07, "logits/chosen": -1.531615138053894, "logits/rejected": -1.5191782712936401, "logps/chosen": -199.8966522216797, "logps/rejected": -321.79681396484375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.083995819091797, "rewards/margins": 11.639716148376465, "rewards/rejected": -19.723711013793945, "step": 3741 }, { "epoch": 6.01, "learning_rate": 1.8479984145858104e-07, "logits/chosen": -1.3982784748077393, "logits/rejected": -1.4397295713424683, "logps/chosen": -174.62762451171875, "logps/rejected": -293.93682861328125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.023435592651367, "rewards/margins": 10.839275360107422, "rewards/rejected": -18.862712860107422, "step": 3742 }, { "epoch": 6.01, "learning_rate": 1.8470075307174e-07, "logits/chosen": -1.5266352891921997, "logits/rejected": -1.481297254562378, "logps/chosen": -171.38177490234375, "logps/rejected": -277.15167236328125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -7.255051612854004, "rewards/margins": 12.699668884277344, "rewards/rejected": -19.95471954345703, "step": 3743 }, { "epoch": 6.01, "learning_rate": 1.8460166468489893e-07, "logits/chosen": -1.4513261318206787, "logits/rejected": -1.3418900966644287, "logps/chosen": -112.23099517822266, "logps/rejected": -219.62103271484375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -4.6433634757995605, "rewards/margins": 11.723655700683594, "rewards/rejected": -16.367019653320312, "step": 3744 }, { "epoch": 6.01, "learning_rate": 1.8450257629805784e-07, "logits/chosen": -1.5302023887634277, "logits/rejected": -1.5946837663650513, "logps/chosen": -184.11492919921875, "logps/rejected": -329.04974365234375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.555567741394043, "rewards/margins": 13.98332405090332, "rewards/rejected": -22.53889274597168, "step": 3745 }, { "epoch": 6.01, "learning_rate": 1.844034879112168e-07, "logits/chosen": -1.4342551231384277, "logits/rejected": -1.43612539768219, "logps/chosen": -146.6827392578125, "logps/rejected": -276.5037841796875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -5.963329315185547, "rewards/margins": 13.246003150939941, "rewards/rejected": -19.209333419799805, "step": 3746 }, { "epoch": 6.01, "learning_rate": 1.8430439952437573e-07, "logits/chosen": -1.627333402633667, "logits/rejected": -1.6453521251678467, "logps/chosen": -180.25430297851562, "logps/rejected": -292.6415100097656, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.190729141235352, "rewards/margins": 12.59901237487793, "rewards/rejected": -19.78974151611328, "step": 3747 }, { "epoch": 6.02, "learning_rate": 1.842053111375347e-07, "logits/chosen": -1.514379620552063, "logits/rejected": -1.5775344371795654, "logps/chosen": -159.56065368652344, "logps/rejected": -275.2501525878906, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.024033069610596, "rewards/margins": 12.119817733764648, "rewards/rejected": -18.14385223388672, "step": 3748 }, { "epoch": 6.02, "learning_rate": 1.8410622275069362e-07, "logits/chosen": -1.6145408153533936, "logits/rejected": -1.6594067811965942, "logps/chosen": -147.41680908203125, "logps/rejected": -264.0226745605469, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.344820499420166, "rewards/margins": 11.834607124328613, "rewards/rejected": -18.179428100585938, "step": 3749 }, { "epoch": 6.02, "learning_rate": 1.8400713436385253e-07, "logits/chosen": -1.5392974615097046, "logits/rejected": -1.4972853660583496, "logps/chosen": -140.9880828857422, "logps/rejected": -273.8529052734375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.352421283721924, "rewards/margins": 13.943973541259766, "rewards/rejected": -19.29639434814453, "step": 3750 }, { "epoch": 6.02, "learning_rate": 1.839080459770115e-07, "logits/chosen": -1.449488878250122, "logits/rejected": -1.4488788843154907, "logps/chosen": -138.74526977539062, "logps/rejected": -281.5375061035156, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -6.07131290435791, "rewards/margins": 13.863448143005371, "rewards/rejected": -19.93476104736328, "step": 3751 }, { "epoch": 6.02, "learning_rate": 1.8380895759017042e-07, "logits/chosen": -1.4665435552597046, "logits/rejected": -1.4294648170471191, "logps/chosen": -194.15631103515625, "logps/rejected": -345.0496826171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -10.602141380310059, "rewards/margins": 14.402270317077637, "rewards/rejected": -25.004413604736328, "step": 3752 }, { "epoch": 6.02, "learning_rate": 1.8370986920332938e-07, "logits/chosen": -1.4537912607192993, "logits/rejected": -1.4255924224853516, "logps/chosen": -143.8022003173828, "logps/rejected": -306.625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.235991477966309, "rewards/margins": 16.183080673217773, "rewards/rejected": -22.4190731048584, "step": 3753 }, { "epoch": 6.03, "learning_rate": 1.836107808164883e-07, "logits/chosen": -1.4513514041900635, "logits/rejected": -1.4844778776168823, "logps/chosen": -201.3562469482422, "logps/rejected": -301.7613525390625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -9.75457763671875, "rewards/margins": 10.705495834350586, "rewards/rejected": -20.460073471069336, "step": 3754 }, { "epoch": 6.03, "learning_rate": 1.8351169242964722e-07, "logits/chosen": -1.5252188444137573, "logits/rejected": -1.5071762800216675, "logps/chosen": -102.25727844238281, "logps/rejected": -226.20709228515625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.541853904724121, "rewards/margins": 11.987116813659668, "rewards/rejected": -15.528970718383789, "step": 3755 }, { "epoch": 6.03, "learning_rate": 1.8341260404280618e-07, "logits/chosen": -1.4276235103607178, "logits/rejected": -1.478003740310669, "logps/chosen": -132.8735809326172, "logps/rejected": -267.139892578125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -5.851090431213379, "rewards/margins": 12.239778518676758, "rewards/rejected": -18.09086799621582, "step": 3756 }, { "epoch": 6.03, "learning_rate": 1.8331351565596512e-07, "logits/chosen": -1.4930830001831055, "logits/rejected": -1.5063660144805908, "logps/chosen": -183.04066467285156, "logps/rejected": -371.712646484375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -9.156694412231445, "rewards/margins": 19.094629287719727, "rewards/rejected": -28.251323699951172, "step": 3757 }, { "epoch": 6.03, "learning_rate": 1.8321442726912405e-07, "logits/chosen": -1.3763052225112915, "logits/rejected": -1.3766800165176392, "logps/chosen": -143.50381469726562, "logps/rejected": -308.6813049316406, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.9523091316223145, "rewards/margins": 14.761186599731445, "rewards/rejected": -20.713497161865234, "step": 3758 }, { "epoch": 6.03, "learning_rate": 1.8311533888228298e-07, "logits/chosen": -1.4609462022781372, "logits/rejected": -1.3504738807678223, "logps/chosen": -192.33236694335938, "logps/rejected": -284.9546813964844, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -9.8056058883667, "rewards/margins": 11.831807136535645, "rewards/rejected": -21.637413024902344, "step": 3759 }, { "epoch": 6.04, "learning_rate": 1.8301625049544192e-07, "logits/chosen": -1.4034686088562012, "logits/rejected": -1.3783164024353027, "logps/chosen": -142.7501220703125, "logps/rejected": -271.149169921875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.015439987182617, "rewards/margins": 13.045120239257812, "rewards/rejected": -21.060558319091797, "step": 3760 }, { "epoch": 6.04, "learning_rate": 1.8291716210860087e-07, "logits/chosen": -1.4516793489456177, "logits/rejected": -1.446109414100647, "logps/chosen": -118.95420837402344, "logps/rejected": -247.193359375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -4.4847025871276855, "rewards/margins": 12.702291488647461, "rewards/rejected": -17.186992645263672, "step": 3761 }, { "epoch": 6.04, "learning_rate": 1.828180737217598e-07, "logits/chosen": -1.489023208618164, "logits/rejected": -1.478100061416626, "logps/chosen": -172.7230682373047, "logps/rejected": -331.7967529296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.193109512329102, "rewards/margins": 15.603076934814453, "rewards/rejected": -23.796188354492188, "step": 3762 }, { "epoch": 6.04, "learning_rate": 1.8271898533491874e-07, "logits/chosen": -1.4038270711898804, "logits/rejected": -1.3796181678771973, "logps/chosen": -192.1802978515625, "logps/rejected": -323.2943115234375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.976802825927734, "rewards/margins": 13.481634140014648, "rewards/rejected": -22.458438873291016, "step": 3763 }, { "epoch": 6.04, "learning_rate": 1.8261989694807767e-07, "logits/chosen": -1.3714425563812256, "logits/rejected": -1.3603813648223877, "logps/chosen": -186.00930786132812, "logps/rejected": -314.2300109863281, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -10.517593383789062, "rewards/margins": 12.490150451660156, "rewards/rejected": -23.00774383544922, "step": 3764 }, { "epoch": 6.04, "learning_rate": 1.825208085612366e-07, "logits/chosen": -1.4109392166137695, "logits/rejected": -1.4348992109298706, "logps/chosen": -164.87130737304688, "logps/rejected": -298.5825500488281, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.826113700866699, "rewards/margins": 11.549124717712402, "rewards/rejected": -19.3752384185791, "step": 3765 }, { "epoch": 6.04, "learning_rate": 1.8242172017439557e-07, "logits/chosen": -1.4163470268249512, "logits/rejected": -1.4462199211120605, "logps/chosen": -134.70814514160156, "logps/rejected": -297.186767578125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.378488540649414, "rewards/margins": 13.460439682006836, "rewards/rejected": -19.838926315307617, "step": 3766 }, { "epoch": 6.05, "learning_rate": 1.823226317875545e-07, "logits/chosen": -1.3906621932983398, "logits/rejected": -1.4663701057434082, "logps/chosen": -124.69303894042969, "logps/rejected": -259.7684326171875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.260793209075928, "rewards/margins": 12.29456901550293, "rewards/rejected": -17.555362701416016, "step": 3767 }, { "epoch": 6.05, "learning_rate": 1.8222354340071343e-07, "logits/chosen": -1.4665334224700928, "logits/rejected": -1.4844856262207031, "logps/chosen": -171.92459106445312, "logps/rejected": -289.804931640625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.954721450805664, "rewards/margins": 12.6197509765625, "rewards/rejected": -20.574472427368164, "step": 3768 }, { "epoch": 6.05, "learning_rate": 1.8212445501387237e-07, "logits/chosen": -1.4873765707015991, "logits/rejected": -1.5165445804595947, "logps/chosen": -131.38307189941406, "logps/rejected": -315.9477844238281, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -4.881532669067383, "rewards/margins": 17.7265682220459, "rewards/rejected": -22.60810089111328, "step": 3769 }, { "epoch": 6.05, "learning_rate": 1.820253666270313e-07, "logits/chosen": -1.3193511962890625, "logits/rejected": -1.30250084400177, "logps/chosen": -154.16888427734375, "logps/rejected": -279.6081237792969, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.555691719055176, "rewards/margins": 14.110661506652832, "rewards/rejected": -20.666353225708008, "step": 3770 }, { "epoch": 6.05, "learning_rate": 1.8192627824019023e-07, "logits/chosen": -1.4352095127105713, "logits/rejected": -1.4608900547027588, "logps/chosen": -222.8261260986328, "logps/rejected": -380.8035583496094, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -11.399138450622559, "rewards/margins": 15.254426956176758, "rewards/rejected": -26.653564453125, "step": 3771 }, { "epoch": 6.05, "learning_rate": 1.818271898533492e-07, "logits/chosen": -1.486783742904663, "logits/rejected": -1.5051430463790894, "logps/chosen": -181.663330078125, "logps/rejected": -328.8226623535156, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -9.14644718170166, "rewards/margins": 15.112361907958984, "rewards/rejected": -24.25881004333496, "step": 3772 }, { "epoch": 6.06, "learning_rate": 1.817281014665081e-07, "logits/chosen": -1.6808695793151855, "logits/rejected": -1.7473810911178589, "logps/chosen": -102.59202575683594, "logps/rejected": -265.6480407714844, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.7721221446990967, "rewards/margins": 17.1637020111084, "rewards/rejected": -18.93582534790039, "step": 3773 }, { "epoch": 6.06, "learning_rate": 1.8162901307966706e-07, "logits/chosen": -1.567201852798462, "logits/rejected": -1.5589690208435059, "logps/chosen": -188.40087890625, "logps/rejected": -312.92010498046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.422811508178711, "rewards/margins": 13.399160385131836, "rewards/rejected": -21.821971893310547, "step": 3774 }, { "epoch": 6.06, "learning_rate": 1.81529924692826e-07, "logits/chosen": -1.5052729845046997, "logits/rejected": -1.5610016584396362, "logps/chosen": -131.29763793945312, "logps/rejected": -264.26446533203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.0080084800720215, "rewards/margins": 12.385486602783203, "rewards/rejected": -18.393495559692383, "step": 3775 }, { "epoch": 6.06, "learning_rate": 1.8143083630598493e-07, "logits/chosen": -1.4382182359695435, "logits/rejected": -1.4311517477035522, "logps/chosen": -133.76930236816406, "logps/rejected": -263.8323974609375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.2685956954956055, "rewards/margins": 12.965446472167969, "rewards/rejected": -18.23404312133789, "step": 3776 }, { "epoch": 6.06, "learning_rate": 1.813317479191439e-07, "logits/chosen": -1.6239570379257202, "logits/rejected": -1.516526460647583, "logps/chosen": -118.94258117675781, "logps/rejected": -227.5891571044922, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.5296781063079834, "rewards/margins": 13.924795150756836, "rewards/rejected": -17.4544734954834, "step": 3777 }, { "epoch": 6.06, "learning_rate": 1.812326595323028e-07, "logits/chosen": -1.6953494548797607, "logits/rejected": -1.6409553289413452, "logps/chosen": -123.7352294921875, "logps/rejected": -266.2259826660156, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.8071422576904297, "rewards/margins": 14.900186538696289, "rewards/rejected": -18.70732879638672, "step": 3778 }, { "epoch": 6.07, "learning_rate": 1.8113357114546173e-07, "logits/chosen": -1.4740943908691406, "logits/rejected": -1.454007625579834, "logps/chosen": -214.21633911132812, "logps/rejected": -263.9981384277344, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.605491638183594, "rewards/margins": 8.986871719360352, "rewards/rejected": -18.592363357543945, "step": 3779 }, { "epoch": 6.07, "learning_rate": 1.810344827586207e-07, "logits/chosen": -1.4661208391189575, "logits/rejected": -1.4076242446899414, "logps/chosen": -134.00616455078125, "logps/rejected": -237.57586669921875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.082403182983398, "rewards/margins": 10.657125473022461, "rewards/rejected": -15.73952865600586, "step": 3780 }, { "epoch": 6.07, "learning_rate": 1.8093539437177962e-07, "logits/chosen": -1.4575575590133667, "logits/rejected": -1.4409706592559814, "logps/chosen": -167.14247131347656, "logps/rejected": -276.9107360839844, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.19277286529541, "rewards/margins": 12.372063636779785, "rewards/rejected": -19.564836502075195, "step": 3781 }, { "epoch": 6.07, "learning_rate": 1.8083630598493858e-07, "logits/chosen": -1.4474996328353882, "logits/rejected": -1.465004801750183, "logps/chosen": -124.71369171142578, "logps/rejected": -230.5960693359375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.554933547973633, "rewards/margins": 10.66431999206543, "rewards/rejected": -15.219253540039062, "step": 3782 }, { "epoch": 6.07, "learning_rate": 1.807372175980975e-07, "logits/chosen": -1.6190226078033447, "logits/rejected": -1.642683982849121, "logps/chosen": -140.6440887451172, "logps/rejected": -270.41217041015625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -5.868729591369629, "rewards/margins": 12.8851900100708, "rewards/rejected": -18.753921508789062, "step": 3783 }, { "epoch": 6.07, "learning_rate": 1.8063812921125642e-07, "logits/chosen": -1.631064534187317, "logits/rejected": -1.549789309501648, "logps/chosen": -160.88836669921875, "logps/rejected": -257.59588623046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.184457778930664, "rewards/margins": 11.752429008483887, "rewards/rejected": -18.936887741088867, "step": 3784 }, { "epoch": 6.08, "learning_rate": 1.8053904082441538e-07, "logits/chosen": -1.5913455486297607, "logits/rejected": -1.6091259717941284, "logps/chosen": -139.75454711914062, "logps/rejected": -296.818359375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.965237140655518, "rewards/margins": 15.649551391601562, "rewards/rejected": -20.614788055419922, "step": 3785 }, { "epoch": 6.08, "learning_rate": 1.804399524375743e-07, "logits/chosen": -1.7018628120422363, "logits/rejected": -1.7536125183105469, "logps/chosen": -125.75643920898438, "logps/rejected": -266.48870849609375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.68771505355835, "rewards/margins": 13.164011001586914, "rewards/rejected": -17.851726531982422, "step": 3786 }, { "epoch": 6.08, "learning_rate": 1.8034086405073322e-07, "logits/chosen": -1.6061551570892334, "logits/rejected": -1.542169451713562, "logps/chosen": -114.40037536621094, "logps/rejected": -213.57174682617188, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -4.734830856323242, "rewards/margins": 10.017226219177246, "rewards/rejected": -14.752057075500488, "step": 3787 }, { "epoch": 6.08, "learning_rate": 1.8024177566389218e-07, "logits/chosen": -1.5183043479919434, "logits/rejected": -1.526430368423462, "logps/chosen": -138.40643310546875, "logps/rejected": -249.5294189453125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.439443588256836, "rewards/margins": 10.88719367980957, "rewards/rejected": -16.326637268066406, "step": 3788 }, { "epoch": 6.08, "learning_rate": 1.801426872770511e-07, "logits/chosen": -1.565469741821289, "logits/rejected": -1.547842264175415, "logps/chosen": -157.80914306640625, "logps/rejected": -293.969970703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.375960350036621, "rewards/margins": 14.547316551208496, "rewards/rejected": -19.923276901245117, "step": 3789 }, { "epoch": 6.08, "learning_rate": 1.8004359889021007e-07, "logits/chosen": -1.4169995784759521, "logits/rejected": -1.4991260766983032, "logps/chosen": -136.2613525390625, "logps/rejected": -282.9382019042969, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.45950984954834, "rewards/margins": 12.52802848815918, "rewards/rejected": -18.987539291381836, "step": 3790 }, { "epoch": 6.09, "learning_rate": 1.79944510503369e-07, "logits/chosen": -1.6300346851348877, "logits/rejected": -1.5992354154586792, "logps/chosen": -178.6389923095703, "logps/rejected": -275.06304931640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.132242202758789, "rewards/margins": 12.213111877441406, "rewards/rejected": -20.345354080200195, "step": 3791 }, { "epoch": 6.09, "learning_rate": 1.798454221165279e-07, "logits/chosen": -1.393763542175293, "logits/rejected": -1.3956931829452515, "logps/chosen": -129.41075134277344, "logps/rejected": -238.068115234375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.418262958526611, "rewards/margins": 10.327736854553223, "rewards/rejected": -16.746000289916992, "step": 3792 }, { "epoch": 6.09, "learning_rate": 1.7974633372968687e-07, "logits/chosen": -1.34010648727417, "logits/rejected": -1.3275750875473022, "logps/chosen": -160.78604125976562, "logps/rejected": -345.16265869140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.877038478851318, "rewards/margins": 15.498299598693848, "rewards/rejected": -23.375335693359375, "step": 3793 }, { "epoch": 6.09, "learning_rate": 1.796472453428458e-07, "logits/chosen": -1.6587071418762207, "logits/rejected": -1.628845453262329, "logps/chosen": -134.6777801513672, "logps/rejected": -306.2210998535156, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.81648588180542, "rewards/margins": 17.395549774169922, "rewards/rejected": -22.212034225463867, "step": 3794 }, { "epoch": 6.09, "learning_rate": 1.7954815695600477e-07, "logits/chosen": -1.4521899223327637, "logits/rejected": -1.5008140802383423, "logps/chosen": -157.00741577148438, "logps/rejected": -260.797119140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.5496063232421875, "rewards/margins": 9.704809188842773, "rewards/rejected": -17.25441551208496, "step": 3795 }, { "epoch": 6.09, "learning_rate": 1.794490685691637e-07, "logits/chosen": -1.637709140777588, "logits/rejected": -1.6971737146377563, "logps/chosen": -124.56082153320312, "logps/rejected": -297.3476257324219, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.865278244018555, "rewards/margins": 14.160666465759277, "rewards/rejected": -19.02594566345215, "step": 3796 }, { "epoch": 6.09, "learning_rate": 1.793499801823226e-07, "logits/chosen": -1.5442612171173096, "logits/rejected": -1.5449774265289307, "logps/chosen": -166.0533447265625, "logps/rejected": -279.19281005859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.236565589904785, "rewards/margins": 12.470453262329102, "rewards/rejected": -19.70701789855957, "step": 3797 }, { "epoch": 6.1, "learning_rate": 1.7925089179548157e-07, "logits/chosen": -1.595421314239502, "logits/rejected": -1.4901849031448364, "logps/chosen": -127.79541015625, "logps/rejected": -301.0218505859375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.283175468444824, "rewards/margins": 17.78931427001953, "rewards/rejected": -22.072490692138672, "step": 3798 }, { "epoch": 6.1, "learning_rate": 1.791518034086405e-07, "logits/chosen": -1.565866470336914, "logits/rejected": -1.6001019477844238, "logps/chosen": -175.692626953125, "logps/rejected": -291.1026611328125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -9.593716621398926, "rewards/margins": 11.742859840393066, "rewards/rejected": -21.336576461791992, "step": 3799 }, { "epoch": 6.1, "learning_rate": 1.7905271502179943e-07, "logits/chosen": -1.5721890926361084, "logits/rejected": -1.567267656326294, "logps/chosen": -129.5963592529297, "logps/rejected": -238.68072509765625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.484330654144287, "rewards/margins": 11.897600173950195, "rewards/rejected": -17.381929397583008, "step": 3800 }, { "epoch": 6.1, "learning_rate": 1.789536266349584e-07, "logits/chosen": -1.536003828048706, "logits/rejected": -1.5603069067001343, "logps/chosen": -179.0333251953125, "logps/rejected": -325.583251953125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -9.69306468963623, "rewards/margins": 13.500709533691406, "rewards/rejected": -23.19377326965332, "step": 3801 }, { "epoch": 6.1, "learning_rate": 1.788545382481173e-07, "logits/chosen": -1.4298464059829712, "logits/rejected": -1.49652099609375, "logps/chosen": -157.45657348632812, "logps/rejected": -278.4204406738281, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.556663513183594, "rewards/margins": 12.258509635925293, "rewards/rejected": -19.815174102783203, "step": 3802 }, { "epoch": 6.1, "learning_rate": 1.7875544986127626e-07, "logits/chosen": -1.4853955507278442, "logits/rejected": -1.476299524307251, "logps/chosen": -166.4022979736328, "logps/rejected": -293.12030029296875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.654758453369141, "rewards/margins": 12.98419189453125, "rewards/rejected": -20.63895034790039, "step": 3803 }, { "epoch": 6.11, "learning_rate": 1.786563614744352e-07, "logits/chosen": -1.247530221939087, "logits/rejected": -1.2877860069274902, "logps/chosen": -218.80303955078125, "logps/rejected": -361.0220031738281, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -12.733563423156738, "rewards/margins": 12.186948776245117, "rewards/rejected": -24.920513153076172, "step": 3804 }, { "epoch": 6.11, "learning_rate": 1.7855727308759413e-07, "logits/chosen": -1.4932072162628174, "logits/rejected": -1.4531999826431274, "logps/chosen": -172.45372009277344, "logps/rejected": -285.5954284667969, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.857060432434082, "rewards/margins": 12.19033432006836, "rewards/rejected": -20.047393798828125, "step": 3805 }, { "epoch": 6.11, "learning_rate": 1.7845818470075306e-07, "logits/chosen": -1.482393503189087, "logits/rejected": -1.4890785217285156, "logps/chosen": -171.34042358398438, "logps/rejected": -306.6715393066406, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.051907539367676, "rewards/margins": 13.611867904663086, "rewards/rejected": -21.663776397705078, "step": 3806 }, { "epoch": 6.11, "learning_rate": 1.78359096313912e-07, "logits/chosen": -1.423534870147705, "logits/rejected": -1.3842170238494873, "logps/chosen": -167.61651611328125, "logps/rejected": -268.9778137207031, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.304862022399902, "rewards/margins": 11.43362045288086, "rewards/rejected": -18.738483428955078, "step": 3807 }, { "epoch": 6.11, "learning_rate": 1.7826000792707093e-07, "logits/chosen": -1.6707885265350342, "logits/rejected": -1.70903742313385, "logps/chosen": -123.28440856933594, "logps/rejected": -261.91973876953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.9392216205596924, "rewards/margins": 14.484914779663086, "rewards/rejected": -18.424137115478516, "step": 3808 }, { "epoch": 6.11, "learning_rate": 1.7816091954022988e-07, "logits/chosen": -1.35280442237854, "logits/rejected": -1.3350253105163574, "logps/chosen": -186.35333251953125, "logps/rejected": -282.67352294921875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.204326152801514, "rewards/margins": 11.649652481079102, "rewards/rejected": -18.853979110717773, "step": 3809 }, { "epoch": 6.12, "learning_rate": 1.7806183115338882e-07, "logits/chosen": -1.4358875751495361, "logits/rejected": -1.3967214822769165, "logps/chosen": -162.30902099609375, "logps/rejected": -274.95941162109375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.812281131744385, "rewards/margins": 11.479654312133789, "rewards/rejected": -19.291934967041016, "step": 3810 }, { "epoch": 6.12, "learning_rate": 1.7796274276654775e-07, "logits/chosen": -1.3384654521942139, "logits/rejected": -1.3976777791976929, "logps/chosen": -173.08551025390625, "logps/rejected": -281.0780029296875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.944849491119385, "rewards/margins": 9.928544044494629, "rewards/rejected": -17.87339210510254, "step": 3811 }, { "epoch": 6.12, "learning_rate": 1.7786365437970668e-07, "logits/chosen": -1.444828987121582, "logits/rejected": -1.5061047077178955, "logps/chosen": -137.6788787841797, "logps/rejected": -297.1807861328125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.707231521606445, "rewards/margins": 14.911031723022461, "rewards/rejected": -21.61826515197754, "step": 3812 }, { "epoch": 6.12, "learning_rate": 1.7776456599286562e-07, "logits/chosen": -1.6114637851715088, "logits/rejected": -1.6083928346633911, "logps/chosen": -150.7445068359375, "logps/rejected": -300.4595947265625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.256232261657715, "rewards/margins": 15.430974006652832, "rewards/rejected": -23.687206268310547, "step": 3813 }, { "epoch": 6.12, "learning_rate": 1.7766547760602458e-07, "logits/chosen": -1.4876816272735596, "logits/rejected": -1.3876979351043701, "logps/chosen": -152.75875854492188, "logps/rejected": -270.2669677734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.008551120758057, "rewards/margins": 12.534966468811035, "rewards/rejected": -18.54351806640625, "step": 3814 }, { "epoch": 6.12, "learning_rate": 1.775663892191835e-07, "logits/chosen": -1.4808738231658936, "logits/rejected": -1.575512170791626, "logps/chosen": -154.97482299804688, "logps/rejected": -297.31329345703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.233509063720703, "rewards/margins": 12.483366012573242, "rewards/rejected": -19.716875076293945, "step": 3815 }, { "epoch": 6.13, "learning_rate": 1.7746730083234244e-07, "logits/chosen": -1.4333171844482422, "logits/rejected": -1.3958699703216553, "logps/chosen": -170.19345092773438, "logps/rejected": -254.851806640625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -8.281806945800781, "rewards/margins": 10.081523895263672, "rewards/rejected": -18.363330841064453, "step": 3816 }, { "epoch": 6.13, "learning_rate": 1.7736821244550138e-07, "logits/chosen": -1.5037004947662354, "logits/rejected": -1.4921587705612183, "logps/chosen": -172.55136108398438, "logps/rejected": -278.90374755859375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.565716743469238, "rewards/margins": 11.258262634277344, "rewards/rejected": -19.823978424072266, "step": 3817 }, { "epoch": 6.13, "learning_rate": 1.772691240586603e-07, "logits/chosen": -1.6077229976654053, "logits/rejected": -1.565847635269165, "logps/chosen": -123.74120330810547, "logps/rejected": -240.32485961914062, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.144407272338867, "rewards/margins": 12.720708847045898, "rewards/rejected": -17.865116119384766, "step": 3818 }, { "epoch": 6.13, "learning_rate": 1.7717003567181927e-07, "logits/chosen": -1.4853519201278687, "logits/rejected": -1.4600698947906494, "logps/chosen": -131.24649047851562, "logps/rejected": -269.6234130859375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.49221658706665, "rewards/margins": 12.163769721984863, "rewards/rejected": -17.655986785888672, "step": 3819 }, { "epoch": 6.13, "learning_rate": 1.770709472849782e-07, "logits/chosen": -1.5571489334106445, "logits/rejected": -1.5601773262023926, "logps/chosen": -147.54766845703125, "logps/rejected": -326.118408203125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.4067535400390625, "rewards/margins": 17.37704086303711, "rewards/rejected": -24.783794403076172, "step": 3820 }, { "epoch": 6.13, "learning_rate": 1.769718588981371e-07, "logits/chosen": -1.619027853012085, "logits/rejected": -1.6156718730926514, "logps/chosen": -124.7366943359375, "logps/rejected": -259.7955627441406, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.9635443687438965, "rewards/margins": 14.071791648864746, "rewards/rejected": -18.035335540771484, "step": 3821 }, { "epoch": 6.13, "learning_rate": 1.7687277051129607e-07, "logits/chosen": -1.4566301107406616, "logits/rejected": -1.4034392833709717, "logps/chosen": -154.65065002441406, "logps/rejected": -242.01780700683594, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.770689964294434, "rewards/margins": 9.75728988647461, "rewards/rejected": -16.52798080444336, "step": 3822 }, { "epoch": 6.14, "learning_rate": 1.76773682124455e-07, "logits/chosen": -1.4636826515197754, "logits/rejected": -1.5505468845367432, "logps/chosen": -121.57402038574219, "logps/rejected": -320.09912109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.0760722160339355, "rewards/margins": 16.59515380859375, "rewards/rejected": -22.671226501464844, "step": 3823 }, { "epoch": 6.14, "learning_rate": 1.7667459373761396e-07, "logits/chosen": -1.6819249391555786, "logits/rejected": -1.6390478610992432, "logps/chosen": -153.12631225585938, "logps/rejected": -253.43814086914062, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.84613037109375, "rewards/margins": 11.02760124206543, "rewards/rejected": -17.87373161315918, "step": 3824 }, { "epoch": 6.14, "learning_rate": 1.7657550535077287e-07, "logits/chosen": -1.5726685523986816, "logits/rejected": -1.5228798389434814, "logps/chosen": -170.64163208007812, "logps/rejected": -267.82781982421875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.40782356262207, "rewards/margins": 12.538034439086914, "rewards/rejected": -18.945858001708984, "step": 3825 }, { "epoch": 6.14, "learning_rate": 1.764764169639318e-07, "logits/chosen": -1.5259493589401245, "logits/rejected": -1.5667859315872192, "logps/chosen": -169.5206756591797, "logps/rejected": -290.6390075683594, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -9.270910263061523, "rewards/margins": 11.483455657958984, "rewards/rejected": -20.75436782836914, "step": 3826 }, { "epoch": 6.14, "learning_rate": 1.7637732857709076e-07, "logits/chosen": -1.2963566780090332, "logits/rejected": -1.3839125633239746, "logps/chosen": -178.90089416503906, "logps/rejected": -299.0060729980469, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -10.238103866577148, "rewards/margins": 11.606868743896484, "rewards/rejected": -21.844970703125, "step": 3827 }, { "epoch": 6.14, "learning_rate": 1.762782401902497e-07, "logits/chosen": -1.5130980014801025, "logits/rejected": -1.4904916286468506, "logps/chosen": -120.67831420898438, "logps/rejected": -322.59869384765625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.3660995960235596, "rewards/margins": 18.8270320892334, "rewards/rejected": -22.193130493164062, "step": 3828 }, { "epoch": 6.15, "learning_rate": 1.7617915180340863e-07, "logits/chosen": -1.5276298522949219, "logits/rejected": -1.4329999685287476, "logps/chosen": -106.82864379882812, "logps/rejected": -215.99310302734375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.975137948989868, "rewards/margins": 12.048977851867676, "rewards/rejected": -15.024116516113281, "step": 3829 }, { "epoch": 6.15, "learning_rate": 1.7608006341656756e-07, "logits/chosen": -1.5999598503112793, "logits/rejected": -1.5721654891967773, "logps/chosen": -191.3624267578125, "logps/rejected": -279.52374267578125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.868209838867188, "rewards/margins": 10.693227767944336, "rewards/rejected": -19.561439514160156, "step": 3830 }, { "epoch": 6.15, "learning_rate": 1.759809750297265e-07, "logits/chosen": -1.4275267124176025, "logits/rejected": -1.4615769386291504, "logps/chosen": -165.47213745117188, "logps/rejected": -292.7216796875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.581279754638672, "rewards/margins": 12.478967666625977, "rewards/rejected": -21.06024742126465, "step": 3831 }, { "epoch": 6.15, "learning_rate": 1.7588188664288546e-07, "logits/chosen": -1.4849669933319092, "logits/rejected": -1.504443645477295, "logps/chosen": -128.66592407226562, "logps/rejected": -311.8310852050781, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.076352596282959, "rewards/margins": 16.04985809326172, "rewards/rejected": -22.126209259033203, "step": 3832 }, { "epoch": 6.15, "learning_rate": 1.757827982560444e-07, "logits/chosen": -1.3865934610366821, "logits/rejected": -1.4249476194381714, "logps/chosen": -121.48272705078125, "logps/rejected": -279.328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.088095188140869, "rewards/margins": 15.226882934570312, "rewards/rejected": -21.314977645874023, "step": 3833 }, { "epoch": 6.15, "learning_rate": 1.7568370986920332e-07, "logits/chosen": -1.3261303901672363, "logits/rejected": -1.4245729446411133, "logps/chosen": -176.65623474121094, "logps/rejected": -327.67803955078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.250896453857422, "rewards/margins": 13.696477890014648, "rewards/rejected": -22.947372436523438, "step": 3834 }, { "epoch": 6.16, "learning_rate": 1.7558462148236226e-07, "logits/chosen": -1.3696670532226562, "logits/rejected": -1.3856624364852905, "logps/chosen": -152.82598876953125, "logps/rejected": -300.609130859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.825052738189697, "rewards/margins": 13.269974708557129, "rewards/rejected": -21.095027923583984, "step": 3835 }, { "epoch": 6.16, "learning_rate": 1.754855330955212e-07, "logits/chosen": -1.3844093084335327, "logits/rejected": -1.4814257621765137, "logps/chosen": -166.3373565673828, "logps/rejected": -326.5030517578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.192693710327148, "rewards/margins": 12.815999031066895, "rewards/rejected": -22.00869369506836, "step": 3836 }, { "epoch": 6.16, "learning_rate": 1.7538644470868015e-07, "logits/chosen": -1.4280104637145996, "logits/rejected": -1.4718190431594849, "logps/chosen": -113.91871643066406, "logps/rejected": -299.0685729980469, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.845567226409912, "rewards/margins": 16.911596298217773, "rewards/rejected": -21.757164001464844, "step": 3837 }, { "epoch": 6.16, "learning_rate": 1.7528735632183908e-07, "logits/chosen": -1.4824315309524536, "logits/rejected": -1.5338945388793945, "logps/chosen": -162.718994140625, "logps/rejected": -291.10479736328125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.8893866539001465, "rewards/margins": 13.124232292175293, "rewards/rejected": -21.01361846923828, "step": 3838 }, { "epoch": 6.16, "learning_rate": 1.7518826793499802e-07, "logits/chosen": -1.4497954845428467, "logits/rejected": -1.522520661354065, "logps/chosen": -120.57606506347656, "logps/rejected": -287.828125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.103967189788818, "rewards/margins": 15.613201141357422, "rewards/rejected": -21.7171688079834, "step": 3839 }, { "epoch": 6.16, "learning_rate": 1.7508917954815695e-07, "logits/chosen": -1.7610411643981934, "logits/rejected": -1.661388874053955, "logps/chosen": -115.68321228027344, "logps/rejected": -240.237060546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.477254629135132, "rewards/margins": 14.764564514160156, "rewards/rejected": -18.241819381713867, "step": 3840 }, { "epoch": 6.17, "learning_rate": 1.7499009116131588e-07, "logits/chosen": -1.6221367120742798, "logits/rejected": -1.5923718214035034, "logps/chosen": -161.3482666015625, "logps/rejected": -290.1473388671875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.769899368286133, "rewards/margins": 12.521329879760742, "rewards/rejected": -17.291229248046875, "step": 3841 }, { "epoch": 6.17, "learning_rate": 1.7489100277447482e-07, "logits/chosen": -1.569892406463623, "logits/rejected": -1.509198546409607, "logps/chosen": -171.5223388671875, "logps/rejected": -302.9300537109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.834332466125488, "rewards/margins": 13.204143524169922, "rewards/rejected": -22.038475036621094, "step": 3842 }, { "epoch": 6.17, "learning_rate": 1.7479191438763378e-07, "logits/chosen": -1.4206053018569946, "logits/rejected": -1.3526794910430908, "logps/chosen": -166.36354064941406, "logps/rejected": -270.6518859863281, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.2524919509887695, "rewards/margins": 11.531538009643555, "rewards/rejected": -18.78403091430664, "step": 3843 }, { "epoch": 6.17, "learning_rate": 1.7469282600079268e-07, "logits/chosen": -1.7332799434661865, "logits/rejected": -1.626183032989502, "logps/chosen": -157.85369873046875, "logps/rejected": -270.10540771484375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.139922618865967, "rewards/margins": 12.709182739257812, "rewards/rejected": -19.849105834960938, "step": 3844 }, { "epoch": 6.17, "learning_rate": 1.7459373761395164e-07, "logits/chosen": -1.5969452857971191, "logits/rejected": -1.657511830329895, "logps/chosen": -77.7225570678711, "logps/rejected": -218.53013610839844, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.833327293395996, "rewards/margins": 11.985757827758789, "rewards/rejected": -14.819086074829102, "step": 3845 }, { "epoch": 6.17, "learning_rate": 1.7449464922711058e-07, "logits/chosen": -1.428108811378479, "logits/rejected": -1.4289007186889648, "logps/chosen": -128.5738525390625, "logps/rejected": -279.18505859375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -6.6427388191223145, "rewards/margins": 14.539169311523438, "rewards/rejected": -21.181907653808594, "step": 3846 }, { "epoch": 6.17, "learning_rate": 1.743955608402695e-07, "logits/chosen": -1.3786137104034424, "logits/rejected": -1.4462168216705322, "logps/chosen": -150.94070434570312, "logps/rejected": -269.56561279296875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.0785064697265625, "rewards/margins": 11.051803588867188, "rewards/rejected": -16.13031005859375, "step": 3847 }, { "epoch": 6.18, "learning_rate": 1.7429647245342847e-07, "logits/chosen": -1.3843207359313965, "logits/rejected": -1.4085087776184082, "logps/chosen": -158.68716430664062, "logps/rejected": -268.3040771484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.329726219177246, "rewards/margins": 11.531207084655762, "rewards/rejected": -17.860933303833008, "step": 3848 }, { "epoch": 6.18, "learning_rate": 1.7419738406658738e-07, "logits/chosen": -1.519884467124939, "logits/rejected": -1.5395328998565674, "logps/chosen": -141.9871826171875, "logps/rejected": -303.6024169921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.050459861755371, "rewards/margins": 15.11154556274414, "rewards/rejected": -21.162006378173828, "step": 3849 }, { "epoch": 6.18, "learning_rate": 1.740982956797463e-07, "logits/chosen": -1.6215970516204834, "logits/rejected": -1.629342794418335, "logps/chosen": -132.2390594482422, "logps/rejected": -319.824951171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.919148921966553, "rewards/margins": 18.122648239135742, "rewards/rejected": -23.04179573059082, "step": 3850 }, { "epoch": 6.18, "learning_rate": 1.7399920729290527e-07, "logits/chosen": -1.3492869138717651, "logits/rejected": -1.4705023765563965, "logps/chosen": -113.3492431640625, "logps/rejected": -282.7255859375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.74180269241333, "rewards/margins": 14.137002944946289, "rewards/rejected": -17.878807067871094, "step": 3851 }, { "epoch": 6.18, "learning_rate": 1.739001189060642e-07, "logits/chosen": -1.5944342613220215, "logits/rejected": -1.5429421663284302, "logps/chosen": -168.31809997558594, "logps/rejected": -282.19879150390625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.379705429077148, "rewards/margins": 13.792737007141113, "rewards/rejected": -21.172443389892578, "step": 3852 }, { "epoch": 6.18, "learning_rate": 1.7380103051922316e-07, "logits/chosen": -1.5550401210784912, "logits/rejected": -1.5876328945159912, "logps/chosen": -154.95199584960938, "logps/rejected": -270.5619201660156, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.908344268798828, "rewards/margins": 11.830986022949219, "rewards/rejected": -19.739328384399414, "step": 3853 }, { "epoch": 6.19, "learning_rate": 1.7370194213238207e-07, "logits/chosen": -1.4398596286773682, "logits/rejected": -1.4562010765075684, "logps/chosen": -154.61407470703125, "logps/rejected": -276.36370849609375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.429623603820801, "rewards/margins": 12.411519050598145, "rewards/rejected": -19.841142654418945, "step": 3854 }, { "epoch": 6.19, "learning_rate": 1.73602853745541e-07, "logits/chosen": -1.593056321144104, "logits/rejected": -1.6533660888671875, "logps/chosen": -93.23754119873047, "logps/rejected": -282.664794921875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.56699800491333, "rewards/margins": 15.884660720825195, "rewards/rejected": -18.451658248901367, "step": 3855 }, { "epoch": 6.19, "learning_rate": 1.7350376535869996e-07, "logits/chosen": -1.5050660371780396, "logits/rejected": -1.4875307083129883, "logps/chosen": -121.5550765991211, "logps/rejected": -259.6245422363281, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.520781517028809, "rewards/margins": 12.151273727416992, "rewards/rejected": -16.672054290771484, "step": 3856 }, { "epoch": 6.19, "learning_rate": 1.734046769718589e-07, "logits/chosen": -1.551447868347168, "logits/rejected": -1.5426613092422485, "logps/chosen": -138.09239196777344, "logps/rejected": -255.3394012451172, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.444283485412598, "rewards/margins": 12.174798011779785, "rewards/rejected": -17.619081497192383, "step": 3857 }, { "epoch": 6.19, "learning_rate": 1.733055885850178e-07, "logits/chosen": -1.4740409851074219, "logits/rejected": -1.5714890956878662, "logps/chosen": -132.3528594970703, "logps/rejected": -300.00994873046875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.720621109008789, "rewards/margins": 13.848098754882812, "rewards/rejected": -19.5687198638916, "step": 3858 }, { "epoch": 6.19, "learning_rate": 1.7320650019817676e-07, "logits/chosen": -1.5807878971099854, "logits/rejected": -1.7224830389022827, "logps/chosen": -126.27462768554688, "logps/rejected": -300.0312805175781, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.346314430236816, "rewards/margins": 14.826430320739746, "rewards/rejected": -20.172744750976562, "step": 3859 }, { "epoch": 6.2, "learning_rate": 1.731074118113357e-07, "logits/chosen": -1.4225494861602783, "logits/rejected": -1.4204446077346802, "logps/chosen": -153.34326171875, "logps/rejected": -288.9552917480469, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.690119743347168, "rewards/margins": 13.606597900390625, "rewards/rejected": -22.296716690063477, "step": 3860 }, { "epoch": 6.2, "learning_rate": 1.7300832342449465e-07, "logits/chosen": -1.3409863710403442, "logits/rejected": -1.3816914558410645, "logps/chosen": -154.36805725097656, "logps/rejected": -302.51080322265625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.967069625854492, "rewards/margins": 15.24156379699707, "rewards/rejected": -21.208633422851562, "step": 3861 }, { "epoch": 6.2, "learning_rate": 1.729092350376536e-07, "logits/chosen": -1.5778148174285889, "logits/rejected": -1.6799161434173584, "logps/chosen": -134.7712860107422, "logps/rejected": -282.6299133300781, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.987733840942383, "rewards/margins": 12.59692668914795, "rewards/rejected": -18.584659576416016, "step": 3862 }, { "epoch": 6.2, "learning_rate": 1.728101466508125e-07, "logits/chosen": -1.4984519481658936, "logits/rejected": -1.533626675605774, "logps/chosen": -170.5453643798828, "logps/rejected": -349.5455322265625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.560235023498535, "rewards/margins": 16.893150329589844, "rewards/rejected": -25.453386306762695, "step": 3863 }, { "epoch": 6.2, "learning_rate": 1.7271105826397145e-07, "logits/chosen": -1.533313512802124, "logits/rejected": -1.5506547689437866, "logps/chosen": -145.4617156982422, "logps/rejected": -272.6354064941406, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.333415985107422, "rewards/margins": 13.494709014892578, "rewards/rejected": -19.828123092651367, "step": 3864 }, { "epoch": 6.2, "learning_rate": 1.726119698771304e-07, "logits/chosen": -1.4255099296569824, "logits/rejected": -1.4249063730239868, "logps/chosen": -164.3316192626953, "logps/rejected": -298.12640380859375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.051913261413574, "rewards/margins": 11.631453514099121, "rewards/rejected": -19.683364868164062, "step": 3865 }, { "epoch": 6.21, "learning_rate": 1.7251288149028935e-07, "logits/chosen": -1.6056209802627563, "logits/rejected": -1.5921669006347656, "logps/chosen": -166.289306640625, "logps/rejected": -338.6816101074219, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.280487537384033, "rewards/margins": 18.513002395629883, "rewards/rejected": -24.79349136352539, "step": 3866 }, { "epoch": 6.21, "learning_rate": 1.7241379310344828e-07, "logits/chosen": -1.4909693002700806, "logits/rejected": -1.5521025657653809, "logps/chosen": -138.48324584960938, "logps/rejected": -320.2838439941406, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.431232929229736, "rewards/margins": 15.430791854858398, "rewards/rejected": -20.862024307250977, "step": 3867 }, { "epoch": 6.21, "learning_rate": 1.723147047166072e-07, "logits/chosen": -1.3606196641921997, "logits/rejected": -1.3225680589675903, "logps/chosen": -129.82752990722656, "logps/rejected": -262.2251281738281, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.352655410766602, "rewards/margins": 12.738580703735352, "rewards/rejected": -18.091236114501953, "step": 3868 }, { "epoch": 6.21, "learning_rate": 1.7221561632976615e-07, "logits/chosen": -1.5962382555007935, "logits/rejected": -1.5871429443359375, "logps/chosen": -185.88157653808594, "logps/rejected": -323.19512939453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.974701881408691, "rewards/margins": 13.709521293640137, "rewards/rejected": -23.684221267700195, "step": 3869 }, { "epoch": 6.21, "learning_rate": 1.7211652794292508e-07, "logits/chosen": -1.541239619255066, "logits/rejected": -1.604701280593872, "logps/chosen": -128.6729736328125, "logps/rejected": -254.98373413085938, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.761159420013428, "rewards/margins": 12.612956047058105, "rewards/rejected": -17.374114990234375, "step": 3870 }, { "epoch": 6.21, "learning_rate": 1.7201743955608401e-07, "logits/chosen": -1.4594465494155884, "logits/rejected": -1.4292292594909668, "logps/chosen": -167.54849243164062, "logps/rejected": -311.68603515625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -7.87504768371582, "rewards/margins": 13.862178802490234, "rewards/rejected": -21.737226486206055, "step": 3871 }, { "epoch": 6.22, "learning_rate": 1.7191835116924297e-07, "logits/chosen": -1.5023512840270996, "logits/rejected": -1.4889732599258423, "logps/chosen": -201.88916015625, "logps/rejected": -302.737060546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.768281936645508, "rewards/margins": 11.471170425415039, "rewards/rejected": -20.239452362060547, "step": 3872 }, { "epoch": 6.22, "learning_rate": 1.7181926278240188e-07, "logits/chosen": -1.4305771589279175, "logits/rejected": -1.4483516216278076, "logps/chosen": -173.68829345703125, "logps/rejected": -284.6599426269531, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.438737869262695, "rewards/margins": 10.638248443603516, "rewards/rejected": -20.07698631286621, "step": 3873 }, { "epoch": 6.22, "learning_rate": 1.7172017439556084e-07, "logits/chosen": -1.4120426177978516, "logits/rejected": -1.456087350845337, "logps/chosen": -190.09632873535156, "logps/rejected": -312.9935302734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.297791481018066, "rewards/margins": 13.136938095092773, "rewards/rejected": -21.434730529785156, "step": 3874 }, { "epoch": 6.22, "learning_rate": 1.7162108600871977e-07, "logits/chosen": -1.5439200401306152, "logits/rejected": -1.552311897277832, "logps/chosen": -135.64334106445312, "logps/rejected": -289.7987365722656, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.463281631469727, "rewards/margins": 13.073899269104004, "rewards/rejected": -18.537181854248047, "step": 3875 }, { "epoch": 6.22, "learning_rate": 1.715219976218787e-07, "logits/chosen": -1.4085543155670166, "logits/rejected": -1.4225096702575684, "logps/chosen": -185.99681091308594, "logps/rejected": -295.7558288574219, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.702444076538086, "rewards/margins": 12.530111312866211, "rewards/rejected": -21.232555389404297, "step": 3876 }, { "epoch": 6.22, "learning_rate": 1.7142290923503764e-07, "logits/chosen": -1.4050679206848145, "logits/rejected": -1.3458149433135986, "logps/chosen": -142.16769409179688, "logps/rejected": -274.2831115722656, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.059338092803955, "rewards/margins": 13.936020851135254, "rewards/rejected": -19.995359420776367, "step": 3877 }, { "epoch": 6.22, "learning_rate": 1.7132382084819657e-07, "logits/chosen": -1.5970219373703003, "logits/rejected": -1.5252021551132202, "logps/chosen": -166.8612060546875, "logps/rejected": -263.92755126953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.00337028503418, "rewards/margins": 9.398751258850098, "rewards/rejected": -17.402122497558594, "step": 3878 }, { "epoch": 6.23, "learning_rate": 1.712247324613555e-07, "logits/chosen": -1.6325063705444336, "logits/rejected": -1.5764509439468384, "logps/chosen": -189.055908203125, "logps/rejected": -307.99609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.103551864624023, "rewards/margins": 12.151839256286621, "rewards/rejected": -21.25539207458496, "step": 3879 }, { "epoch": 6.23, "learning_rate": 1.7112564407451447e-07, "logits/chosen": -1.4125529527664185, "logits/rejected": -1.434945821762085, "logps/chosen": -159.01956176757812, "logps/rejected": -320.3140869140625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.397619724273682, "rewards/margins": 15.68539047241211, "rewards/rejected": -23.083011627197266, "step": 3880 }, { "epoch": 6.23, "learning_rate": 1.710265556876734e-07, "logits/chosen": -1.524092674255371, "logits/rejected": -1.5119178295135498, "logps/chosen": -145.8486785888672, "logps/rejected": -270.34722900390625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.972131729125977, "rewards/margins": 13.597867012023926, "rewards/rejected": -19.56999969482422, "step": 3881 }, { "epoch": 6.23, "learning_rate": 1.7092746730083233e-07, "logits/chosen": -1.3804686069488525, "logits/rejected": -1.5149834156036377, "logps/chosen": -129.577880859375, "logps/rejected": -311.12353515625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.484848976135254, "rewards/margins": 16.10519790649414, "rewards/rejected": -21.59004783630371, "step": 3882 }, { "epoch": 6.23, "learning_rate": 1.7082837891399127e-07, "logits/chosen": -1.5789371728897095, "logits/rejected": -1.486254334449768, "logps/chosen": -185.633056640625, "logps/rejected": -255.0836181640625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.563197135925293, "rewards/margins": 10.21501350402832, "rewards/rejected": -17.77821159362793, "step": 3883 }, { "epoch": 6.23, "learning_rate": 1.707292905271502e-07, "logits/chosen": -1.425544023513794, "logits/rejected": -1.48406183719635, "logps/chosen": -189.84178161621094, "logps/rejected": -351.216552734375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -10.790539741516113, "rewards/margins": 14.423883438110352, "rewards/rejected": -25.21442222595215, "step": 3884 }, { "epoch": 6.24, "learning_rate": 1.7063020214030916e-07, "logits/chosen": -1.458839774131775, "logits/rejected": -1.460540533065796, "logps/chosen": -163.9482421875, "logps/rejected": -285.7064208984375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.216795921325684, "rewards/margins": 12.228906631469727, "rewards/rejected": -19.445703506469727, "step": 3885 }, { "epoch": 6.24, "learning_rate": 1.705311137534681e-07, "logits/chosen": -1.6150254011154175, "logits/rejected": -1.6892033815383911, "logps/chosen": -123.8370361328125, "logps/rejected": -274.9578857421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.753222465515137, "rewards/margins": 12.837717056274414, "rewards/rejected": -18.590938568115234, "step": 3886 }, { "epoch": 6.24, "learning_rate": 1.7043202536662703e-07, "logits/chosen": -1.391291856765747, "logits/rejected": -1.3539625406265259, "logps/chosen": -127.42044830322266, "logps/rejected": -272.901123046875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.314912796020508, "rewards/margins": 12.438775062561035, "rewards/rejected": -17.75368881225586, "step": 3887 }, { "epoch": 6.24, "learning_rate": 1.7033293697978596e-07, "logits/chosen": -1.5950490236282349, "logits/rejected": -1.594017505645752, "logps/chosen": -170.873291015625, "logps/rejected": -333.9078063964844, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.738178253173828, "rewards/margins": 16.04368019104004, "rewards/rejected": -23.781858444213867, "step": 3888 }, { "epoch": 6.24, "learning_rate": 1.702338485929449e-07, "logits/chosen": -1.5385055541992188, "logits/rejected": -1.5058715343475342, "logps/chosen": -127.00202178955078, "logps/rejected": -270.83782958984375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.5927042961120605, "rewards/margins": 14.673528671264648, "rewards/rejected": -21.266233444213867, "step": 3889 }, { "epoch": 6.24, "learning_rate": 1.7013476020610385e-07, "logits/chosen": -1.736159324645996, "logits/rejected": -1.6818408966064453, "logps/chosen": -115.66167449951172, "logps/rejected": -251.61285400390625, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -2.6863386631011963, "rewards/margins": 15.092650413513184, "rewards/rejected": -17.778989791870117, "step": 3890 }, { "epoch": 6.25, "learning_rate": 1.7003567181926279e-07, "logits/chosen": -1.5683491230010986, "logits/rejected": -1.4765475988388062, "logps/chosen": -140.60000610351562, "logps/rejected": -256.1197814941406, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.914010047912598, "rewards/margins": 13.672219276428223, "rewards/rejected": -19.58622932434082, "step": 3891 }, { "epoch": 6.25, "learning_rate": 1.699365834324217e-07, "logits/chosen": -1.3702197074890137, "logits/rejected": -1.3703851699829102, "logps/chosen": -139.6952362060547, "logps/rejected": -263.6590881347656, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.659933090209961, "rewards/margins": 13.055978775024414, "rewards/rejected": -17.715911865234375, "step": 3892 }, { "epoch": 6.25, "learning_rate": 1.6983749504558065e-07, "logits/chosen": -1.6215869188308716, "logits/rejected": -1.663212776184082, "logps/chosen": -133.451171875, "logps/rejected": -266.4822998046875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.3023176193237305, "rewards/margins": 11.696677207946777, "rewards/rejected": -17.998992919921875, "step": 3893 }, { "epoch": 6.25, "learning_rate": 1.6973840665873959e-07, "logits/chosen": -1.4693995714187622, "logits/rejected": -1.4857287406921387, "logps/chosen": -118.60004425048828, "logps/rejected": -237.61465454101562, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.585785388946533, "rewards/margins": 11.622251510620117, "rewards/rejected": -16.208036422729492, "step": 3894 }, { "epoch": 6.25, "learning_rate": 1.6963931827189855e-07, "logits/chosen": -1.48971426486969, "logits/rejected": -1.6138503551483154, "logps/chosen": -170.00245666503906, "logps/rejected": -338.73504638671875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.606276988983154, "rewards/margins": 13.955498695373535, "rewards/rejected": -21.56177520751953, "step": 3895 }, { "epoch": 6.25, "learning_rate": 1.6954022988505745e-07, "logits/chosen": -1.476684808731079, "logits/rejected": -1.6425307989120483, "logps/chosen": -91.8194808959961, "logps/rejected": -278.4526062011719, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.8845624923706055, "rewards/margins": 14.39467716217041, "rewards/rejected": -18.279239654541016, "step": 3896 }, { "epoch": 6.26, "learning_rate": 1.6944114149821639e-07, "logits/chosen": -1.4746347665786743, "logits/rejected": -1.5137865543365479, "logps/chosen": -89.16888427734375, "logps/rejected": -217.9373016357422, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.4629018306732178, "rewards/margins": 11.086264610290527, "rewards/rejected": -14.549165725708008, "step": 3897 }, { "epoch": 6.26, "learning_rate": 1.6934205311137535e-07, "logits/chosen": -1.2870584726333618, "logits/rejected": -1.3494127988815308, "logps/chosen": -130.896484375, "logps/rejected": -246.9204559326172, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.605616569519043, "rewards/margins": 11.094234466552734, "rewards/rejected": -16.699851989746094, "step": 3898 }, { "epoch": 6.26, "learning_rate": 1.6924296472453428e-07, "logits/chosen": -1.4696335792541504, "logits/rejected": -1.5489413738250732, "logps/chosen": -209.91419982910156, "logps/rejected": -321.0644836425781, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -10.179826736450195, "rewards/margins": 11.845887184143066, "rewards/rejected": -22.025712966918945, "step": 3899 }, { "epoch": 6.26, "learning_rate": 1.6914387633769324e-07, "logits/chosen": -1.456642746925354, "logits/rejected": -1.4238017797470093, "logps/chosen": -225.08555603027344, "logps/rejected": -308.24456787109375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -12.017528533935547, "rewards/margins": 10.307282447814941, "rewards/rejected": -22.324811935424805, "step": 3900 }, { "epoch": 6.26, "learning_rate": 1.6904478795085215e-07, "logits/chosen": -1.25442373752594, "logits/rejected": -1.2574689388275146, "logps/chosen": -132.80120849609375, "logps/rejected": -235.2710723876953, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.650886535644531, "rewards/margins": 10.778369903564453, "rewards/rejected": -15.429256439208984, "step": 3901 }, { "epoch": 6.26, "learning_rate": 1.6894569956401108e-07, "logits/chosen": -1.5512027740478516, "logits/rejected": -1.565185785293579, "logps/chosen": -134.906982421875, "logps/rejected": -309.6687316894531, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.782958030700684, "rewards/margins": 17.268020629882812, "rewards/rejected": -22.050979614257812, "step": 3902 }, { "epoch": 6.26, "learning_rate": 1.6884661117717004e-07, "logits/chosen": -1.3824589252471924, "logits/rejected": -1.445134162902832, "logps/chosen": -129.37356567382812, "logps/rejected": -245.88743591308594, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.811347007751465, "rewards/margins": 10.565105438232422, "rewards/rejected": -17.376453399658203, "step": 3903 }, { "epoch": 6.27, "learning_rate": 1.6874752279032897e-07, "logits/chosen": -1.5099104642868042, "logits/rejected": -1.5295369625091553, "logps/chosen": -184.1702423095703, "logps/rejected": -344.0736083984375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.89108657836914, "rewards/margins": 15.398740768432617, "rewards/rejected": -24.289825439453125, "step": 3904 }, { "epoch": 6.27, "learning_rate": 1.686484344034879e-07, "logits/chosen": -1.4581186771392822, "logits/rejected": -1.42510187625885, "logps/chosen": -186.78285217285156, "logps/rejected": -342.8776550292969, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.103338241577148, "rewards/margins": 15.993819236755371, "rewards/rejected": -25.097156524658203, "step": 3905 }, { "epoch": 6.27, "learning_rate": 1.6854934601664684e-07, "logits/chosen": -1.7315924167633057, "logits/rejected": -1.5733197927474976, "logps/chosen": -116.51252746582031, "logps/rejected": -276.1841735839844, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.995081663131714, "rewards/margins": 18.008350372314453, "rewards/rejected": -21.003433227539062, "step": 3906 }, { "epoch": 6.27, "learning_rate": 1.6845025762980577e-07, "logits/chosen": -1.6040890216827393, "logits/rejected": -1.6402450799942017, "logps/chosen": -124.61642456054688, "logps/rejected": -312.6434326171875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.101325511932373, "rewards/margins": 16.236629486083984, "rewards/rejected": -22.337955474853516, "step": 3907 }, { "epoch": 6.27, "learning_rate": 1.6835116924296473e-07, "logits/chosen": -1.4962958097457886, "logits/rejected": -1.4931763410568237, "logps/chosen": -158.1457061767578, "logps/rejected": -273.2548828125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.5857343673706055, "rewards/margins": 11.426156044006348, "rewards/rejected": -18.011890411376953, "step": 3908 }, { "epoch": 6.27, "learning_rate": 1.6825208085612366e-07, "logits/chosen": -1.490328073501587, "logits/rejected": -1.5195735692977905, "logps/chosen": -167.98883056640625, "logps/rejected": -320.948974609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.061930656433105, "rewards/margins": 14.700002670288086, "rewards/rejected": -22.761932373046875, "step": 3909 }, { "epoch": 6.28, "learning_rate": 1.681529924692826e-07, "logits/chosen": -1.3981870412826538, "logits/rejected": -1.430159568786621, "logps/chosen": -161.3581085205078, "logps/rejected": -314.7900390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.326447486877441, "rewards/margins": 13.712295532226562, "rewards/rejected": -22.038742065429688, "step": 3910 }, { "epoch": 6.28, "learning_rate": 1.6805390408244153e-07, "logits/chosen": -1.4249391555786133, "logits/rejected": -1.4937368631362915, "logps/chosen": -159.84837341308594, "logps/rejected": -299.9945373535156, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.913923740386963, "rewards/margins": 12.096325874328613, "rewards/rejected": -20.010250091552734, "step": 3911 }, { "epoch": 6.28, "learning_rate": 1.6795481569560046e-07, "logits/chosen": -1.6382650136947632, "logits/rejected": -1.5445024967193604, "logps/chosen": -143.21714782714844, "logps/rejected": -254.938232421875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.424627304077148, "rewards/margins": 13.045330047607422, "rewards/rejected": -19.46995735168457, "step": 3912 }, { "epoch": 6.28, "learning_rate": 1.678557273087594e-07, "logits/chosen": -1.5665897130966187, "logits/rejected": -1.5277615785598755, "logps/chosen": -159.82220458984375, "logps/rejected": -305.2598876953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.5474982261657715, "rewards/margins": 14.1263427734375, "rewards/rejected": -20.67384147644043, "step": 3913 }, { "epoch": 6.28, "learning_rate": 1.6775663892191836e-07, "logits/chosen": -1.4943060874938965, "logits/rejected": -1.5069241523742676, "logps/chosen": -152.5938262939453, "logps/rejected": -310.50787353515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.281498908996582, "rewards/margins": 15.097932815551758, "rewards/rejected": -23.379432678222656, "step": 3914 }, { "epoch": 6.28, "learning_rate": 1.6765755053507726e-07, "logits/chosen": -1.4406414031982422, "logits/rejected": -1.4236328601837158, "logps/chosen": -184.52505493164062, "logps/rejected": -298.0910339355469, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -10.425711631774902, "rewards/margins": 12.35720443725586, "rewards/rejected": -22.782917022705078, "step": 3915 }, { "epoch": 6.29, "learning_rate": 1.6755846214823622e-07, "logits/chosen": -1.3292018175125122, "logits/rejected": -1.3749730587005615, "logps/chosen": -123.0317611694336, "logps/rejected": -237.62698364257812, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -5.623045921325684, "rewards/margins": 11.66877555847168, "rewards/rejected": -17.291820526123047, "step": 3916 }, { "epoch": 6.29, "learning_rate": 1.6745937376139516e-07, "logits/chosen": -1.6437163352966309, "logits/rejected": -1.5841041803359985, "logps/chosen": -143.4715118408203, "logps/rejected": -281.08355712890625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.558477401733398, "rewards/margins": 14.997993469238281, "rewards/rejected": -19.55647087097168, "step": 3917 }, { "epoch": 6.29, "learning_rate": 1.673602853745541e-07, "logits/chosen": -1.5948247909545898, "logits/rejected": -1.5530383586883545, "logps/chosen": -180.7176055908203, "logps/rejected": -302.14093017578125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.7121453285217285, "rewards/margins": 12.731134414672852, "rewards/rejected": -20.443279266357422, "step": 3918 }, { "epoch": 6.29, "learning_rate": 1.6726119698771305e-07, "logits/chosen": -1.4951684474945068, "logits/rejected": -1.5472091436386108, "logps/chosen": -175.42410278320312, "logps/rejected": -320.24981689453125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -9.012097358703613, "rewards/margins": 14.288057327270508, "rewards/rejected": -23.300155639648438, "step": 3919 }, { "epoch": 6.29, "learning_rate": 1.6716210860087196e-07, "logits/chosen": -1.5128087997436523, "logits/rejected": -1.5726499557495117, "logps/chosen": -167.81069946289062, "logps/rejected": -287.0572204589844, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.458122253417969, "rewards/margins": 12.17203426361084, "rewards/rejected": -19.630157470703125, "step": 3920 }, { "epoch": 6.29, "learning_rate": 1.670630202140309e-07, "logits/chosen": -1.329018473625183, "logits/rejected": -1.4045355319976807, "logps/chosen": -153.38424682617188, "logps/rejected": -323.7459716796875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.477097511291504, "rewards/margins": 16.50042724609375, "rewards/rejected": -24.97752571105957, "step": 3921 }, { "epoch": 6.3, "learning_rate": 1.6696393182718985e-07, "logits/chosen": -1.5834708213806152, "logits/rejected": -1.5859671831130981, "logps/chosen": -145.66639709472656, "logps/rejected": -271.28167724609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.670893669128418, "rewards/margins": 12.54137134552002, "rewards/rejected": -19.212265014648438, "step": 3922 }, { "epoch": 6.3, "learning_rate": 1.6686484344034878e-07, "logits/chosen": -1.6217231750488281, "logits/rejected": -1.6505992412567139, "logps/chosen": -110.48384094238281, "logps/rejected": -298.868408203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.155902624130249, "rewards/margins": 18.67673110961914, "rewards/rejected": -21.832632064819336, "step": 3923 }, { "epoch": 6.3, "learning_rate": 1.6676575505350774e-07, "logits/chosen": -1.5178585052490234, "logits/rejected": -1.5125021934509277, "logps/chosen": -121.4353256225586, "logps/rejected": -232.28118896484375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -5.116846561431885, "rewards/margins": 11.216753005981445, "rewards/rejected": -16.333600997924805, "step": 3924 }, { "epoch": 6.3, "learning_rate": 1.6666666666666665e-07, "logits/chosen": -1.5145236253738403, "logits/rejected": -1.5426770448684692, "logps/chosen": -135.06344604492188, "logps/rejected": -297.96112060546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.359985828399658, "rewards/margins": 16.958215713500977, "rewards/rejected": -22.31820297241211, "step": 3925 }, { "epoch": 6.3, "learning_rate": 1.6656757827982558e-07, "logits/chosen": -1.436886191368103, "logits/rejected": -1.4327298402786255, "logps/chosen": -178.8482208251953, "logps/rejected": -289.58514404296875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.92324447631836, "rewards/margins": 11.526580810546875, "rewards/rejected": -20.449825286865234, "step": 3926 }, { "epoch": 6.3, "learning_rate": 1.6646848989298454e-07, "logits/chosen": -1.497796654701233, "logits/rejected": -1.4908064603805542, "logps/chosen": -131.18984985351562, "logps/rejected": -303.9706115722656, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.9991021156311035, "rewards/margins": 18.290143966674805, "rewards/rejected": -22.289247512817383, "step": 3927 }, { "epoch": 6.3, "learning_rate": 1.6636940150614348e-07, "logits/chosen": -1.45273756980896, "logits/rejected": -1.4951268434524536, "logps/chosen": -150.1935577392578, "logps/rejected": -305.368408203125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.11789608001709, "rewards/margins": 14.827159881591797, "rewards/rejected": -20.945056915283203, "step": 3928 }, { "epoch": 6.31, "learning_rate": 1.6627031311930238e-07, "logits/chosen": -1.380833387374878, "logits/rejected": -1.5213100910186768, "logps/chosen": -174.23814392089844, "logps/rejected": -327.9368896484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.808857917785645, "rewards/margins": 13.467377662658691, "rewards/rejected": -22.276235580444336, "step": 3929 }, { "epoch": 6.31, "learning_rate": 1.6617122473246134e-07, "logits/chosen": -1.6637816429138184, "logits/rejected": -1.7057750225067139, "logps/chosen": -122.02420043945312, "logps/rejected": -287.966796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.301971912384033, "rewards/margins": 15.0834321975708, "rewards/rejected": -19.385404586791992, "step": 3930 }, { "epoch": 6.31, "learning_rate": 1.6607213634562028e-07, "logits/chosen": -1.4318759441375732, "logits/rejected": -1.402531385421753, "logps/chosen": -158.71206665039062, "logps/rejected": -289.5340270996094, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.177924156188965, "rewards/margins": 13.949573516845703, "rewards/rejected": -21.127498626708984, "step": 3931 }, { "epoch": 6.31, "learning_rate": 1.6597304795877924e-07, "logits/chosen": -1.4782382249832153, "logits/rejected": -1.518707275390625, "logps/chosen": -105.34712219238281, "logps/rejected": -249.3628692626953, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -3.1274826526641846, "rewards/margins": 14.668456077575684, "rewards/rejected": -17.79593849182129, "step": 3932 }, { "epoch": 6.31, "learning_rate": 1.6587395957193817e-07, "logits/chosen": -1.48872971534729, "logits/rejected": -1.4784202575683594, "logps/chosen": -179.91456604003906, "logps/rejected": -286.9005432128906, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -8.385456085205078, "rewards/margins": 12.12592601776123, "rewards/rejected": -20.511383056640625, "step": 3933 }, { "epoch": 6.31, "learning_rate": 1.6577487118509708e-07, "logits/chosen": -1.4260241985321045, "logits/rejected": -1.441740870475769, "logps/chosen": -159.15692138671875, "logps/rejected": -304.0753173828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.682707786560059, "rewards/margins": 13.278423309326172, "rewards/rejected": -21.961132049560547, "step": 3934 }, { "epoch": 6.32, "learning_rate": 1.6567578279825604e-07, "logits/chosen": -1.565319538116455, "logits/rejected": -1.4701380729675293, "logps/chosen": -147.08578491210938, "logps/rejected": -257.8539733886719, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -6.302391529083252, "rewards/margins": 11.434797286987305, "rewards/rejected": -17.7371883392334, "step": 3935 }, { "epoch": 6.32, "learning_rate": 1.6557669441141497e-07, "logits/chosen": -1.5070892572402954, "logits/rejected": -1.46585214138031, "logps/chosen": -124.07930755615234, "logps/rejected": -259.25347900390625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.764210224151611, "rewards/margins": 11.769156455993652, "rewards/rejected": -17.533367156982422, "step": 3936 }, { "epoch": 6.32, "learning_rate": 1.6547760602457393e-07, "logits/chosen": -1.5915930271148682, "logits/rejected": -1.6002469062805176, "logps/chosen": -114.32086944580078, "logps/rejected": -242.3919219970703, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.834458351135254, "rewards/margins": 12.412710189819336, "rewards/rejected": -17.247169494628906, "step": 3937 }, { "epoch": 6.32, "learning_rate": 1.6537851763773286e-07, "logits/chosen": -1.4663176536560059, "logits/rejected": -1.6317639350891113, "logps/chosen": -112.09112548828125, "logps/rejected": -309.00482177734375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -4.963474750518799, "rewards/margins": 16.44464111328125, "rewards/rejected": -21.408117294311523, "step": 3938 }, { "epoch": 6.32, "learning_rate": 1.6527942925089177e-07, "logits/chosen": -1.5561463832855225, "logits/rejected": -1.4748085737228394, "logps/chosen": -141.75355529785156, "logps/rejected": -241.1177978515625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.80802059173584, "rewards/margins": 12.3397216796875, "rewards/rejected": -18.147743225097656, "step": 3939 }, { "epoch": 6.32, "learning_rate": 1.6518034086405073e-07, "logits/chosen": -1.6014814376831055, "logits/rejected": -1.5534241199493408, "logps/chosen": -176.32872009277344, "logps/rejected": -268.36962890625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.164235591888428, "rewards/margins": 10.997298240661621, "rewards/rejected": -18.16153335571289, "step": 3940 }, { "epoch": 6.33, "learning_rate": 1.6508125247720966e-07, "logits/chosen": -1.4826384782791138, "logits/rejected": -1.4709956645965576, "logps/chosen": -157.76043701171875, "logps/rejected": -242.96405029296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.429962635040283, "rewards/margins": 10.02396297454834, "rewards/rejected": -16.45392608642578, "step": 3941 }, { "epoch": 6.33, "learning_rate": 1.649821640903686e-07, "logits/chosen": -1.4272725582122803, "logits/rejected": -1.4178071022033691, "logps/chosen": -145.05706787109375, "logps/rejected": -226.4466552734375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.9261980056762695, "rewards/margins": 9.814224243164062, "rewards/rejected": -15.740421295166016, "step": 3942 }, { "epoch": 6.33, "learning_rate": 1.6488307570352756e-07, "logits/chosen": -1.5481877326965332, "logits/rejected": -1.6056452989578247, "logps/chosen": -133.4686279296875, "logps/rejected": -259.7863464355469, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.713413238525391, "rewards/margins": 12.394735336303711, "rewards/rejected": -18.108150482177734, "step": 3943 }, { "epoch": 6.33, "learning_rate": 1.6478398731668646e-07, "logits/chosen": -1.5274676084518433, "logits/rejected": -1.5501234531402588, "logps/chosen": -101.38319396972656, "logps/rejected": -283.90545654296875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.120100498199463, "rewards/margins": 15.812801361083984, "rewards/rejected": -18.932903289794922, "step": 3944 }, { "epoch": 6.33, "learning_rate": 1.6468489892984542e-07, "logits/chosen": -1.5590604543685913, "logits/rejected": -1.5858144760131836, "logps/chosen": -216.06515502929688, "logps/rejected": -325.2770080566406, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -10.338653564453125, "rewards/margins": 11.537188529968262, "rewards/rejected": -21.87584114074707, "step": 3945 }, { "epoch": 6.33, "learning_rate": 1.6458581054300436e-07, "logits/chosen": -1.4349849224090576, "logits/rejected": -1.3805030584335327, "logps/chosen": -159.8118896484375, "logps/rejected": -278.40655517578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.1569132804870605, "rewards/margins": 13.383237838745117, "rewards/rejected": -20.540149688720703, "step": 3946 }, { "epoch": 6.34, "learning_rate": 1.644867221561633e-07, "logits/chosen": -1.4467719793319702, "logits/rejected": -1.4682390689849854, "logps/chosen": -111.69586181640625, "logps/rejected": -245.0198974609375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.848292827606201, "rewards/margins": 12.602740287780762, "rewards/rejected": -17.451032638549805, "step": 3947 }, { "epoch": 6.34, "learning_rate": 1.6438763376932222e-07, "logits/chosen": -1.3886165618896484, "logits/rejected": -1.4153711795806885, "logps/chosen": -192.13400268554688, "logps/rejected": -336.62481689453125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -9.283824920654297, "rewards/margins": 15.424585342407227, "rewards/rejected": -24.70840835571289, "step": 3948 }, { "epoch": 6.34, "learning_rate": 1.6428854538248116e-07, "logits/chosen": -1.4544893503189087, "logits/rejected": -1.4096252918243408, "logps/chosen": -153.1943359375, "logps/rejected": -261.0718078613281, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.217142105102539, "rewards/margins": 11.858192443847656, "rewards/rejected": -19.075334548950195, "step": 3949 }, { "epoch": 6.34, "learning_rate": 1.6418945699564012e-07, "logits/chosen": -1.611348032951355, "logits/rejected": -1.585326075553894, "logps/chosen": -171.95423889160156, "logps/rejected": -287.021240234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.607560634613037, "rewards/margins": 13.285688400268555, "rewards/rejected": -20.89324951171875, "step": 3950 }, { "epoch": 6.34, "learning_rate": 1.6409036860879905e-07, "logits/chosen": -1.574178695678711, "logits/rejected": -1.5552211999893188, "logps/chosen": -143.1689453125, "logps/rejected": -275.5705871582031, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.878445625305176, "rewards/margins": 10.498218536376953, "rewards/rejected": -16.376663208007812, "step": 3951 }, { "epoch": 6.34, "learning_rate": 1.6399128022195798e-07, "logits/chosen": -1.5309865474700928, "logits/rejected": -1.4454176425933838, "logps/chosen": -177.5360870361328, "logps/rejected": -267.66070556640625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -9.446547508239746, "rewards/margins": 9.913538932800293, "rewards/rejected": -19.36008644104004, "step": 3952 }, { "epoch": 6.35, "learning_rate": 1.6389219183511691e-07, "logits/chosen": -1.5138297080993652, "logits/rejected": -1.5664706230163574, "logps/chosen": -178.10263061523438, "logps/rejected": -329.0377197265625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -8.158344268798828, "rewards/margins": 14.226865768432617, "rewards/rejected": -22.385210037231445, "step": 3953 }, { "epoch": 6.35, "learning_rate": 1.6379310344827585e-07, "logits/chosen": -1.4848296642303467, "logits/rejected": -1.5652869939804077, "logps/chosen": -138.16136169433594, "logps/rejected": -281.2196044921875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.452284812927246, "rewards/margins": 11.854347229003906, "rewards/rejected": -18.306631088256836, "step": 3954 }, { "epoch": 6.35, "learning_rate": 1.6369401506143478e-07, "logits/chosen": -1.3937268257141113, "logits/rejected": -1.448549747467041, "logps/chosen": -178.87010192871094, "logps/rejected": -356.12335205078125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.499481201171875, "rewards/margins": 16.344602584838867, "rewards/rejected": -24.844083786010742, "step": 3955 }, { "epoch": 6.35, "learning_rate": 1.6359492667459374e-07, "logits/chosen": -1.5862387418746948, "logits/rejected": -1.5898258686065674, "logps/chosen": -114.17906951904297, "logps/rejected": -295.2752685546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.248535633087158, "rewards/margins": 15.324241638183594, "rewards/rejected": -19.572776794433594, "step": 3956 }, { "epoch": 6.35, "learning_rate": 1.6349583828775267e-07, "logits/chosen": -1.5324729681015015, "logits/rejected": -1.4619789123535156, "logps/chosen": -141.83250427246094, "logps/rejected": -239.22146606445312, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.866727352142334, "rewards/margins": 13.088116645812988, "rewards/rejected": -16.954843521118164, "step": 3957 }, { "epoch": 6.35, "learning_rate": 1.633967499009116e-07, "logits/chosen": -1.4563406705856323, "logits/rejected": -1.4612839221954346, "logps/chosen": -146.20460510253906, "logps/rejected": -230.49400329589844, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.711292743682861, "rewards/margins": 9.491861343383789, "rewards/rejected": -16.203155517578125, "step": 3958 }, { "epoch": 6.35, "learning_rate": 1.6329766151407054e-07, "logits/chosen": -1.4298436641693115, "logits/rejected": -1.4685190916061401, "logps/chosen": -176.33087158203125, "logps/rejected": -347.7360534667969, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.961612701416016, "rewards/margins": 17.022851943969727, "rewards/rejected": -23.984464645385742, "step": 3959 }, { "epoch": 6.36, "learning_rate": 1.6319857312722947e-07, "logits/chosen": -1.5172276496887207, "logits/rejected": -1.4890596866607666, "logps/chosen": -135.53721618652344, "logps/rejected": -273.04595947265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.017271518707275, "rewards/margins": 13.089927673339844, "rewards/rejected": -19.10719871520996, "step": 3960 }, { "epoch": 6.36, "learning_rate": 1.6309948474038843e-07, "logits/chosen": -1.5351402759552002, "logits/rejected": -1.5822473764419556, "logps/chosen": -145.30728149414062, "logps/rejected": -269.07012939453125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -5.862299919128418, "rewards/margins": 10.376681327819824, "rewards/rejected": -16.238981246948242, "step": 3961 }, { "epoch": 6.36, "learning_rate": 1.6300039635354737e-07, "logits/chosen": -1.5119330883026123, "logits/rejected": -1.5082218647003174, "logps/chosen": -172.72796630859375, "logps/rejected": -268.2436828613281, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.157577514648438, "rewards/margins": 9.139463424682617, "rewards/rejected": -17.297040939331055, "step": 3962 }, { "epoch": 6.36, "learning_rate": 1.6290130796670627e-07, "logits/chosen": -1.5119835138320923, "logits/rejected": -1.6388754844665527, "logps/chosen": -128.8704071044922, "logps/rejected": -314.309326171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.851696491241455, "rewards/margins": 14.602123260498047, "rewards/rejected": -20.453819274902344, "step": 3963 }, { "epoch": 6.36, "learning_rate": 1.6280221957986523e-07, "logits/chosen": -1.4335370063781738, "logits/rejected": -1.4531058073043823, "logps/chosen": -151.99932861328125, "logps/rejected": -330.26531982421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.935831546783447, "rewards/margins": 15.584542274475098, "rewards/rejected": -22.520374298095703, "step": 3964 }, { "epoch": 6.36, "learning_rate": 1.6270313119302417e-07, "logits/chosen": -1.4839730262756348, "logits/rejected": -1.3839322328567505, "logps/chosen": -175.63381958007812, "logps/rejected": -276.9841003417969, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -9.180705070495605, "rewards/margins": 10.646291732788086, "rewards/rejected": -19.826995849609375, "step": 3965 }, { "epoch": 6.37, "learning_rate": 1.6260404280618313e-07, "logits/chosen": -1.4569215774536133, "logits/rejected": -1.4089760780334473, "logps/chosen": -145.66085815429688, "logps/rejected": -295.6203308105469, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.313427925109863, "rewards/margins": 14.913666725158691, "rewards/rejected": -21.227096557617188, "step": 3966 }, { "epoch": 6.37, "learning_rate": 1.6250495441934203e-07, "logits/chosen": -1.4601564407348633, "logits/rejected": -1.5214871168136597, "logps/chosen": -131.26988220214844, "logps/rejected": -272.28802490234375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.458736419677734, "rewards/margins": 11.883935928344727, "rewards/rejected": -18.342674255371094, "step": 3967 }, { "epoch": 6.37, "learning_rate": 1.6240586603250097e-07, "logits/chosen": -1.6175827980041504, "logits/rejected": -1.6038904190063477, "logps/chosen": -107.5924301147461, "logps/rejected": -237.25375366210938, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.65548038482666, "rewards/margins": 11.880311965942383, "rewards/rejected": -16.53579330444336, "step": 3968 }, { "epoch": 6.37, "learning_rate": 1.6230677764565993e-07, "logits/chosen": -1.7689862251281738, "logits/rejected": -1.7836031913757324, "logps/chosen": -114.25425720214844, "logps/rejected": -237.8696746826172, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.137484550476074, "rewards/margins": 12.622108459472656, "rewards/rejected": -16.759592056274414, "step": 3969 }, { "epoch": 6.37, "learning_rate": 1.6220768925881886e-07, "logits/chosen": -1.4086711406707764, "logits/rejected": -1.5081077814102173, "logps/chosen": -102.00456237792969, "logps/rejected": -270.527099609375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.5692057609558105, "rewards/margins": 14.846660614013672, "rewards/rejected": -18.41586685180664, "step": 3970 }, { "epoch": 6.37, "learning_rate": 1.6210860087197782e-07, "logits/chosen": -1.608017086982727, "logits/rejected": -1.5650662183761597, "logps/chosen": -123.24845123291016, "logps/rejected": -234.94717407226562, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.47219181060791, "rewards/margins": 11.062385559082031, "rewards/rejected": -15.534577369689941, "step": 3971 }, { "epoch": 6.38, "learning_rate": 1.6200951248513673e-07, "logits/chosen": -1.3778172731399536, "logits/rejected": -1.3590917587280273, "logps/chosen": -222.862060546875, "logps/rejected": -283.87274169921875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.502344131469727, "rewards/margins": 11.53364372253418, "rewards/rejected": -20.035987854003906, "step": 3972 }, { "epoch": 6.38, "learning_rate": 1.6191042409829566e-07, "logits/chosen": -1.4982316493988037, "logits/rejected": -1.539750337600708, "logps/chosen": -172.0172119140625, "logps/rejected": -275.61785888671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.788986206054688, "rewards/margins": 10.190176010131836, "rewards/rejected": -18.97916030883789, "step": 3973 }, { "epoch": 6.38, "learning_rate": 1.6181133571145462e-07, "logits/chosen": -1.5428799390792847, "logits/rejected": -1.6092513799667358, "logps/chosen": -155.87754821777344, "logps/rejected": -292.10845947265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.423941135406494, "rewards/margins": 13.171009063720703, "rewards/rejected": -20.594951629638672, "step": 3974 }, { "epoch": 6.38, "learning_rate": 1.6171224732461355e-07, "logits/chosen": -1.6777634620666504, "logits/rejected": -1.6639420986175537, "logps/chosen": -100.64932250976562, "logps/rejected": -224.90467834472656, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.968264579772949, "rewards/margins": 13.40709114074707, "rewards/rejected": -16.375356674194336, "step": 3975 }, { "epoch": 6.38, "learning_rate": 1.6161315893777249e-07, "logits/chosen": -1.5678623914718628, "logits/rejected": -1.4481014013290405, "logps/chosen": -185.88197326660156, "logps/rejected": -327.3393249511719, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.950716972351074, "rewards/margins": 15.109739303588867, "rewards/rejected": -24.060455322265625, "step": 3976 }, { "epoch": 6.38, "learning_rate": 1.6151407055093142e-07, "logits/chosen": -1.292104721069336, "logits/rejected": -1.4341113567352295, "logps/chosen": -117.68405151367188, "logps/rejected": -257.08154296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.303684711456299, "rewards/margins": 11.20142650604248, "rewards/rejected": -16.505111694335938, "step": 3977 }, { "epoch": 6.39, "learning_rate": 1.6141498216409035e-07, "logits/chosen": -1.4918079376220703, "logits/rejected": -1.518143653869629, "logps/chosen": -148.5343780517578, "logps/rejected": -254.49838256835938, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.138147354125977, "rewards/margins": 11.770766258239746, "rewards/rejected": -18.908912658691406, "step": 3978 }, { "epoch": 6.39, "learning_rate": 1.613158937772493e-07, "logits/chosen": -1.4118938446044922, "logits/rejected": -1.4329826831817627, "logps/chosen": -180.9456787109375, "logps/rejected": -298.9212646484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.040372848510742, "rewards/margins": 11.520695686340332, "rewards/rejected": -20.56106948852539, "step": 3979 }, { "epoch": 6.39, "learning_rate": 1.6121680539040825e-07, "logits/chosen": -1.545224666595459, "logits/rejected": -1.5012035369873047, "logps/chosen": -200.25958251953125, "logps/rejected": -320.175048828125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.988940238952637, "rewards/margins": 13.523780822753906, "rewards/rejected": -22.512720108032227, "step": 3980 }, { "epoch": 6.39, "learning_rate": 1.6111771700356718e-07, "logits/chosen": -1.3925974369049072, "logits/rejected": -1.3957159519195557, "logps/chosen": -145.08267211914062, "logps/rejected": -282.7617492675781, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.291065692901611, "rewards/margins": 13.710073471069336, "rewards/rejected": -21.001140594482422, "step": 3981 }, { "epoch": 6.39, "learning_rate": 1.610186286167261e-07, "logits/chosen": -1.4944243431091309, "logits/rejected": -1.430525779724121, "logps/chosen": -148.34033203125, "logps/rejected": -259.8709716796875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.749449253082275, "rewards/margins": 10.940183639526367, "rewards/rejected": -16.689632415771484, "step": 3982 }, { "epoch": 6.39, "learning_rate": 1.6091954022988505e-07, "logits/chosen": -1.4707145690917969, "logits/rejected": -1.4026367664337158, "logps/chosen": -148.54312133789062, "logps/rejected": -274.0921630859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.918565273284912, "rewards/margins": 13.038726806640625, "rewards/rejected": -18.957292556762695, "step": 3983 }, { "epoch": 6.39, "learning_rate": 1.6082045184304398e-07, "logits/chosen": -1.5322589874267578, "logits/rejected": -1.5631089210510254, "logps/chosen": -140.72003173828125, "logps/rejected": -275.0437316894531, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.445150375366211, "rewards/margins": 11.071027755737305, "rewards/rejected": -16.516178131103516, "step": 3984 }, { "epoch": 6.4, "learning_rate": 1.6072136345620294e-07, "logits/chosen": -1.5657658576965332, "logits/rejected": -1.5092768669128418, "logps/chosen": -127.70248413085938, "logps/rejected": -248.90765380859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.499255657196045, "rewards/margins": 11.999900817871094, "rewards/rejected": -17.499156951904297, "step": 3985 }, { "epoch": 6.4, "learning_rate": 1.6062227506936185e-07, "logits/chosen": -1.6717333793640137, "logits/rejected": -1.696704626083374, "logps/chosen": -141.57635498046875, "logps/rejected": -305.54833984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.044940948486328, "rewards/margins": 13.380005836486816, "rewards/rejected": -20.424945831298828, "step": 3986 }, { "epoch": 6.4, "learning_rate": 1.605231866825208e-07, "logits/chosen": -1.471925973892212, "logits/rejected": -1.5221645832061768, "logps/chosen": -142.48593139648438, "logps/rejected": -301.0586853027344, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.977481365203857, "rewards/margins": 14.57772159576416, "rewards/rejected": -20.55520248413086, "step": 3987 }, { "epoch": 6.4, "learning_rate": 1.6042409829567974e-07, "logits/chosen": -1.5294488668441772, "logits/rejected": -1.5482138395309448, "logps/chosen": -137.36618041992188, "logps/rejected": -303.8057556152344, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.090366840362549, "rewards/margins": 15.792388916015625, "rewards/rejected": -21.88275718688965, "step": 3988 }, { "epoch": 6.4, "learning_rate": 1.6032500990883867e-07, "logits/chosen": -1.5439040660858154, "logits/rejected": -1.522796392440796, "logps/chosen": -115.33815002441406, "logps/rejected": -263.8655090332031, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -3.630532741546631, "rewards/margins": 15.538803100585938, "rewards/rejected": -19.169336318969727, "step": 3989 }, { "epoch": 6.4, "learning_rate": 1.6022592152199763e-07, "logits/chosen": -1.5632652044296265, "logits/rejected": -1.5821970701217651, "logps/chosen": -128.12454223632812, "logps/rejected": -242.43075561523438, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.349673271179199, "rewards/margins": 10.831616401672363, "rewards/rejected": -16.181289672851562, "step": 3990 }, { "epoch": 6.41, "learning_rate": 1.6012683313515654e-07, "logits/chosen": -1.566260576248169, "logits/rejected": -1.5405153036117554, "logps/chosen": -152.32485961914062, "logps/rejected": -299.2727966308594, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.996333599090576, "rewards/margins": 13.206554412841797, "rewards/rejected": -19.20288848876953, "step": 3991 }, { "epoch": 6.41, "learning_rate": 1.6002774474831547e-07, "logits/chosen": -1.4987488985061646, "logits/rejected": -1.5142943859100342, "logps/chosen": -152.9492645263672, "logps/rejected": -286.52044677734375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.817915439605713, "rewards/margins": 11.753351211547852, "rewards/rejected": -18.571266174316406, "step": 3992 }, { "epoch": 6.41, "learning_rate": 1.5992865636147443e-07, "logits/chosen": -1.6702743768692017, "logits/rejected": -1.6214627027511597, "logps/chosen": -175.078369140625, "logps/rejected": -304.1716613769531, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -9.049171447753906, "rewards/margins": 12.150520324707031, "rewards/rejected": -21.199691772460938, "step": 3993 }, { "epoch": 6.41, "learning_rate": 1.5982956797463337e-07, "logits/chosen": -1.4068892002105713, "logits/rejected": -1.4163830280303955, "logps/chosen": -198.59129333496094, "logps/rejected": -272.71990966796875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.383121490478516, "rewards/margins": 9.940868377685547, "rewards/rejected": -18.323989868164062, "step": 3994 }, { "epoch": 6.41, "learning_rate": 1.5973047958779233e-07, "logits/chosen": -1.4574804306030273, "logits/rejected": -1.456333041191101, "logps/chosen": -125.31588745117188, "logps/rejected": -244.9595947265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.521605014801025, "rewards/margins": 12.06488037109375, "rewards/rejected": -18.586484909057617, "step": 3995 }, { "epoch": 6.41, "learning_rate": 1.5963139120095123e-07, "logits/chosen": -1.5615702867507935, "logits/rejected": -1.5320665836334229, "logps/chosen": -133.40367126464844, "logps/rejected": -290.703369140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.839057445526123, "rewards/margins": 15.359874725341797, "rewards/rejected": -19.198932647705078, "step": 3996 }, { "epoch": 6.42, "learning_rate": 1.5953230281411017e-07, "logits/chosen": -1.5317665338516235, "logits/rejected": -1.5759742259979248, "logps/chosen": -117.11602783203125, "logps/rejected": -281.5647888183594, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.161654472351074, "rewards/margins": 15.053659439086914, "rewards/rejected": -20.215312957763672, "step": 3997 }, { "epoch": 6.42, "learning_rate": 1.5943321442726913e-07, "logits/chosen": -1.7259312868118286, "logits/rejected": -1.6679203510284424, "logps/chosen": -125.95059967041016, "logps/rejected": -220.51663208007812, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.294445276260376, "rewards/margins": 11.433616638183594, "rewards/rejected": -14.728063583374023, "step": 3998 }, { "epoch": 6.42, "learning_rate": 1.5933412604042806e-07, "logits/chosen": -1.532376766204834, "logits/rejected": -1.4637141227722168, "logps/chosen": -159.4670867919922, "logps/rejected": -284.4428405761719, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.34735631942749, "rewards/margins": 14.270535469055176, "rewards/rejected": -20.617891311645508, "step": 3999 }, { "epoch": 6.42, "learning_rate": 1.59235037653587e-07, "logits/chosen": -1.3065749406814575, "logits/rejected": -1.374037742614746, "logps/chosen": -185.95437622070312, "logps/rejected": -316.81842041015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.636619567871094, "rewards/margins": 12.462785720825195, "rewards/rejected": -22.09940528869629, "step": 4000 }, { "epoch": 6.42, "learning_rate": 1.5913594926674592e-07, "logits/chosen": -1.4686524868011475, "logits/rejected": -1.4706889390945435, "logps/chosen": -166.93557739257812, "logps/rejected": -268.86920166015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.190255641937256, "rewards/margins": 11.974369049072266, "rewards/rejected": -18.16462516784668, "step": 4001 }, { "epoch": 6.42, "learning_rate": 1.5903686087990486e-07, "logits/chosen": -1.6474428176879883, "logits/rejected": -1.6152622699737549, "logps/chosen": -112.57962036132812, "logps/rejected": -200.76976013183594, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.4474239349365234, "rewards/margins": 9.468438148498535, "rewards/rejected": -12.915862083435059, "step": 4002 }, { "epoch": 6.43, "learning_rate": 1.5893777249306382e-07, "logits/chosen": -1.3121598958969116, "logits/rejected": -1.2661166191101074, "logps/chosen": -159.44471740722656, "logps/rejected": -260.6114501953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.206964492797852, "rewards/margins": 10.608610153198242, "rewards/rejected": -18.815574645996094, "step": 4003 }, { "epoch": 6.43, "learning_rate": 1.5883868410622275e-07, "logits/chosen": -1.524505376815796, "logits/rejected": -1.5568066835403442, "logps/chosen": -173.30853271484375, "logps/rejected": -287.29364013671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.556703567504883, "rewards/margins": 11.73339557647705, "rewards/rejected": -19.290098190307617, "step": 4004 }, { "epoch": 6.43, "learning_rate": 1.5873959571938166e-07, "logits/chosen": -1.6058626174926758, "logits/rejected": -1.6432983875274658, "logps/chosen": -105.3158950805664, "logps/rejected": -257.5553283691406, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.105947971343994, "rewards/margins": 14.854936599731445, "rewards/rejected": -17.96088409423828, "step": 4005 }, { "epoch": 6.43, "learning_rate": 1.5864050733254062e-07, "logits/chosen": -1.551438570022583, "logits/rejected": -1.4922043085098267, "logps/chosen": -148.701416015625, "logps/rejected": -332.36407470703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.61543607711792, "rewards/margins": 17.985177993774414, "rewards/rejected": -24.600614547729492, "step": 4006 }, { "epoch": 6.43, "learning_rate": 1.5854141894569955e-07, "logits/chosen": -1.6053153276443481, "logits/rejected": -1.6445326805114746, "logps/chosen": -116.2775650024414, "logps/rejected": -265.2764892578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.607358694076538, "rewards/margins": 15.314004898071289, "rewards/rejected": -18.921363830566406, "step": 4007 }, { "epoch": 6.43, "learning_rate": 1.584423305588585e-07, "logits/chosen": -1.5090774297714233, "logits/rejected": -1.4739888906478882, "logps/chosen": -187.30592346191406, "logps/rejected": -284.43609619140625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.676107406616211, "rewards/margins": 10.218493461608887, "rewards/rejected": -17.89459991455078, "step": 4008 }, { "epoch": 6.43, "learning_rate": 1.5834324217201744e-07, "logits/chosen": -1.5496875047683716, "logits/rejected": -1.6941678524017334, "logps/chosen": -114.19149017333984, "logps/rejected": -323.5517883300781, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.632122039794922, "rewards/margins": 18.16196060180664, "rewards/rejected": -21.794084548950195, "step": 4009 }, { "epoch": 6.44, "learning_rate": 1.5824415378517635e-07, "logits/chosen": -1.5770862102508545, "logits/rejected": -1.4732118844985962, "logps/chosen": -176.50079345703125, "logps/rejected": -294.6792907714844, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.269203186035156, "rewards/margins": 13.095417022705078, "rewards/rejected": -20.364620208740234, "step": 4010 }, { "epoch": 6.44, "learning_rate": 1.581450653983353e-07, "logits/chosen": -1.3086860179901123, "logits/rejected": -1.4644720554351807, "logps/chosen": -128.7637176513672, "logps/rejected": -292.11627197265625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.257585048675537, "rewards/margins": 15.899805068969727, "rewards/rejected": -20.157392501831055, "step": 4011 }, { "epoch": 6.44, "learning_rate": 1.5804597701149424e-07, "logits/chosen": -1.4771032333374023, "logits/rejected": -1.5301387310028076, "logps/chosen": -144.66453552246094, "logps/rejected": -275.156494140625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.2535858154296875, "rewards/margins": 11.568961143493652, "rewards/rejected": -17.822547912597656, "step": 4012 }, { "epoch": 6.44, "learning_rate": 1.5794688862465318e-07, "logits/chosen": -1.446357011795044, "logits/rejected": -1.4339394569396973, "logps/chosen": -149.5625, "logps/rejected": -276.4503173828125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.790907859802246, "rewards/margins": 12.779481887817383, "rewards/rejected": -20.570388793945312, "step": 4013 }, { "epoch": 6.44, "learning_rate": 1.5784780023781214e-07, "logits/chosen": -1.5044715404510498, "logits/rejected": -1.5016742944717407, "logps/chosen": -145.66880798339844, "logps/rejected": -282.7096862792969, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.762472629547119, "rewards/margins": 15.048627853393555, "rewards/rejected": -20.811100006103516, "step": 4014 }, { "epoch": 6.44, "learning_rate": 1.5774871185097104e-07, "logits/chosen": -1.552590250968933, "logits/rejected": -1.563828945159912, "logps/chosen": -165.28549194335938, "logps/rejected": -318.8038330078125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.110502243041992, "rewards/margins": 13.313064575195312, "rewards/rejected": -20.423566818237305, "step": 4015 }, { "epoch": 6.45, "learning_rate": 1.5764962346413e-07, "logits/chosen": -1.4708999395370483, "logits/rejected": -1.5288752317428589, "logps/chosen": -159.75576782226562, "logps/rejected": -312.43658447265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.523534774780273, "rewards/margins": 14.518341064453125, "rewards/rejected": -21.041873931884766, "step": 4016 }, { "epoch": 6.45, "learning_rate": 1.5755053507728894e-07, "logits/chosen": -1.7209053039550781, "logits/rejected": -1.6415555477142334, "logps/chosen": -174.29444885253906, "logps/rejected": -298.5415954589844, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -7.518221855163574, "rewards/margins": 14.24942398071289, "rewards/rejected": -21.76764678955078, "step": 4017 }, { "epoch": 6.45, "learning_rate": 1.5745144669044787e-07, "logits/chosen": -1.4763753414154053, "logits/rejected": -1.5161622762680054, "logps/chosen": -177.55010986328125, "logps/rejected": -301.70379638671875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -9.10258674621582, "rewards/margins": 12.763797760009766, "rewards/rejected": -21.866384506225586, "step": 4018 }, { "epoch": 6.45, "learning_rate": 1.573523583036068e-07, "logits/chosen": -1.665682315826416, "logits/rejected": -1.625770092010498, "logps/chosen": -141.99594116210938, "logps/rejected": -303.4227294921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.150576114654541, "rewards/margins": 15.851202011108398, "rewards/rejected": -22.00177764892578, "step": 4019 }, { "epoch": 6.45, "learning_rate": 1.5725326991676574e-07, "logits/chosen": -1.4847831726074219, "logits/rejected": -1.5872048139572144, "logps/chosen": -103.22293090820312, "logps/rejected": -257.4953918457031, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.7639405727386475, "rewards/margins": 14.22868537902832, "rewards/rejected": -17.992626190185547, "step": 4020 }, { "epoch": 6.45, "learning_rate": 1.571541815299247e-07, "logits/chosen": -1.4240381717681885, "logits/rejected": -1.5253708362579346, "logps/chosen": -129.3914031982422, "logps/rejected": -274.4068908691406, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.696744918823242, "rewards/margins": 11.765289306640625, "rewards/rejected": -16.462034225463867, "step": 4021 }, { "epoch": 6.46, "learning_rate": 1.5705509314308363e-07, "logits/chosen": -1.449000358581543, "logits/rejected": -1.3978806734085083, "logps/chosen": -192.55979919433594, "logps/rejected": -288.3559875488281, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -9.606399536132812, "rewards/margins": 10.967217445373535, "rewards/rejected": -20.57361602783203, "step": 4022 }, { "epoch": 6.46, "learning_rate": 1.5695600475624256e-07, "logits/chosen": -1.492967128753662, "logits/rejected": -1.5154919624328613, "logps/chosen": -139.0252227783203, "logps/rejected": -258.66302490234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.300014972686768, "rewards/margins": 11.301677703857422, "rewards/rejected": -16.601694107055664, "step": 4023 }, { "epoch": 6.46, "learning_rate": 1.568569163694015e-07, "logits/chosen": -1.489736795425415, "logits/rejected": -1.5334579944610596, "logps/chosen": -112.01498413085938, "logps/rejected": -231.52047729492188, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.152523040771484, "rewards/margins": 12.57978630065918, "rewards/rejected": -17.732309341430664, "step": 4024 }, { "epoch": 6.46, "learning_rate": 1.5675782798256043e-07, "logits/chosen": -1.4706202745437622, "logits/rejected": -1.4792487621307373, "logps/chosen": -150.43295288085938, "logps/rejected": -300.1558837890625, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -8.121355056762695, "rewards/margins": 14.558577537536621, "rewards/rejected": -22.679931640625, "step": 4025 }, { "epoch": 6.46, "learning_rate": 1.5665873959571936e-07, "logits/chosen": -1.5216423273086548, "logits/rejected": -1.5291874408721924, "logps/chosen": -172.5982208251953, "logps/rejected": -260.7652587890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.815979957580566, "rewards/margins": 9.141763687133789, "rewards/rejected": -17.957744598388672, "step": 4026 }, { "epoch": 6.46, "learning_rate": 1.5655965120887832e-07, "logits/chosen": -1.5779879093170166, "logits/rejected": -1.6116265058517456, "logps/chosen": -142.5040283203125, "logps/rejected": -300.6974182128906, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.42648983001709, "rewards/margins": 14.707590103149414, "rewards/rejected": -21.13408088684082, "step": 4027 }, { "epoch": 6.47, "learning_rate": 1.5646056282203726e-07, "logits/chosen": -1.3537213802337646, "logits/rejected": -1.4076082706451416, "logps/chosen": -125.388671875, "logps/rejected": -259.47393798828125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.980767726898193, "rewards/margins": 10.933026313781738, "rewards/rejected": -15.913793563842773, "step": 4028 }, { "epoch": 6.47, "learning_rate": 1.563614744351962e-07, "logits/chosen": -1.4563075304031372, "logits/rejected": -1.4627364873886108, "logps/chosen": -179.6787109375, "logps/rejected": -298.1599426269531, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.388933181762695, "rewards/margins": 11.938668251037598, "rewards/rejected": -21.327600479125977, "step": 4029 }, { "epoch": 6.47, "learning_rate": 1.5626238604835512e-07, "logits/chosen": -1.4612109661102295, "logits/rejected": -1.3519361019134521, "logps/chosen": -139.62327575683594, "logps/rejected": -259.12493896484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.951837062835693, "rewards/margins": 13.576677322387695, "rewards/rejected": -18.528514862060547, "step": 4030 }, { "epoch": 6.47, "learning_rate": 1.5616329766151406e-07, "logits/chosen": -1.4441955089569092, "logits/rejected": -1.3917382955551147, "logps/chosen": -172.54931640625, "logps/rejected": -298.00079345703125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.140151977539062, "rewards/margins": 12.766448020935059, "rewards/rejected": -20.906600952148438, "step": 4031 }, { "epoch": 6.47, "learning_rate": 1.5606420927467302e-07, "logits/chosen": -1.4717050790786743, "logits/rejected": -1.5717570781707764, "logps/chosen": -142.377197265625, "logps/rejected": -342.005859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.0678558349609375, "rewards/margins": 17.64043617248535, "rewards/rejected": -24.70829200744629, "step": 4032 }, { "epoch": 6.47, "learning_rate": 1.5596512088783195e-07, "logits/chosen": -1.472227692604065, "logits/rejected": -1.4111366271972656, "logps/chosen": -137.22311401367188, "logps/rejected": -272.8728942871094, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.061628341674805, "rewards/margins": 13.836725234985352, "rewards/rejected": -18.898353576660156, "step": 4033 }, { "epoch": 6.48, "learning_rate": 1.5586603250099086e-07, "logits/chosen": -1.4909038543701172, "logits/rejected": -1.5268927812576294, "logps/chosen": -138.73800659179688, "logps/rejected": -240.01583862304688, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.46785306930542, "rewards/margins": 11.053812026977539, "rewards/rejected": -16.521663665771484, "step": 4034 }, { "epoch": 6.48, "learning_rate": 1.5576694411414982e-07, "logits/chosen": -1.5176796913146973, "logits/rejected": -1.519277572631836, "logps/chosen": -126.01438903808594, "logps/rejected": -243.8826904296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.158909320831299, "rewards/margins": 12.317508697509766, "rewards/rejected": -17.47641944885254, "step": 4035 }, { "epoch": 6.48, "learning_rate": 1.5566785572730875e-07, "logits/chosen": -1.6301202774047852, "logits/rejected": -1.6603573560714722, "logps/chosen": -148.1241455078125, "logps/rejected": -303.43212890625, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -7.5341596603393555, "rewards/margins": 15.131271362304688, "rewards/rejected": -22.66543197631836, "step": 4036 }, { "epoch": 6.48, "learning_rate": 1.555687673404677e-07, "logits/chosen": -1.689089298248291, "logits/rejected": -1.7518136501312256, "logps/chosen": -124.66378021240234, "logps/rejected": -313.168212890625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.377783298492432, "rewards/margins": 17.17529296875, "rewards/rejected": -22.553077697753906, "step": 4037 }, { "epoch": 6.48, "learning_rate": 1.5546967895362662e-07, "logits/chosen": -1.5309021472930908, "logits/rejected": -1.4685280323028564, "logps/chosen": -150.35133361816406, "logps/rejected": -261.2171936035156, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.955092430114746, "rewards/margins": 12.160360336303711, "rewards/rejected": -17.115453720092773, "step": 4038 }, { "epoch": 6.48, "learning_rate": 1.5537059056678555e-07, "logits/chosen": -1.2937068939208984, "logits/rejected": -1.3220818042755127, "logps/chosen": -162.06298828125, "logps/rejected": -320.28204345703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.562356948852539, "rewards/margins": 13.811516761779785, "rewards/rejected": -22.37387466430664, "step": 4039 }, { "epoch": 6.48, "learning_rate": 1.552715021799445e-07, "logits/chosen": -1.4887566566467285, "logits/rejected": -1.5064687728881836, "logps/chosen": -153.98260498046875, "logps/rejected": -304.3556823730469, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.184513092041016, "rewards/margins": 13.11319351196289, "rewards/rejected": -21.297706604003906, "step": 4040 }, { "epoch": 6.49, "learning_rate": 1.5517241379310344e-07, "logits/chosen": -1.5476994514465332, "logits/rejected": -1.522373914718628, "logps/chosen": -164.2434539794922, "logps/rejected": -269.0760803222656, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.634079933166504, "rewards/margins": 11.179290771484375, "rewards/rejected": -17.813371658325195, "step": 4041 }, { "epoch": 6.49, "learning_rate": 1.550733254062624e-07, "logits/chosen": -1.6039087772369385, "logits/rejected": -1.54339599609375, "logps/chosen": -166.3028106689453, "logps/rejected": -300.0264892578125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -7.200139999389648, "rewards/margins": 14.276904106140137, "rewards/rejected": -21.47704315185547, "step": 4042 }, { "epoch": 6.49, "learning_rate": 1.549742370194213e-07, "logits/chosen": -1.4729117155075073, "logits/rejected": -1.5165952444076538, "logps/chosen": -165.81039428710938, "logps/rejected": -275.4357604980469, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.7823805809021, "rewards/margins": 10.663763046264648, "rewards/rejected": -16.446144104003906, "step": 4043 }, { "epoch": 6.49, "learning_rate": 1.5487514863258024e-07, "logits/chosen": -1.5489170551300049, "logits/rejected": -1.6288915872573853, "logps/chosen": -118.94019317626953, "logps/rejected": -269.0135498046875, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -5.367690563201904, "rewards/margins": 13.569025039672852, "rewards/rejected": -18.936716079711914, "step": 4044 }, { "epoch": 6.49, "learning_rate": 1.547760602457392e-07, "logits/chosen": -1.3427296876907349, "logits/rejected": -1.3488569259643555, "logps/chosen": -162.4305419921875, "logps/rejected": -309.95404052734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.4087958335876465, "rewards/margins": 13.870898246765137, "rewards/rejected": -21.27969741821289, "step": 4045 }, { "epoch": 6.49, "learning_rate": 1.5467697185889814e-07, "logits/chosen": -1.4743199348449707, "logits/rejected": -1.5243568420410156, "logps/chosen": -214.33221435546875, "logps/rejected": -340.5472412109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -10.968498229980469, "rewards/margins": 14.186691284179688, "rewards/rejected": -25.155187606811523, "step": 4046 }, { "epoch": 6.5, "learning_rate": 1.5457788347205707e-07, "logits/chosen": -1.6100554466247559, "logits/rejected": -1.6321802139282227, "logps/chosen": -186.58619689941406, "logps/rejected": -293.6015319824219, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.803191184997559, "rewards/margins": 11.488643646240234, "rewards/rejected": -20.29183578491211, "step": 4047 }, { "epoch": 6.5, "learning_rate": 1.54478795085216e-07, "logits/chosen": -1.6969642639160156, "logits/rejected": -1.6392288208007812, "logps/chosen": -138.90847778320312, "logps/rejected": -251.98297119140625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.196267127990723, "rewards/margins": 13.48394775390625, "rewards/rejected": -18.68021583557129, "step": 4048 }, { "epoch": 6.5, "learning_rate": 1.5437970669837494e-07, "logits/chosen": -1.5088942050933838, "logits/rejected": -1.4974995851516724, "logps/chosen": -177.2664031982422, "logps/rejected": -277.2903137207031, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.642128944396973, "rewards/margins": 11.434252738952637, "rewards/rejected": -19.07638168334961, "step": 4049 }, { "epoch": 6.5, "learning_rate": 1.542806183115339e-07, "logits/chosen": -1.6334421634674072, "logits/rejected": -1.6535859107971191, "logps/chosen": -123.62342834472656, "logps/rejected": -270.6128845214844, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.040425777435303, "rewards/margins": 14.677518844604492, "rewards/rejected": -19.717945098876953, "step": 4050 }, { "epoch": 6.5, "learning_rate": 1.5418152992469283e-07, "logits/chosen": -1.4406675100326538, "logits/rejected": -1.5025980472564697, "logps/chosen": -159.02328491210938, "logps/rejected": -284.29351806640625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.819018363952637, "rewards/margins": 11.620731353759766, "rewards/rejected": -19.43975067138672, "step": 4051 }, { "epoch": 6.5, "learning_rate": 1.5408244153785176e-07, "logits/chosen": -1.4954683780670166, "logits/rejected": -1.480470895767212, "logps/chosen": -96.87015533447266, "logps/rejected": -189.7866668701172, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.303051948547363, "rewards/margins": 10.316515922546387, "rewards/rejected": -14.61956787109375, "step": 4052 }, { "epoch": 6.51, "learning_rate": 1.539833531510107e-07, "logits/chosen": -1.4738945960998535, "logits/rejected": -1.3929920196533203, "logps/chosen": -161.80072021484375, "logps/rejected": -246.00262451171875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.863292694091797, "rewards/margins": 9.966072082519531, "rewards/rejected": -16.829364776611328, "step": 4053 }, { "epoch": 6.51, "learning_rate": 1.5388426476416963e-07, "logits/chosen": -1.489832878112793, "logits/rejected": -1.4833807945251465, "logps/chosen": -150.83251953125, "logps/rejected": -315.46734619140625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.621740341186523, "rewards/margins": 15.58474063873291, "rewards/rejected": -22.20648193359375, "step": 4054 }, { "epoch": 6.51, "learning_rate": 1.5378517637732856e-07, "logits/chosen": -1.4445433616638184, "logits/rejected": -1.4277299642562866, "logps/chosen": -139.4583740234375, "logps/rejected": -234.72232055664062, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.540168762207031, "rewards/margins": 9.233020782470703, "rewards/rejected": -16.773189544677734, "step": 4055 }, { "epoch": 6.51, "learning_rate": 1.5368608799048752e-07, "logits/chosen": -1.3780791759490967, "logits/rejected": -1.4367531538009644, "logps/chosen": -128.8372802734375, "logps/rejected": -298.2812194824219, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.486569404602051, "rewards/margins": 14.67315673828125, "rewards/rejected": -20.159725189208984, "step": 4056 }, { "epoch": 6.51, "learning_rate": 1.5358699960364643e-07, "logits/chosen": -1.6670149564743042, "logits/rejected": -1.6376612186431885, "logps/chosen": -144.72723388671875, "logps/rejected": -283.223876953125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.177809715270996, "rewards/margins": 11.866500854492188, "rewards/rejected": -18.0443115234375, "step": 4057 }, { "epoch": 6.51, "learning_rate": 1.534879112168054e-07, "logits/chosen": -1.538050889968872, "logits/rejected": -1.5637664794921875, "logps/chosen": -131.35787963867188, "logps/rejected": -249.2568359375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.585724830627441, "rewards/margins": 11.512473106384277, "rewards/rejected": -17.09819793701172, "step": 4058 }, { "epoch": 6.52, "learning_rate": 1.5338882282996432e-07, "logits/chosen": -1.600117564201355, "logits/rejected": -1.5869152545928955, "logps/chosen": -195.26974487304688, "logps/rejected": -318.6341552734375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -9.631392478942871, "rewards/margins": 10.820945739746094, "rewards/rejected": -20.45233917236328, "step": 4059 }, { "epoch": 6.52, "learning_rate": 1.5328973444312325e-07, "logits/chosen": -1.3947941064834595, "logits/rejected": -1.4391862154006958, "logps/chosen": -173.4237518310547, "logps/rejected": -300.8558044433594, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.204452514648438, "rewards/margins": 11.748929977416992, "rewards/rejected": -19.95338249206543, "step": 4060 }, { "epoch": 6.52, "learning_rate": 1.5319064605628221e-07, "logits/chosen": -1.5086286067962646, "logits/rejected": -1.5597150325775146, "logps/chosen": -141.82708740234375, "logps/rejected": -287.0211181640625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.535424709320068, "rewards/margins": 14.22792911529541, "rewards/rejected": -19.76335334777832, "step": 4061 }, { "epoch": 6.52, "learning_rate": 1.5309155766944112e-07, "logits/chosen": -1.4420535564422607, "logits/rejected": -1.4773969650268555, "logps/chosen": -214.10601806640625, "logps/rejected": -335.39935302734375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -12.91952133178711, "rewards/margins": 11.909290313720703, "rewards/rejected": -24.828811645507812, "step": 4062 }, { "epoch": 6.52, "learning_rate": 1.5299246928260008e-07, "logits/chosen": -1.5383051633834839, "logits/rejected": -1.5570423603057861, "logps/chosen": -158.11825561523438, "logps/rejected": -324.3089904785156, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.321404933929443, "rewards/margins": 14.512482643127441, "rewards/rejected": -21.833887100219727, "step": 4063 }, { "epoch": 6.52, "learning_rate": 1.5289338089575901e-07, "logits/chosen": -1.5520732402801514, "logits/rejected": -1.4799624681472778, "logps/chosen": -187.21121215820312, "logps/rejected": -288.5232238769531, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -8.288106918334961, "rewards/margins": 12.356675148010254, "rewards/rejected": -20.6447811126709, "step": 4064 }, { "epoch": 6.52, "learning_rate": 1.5279429250891795e-07, "logits/chosen": -1.6449767351150513, "logits/rejected": -1.653568983078003, "logps/chosen": -109.162353515625, "logps/rejected": -272.7698974609375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.447150230407715, "rewards/margins": 15.514894485473633, "rewards/rejected": -19.962045669555664, "step": 4065 }, { "epoch": 6.53, "learning_rate": 1.526952041220769e-07, "logits/chosen": -1.513624668121338, "logits/rejected": -1.4873899221420288, "logps/chosen": -190.31919860839844, "logps/rejected": -299.04449462890625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -9.963915824890137, "rewards/margins": 12.532449722290039, "rewards/rejected": -22.49636459350586, "step": 4066 }, { "epoch": 6.53, "learning_rate": 1.5259611573523581e-07, "logits/chosen": -1.4888205528259277, "logits/rejected": -1.4714902639389038, "logps/chosen": -132.14393615722656, "logps/rejected": -272.19549560546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.42944860458374, "rewards/margins": 14.731949806213379, "rewards/rejected": -19.16139793395996, "step": 4067 }, { "epoch": 6.53, "learning_rate": 1.5249702734839475e-07, "logits/chosen": -1.4932118654251099, "logits/rejected": -1.5197771787643433, "logps/chosen": -181.98719787597656, "logps/rejected": -314.7780456542969, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -11.27634048461914, "rewards/margins": 11.719573020935059, "rewards/rejected": -22.995914459228516, "step": 4068 }, { "epoch": 6.53, "learning_rate": 1.523979389615537e-07, "logits/chosen": -1.3459826707839966, "logits/rejected": -1.3479771614074707, "logps/chosen": -158.1329345703125, "logps/rejected": -305.89825439453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.458131790161133, "rewards/margins": 15.924132347106934, "rewards/rejected": -23.382265090942383, "step": 4069 }, { "epoch": 6.53, "learning_rate": 1.5229885057471264e-07, "logits/chosen": -1.656179666519165, "logits/rejected": -1.5793451070785522, "logps/chosen": -136.218017578125, "logps/rejected": -279.0203857421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.587588310241699, "rewards/margins": 15.342615127563477, "rewards/rejected": -19.93020248413086, "step": 4070 }, { "epoch": 6.53, "learning_rate": 1.5219976218787157e-07, "logits/chosen": -1.486596703529358, "logits/rejected": -1.4575188159942627, "logps/chosen": -194.72317504882812, "logps/rejected": -316.1368103027344, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -10.970039367675781, "rewards/margins": 11.540298461914062, "rewards/rejected": -22.510337829589844, "step": 4071 }, { "epoch": 6.54, "learning_rate": 1.521006738010305e-07, "logits/chosen": -1.6316734552383423, "logits/rejected": -1.604292631149292, "logps/chosen": -144.06222534179688, "logps/rejected": -271.86846923828125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -5.008913993835449, "rewards/margins": 13.145017623901367, "rewards/rejected": -18.1539306640625, "step": 4072 }, { "epoch": 6.54, "learning_rate": 1.5200158541418944e-07, "logits/chosen": -1.4934431314468384, "logits/rejected": -1.6216177940368652, "logps/chosen": -147.292724609375, "logps/rejected": -301.3812255859375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.668780326843262, "rewards/margins": 12.563833236694336, "rewards/rejected": -19.232614517211914, "step": 4073 }, { "epoch": 6.54, "learning_rate": 1.519024970273484e-07, "logits/chosen": -1.4712704420089722, "logits/rejected": -1.5283141136169434, "logps/chosen": -149.1527099609375, "logps/rejected": -333.89715576171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.419144153594971, "rewards/margins": 16.183475494384766, "rewards/rejected": -22.60262107849121, "step": 4074 }, { "epoch": 6.54, "learning_rate": 1.5180340864050733e-07, "logits/chosen": -1.4666672945022583, "logits/rejected": -1.3869317770004272, "logps/chosen": -198.39854431152344, "logps/rejected": -312.58642578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.85908317565918, "rewards/margins": 12.615459442138672, "rewards/rejected": -21.47454071044922, "step": 4075 }, { "epoch": 6.54, "learning_rate": 1.5170432025366624e-07, "logits/chosen": -1.402453064918518, "logits/rejected": -1.4854819774627686, "logps/chosen": -170.8695068359375, "logps/rejected": -291.66143798828125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.4193878173828125, "rewards/margins": 13.234356880187988, "rewards/rejected": -19.653745651245117, "step": 4076 }, { "epoch": 6.54, "learning_rate": 1.516052318668252e-07, "logits/chosen": -1.5971767902374268, "logits/rejected": -1.6214932203292847, "logps/chosen": -145.2372589111328, "logps/rejected": -332.22235107421875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.7430572509765625, "rewards/margins": 18.325904846191406, "rewards/rejected": -24.06896209716797, "step": 4077 }, { "epoch": 6.55, "learning_rate": 1.5150614347998413e-07, "logits/chosen": -1.3945674896240234, "logits/rejected": -1.521774411201477, "logps/chosen": -163.29364013671875, "logps/rejected": -310.1119384765625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.146615982055664, "rewards/margins": 11.157669067382812, "rewards/rejected": -18.304283142089844, "step": 4078 }, { "epoch": 6.55, "learning_rate": 1.514070550931431e-07, "logits/chosen": -1.662732720375061, "logits/rejected": -1.5629010200500488, "logps/chosen": -178.56961059570312, "logps/rejected": -281.796142578125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.30190372467041, "rewards/margins": 11.85798168182373, "rewards/rejected": -20.15988540649414, "step": 4079 }, { "epoch": 6.55, "learning_rate": 1.5130796670630203e-07, "logits/chosen": -1.5804085731506348, "logits/rejected": -1.6488217115402222, "logps/chosen": -158.18911743164062, "logps/rejected": -359.8983459472656, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.0494842529296875, "rewards/margins": 18.54588508605957, "rewards/rejected": -25.59537124633789, "step": 4080 }, { "epoch": 6.55, "learning_rate": 1.5120887831946093e-07, "logits/chosen": -1.449872612953186, "logits/rejected": -1.4460430145263672, "logps/chosen": -179.75961303710938, "logps/rejected": -301.960205078125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -9.485612869262695, "rewards/margins": 12.297904014587402, "rewards/rejected": -21.78351593017578, "step": 4081 }, { "epoch": 6.55, "learning_rate": 1.511097899326199e-07, "logits/chosen": -1.6274003982543945, "logits/rejected": -1.6389713287353516, "logps/chosen": -121.28640747070312, "logps/rejected": -210.23715209960938, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.806861877441406, "rewards/margins": 9.546662330627441, "rewards/rejected": -15.353523254394531, "step": 4082 }, { "epoch": 6.55, "learning_rate": 1.5101070154577883e-07, "logits/chosen": -1.6186316013336182, "logits/rejected": -1.6266202926635742, "logps/chosen": -162.55934143066406, "logps/rejected": -295.6706237792969, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.261623382568359, "rewards/margins": 13.612985610961914, "rewards/rejected": -20.874608993530273, "step": 4083 }, { "epoch": 6.56, "learning_rate": 1.5091161315893779e-07, "logits/chosen": -1.4036301374435425, "logits/rejected": -1.3807857036590576, "logps/chosen": -158.0211639404297, "logps/rejected": -255.51791381835938, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.10399341583252, "rewards/margins": 9.913474082946777, "rewards/rejected": -18.017467498779297, "step": 4084 }, { "epoch": 6.56, "learning_rate": 1.5081252477209672e-07, "logits/chosen": -1.5111534595489502, "logits/rejected": -1.5831298828125, "logps/chosen": -170.0465850830078, "logps/rejected": -313.25994873046875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -7.435107231140137, "rewards/margins": 14.866000175476074, "rewards/rejected": -22.30110740661621, "step": 4085 }, { "epoch": 6.56, "learning_rate": 1.5071343638525563e-07, "logits/chosen": -1.3186919689178467, "logits/rejected": -1.4023411273956299, "logps/chosen": -167.7193145751953, "logps/rejected": -302.72174072265625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -9.198563575744629, "rewards/margins": 11.965446472167969, "rewards/rejected": -21.16400909423828, "step": 4086 }, { "epoch": 6.56, "learning_rate": 1.5061434799841459e-07, "logits/chosen": -1.3928310871124268, "logits/rejected": -1.4552295207977295, "logps/chosen": -172.41641235351562, "logps/rejected": -307.2332458496094, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -9.563188552856445, "rewards/margins": 12.907790184020996, "rewards/rejected": -22.470979690551758, "step": 4087 }, { "epoch": 6.56, "learning_rate": 1.5051525961157352e-07, "logits/chosen": -1.4398659467697144, "logits/rejected": -1.4436283111572266, "logps/chosen": -153.02853393554688, "logps/rejected": -318.7279052734375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.009366035461426, "rewards/margins": 16.107200622558594, "rewards/rejected": -23.116565704345703, "step": 4088 }, { "epoch": 6.56, "learning_rate": 1.5041617122473245e-07, "logits/chosen": -1.6115996837615967, "logits/rejected": -1.7055580615997314, "logps/chosen": -162.7047882080078, "logps/rejected": -306.8726501464844, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.268516540527344, "rewards/margins": 13.849541664123535, "rewards/rejected": -21.118059158325195, "step": 4089 }, { "epoch": 6.57, "learning_rate": 1.5031708283789139e-07, "logits/chosen": -1.4757345914840698, "logits/rejected": -1.4621689319610596, "logps/chosen": -150.90625, "logps/rejected": -258.958984375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.189865589141846, "rewards/margins": 12.185379028320312, "rewards/rejected": -19.375244140625, "step": 4090 }, { "epoch": 6.57, "learning_rate": 1.5021799445105032e-07, "logits/chosen": -1.5603598356246948, "logits/rejected": -1.5140317678451538, "logps/chosen": -156.30186462402344, "logps/rejected": -231.10037231445312, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -5.396376609802246, "rewards/margins": 10.294145584106445, "rewards/rejected": -15.690522193908691, "step": 4091 }, { "epoch": 6.57, "learning_rate": 1.5011890606420928e-07, "logits/chosen": -1.413588047027588, "logits/rejected": -1.2986944913864136, "logps/chosen": -180.11880493164062, "logps/rejected": -262.81036376953125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.319664478302002, "rewards/margins": 11.70711612701416, "rewards/rejected": -18.02678108215332, "step": 4092 }, { "epoch": 6.57, "learning_rate": 1.500198176773682e-07, "logits/chosen": -1.4531843662261963, "logits/rejected": -1.3659489154815674, "logps/chosen": -119.97425842285156, "logps/rejected": -222.18466186523438, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.700887680053711, "rewards/margins": 11.426825523376465, "rewards/rejected": -16.12771224975586, "step": 4093 }, { "epoch": 6.57, "learning_rate": 1.4992072929052715e-07, "logits/chosen": -1.6835366487503052, "logits/rejected": -1.7321363687515259, "logps/chosen": -103.9294662475586, "logps/rejected": -245.88499450683594, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.15220832824707, "rewards/margins": 12.703938484191895, "rewards/rejected": -17.85614776611328, "step": 4094 }, { "epoch": 6.57, "learning_rate": 1.4982164090368608e-07, "logits/chosen": -1.5938084125518799, "logits/rejected": -1.6410586833953857, "logps/chosen": -104.45947265625, "logps/rejected": -233.14706420898438, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.787874221801758, "rewards/margins": 12.836455345153809, "rewards/rejected": -15.624329566955566, "step": 4095 }, { "epoch": 6.57, "learning_rate": 1.49722552516845e-07, "logits/chosen": -1.443957805633545, "logits/rejected": -1.399722695350647, "logps/chosen": -211.9832000732422, "logps/rejected": -328.7348327636719, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -11.599187850952148, "rewards/margins": 12.750417709350586, "rewards/rejected": -24.349605560302734, "step": 4096 }, { "epoch": 6.58, "learning_rate": 1.4962346413000395e-07, "logits/chosen": -1.5091696977615356, "logits/rejected": -1.5460914373397827, "logps/chosen": -150.3516082763672, "logps/rejected": -354.9713439941406, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.331852436065674, "rewards/margins": 16.30389976501465, "rewards/rejected": -23.635751724243164, "step": 4097 }, { "epoch": 6.58, "learning_rate": 1.495243757431629e-07, "logits/chosen": -1.6088656187057495, "logits/rejected": -1.556983232498169, "logps/chosen": -138.19561767578125, "logps/rejected": -232.7463836669922, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.46855354309082, "rewards/margins": 9.872434616088867, "rewards/rejected": -16.340988159179688, "step": 4098 }, { "epoch": 6.58, "learning_rate": 1.4942528735632184e-07, "logits/chosen": -1.49601411819458, "logits/rejected": -1.5659170150756836, "logps/chosen": -168.0802459716797, "logps/rejected": -371.47625732421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.519537925720215, "rewards/margins": 17.123958587646484, "rewards/rejected": -24.643497467041016, "step": 4099 }, { "epoch": 6.58, "learning_rate": 1.4932619896948077e-07, "logits/chosen": -1.5118989944458008, "logits/rejected": -1.6016643047332764, "logps/chosen": -153.66831970214844, "logps/rejected": -301.02783203125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.66386604309082, "rewards/margins": 12.948326110839844, "rewards/rejected": -19.612192153930664, "step": 4100 }, { "epoch": 6.58, "learning_rate": 1.492271105826397e-07, "logits/chosen": -1.5455269813537598, "logits/rejected": -1.5489541292190552, "logps/chosen": -155.40951538085938, "logps/rejected": -368.84222412109375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.031145095825195, "rewards/margins": 19.048681259155273, "rewards/rejected": -27.07982635498047, "step": 4101 }, { "epoch": 6.58, "learning_rate": 1.4912802219579864e-07, "logits/chosen": -1.5789958238601685, "logits/rejected": -1.489863395690918, "logps/chosen": -150.97787475585938, "logps/rejected": -268.7757568359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.071887493133545, "rewards/margins": 11.74664306640625, "rewards/rejected": -18.818531036376953, "step": 4102 }, { "epoch": 6.59, "learning_rate": 1.490289338089576e-07, "logits/chosen": -1.4339970350265503, "logits/rejected": -1.5039293766021729, "logps/chosen": -168.73056030273438, "logps/rejected": -342.98095703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.63376235961914, "rewards/margins": 15.128793716430664, "rewards/rejected": -23.762554168701172, "step": 4103 }, { "epoch": 6.59, "learning_rate": 1.4892984542211653e-07, "logits/chosen": -1.4057669639587402, "logits/rejected": -1.3966867923736572, "logps/chosen": -149.01832580566406, "logps/rejected": -310.9473876953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.307614326477051, "rewards/margins": 14.647039413452148, "rewards/rejected": -21.954654693603516, "step": 4104 }, { "epoch": 6.59, "learning_rate": 1.4883075703527544e-07, "logits/chosen": -1.5777184963226318, "logits/rejected": -1.578404188156128, "logps/chosen": -150.27523803710938, "logps/rejected": -271.3721618652344, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.108552932739258, "rewards/margins": 13.089954376220703, "rewards/rejected": -18.19850730895996, "step": 4105 }, { "epoch": 6.59, "learning_rate": 1.487316686484344e-07, "logits/chosen": -1.4395304918289185, "logits/rejected": -1.4709645509719849, "logps/chosen": -184.512939453125, "logps/rejected": -345.3472900390625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.956774711608887, "rewards/margins": 15.761384963989258, "rewards/rejected": -23.71816062927246, "step": 4106 }, { "epoch": 6.59, "learning_rate": 1.4863258026159333e-07, "logits/chosen": -1.5246232748031616, "logits/rejected": -1.5974068641662598, "logps/chosen": -172.21853637695312, "logps/rejected": -309.82281494140625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.148319244384766, "rewards/margins": 12.023696899414062, "rewards/rejected": -20.172016143798828, "step": 4107 }, { "epoch": 6.59, "learning_rate": 1.485334918747523e-07, "logits/chosen": -1.4852598905563354, "logits/rejected": -1.509317398071289, "logps/chosen": -129.41629028320312, "logps/rejected": -277.6058044433594, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.9687604904174805, "rewards/margins": 13.778006553649902, "rewards/rejected": -19.746767044067383, "step": 4108 }, { "epoch": 6.6, "learning_rate": 1.484344034879112e-07, "logits/chosen": -1.654557466506958, "logits/rejected": -1.4429893493652344, "logps/chosen": -187.88986206054688, "logps/rejected": -286.07305908203125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.8853440284729, "rewards/margins": 12.055037498474121, "rewards/rejected": -19.94038200378418, "step": 4109 }, { "epoch": 6.6, "learning_rate": 1.4833531510107013e-07, "logits/chosen": -1.4232083559036255, "logits/rejected": -1.4196951389312744, "logps/chosen": -146.98289489746094, "logps/rejected": -317.016845703125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.373187065124512, "rewards/margins": 15.869359016418457, "rewards/rejected": -22.24254608154297, "step": 4110 }, { "epoch": 6.6, "learning_rate": 1.482362267142291e-07, "logits/chosen": -1.5808310508728027, "logits/rejected": -1.611970067024231, "logps/chosen": -165.60556030273438, "logps/rejected": -296.2220764160156, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.541987419128418, "rewards/margins": 12.11770248413086, "rewards/rejected": -21.659690856933594, "step": 4111 }, { "epoch": 6.6, "learning_rate": 1.4813713832738802e-07, "logits/chosen": -1.382578730583191, "logits/rejected": -1.4101051092147827, "logps/chosen": -147.51280212402344, "logps/rejected": -266.1624755859375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.309831619262695, "rewards/margins": 11.702223777770996, "rewards/rejected": -18.012054443359375, "step": 4112 }, { "epoch": 6.6, "learning_rate": 1.4803804994054698e-07, "logits/chosen": -1.5783956050872803, "logits/rejected": -1.5444945096969604, "logps/chosen": -158.3048095703125, "logps/rejected": -318.23974609375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.591848373413086, "rewards/margins": 14.775527954101562, "rewards/rejected": -22.36737632751465, "step": 4113 }, { "epoch": 6.6, "learning_rate": 1.479389615537059e-07, "logits/chosen": -1.3988580703735352, "logits/rejected": -1.3380420207977295, "logps/chosen": -152.75042724609375, "logps/rejected": -267.7724304199219, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.2040300369262695, "rewards/margins": 12.473662376403809, "rewards/rejected": -18.677692413330078, "step": 4114 }, { "epoch": 6.61, "learning_rate": 1.4783987316686482e-07, "logits/chosen": -1.3291654586791992, "logits/rejected": -1.3710399866104126, "logps/chosen": -142.30471801757812, "logps/rejected": -276.33740234375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.024691581726074, "rewards/margins": 12.783974647521973, "rewards/rejected": -19.808666229248047, "step": 4115 }, { "epoch": 6.61, "learning_rate": 1.4774078478002378e-07, "logits/chosen": -1.3804129362106323, "logits/rejected": -1.3978275060653687, "logps/chosen": -162.5071563720703, "logps/rejected": -303.33172607421875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.255181312561035, "rewards/margins": 15.716211318969727, "rewards/rejected": -23.971393585205078, "step": 4116 }, { "epoch": 6.61, "learning_rate": 1.4764169639318272e-07, "logits/chosen": -1.419332504272461, "logits/rejected": -1.376801609992981, "logps/chosen": -190.6568603515625, "logps/rejected": -288.55035400390625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.518251419067383, "rewards/margins": 12.45156478881836, "rewards/rejected": -20.96981430053711, "step": 4117 }, { "epoch": 6.61, "learning_rate": 1.4754260800634165e-07, "logits/chosen": -1.331827163696289, "logits/rejected": -1.3622570037841797, "logps/chosen": -158.91969299316406, "logps/rejected": -313.927490234375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.457273006439209, "rewards/margins": 14.844523429870605, "rewards/rejected": -22.30179786682129, "step": 4118 }, { "epoch": 6.61, "learning_rate": 1.4744351961950058e-07, "logits/chosen": -1.5200949907302856, "logits/rejected": -1.5151524543762207, "logps/chosen": -166.9207763671875, "logps/rejected": -307.7278747558594, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.971203804016113, "rewards/margins": 12.824392318725586, "rewards/rejected": -21.795597076416016, "step": 4119 }, { "epoch": 6.61, "learning_rate": 1.4734443123265952e-07, "logits/chosen": -1.5693577527999878, "logits/rejected": -1.6414839029312134, "logps/chosen": -148.14251708984375, "logps/rejected": -297.39166259765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.220607280731201, "rewards/margins": 14.073882102966309, "rewards/rejected": -20.29448890686035, "step": 4120 }, { "epoch": 6.61, "learning_rate": 1.4724534284581848e-07, "logits/chosen": -1.5770797729492188, "logits/rejected": -1.7559382915496826, "logps/chosen": -108.04955291748047, "logps/rejected": -325.7701416015625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.621438980102539, "rewards/margins": 17.024879455566406, "rewards/rejected": -21.646320343017578, "step": 4121 }, { "epoch": 6.62, "learning_rate": 1.471462544589774e-07, "logits/chosen": -1.4643566608428955, "logits/rejected": -1.6133437156677246, "logps/chosen": -144.56851196289062, "logps/rejected": -264.08685302734375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.319293022155762, "rewards/margins": 10.51630973815918, "rewards/rejected": -17.835601806640625, "step": 4122 }, { "epoch": 6.62, "learning_rate": 1.4704716607213634e-07, "logits/chosen": -1.3060307502746582, "logits/rejected": -1.3858160972595215, "logps/chosen": -98.84954833984375, "logps/rejected": -248.50494384765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.014519691467285, "rewards/margins": 11.676782608032227, "rewards/rejected": -16.691303253173828, "step": 4123 }, { "epoch": 6.62, "learning_rate": 1.4694807768529528e-07, "logits/chosen": -1.4652916193008423, "logits/rejected": -1.6324361562728882, "logps/chosen": -135.28973388671875, "logps/rejected": -299.8081359863281, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.020556449890137, "rewards/margins": 13.206077575683594, "rewards/rejected": -19.226633071899414, "step": 4124 }, { "epoch": 6.62, "learning_rate": 1.468489892984542e-07, "logits/chosen": -1.4585589170455933, "logits/rejected": -1.4125738143920898, "logps/chosen": -181.2947540283203, "logps/rejected": -271.39752197265625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.106605529785156, "rewards/margins": 11.24235725402832, "rewards/rejected": -18.348962783813477, "step": 4125 }, { "epoch": 6.62, "learning_rate": 1.4674990091161314e-07, "logits/chosen": -1.4602010250091553, "logits/rejected": -1.5112550258636475, "logps/chosen": -167.30206298828125, "logps/rejected": -303.95208740234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.99468994140625, "rewards/margins": 12.562808990478516, "rewards/rejected": -21.557498931884766, "step": 4126 }, { "epoch": 6.62, "learning_rate": 1.466508125247721e-07, "logits/chosen": -1.4752020835876465, "logits/rejected": -1.4603357315063477, "logps/chosen": -166.50213623046875, "logps/rejected": -265.3182373046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.309196472167969, "rewards/margins": 10.920965194702148, "rewards/rejected": -19.230159759521484, "step": 4127 }, { "epoch": 6.63, "learning_rate": 1.46551724137931e-07, "logits/chosen": -1.3355724811553955, "logits/rejected": -1.4242204427719116, "logps/chosen": -181.42282104492188, "logps/rejected": -313.04248046875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -9.133359909057617, "rewards/margins": 11.921483993530273, "rewards/rejected": -21.05484390258789, "step": 4128 }, { "epoch": 6.63, "learning_rate": 1.4645263575108997e-07, "logits/chosen": -1.6381499767303467, "logits/rejected": -1.5911481380462646, "logps/chosen": -126.77091979980469, "logps/rejected": -274.7130126953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.499006271362305, "rewards/margins": 14.977409362792969, "rewards/rejected": -20.47641372680664, "step": 4129 }, { "epoch": 6.63, "learning_rate": 1.463535473642489e-07, "logits/chosen": -1.4138033390045166, "logits/rejected": -1.3972196578979492, "logps/chosen": -146.31008911132812, "logps/rejected": -288.8357849121094, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.472864627838135, "rewards/margins": 12.610782623291016, "rewards/rejected": -19.083646774291992, "step": 4130 }, { "epoch": 6.63, "learning_rate": 1.4625445897740784e-07, "logits/chosen": -1.5702593326568604, "logits/rejected": -1.590135097503662, "logps/chosen": -179.53326416015625, "logps/rejected": -298.007568359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.500025749206543, "rewards/margins": 12.267621994018555, "rewards/rejected": -20.767650604248047, "step": 4131 }, { "epoch": 6.63, "learning_rate": 1.461553705905668e-07, "logits/chosen": -1.5654537677764893, "logits/rejected": -1.4965026378631592, "logps/chosen": -148.61912536621094, "logps/rejected": -263.8389892578125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.723934650421143, "rewards/margins": 11.422367095947266, "rewards/rejected": -18.14630126953125, "step": 4132 }, { "epoch": 6.63, "learning_rate": 1.460562822037257e-07, "logits/chosen": -1.454143762588501, "logits/rejected": -1.3970427513122559, "logps/chosen": -205.63394165039062, "logps/rejected": -342.7855224609375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -10.57720947265625, "rewards/margins": 14.704434394836426, "rewards/rejected": -25.28164291381836, "step": 4133 }, { "epoch": 6.64, "learning_rate": 1.4595719381688466e-07, "logits/chosen": -1.4308161735534668, "logits/rejected": -1.4358233213424683, "logps/chosen": -133.0418243408203, "logps/rejected": -281.9250793457031, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.332579612731934, "rewards/margins": 13.451087951660156, "rewards/rejected": -19.783666610717773, "step": 4134 }, { "epoch": 6.64, "learning_rate": 1.458581054300436e-07, "logits/chosen": -1.460931658744812, "logits/rejected": -1.3953906297683716, "logps/chosen": -192.30526733398438, "logps/rejected": -313.3667907714844, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.678272247314453, "rewards/margins": 13.712261199951172, "rewards/rejected": -22.390533447265625, "step": 4135 }, { "epoch": 6.64, "learning_rate": 1.4575901704320253e-07, "logits/chosen": -1.280988097190857, "logits/rejected": -1.250014066696167, "logps/chosen": -172.99005126953125, "logps/rejected": -299.4040832519531, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.70810317993164, "rewards/margins": 14.445243835449219, "rewards/rejected": -23.153348922729492, "step": 4136 }, { "epoch": 6.64, "learning_rate": 1.456599286563615e-07, "logits/chosen": -1.5811346769332886, "logits/rejected": -1.5825557708740234, "logps/chosen": -171.8234405517578, "logps/rejected": -286.1657409667969, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.561553001403809, "rewards/margins": 12.74410629272461, "rewards/rejected": -19.305660247802734, "step": 4137 }, { "epoch": 6.64, "learning_rate": 1.455608402695204e-07, "logits/chosen": -1.5619128942489624, "logits/rejected": -1.517148733139038, "logps/chosen": -94.3139419555664, "logps/rejected": -207.98068237304688, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.6813533306121826, "rewards/margins": 12.425191879272461, "rewards/rejected": -15.106544494628906, "step": 4138 }, { "epoch": 6.64, "learning_rate": 1.4546175188267933e-07, "logits/chosen": -1.5387370586395264, "logits/rejected": -1.416195034980774, "logps/chosen": -192.57049560546875, "logps/rejected": -277.26348876953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.053545951843262, "rewards/margins": 11.418782234191895, "rewards/rejected": -18.472328186035156, "step": 4139 }, { "epoch": 6.65, "learning_rate": 1.453626634958383e-07, "logits/chosen": -1.4890508651733398, "logits/rejected": -1.486991286277771, "logps/chosen": -104.88322448730469, "logps/rejected": -262.5455322265625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.8885533809661865, "rewards/margins": 15.526912689208984, "rewards/rejected": -19.41546630859375, "step": 4140 }, { "epoch": 6.65, "learning_rate": 1.4526357510899722e-07, "logits/chosen": -1.6078287363052368, "logits/rejected": -1.6309047937393188, "logps/chosen": -118.92379760742188, "logps/rejected": -261.51959228515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.7476558685302734, "rewards/margins": 14.800752639770508, "rewards/rejected": -18.54840850830078, "step": 4141 }, { "epoch": 6.65, "learning_rate": 1.4516448672215616e-07, "logits/chosen": -1.4068337678909302, "logits/rejected": -1.4641025066375732, "logps/chosen": -141.02725219726562, "logps/rejected": -280.63897705078125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.012684345245361, "rewards/margins": 12.930212020874023, "rewards/rejected": -18.94289779663086, "step": 4142 }, { "epoch": 6.65, "learning_rate": 1.450653983353151e-07, "logits/chosen": -1.5320560932159424, "logits/rejected": -1.583310604095459, "logps/chosen": -135.03936767578125, "logps/rejected": -280.48828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.52979040145874, "rewards/margins": 14.588117599487305, "rewards/rejected": -20.117908477783203, "step": 4143 }, { "epoch": 6.65, "learning_rate": 1.4496630994847402e-07, "logits/chosen": -1.513519287109375, "logits/rejected": -1.4528483152389526, "logps/chosen": -211.60166931152344, "logps/rejected": -330.83905029296875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -9.44937515258789, "rewards/margins": 13.099647521972656, "rewards/rejected": -22.549022674560547, "step": 4144 }, { "epoch": 6.65, "learning_rate": 1.4486722156163298e-07, "logits/chosen": -1.5248932838439941, "logits/rejected": -1.5117985010147095, "logps/chosen": -163.4835968017578, "logps/rejected": -303.4458312988281, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.524850845336914, "rewards/margins": 13.669477462768555, "rewards/rejected": -22.19432830810547, "step": 4145 }, { "epoch": 6.65, "learning_rate": 1.4476813317479191e-07, "logits/chosen": -1.3018198013305664, "logits/rejected": -1.2976622581481934, "logps/chosen": -190.58021545410156, "logps/rejected": -329.042236328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -10.94289779663086, "rewards/margins": 12.605820655822754, "rewards/rejected": -23.548717498779297, "step": 4146 }, { "epoch": 6.66, "learning_rate": 1.4466904478795082e-07, "logits/chosen": -1.4294718503952026, "logits/rejected": -1.4945263862609863, "logps/chosen": -167.09747314453125, "logps/rejected": -332.243896484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.193537712097168, "rewards/margins": 13.761260986328125, "rewards/rejected": -21.954795837402344, "step": 4147 }, { "epoch": 6.66, "learning_rate": 1.4456995640110978e-07, "logits/chosen": -1.2887673377990723, "logits/rejected": -1.3931349515914917, "logps/chosen": -119.58167266845703, "logps/rejected": -279.794921875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.480682373046875, "rewards/margins": 14.623363494873047, "rewards/rejected": -19.104045867919922, "step": 4148 }, { "epoch": 6.66, "learning_rate": 1.4447086801426871e-07, "logits/chosen": -1.628179907798767, "logits/rejected": -1.6781283617019653, "logps/chosen": -122.92680358886719, "logps/rejected": -259.0716552734375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.9362754821777344, "rewards/margins": 13.408138275146484, "rewards/rejected": -17.34441375732422, "step": 4149 }, { "epoch": 6.66, "learning_rate": 1.4437177962742767e-07, "logits/chosen": -1.4635975360870361, "logits/rejected": -1.3925254344940186, "logps/chosen": -206.1355438232422, "logps/rejected": -343.0355529785156, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -10.737168312072754, "rewards/margins": 15.306827545166016, "rewards/rejected": -26.043996810913086, "step": 4150 }, { "epoch": 6.66, "learning_rate": 1.442726912405866e-07, "logits/chosen": -1.5318611860275269, "logits/rejected": -1.5216847658157349, "logps/chosen": -177.15521240234375, "logps/rejected": -267.67376708984375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.672640800476074, "rewards/margins": 9.44110107421875, "rewards/rejected": -18.11374282836914, "step": 4151 }, { "epoch": 6.66, "learning_rate": 1.4417360285374551e-07, "logits/chosen": -1.4890094995498657, "logits/rejected": -1.5268622636795044, "logps/chosen": -187.4859619140625, "logps/rejected": -339.1991271972656, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -10.167099952697754, "rewards/margins": 13.180408477783203, "rewards/rejected": -23.34750747680664, "step": 4152 }, { "epoch": 6.67, "learning_rate": 1.4407451446690447e-07, "logits/chosen": -1.5438661575317383, "logits/rejected": -1.5583442449569702, "logps/chosen": -117.7491455078125, "logps/rejected": -252.84800720214844, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.544641494750977, "rewards/margins": 13.375804901123047, "rewards/rejected": -17.920446395874023, "step": 4153 }, { "epoch": 6.67, "learning_rate": 1.439754260800634e-07, "logits/chosen": -1.5880905389785767, "logits/rejected": -1.5045795440673828, "logps/chosen": -162.87344360351562, "logps/rejected": -261.3790283203125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.113718032836914, "rewards/margins": 11.206376075744629, "rewards/rejected": -17.320093154907227, "step": 4154 }, { "epoch": 6.67, "learning_rate": 1.4387633769322237e-07, "logits/chosen": -1.6531448364257812, "logits/rejected": -1.6122077703475952, "logps/chosen": -144.02215576171875, "logps/rejected": -290.75042724609375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.697759628295898, "rewards/margins": 14.717501640319824, "rewards/rejected": -20.41526222229004, "step": 4155 }, { "epoch": 6.67, "learning_rate": 1.437772493063813e-07, "logits/chosen": -1.3774375915527344, "logits/rejected": -1.505420207977295, "logps/chosen": -149.7346649169922, "logps/rejected": -306.6710205078125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.263072490692139, "rewards/margins": 14.269231796264648, "rewards/rejected": -21.532304763793945, "step": 4156 }, { "epoch": 6.67, "learning_rate": 1.436781609195402e-07, "logits/chosen": -1.4557645320892334, "logits/rejected": -1.4548401832580566, "logps/chosen": -188.98538208007812, "logps/rejected": -335.6321105957031, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -10.310656547546387, "rewards/margins": 14.009450912475586, "rewards/rejected": -24.320106506347656, "step": 4157 }, { "epoch": 6.67, "learning_rate": 1.4357907253269917e-07, "logits/chosen": -1.4381978511810303, "logits/rejected": -1.3880447149276733, "logps/chosen": -154.64810180664062, "logps/rejected": -276.7188415527344, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.377471923828125, "rewards/margins": 11.651687622070312, "rewards/rejected": -19.029159545898438, "step": 4158 }, { "epoch": 6.68, "learning_rate": 1.434799841458581e-07, "logits/chosen": -1.67477285861969, "logits/rejected": -1.7095811367034912, "logps/chosen": -119.07569885253906, "logps/rejected": -299.6015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.857611656188965, "rewards/margins": 16.669761657714844, "rewards/rejected": -21.527372360229492, "step": 4159 }, { "epoch": 6.68, "learning_rate": 1.4338089575901703e-07, "logits/chosen": -1.6132476329803467, "logits/rejected": -1.6084840297698975, "logps/chosen": -158.45413208007812, "logps/rejected": -317.97760009765625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.818764686584473, "rewards/margins": 15.344642639160156, "rewards/rejected": -23.163410186767578, "step": 4160 }, { "epoch": 6.68, "learning_rate": 1.4328180737217597e-07, "logits/chosen": -1.3431742191314697, "logits/rejected": -1.4128085374832153, "logps/chosen": -168.3163604736328, "logps/rejected": -318.6066589355469, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.184720039367676, "rewards/margins": 14.864900588989258, "rewards/rejected": -22.049619674682617, "step": 4161 }, { "epoch": 6.68, "learning_rate": 1.431827189853349e-07, "logits/chosen": -1.73689866065979, "logits/rejected": -1.636826992034912, "logps/chosen": -138.83328247070312, "logps/rejected": -261.7895202636719, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.030681133270264, "rewards/margins": 14.110193252563477, "rewards/rejected": -19.1408748626709, "step": 4162 }, { "epoch": 6.68, "learning_rate": 1.4308363059849386e-07, "logits/chosen": -1.4595438241958618, "logits/rejected": -1.5064449310302734, "logps/chosen": -159.46665954589844, "logps/rejected": -298.0342102050781, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.28271198272705, "rewards/margins": 12.710628509521484, "rewards/rejected": -20.99334144592285, "step": 4163 }, { "epoch": 6.68, "learning_rate": 1.429845422116528e-07, "logits/chosen": -1.6640092134475708, "logits/rejected": -1.6700853109359741, "logps/chosen": -154.46194458007812, "logps/rejected": -273.7508544921875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.393726825714111, "rewards/margins": 12.04390811920166, "rewards/rejected": -18.43763542175293, "step": 4164 }, { "epoch": 6.69, "learning_rate": 1.4288545382481173e-07, "logits/chosen": -1.547052264213562, "logits/rejected": -1.5542913675308228, "logps/chosen": -187.65841674804688, "logps/rejected": -341.79925537109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.650487899780273, "rewards/margins": 15.992619514465332, "rewards/rejected": -24.643108367919922, "step": 4165 }, { "epoch": 6.69, "learning_rate": 1.4278636543797066e-07, "logits/chosen": -1.3973982334136963, "logits/rejected": -1.4237322807312012, "logps/chosen": -160.01058959960938, "logps/rejected": -298.11907958984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.063535690307617, "rewards/margins": 12.777791976928711, "rewards/rejected": -19.841327667236328, "step": 4166 }, { "epoch": 6.69, "learning_rate": 1.426872770511296e-07, "logits/chosen": -1.5940396785736084, "logits/rejected": -1.5663610696792603, "logps/chosen": -144.07498168945312, "logps/rejected": -296.02740478515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.173338890075684, "rewards/margins": 14.756559371948242, "rewards/rejected": -20.92989730834961, "step": 4167 }, { "epoch": 6.69, "learning_rate": 1.4258818866428853e-07, "logits/chosen": -1.42523193359375, "logits/rejected": -1.3911864757537842, "logps/chosen": -133.39654541015625, "logps/rejected": -233.8380889892578, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.8040876388549805, "rewards/margins": 11.721803665161133, "rewards/rejected": -17.525890350341797, "step": 4168 }, { "epoch": 6.69, "learning_rate": 1.4248910027744749e-07, "logits/chosen": -1.4825286865234375, "logits/rejected": -1.350489854812622, "logps/chosen": -195.60696411132812, "logps/rejected": -265.55499267578125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.542814254760742, "rewards/margins": 11.295408248901367, "rewards/rejected": -19.83822250366211, "step": 4169 }, { "epoch": 6.69, "learning_rate": 1.4239001189060642e-07, "logits/chosen": -1.6233148574829102, "logits/rejected": -1.6303476095199585, "logps/chosen": -145.54916381835938, "logps/rejected": -308.0374450683594, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.140379905700684, "rewards/margins": 16.378568649291992, "rewards/rejected": -21.51894760131836, "step": 4170 }, { "epoch": 6.7, "learning_rate": 1.4229092350376535e-07, "logits/chosen": -1.5391818284988403, "logits/rejected": -1.5154540538787842, "logps/chosen": -154.34364318847656, "logps/rejected": -301.5252380371094, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.073291778564453, "rewards/margins": 14.543349266052246, "rewards/rejected": -20.616641998291016, "step": 4171 }, { "epoch": 6.7, "learning_rate": 1.4219183511692429e-07, "logits/chosen": -1.488121747970581, "logits/rejected": -1.400504469871521, "logps/chosen": -142.19029235839844, "logps/rejected": -311.00469970703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.470901012420654, "rewards/margins": 16.786785125732422, "rewards/rejected": -23.2576847076416, "step": 4172 }, { "epoch": 6.7, "learning_rate": 1.4209274673008322e-07, "logits/chosen": -1.5550674200057983, "logits/rejected": -1.5109450817108154, "logps/chosen": -130.66900634765625, "logps/rejected": -273.69476318359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.430903434753418, "rewards/margins": 15.731758117675781, "rewards/rejected": -20.162660598754883, "step": 4173 }, { "epoch": 6.7, "learning_rate": 1.4199365834324218e-07, "logits/chosen": -1.4124763011932373, "logits/rejected": -1.4424049854278564, "logps/chosen": -142.48397827148438, "logps/rejected": -247.60086059570312, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.08627986907959, "rewards/margins": 12.552963256835938, "rewards/rejected": -17.639244079589844, "step": 4174 }, { "epoch": 6.7, "learning_rate": 1.418945699564011e-07, "logits/chosen": -1.4049806594848633, "logits/rejected": -1.4243189096450806, "logps/chosen": -188.0469207763672, "logps/rejected": -330.2589111328125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -10.740144729614258, "rewards/margins": 10.832058906555176, "rewards/rejected": -21.57220458984375, "step": 4175 }, { "epoch": 6.7, "learning_rate": 1.4179548156956002e-07, "logits/chosen": -1.6565531492233276, "logits/rejected": -1.7107784748077393, "logps/chosen": -136.58639526367188, "logps/rejected": -307.6034240722656, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.976609706878662, "rewards/margins": 17.073862075805664, "rewards/rejected": -22.05047035217285, "step": 4176 }, { "epoch": 6.7, "learning_rate": 1.4169639318271898e-07, "logits/chosen": -1.566839575767517, "logits/rejected": -1.6329126358032227, "logps/chosen": -106.99298095703125, "logps/rejected": -292.76776123046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.8332653045654297, "rewards/margins": 18.02234649658203, "rewards/rejected": -21.855613708496094, "step": 4177 }, { "epoch": 6.71, "learning_rate": 1.415973047958779e-07, "logits/chosen": -1.628218650817871, "logits/rejected": -1.558458924293518, "logps/chosen": -152.14390563964844, "logps/rejected": -279.933837890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.546247959136963, "rewards/margins": 14.633050918579102, "rewards/rejected": -20.179298400878906, "step": 4178 }, { "epoch": 6.71, "learning_rate": 1.4149821640903687e-07, "logits/chosen": -1.5350837707519531, "logits/rejected": -1.5468717813491821, "logps/chosen": -203.40269470214844, "logps/rejected": -345.151123046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.929265975952148, "rewards/margins": 14.341625213623047, "rewards/rejected": -23.270893096923828, "step": 4179 }, { "epoch": 6.71, "learning_rate": 1.4139912802219578e-07, "logits/chosen": -1.3904941082000732, "logits/rejected": -1.4066163301467896, "logps/chosen": -118.10150146484375, "logps/rejected": -252.86570739746094, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.503152370452881, "rewards/margins": 12.195387840270996, "rewards/rejected": -17.69853973388672, "step": 4180 }, { "epoch": 6.71, "learning_rate": 1.413000396353547e-07, "logits/chosen": -1.712864637374878, "logits/rejected": -1.6083701848983765, "logps/chosen": -178.15542602539062, "logps/rejected": -300.3887939453125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.919681549072266, "rewards/margins": 14.121657371520996, "rewards/rejected": -22.041339874267578, "step": 4181 }, { "epoch": 6.71, "learning_rate": 1.4120095124851367e-07, "logits/chosen": -1.4576765298843384, "logits/rejected": -1.5443769693374634, "logps/chosen": -159.44192504882812, "logps/rejected": -333.700439453125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.9225640296936035, "rewards/margins": 17.705093383789062, "rewards/rejected": -23.627656936645508, "step": 4182 }, { "epoch": 6.71, "learning_rate": 1.411018628616726e-07, "logits/chosen": -1.525565266609192, "logits/rejected": -1.4863661527633667, "logps/chosen": -176.59548950195312, "logps/rejected": -347.2818908691406, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -9.023497581481934, "rewards/margins": 15.380790710449219, "rewards/rejected": -24.40428924560547, "step": 4183 }, { "epoch": 6.72, "learning_rate": 1.4100277447483157e-07, "logits/chosen": -1.5409024953842163, "logits/rejected": -1.554563045501709, "logps/chosen": -144.6818084716797, "logps/rejected": -256.3326416015625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.2471771240234375, "rewards/margins": 13.475988388061523, "rewards/rejected": -18.72316551208496, "step": 4184 }, { "epoch": 6.72, "learning_rate": 1.4090368608799047e-07, "logits/chosen": -1.5026435852050781, "logits/rejected": -1.5118991136550903, "logps/chosen": -161.51602172851562, "logps/rejected": -296.0475769042969, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.294412612915039, "rewards/margins": 13.584609985351562, "rewards/rejected": -21.8790225982666, "step": 4185 }, { "epoch": 6.72, "learning_rate": 1.408045977011494e-07, "logits/chosen": -1.3960626125335693, "logits/rejected": -1.485150694847107, "logps/chosen": -182.22268676757812, "logps/rejected": -351.0351867675781, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -9.836528778076172, "rewards/margins": 16.27189826965332, "rewards/rejected": -26.10842514038086, "step": 4186 }, { "epoch": 6.72, "learning_rate": 1.4070550931430837e-07, "logits/chosen": -1.7648355960845947, "logits/rejected": -1.7423571348190308, "logps/chosen": -99.65127563476562, "logps/rejected": -260.027587890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.537397861480713, "rewards/margins": 15.914102554321289, "rewards/rejected": -17.451499938964844, "step": 4187 }, { "epoch": 6.72, "learning_rate": 1.406064209274673e-07, "logits/chosen": -1.6282294988632202, "logits/rejected": -1.5688549280166626, "logps/chosen": -147.1982421875, "logps/rejected": -273.07373046875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.969296455383301, "rewards/margins": 12.682306289672852, "rewards/rejected": -18.65160369873047, "step": 4188 }, { "epoch": 6.72, "learning_rate": 1.4050733254062623e-07, "logits/chosen": -1.6054474115371704, "logits/rejected": -1.4427701234817505, "logps/chosen": -175.18902587890625, "logps/rejected": -264.67578125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.52860689163208, "rewards/margins": 11.988675117492676, "rewards/rejected": -18.51728057861328, "step": 4189 }, { "epoch": 6.73, "learning_rate": 1.4040824415378517e-07, "logits/chosen": -1.4576466083526611, "logits/rejected": -1.4606050252914429, "logps/chosen": -96.95005798339844, "logps/rejected": -286.34759521484375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.3959801197052, "rewards/margins": 18.908754348754883, "rewards/rejected": -22.30473518371582, "step": 4190 }, { "epoch": 6.73, "learning_rate": 1.403091557669441e-07, "logits/chosen": -1.4272748231887817, "logits/rejected": -1.3189821243286133, "logps/chosen": -179.88076782226562, "logps/rejected": -301.047607421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.990093231201172, "rewards/margins": 12.916566848754883, "rewards/rejected": -21.906660079956055, "step": 4191 }, { "epoch": 6.73, "learning_rate": 1.4021006738010306e-07, "logits/chosen": -1.3107473850250244, "logits/rejected": -1.4377899169921875, "logps/chosen": -126.6053466796875, "logps/rejected": -317.40521240234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.537108898162842, "rewards/margins": 16.6176700592041, "rewards/rejected": -22.1547794342041, "step": 4192 }, { "epoch": 6.73, "learning_rate": 1.40110978993262e-07, "logits/chosen": -1.4734348058700562, "logits/rejected": -1.4387104511260986, "logps/chosen": -133.36573791503906, "logps/rejected": -292.3707275390625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.8218231201171875, "rewards/margins": 14.826764106750488, "rewards/rejected": -20.648588180541992, "step": 4193 }, { "epoch": 6.73, "learning_rate": 1.4001189060642092e-07, "logits/chosen": -1.344437599182129, "logits/rejected": -1.2959506511688232, "logps/chosen": -141.65847778320312, "logps/rejected": -263.14752197265625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.050051689147949, "rewards/margins": 12.05284595489502, "rewards/rejected": -18.10289764404297, "step": 4194 }, { "epoch": 6.73, "learning_rate": 1.3991280221957986e-07, "logits/chosen": -1.5837688446044922, "logits/rejected": -1.6362450122833252, "logps/chosen": -164.06332397460938, "logps/rejected": -358.36041259765625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.501842498779297, "rewards/margins": 17.540061950683594, "rewards/rejected": -24.04190444946289, "step": 4195 }, { "epoch": 6.74, "learning_rate": 1.398137138327388e-07, "logits/chosen": -1.6112642288208008, "logits/rejected": -1.573140025138855, "logps/chosen": -164.20797729492188, "logps/rejected": -275.2939453125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.3556671142578125, "rewards/margins": 12.353065490722656, "rewards/rejected": -19.70873260498047, "step": 4196 }, { "epoch": 6.74, "learning_rate": 1.3971462544589775e-07, "logits/chosen": -1.4641635417938232, "logits/rejected": -1.4328367710113525, "logps/chosen": -159.75201416015625, "logps/rejected": -280.0947265625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.2330827713012695, "rewards/margins": 13.1507568359375, "rewards/rejected": -20.383840560913086, "step": 4197 }, { "epoch": 6.74, "learning_rate": 1.3961553705905668e-07, "logits/chosen": -1.5324251651763916, "logits/rejected": -1.5155521631240845, "logps/chosen": -109.0118179321289, "logps/rejected": -241.6544647216797, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.1436262130737305, "rewards/margins": 13.517034530639648, "rewards/rejected": -17.660659790039062, "step": 4198 }, { "epoch": 6.74, "learning_rate": 1.395164486722156e-07, "logits/chosen": -1.5373634099960327, "logits/rejected": -1.570500135421753, "logps/chosen": -195.3729248046875, "logps/rejected": -345.05096435546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.909837245941162, "rewards/margins": 14.379840850830078, "rewards/rejected": -22.289676666259766, "step": 4199 }, { "epoch": 6.74, "learning_rate": 1.3941736028537455e-07, "logits/chosen": -1.4142670631408691, "logits/rejected": -1.3718929290771484, "logps/chosen": -176.2945556640625, "logps/rejected": -275.48077392578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.357383728027344, "rewards/margins": 11.141830444335938, "rewards/rejected": -19.49921417236328, "step": 4200 }, { "epoch": 6.74, "learning_rate": 1.3931827189853348e-07, "logits/chosen": -1.416426181793213, "logits/rejected": -1.3661353588104248, "logps/chosen": -180.42660522460938, "logps/rejected": -301.6062927246094, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.546095848083496, "rewards/margins": 12.149117469787598, "rewards/rejected": -20.695213317871094, "step": 4201 }, { "epoch": 6.74, "learning_rate": 1.3921918351169242e-07, "logits/chosen": -1.502349853515625, "logits/rejected": -1.5710465908050537, "logps/chosen": -165.03587341308594, "logps/rejected": -269.8828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.48328971862793, "rewards/margins": 12.111289978027344, "rewards/rejected": -19.594579696655273, "step": 4202 }, { "epoch": 6.75, "learning_rate": 1.3912009512485138e-07, "logits/chosen": -1.471245288848877, "logits/rejected": -1.5619478225708008, "logps/chosen": -199.80055236816406, "logps/rejected": -381.7022399902344, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.83605670928955, "rewards/margins": 15.710522651672363, "rewards/rejected": -25.54657745361328, "step": 4203 }, { "epoch": 6.75, "learning_rate": 1.3902100673801028e-07, "logits/chosen": -1.6255221366882324, "logits/rejected": -1.5882902145385742, "logps/chosen": -114.98757934570312, "logps/rejected": -282.79827880859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.2930526733398438, "rewards/margins": 17.85187530517578, "rewards/rejected": -21.144927978515625, "step": 4204 }, { "epoch": 6.75, "learning_rate": 1.3892191835116924e-07, "logits/chosen": -1.355288028717041, "logits/rejected": -1.3818347454071045, "logps/chosen": -84.55691528320312, "logps/rejected": -225.534423828125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.8436410427093506, "rewards/margins": 12.789389610290527, "rewards/rejected": -15.633030891418457, "step": 4205 }, { "epoch": 6.75, "learning_rate": 1.3882282996432818e-07, "logits/chosen": -1.4046738147735596, "logits/rejected": -1.3920999765396118, "logps/chosen": -169.51559448242188, "logps/rejected": -317.60003662109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.968656539916992, "rewards/margins": 14.347142219543457, "rewards/rejected": -23.315799713134766, "step": 4206 }, { "epoch": 6.75, "learning_rate": 1.387237415774871e-07, "logits/chosen": -1.3879774808883667, "logits/rejected": -1.3791463375091553, "logps/chosen": -185.00335693359375, "logps/rejected": -296.15008544921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.037274360656738, "rewards/margins": 11.37918472290039, "rewards/rejected": -20.416458129882812, "step": 4207 }, { "epoch": 6.75, "learning_rate": 1.3862465319064607e-07, "logits/chosen": -1.5228712558746338, "logits/rejected": -1.5581618547439575, "logps/chosen": -145.5684356689453, "logps/rejected": -322.1065673828125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.948997497558594, "rewards/margins": 15.489208221435547, "rewards/rejected": -21.43820571899414, "step": 4208 }, { "epoch": 6.76, "learning_rate": 1.3852556480380498e-07, "logits/chosen": -1.4421032667160034, "logits/rejected": -1.477669596672058, "logps/chosen": -199.07562255859375, "logps/rejected": -302.7087097167969, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -11.027076721191406, "rewards/margins": 9.495296478271484, "rewards/rejected": -20.52237319946289, "step": 4209 }, { "epoch": 6.76, "learning_rate": 1.384264764169639e-07, "logits/chosen": -1.542765498161316, "logits/rejected": -1.5292882919311523, "logps/chosen": -128.90863037109375, "logps/rejected": -249.88949584960938, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.555241823196411, "rewards/margins": 13.459095001220703, "rewards/rejected": -16.01433563232422, "step": 4210 }, { "epoch": 6.76, "learning_rate": 1.3832738803012287e-07, "logits/chosen": -1.3053468465805054, "logits/rejected": -1.4114680290222168, "logps/chosen": -185.9967498779297, "logps/rejected": -381.86553955078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.74640941619873, "rewards/margins": 14.839594841003418, "rewards/rejected": -23.58600425720215, "step": 4211 }, { "epoch": 6.76, "learning_rate": 1.382282996432818e-07, "logits/chosen": -1.5004873275756836, "logits/rejected": -1.3799601793289185, "logps/chosen": -159.48114013671875, "logps/rejected": -239.1923370361328, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.310884475708008, "rewards/margins": 12.190966606140137, "rewards/rejected": -17.501850128173828, "step": 4212 }, { "epoch": 6.76, "learning_rate": 1.3812921125644074e-07, "logits/chosen": -1.6431342363357544, "logits/rejected": -1.6274620294570923, "logps/chosen": -152.58880615234375, "logps/rejected": -253.568603515625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.372913360595703, "rewards/margins": 11.662158012390137, "rewards/rejected": -18.035072326660156, "step": 4213 }, { "epoch": 6.76, "learning_rate": 1.3803012286959967e-07, "logits/chosen": -1.406298041343689, "logits/rejected": -1.4026992321014404, "logps/chosen": -152.84335327148438, "logps/rejected": -295.94561767578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.797030925750732, "rewards/margins": 14.592645645141602, "rewards/rejected": -21.389677047729492, "step": 4214 }, { "epoch": 6.77, "learning_rate": 1.379310344827586e-07, "logits/chosen": -1.3578693866729736, "logits/rejected": -1.3330128192901611, "logps/chosen": -178.56866455078125, "logps/rejected": -332.51531982421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.198840141296387, "rewards/margins": 17.167245864868164, "rewards/rejected": -25.366086959838867, "step": 4215 }, { "epoch": 6.77, "learning_rate": 1.3783194609591756e-07, "logits/chosen": -1.646689534187317, "logits/rejected": -1.7635502815246582, "logps/chosen": -121.24423217773438, "logps/rejected": -271.32440185546875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.927326202392578, "rewards/margins": 14.422897338867188, "rewards/rejected": -19.350223541259766, "step": 4216 }, { "epoch": 6.77, "learning_rate": 1.377328577090765e-07, "logits/chosen": -1.3439942598342896, "logits/rejected": -1.313960075378418, "logps/chosen": -142.451416015625, "logps/rejected": -256.6858215332031, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.165729522705078, "rewards/margins": 12.596446990966797, "rewards/rejected": -18.762176513671875, "step": 4217 }, { "epoch": 6.77, "learning_rate": 1.376337693222354e-07, "logits/chosen": -1.4488309621810913, "logits/rejected": -1.455206274986267, "logps/chosen": -124.57482147216797, "logps/rejected": -352.192138671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.6294145584106445, "rewards/margins": 20.568099975585938, "rewards/rejected": -26.1975154876709, "step": 4218 }, { "epoch": 6.77, "learning_rate": 1.3753468093539436e-07, "logits/chosen": -1.481250286102295, "logits/rejected": -1.5213878154754639, "logps/chosen": -92.03909301757812, "logps/rejected": -273.85400390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.907594919204712, "rewards/margins": 17.696212768554688, "rewards/rejected": -19.603809356689453, "step": 4219 }, { "epoch": 6.77, "learning_rate": 1.374355925485533e-07, "logits/chosen": -1.4872835874557495, "logits/rejected": -1.4210989475250244, "logps/chosen": -131.97666931152344, "logps/rejected": -246.2215118408203, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.808626174926758, "rewards/margins": 12.157038688659668, "rewards/rejected": -17.965665817260742, "step": 4220 }, { "epoch": 6.78, "learning_rate": 1.3733650416171226e-07, "logits/chosen": -1.4906489849090576, "logits/rejected": -1.5348397493362427, "logps/chosen": -178.25982666015625, "logps/rejected": -323.1091003417969, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.397759914398193, "rewards/margins": 14.381185531616211, "rewards/rejected": -21.778945922851562, "step": 4221 }, { "epoch": 6.78, "learning_rate": 1.372374157748712e-07, "logits/chosen": -1.367074728012085, "logits/rejected": -1.4018521308898926, "logps/chosen": -159.77740478515625, "logps/rejected": -316.6710205078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.5631303787231445, "rewards/margins": 14.684538841247559, "rewards/rejected": -22.247669219970703, "step": 4222 }, { "epoch": 6.78, "learning_rate": 1.371383273880301e-07, "logits/chosen": -1.2958322763442993, "logits/rejected": -1.341019868850708, "logps/chosen": -139.02597045898438, "logps/rejected": -310.98291015625, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -6.969642639160156, "rewards/margins": 14.775911331176758, "rewards/rejected": -21.745555877685547, "step": 4223 }, { "epoch": 6.78, "learning_rate": 1.3703923900118906e-07, "logits/chosen": -1.4638042449951172, "logits/rejected": -1.5520609617233276, "logps/chosen": -174.69342041015625, "logps/rejected": -304.9420166015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.791754245758057, "rewards/margins": 12.500545501708984, "rewards/rejected": -19.292301177978516, "step": 4224 }, { "epoch": 6.78, "learning_rate": 1.36940150614348e-07, "logits/chosen": -1.3869901895523071, "logits/rejected": -1.4309580326080322, "logps/chosen": -160.62710571289062, "logps/rejected": -301.4296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.455282211303711, "rewards/margins": 12.66466999053955, "rewards/rejected": -20.119953155517578, "step": 4225 }, { "epoch": 6.78, "learning_rate": 1.3684106222750695e-07, "logits/chosen": -1.607903242111206, "logits/rejected": -1.6546087265014648, "logps/chosen": -92.54154968261719, "logps/rejected": -250.07321166992188, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.969646692276001, "rewards/margins": 14.390527725219727, "rewards/rejected": -17.360172271728516, "step": 4226 }, { "epoch": 6.78, "learning_rate": 1.3674197384066588e-07, "logits/chosen": -1.4558486938476562, "logits/rejected": -1.5134347677230835, "logps/chosen": -147.99209594726562, "logps/rejected": -322.4406433105469, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.608140468597412, "rewards/margins": 15.668767929077148, "rewards/rejected": -20.27690887451172, "step": 4227 }, { "epoch": 6.79, "learning_rate": 1.366428854538248e-07, "logits/chosen": -1.452075481414795, "logits/rejected": -1.4377785921096802, "logps/chosen": -221.9173126220703, "logps/rejected": -350.39691162109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -13.757213592529297, "rewards/margins": 11.450274467468262, "rewards/rejected": -25.207489013671875, "step": 4228 }, { "epoch": 6.79, "learning_rate": 1.3654379706698375e-07, "logits/chosen": -1.5230522155761719, "logits/rejected": -1.609006404876709, "logps/chosen": -186.92562866210938, "logps/rejected": -365.34649658203125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -9.43431282043457, "rewards/margins": 16.729055404663086, "rewards/rejected": -26.163368225097656, "step": 4229 }, { "epoch": 6.79, "learning_rate": 1.3644470868014268e-07, "logits/chosen": -1.4394872188568115, "logits/rejected": -1.4731945991516113, "logps/chosen": -126.47897338867188, "logps/rejected": -329.49090576171875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.604137897491455, "rewards/margins": 18.451519012451172, "rewards/rejected": -23.0556583404541, "step": 4230 }, { "epoch": 6.79, "learning_rate": 1.3634562029330162e-07, "logits/chosen": -1.4288427829742432, "logits/rejected": -1.3802759647369385, "logps/chosen": -125.47401428222656, "logps/rejected": -231.2393798828125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.3236517906188965, "rewards/margins": 12.081584930419922, "rewards/rejected": -16.405237197875977, "step": 4231 }, { "epoch": 6.79, "learning_rate": 1.3624653190646055e-07, "logits/chosen": -1.4611788988113403, "logits/rejected": -1.5623860359191895, "logps/chosen": -125.67829132080078, "logps/rejected": -285.93658447265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.990811824798584, "rewards/margins": 15.615158081054688, "rewards/rejected": -20.60597038269043, "step": 4232 }, { "epoch": 6.79, "learning_rate": 1.3614744351961948e-07, "logits/chosen": -1.3753681182861328, "logits/rejected": -1.3948185443878174, "logps/chosen": -190.4010772705078, "logps/rejected": -320.72198486328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -10.330679893493652, "rewards/margins": 12.345712661743164, "rewards/rejected": -22.676393508911133, "step": 4233 }, { "epoch": 6.8, "learning_rate": 1.3604835513277844e-07, "logits/chosen": -1.4113807678222656, "logits/rejected": -1.339273452758789, "logps/chosen": -203.28053283691406, "logps/rejected": -315.11187744140625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -10.20056438446045, "rewards/margins": 11.58791732788086, "rewards/rejected": -21.788480758666992, "step": 4234 }, { "epoch": 6.8, "learning_rate": 1.3594926674593738e-07, "logits/chosen": -1.5670324563980103, "logits/rejected": -1.6419540643692017, "logps/chosen": -109.01220703125, "logps/rejected": -258.7642822265625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -3.8818726539611816, "rewards/margins": 12.2427978515625, "rewards/rejected": -16.124670028686523, "step": 4235 }, { "epoch": 6.8, "learning_rate": 1.358501783590963e-07, "logits/chosen": -1.4653048515319824, "logits/rejected": -1.5415207147598267, "logps/chosen": -133.68951416015625, "logps/rejected": -271.817138671875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.913015365600586, "rewards/margins": 12.05225944519043, "rewards/rejected": -18.965274810791016, "step": 4236 }, { "epoch": 6.8, "learning_rate": 1.3575108997225524e-07, "logits/chosen": -1.3659536838531494, "logits/rejected": -1.342930793762207, "logps/chosen": -117.63931274414062, "logps/rejected": -216.10165405273438, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.422656536102295, "rewards/margins": 11.153700828552246, "rewards/rejected": -15.576356887817383, "step": 4237 }, { "epoch": 6.8, "learning_rate": 1.3565200158541418e-07, "logits/chosen": -1.392491102218628, "logits/rejected": -1.4203555583953857, "logps/chosen": -156.10675048828125, "logps/rejected": -298.53973388671875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.859453201293945, "rewards/margins": 13.579512596130371, "rewards/rejected": -20.438966751098633, "step": 4238 }, { "epoch": 6.8, "learning_rate": 1.355529131985731e-07, "logits/chosen": -1.444943904876709, "logits/rejected": -1.3559720516204834, "logps/chosen": -185.278076171875, "logps/rejected": -307.0498046875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -8.274322509765625, "rewards/margins": 13.988917350769043, "rewards/rejected": -22.263240814208984, "step": 4239 }, { "epoch": 6.81, "learning_rate": 1.3545382481173207e-07, "logits/chosen": -1.5708236694335938, "logits/rejected": -1.545567512512207, "logps/chosen": -139.77146911621094, "logps/rejected": -270.19512939453125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -5.674892902374268, "rewards/margins": 13.2164306640625, "rewards/rejected": -18.89132308959961, "step": 4240 }, { "epoch": 6.81, "learning_rate": 1.35354736424891e-07, "logits/chosen": -1.382473349571228, "logits/rejected": -1.5197217464447021, "logps/chosen": -89.27389526367188, "logps/rejected": -238.52078247070312, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.6011040210723877, "rewards/margins": 13.426082611083984, "rewards/rejected": -17.02718734741211, "step": 4241 }, { "epoch": 6.81, "learning_rate": 1.3525564803804993e-07, "logits/chosen": -1.5791114568710327, "logits/rejected": -1.6141993999481201, "logps/chosen": -192.59625244140625, "logps/rejected": -286.80072021484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.338299751281738, "rewards/margins": 10.783578872680664, "rewards/rejected": -20.121877670288086, "step": 4242 }, { "epoch": 6.81, "learning_rate": 1.3515655965120887e-07, "logits/chosen": -1.5115331411361694, "logits/rejected": -1.51179039478302, "logps/chosen": -171.96302795410156, "logps/rejected": -334.5011291503906, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.919258117675781, "rewards/margins": 16.53603744506836, "rewards/rejected": -24.45529556274414, "step": 4243 }, { "epoch": 6.81, "learning_rate": 1.350574712643678e-07, "logits/chosen": -1.4239180088043213, "logits/rejected": -1.4461281299591064, "logps/chosen": -102.62583923339844, "logps/rejected": -223.0908966064453, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.8368186950683594, "rewards/margins": 12.051172256469727, "rewards/rejected": -15.887989044189453, "step": 4244 }, { "epoch": 6.81, "learning_rate": 1.3495838287752676e-07, "logits/chosen": -1.4536802768707275, "logits/rejected": -1.442291498184204, "logps/chosen": -135.4903564453125, "logps/rejected": -233.75701904296875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.6245832443237305, "rewards/margins": 10.887350082397461, "rewards/rejected": -16.511934280395508, "step": 4245 }, { "epoch": 6.82, "learning_rate": 1.348592944906857e-07, "logits/chosen": -1.4449113607406616, "logits/rejected": -1.4207689762115479, "logps/chosen": -229.13827514648438, "logps/rejected": -321.19879150390625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -11.323939323425293, "rewards/margins": 12.243558883666992, "rewards/rejected": -23.56749725341797, "step": 4246 }, { "epoch": 6.82, "learning_rate": 1.3476020610384463e-07, "logits/chosen": -1.307510256767273, "logits/rejected": -1.3112220764160156, "logps/chosen": -165.0018310546875, "logps/rejected": -280.0767517089844, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.487719535827637, "rewards/margins": 13.49551773071289, "rewards/rejected": -20.983238220214844, "step": 4247 }, { "epoch": 6.82, "learning_rate": 1.3466111771700356e-07, "logits/chosen": -1.545485258102417, "logits/rejected": -1.4272127151489258, "logps/chosen": -216.63613891601562, "logps/rejected": -303.2686767578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -10.721135139465332, "rewards/margins": 11.529053688049316, "rewards/rejected": -22.25018882751465, "step": 4248 }, { "epoch": 6.82, "learning_rate": 1.345620293301625e-07, "logits/chosen": -1.4033474922180176, "logits/rejected": -1.3280503749847412, "logps/chosen": -159.3905029296875, "logps/rejected": -256.2696533203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.223450183868408, "rewards/margins": 11.601327896118164, "rewards/rejected": -17.824777603149414, "step": 4249 }, { "epoch": 6.82, "learning_rate": 1.3446294094332145e-07, "logits/chosen": -1.4835851192474365, "logits/rejected": -1.4732414484024048, "logps/chosen": -155.80023193359375, "logps/rejected": -312.1395263671875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -7.359098434448242, "rewards/margins": 14.757181167602539, "rewards/rejected": -22.11627960205078, "step": 4250 }, { "epoch": 6.82, "learning_rate": 1.3436385255648036e-07, "logits/chosen": -1.4709765911102295, "logits/rejected": -1.4556918144226074, "logps/chosen": -158.65200805664062, "logps/rejected": -336.28106689453125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.515510559082031, "rewards/margins": 16.66599464416504, "rewards/rejected": -23.18150520324707, "step": 4251 }, { "epoch": 6.83, "learning_rate": 1.342647641696393e-07, "logits/chosen": -1.579154372215271, "logits/rejected": -1.5992822647094727, "logps/chosen": -145.5520782470703, "logps/rejected": -269.52178955078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.703699588775635, "rewards/margins": 12.847330093383789, "rewards/rejected": -18.551029205322266, "step": 4252 }, { "epoch": 6.83, "learning_rate": 1.3416567578279825e-07, "logits/chosen": -1.5302876234054565, "logits/rejected": -1.4907629489898682, "logps/chosen": -167.96246337890625, "logps/rejected": -247.34103393554688, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.5380659103393555, "rewards/margins": 9.099124908447266, "rewards/rejected": -16.637189865112305, "step": 4253 }, { "epoch": 6.83, "learning_rate": 1.340665873959572e-07, "logits/chosen": -1.4703125953674316, "logits/rejected": -1.5003622770309448, "logps/chosen": -110.11369323730469, "logps/rejected": -228.05165100097656, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.230956554412842, "rewards/margins": 11.44955062866211, "rewards/rejected": -16.68050765991211, "step": 4254 }, { "epoch": 6.83, "learning_rate": 1.3396749900911615e-07, "logits/chosen": -1.5509713888168335, "logits/rejected": -1.5155510902404785, "logps/chosen": -115.55642700195312, "logps/rejected": -217.81393432617188, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.608709335327148, "rewards/margins": 11.265159606933594, "rewards/rejected": -15.873870849609375, "step": 4255 }, { "epoch": 6.83, "learning_rate": 1.3386841062227505e-07, "logits/chosen": -1.3940749168395996, "logits/rejected": -1.4406899213790894, "logps/chosen": -180.99130249023438, "logps/rejected": -315.6795349121094, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -10.061578750610352, "rewards/margins": 12.910501480102539, "rewards/rejected": -22.97208023071289, "step": 4256 }, { "epoch": 6.83, "learning_rate": 1.33769322235434e-07, "logits/chosen": -1.4535274505615234, "logits/rejected": -1.528532862663269, "logps/chosen": -157.77195739746094, "logps/rejected": -293.68701171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.224557876586914, "rewards/margins": 11.541576385498047, "rewards/rejected": -19.76613426208496, "step": 4257 }, { "epoch": 6.83, "learning_rate": 1.3367023384859295e-07, "logits/chosen": -1.4985743761062622, "logits/rejected": -1.4585516452789307, "logps/chosen": -206.73086547851562, "logps/rejected": -338.92230224609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -11.333765029907227, "rewards/margins": 14.604330062866211, "rewards/rejected": -25.938095092773438, "step": 4258 }, { "epoch": 6.84, "learning_rate": 1.3357114546175188e-07, "logits/chosen": -1.5046782493591309, "logits/rejected": -1.495187759399414, "logps/chosen": -160.5237579345703, "logps/rejected": -296.5344543457031, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.188738822937012, "rewards/margins": 13.967012405395508, "rewards/rejected": -21.155752182006836, "step": 4259 }, { "epoch": 6.84, "learning_rate": 1.3347205707491081e-07, "logits/chosen": -1.5589667558670044, "logits/rejected": -1.6188794374465942, "logps/chosen": -135.3613739013672, "logps/rejected": -298.66302490234375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.690541744232178, "rewards/margins": 13.391782760620117, "rewards/rejected": -20.082324981689453, "step": 4260 }, { "epoch": 6.84, "learning_rate": 1.3337296868806975e-07, "logits/chosen": -1.4349784851074219, "logits/rejected": -1.4999220371246338, "logps/chosen": -150.71524047851562, "logps/rejected": -334.1357727050781, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.589204788208008, "rewards/margins": 17.417346954345703, "rewards/rejected": -25.006553649902344, "step": 4261 }, { "epoch": 6.84, "learning_rate": 1.3327388030122868e-07, "logits/chosen": -1.5153224468231201, "logits/rejected": -1.3695610761642456, "logps/chosen": -179.51065063476562, "logps/rejected": -256.43719482421875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.449282169342041, "rewards/margins": 11.090581893920898, "rewards/rejected": -18.53986358642578, "step": 4262 }, { "epoch": 6.84, "learning_rate": 1.3317479191438764e-07, "logits/chosen": -1.5446449518203735, "logits/rejected": -1.5583688020706177, "logps/chosen": -142.17074584960938, "logps/rejected": -327.21966552734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.682271480560303, "rewards/margins": 17.637310028076172, "rewards/rejected": -23.319583892822266, "step": 4263 }, { "epoch": 6.84, "learning_rate": 1.3307570352754657e-07, "logits/chosen": -1.3670862913131714, "logits/rejected": -1.4265691041946411, "logps/chosen": -183.40567016601562, "logps/rejected": -296.88665771484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.634556770324707, "rewards/margins": 11.373229026794434, "rewards/rejected": -21.00778579711914, "step": 4264 }, { "epoch": 6.85, "learning_rate": 1.329766151407055e-07, "logits/chosen": -1.535935401916504, "logits/rejected": -1.5516026020050049, "logps/chosen": -174.38327026367188, "logps/rejected": -303.5965270996094, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.910573959350586, "rewards/margins": 12.454301834106445, "rewards/rejected": -21.36487579345703, "step": 4265 }, { "epoch": 6.85, "learning_rate": 1.3287752675386444e-07, "logits/chosen": -1.4564298391342163, "logits/rejected": -1.4800231456756592, "logps/chosen": -149.65760803222656, "logps/rejected": -288.11639404296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.360375881195068, "rewards/margins": 13.698225975036621, "rewards/rejected": -21.05860137939453, "step": 4266 }, { "epoch": 6.85, "learning_rate": 1.3277843836702337e-07, "logits/chosen": -1.3553333282470703, "logits/rejected": -1.3605999946594238, "logps/chosen": -191.07833862304688, "logps/rejected": -290.35400390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.556795120239258, "rewards/margins": 11.366944313049316, "rewards/rejected": -20.923738479614258, "step": 4267 }, { "epoch": 6.85, "learning_rate": 1.3267934998018233e-07, "logits/chosen": -1.498841404914856, "logits/rejected": -1.5227713584899902, "logps/chosen": -143.8974609375, "logps/rejected": -300.13287353515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.49031400680542, "rewards/margins": 15.746574401855469, "rewards/rejected": -22.236888885498047, "step": 4268 }, { "epoch": 6.85, "learning_rate": 1.3258026159334127e-07, "logits/chosen": -1.5693347454071045, "logits/rejected": -1.6507294178009033, "logps/chosen": -120.5625, "logps/rejected": -295.6064453125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -5.028434753417969, "rewards/margins": 14.605969429016113, "rewards/rejected": -19.6344051361084, "step": 4269 }, { "epoch": 6.85, "learning_rate": 1.3248117320650017e-07, "logits/chosen": -1.4581916332244873, "logits/rejected": -1.4435745477676392, "logps/chosen": -106.74722290039062, "logps/rejected": -270.0750732421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.826873540878296, "rewards/margins": 17.50199317932129, "rewards/rejected": -20.32886505126953, "step": 4270 }, { "epoch": 6.86, "learning_rate": 1.3238208481965913e-07, "logits/chosen": -1.4398397207260132, "logits/rejected": -1.4245291948318481, "logps/chosen": -154.8601531982422, "logps/rejected": -303.6233825683594, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.262284755706787, "rewards/margins": 13.917516708374023, "rewards/rejected": -20.17980194091797, "step": 4271 }, { "epoch": 6.86, "learning_rate": 1.3228299643281807e-07, "logits/chosen": -1.5410716533660889, "logits/rejected": -1.569474220275879, "logps/chosen": -159.18771362304688, "logps/rejected": -357.85089111328125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.405728340148926, "rewards/margins": 18.594573974609375, "rewards/rejected": -26.000303268432617, "step": 4272 }, { "epoch": 6.86, "learning_rate": 1.32183908045977e-07, "logits/chosen": -1.4270234107971191, "logits/rejected": -1.537653923034668, "logps/chosen": -176.86102294921875, "logps/rejected": -385.6090087890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.321099281311035, "rewards/margins": 18.798484802246094, "rewards/rejected": -28.119586944580078, "step": 4273 }, { "epoch": 6.86, "learning_rate": 1.3208481965913596e-07, "logits/chosen": -1.4427878856658936, "logits/rejected": -1.4544813632965088, "logps/chosen": -224.9772491455078, "logps/rejected": -332.191650390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -12.838093757629395, "rewards/margins": 12.031434059143066, "rewards/rejected": -24.86952781677246, "step": 4274 }, { "epoch": 6.86, "learning_rate": 1.3198573127229487e-07, "logits/chosen": -1.4862805604934692, "logits/rejected": -1.5148365497589111, "logps/chosen": -123.48849487304688, "logps/rejected": -270.2886962890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.962425231933594, "rewards/margins": 13.866817474365234, "rewards/rejected": -19.829242706298828, "step": 4275 }, { "epoch": 6.86, "learning_rate": 1.3188664288545383e-07, "logits/chosen": -1.6777883768081665, "logits/rejected": -1.7043771743774414, "logps/chosen": -166.06011962890625, "logps/rejected": -279.2103271484375, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -7.672287940979004, "rewards/margins": 11.566886901855469, "rewards/rejected": -19.239173889160156, "step": 4276 }, { "epoch": 6.87, "learning_rate": 1.3178755449861276e-07, "logits/chosen": -1.5696409940719604, "logits/rejected": -1.4400540590286255, "logps/chosen": -153.15509033203125, "logps/rejected": -281.15252685546875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.518252372741699, "rewards/margins": 13.754487037658691, "rewards/rejected": -20.27273941040039, "step": 4277 }, { "epoch": 6.87, "learning_rate": 1.316884661117717e-07, "logits/chosen": -1.5224648714065552, "logits/rejected": -1.5814319849014282, "logps/chosen": -164.186279296875, "logps/rejected": -332.18701171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.82908821105957, "rewards/margins": 16.91836166381836, "rewards/rejected": -23.747451782226562, "step": 4278 }, { "epoch": 6.87, "learning_rate": 1.3158937772493065e-07, "logits/chosen": -1.4290246963500977, "logits/rejected": -1.5582456588745117, "logps/chosen": -179.80572509765625, "logps/rejected": -322.2653503417969, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.820511817932129, "rewards/margins": 13.470190048217773, "rewards/rejected": -20.290700912475586, "step": 4279 }, { "epoch": 6.87, "learning_rate": 1.3149028933808956e-07, "logits/chosen": -1.3676645755767822, "logits/rejected": -1.4288015365600586, "logps/chosen": -137.02755737304688, "logps/rejected": -278.9418640136719, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.417818546295166, "rewards/margins": 13.079514503479004, "rewards/rejected": -17.497333526611328, "step": 4280 }, { "epoch": 6.87, "learning_rate": 1.313912009512485e-07, "logits/chosen": -1.5556520223617554, "logits/rejected": -1.583845853805542, "logps/chosen": -151.59805297851562, "logps/rejected": -315.99078369140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.469965934753418, "rewards/margins": 16.242353439331055, "rewards/rejected": -23.712318420410156, "step": 4281 }, { "epoch": 6.87, "learning_rate": 1.3129211256440745e-07, "logits/chosen": -1.4821794033050537, "logits/rejected": -1.5267000198364258, "logps/chosen": -147.068115234375, "logps/rejected": -283.7849426269531, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.342219829559326, "rewards/margins": 13.891756057739258, "rewards/rejected": -20.233976364135742, "step": 4282 }, { "epoch": 6.87, "learning_rate": 1.3119302417756639e-07, "logits/chosen": -1.4784932136535645, "logits/rejected": -1.4943057298660278, "logps/chosen": -181.0413360595703, "logps/rejected": -347.7564697265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.350299835205078, "rewards/margins": 17.27113914489746, "rewards/rejected": -24.62143898010254, "step": 4283 }, { "epoch": 6.88, "learning_rate": 1.3109393579072532e-07, "logits/chosen": -1.5137200355529785, "logits/rejected": -1.5007898807525635, "logps/chosen": -168.63641357421875, "logps/rejected": -321.08251953125, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -8.49238109588623, "rewards/margins": 15.637548446655273, "rewards/rejected": -24.129928588867188, "step": 4284 }, { "epoch": 6.88, "learning_rate": 1.3099484740388425e-07, "logits/chosen": -1.5510516166687012, "logits/rejected": -1.587266206741333, "logps/chosen": -187.2799530029297, "logps/rejected": -286.5812072753906, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -9.653657913208008, "rewards/margins": 10.423924446105957, "rewards/rejected": -20.07758331298828, "step": 4285 }, { "epoch": 6.88, "learning_rate": 1.3089575901704319e-07, "logits/chosen": -1.480712890625, "logits/rejected": -1.5313217639923096, "logps/chosen": -172.68942260742188, "logps/rejected": -315.8013916015625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -9.25301742553711, "rewards/margins": 13.626751899719238, "rewards/rejected": -22.87976837158203, "step": 4286 }, { "epoch": 6.88, "learning_rate": 1.3079667063020214e-07, "logits/chosen": -1.2362608909606934, "logits/rejected": -1.2863179445266724, "logps/chosen": -183.6317138671875, "logps/rejected": -311.90960693359375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -10.576515197753906, "rewards/margins": 12.326824188232422, "rewards/rejected": -22.903339385986328, "step": 4287 }, { "epoch": 6.88, "learning_rate": 1.3069758224336108e-07, "logits/chosen": -1.6772159337997437, "logits/rejected": -1.5387132167816162, "logps/chosen": -189.92189025878906, "logps/rejected": -291.9770202636719, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.35258960723877, "rewards/margins": 12.875459671020508, "rewards/rejected": -21.228050231933594, "step": 4288 }, { "epoch": 6.88, "learning_rate": 1.3059849385651999e-07, "logits/chosen": -1.4387080669403076, "logits/rejected": -1.4397437572479248, "logps/chosen": -168.4374237060547, "logps/rejected": -298.67498779296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.485949993133545, "rewards/margins": 13.225630760192871, "rewards/rejected": -20.711580276489258, "step": 4289 }, { "epoch": 6.89, "learning_rate": 1.3049940546967894e-07, "logits/chosen": -1.4883170127868652, "logits/rejected": -1.5089118480682373, "logps/chosen": -139.19073486328125, "logps/rejected": -274.90606689453125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.884763240814209, "rewards/margins": 13.900800704956055, "rewards/rejected": -18.78556251525879, "step": 4290 }, { "epoch": 6.89, "learning_rate": 1.3040031708283788e-07, "logits/chosen": -1.3994731903076172, "logits/rejected": -1.412219524383545, "logps/chosen": -146.13217163085938, "logps/rejected": -291.40997314453125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.722890377044678, "rewards/margins": 14.221756935119629, "rewards/rejected": -20.94464683532715, "step": 4291 }, { "epoch": 6.89, "learning_rate": 1.3030122869599684e-07, "logits/chosen": -1.533659815788269, "logits/rejected": -1.4114477634429932, "logps/chosen": -177.42849731445312, "logps/rejected": -267.626708984375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.664575576782227, "rewards/margins": 11.954486846923828, "rewards/rejected": -18.619062423706055, "step": 4292 }, { "epoch": 6.89, "learning_rate": 1.3020214030915577e-07, "logits/chosen": -1.726412057876587, "logits/rejected": -1.7339627742767334, "logps/chosen": -141.17657470703125, "logps/rejected": -296.1455078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.869499206542969, "rewards/margins": 15.45513916015625, "rewards/rejected": -20.32463836669922, "step": 4293 }, { "epoch": 6.89, "learning_rate": 1.3010305192231468e-07, "logits/chosen": -1.3734432458877563, "logits/rejected": -1.3948428630828857, "logps/chosen": -152.02963256835938, "logps/rejected": -266.4960021972656, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.83348274230957, "rewards/margins": 10.864480972290039, "rewards/rejected": -18.69796371459961, "step": 4294 }, { "epoch": 6.89, "learning_rate": 1.3000396353547364e-07, "logits/chosen": -1.3678107261657715, "logits/rejected": -1.3610135316848755, "logps/chosen": -134.41851806640625, "logps/rejected": -231.99363708496094, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.509337425231934, "rewards/margins": 10.38669490814209, "rewards/rejected": -16.896032333374023, "step": 4295 }, { "epoch": 6.9, "learning_rate": 1.2990487514863257e-07, "logits/chosen": -1.5657483339309692, "logits/rejected": -1.5451221466064453, "logps/chosen": -171.8986358642578, "logps/rejected": -254.52366638183594, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.943211555480957, "rewards/margins": 10.141676902770996, "rewards/rejected": -17.084888458251953, "step": 4296 }, { "epoch": 6.9, "learning_rate": 1.2980578676179153e-07, "logits/chosen": -1.5095562934875488, "logits/rejected": -1.5189958810806274, "logps/chosen": -187.65733337402344, "logps/rejected": -300.6346435546875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -9.892082214355469, "rewards/margins": 10.896815299987793, "rewards/rejected": -20.788896560668945, "step": 4297 }, { "epoch": 6.9, "learning_rate": 1.2970669837495046e-07, "logits/chosen": -1.4297704696655273, "logits/rejected": -1.5709483623504639, "logps/chosen": -140.33506774902344, "logps/rejected": -298.2570495605469, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.668919086456299, "rewards/margins": 14.307233810424805, "rewards/rejected": -19.976152420043945, "step": 4298 }, { "epoch": 6.9, "learning_rate": 1.2960760998810937e-07, "logits/chosen": -1.5177892446517944, "logits/rejected": -1.499434471130371, "logps/chosen": -177.0868377685547, "logps/rejected": -306.70391845703125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -10.092121124267578, "rewards/margins": 11.351507186889648, "rewards/rejected": -21.44363021850586, "step": 4299 }, { "epoch": 6.9, "learning_rate": 1.2950852160126833e-07, "logits/chosen": -1.4757041931152344, "logits/rejected": -1.55293607711792, "logps/chosen": -146.1461944580078, "logps/rejected": -279.0918884277344, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.655509948730469, "rewards/margins": 12.780374526977539, "rewards/rejected": -19.435884475708008, "step": 4300 }, { "epoch": 6.9, "learning_rate": 1.2940943321442726e-07, "logits/chosen": -1.355355978012085, "logits/rejected": -1.3110828399658203, "logps/chosen": -184.12118530273438, "logps/rejected": -266.32025146484375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -9.04542064666748, "rewards/margins": 11.658212661743164, "rewards/rejected": -20.703632354736328, "step": 4301 }, { "epoch": 6.91, "learning_rate": 1.293103448275862e-07, "logits/chosen": -1.4735065698623657, "logits/rejected": -1.5677225589752197, "logps/chosen": -109.21278381347656, "logps/rejected": -265.9952392578125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.959303379058838, "rewards/margins": 15.024749755859375, "rewards/rejected": -18.984052658081055, "step": 4302 }, { "epoch": 6.91, "learning_rate": 1.2921125644074513e-07, "logits/chosen": -1.5612730979919434, "logits/rejected": -1.5712697505950928, "logps/chosen": -172.47418212890625, "logps/rejected": -312.5321044921875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.856637001037598, "rewards/margins": 12.654653549194336, "rewards/rejected": -21.51129150390625, "step": 4303 }, { "epoch": 6.91, "learning_rate": 1.2911216805390406e-07, "logits/chosen": -1.587221622467041, "logits/rejected": -1.4515109062194824, "logps/chosen": -147.28927612304688, "logps/rejected": -213.03211975097656, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -3.9463725090026855, "rewards/margins": 9.342229843139648, "rewards/rejected": -13.288601875305176, "step": 4304 }, { "epoch": 6.91, "learning_rate": 1.2901307966706302e-07, "logits/chosen": -1.5290552377700806, "logits/rejected": -1.5231764316558838, "logps/chosen": -134.39151000976562, "logps/rejected": -247.91705322265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.591496467590332, "rewards/margins": 11.982635498046875, "rewards/rejected": -17.57413101196289, "step": 4305 }, { "epoch": 6.91, "learning_rate": 1.2891399128022196e-07, "logits/chosen": -1.4264874458312988, "logits/rejected": -1.5338096618652344, "logps/chosen": -129.68983459472656, "logps/rejected": -262.00665283203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.54251766204834, "rewards/margins": 11.844867706298828, "rewards/rejected": -18.38738441467285, "step": 4306 }, { "epoch": 6.91, "learning_rate": 1.288149028933809e-07, "logits/chosen": -1.4937124252319336, "logits/rejected": -1.5156177282333374, "logps/chosen": -137.41441345214844, "logps/rejected": -246.71856689453125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.466919422149658, "rewards/margins": 10.736380577087402, "rewards/rejected": -17.20330047607422, "step": 4307 }, { "epoch": 6.91, "learning_rate": 1.2871581450653982e-07, "logits/chosen": -1.4568756818771362, "logits/rejected": -1.5030415058135986, "logps/chosen": -133.63563537597656, "logps/rejected": -259.2521667480469, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.009183883666992, "rewards/margins": 12.545990943908691, "rewards/rejected": -17.555173873901367, "step": 4308 }, { "epoch": 6.92, "learning_rate": 1.2861672611969876e-07, "logits/chosen": -1.5248723030090332, "logits/rejected": -1.5386236906051636, "logps/chosen": -112.57725524902344, "logps/rejected": -220.40223693847656, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.225460052490234, "rewards/margins": 11.656986236572266, "rewards/rejected": -15.8824462890625, "step": 4309 }, { "epoch": 6.92, "learning_rate": 1.285176377328577e-07, "logits/chosen": -1.6561203002929688, "logits/rejected": -1.697479486465454, "logps/chosen": -177.25717163085938, "logps/rejected": -313.2477722167969, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.710463523864746, "rewards/margins": 12.630510330200195, "rewards/rejected": -21.340972900390625, "step": 4310 }, { "epoch": 6.92, "learning_rate": 1.2841854934601665e-07, "logits/chosen": -1.560349464416504, "logits/rejected": -1.6322051286697388, "logps/chosen": -132.02638244628906, "logps/rejected": -271.97515869140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.4519453048706055, "rewards/margins": 11.839178085327148, "rewards/rejected": -18.291122436523438, "step": 4311 }, { "epoch": 6.92, "learning_rate": 1.2831946095917558e-07, "logits/chosen": -1.5629547834396362, "logits/rejected": -1.5957180261611938, "logps/chosen": -137.21560668945312, "logps/rejected": -317.1037292480469, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.305415153503418, "rewards/margins": 17.662899017333984, "rewards/rejected": -22.96831512451172, "step": 4312 }, { "epoch": 6.92, "learning_rate": 1.2822037257233452e-07, "logits/chosen": -1.3525371551513672, "logits/rejected": -1.3652621507644653, "logps/chosen": -126.0892562866211, "logps/rejected": -316.083984375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.30812931060791, "rewards/margins": 15.535308837890625, "rewards/rejected": -21.84343910217285, "step": 4313 }, { "epoch": 6.92, "learning_rate": 1.2812128418549345e-07, "logits/chosen": -1.408422827720642, "logits/rejected": -1.4256047010421753, "logps/chosen": -90.45665740966797, "logps/rejected": -205.7488555908203, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.566164493560791, "rewards/margins": 11.031644821166992, "rewards/rejected": -14.597808837890625, "step": 4314 }, { "epoch": 6.93, "learning_rate": 1.2802219579865238e-07, "logits/chosen": -1.4226073026657104, "logits/rejected": -1.50832200050354, "logps/chosen": -167.0977783203125, "logps/rejected": -366.845947265625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -6.75607442855835, "rewards/margins": 18.29708480834961, "rewards/rejected": -25.053159713745117, "step": 4315 }, { "epoch": 6.93, "learning_rate": 1.2792310741181134e-07, "logits/chosen": -1.470255732536316, "logits/rejected": -1.5280423164367676, "logps/chosen": -135.12840270996094, "logps/rejected": -342.0127868652344, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.1385722160339355, "rewards/margins": 18.15222930908203, "rewards/rejected": -24.290802001953125, "step": 4316 }, { "epoch": 6.93, "learning_rate": 1.2782401902497028e-07, "logits/chosen": -1.347651481628418, "logits/rejected": -1.3877893686294556, "logps/chosen": -210.9666748046875, "logps/rejected": -377.32489013671875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -11.988554000854492, "rewards/margins": 16.125385284423828, "rewards/rejected": -28.11393928527832, "step": 4317 }, { "epoch": 6.93, "learning_rate": 1.277249306381292e-07, "logits/chosen": -1.4457354545593262, "logits/rejected": -1.427058458328247, "logps/chosen": -183.82545471191406, "logps/rejected": -357.3561096191406, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -9.28513240814209, "rewards/margins": 17.351966857910156, "rewards/rejected": -26.637102127075195, "step": 4318 }, { "epoch": 6.93, "learning_rate": 1.2762584225128814e-07, "logits/chosen": -1.5396779775619507, "logits/rejected": -1.484226107597351, "logps/chosen": -130.5384521484375, "logps/rejected": -268.9083557128906, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.09043025970459, "rewards/margins": 13.801069259643555, "rewards/rejected": -18.89150047302246, "step": 4319 }, { "epoch": 6.93, "learning_rate": 1.2752675386444708e-07, "logits/chosen": -1.5914158821105957, "logits/rejected": -1.642514705657959, "logps/chosen": -103.25065612792969, "logps/rejected": -293.6171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.964869499206543, "rewards/margins": 16.868938446044922, "rewards/rejected": -20.83380699157715, "step": 4320 }, { "epoch": 6.94, "learning_rate": 1.2742766547760604e-07, "logits/chosen": -1.6004083156585693, "logits/rejected": -1.5024765729904175, "logps/chosen": -153.00497436523438, "logps/rejected": -227.97891235351562, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.593244552612305, "rewards/margins": 11.049426078796387, "rewards/rejected": -15.642671585083008, "step": 4321 }, { "epoch": 6.94, "learning_rate": 1.2732857709076494e-07, "logits/chosen": -1.3689305782318115, "logits/rejected": -1.3575849533081055, "logps/chosen": -141.92520141601562, "logps/rejected": -275.8426208496094, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -6.579128742218018, "rewards/margins": 12.354674339294434, "rewards/rejected": -18.93380355834961, "step": 4322 }, { "epoch": 6.94, "learning_rate": 1.2722948870392388e-07, "logits/chosen": -1.484875202178955, "logits/rejected": -1.5846595764160156, "logps/chosen": -152.639404296875, "logps/rejected": -341.92633056640625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.637639999389648, "rewards/margins": 15.9112548828125, "rewards/rejected": -23.54889488220215, "step": 4323 }, { "epoch": 6.94, "learning_rate": 1.2713040031708284e-07, "logits/chosen": -1.7022486925125122, "logits/rejected": -1.5783485174179077, "logps/chosen": -141.59970092773438, "logps/rejected": -228.56942749023438, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.71903657913208, "rewards/margins": 11.14383316040039, "rewards/rejected": -16.862869262695312, "step": 4324 }, { "epoch": 6.94, "learning_rate": 1.2703131193024177e-07, "logits/chosen": -1.399747371673584, "logits/rejected": -1.4299784898757935, "logps/chosen": -141.03260803222656, "logps/rejected": -263.9141845703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.242031574249268, "rewards/margins": 12.130402565002441, "rewards/rejected": -18.372434616088867, "step": 4325 }, { "epoch": 6.94, "learning_rate": 1.2693222354340073e-07, "logits/chosen": -1.4708397388458252, "logits/rejected": -1.4839723110198975, "logps/chosen": -140.30972290039062, "logps/rejected": -280.33154296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.224983215332031, "rewards/margins": 11.686664581298828, "rewards/rejected": -16.91164779663086, "step": 4326 }, { "epoch": 6.95, "learning_rate": 1.2683313515655964e-07, "logits/chosen": -1.4596989154815674, "logits/rejected": -1.4362398386001587, "logps/chosen": -134.68399047851562, "logps/rejected": -299.4694519042969, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.061861991882324, "rewards/margins": 16.057004928588867, "rewards/rejected": -22.118867874145508, "step": 4327 }, { "epoch": 6.95, "learning_rate": 1.2673404676971857e-07, "logits/chosen": -1.4489200115203857, "logits/rejected": -1.400987148284912, "logps/chosen": -193.5735321044922, "logps/rejected": -334.8089599609375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -10.789441108703613, "rewards/margins": 11.954316139221191, "rewards/rejected": -22.743757247924805, "step": 4328 }, { "epoch": 6.95, "learning_rate": 1.2663495838287753e-07, "logits/chosen": -1.6088558435440063, "logits/rejected": -1.6177834272384644, "logps/chosen": -172.08392333984375, "logps/rejected": -326.7933349609375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.902764320373535, "rewards/margins": 15.188146591186523, "rewards/rejected": -22.090909957885742, "step": 4329 }, { "epoch": 6.95, "learning_rate": 1.2653586999603646e-07, "logits/chosen": -1.5794850587844849, "logits/rejected": -1.5611016750335693, "logps/chosen": -143.60308837890625, "logps/rejected": -281.58123779296875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.910326957702637, "rewards/margins": 13.70084285736084, "rewards/rejected": -20.61117172241211, "step": 4330 }, { "epoch": 6.95, "learning_rate": 1.2643678160919542e-07, "logits/chosen": -1.5582289695739746, "logits/rejected": -1.5649431943893433, "logps/chosen": -139.22743225097656, "logps/rejected": -269.9653015136719, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.808687210083008, "rewards/margins": 12.504372596740723, "rewards/rejected": -18.313060760498047, "step": 4331 }, { "epoch": 6.95, "learning_rate": 1.2633769322235433e-07, "logits/chosen": -1.4214506149291992, "logits/rejected": -1.4490200281143188, "logps/chosen": -159.44528198242188, "logps/rejected": -338.112548828125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.098174095153809, "rewards/margins": 17.26534080505371, "rewards/rejected": -25.363513946533203, "step": 4332 }, { "epoch": 6.96, "learning_rate": 1.2623860483551326e-07, "logits/chosen": -1.6215322017669678, "logits/rejected": -1.7945780754089355, "logps/chosen": -133.97959899902344, "logps/rejected": -317.5301208496094, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.092343330383301, "rewards/margins": 14.559404373168945, "rewards/rejected": -21.651748657226562, "step": 4333 }, { "epoch": 6.96, "learning_rate": 1.2613951644867222e-07, "logits/chosen": -1.4846603870391846, "logits/rejected": -1.4378643035888672, "logps/chosen": -202.65493774414062, "logps/rejected": -334.9845886230469, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -11.635741233825684, "rewards/margins": 12.923158645629883, "rewards/rejected": -24.55889892578125, "step": 4334 }, { "epoch": 6.96, "learning_rate": 1.2604042806183116e-07, "logits/chosen": -1.4485752582550049, "logits/rejected": -1.4352654218673706, "logps/chosen": -128.20318603515625, "logps/rejected": -233.1246337890625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.599601745605469, "rewards/margins": 12.369684219360352, "rewards/rejected": -16.96928596496582, "step": 4335 }, { "epoch": 6.96, "learning_rate": 1.259413396749901e-07, "logits/chosen": -1.427125096321106, "logits/rejected": -1.3672806024551392, "logps/chosen": -118.27519226074219, "logps/rejected": -234.66537475585938, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.7877249717712402, "rewards/margins": 12.478940963745117, "rewards/rejected": -16.266666412353516, "step": 4336 }, { "epoch": 6.96, "learning_rate": 1.2584225128814902e-07, "logits/chosen": -1.602966070175171, "logits/rejected": -1.5214741230010986, "logps/chosen": -119.90373229980469, "logps/rejected": -223.2362823486328, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.9289188385009766, "rewards/margins": 11.311025619506836, "rewards/rejected": -15.239944458007812, "step": 4337 }, { "epoch": 6.96, "learning_rate": 1.2574316290130795e-07, "logits/chosen": -1.444711685180664, "logits/rejected": -1.460920810699463, "logps/chosen": -116.18070983886719, "logps/rejected": -261.0518493652344, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.299787521362305, "rewards/margins": 14.132219314575195, "rewards/rejected": -19.432008743286133, "step": 4338 }, { "epoch": 6.96, "learning_rate": 1.2564407451446691e-07, "logits/chosen": -1.3033758401870728, "logits/rejected": -1.299605369567871, "logps/chosen": -166.93743896484375, "logps/rejected": -289.3072204589844, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.856400489807129, "rewards/margins": 12.895968437194824, "rewards/rejected": -21.752368927001953, "step": 4339 }, { "epoch": 6.97, "learning_rate": 1.2554498612762585e-07, "logits/chosen": -1.6564733982086182, "logits/rejected": -1.5853700637817383, "logps/chosen": -181.61209106445312, "logps/rejected": -296.44329833984375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.322128772735596, "rewards/margins": 12.886022567749023, "rewards/rejected": -20.208152770996094, "step": 4340 }, { "epoch": 6.97, "learning_rate": 1.2544589774078475e-07, "logits/chosen": -1.477905511856079, "logits/rejected": -1.4226627349853516, "logps/chosen": -220.5298614501953, "logps/rejected": -335.6328430175781, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -11.493106842041016, "rewards/margins": 12.343870162963867, "rewards/rejected": -23.836977005004883, "step": 4341 }, { "epoch": 6.97, "learning_rate": 1.2534680935394371e-07, "logits/chosen": -1.3785821199417114, "logits/rejected": -1.3501948118209839, "logps/chosen": -114.79023742675781, "logps/rejected": -308.3306579589844, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.111201763153076, "rewards/margins": 17.29815673828125, "rewards/rejected": -21.409358978271484, "step": 4342 }, { "epoch": 6.97, "learning_rate": 1.2524772096710265e-07, "logits/chosen": -1.528933048248291, "logits/rejected": -1.5162949562072754, "logps/chosen": -170.25083923339844, "logps/rejected": -305.92376708984375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -9.048604011535645, "rewards/margins": 11.975053787231445, "rewards/rejected": -21.023658752441406, "step": 4343 }, { "epoch": 6.97, "learning_rate": 1.2514863258026158e-07, "logits/chosen": -1.5402898788452148, "logits/rejected": -1.5932040214538574, "logps/chosen": -165.48687744140625, "logps/rejected": -303.7412109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.816098213195801, "rewards/margins": 14.347036361694336, "rewards/rejected": -21.163135528564453, "step": 4344 }, { "epoch": 6.97, "learning_rate": 1.2504954419342054e-07, "logits/chosen": -1.4314227104187012, "logits/rejected": -1.4015283584594727, "logps/chosen": -213.84091186523438, "logps/rejected": -314.7798156738281, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -10.597672462463379, "rewards/margins": 11.267631530761719, "rewards/rejected": -21.865304946899414, "step": 4345 }, { "epoch": 6.98, "learning_rate": 1.2495045580657947e-07, "logits/chosen": -1.320091962814331, "logits/rejected": -1.3571906089782715, "logps/chosen": -205.00474548339844, "logps/rejected": -309.4711608886719, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -11.23813247680664, "rewards/margins": 11.156259536743164, "rewards/rejected": -22.394392013549805, "step": 4346 }, { "epoch": 6.98, "learning_rate": 1.248513674197384e-07, "logits/chosen": -1.286195158958435, "logits/rejected": -1.3818988800048828, "logps/chosen": -175.76976013183594, "logps/rejected": -306.3787841796875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -9.991697311401367, "rewards/margins": 11.727690696716309, "rewards/rejected": -21.71938705444336, "step": 4347 }, { "epoch": 6.98, "learning_rate": 1.2475227903289734e-07, "logits/chosen": -1.7449885606765747, "logits/rejected": -1.6435647010803223, "logps/chosen": -199.27691650390625, "logps/rejected": -307.28863525390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.85352897644043, "rewards/margins": 12.099481582641602, "rewards/rejected": -20.95301055908203, "step": 4348 }, { "epoch": 6.98, "learning_rate": 1.2465319064605627e-07, "logits/chosen": -1.4709646701812744, "logits/rejected": -1.450405240058899, "logps/chosen": -119.30654907226562, "logps/rejected": -275.74298095703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.484562873840332, "rewards/margins": 15.570978164672852, "rewards/rejected": -20.0555419921875, "step": 4349 }, { "epoch": 6.98, "learning_rate": 1.245541022592152e-07, "logits/chosen": -1.3718750476837158, "logits/rejected": -1.5239137411117554, "logps/chosen": -149.04898071289062, "logps/rejected": -298.96209716796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.040124893188477, "rewards/margins": 13.255836486816406, "rewards/rejected": -20.295961380004883, "step": 4350 }, { "epoch": 6.98, "learning_rate": 1.2445501387237417e-07, "logits/chosen": -1.6435092687606812, "logits/rejected": -1.6289594173431396, "logps/chosen": -109.72781372070312, "logps/rejected": -258.74896240234375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.06679105758667, "rewards/margins": 14.82215404510498, "rewards/rejected": -18.888944625854492, "step": 4351 }, { "epoch": 6.99, "learning_rate": 1.2435592548553307e-07, "logits/chosen": -1.5125677585601807, "logits/rejected": -1.5900278091430664, "logps/chosen": -97.42155456542969, "logps/rejected": -249.88551330566406, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.1825666427612305, "rewards/margins": 15.161263465881348, "rewards/rejected": -18.34383201599121, "step": 4352 }, { "epoch": 6.99, "learning_rate": 1.2425683709869203e-07, "logits/chosen": -1.654233694076538, "logits/rejected": -1.5677802562713623, "logps/chosen": -162.11654663085938, "logps/rejected": -277.99517822265625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.9748992919921875, "rewards/margins": 12.15892505645752, "rewards/rejected": -18.133825302124023, "step": 4353 }, { "epoch": 6.99, "learning_rate": 1.2415774871185097e-07, "logits/chosen": -1.6477930545806885, "logits/rejected": -1.6572380065917969, "logps/chosen": -142.06248474121094, "logps/rejected": -260.0127258300781, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.362393856048584, "rewards/margins": 12.69320297241211, "rewards/rejected": -18.05559730529785, "step": 4354 }, { "epoch": 6.99, "learning_rate": 1.240586603250099e-07, "logits/chosen": -1.4674665927886963, "logits/rejected": -1.4407474994659424, "logps/chosen": -184.73048400878906, "logps/rejected": -277.995361328125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.985604763031006, "rewards/margins": 9.408061981201172, "rewards/rejected": -17.393667221069336, "step": 4355 }, { "epoch": 6.99, "learning_rate": 1.2395957193816886e-07, "logits/chosen": -1.5035059452056885, "logits/rejected": -1.4764820337295532, "logps/chosen": -152.93182373046875, "logps/rejected": -286.5018005371094, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.285207748413086, "rewards/margins": 14.789018630981445, "rewards/rejected": -21.07422637939453, "step": 4356 }, { "epoch": 6.99, "learning_rate": 1.2386048355132777e-07, "logits/chosen": -1.6303181648254395, "logits/rejected": -1.4925563335418701, "logps/chosen": -178.75704956054688, "logps/rejected": -275.66058349609375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.514385223388672, "rewards/margins": 11.352544784545898, "rewards/rejected": -19.86693000793457, "step": 4357 }, { "epoch": 7.0, "learning_rate": 1.2376139516448673e-07, "logits/chosen": -1.5762104988098145, "logits/rejected": -1.630784511566162, "logps/chosen": -189.03720092773438, "logps/rejected": -342.8877258300781, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.583969116210938, "rewards/margins": 13.548315048217773, "rewards/rejected": -22.132282257080078, "step": 4358 }, { "epoch": 7.0, "learning_rate": 1.2366230677764566e-07, "logits/chosen": -1.611122965812683, "logits/rejected": -1.5208609104156494, "logps/chosen": -158.3083038330078, "logps/rejected": -267.5928955078125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.909217834472656, "rewards/margins": 12.562777519226074, "rewards/rejected": -18.471996307373047, "step": 4359 }, { "epoch": 7.0, "learning_rate": 1.235632183908046e-07, "logits/chosen": -1.3326228857040405, "logits/rejected": -1.3369784355163574, "logps/chosen": -186.82571411132812, "logps/rejected": -320.1168212890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -11.14082145690918, "rewards/margins": 12.48708724975586, "rewards/rejected": -23.627910614013672, "step": 4360 }, { "epoch": 7.0, "learning_rate": 1.2346413000396353e-07, "logits/chosen": -1.4784719944000244, "logits/rejected": -1.506305456161499, "logps/chosen": -101.01057434082031, "logps/rejected": -257.0434265136719, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.9965596199035645, "rewards/margins": 15.007376670837402, "rewards/rejected": -19.003936767578125, "step": 4361 }, { "epoch": 7.0, "learning_rate": 1.2336504161712246e-07, "logits/chosen": -1.3592658042907715, "logits/rejected": -1.3227920532226562, "logps/chosen": -185.77207946777344, "logps/rejected": -303.3753662109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.643695831298828, "rewards/margins": 13.453834533691406, "rewards/rejected": -22.097530364990234, "step": 4362 }, { "epoch": 7.0, "learning_rate": 1.2326595323028142e-07, "logits/chosen": -1.6914420127868652, "logits/rejected": -1.7723102569580078, "logps/chosen": -156.48257446289062, "logps/rejected": -307.8924255371094, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.793942451477051, "rewards/margins": 12.395276069641113, "rewards/rejected": -20.189218521118164, "step": 4363 }, { "epoch": 7.0, "learning_rate": 1.2316686484344035e-07, "logits/chosen": -1.6555708646774292, "logits/rejected": -1.6472747325897217, "logps/chosen": -147.65478515625, "logps/rejected": -276.0013732910156, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.3416218757629395, "rewards/margins": 13.329360008239746, "rewards/rejected": -18.67098045349121, "step": 4364 }, { "epoch": 7.01, "learning_rate": 1.2306777645659929e-07, "logits/chosen": -1.5685001611709595, "logits/rejected": -1.5460562705993652, "logps/chosen": -140.03431701660156, "logps/rejected": -287.5618591308594, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.312833309173584, "rewards/margins": 15.868837356567383, "rewards/rejected": -22.181671142578125, "step": 4365 }, { "epoch": 7.01, "learning_rate": 1.2296868806975822e-07, "logits/chosen": -1.3017094135284424, "logits/rejected": -1.4309937953948975, "logps/chosen": -156.5979766845703, "logps/rejected": -308.9305419921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.258066177368164, "rewards/margins": 12.527042388916016, "rewards/rejected": -19.78510856628418, "step": 4366 }, { "epoch": 7.01, "learning_rate": 1.2286959968291715e-07, "logits/chosen": -1.4879162311553955, "logits/rejected": -1.4898526668548584, "logps/chosen": -132.4640655517578, "logps/rejected": -280.7765197753906, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.587507247924805, "rewards/margins": 12.685235977172852, "rewards/rejected": -19.272743225097656, "step": 4367 }, { "epoch": 7.01, "learning_rate": 1.2277051129607609e-07, "logits/chosen": -1.458630084991455, "logits/rejected": -1.4607023000717163, "logps/chosen": -167.19024658203125, "logps/rejected": -323.1876220703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.481583595275879, "rewards/margins": 15.827878952026367, "rewards/rejected": -23.30946159362793, "step": 4368 }, { "epoch": 7.01, "learning_rate": 1.2267142290923502e-07, "logits/chosen": -1.4201716184616089, "logits/rejected": -1.4372873306274414, "logps/chosen": -188.40719604492188, "logps/rejected": -338.65093994140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -10.276450157165527, "rewards/margins": 13.215478897094727, "rewards/rejected": -23.491928100585938, "step": 4369 }, { "epoch": 7.01, "learning_rate": 1.2257233452239398e-07, "logits/chosen": -1.6339762210845947, "logits/rejected": -1.6343148946762085, "logps/chosen": -160.24766540527344, "logps/rejected": -297.3423156738281, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.3611321449279785, "rewards/margins": 13.183692932128906, "rewards/rejected": -19.544824600219727, "step": 4370 }, { "epoch": 7.02, "learning_rate": 1.224732461355529e-07, "logits/chosen": -1.626706600189209, "logits/rejected": -1.6109113693237305, "logps/chosen": -139.7855224609375, "logps/rejected": -265.82562255859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.05205774307251, "rewards/margins": 13.116009712219238, "rewards/rejected": -18.168067932128906, "step": 4371 }, { "epoch": 7.02, "learning_rate": 1.2237415774871185e-07, "logits/chosen": -1.365592122077942, "logits/rejected": -1.3639887571334839, "logps/chosen": -155.46438598632812, "logps/rejected": -315.6642150878906, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.844658374786377, "rewards/margins": 13.99251937866211, "rewards/rejected": -20.837177276611328, "step": 4372 }, { "epoch": 7.02, "learning_rate": 1.2227506936187078e-07, "logits/chosen": -1.5425270795822144, "logits/rejected": -1.5028343200683594, "logps/chosen": -163.34988403320312, "logps/rejected": -322.7779541015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.515161037445068, "rewards/margins": 16.696697235107422, "rewards/rejected": -24.211856842041016, "step": 4373 }, { "epoch": 7.02, "learning_rate": 1.221759809750297e-07, "logits/chosen": -1.6626607179641724, "logits/rejected": -1.540974497795105, "logps/chosen": -186.1359100341797, "logps/rejected": -257.8954772949219, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.79967737197876, "rewards/margins": 11.212881088256836, "rewards/rejected": -18.012557983398438, "step": 4374 }, { "epoch": 7.02, "learning_rate": 1.2207689258818867e-07, "logits/chosen": -1.6337803602218628, "logits/rejected": -1.6138213872909546, "logps/chosen": -110.348388671875, "logps/rejected": -243.9624786376953, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.222269058227539, "rewards/margins": 13.77242374420166, "rewards/rejected": -17.994693756103516, "step": 4375 }, { "epoch": 7.02, "learning_rate": 1.219778042013476e-07, "logits/chosen": -1.4480528831481934, "logits/rejected": -1.3929768800735474, "logps/chosen": -160.10691833496094, "logps/rejected": -260.9598693847656, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.675724029541016, "rewards/margins": 12.73061752319336, "rewards/rejected": -19.406341552734375, "step": 4376 }, { "epoch": 7.03, "learning_rate": 1.2187871581450654e-07, "logits/chosen": -1.445618748664856, "logits/rejected": -1.5290457010269165, "logps/chosen": -130.90147399902344, "logps/rejected": -333.85650634765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.311690807342529, "rewards/margins": 19.463947296142578, "rewards/rejected": -24.7756404876709, "step": 4377 }, { "epoch": 7.03, "learning_rate": 1.2177962742766547e-07, "logits/chosen": -1.6218866109848022, "logits/rejected": -1.61871337890625, "logps/chosen": -141.46035766601562, "logps/rejected": -334.3531188964844, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.819666862487793, "rewards/margins": 18.168058395385742, "rewards/rejected": -23.98772621154785, "step": 4378 }, { "epoch": 7.03, "learning_rate": 1.216805390408244e-07, "logits/chosen": -1.4919884204864502, "logits/rejected": -1.5171438455581665, "logps/chosen": -97.53219604492188, "logps/rejected": -223.1673126220703, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.256082534790039, "rewards/margins": 11.25899887084961, "rewards/rejected": -15.515082359313965, "step": 4379 }, { "epoch": 7.03, "learning_rate": 1.2158145065398334e-07, "logits/chosen": -1.6179909706115723, "logits/rejected": -1.6266682147979736, "logps/chosen": -167.18922424316406, "logps/rejected": -297.3836975097656, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.91915225982666, "rewards/margins": 12.567896842956543, "rewards/rejected": -19.487049102783203, "step": 4380 }, { "epoch": 7.03, "learning_rate": 1.214823622671423e-07, "logits/chosen": -1.332798719406128, "logits/rejected": -1.320021152496338, "logps/chosen": -180.3543243408203, "logps/rejected": -294.85595703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.139558792114258, "rewards/margins": 12.574420928955078, "rewards/rejected": -21.71398162841797, "step": 4381 }, { "epoch": 7.03, "learning_rate": 1.2138327388030123e-07, "logits/chosen": -1.5753852128982544, "logits/rejected": -1.4763171672821045, "logps/chosen": -106.01918029785156, "logps/rejected": -228.9042205810547, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.0765833854675293, "rewards/margins": 12.449565887451172, "rewards/rejected": -15.52614974975586, "step": 4382 }, { "epoch": 7.04, "learning_rate": 1.2128418549346017e-07, "logits/chosen": -1.3377175331115723, "logits/rejected": -1.4172230958938599, "logps/chosen": -197.42666625976562, "logps/rejected": -293.0606689453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.457674026489258, "rewards/margins": 11.102187156677246, "rewards/rejected": -19.559860229492188, "step": 4383 }, { "epoch": 7.04, "learning_rate": 1.211850971066191e-07, "logits/chosen": -1.42937171459198, "logits/rejected": -1.427314043045044, "logps/chosen": -189.89483642578125, "logps/rejected": -370.8692321777344, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.784836769104004, "rewards/margins": 16.446590423583984, "rewards/rejected": -26.231430053710938, "step": 4384 }, { "epoch": 7.04, "learning_rate": 1.2108600871977803e-07, "logits/chosen": -1.4280948638916016, "logits/rejected": -1.3845053911209106, "logps/chosen": -171.34970092773438, "logps/rejected": -299.17620849609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.193657875061035, "rewards/margins": 12.318739891052246, "rewards/rejected": -19.51239776611328, "step": 4385 }, { "epoch": 7.04, "learning_rate": 1.2098692033293696e-07, "logits/chosen": -1.370217204093933, "logits/rejected": -1.4574098587036133, "logps/chosen": -110.87786865234375, "logps/rejected": -256.34674072265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.341951370239258, "rewards/margins": 12.749320983886719, "rewards/rejected": -17.09127426147461, "step": 4386 }, { "epoch": 7.04, "learning_rate": 1.208878319460959e-07, "logits/chosen": -1.4369972944259644, "logits/rejected": -1.4989745616912842, "logps/chosen": -172.84030151367188, "logps/rejected": -303.0500183105469, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.561979293823242, "rewards/margins": 10.936203002929688, "rewards/rejected": -20.49818229675293, "step": 4387 }, { "epoch": 7.04, "learning_rate": 1.2078874355925486e-07, "logits/chosen": -1.5658349990844727, "logits/rejected": -1.5637575387954712, "logps/chosen": -146.93243408203125, "logps/rejected": -295.1549377441406, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.028261184692383, "rewards/margins": 14.471406936645508, "rewards/rejected": -20.49966812133789, "step": 4388 }, { "epoch": 7.04, "learning_rate": 1.206896551724138e-07, "logits/chosen": -1.497483730316162, "logits/rejected": -1.5429902076721191, "logps/chosen": -118.52787780761719, "logps/rejected": -274.13720703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.54818058013916, "rewards/margins": 15.234678268432617, "rewards/rejected": -19.782859802246094, "step": 4389 }, { "epoch": 7.05, "learning_rate": 1.2059056678557272e-07, "logits/chosen": -1.4976643323898315, "logits/rejected": -1.5186039209365845, "logps/chosen": -186.392578125, "logps/rejected": -350.80767822265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.995890617370605, "rewards/margins": 16.163089752197266, "rewards/rejected": -25.158981323242188, "step": 4390 }, { "epoch": 7.05, "learning_rate": 1.2049147839873166e-07, "logits/chosen": -1.3440977334976196, "logits/rejected": -1.3671637773513794, "logps/chosen": -172.62954711914062, "logps/rejected": -312.9458312988281, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.042278289794922, "rewards/margins": 13.587515830993652, "rewards/rejected": -22.62979507446289, "step": 4391 }, { "epoch": 7.05, "learning_rate": 1.203923900118906e-07, "logits/chosen": -1.5549403429031372, "logits/rejected": -1.4701956510543823, "logps/chosen": -162.96522521972656, "logps/rejected": -286.3807067871094, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.806276321411133, "rewards/margins": 13.089628219604492, "rewards/rejected": -21.895904541015625, "step": 4392 }, { "epoch": 7.05, "learning_rate": 1.2029330162504955e-07, "logits/chosen": -1.329545497894287, "logits/rejected": -1.3512823581695557, "logps/chosen": -166.4410400390625, "logps/rejected": -306.2109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.095890998840332, "rewards/margins": 13.523273468017578, "rewards/rejected": -21.619165420532227, "step": 4393 }, { "epoch": 7.05, "learning_rate": 1.2019421323820848e-07, "logits/chosen": -1.3834015130996704, "logits/rejected": -1.3561965227127075, "logps/chosen": -189.90713500976562, "logps/rejected": -316.8287353515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -10.710467338562012, "rewards/margins": 12.092619895935059, "rewards/rejected": -22.80308723449707, "step": 4394 }, { "epoch": 7.05, "learning_rate": 1.2009512485136742e-07, "logits/chosen": -1.4336225986480713, "logits/rejected": -1.4905768632888794, "logps/chosen": -199.7512969970703, "logps/rejected": -318.2454528808594, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -11.809566497802734, "rewards/margins": 12.135251998901367, "rewards/rejected": -23.9448184967041, "step": 4395 }, { "epoch": 7.06, "learning_rate": 1.1999603646452635e-07, "logits/chosen": -1.3141175508499146, "logits/rejected": -1.464215874671936, "logps/chosen": -155.70541381835938, "logps/rejected": -278.99810791015625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.354961395263672, "rewards/margins": 12.077619552612305, "rewards/rejected": -19.432580947875977, "step": 4396 }, { "epoch": 7.06, "learning_rate": 1.1989694807768528e-07, "logits/chosen": -1.4785109758377075, "logits/rejected": -1.395127296447754, "logps/chosen": -235.4036102294922, "logps/rejected": -297.4437561035156, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.262883186340332, "rewards/margins": 13.017287254333496, "rewards/rejected": -22.28017234802246, "step": 4397 }, { "epoch": 7.06, "learning_rate": 1.1979785969084422e-07, "logits/chosen": -1.6896998882293701, "logits/rejected": -1.651196002960205, "logps/chosen": -170.74319458007812, "logps/rejected": -334.74078369140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.166314125061035, "rewards/margins": 16.158979415893555, "rewards/rejected": -22.325292587280273, "step": 4398 }, { "epoch": 7.06, "learning_rate": 1.1969877130400315e-07, "logits/chosen": -1.4963104724884033, "logits/rejected": -1.5063445568084717, "logps/chosen": -113.156982421875, "logps/rejected": -248.58995056152344, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.673783302307129, "rewards/margins": 13.906438827514648, "rewards/rejected": -18.58022117614746, "step": 4399 }, { "epoch": 7.06, "learning_rate": 1.195996829171621e-07, "logits/chosen": -1.4310479164123535, "logits/rejected": -1.4770886898040771, "logps/chosen": -152.43527221679688, "logps/rejected": -301.9744567871094, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.1401872634887695, "rewards/margins": 13.811334609985352, "rewards/rejected": -20.951520919799805, "step": 4400 }, { "epoch": 7.06, "learning_rate": 1.1950059453032104e-07, "logits/chosen": -1.3650128841400146, "logits/rejected": -1.4018361568450928, "logps/chosen": -181.78424072265625, "logps/rejected": -295.04132080078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.376157760620117, "rewards/margins": 11.968466758728027, "rewards/rejected": -21.34462547302246, "step": 4401 }, { "epoch": 7.07, "learning_rate": 1.1940150614347998e-07, "logits/chosen": -1.4893929958343506, "logits/rejected": -1.5523968935012817, "logps/chosen": -127.37936401367188, "logps/rejected": -279.45147705078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.672035217285156, "rewards/margins": 14.61146354675293, "rewards/rejected": -20.283498764038086, "step": 4402 }, { "epoch": 7.07, "learning_rate": 1.193024177566389e-07, "logits/chosen": -1.4192248582839966, "logits/rejected": -1.5713485479354858, "logps/chosen": -147.90444946289062, "logps/rejected": -307.06976318359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.014726161956787, "rewards/margins": 14.839646339416504, "rewards/rejected": -20.854373931884766, "step": 4403 }, { "epoch": 7.07, "learning_rate": 1.1920332936979786e-07, "logits/chosen": -1.5117084980010986, "logits/rejected": -1.498105764389038, "logps/chosen": -209.72364807128906, "logps/rejected": -310.7216796875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -10.816614151000977, "rewards/margins": 11.283796310424805, "rewards/rejected": -22.100412368774414, "step": 4404 }, { "epoch": 7.07, "learning_rate": 1.1910424098295679e-07, "logits/chosen": -1.4139434099197388, "logits/rejected": -1.3381391763687134, "logps/chosen": -136.28648376464844, "logps/rejected": -291.208740234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.7991180419921875, "rewards/margins": 15.359137535095215, "rewards/rejected": -21.158254623413086, "step": 4405 }, { "epoch": 7.07, "learning_rate": 1.1900515259611572e-07, "logits/chosen": -1.3613402843475342, "logits/rejected": -1.3988492488861084, "logps/chosen": -144.20407104492188, "logps/rejected": -268.828857421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.536291599273682, "rewards/margins": 11.424067497253418, "rewards/rejected": -17.960359573364258, "step": 4406 }, { "epoch": 7.07, "learning_rate": 1.1890606420927467e-07, "logits/chosen": -1.5548901557922363, "logits/rejected": -1.5583322048187256, "logps/chosen": -184.01783752441406, "logps/rejected": -354.754638671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -11.044439315795898, "rewards/margins": 15.768331527709961, "rewards/rejected": -26.81277084350586, "step": 4407 }, { "epoch": 7.08, "learning_rate": 1.1880697582243362e-07, "logits/chosen": -1.4849333763122559, "logits/rejected": -1.4338558912277222, "logps/chosen": -192.2312774658203, "logps/rejected": -341.52923583984375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -9.298298835754395, "rewards/margins": 14.357309341430664, "rewards/rejected": -23.655609130859375, "step": 4408 }, { "epoch": 7.08, "learning_rate": 1.1870788743559254e-07, "logits/chosen": -1.361864686012268, "logits/rejected": -1.4718658924102783, "logps/chosen": -179.06112670898438, "logps/rejected": -349.86395263671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.71994686126709, "rewards/margins": 15.661255836486816, "rewards/rejected": -24.381202697753906, "step": 4409 }, { "epoch": 7.08, "learning_rate": 1.1860879904875148e-07, "logits/chosen": -1.431876540184021, "logits/rejected": -1.3553695678710938, "logps/chosen": -172.84556579589844, "logps/rejected": -266.1935119628906, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.005773544311523, "rewards/margins": 11.218879699707031, "rewards/rejected": -19.224655151367188, "step": 4410 }, { "epoch": 7.08, "learning_rate": 1.1850971066191042e-07, "logits/chosen": -1.5858781337738037, "logits/rejected": -1.5668963193893433, "logps/chosen": -136.26177978515625, "logps/rejected": -264.7826232910156, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.194519996643066, "rewards/margins": 13.416681289672852, "rewards/rejected": -18.6112003326416, "step": 4411 }, { "epoch": 7.08, "learning_rate": 1.1841062227506936e-07, "logits/chosen": -1.453546404838562, "logits/rejected": -1.4401999711990356, "logps/chosen": -160.61709594726562, "logps/rejected": -277.25213623046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.445626258850098, "rewards/margins": 12.429258346557617, "rewards/rejected": -20.87488555908203, "step": 4412 }, { "epoch": 7.08, "learning_rate": 1.1831153388822828e-07, "logits/chosen": -1.479432225227356, "logits/rejected": -1.487130880355835, "logps/chosen": -161.880615234375, "logps/rejected": -320.90679931640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.643210411071777, "rewards/margins": 14.85353946685791, "rewards/rejected": -22.496749877929688, "step": 4413 }, { "epoch": 7.09, "learning_rate": 1.1821244550138723e-07, "logits/chosen": -1.5551575422286987, "logits/rejected": -1.5682353973388672, "logps/chosen": -166.21414184570312, "logps/rejected": -285.204345703125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.92892599105835, "rewards/margins": 12.734193801879883, "rewards/rejected": -19.66312026977539, "step": 4414 }, { "epoch": 7.09, "learning_rate": 1.1811335711454618e-07, "logits/chosen": -1.4179699420928955, "logits/rejected": -1.3936232328414917, "logps/chosen": -106.32705688476562, "logps/rejected": -307.7656555175781, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.2879066467285156, "rewards/margins": 19.634695053100586, "rewards/rejected": -22.9226016998291, "step": 4415 }, { "epoch": 7.09, "learning_rate": 1.1801426872770511e-07, "logits/chosen": -1.5107581615447998, "logits/rejected": -1.4787640571594238, "logps/chosen": -165.77395629882812, "logps/rejected": -344.2603759765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.408445358276367, "rewards/margins": 16.36794662475586, "rewards/rejected": -24.776390075683594, "step": 4416 }, { "epoch": 7.09, "learning_rate": 1.1791518034086404e-07, "logits/chosen": -1.5861022472381592, "logits/rejected": -1.6524176597595215, "logps/chosen": -174.70314025878906, "logps/rejected": -336.43536376953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.959985256195068, "rewards/margins": 15.298704147338867, "rewards/rejected": -22.258689880371094, "step": 4417 }, { "epoch": 7.09, "learning_rate": 1.1781609195402298e-07, "logits/chosen": -1.385389804840088, "logits/rejected": -1.4482723474502563, "logps/chosen": -161.1041259765625, "logps/rejected": -311.85662841796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.588863372802734, "rewards/margins": 13.352644920349121, "rewards/rejected": -20.941509246826172, "step": 4418 }, { "epoch": 7.09, "learning_rate": 1.1771700356718192e-07, "logits/chosen": -1.5862985849380493, "logits/rejected": -1.560420274734497, "logps/chosen": -156.8106689453125, "logps/rejected": -259.51141357421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.413906097412109, "rewards/margins": 11.655350685119629, "rewards/rejected": -19.069255828857422, "step": 4419 }, { "epoch": 7.09, "learning_rate": 1.1761791518034087e-07, "logits/chosen": -1.4887361526489258, "logits/rejected": -1.4792457818984985, "logps/chosen": -152.6226806640625, "logps/rejected": -276.1060485839844, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.357394695281982, "rewards/margins": 12.943361282348633, "rewards/rejected": -19.300756454467773, "step": 4420 }, { "epoch": 7.1, "learning_rate": 1.1751882679349979e-07, "logits/chosen": -1.483611822128296, "logits/rejected": -1.482194185256958, "logps/chosen": -162.638916015625, "logps/rejected": -275.14410400390625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.554872512817383, "rewards/margins": 11.56513786315918, "rewards/rejected": -18.120010375976562, "step": 4421 }, { "epoch": 7.1, "learning_rate": 1.1741973840665874e-07, "logits/chosen": -1.6419750452041626, "logits/rejected": -1.6320631504058838, "logps/chosen": -102.1863021850586, "logps/rejected": -221.02142333984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.883676767349243, "rewards/margins": 12.313340187072754, "rewards/rejected": -16.197017669677734, "step": 4422 }, { "epoch": 7.1, "learning_rate": 1.1732065001981767e-07, "logits/chosen": -1.4688994884490967, "logits/rejected": -1.5430920124053955, "logps/chosen": -202.72364807128906, "logps/rejected": -332.2009582519531, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.985612869262695, "rewards/margins": 12.8300142288208, "rewards/rejected": -22.815628051757812, "step": 4423 }, { "epoch": 7.1, "learning_rate": 1.1722156163297662e-07, "logits/chosen": -1.364062786102295, "logits/rejected": -1.5183019638061523, "logps/chosen": -152.93630981445312, "logps/rejected": -335.76849365234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.837353706359863, "rewards/margins": 14.4450101852417, "rewards/rejected": -23.282363891601562, "step": 4424 }, { "epoch": 7.1, "learning_rate": 1.1712247324613554e-07, "logits/chosen": -1.3889567852020264, "logits/rejected": -1.402087688446045, "logps/chosen": -204.19203186035156, "logps/rejected": -337.25201416015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -11.569656372070312, "rewards/margins": 13.781377792358398, "rewards/rejected": -25.351036071777344, "step": 4425 }, { "epoch": 7.1, "learning_rate": 1.1702338485929448e-07, "logits/chosen": -1.6021469831466675, "logits/rejected": -1.6306980848312378, "logps/chosen": -137.90576171875, "logps/rejected": -277.25836181640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.114010810852051, "rewards/margins": 12.51942253112793, "rewards/rejected": -17.633432388305664, "step": 4426 }, { "epoch": 7.11, "learning_rate": 1.1692429647245343e-07, "logits/chosen": -1.4601731300354004, "logits/rejected": -1.3676235675811768, "logps/chosen": -186.82691955566406, "logps/rejected": -293.4724426269531, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.33127498626709, "rewards/margins": 12.503325462341309, "rewards/rejected": -21.83460235595703, "step": 4427 }, { "epoch": 7.11, "learning_rate": 1.1682520808561236e-07, "logits/chosen": -1.4145084619522095, "logits/rejected": -1.4080500602722168, "logps/chosen": -139.5091094970703, "logps/rejected": -256.03863525390625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.859859466552734, "rewards/margins": 11.615273475646973, "rewards/rejected": -17.47513198852539, "step": 4428 }, { "epoch": 7.11, "learning_rate": 1.1672611969877131e-07, "logits/chosen": -1.5423989295959473, "logits/rejected": -1.5693622827529907, "logps/chosen": -101.80856323242188, "logps/rejected": -274.6204833984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.9585373401641846, "rewards/margins": 15.323705673217773, "rewards/rejected": -19.282241821289062, "step": 4429 }, { "epoch": 7.11, "learning_rate": 1.1662703131193023e-07, "logits/chosen": -1.4712005853652954, "logits/rejected": -1.6331952810287476, "logps/chosen": -149.66722106933594, "logps/rejected": -301.5667419433594, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.093400001525879, "rewards/margins": 14.721809387207031, "rewards/rejected": -21.815208435058594, "step": 4430 }, { "epoch": 7.11, "learning_rate": 1.1652794292508918e-07, "logits/chosen": -1.374825119972229, "logits/rejected": -1.3486108779907227, "logps/chosen": -130.68577575683594, "logps/rejected": -262.4138488769531, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.213830947875977, "rewards/margins": 12.66818618774414, "rewards/rejected": -18.882017135620117, "step": 4431 }, { "epoch": 7.11, "learning_rate": 1.1642885453824811e-07, "logits/chosen": -1.582627773284912, "logits/rejected": -1.585771918296814, "logps/chosen": -150.88136291503906, "logps/rejected": -252.6105194091797, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.622832298278809, "rewards/margins": 10.834003448486328, "rewards/rejected": -18.45683479309082, "step": 4432 }, { "epoch": 7.12, "learning_rate": 1.1632976615140705e-07, "logits/chosen": -1.3234968185424805, "logits/rejected": -1.2927043437957764, "logps/chosen": -132.90679931640625, "logps/rejected": -291.08648681640625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.712241172790527, "rewards/margins": 15.087831497192383, "rewards/rejected": -19.800073623657227, "step": 4433 }, { "epoch": 7.12, "learning_rate": 1.1623067776456599e-07, "logits/chosen": -1.4405003786087036, "logits/rejected": -1.4827449321746826, "logps/chosen": -151.06802368164062, "logps/rejected": -291.8225402832031, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.084744930267334, "rewards/margins": 13.969551086425781, "rewards/rejected": -19.054296493530273, "step": 4434 }, { "epoch": 7.12, "learning_rate": 1.1613158937772492e-07, "logits/chosen": -1.3239552974700928, "logits/rejected": -1.3017215728759766, "logps/chosen": -158.8923797607422, "logps/rejected": -295.18505859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.426975727081299, "rewards/margins": 14.278682708740234, "rewards/rejected": -20.705657958984375, "step": 4435 }, { "epoch": 7.12, "learning_rate": 1.1603250099088387e-07, "logits/chosen": -1.5890172719955444, "logits/rejected": -1.6501963138580322, "logps/chosen": -148.562744140625, "logps/rejected": -280.26556396484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.415689468383789, "rewards/margins": 11.724197387695312, "rewards/rejected": -18.1398868560791, "step": 4436 }, { "epoch": 7.12, "learning_rate": 1.159334126040428e-07, "logits/chosen": -1.3769185543060303, "logits/rejected": -1.4375079870224, "logps/chosen": -161.35206604003906, "logps/rejected": -271.6094665527344, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.676819324493408, "rewards/margins": 12.75632381439209, "rewards/rejected": -19.433141708374023, "step": 4437 }, { "epoch": 7.12, "learning_rate": 1.1583432421720173e-07, "logits/chosen": -1.525823950767517, "logits/rejected": -1.4857747554779053, "logps/chosen": -136.2823486328125, "logps/rejected": -261.8940124511719, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.6173248291015625, "rewards/margins": 12.202953338623047, "rewards/rejected": -17.82027816772461, "step": 4438 }, { "epoch": 7.13, "learning_rate": 1.1573523583036067e-07, "logits/chosen": -1.457177996635437, "logits/rejected": -1.4627056121826172, "logps/chosen": -195.02987670898438, "logps/rejected": -339.2269592285156, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.36465072631836, "rewards/margins": 14.331276893615723, "rewards/rejected": -23.6959285736084, "step": 4439 }, { "epoch": 7.13, "learning_rate": 1.1563614744351961e-07, "logits/chosen": -1.6132800579071045, "logits/rejected": -1.7027281522750854, "logps/chosen": -108.86050415039062, "logps/rejected": -281.8426513671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.172152042388916, "rewards/margins": 15.87280559539795, "rewards/rejected": -20.044958114624023, "step": 4440 }, { "epoch": 7.13, "learning_rate": 1.1553705905667856e-07, "logits/chosen": -1.4645763635635376, "logits/rejected": -1.3885095119476318, "logps/chosen": -174.92025756835938, "logps/rejected": -311.6620178222656, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -9.319334030151367, "rewards/margins": 13.426764488220215, "rewards/rejected": -22.746097564697266, "step": 4441 }, { "epoch": 7.13, "learning_rate": 1.1543797066983748e-07, "logits/chosen": -1.557429552078247, "logits/rejected": -1.6758471727371216, "logps/chosen": -111.71353912353516, "logps/rejected": -316.83148193359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.401368618011475, "rewards/margins": 17.210872650146484, "rewards/rejected": -21.612241744995117, "step": 4442 }, { "epoch": 7.13, "learning_rate": 1.1533888228299643e-07, "logits/chosen": -1.47796630859375, "logits/rejected": -1.521164059638977, "logps/chosen": -150.79031372070312, "logps/rejected": -311.0825500488281, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.764964580535889, "rewards/margins": 14.160481452941895, "rewards/rejected": -21.925445556640625, "step": 4443 }, { "epoch": 7.13, "learning_rate": 1.1523979389615536e-07, "logits/chosen": -1.5690498352050781, "logits/rejected": -1.5780504941940308, "logps/chosen": -163.36813354492188, "logps/rejected": -302.6620788574219, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.280340194702148, "rewards/margins": 12.635034561157227, "rewards/rejected": -18.915374755859375, "step": 4444 }, { "epoch": 7.13, "learning_rate": 1.1514070550931431e-07, "logits/chosen": -1.4979910850524902, "logits/rejected": -1.5226258039474487, "logps/chosen": -178.41664123535156, "logps/rejected": -296.42340087890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -10.071675300598145, "rewards/margins": 11.806652069091797, "rewards/rejected": -21.878326416015625, "step": 4445 }, { "epoch": 7.14, "learning_rate": 1.1504161712247325e-07, "logits/chosen": -1.3546698093414307, "logits/rejected": -1.3058072328567505, "logps/chosen": -142.22572326660156, "logps/rejected": -278.24273681640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.914264678955078, "rewards/margins": 12.88447093963623, "rewards/rejected": -19.798736572265625, "step": 4446 }, { "epoch": 7.14, "learning_rate": 1.1494252873563217e-07, "logits/chosen": -1.361534833908081, "logits/rejected": -1.3801745176315308, "logps/chosen": -154.43310546875, "logps/rejected": -275.89190673828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.062540054321289, "rewards/margins": 12.86067008972168, "rewards/rejected": -19.92321014404297, "step": 4447 }, { "epoch": 7.14, "learning_rate": 1.1484344034879112e-07, "logits/chosen": -1.4785490036010742, "logits/rejected": -1.4345271587371826, "logps/chosen": -151.44174194335938, "logps/rejected": -259.41326904296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.118020057678223, "rewards/margins": 10.922743797302246, "rewards/rejected": -18.04076385498047, "step": 4448 }, { "epoch": 7.14, "learning_rate": 1.1474435196195005e-07, "logits/chosen": -1.505431890487671, "logits/rejected": -1.5208663940429688, "logps/chosen": -181.7689971923828, "logps/rejected": -360.4417724609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.3892822265625, "rewards/margins": 18.06121826171875, "rewards/rejected": -27.450502395629883, "step": 4449 }, { "epoch": 7.14, "learning_rate": 1.14645263575109e-07, "logits/chosen": -1.317400336265564, "logits/rejected": -1.4054421186447144, "logps/chosen": -116.97425079345703, "logps/rejected": -286.3268127441406, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.883481979370117, "rewards/margins": 15.518569946289062, "rewards/rejected": -20.40205192565918, "step": 4450 }, { "epoch": 7.14, "learning_rate": 1.1454617518826792e-07, "logits/chosen": -1.444591760635376, "logits/rejected": -1.3637341260910034, "logps/chosen": -205.36611938476562, "logps/rejected": -295.0605163574219, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.934293746948242, "rewards/margins": 11.134123802185059, "rewards/rejected": -21.068416595458984, "step": 4451 }, { "epoch": 7.15, "learning_rate": 1.1444708680142687e-07, "logits/chosen": -1.4698246717453003, "logits/rejected": -1.4161556959152222, "logps/chosen": -171.14166259765625, "logps/rejected": -298.9524230957031, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.6643500328063965, "rewards/margins": 12.31596565246582, "rewards/rejected": -19.980316162109375, "step": 4452 }, { "epoch": 7.15, "learning_rate": 1.1434799841458581e-07, "logits/chosen": -1.4975136518478394, "logits/rejected": -1.5147759914398193, "logps/chosen": -137.17042541503906, "logps/rejected": -250.16195678710938, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.0959625244140625, "rewards/margins": 12.604036331176758, "rewards/rejected": -18.69999885559082, "step": 4453 }, { "epoch": 7.15, "learning_rate": 1.1424891002774475e-07, "logits/chosen": -1.633681058883667, "logits/rejected": -1.658503770828247, "logps/chosen": -152.09393310546875, "logps/rejected": -329.01788330078125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.7407097816467285, "rewards/margins": 18.049171447753906, "rewards/rejected": -23.789880752563477, "step": 4454 }, { "epoch": 7.15, "learning_rate": 1.1414982164090368e-07, "logits/chosen": -1.5273635387420654, "logits/rejected": -1.6087315082550049, "logps/chosen": -157.17156982421875, "logps/rejected": -362.447021484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.445289134979248, "rewards/margins": 18.399507522583008, "rewards/rejected": -24.84479522705078, "step": 4455 }, { "epoch": 7.15, "learning_rate": 1.1405073325406261e-07, "logits/chosen": -1.5470829010009766, "logits/rejected": -1.5684939622879028, "logps/chosen": -148.16543579101562, "logps/rejected": -272.06072998046875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.978500843048096, "rewards/margins": 12.296247482299805, "rewards/rejected": -18.274749755859375, "step": 4456 }, { "epoch": 7.15, "learning_rate": 1.1395164486722156e-07, "logits/chosen": -1.3739691972732544, "logits/rejected": -1.4242299795150757, "logps/chosen": -168.30699157714844, "logps/rejected": -318.8625183105469, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.307641983032227, "rewards/margins": 14.911429405212402, "rewards/rejected": -22.219070434570312, "step": 4457 }, { "epoch": 7.16, "learning_rate": 1.1385255648038049e-07, "logits/chosen": -1.441756010055542, "logits/rejected": -1.4965020418167114, "logps/chosen": -154.40444946289062, "logps/rejected": -286.1380615234375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.041849613189697, "rewards/margins": 13.107115745544434, "rewards/rejected": -20.14896583557129, "step": 4458 }, { "epoch": 7.16, "learning_rate": 1.1375346809353943e-07, "logits/chosen": -1.5607435703277588, "logits/rejected": -1.6428611278533936, "logps/chosen": -122.37406921386719, "logps/rejected": -306.259033203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.905029296875, "rewards/margins": 15.576781272888184, "rewards/rejected": -20.481809616088867, "step": 4459 }, { "epoch": 7.16, "learning_rate": 1.1365437970669837e-07, "logits/chosen": -1.4708340167999268, "logits/rejected": -1.3857134580612183, "logps/chosen": -152.0325469970703, "logps/rejected": -266.1288146972656, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.03642463684082, "rewards/margins": 13.444053649902344, "rewards/rejected": -18.480478286743164, "step": 4460 }, { "epoch": 7.16, "learning_rate": 1.135552913198573e-07, "logits/chosen": -1.6505342721939087, "logits/rejected": -1.7063852548599243, "logps/chosen": -103.31838989257812, "logps/rejected": -240.4877166748047, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.947434663772583, "rewards/margins": 12.046475410461426, "rewards/rejected": -15.99390983581543, "step": 4461 }, { "epoch": 7.16, "learning_rate": 1.1345620293301625e-07, "logits/chosen": -1.5462641716003418, "logits/rejected": -1.6134413480758667, "logps/chosen": -108.82008361816406, "logps/rejected": -249.1681365966797, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.5604941844940186, "rewards/margins": 14.273246765136719, "rewards/rejected": -17.833740234375, "step": 4462 }, { "epoch": 7.16, "learning_rate": 1.1335711454617517e-07, "logits/chosen": -1.4984015226364136, "logits/rejected": -1.5033825635910034, "logps/chosen": -168.34661865234375, "logps/rejected": -249.28280639648438, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.406966686248779, "rewards/margins": 10.36064338684082, "rewards/rejected": -17.767610549926758, "step": 4463 }, { "epoch": 7.17, "learning_rate": 1.1325802615933412e-07, "logits/chosen": -1.5690394639968872, "logits/rejected": -1.6479684114456177, "logps/chosen": -137.28273010253906, "logps/rejected": -326.61102294921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.465676307678223, "rewards/margins": 18.75271224975586, "rewards/rejected": -24.218387603759766, "step": 4464 }, { "epoch": 7.17, "learning_rate": 1.1315893777249307e-07, "logits/chosen": -1.5465962886810303, "logits/rejected": -1.5554571151733398, "logps/chosen": -168.92828369140625, "logps/rejected": -280.0191955566406, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.085268020629883, "rewards/margins": 12.743964195251465, "rewards/rejected": -20.829233169555664, "step": 4465 }, { "epoch": 7.17, "learning_rate": 1.13059849385652e-07, "logits/chosen": -1.4660253524780273, "logits/rejected": -1.5401372909545898, "logps/chosen": -173.46290588378906, "logps/rejected": -332.5445556640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.247304916381836, "rewards/margins": 12.407906532287598, "rewards/rejected": -21.655210494995117, "step": 4466 }, { "epoch": 7.17, "learning_rate": 1.1296076099881093e-07, "logits/chosen": -1.730126142501831, "logits/rejected": -1.6602599620819092, "logps/chosen": -120.71408081054688, "logps/rejected": -280.5617370605469, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.68569278717041, "rewards/margins": 16.34003448486328, "rewards/rejected": -20.025728225708008, "step": 4467 }, { "epoch": 7.17, "learning_rate": 1.1286167261196987e-07, "logits/chosen": -1.5424342155456543, "logits/rejected": -1.4818556308746338, "logps/chosen": -131.2424774169922, "logps/rejected": -276.4385681152344, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.558210849761963, "rewards/margins": 14.857831001281738, "rewards/rejected": -19.41604232788086, "step": 4468 }, { "epoch": 7.17, "learning_rate": 1.1276258422512881e-07, "logits/chosen": -1.384629249572754, "logits/rejected": -1.3572701215744019, "logps/chosen": -177.1447296142578, "logps/rejected": -312.53900146484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.79460620880127, "rewards/margins": 13.560791015625, "rewards/rejected": -23.355398178100586, "step": 4469 }, { "epoch": 7.17, "learning_rate": 1.1266349583828775e-07, "logits/chosen": -1.5451263189315796, "logits/rejected": -1.4686702489852905, "logps/chosen": -172.42152404785156, "logps/rejected": -265.1445617675781, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.076458930969238, "rewards/margins": 11.794326782226562, "rewards/rejected": -18.870784759521484, "step": 4470 }, { "epoch": 7.18, "learning_rate": 1.1256440745144669e-07, "logits/chosen": -1.4290134906768799, "logits/rejected": -1.5221740007400513, "logps/chosen": -165.48565673828125, "logps/rejected": -345.3985900878906, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.863882064819336, "rewards/margins": 15.052719116210938, "rewards/rejected": -23.916603088378906, "step": 4471 }, { "epoch": 7.18, "learning_rate": 1.1246531906460563e-07, "logits/chosen": -1.431606411933899, "logits/rejected": -1.4714363813400269, "logps/chosen": -162.3533935546875, "logps/rejected": -279.32879638671875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.669551849365234, "rewards/margins": 11.912199020385742, "rewards/rejected": -20.581750869750977, "step": 4472 }, { "epoch": 7.18, "learning_rate": 1.1236623067776456e-07, "logits/chosen": -1.490502119064331, "logits/rejected": -1.43410325050354, "logps/chosen": -199.19113159179688, "logps/rejected": -330.6953430175781, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -10.520463943481445, "rewards/margins": 13.581968307495117, "rewards/rejected": -24.102432250976562, "step": 4473 }, { "epoch": 7.18, "learning_rate": 1.122671422909235e-07, "logits/chosen": -1.7095839977264404, "logits/rejected": -1.6550215482711792, "logps/chosen": -169.77423095703125, "logps/rejected": -300.9315490722656, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.238997936248779, "rewards/margins": 15.138381958007812, "rewards/rejected": -21.37738037109375, "step": 4474 }, { "epoch": 7.18, "learning_rate": 1.1216805390408244e-07, "logits/chosen": -1.405106544494629, "logits/rejected": -1.5599141120910645, "logps/chosen": -174.13082885742188, "logps/rejected": -302.0238952636719, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.711543083190918, "rewards/margins": 11.395574569702148, "rewards/rejected": -21.10711669921875, "step": 4475 }, { "epoch": 7.18, "learning_rate": 1.1206896551724137e-07, "logits/chosen": -1.4798306226730347, "logits/rejected": -1.4681893587112427, "logps/chosen": -149.54212951660156, "logps/rejected": -281.2727966308594, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.086391925811768, "rewards/margins": 13.407508850097656, "rewards/rejected": -19.493900299072266, "step": 4476 }, { "epoch": 7.19, "learning_rate": 1.119698771304003e-07, "logits/chosen": -1.405415654182434, "logits/rejected": -1.3924766778945923, "logps/chosen": -178.48556518554688, "logps/rejected": -271.23748779296875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.524254322052002, "rewards/margins": 9.85496711730957, "rewards/rejected": -17.379222869873047, "step": 4477 }, { "epoch": 7.19, "learning_rate": 1.1187078874355925e-07, "logits/chosen": -1.5242599248886108, "logits/rejected": -1.5426831245422363, "logps/chosen": -139.65513610839844, "logps/rejected": -240.63101196289062, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.065365791320801, "rewards/margins": 9.095728874206543, "rewards/rejected": -15.161094665527344, "step": 4478 }, { "epoch": 7.19, "learning_rate": 1.117717003567182e-07, "logits/chosen": -1.589646339416504, "logits/rejected": -1.5648841857910156, "logps/chosen": -112.52920532226562, "logps/rejected": -248.1256866455078, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.782210350036621, "rewards/margins": 13.190191268920898, "rewards/rejected": -17.972400665283203, "step": 4479 }, { "epoch": 7.19, "learning_rate": 1.1167261196987712e-07, "logits/chosen": -1.4224445819854736, "logits/rejected": -1.4717936515808105, "logps/chosen": -177.76217651367188, "logps/rejected": -301.51837158203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -10.192911148071289, "rewards/margins": 12.901500701904297, "rewards/rejected": -23.094411849975586, "step": 4480 }, { "epoch": 7.19, "learning_rate": 1.1157352358303606e-07, "logits/chosen": -1.455580472946167, "logits/rejected": -1.4779707193374634, "logps/chosen": -194.71875, "logps/rejected": -322.84454345703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.99976634979248, "rewards/margins": 13.660626411437988, "rewards/rejected": -23.66039276123047, "step": 4481 }, { "epoch": 7.19, "learning_rate": 1.11474435196195e-07, "logits/chosen": -1.551810383796692, "logits/rejected": -1.49295175075531, "logps/chosen": -214.46377563476562, "logps/rejected": -323.32861328125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -10.0935697555542, "rewards/margins": 11.321246147155762, "rewards/rejected": -21.41481590270996, "step": 4482 }, { "epoch": 7.2, "learning_rate": 1.1137534680935394e-07, "logits/chosen": -1.5343575477600098, "logits/rejected": -1.5455467700958252, "logps/chosen": -111.05654907226562, "logps/rejected": -263.84039306640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.057274341583252, "rewards/margins": 14.446259498596191, "rewards/rejected": -18.50353240966797, "step": 4483 }, { "epoch": 7.2, "learning_rate": 1.1127625842251286e-07, "logits/chosen": -1.4335224628448486, "logits/rejected": -1.347228765487671, "logps/chosen": -180.06182861328125, "logps/rejected": -308.5184631347656, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.961424827575684, "rewards/margins": 14.306244850158691, "rewards/rejected": -23.267671585083008, "step": 4484 }, { "epoch": 7.2, "learning_rate": 1.1117717003567181e-07, "logits/chosen": -1.3963103294372559, "logits/rejected": -1.3596796989440918, "logps/chosen": -158.6099853515625, "logps/rejected": -295.9410400390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.79271125793457, "rewards/margins": 13.601800918579102, "rewards/rejected": -20.394512176513672, "step": 4485 }, { "epoch": 7.2, "learning_rate": 1.1107808164883076e-07, "logits/chosen": -1.5625072717666626, "logits/rejected": -1.4849281311035156, "logps/chosen": -125.5058364868164, "logps/rejected": -224.4513397216797, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.8290112018585205, "rewards/margins": 12.431656837463379, "rewards/rejected": -16.26066780090332, "step": 4486 }, { "epoch": 7.2, "learning_rate": 1.1097899326198969e-07, "logits/chosen": -1.3261325359344482, "logits/rejected": -1.3719425201416016, "logps/chosen": -182.37612915039062, "logps/rejected": -298.97308349609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.615578651428223, "rewards/margins": 13.039054870605469, "rewards/rejected": -20.654632568359375, "step": 4487 }, { "epoch": 7.2, "learning_rate": 1.1087990487514862e-07, "logits/chosen": -1.6193662881851196, "logits/rejected": -1.5166277885437012, "logps/chosen": -166.0712890625, "logps/rejected": -274.9893798828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.84495735168457, "rewards/margins": 13.286054611206055, "rewards/rejected": -20.131011962890625, "step": 4488 }, { "epoch": 7.21, "learning_rate": 1.1078081648830756e-07, "logits/chosen": -1.5503712892532349, "logits/rejected": -1.5378947257995605, "logps/chosen": -179.97206115722656, "logps/rejected": -331.73040771484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.248779296875, "rewards/margins": 15.138498306274414, "rewards/rejected": -23.387279510498047, "step": 4489 }, { "epoch": 7.21, "learning_rate": 1.106817281014665e-07, "logits/chosen": -1.5703132152557373, "logits/rejected": -1.5212407112121582, "logps/chosen": -226.54873657226562, "logps/rejected": -354.0195007324219, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -10.438102722167969, "rewards/margins": 14.573719024658203, "rewards/rejected": -25.011821746826172, "step": 4490 }, { "epoch": 7.21, "learning_rate": 1.1058263971462545e-07, "logits/chosen": -1.420419454574585, "logits/rejected": -1.3829654455184937, "logps/chosen": -158.77938842773438, "logps/rejected": -287.09698486328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.895576477050781, "rewards/margins": 12.402054786682129, "rewards/rejected": -20.297630310058594, "step": 4491 }, { "epoch": 7.21, "learning_rate": 1.1048355132778437e-07, "logits/chosen": -1.5890192985534668, "logits/rejected": -1.5488535165786743, "logps/chosen": -175.7840576171875, "logps/rejected": -326.34149169921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.55239200592041, "rewards/margins": 14.421974182128906, "rewards/rejected": -21.974365234375, "step": 4492 }, { "epoch": 7.21, "learning_rate": 1.1038446294094332e-07, "logits/chosen": -1.4882782697677612, "logits/rejected": -1.5441523790359497, "logps/chosen": -176.9685821533203, "logps/rejected": -279.03997802734375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.820552825927734, "rewards/margins": 9.516609191894531, "rewards/rejected": -18.337162017822266, "step": 4493 }, { "epoch": 7.21, "learning_rate": 1.1028537455410225e-07, "logits/chosen": -1.631833553314209, "logits/rejected": -1.568556308746338, "logps/chosen": -205.90176391601562, "logps/rejected": -330.54571533203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -10.624021530151367, "rewards/margins": 14.169875144958496, "rewards/rejected": -24.793895721435547, "step": 4494 }, { "epoch": 7.22, "learning_rate": 1.101862861672612e-07, "logits/chosen": -1.4587687253952026, "logits/rejected": -1.5807727575302124, "logps/chosen": -122.28618621826172, "logps/rejected": -273.742919921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.651030540466309, "rewards/margins": 13.114721298217773, "rewards/rejected": -18.765750885009766, "step": 4495 }, { "epoch": 7.22, "learning_rate": 1.1008719778042013e-07, "logits/chosen": -1.5782413482666016, "logits/rejected": -1.4885382652282715, "logps/chosen": -177.33740234375, "logps/rejected": -298.45166015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.096373081207275, "rewards/margins": 15.4207763671875, "rewards/rejected": -22.517148971557617, "step": 4496 }, { "epoch": 7.22, "learning_rate": 1.0998810939357906e-07, "logits/chosen": -1.474774718284607, "logits/rejected": -1.5084195137023926, "logps/chosen": -148.48782348632812, "logps/rejected": -286.3673095703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.353923797607422, "rewards/margins": 14.272510528564453, "rewards/rejected": -20.626434326171875, "step": 4497 }, { "epoch": 7.22, "learning_rate": 1.0988902100673801e-07, "logits/chosen": -1.3779891729354858, "logits/rejected": -1.3686630725860596, "logps/chosen": -164.83956909179688, "logps/rejected": -302.1667175292969, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.558714866638184, "rewards/margins": 13.544612884521484, "rewards/rejected": -22.103328704833984, "step": 4498 }, { "epoch": 7.22, "learning_rate": 1.0978993261989694e-07, "logits/chosen": -1.6238688230514526, "logits/rejected": -1.6401951313018799, "logps/chosen": -126.90721893310547, "logps/rejected": -264.90911865234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.5198121070861816, "rewards/margins": 13.504267692565918, "rewards/rejected": -17.024080276489258, "step": 4499 }, { "epoch": 7.22, "learning_rate": 1.0969084423305589e-07, "logits/chosen": -1.564940094947815, "logits/rejected": -1.527721881866455, "logps/chosen": -115.49974060058594, "logps/rejected": -226.72396850585938, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.110440254211426, "rewards/margins": 13.147483825683594, "rewards/rejected": -16.257925033569336, "step": 4500 }, { "epoch": 7.22, "learning_rate": 1.0959175584621481e-07, "logits/chosen": -1.4791845083236694, "logits/rejected": -1.4481521844863892, "logps/chosen": -136.6319580078125, "logps/rejected": -267.668701171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.383074760437012, "rewards/margins": 14.415462493896484, "rewards/rejected": -19.798538208007812, "step": 4501 }, { "epoch": 7.23, "learning_rate": 1.0949266745937376e-07, "logits/chosen": -1.544517993927002, "logits/rejected": -1.621119499206543, "logps/chosen": -180.95120239257812, "logps/rejected": -306.50030517578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.7985258102417, "rewards/margins": 12.58233642578125, "rewards/rejected": -21.380861282348633, "step": 4502 }, { "epoch": 7.23, "learning_rate": 1.0939357907253269e-07, "logits/chosen": -1.4296298027038574, "logits/rejected": -1.4860605001449585, "logps/chosen": -148.98703002929688, "logps/rejected": -318.3758239746094, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.293027400970459, "rewards/margins": 15.708171844482422, "rewards/rejected": -23.00119972229004, "step": 4503 }, { "epoch": 7.23, "learning_rate": 1.0929449068569164e-07, "logits/chosen": -1.4504823684692383, "logits/rejected": -1.4094116687774658, "logps/chosen": -193.43287658691406, "logps/rejected": -336.18341064453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -10.603006362915039, "rewards/margins": 14.809040069580078, "rewards/rejected": -25.412046432495117, "step": 4504 }, { "epoch": 7.23, "learning_rate": 1.0919540229885057e-07, "logits/chosen": -1.5396747589111328, "logits/rejected": -1.5017549991607666, "logps/chosen": -142.2953643798828, "logps/rejected": -299.8862609863281, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.979361534118652, "rewards/margins": 15.074559211730957, "rewards/rejected": -22.05392074584961, "step": 4505 }, { "epoch": 7.23, "learning_rate": 1.090963139120095e-07, "logits/chosen": -1.7103101015090942, "logits/rejected": -1.7669146060943604, "logps/chosen": -91.86190795898438, "logps/rejected": -255.84701538085938, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.5695533752441406, "rewards/margins": 14.744503021240234, "rewards/rejected": -17.314058303833008, "step": 4506 }, { "epoch": 7.23, "learning_rate": 1.0899722552516845e-07, "logits/chosen": -1.5399473905563354, "logits/rejected": -1.6394548416137695, "logps/chosen": -163.10610961914062, "logps/rejected": -372.36083984375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.315756320953369, "rewards/margins": 17.667261123657227, "rewards/rejected": -23.983016967773438, "step": 4507 }, { "epoch": 7.24, "learning_rate": 1.0889813713832738e-07, "logits/chosen": -1.704698920249939, "logits/rejected": -1.7729016542434692, "logps/chosen": -154.2325439453125, "logps/rejected": -288.46533203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.4761810302734375, "rewards/margins": 13.711715698242188, "rewards/rejected": -19.187896728515625, "step": 4508 }, { "epoch": 7.24, "learning_rate": 1.0879904875148632e-07, "logits/chosen": -1.4802751541137695, "logits/rejected": -1.4764491319656372, "logps/chosen": -143.41993713378906, "logps/rejected": -267.24481201171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.018106937408447, "rewards/margins": 13.078242301940918, "rewards/rejected": -19.096349716186523, "step": 4509 }, { "epoch": 7.24, "learning_rate": 1.0869996036464525e-07, "logits/chosen": -1.4748517274856567, "logits/rejected": -1.3933707475662231, "logps/chosen": -166.08929443359375, "logps/rejected": -261.2649230957031, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.192656993865967, "rewards/margins": 11.087749481201172, "rewards/rejected": -18.280405044555664, "step": 4510 }, { "epoch": 7.24, "learning_rate": 1.086008719778042e-07, "logits/chosen": -1.3483836650848389, "logits/rejected": -1.3886488676071167, "logps/chosen": -167.26980590820312, "logps/rejected": -262.3427734375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.425558090209961, "rewards/margins": 10.493950843811035, "rewards/rejected": -17.919509887695312, "step": 4511 }, { "epoch": 7.24, "learning_rate": 1.0850178359096314e-07, "logits/chosen": -1.359569787979126, "logits/rejected": -1.4648995399475098, "logps/chosen": -172.8695526123047, "logps/rejected": -305.9479675292969, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.800233840942383, "rewards/margins": 12.48349380493164, "rewards/rejected": -21.283727645874023, "step": 4512 }, { "epoch": 7.24, "learning_rate": 1.0840269520412206e-07, "logits/chosen": -1.3028745651245117, "logits/rejected": -1.342869520187378, "logps/chosen": -170.48007202148438, "logps/rejected": -326.51397705078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.185457706451416, "rewards/margins": 17.376605987548828, "rewards/rejected": -23.562063217163086, "step": 4513 }, { "epoch": 7.25, "learning_rate": 1.0830360681728101e-07, "logits/chosen": -1.5547959804534912, "logits/rejected": -1.6583147048950195, "logps/chosen": -120.38232421875, "logps/rejected": -299.99188232421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.359068870544434, "rewards/margins": 15.136687278747559, "rewards/rejected": -20.495756149291992, "step": 4514 }, { "epoch": 7.25, "learning_rate": 1.0820451843043994e-07, "logits/chosen": -1.3409557342529297, "logits/rejected": -1.2920868396759033, "logps/chosen": -109.53652954101562, "logps/rejected": -222.67977905273438, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.969639778137207, "rewards/margins": 11.308141708374023, "rewards/rejected": -16.277782440185547, "step": 4515 }, { "epoch": 7.25, "learning_rate": 1.0810543004359889e-07, "logits/chosen": -1.3986499309539795, "logits/rejected": -1.46505868434906, "logps/chosen": -168.3353271484375, "logps/rejected": -339.884765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.171112060546875, "rewards/margins": 16.213525772094727, "rewards/rejected": -24.38463592529297, "step": 4516 }, { "epoch": 7.25, "learning_rate": 1.0800634165675784e-07, "logits/chosen": -1.5528454780578613, "logits/rejected": -1.6383026838302612, "logps/chosen": -190.4738006591797, "logps/rejected": -296.63037109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.604737281799316, "rewards/margins": 9.557619094848633, "rewards/rejected": -19.162355422973633, "step": 4517 }, { "epoch": 7.25, "learning_rate": 1.0790725326991676e-07, "logits/chosen": -1.3121531009674072, "logits/rejected": -1.3651115894317627, "logps/chosen": -168.50955200195312, "logps/rejected": -315.0152282714844, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.960724830627441, "rewards/margins": 14.044177055358887, "rewards/rejected": -22.004901885986328, "step": 4518 }, { "epoch": 7.25, "learning_rate": 1.078081648830757e-07, "logits/chosen": -1.3662840127944946, "logits/rejected": -1.4389960765838623, "logps/chosen": -175.65724182128906, "logps/rejected": -295.2848815917969, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.204470634460449, "rewards/margins": 12.96906852722168, "rewards/rejected": -20.173540115356445, "step": 4519 }, { "epoch": 7.26, "learning_rate": 1.0770907649623464e-07, "logits/chosen": -1.3955167531967163, "logits/rejected": -1.3213417530059814, "logps/chosen": -153.7684326171875, "logps/rejected": -293.072509765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.33896017074585, "rewards/margins": 15.154254913330078, "rewards/rejected": -21.493213653564453, "step": 4520 }, { "epoch": 7.26, "learning_rate": 1.0760998810939358e-07, "logits/chosen": -1.6157830953598022, "logits/rejected": -1.6400020122528076, "logps/chosen": -174.51461791992188, "logps/rejected": -297.8363952636719, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.644630432128906, "rewards/margins": 11.267107009887695, "rewards/rejected": -19.9117374420166, "step": 4521 }, { "epoch": 7.26, "learning_rate": 1.075108997225525e-07, "logits/chosen": -1.5451730489730835, "logits/rejected": -1.4743618965148926, "logps/chosen": -195.43272399902344, "logps/rejected": -328.095947265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.728737831115723, "rewards/margins": 14.080169677734375, "rewards/rejected": -23.80890655517578, "step": 4522 }, { "epoch": 7.26, "learning_rate": 1.0741181133571145e-07, "logits/chosen": -1.3475868701934814, "logits/rejected": -1.2962396144866943, "logps/chosen": -149.30039978027344, "logps/rejected": -262.15008544921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.170733451843262, "rewards/margins": 12.130914688110352, "rewards/rejected": -20.301647186279297, "step": 4523 }, { "epoch": 7.26, "learning_rate": 1.073127229488704e-07, "logits/chosen": -1.3454378843307495, "logits/rejected": -1.4265892505645752, "logps/chosen": -117.50833129882812, "logps/rejected": -308.58428955078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.8180058002471924, "rewards/margins": 17.139118194580078, "rewards/rejected": -20.957124710083008, "step": 4524 }, { "epoch": 7.26, "learning_rate": 1.0721363456202933e-07, "logits/chosen": -1.5613948106765747, "logits/rejected": -1.6173979043960571, "logps/chosen": -141.45318603515625, "logps/rejected": -304.76995849609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.831787586212158, "rewards/margins": 15.147156715393066, "rewards/rejected": -20.978944778442383, "step": 4525 }, { "epoch": 7.26, "learning_rate": 1.0711454617518826e-07, "logits/chosen": -1.2983345985412598, "logits/rejected": -1.3483057022094727, "logps/chosen": -174.48638916015625, "logps/rejected": -277.8638610839844, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -9.02206039428711, "rewards/margins": 11.738786697387695, "rewards/rejected": -20.760847091674805, "step": 4526 }, { "epoch": 7.27, "learning_rate": 1.070154577883472e-07, "logits/chosen": -1.3234343528747559, "logits/rejected": -1.3763048648834229, "logps/chosen": -176.82061767578125, "logps/rejected": -349.3812255859375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -9.587871551513672, "rewards/margins": 15.314958572387695, "rewards/rejected": -24.902830123901367, "step": 4527 }, { "epoch": 7.27, "learning_rate": 1.0691636940150614e-07, "logits/chosen": -1.5075929164886475, "logits/rejected": -1.4846445322036743, "logps/chosen": -150.1634521484375, "logps/rejected": -278.40765380859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.550859451293945, "rewards/margins": 14.760583877563477, "rewards/rejected": -20.311443328857422, "step": 4528 }, { "epoch": 7.27, "learning_rate": 1.0681728101466507e-07, "logits/chosen": -1.4691309928894043, "logits/rejected": -1.4017503261566162, "logps/chosen": -137.88058471679688, "logps/rejected": -253.79092407226562, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.19529914855957, "rewards/margins": 12.694158554077148, "rewards/rejected": -18.88945770263672, "step": 4529 }, { "epoch": 7.27, "learning_rate": 1.0671819262782401e-07, "logits/chosen": -1.5551271438598633, "logits/rejected": -1.5933513641357422, "logps/chosen": -121.57408905029297, "logps/rejected": -287.60858154296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.675888538360596, "rewards/margins": 14.336928367614746, "rewards/rejected": -20.0128173828125, "step": 4530 }, { "epoch": 7.27, "learning_rate": 1.0661910424098295e-07, "logits/chosen": -1.58342444896698, "logits/rejected": -1.6011972427368164, "logps/chosen": -161.8117218017578, "logps/rejected": -294.4410095214844, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.957898139953613, "rewards/margins": 13.859831809997559, "rewards/rejected": -20.817729949951172, "step": 4531 }, { "epoch": 7.27, "learning_rate": 1.0652001585414189e-07, "logits/chosen": -1.4810792207717896, "logits/rejected": -1.5193748474121094, "logps/chosen": -138.24905395507812, "logps/rejected": -298.7330627441406, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.781042098999023, "rewards/margins": 14.71879768371582, "rewards/rejected": -22.499839782714844, "step": 4532 }, { "epoch": 7.28, "learning_rate": 1.0642092746730083e-07, "logits/chosen": -1.4313104152679443, "logits/rejected": -1.4391809701919556, "logps/chosen": -171.56979370117188, "logps/rejected": -276.6543884277344, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.163615226745605, "rewards/margins": 11.0728120803833, "rewards/rejected": -19.236427307128906, "step": 4533 }, { "epoch": 7.28, "learning_rate": 1.0632183908045975e-07, "logits/chosen": -1.324269413948059, "logits/rejected": -1.2632946968078613, "logps/chosen": -132.20773315429688, "logps/rejected": -264.88067626953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.345478534698486, "rewards/margins": 13.314757347106934, "rewards/rejected": -18.660236358642578, "step": 4534 }, { "epoch": 7.28, "learning_rate": 1.062227506936187e-07, "logits/chosen": -1.344736933708191, "logits/rejected": -1.4119423627853394, "logps/chosen": -148.13734436035156, "logps/rejected": -285.03277587890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.324408054351807, "rewards/margins": 12.96264362335205, "rewards/rejected": -19.287052154541016, "step": 4535 }, { "epoch": 7.28, "learning_rate": 1.0612366230677765e-07, "logits/chosen": -1.5363234281539917, "logits/rejected": -1.5740801095962524, "logps/chosen": -166.61517333984375, "logps/rejected": -314.04296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.606089115142822, "rewards/margins": 12.983720779418945, "rewards/rejected": -20.589811325073242, "step": 4536 }, { "epoch": 7.28, "learning_rate": 1.0602457391993658e-07, "logits/chosen": -1.5413486957550049, "logits/rejected": -1.5191783905029297, "logps/chosen": -132.48916625976562, "logps/rejected": -321.44219970703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.856533050537109, "rewards/margins": 16.75694465637207, "rewards/rejected": -22.61347770690918, "step": 4537 }, { "epoch": 7.28, "learning_rate": 1.0592548553309553e-07, "logits/chosen": -1.3387703895568848, "logits/rejected": -1.4038896560668945, "logps/chosen": -167.7861785888672, "logps/rejected": -330.4862060546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.973254680633545, "rewards/margins": 15.654348373413086, "rewards/rejected": -23.627605438232422, "step": 4538 }, { "epoch": 7.29, "learning_rate": 1.0582639714625445e-07, "logits/chosen": -1.5405657291412354, "logits/rejected": -1.5298421382904053, "logps/chosen": -200.7737579345703, "logps/rejected": -303.3057556152344, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.965378761291504, "rewards/margins": 12.612499237060547, "rewards/rejected": -21.577877044677734, "step": 4539 }, { "epoch": 7.29, "learning_rate": 1.057273087594134e-07, "logits/chosen": -1.3281426429748535, "logits/rejected": -1.3116447925567627, "logps/chosen": -148.94332885742188, "logps/rejected": -285.2530212402344, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.964744567871094, "rewards/margins": 12.591866493225098, "rewards/rejected": -20.556612014770508, "step": 4540 }, { "epoch": 7.29, "learning_rate": 1.0562822037257233e-07, "logits/chosen": -1.510664701461792, "logits/rejected": -1.4371854066848755, "logps/chosen": -196.8003387451172, "logps/rejected": -310.15850830078125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -9.019740104675293, "rewards/margins": 12.42959213256836, "rewards/rejected": -21.44933319091797, "step": 4541 }, { "epoch": 7.29, "learning_rate": 1.0552913198573127e-07, "logits/chosen": -1.7364600896835327, "logits/rejected": -1.7145445346832275, "logps/chosen": -147.9008026123047, "logps/rejected": -271.12860107421875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.881474018096924, "rewards/margins": 14.076520919799805, "rewards/rejected": -17.95799446105957, "step": 4542 }, { "epoch": 7.29, "learning_rate": 1.0543004359889021e-07, "logits/chosen": -1.549738883972168, "logits/rejected": -1.4874011278152466, "logps/chosen": -200.76097106933594, "logps/rejected": -288.91455078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.07199764251709, "rewards/margins": 11.517528533935547, "rewards/rejected": -20.58952522277832, "step": 4543 }, { "epoch": 7.29, "learning_rate": 1.0533095521204914e-07, "logits/chosen": -1.4343700408935547, "logits/rejected": -1.4605865478515625, "logps/chosen": -199.68930053710938, "logps/rejected": -370.81854248046875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.872848510742188, "rewards/margins": 15.93410587310791, "rewards/rejected": -24.80695343017578, "step": 4544 }, { "epoch": 7.3, "learning_rate": 1.0523186682520809e-07, "logits/chosen": -1.3847874402999878, "logits/rejected": -1.3588982820510864, "logps/chosen": -146.98306274414062, "logps/rejected": -278.8884582519531, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.362154006958008, "rewards/margins": 13.137701034545898, "rewards/rejected": -19.499855041503906, "step": 4545 }, { "epoch": 7.3, "learning_rate": 1.0513277843836702e-07, "logits/chosen": -1.456251621246338, "logits/rejected": -1.4585338830947876, "logps/chosen": -116.31632995605469, "logps/rejected": -228.25286865234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.375673532485962, "rewards/margins": 12.413328170776367, "rewards/rejected": -15.78900146484375, "step": 4546 }, { "epoch": 7.3, "learning_rate": 1.0503369005152595e-07, "logits/chosen": -1.5672576427459717, "logits/rejected": -1.5324070453643799, "logps/chosen": -163.9290771484375, "logps/rejected": -300.9716796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.088903427124023, "rewards/margins": 14.493488311767578, "rewards/rejected": -21.5823917388916, "step": 4547 }, { "epoch": 7.3, "learning_rate": 1.0493460166468489e-07, "logits/chosen": -1.5024091005325317, "logits/rejected": -1.496657371520996, "logps/chosen": -190.49754333496094, "logps/rejected": -340.88275146484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.279966354370117, "rewards/margins": 16.8936824798584, "rewards/rejected": -25.173648834228516, "step": 4548 }, { "epoch": 7.3, "learning_rate": 1.0483551327784383e-07, "logits/chosen": -1.5514261722564697, "logits/rejected": -1.46091890335083, "logps/chosen": -193.9055938720703, "logps/rejected": -278.6861267089844, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.841743469238281, "rewards/margins": 11.92888355255127, "rewards/rejected": -18.770626068115234, "step": 4549 }, { "epoch": 7.3, "learning_rate": 1.0473642489100278e-07, "logits/chosen": -1.5595101118087769, "logits/rejected": -1.544304370880127, "logps/chosen": -182.76483154296875, "logps/rejected": -342.438232421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.267621994018555, "rewards/margins": 15.532920837402344, "rewards/rejected": -23.80054473876953, "step": 4550 }, { "epoch": 7.3, "learning_rate": 1.046373365041617e-07, "logits/chosen": -1.5047022104263306, "logits/rejected": -1.5998475551605225, "logps/chosen": -145.42124938964844, "logps/rejected": -298.83013916015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.309586524963379, "rewards/margins": 13.188789367675781, "rewards/rejected": -20.498374938964844, "step": 4551 }, { "epoch": 7.31, "learning_rate": 1.0453824811732065e-07, "logits/chosen": -1.7066656351089478, "logits/rejected": -1.7606239318847656, "logps/chosen": -80.27839660644531, "logps/rejected": -244.3646697998047, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.7832143306732178, "rewards/margins": 15.396220207214355, "rewards/rejected": -17.179433822631836, "step": 4552 }, { "epoch": 7.31, "learning_rate": 1.0443915973047958e-07, "logits/chosen": -1.4586191177368164, "logits/rejected": -1.4785007238388062, "logps/chosen": -126.30056762695312, "logps/rejected": -260.38916015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.39576530456543, "rewards/margins": 12.397372245788574, "rewards/rejected": -18.793136596679688, "step": 4553 }, { "epoch": 7.31, "learning_rate": 1.0434007134363853e-07, "logits/chosen": -1.6960012912750244, "logits/rejected": -1.6539280414581299, "logps/chosen": -200.55091857910156, "logps/rejected": -312.7001647949219, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.723010063171387, "rewards/margins": 11.762835502624512, "rewards/rejected": -21.4858455657959, "step": 4554 }, { "epoch": 7.31, "learning_rate": 1.0424098295679745e-07, "logits/chosen": -1.4755817651748657, "logits/rejected": -1.5005830526351929, "logps/chosen": -157.73162841796875, "logps/rejected": -268.2693786621094, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.081024169921875, "rewards/margins": 11.098738670349121, "rewards/rejected": -19.179763793945312, "step": 4555 }, { "epoch": 7.31, "learning_rate": 1.0414189456995639e-07, "logits/chosen": -1.4595191478729248, "logits/rejected": -1.4058254957199097, "logps/chosen": -147.6494140625, "logps/rejected": -278.7900390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.261093616485596, "rewards/margins": 13.954341888427734, "rewards/rejected": -19.215436935424805, "step": 4556 }, { "epoch": 7.31, "learning_rate": 1.0404280618311534e-07, "logits/chosen": -1.476191759109497, "logits/rejected": -1.4674546718597412, "logps/chosen": -93.52493286132812, "logps/rejected": -237.82281494140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.2178454399108887, "rewards/margins": 14.413999557495117, "rewards/rejected": -17.63184356689453, "step": 4557 }, { "epoch": 7.32, "learning_rate": 1.0394371779627427e-07, "logits/chosen": -1.6202484369277954, "logits/rejected": -1.5381954908370972, "logps/chosen": -220.19317626953125, "logps/rejected": -355.7698974609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -12.304471015930176, "rewards/margins": 14.927286148071289, "rewards/rejected": -27.23175621032715, "step": 4558 }, { "epoch": 7.32, "learning_rate": 1.038446294094332e-07, "logits/chosen": -1.4706439971923828, "logits/rejected": -1.5008037090301514, "logps/chosen": -156.0191650390625, "logps/rejected": -329.9837951660156, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.74043083190918, "rewards/margins": 16.88787269592285, "rewards/rejected": -23.62830352783203, "step": 4559 }, { "epoch": 7.32, "learning_rate": 1.0374554102259214e-07, "logits/chosen": -1.6279335021972656, "logits/rejected": -1.5935752391815186, "logps/chosen": -160.97047424316406, "logps/rejected": -302.80126953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.261077880859375, "rewards/margins": 14.293556213378906, "rewards/rejected": -21.55463409423828, "step": 4560 }, { "epoch": 7.32, "learning_rate": 1.0364645263575109e-07, "logits/chosen": -1.4218772649765015, "logits/rejected": -1.4288113117218018, "logps/chosen": -162.15872192382812, "logps/rejected": -343.804443359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.785725593566895, "rewards/margins": 16.83348846435547, "rewards/rejected": -25.619213104248047, "step": 4561 }, { "epoch": 7.32, "learning_rate": 1.0354736424891003e-07, "logits/chosen": -1.6451842784881592, "logits/rejected": -1.48774254322052, "logps/chosen": -175.45468139648438, "logps/rejected": -301.7014465332031, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.250351905822754, "rewards/margins": 15.493841171264648, "rewards/rejected": -21.744192123413086, "step": 4562 }, { "epoch": 7.32, "learning_rate": 1.0344827586206897e-07, "logits/chosen": -1.5643247365951538, "logits/rejected": -1.5147581100463867, "logps/chosen": -183.40036010742188, "logps/rejected": -298.45184326171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.422131538391113, "rewards/margins": 13.737112998962402, "rewards/rejected": -23.159244537353516, "step": 4563 }, { "epoch": 7.33, "learning_rate": 1.033491874752279e-07, "logits/chosen": -1.5338215827941895, "logits/rejected": -1.4537558555603027, "logps/chosen": -145.86233520507812, "logps/rejected": -272.96905517578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.023224830627441, "rewards/margins": 13.257987976074219, "rewards/rejected": -19.281213760375977, "step": 4564 }, { "epoch": 7.33, "learning_rate": 1.0325009908838683e-07, "logits/chosen": -1.290743112564087, "logits/rejected": -1.3378586769104004, "logps/chosen": -159.01620483398438, "logps/rejected": -281.5267333984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.535933017730713, "rewards/margins": 12.898370742797852, "rewards/rejected": -20.434303283691406, "step": 4565 }, { "epoch": 7.33, "learning_rate": 1.0315101070154578e-07, "logits/chosen": -1.4255578517913818, "logits/rejected": -1.515138030052185, "logps/chosen": -172.68563842773438, "logps/rejected": -323.5512390136719, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -10.20240592956543, "rewards/margins": 13.632437705993652, "rewards/rejected": -23.834842681884766, "step": 4566 }, { "epoch": 7.33, "learning_rate": 1.0305192231470471e-07, "logits/chosen": -1.6219537258148193, "logits/rejected": -1.6204873323440552, "logps/chosen": -101.15103912353516, "logps/rejected": -214.45249938964844, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.5746665000915527, "rewards/margins": 11.231271743774414, "rewards/rejected": -13.805936813354492, "step": 4567 }, { "epoch": 7.33, "learning_rate": 1.0295283392786365e-07, "logits/chosen": -1.4064929485321045, "logits/rejected": -1.454853892326355, "logps/chosen": -147.9895782470703, "logps/rejected": -322.1719970703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.673306465148926, "rewards/margins": 14.65631103515625, "rewards/rejected": -21.32961654663086, "step": 4568 }, { "epoch": 7.33, "learning_rate": 1.0285374554102259e-07, "logits/chosen": -1.3610339164733887, "logits/rejected": -1.3926249742507935, "logps/chosen": -171.81619262695312, "logps/rejected": -315.53363037109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.26201343536377, "rewards/margins": 13.780435562133789, "rewards/rejected": -22.042449951171875, "step": 4569 }, { "epoch": 7.34, "learning_rate": 1.0275465715418153e-07, "logits/chosen": -1.5401015281677246, "logits/rejected": -1.4695796966552734, "logps/chosen": -177.1994171142578, "logps/rejected": -292.0560607910156, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.4734416007995605, "rewards/margins": 15.219650268554688, "rewards/rejected": -20.693090438842773, "step": 4570 }, { "epoch": 7.34, "learning_rate": 1.0265556876734047e-07, "logits/chosen": -1.5871025323867798, "logits/rejected": -1.5312435626983643, "logps/chosen": -101.41727447509766, "logps/rejected": -265.82562255859375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.975416660308838, "rewards/margins": 15.130887985229492, "rewards/rejected": -18.106304168701172, "step": 4571 }, { "epoch": 7.34, "learning_rate": 1.0255648038049939e-07, "logits/chosen": -1.5037200450897217, "logits/rejected": -1.5501196384429932, "logps/chosen": -165.11265563964844, "logps/rejected": -298.3482666015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.323375701904297, "rewards/margins": 12.058636665344238, "rewards/rejected": -20.38201332092285, "step": 4572 }, { "epoch": 7.34, "learning_rate": 1.0245739199365834e-07, "logits/chosen": -1.3772530555725098, "logits/rejected": -1.414858341217041, "logps/chosen": -145.29922485351562, "logps/rejected": -272.51239013671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.534767150878906, "rewards/margins": 12.956623077392578, "rewards/rejected": -19.491390228271484, "step": 4573 }, { "epoch": 7.34, "learning_rate": 1.0235830360681727e-07, "logits/chosen": -1.5829859972000122, "logits/rejected": -1.6160728931427002, "logps/chosen": -99.80282592773438, "logps/rejected": -242.61538696289062, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.256227731704712, "rewards/margins": 13.576536178588867, "rewards/rejected": -16.832763671875, "step": 4574 }, { "epoch": 7.34, "learning_rate": 1.0225921521997622e-07, "logits/chosen": -1.4290008544921875, "logits/rejected": -1.4821441173553467, "logps/chosen": -185.411865234375, "logps/rejected": -335.80096435546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.4811372756958, "rewards/margins": 14.69566535949707, "rewards/rejected": -24.176803588867188, "step": 4575 }, { "epoch": 7.35, "learning_rate": 1.0216012683313515e-07, "logits/chosen": -1.4473015069961548, "logits/rejected": -1.419636607170105, "logps/chosen": -185.844482421875, "logps/rejected": -322.7942199707031, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.750572204589844, "rewards/margins": 13.919647216796875, "rewards/rejected": -22.67021942138672, "step": 4576 }, { "epoch": 7.35, "learning_rate": 1.0206103844629408e-07, "logits/chosen": -1.3833993673324585, "logits/rejected": -1.477108120918274, "logps/chosen": -124.96116638183594, "logps/rejected": -262.325439453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.016386032104492, "rewards/margins": 11.366668701171875, "rewards/rejected": -17.383054733276367, "step": 4577 }, { "epoch": 7.35, "learning_rate": 1.0196195005945303e-07, "logits/chosen": -1.5807570219039917, "logits/rejected": -1.6865369081497192, "logps/chosen": -147.71170043945312, "logps/rejected": -309.3811340332031, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.04744815826416, "rewards/margins": 14.710670471191406, "rewards/rejected": -21.75811767578125, "step": 4578 }, { "epoch": 7.35, "learning_rate": 1.0186286167261196e-07, "logits/chosen": -1.4335203170776367, "logits/rejected": -1.3448935747146606, "logps/chosen": -186.50392150878906, "logps/rejected": -309.90118408203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -10.206847190856934, "rewards/margins": 14.435318946838379, "rewards/rejected": -24.642166137695312, "step": 4579 }, { "epoch": 7.35, "learning_rate": 1.017637732857709e-07, "logits/chosen": -1.5852086544036865, "logits/rejected": -1.5058685541152954, "logps/chosen": -183.62774658203125, "logps/rejected": -300.19390869140625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.709893226623535, "rewards/margins": 13.424311637878418, "rewards/rejected": -21.13420295715332, "step": 4580 }, { "epoch": 7.35, "learning_rate": 1.0166468489892983e-07, "logits/chosen": -1.6255788803100586, "logits/rejected": -1.6128435134887695, "logps/chosen": -114.23258972167969, "logps/rejected": -239.925537109375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.200597763061523, "rewards/margins": 12.705343246459961, "rewards/rejected": -16.905941009521484, "step": 4581 }, { "epoch": 7.35, "learning_rate": 1.0156559651208878e-07, "logits/chosen": -1.5831925868988037, "logits/rejected": -1.6156798601150513, "logps/chosen": -143.03834533691406, "logps/rejected": -267.6253662109375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.068941593170166, "rewards/margins": 11.544445037841797, "rewards/rejected": -17.613386154174805, "step": 4582 }, { "epoch": 7.36, "learning_rate": 1.0146650812524772e-07, "logits/chosen": -1.5583727359771729, "logits/rejected": -1.6269680261611938, "logps/chosen": -118.80663299560547, "logps/rejected": -235.78518676757812, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.354904651641846, "rewards/margins": 11.892274856567383, "rewards/rejected": -16.247180938720703, "step": 4583 }, { "epoch": 7.36, "learning_rate": 1.0136741973840664e-07, "logits/chosen": -1.5381572246551514, "logits/rejected": -1.6095712184906006, "logps/chosen": -170.47564697265625, "logps/rejected": -301.60028076171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.293197154998779, "rewards/margins": 14.034934997558594, "rewards/rejected": -21.32813262939453, "step": 4584 }, { "epoch": 7.36, "learning_rate": 1.0126833135156559e-07, "logits/chosen": -1.5749778747558594, "logits/rejected": -1.5365815162658691, "logps/chosen": -151.7555694580078, "logps/rejected": -274.4482727050781, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.050795078277588, "rewards/margins": 13.092572212219238, "rewards/rejected": -20.143367767333984, "step": 4585 }, { "epoch": 7.36, "learning_rate": 1.0116924296472452e-07, "logits/chosen": -1.4875764846801758, "logits/rejected": -1.4846076965332031, "logps/chosen": -118.90524291992188, "logps/rejected": -280.4741516113281, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.814988613128662, "rewards/margins": 15.438146591186523, "rewards/rejected": -20.253135681152344, "step": 4586 }, { "epoch": 7.36, "learning_rate": 1.0107015457788347e-07, "logits/chosen": -1.3939523696899414, "logits/rejected": -1.5155441761016846, "logps/chosen": -137.75546264648438, "logps/rejected": -325.9117126464844, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.974702835083008, "rewards/margins": 15.131844520568848, "rewards/rejected": -21.106548309326172, "step": 4587 }, { "epoch": 7.36, "learning_rate": 1.0097106619104242e-07, "logits/chosen": -1.5683947801589966, "logits/rejected": -1.6654645204544067, "logps/chosen": -132.42120361328125, "logps/rejected": -314.8275146484375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.167524814605713, "rewards/margins": 16.78534698486328, "rewards/rejected": -20.952869415283203, "step": 4588 }, { "epoch": 7.37, "learning_rate": 1.0087197780420134e-07, "logits/chosen": -1.4684202671051025, "logits/rejected": -1.4627599716186523, "logps/chosen": -105.77696228027344, "logps/rejected": -261.33148193359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.4249725341796875, "rewards/margins": 15.401926040649414, "rewards/rejected": -18.82689666748047, "step": 4589 }, { "epoch": 7.37, "learning_rate": 1.0077288941736028e-07, "logits/chosen": -1.491817831993103, "logits/rejected": -1.4655892848968506, "logps/chosen": -122.58031463623047, "logps/rejected": -235.76451110839844, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.757542610168457, "rewards/margins": 11.746440887451172, "rewards/rejected": -16.503984451293945, "step": 4590 }, { "epoch": 7.37, "learning_rate": 1.0067380103051922e-07, "logits/chosen": -1.2601001262664795, "logits/rejected": -1.276187539100647, "logps/chosen": -138.05221557617188, "logps/rejected": -254.05972290039062, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.337490081787109, "rewards/margins": 11.425405502319336, "rewards/rejected": -17.762895584106445, "step": 4591 }, { "epoch": 7.37, "learning_rate": 1.0057471264367816e-07, "logits/chosen": -1.4975332021713257, "logits/rejected": -1.372803807258606, "logps/chosen": -228.57313537597656, "logps/rejected": -294.83441162109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.978250503540039, "rewards/margins": 13.643387794494629, "rewards/rejected": -21.62163734436035, "step": 4592 }, { "epoch": 7.37, "learning_rate": 1.0047562425683708e-07, "logits/chosen": -1.3848469257354736, "logits/rejected": -1.416968584060669, "logps/chosen": -145.54074096679688, "logps/rejected": -272.03192138671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.087255477905273, "rewards/margins": 11.05277156829834, "rewards/rejected": -19.140026092529297, "step": 4593 }, { "epoch": 7.37, "learning_rate": 1.0037653586999603e-07, "logits/chosen": -1.6508409976959229, "logits/rejected": -1.6827691793441772, "logps/chosen": -155.68209838867188, "logps/rejected": -290.50628662109375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.884305953979492, "rewards/margins": 13.210468292236328, "rewards/rejected": -20.094776153564453, "step": 4594 }, { "epoch": 7.38, "learning_rate": 1.0027744748315498e-07, "logits/chosen": -1.4949091672897339, "logits/rejected": -1.6461219787597656, "logps/chosen": -78.65393829345703, "logps/rejected": -254.05239868164062, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.7837982177734375, "rewards/margins": 15.541844367980957, "rewards/rejected": -17.325641632080078, "step": 4595 }, { "epoch": 7.38, "learning_rate": 1.0017835909631391e-07, "logits/chosen": -1.311041235923767, "logits/rejected": -1.318776249885559, "logps/chosen": -196.93667602539062, "logps/rejected": -361.8273010253906, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -10.591144561767578, "rewards/margins": 16.265777587890625, "rewards/rejected": -26.856922149658203, "step": 4596 }, { "epoch": 7.38, "learning_rate": 1.0007927070947284e-07, "logits/chosen": -1.6042587757110596, "logits/rejected": -1.419244647026062, "logps/chosen": -133.34585571289062, "logps/rejected": -225.91256713867188, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.113045692443848, "rewards/margins": 11.112420082092285, "rewards/rejected": -16.225465774536133, "step": 4597 }, { "epoch": 7.38, "learning_rate": 9.998018232263178e-08, "logits/chosen": -1.3483706712722778, "logits/rejected": -1.3393628597259521, "logps/chosen": -123.93716430664062, "logps/rejected": -263.3916320800781, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.5801825523376465, "rewards/margins": 15.11819839477539, "rewards/rejected": -19.698379516601562, "step": 4598 }, { "epoch": 7.38, "learning_rate": 9.988109393579072e-08, "logits/chosen": -1.6670691967010498, "logits/rejected": -1.6913690567016602, "logps/chosen": -131.0694580078125, "logps/rejected": -269.1467590332031, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -5.425271034240723, "rewards/margins": 13.69382095336914, "rewards/rejected": -19.119091033935547, "step": 4599 }, { "epoch": 7.38, "learning_rate": 9.978200554894966e-08, "logits/chosen": -1.654115080833435, "logits/rejected": -1.6241618394851685, "logps/chosen": -159.9171142578125, "logps/rejected": -323.47332763671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.278756141662598, "rewards/margins": 15.391847610473633, "rewards/rejected": -22.670604705810547, "step": 4600 }, { "epoch": 7.39, "learning_rate": 9.968291716210859e-08, "logits/chosen": -1.4803158044815063, "logits/rejected": -1.5173786878585815, "logps/chosen": -179.74053955078125, "logps/rejected": -300.43267822265625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.462064743041992, "rewards/margins": 12.35085678100586, "rewards/rejected": -20.81292152404785, "step": 4601 }, { "epoch": 7.39, "learning_rate": 9.958382877526754e-08, "logits/chosen": -1.4616618156433105, "logits/rejected": -1.4953250885009766, "logps/chosen": -110.71243286132812, "logps/rejected": -305.7099609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.52284049987793, "rewards/margins": 18.742788314819336, "rewards/rejected": -23.2656307220459, "step": 4602 }, { "epoch": 7.39, "learning_rate": 9.948474038842647e-08, "logits/chosen": -1.4002530574798584, "logits/rejected": -1.476383924484253, "logps/chosen": -145.38536071777344, "logps/rejected": -284.5558776855469, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.075552940368652, "rewards/margins": 13.55805492401123, "rewards/rejected": -19.633609771728516, "step": 4603 }, { "epoch": 7.39, "learning_rate": 9.938565200158542e-08, "logits/chosen": -1.3701430559158325, "logits/rejected": -1.4366319179534912, "logps/chosen": -102.60385131835938, "logps/rejected": -224.62979125976562, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.866495370864868, "rewards/margins": 11.594937324523926, "rewards/rejected": -15.461433410644531, "step": 4604 }, { "epoch": 7.39, "learning_rate": 9.928656361474434e-08, "logits/chosen": -1.5001806020736694, "logits/rejected": -1.4414132833480835, "logps/chosen": -103.25643920898438, "logps/rejected": -272.20819091796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.9184024333953857, "rewards/margins": 14.442022323608398, "rewards/rejected": -18.360424041748047, "step": 4605 }, { "epoch": 7.39, "learning_rate": 9.918747522790328e-08, "logits/chosen": -1.6813652515411377, "logits/rejected": -1.6542198657989502, "logps/chosen": -127.0543212890625, "logps/rejected": -271.75347900390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.4273295402526855, "rewards/margins": 15.61043930053711, "rewards/rejected": -20.037769317626953, "step": 4606 }, { "epoch": 7.39, "learning_rate": 9.908838684106223e-08, "logits/chosen": -1.5757476091384888, "logits/rejected": -1.6741224527359009, "logps/chosen": -169.7532196044922, "logps/rejected": -359.2912902832031, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.272216796875, "rewards/margins": 17.02829360961914, "rewards/rejected": -25.30051040649414, "step": 4607 }, { "epoch": 7.4, "learning_rate": 9.898929845422116e-08, "logits/chosen": -1.4953793287277222, "logits/rejected": -1.4364008903503418, "logps/chosen": -158.53517150878906, "logps/rejected": -277.48822021484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.075345993041992, "rewards/margins": 13.118927955627441, "rewards/rejected": -20.19427490234375, "step": 4608 }, { "epoch": 7.4, "learning_rate": 9.889021006738011e-08, "logits/chosen": -1.6366379261016846, "logits/rejected": -1.6755564212799072, "logps/chosen": -107.71150970458984, "logps/rejected": -275.9061279296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.7229197025299072, "rewards/margins": 16.717426300048828, "rewards/rejected": -20.440345764160156, "step": 4609 }, { "epoch": 7.4, "learning_rate": 9.879112168053903e-08, "logits/chosen": -1.3992807865142822, "logits/rejected": -1.3670930862426758, "logps/chosen": -183.82237243652344, "logps/rejected": -285.89202880859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.305797576904297, "rewards/margins": 11.150449752807617, "rewards/rejected": -20.456247329711914, "step": 4610 }, { "epoch": 7.4, "learning_rate": 9.869203329369798e-08, "logits/chosen": -1.4467887878417969, "logits/rejected": -1.3532929420471191, "logps/chosen": -162.7723846435547, "logps/rejected": -272.2936096191406, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.876744270324707, "rewards/margins": 11.827997207641602, "rewards/rejected": -18.704742431640625, "step": 4611 }, { "epoch": 7.4, "learning_rate": 9.859294490685691e-08, "logits/chosen": -1.5649526119232178, "logits/rejected": -1.5034645795822144, "logps/chosen": -159.1440887451172, "logps/rejected": -271.9767150878906, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.303107261657715, "rewards/margins": 13.57874870300293, "rewards/rejected": -20.88185691833496, "step": 4612 }, { "epoch": 7.4, "learning_rate": 9.849385652001586e-08, "logits/chosen": -1.5682395696640015, "logits/rejected": -1.6176481246948242, "logps/chosen": -174.67330932617188, "logps/rejected": -280.3492431640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.383489608764648, "rewards/margins": 11.456134796142578, "rewards/rejected": -17.839624404907227, "step": 4613 }, { "epoch": 7.41, "learning_rate": 9.839476813317479e-08, "logits/chosen": -1.3550658226013184, "logits/rejected": -1.3531510829925537, "logps/chosen": -174.87286376953125, "logps/rejected": -314.5924072265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.458209991455078, "rewards/margins": 14.773473739624023, "rewards/rejected": -24.2316837310791, "step": 4614 }, { "epoch": 7.41, "learning_rate": 9.829567974633372e-08, "logits/chosen": -1.4645625352859497, "logits/rejected": -1.3775349855422974, "logps/chosen": -148.8585205078125, "logps/rejected": -305.76904296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.895204067230225, "rewards/margins": 15.411626815795898, "rewards/rejected": -21.30683135986328, "step": 4615 }, { "epoch": 7.41, "learning_rate": 9.819659135949267e-08, "logits/chosen": -1.417275309562683, "logits/rejected": -1.3251230716705322, "logps/chosen": -191.51023864746094, "logps/rejected": -289.1645202636719, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.665304183959961, "rewards/margins": 11.91787338256836, "rewards/rejected": -21.58317756652832, "step": 4616 }, { "epoch": 7.41, "learning_rate": 9.80975029726516e-08, "logits/chosen": -1.4717594385147095, "logits/rejected": -1.4660168886184692, "logps/chosen": -109.50291442871094, "logps/rejected": -238.7255401611328, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.930408477783203, "rewards/margins": 12.910100936889648, "rewards/rejected": -17.840511322021484, "step": 4617 }, { "epoch": 7.41, "learning_rate": 9.799841458581054e-08, "logits/chosen": -1.477567195892334, "logits/rejected": -1.6141666173934937, "logps/chosen": -111.9709701538086, "logps/rejected": -303.62213134765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.842939853668213, "rewards/margins": 16.392250061035156, "rewards/rejected": -21.23518943786621, "step": 4618 }, { "epoch": 7.41, "learning_rate": 9.789932619896947e-08, "logits/chosen": -1.5361912250518799, "logits/rejected": -1.5514413118362427, "logps/chosen": -175.3197784423828, "logps/rejected": -304.64739990234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -10.306539535522461, "rewards/margins": 12.898799896240234, "rewards/rejected": -23.205337524414062, "step": 4619 }, { "epoch": 7.42, "learning_rate": 9.780023781212842e-08, "logits/chosen": -1.5513657331466675, "logits/rejected": -1.5928735733032227, "logps/chosen": -168.78424072265625, "logps/rejected": -338.4950866699219, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.974270820617676, "rewards/margins": 14.25711727142334, "rewards/rejected": -23.231388092041016, "step": 4620 }, { "epoch": 7.42, "learning_rate": 9.770114942528736e-08, "logits/chosen": -1.4783188104629517, "logits/rejected": -1.5972596406936646, "logps/chosen": -127.89452362060547, "logps/rejected": -333.6496887207031, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.299160003662109, "rewards/margins": 16.15605926513672, "rewards/rejected": -21.455219268798828, "step": 4621 }, { "epoch": 7.42, "learning_rate": 9.760206103844628e-08, "logits/chosen": -1.4474598169326782, "logits/rejected": -1.3437678813934326, "logps/chosen": -174.2814178466797, "logps/rejected": -284.5703430175781, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.705958366394043, "rewards/margins": 12.75230598449707, "rewards/rejected": -21.45826530456543, "step": 4622 }, { "epoch": 7.42, "learning_rate": 9.750297265160523e-08, "logits/chosen": -1.3862318992614746, "logits/rejected": -1.3684009313583374, "logps/chosen": -134.7038116455078, "logps/rejected": -281.9936218261719, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.6053466796875, "rewards/margins": 14.550690650939941, "rewards/rejected": -20.156036376953125, "step": 4623 }, { "epoch": 7.42, "learning_rate": 9.740388426476416e-08, "logits/chosen": -1.5838134288787842, "logits/rejected": -1.5664482116699219, "logps/chosen": -149.15097045898438, "logps/rejected": -253.12823486328125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.008848667144775, "rewards/margins": 10.788122177124023, "rewards/rejected": -17.796972274780273, "step": 4624 }, { "epoch": 7.42, "learning_rate": 9.730479587792311e-08, "logits/chosen": -1.4348199367523193, "logits/rejected": -1.44424569606781, "logps/chosen": -168.58038330078125, "logps/rejected": -305.0420227050781, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.621783256530762, "rewards/margins": 13.990252494812012, "rewards/rejected": -21.612035751342773, "step": 4625 }, { "epoch": 7.43, "learning_rate": 9.720570749108203e-08, "logits/chosen": -1.4955791234970093, "logits/rejected": -1.6274482011795044, "logps/chosen": -126.95660400390625, "logps/rejected": -294.79156494140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.744326591491699, "rewards/margins": 15.909370422363281, "rewards/rejected": -22.653696060180664, "step": 4626 }, { "epoch": 7.43, "learning_rate": 9.710661910424097e-08, "logits/chosen": -1.4449772834777832, "logits/rejected": -1.4239524602890015, "logps/chosen": -168.65762329101562, "logps/rejected": -297.3428955078125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.982611179351807, "rewards/margins": 13.137177467346191, "rewards/rejected": -21.119789123535156, "step": 4627 }, { "epoch": 7.43, "learning_rate": 9.700753071739992e-08, "logits/chosen": -1.5636733770370483, "logits/rejected": -1.4990777969360352, "logps/chosen": -140.17294311523438, "logps/rejected": -226.644775390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.183783531188965, "rewards/margins": 11.086920738220215, "rewards/rejected": -16.270706176757812, "step": 4628 }, { "epoch": 7.43, "learning_rate": 9.690844233055885e-08, "logits/chosen": -1.3013708591461182, "logits/rejected": -1.412848711013794, "logps/chosen": -108.52095031738281, "logps/rejected": -237.7718963623047, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.855538845062256, "rewards/margins": 11.589658737182617, "rewards/rejected": -17.44519805908203, "step": 4629 }, { "epoch": 7.43, "learning_rate": 9.68093539437178e-08, "logits/chosen": -1.6717712879180908, "logits/rejected": -1.498887300491333, "logps/chosen": -186.2487335205078, "logps/rejected": -296.9094543457031, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.403076648712158, "rewards/margins": 13.507612228393555, "rewards/rejected": -20.910690307617188, "step": 4630 }, { "epoch": 7.43, "learning_rate": 9.671026555687672e-08, "logits/chosen": -1.4179446697235107, "logits/rejected": -1.4262242317199707, "logps/chosen": -173.87115478515625, "logps/rejected": -334.72308349609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.667671203613281, "rewards/margins": 15.975678443908691, "rewards/rejected": -24.64335060119629, "step": 4631 }, { "epoch": 7.43, "learning_rate": 9.661117717003567e-08, "logits/chosen": -1.3390785455703735, "logits/rejected": -1.355980396270752, "logps/chosen": -175.20938110351562, "logps/rejected": -320.1448669433594, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.567395210266113, "rewards/margins": 14.981678009033203, "rewards/rejected": -22.549074172973633, "step": 4632 }, { "epoch": 7.44, "learning_rate": 9.651208878319461e-08, "logits/chosen": -1.5016354322433472, "logits/rejected": -1.4794961214065552, "logps/chosen": -147.59799194335938, "logps/rejected": -302.3895568847656, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.613769054412842, "rewards/margins": 15.715124130249023, "rewards/rejected": -21.328893661499023, "step": 4633 }, { "epoch": 7.44, "learning_rate": 9.641300039635355e-08, "logits/chosen": -1.6364526748657227, "logits/rejected": -1.6110318899154663, "logps/chosen": -111.86907958984375, "logps/rejected": -275.453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.459457874298096, "rewards/margins": 15.862634658813477, "rewards/rejected": -20.322092056274414, "step": 4634 }, { "epoch": 7.44, "learning_rate": 9.631391200951248e-08, "logits/chosen": -1.3207837343215942, "logits/rejected": -1.3478920459747314, "logps/chosen": -166.1696319580078, "logps/rejected": -393.1435241699219, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -10.172045707702637, "rewards/margins": 18.62868881225586, "rewards/rejected": -28.800737380981445, "step": 4635 }, { "epoch": 7.44, "learning_rate": 9.621482362267141e-08, "logits/chosen": -1.4288808107376099, "logits/rejected": -1.482408046722412, "logps/chosen": -145.3209686279297, "logps/rejected": -267.8291931152344, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.433206558227539, "rewards/margins": 13.070757865905762, "rewards/rejected": -18.503963470458984, "step": 4636 }, { "epoch": 7.44, "learning_rate": 9.611573523583036e-08, "logits/chosen": -1.5962858200073242, "logits/rejected": -1.4455064535140991, "logps/chosen": -180.2833709716797, "logps/rejected": -231.6917724609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.222241401672363, "rewards/margins": 8.987388610839844, "rewards/rejected": -16.209630966186523, "step": 4637 }, { "epoch": 7.44, "learning_rate": 9.60166468489893e-08, "logits/chosen": -1.5753028392791748, "logits/rejected": -1.536102056503296, "logps/chosen": -137.254638671875, "logps/rejected": -265.11932373046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.316536903381348, "rewards/margins": 13.005054473876953, "rewards/rejected": -18.321592330932617, "step": 4638 }, { "epoch": 7.45, "learning_rate": 9.591755846214823e-08, "logits/chosen": -1.4976897239685059, "logits/rejected": -1.5179641246795654, "logps/chosen": -126.60063934326172, "logps/rejected": -255.5934295654297, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.141974449157715, "rewards/margins": 12.695898056030273, "rewards/rejected": -17.837873458862305, "step": 4639 }, { "epoch": 7.45, "learning_rate": 9.581847007530717e-08, "logits/chosen": -1.5731432437896729, "logits/rejected": -1.598272442817688, "logps/chosen": -165.76541137695312, "logps/rejected": -300.1859130859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.973592758178711, "rewards/margins": 13.029263496398926, "rewards/rejected": -22.002857208251953, "step": 4640 }, { "epoch": 7.45, "learning_rate": 9.571938168846611e-08, "logits/chosen": -1.3958719968795776, "logits/rejected": -1.4455864429473877, "logps/chosen": -132.0583038330078, "logps/rejected": -252.60824584960938, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.716833591461182, "rewards/margins": 11.505600929260254, "rewards/rejected": -16.222434997558594, "step": 4641 }, { "epoch": 7.45, "learning_rate": 9.562029330162505e-08, "logits/chosen": -1.5008289813995361, "logits/rejected": -1.5104775428771973, "logps/chosen": -186.15951538085938, "logps/rejected": -286.67608642578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.699739933013916, "rewards/margins": 11.210260391235352, "rewards/rejected": -18.90999984741211, "step": 4642 }, { "epoch": 7.45, "learning_rate": 9.552120491478397e-08, "logits/chosen": -1.3095237016677856, "logits/rejected": -1.3937842845916748, "logps/chosen": -186.666015625, "logps/rejected": -317.0845947265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.532760620117188, "rewards/margins": 13.011085510253906, "rewards/rejected": -21.543846130371094, "step": 4643 }, { "epoch": 7.45, "learning_rate": 9.542211652794292e-08, "logits/chosen": -1.5666929483413696, "logits/rejected": -1.516608715057373, "logps/chosen": -166.32217407226562, "logps/rejected": -276.203369140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.768231391906738, "rewards/margins": 13.957283973693848, "rewards/rejected": -20.725515365600586, "step": 4644 }, { "epoch": 7.46, "learning_rate": 9.532302814110185e-08, "logits/chosen": -1.5471251010894775, "logits/rejected": -1.5631380081176758, "logps/chosen": -158.32333374023438, "logps/rejected": -342.7694396972656, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.357734203338623, "rewards/margins": 17.52845001220703, "rewards/rejected": -24.886184692382812, "step": 4645 }, { "epoch": 7.46, "learning_rate": 9.52239397542608e-08, "logits/chosen": -1.4224404096603394, "logits/rejected": -1.3877859115600586, "logps/chosen": -137.76551818847656, "logps/rejected": -232.7071990966797, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.137988567352295, "rewards/margins": 9.450699806213379, "rewards/rejected": -15.588687896728516, "step": 4646 }, { "epoch": 7.46, "learning_rate": 9.512485136741973e-08, "logits/chosen": -1.5170836448669434, "logits/rejected": -1.5308327674865723, "logps/chosen": -153.4864501953125, "logps/rejected": -290.47711181640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.165126800537109, "rewards/margins": 13.684173583984375, "rewards/rejected": -19.849302291870117, "step": 4647 }, { "epoch": 7.46, "learning_rate": 9.502576298057867e-08, "logits/chosen": -1.4724810123443604, "logits/rejected": -1.4672783613204956, "logps/chosen": -119.09646606445312, "logps/rejected": -225.70132446289062, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.696094989776611, "rewards/margins": 11.542621612548828, "rewards/rejected": -16.23871612548828, "step": 4648 }, { "epoch": 7.46, "learning_rate": 9.492667459373761e-08, "logits/chosen": -1.3102924823760986, "logits/rejected": -1.3554913997650146, "logps/chosen": -131.0306396484375, "logps/rejected": -330.9505310058594, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.248379707336426, "rewards/margins": 17.606327056884766, "rewards/rejected": -22.854707717895508, "step": 4649 }, { "epoch": 7.46, "learning_rate": 9.482758620689655e-08, "logits/chosen": -1.3546383380889893, "logits/rejected": -1.3326917886734009, "logps/chosen": -162.3204345703125, "logps/rejected": -338.7189025878906, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.16411828994751, "rewards/margins": 15.890613555908203, "rewards/rejected": -23.054729461669922, "step": 4650 }, { "epoch": 7.47, "learning_rate": 9.472849782005548e-08, "logits/chosen": -1.4516221284866333, "logits/rejected": -1.443242073059082, "logps/chosen": -147.52035522460938, "logps/rejected": -269.9925842285156, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.060882091522217, "rewards/margins": 13.360878944396973, "rewards/rejected": -18.421762466430664, "step": 4651 }, { "epoch": 7.47, "learning_rate": 9.462940943321441e-08, "logits/chosen": -1.6977810859680176, "logits/rejected": -1.6491694450378418, "logps/chosen": -170.45297241210938, "logps/rejected": -310.3915100097656, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.163542747497559, "rewards/margins": 14.743598937988281, "rewards/rejected": -20.907142639160156, "step": 4652 }, { "epoch": 7.47, "learning_rate": 9.453032104637336e-08, "logits/chosen": -1.3077198266983032, "logits/rejected": -1.35025954246521, "logps/chosen": -185.76576232910156, "logps/rejected": -339.12957763671875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.258760452270508, "rewards/margins": 14.317686080932617, "rewards/rejected": -22.576446533203125, "step": 4653 }, { "epoch": 7.47, "learning_rate": 9.44312326595323e-08, "logits/chosen": -1.4999805688858032, "logits/rejected": -1.495664358139038, "logps/chosen": -143.48851013183594, "logps/rejected": -346.6123962402344, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.809131622314453, "rewards/margins": 17.835792541503906, "rewards/rejected": -25.64492416381836, "step": 4654 }, { "epoch": 7.47, "learning_rate": 9.433214427269124e-08, "logits/chosen": -1.5503978729248047, "logits/rejected": -1.6688485145568848, "logps/chosen": -143.42144775390625, "logps/rejected": -311.3262939453125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.694539546966553, "rewards/margins": 15.807502746582031, "rewards/rejected": -22.502044677734375, "step": 4655 }, { "epoch": 7.47, "learning_rate": 9.423305588585017e-08, "logits/chosen": -1.3424460887908936, "logits/rejected": -1.3649423122406006, "logps/chosen": -167.7622833251953, "logps/rejected": -306.54412841796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.154219627380371, "rewards/margins": 13.961280822753906, "rewards/rejected": -22.115501403808594, "step": 4656 }, { "epoch": 7.48, "learning_rate": 9.41339674990091e-08, "logits/chosen": -1.5832605361938477, "logits/rejected": -1.7056794166564941, "logps/chosen": -139.80445861816406, "logps/rejected": -296.126708984375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.785496711730957, "rewards/margins": 13.614591598510742, "rewards/rejected": -20.400089263916016, "step": 4657 }, { "epoch": 7.48, "learning_rate": 9.403487911216805e-08, "logits/chosen": -1.399991512298584, "logits/rejected": -1.3709068298339844, "logps/chosen": -164.457275390625, "logps/rejected": -290.4913635253906, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.010276794433594, "rewards/margins": 14.605002403259277, "rewards/rejected": -21.615278244018555, "step": 4658 }, { "epoch": 7.48, "learning_rate": 9.3935790725327e-08, "logits/chosen": -1.437023639678955, "logits/rejected": -1.4438631534576416, "logps/chosen": -140.11593627929688, "logps/rejected": -306.06536865234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.04295015335083, "rewards/margins": 14.356986999511719, "rewards/rejected": -20.39993667602539, "step": 4659 }, { "epoch": 7.48, "learning_rate": 9.383670233848592e-08, "logits/chosen": -1.4173215627670288, "logits/rejected": -1.488646388053894, "logps/chosen": -130.05393981933594, "logps/rejected": -271.047607421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.705996990203857, "rewards/margins": 12.873019218444824, "rewards/rejected": -18.579015731811523, "step": 4660 }, { "epoch": 7.48, "learning_rate": 9.373761395164487e-08, "logits/chosen": -1.446407675743103, "logits/rejected": -1.4857710599899292, "logps/chosen": -201.12258911132812, "logps/rejected": -314.4423828125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -10.791808128356934, "rewards/margins": 11.492267608642578, "rewards/rejected": -22.284076690673828, "step": 4661 }, { "epoch": 7.48, "learning_rate": 9.36385255648038e-08, "logits/chosen": -1.5105905532836914, "logits/rejected": -1.5299694538116455, "logps/chosen": -121.16903686523438, "logps/rejected": -276.4168701171875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.181404113769531, "rewards/margins": 15.038162231445312, "rewards/rejected": -20.219566345214844, "step": 4662 }, { "epoch": 7.48, "learning_rate": 9.353943717796275e-08, "logits/chosen": -1.5034154653549194, "logits/rejected": -1.458743691444397, "logps/chosen": -163.081787109375, "logps/rejected": -304.5079650878906, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.122138023376465, "rewards/margins": 14.280546188354492, "rewards/rejected": -21.40268325805664, "step": 4663 }, { "epoch": 7.49, "learning_rate": 9.344034879112167e-08, "logits/chosen": -1.5436289310455322, "logits/rejected": -1.6150435209274292, "logps/chosen": -159.5426025390625, "logps/rejected": -298.88885498046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.384900093078613, "rewards/margins": 12.12484359741211, "rewards/rejected": -20.509742736816406, "step": 4664 }, { "epoch": 7.49, "learning_rate": 9.334126040428061e-08, "logits/chosen": -1.4268789291381836, "logits/rejected": -1.3405101299285889, "logps/chosen": -200.993408203125, "logps/rejected": -367.042236328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.7235689163208, "rewards/margins": 17.049978256225586, "rewards/rejected": -26.773548126220703, "step": 4665 }, { "epoch": 7.49, "learning_rate": 9.324217201743956e-08, "logits/chosen": -1.51914381980896, "logits/rejected": -1.5844841003417969, "logps/chosen": -96.27718353271484, "logps/rejected": -315.5326843261719, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.3314037322998047, "rewards/margins": 20.46729850769043, "rewards/rejected": -23.798702239990234, "step": 4666 }, { "epoch": 7.49, "learning_rate": 9.314308363059849e-08, "logits/chosen": -1.3720576763153076, "logits/rejected": -1.437596082687378, "logps/chosen": -160.74143981933594, "logps/rejected": -363.5211181640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.011680603027344, "rewards/margins": 17.78326988220215, "rewards/rejected": -25.79494857788086, "step": 4667 }, { "epoch": 7.49, "learning_rate": 9.304399524375743e-08, "logits/chosen": -1.465597152709961, "logits/rejected": -1.490572214126587, "logps/chosen": -196.37115478515625, "logps/rejected": -312.5265197753906, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -10.090531349182129, "rewards/margins": 11.958608627319336, "rewards/rejected": -22.04914093017578, "step": 4668 }, { "epoch": 7.49, "learning_rate": 9.294490685691636e-08, "logits/chosen": -1.370262861251831, "logits/rejected": -1.3424370288848877, "logps/chosen": -175.2178955078125, "logps/rejected": -306.3891906738281, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.667069435119629, "rewards/margins": 14.629913330078125, "rewards/rejected": -23.296981811523438, "step": 4669 }, { "epoch": 7.5, "learning_rate": 9.28458184700753e-08, "logits/chosen": -1.6989083290100098, "logits/rejected": -1.7191755771636963, "logps/chosen": -161.3614959716797, "logps/rejected": -319.7138671875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.547299861907959, "rewards/margins": 14.450780868530273, "rewards/rejected": -21.99808120727539, "step": 4670 }, { "epoch": 7.5, "learning_rate": 9.274673008323424e-08, "logits/chosen": -1.4273133277893066, "logits/rejected": -1.3939707279205322, "logps/chosen": -128.52688598632812, "logps/rejected": -289.8624572753906, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.2614359855651855, "rewards/margins": 14.206296920776367, "rewards/rejected": -20.467735290527344, "step": 4671 }, { "epoch": 7.5, "learning_rate": 9.264764169639317e-08, "logits/chosen": -1.4797279834747314, "logits/rejected": -1.4723173379898071, "logps/chosen": -208.32713317871094, "logps/rejected": -352.598876953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.506843566894531, "rewards/margins": 14.681365966796875, "rewards/rejected": -24.188209533691406, "step": 4672 }, { "epoch": 7.5, "learning_rate": 9.254855330955212e-08, "logits/chosen": -1.5180813074111938, "logits/rejected": -1.5404460430145264, "logps/chosen": -185.95999145507812, "logps/rejected": -327.08819580078125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.46059513092041, "rewards/margins": 14.201869010925293, "rewards/rejected": -22.662464141845703, "step": 4673 }, { "epoch": 7.5, "learning_rate": 9.244946492271105e-08, "logits/chosen": -1.3819918632507324, "logits/rejected": -1.3915354013442993, "logps/chosen": -150.98977661132812, "logps/rejected": -260.66412353515625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.844151496887207, "rewards/margins": 9.471665382385254, "rewards/rejected": -17.31581687927246, "step": 4674 }, { "epoch": 7.5, "learning_rate": 9.235037653587e-08, "logits/chosen": -1.3783522844314575, "logits/rejected": -1.3277126550674438, "logps/chosen": -163.96575927734375, "logps/rejected": -275.9067687988281, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.786041259765625, "rewards/margins": 12.45266056060791, "rewards/rejected": -19.23870277404785, "step": 4675 }, { "epoch": 7.51, "learning_rate": 9.225128814902892e-08, "logits/chosen": -1.570798397064209, "logits/rejected": -1.5771746635437012, "logps/chosen": -103.86129760742188, "logps/rejected": -248.21456909179688, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.9839491844177246, "rewards/margins": 14.52195930480957, "rewards/rejected": -17.50590705871582, "step": 4676 }, { "epoch": 7.51, "learning_rate": 9.215219976218786e-08, "logits/chosen": -1.26365065574646, "logits/rejected": -1.3567373752593994, "logps/chosen": -122.02236938476562, "logps/rejected": -275.4773864746094, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.452640056610107, "rewards/margins": 14.306793212890625, "rewards/rejected": -19.759431838989258, "step": 4677 }, { "epoch": 7.51, "learning_rate": 9.205311137534681e-08, "logits/chosen": -1.3751871585845947, "logits/rejected": -1.2816452980041504, "logps/chosen": -136.34095764160156, "logps/rejected": -293.5909423828125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.985490798950195, "rewards/margins": 15.11579704284668, "rewards/rejected": -20.101287841796875, "step": 4678 }, { "epoch": 7.51, "learning_rate": 9.195402298850574e-08, "logits/chosen": -1.4282722473144531, "logits/rejected": -1.44568932056427, "logps/chosen": -231.341796875, "logps/rejected": -346.1517028808594, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -11.781965255737305, "rewards/margins": 14.174338340759277, "rewards/rejected": -25.9563045501709, "step": 4679 }, { "epoch": 7.51, "learning_rate": 9.185493460166469e-08, "logits/chosen": -1.276572585105896, "logits/rejected": -1.3583242893218994, "logps/chosen": -151.66256713867188, "logps/rejected": -283.3266296386719, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.48869514465332, "rewards/margins": 11.953573226928711, "rewards/rejected": -19.44226837158203, "step": 4680 }, { "epoch": 7.51, "learning_rate": 9.175584621482361e-08, "logits/chosen": -1.5001600980758667, "logits/rejected": -1.4685509204864502, "logps/chosen": -139.1666259765625, "logps/rejected": -291.9445495605469, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.011883735656738, "rewards/margins": 14.305089950561523, "rewards/rejected": -20.316974639892578, "step": 4681 }, { "epoch": 7.52, "learning_rate": 9.165675782798256e-08, "logits/chosen": -1.4797042608261108, "logits/rejected": -1.608716607093811, "logps/chosen": -159.43283081054688, "logps/rejected": -303.61358642578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.7165679931640625, "rewards/margins": 11.393142700195312, "rewards/rejected": -19.109710693359375, "step": 4682 }, { "epoch": 7.52, "learning_rate": 9.155766944114149e-08, "logits/chosen": -1.395006775856018, "logits/rejected": -1.424055814743042, "logps/chosen": -125.1851806640625, "logps/rejected": -262.9864807128906, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.210836887359619, "rewards/margins": 12.62380599975586, "rewards/rejected": -18.834644317626953, "step": 4683 }, { "epoch": 7.52, "learning_rate": 9.145858105430044e-08, "logits/chosen": -1.4026089906692505, "logits/rejected": -1.352933406829834, "logps/chosen": -161.1826171875, "logps/rejected": -299.03204345703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.531794548034668, "rewards/margins": 14.06709098815918, "rewards/rejected": -21.598886489868164, "step": 4684 }, { "epoch": 7.52, "learning_rate": 9.135949266745937e-08, "logits/chosen": -1.3204432725906372, "logits/rejected": -1.4012277126312256, "logps/chosen": -186.34585571289062, "logps/rejected": -319.91046142578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.24697494506836, "rewards/margins": 13.53145980834961, "rewards/rejected": -22.77843475341797, "step": 4685 }, { "epoch": 7.52, "learning_rate": 9.12604042806183e-08, "logits/chosen": -1.383288025856018, "logits/rejected": -1.3893635272979736, "logps/chosen": -129.77774047851562, "logps/rejected": -247.53672790527344, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.686770915985107, "rewards/margins": 11.766590118408203, "rewards/rejected": -17.45336151123047, "step": 4686 }, { "epoch": 7.52, "learning_rate": 9.116131589377725e-08, "logits/chosen": -1.580504059791565, "logits/rejected": -1.5359766483306885, "logps/chosen": -136.1266632080078, "logps/rejected": -274.33251953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.8723464012146, "rewards/margins": 13.256122589111328, "rewards/rejected": -19.128467559814453, "step": 4687 }, { "epoch": 7.52, "learning_rate": 9.106222750693618e-08, "logits/chosen": -1.4008338451385498, "logits/rejected": -1.4169789552688599, "logps/chosen": -170.75767517089844, "logps/rejected": -311.5745849609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.931851387023926, "rewards/margins": 14.704607009887695, "rewards/rejected": -23.636459350585938, "step": 4688 }, { "epoch": 7.53, "learning_rate": 9.096313912009512e-08, "logits/chosen": -1.4016352891921997, "logits/rejected": -1.494713306427002, "logps/chosen": -200.95388793945312, "logps/rejected": -350.3515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -10.099800109863281, "rewards/margins": 14.090203285217285, "rewards/rejected": -24.19000244140625, "step": 4689 }, { "epoch": 7.53, "learning_rate": 9.086405073325405e-08, "logits/chosen": -1.5773314237594604, "logits/rejected": -1.6315600872039795, "logps/chosen": -188.18704223632812, "logps/rejected": -315.11865234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.754254341125488, "rewards/margins": 12.152396202087402, "rewards/rejected": -21.90665054321289, "step": 4690 }, { "epoch": 7.53, "learning_rate": 9.0764962346413e-08, "logits/chosen": -1.3998918533325195, "logits/rejected": -1.39747953414917, "logps/chosen": -135.30029296875, "logps/rejected": -303.98468017578125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.903489112854004, "rewards/margins": 16.018362045288086, "rewards/rejected": -20.921850204467773, "step": 4691 }, { "epoch": 7.53, "learning_rate": 9.066587395957194e-08, "logits/chosen": -1.5262329578399658, "logits/rejected": -1.509722113609314, "logps/chosen": -158.48410034179688, "logps/rejected": -299.859130859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.531373977661133, "rewards/margins": 13.460500717163086, "rewards/rejected": -19.99187469482422, "step": 4692 }, { "epoch": 7.53, "learning_rate": 9.056678557273086e-08, "logits/chosen": -1.4369230270385742, "logits/rejected": -1.4602947235107422, "logps/chosen": -138.4202423095703, "logps/rejected": -342.19970703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.230024337768555, "rewards/margins": 19.51519775390625, "rewards/rejected": -25.745222091674805, "step": 4693 }, { "epoch": 7.53, "learning_rate": 9.046769718588981e-08, "logits/chosen": -1.2998539209365845, "logits/rejected": -1.489733099937439, "logps/chosen": -134.3418731689453, "logps/rejected": -293.5400695800781, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.83195686340332, "rewards/margins": 11.895517349243164, "rewards/rejected": -18.727474212646484, "step": 4694 }, { "epoch": 7.54, "learning_rate": 9.036860879904874e-08, "logits/chosen": -1.3041269779205322, "logits/rejected": -1.3039062023162842, "logps/chosen": -145.80392456054688, "logps/rejected": -270.0723876953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.189149856567383, "rewards/margins": 12.363083839416504, "rewards/rejected": -19.55223274230957, "step": 4695 }, { "epoch": 7.54, "learning_rate": 9.026952041220769e-08, "logits/chosen": -1.4478561878204346, "logits/rejected": -1.4336051940917969, "logps/chosen": -170.98776245117188, "logps/rejected": -291.117431640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.052258491516113, "rewards/margins": 13.115023612976074, "rewards/rejected": -22.167282104492188, "step": 4696 }, { "epoch": 7.54, "learning_rate": 9.017043202536661e-08, "logits/chosen": -1.6634595394134521, "logits/rejected": -1.6278806924819946, "logps/chosen": -150.7771759033203, "logps/rejected": -274.83697509765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.91173791885376, "rewards/margins": 13.609630584716797, "rewards/rejected": -20.52136993408203, "step": 4697 }, { "epoch": 7.54, "learning_rate": 9.007134363852556e-08, "logits/chosen": -1.491561770439148, "logits/rejected": -1.5233705043792725, "logps/chosen": -155.96990966796875, "logps/rejected": -328.9588623046875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.528193950653076, "rewards/margins": 16.92095947265625, "rewards/rejected": -24.449153900146484, "step": 4698 }, { "epoch": 7.54, "learning_rate": 8.99722552516845e-08, "logits/chosen": -1.5453157424926758, "logits/rejected": -1.6660012006759644, "logps/chosen": -155.7532958984375, "logps/rejected": -299.9350280761719, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.298360824584961, "rewards/margins": 13.493926048278809, "rewards/rejected": -20.792285919189453, "step": 4699 }, { "epoch": 7.54, "learning_rate": 8.987316686484344e-08, "logits/chosen": -1.5169858932495117, "logits/rejected": -1.5188794136047363, "logps/chosen": -188.78668212890625, "logps/rejected": -327.8924560546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.313609600067139, "rewards/margins": 14.011138916015625, "rewards/rejected": -21.324748992919922, "step": 4700 }, { "epoch": 7.55, "learning_rate": 8.977407847800238e-08, "logits/chosen": -1.4109373092651367, "logits/rejected": -1.4384793043136597, "logps/chosen": -183.653076171875, "logps/rejected": -319.7188720703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.954524040222168, "rewards/margins": 12.710280418395996, "rewards/rejected": -21.664804458618164, "step": 4701 }, { "epoch": 7.55, "learning_rate": 8.96749900911613e-08, "logits/chosen": -1.479729413986206, "logits/rejected": -1.4415141344070435, "logps/chosen": -141.6053466796875, "logps/rejected": -243.20616149902344, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.309662818908691, "rewards/margins": 12.552441596984863, "rewards/rejected": -17.862104415893555, "step": 4702 }, { "epoch": 7.55, "learning_rate": 8.957590170432025e-08, "logits/chosen": -1.6321587562561035, "logits/rejected": -1.6633098125457764, "logps/chosen": -166.41702270507812, "logps/rejected": -332.40069580078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.312561988830566, "rewards/margins": 17.46073341369629, "rewards/rejected": -24.773296356201172, "step": 4703 }, { "epoch": 7.55, "learning_rate": 8.94768133174792e-08, "logits/chosen": -1.5055994987487793, "logits/rejected": -1.4562609195709229, "logps/chosen": -188.40005493164062, "logps/rejected": -287.43548583984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.7014665603637695, "rewards/margins": 11.956165313720703, "rewards/rejected": -19.65763282775879, "step": 4704 }, { "epoch": 7.55, "learning_rate": 8.937772493063813e-08, "logits/chosen": -1.5708049535751343, "logits/rejected": -1.5106664896011353, "logps/chosen": -205.771728515625, "logps/rejected": -327.30084228515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -10.633448600769043, "rewards/margins": 12.655526161193848, "rewards/rejected": -23.28897476196289, "step": 4705 }, { "epoch": 7.55, "learning_rate": 8.927863654379706e-08, "logits/chosen": -1.5547195672988892, "logits/rejected": -1.6433590650558472, "logps/chosen": -154.21633911132812, "logps/rejected": -293.0074768066406, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.116382598876953, "rewards/margins": 11.579911231994629, "rewards/rejected": -18.6962947845459, "step": 4706 }, { "epoch": 7.56, "learning_rate": 8.9179548156956e-08, "logits/chosen": -1.5321779251098633, "logits/rejected": -1.43524968624115, "logps/chosen": -171.66183471679688, "logps/rejected": -239.5718231201172, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.177508354187012, "rewards/margins": 9.88084888458252, "rewards/rejected": -16.05835723876953, "step": 4707 }, { "epoch": 7.56, "learning_rate": 8.908045977011494e-08, "logits/chosen": -1.3314945697784424, "logits/rejected": -1.4202792644500732, "logps/chosen": -147.01148986816406, "logps/rejected": -271.3104553222656, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.869982719421387, "rewards/margins": 11.690071105957031, "rewards/rejected": -19.560054779052734, "step": 4708 }, { "epoch": 7.56, "learning_rate": 8.898137138327388e-08, "logits/chosen": -1.4876796007156372, "logits/rejected": -1.5966458320617676, "logps/chosen": -179.2229461669922, "logps/rejected": -308.83050537109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.636667251586914, "rewards/margins": 11.332965850830078, "rewards/rejected": -19.969633102416992, "step": 4709 }, { "epoch": 7.56, "learning_rate": 8.888228299643281e-08, "logits/chosen": -1.4067193269729614, "logits/rejected": -1.449587345123291, "logps/chosen": -121.21437072753906, "logps/rejected": -289.77362060546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.367801666259766, "rewards/margins": 14.496866226196289, "rewards/rejected": -20.864667892456055, "step": 4710 }, { "epoch": 7.56, "learning_rate": 8.878319460959176e-08, "logits/chosen": -1.3372551202774048, "logits/rejected": -1.2549307346343994, "logps/chosen": -166.3625030517578, "logps/rejected": -286.3956298828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.398911952972412, "rewards/margins": 14.295525550842285, "rewards/rejected": -20.694438934326172, "step": 4711 }, { "epoch": 7.56, "learning_rate": 8.868410622275069e-08, "logits/chosen": -1.4209767580032349, "logits/rejected": -1.5228395462036133, "logps/chosen": -168.14749145507812, "logps/rejected": -314.01910400390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.663097381591797, "rewards/margins": 14.242862701416016, "rewards/rejected": -22.905960083007812, "step": 4712 }, { "epoch": 7.57, "learning_rate": 8.858501783590964e-08, "logits/chosen": -1.485479712486267, "logits/rejected": -1.4641757011413574, "logps/chosen": -224.65835571289062, "logps/rejected": -342.92242431640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -11.812578201293945, "rewards/margins": 13.236865997314453, "rewards/rejected": -25.04944610595703, "step": 4713 }, { "epoch": 7.57, "learning_rate": 8.848592944906856e-08, "logits/chosen": -1.5092283487319946, "logits/rejected": -1.4219037294387817, "logps/chosen": -175.57301330566406, "logps/rejected": -338.89569091796875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.972754955291748, "rewards/margins": 16.078598022460938, "rewards/rejected": -23.051353454589844, "step": 4714 }, { "epoch": 7.57, "learning_rate": 8.83868410622275e-08, "logits/chosen": -1.6338449716567993, "logits/rejected": -1.6691274642944336, "logps/chosen": -156.5954132080078, "logps/rejected": -283.3481750488281, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.896700859069824, "rewards/margins": 12.337579727172852, "rewards/rejected": -19.23427963256836, "step": 4715 }, { "epoch": 7.57, "learning_rate": 8.828775267538644e-08, "logits/chosen": -1.3378397226333618, "logits/rejected": -1.3853110074996948, "logps/chosen": -151.28402709960938, "logps/rejected": -280.86175537109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.552168846130371, "rewards/margins": 12.079757690429688, "rewards/rejected": -20.631927490234375, "step": 4716 }, { "epoch": 7.57, "learning_rate": 8.818866428854538e-08, "logits/chosen": -1.5039646625518799, "logits/rejected": -1.4615591764450073, "logps/chosen": -183.71316528320312, "logps/rejected": -316.1279296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.877625465393066, "rewards/margins": 14.499176979064941, "rewards/rejected": -23.376802444458008, "step": 4717 }, { "epoch": 7.57, "learning_rate": 8.808957590170432e-08, "logits/chosen": -1.5677212476730347, "logits/rejected": -1.485735297203064, "logps/chosen": -173.0446014404297, "logps/rejected": -293.56494140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.827578067779541, "rewards/margins": 14.64590072631836, "rewards/rejected": -22.47347640991211, "step": 4718 }, { "epoch": 7.57, "learning_rate": 8.799048751486325e-08, "logits/chosen": -1.4940017461776733, "logits/rejected": -1.4886754751205444, "logps/chosen": -113.06331634521484, "logps/rejected": -278.56915283203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.232576847076416, "rewards/margins": 15.937223434448242, "rewards/rejected": -21.1697998046875, "step": 4719 }, { "epoch": 7.58, "learning_rate": 8.78913991280222e-08, "logits/chosen": -1.4246290922164917, "logits/rejected": -1.4246153831481934, "logps/chosen": -120.55335998535156, "logps/rejected": -265.0108642578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.209877014160156, "rewards/margins": 14.578950881958008, "rewards/rejected": -18.788827896118164, "step": 4720 }, { "epoch": 7.58, "learning_rate": 8.779231074118113e-08, "logits/chosen": -1.439969539642334, "logits/rejected": -1.4460550546646118, "logps/chosen": -158.12982177734375, "logps/rejected": -312.85870361328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.583681106567383, "rewards/margins": 14.658941268920898, "rewards/rejected": -22.24262237548828, "step": 4721 }, { "epoch": 7.58, "learning_rate": 8.769322235434007e-08, "logits/chosen": -1.708014726638794, "logits/rejected": -1.6944912672042847, "logps/chosen": -107.54056549072266, "logps/rejected": -271.5185852050781, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.1777000427246094, "rewards/margins": 16.055084228515625, "rewards/rejected": -19.232784271240234, "step": 4722 }, { "epoch": 7.58, "learning_rate": 8.759413396749901e-08, "logits/chosen": -1.5618550777435303, "logits/rejected": -1.5975916385650635, "logps/chosen": -137.83242797851562, "logps/rejected": -317.9883728027344, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.143451690673828, "rewards/margins": 16.17877960205078, "rewards/rejected": -22.32223129272461, "step": 4723 }, { "epoch": 7.58, "learning_rate": 8.749504558065794e-08, "logits/chosen": -1.375105857849121, "logits/rejected": -1.391018271446228, "logps/chosen": -191.09234619140625, "logps/rejected": -322.84063720703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -10.402933120727539, "rewards/margins": 13.566898345947266, "rewards/rejected": -23.969829559326172, "step": 4724 }, { "epoch": 7.58, "learning_rate": 8.739595719381689e-08, "logits/chosen": -1.4987280368804932, "logits/rejected": -1.417028546333313, "logps/chosen": -195.1342010498047, "logps/rejected": -307.31396484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.25172233581543, "rewards/margins": 13.733865737915039, "rewards/rejected": -22.98558807373047, "step": 4725 }, { "epoch": 7.59, "learning_rate": 8.729686880697582e-08, "logits/chosen": -1.5606415271759033, "logits/rejected": -1.4880845546722412, "logps/chosen": -115.09814453125, "logps/rejected": -243.11570739746094, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.6351218223571777, "rewards/margins": 13.275308609008789, "rewards/rejected": -15.910429954528809, "step": 4726 }, { "epoch": 7.59, "learning_rate": 8.719778042013475e-08, "logits/chosen": -1.5517923831939697, "logits/rejected": -1.5226926803588867, "logps/chosen": -186.58535766601562, "logps/rejected": -279.57708740234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.69389009475708, "rewards/margins": 11.833724975585938, "rewards/rejected": -18.527616500854492, "step": 4727 }, { "epoch": 7.59, "learning_rate": 8.709869203329369e-08, "logits/chosen": -1.322426199913025, "logits/rejected": -1.3974436521530151, "logps/chosen": -136.0950927734375, "logps/rejected": -261.6964111328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.680986404418945, "rewards/margins": 12.36867904663086, "rewards/rejected": -19.049665451049805, "step": 4728 }, { "epoch": 7.59, "learning_rate": 8.699960364645263e-08, "logits/chosen": -1.6627970933914185, "logits/rejected": -1.7141175270080566, "logps/chosen": -146.1900634765625, "logps/rejected": -300.8796081542969, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.259432792663574, "rewards/margins": 15.951683044433594, "rewards/rejected": -21.211116790771484, "step": 4729 }, { "epoch": 7.59, "learning_rate": 8.690051525961158e-08, "logits/chosen": -1.3275063037872314, "logits/rejected": -1.4167678356170654, "logps/chosen": -155.60263061523438, "logps/rejected": -323.9604187011719, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.2940568923950195, "rewards/margins": 16.25516700744629, "rewards/rejected": -23.549224853515625, "step": 4730 }, { "epoch": 7.59, "learning_rate": 8.68014268727705e-08, "logits/chosen": -1.3957899808883667, "logits/rejected": -1.419694185256958, "logps/chosen": -233.71780395507812, "logps/rejected": -345.4344482421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -11.7249755859375, "rewards/margins": 12.776243209838867, "rewards/rejected": -24.501218795776367, "step": 4731 }, { "epoch": 7.6, "learning_rate": 8.670233848592945e-08, "logits/chosen": -1.2962726354599, "logits/rejected": -1.3864226341247559, "logps/chosen": -171.94631958007812, "logps/rejected": -332.2319641113281, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.557846069335938, "rewards/margins": 14.34072494506836, "rewards/rejected": -22.898571014404297, "step": 4732 }, { "epoch": 7.6, "learning_rate": 8.660325009908838e-08, "logits/chosen": -1.4189794063568115, "logits/rejected": -1.4011517763137817, "logps/chosen": -175.07763671875, "logps/rejected": -301.352783203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.694611549377441, "rewards/margins": 10.955744743347168, "rewards/rejected": -20.65035629272461, "step": 4733 }, { "epoch": 7.6, "learning_rate": 8.650416171224733e-08, "logits/chosen": -1.5141884088516235, "logits/rejected": -1.647175908088684, "logps/chosen": -144.28167724609375, "logps/rejected": -303.4495849609375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.171380519866943, "rewards/margins": 14.133787155151367, "rewards/rejected": -20.30516815185547, "step": 4734 }, { "epoch": 7.6, "learning_rate": 8.640507332540625e-08, "logits/chosen": -1.6075409650802612, "logits/rejected": -1.5492010116577148, "logps/chosen": -171.6099090576172, "logps/rejected": -266.79583740234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.834927558898926, "rewards/margins": 10.087003707885742, "rewards/rejected": -16.92193031311035, "step": 4735 }, { "epoch": 7.6, "learning_rate": 8.63059849385652e-08, "logits/chosen": -1.3793721199035645, "logits/rejected": -1.3449857234954834, "logps/chosen": -116.20500183105469, "logps/rejected": -266.173828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.927165508270264, "rewards/margins": 14.923125267028809, "rewards/rejected": -19.850292205810547, "step": 4736 }, { "epoch": 7.6, "learning_rate": 8.620689655172414e-08, "logits/chosen": -1.4459545612335205, "logits/rejected": -1.4735159873962402, "logps/chosen": -215.08274841308594, "logps/rejected": -340.91986083984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -11.283963203430176, "rewards/margins": 12.812395095825195, "rewards/rejected": -24.096359252929688, "step": 4737 }, { "epoch": 7.61, "learning_rate": 8.610780816488307e-08, "logits/chosen": -1.535648226737976, "logits/rejected": -1.4717156887054443, "logps/chosen": -149.67103576660156, "logps/rejected": -262.577392578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.651946067810059, "rewards/margins": 13.436820983886719, "rewards/rejected": -19.088768005371094, "step": 4738 }, { "epoch": 7.61, "learning_rate": 8.600871977804201e-08, "logits/chosen": -1.4350230693817139, "logits/rejected": -1.4514607191085815, "logps/chosen": -134.27151489257812, "logps/rejected": -283.39385986328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.934743881225586, "rewards/margins": 14.311979293823242, "rewards/rejected": -21.246721267700195, "step": 4739 }, { "epoch": 7.61, "learning_rate": 8.590963139120094e-08, "logits/chosen": -1.6292638778686523, "logits/rejected": -1.601122260093689, "logps/chosen": -144.06423950195312, "logps/rejected": -240.56333923339844, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.001392841339111, "rewards/margins": 11.121892929077148, "rewards/rejected": -16.1232852935791, "step": 4740 }, { "epoch": 7.61, "learning_rate": 8.581054300435989e-08, "logits/chosen": -1.3901903629302979, "logits/rejected": -1.4786686897277832, "logps/chosen": -134.11297607421875, "logps/rejected": -298.23004150390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.989151477813721, "rewards/margins": 14.084030151367188, "rewards/rejected": -20.073183059692383, "step": 4741 }, { "epoch": 7.61, "learning_rate": 8.571145461751882e-08, "logits/chosen": -1.6070424318313599, "logits/rejected": -1.6352970600128174, "logps/chosen": -130.3424530029297, "logps/rejected": -262.3775329589844, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.587765693664551, "rewards/margins": 12.443835258483887, "rewards/rejected": -18.031600952148438, "step": 4742 }, { "epoch": 7.61, "learning_rate": 8.561236623067775e-08, "logits/chosen": -1.4599140882492065, "logits/rejected": -1.4015250205993652, "logps/chosen": -143.6072998046875, "logps/rejected": -275.05108642578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.401677131652832, "rewards/margins": 13.834518432617188, "rewards/rejected": -21.236194610595703, "step": 4743 }, { "epoch": 7.61, "learning_rate": 8.55132778438367e-08, "logits/chosen": -1.571688175201416, "logits/rejected": -1.5473450422286987, "logps/chosen": -152.24240112304688, "logps/rejected": -283.36529541015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.187397480010986, "rewards/margins": 13.581812858581543, "rewards/rejected": -19.769210815429688, "step": 4744 }, { "epoch": 7.62, "learning_rate": 8.541418945699563e-08, "logits/chosen": -1.4360275268554688, "logits/rejected": -1.5306233167648315, "logps/chosen": -111.34658813476562, "logps/rejected": -270.0908508300781, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.3043718338012695, "rewards/margins": 14.378730773925781, "rewards/rejected": -18.683101654052734, "step": 4745 }, { "epoch": 7.62, "learning_rate": 8.531510107015458e-08, "logits/chosen": -1.4586148262023926, "logits/rejected": -1.5360833406448364, "logps/chosen": -235.20675659179688, "logps/rejected": -402.67742919921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -12.469856262207031, "rewards/margins": 14.584729194641113, "rewards/rejected": -27.054584503173828, "step": 4746 }, { "epoch": 7.62, "learning_rate": 8.521601268331351e-08, "logits/chosen": -1.4325064420700073, "logits/rejected": -1.3834388256072998, "logps/chosen": -134.2305145263672, "logps/rejected": -296.9809875488281, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.741318702697754, "rewards/margins": 14.682330131530762, "rewards/rejected": -19.423648834228516, "step": 4747 }, { "epoch": 7.62, "learning_rate": 8.511692429647245e-08, "logits/chosen": -1.4673691987991333, "logits/rejected": -1.4311282634735107, "logps/chosen": -196.43991088867188, "logps/rejected": -363.74102783203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -10.276126861572266, "rewards/margins": 15.343452453613281, "rewards/rejected": -25.619579315185547, "step": 4748 }, { "epoch": 7.62, "learning_rate": 8.501783590963139e-08, "logits/chosen": -1.4131765365600586, "logits/rejected": -1.4250236749649048, "logps/chosen": -175.3342742919922, "logps/rejected": -295.8018798828125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -9.878741264343262, "rewards/margins": 13.389217376708984, "rewards/rejected": -23.26795768737793, "step": 4749 }, { "epoch": 7.62, "learning_rate": 8.491874752279033e-08, "logits/chosen": -1.5945651531219482, "logits/rejected": -1.630975365638733, "logps/chosen": -162.35372924804688, "logps/rejected": -319.8852844238281, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.963551044464111, "rewards/margins": 13.614198684692383, "rewards/rejected": -21.577747344970703, "step": 4750 }, { "epoch": 7.63, "learning_rate": 8.481965913594927e-08, "logits/chosen": -1.6661417484283447, "logits/rejected": -1.618222951889038, "logps/chosen": -147.11959838867188, "logps/rejected": -286.15325927734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.629298210144043, "rewards/margins": 13.028430938720703, "rewards/rejected": -20.657730102539062, "step": 4751 }, { "epoch": 7.63, "learning_rate": 8.472057074910819e-08, "logits/chosen": -1.3984218835830688, "logits/rejected": -1.4352836608886719, "logps/chosen": -177.99420166015625, "logps/rejected": -354.4437255859375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.615397930145264, "rewards/margins": 16.558412551879883, "rewards/rejected": -24.173809051513672, "step": 4752 }, { "epoch": 7.63, "learning_rate": 8.462148236226714e-08, "logits/chosen": -1.3692353963851929, "logits/rejected": -1.4420534372329712, "logps/chosen": -159.91470336914062, "logps/rejected": -318.7105407714844, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.377086639404297, "rewards/margins": 12.710928916931152, "rewards/rejected": -22.088016510009766, "step": 4753 }, { "epoch": 7.63, "learning_rate": 8.452239397542607e-08, "logits/chosen": -1.5102165937423706, "logits/rejected": -1.5122572183609009, "logps/chosen": -168.33778381347656, "logps/rejected": -363.8729248046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.444846153259277, "rewards/margins": 19.758359909057617, "rewards/rejected": -27.203205108642578, "step": 4754 }, { "epoch": 7.63, "learning_rate": 8.442330558858502e-08, "logits/chosen": -1.5187714099884033, "logits/rejected": -1.6741759777069092, "logps/chosen": -108.2055435180664, "logps/rejected": -274.7220458984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.1362781524658203, "rewards/margins": 13.917840957641602, "rewards/rejected": -17.05411720275879, "step": 4755 }, { "epoch": 7.63, "learning_rate": 8.432421720174395e-08, "logits/chosen": -1.6290470361709595, "logits/rejected": -1.700708031654358, "logps/chosen": -140.0032501220703, "logps/rejected": -290.09576416015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.377790451049805, "rewards/margins": 14.643142700195312, "rewards/rejected": -20.02093505859375, "step": 4756 }, { "epoch": 7.64, "learning_rate": 8.422512881490289e-08, "logits/chosen": -1.40584397315979, "logits/rejected": -1.4241124391555786, "logps/chosen": -171.97991943359375, "logps/rejected": -311.65625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.38361930847168, "rewards/margins": 13.001313209533691, "rewards/rejected": -21.384931564331055, "step": 4757 }, { "epoch": 7.64, "learning_rate": 8.412604042806183e-08, "logits/chosen": -1.3932013511657715, "logits/rejected": -1.3312305212020874, "logps/chosen": -151.44078063964844, "logps/rejected": -289.4534912109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.007436275482178, "rewards/margins": 14.932750701904297, "rewards/rejected": -20.940185546875, "step": 4758 }, { "epoch": 7.64, "learning_rate": 8.402695204122077e-08, "logits/chosen": -1.4639860391616821, "logits/rejected": -1.514063835144043, "logps/chosen": -171.81576538085938, "logps/rejected": -319.91656494140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.182267189025879, "rewards/margins": 14.356365203857422, "rewards/rejected": -22.538633346557617, "step": 4759 }, { "epoch": 7.64, "learning_rate": 8.39278636543797e-08, "logits/chosen": -1.6304694414138794, "logits/rejected": -1.592578411102295, "logps/chosen": -140.41940307617188, "logps/rejected": -252.97280883789062, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.484315872192383, "rewards/margins": 12.51307487487793, "rewards/rejected": -17.997390747070312, "step": 4760 }, { "epoch": 7.64, "learning_rate": 8.382877526753863e-08, "logits/chosen": -1.5953744649887085, "logits/rejected": -1.5051815509796143, "logps/chosen": -195.4459991455078, "logps/rejected": -273.8307189941406, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.091302871704102, "rewards/margins": 11.191961288452148, "rewards/rejected": -18.28326416015625, "step": 4761 }, { "epoch": 7.64, "learning_rate": 8.372968688069758e-08, "logits/chosen": -1.394031047821045, "logits/rejected": -1.4545239210128784, "logps/chosen": -159.93218994140625, "logps/rejected": -343.8652648925781, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.849750995635986, "rewards/margins": 16.008827209472656, "rewards/rejected": -23.858577728271484, "step": 4762 }, { "epoch": 7.65, "learning_rate": 8.363059849385653e-08, "logits/chosen": -1.56461763381958, "logits/rejected": -1.5282788276672363, "logps/chosen": -135.47633361816406, "logps/rejected": -278.03289794921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.6640472412109375, "rewards/margins": 14.630223274230957, "rewards/rejected": -19.294269561767578, "step": 4763 }, { "epoch": 7.65, "learning_rate": 8.353151010701545e-08, "logits/chosen": -1.463106632232666, "logits/rejected": -1.4530748128890991, "logps/chosen": -142.42347717285156, "logps/rejected": -252.92355346679688, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.041672229766846, "rewards/margins": 11.401652336120605, "rewards/rejected": -17.44332504272461, "step": 4764 }, { "epoch": 7.65, "learning_rate": 8.343242172017439e-08, "logits/chosen": -1.520037293434143, "logits/rejected": -1.5279098749160767, "logps/chosen": -165.12753295898438, "logps/rejected": -332.9457092285156, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.980277061462402, "rewards/margins": 16.159137725830078, "rewards/rejected": -25.139415740966797, "step": 4765 }, { "epoch": 7.65, "learning_rate": 8.333333333333333e-08, "logits/chosen": -1.5285849571228027, "logits/rejected": -1.5620148181915283, "logps/chosen": -172.17762756347656, "logps/rejected": -281.8923645019531, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.156675338745117, "rewards/margins": 11.833928108215332, "rewards/rejected": -17.990604400634766, "step": 4766 }, { "epoch": 7.65, "learning_rate": 8.323424494649227e-08, "logits/chosen": -1.4588035345077515, "logits/rejected": -1.4886770248413086, "logps/chosen": -124.55907440185547, "logps/rejected": -280.48724365234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.51432466506958, "rewards/margins": 13.895761489868164, "rewards/rejected": -19.41008758544922, "step": 4767 }, { "epoch": 7.65, "learning_rate": 8.313515655965119e-08, "logits/chosen": -1.5776599645614624, "logits/rejected": -1.519174575805664, "logps/chosen": -125.89043426513672, "logps/rejected": -311.7229919433594, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.802011013031006, "rewards/margins": 17.6610050201416, "rewards/rejected": -22.463016510009766, "step": 4768 }, { "epoch": 7.65, "learning_rate": 8.303606817281014e-08, "logits/chosen": -1.533827304840088, "logits/rejected": -1.5753484964370728, "logps/chosen": -143.79110717773438, "logps/rejected": -325.6652526855469, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.658910751342773, "rewards/margins": 17.871015548706055, "rewards/rejected": -23.529926300048828, "step": 4769 }, { "epoch": 7.66, "learning_rate": 8.293697978596908e-08, "logits/chosen": -1.475324034690857, "logits/rejected": -1.4758656024932861, "logps/chosen": -145.5684814453125, "logps/rejected": -268.9165344238281, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.806931495666504, "rewards/margins": 11.83403205871582, "rewards/rejected": -18.64096450805664, "step": 4770 }, { "epoch": 7.66, "learning_rate": 8.283789139912802e-08, "logits/chosen": -1.3695480823516846, "logits/rejected": -1.3622817993164062, "logps/chosen": -188.85122680664062, "logps/rejected": -311.6961669921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.767132759094238, "rewards/margins": 12.720229148864746, "rewards/rejected": -21.487361907958984, "step": 4771 }, { "epoch": 7.66, "learning_rate": 8.273880301228696e-08, "logits/chosen": -1.6632983684539795, "logits/rejected": -1.4955830574035645, "logps/chosen": -169.6571807861328, "logps/rejected": -269.78765869140625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.539531230926514, "rewards/margins": 12.84572982788086, "rewards/rejected": -20.38526153564453, "step": 4772 }, { "epoch": 7.66, "learning_rate": 8.263971462544588e-08, "logits/chosen": -1.3316935300827026, "logits/rejected": -1.389329195022583, "logps/chosen": -145.34100341796875, "logps/rejected": -278.16455078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.207615852355957, "rewards/margins": 12.400346755981445, "rewards/rejected": -20.60796356201172, "step": 4773 }, { "epoch": 7.66, "learning_rate": 8.254062623860483e-08, "logits/chosen": -1.5951980352401733, "logits/rejected": -1.6883621215820312, "logps/chosen": -163.28517150878906, "logps/rejected": -301.76092529296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.457943916320801, "rewards/margins": 13.682580947875977, "rewards/rejected": -21.140525817871094, "step": 4774 }, { "epoch": 7.66, "learning_rate": 8.244153785176378e-08, "logits/chosen": -1.480334758758545, "logits/rejected": -1.4564454555511475, "logps/chosen": -156.31349182128906, "logps/rejected": -274.0665283203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.668519020080566, "rewards/margins": 12.31429386138916, "rewards/rejected": -18.982812881469727, "step": 4775 }, { "epoch": 7.67, "learning_rate": 8.234244946492271e-08, "logits/chosen": -1.5443370342254639, "logits/rejected": -1.5495305061340332, "logps/chosen": -217.34304809570312, "logps/rejected": -319.9864196777344, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -11.795514106750488, "rewards/margins": 11.725458145141602, "rewards/rejected": -23.520973205566406, "step": 4776 }, { "epoch": 7.67, "learning_rate": 8.224336107808164e-08, "logits/chosen": -1.5073390007019043, "logits/rejected": -1.5131723880767822, "logps/chosen": -185.6825714111328, "logps/rejected": -318.1754150390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.167139053344727, "rewards/margins": 15.179145812988281, "rewards/rejected": -23.346284866333008, "step": 4777 }, { "epoch": 7.67, "learning_rate": 8.214427269124058e-08, "logits/chosen": -1.4668350219726562, "logits/rejected": -1.2783002853393555, "logps/chosen": -155.72756958007812, "logps/rejected": -244.45428466796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.50297737121582, "rewards/margins": 11.257472038269043, "rewards/rejected": -17.76045036315918, "step": 4778 }, { "epoch": 7.67, "learning_rate": 8.204518430439952e-08, "logits/chosen": -1.520755648612976, "logits/rejected": -1.517698049545288, "logps/chosen": -139.54925537109375, "logps/rejected": -262.2976989746094, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.920642375946045, "rewards/margins": 12.177875518798828, "rewards/rejected": -19.0985164642334, "step": 4779 }, { "epoch": 7.67, "learning_rate": 8.194609591755846e-08, "logits/chosen": -1.327993392944336, "logits/rejected": -1.3283848762512207, "logps/chosen": -148.12619018554688, "logps/rejected": -281.6199645996094, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.719305515289307, "rewards/margins": 13.205678939819336, "rewards/rejected": -19.924985885620117, "step": 4780 }, { "epoch": 7.67, "learning_rate": 8.184700753071739e-08, "logits/chosen": -1.6350281238555908, "logits/rejected": -1.600688099861145, "logps/chosen": -141.07574462890625, "logps/rejected": -268.331298828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.153635025024414, "rewards/margins": 14.226531028747559, "rewards/rejected": -20.38016700744629, "step": 4781 }, { "epoch": 7.68, "learning_rate": 8.174791914387634e-08, "logits/chosen": -1.574456810951233, "logits/rejected": -1.6286429166793823, "logps/chosen": -174.01431274414062, "logps/rejected": -318.9050598144531, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.283818244934082, "rewards/margins": 14.253927230834961, "rewards/rejected": -23.53774642944336, "step": 4782 }, { "epoch": 7.68, "learning_rate": 8.164883075703527e-08, "logits/chosen": -1.3720003366470337, "logits/rejected": -1.4002422094345093, "logps/chosen": -155.02651977539062, "logps/rejected": -277.1254577636719, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.042394638061523, "rewards/margins": 12.846209526062012, "rewards/rejected": -19.88860321044922, "step": 4783 }, { "epoch": 7.68, "learning_rate": 8.154974237019422e-08, "logits/chosen": -1.4726266860961914, "logits/rejected": -1.4261341094970703, "logps/chosen": -143.4271240234375, "logps/rejected": -323.5813293457031, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.413942337036133, "rewards/margins": 15.532570838928223, "rewards/rejected": -21.94651222229004, "step": 4784 }, { "epoch": 7.68, "learning_rate": 8.145065398335314e-08, "logits/chosen": -1.3812963962554932, "logits/rejected": -1.348036766052246, "logps/chosen": -219.85923767089844, "logps/rejected": -304.71856689453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.80459213256836, "rewards/margins": 11.698616027832031, "rewards/rejected": -21.50320816040039, "step": 4785 }, { "epoch": 7.68, "learning_rate": 8.135156559651208e-08, "logits/chosen": -1.5422595739364624, "logits/rejected": -1.4884412288665771, "logps/chosen": -167.46914672851562, "logps/rejected": -318.02093505859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.197440147399902, "rewards/margins": 16.312192916870117, "rewards/rejected": -23.509634017944336, "step": 4786 }, { "epoch": 7.68, "learning_rate": 8.125247720967102e-08, "logits/chosen": -1.309543490409851, "logits/rejected": -1.4253430366516113, "logps/chosen": -182.3259735107422, "logps/rejected": -337.8734436035156, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.064485549926758, "rewards/margins": 15.079157829284668, "rewards/rejected": -23.14364242553711, "step": 4787 }, { "epoch": 7.69, "learning_rate": 8.115338882282996e-08, "logits/chosen": -1.2776225805282593, "logits/rejected": -1.3874870538711548, "logps/chosen": -186.14773559570312, "logps/rejected": -305.6936340332031, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -10.064751625061035, "rewards/margins": 11.589435577392578, "rewards/rejected": -21.65418815612793, "step": 4788 }, { "epoch": 7.69, "learning_rate": 8.105430043598891e-08, "logits/chosen": -1.481885313987732, "logits/rejected": -1.416290283203125, "logps/chosen": -132.5230712890625, "logps/rejected": -261.89434814453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.168519020080566, "rewards/margins": 14.421187400817871, "rewards/rejected": -19.589706420898438, "step": 4789 }, { "epoch": 7.69, "learning_rate": 8.095521204914783e-08, "logits/chosen": -1.7414865493774414, "logits/rejected": -1.7252850532531738, "logps/chosen": -178.866455078125, "logps/rejected": -336.4989013671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.7740278244018555, "rewards/margins": 14.39316177368164, "rewards/rejected": -21.167190551757812, "step": 4790 }, { "epoch": 7.69, "learning_rate": 8.085612366230678e-08, "logits/chosen": -1.555110216140747, "logits/rejected": -1.5013700723648071, "logps/chosen": -172.4111328125, "logps/rejected": -318.74853515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.568130016326904, "rewards/margins": 15.355386734008789, "rewards/rejected": -22.92351722717285, "step": 4791 }, { "epoch": 7.69, "learning_rate": 8.075703527546571e-08, "logits/chosen": -1.629441499710083, "logits/rejected": -1.6092867851257324, "logps/chosen": -170.12200927734375, "logps/rejected": -294.5859069824219, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.26343059539795, "rewards/margins": 12.827059745788574, "rewards/rejected": -21.090490341186523, "step": 4792 }, { "epoch": 7.69, "learning_rate": 8.065794688862466e-08, "logits/chosen": -1.6244920492172241, "logits/rejected": -1.613559365272522, "logps/chosen": -115.6810302734375, "logps/rejected": -241.38870239257812, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.3398756980895996, "rewards/margins": 14.09566593170166, "rewards/rejected": -17.4355411529541, "step": 4793 }, { "epoch": 7.7, "learning_rate": 8.055885850178359e-08, "logits/chosen": -1.6933492422103882, "logits/rejected": -1.668099284172058, "logps/chosen": -171.79647827148438, "logps/rejected": -327.5921936035156, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.154861927032471, "rewards/margins": 16.589130401611328, "rewards/rejected": -23.74399185180664, "step": 4794 }, { "epoch": 7.7, "learning_rate": 8.045977011494252e-08, "logits/chosen": -1.4437012672424316, "logits/rejected": -1.4881542921066284, "logps/chosen": -173.90151977539062, "logps/rejected": -346.69287109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.816163063049316, "rewards/margins": 15.70803451538086, "rewards/rejected": -25.52419662475586, "step": 4795 }, { "epoch": 7.7, "learning_rate": 8.036068172810147e-08, "logits/chosen": -1.4825325012207031, "logits/rejected": -1.5425583124160767, "logps/chosen": -189.38572692871094, "logps/rejected": -342.0391845703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -10.530987739562988, "rewards/margins": 14.627291679382324, "rewards/rejected": -25.158281326293945, "step": 4796 }, { "epoch": 7.7, "learning_rate": 8.02615933412604e-08, "logits/chosen": -1.4591968059539795, "logits/rejected": -1.4739456176757812, "logps/chosen": -202.73387145996094, "logps/rejected": -321.1228942871094, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.355606555938721, "rewards/margins": 12.76933479309082, "rewards/rejected": -20.124940872192383, "step": 4797 }, { "epoch": 7.7, "learning_rate": 8.016250495441934e-08, "logits/chosen": -1.4544806480407715, "logits/rejected": -1.4027495384216309, "logps/chosen": -176.07504272460938, "logps/rejected": -281.5587463378906, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.147566795349121, "rewards/margins": 12.557506561279297, "rewards/rejected": -20.705074310302734, "step": 4798 }, { "epoch": 7.7, "learning_rate": 8.006341656757827e-08, "logits/chosen": -1.5249247550964355, "logits/rejected": -1.5082813501358032, "logps/chosen": -151.040771484375, "logps/rejected": -310.1117248535156, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.534489631652832, "rewards/margins": 15.384288787841797, "rewards/rejected": -22.918779373168945, "step": 4799 }, { "epoch": 7.7, "learning_rate": 7.996432818073722e-08, "logits/chosen": -1.389906644821167, "logits/rejected": -1.4226722717285156, "logps/chosen": -144.805419921875, "logps/rejected": -247.98707580566406, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.498673915863037, "rewards/margins": 10.713197708129883, "rewards/rejected": -18.211872100830078, "step": 4800 }, { "epoch": 7.71, "learning_rate": 7.986523979389616e-08, "logits/chosen": -1.7218888998031616, "logits/rejected": -1.727052927017212, "logps/chosen": -110.13778686523438, "logps/rejected": -283.1137390136719, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.9581193923950195, "rewards/margins": 15.769174575805664, "rewards/rejected": -20.727293014526367, "step": 4801 }, { "epoch": 7.71, "learning_rate": 7.976615140705508e-08, "logits/chosen": -1.4880125522613525, "logits/rejected": -1.534348487854004, "logps/chosen": -158.4397430419922, "logps/rejected": -285.0205078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.003836631774902, "rewards/margins": 11.220605850219727, "rewards/rejected": -20.224441528320312, "step": 4802 }, { "epoch": 7.71, "learning_rate": 7.966706302021403e-08, "logits/chosen": -1.3613121509552002, "logits/rejected": -1.3533039093017578, "logps/chosen": -175.1567840576172, "logps/rejected": -324.88153076171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.729764938354492, "rewards/margins": 14.644516944885254, "rewards/rejected": -23.374282836914062, "step": 4803 }, { "epoch": 7.71, "learning_rate": 7.956797463337296e-08, "logits/chosen": -1.3851683139801025, "logits/rejected": -1.4540512561798096, "logps/chosen": -162.03482055664062, "logps/rejected": -299.7041320800781, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.620265007019043, "rewards/margins": 11.715475082397461, "rewards/rejected": -19.335739135742188, "step": 4804 }, { "epoch": 7.71, "learning_rate": 7.946888624653191e-08, "logits/chosen": -1.542832374572754, "logits/rejected": -1.5283925533294678, "logps/chosen": -153.12042236328125, "logps/rejected": -304.25457763671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.089602470397949, "rewards/margins": 14.94469165802002, "rewards/rejected": -22.03429412841797, "step": 4805 }, { "epoch": 7.71, "learning_rate": 7.936979785969083e-08, "logits/chosen": -1.6238187551498413, "logits/rejected": -1.542678952217102, "logps/chosen": -131.7935333251953, "logps/rejected": -264.686767578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.31143045425415, "rewards/margins": 14.365427017211914, "rewards/rejected": -19.676856994628906, "step": 4806 }, { "epoch": 7.72, "learning_rate": 7.927070947284978e-08, "logits/chosen": -1.399949550628662, "logits/rejected": -1.414551854133606, "logps/chosen": -146.9158172607422, "logps/rejected": -262.9752502441406, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.811978340148926, "rewards/margins": 11.208044052124023, "rewards/rejected": -18.020023345947266, "step": 4807 }, { "epoch": 7.72, "learning_rate": 7.917162108600872e-08, "logits/chosen": -1.5925066471099854, "logits/rejected": -1.4723541736602783, "logps/chosen": -149.67431640625, "logps/rejected": -267.8202209472656, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.096548080444336, "rewards/margins": 12.231141090393066, "rewards/rejected": -18.32769012451172, "step": 4808 }, { "epoch": 7.72, "learning_rate": 7.907253269916766e-08, "logits/chosen": -1.3174694776535034, "logits/rejected": -1.353927731513977, "logps/chosen": -141.070068359375, "logps/rejected": -264.1796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.7944536209106445, "rewards/margins": 12.498027801513672, "rewards/rejected": -20.292482376098633, "step": 4809 }, { "epoch": 7.72, "learning_rate": 7.897344431232659e-08, "logits/chosen": -1.5490845441818237, "logits/rejected": -1.5164450407028198, "logps/chosen": -223.98516845703125, "logps/rejected": -363.24908447265625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -11.388236999511719, "rewards/margins": 15.413241386413574, "rewards/rejected": -26.80147933959961, "step": 4810 }, { "epoch": 7.72, "learning_rate": 7.887435592548552e-08, "logits/chosen": -1.6239291429519653, "logits/rejected": -1.6142754554748535, "logps/chosen": -152.0208740234375, "logps/rejected": -241.111083984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.250342845916748, "rewards/margins": 10.71432113647461, "rewards/rejected": -15.9646635055542, "step": 4811 }, { "epoch": 7.72, "learning_rate": 7.877526753864447e-08, "logits/chosen": -1.401241660118103, "logits/rejected": -1.369478464126587, "logps/chosen": -159.365478515625, "logps/rejected": -331.59173583984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.8393025398254395, "rewards/margins": 16.836162567138672, "rewards/rejected": -23.675466537475586, "step": 4812 }, { "epoch": 7.73, "learning_rate": 7.86761791518034e-08, "logits/chosen": -1.5413236618041992, "logits/rejected": -1.6070890426635742, "logps/chosen": -146.27789306640625, "logps/rejected": -260.5306701660156, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.914499759674072, "rewards/margins": 11.080751419067383, "rewards/rejected": -17.995250701904297, "step": 4813 }, { "epoch": 7.73, "learning_rate": 7.857709076496235e-08, "logits/chosen": -1.4498943090438843, "logits/rejected": -1.5652554035186768, "logps/chosen": -128.9695281982422, "logps/rejected": -284.035400390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.536181926727295, "rewards/margins": 14.283167839050293, "rewards/rejected": -18.81934928894043, "step": 4814 }, { "epoch": 7.73, "learning_rate": 7.847800237812128e-08, "logits/chosen": -1.3689227104187012, "logits/rejected": -1.3875824213027954, "logps/chosen": -252.39236450195312, "logps/rejected": -389.59039306640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -15.530145645141602, "rewards/margins": 12.16543960571289, "rewards/rejected": -27.695585250854492, "step": 4815 }, { "epoch": 7.73, "learning_rate": 7.837891399128022e-08, "logits/chosen": -1.5852546691894531, "logits/rejected": -1.5809447765350342, "logps/chosen": -162.05728149414062, "logps/rejected": -291.93597412109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.380673408508301, "rewards/margins": 13.242101669311523, "rewards/rejected": -18.622774124145508, "step": 4816 }, { "epoch": 7.73, "learning_rate": 7.827982560443916e-08, "logits/chosen": -1.483335018157959, "logits/rejected": -1.4014983177185059, "logps/chosen": -138.8086700439453, "logps/rejected": -258.2655334472656, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.766005516052246, "rewards/margins": 13.203063011169434, "rewards/rejected": -18.96906852722168, "step": 4817 }, { "epoch": 7.73, "learning_rate": 7.81807372175981e-08, "logits/chosen": -1.3843443393707275, "logits/rejected": -1.474800944328308, "logps/chosen": -167.9497833251953, "logps/rejected": -325.1835632324219, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.114603042602539, "rewards/margins": 14.83184814453125, "rewards/rejected": -22.946453094482422, "step": 4818 }, { "epoch": 7.74, "learning_rate": 7.808164883075703e-08, "logits/chosen": -1.4735110998153687, "logits/rejected": -1.4952462911605835, "logps/chosen": -122.94868469238281, "logps/rejected": -228.7157440185547, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.836978912353516, "rewards/margins": 10.006217002868652, "rewards/rejected": -14.843195915222168, "step": 4819 }, { "epoch": 7.74, "learning_rate": 7.798256044391597e-08, "logits/chosen": -1.4518342018127441, "logits/rejected": -1.3866361379623413, "logps/chosen": -164.6827850341797, "logps/rejected": -288.794189453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.8500494956970215, "rewards/margins": 13.283697128295898, "rewards/rejected": -21.133747100830078, "step": 4820 }, { "epoch": 7.74, "learning_rate": 7.788347205707491e-08, "logits/chosen": -1.7389247417449951, "logits/rejected": -1.7141294479370117, "logps/chosen": -96.34193420410156, "logps/rejected": -280.93212890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.603455066680908, "rewards/margins": 17.736061096191406, "rewards/rejected": -21.339515686035156, "step": 4821 }, { "epoch": 7.74, "learning_rate": 7.778438367023385e-08, "logits/chosen": -1.6316708326339722, "logits/rejected": -1.5933218002319336, "logps/chosen": -184.2234649658203, "logps/rejected": -282.5387878417969, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.828436851501465, "rewards/margins": 11.655150413513184, "rewards/rejected": -19.48358917236328, "step": 4822 }, { "epoch": 7.74, "learning_rate": 7.768529528339277e-08, "logits/chosen": -1.5012054443359375, "logits/rejected": -1.5627968311309814, "logps/chosen": -196.04656982421875, "logps/rejected": -337.346435546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -10.974745750427246, "rewards/margins": 12.990126609802246, "rewards/rejected": -23.964872360229492, "step": 4823 }, { "epoch": 7.74, "learning_rate": 7.758620689655172e-08, "logits/chosen": -1.508507490158081, "logits/rejected": -1.5404788255691528, "logps/chosen": -151.99716186523438, "logps/rejected": -280.752197265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.967416286468506, "rewards/margins": 11.575347900390625, "rewards/rejected": -17.54276466369629, "step": 4824 }, { "epoch": 7.74, "learning_rate": 7.748711850971065e-08, "logits/chosen": -1.4905136823654175, "logits/rejected": -1.5457518100738525, "logps/chosen": -171.46951293945312, "logps/rejected": -307.81219482421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.7327165603637695, "rewards/margins": 12.55955696105957, "rewards/rejected": -20.292274475097656, "step": 4825 }, { "epoch": 7.75, "learning_rate": 7.73880301228696e-08, "logits/chosen": -1.2987693548202515, "logits/rejected": -1.3742238283157349, "logps/chosen": -168.484130859375, "logps/rejected": -274.26788330078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.711971759796143, "rewards/margins": 10.055145263671875, "rewards/rejected": -16.76711654663086, "step": 4826 }, { "epoch": 7.75, "learning_rate": 7.728894173602853e-08, "logits/chosen": -1.426423192024231, "logits/rejected": -1.4436557292938232, "logps/chosen": -171.75010681152344, "logps/rejected": -312.420166015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.5571489334106445, "rewards/margins": 14.538042068481445, "rewards/rejected": -21.095191955566406, "step": 4827 }, { "epoch": 7.75, "learning_rate": 7.718985334918747e-08, "logits/chosen": -1.700141429901123, "logits/rejected": -1.652474045753479, "logps/chosen": -143.11289978027344, "logps/rejected": -253.99090576171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.7678632736206055, "rewards/margins": 11.413220405578613, "rewards/rejected": -17.18108367919922, "step": 4828 }, { "epoch": 7.75, "learning_rate": 7.709076496234641e-08, "logits/chosen": -1.3797892332077026, "logits/rejected": -1.407689094543457, "logps/chosen": -165.33065795898438, "logps/rejected": -277.37176513671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.3642578125, "rewards/margins": 11.54411506652832, "rewards/rejected": -18.908374786376953, "step": 4829 }, { "epoch": 7.75, "learning_rate": 7.699167657550535e-08, "logits/chosen": -1.3661531209945679, "logits/rejected": -1.5483677387237549, "logps/chosen": -124.69178771972656, "logps/rejected": -333.93145751953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.178241729736328, "rewards/margins": 16.313688278198242, "rewards/rejected": -21.49193000793457, "step": 4830 }, { "epoch": 7.75, "learning_rate": 7.689258818866428e-08, "logits/chosen": -1.6187138557434082, "logits/rejected": -1.5985801219940186, "logps/chosen": -173.1485137939453, "logps/rejected": -309.1441345214844, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.871877193450928, "rewards/margins": 13.852082252502441, "rewards/rejected": -21.723960876464844, "step": 4831 }, { "epoch": 7.76, "learning_rate": 7.679349980182321e-08, "logits/chosen": -1.3809478282928467, "logits/rejected": -1.3691251277923584, "logps/chosen": -184.273681640625, "logps/rejected": -285.9242858886719, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.966527938842773, "rewards/margins": 13.170835494995117, "rewards/rejected": -21.13736343383789, "step": 4832 }, { "epoch": 7.76, "learning_rate": 7.669441141498216e-08, "logits/chosen": -1.4514950513839722, "logits/rejected": -1.4797409772872925, "logps/chosen": -166.5737762451172, "logps/rejected": -311.5140075683594, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.340671062469482, "rewards/margins": 15.422786712646484, "rewards/rejected": -22.763460159301758, "step": 4833 }, { "epoch": 7.76, "learning_rate": 7.659532302814111e-08, "logits/chosen": -1.5623724460601807, "logits/rejected": -1.5775105953216553, "logps/chosen": -168.35044860839844, "logps/rejected": -338.19818115234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.606208801269531, "rewards/margins": 16.411121368408203, "rewards/rejected": -25.017330169677734, "step": 4834 }, { "epoch": 7.76, "learning_rate": 7.649623464130004e-08, "logits/chosen": -1.4405159950256348, "logits/rejected": -1.358030080795288, "logps/chosen": -149.300537109375, "logps/rejected": -232.06472778320312, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.600377082824707, "rewards/margins": 9.08041000366211, "rewards/rejected": -15.6807861328125, "step": 4835 }, { "epoch": 7.76, "learning_rate": 7.639714625445897e-08, "logits/chosen": -1.5769211053848267, "logits/rejected": -1.571998119354248, "logps/chosen": -184.2384796142578, "logps/rejected": -313.318359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.026741027832031, "rewards/margins": 13.949853897094727, "rewards/rejected": -21.976594924926758, "step": 4836 }, { "epoch": 7.76, "learning_rate": 7.629805786761791e-08, "logits/chosen": -1.409580111503601, "logits/rejected": -1.4568231105804443, "logps/chosen": -190.1983642578125, "logps/rejected": -336.5899658203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -11.264533042907715, "rewards/margins": 13.897610664367676, "rewards/rejected": -25.16214370727539, "step": 4837 }, { "epoch": 7.77, "learning_rate": 7.619896948077685e-08, "logits/chosen": -1.5970262289047241, "logits/rejected": -1.586888313293457, "logps/chosen": -194.20663452148438, "logps/rejected": -350.14886474609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.282764434814453, "rewards/margins": 15.461089134216309, "rewards/rejected": -24.74385643005371, "step": 4838 }, { "epoch": 7.77, "learning_rate": 7.609988109393579e-08, "logits/chosen": -1.6048277616500854, "logits/rejected": -1.5288870334625244, "logps/chosen": -171.3717041015625, "logps/rejected": -293.3612060546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.2941312789917, "rewards/margins": 13.402706146240234, "rewards/rejected": -21.696836471557617, "step": 4839 }, { "epoch": 7.77, "learning_rate": 7.600079270709472e-08, "logits/chosen": -1.4967422485351562, "logits/rejected": -1.533512830734253, "logps/chosen": -159.74923706054688, "logps/rejected": -274.52056884765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.91743803024292, "rewards/margins": 10.864860534667969, "rewards/rejected": -18.782299041748047, "step": 4840 }, { "epoch": 7.77, "learning_rate": 7.590170432025367e-08, "logits/chosen": -1.3638700246810913, "logits/rejected": -1.2319835424423218, "logps/chosen": -213.04342651367188, "logps/rejected": -276.067138671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.137819290161133, "rewards/margins": 10.338951110839844, "rewards/rejected": -19.476770401000977, "step": 4841 }, { "epoch": 7.77, "learning_rate": 7.58026159334126e-08, "logits/chosen": -1.4018454551696777, "logits/rejected": -1.4261980056762695, "logps/chosen": -204.5984344482422, "logps/rejected": -351.8634033203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.209754943847656, "rewards/margins": 14.641207695007324, "rewards/rejected": -23.850963592529297, "step": 4842 }, { "epoch": 7.77, "learning_rate": 7.570352754657155e-08, "logits/chosen": -1.3693119287490845, "logits/rejected": -1.3902335166931152, "logps/chosen": -168.68910217285156, "logps/rejected": -318.850830078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.00726318359375, "rewards/margins": 13.050844192504883, "rewards/rejected": -22.05810546875, "step": 4843 }, { "epoch": 7.78, "learning_rate": 7.560443915973047e-08, "logits/chosen": -1.5151896476745605, "logits/rejected": -1.5358617305755615, "logps/chosen": -105.58500671386719, "logps/rejected": -224.08328247070312, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.6505188941955566, "rewards/margins": 12.73598575592041, "rewards/rejected": -16.386505126953125, "step": 4844 }, { "epoch": 7.78, "learning_rate": 7.550535077288941e-08, "logits/chosen": -1.355112075805664, "logits/rejected": -1.3588402271270752, "logps/chosen": -186.6870880126953, "logps/rejected": -301.73077392578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.965882301330566, "rewards/margins": 11.15848445892334, "rewards/rejected": -20.124364852905273, "step": 4845 }, { "epoch": 7.78, "learning_rate": 7.540626238604836e-08, "logits/chosen": -1.5371506214141846, "logits/rejected": -1.5603001117706299, "logps/chosen": -144.6571044921875, "logps/rejected": -290.8252868652344, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.374922752380371, "rewards/margins": 15.334087371826172, "rewards/rejected": -21.709009170532227, "step": 4846 }, { "epoch": 7.78, "learning_rate": 7.530717399920729e-08, "logits/chosen": -1.4065238237380981, "logits/rejected": -1.4403071403503418, "logps/chosen": -153.0740203857422, "logps/rejected": -283.8804931640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.818629741668701, "rewards/margins": 12.213321685791016, "rewards/rejected": -20.031951904296875, "step": 4847 }, { "epoch": 7.78, "learning_rate": 7.520808561236623e-08, "logits/chosen": -1.4308338165283203, "logits/rejected": -1.409609079360962, "logps/chosen": -168.26583862304688, "logps/rejected": -351.00445556640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.920938491821289, "rewards/margins": 16.92116355895996, "rewards/rejected": -25.842103958129883, "step": 4848 }, { "epoch": 7.78, "learning_rate": 7.510899722552516e-08, "logits/chosen": -1.4181245565414429, "logits/rejected": -1.4206080436706543, "logps/chosen": -209.90167236328125, "logps/rejected": -340.98748779296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -11.044333457946777, "rewards/margins": 12.460296630859375, "rewards/rejected": -23.504629135131836, "step": 4849 }, { "epoch": 7.78, "learning_rate": 7.50099088386841e-08, "logits/chosen": -1.5049643516540527, "logits/rejected": -1.5141916275024414, "logps/chosen": -149.92727661132812, "logps/rejected": -298.65692138671875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.006680011749268, "rewards/margins": 13.425520896911621, "rewards/rejected": -19.432201385498047, "step": 4850 }, { "epoch": 7.79, "learning_rate": 7.491082045184304e-08, "logits/chosen": -1.4364076852798462, "logits/rejected": -1.4183800220489502, "logps/chosen": -201.81112670898438, "logps/rejected": -340.0651550292969, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -10.536672592163086, "rewards/margins": 13.532243728637695, "rewards/rejected": -24.068918228149414, "step": 4851 }, { "epoch": 7.79, "learning_rate": 7.481173206500197e-08, "logits/chosen": -1.773967981338501, "logits/rejected": -1.7529749870300293, "logps/chosen": -109.91448974609375, "logps/rejected": -270.362060546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.8982486724853516, "rewards/margins": 15.395431518554688, "rewards/rejected": -19.293682098388672, "step": 4852 }, { "epoch": 7.79, "learning_rate": 7.471264367816092e-08, "logits/chosen": -1.4627025127410889, "logits/rejected": -1.4475373029708862, "logps/chosen": -167.90420532226562, "logps/rejected": -338.00042724609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.224390983581543, "rewards/margins": 16.055959701538086, "rewards/rejected": -25.280349731445312, "step": 4853 }, { "epoch": 7.79, "learning_rate": 7.461355529131985e-08, "logits/chosen": -1.5076239109039307, "logits/rejected": -1.5342090129852295, "logps/chosen": -182.29006958007812, "logps/rejected": -380.78125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -9.894654273986816, "rewards/margins": 17.79853630065918, "rewards/rejected": -27.69318962097168, "step": 4854 }, { "epoch": 7.79, "learning_rate": 7.45144669044788e-08, "logits/chosen": -1.358757734298706, "logits/rejected": -1.428637981414795, "logps/chosen": -196.1201629638672, "logps/rejected": -352.75640869140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.562475204467773, "rewards/margins": 14.56417465209961, "rewards/rejected": -24.12664794921875, "step": 4855 }, { "epoch": 7.79, "learning_rate": 7.441537851763772e-08, "logits/chosen": -1.4112544059753418, "logits/rejected": -1.3740348815917969, "logps/chosen": -174.21014404296875, "logps/rejected": -323.27288818359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.789878845214844, "rewards/margins": 14.540266990661621, "rewards/rejected": -23.33014678955078, "step": 4856 }, { "epoch": 7.8, "learning_rate": 7.431629013079667e-08, "logits/chosen": -1.4172449111938477, "logits/rejected": -1.4712252616882324, "logps/chosen": -125.50572204589844, "logps/rejected": -294.3111267089844, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.263299465179443, "rewards/margins": 16.024417877197266, "rewards/rejected": -22.287717819213867, "step": 4857 }, { "epoch": 7.8, "learning_rate": 7.42172017439556e-08, "logits/chosen": -1.5537174940109253, "logits/rejected": -1.558167576789856, "logps/chosen": -145.53089904785156, "logps/rejected": -326.4154357910156, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.713858604431152, "rewards/margins": 18.679861068725586, "rewards/rejected": -24.393720626831055, "step": 4858 }, { "epoch": 7.8, "learning_rate": 7.411811335711455e-08, "logits/chosen": -1.3402225971221924, "logits/rejected": -1.4245835542678833, "logps/chosen": -175.9832305908203, "logps/rejected": -299.4299621582031, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.499971389770508, "rewards/margins": 12.478273391723633, "rewards/rejected": -21.97824478149414, "step": 4859 }, { "epoch": 7.8, "learning_rate": 7.401902497027349e-08, "logits/chosen": -1.337522029876709, "logits/rejected": -1.498256802558899, "logps/chosen": -98.90499114990234, "logps/rejected": -296.823974609375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.360543966293335, "rewards/margins": 16.74074935913086, "rewards/rejected": -20.101293563842773, "step": 4860 }, { "epoch": 7.8, "learning_rate": 7.391993658343241e-08, "logits/chosen": -1.7188984155654907, "logits/rejected": -1.6528668403625488, "logps/chosen": -190.64923095703125, "logps/rejected": -323.4014587402344, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.739084243774414, "rewards/margins": 14.802085876464844, "rewards/rejected": -23.54117202758789, "step": 4861 }, { "epoch": 7.8, "learning_rate": 7.382084819659136e-08, "logits/chosen": -1.563624382019043, "logits/rejected": -1.56182861328125, "logps/chosen": -134.64614868164062, "logps/rejected": -307.25927734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.19488525390625, "rewards/margins": 17.205852508544922, "rewards/rejected": -22.400739669799805, "step": 4862 }, { "epoch": 7.81, "learning_rate": 7.372175980975029e-08, "logits/chosen": -1.4475407600402832, "logits/rejected": -1.4252361059188843, "logps/chosen": -187.89434814453125, "logps/rejected": -309.33197021484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.48046588897705, "rewards/margins": 13.087709426879883, "rewards/rejected": -21.568174362182617, "step": 4863 }, { "epoch": 7.81, "learning_rate": 7.362267142290924e-08, "logits/chosen": -1.3884648084640503, "logits/rejected": -1.3676702976226807, "logps/chosen": -174.2835693359375, "logps/rejected": -299.9855041503906, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.377692222595215, "rewards/margins": 13.401056289672852, "rewards/rejected": -21.778749465942383, "step": 4864 }, { "epoch": 7.81, "learning_rate": 7.352358303606817e-08, "logits/chosen": -1.4295568466186523, "logits/rejected": -1.4169704914093018, "logps/chosen": -149.14547729492188, "logps/rejected": -290.7594299316406, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.705175399780273, "rewards/margins": 13.122294425964355, "rewards/rejected": -20.827468872070312, "step": 4865 }, { "epoch": 7.81, "learning_rate": 7.34244946492271e-08, "logits/chosen": -1.364658236503601, "logits/rejected": -1.4732985496520996, "logps/chosen": -149.0472869873047, "logps/rejected": -304.9566650390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.611104965209961, "rewards/margins": 13.67896556854248, "rewards/rejected": -21.290069580078125, "step": 4866 }, { "epoch": 7.81, "learning_rate": 7.332540626238605e-08, "logits/chosen": -1.5636632442474365, "logits/rejected": -1.52078115940094, "logps/chosen": -157.994140625, "logps/rejected": -341.10870361328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.938293933868408, "rewards/margins": 15.69265365600586, "rewards/rejected": -21.630949020385742, "step": 4867 }, { "epoch": 7.81, "learning_rate": 7.322631787554498e-08, "logits/chosen": -1.4113765954971313, "logits/rejected": -1.4154765605926514, "logps/chosen": -132.22853088378906, "logps/rejected": -282.8968505859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.8423075675964355, "rewards/margins": 13.118619918823242, "rewards/rejected": -17.960926055908203, "step": 4868 }, { "epoch": 7.82, "learning_rate": 7.312722948870392e-08, "logits/chosen": -1.564414143562317, "logits/rejected": -1.5569791793823242, "logps/chosen": -162.7370147705078, "logps/rejected": -304.28564453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.689040660858154, "rewards/margins": 14.738759994506836, "rewards/rejected": -22.42780113220215, "step": 4869 }, { "epoch": 7.82, "learning_rate": 7.302814110186285e-08, "logits/chosen": -1.345212697982788, "logits/rejected": -1.3827612400054932, "logps/chosen": -172.87220764160156, "logps/rejected": -304.1089782714844, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.37352180480957, "rewards/margins": 12.04261589050293, "rewards/rejected": -20.4161376953125, "step": 4870 }, { "epoch": 7.82, "learning_rate": 7.29290527150218e-08, "logits/chosen": -1.5814214944839478, "logits/rejected": -1.576108694076538, "logps/chosen": -125.95165252685547, "logps/rejected": -278.81005859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.387411117553711, "rewards/margins": 16.385461807250977, "rewards/rejected": -21.772872924804688, "step": 4871 }, { "epoch": 7.82, "learning_rate": 7.282996432818074e-08, "logits/chosen": -1.5763983726501465, "logits/rejected": -1.5403839349746704, "logps/chosen": -160.18899536132812, "logps/rejected": -286.1727600097656, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.159602165222168, "rewards/margins": 13.902036666870117, "rewards/rejected": -21.0616397857666, "step": 4872 }, { "epoch": 7.82, "learning_rate": 7.273087594133966e-08, "logits/chosen": -1.6047157049179077, "logits/rejected": -1.6165682077407837, "logps/chosen": -81.00479125976562, "logps/rejected": -230.81350708007812, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.9800212383270264, "rewards/margins": 13.989391326904297, "rewards/rejected": -16.969411849975586, "step": 4873 }, { "epoch": 7.82, "learning_rate": 7.263178755449861e-08, "logits/chosen": -1.3650959730148315, "logits/rejected": -1.4380059242248535, "logps/chosen": -141.83006286621094, "logps/rejected": -313.1301574707031, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.302243232727051, "rewards/margins": 15.003865242004395, "rewards/rejected": -22.306106567382812, "step": 4874 }, { "epoch": 7.83, "learning_rate": 7.253269916765754e-08, "logits/chosen": -1.477924108505249, "logits/rejected": -1.4942588806152344, "logps/chosen": -180.15675354003906, "logps/rejected": -319.3282470703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.3624906539917, "rewards/margins": 13.531000137329102, "rewards/rejected": -22.893489837646484, "step": 4875 }, { "epoch": 7.83, "learning_rate": 7.243361078081649e-08, "logits/chosen": -1.49049711227417, "logits/rejected": -1.5217269659042358, "logps/chosen": -128.2622528076172, "logps/rejected": -294.0562744140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.102451324462891, "rewards/margins": 14.645753860473633, "rewards/rejected": -20.748207092285156, "step": 4876 }, { "epoch": 7.83, "learning_rate": 7.233452239397541e-08, "logits/chosen": -1.4623537063598633, "logits/rejected": -1.5114924907684326, "logps/chosen": -161.34439086914062, "logps/rejected": -301.0716857910156, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.833163738250732, "rewards/margins": 12.026004791259766, "rewards/rejected": -19.859169006347656, "step": 4877 }, { "epoch": 7.83, "learning_rate": 7.223543400713436e-08, "logits/chosen": -1.3347097635269165, "logits/rejected": -1.346407413482666, "logps/chosen": -166.18698120117188, "logps/rejected": -332.1715393066406, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.586518287658691, "rewards/margins": 16.712312698364258, "rewards/rejected": -24.298831939697266, "step": 4878 }, { "epoch": 7.83, "learning_rate": 7.21363456202933e-08, "logits/chosen": -1.5563193559646606, "logits/rejected": -1.5326755046844482, "logps/chosen": -112.21443939208984, "logps/rejected": -274.52447509765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.8612873554229736, "rewards/margins": 16.149744033813477, "rewards/rejected": -20.011028289794922, "step": 4879 }, { "epoch": 7.83, "learning_rate": 7.203725723345224e-08, "logits/chosen": -1.3675966262817383, "logits/rejected": -1.342687964439392, "logps/chosen": -169.2969970703125, "logps/rejected": -291.97515869140625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.345970153808594, "rewards/margins": 13.875682830810547, "rewards/rejected": -22.22165298461914, "step": 4880 }, { "epoch": 7.83, "learning_rate": 7.193816884661118e-08, "logits/chosen": -1.512346863746643, "logits/rejected": -1.5079658031463623, "logps/chosen": -200.2500762939453, "logps/rejected": -308.8843688964844, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.375031471252441, "rewards/margins": 11.974227905273438, "rewards/rejected": -21.349260330200195, "step": 4881 }, { "epoch": 7.84, "learning_rate": 7.18390804597701e-08, "logits/chosen": -1.5738625526428223, "logits/rejected": -1.544896125793457, "logps/chosen": -159.74859619140625, "logps/rejected": -324.83026123046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.241399765014648, "rewards/margins": 17.356098175048828, "rewards/rejected": -24.597497940063477, "step": 4882 }, { "epoch": 7.84, "learning_rate": 7.173999207292905e-08, "logits/chosen": -1.6212846040725708, "logits/rejected": -1.6652681827545166, "logps/chosen": -93.72679901123047, "logps/rejected": -243.66583251953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.5787863731384277, "rewards/margins": 14.963802337646484, "rewards/rejected": -17.54258918762207, "step": 4883 }, { "epoch": 7.84, "learning_rate": 7.164090368608798e-08, "logits/chosen": -1.5900284051895142, "logits/rejected": -1.5766512155532837, "logps/chosen": -189.3577880859375, "logps/rejected": -337.0574645996094, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.735782623291016, "rewards/margins": 14.990422248840332, "rewards/rejected": -23.726205825805664, "step": 4884 }, { "epoch": 7.84, "learning_rate": 7.154181529924693e-08, "logits/chosen": -1.3795907497406006, "logits/rejected": -1.39642333984375, "logps/chosen": -191.7391357421875, "logps/rejected": -317.1395263671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.795097351074219, "rewards/margins": 12.655288696289062, "rewards/rejected": -22.45038604736328, "step": 4885 }, { "epoch": 7.84, "learning_rate": 7.144272691240586e-08, "logits/chosen": -1.498264193534851, "logits/rejected": -1.4970345497131348, "logps/chosen": -163.73892211914062, "logps/rejected": -304.87860107421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.945647239685059, "rewards/margins": 14.318655967712402, "rewards/rejected": -21.26430320739746, "step": 4886 }, { "epoch": 7.84, "learning_rate": 7.13436385255648e-08, "logits/chosen": -1.3527100086212158, "logits/rejected": -1.3110758066177368, "logps/chosen": -156.9625244140625, "logps/rejected": -372.102294921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.291568756103516, "rewards/margins": 18.70430564880371, "rewards/rejected": -26.99587631225586, "step": 4887 }, { "epoch": 7.85, "learning_rate": 7.124455013872374e-08, "logits/chosen": -1.3910733461380005, "logits/rejected": -1.416500449180603, "logps/chosen": -186.55238342285156, "logps/rejected": -330.9005126953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.427082061767578, "rewards/margins": 15.034650802612305, "rewards/rejected": -24.46173095703125, "step": 4888 }, { "epoch": 7.85, "learning_rate": 7.114546175188268e-08, "logits/chosen": -1.6703134775161743, "logits/rejected": -1.6009621620178223, "logps/chosen": -148.96749877929688, "logps/rejected": -270.2138671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.679254055023193, "rewards/margins": 11.9889497756958, "rewards/rejected": -18.66820526123047, "step": 4889 }, { "epoch": 7.85, "learning_rate": 7.104637336504161e-08, "logits/chosen": -1.5680301189422607, "logits/rejected": -1.685950756072998, "logps/chosen": -180.64540100097656, "logps/rejected": -342.57568359375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.302730083465576, "rewards/margins": 16.24761962890625, "rewards/rejected": -23.55034828186035, "step": 4890 }, { "epoch": 7.85, "learning_rate": 7.094728497820056e-08, "logits/chosen": -1.5419106483459473, "logits/rejected": -1.5170013904571533, "logps/chosen": -176.76315307617188, "logps/rejected": -334.3760986328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.8622050285339355, "rewards/margins": 16.078624725341797, "rewards/rejected": -23.940828323364258, "step": 4891 }, { "epoch": 7.85, "learning_rate": 7.084819659135949e-08, "logits/chosen": -1.4276074171066284, "logits/rejected": -1.4459203481674194, "logps/chosen": -155.1407012939453, "logps/rejected": -257.4079895019531, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.861623764038086, "rewards/margins": 11.5476655960083, "rewards/rejected": -18.409290313720703, "step": 4892 }, { "epoch": 7.85, "learning_rate": 7.074910820451844e-08, "logits/chosen": -1.3824695348739624, "logits/rejected": -1.50946843624115, "logps/chosen": -134.61505126953125, "logps/rejected": -325.2096862792969, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.051602363586426, "rewards/margins": 17.127782821655273, "rewards/rejected": -24.17938804626465, "step": 4893 }, { "epoch": 7.86, "learning_rate": 7.065001981767736e-08, "logits/chosen": -1.3380247354507446, "logits/rejected": -1.3723082542419434, "logps/chosen": -190.09011840820312, "logps/rejected": -320.698486328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.417593955993652, "rewards/margins": 13.10705280303955, "rewards/rejected": -21.524646759033203, "step": 4894 }, { "epoch": 7.86, "learning_rate": 7.05509314308363e-08, "logits/chosen": -1.4565954208374023, "logits/rejected": -1.5765899419784546, "logps/chosen": -177.61026000976562, "logps/rejected": -301.9615173339844, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.033174514770508, "rewards/margins": 10.82702922821045, "rewards/rejected": -19.86020278930664, "step": 4895 }, { "epoch": 7.86, "learning_rate": 7.045184304399524e-08, "logits/chosen": -1.2899677753448486, "logits/rejected": -1.3608392477035522, "logps/chosen": -156.04837036132812, "logps/rejected": -261.5401916503906, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.698054313659668, "rewards/margins": 9.375447273254395, "rewards/rejected": -17.073501586914062, "step": 4896 }, { "epoch": 7.86, "learning_rate": 7.035275465715418e-08, "logits/chosen": -1.398398995399475, "logits/rejected": -1.4138731956481934, "logps/chosen": -104.43282318115234, "logps/rejected": -297.4237976074219, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.847512722015381, "rewards/margins": 15.656156539916992, "rewards/rejected": -20.50366973876953, "step": 4897 }, { "epoch": 7.86, "learning_rate": 7.025366627031312e-08, "logits/chosen": -1.4477061033248901, "logits/rejected": -1.4416265487670898, "logps/chosen": -170.9147186279297, "logps/rejected": -339.1037902832031, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.541336059570312, "rewards/margins": 15.939701080322266, "rewards/rejected": -24.481037139892578, "step": 4898 }, { "epoch": 7.86, "learning_rate": 7.015457788347205e-08, "logits/chosen": -1.3790475130081177, "logits/rejected": -1.3672919273376465, "logps/chosen": -154.5796661376953, "logps/rejected": -333.11883544921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.381752014160156, "rewards/margins": 17.15927505493164, "rewards/rejected": -24.541027069091797, "step": 4899 }, { "epoch": 7.87, "learning_rate": 7.0055489496631e-08, "logits/chosen": -1.526892066001892, "logits/rejected": -1.5011378526687622, "logps/chosen": -168.27645874023438, "logps/rejected": -273.31927490234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.904084205627441, "rewards/margins": 12.019091606140137, "rewards/rejected": -18.923175811767578, "step": 4900 }, { "epoch": 7.87, "learning_rate": 6.995640110978993e-08, "logits/chosen": -1.5221351385116577, "logits/rejected": -1.5968172550201416, "logps/chosen": -186.08580017089844, "logps/rejected": -318.0239562988281, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -9.667919158935547, "rewards/margins": 12.113765716552734, "rewards/rejected": -21.78168487548828, "step": 4901 }, { "epoch": 7.87, "learning_rate": 6.985731272294888e-08, "logits/chosen": -1.2596631050109863, "logits/rejected": -1.374326467514038, "logps/chosen": -121.94046783447266, "logps/rejected": -317.29559326171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.3215532302856445, "rewards/margins": 16.598562240600586, "rewards/rejected": -21.920116424560547, "step": 4902 }, { "epoch": 7.87, "learning_rate": 6.97582243361078e-08, "logits/chosen": -1.704198956489563, "logits/rejected": -1.694022536277771, "logps/chosen": -117.92302703857422, "logps/rejected": -264.2262878417969, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.0380120277404785, "rewards/margins": 14.897703170776367, "rewards/rejected": -18.935714721679688, "step": 4903 }, { "epoch": 7.87, "learning_rate": 6.965913594926674e-08, "logits/chosen": -1.3551716804504395, "logits/rejected": -1.3493599891662598, "logps/chosen": -176.69558715820312, "logps/rejected": -334.8759765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.961166381835938, "rewards/margins": 15.426485061645508, "rewards/rejected": -24.387653350830078, "step": 4904 }, { "epoch": 7.87, "learning_rate": 6.956004756242569e-08, "logits/chosen": -1.4815881252288818, "logits/rejected": -1.5030934810638428, "logps/chosen": -171.90492248535156, "logps/rejected": -348.76031494140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.492511749267578, "rewards/margins": 16.735692977905273, "rewards/rejected": -26.228206634521484, "step": 4905 }, { "epoch": 7.87, "learning_rate": 6.946095917558462e-08, "logits/chosen": -1.452528715133667, "logits/rejected": -1.5219497680664062, "logps/chosen": -138.089111328125, "logps/rejected": -292.38775634765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.458005905151367, "rewards/margins": 14.487727165222168, "rewards/rejected": -20.94573402404785, "step": 4906 }, { "epoch": 7.88, "learning_rate": 6.936187078874356e-08, "logits/chosen": -1.6059489250183105, "logits/rejected": -1.5751614570617676, "logps/chosen": -217.15975952148438, "logps/rejected": -358.1731262207031, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -10.674202919006348, "rewards/margins": 14.268885612487793, "rewards/rejected": -24.94308853149414, "step": 4907 }, { "epoch": 7.88, "learning_rate": 6.926278240190249e-08, "logits/chosen": -1.3200490474700928, "logits/rejected": -1.4186997413635254, "logps/chosen": -151.211669921875, "logps/rejected": -319.1556091308594, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.489862442016602, "rewards/margins": 14.365056991577148, "rewards/rejected": -22.854917526245117, "step": 4908 }, { "epoch": 7.88, "learning_rate": 6.916369401506144e-08, "logits/chosen": -1.6514923572540283, "logits/rejected": -1.5708317756652832, "logps/chosen": -174.29356384277344, "logps/rejected": -292.65740966796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.79392147064209, "rewards/margins": 11.790153503417969, "rewards/rejected": -20.584075927734375, "step": 4909 }, { "epoch": 7.88, "learning_rate": 6.906460562822037e-08, "logits/chosen": -1.308393955230713, "logits/rejected": -1.2948050498962402, "logps/chosen": -135.64602661132812, "logps/rejected": -325.2373046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.573548316955566, "rewards/margins": 17.381759643554688, "rewards/rejected": -23.95530891418457, "step": 4910 }, { "epoch": 7.88, "learning_rate": 6.89655172413793e-08, "logits/chosen": -1.3928608894348145, "logits/rejected": -1.3743236064910889, "logps/chosen": -157.57521057128906, "logps/rejected": -310.216552734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.3480024337768555, "rewards/margins": 14.799158096313477, "rewards/rejected": -21.14716148376465, "step": 4911 }, { "epoch": 7.88, "learning_rate": 6.886642885453825e-08, "logits/chosen": -1.4388487339019775, "logits/rejected": -1.4170498847961426, "logps/chosen": -171.99237060546875, "logps/rejected": -310.688720703125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -7.627808570861816, "rewards/margins": 14.217021942138672, "rewards/rejected": -21.844829559326172, "step": 4912 }, { "epoch": 7.89, "learning_rate": 6.876734046769718e-08, "logits/chosen": -1.570708990097046, "logits/rejected": -1.557126522064209, "logps/chosen": -117.17453002929688, "logps/rejected": -261.977783203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.515327453613281, "rewards/margins": 13.855026245117188, "rewards/rejected": -18.37035369873047, "step": 4913 }, { "epoch": 7.89, "learning_rate": 6.866825208085613e-08, "logits/chosen": -1.445966362953186, "logits/rejected": -1.4163416624069214, "logps/chosen": -185.894287109375, "logps/rejected": -341.7824401855469, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.783063888549805, "rewards/margins": 17.014976501464844, "rewards/rejected": -26.798038482666016, "step": 4914 }, { "epoch": 7.89, "learning_rate": 6.856916369401505e-08, "logits/chosen": -1.4198733568191528, "logits/rejected": -1.5225330591201782, "logps/chosen": -137.33880615234375, "logps/rejected": -267.93701171875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.230825901031494, "rewards/margins": 11.987138748168945, "rewards/rejected": -18.217966079711914, "step": 4915 }, { "epoch": 7.89, "learning_rate": 6.8470075307174e-08, "logits/chosen": -1.572982907295227, "logits/rejected": -1.5584124326705933, "logps/chosen": -150.33554077148438, "logps/rejected": -269.40008544921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.301774978637695, "rewards/margins": 12.810094833374023, "rewards/rejected": -19.11186981201172, "step": 4916 }, { "epoch": 7.89, "learning_rate": 6.837098692033294e-08, "logits/chosen": -1.45878005027771, "logits/rejected": -1.4536166191101074, "logps/chosen": -143.82791137695312, "logps/rejected": -271.90301513671875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.52540397644043, "rewards/margins": 13.98352336883545, "rewards/rejected": -19.508926391601562, "step": 4917 }, { "epoch": 7.89, "learning_rate": 6.827189853349187e-08, "logits/chosen": -1.3929550647735596, "logits/rejected": -1.3690567016601562, "logps/chosen": -164.65451049804688, "logps/rejected": -297.92333984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.360965728759766, "rewards/margins": 12.315488815307617, "rewards/rejected": -19.676454544067383, "step": 4918 }, { "epoch": 7.9, "learning_rate": 6.817281014665081e-08, "logits/chosen": -1.465235948562622, "logits/rejected": -1.474284291267395, "logps/chosen": -117.70922088623047, "logps/rejected": -274.656494140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.360558032989502, "rewards/margins": 15.549505233764648, "rewards/rejected": -20.910064697265625, "step": 4919 }, { "epoch": 7.9, "learning_rate": 6.807372175980974e-08, "logits/chosen": -1.3068559169769287, "logits/rejected": -1.367431640625, "logps/chosen": -149.6103515625, "logps/rejected": -304.3005065917969, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.6090850830078125, "rewards/margins": 14.959970474243164, "rewards/rejected": -21.569055557250977, "step": 4920 }, { "epoch": 7.9, "learning_rate": 6.797463337296869e-08, "logits/chosen": -1.7524347305297852, "logits/rejected": -1.7123677730560303, "logps/chosen": -100.44873046875, "logps/rejected": -278.62139892578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.124645948410034, "rewards/margins": 17.43526840209961, "rewards/rejected": -20.55991554260254, "step": 4921 }, { "epoch": 7.9, "learning_rate": 6.787554498612762e-08, "logits/chosen": -1.2780205011367798, "logits/rejected": -1.2836096286773682, "logps/chosen": -182.94839477539062, "logps/rejected": -312.30133056640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -11.063398361206055, "rewards/margins": 11.68855094909668, "rewards/rejected": -22.751949310302734, "step": 4922 }, { "epoch": 7.9, "learning_rate": 6.777645659928655e-08, "logits/chosen": -1.6318585872650146, "logits/rejected": -1.618431806564331, "logps/chosen": -163.79794311523438, "logps/rejected": -307.68353271484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.305566787719727, "rewards/margins": 13.616195678710938, "rewards/rejected": -21.921762466430664, "step": 4923 }, { "epoch": 7.9, "learning_rate": 6.76773682124455e-08, "logits/chosen": -1.4422380924224854, "logits/rejected": -1.549297571182251, "logps/chosen": -185.56094360351562, "logps/rejected": -311.69195556640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.67754077911377, "rewards/margins": 12.493606567382812, "rewards/rejected": -22.1711483001709, "step": 4924 }, { "epoch": 7.91, "learning_rate": 6.757827982560443e-08, "logits/chosen": -1.5294691324234009, "logits/rejected": -1.4318064451217651, "logps/chosen": -161.6126708984375, "logps/rejected": -322.1305236816406, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.884021759033203, "rewards/margins": 16.34178924560547, "rewards/rejected": -22.225811004638672, "step": 4925 }, { "epoch": 7.91, "learning_rate": 6.747919143876338e-08, "logits/chosen": -1.6290870904922485, "logits/rejected": -1.5757665634155273, "logps/chosen": -172.023193359375, "logps/rejected": -281.43048095703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.551963806152344, "rewards/margins": 13.084405899047852, "rewards/rejected": -20.636369705200195, "step": 4926 }, { "epoch": 7.91, "learning_rate": 6.738010305192231e-08, "logits/chosen": -1.736436128616333, "logits/rejected": -1.5924285650253296, "logps/chosen": -134.4371795654297, "logps/rejected": -230.40011596679688, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.368408679962158, "rewards/margins": 12.347579002380371, "rewards/rejected": -16.715988159179688, "step": 4927 }, { "epoch": 7.91, "learning_rate": 6.728101466508125e-08, "logits/chosen": -1.6860144138336182, "logits/rejected": -1.629030704498291, "logps/chosen": -152.30418395996094, "logps/rejected": -262.88812255859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.742213726043701, "rewards/margins": 10.183951377868652, "rewards/rejected": -17.926166534423828, "step": 4928 }, { "epoch": 7.91, "learning_rate": 6.718192627824018e-08, "logits/chosen": -1.386312484741211, "logits/rejected": -1.4824061393737793, "logps/chosen": -175.2683868408203, "logps/rejected": -339.5726318359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.86451530456543, "rewards/margins": 13.59428596496582, "rewards/rejected": -22.458803176879883, "step": 4929 }, { "epoch": 7.91, "learning_rate": 6.708283789139913e-08, "logits/chosen": -1.4574838876724243, "logits/rejected": -1.4437687397003174, "logps/chosen": -175.93087768554688, "logps/rejected": -319.0188903808594, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.888320446014404, "rewards/margins": 14.431939125061035, "rewards/rejected": -21.320261001586914, "step": 4930 }, { "epoch": 7.91, "learning_rate": 6.698374950455807e-08, "logits/chosen": -1.5819079875946045, "logits/rejected": -1.6094446182250977, "logps/chosen": -181.61605834960938, "logps/rejected": -360.66424560546875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -10.39846420288086, "rewards/margins": 15.379329681396484, "rewards/rejected": -25.777793884277344, "step": 4931 }, { "epoch": 7.92, "learning_rate": 6.6884661117717e-08, "logits/chosen": -1.4996777772903442, "logits/rejected": -1.4895281791687012, "logps/chosen": -172.89317321777344, "logps/rejected": -331.88507080078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.344186782836914, "rewards/margins": 15.572493553161621, "rewards/rejected": -23.91668128967285, "step": 4932 }, { "epoch": 7.92, "learning_rate": 6.678557273087594e-08, "logits/chosen": -1.3394207954406738, "logits/rejected": -1.2772724628448486, "logps/chosen": -199.49844360351562, "logps/rejected": -331.49835205078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -10.969564437866211, "rewards/margins": 14.164509773254395, "rewards/rejected": -25.13407325744629, "step": 4933 }, { "epoch": 7.92, "learning_rate": 6.668648434403487e-08, "logits/chosen": -1.3203749656677246, "logits/rejected": -1.3941376209259033, "logps/chosen": -160.28851318359375, "logps/rejected": -311.13397216796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.571490287780762, "rewards/margins": 14.694190979003906, "rewards/rejected": -22.265682220458984, "step": 4934 }, { "epoch": 7.92, "learning_rate": 6.658739595719382e-08, "logits/chosen": -1.5060335397720337, "logits/rejected": -1.550856113433838, "logps/chosen": -180.84442138671875, "logps/rejected": -322.32696533203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.170495986938477, "rewards/margins": 13.701642990112305, "rewards/rejected": -22.87213897705078, "step": 4935 }, { "epoch": 7.92, "learning_rate": 6.648830757035275e-08, "logits/chosen": -1.4521610736846924, "logits/rejected": -1.502049446105957, "logps/chosen": -171.68682861328125, "logps/rejected": -348.28533935546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.277660369873047, "rewards/margins": 13.672930717468262, "rewards/rejected": -22.950590133666992, "step": 4936 }, { "epoch": 7.92, "learning_rate": 6.638921918351169e-08, "logits/chosen": -1.3623425960540771, "logits/rejected": -1.4009084701538086, "logps/chosen": -181.5767059326172, "logps/rejected": -348.095947265625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -10.904523849487305, "rewards/margins": 15.734140396118164, "rewards/rejected": -26.63866424560547, "step": 4937 }, { "epoch": 7.93, "learning_rate": 6.629013079667063e-08, "logits/chosen": -1.4777913093566895, "logits/rejected": -1.4538905620574951, "logps/chosen": -160.15347290039062, "logps/rejected": -320.8857727050781, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.095374584197998, "rewards/margins": 16.72042465209961, "rewards/rejected": -23.815799713134766, "step": 4938 }, { "epoch": 7.93, "learning_rate": 6.619104240982957e-08, "logits/chosen": -1.6272486448287964, "logits/rejected": -1.672382116317749, "logps/chosen": -144.9543914794922, "logps/rejected": -276.45721435546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.407447814941406, "rewards/margins": 12.101672172546387, "rewards/rejected": -19.50912094116211, "step": 4939 }, { "epoch": 7.93, "learning_rate": 6.60919540229885e-08, "logits/chosen": -1.4902807474136353, "logits/rejected": -1.4490572214126587, "logps/chosen": -180.0673065185547, "logps/rejected": -291.7605895996094, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.824848175048828, "rewards/margins": 12.952550888061523, "rewards/rejected": -20.77739906311035, "step": 4940 }, { "epoch": 7.93, "learning_rate": 6.599286563614743e-08, "logits/chosen": -1.489199161529541, "logits/rejected": -1.5155024528503418, "logps/chosen": -155.2980499267578, "logps/rejected": -304.07159423828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.514796257019043, "rewards/margins": 15.524482727050781, "rewards/rejected": -22.039278030395508, "step": 4941 }, { "epoch": 7.93, "learning_rate": 6.589377724930638e-08, "logits/chosen": -1.517151117324829, "logits/rejected": -1.4745386838912964, "logps/chosen": -136.1370391845703, "logps/rejected": -288.864990234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.968955039978027, "rewards/margins": 15.656716346740723, "rewards/rejected": -20.62567138671875, "step": 4942 }, { "epoch": 7.93, "learning_rate": 6.579468886246533e-08, "logits/chosen": -1.60651433467865, "logits/rejected": -1.5080313682556152, "logps/chosen": -139.59405517578125, "logps/rejected": -308.830810546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.950434684753418, "rewards/margins": 15.956064224243164, "rewards/rejected": -21.9064998626709, "step": 4943 }, { "epoch": 7.94, "learning_rate": 6.569560047562425e-08, "logits/chosen": -1.5636556148529053, "logits/rejected": -1.5785789489746094, "logps/chosen": -164.36764526367188, "logps/rejected": -345.20263671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.680541038513184, "rewards/margins": 16.847183227539062, "rewards/rejected": -23.527725219726562, "step": 4944 }, { "epoch": 7.94, "learning_rate": 6.559651208878319e-08, "logits/chosen": -1.4103288650512695, "logits/rejected": -1.3671824932098389, "logps/chosen": -154.36090087890625, "logps/rejected": -307.64276123046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.8948469161987305, "rewards/margins": 14.568997383117676, "rewards/rejected": -22.463844299316406, "step": 4945 }, { "epoch": 7.94, "learning_rate": 6.549742370194213e-08, "logits/chosen": -1.4077417850494385, "logits/rejected": -1.4079010486602783, "logps/chosen": -186.15493774414062, "logps/rejected": -333.4803466796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.2022123336792, "rewards/margins": 15.131638526916504, "rewards/rejected": -23.333852767944336, "step": 4946 }, { "epoch": 7.94, "learning_rate": 6.539833531510107e-08, "logits/chosen": -1.2601714134216309, "logits/rejected": -1.3539129495620728, "logps/chosen": -135.70098876953125, "logps/rejected": -254.24169921875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.175784587860107, "rewards/margins": 11.772151947021484, "rewards/rejected": -17.94793701171875, "step": 4947 }, { "epoch": 7.94, "learning_rate": 6.529924692825999e-08, "logits/chosen": -1.3664238452911377, "logits/rejected": -1.5036628246307373, "logps/chosen": -156.41690063476562, "logps/rejected": -362.9529113769531, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.37971830368042, "rewards/margins": 18.131986618041992, "rewards/rejected": -25.511703491210938, "step": 4948 }, { "epoch": 7.94, "learning_rate": 6.520015854141894e-08, "logits/chosen": -1.426912546157837, "logits/rejected": -1.5171469449996948, "logps/chosen": -137.11573791503906, "logps/rejected": -274.56158447265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.1341753005981445, "rewards/margins": 13.728302001953125, "rewards/rejected": -18.862478256225586, "step": 4949 }, { "epoch": 7.95, "learning_rate": 6.510107015457789e-08, "logits/chosen": -1.555643081665039, "logits/rejected": -1.5726553201675415, "logps/chosen": -142.3778076171875, "logps/rejected": -265.91009521484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.54888916015625, "rewards/margins": 11.924670219421387, "rewards/rejected": -18.473560333251953, "step": 4950 }, { "epoch": 7.95, "learning_rate": 6.500198176773682e-08, "logits/chosen": -1.430159568786621, "logits/rejected": -1.4085278511047363, "logps/chosen": -187.0888671875, "logps/rejected": -313.93096923828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.806796073913574, "rewards/margins": 14.09773063659668, "rewards/rejected": -22.90452766418457, "step": 4951 }, { "epoch": 7.95, "learning_rate": 6.490289338089577e-08, "logits/chosen": -1.4882748126983643, "logits/rejected": -1.4203832149505615, "logps/chosen": -183.2484893798828, "logps/rejected": -329.5894775390625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.050426483154297, "rewards/margins": 15.854513168334961, "rewards/rejected": -23.904939651489258, "step": 4952 }, { "epoch": 7.95, "learning_rate": 6.480380499405469e-08, "logits/chosen": -1.3607265949249268, "logits/rejected": -1.4896742105484009, "logps/chosen": -116.90552520751953, "logps/rejected": -325.79290771484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.151294708251953, "rewards/margins": 16.765911102294922, "rewards/rejected": -21.917205810546875, "step": 4953 }, { "epoch": 7.95, "learning_rate": 6.470471660721363e-08, "logits/chosen": -1.4554328918457031, "logits/rejected": -1.5189085006713867, "logps/chosen": -195.41641235351562, "logps/rejected": -334.3274230957031, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.564616203308105, "rewards/margins": 13.46866226196289, "rewards/rejected": -23.033279418945312, "step": 4954 }, { "epoch": 7.95, "learning_rate": 6.460562822037257e-08, "logits/chosen": -1.2886685132980347, "logits/rejected": -1.3147509098052979, "logps/chosen": -144.1387939453125, "logps/rejected": -280.32208251953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.4489850997924805, "rewards/margins": 15.213923454284668, "rewards/rejected": -18.66290855407715, "step": 4955 }, { "epoch": 7.96, "learning_rate": 6.450653983353151e-08, "logits/chosen": -1.4308241605758667, "logits/rejected": -1.3839426040649414, "logps/chosen": -123.96199798583984, "logps/rejected": -235.07562255859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.0044426918029785, "rewards/margins": 11.59727668762207, "rewards/rejected": -16.60171890258789, "step": 4956 }, { "epoch": 7.96, "learning_rate": 6.440745144669045e-08, "logits/chosen": -1.5133806467056274, "logits/rejected": -1.4385645389556885, "logps/chosen": -124.51720428466797, "logps/rejected": -243.2098388671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.224242210388184, "rewards/margins": 12.646797180175781, "rewards/rejected": -17.87104034423828, "step": 4957 }, { "epoch": 7.96, "learning_rate": 6.430836305984938e-08, "logits/chosen": -1.5922462940216064, "logits/rejected": -1.5461504459381104, "logps/chosen": -129.0787353515625, "logps/rejected": -269.197998046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.945950031280518, "rewards/margins": 14.44717025756836, "rewards/rejected": -20.39312171936035, "step": 4958 }, { "epoch": 7.96, "learning_rate": 6.420927467300833e-08, "logits/chosen": -1.3654509782791138, "logits/rejected": -1.4389649629592896, "logps/chosen": -146.37771606445312, "logps/rejected": -268.5397644042969, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.4635701179504395, "rewards/margins": 13.40743637084961, "rewards/rejected": -18.871004104614258, "step": 4959 }, { "epoch": 7.96, "learning_rate": 6.411018628616726e-08, "logits/chosen": -1.6731510162353516, "logits/rejected": -1.6004998683929443, "logps/chosen": -166.66026306152344, "logps/rejected": -320.0062561035156, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.881400108337402, "rewards/margins": 15.552593231201172, "rewards/rejected": -23.43399429321289, "step": 4960 }, { "epoch": 7.96, "learning_rate": 6.401109789932619e-08, "logits/chosen": -1.4367592334747314, "logits/rejected": -1.4243688583374023, "logps/chosen": -157.8418731689453, "logps/rejected": -297.84539794921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.967150688171387, "rewards/margins": 14.239580154418945, "rewards/rejected": -21.20673179626465, "step": 4961 }, { "epoch": 7.96, "learning_rate": 6.391200951248514e-08, "logits/chosen": -1.6338472366333008, "logits/rejected": -1.6681883335113525, "logps/chosen": -113.20653533935547, "logps/rejected": -281.8238830566406, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.4965133666992188, "rewards/margins": 15.922796249389648, "rewards/rejected": -19.419309616088867, "step": 4962 }, { "epoch": 7.97, "learning_rate": 6.381292112564407e-08, "logits/chosen": -1.7049845457077026, "logits/rejected": -1.6671713590621948, "logps/chosen": -109.30989837646484, "logps/rejected": -213.17506408691406, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.086953163146973, "rewards/margins": 10.633581161499023, "rewards/rejected": -14.72053337097168, "step": 4963 }, { "epoch": 7.97, "learning_rate": 6.371383273880302e-08, "logits/chosen": -1.6931822299957275, "logits/rejected": -1.6428049802780151, "logps/chosen": -112.86708068847656, "logps/rejected": -235.69680786132812, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.9381697177886963, "rewards/margins": 11.91849136352539, "rewards/rejected": -15.856659889221191, "step": 4964 }, { "epoch": 7.97, "learning_rate": 6.361474435196194e-08, "logits/chosen": -1.3625528812408447, "logits/rejected": -1.468430995941162, "logps/chosen": -141.3026885986328, "logps/rejected": -324.77508544921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.184837341308594, "rewards/margins": 14.380599975585938, "rewards/rejected": -19.56543731689453, "step": 4965 }, { "epoch": 7.97, "learning_rate": 6.351565596512088e-08, "logits/chosen": -1.6710295677185059, "logits/rejected": -1.4180018901824951, "logps/chosen": -213.15365600585938, "logps/rejected": -264.39947509765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.823093414306641, "rewards/margins": 12.569363594055176, "rewards/rejected": -20.392457962036133, "step": 4966 }, { "epoch": 7.97, "learning_rate": 6.341656757827982e-08, "logits/chosen": -1.2967956066131592, "logits/rejected": -1.3344626426696777, "logps/chosen": -146.22398376464844, "logps/rejected": -300.6174621582031, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.068172454833984, "rewards/margins": 14.251825332641602, "rewards/rejected": -21.319997787475586, "step": 4967 }, { "epoch": 7.97, "learning_rate": 6.331747919143876e-08, "logits/chosen": -1.384425163269043, "logits/rejected": -1.426023244857788, "logps/chosen": -140.7677001953125, "logps/rejected": -315.2598876953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.485176086425781, "rewards/margins": 14.645751953125, "rewards/rejected": -21.13092803955078, "step": 4968 }, { "epoch": 7.98, "learning_rate": 6.321839080459771e-08, "logits/chosen": -1.4688713550567627, "logits/rejected": -1.3148144483566284, "logps/chosen": -204.29237365722656, "logps/rejected": -277.81982421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.133615493774414, "rewards/margins": 12.297032356262207, "rewards/rejected": -20.430646896362305, "step": 4969 }, { "epoch": 7.98, "learning_rate": 6.311930241775663e-08, "logits/chosen": -1.5003223419189453, "logits/rejected": -1.4820430278778076, "logps/chosen": -201.69631958007812, "logps/rejected": -304.2740478515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.915660858154297, "rewards/margins": 10.943458557128906, "rewards/rejected": -19.859119415283203, "step": 4970 }, { "epoch": 7.98, "learning_rate": 6.302021403091558e-08, "logits/chosen": -1.5471842288970947, "logits/rejected": -1.5335127115249634, "logps/chosen": -198.89312744140625, "logps/rejected": -271.3827819824219, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -9.205077171325684, "rewards/margins": 9.3900728225708, "rewards/rejected": -18.595149993896484, "step": 4971 }, { "epoch": 7.98, "learning_rate": 6.292112564407451e-08, "logits/chosen": -1.3199806213378906, "logits/rejected": -1.2877893447875977, "logps/chosen": -141.52639770507812, "logps/rejected": -253.82131958007812, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.901370048522949, "rewards/margins": 12.347278594970703, "rewards/rejected": -18.24864959716797, "step": 4972 }, { "epoch": 7.98, "learning_rate": 6.282203725723346e-08, "logits/chosen": -1.6656053066253662, "logits/rejected": -1.5141671895980835, "logps/chosen": -161.15919494628906, "logps/rejected": -288.236328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.856696605682373, "rewards/margins": 13.049251556396484, "rewards/rejected": -19.905948638916016, "step": 4973 }, { "epoch": 7.98, "learning_rate": 6.272294887039238e-08, "logits/chosen": -1.6660563945770264, "logits/rejected": -1.6018450260162354, "logps/chosen": -148.5950469970703, "logps/rejected": -271.9561462402344, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.2045698165893555, "rewards/margins": 13.365599632263184, "rewards/rejected": -19.570167541503906, "step": 4974 }, { "epoch": 7.99, "learning_rate": 6.262386048355132e-08, "logits/chosen": -1.5474579334259033, "logits/rejected": -1.5740324258804321, "logps/chosen": -184.74191284179688, "logps/rejected": -305.7305908203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.559179306030273, "rewards/margins": 12.886354446411133, "rewards/rejected": -22.445531845092773, "step": 4975 }, { "epoch": 7.99, "learning_rate": 6.252477209671027e-08, "logits/chosen": -1.4101706743240356, "logits/rejected": -1.4202852249145508, "logps/chosen": -105.90115356445312, "logps/rejected": -262.1482238769531, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.630397796630859, "rewards/margins": 13.885977745056152, "rewards/rejected": -19.516376495361328, "step": 4976 }, { "epoch": 7.99, "learning_rate": 6.24256837098692e-08, "logits/chosen": -1.5503305196762085, "logits/rejected": -1.575042724609375, "logps/chosen": -167.067626953125, "logps/rejected": -299.2735290527344, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.530710220336914, "rewards/margins": 12.551409721374512, "rewards/rejected": -21.082120895385742, "step": 4977 }, { "epoch": 7.99, "learning_rate": 6.232659532302814e-08, "logits/chosen": -1.6049646139144897, "logits/rejected": -1.683699369430542, "logps/chosen": -151.47415161132812, "logps/rejected": -326.3045654296875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.738122940063477, "rewards/margins": 16.468547821044922, "rewards/rejected": -23.206668853759766, "step": 4978 }, { "epoch": 7.99, "learning_rate": 6.222750693618708e-08, "logits/chosen": -1.3567569255828857, "logits/rejected": -1.4126079082489014, "logps/chosen": -180.74627685546875, "logps/rejected": -307.66693115234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.347441673278809, "rewards/margins": 12.836677551269531, "rewards/rejected": -22.184120178222656, "step": 4979 }, { "epoch": 7.99, "learning_rate": 6.212841854934602e-08, "logits/chosen": -1.575981616973877, "logits/rejected": -1.59954035282135, "logps/chosen": -144.99526977539062, "logps/rejected": -273.07342529296875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.455984115600586, "rewards/margins": 12.769399642944336, "rewards/rejected": -18.225383758544922, "step": 4980 }, { "epoch": 8.0, "learning_rate": 6.202933016250495e-08, "logits/chosen": -1.5606162548065186, "logits/rejected": -1.5900264978408813, "logps/chosen": -169.50506591796875, "logps/rejected": -302.9367370605469, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.332746505737305, "rewards/margins": 13.671635627746582, "rewards/rejected": -23.004383087158203, "step": 4981 }, { "epoch": 8.0, "learning_rate": 6.193024177566388e-08, "logits/chosen": -1.5249961614608765, "logits/rejected": -1.5267322063446045, "logps/chosen": -169.36351013183594, "logps/rejected": -318.2764892578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.830323219299316, "rewards/margins": 15.454629898071289, "rewards/rejected": -23.28495216369629, "step": 4982 }, { "epoch": 8.0, "learning_rate": 6.183115338882283e-08, "logits/chosen": -1.5648270845413208, "logits/rejected": -1.610274076461792, "logps/chosen": -205.89422607421875, "logps/rejected": -363.3619384765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -11.861175537109375, "rewards/margins": 15.284310340881348, "rewards/rejected": -27.145484924316406, "step": 4983 }, { "epoch": 8.0, "learning_rate": 6.173206500198176e-08, "logits/chosen": -1.54854154586792, "logits/rejected": -1.5049291849136353, "logps/chosen": -168.19802856445312, "logps/rejected": -331.0521240234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.510188102722168, "rewards/margins": 17.429115295410156, "rewards/rejected": -23.93930435180664, "step": 4984 }, { "epoch": 8.0, "learning_rate": 6.163297661514071e-08, "logits/chosen": -1.5311787128448486, "logits/rejected": -1.5140671730041504, "logps/chosen": -180.87197875976562, "logps/rejected": -316.3639831542969, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.322641372680664, "rewards/margins": 13.630735397338867, "rewards/rejected": -22.95337677001953, "step": 4985 }, { "epoch": 8.0, "learning_rate": 6.153388822829964e-08, "logits/chosen": -1.4589574337005615, "logits/rejected": -1.4391427040100098, "logps/chosen": -173.80447387695312, "logps/rejected": -320.8588562011719, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.797813415527344, "rewards/margins": 14.803401947021484, "rewards/rejected": -23.601215362548828, "step": 4986 }, { "epoch": 8.0, "learning_rate": 6.143479984145858e-08, "logits/chosen": -1.5569584369659424, "logits/rejected": -1.4803327322006226, "logps/chosen": -166.62579345703125, "logps/rejected": -265.63787841796875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.664750099182129, "rewards/margins": 11.281028747558594, "rewards/rejected": -17.945777893066406, "step": 4987 }, { "epoch": 8.01, "learning_rate": 6.133571145461751e-08, "logits/chosen": -1.2887532711029053, "logits/rejected": -1.3380173444747925, "logps/chosen": -118.62287902832031, "logps/rejected": -267.23309326171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.078327178955078, "rewards/margins": 13.959972381591797, "rewards/rejected": -19.038301467895508, "step": 4988 }, { "epoch": 8.01, "learning_rate": 6.123662306777646e-08, "logits/chosen": -1.4412992000579834, "logits/rejected": -1.4180266857147217, "logps/chosen": -168.19607543945312, "logps/rejected": -313.91058349609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.206597328186035, "rewards/margins": 13.737187385559082, "rewards/rejected": -21.943784713745117, "step": 4989 }, { "epoch": 8.01, "learning_rate": 6.113753468093539e-08, "logits/chosen": -1.300450325012207, "logits/rejected": -1.3777892589569092, "logps/chosen": -175.92239379882812, "logps/rejected": -366.51141357421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.123052597045898, "rewards/margins": 17.542163848876953, "rewards/rejected": -25.665218353271484, "step": 4990 }, { "epoch": 8.01, "learning_rate": 6.103844629409434e-08, "logits/chosen": -1.442967414855957, "logits/rejected": -1.3799229860305786, "logps/chosen": -81.39944458007812, "logps/rejected": -200.34912109375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -1.6719273328781128, "rewards/margins": 13.236421585083008, "rewards/rejected": -14.90834903717041, "step": 4991 }, { "epoch": 8.01, "learning_rate": 6.093935790725327e-08, "logits/chosen": -1.4674681425094604, "logits/rejected": -1.4519776105880737, "logps/chosen": -215.80662536621094, "logps/rejected": -351.411376953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.270995140075684, "rewards/margins": 17.03670883178711, "rewards/rejected": -26.307703018188477, "step": 4992 }, { "epoch": 8.01, "learning_rate": 6.08402695204122e-08, "logits/chosen": -1.44843590259552, "logits/rejected": -1.463646650314331, "logps/chosen": -128.83209228515625, "logps/rejected": -233.4813232421875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.037652969360352, "rewards/margins": 10.914144515991211, "rewards/rejected": -15.951797485351562, "step": 4993 }, { "epoch": 8.02, "learning_rate": 6.074118113357115e-08, "logits/chosen": -1.4362345933914185, "logits/rejected": -1.461738109588623, "logps/chosen": -126.75273132324219, "logps/rejected": -288.6739807128906, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.79638671875, "rewards/margins": 15.205348014831543, "rewards/rejected": -21.001733779907227, "step": 4994 }, { "epoch": 8.02, "learning_rate": 6.064209274673008e-08, "logits/chosen": -1.455554485321045, "logits/rejected": -1.4738891124725342, "logps/chosen": -179.00338745117188, "logps/rejected": -311.30364990234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.892496109008789, "rewards/margins": 13.97335433959961, "rewards/rejected": -22.865848541259766, "step": 4995 }, { "epoch": 8.02, "learning_rate": 6.054300435988902e-08, "logits/chosen": -1.6714742183685303, "logits/rejected": -1.6012918949127197, "logps/chosen": -157.69412231445312, "logps/rejected": -289.41217041015625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.400862693786621, "rewards/margins": 13.939992904663086, "rewards/rejected": -20.34085464477539, "step": 4996 }, { "epoch": 8.02, "learning_rate": 6.044391597304795e-08, "logits/chosen": -1.47527015209198, "logits/rejected": -1.5775623321533203, "logps/chosen": -126.01803588867188, "logps/rejected": -257.599853515625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.485997200012207, "rewards/margins": 11.969842910766602, "rewards/rejected": -18.455841064453125, "step": 4997 }, { "epoch": 8.02, "learning_rate": 6.03448275862069e-08, "logits/chosen": -1.6175439357757568, "logits/rejected": -1.5628142356872559, "logps/chosen": -95.38114166259766, "logps/rejected": -195.00906372070312, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -3.452406406402588, "rewards/margins": 11.002397537231445, "rewards/rejected": -14.454805374145508, "step": 4998 }, { "epoch": 8.02, "learning_rate": 6.024573919936583e-08, "logits/chosen": -1.3363347053527832, "logits/rejected": -1.3700473308563232, "logps/chosen": -167.11839294433594, "logps/rejected": -290.07281494140625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.656893730163574, "rewards/margins": 13.182232856750488, "rewards/rejected": -20.839126586914062, "step": 4999 }, { "epoch": 8.03, "learning_rate": 6.014665081252478e-08, "logits/chosen": -1.5741082429885864, "logits/rejected": -1.6684659719467163, "logps/chosen": -120.42105102539062, "logps/rejected": -261.6542663574219, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.292778491973877, "rewards/margins": 12.81362247467041, "rewards/rejected": -18.106399536132812, "step": 5000 }, { "epoch": 8.03, "learning_rate": 6.004756242568371e-08, "logits/chosen": -1.4564976692199707, "logits/rejected": -1.5475804805755615, "logps/chosen": -152.11944580078125, "logps/rejected": -331.1118469238281, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.008017063140869, "rewards/margins": 16.91470718383789, "rewards/rejected": -22.92272186279297, "step": 5001 }, { "epoch": 8.03, "learning_rate": 5.994847403884264e-08, "logits/chosen": -1.2886805534362793, "logits/rejected": -1.3354151248931885, "logps/chosen": -154.04531860351562, "logps/rejected": -312.11102294921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.464323997497559, "rewards/margins": 14.941532135009766, "rewards/rejected": -22.405855178833008, "step": 5002 }, { "epoch": 8.03, "learning_rate": 5.984938565200158e-08, "logits/chosen": -1.4713118076324463, "logits/rejected": -1.5071650743484497, "logps/chosen": -166.52761840820312, "logps/rejected": -311.44915771484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.020369529724121, "rewards/margins": 12.726841926574707, "rewards/rejected": -21.747211456298828, "step": 5003 }, { "epoch": 8.03, "learning_rate": 5.975029726516052e-08, "logits/chosen": -1.6593457460403442, "logits/rejected": -1.597397804260254, "logps/chosen": -145.5205078125, "logps/rejected": -290.1800231933594, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.3631391525268555, "rewards/margins": 15.62936782836914, "rewards/rejected": -21.992507934570312, "step": 5004 }, { "epoch": 8.03, "learning_rate": 5.965120887831946e-08, "logits/chosen": -1.5022236108779907, "logits/rejected": -1.4690546989440918, "logps/chosen": -177.4291534423828, "logps/rejected": -290.5950012207031, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.010746955871582, "rewards/margins": 12.048233032226562, "rewards/rejected": -21.058979034423828, "step": 5005 }, { "epoch": 8.04, "learning_rate": 5.9552120491478395e-08, "logits/chosen": -1.5052225589752197, "logits/rejected": -1.553720474243164, "logps/chosen": -199.36993408203125, "logps/rejected": -359.786376953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -10.366771697998047, "rewards/margins": 15.638596534729004, "rewards/rejected": -26.005369186401367, "step": 5006 }, { "epoch": 8.04, "learning_rate": 5.9453032104637335e-08, "logits/chosen": -1.437647819519043, "logits/rejected": -1.3140289783477783, "logps/chosen": -176.91995239257812, "logps/rejected": -270.0015563964844, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.503847122192383, "rewards/margins": 12.328627586364746, "rewards/rejected": -20.832475662231445, "step": 5007 }, { "epoch": 8.04, "learning_rate": 5.935394371779627e-08, "logits/chosen": -1.516730546951294, "logits/rejected": -1.546327829360962, "logps/chosen": -134.669677734375, "logps/rejected": -344.0626220703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.657210350036621, "rewards/margins": 18.82769775390625, "rewards/rejected": -25.484909057617188, "step": 5008 }, { "epoch": 8.04, "learning_rate": 5.925485533095521e-08, "logits/chosen": -1.4851889610290527, "logits/rejected": -1.5952776670455933, "logps/chosen": -165.61199951171875, "logps/rejected": -306.33160400390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.671361923217773, "rewards/margins": 12.877870559692383, "rewards/rejected": -21.549232482910156, "step": 5009 }, { "epoch": 8.04, "learning_rate": 5.915576694411414e-08, "logits/chosen": -1.5311331748962402, "logits/rejected": -1.4370718002319336, "logps/chosen": -190.90576171875, "logps/rejected": -280.8857116699219, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.194364547729492, "rewards/margins": 10.498478889465332, "rewards/rejected": -19.69284439086914, "step": 5010 }, { "epoch": 8.04, "learning_rate": 5.905667855727309e-08, "logits/chosen": -1.484938144683838, "logits/rejected": -1.5076799392700195, "logps/chosen": -137.46905517578125, "logps/rejected": -316.4364318847656, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.962255001068115, "rewards/margins": 16.017789840698242, "rewards/rejected": -21.980045318603516, "step": 5011 }, { "epoch": 8.04, "learning_rate": 5.895759017043202e-08, "logits/chosen": -1.5497853755950928, "logits/rejected": -1.5418919324874878, "logps/chosen": -197.76304626464844, "logps/rejected": -306.16680908203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -10.680582046508789, "rewards/margins": 11.870372772216797, "rewards/rejected": -22.55095672607422, "step": 5012 }, { "epoch": 8.05, "learning_rate": 5.885850178359096e-08, "logits/chosen": -1.3082879781723022, "logits/rejected": -1.306286096572876, "logps/chosen": -143.9158935546875, "logps/rejected": -283.1452941894531, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.0566606521606445, "rewards/margins": 14.042704582214355, "rewards/rejected": -21.099363327026367, "step": 5013 }, { "epoch": 8.05, "learning_rate": 5.8759413396749895e-08, "logits/chosen": -1.2717808485031128, "logits/rejected": -1.2968636751174927, "logps/chosen": -157.106201171875, "logps/rejected": -279.19952392578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.7638397216796875, "rewards/margins": 12.463899612426758, "rewards/rejected": -19.227739334106445, "step": 5014 }, { "epoch": 8.05, "learning_rate": 5.8660325009908834e-08, "logits/chosen": -1.6310466527938843, "logits/rejected": -1.6260137557983398, "logps/chosen": -140.15975952148438, "logps/rejected": -283.4669494628906, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.531830787658691, "rewards/margins": 13.725627899169922, "rewards/rejected": -20.257457733154297, "step": 5015 }, { "epoch": 8.05, "learning_rate": 5.856123662306777e-08, "logits/chosen": -1.6143629550933838, "logits/rejected": -1.6170204877853394, "logps/chosen": -187.99343872070312, "logps/rejected": -307.4961853027344, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.199689865112305, "rewards/margins": 13.070430755615234, "rewards/rejected": -22.27012062072754, "step": 5016 }, { "epoch": 8.05, "learning_rate": 5.8462148236226714e-08, "logits/chosen": -1.482712984085083, "logits/rejected": -1.4227051734924316, "logps/chosen": -167.87205505371094, "logps/rejected": -277.3843078613281, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.175382614135742, "rewards/margins": 12.382181167602539, "rewards/rejected": -19.55756378173828, "step": 5017 }, { "epoch": 8.05, "learning_rate": 5.8363059849385654e-08, "logits/chosen": -1.4423280954360962, "logits/rejected": -1.4525494575500488, "logps/chosen": -170.44740295410156, "logps/rejected": -271.85394287109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.565171241760254, "rewards/margins": 9.839584350585938, "rewards/rejected": -19.404756546020508, "step": 5018 }, { "epoch": 8.06, "learning_rate": 5.826397146254459e-08, "logits/chosen": -1.5584708452224731, "logits/rejected": -1.5922813415527344, "logps/chosen": -90.78547668457031, "logps/rejected": -237.15399169921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.5323948860168457, "rewards/margins": 13.885372161865234, "rewards/rejected": -16.417766571044922, "step": 5019 }, { "epoch": 8.06, "learning_rate": 5.816488307570353e-08, "logits/chosen": -1.301735281944275, "logits/rejected": -1.3792381286621094, "logps/chosen": -168.683837890625, "logps/rejected": -282.667724609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.480048179626465, "rewards/margins": 10.640990257263184, "rewards/rejected": -19.12103843688965, "step": 5020 }, { "epoch": 8.06, "learning_rate": 5.806579468886246e-08, "logits/chosen": -1.7400531768798828, "logits/rejected": -1.7655744552612305, "logps/chosen": -113.97752380371094, "logps/rejected": -265.209228515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.333478927612305, "rewards/margins": 15.259838104248047, "rewards/rejected": -20.59331703186035, "step": 5021 }, { "epoch": 8.06, "learning_rate": 5.79667063020214e-08, "logits/chosen": -1.4923796653747559, "logits/rejected": -1.546484112739563, "logps/chosen": -126.71984100341797, "logps/rejected": -295.7197570800781, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.491053581237793, "rewards/margins": 15.13868236541748, "rewards/rejected": -21.629737854003906, "step": 5022 }, { "epoch": 8.06, "learning_rate": 5.7867617915180334e-08, "logits/chosen": -1.362676739692688, "logits/rejected": -1.3625218868255615, "logps/chosen": -175.61293029785156, "logps/rejected": -303.1582336425781, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.784147262573242, "rewards/margins": 13.387413024902344, "rewards/rejected": -23.17156219482422, "step": 5023 }, { "epoch": 8.06, "learning_rate": 5.776852952833928e-08, "logits/chosen": -1.4822663068771362, "logits/rejected": -1.4683314561843872, "logps/chosen": -170.95077514648438, "logps/rejected": -288.95501708984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.182988166809082, "rewards/margins": 12.430891990661621, "rewards/rejected": -20.613880157470703, "step": 5024 }, { "epoch": 8.07, "learning_rate": 5.7669441141498214e-08, "logits/chosen": -1.4152772426605225, "logits/rejected": -1.4138387441635132, "logps/chosen": -142.9827423095703, "logps/rejected": -280.953369140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.155002593994141, "rewards/margins": 13.590129852294922, "rewards/rejected": -20.745132446289062, "step": 5025 }, { "epoch": 8.07, "learning_rate": 5.7570352754657154e-08, "logits/chosen": -1.456222414970398, "logits/rejected": -1.4256058931350708, "logps/chosen": -156.11849975585938, "logps/rejected": -307.74884033203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.136623859405518, "rewards/margins": 16.76593780517578, "rewards/rejected": -22.90256118774414, "step": 5026 }, { "epoch": 8.07, "learning_rate": 5.747126436781609e-08, "logits/chosen": -1.422315001487732, "logits/rejected": -1.4800069332122803, "logps/chosen": -187.79588317871094, "logps/rejected": -334.79937744140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.436535835266113, "rewards/margins": 15.997604370117188, "rewards/rejected": -24.434139251708984, "step": 5027 }, { "epoch": 8.07, "learning_rate": 5.737217598097503e-08, "logits/chosen": -1.4873183965682983, "logits/rejected": -1.365874171257019, "logps/chosen": -172.91888427734375, "logps/rejected": -314.40185546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.924319267272949, "rewards/margins": 14.928220748901367, "rewards/rejected": -22.852540969848633, "step": 5028 }, { "epoch": 8.07, "learning_rate": 5.727308759413396e-08, "logits/chosen": -1.3354215621948242, "logits/rejected": -1.4064228534698486, "logps/chosen": -162.83189392089844, "logps/rejected": -303.8578796386719, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.677907943725586, "rewards/margins": 12.015957832336426, "rewards/rejected": -19.693864822387695, "step": 5029 }, { "epoch": 8.07, "learning_rate": 5.717399920729291e-08, "logits/chosen": -1.539892554283142, "logits/rejected": -1.6223673820495605, "logps/chosen": -145.84188842773438, "logps/rejected": -337.563720703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.503178119659424, "rewards/margins": 17.84561538696289, "rewards/rejected": -24.348793029785156, "step": 5030 }, { "epoch": 8.08, "learning_rate": 5.707491082045184e-08, "logits/chosen": -1.511381983757019, "logits/rejected": -1.4424058198928833, "logps/chosen": -150.05303955078125, "logps/rejected": -291.4830017089844, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.549310207366943, "rewards/margins": 14.710238456726074, "rewards/rejected": -21.25954818725586, "step": 5031 }, { "epoch": 8.08, "learning_rate": 5.697582243361078e-08, "logits/chosen": -1.4249032735824585, "logits/rejected": -1.512303113937378, "logps/chosen": -127.4808120727539, "logps/rejected": -265.1947937011719, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.535235404968262, "rewards/margins": 12.452552795410156, "rewards/rejected": -17.987789154052734, "step": 5032 }, { "epoch": 8.08, "learning_rate": 5.687673404676971e-08, "logits/chosen": -1.2953213453292847, "logits/rejected": -1.3237937688827515, "logps/chosen": -162.0536346435547, "logps/rejected": -318.8172607421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.727736473083496, "rewards/margins": 15.681451797485352, "rewards/rejected": -23.40918731689453, "step": 5033 }, { "epoch": 8.08, "learning_rate": 5.677764565992865e-08, "logits/chosen": -1.5516060590744019, "logits/rejected": -1.520662784576416, "logps/chosen": -198.44358825683594, "logps/rejected": -348.1696472167969, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -10.100923538208008, "rewards/margins": 15.151809692382812, "rewards/rejected": -25.25273323059082, "step": 5034 }, { "epoch": 8.08, "learning_rate": 5.6678557273087586e-08, "logits/chosen": -1.579177975654602, "logits/rejected": -1.559841513633728, "logps/chosen": -140.9101104736328, "logps/rejected": -286.6640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.182375431060791, "rewards/margins": 14.258125305175781, "rewards/rejected": -20.440502166748047, "step": 5035 }, { "epoch": 8.08, "learning_rate": 5.657946888624653e-08, "logits/chosen": -1.5426607131958008, "logits/rejected": -1.6032276153564453, "logps/chosen": -167.694091796875, "logps/rejected": -284.43414306640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.116045951843262, "rewards/margins": 10.494705200195312, "rewards/rejected": -18.61075210571289, "step": 5036 }, { "epoch": 8.09, "learning_rate": 5.6480380499405466e-08, "logits/chosen": -1.36954927444458, "logits/rejected": -1.3541929721832275, "logps/chosen": -145.0393524169922, "logps/rejected": -315.21649169921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.954878330230713, "rewards/margins": 14.591072082519531, "rewards/rejected": -21.54595184326172, "step": 5037 }, { "epoch": 8.09, "learning_rate": 5.6381292112564406e-08, "logits/chosen": -1.643929123878479, "logits/rejected": -1.6987746953964233, "logps/chosen": -111.92271423339844, "logps/rejected": -275.7811584472656, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.963015079498291, "rewards/margins": 15.289705276489258, "rewards/rejected": -19.25271987915039, "step": 5038 }, { "epoch": 8.09, "learning_rate": 5.6282203725723346e-08, "logits/chosen": -1.5256891250610352, "logits/rejected": -1.4382858276367188, "logps/chosen": -126.73175048828125, "logps/rejected": -257.52459716796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.362931251525879, "rewards/margins": 14.006728172302246, "rewards/rejected": -18.369659423828125, "step": 5039 }, { "epoch": 8.09, "learning_rate": 5.618311533888228e-08, "logits/chosen": -1.4876856803894043, "logits/rejected": -1.5014699697494507, "logps/chosen": -165.00323486328125, "logps/rejected": -282.1630554199219, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.614208221435547, "rewards/margins": 11.139680862426758, "rewards/rejected": -20.753889083862305, "step": 5040 }, { "epoch": 8.09, "learning_rate": 5.608402695204122e-08, "logits/chosen": -1.561333417892456, "logits/rejected": -1.5315662622451782, "logps/chosen": -158.8871307373047, "logps/rejected": -331.2682800292969, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.816561222076416, "rewards/margins": 17.442014694213867, "rewards/rejected": -23.258575439453125, "step": 5041 }, { "epoch": 8.09, "learning_rate": 5.598493856520015e-08, "logits/chosen": -1.5518509149551392, "logits/rejected": -1.420501470565796, "logps/chosen": -204.3192596435547, "logps/rejected": -263.35943603515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.69873332977295, "rewards/margins": 10.078238487243652, "rewards/rejected": -18.776973724365234, "step": 5042 }, { "epoch": 8.09, "learning_rate": 5.58858501783591e-08, "logits/chosen": -1.4123389720916748, "logits/rejected": -1.4243537187576294, "logps/chosen": -164.47825622558594, "logps/rejected": -311.9554443359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.81943416595459, "rewards/margins": 14.671142578125, "rewards/rejected": -22.490577697753906, "step": 5043 }, { "epoch": 8.1, "learning_rate": 5.578676179151803e-08, "logits/chosen": -1.5765647888183594, "logits/rejected": -1.6028544902801514, "logps/chosen": -166.08102416992188, "logps/rejected": -295.8233947753906, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.654403686523438, "rewards/margins": 13.038127899169922, "rewards/rejected": -21.692533493041992, "step": 5044 }, { "epoch": 8.1, "learning_rate": 5.568767340467697e-08, "logits/chosen": -1.489105463027954, "logits/rejected": -1.5064350366592407, "logps/chosen": -160.70042419433594, "logps/rejected": -285.38397216796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.599658966064453, "rewards/margins": 13.865150451660156, "rewards/rejected": -22.46480941772461, "step": 5045 }, { "epoch": 8.1, "learning_rate": 5.5588585017835906e-08, "logits/chosen": -1.5630371570587158, "logits/rejected": -1.4994441270828247, "logps/chosen": -193.54808044433594, "logps/rejected": -293.2505798339844, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -10.045967102050781, "rewards/margins": 11.9987154006958, "rewards/rejected": -22.0446834564209, "step": 5046 }, { "epoch": 8.1, "learning_rate": 5.5489496630994846e-08, "logits/chosen": -1.538164734840393, "logits/rejected": -1.4827446937561035, "logps/chosen": -192.9226837158203, "logps/rejected": -320.33245849609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.058818817138672, "rewards/margins": 13.763336181640625, "rewards/rejected": -22.822154998779297, "step": 5047 }, { "epoch": 8.1, "learning_rate": 5.539040824415378e-08, "logits/chosen": -1.3537883758544922, "logits/rejected": -1.3938060998916626, "logps/chosen": -164.91629028320312, "logps/rejected": -321.3673095703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.238584518432617, "rewards/margins": 13.330362319946289, "rewards/rejected": -22.568946838378906, "step": 5048 }, { "epoch": 8.1, "learning_rate": 5.5291319857312725e-08, "logits/chosen": -1.5404731035232544, "logits/rejected": -1.497546672821045, "logps/chosen": -148.090576171875, "logps/rejected": -273.2918395996094, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.117520809173584, "rewards/margins": 12.487574577331543, "rewards/rejected": -19.60509490966797, "step": 5049 }, { "epoch": 8.11, "learning_rate": 5.519223147047166e-08, "logits/chosen": -1.5431071519851685, "logits/rejected": -1.5490161180496216, "logps/chosen": -144.009765625, "logps/rejected": -302.0162658691406, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.666836261749268, "rewards/margins": 15.751458168029785, "rewards/rejected": -22.41829490661621, "step": 5050 }, { "epoch": 8.11, "learning_rate": 5.50931430836306e-08, "logits/chosen": -1.3245344161987305, "logits/rejected": -1.433255672454834, "logps/chosen": -151.63150024414062, "logps/rejected": -315.32684326171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.11798095703125, "rewards/margins": 16.10100746154785, "rewards/rejected": -24.2189884185791, "step": 5051 }, { "epoch": 8.11, "learning_rate": 5.499405469678953e-08, "logits/chosen": -1.3919018507003784, "logits/rejected": -1.3819198608398438, "logps/chosen": -169.50888061523438, "logps/rejected": -335.4990234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.446106910705566, "rewards/margins": 14.945223808288574, "rewards/rejected": -24.39133071899414, "step": 5052 }, { "epoch": 8.11, "learning_rate": 5.489496630994847e-08, "logits/chosen": -1.4256622791290283, "logits/rejected": -1.3341045379638672, "logps/chosen": -148.5101318359375, "logps/rejected": -232.42657470703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.070347785949707, "rewards/margins": 12.743900299072266, "rewards/rejected": -17.814247131347656, "step": 5053 }, { "epoch": 8.11, "learning_rate": 5.4795877923107405e-08, "logits/chosen": -1.5300273895263672, "logits/rejected": -1.550735592842102, "logps/chosen": -147.5670166015625, "logps/rejected": -301.1936950683594, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.415641784667969, "rewards/margins": 13.905941009521484, "rewards/rejected": -21.321582794189453, "step": 5054 }, { "epoch": 8.11, "learning_rate": 5.4696789536266345e-08, "logits/chosen": -1.5175191164016724, "logits/rejected": -1.4347615242004395, "logps/chosen": -187.27963256835938, "logps/rejected": -277.860107421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.056796073913574, "rewards/margins": 10.569830894470215, "rewards/rejected": -19.626625061035156, "step": 5055 }, { "epoch": 8.12, "learning_rate": 5.4597701149425285e-08, "logits/chosen": -1.4434763193130493, "logits/rejected": -1.4689055681228638, "logps/chosen": -115.36345672607422, "logps/rejected": -257.468017578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.2415337562561035, "rewards/margins": 14.268461227416992, "rewards/rejected": -18.509994506835938, "step": 5056 }, { "epoch": 8.12, "learning_rate": 5.4498612762584225e-08, "logits/chosen": -1.348152756690979, "logits/rejected": -1.3407429456710815, "logps/chosen": -141.50672912597656, "logps/rejected": -371.22857666015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.665949821472168, "rewards/margins": 19.32552719116211, "rewards/rejected": -26.991477966308594, "step": 5057 }, { "epoch": 8.12, "learning_rate": 5.439952437574316e-08, "logits/chosen": -1.351288080215454, "logits/rejected": -1.3172062635421753, "logps/chosen": -124.28700256347656, "logps/rejected": -238.76849365234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.020299434661865, "rewards/margins": 12.471121788024902, "rewards/rejected": -17.49142074584961, "step": 5058 }, { "epoch": 8.12, "learning_rate": 5.43004359889021e-08, "logits/chosen": -1.736776351928711, "logits/rejected": -1.6537883281707764, "logps/chosen": -157.4159698486328, "logps/rejected": -281.6681823730469, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.627684116363525, "rewards/margins": 12.948099136352539, "rewards/rejected": -20.575782775878906, "step": 5059 }, { "epoch": 8.12, "learning_rate": 5.420134760206103e-08, "logits/chosen": -1.2693170309066772, "logits/rejected": -1.4157339334487915, "logps/chosen": -172.37344360351562, "logps/rejected": -341.18060302734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.837562561035156, "rewards/margins": 15.221444129943848, "rewards/rejected": -23.059005737304688, "step": 5060 }, { "epoch": 8.12, "learning_rate": 5.410225921521997e-08, "logits/chosen": -1.5979796648025513, "logits/rejected": -1.5742353200912476, "logps/chosen": -116.18873596191406, "logps/rejected": -240.2010498046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.072524547576904, "rewards/margins": 12.628009796142578, "rewards/rejected": -16.70053482055664, "step": 5061 }, { "epoch": 8.13, "learning_rate": 5.400317082837892e-08, "logits/chosen": -1.5865850448608398, "logits/rejected": -1.5695440769195557, "logps/chosen": -146.6780242919922, "logps/rejected": -264.7309875488281, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.6397576332092285, "rewards/margins": 11.654539108276367, "rewards/rejected": -18.29429817199707, "step": 5062 }, { "epoch": 8.13, "learning_rate": 5.390408244153785e-08, "logits/chosen": -1.4456971883773804, "logits/rejected": -1.555199146270752, "logps/chosen": -171.9786834716797, "logps/rejected": -320.063232421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.749044418334961, "rewards/margins": 14.354634284973145, "rewards/rejected": -23.103679656982422, "step": 5063 }, { "epoch": 8.13, "learning_rate": 5.380499405469679e-08, "logits/chosen": -1.4507858753204346, "logits/rejected": -1.482527732849121, "logps/chosen": -134.48248291015625, "logps/rejected": -284.8904724121094, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.931842803955078, "rewards/margins": 14.497659683227539, "rewards/rejected": -21.429502487182617, "step": 5064 }, { "epoch": 8.13, "learning_rate": 5.3705905667855724e-08, "logits/chosen": -1.496683120727539, "logits/rejected": -1.524594783782959, "logps/chosen": -163.3860626220703, "logps/rejected": -339.73797607421875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.885995864868164, "rewards/margins": 16.588932037353516, "rewards/rejected": -25.474929809570312, "step": 5065 }, { "epoch": 8.13, "learning_rate": 5.3606817281014664e-08, "logits/chosen": -1.6507940292358398, "logits/rejected": -1.6027113199234009, "logps/chosen": -153.49307250976562, "logps/rejected": -323.869873046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.315009593963623, "rewards/margins": 16.979751586914062, "rewards/rejected": -24.294761657714844, "step": 5066 }, { "epoch": 8.13, "learning_rate": 5.35077288941736e-08, "logits/chosen": -1.7583986520767212, "logits/rejected": -1.657734751701355, "logps/chosen": -129.37164306640625, "logps/rejected": -250.50729370117188, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.866494655609131, "rewards/margins": 14.284355163574219, "rewards/rejected": -19.150850296020508, "step": 5067 }, { "epoch": 8.13, "learning_rate": 5.340864050733254e-08, "logits/chosen": -1.5237237215042114, "logits/rejected": -1.6158084869384766, "logps/chosen": -153.3120880126953, "logps/rejected": -309.6342468261719, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.401657104492188, "rewards/margins": 14.24422550201416, "rewards/rejected": -22.645883560180664, "step": 5068 }, { "epoch": 8.14, "learning_rate": 5.330955212049148e-08, "logits/chosen": -1.333419680595398, "logits/rejected": -1.3436493873596191, "logps/chosen": -148.09750366210938, "logps/rejected": -308.4159240722656, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.034862995147705, "rewards/margins": 15.137421607971191, "rewards/rejected": -21.172285079956055, "step": 5069 }, { "epoch": 8.14, "learning_rate": 5.321046373365042e-08, "logits/chosen": -1.4813337326049805, "logits/rejected": -1.4650534391403198, "logps/chosen": -134.33457946777344, "logps/rejected": -289.8671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.656108856201172, "rewards/margins": 15.921088218688965, "rewards/rejected": -22.577198028564453, "step": 5070 }, { "epoch": 8.14, "learning_rate": 5.311137534680935e-08, "logits/chosen": -1.481584072113037, "logits/rejected": -1.4548063278198242, "logps/chosen": -242.00047302246094, "logps/rejected": -347.12603759765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -13.28624439239502, "rewards/margins": 12.193798065185547, "rewards/rejected": -25.48004150390625, "step": 5071 }, { "epoch": 8.14, "learning_rate": 5.301228695996829e-08, "logits/chosen": -1.2863926887512207, "logits/rejected": -1.3679172992706299, "logps/chosen": -152.23240661621094, "logps/rejected": -306.7030029296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.8059773445129395, "rewards/margins": 14.300333023071289, "rewards/rejected": -22.10630989074707, "step": 5072 }, { "epoch": 8.14, "learning_rate": 5.2913198573127224e-08, "logits/chosen": -1.4340667724609375, "logits/rejected": -1.390170693397522, "logps/chosen": -217.89894104003906, "logps/rejected": -337.4667663574219, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -11.58055305480957, "rewards/margins": 12.346675872802734, "rewards/rejected": -23.927228927612305, "step": 5073 }, { "epoch": 8.14, "learning_rate": 5.2814110186286164e-08, "logits/chosen": -1.483917236328125, "logits/rejected": -1.4543235301971436, "logps/chosen": -116.32361602783203, "logps/rejected": -300.0391845703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.299561023712158, "rewards/margins": 17.454004287719727, "rewards/rejected": -20.753562927246094, "step": 5074 }, { "epoch": 8.15, "learning_rate": 5.2715021799445104e-08, "logits/chosen": -1.4699612855911255, "logits/rejected": -1.4675521850585938, "logps/chosen": -113.22071075439453, "logps/rejected": -260.8274841308594, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.277024745941162, "rewards/margins": 14.758099555969238, "rewards/rejected": -19.035123825073242, "step": 5075 }, { "epoch": 8.15, "learning_rate": 5.2615933412604044e-08, "logits/chosen": -1.428161859512329, "logits/rejected": -1.5020971298217773, "logps/chosen": -168.10629272460938, "logps/rejected": -312.34979248046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.963953018188477, "rewards/margins": 12.337993621826172, "rewards/rejected": -21.30194854736328, "step": 5076 }, { "epoch": 8.15, "learning_rate": 5.251684502576298e-08, "logits/chosen": -1.3886592388153076, "logits/rejected": -1.4737379550933838, "logps/chosen": -135.15199279785156, "logps/rejected": -320.681884765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.636810302734375, "rewards/margins": 15.491537094116211, "rewards/rejected": -21.12834930419922, "step": 5077 }, { "epoch": 8.15, "learning_rate": 5.241775663892192e-08, "logits/chosen": -1.4938642978668213, "logits/rejected": -1.555249810218811, "logps/chosen": -126.94692993164062, "logps/rejected": -302.3984069824219, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.227471351623535, "rewards/margins": 12.944963455200195, "rewards/rejected": -18.172435760498047, "step": 5078 }, { "epoch": 8.15, "learning_rate": 5.231866825208085e-08, "logits/chosen": -1.601623773574829, "logits/rejected": -1.454893946647644, "logps/chosen": -205.5631103515625, "logps/rejected": -310.2101135253906, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.927489280700684, "rewards/margins": 13.477229118347168, "rewards/rejected": -21.40471839904785, "step": 5079 }, { "epoch": 8.15, "learning_rate": 5.221957986523979e-08, "logits/chosen": -1.3790454864501953, "logits/rejected": -1.362449049949646, "logps/chosen": -141.59085083007812, "logps/rejected": -305.05804443359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.994536876678467, "rewards/margins": 15.143936157226562, "rewards/rejected": -21.13847541809082, "step": 5080 }, { "epoch": 8.16, "learning_rate": 5.212049147839872e-08, "logits/chosen": -1.6148502826690674, "logits/rejected": -1.613560438156128, "logps/chosen": -121.201416015625, "logps/rejected": -306.168701171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.573497772216797, "rewards/margins": 17.48575210571289, "rewards/rejected": -22.059249877929688, "step": 5081 }, { "epoch": 8.16, "learning_rate": 5.202140309155767e-08, "logits/chosen": -1.4012417793273926, "logits/rejected": -1.4275753498077393, "logps/chosen": -123.32818603515625, "logps/rejected": -295.35870361328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.903429985046387, "rewards/margins": 17.19191551208496, "rewards/rejected": -22.095346450805664, "step": 5082 }, { "epoch": 8.16, "learning_rate": 5.19223147047166e-08, "logits/chosen": -1.4553865194320679, "logits/rejected": -1.477596640586853, "logps/chosen": -161.32533264160156, "logps/rejected": -308.0281982421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.62190055847168, "rewards/margins": 14.484363555908203, "rewards/rejected": -22.106266021728516, "step": 5083 }, { "epoch": 8.16, "learning_rate": 5.182322631787554e-08, "logits/chosen": -1.4680508375167847, "logits/rejected": -1.537972092628479, "logps/chosen": -128.23106384277344, "logps/rejected": -325.38482666015625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.54862117767334, "rewards/margins": 17.914318084716797, "rewards/rejected": -23.462940216064453, "step": 5084 }, { "epoch": 8.16, "learning_rate": 5.172413793103448e-08, "logits/chosen": -1.4114954471588135, "logits/rejected": -1.4394900798797607, "logps/chosen": -193.1170654296875, "logps/rejected": -348.067138671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -10.064460754394531, "rewards/margins": 14.09317398071289, "rewards/rejected": -24.157634735107422, "step": 5085 }, { "epoch": 8.16, "learning_rate": 5.1625049544193416e-08, "logits/chosen": -1.3996275663375854, "logits/rejected": -1.4009391069412231, "logps/chosen": -193.96548461914062, "logps/rejected": -316.7656555175781, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.882570266723633, "rewards/margins": 11.83572769165039, "rewards/rejected": -21.718297958374023, "step": 5086 }, { "epoch": 8.17, "learning_rate": 5.1525961157352356e-08, "logits/chosen": -1.3705863952636719, "logits/rejected": -1.3913012742996216, "logps/chosen": -167.556640625, "logps/rejected": -310.730712890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.111763000488281, "rewards/margins": 14.128606796264648, "rewards/rejected": -22.24036979675293, "step": 5087 }, { "epoch": 8.17, "learning_rate": 5.1426872770511296e-08, "logits/chosen": -1.7474887371063232, "logits/rejected": -1.748692512512207, "logps/chosen": -79.67659759521484, "logps/rejected": -243.1836395263672, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.365725040435791, "rewards/margins": 15.220466613769531, "rewards/rejected": -17.586193084716797, "step": 5088 }, { "epoch": 8.17, "learning_rate": 5.1327784383670236e-08, "logits/chosen": -1.5200344324111938, "logits/rejected": -1.5627028942108154, "logps/chosen": -89.03565979003906, "logps/rejected": -236.24752807617188, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.160780191421509, "rewards/margins": 13.283350944519043, "rewards/rejected": -16.444129943847656, "step": 5089 }, { "epoch": 8.17, "learning_rate": 5.122869599682917e-08, "logits/chosen": -1.508188247680664, "logits/rejected": -1.395262598991394, "logps/chosen": -161.50033569335938, "logps/rejected": -251.58206176757812, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.188433647155762, "rewards/margins": 10.691720962524414, "rewards/rejected": -18.88015365600586, "step": 5090 }, { "epoch": 8.17, "learning_rate": 5.112960760998811e-08, "logits/chosen": -1.4029464721679688, "logits/rejected": -1.4177160263061523, "logps/chosen": -166.69992065429688, "logps/rejected": -322.7857360839844, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.454630851745605, "rewards/margins": 15.319622993469238, "rewards/rejected": -23.774253845214844, "step": 5091 }, { "epoch": 8.17, "learning_rate": 5.103051922314704e-08, "logits/chosen": -1.6754188537597656, "logits/rejected": -1.5991618633270264, "logps/chosen": -188.69818115234375, "logps/rejected": -347.3271789550781, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.433531761169434, "rewards/margins": 16.410064697265625, "rewards/rejected": -23.843597412109375, "step": 5092 }, { "epoch": 8.17, "learning_rate": 5.093143083630598e-08, "logits/chosen": -1.4758355617523193, "logits/rejected": -1.4905071258544922, "logps/chosen": -139.42510986328125, "logps/rejected": -295.32208251953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.321619510650635, "rewards/margins": 15.53695297241211, "rewards/rejected": -20.85857391357422, "step": 5093 }, { "epoch": 8.18, "learning_rate": 5.0832342449464916e-08, "logits/chosen": -1.3711936473846436, "logits/rejected": -1.3864518404006958, "logps/chosen": -181.0755615234375, "logps/rejected": -293.3828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.107027053833008, "rewards/margins": 10.652737617492676, "rewards/rejected": -19.759763717651367, "step": 5094 }, { "epoch": 8.18, "learning_rate": 5.073325406262386e-08, "logits/chosen": -1.4505525827407837, "logits/rejected": -1.4753735065460205, "logps/chosen": -125.75383758544922, "logps/rejected": -298.8839111328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.598180770874023, "rewards/margins": 15.174306869506836, "rewards/rejected": -20.77248764038086, "step": 5095 }, { "epoch": 8.18, "learning_rate": 5.0634165675782796e-08, "logits/chosen": -1.4835758209228516, "logits/rejected": -1.4890576601028442, "logps/chosen": -171.454833984375, "logps/rejected": -298.5585021972656, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.203041076660156, "rewards/margins": 13.888840675354004, "rewards/rejected": -22.091880798339844, "step": 5096 }, { "epoch": 8.18, "learning_rate": 5.0535077288941735e-08, "logits/chosen": -1.634300947189331, "logits/rejected": -1.7208561897277832, "logps/chosen": -110.46183776855469, "logps/rejected": -292.8895263671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.009483814239502, "rewards/margins": 17.37281608581543, "rewards/rejected": -21.382301330566406, "step": 5097 }, { "epoch": 8.18, "learning_rate": 5.043598890210067e-08, "logits/chosen": -1.5398123264312744, "logits/rejected": -1.4083995819091797, "logps/chosen": -188.59837341308594, "logps/rejected": -295.8351745605469, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.349726676940918, "rewards/margins": 13.911592483520508, "rewards/rejected": -21.261316299438477, "step": 5098 }, { "epoch": 8.18, "learning_rate": 5.033690051525961e-08, "logits/chosen": -1.4137226343154907, "logits/rejected": -1.4587455987930298, "logps/chosen": -148.58836364746094, "logps/rejected": -271.8800048828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.362157821655273, "rewards/margins": 12.289802551269531, "rewards/rejected": -18.651960372924805, "step": 5099 }, { "epoch": 8.19, "learning_rate": 5.023781212841854e-08, "logits/chosen": -1.5358881950378418, "logits/rejected": -1.4015371799468994, "logps/chosen": -157.66680908203125, "logps/rejected": -260.55096435546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.502677917480469, "rewards/margins": 12.420709609985352, "rewards/rejected": -19.923385620117188, "step": 5100 }, { "epoch": 8.19, "learning_rate": 5.013872374157749e-08, "logits/chosen": -1.4223418235778809, "logits/rejected": -1.4735400676727295, "logps/chosen": -194.16067504882812, "logps/rejected": -359.5694885253906, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -10.50988483428955, "rewards/margins": 15.929627418518066, "rewards/rejected": -26.439512252807617, "step": 5101 }, { "epoch": 8.19, "learning_rate": 5.003963535473642e-08, "logits/chosen": -1.4215372800827026, "logits/rejected": -1.3772810697555542, "logps/chosen": -146.60418701171875, "logps/rejected": -287.7825622558594, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.455900192260742, "rewards/margins": 14.481273651123047, "rewards/rejected": -19.937171936035156, "step": 5102 }, { "epoch": 8.19, "learning_rate": 4.994054696789536e-08, "logits/chosen": -1.414801836013794, "logits/rejected": -1.3963478803634644, "logps/chosen": -200.4111328125, "logps/rejected": -349.3544616699219, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -10.312349319458008, "rewards/margins": 14.568628311157227, "rewards/rejected": -24.880977630615234, "step": 5103 }, { "epoch": 8.19, "learning_rate": 4.9841458581054295e-08, "logits/chosen": -1.4842941761016846, "logits/rejected": -1.4571878910064697, "logps/chosen": -145.43260192871094, "logps/rejected": -281.86614990234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.811075210571289, "rewards/margins": 15.679715156555176, "rewards/rejected": -20.49078941345215, "step": 5104 }, { "epoch": 8.19, "learning_rate": 4.9742370194213235e-08, "logits/chosen": -1.4301748275756836, "logits/rejected": -1.460841178894043, "logps/chosen": -140.57542419433594, "logps/rejected": -258.31640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.294828414916992, "rewards/margins": 12.014095306396484, "rewards/rejected": -18.30892562866211, "step": 5105 }, { "epoch": 8.2, "learning_rate": 4.964328180737217e-08, "logits/chosen": -1.2384023666381836, "logits/rejected": -1.3091481924057007, "logps/chosen": -129.36288452148438, "logps/rejected": -249.0443115234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.156897068023682, "rewards/margins": 10.944259643554688, "rewards/rejected": -18.101158142089844, "step": 5106 }, { "epoch": 8.2, "learning_rate": 4.9544193420531115e-08, "logits/chosen": -1.4444642066955566, "logits/rejected": -1.402392864227295, "logps/chosen": -119.92792510986328, "logps/rejected": -264.2916564941406, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.0727434158325195, "rewards/margins": 14.476799011230469, "rewards/rejected": -20.549541473388672, "step": 5107 }, { "epoch": 8.2, "learning_rate": 4.9445105033690055e-08, "logits/chosen": -1.4735033512115479, "logits/rejected": -1.4431796073913574, "logps/chosen": -188.45013427734375, "logps/rejected": -314.29364013671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.397211074829102, "rewards/margins": 13.13325023651123, "rewards/rejected": -20.530460357666016, "step": 5108 }, { "epoch": 8.2, "learning_rate": 4.934601664684899e-08, "logits/chosen": -1.4985976219177246, "logits/rejected": -1.521950602531433, "logps/chosen": -151.35899353027344, "logps/rejected": -324.0318298339844, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.544938564300537, "rewards/margins": 17.623151779174805, "rewards/rejected": -24.1680908203125, "step": 5109 }, { "epoch": 8.2, "learning_rate": 4.924692826000793e-08, "logits/chosen": -1.3496787548065186, "logits/rejected": -1.4562433958053589, "logps/chosen": -131.52581787109375, "logps/rejected": -259.6567077636719, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.331926345825195, "rewards/margins": 12.337860107421875, "rewards/rejected": -17.66978645324707, "step": 5110 }, { "epoch": 8.2, "learning_rate": 4.914783987316686e-08, "logits/chosen": -1.3963894844055176, "logits/rejected": -1.4670207500457764, "logps/chosen": -153.27484130859375, "logps/rejected": -316.08056640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.25260591506958, "rewards/margins": 14.616416931152344, "rewards/rejected": -21.869022369384766, "step": 5111 }, { "epoch": 8.21, "learning_rate": 4.90487514863258e-08, "logits/chosen": -1.51712167263031, "logits/rejected": -1.569057822227478, "logps/chosen": -71.88379669189453, "logps/rejected": -193.1371307373047, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.1308507919311523, "rewards/margins": 10.70413875579834, "rewards/rejected": -12.834989547729492, "step": 5112 }, { "epoch": 8.21, "learning_rate": 4.8949663099484734e-08, "logits/chosen": -1.4194964170455933, "logits/rejected": -1.3743622303009033, "logps/chosen": -128.7135772705078, "logps/rejected": -243.84156799316406, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.7843804359436035, "rewards/margins": 12.251638412475586, "rewards/rejected": -18.03601837158203, "step": 5113 }, { "epoch": 8.21, "learning_rate": 4.885057471264368e-08, "logits/chosen": -1.5524508953094482, "logits/rejected": -1.5147383213043213, "logps/chosen": -125.58204650878906, "logps/rejected": -280.8977966308594, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.116694450378418, "rewards/margins": 15.524200439453125, "rewards/rejected": -20.64089584350586, "step": 5114 }, { "epoch": 8.21, "learning_rate": 4.8751486325802614e-08, "logits/chosen": -1.3182706832885742, "logits/rejected": -1.3741024732589722, "logps/chosen": -127.41967010498047, "logps/rejected": -288.11529541015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.105171203613281, "rewards/margins": 15.086261749267578, "rewards/rejected": -21.191434860229492, "step": 5115 }, { "epoch": 8.21, "learning_rate": 4.8652397938961554e-08, "logits/chosen": -1.5511788129806519, "logits/rejected": -1.5150864124298096, "logps/chosen": -197.15579223632812, "logps/rejected": -328.03466796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -10.27137565612793, "rewards/margins": 13.625238418579102, "rewards/rejected": -23.896615982055664, "step": 5116 }, { "epoch": 8.21, "learning_rate": 4.855330955212049e-08, "logits/chosen": -1.2001014947891235, "logits/rejected": -1.2234985828399658, "logps/chosen": -161.50521850585938, "logps/rejected": -297.93402099609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.792704582214355, "rewards/margins": 14.234567642211914, "rewards/rejected": -23.027273178100586, "step": 5117 }, { "epoch": 8.22, "learning_rate": 4.845422116527943e-08, "logits/chosen": -1.5437005758285522, "logits/rejected": -1.5481176376342773, "logps/chosen": -186.07591247558594, "logps/rejected": -274.8922119140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.598838806152344, "rewards/margins": 10.377668380737305, "rewards/rejected": -19.97650718688965, "step": 5118 }, { "epoch": 8.22, "learning_rate": 4.835513277843836e-08, "logits/chosen": -1.2566767930984497, "logits/rejected": -1.3876781463623047, "logps/chosen": -125.52757263183594, "logps/rejected": -289.8056640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.215272903442383, "rewards/margins": 13.30276107788086, "rewards/rejected": -19.51803207397461, "step": 5119 }, { "epoch": 8.22, "learning_rate": 4.825604439159731e-08, "logits/chosen": -1.5804853439331055, "logits/rejected": -1.5793533325195312, "logps/chosen": -142.08921813964844, "logps/rejected": -339.85626220703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.612100601196289, "rewards/margins": 20.0799503326416, "rewards/rejected": -25.69205093383789, "step": 5120 }, { "epoch": 8.22, "learning_rate": 4.815695600475624e-08, "logits/chosen": -1.5343151092529297, "logits/rejected": -1.589090347290039, "logps/chosen": -115.3098373413086, "logps/rejected": -316.15118408203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.301639556884766, "rewards/margins": 18.414833068847656, "rewards/rejected": -22.716474533081055, "step": 5121 }, { "epoch": 8.22, "learning_rate": 4.805786761791518e-08, "logits/chosen": -1.5532604455947876, "logits/rejected": -1.490398645401001, "logps/chosen": -172.0666046142578, "logps/rejected": -302.9784240722656, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.875302791595459, "rewards/margins": 15.574329376220703, "rewards/rejected": -23.449630737304688, "step": 5122 }, { "epoch": 8.22, "learning_rate": 4.7958779231074114e-08, "logits/chosen": -1.5807676315307617, "logits/rejected": -1.6356165409088135, "logps/chosen": -115.237060546875, "logps/rejected": -273.85540771484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.211627960205078, "rewards/margins": 14.481056213378906, "rewards/rejected": -17.692684173583984, "step": 5123 }, { "epoch": 8.22, "learning_rate": 4.7859690844233054e-08, "logits/chosen": -1.586439609527588, "logits/rejected": -1.6394612789154053, "logps/chosen": -152.42369079589844, "logps/rejected": -266.5174865722656, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.795353889465332, "rewards/margins": 11.712722778320312, "rewards/rejected": -18.508075714111328, "step": 5124 }, { "epoch": 8.23, "learning_rate": 4.776060245739199e-08, "logits/chosen": -1.6260563135147095, "logits/rejected": -1.5742969512939453, "logps/chosen": -167.72543334960938, "logps/rejected": -286.3341369628906, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.882300853729248, "rewards/margins": 12.845094680786133, "rewards/rejected": -20.72739601135254, "step": 5125 }, { "epoch": 8.23, "learning_rate": 4.766151407055093e-08, "logits/chosen": -1.60086190700531, "logits/rejected": -1.5827391147613525, "logps/chosen": -177.86361694335938, "logps/rejected": -323.115478515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.7728962898254395, "rewards/margins": 13.331098556518555, "rewards/rejected": -21.103994369506836, "step": 5126 }, { "epoch": 8.23, "learning_rate": 4.756242568370987e-08, "logits/chosen": -1.311976671218872, "logits/rejected": -1.3685868978500366, "logps/chosen": -151.03897094726562, "logps/rejected": -294.99102783203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.2030487060546875, "rewards/margins": 13.47610855102539, "rewards/rejected": -20.67915916442871, "step": 5127 }, { "epoch": 8.23, "learning_rate": 4.7463337296868807e-08, "logits/chosen": -1.458266019821167, "logits/rejected": -1.515960693359375, "logps/chosen": -170.29443359375, "logps/rejected": -318.7456359863281, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.084096431732178, "rewards/margins": 13.62713623046875, "rewards/rejected": -20.711231231689453, "step": 5128 }, { "epoch": 8.23, "learning_rate": 4.736424891002774e-08, "logits/chosen": -1.4271690845489502, "logits/rejected": -1.4491811990737915, "logps/chosen": -178.47567749023438, "logps/rejected": -324.24005126953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.71942138671875, "rewards/margins": 13.913209915161133, "rewards/rejected": -22.632631301879883, "step": 5129 }, { "epoch": 8.23, "learning_rate": 4.726516052318668e-08, "logits/chosen": -1.3975355625152588, "logits/rejected": -1.4172781705856323, "logps/chosen": -153.28440856933594, "logps/rejected": -317.8710021972656, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.059256553649902, "rewards/margins": 15.881263732910156, "rewards/rejected": -22.940519332885742, "step": 5130 }, { "epoch": 8.24, "learning_rate": 4.716607213634562e-08, "logits/chosen": -1.5428593158721924, "logits/rejected": -1.4763214588165283, "logps/chosen": -133.68008422851562, "logps/rejected": -257.60614013671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.394123077392578, "rewards/margins": 13.68747329711914, "rewards/rejected": -18.08159637451172, "step": 5131 }, { "epoch": 8.24, "learning_rate": 4.706698374950455e-08, "logits/chosen": -1.484191656112671, "logits/rejected": -1.4783127307891846, "logps/chosen": -120.51153564453125, "logps/rejected": -228.36972045898438, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.537084102630615, "rewards/margins": 11.270488739013672, "rewards/rejected": -15.807573318481445, "step": 5132 }, { "epoch": 8.24, "learning_rate": 4.69678953626635e-08, "logits/chosen": -1.4122920036315918, "logits/rejected": -1.4997228384017944, "logps/chosen": -153.8106689453125, "logps/rejected": -318.89801025390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.372363567352295, "rewards/margins": 14.612371444702148, "rewards/rejected": -20.9847354888916, "step": 5133 }, { "epoch": 8.24, "learning_rate": 4.686880697582243e-08, "logits/chosen": -1.3468425273895264, "logits/rejected": -1.3429430723190308, "logps/chosen": -110.84014129638672, "logps/rejected": -230.60585021972656, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.1200337409973145, "rewards/margins": 12.461665153503418, "rewards/rejected": -16.581697463989258, "step": 5134 }, { "epoch": 8.24, "learning_rate": 4.676971858898137e-08, "logits/chosen": -1.3859641551971436, "logits/rejected": -1.3686461448669434, "logps/chosen": -215.64468383789062, "logps/rejected": -359.96905517578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -12.716794967651367, "rewards/margins": 15.555336952209473, "rewards/rejected": -28.272132873535156, "step": 5135 }, { "epoch": 8.24, "learning_rate": 4.6670630202140306e-08, "logits/chosen": -1.4891364574432373, "logits/rejected": -1.4850022792816162, "logps/chosen": -166.08688354492188, "logps/rejected": -307.7164611816406, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.702917098999023, "rewards/margins": 13.721424102783203, "rewards/rejected": -22.424339294433594, "step": 5136 }, { "epoch": 8.25, "learning_rate": 4.6571541815299246e-08, "logits/chosen": -1.6219451427459717, "logits/rejected": -1.5299346446990967, "logps/chosen": -101.31915283203125, "logps/rejected": -266.40838623046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.2050039768218994, "rewards/margins": 16.019376754760742, "rewards/rejected": -18.224380493164062, "step": 5137 }, { "epoch": 8.25, "learning_rate": 4.647245342845818e-08, "logits/chosen": -1.38709557056427, "logits/rejected": -1.410101056098938, "logps/chosen": -138.79559326171875, "logps/rejected": -289.8111572265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.619272708892822, "rewards/margins": 14.890365600585938, "rewards/rejected": -21.509639739990234, "step": 5138 }, { "epoch": 8.25, "learning_rate": 4.637336504161712e-08, "logits/chosen": -1.6790928840637207, "logits/rejected": -1.7206487655639648, "logps/chosen": -123.65803527832031, "logps/rejected": -323.7371826171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.414533615112305, "rewards/margins": 19.56269073486328, "rewards/rejected": -23.977222442626953, "step": 5139 }, { "epoch": 8.25, "learning_rate": 4.627427665477606e-08, "logits/chosen": -1.4670294523239136, "logits/rejected": -1.3743274211883545, "logps/chosen": -216.0963897705078, "logps/rejected": -311.1836242675781, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -10.221027374267578, "rewards/margins": 12.269194602966309, "rewards/rejected": -22.490222930908203, "step": 5140 }, { "epoch": 8.25, "learning_rate": 4.6175188267935e-08, "logits/chosen": -1.3313297033309937, "logits/rejected": -1.3589342832565308, "logps/chosen": -165.27383422851562, "logps/rejected": -304.97900390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.033641815185547, "rewards/margins": 14.711692810058594, "rewards/rejected": -23.74533462524414, "step": 5141 }, { "epoch": 8.25, "learning_rate": 4.607609988109393e-08, "logits/chosen": -1.6586132049560547, "logits/rejected": -1.6788184642791748, "logps/chosen": -115.73136901855469, "logps/rejected": -273.6580810546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.866942882537842, "rewards/margins": 15.593997955322266, "rewards/rejected": -20.460941314697266, "step": 5142 }, { "epoch": 8.26, "learning_rate": 4.597701149425287e-08, "logits/chosen": -1.5454264879226685, "logits/rejected": -1.4608381986618042, "logps/chosen": -156.799560546875, "logps/rejected": -253.8870849609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.654820442199707, "rewards/margins": 9.879606246948242, "rewards/rejected": -17.534425735473633, "step": 5143 }, { "epoch": 8.26, "learning_rate": 4.5877923107411806e-08, "logits/chosen": -1.4987781047821045, "logits/rejected": -1.5228865146636963, "logps/chosen": -139.7482452392578, "logps/rejected": -312.787841796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.337204456329346, "rewards/margins": 16.64303207397461, "rewards/rejected": -21.980236053466797, "step": 5144 }, { "epoch": 8.26, "learning_rate": 4.5778834720570745e-08, "logits/chosen": -1.4717087745666504, "logits/rejected": -1.5544406175613403, "logps/chosen": -179.5944366455078, "logps/rejected": -304.38116455078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.945579528808594, "rewards/margins": 13.935062408447266, "rewards/rejected": -20.88064193725586, "step": 5145 }, { "epoch": 8.26, "learning_rate": 4.5679746333729685e-08, "logits/chosen": -1.5290045738220215, "logits/rejected": -1.5296361446380615, "logps/chosen": -127.41400146484375, "logps/rejected": -272.05950927734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.897920608520508, "rewards/margins": 13.842733383178711, "rewards/rejected": -19.74065399169922, "step": 5146 }, { "epoch": 8.26, "learning_rate": 4.5580657946888625e-08, "logits/chosen": -1.6737252473831177, "logits/rejected": -1.6110094785690308, "logps/chosen": -179.14944458007812, "logps/rejected": -319.37322998046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.549347877502441, "rewards/margins": 16.061315536499023, "rewards/rejected": -24.61066246032715, "step": 5147 }, { "epoch": 8.26, "learning_rate": 4.548156956004756e-08, "logits/chosen": -1.7044216394424438, "logits/rejected": -1.6126453876495361, "logps/chosen": -191.3749542236328, "logps/rejected": -304.8066101074219, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.563261985778809, "rewards/margins": 12.610811233520508, "rewards/rejected": -21.174074172973633, "step": 5148 }, { "epoch": 8.26, "learning_rate": 4.53824811732065e-08, "logits/chosen": -1.3605120182037354, "logits/rejected": -1.391648769378662, "logps/chosen": -171.09124755859375, "logps/rejected": -382.6812744140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.649832725524902, "rewards/margins": 19.418827056884766, "rewards/rejected": -28.06865882873535, "step": 5149 }, { "epoch": 8.27, "learning_rate": 4.528339278636543e-08, "logits/chosen": -1.3749734163284302, "logits/rejected": -1.3194057941436768, "logps/chosen": -147.36846923828125, "logps/rejected": -326.4046325683594, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.211730003356934, "rewards/margins": 18.41791343688965, "rewards/rejected": -24.62964630126953, "step": 5150 }, { "epoch": 8.27, "learning_rate": 4.518430439952437e-08, "logits/chosen": -1.594281792640686, "logits/rejected": -1.5282139778137207, "logps/chosen": -199.19683837890625, "logps/rejected": -338.7375183105469, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.40318775177002, "rewards/margins": 15.229990005493164, "rewards/rejected": -24.6331787109375, "step": 5151 }, { "epoch": 8.27, "learning_rate": 4.5085216012683305e-08, "logits/chosen": -1.4249906539916992, "logits/rejected": -1.468375325202942, "logps/chosen": -166.62429809570312, "logps/rejected": -360.3803405761719, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.918121814727783, "rewards/margins": 15.921875953674316, "rewards/rejected": -23.839996337890625, "step": 5152 }, { "epoch": 8.27, "learning_rate": 4.498612762584225e-08, "logits/chosen": -1.4988665580749512, "logits/rejected": -1.519554615020752, "logps/chosen": -232.14016723632812, "logps/rejected": -353.3851623535156, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -12.406328201293945, "rewards/margins": 13.696742057800293, "rewards/rejected": -26.103071212768555, "step": 5153 }, { "epoch": 8.27, "learning_rate": 4.488703923900119e-08, "logits/chosen": -1.4366344213485718, "logits/rejected": -1.4614864587783813, "logps/chosen": -237.99087524414062, "logps/rejected": -310.2522277832031, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -11.63541030883789, "rewards/margins": 11.056614875793457, "rewards/rejected": -22.692026138305664, "step": 5154 }, { "epoch": 8.27, "learning_rate": 4.4787950852160125e-08, "logits/chosen": -1.44674551486969, "logits/rejected": -1.5373154878616333, "logps/chosen": -177.5706329345703, "logps/rejected": -318.5850524902344, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.852296829223633, "rewards/margins": 12.784524917602539, "rewards/rejected": -22.636821746826172, "step": 5155 }, { "epoch": 8.28, "learning_rate": 4.4688862465319065e-08, "logits/chosen": -1.3579710721969604, "logits/rejected": -1.389004111289978, "logps/chosen": -196.2001953125, "logps/rejected": -346.69207763671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -10.4711275100708, "rewards/margins": 13.181644439697266, "rewards/rejected": -23.65277099609375, "step": 5156 }, { "epoch": 8.28, "learning_rate": 4.4589774078478e-08, "logits/chosen": -1.484028935432434, "logits/rejected": -1.5528507232666016, "logps/chosen": -89.6433334350586, "logps/rejected": -254.35528564453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.154792547225952, "rewards/margins": 14.168242454528809, "rewards/rejected": -17.323034286499023, "step": 5157 }, { "epoch": 8.28, "learning_rate": 4.449068569163694e-08, "logits/chosen": -1.375214695930481, "logits/rejected": -1.3577628135681152, "logps/chosen": -169.30833435058594, "logps/rejected": -265.0123291015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.69028091430664, "rewards/margins": 10.162666320800781, "rewards/rejected": -18.852947235107422, "step": 5158 }, { "epoch": 8.28, "learning_rate": 4.439159730479588e-08, "logits/chosen": -1.3717260360717773, "logits/rejected": -1.4075770378112793, "logps/chosen": -180.40802001953125, "logps/rejected": -303.2076721191406, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -10.104228019714355, "rewards/margins": 11.733637809753418, "rewards/rejected": -21.837867736816406, "step": 5159 }, { "epoch": 8.28, "learning_rate": 4.429250891795482e-08, "logits/chosen": -1.3849165439605713, "logits/rejected": -1.452247142791748, "logps/chosen": -144.41082763671875, "logps/rejected": -270.09185791015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.715343475341797, "rewards/margins": 11.57532024383545, "rewards/rejected": -19.290664672851562, "step": 5160 }, { "epoch": 8.28, "learning_rate": 4.419342053111375e-08, "logits/chosen": -1.6619629859924316, "logits/rejected": -1.5076358318328857, "logps/chosen": -183.44464111328125, "logps/rejected": -304.575927734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.939453125, "rewards/margins": 14.84921646118164, "rewards/rejected": -22.78866958618164, "step": 5161 }, { "epoch": 8.29, "learning_rate": 4.409433214427269e-08, "logits/chosen": -1.4069055318832397, "logits/rejected": -1.4857240915298462, "logps/chosen": -193.20741271972656, "logps/rejected": -330.2534484863281, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -10.965778350830078, "rewards/margins": 11.52481746673584, "rewards/rejected": -22.490596771240234, "step": 5162 }, { "epoch": 8.29, "learning_rate": 4.3995243757431624e-08, "logits/chosen": -1.4729615449905396, "logits/rejected": -1.4703017473220825, "logps/chosen": -157.56854248046875, "logps/rejected": -276.4106140136719, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.401023864746094, "rewards/margins": 10.746482849121094, "rewards/rejected": -19.147504806518555, "step": 5163 }, { "epoch": 8.29, "learning_rate": 4.3896155370590564e-08, "logits/chosen": -1.4879143238067627, "logits/rejected": -1.453555703163147, "logps/chosen": -163.07437133789062, "logps/rejected": -293.39215087890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.800394058227539, "rewards/margins": 13.85392951965332, "rewards/rejected": -21.65432357788086, "step": 5164 }, { "epoch": 8.29, "learning_rate": 4.3797066983749504e-08, "logits/chosen": -1.4726214408874512, "logits/rejected": -1.5315223932266235, "logps/chosen": -139.57550048828125, "logps/rejected": -301.63751220703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.223726749420166, "rewards/margins": 14.908395767211914, "rewards/rejected": -21.132122039794922, "step": 5165 }, { "epoch": 8.29, "learning_rate": 4.3697978596908444e-08, "logits/chosen": -1.5038235187530518, "logits/rejected": -1.6269805431365967, "logps/chosen": -121.80186462402344, "logps/rejected": -281.73876953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.848247051239014, "rewards/margins": 13.319499015808105, "rewards/rejected": -18.16774559020996, "step": 5166 }, { "epoch": 8.29, "learning_rate": 4.359889021006738e-08, "logits/chosen": -1.3230708837509155, "logits/rejected": -1.4108960628509521, "logps/chosen": -123.50393676757812, "logps/rejected": -269.09716796875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.834733247756958, "rewards/margins": 14.46695327758789, "rewards/rejected": -18.301685333251953, "step": 5167 }, { "epoch": 8.3, "learning_rate": 4.349980182322632e-08, "logits/chosen": -1.4811941385269165, "logits/rejected": -1.5559921264648438, "logps/chosen": -210.59011840820312, "logps/rejected": -341.37274169921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -11.070332527160645, "rewards/margins": 13.439475059509277, "rewards/rejected": -24.509807586669922, "step": 5168 }, { "epoch": 8.3, "learning_rate": 4.340071343638525e-08, "logits/chosen": -1.5536938905715942, "logits/rejected": -1.597390055656433, "logps/chosen": -110.17076873779297, "logps/rejected": -261.3765869140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.4465441703796387, "rewards/margins": 14.587087631225586, "rewards/rejected": -18.033632278442383, "step": 5169 }, { "epoch": 8.3, "learning_rate": 4.330162504954419e-08, "logits/chosen": -1.4611495733261108, "logits/rejected": -1.4258003234863281, "logps/chosen": -187.3755645751953, "logps/rejected": -301.1697998046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.046670913696289, "rewards/margins": 13.879430770874023, "rewards/rejected": -21.926101684570312, "step": 5170 }, { "epoch": 8.3, "learning_rate": 4.3202536662703124e-08, "logits/chosen": -1.4131512641906738, "logits/rejected": -1.4416091442108154, "logps/chosen": -155.99752807617188, "logps/rejected": -280.8774108886719, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.4151716232299805, "rewards/margins": 13.072311401367188, "rewards/rejected": -20.487483978271484, "step": 5171 }, { "epoch": 8.3, "learning_rate": 4.310344827586207e-08, "logits/chosen": -1.6658716201782227, "logits/rejected": -1.5841137170791626, "logps/chosen": -188.8218994140625, "logps/rejected": -302.11627197265625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.584357261657715, "rewards/margins": 14.219542503356934, "rewards/rejected": -20.80390167236328, "step": 5172 }, { "epoch": 8.3, "learning_rate": 4.3004359889021004e-08, "logits/chosen": -1.5297801494598389, "logits/rejected": -1.5144885778427124, "logps/chosen": -182.51376342773438, "logps/rejected": -345.5895080566406, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.79861831665039, "rewards/margins": 16.811128616333008, "rewards/rejected": -25.6097469329834, "step": 5173 }, { "epoch": 8.3, "learning_rate": 4.2905271502179943e-08, "logits/chosen": -1.2920570373535156, "logits/rejected": -1.3044688701629639, "logps/chosen": -215.5894775390625, "logps/rejected": -363.36810302734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -13.452165603637695, "rewards/margins": 13.231330871582031, "rewards/rejected": -26.683496475219727, "step": 5174 }, { "epoch": 8.31, "learning_rate": 4.280618311533888e-08, "logits/chosen": -1.352297306060791, "logits/rejected": -1.3863108158111572, "logps/chosen": -180.0340576171875, "logps/rejected": -340.9566650390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -10.30068588256836, "rewards/margins": 14.409372329711914, "rewards/rejected": -24.710058212280273, "step": 5175 }, { "epoch": 8.31, "learning_rate": 4.2707094728497817e-08, "logits/chosen": -1.5788507461547852, "logits/rejected": -1.49993896484375, "logps/chosen": -106.58621978759766, "logps/rejected": -261.23052978515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.640275239944458, "rewards/margins": 16.37055015563965, "rewards/rejected": -20.010826110839844, "step": 5176 }, { "epoch": 8.31, "learning_rate": 4.2608006341656757e-08, "logits/chosen": -1.487769365310669, "logits/rejected": -1.4698734283447266, "logps/chosen": -181.6748809814453, "logps/rejected": -298.8360595703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.310256958007812, "rewards/margins": 11.734797477722168, "rewards/rejected": -20.045055389404297, "step": 5177 }, { "epoch": 8.31, "learning_rate": 4.2508917954815696e-08, "logits/chosen": -1.4002796411514282, "logits/rejected": -1.488922119140625, "logps/chosen": -154.93101501464844, "logps/rejected": -323.5036315917969, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.533254623413086, "rewards/margins": 16.19569206237793, "rewards/rejected": -23.728946685791016, "step": 5178 }, { "epoch": 8.31, "learning_rate": 4.2409829567974636e-08, "logits/chosen": -1.5944528579711914, "logits/rejected": -1.5497052669525146, "logps/chosen": -182.45437622070312, "logps/rejected": -300.54595947265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.90088176727295, "rewards/margins": 12.162219047546387, "rewards/rejected": -22.063100814819336, "step": 5179 }, { "epoch": 8.31, "learning_rate": 4.231074118113357e-08, "logits/chosen": -1.4936680793762207, "logits/rejected": -1.4964358806610107, "logps/chosen": -163.2779083251953, "logps/rejected": -322.61187744140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.648446083068848, "rewards/margins": 18.054241180419922, "rewards/rejected": -24.702686309814453, "step": 5180 }, { "epoch": 8.32, "learning_rate": 4.221165279429251e-08, "logits/chosen": -1.5535444021224976, "logits/rejected": -1.6234707832336426, "logps/chosen": -137.41046142578125, "logps/rejected": -299.76593017578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.560539722442627, "rewards/margins": 13.806644439697266, "rewards/rejected": -19.367185592651367, "step": 5181 }, { "epoch": 8.32, "learning_rate": 4.211256440745144e-08, "logits/chosen": -1.6586534976959229, "logits/rejected": -1.5753275156021118, "logps/chosen": -153.56471252441406, "logps/rejected": -281.11285400390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.163983345031738, "rewards/margins": 13.705907821655273, "rewards/rejected": -19.869892120361328, "step": 5182 }, { "epoch": 8.32, "learning_rate": 4.201347602061038e-08, "logits/chosen": -1.3893486261367798, "logits/rejected": -1.464874029159546, "logps/chosen": -159.2215118408203, "logps/rejected": -374.84893798828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.057983875274658, "rewards/margins": 19.59354591369629, "rewards/rejected": -26.651531219482422, "step": 5183 }, { "epoch": 8.32, "learning_rate": 4.1914387633769316e-08, "logits/chosen": -1.585132122039795, "logits/rejected": -1.6148730516433716, "logps/chosen": -130.2586212158203, "logps/rejected": -289.8560791015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.059906959533691, "rewards/margins": 14.284687995910645, "rewards/rejected": -20.344594955444336, "step": 5184 }, { "epoch": 8.32, "learning_rate": 4.181529924692826e-08, "logits/chosen": -1.4038074016571045, "logits/rejected": -1.4598780870437622, "logps/chosen": -158.65277099609375, "logps/rejected": -272.58880615234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.650260925292969, "rewards/margins": 11.788235664367676, "rewards/rejected": -18.438495635986328, "step": 5185 }, { "epoch": 8.32, "learning_rate": 4.1716210860087196e-08, "logits/chosen": -1.4436628818511963, "logits/rejected": -1.439208984375, "logps/chosen": -214.38075256347656, "logps/rejected": -352.32550048828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -10.86561107635498, "rewards/margins": 11.648797988891602, "rewards/rejected": -22.5144100189209, "step": 5186 }, { "epoch": 8.33, "learning_rate": 4.1617122473246136e-08, "logits/chosen": -1.4136898517608643, "logits/rejected": -1.4545224905014038, "logps/chosen": -164.27230834960938, "logps/rejected": -272.8527526855469, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.27429723739624, "rewards/margins": 11.074076652526855, "rewards/rejected": -18.348373413085938, "step": 5187 }, { "epoch": 8.33, "learning_rate": 4.151803408640507e-08, "logits/chosen": -1.3120441436767578, "logits/rejected": -1.3818018436431885, "logps/chosen": -177.75006103515625, "logps/rejected": -314.7161865234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.241519927978516, "rewards/margins": 12.669726371765137, "rewards/rejected": -21.91124725341797, "step": 5188 }, { "epoch": 8.33, "learning_rate": 4.141894569956401e-08, "logits/chosen": -1.6070737838745117, "logits/rejected": -1.6179251670837402, "logps/chosen": -136.48150634765625, "logps/rejected": -287.1611633300781, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.623805522918701, "rewards/margins": 14.824835777282715, "rewards/rejected": -20.44864273071289, "step": 5189 }, { "epoch": 8.33, "learning_rate": 4.131985731272294e-08, "logits/chosen": -1.5084950923919678, "logits/rejected": -1.569200038909912, "logps/chosen": -185.41343688964844, "logps/rejected": -314.920166015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -10.299433708190918, "rewards/margins": 11.884430885314941, "rewards/rejected": -22.18386459350586, "step": 5190 }, { "epoch": 8.33, "learning_rate": 4.122076892588189e-08, "logits/chosen": -1.5399200916290283, "logits/rejected": -1.4711393117904663, "logps/chosen": -148.44656372070312, "logps/rejected": -298.0243835449219, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.551476955413818, "rewards/margins": 15.160928726196289, "rewards/rejected": -22.712406158447266, "step": 5191 }, { "epoch": 8.33, "learning_rate": 4.112168053904082e-08, "logits/chosen": -1.6096912622451782, "logits/rejected": -1.609188199043274, "logps/chosen": -113.57937622070312, "logps/rejected": -238.70111083984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.9879555702209473, "rewards/margins": 12.71081256866455, "rewards/rejected": -15.698768615722656, "step": 5192 }, { "epoch": 8.34, "learning_rate": 4.102259215219976e-08, "logits/chosen": -1.4353996515274048, "logits/rejected": -1.361790657043457, "logps/chosen": -160.63870239257812, "logps/rejected": -293.07550048828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.527153015136719, "rewards/margins": 14.260472297668457, "rewards/rejected": -21.78762435913086, "step": 5193 }, { "epoch": 8.34, "learning_rate": 4.0923503765358695e-08, "logits/chosen": -1.490644931793213, "logits/rejected": -1.520757794380188, "logps/chosen": -193.22459411621094, "logps/rejected": -359.93585205078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.841415405273438, "rewards/margins": 17.33649444580078, "rewards/rejected": -26.17790985107422, "step": 5194 }, { "epoch": 8.34, "learning_rate": 4.0824415378517635e-08, "logits/chosen": -1.372765064239502, "logits/rejected": -1.4092589616775513, "logps/chosen": -130.4632568359375, "logps/rejected": -269.2786560058594, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.908160209655762, "rewards/margins": 13.3020601272583, "rewards/rejected": -19.210220336914062, "step": 5195 }, { "epoch": 8.34, "learning_rate": 4.072532699167657e-08, "logits/chosen": -1.5817519426345825, "logits/rejected": -1.6100218296051025, "logps/chosen": -156.6029052734375, "logps/rejected": -288.60137939453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.43353796005249, "rewards/margins": 12.578824043273926, "rewards/rejected": -19.012361526489258, "step": 5196 }, { "epoch": 8.34, "learning_rate": 4.062623860483551e-08, "logits/chosen": -1.5307211875915527, "logits/rejected": -1.5063010454177856, "logps/chosen": -127.19876098632812, "logps/rejected": -265.962890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.5858564376831055, "rewards/margins": 12.719900131225586, "rewards/rejected": -18.305757522583008, "step": 5197 }, { "epoch": 8.34, "learning_rate": 4.0527150217994455e-08, "logits/chosen": -1.5902678966522217, "logits/rejected": -1.6491656303405762, "logps/chosen": -112.53759765625, "logps/rejected": -257.5395202636719, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.9026384353637695, "rewards/margins": 13.203434944152832, "rewards/rejected": -18.1060733795166, "step": 5198 }, { "epoch": 8.35, "learning_rate": 4.042806183115339e-08, "logits/chosen": -1.4810292720794678, "logits/rejected": -1.4441901445388794, "logps/chosen": -132.93214416503906, "logps/rejected": -309.232421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.081235408782959, "rewards/margins": 17.548236846923828, "rewards/rejected": -22.629474639892578, "step": 5199 }, { "epoch": 8.35, "learning_rate": 4.032897344431233e-08, "logits/chosen": -1.5036870241165161, "logits/rejected": -1.5365803241729736, "logps/chosen": -87.29761505126953, "logps/rejected": -251.39393615722656, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.52522349357605, "rewards/margins": 15.190144538879395, "rewards/rejected": -17.715368270874023, "step": 5200 }, { "epoch": 8.35, "learning_rate": 4.022988505747126e-08, "logits/chosen": -1.683577060699463, "logits/rejected": -1.671195387840271, "logps/chosen": -157.57785034179688, "logps/rejected": -243.27049255371094, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.680592060089111, "rewards/margins": 11.694573402404785, "rewards/rejected": -18.375165939331055, "step": 5201 }, { "epoch": 8.35, "learning_rate": 4.01307966706302e-08, "logits/chosen": -1.4120746850967407, "logits/rejected": -1.3659647703170776, "logps/chosen": -213.24258422851562, "logps/rejected": -306.2867431640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -10.79353141784668, "rewards/margins": 11.055852890014648, "rewards/rejected": -21.849384307861328, "step": 5202 }, { "epoch": 8.35, "learning_rate": 4.0031708283789135e-08, "logits/chosen": -1.5959912538528442, "logits/rejected": -1.588282585144043, "logps/chosen": -122.85997009277344, "logps/rejected": -242.13272094726562, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.986435413360596, "rewards/margins": 13.014169692993164, "rewards/rejected": -18.0006046295166, "step": 5203 }, { "epoch": 8.35, "learning_rate": 3.993261989694808e-08, "logits/chosen": -1.5501956939697266, "logits/rejected": -1.5993369817733765, "logps/chosen": -177.4984130859375, "logps/rejected": -285.6552734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.829047679901123, "rewards/margins": 11.893478393554688, "rewards/rejected": -18.72252655029297, "step": 5204 }, { "epoch": 8.35, "learning_rate": 3.9833531510107015e-08, "logits/chosen": -1.4408024549484253, "logits/rejected": -1.4746854305267334, "logps/chosen": -157.671142578125, "logps/rejected": -312.572265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.414280414581299, "rewards/margins": 15.366766929626465, "rewards/rejected": -22.781047821044922, "step": 5205 }, { "epoch": 8.36, "learning_rate": 3.9734443123265955e-08, "logits/chosen": -1.3644756078720093, "logits/rejected": -1.3172588348388672, "logps/chosen": -189.35586547851562, "logps/rejected": -288.980712890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -10.214544296264648, "rewards/margins": 11.953958511352539, "rewards/rejected": -22.168502807617188, "step": 5206 }, { "epoch": 8.36, "learning_rate": 3.963535473642489e-08, "logits/chosen": -1.5451061725616455, "logits/rejected": -1.696789264678955, "logps/chosen": -109.16316986083984, "logps/rejected": -319.6854248046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.684279918670654, "rewards/margins": 17.410396575927734, "rewards/rejected": -23.094676971435547, "step": 5207 }, { "epoch": 8.36, "learning_rate": 3.953626634958383e-08, "logits/chosen": -1.2956593036651611, "logits/rejected": -1.356959581375122, "logps/chosen": -148.85525512695312, "logps/rejected": -286.4496154785156, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.627510070800781, "rewards/margins": 14.392566680908203, "rewards/rejected": -21.020076751708984, "step": 5208 }, { "epoch": 8.36, "learning_rate": 3.943717796274276e-08, "logits/chosen": -1.4594923257827759, "logits/rejected": -1.4739028215408325, "logps/chosen": -137.37973022460938, "logps/rejected": -271.63177490234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.977591037750244, "rewards/margins": 13.095155715942383, "rewards/rejected": -20.07274627685547, "step": 5209 }, { "epoch": 8.36, "learning_rate": 3.93380895759017e-08, "logits/chosen": -1.4284814596176147, "logits/rejected": -1.4777988195419312, "logps/chosen": -114.83776092529297, "logps/rejected": -253.7913360595703, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.382091999053955, "rewards/margins": 13.83060073852539, "rewards/rejected": -19.21269416809082, "step": 5210 }, { "epoch": 8.36, "learning_rate": 3.923900118906064e-08, "logits/chosen": -1.4801216125488281, "logits/rejected": -1.5413568019866943, "logps/chosen": -183.3799285888672, "logps/rejected": -318.33795166015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.508111000061035, "rewards/margins": 11.898266792297363, "rewards/rejected": -21.406375885009766, "step": 5211 }, { "epoch": 8.37, "learning_rate": 3.913991280221958e-08, "logits/chosen": -1.5402439832687378, "logits/rejected": -1.636942982673645, "logps/chosen": -96.20692443847656, "logps/rejected": -295.10992431640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.4697232246398926, "rewards/margins": 17.992088317871094, "rewards/rejected": -21.461811065673828, "step": 5212 }, { "epoch": 8.37, "learning_rate": 3.9040824415378514e-08, "logits/chosen": -1.30869460105896, "logits/rejected": -1.3125211000442505, "logps/chosen": -147.60020446777344, "logps/rejected": -290.9935607910156, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.307347297668457, "rewards/margins": 14.866090774536133, "rewards/rejected": -21.173437118530273, "step": 5213 }, { "epoch": 8.37, "learning_rate": 3.8941736028537454e-08, "logits/chosen": -1.4778603315353394, "logits/rejected": -1.4492323398590088, "logps/chosen": -144.427001953125, "logps/rejected": -292.8571472167969, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.690684795379639, "rewards/margins": 14.892791748046875, "rewards/rejected": -21.58347511291504, "step": 5214 }, { "epoch": 8.37, "learning_rate": 3.884264764169639e-08, "logits/chosen": -1.4486738443374634, "logits/rejected": -1.498188853263855, "logps/chosen": -135.227294921875, "logps/rejected": -255.45899963378906, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.159071445465088, "rewards/margins": 11.745023727416992, "rewards/rejected": -17.904094696044922, "step": 5215 }, { "epoch": 8.37, "learning_rate": 3.874355925485533e-08, "logits/chosen": -1.436732530593872, "logits/rejected": -1.416080355644226, "logps/chosen": -199.09585571289062, "logps/rejected": -333.44146728515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -10.779031753540039, "rewards/margins": 12.885775566101074, "rewards/rejected": -23.66480827331543, "step": 5216 }, { "epoch": 8.37, "learning_rate": 3.864447086801427e-08, "logits/chosen": -1.3233386278152466, "logits/rejected": -1.4388632774353027, "logps/chosen": -156.7172088623047, "logps/rejected": -257.047607421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.9866943359375, "rewards/margins": 10.689627647399902, "rewards/rejected": -18.67632293701172, "step": 5217 }, { "epoch": 8.38, "learning_rate": 3.854538248117321e-08, "logits/chosen": -1.6032655239105225, "logits/rejected": -1.530480980873108, "logps/chosen": -169.4097900390625, "logps/rejected": -278.61834716796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.414036750793457, "rewards/margins": 12.499564170837402, "rewards/rejected": -19.91360092163086, "step": 5218 }, { "epoch": 8.38, "learning_rate": 3.844629409433214e-08, "logits/chosen": -1.5775647163391113, "logits/rejected": -1.504279613494873, "logps/chosen": -204.453369140625, "logps/rejected": -318.447021484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.104912757873535, "rewards/margins": 13.439745903015137, "rewards/rejected": -22.544658660888672, "step": 5219 }, { "epoch": 8.38, "learning_rate": 3.834720570749108e-08, "logits/chosen": -1.4296634197235107, "logits/rejected": -1.5080779790878296, "logps/chosen": -141.64187622070312, "logps/rejected": -315.785888671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.224814414978027, "rewards/margins": 14.674476623535156, "rewards/rejected": -19.8992919921875, "step": 5220 }, { "epoch": 8.38, "learning_rate": 3.824811732065002e-08, "logits/chosen": -1.2795367240905762, "logits/rejected": -1.3242545127868652, "logps/chosen": -160.01315307617188, "logps/rejected": -321.10931396484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.5537896156311035, "rewards/margins": 15.468358993530273, "rewards/rejected": -22.02215003967285, "step": 5221 }, { "epoch": 8.38, "learning_rate": 3.8149028933808953e-08, "logits/chosen": -1.5650579929351807, "logits/rejected": -1.6196603775024414, "logps/chosen": -147.55059814453125, "logps/rejected": -303.05364990234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.800615310668945, "rewards/margins": 15.06912899017334, "rewards/rejected": -20.86974334716797, "step": 5222 }, { "epoch": 8.38, "learning_rate": 3.8049940546967893e-08, "logits/chosen": -1.5150845050811768, "logits/rejected": -1.5031527280807495, "logps/chosen": -232.778076171875, "logps/rejected": -347.3345642089844, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -11.210617065429688, "rewards/margins": 11.124911308288574, "rewards/rejected": -22.335529327392578, "step": 5223 }, { "epoch": 8.39, "learning_rate": 3.795085216012683e-08, "logits/chosen": -1.5516533851623535, "logits/rejected": -1.446041226387024, "logps/chosen": -223.13400268554688, "logps/rejected": -328.33770751953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -10.57532024383545, "rewards/margins": 12.697330474853516, "rewards/rejected": -23.27265167236328, "step": 5224 }, { "epoch": 8.39, "learning_rate": 3.785176377328577e-08, "logits/chosen": -1.507722020149231, "logits/rejected": -1.5008347034454346, "logps/chosen": -156.37905883789062, "logps/rejected": -323.9336242675781, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.938154220581055, "rewards/margins": 15.982820510864258, "rewards/rejected": -23.920974731445312, "step": 5225 }, { "epoch": 8.39, "learning_rate": 3.7752675386444706e-08, "logits/chosen": -1.4427845478057861, "logits/rejected": -1.4726704359054565, "logps/chosen": -140.3387908935547, "logps/rejected": -258.430419921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.452826023101807, "rewards/margins": 11.602678298950195, "rewards/rejected": -18.055503845214844, "step": 5226 }, { "epoch": 8.39, "learning_rate": 3.7653586999603646e-08, "logits/chosen": -1.403045415878296, "logits/rejected": -1.4540843963623047, "logps/chosen": -174.86976623535156, "logps/rejected": -274.11212158203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.453418731689453, "rewards/margins": 11.237517356872559, "rewards/rejected": -19.690937042236328, "step": 5227 }, { "epoch": 8.39, "learning_rate": 3.755449861276258e-08, "logits/chosen": -1.4833954572677612, "logits/rejected": -1.4912006855010986, "logps/chosen": -171.1744384765625, "logps/rejected": -312.7405700683594, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.513127326965332, "rewards/margins": 14.481616020202637, "rewards/rejected": -20.99474334716797, "step": 5228 }, { "epoch": 8.39, "learning_rate": 3.745541022592152e-08, "logits/chosen": -1.4925827980041504, "logits/rejected": -1.5075308084487915, "logps/chosen": -131.9743194580078, "logps/rejected": -295.12353515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.229432106018066, "rewards/margins": 15.406041145324707, "rewards/rejected": -21.635473251342773, "step": 5229 }, { "epoch": 8.39, "learning_rate": 3.735632183908046e-08, "logits/chosen": -1.4781956672668457, "logits/rejected": -1.4534273147583008, "logps/chosen": -169.12677001953125, "logps/rejected": -317.49493408203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.609350681304932, "rewards/margins": 16.122787475585938, "rewards/rejected": -23.73213768005371, "step": 5230 }, { "epoch": 8.4, "learning_rate": 3.72572334522394e-08, "logits/chosen": -1.388777256011963, "logits/rejected": -1.482797622680664, "logps/chosen": -160.55197143554688, "logps/rejected": -289.3282775878906, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.502782821655273, "rewards/margins": 10.882402420043945, "rewards/rejected": -19.38518524169922, "step": 5231 }, { "epoch": 8.4, "learning_rate": 3.715814506539833e-08, "logits/chosen": -1.4367626905441284, "logits/rejected": -1.4412535429000854, "logps/chosen": -158.38580322265625, "logps/rejected": -295.0966796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.823751449584961, "rewards/margins": 14.007349014282227, "rewards/rejected": -20.831098556518555, "step": 5232 }, { "epoch": 8.4, "learning_rate": 3.705905667855727e-08, "logits/chosen": -1.4811846017837524, "logits/rejected": -1.4702043533325195, "logps/chosen": -173.4826202392578, "logps/rejected": -348.9783020019531, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.265663146972656, "rewards/margins": 17.80527687072754, "rewards/rejected": -27.070938110351562, "step": 5233 }, { "epoch": 8.4, "learning_rate": 3.6959968291716206e-08, "logits/chosen": -1.5183436870574951, "logits/rejected": -1.5205351114273071, "logps/chosen": -151.37306213378906, "logps/rejected": -301.3514099121094, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.086937427520752, "rewards/margins": 14.309895515441895, "rewards/rejected": -20.396833419799805, "step": 5234 }, { "epoch": 8.4, "learning_rate": 3.6860879904875146e-08, "logits/chosen": -1.325316071510315, "logits/rejected": -1.3548123836517334, "logps/chosen": -159.5791015625, "logps/rejected": -320.996337890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.832362174987793, "rewards/margins": 14.226916313171387, "rewards/rejected": -22.05927848815918, "step": 5235 }, { "epoch": 8.4, "learning_rate": 3.6761791518034086e-08, "logits/chosen": -1.717349648475647, "logits/rejected": -1.6162256002426147, "logps/chosen": -133.58148193359375, "logps/rejected": -262.2670593261719, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.764529228210449, "rewards/margins": 13.215309143066406, "rewards/rejected": -18.979839324951172, "step": 5236 }, { "epoch": 8.41, "learning_rate": 3.6662703131193026e-08, "logits/chosen": -1.328948736190796, "logits/rejected": -1.4306353330612183, "logps/chosen": -158.95480346679688, "logps/rejected": -338.68853759765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.287768840789795, "rewards/margins": 15.488578796386719, "rewards/rejected": -22.776350021362305, "step": 5237 }, { "epoch": 8.41, "learning_rate": 3.656361474435196e-08, "logits/chosen": -1.5069340467453003, "logits/rejected": -1.530819058418274, "logps/chosen": -179.6285400390625, "logps/rejected": -341.1346740722656, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.034302711486816, "rewards/margins": 15.756423950195312, "rewards/rejected": -24.790727615356445, "step": 5238 }, { "epoch": 8.41, "learning_rate": 3.64645263575109e-08, "logits/chosen": -1.5345032215118408, "logits/rejected": -1.497902512550354, "logps/chosen": -154.32662963867188, "logps/rejected": -258.1642761230469, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.487472057342529, "rewards/margins": 11.348966598510742, "rewards/rejected": -18.83643913269043, "step": 5239 }, { "epoch": 8.41, "learning_rate": 3.636543797066983e-08, "logits/chosen": -1.4818075895309448, "logits/rejected": -1.4600462913513184, "logps/chosen": -124.35047912597656, "logps/rejected": -310.7143249511719, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.721432209014893, "rewards/margins": 18.107807159423828, "rewards/rejected": -22.829238891601562, "step": 5240 }, { "epoch": 8.41, "learning_rate": 3.626634958382877e-08, "logits/chosen": -1.588261604309082, "logits/rejected": -1.6189210414886475, "logps/chosen": -164.98509216308594, "logps/rejected": -327.1116027832031, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.240730285644531, "rewards/margins": 13.817481994628906, "rewards/rejected": -22.058212280273438, "step": 5241 }, { "epoch": 8.41, "learning_rate": 3.6167261196987705e-08, "logits/chosen": -1.480536937713623, "logits/rejected": -1.5416618585586548, "logps/chosen": -165.72998046875, "logps/rejected": -317.8761901855469, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.31076717376709, "rewards/margins": 12.711857795715332, "rewards/rejected": -21.022624969482422, "step": 5242 }, { "epoch": 8.42, "learning_rate": 3.606817281014665e-08, "logits/chosen": -1.3673644065856934, "logits/rejected": -1.415358304977417, "logps/chosen": -181.00172424316406, "logps/rejected": -332.39764404296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.423822402954102, "rewards/margins": 13.488641738891602, "rewards/rejected": -22.912464141845703, "step": 5243 }, { "epoch": 8.42, "learning_rate": 3.596908442330559e-08, "logits/chosen": -1.4683160781860352, "logits/rejected": -1.4639520645141602, "logps/chosen": -166.43063354492188, "logps/rejected": -284.51336669921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.344657897949219, "rewards/margins": 11.52599811553955, "rewards/rejected": -20.870655059814453, "step": 5244 }, { "epoch": 8.42, "learning_rate": 3.5869996036464525e-08, "logits/chosen": -1.4296300411224365, "logits/rejected": -1.4758840799331665, "logps/chosen": -174.25518798828125, "logps/rejected": -337.86871337890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.002779006958008, "rewards/margins": 15.148078918457031, "rewards/rejected": -24.15085792541504, "step": 5245 }, { "epoch": 8.42, "learning_rate": 3.5770907649623465e-08, "logits/chosen": -1.2787925004959106, "logits/rejected": -1.3471494913101196, "logps/chosen": -159.3515625, "logps/rejected": -325.78497314453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.208935737609863, "rewards/margins": 14.292337417602539, "rewards/rejected": -22.50127410888672, "step": 5246 }, { "epoch": 8.42, "learning_rate": 3.56718192627824e-08, "logits/chosen": -1.6844663619995117, "logits/rejected": -1.6411373615264893, "logps/chosen": -178.90060424804688, "logps/rejected": -287.5382080078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.559731483459473, "rewards/margins": 11.933184623718262, "rewards/rejected": -19.4929141998291, "step": 5247 }, { "epoch": 8.42, "learning_rate": 3.557273087594134e-08, "logits/chosen": -1.5073035955429077, "logits/rejected": -1.533237338066101, "logps/chosen": -120.0220718383789, "logps/rejected": -252.7034454345703, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.639236927032471, "rewards/margins": 13.25374698638916, "rewards/rejected": -17.892982482910156, "step": 5248 }, { "epoch": 8.43, "learning_rate": 3.547364248910028e-08, "logits/chosen": -1.5172070264816284, "logits/rejected": -1.6212756633758545, "logps/chosen": -183.94058227539062, "logps/rejected": -301.8458251953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.061911582946777, "rewards/margins": 12.137192726135254, "rewards/rejected": -21.19910430908203, "step": 5249 }, { "epoch": 8.43, "learning_rate": 3.537455410225922e-08, "logits/chosen": -1.6743720769882202, "logits/rejected": -1.678865671157837, "logps/chosen": -144.3065643310547, "logps/rejected": -300.917724609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.954261779785156, "rewards/margins": 16.9066219329834, "rewards/rejected": -22.860883712768555, "step": 5250 }, { "epoch": 8.43, "learning_rate": 3.527546571541815e-08, "logits/chosen": -1.504973292350769, "logits/rejected": -1.5569875240325928, "logps/chosen": -188.63986206054688, "logps/rejected": -363.56304931640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.361087799072266, "rewards/margins": 16.084028244018555, "rewards/rejected": -25.44511604309082, "step": 5251 }, { "epoch": 8.43, "learning_rate": 3.517637732857709e-08, "logits/chosen": -1.4265438318252563, "logits/rejected": -1.4330487251281738, "logps/chosen": -183.1820831298828, "logps/rejected": -317.7728271484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.840529441833496, "rewards/margins": 12.590977668762207, "rewards/rejected": -21.431507110595703, "step": 5252 }, { "epoch": 8.43, "learning_rate": 3.5077288941736025e-08, "logits/chosen": -1.5267068147659302, "logits/rejected": -1.5230908393859863, "logps/chosen": -144.85147094726562, "logps/rejected": -308.7808837890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.223055839538574, "rewards/margins": 17.005367279052734, "rewards/rejected": -23.228424072265625, "step": 5253 }, { "epoch": 8.43, "learning_rate": 3.4978200554894965e-08, "logits/chosen": -1.3757261037826538, "logits/rejected": -1.4029710292816162, "logps/chosen": -180.6495819091797, "logps/rejected": -289.6479797363281, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.855740547180176, "rewards/margins": 12.342764854431152, "rewards/rejected": -20.198505401611328, "step": 5254 }, { "epoch": 8.43, "learning_rate": 3.48791121680539e-08, "logits/chosen": -1.4965698719024658, "logits/rejected": -1.4330377578735352, "logps/chosen": -142.36761474609375, "logps/rejected": -278.189453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.528367519378662, "rewards/margins": 13.754584312438965, "rewards/rejected": -20.28295135498047, "step": 5255 }, { "epoch": 8.44, "learning_rate": 3.4780023781212844e-08, "logits/chosen": -1.4552291631698608, "logits/rejected": -1.459513783454895, "logps/chosen": -155.85476684570312, "logps/rejected": -309.05364990234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.420252799987793, "rewards/margins": 15.126474380493164, "rewards/rejected": -23.546728134155273, "step": 5256 }, { "epoch": 8.44, "learning_rate": 3.468093539437178e-08, "logits/chosen": -1.7350515127182007, "logits/rejected": -1.7191799879074097, "logps/chosen": -145.90423583984375, "logps/rejected": -255.39877319335938, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.994962692260742, "rewards/margins": 11.831372261047363, "rewards/rejected": -17.826335906982422, "step": 5257 }, { "epoch": 8.44, "learning_rate": 3.458184700753072e-08, "logits/chosen": -1.405827522277832, "logits/rejected": -1.4365687370300293, "logps/chosen": -158.16921997070312, "logps/rejected": -334.7548828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.476980209350586, "rewards/margins": 15.617199897766113, "rewards/rejected": -24.094181060791016, "step": 5258 }, { "epoch": 8.44, "learning_rate": 3.448275862068965e-08, "logits/chosen": -1.6482388973236084, "logits/rejected": -1.679917335510254, "logps/chosen": -123.4623031616211, "logps/rejected": -263.8641357421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.875180244445801, "rewards/margins": 13.310525894165039, "rewards/rejected": -18.185705184936523, "step": 5259 }, { "epoch": 8.44, "learning_rate": 3.438367023384859e-08, "logits/chosen": -1.7168251276016235, "logits/rejected": -1.6762628555297852, "logps/chosen": -130.94448852539062, "logps/rejected": -288.4057922363281, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.0485944747924805, "rewards/margins": 16.81787872314453, "rewards/rejected": -21.866472244262695, "step": 5260 }, { "epoch": 8.44, "learning_rate": 3.4284581847007524e-08, "logits/chosen": -1.555330753326416, "logits/rejected": -1.4921875, "logps/chosen": -203.64268493652344, "logps/rejected": -363.66339111328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.730734825134277, "rewards/margins": 16.88429832458496, "rewards/rejected": -24.615032196044922, "step": 5261 }, { "epoch": 8.45, "learning_rate": 3.418549346016647e-08, "logits/chosen": -1.3942115306854248, "logits/rejected": -1.43662691116333, "logps/chosen": -170.955078125, "logps/rejected": -331.31463623046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.326119422912598, "rewards/margins": 14.603921890258789, "rewards/rejected": -22.930042266845703, "step": 5262 }, { "epoch": 8.45, "learning_rate": 3.4086405073325404e-08, "logits/chosen": -1.3293402194976807, "logits/rejected": -1.2608534097671509, "logps/chosen": -183.66546630859375, "logps/rejected": -289.68023681640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.536821365356445, "rewards/margins": 11.918441772460938, "rewards/rejected": -20.455263137817383, "step": 5263 }, { "epoch": 8.45, "learning_rate": 3.3987316686484344e-08, "logits/chosen": -1.2768402099609375, "logits/rejected": -1.2991749048233032, "logps/chosen": -174.94752502441406, "logps/rejected": -271.28680419921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.8092427253723145, "rewards/margins": 11.630203247070312, "rewards/rejected": -19.4394474029541, "step": 5264 }, { "epoch": 8.45, "learning_rate": 3.388822829964328e-08, "logits/chosen": -1.3512332439422607, "logits/rejected": -1.4244446754455566, "logps/chosen": -159.18727111816406, "logps/rejected": -303.037841796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.419425964355469, "rewards/margins": 13.911615371704102, "rewards/rejected": -23.331039428710938, "step": 5265 }, { "epoch": 8.45, "learning_rate": 3.378913991280222e-08, "logits/chosen": -1.4186686277389526, "logits/rejected": -1.464728832244873, "logps/chosen": -144.76004028320312, "logps/rejected": -292.1171569824219, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.278541564941406, "rewards/margins": 13.1346435546875, "rewards/rejected": -20.413185119628906, "step": 5266 }, { "epoch": 8.45, "learning_rate": 3.369005152596116e-08, "logits/chosen": -1.587506651878357, "logits/rejected": -1.6604866981506348, "logps/chosen": -137.87789916992188, "logps/rejected": -276.397216796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.571342945098877, "rewards/margins": 15.248785972595215, "rewards/rejected": -19.82012939453125, "step": 5267 }, { "epoch": 8.46, "learning_rate": 3.359096313912009e-08, "logits/chosen": -1.2956652641296387, "logits/rejected": -1.3072736263275146, "logps/chosen": -118.21259307861328, "logps/rejected": -226.0149688720703, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.594743728637695, "rewards/margins": 9.97244930267334, "rewards/rejected": -16.56719398498535, "step": 5268 }, { "epoch": 8.46, "learning_rate": 3.349187475227904e-08, "logits/chosen": -1.4776756763458252, "logits/rejected": -1.4153850078582764, "logps/chosen": -142.50091552734375, "logps/rejected": -241.03480529785156, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.651760101318359, "rewards/margins": 11.095256805419922, "rewards/rejected": -17.74701499938965, "step": 5269 }, { "epoch": 8.46, "learning_rate": 3.339278636543797e-08, "logits/chosen": -1.37542724609375, "logits/rejected": -1.3787496089935303, "logps/chosen": -137.85227966308594, "logps/rejected": -252.88967895507812, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.452975749969482, "rewards/margins": 11.205835342407227, "rewards/rejected": -17.658811569213867, "step": 5270 }, { "epoch": 8.46, "learning_rate": 3.329369797859691e-08, "logits/chosen": -1.4245548248291016, "logits/rejected": -1.5212044715881348, "logps/chosen": -155.41943359375, "logps/rejected": -264.7571105957031, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.742001533508301, "rewards/margins": 10.198533058166504, "rewards/rejected": -17.940536499023438, "step": 5271 }, { "epoch": 8.46, "learning_rate": 3.319460959175584e-08, "logits/chosen": -1.5715467929840088, "logits/rejected": -1.5951907634735107, "logps/chosen": -164.9494171142578, "logps/rejected": -285.21099853515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.394432544708252, "rewards/margins": 12.578388214111328, "rewards/rejected": -18.972820281982422, "step": 5272 }, { "epoch": 8.46, "learning_rate": 3.309552120491478e-08, "logits/chosen": -1.4774184226989746, "logits/rejected": -1.4651622772216797, "logps/chosen": -194.15777587890625, "logps/rejected": -325.71368408203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -10.182168006896973, "rewards/margins": 14.27343463897705, "rewards/rejected": -24.455602645874023, "step": 5273 }, { "epoch": 8.47, "learning_rate": 3.2996432818073717e-08, "logits/chosen": -1.4617109298706055, "logits/rejected": -1.5035383701324463, "logps/chosen": -187.14645385742188, "logps/rejected": -329.8739318847656, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.873849868774414, "rewards/margins": 12.725637435913086, "rewards/rejected": -22.5994873046875, "step": 5274 }, { "epoch": 8.47, "learning_rate": 3.289734443123266e-08, "logits/chosen": -1.5149080753326416, "logits/rejected": -1.4508497714996338, "logps/chosen": -177.79336547851562, "logps/rejected": -340.61419677734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.437387466430664, "rewards/margins": 17.84250259399414, "rewards/rejected": -26.279890060424805, "step": 5275 }, { "epoch": 8.47, "learning_rate": 3.2798256044391596e-08, "logits/chosen": -1.4497653245925903, "logits/rejected": -1.4628615379333496, "logps/chosen": -231.46063232421875, "logps/rejected": -344.92242431640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -13.06665325164795, "rewards/margins": 12.750734329223633, "rewards/rejected": -25.8173885345459, "step": 5276 }, { "epoch": 8.47, "learning_rate": 3.2699167657550536e-08, "logits/chosen": -1.531959056854248, "logits/rejected": -1.504407525062561, "logps/chosen": -97.27474212646484, "logps/rejected": -215.31446838378906, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.338731527328491, "rewards/margins": 13.836234092712402, "rewards/rejected": -16.174964904785156, "step": 5277 }, { "epoch": 8.47, "learning_rate": 3.260007927070947e-08, "logits/chosen": -1.7689625024795532, "logits/rejected": -1.8111995458602905, "logps/chosen": -153.70382690429688, "logps/rejected": -320.62530517578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.893176555633545, "rewards/margins": 14.239767074584961, "rewards/rejected": -21.132944107055664, "step": 5278 }, { "epoch": 8.47, "learning_rate": 3.250099088386841e-08, "logits/chosen": -1.5055395364761353, "logits/rejected": -1.4872915744781494, "logps/chosen": -170.01864624023438, "logps/rejected": -283.79266357421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.440652847290039, "rewards/margins": 12.669122695922852, "rewards/rejected": -21.10977554321289, "step": 5279 }, { "epoch": 8.48, "learning_rate": 3.240190249702734e-08, "logits/chosen": -1.5183337926864624, "logits/rejected": -1.5557817220687866, "logps/chosen": -146.268310546875, "logps/rejected": -281.91796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.824717998504639, "rewards/margins": 13.495906829833984, "rewards/rejected": -20.32062530517578, "step": 5280 }, { "epoch": 8.48, "learning_rate": 3.230281411018628e-08, "logits/chosen": -1.5488252639770508, "logits/rejected": -1.5374385118484497, "logps/chosen": -162.37452697753906, "logps/rejected": -323.90362548828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.974320888519287, "rewards/margins": 16.99317741394043, "rewards/rejected": -22.967498779296875, "step": 5281 }, { "epoch": 8.48, "learning_rate": 3.220372572334522e-08, "logits/chosen": -1.3706458806991577, "logits/rejected": -1.3929518461227417, "logps/chosen": -133.4015350341797, "logps/rejected": -311.0599365234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.546018600463867, "rewards/margins": 16.59040069580078, "rewards/rejected": -22.13641929626465, "step": 5282 }, { "epoch": 8.48, "learning_rate": 3.210463733650416e-08, "logits/chosen": -1.4392982721328735, "logits/rejected": -1.6263833045959473, "logps/chosen": -125.22857666015625, "logps/rejected": -303.8836669921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.116809844970703, "rewards/margins": 13.589061737060547, "rewards/rejected": -18.70587158203125, "step": 5283 }, { "epoch": 8.48, "learning_rate": 3.2005548949663096e-08, "logits/chosen": -1.613411784172058, "logits/rejected": -1.6499463319778442, "logps/chosen": -151.08807373046875, "logps/rejected": -328.4481506347656, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.604355335235596, "rewards/margins": 15.575393676757812, "rewards/rejected": -23.179750442504883, "step": 5284 }, { "epoch": 8.48, "learning_rate": 3.1906460562822036e-08, "logits/chosen": -1.5024439096450806, "logits/rejected": -1.5183054208755493, "logps/chosen": -153.01881408691406, "logps/rejected": -286.07354736328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.479766845703125, "rewards/margins": 12.720052719116211, "rewards/rejected": -19.199819564819336, "step": 5285 }, { "epoch": 8.48, "learning_rate": 3.180737217598097e-08, "logits/chosen": -1.2655105590820312, "logits/rejected": -1.3786059617996216, "logps/chosen": -126.8326187133789, "logps/rejected": -318.4454650878906, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.387579441070557, "rewards/margins": 15.695318222045898, "rewards/rejected": -21.082897186279297, "step": 5286 }, { "epoch": 8.49, "learning_rate": 3.170828378913991e-08, "logits/chosen": -1.4749244451522827, "logits/rejected": -1.4553438425064087, "logps/chosen": -139.03369140625, "logps/rejected": -301.682373046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.121720790863037, "rewards/margins": 17.284679412841797, "rewards/rejected": -22.406400680541992, "step": 5287 }, { "epoch": 8.49, "learning_rate": 3.1609195402298855e-08, "logits/chosen": -1.3963615894317627, "logits/rejected": -1.3579996824264526, "logps/chosen": -228.40176391601562, "logps/rejected": -327.9131164550781, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -11.890525817871094, "rewards/margins": 11.006903648376465, "rewards/rejected": -22.897428512573242, "step": 5288 }, { "epoch": 8.49, "learning_rate": 3.151010701545779e-08, "logits/chosen": -1.4673823118209839, "logits/rejected": -1.4762057065963745, "logps/chosen": -182.8981170654297, "logps/rejected": -333.2408447265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -10.263812065124512, "rewards/margins": 15.405882835388184, "rewards/rejected": -25.669696807861328, "step": 5289 }, { "epoch": 8.49, "learning_rate": 3.141101862861673e-08, "logits/chosen": -1.57244873046875, "logits/rejected": -1.4905040264129639, "logps/chosen": -174.80377197265625, "logps/rejected": -312.9595642089844, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.74770975112915, "rewards/margins": 14.778083801269531, "rewards/rejected": -22.525793075561523, "step": 5290 }, { "epoch": 8.49, "learning_rate": 3.131193024177566e-08, "logits/chosen": -1.436783790588379, "logits/rejected": -1.454984188079834, "logps/chosen": -160.1622314453125, "logps/rejected": -280.0646057128906, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.099636077880859, "rewards/margins": 11.83138656616211, "rewards/rejected": -18.93102264404297, "step": 5291 }, { "epoch": 8.49, "learning_rate": 3.12128418549346e-08, "logits/chosen": -1.4559252262115479, "logits/rejected": -1.4469587802886963, "logps/chosen": -155.05364990234375, "logps/rejected": -275.8875732421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.305416107177734, "rewards/margins": 11.933229446411133, "rewards/rejected": -18.238643646240234, "step": 5292 }, { "epoch": 8.5, "learning_rate": 3.111375346809354e-08, "logits/chosen": -1.2877352237701416, "logits/rejected": -1.3325095176696777, "logps/chosen": -176.763671875, "logps/rejected": -346.1600036621094, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.831888198852539, "rewards/margins": 13.47097396850586, "rewards/rejected": -23.3028621673584, "step": 5293 }, { "epoch": 8.5, "learning_rate": 3.1014665081252475e-08, "logits/chosen": -1.5829613208770752, "logits/rejected": -1.5854076147079468, "logps/chosen": -156.02044677734375, "logps/rejected": -295.3919372558594, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.817516803741455, "rewards/margins": 14.865289688110352, "rewards/rejected": -22.68280792236328, "step": 5294 }, { "epoch": 8.5, "learning_rate": 3.0915576694411415e-08, "logits/chosen": -1.5560623407363892, "logits/rejected": -1.5792150497436523, "logps/chosen": -147.4964599609375, "logps/rejected": -321.537353515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.483662128448486, "rewards/margins": 15.627803802490234, "rewards/rejected": -23.111465454101562, "step": 5295 }, { "epoch": 8.5, "learning_rate": 3.0816488307570355e-08, "logits/chosen": -1.3383984565734863, "logits/rejected": -1.34153413772583, "logps/chosen": -159.6108856201172, "logps/rejected": -310.8880310058594, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.9567036628723145, "rewards/margins": 14.363517761230469, "rewards/rejected": -22.320220947265625, "step": 5296 }, { "epoch": 8.5, "learning_rate": 3.071739992072929e-08, "logits/chosen": -1.5884387493133545, "logits/rejected": -1.530075192451477, "logps/chosen": -133.04588317871094, "logps/rejected": -246.40078735351562, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.2665300369262695, "rewards/margins": 12.690102577209473, "rewards/rejected": -15.956633567810059, "step": 5297 }, { "epoch": 8.5, "learning_rate": 3.061831153388823e-08, "logits/chosen": -1.4160763025283813, "logits/rejected": -1.3645884990692139, "logps/chosen": -183.54364013671875, "logps/rejected": -317.9031066894531, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.60521411895752, "rewards/margins": 13.707200050354004, "rewards/rejected": -22.312414169311523, "step": 5298 }, { "epoch": 8.51, "learning_rate": 3.051922314704717e-08, "logits/chosen": -1.4247241020202637, "logits/rejected": -1.4499180316925049, "logps/chosen": -185.49659729003906, "logps/rejected": -323.62738037109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -10.65927791595459, "rewards/margins": 13.500141143798828, "rewards/rejected": -24.1594181060791, "step": 5299 }, { "epoch": 8.51, "learning_rate": 3.04201347602061e-08, "logits/chosen": -1.3241069316864014, "logits/rejected": -1.3838847875595093, "logps/chosen": -137.77255249023438, "logps/rejected": -239.1744842529297, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.953025817871094, "rewards/margins": 10.195388793945312, "rewards/rejected": -16.148414611816406, "step": 5300 }, { "epoch": 8.51, "learning_rate": 3.032104637336504e-08, "logits/chosen": -1.2676341533660889, "logits/rejected": -1.3638372421264648, "logps/chosen": -201.07879638671875, "logps/rejected": -357.8717041015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -12.245229721069336, "rewards/margins": 13.346054077148438, "rewards/rejected": -25.59128189086914, "step": 5301 }, { "epoch": 8.51, "learning_rate": 3.0221957986523975e-08, "logits/chosen": -1.5287601947784424, "logits/rejected": -1.3903735876083374, "logps/chosen": -148.32960510253906, "logps/rejected": -283.9595947265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.2362060546875, "rewards/margins": 15.048372268676758, "rewards/rejected": -22.284578323364258, "step": 5302 }, { "epoch": 8.51, "learning_rate": 3.0122869599682914e-08, "logits/chosen": -1.5863018035888672, "logits/rejected": -1.4737548828125, "logps/chosen": -142.0721435546875, "logps/rejected": -280.8223571777344, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.60758113861084, "rewards/margins": 14.054903030395508, "rewards/rejected": -18.662485122680664, "step": 5303 }, { "epoch": 8.51, "learning_rate": 3.0023781212841854e-08, "logits/chosen": -1.569753646850586, "logits/rejected": -1.6052526235580444, "logps/chosen": -142.28759765625, "logps/rejected": -282.66461181640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.251734733581543, "rewards/margins": 12.034758567810059, "rewards/rejected": -18.2864933013916, "step": 5304 }, { "epoch": 8.52, "learning_rate": 2.992469282600079e-08, "logits/chosen": -1.4601396322250366, "logits/rejected": -1.498223900794983, "logps/chosen": -209.66970825195312, "logps/rejected": -355.26446533203125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -9.724623680114746, "rewards/margins": 15.446722030639648, "rewards/rejected": -25.171348571777344, "step": 5305 }, { "epoch": 8.52, "learning_rate": 2.982560443915973e-08, "logits/chosen": -1.4334731101989746, "logits/rejected": -1.340925931930542, "logps/chosen": -187.26287841796875, "logps/rejected": -301.74932861328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.62242317199707, "rewards/margins": 13.84769058227539, "rewards/rejected": -21.470111846923828, "step": 5306 }, { "epoch": 8.52, "learning_rate": 2.9726516052318668e-08, "logits/chosen": -1.4139223098754883, "logits/rejected": -1.3967254161834717, "logps/chosen": -172.67572021484375, "logps/rejected": -301.9642333984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.542247772216797, "rewards/margins": 14.454609870910645, "rewards/rejected": -21.996858596801758, "step": 5307 }, { "epoch": 8.52, "learning_rate": 2.9627427665477604e-08, "logits/chosen": -1.257135033607483, "logits/rejected": -1.3382675647735596, "logps/chosen": -200.52813720703125, "logps/rejected": -341.8114013671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -10.910919189453125, "rewards/margins": 14.416033744812012, "rewards/rejected": -25.326953887939453, "step": 5308 }, { "epoch": 8.52, "learning_rate": 2.9528339278636544e-08, "logits/chosen": -1.5355281829833984, "logits/rejected": -1.5197689533233643, "logps/chosen": -184.58172607421875, "logps/rejected": -320.15869140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.406135559082031, "rewards/margins": 13.743247985839844, "rewards/rejected": -22.149381637573242, "step": 5309 }, { "epoch": 8.52, "learning_rate": 2.942925089179548e-08, "logits/chosen": -1.5456422567367554, "logits/rejected": -1.4861034154891968, "logps/chosen": -192.066650390625, "logps/rejected": -303.46527099609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.090340614318848, "rewards/margins": 11.113665580749512, "rewards/rejected": -20.20400619506836, "step": 5310 }, { "epoch": 8.52, "learning_rate": 2.9330162504954417e-08, "logits/chosen": -1.2441223859786987, "logits/rejected": -1.3233139514923096, "logps/chosen": -124.32218170166016, "logps/rejected": -288.00579833984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.10972785949707, "rewards/margins": 14.956622123718262, "rewards/rejected": -22.066349029541016, "step": 5311 }, { "epoch": 8.53, "learning_rate": 2.9231074118113357e-08, "logits/chosen": -1.609637975692749, "logits/rejected": -1.5811042785644531, "logps/chosen": -161.30661010742188, "logps/rejected": -304.3509521484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.190261840820312, "rewards/margins": 14.63977336883545, "rewards/rejected": -22.830036163330078, "step": 5312 }, { "epoch": 8.53, "learning_rate": 2.9131985731272294e-08, "logits/chosen": -1.5359563827514648, "logits/rejected": -1.4793047904968262, "logps/chosen": -143.94017028808594, "logps/rejected": -246.0155029296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.811089992523193, "rewards/margins": 12.697280883789062, "rewards/rejected": -17.508371353149414, "step": 5313 }, { "epoch": 8.53, "learning_rate": 2.903289734443123e-08, "logits/chosen": -1.4869608879089355, "logits/rejected": -1.5062944889068604, "logps/chosen": -205.3507080078125, "logps/rejected": -378.263671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -10.85128116607666, "rewards/margins": 15.265522956848145, "rewards/rejected": -26.116804122924805, "step": 5314 }, { "epoch": 8.53, "learning_rate": 2.8933808957590167e-08, "logits/chosen": -1.474402904510498, "logits/rejected": -1.4510655403137207, "logps/chosen": -156.89437866210938, "logps/rejected": -314.3916320800781, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.406707286834717, "rewards/margins": 14.190284729003906, "rewards/rejected": -21.59699249267578, "step": 5315 }, { "epoch": 8.53, "learning_rate": 2.8834720570749107e-08, "logits/chosen": -1.300208330154419, "logits/rejected": -1.3384495973587036, "logps/chosen": -153.16539001464844, "logps/rejected": -331.3764343261719, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.708973407745361, "rewards/margins": 14.941812515258789, "rewards/rejected": -22.650787353515625, "step": 5316 }, { "epoch": 8.53, "learning_rate": 2.8735632183908043e-08, "logits/chosen": -1.5049374103546143, "logits/rejected": -1.4860382080078125, "logps/chosen": -143.37864685058594, "logps/rejected": -333.35089111328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.017789363861084, "rewards/margins": 16.98884391784668, "rewards/rejected": -24.006633758544922, "step": 5317 }, { "epoch": 8.54, "learning_rate": 2.863654379706698e-08, "logits/chosen": -1.5117892026901245, "logits/rejected": -1.4568637609481812, "logps/chosen": -149.87579345703125, "logps/rejected": -230.55477905273438, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.554865837097168, "rewards/margins": 9.966217994689941, "rewards/rejected": -16.52108383178711, "step": 5318 }, { "epoch": 8.54, "learning_rate": 2.853745541022592e-08, "logits/chosen": -1.444486141204834, "logits/rejected": -1.4373953342437744, "logps/chosen": -176.1895751953125, "logps/rejected": -298.9335632324219, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.927757263183594, "rewards/margins": 12.629419326782227, "rewards/rejected": -21.55717658996582, "step": 5319 }, { "epoch": 8.54, "learning_rate": 2.8438367023384857e-08, "logits/chosen": -1.457320213317871, "logits/rejected": -1.501973032951355, "logps/chosen": -182.3843994140625, "logps/rejected": -350.89111328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.802604675292969, "rewards/margins": 15.620682716369629, "rewards/rejected": -25.42328643798828, "step": 5320 }, { "epoch": 8.54, "learning_rate": 2.8339278636543793e-08, "logits/chosen": -1.5452220439910889, "logits/rejected": -1.549513339996338, "logps/chosen": -103.06615447998047, "logps/rejected": -269.7901611328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.5740480422973633, "rewards/margins": 16.437833786010742, "rewards/rejected": -20.01188087463379, "step": 5321 }, { "epoch": 8.54, "learning_rate": 2.8240190249702733e-08, "logits/chosen": -1.497855305671692, "logits/rejected": -1.4981880187988281, "logps/chosen": -192.87225341796875, "logps/rejected": -340.9180908203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.363377571105957, "rewards/margins": 16.250537872314453, "rewards/rejected": -25.613914489746094, "step": 5322 }, { "epoch": 8.54, "learning_rate": 2.8141101862861673e-08, "logits/chosen": -1.6020822525024414, "logits/rejected": -1.6253806352615356, "logps/chosen": -151.6761474609375, "logps/rejected": -356.7997131347656, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.516665458679199, "rewards/margins": 18.157249450683594, "rewards/rejected": -24.67391586303711, "step": 5323 }, { "epoch": 8.55, "learning_rate": 2.804201347602061e-08, "logits/chosen": -1.455428957939148, "logits/rejected": -1.4456976652145386, "logps/chosen": -112.84719848632812, "logps/rejected": -234.60345458984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.896343231201172, "rewards/margins": 11.796037673950195, "rewards/rejected": -16.692380905151367, "step": 5324 }, { "epoch": 8.55, "learning_rate": 2.794292508917955e-08, "logits/chosen": -1.3105742931365967, "logits/rejected": -1.3650133609771729, "logps/chosen": -136.65003967285156, "logps/rejected": -254.03179931640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.534463405609131, "rewards/margins": 10.589797019958496, "rewards/rejected": -17.12425994873047, "step": 5325 }, { "epoch": 8.55, "learning_rate": 2.7843836702338486e-08, "logits/chosen": -1.4856257438659668, "logits/rejected": -1.3440372943878174, "logps/chosen": -173.63877868652344, "logps/rejected": -299.33905029296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.506446361541748, "rewards/margins": 15.080118179321289, "rewards/rejected": -22.586565017700195, "step": 5326 }, { "epoch": 8.55, "learning_rate": 2.7744748315497423e-08, "logits/chosen": -1.6603140830993652, "logits/rejected": -1.621292233467102, "logps/chosen": -158.8105010986328, "logps/rejected": -260.1262512207031, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.53774356842041, "rewards/margins": 11.541341781616211, "rewards/rejected": -19.079086303710938, "step": 5327 }, { "epoch": 8.55, "learning_rate": 2.7645659928656363e-08, "logits/chosen": -1.5368571281433105, "logits/rejected": -1.4936940670013428, "logps/chosen": -198.28060913085938, "logps/rejected": -350.18701171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -11.879422187805176, "rewards/margins": 15.236845016479492, "rewards/rejected": -27.116268157958984, "step": 5328 }, { "epoch": 8.55, "learning_rate": 2.75465715418153e-08, "logits/chosen": -1.4456475973129272, "logits/rejected": -1.4464054107666016, "logps/chosen": -175.69000244140625, "logps/rejected": -308.7908935546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.185356140136719, "rewards/margins": 15.754175186157227, "rewards/rejected": -22.939531326293945, "step": 5329 }, { "epoch": 8.56, "learning_rate": 2.7447483154974236e-08, "logits/chosen": -1.6373707056045532, "logits/rejected": -1.6571340560913086, "logps/chosen": -148.97531127929688, "logps/rejected": -263.9482421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.391737461090088, "rewards/margins": 12.460760116577148, "rewards/rejected": -18.852497100830078, "step": 5330 }, { "epoch": 8.56, "learning_rate": 2.7348394768133173e-08, "logits/chosen": -1.3494153022766113, "logits/rejected": -1.3698318004608154, "logps/chosen": -151.35052490234375, "logps/rejected": -293.3700256347656, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.013472557067871, "rewards/margins": 14.061891555786133, "rewards/rejected": -22.075363159179688, "step": 5331 }, { "epoch": 8.56, "learning_rate": 2.7249306381292112e-08, "logits/chosen": -1.4712655544281006, "logits/rejected": -1.4885873794555664, "logps/chosen": -184.94256591796875, "logps/rejected": -326.4329833984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.715487480163574, "rewards/margins": 14.819951057434082, "rewards/rejected": -24.535438537597656, "step": 5332 }, { "epoch": 8.56, "learning_rate": 2.715021799445105e-08, "logits/chosen": -1.4161916971206665, "logits/rejected": -1.4481312036514282, "logps/chosen": -148.6622314453125, "logps/rejected": -285.2359924316406, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.4013090133667, "rewards/margins": 12.715184211730957, "rewards/rejected": -21.116493225097656, "step": 5333 }, { "epoch": 8.56, "learning_rate": 2.7051129607609986e-08, "logits/chosen": -1.449228048324585, "logits/rejected": -1.5207549333572388, "logps/chosen": -185.97470092773438, "logps/rejected": -307.57464599609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.617990970611572, "rewards/margins": 14.06053638458252, "rewards/rejected": -20.67852783203125, "step": 5334 }, { "epoch": 8.56, "learning_rate": 2.6952041220768926e-08, "logits/chosen": -1.525787115097046, "logits/rejected": -1.5952388048171997, "logps/chosen": -129.30865478515625, "logps/rejected": -289.1247253417969, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.446216583251953, "rewards/margins": 14.339208602905273, "rewards/rejected": -20.785425186157227, "step": 5335 }, { "epoch": 8.57, "learning_rate": 2.6852952833927862e-08, "logits/chosen": -1.4558258056640625, "logits/rejected": -1.491516351699829, "logps/chosen": -174.3102569580078, "logps/rejected": -328.10162353515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -10.144646644592285, "rewards/margins": 13.971656799316406, "rewards/rejected": -24.116304397583008, "step": 5336 }, { "epoch": 8.57, "learning_rate": 2.67538644470868e-08, "logits/chosen": -1.4059357643127441, "logits/rejected": -1.4465179443359375, "logps/chosen": -133.94570922851562, "logps/rejected": -291.1237487792969, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.8507466316223145, "rewards/margins": 13.945453643798828, "rewards/rejected": -20.796199798583984, "step": 5337 }, { "epoch": 8.57, "learning_rate": 2.665477606024574e-08, "logits/chosen": -1.4161136150360107, "logits/rejected": -1.4050191640853882, "logps/chosen": -177.13253784179688, "logps/rejected": -291.8352355957031, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.949589729309082, "rewards/margins": 13.923676490783691, "rewards/rejected": -21.873266220092773, "step": 5338 }, { "epoch": 8.57, "learning_rate": 2.6555687673404675e-08, "logits/chosen": -1.5811034440994263, "logits/rejected": -1.6446846723556519, "logps/chosen": -149.21510314941406, "logps/rejected": -254.60293579101562, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.169478893280029, "rewards/margins": 10.767034530639648, "rewards/rejected": -16.936513900756836, "step": 5339 }, { "epoch": 8.57, "learning_rate": 2.6456599286563612e-08, "logits/chosen": -1.6126396656036377, "logits/rejected": -1.639028787612915, "logps/chosen": -122.22483825683594, "logps/rejected": -343.1899719238281, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.837463617324829, "rewards/margins": 22.26229476928711, "rewards/rejected": -26.099760055541992, "step": 5340 }, { "epoch": 8.57, "learning_rate": 2.6357510899722552e-08, "logits/chosen": -1.4560182094573975, "logits/rejected": -1.4611486196517944, "logps/chosen": -189.0106201171875, "logps/rejected": -345.90045166015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.175492286682129, "rewards/margins": 14.048584938049316, "rewards/rejected": -23.224079132080078, "step": 5341 }, { "epoch": 8.57, "learning_rate": 2.625842251288149e-08, "logits/chosen": -1.4171768426895142, "logits/rejected": -1.4787187576293945, "logps/chosen": -173.33970642089844, "logps/rejected": -314.5396423339844, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.783740997314453, "rewards/margins": 14.352156639099121, "rewards/rejected": -22.13589859008789, "step": 5342 }, { "epoch": 8.58, "learning_rate": 2.6159334126040425e-08, "logits/chosen": -1.6631457805633545, "logits/rejected": -1.6889064311981201, "logps/chosen": -189.94525146484375, "logps/rejected": -305.4121398925781, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.514226913452148, "rewards/margins": 14.296371459960938, "rewards/rejected": -20.810598373413086, "step": 5343 }, { "epoch": 8.58, "learning_rate": 2.606024573919936e-08, "logits/chosen": -1.4028581380844116, "logits/rejected": -1.5513808727264404, "logps/chosen": -154.04302978515625, "logps/rejected": -363.2364196777344, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.899682998657227, "rewards/margins": 15.675442695617676, "rewards/rejected": -23.57512664794922, "step": 5344 }, { "epoch": 8.58, "learning_rate": 2.59611573523583e-08, "logits/chosen": -1.5182373523712158, "logits/rejected": -1.5643322467803955, "logps/chosen": -139.8280792236328, "logps/rejected": -308.3028564453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.419684886932373, "rewards/margins": 15.564657211303711, "rewards/rejected": -22.984342575073242, "step": 5345 }, { "epoch": 8.58, "learning_rate": 2.586206896551724e-08, "logits/chosen": -1.3556487560272217, "logits/rejected": -1.4041593074798584, "logps/chosen": -179.55807495117188, "logps/rejected": -256.1380920410156, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.479679107666016, "rewards/margins": 9.009721755981445, "rewards/rejected": -18.48940086364746, "step": 5346 }, { "epoch": 8.58, "learning_rate": 2.5762980578676178e-08, "logits/chosen": -1.438423752784729, "logits/rejected": -1.5021463632583618, "logps/chosen": -124.65530395507812, "logps/rejected": -329.4876708984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.844111442565918, "rewards/margins": 17.670217514038086, "rewards/rejected": -23.51432991027832, "step": 5347 }, { "epoch": 8.58, "learning_rate": 2.5663892191835118e-08, "logits/chosen": -1.4445135593414307, "logits/rejected": -1.4502590894699097, "logps/chosen": -127.3590316772461, "logps/rejected": -309.8711853027344, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.838980674743652, "rewards/margins": 17.33839988708496, "rewards/rejected": -22.17738151550293, "step": 5348 }, { "epoch": 8.59, "learning_rate": 2.5564803804994055e-08, "logits/chosen": -1.5463895797729492, "logits/rejected": -1.446671724319458, "logps/chosen": -151.12588500976562, "logps/rejected": -264.6460266113281, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.093775749206543, "rewards/margins": 12.63621711730957, "rewards/rejected": -18.729991912841797, "step": 5349 }, { "epoch": 8.59, "learning_rate": 2.546571541815299e-08, "logits/chosen": -1.5445969104766846, "logits/rejected": -1.6242945194244385, "logps/chosen": -145.60572814941406, "logps/rejected": -325.014404296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.6104607582092285, "rewards/margins": 15.057937622070312, "rewards/rejected": -21.668397903442383, "step": 5350 }, { "epoch": 8.59, "learning_rate": 2.536662703131193e-08, "logits/chosen": -1.4595754146575928, "logits/rejected": -1.464603066444397, "logps/chosen": -169.99652099609375, "logps/rejected": -303.6057434082031, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.481922149658203, "rewards/margins": 14.053696632385254, "rewards/rejected": -21.53561782836914, "step": 5351 }, { "epoch": 8.59, "learning_rate": 2.5267538644470868e-08, "logits/chosen": -1.4705016613006592, "logits/rejected": -1.5546985864639282, "logps/chosen": -150.09193420410156, "logps/rejected": -311.1763916015625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.911481857299805, "rewards/margins": 15.489324569702148, "rewards/rejected": -22.400806427001953, "step": 5352 }, { "epoch": 8.59, "learning_rate": 2.5168450257629804e-08, "logits/chosen": -1.327255129814148, "logits/rejected": -1.3192100524902344, "logps/chosen": -186.3140869140625, "logps/rejected": -326.7289123535156, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.561616897583008, "rewards/margins": 14.925683975219727, "rewards/rejected": -24.487300872802734, "step": 5353 }, { "epoch": 8.59, "learning_rate": 2.5069361870788744e-08, "logits/chosen": -1.3725125789642334, "logits/rejected": -1.4012531042099, "logps/chosen": -147.37391662597656, "logps/rejected": -292.7457275390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.230949878692627, "rewards/margins": 13.747657775878906, "rewards/rejected": -20.978607177734375, "step": 5354 }, { "epoch": 8.6, "learning_rate": 2.497027348394768e-08, "logits/chosen": -1.5507032871246338, "logits/rejected": -1.5826445817947388, "logps/chosen": -191.09640502929688, "logps/rejected": -314.06878662109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -10.122461318969727, "rewards/margins": 13.332747459411621, "rewards/rejected": -23.455209732055664, "step": 5355 }, { "epoch": 8.6, "learning_rate": 2.4871185097106617e-08, "logits/chosen": -1.4001367092132568, "logits/rejected": -1.385588526725769, "logps/chosen": -189.4335479736328, "logps/rejected": -343.1103820800781, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -10.374775886535645, "rewards/margins": 14.609548568725586, "rewards/rejected": -24.984323501586914, "step": 5356 }, { "epoch": 8.6, "learning_rate": 2.4772096710265557e-08, "logits/chosen": -1.5781500339508057, "logits/rejected": -1.6264827251434326, "logps/chosen": -107.07189178466797, "logps/rejected": -297.6793212890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.524516582489014, "rewards/margins": 15.93104362487793, "rewards/rejected": -20.45555877685547, "step": 5357 }, { "epoch": 8.6, "learning_rate": 2.4673008323424494e-08, "logits/chosen": -1.5420665740966797, "logits/rejected": -1.5506874322891235, "logps/chosen": -127.18698120117188, "logps/rejected": -325.80718994140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.931512355804443, "rewards/margins": 18.012338638305664, "rewards/rejected": -22.943851470947266, "step": 5358 }, { "epoch": 8.6, "learning_rate": 2.457391993658343e-08, "logits/chosen": -1.5057554244995117, "logits/rejected": -1.5669517517089844, "logps/chosen": -151.4854278564453, "logps/rejected": -312.54058837890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.736536979675293, "rewards/margins": 14.653890609741211, "rewards/rejected": -21.390426635742188, "step": 5359 }, { "epoch": 8.6, "learning_rate": 2.4474831549742367e-08, "logits/chosen": -1.6885393857955933, "logits/rejected": -1.6867952346801758, "logps/chosen": -110.49950408935547, "logps/rejected": -238.9984130859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.738239049911499, "rewards/margins": 13.492115020751953, "rewards/rejected": -17.23035430908203, "step": 5360 }, { "epoch": 8.61, "learning_rate": 2.4375743162901307e-08, "logits/chosen": -1.5343379974365234, "logits/rejected": -1.5779157876968384, "logps/chosen": -144.8140869140625, "logps/rejected": -320.1214294433594, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.915621757507324, "rewards/margins": 16.665185928344727, "rewards/rejected": -23.580806732177734, "step": 5361 }, { "epoch": 8.61, "learning_rate": 2.4276654776060244e-08, "logits/chosen": -1.350724220275879, "logits/rejected": -1.3347891569137573, "logps/chosen": -154.35902404785156, "logps/rejected": -295.4147644042969, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.9923095703125, "rewards/margins": 14.798417091369629, "rewards/rejected": -22.790725708007812, "step": 5362 }, { "epoch": 8.61, "learning_rate": 2.417756638921918e-08, "logits/chosen": -1.5498409271240234, "logits/rejected": -1.5183720588684082, "logps/chosen": -151.90362548828125, "logps/rejected": -320.0537109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.150221824645996, "rewards/margins": 15.93820571899414, "rewards/rejected": -23.088428497314453, "step": 5363 }, { "epoch": 8.61, "learning_rate": 2.407847800237812e-08, "logits/chosen": -1.4356343746185303, "logits/rejected": -1.422674536705017, "logps/chosen": -140.69793701171875, "logps/rejected": -318.1617736816406, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.633386611938477, "rewards/margins": 17.060449600219727, "rewards/rejected": -22.693836212158203, "step": 5364 }, { "epoch": 8.61, "learning_rate": 2.3979389615537057e-08, "logits/chosen": -1.7460296154022217, "logits/rejected": -1.6841731071472168, "logps/chosen": -183.9495849609375, "logps/rejected": -288.3733825683594, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.479047775268555, "rewards/margins": 12.009764671325684, "rewards/rejected": -20.488813400268555, "step": 5365 }, { "epoch": 8.61, "learning_rate": 2.3880301228695993e-08, "logits/chosen": -1.4182606935501099, "logits/rejected": -1.3940268754959106, "logps/chosen": -192.3302459716797, "logps/rejected": -330.31011962890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -10.414491653442383, "rewards/margins": 13.323473930358887, "rewards/rejected": -23.737964630126953, "step": 5366 }, { "epoch": 8.61, "learning_rate": 2.3781212841854933e-08, "logits/chosen": -1.4237298965454102, "logits/rejected": -1.4595540761947632, "logps/chosen": -192.94329833984375, "logps/rejected": -342.6048278808594, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.63095235824585, "rewards/margins": 16.023216247558594, "rewards/rejected": -23.65416717529297, "step": 5367 }, { "epoch": 8.62, "learning_rate": 2.368212445501387e-08, "logits/chosen": -1.4925380945205688, "logits/rejected": -1.4754083156585693, "logps/chosen": -138.5514373779297, "logps/rejected": -247.54693603515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.4846954345703125, "rewards/margins": 11.605632781982422, "rewards/rejected": -18.090328216552734, "step": 5368 }, { "epoch": 8.62, "learning_rate": 2.358303606817281e-08, "logits/chosen": -1.2720502614974976, "logits/rejected": -1.2640637159347534, "logps/chosen": -176.77548217773438, "logps/rejected": -333.67779541015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.369275093078613, "rewards/margins": 14.81212043762207, "rewards/rejected": -24.181394577026367, "step": 5369 }, { "epoch": 8.62, "learning_rate": 2.348394768133175e-08, "logits/chosen": -1.398460865020752, "logits/rejected": -1.4351037740707397, "logps/chosen": -192.73634338378906, "logps/rejected": -358.3936462402344, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.989845275878906, "rewards/margins": 15.684565544128418, "rewards/rejected": -25.674409866333008, "step": 5370 }, { "epoch": 8.62, "learning_rate": 2.3384859294490686e-08, "logits/chosen": -1.4406020641326904, "logits/rejected": -1.4983959197998047, "logps/chosen": -142.67947387695312, "logps/rejected": -307.992431640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.7846198081970215, "rewards/margins": 13.613470077514648, "rewards/rejected": -19.398090362548828, "step": 5371 }, { "epoch": 8.62, "learning_rate": 2.3285770907649623e-08, "logits/chosen": -1.5201971530914307, "logits/rejected": -1.5204408168792725, "logps/chosen": -92.43046569824219, "logps/rejected": -260.2508239746094, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.104915618896484, "rewards/margins": 14.969611167907715, "rewards/rejected": -19.074527740478516, "step": 5372 }, { "epoch": 8.62, "learning_rate": 2.318668252080856e-08, "logits/chosen": -1.570996880531311, "logits/rejected": -1.5043530464172363, "logps/chosen": -105.13887023925781, "logps/rejected": -265.59002685546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.5006332397460938, "rewards/margins": 16.251750946044922, "rewards/rejected": -19.752384185791016, "step": 5373 }, { "epoch": 8.63, "learning_rate": 2.30875941339675e-08, "logits/chosen": -1.3687586784362793, "logits/rejected": -1.387278437614441, "logps/chosen": -154.98455810546875, "logps/rejected": -299.95672607421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.355508804321289, "rewards/margins": 14.531704902648926, "rewards/rejected": -21.88721466064453, "step": 5374 }, { "epoch": 8.63, "learning_rate": 2.2988505747126436e-08, "logits/chosen": -1.581653356552124, "logits/rejected": -1.6940927505493164, "logps/chosen": -102.49609375, "logps/rejected": -295.60711669921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.051746845245361, "rewards/margins": 16.530868530273438, "rewards/rejected": -20.58261489868164, "step": 5375 }, { "epoch": 8.63, "learning_rate": 2.2889417360285373e-08, "logits/chosen": -1.3821258544921875, "logits/rejected": -1.449644684791565, "logps/chosen": -153.39483642578125, "logps/rejected": -314.54534912109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.683879375457764, "rewards/margins": 14.320371627807617, "rewards/rejected": -21.00425148010254, "step": 5376 }, { "epoch": 8.63, "learning_rate": 2.2790328973444313e-08, "logits/chosen": -1.4055495262145996, "logits/rejected": -1.3504836559295654, "logps/chosen": -198.59628295898438, "logps/rejected": -315.4260559082031, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -10.824270248413086, "rewards/margins": 12.801178932189941, "rewards/rejected": -23.625450134277344, "step": 5377 }, { "epoch": 8.63, "learning_rate": 2.269124058660325e-08, "logits/chosen": -1.4928538799285889, "logits/rejected": -1.4539557695388794, "logps/chosen": -178.90550231933594, "logps/rejected": -304.42657470703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.62002182006836, "rewards/margins": 13.678658485412598, "rewards/rejected": -22.29867935180664, "step": 5378 }, { "epoch": 8.63, "learning_rate": 2.2592152199762186e-08, "logits/chosen": -1.4649628400802612, "logits/rejected": -1.4860939979553223, "logps/chosen": -216.16201782226562, "logps/rejected": -326.2729797363281, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -11.439777374267578, "rewards/margins": 12.445284843444824, "rewards/rejected": -23.885061264038086, "step": 5379 }, { "epoch": 8.64, "learning_rate": 2.2493063812921126e-08, "logits/chosen": -1.4282879829406738, "logits/rejected": -1.486496925354004, "logps/chosen": -177.47610473632812, "logps/rejected": -325.82806396484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.224937438964844, "rewards/margins": 15.76201057434082, "rewards/rejected": -23.986948013305664, "step": 5380 }, { "epoch": 8.64, "learning_rate": 2.2393975426080062e-08, "logits/chosen": -1.5253872871398926, "logits/rejected": -1.4889881610870361, "logps/chosen": -146.39993286132812, "logps/rejected": -312.7686462402344, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.283271312713623, "rewards/margins": 17.400283813476562, "rewards/rejected": -23.683555603027344, "step": 5381 }, { "epoch": 8.64, "learning_rate": 2.2294887039239e-08, "logits/chosen": -1.3739409446716309, "logits/rejected": -1.3800280094146729, "logps/chosen": -144.0288848876953, "logps/rejected": -323.41851806640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.348338603973389, "rewards/margins": 17.14202308654785, "rewards/rejected": -23.4903621673584, "step": 5382 }, { "epoch": 8.64, "learning_rate": 2.219579865239794e-08, "logits/chosen": -1.4577126502990723, "logits/rejected": -1.4336931705474854, "logps/chosen": -166.07357788085938, "logps/rejected": -302.9776611328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.884833335876465, "rewards/margins": 14.11009407043457, "rewards/rejected": -22.99492645263672, "step": 5383 }, { "epoch": 8.64, "learning_rate": 2.2096710265556876e-08, "logits/chosen": -1.3703802824020386, "logits/rejected": -1.5221235752105713, "logps/chosen": -141.48550415039062, "logps/rejected": -291.4110107421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.26084566116333, "rewards/margins": 13.141111373901367, "rewards/rejected": -18.40195655822754, "step": 5384 }, { "epoch": 8.64, "learning_rate": 2.1997621878715812e-08, "logits/chosen": -1.337878942489624, "logits/rejected": -1.3832314014434814, "logps/chosen": -167.31829833984375, "logps/rejected": -324.15179443359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.515613555908203, "rewards/margins": 14.695659637451172, "rewards/rejected": -23.211273193359375, "step": 5385 }, { "epoch": 8.65, "learning_rate": 2.1898533491874752e-08, "logits/chosen": -1.3303240537643433, "logits/rejected": -1.3096201419830322, "logps/chosen": -151.6603546142578, "logps/rejected": -336.8458251953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.609926223754883, "rewards/margins": 18.21916389465332, "rewards/rejected": -25.829092025756836, "step": 5386 }, { "epoch": 8.65, "learning_rate": 2.179944510503369e-08, "logits/chosen": -1.4761427640914917, "logits/rejected": -1.4257992506027222, "logps/chosen": -162.93414306640625, "logps/rejected": -282.4807434082031, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.215331077575684, "rewards/margins": 12.161579132080078, "rewards/rejected": -20.376911163330078, "step": 5387 }, { "epoch": 8.65, "learning_rate": 2.1700356718192625e-08, "logits/chosen": -1.6836434602737427, "logits/rejected": -1.6146459579467773, "logps/chosen": -199.28614807128906, "logps/rejected": -335.4100341796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.920113563537598, "rewards/margins": 14.349318504333496, "rewards/rejected": -23.269432067871094, "step": 5388 }, { "epoch": 8.65, "learning_rate": 2.1601268331351562e-08, "logits/chosen": -1.3653361797332764, "logits/rejected": -1.3894007205963135, "logps/chosen": -135.3428955078125, "logps/rejected": -269.2462463378906, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.989072322845459, "rewards/margins": 13.346622467041016, "rewards/rejected": -18.335695266723633, "step": 5389 }, { "epoch": 8.65, "learning_rate": 2.1502179944510502e-08, "logits/chosen": -1.395554542541504, "logits/rejected": -1.3156098127365112, "logps/chosen": -180.25840759277344, "logps/rejected": -330.35980224609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.841594696044922, "rewards/margins": 14.786198616027832, "rewards/rejected": -22.627792358398438, "step": 5390 }, { "epoch": 8.65, "learning_rate": 2.140309155766944e-08, "logits/chosen": -1.3485603332519531, "logits/rejected": -1.249395728111267, "logps/chosen": -200.5875701904297, "logps/rejected": -317.04608154296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.751371383666992, "rewards/margins": 13.044334411621094, "rewards/rejected": -22.795703887939453, "step": 5391 }, { "epoch": 8.65, "learning_rate": 2.1304003170828378e-08, "logits/chosen": -1.2516285181045532, "logits/rejected": -1.2582335472106934, "logps/chosen": -150.2400360107422, "logps/rejected": -290.5525817871094, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.992382526397705, "rewards/margins": 13.625835418701172, "rewards/rejected": -20.61821746826172, "step": 5392 }, { "epoch": 8.66, "learning_rate": 2.1204914783987318e-08, "logits/chosen": -1.4317381381988525, "logits/rejected": -1.4259569644927979, "logps/chosen": -186.71127319335938, "logps/rejected": -302.021484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.708990097045898, "rewards/margins": 11.027623176574707, "rewards/rejected": -20.736614227294922, "step": 5393 }, { "epoch": 8.66, "learning_rate": 2.1105826397146255e-08, "logits/chosen": -1.478076457977295, "logits/rejected": -1.4546988010406494, "logps/chosen": -141.7439422607422, "logps/rejected": -245.03428649902344, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.180087089538574, "rewards/margins": 12.007574081420898, "rewards/rejected": -17.187660217285156, "step": 5394 }, { "epoch": 8.66, "learning_rate": 2.100673801030519e-08, "logits/chosen": -1.4127628803253174, "logits/rejected": -1.450626254081726, "logps/chosen": -140.04383850097656, "logps/rejected": -272.35711669921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.133778095245361, "rewards/margins": 13.330741882324219, "rewards/rejected": -20.464519500732422, "step": 5395 }, { "epoch": 8.66, "learning_rate": 2.090764962346413e-08, "logits/chosen": -1.4417724609375, "logits/rejected": -1.4480160474777222, "logps/chosen": -111.80876159667969, "logps/rejected": -282.4365234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.2673702239990234, "rewards/margins": 16.908626556396484, "rewards/rejected": -20.175996780395508, "step": 5396 }, { "epoch": 8.66, "learning_rate": 2.0808561236623068e-08, "logits/chosen": -1.534409523010254, "logits/rejected": -1.52290940284729, "logps/chosen": -132.01840209960938, "logps/rejected": -261.24591064453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.408312797546387, "rewards/margins": 14.081130981445312, "rewards/rejected": -19.489442825317383, "step": 5397 }, { "epoch": 8.66, "learning_rate": 2.0709472849782005e-08, "logits/chosen": -1.261778712272644, "logits/rejected": -1.2889846563339233, "logps/chosen": -134.3740234375, "logps/rejected": -278.6933288574219, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.665897369384766, "rewards/margins": 13.359652519226074, "rewards/rejected": -20.025550842285156, "step": 5398 }, { "epoch": 8.67, "learning_rate": 2.0610384462940944e-08, "logits/chosen": -1.406590461730957, "logits/rejected": -1.3658708333969116, "logps/chosen": -217.77938842773438, "logps/rejected": -356.9627685546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -12.433937072753906, "rewards/margins": 14.385049819946289, "rewards/rejected": -26.818984985351562, "step": 5399 }, { "epoch": 8.67, "learning_rate": 2.051129607609988e-08, "logits/chosen": -1.527463674545288, "logits/rejected": -1.5510847568511963, "logps/chosen": -133.41136169433594, "logps/rejected": -286.19879150390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.954122543334961, "rewards/margins": 15.279735565185547, "rewards/rejected": -21.233858108520508, "step": 5400 }, { "epoch": 8.67, "learning_rate": 2.0412207689258818e-08, "logits/chosen": -1.5254827737808228, "logits/rejected": -1.6011466979980469, "logps/chosen": -183.349609375, "logps/rejected": -319.93072509765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.533164978027344, "rewards/margins": 14.015229225158691, "rewards/rejected": -22.54839515686035, "step": 5401 }, { "epoch": 8.67, "learning_rate": 2.0313119302417754e-08, "logits/chosen": -1.6539002656936646, "logits/rejected": -1.6926480531692505, "logps/chosen": -155.47262573242188, "logps/rejected": -335.24609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.804910659790039, "rewards/margins": 17.76555061340332, "rewards/rejected": -24.57046127319336, "step": 5402 }, { "epoch": 8.67, "learning_rate": 2.0214030915576694e-08, "logits/chosen": -1.3920235633850098, "logits/rejected": -1.382941722869873, "logps/chosen": -145.57815551757812, "logps/rejected": -333.572509765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.759631633758545, "rewards/margins": 16.444793701171875, "rewards/rejected": -23.204423904418945, "step": 5403 }, { "epoch": 8.67, "learning_rate": 2.011494252873563e-08, "logits/chosen": -1.4340091943740845, "logits/rejected": -1.4690555334091187, "logps/chosen": -161.47503662109375, "logps/rejected": -309.48602294921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.348018646240234, "rewards/margins": 13.977944374084473, "rewards/rejected": -22.325963973999023, "step": 5404 }, { "epoch": 8.68, "learning_rate": 2.0015854141894567e-08, "logits/chosen": -1.6192803382873535, "logits/rejected": -1.4791932106018066, "logps/chosen": -180.9537353515625, "logps/rejected": -282.48809814453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.903130531311035, "rewards/margins": 12.08569049835205, "rewards/rejected": -19.988821029663086, "step": 5405 }, { "epoch": 8.68, "learning_rate": 1.9916765755053507e-08, "logits/chosen": -1.3572323322296143, "logits/rejected": -1.4935731887817383, "logps/chosen": -135.30239868164062, "logps/rejected": -305.8617858886719, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.775205612182617, "rewards/margins": 16.02194595336914, "rewards/rejected": -22.797151565551758, "step": 5406 }, { "epoch": 8.68, "learning_rate": 1.9817677368212444e-08, "logits/chosen": -1.3814445734024048, "logits/rejected": -1.3636243343353271, "logps/chosen": -170.69541931152344, "logps/rejected": -276.16925048828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.715913772583008, "rewards/margins": 11.35075569152832, "rewards/rejected": -20.066669464111328, "step": 5407 }, { "epoch": 8.68, "learning_rate": 1.971858898137138e-08, "logits/chosen": -1.4001219272613525, "logits/rejected": -1.4201138019561768, "logps/chosen": -199.0594024658203, "logps/rejected": -420.7574157714844, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -10.527790069580078, "rewards/margins": 20.509042739868164, "rewards/rejected": -31.036832809448242, "step": 5408 }, { "epoch": 8.68, "learning_rate": 1.961950059453032e-08, "logits/chosen": -1.3986068964004517, "logits/rejected": -1.4479210376739502, "logps/chosen": -133.54501342773438, "logps/rejected": -307.1135559082031, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.434083938598633, "rewards/margins": 16.85643768310547, "rewards/rejected": -23.290523529052734, "step": 5409 }, { "epoch": 8.68, "learning_rate": 1.9520412207689257e-08, "logits/chosen": -1.3960983753204346, "logits/rejected": -1.3753490447998047, "logps/chosen": -191.25808715820312, "logps/rejected": -289.48345947265625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.997090339660645, "rewards/margins": 12.245109558105469, "rewards/rejected": -21.242198944091797, "step": 5410 }, { "epoch": 8.69, "learning_rate": 1.9421323820848194e-08, "logits/chosen": -1.436080813407898, "logits/rejected": -1.5596458911895752, "logps/chosen": -120.0604476928711, "logps/rejected": -291.50396728515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.287513256072998, "rewards/margins": 15.447583198547363, "rewards/rejected": -19.735095977783203, "step": 5411 }, { "epoch": 8.69, "learning_rate": 1.9322235434007134e-08, "logits/chosen": -1.3437585830688477, "logits/rejected": -1.3850083351135254, "logps/chosen": -137.52354431152344, "logps/rejected": -275.0857849121094, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.597171783447266, "rewards/margins": 14.516707420349121, "rewards/rejected": -21.113880157470703, "step": 5412 }, { "epoch": 8.69, "learning_rate": 1.922314704716607e-08, "logits/chosen": -1.513985276222229, "logits/rejected": -1.5127615928649902, "logps/chosen": -165.81092834472656, "logps/rejected": -274.78375244140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.767459392547607, "rewards/margins": 11.585197448730469, "rewards/rejected": -18.352657318115234, "step": 5413 }, { "epoch": 8.69, "learning_rate": 1.912405866032501e-08, "logits/chosen": -1.4278709888458252, "logits/rejected": -1.539350986480713, "logps/chosen": -140.39010620117188, "logps/rejected": -296.0400085449219, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.729770660400391, "rewards/margins": 11.984197616577148, "rewards/rejected": -17.71396827697754, "step": 5414 }, { "epoch": 8.69, "learning_rate": 1.9024970273483947e-08, "logits/chosen": -1.3946603536605835, "logits/rejected": -1.388910174369812, "logps/chosen": -188.58749389648438, "logps/rejected": -324.81683349609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.928207397460938, "rewards/margins": 14.119169235229492, "rewards/rejected": -23.047374725341797, "step": 5415 }, { "epoch": 8.69, "learning_rate": 1.8925881886642887e-08, "logits/chosen": -1.4341158866882324, "logits/rejected": -1.408185362815857, "logps/chosen": -164.11605834960938, "logps/rejected": -311.2739562988281, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.85394287109375, "rewards/margins": 12.989866256713867, "rewards/rejected": -20.843809127807617, "step": 5416 }, { "epoch": 8.7, "learning_rate": 1.8826793499801823e-08, "logits/chosen": -1.5777959823608398, "logits/rejected": -1.6440120935440063, "logps/chosen": -142.80491638183594, "logps/rejected": -301.3917541503906, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.2647552490234375, "rewards/margins": 15.107778549194336, "rewards/rejected": -22.372533798217773, "step": 5417 }, { "epoch": 8.7, "learning_rate": 1.872770511296076e-08, "logits/chosen": -1.4760940074920654, "logits/rejected": -1.5089678764343262, "logps/chosen": -152.63711547851562, "logps/rejected": -333.5794982910156, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.641969203948975, "rewards/margins": 16.97591781616211, "rewards/rejected": -23.617889404296875, "step": 5418 }, { "epoch": 8.7, "learning_rate": 1.86286167261197e-08, "logits/chosen": -1.4798284769058228, "logits/rejected": -1.4175519943237305, "logps/chosen": -180.53802490234375, "logps/rejected": -290.6514892578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.436674118041992, "rewards/margins": 11.1602144241333, "rewards/rejected": -20.59688949584961, "step": 5419 }, { "epoch": 8.7, "learning_rate": 1.8529528339278636e-08, "logits/chosen": -1.56388521194458, "logits/rejected": -1.4788331985473633, "logps/chosen": -160.33489990234375, "logps/rejected": -317.0961608886719, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.187560081481934, "rewards/margins": 16.138172149658203, "rewards/rejected": -23.32573127746582, "step": 5420 }, { "epoch": 8.7, "learning_rate": 1.8430439952437573e-08, "logits/chosen": -1.3289744853973389, "logits/rejected": -1.31874418258667, "logps/chosen": -131.88381958007812, "logps/rejected": -238.69798278808594, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.557104110717773, "rewards/margins": 11.977872848510742, "rewards/rejected": -16.534975051879883, "step": 5421 }, { "epoch": 8.7, "learning_rate": 1.8331351565596513e-08, "logits/chosen": -1.6553711891174316, "logits/rejected": -1.6399145126342773, "logps/chosen": -173.7509765625, "logps/rejected": -272.24554443359375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.07326889038086, "rewards/margins": 11.623566627502441, "rewards/rejected": -19.696834564208984, "step": 5422 }, { "epoch": 8.7, "learning_rate": 1.823226317875545e-08, "logits/chosen": -1.5901862382888794, "logits/rejected": -1.5946159362792969, "logps/chosen": -128.5502166748047, "logps/rejected": -300.73773193359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.353278160095215, "rewards/margins": 16.295543670654297, "rewards/rejected": -20.648822784423828, "step": 5423 }, { "epoch": 8.71, "learning_rate": 1.8133174791914386e-08, "logits/chosen": -1.313542127609253, "logits/rejected": -1.33736252784729, "logps/chosen": -186.00204467773438, "logps/rejected": -309.9263000488281, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -10.433143615722656, "rewards/margins": 12.175969123840332, "rewards/rejected": -22.609113693237305, "step": 5424 }, { "epoch": 8.71, "learning_rate": 1.8034086405073326e-08, "logits/chosen": -1.5076936483383179, "logits/rejected": -1.3755697011947632, "logps/chosen": -156.71273803710938, "logps/rejected": -278.5020751953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.852901458740234, "rewards/margins": 12.804498672485352, "rewards/rejected": -20.657400131225586, "step": 5425 }, { "epoch": 8.71, "learning_rate": 1.7934998018232263e-08, "logits/chosen": -1.5447584390640259, "logits/rejected": -1.542043685913086, "logps/chosen": -178.9134979248047, "logps/rejected": -293.19342041015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.491615295410156, "rewards/margins": 11.710564613342285, "rewards/rejected": -20.202178955078125, "step": 5426 }, { "epoch": 8.71, "learning_rate": 1.78359096313912e-08, "logits/chosen": -1.5317679643630981, "logits/rejected": -1.5133222341537476, "logps/chosen": -141.9131317138672, "logps/rejected": -274.0616760253906, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.702897071838379, "rewards/margins": 13.289934158325195, "rewards/rejected": -18.99283218383789, "step": 5427 }, { "epoch": 8.71, "learning_rate": 1.773682124455014e-08, "logits/chosen": -1.4113404750823975, "logits/rejected": -1.4432847499847412, "logps/chosen": -147.66091918945312, "logps/rejected": -271.05718994140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.670530319213867, "rewards/margins": 13.641035079956055, "rewards/rejected": -20.311565399169922, "step": 5428 }, { "epoch": 8.71, "learning_rate": 1.7637732857709076e-08, "logits/chosen": -1.2269777059555054, "logits/rejected": -1.304579257965088, "logps/chosen": -202.24560546875, "logps/rejected": -332.7342834472656, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -12.360701560974121, "rewards/margins": 11.67175006866455, "rewards/rejected": -24.032451629638672, "step": 5429 }, { "epoch": 8.72, "learning_rate": 1.7538644470868012e-08, "logits/chosen": -1.3934226036071777, "logits/rejected": -1.3837147951126099, "logps/chosen": -193.45388793945312, "logps/rejected": -295.3465270996094, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -11.002714157104492, "rewards/margins": 11.014525413513184, "rewards/rejected": -22.01723861694336, "step": 5430 }, { "epoch": 8.72, "learning_rate": 1.743955608402695e-08, "logits/chosen": -1.3453655242919922, "logits/rejected": -1.3514504432678223, "logps/chosen": -150.5539093017578, "logps/rejected": -285.1641845703125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.312990188598633, "rewards/margins": 12.527724266052246, "rewards/rejected": -18.840713500976562, "step": 5431 }, { "epoch": 8.72, "learning_rate": 1.734046769718589e-08, "logits/chosen": -1.5243191719055176, "logits/rejected": -1.5087764263153076, "logps/chosen": -125.31169128417969, "logps/rejected": -268.10748291015625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.824173927307129, "rewards/margins": 14.451338768005371, "rewards/rejected": -20.2755126953125, "step": 5432 }, { "epoch": 8.72, "learning_rate": 1.7241379310344825e-08, "logits/chosen": -1.5840343236923218, "logits/rejected": -1.6018993854522705, "logps/chosen": -171.3840789794922, "logps/rejected": -316.93011474609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.325183868408203, "rewards/margins": 16.179174423217773, "rewards/rejected": -22.504358291625977, "step": 5433 }, { "epoch": 8.72, "learning_rate": 1.7142290923503762e-08, "logits/chosen": -1.406834363937378, "logits/rejected": -1.3962639570236206, "logps/chosen": -181.0482177734375, "logps/rejected": -285.4689025878906, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.471445083618164, "rewards/margins": 10.917606353759766, "rewards/rejected": -20.38905143737793, "step": 5434 }, { "epoch": 8.72, "learning_rate": 1.7043202536662702e-08, "logits/chosen": -1.4812595844268799, "logits/rejected": -1.4489531517028809, "logps/chosen": -132.31918334960938, "logps/rejected": -285.6187744140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.942589282989502, "rewards/margins": 15.669097900390625, "rewards/rejected": -21.6116886138916, "step": 5435 }, { "epoch": 8.73, "learning_rate": 1.694411414982164e-08, "logits/chosen": -1.4812015295028687, "logits/rejected": -1.4928863048553467, "logps/chosen": -261.6613464355469, "logps/rejected": -365.59429931640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -14.705549240112305, "rewards/margins": 9.916019439697266, "rewards/rejected": -24.62156867980957, "step": 5436 }, { "epoch": 8.73, "learning_rate": 1.684502576298058e-08, "logits/chosen": -1.4849706888198853, "logits/rejected": -1.5397485494613647, "logps/chosen": -138.46188354492188, "logps/rejected": -284.5140380859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.828947067260742, "rewards/margins": 14.241378784179688, "rewards/rejected": -21.07032585144043, "step": 5437 }, { "epoch": 8.73, "learning_rate": 1.674593737613952e-08, "logits/chosen": -1.6498498916625977, "logits/rejected": -1.5815578699111938, "logps/chosen": -180.39195251464844, "logps/rejected": -319.35791015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.804305076599121, "rewards/margins": 14.553720474243164, "rewards/rejected": -23.35802459716797, "step": 5438 }, { "epoch": 8.73, "learning_rate": 1.6646848989298455e-08, "logits/chosen": -1.4684767723083496, "logits/rejected": -1.5074323415756226, "logps/chosen": -188.34475708007812, "logps/rejected": -340.31842041015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.619147300720215, "rewards/margins": 14.9807767868042, "rewards/rejected": -24.599924087524414, "step": 5439 }, { "epoch": 8.73, "learning_rate": 1.654776060245739e-08, "logits/chosen": -1.5392978191375732, "logits/rejected": -1.5886207818984985, "logps/chosen": -124.53779602050781, "logps/rejected": -252.47906494140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.796805381774902, "rewards/margins": 11.98562240600586, "rewards/rejected": -17.782428741455078, "step": 5440 }, { "epoch": 8.73, "learning_rate": 1.644867221561633e-08, "logits/chosen": -1.248887062072754, "logits/rejected": -1.2908222675323486, "logps/chosen": -115.50232696533203, "logps/rejected": -286.98486328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.075676441192627, "rewards/margins": 17.060115814208984, "rewards/rejected": -22.135791778564453, "step": 5441 }, { "epoch": 8.74, "learning_rate": 1.6349583828775268e-08, "logits/chosen": -1.6042277812957764, "logits/rejected": -1.6753432750701904, "logps/chosen": -132.127685546875, "logps/rejected": -304.082275390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.450425148010254, "rewards/margins": 15.736482620239258, "rewards/rejected": -21.186908721923828, "step": 5442 }, { "epoch": 8.74, "learning_rate": 1.6250495441934205e-08, "logits/chosen": -1.4198064804077148, "logits/rejected": -1.383195161819458, "logps/chosen": -150.97293090820312, "logps/rejected": -321.0423889160156, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.083958625793457, "rewards/margins": 16.40924835205078, "rewards/rejected": -24.493206024169922, "step": 5443 }, { "epoch": 8.74, "learning_rate": 1.615140705509314e-08, "logits/chosen": -1.5442233085632324, "logits/rejected": -1.524083137512207, "logps/chosen": -162.9457550048828, "logps/rejected": -274.1092529296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.804529190063477, "rewards/margins": 11.530630111694336, "rewards/rejected": -18.335159301757812, "step": 5444 }, { "epoch": 8.74, "learning_rate": 1.605231866825208e-08, "logits/chosen": -1.5310945510864258, "logits/rejected": -1.6240178346633911, "logps/chosen": -135.0478515625, "logps/rejected": -322.2422790527344, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.128134727478027, "rewards/margins": 16.688838958740234, "rewards/rejected": -22.816972732543945, "step": 5445 }, { "epoch": 8.74, "learning_rate": 1.5953230281411018e-08, "logits/chosen": -1.469875693321228, "logits/rejected": -1.4334754943847656, "logps/chosen": -119.8460693359375, "logps/rejected": -277.662841796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.21774435043335, "rewards/margins": 15.953648567199707, "rewards/rejected": -20.1713924407959, "step": 5446 }, { "epoch": 8.74, "learning_rate": 1.5854141894569954e-08, "logits/chosen": -1.60468590259552, "logits/rejected": -1.675989031791687, "logps/chosen": -138.124755859375, "logps/rejected": -287.9798583984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.6595354080200195, "rewards/margins": 13.307097434997559, "rewards/rejected": -17.966632843017578, "step": 5447 }, { "epoch": 8.74, "learning_rate": 1.5755053507728894e-08, "logits/chosen": -1.5474847555160522, "logits/rejected": -1.524472951889038, "logps/chosen": -127.24958038330078, "logps/rejected": -266.0911865234375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.745676517486572, "rewards/margins": 14.86463737487793, "rewards/rejected": -19.610313415527344, "step": 5448 }, { "epoch": 8.75, "learning_rate": 1.565596512088783e-08, "logits/chosen": -1.3649873733520508, "logits/rejected": -1.3556294441223145, "logps/chosen": -189.20260620117188, "logps/rejected": -333.01422119140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -10.14459228515625, "rewards/margins": 13.50729751586914, "rewards/rejected": -23.65188980102539, "step": 5449 }, { "epoch": 8.75, "learning_rate": 1.555687673404677e-08, "logits/chosen": -1.4408241510391235, "logits/rejected": -1.5028953552246094, "logps/chosen": -142.3921661376953, "logps/rejected": -272.3212585449219, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.730820655822754, "rewards/margins": 12.205185890197754, "rewards/rejected": -18.93600845336914, "step": 5450 }, { "epoch": 8.75, "learning_rate": 1.5457788347205708e-08, "logits/chosen": -1.488051414489746, "logits/rejected": -1.4649136066436768, "logps/chosen": -202.55831909179688, "logps/rejected": -309.7406921386719, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -10.431904792785645, "rewards/margins": 10.457812309265137, "rewards/rejected": -20.88971710205078, "step": 5451 }, { "epoch": 8.75, "learning_rate": 1.5358699960364644e-08, "logits/chosen": -1.5831212997436523, "logits/rejected": -1.5285779237747192, "logps/chosen": -113.00989532470703, "logps/rejected": -232.22071838378906, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.422863960266113, "rewards/margins": 13.608236312866211, "rewards/rejected": -18.031099319458008, "step": 5452 }, { "epoch": 8.75, "learning_rate": 1.5259611573523584e-08, "logits/chosen": -1.5451223850250244, "logits/rejected": -1.568666934967041, "logps/chosen": -139.60467529296875, "logps/rejected": -311.4291076660156, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.633073806762695, "rewards/margins": 16.752666473388672, "rewards/rejected": -22.385740280151367, "step": 5453 }, { "epoch": 8.75, "learning_rate": 1.516052318668252e-08, "logits/chosen": -1.4125510454177856, "logits/rejected": -1.4446179866790771, "logps/chosen": -155.23416137695312, "logps/rejected": -305.07086181640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.894509792327881, "rewards/margins": 15.13357925415039, "rewards/rejected": -22.028087615966797, "step": 5454 }, { "epoch": 8.76, "learning_rate": 1.5061434799841457e-08, "logits/chosen": -1.4478164911270142, "logits/rejected": -1.4046560525894165, "logps/chosen": -177.50704956054688, "logps/rejected": -331.8160400390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.673406600952148, "rewards/margins": 14.341483116149902, "rewards/rejected": -24.014888763427734, "step": 5455 }, { "epoch": 8.76, "learning_rate": 1.4962346413000394e-08, "logits/chosen": -1.3503386974334717, "logits/rejected": -1.3980176448822021, "logps/chosen": -119.40214538574219, "logps/rejected": -292.3028564453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.000727653503418, "rewards/margins": 17.554149627685547, "rewards/rejected": -21.55487823486328, "step": 5456 }, { "epoch": 8.76, "learning_rate": 1.4863258026159334e-08, "logits/chosen": -1.302933692932129, "logits/rejected": -1.3859922885894775, "logps/chosen": -138.23703002929688, "logps/rejected": -280.4346923828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.196687698364258, "rewards/margins": 12.23564338684082, "rewards/rejected": -18.432331085205078, "step": 5457 }, { "epoch": 8.76, "learning_rate": 1.4764169639318272e-08, "logits/chosen": -1.4657410383224487, "logits/rejected": -1.49823796749115, "logps/chosen": -226.15228271484375, "logps/rejected": -353.049072265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -11.273387908935547, "rewards/margins": 12.98002815246582, "rewards/rejected": -24.253416061401367, "step": 5458 }, { "epoch": 8.76, "learning_rate": 1.4665081252477209e-08, "logits/chosen": -1.7210065126419067, "logits/rejected": -1.6744036674499512, "logps/chosen": -104.07920837402344, "logps/rejected": -273.23077392578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.03729772567749, "rewards/margins": 15.186262130737305, "rewards/rejected": -19.223560333251953, "step": 5459 }, { "epoch": 8.76, "learning_rate": 1.4565992865636147e-08, "logits/chosen": -1.5666556358337402, "logits/rejected": -1.5169615745544434, "logps/chosen": -171.88156127929688, "logps/rejected": -268.0328063964844, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.795080184936523, "rewards/margins": 11.920706748962402, "rewards/rejected": -18.715787887573242, "step": 5460 }, { "epoch": 8.77, "learning_rate": 1.4466904478795083e-08, "logits/chosen": -1.329107642173767, "logits/rejected": -1.4294683933258057, "logps/chosen": -206.47232055664062, "logps/rejected": -315.1406555175781, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -12.299001693725586, "rewards/margins": 10.380867958068848, "rewards/rejected": -22.67987060546875, "step": 5461 }, { "epoch": 8.77, "learning_rate": 1.4367816091954022e-08, "logits/chosen": -1.2842626571655273, "logits/rejected": -1.3369603157043457, "logps/chosen": -130.5120086669922, "logps/rejected": -289.1618347167969, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.903784275054932, "rewards/margins": 14.653454780578613, "rewards/rejected": -21.557239532470703, "step": 5462 }, { "epoch": 8.77, "learning_rate": 1.426872770511296e-08, "logits/chosen": -1.4478962421417236, "logits/rejected": -1.4000808000564575, "logps/chosen": -168.21865844726562, "logps/rejected": -277.811767578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.19401741027832, "rewards/margins": 11.881623268127441, "rewards/rejected": -20.075641632080078, "step": 5463 }, { "epoch": 8.77, "learning_rate": 1.4169639318271897e-08, "logits/chosen": -1.3364923000335693, "logits/rejected": -1.383387804031372, "logps/chosen": -131.06129455566406, "logps/rejected": -287.3696594238281, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.811740398406982, "rewards/margins": 15.013370513916016, "rewards/rejected": -20.825111389160156, "step": 5464 }, { "epoch": 8.77, "learning_rate": 1.4070550931430837e-08, "logits/chosen": -1.6418187618255615, "logits/rejected": -1.5758846998214722, "logps/chosen": -192.51864624023438, "logps/rejected": -330.5244140625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.80848503112793, "rewards/margins": 15.82055377960205, "rewards/rejected": -24.629037857055664, "step": 5465 }, { "epoch": 8.77, "learning_rate": 1.3971462544589775e-08, "logits/chosen": -1.7147173881530762, "logits/rejected": -1.6863746643066406, "logps/chosen": -153.59031677246094, "logps/rejected": -298.2607421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.4989166259765625, "rewards/margins": 13.980252265930176, "rewards/rejected": -19.479167938232422, "step": 5466 }, { "epoch": 8.78, "learning_rate": 1.3872374157748711e-08, "logits/chosen": -1.6767550706863403, "logits/rejected": -1.5682001113891602, "logps/chosen": -143.5502166748047, "logps/rejected": -276.6429748535156, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.507812023162842, "rewards/margins": 14.43783950805664, "rewards/rejected": -19.945650100708008, "step": 5467 }, { "epoch": 8.78, "learning_rate": 1.377328577090765e-08, "logits/chosen": -1.3700613975524902, "logits/rejected": -1.3547468185424805, "logps/chosen": -170.75778198242188, "logps/rejected": -296.966064453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.036312103271484, "rewards/margins": 11.84096622467041, "rewards/rejected": -20.87727928161621, "step": 5468 }, { "epoch": 8.78, "learning_rate": 1.3674197384066586e-08, "logits/chosen": -1.6304874420166016, "logits/rejected": -1.5334157943725586, "logps/chosen": -110.52366638183594, "logps/rejected": -227.3395538330078, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.2489142417907715, "rewards/margins": 14.229912757873535, "rewards/rejected": -16.47882652282715, "step": 5469 }, { "epoch": 8.78, "learning_rate": 1.3575108997225525e-08, "logits/chosen": -1.3230500221252441, "logits/rejected": -1.266008734703064, "logps/chosen": -158.57864379882812, "logps/rejected": -270.51165771484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.989897727966309, "rewards/margins": 13.523969650268555, "rewards/rejected": -19.513866424560547, "step": 5470 }, { "epoch": 8.78, "learning_rate": 1.3476020610384463e-08, "logits/chosen": -1.639865517616272, "logits/rejected": -1.5158402919769287, "logps/chosen": -212.3350830078125, "logps/rejected": -318.9066162109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.404208183288574, "rewards/margins": 13.162343978881836, "rewards/rejected": -22.566551208496094, "step": 5471 }, { "epoch": 8.78, "learning_rate": 1.33769322235434e-08, "logits/chosen": -1.3388512134552002, "logits/rejected": -1.2424776554107666, "logps/chosen": -161.70059204101562, "logps/rejected": -296.9508056640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.19691276550293, "rewards/margins": 14.218036651611328, "rewards/rejected": -22.414949417114258, "step": 5472 }, { "epoch": 8.78, "learning_rate": 1.3277843836702338e-08, "logits/chosen": -1.5766406059265137, "logits/rejected": -1.686234474182129, "logps/chosen": -141.12493896484375, "logps/rejected": -296.121826171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.619752407073975, "rewards/margins": 14.204973220825195, "rewards/rejected": -19.824726104736328, "step": 5473 }, { "epoch": 8.79, "learning_rate": 1.3178755449861276e-08, "logits/chosen": -1.6397099494934082, "logits/rejected": -1.675049066543579, "logps/chosen": -149.4064483642578, "logps/rejected": -299.13592529296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.2261128425598145, "rewards/margins": 16.540115356445312, "rewards/rejected": -22.76622772216797, "step": 5474 }, { "epoch": 8.79, "learning_rate": 1.3079667063020213e-08, "logits/chosen": -1.4659183025360107, "logits/rejected": -1.4307548999786377, "logps/chosen": -174.8655242919922, "logps/rejected": -324.6886901855469, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.44254207611084, "rewards/margins": 16.410329818725586, "rewards/rejected": -24.852872848510742, "step": 5475 }, { "epoch": 8.79, "learning_rate": 1.298057867617915e-08, "logits/chosen": -1.5121554136276245, "logits/rejected": -1.5198297500610352, "logps/chosen": -184.95260620117188, "logps/rejected": -321.17523193359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.547597885131836, "rewards/margins": 14.086271286010742, "rewards/rejected": -23.63387107849121, "step": 5476 }, { "epoch": 8.79, "learning_rate": 1.2881490289338089e-08, "logits/chosen": -1.389732837677002, "logits/rejected": -1.4177206754684448, "logps/chosen": -196.6880340576172, "logps/rejected": -305.2585144042969, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -10.282025337219238, "rewards/margins": 10.888951301574707, "rewards/rejected": -21.170976638793945, "step": 5477 }, { "epoch": 8.79, "learning_rate": 1.2782401902497027e-08, "logits/chosen": -1.5755412578582764, "logits/rejected": -1.5377511978149414, "logps/chosen": -110.13674926757812, "logps/rejected": -269.607666015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.820122718811035, "rewards/margins": 17.253894805908203, "rewards/rejected": -21.074016571044922, "step": 5478 }, { "epoch": 8.79, "learning_rate": 1.2683313515655966e-08, "logits/chosen": -1.4063007831573486, "logits/rejected": -1.3440320491790771, "logps/chosen": -163.69207763671875, "logps/rejected": -298.35009765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.1937255859375, "rewards/margins": 14.703405380249023, "rewards/rejected": -20.897132873535156, "step": 5479 }, { "epoch": 8.8, "learning_rate": 1.2584225128814902e-08, "logits/chosen": -1.445094347000122, "logits/rejected": -1.3985655307769775, "logps/chosen": -206.08135986328125, "logps/rejected": -335.59326171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.888328552246094, "rewards/margins": 11.823844909667969, "rewards/rejected": -21.712173461914062, "step": 5480 }, { "epoch": 8.8, "learning_rate": 1.248513674197384e-08, "logits/chosen": -1.5109736919403076, "logits/rejected": -1.4794654846191406, "logps/chosen": -114.2931900024414, "logps/rejected": -308.140380859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.898513317108154, "rewards/margins": 17.211626052856445, "rewards/rejected": -23.110137939453125, "step": 5481 }, { "epoch": 8.8, "learning_rate": 1.2386048355132779e-08, "logits/chosen": -1.4989091157913208, "logits/rejected": -1.4662905931472778, "logps/chosen": -146.74285888671875, "logps/rejected": -267.79351806640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.0835280418396, "rewards/margins": 12.715306282043457, "rewards/rejected": -19.79883575439453, "step": 5482 }, { "epoch": 8.8, "learning_rate": 1.2286959968291715e-08, "logits/chosen": -1.3322941064834595, "logits/rejected": -1.395934820175171, "logps/chosen": -167.77523803710938, "logps/rejected": -282.19921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.862197399139404, "rewards/margins": 11.602316856384277, "rewards/rejected": -19.464515686035156, "step": 5483 }, { "epoch": 8.8, "learning_rate": 1.2187871581450654e-08, "logits/chosen": -1.4081549644470215, "logits/rejected": -1.4481157064437866, "logps/chosen": -136.6598663330078, "logps/rejected": -273.2109680175781, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.03472375869751, "rewards/margins": 13.806780815124512, "rewards/rejected": -18.84150505065918, "step": 5484 }, { "epoch": 8.8, "learning_rate": 1.208878319460959e-08, "logits/chosen": -1.4753353595733643, "logits/rejected": -1.4552594423294067, "logps/chosen": -186.96337890625, "logps/rejected": -320.4122619628906, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.2470121383667, "rewards/margins": 14.187028884887695, "rewards/rejected": -23.43404197692871, "step": 5485 }, { "epoch": 8.81, "learning_rate": 1.1989694807768528e-08, "logits/chosen": -1.5732872486114502, "logits/rejected": -1.5312550067901611, "logps/chosen": -147.61984252929688, "logps/rejected": -234.83689880371094, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.150317668914795, "rewards/margins": 10.661298751831055, "rewards/rejected": -16.811616897583008, "step": 5486 }, { "epoch": 8.81, "learning_rate": 1.1890606420927467e-08, "logits/chosen": -1.4916954040527344, "logits/rejected": -1.5641014575958252, "logps/chosen": -155.28509521484375, "logps/rejected": -306.8409423828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.8343505859375, "rewards/margins": 13.516738891601562, "rewards/rejected": -21.351089477539062, "step": 5487 }, { "epoch": 8.81, "learning_rate": 1.1791518034086405e-08, "logits/chosen": -1.4803061485290527, "logits/rejected": -1.3641369342803955, "logps/chosen": -139.37228393554688, "logps/rejected": -238.83946228027344, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.590522050857544, "rewards/margins": 13.139620780944824, "rewards/rejected": -16.73014259338379, "step": 5488 }, { "epoch": 8.81, "learning_rate": 1.1692429647245343e-08, "logits/chosen": -1.5225646495819092, "logits/rejected": -1.4935842752456665, "logps/chosen": -144.2186279296875, "logps/rejected": -286.1135559082031, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.663084030151367, "rewards/margins": 15.020281791687012, "rewards/rejected": -20.683364868164062, "step": 5489 }, { "epoch": 8.81, "learning_rate": 1.159334126040428e-08, "logits/chosen": -1.3419159650802612, "logits/rejected": -1.3243415355682373, "logps/chosen": -188.1663360595703, "logps/rejected": -316.7386779785156, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.293554306030273, "rewards/margins": 12.919158935546875, "rewards/rejected": -22.21271324157715, "step": 5490 }, { "epoch": 8.81, "learning_rate": 1.1494252873563218e-08, "logits/chosen": -1.6183624267578125, "logits/rejected": -1.5771924257278442, "logps/chosen": -152.2018280029297, "logps/rejected": -295.3165283203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.817328929901123, "rewards/margins": 14.649947166442871, "rewards/rejected": -20.467275619506836, "step": 5491 }, { "epoch": 8.82, "learning_rate": 1.1395164486722156e-08, "logits/chosen": -1.3251830339431763, "logits/rejected": -1.3176028728485107, "logps/chosen": -147.64845275878906, "logps/rejected": -267.0843505859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.172133445739746, "rewards/margins": 13.301788330078125, "rewards/rejected": -20.473922729492188, "step": 5492 }, { "epoch": 8.82, "learning_rate": 1.1296076099881093e-08, "logits/chosen": -1.5732475519180298, "logits/rejected": -1.5450807809829712, "logps/chosen": -168.5752410888672, "logps/rejected": -306.53179931640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.051141738891602, "rewards/margins": 12.41102409362793, "rewards/rejected": -21.462167739868164, "step": 5493 }, { "epoch": 8.82, "learning_rate": 1.1196987713040031e-08, "logits/chosen": -1.4722305536270142, "logits/rejected": -1.480183720588684, "logps/chosen": -164.18246459960938, "logps/rejected": -270.2085876464844, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.897164821624756, "rewards/margins": 11.725311279296875, "rewards/rejected": -19.62247657775879, "step": 5494 }, { "epoch": 8.82, "learning_rate": 1.109789932619897e-08, "logits/chosen": -1.5381789207458496, "logits/rejected": -1.5828497409820557, "logps/chosen": -122.709716796875, "logps/rejected": -300.283935546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.456516742706299, "rewards/margins": 16.429759979248047, "rewards/rejected": -21.88627815246582, "step": 5495 }, { "epoch": 8.82, "learning_rate": 1.0998810939357906e-08, "logits/chosen": -1.4375251531600952, "logits/rejected": -1.4297800064086914, "logps/chosen": -171.7742919921875, "logps/rejected": -294.5749206542969, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.053570747375488, "rewards/margins": 13.733942031860352, "rewards/rejected": -21.787513732910156, "step": 5496 }, { "epoch": 8.82, "learning_rate": 1.0899722552516844e-08, "logits/chosen": -1.4139801263809204, "logits/rejected": -1.4273046255111694, "logps/chosen": -145.86782836914062, "logps/rejected": -271.0716857910156, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.100580215454102, "rewards/margins": 12.698921203613281, "rewards/rejected": -20.799501419067383, "step": 5497 }, { "epoch": 8.83, "learning_rate": 1.0800634165675781e-08, "logits/chosen": -1.266104817390442, "logits/rejected": -1.2670576572418213, "logps/chosen": -162.82872009277344, "logps/rejected": -306.3579406738281, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.750912666320801, "rewards/margins": 13.844608306884766, "rewards/rejected": -21.595521926879883, "step": 5498 }, { "epoch": 8.83, "learning_rate": 1.070154577883472e-08, "logits/chosen": -1.3478909730911255, "logits/rejected": -1.305830478668213, "logps/chosen": -177.2524871826172, "logps/rejected": -299.0887756347656, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.421171188354492, "rewards/margins": 14.002289772033691, "rewards/rejected": -21.4234619140625, "step": 5499 }, { "epoch": 8.83, "learning_rate": 1.0602457391993659e-08, "logits/chosen": -1.384503722190857, "logits/rejected": -1.4365882873535156, "logps/chosen": -119.20057678222656, "logps/rejected": -320.4990234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.318152904510498, "rewards/margins": 15.804553985595703, "rewards/rejected": -22.12270736694336, "step": 5500 }, { "epoch": 8.83, "learning_rate": 1.0503369005152596e-08, "logits/chosen": -1.4457693099975586, "logits/rejected": -1.478636384010315, "logps/chosen": -157.18975830078125, "logps/rejected": -311.40972900390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.174690246582031, "rewards/margins": 14.979238510131836, "rewards/rejected": -22.153928756713867, "step": 5501 }, { "epoch": 8.83, "learning_rate": 1.0404280618311534e-08, "logits/chosen": -1.3819780349731445, "logits/rejected": -1.3861660957336426, "logps/chosen": -133.33018493652344, "logps/rejected": -279.2955017089844, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.094486236572266, "rewards/margins": 14.40609359741211, "rewards/rejected": -20.500579833984375, "step": 5502 }, { "epoch": 8.83, "learning_rate": 1.0305192231470472e-08, "logits/chosen": -1.384071707725525, "logits/rejected": -1.5587239265441895, "logps/chosen": -164.39601135253906, "logps/rejected": -296.20855712890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.887092590332031, "rewards/margins": 11.937942504882812, "rewards/rejected": -20.825035095214844, "step": 5503 }, { "epoch": 8.83, "learning_rate": 1.0206103844629409e-08, "logits/chosen": -1.5197057723999023, "logits/rejected": -1.5847768783569336, "logps/chosen": -201.81466674804688, "logps/rejected": -368.3904113769531, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -10.558990478515625, "rewards/margins": 15.77241325378418, "rewards/rejected": -26.331405639648438, "step": 5504 }, { "epoch": 8.84, "learning_rate": 1.0107015457788347e-08, "logits/chosen": -1.302932858467102, "logits/rejected": -1.3689221143722534, "logps/chosen": -179.6098175048828, "logps/rejected": -309.509765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.091455459594727, "rewards/margins": 13.42479133605957, "rewards/rejected": -21.516246795654297, "step": 5505 }, { "epoch": 8.84, "learning_rate": 1.0007927070947284e-08, "logits/chosen": -1.4446508884429932, "logits/rejected": -1.2898387908935547, "logps/chosen": -175.23019409179688, "logps/rejected": -261.03900146484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.882005214691162, "rewards/margins": 11.478946685791016, "rewards/rejected": -19.360952377319336, "step": 5506 }, { "epoch": 8.84, "learning_rate": 9.908838684106222e-09, "logits/chosen": -1.4923994541168213, "logits/rejected": -1.4724560976028442, "logps/chosen": -193.4761199951172, "logps/rejected": -295.7413330078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.44789981842041, "rewards/margins": 13.000259399414062, "rewards/rejected": -22.44816017150879, "step": 5507 }, { "epoch": 8.84, "learning_rate": 9.80975029726516e-09, "logits/chosen": -1.7107822895050049, "logits/rejected": -1.7238879203796387, "logps/chosen": -119.33875274658203, "logps/rejected": -314.3605651855469, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.6182198524475098, "rewards/margins": 18.876056671142578, "rewards/rejected": -22.49427604675293, "step": 5508 }, { "epoch": 8.84, "learning_rate": 9.710661910424097e-09, "logits/chosen": -1.4171314239501953, "logits/rejected": -1.3932805061340332, "logps/chosen": -193.07138061523438, "logps/rejected": -319.2784118652344, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -10.281110763549805, "rewards/margins": 14.629986763000488, "rewards/rejected": -24.911096572875977, "step": 5509 }, { "epoch": 8.84, "learning_rate": 9.611573523583035e-09, "logits/chosen": -1.3705233335494995, "logits/rejected": -1.3821635246276855, "logps/chosen": -177.92129516601562, "logps/rejected": -375.7994384765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.335589408874512, "rewards/margins": 16.886127471923828, "rewards/rejected": -26.221717834472656, "step": 5510 }, { "epoch": 8.85, "learning_rate": 9.512485136741973e-09, "logits/chosen": -1.3588300943374634, "logits/rejected": -1.3951325416564941, "logps/chosen": -135.93341064453125, "logps/rejected": -275.56109619140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.945231914520264, "rewards/margins": 13.470478057861328, "rewards/rejected": -20.41571044921875, "step": 5511 }, { "epoch": 8.85, "learning_rate": 9.413396749900912e-09, "logits/chosen": -1.5015413761138916, "logits/rejected": -1.4614593982696533, "logps/chosen": -151.64794921875, "logps/rejected": -279.6134948730469, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.345219612121582, "rewards/margins": 12.83059310913086, "rewards/rejected": -20.175811767578125, "step": 5512 }, { "epoch": 8.85, "learning_rate": 9.31430836305985e-09, "logits/chosen": -1.444108247756958, "logits/rejected": -1.4823031425476074, "logps/chosen": -145.8060302734375, "logps/rejected": -288.7027282714844, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.133156776428223, "rewards/margins": 13.690107345581055, "rewards/rejected": -20.823265075683594, "step": 5513 }, { "epoch": 8.85, "learning_rate": 9.215219976218786e-09, "logits/chosen": -1.645614743232727, "logits/rejected": -1.5895781517028809, "logps/chosen": -145.66864013671875, "logps/rejected": -301.36688232421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.907896995544434, "rewards/margins": 16.92184829711914, "rewards/rejected": -21.829744338989258, "step": 5514 }, { "epoch": 8.85, "learning_rate": 9.116131589377725e-09, "logits/chosen": -1.461030125617981, "logits/rejected": -1.415977954864502, "logps/chosen": -154.76885986328125, "logps/rejected": -295.974853515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.504090309143066, "rewards/margins": 15.350045204162598, "rewards/rejected": -21.854135513305664, "step": 5515 }, { "epoch": 8.85, "learning_rate": 9.017043202536663e-09, "logits/chosen": -1.3060734272003174, "logits/rejected": -1.422250747680664, "logps/chosen": -135.39761352539062, "logps/rejected": -279.13958740234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.569437503814697, "rewards/margins": 11.639434814453125, "rewards/rejected": -19.208871841430664, "step": 5516 }, { "epoch": 8.86, "learning_rate": 8.9179548156956e-09, "logits/chosen": -1.742134928703308, "logits/rejected": -1.6507973670959473, "logps/chosen": -136.32376098632812, "logps/rejected": -297.00006103515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.076655864715576, "rewards/margins": 17.22898292541504, "rewards/rejected": -22.30563735961914, "step": 5517 }, { "epoch": 8.86, "learning_rate": 8.818866428854538e-09, "logits/chosen": -1.3851580619812012, "logits/rejected": -1.3640110492706299, "logps/chosen": -163.94798278808594, "logps/rejected": -299.5345153808594, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.7249250411987305, "rewards/margins": 15.897686958312988, "rewards/rejected": -21.62261199951172, "step": 5518 }, { "epoch": 8.86, "learning_rate": 8.719778042013474e-09, "logits/chosen": -1.440573811531067, "logits/rejected": -1.534522294998169, "logps/chosen": -132.98004150390625, "logps/rejected": -304.2972106933594, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.619424819946289, "rewards/margins": 14.860980033874512, "rewards/rejected": -20.480405807495117, "step": 5519 }, { "epoch": 8.86, "learning_rate": 8.620689655172413e-09, "logits/chosen": -1.5582575798034668, "logits/rejected": -1.6070797443389893, "logps/chosen": -138.66014099121094, "logps/rejected": -295.8648986816406, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.639702796936035, "rewards/margins": 14.555841445922852, "rewards/rejected": -20.195545196533203, "step": 5520 }, { "epoch": 8.86, "learning_rate": 8.521601268331351e-09, "logits/chosen": -1.4509820938110352, "logits/rejected": -1.466586947441101, "logps/chosen": -139.82342529296875, "logps/rejected": -313.79364013671875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.7833662033081055, "rewards/margins": 16.5905818939209, "rewards/rejected": -22.373947143554688, "step": 5521 }, { "epoch": 8.86, "learning_rate": 8.42251288149029e-09, "logits/chosen": -1.3790929317474365, "logits/rejected": -1.4052289724349976, "logps/chosen": -163.9189453125, "logps/rejected": -343.37255859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.157495498657227, "rewards/margins": 17.31974220275879, "rewards/rejected": -24.477235794067383, "step": 5522 }, { "epoch": 8.87, "learning_rate": 8.323424494649227e-09, "logits/chosen": -1.5175716876983643, "logits/rejected": -1.4883137941360474, "logps/chosen": -133.22207641601562, "logps/rejected": -302.68731689453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.047994613647461, "rewards/margins": 15.730720520019531, "rewards/rejected": -21.778715133666992, "step": 5523 }, { "epoch": 8.87, "learning_rate": 8.224336107808166e-09, "logits/chosen": -1.4338479042053223, "logits/rejected": -1.4013028144836426, "logps/chosen": -149.87025451660156, "logps/rejected": -252.82778930664062, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.232932090759277, "rewards/margins": 10.907346725463867, "rewards/rejected": -19.140277862548828, "step": 5524 }, { "epoch": 8.87, "learning_rate": 8.125247720967102e-09, "logits/chosen": -1.5191059112548828, "logits/rejected": -1.4490551948547363, "logps/chosen": -162.77291870117188, "logps/rejected": -307.58416748046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.386801719665527, "rewards/margins": 14.635686874389648, "rewards/rejected": -22.022489547729492, "step": 5525 }, { "epoch": 8.87, "learning_rate": 8.02615933412604e-09, "logits/chosen": -1.3489198684692383, "logits/rejected": -1.3723788261413574, "logps/chosen": -179.38687133789062, "logps/rejected": -296.58013916015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.085679054260254, "rewards/margins": 13.62321662902832, "rewards/rejected": -22.708894729614258, "step": 5526 }, { "epoch": 8.87, "learning_rate": 7.927070947284977e-09, "logits/chosen": -1.5028525590896606, "logits/rejected": -1.5426260232925415, "logps/chosen": -159.83554077148438, "logps/rejected": -311.2879638671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.359861373901367, "rewards/margins": 15.2382230758667, "rewards/rejected": -21.598085403442383, "step": 5527 }, { "epoch": 8.87, "learning_rate": 7.827982560443915e-09, "logits/chosen": -1.3957993984222412, "logits/rejected": -1.4196135997772217, "logps/chosen": -131.39993286132812, "logps/rejected": -269.0301513671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.169286727905273, "rewards/margins": 13.595429420471191, "rewards/rejected": -18.76471710205078, "step": 5528 }, { "epoch": 8.87, "learning_rate": 7.728894173602854e-09, "logits/chosen": -1.3473258018493652, "logits/rejected": -1.3635883331298828, "logps/chosen": -144.09149169921875, "logps/rejected": -309.547607421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.28753662109375, "rewards/margins": 14.876259803771973, "rewards/rejected": -22.163795471191406, "step": 5529 }, { "epoch": 8.88, "learning_rate": 7.629805786761792e-09, "logits/chosen": -1.570701241493225, "logits/rejected": -1.5551426410675049, "logps/chosen": -193.306396484375, "logps/rejected": -317.7586364746094, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.674365043640137, "rewards/margins": 13.35536003112793, "rewards/rejected": -23.02972412109375, "step": 5530 }, { "epoch": 8.88, "learning_rate": 7.530717399920729e-09, "logits/chosen": -1.3250340223312378, "logits/rejected": -1.3717126846313477, "logps/chosen": -164.00286865234375, "logps/rejected": -339.76922607421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.149408340454102, "rewards/margins": 15.273136138916016, "rewards/rejected": -23.422544479370117, "step": 5531 }, { "epoch": 8.88, "learning_rate": 7.431629013079667e-09, "logits/chosen": -1.5440555810928345, "logits/rejected": -1.5173321962356567, "logps/chosen": -149.22607421875, "logps/rejected": -296.1837158203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.6196393966674805, "rewards/margins": 15.882364273071289, "rewards/rejected": -21.502004623413086, "step": 5532 }, { "epoch": 8.88, "learning_rate": 7.332540626238604e-09, "logits/chosen": -1.4197148084640503, "logits/rejected": -1.522092342376709, "logps/chosen": -151.8905029296875, "logps/rejected": -286.54022216796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.587240219116211, "rewards/margins": 12.029026985168457, "rewards/rejected": -19.61626625061035, "step": 5533 }, { "epoch": 8.88, "learning_rate": 7.233452239397542e-09, "logits/chosen": -1.453104853630066, "logits/rejected": -1.4792566299438477, "logps/chosen": -125.20794677734375, "logps/rejected": -306.026611328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.250020980834961, "rewards/margins": 18.0235538482666, "rewards/rejected": -23.273574829101562, "step": 5534 }, { "epoch": 8.88, "learning_rate": 7.13436385255648e-09, "logits/chosen": -1.2641032934188843, "logits/rejected": -1.3249813318252563, "logps/chosen": -199.99769592285156, "logps/rejected": -331.6988525390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -11.577983856201172, "rewards/margins": 11.192280769348145, "rewards/rejected": -22.770265579223633, "step": 5535 }, { "epoch": 8.89, "learning_rate": 7.035275465715418e-09, "logits/chosen": -1.3295549154281616, "logits/rejected": -1.4133901596069336, "logps/chosen": -221.13027954101562, "logps/rejected": -353.1766357421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -11.57441234588623, "rewards/margins": 14.887897491455078, "rewards/rejected": -26.462308883666992, "step": 5536 }, { "epoch": 8.89, "learning_rate": 6.936187078874356e-09, "logits/chosen": -1.276961088180542, "logits/rejected": -1.2500523328781128, "logps/chosen": -205.25877380371094, "logps/rejected": -337.3580017089844, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.073221206665039, "rewards/margins": 16.732133865356445, "rewards/rejected": -24.805355072021484, "step": 5537 }, { "epoch": 8.89, "learning_rate": 6.837098692033293e-09, "logits/chosen": -1.5873289108276367, "logits/rejected": -1.5893609523773193, "logps/chosen": -122.88469696044922, "logps/rejected": -286.1046447753906, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.05858039855957, "rewards/margins": 16.562393188476562, "rewards/rejected": -20.6209716796875, "step": 5538 }, { "epoch": 8.89, "learning_rate": 6.738010305192231e-09, "logits/chosen": -1.4773001670837402, "logits/rejected": -1.4367588758468628, "logps/chosen": -227.33412170410156, "logps/rejected": -342.91943359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -12.257869720458984, "rewards/margins": 12.835556030273438, "rewards/rejected": -25.093427658081055, "step": 5539 }, { "epoch": 8.89, "learning_rate": 6.638921918351169e-09, "logits/chosen": -1.4713149070739746, "logits/rejected": -1.4697266817092896, "logps/chosen": -145.75970458984375, "logps/rejected": -278.59014892578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.746730804443359, "rewards/margins": 13.78388786315918, "rewards/rejected": -20.53061866760254, "step": 5540 }, { "epoch": 8.89, "learning_rate": 6.539833531510106e-09, "logits/chosen": -1.3435224294662476, "logits/rejected": -1.326120138168335, "logps/chosen": -203.05392456054688, "logps/rejected": -347.365478515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -12.239815711975098, "rewards/margins": 13.646158218383789, "rewards/rejected": -25.885974884033203, "step": 5541 }, { "epoch": 8.9, "learning_rate": 6.4407451446690445e-09, "logits/chosen": -1.5499751567840576, "logits/rejected": -1.5833988189697266, "logps/chosen": -170.71823120117188, "logps/rejected": -291.5944519042969, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -10.566473007202148, "rewards/margins": 10.870803833007812, "rewards/rejected": -21.437278747558594, "step": 5542 }, { "epoch": 8.9, "learning_rate": 6.341656757827983e-09, "logits/chosen": -1.4533368349075317, "logits/rejected": -1.4359675645828247, "logps/chosen": -161.0867156982422, "logps/rejected": -267.2300109863281, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.448526382446289, "rewards/margins": 10.051337242126465, "rewards/rejected": -18.49986457824707, "step": 5543 }, { "epoch": 8.9, "learning_rate": 6.24256837098692e-09, "logits/chosen": -1.348979115486145, "logits/rejected": -1.380164623260498, "logps/chosen": -162.74703979492188, "logps/rejected": -315.3556213378906, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.217746734619141, "rewards/margins": 16.287269592285156, "rewards/rejected": -22.505016326904297, "step": 5544 }, { "epoch": 8.9, "learning_rate": 6.143479984145858e-09, "logits/chosen": -1.4021108150482178, "logits/rejected": -1.4533365964889526, "logps/chosen": -190.911376953125, "logps/rejected": -346.6524353027344, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.786212921142578, "rewards/margins": 14.31003189086914, "rewards/rejected": -24.09624481201172, "step": 5545 }, { "epoch": 8.9, "learning_rate": 6.044391597304795e-09, "logits/chosen": -1.3962293863296509, "logits/rejected": -1.4132952690124512, "logps/chosen": -189.43765258789062, "logps/rejected": -362.11334228515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.910516738891602, "rewards/margins": 16.944488525390625, "rewards/rejected": -26.855003356933594, "step": 5546 }, { "epoch": 8.9, "learning_rate": 5.945303210463733e-09, "logits/chosen": -1.629744291305542, "logits/rejected": -1.6379599571228027, "logps/chosen": -223.2711181640625, "logps/rejected": -349.8612976074219, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -10.427894592285156, "rewards/margins": 13.59115982055664, "rewards/rejected": -24.019054412841797, "step": 5547 }, { "epoch": 8.91, "learning_rate": 5.846214823622672e-09, "logits/chosen": -1.2757009267807007, "logits/rejected": -1.3995387554168701, "logps/chosen": -141.5907440185547, "logps/rejected": -258.3895263671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.266718864440918, "rewards/margins": 11.240586280822754, "rewards/rejected": -17.507305145263672, "step": 5548 }, { "epoch": 8.91, "learning_rate": 5.747126436781609e-09, "logits/chosen": -1.5586762428283691, "logits/rejected": -1.5566110610961914, "logps/chosen": -107.01903533935547, "logps/rejected": -233.42617797851562, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.3329362869262695, "rewards/margins": 13.111557960510254, "rewards/rejected": -16.444494247436523, "step": 5549 }, { "epoch": 8.91, "learning_rate": 5.6480380499405465e-09, "logits/chosen": -1.4988871812820435, "logits/rejected": -1.4981805086135864, "logps/chosen": -190.57154846191406, "logps/rejected": -351.7620544433594, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.43215274810791, "rewards/margins": 16.082725524902344, "rewards/rejected": -24.514881134033203, "step": 5550 }, { "epoch": 8.91, "learning_rate": 5.548949663099485e-09, "logits/chosen": -1.649268388748169, "logits/rejected": -1.5936956405639648, "logps/chosen": -152.95741271972656, "logps/rejected": -314.008056640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.201050758361816, "rewards/margins": 17.049192428588867, "rewards/rejected": -22.250242233276367, "step": 5551 }, { "epoch": 8.91, "learning_rate": 5.449861276258422e-09, "logits/chosen": -1.470146656036377, "logits/rejected": -1.45078706741333, "logps/chosen": -158.98272705078125, "logps/rejected": -273.12066650390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.780348300933838, "rewards/margins": 12.038002014160156, "rewards/rejected": -17.818349838256836, "step": 5552 }, { "epoch": 8.91, "learning_rate": 5.35077288941736e-09, "logits/chosen": -1.3815776109695435, "logits/rejected": -1.3912830352783203, "logps/chosen": -117.28530883789062, "logps/rejected": -244.11898803710938, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.505147933959961, "rewards/margins": 12.078999519348145, "rewards/rejected": -16.58414649963379, "step": 5553 }, { "epoch": 8.91, "learning_rate": 5.251684502576298e-09, "logits/chosen": -1.4675558805465698, "logits/rejected": -1.4227486848831177, "logps/chosen": -173.59228515625, "logps/rejected": -293.31854248046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.284395217895508, "rewards/margins": 11.588385581970215, "rewards/rejected": -20.872783660888672, "step": 5554 }, { "epoch": 8.92, "learning_rate": 5.152596115735236e-09, "logits/chosen": -1.365803837776184, "logits/rejected": -1.37013578414917, "logps/chosen": -169.20761108398438, "logps/rejected": -282.65252685546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.261795043945312, "rewards/margins": 11.028141021728516, "rewards/rejected": -19.289934158325195, "step": 5555 }, { "epoch": 8.92, "learning_rate": 5.0535077288941735e-09, "logits/chosen": -1.8048027753829956, "logits/rejected": -1.7473621368408203, "logps/chosen": -171.9785614013672, "logps/rejected": -308.8185119628906, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.648723602294922, "rewards/margins": 15.320053100585938, "rewards/rejected": -21.96877670288086, "step": 5556 }, { "epoch": 8.92, "learning_rate": 4.954419342053111e-09, "logits/chosen": -1.6720865964889526, "logits/rejected": -1.6294585466384888, "logps/chosen": -150.77810668945312, "logps/rejected": -261.4216613769531, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.018840789794922, "rewards/margins": 12.596641540527344, "rewards/rejected": -18.615482330322266, "step": 5557 }, { "epoch": 8.92, "learning_rate": 4.855330955212048e-09, "logits/chosen": -1.3651245832443237, "logits/rejected": -1.4107718467712402, "logps/chosen": -171.5366973876953, "logps/rejected": -318.0671081542969, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.04908561706543, "rewards/margins": 14.390945434570312, "rewards/rejected": -23.440031051635742, "step": 5558 }, { "epoch": 8.92, "learning_rate": 4.756242568370987e-09, "logits/chosen": -1.3267184495925903, "logits/rejected": -1.4430619478225708, "logps/chosen": -148.0964813232422, "logps/rejected": -279.9290771484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.811586380004883, "rewards/margins": 10.503451347351074, "rewards/rejected": -19.315038681030273, "step": 5559 }, { "epoch": 8.92, "learning_rate": 4.657154181529925e-09, "logits/chosen": -1.3373745679855347, "logits/rejected": -1.3786394596099854, "logps/chosen": -181.12130737304688, "logps/rejected": -318.3215637207031, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.593098640441895, "rewards/margins": 11.806376457214355, "rewards/rejected": -21.39947509765625, "step": 5560 }, { "epoch": 8.93, "learning_rate": 4.558065794688862e-09, "logits/chosen": -1.580721378326416, "logits/rejected": -1.5192993879318237, "logps/chosen": -129.4248046875, "logps/rejected": -270.49114990234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.460580348968506, "rewards/margins": 15.458990097045898, "rewards/rejected": -19.919570922851562, "step": 5561 }, { "epoch": 8.93, "learning_rate": 4.4589774078478e-09, "logits/chosen": -1.49764084815979, "logits/rejected": -1.5842139720916748, "logps/chosen": -152.20143127441406, "logps/rejected": -288.29473876953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.002392768859863, "rewards/margins": 13.14461898803711, "rewards/rejected": -20.147010803222656, "step": 5562 }, { "epoch": 8.93, "learning_rate": 4.359889021006737e-09, "logits/chosen": -1.626151442527771, "logits/rejected": -1.6294300556182861, "logps/chosen": -92.47596740722656, "logps/rejected": -248.18862915039062, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.906686544418335, "rewards/margins": 14.758903503417969, "rewards/rejected": -17.665590286254883, "step": 5563 }, { "epoch": 8.93, "learning_rate": 4.2608006341656755e-09, "logits/chosen": -1.373234748840332, "logits/rejected": -1.368178367614746, "logps/chosen": -126.8079605102539, "logps/rejected": -323.01446533203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.826100826263428, "rewards/margins": 17.397804260253906, "rewards/rejected": -23.223907470703125, "step": 5564 }, { "epoch": 8.93, "learning_rate": 4.161712247324614e-09, "logits/chosen": -1.472311019897461, "logits/rejected": -1.4220939874649048, "logps/chosen": -139.84767150878906, "logps/rejected": -291.8489990234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.948934555053711, "rewards/margins": 14.550586700439453, "rewards/rejected": -21.499521255493164, "step": 5565 }, { "epoch": 8.93, "learning_rate": 4.062623860483551e-09, "logits/chosen": -1.4732526540756226, "logits/rejected": -1.5243475437164307, "logps/chosen": -191.86984252929688, "logps/rejected": -287.5022277832031, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.804506301879883, "rewards/margins": 10.611030578613281, "rewards/rejected": -17.415536880493164, "step": 5566 }, { "epoch": 8.94, "learning_rate": 3.963535473642489e-09, "logits/chosen": -1.4829237461090088, "logits/rejected": -1.5580952167510986, "logps/chosen": -146.04263305664062, "logps/rejected": -280.3955993652344, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.363846778869629, "rewards/margins": 11.082490921020508, "rewards/rejected": -18.44633674621582, "step": 5567 }, { "epoch": 8.94, "learning_rate": 3.864447086801427e-09, "logits/chosen": -1.5107572078704834, "logits/rejected": -1.4684278964996338, "logps/chosen": -135.573974609375, "logps/rejected": -282.4186706542969, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.035415172576904, "rewards/margins": 16.25564193725586, "rewards/rejected": -22.291057586669922, "step": 5568 }, { "epoch": 8.94, "learning_rate": 3.765358699960364e-09, "logits/chosen": -1.3906028270721436, "logits/rejected": -1.3945419788360596, "logps/chosen": -173.60520935058594, "logps/rejected": -303.6537170410156, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.97482442855835, "rewards/margins": 14.139511108398438, "rewards/rejected": -21.114336013793945, "step": 5569 }, { "epoch": 8.94, "learning_rate": 3.666270313119302e-09, "logits/chosen": -1.4650046825408936, "logits/rejected": -1.4454541206359863, "logps/chosen": -156.11907958984375, "logps/rejected": -286.27685546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.076201438903809, "rewards/margins": 13.653706550598145, "rewards/rejected": -19.729907989501953, "step": 5570 }, { "epoch": 8.94, "learning_rate": 3.56718192627824e-09, "logits/chosen": -1.477257490158081, "logits/rejected": -1.4355814456939697, "logps/chosen": -170.29989624023438, "logps/rejected": -291.88916015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.768489837646484, "rewards/margins": 12.574531555175781, "rewards/rejected": -21.343021392822266, "step": 5571 }, { "epoch": 8.94, "learning_rate": 3.468093539437178e-09, "logits/chosen": -1.3901575803756714, "logits/rejected": -1.4330360889434814, "logps/chosen": -146.53173828125, "logps/rejected": -340.9048767089844, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.530895233154297, "rewards/margins": 17.121395111083984, "rewards/rejected": -25.65228843688965, "step": 5572 }, { "epoch": 8.95, "learning_rate": 3.3690051525961157e-09, "logits/chosen": -1.2886419296264648, "logits/rejected": -1.3536070585250854, "logps/chosen": -180.0691680908203, "logps/rejected": -350.9300537109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.001100540161133, "rewards/margins": 15.590032577514648, "rewards/rejected": -24.59113311767578, "step": 5573 }, { "epoch": 8.95, "learning_rate": 3.269916765755053e-09, "logits/chosen": -1.4912701845169067, "logits/rejected": -1.483092188835144, "logps/chosen": -169.70684814453125, "logps/rejected": -311.35986328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.20219612121582, "rewards/margins": 15.46371841430664, "rewards/rejected": -23.66591453552246, "step": 5574 }, { "epoch": 8.95, "learning_rate": 3.1708283789139914e-09, "logits/chosen": -1.4749550819396973, "logits/rejected": -1.4195890426635742, "logps/chosen": -198.25767517089844, "logps/rejected": -323.0523376464844, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.985148429870605, "rewards/margins": 13.699481010437012, "rewards/rejected": -23.68463134765625, "step": 5575 }, { "epoch": 8.95, "learning_rate": 3.071739992072929e-09, "logits/chosen": -1.437975525856018, "logits/rejected": -1.3715416193008423, "logps/chosen": -159.55397033691406, "logps/rejected": -296.2626647949219, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.059662818908691, "rewards/margins": 14.328288078308105, "rewards/rejected": -22.387950897216797, "step": 5576 }, { "epoch": 8.95, "learning_rate": 2.9726516052318667e-09, "logits/chosen": -1.4831945896148682, "logits/rejected": -1.4334884881973267, "logps/chosen": -144.580810546875, "logps/rejected": -300.0434265136719, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.061241149902344, "rewards/margins": 15.85600471496582, "rewards/rejected": -21.91724395751953, "step": 5577 }, { "epoch": 8.95, "learning_rate": 2.8735632183908045e-09, "logits/chosen": -1.5568534135818481, "logits/rejected": -1.546190857887268, "logps/chosen": -174.35440063476562, "logps/rejected": -305.08575439453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.438225746154785, "rewards/margins": 12.976125717163086, "rewards/rejected": -21.414352416992188, "step": 5578 }, { "epoch": 8.96, "learning_rate": 2.7744748315497424e-09, "logits/chosen": -1.5426157712936401, "logits/rejected": -1.527779459953308, "logps/chosen": -205.07852172851562, "logps/rejected": -314.773193359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.837130546569824, "rewards/margins": 12.570311546325684, "rewards/rejected": -22.407442092895508, "step": 5579 }, { "epoch": 8.96, "learning_rate": 2.67538644470868e-09, "logits/chosen": -1.5151317119598389, "logits/rejected": -1.4831182956695557, "logps/chosen": -133.38833618164062, "logps/rejected": -304.69268798828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.247411251068115, "rewards/margins": 16.467342376708984, "rewards/rejected": -22.714752197265625, "step": 5580 }, { "epoch": 8.96, "learning_rate": 2.576298057867618e-09, "logits/chosen": -1.605851173400879, "logits/rejected": -1.5621271133422852, "logps/chosen": -145.75132751464844, "logps/rejected": -308.599853515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.437315464019775, "rewards/margins": 15.989755630493164, "rewards/rejected": -22.42707061767578, "step": 5581 }, { "epoch": 8.96, "learning_rate": 2.4772096710265555e-09, "logits/chosen": -1.4194228649139404, "logits/rejected": -1.440765142440796, "logps/chosen": -193.83885192871094, "logps/rejected": -325.5533447265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.937275886535645, "rewards/margins": 12.562469482421875, "rewards/rejected": -22.499744415283203, "step": 5582 }, { "epoch": 8.96, "learning_rate": 2.3781212841854933e-09, "logits/chosen": -1.446515679359436, "logits/rejected": -1.5454959869384766, "logps/chosen": -155.822021484375, "logps/rejected": -311.8072204589844, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.569726943969727, "rewards/margins": 14.861775398254395, "rewards/rejected": -22.431501388549805, "step": 5583 }, { "epoch": 8.96, "learning_rate": 2.279032897344431e-09, "logits/chosen": -1.3862296342849731, "logits/rejected": -1.4577624797821045, "logps/chosen": -211.242919921875, "logps/rejected": -335.4261169433594, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -10.653841018676758, "rewards/margins": 11.934491157531738, "rewards/rejected": -22.588333129882812, "step": 5584 }, { "epoch": 8.96, "learning_rate": 2.1799445105033686e-09, "logits/chosen": -1.6489660739898682, "logits/rejected": -1.6513805389404297, "logps/chosen": -150.65390014648438, "logps/rejected": -311.5079040527344, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.2020673751831055, "rewards/margins": 15.719917297363281, "rewards/rejected": -22.921985626220703, "step": 5585 }, { "epoch": 8.97, "learning_rate": 2.080856123662307e-09, "logits/chosen": -1.2698103189468384, "logits/rejected": -1.2529903650283813, "logps/chosen": -197.02630615234375, "logps/rejected": -307.866455078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -11.570103645324707, "rewards/margins": 11.92452621459961, "rewards/rejected": -23.494630813598633, "step": 5586 }, { "epoch": 8.97, "learning_rate": 1.9817677368212443e-09, "logits/chosen": -1.4508063793182373, "logits/rejected": -1.4695771932601929, "logps/chosen": -127.34912109375, "logps/rejected": -284.9428405761719, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.792441368103027, "rewards/margins": 16.06023597717285, "rewards/rejected": -20.852678298950195, "step": 5587 }, { "epoch": 8.97, "learning_rate": 1.882679349980182e-09, "logits/chosen": -1.3122761249542236, "logits/rejected": -1.2863131761550903, "logps/chosen": -175.91993713378906, "logps/rejected": -264.55914306640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.034013748168945, "rewards/margins": 12.792272567749023, "rewards/rejected": -20.82628631591797, "step": 5588 }, { "epoch": 8.97, "learning_rate": 1.78359096313912e-09, "logits/chosen": -1.6139024496078491, "logits/rejected": -1.6038577556610107, "logps/chosen": -157.0148162841797, "logps/rejected": -273.06488037109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.329857349395752, "rewards/margins": 13.040611267089844, "rewards/rejected": -19.37047004699707, "step": 5589 }, { "epoch": 8.97, "learning_rate": 1.6845025762980578e-09, "logits/chosen": -1.5084803104400635, "logits/rejected": -1.502764105796814, "logps/chosen": -182.354248046875, "logps/rejected": -313.660400390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.31116008758545, "rewards/margins": 14.6881103515625, "rewards/rejected": -22.999269485473633, "step": 5590 }, { "epoch": 8.97, "learning_rate": 1.5854141894569957e-09, "logits/chosen": -1.684743881225586, "logits/rejected": -1.6472859382629395, "logps/chosen": -140.39602661132812, "logps/rejected": -255.92982482910156, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.5803985595703125, "rewards/margins": 13.090886116027832, "rewards/rejected": -18.671283721923828, "step": 5591 }, { "epoch": 8.98, "learning_rate": 1.4863258026159333e-09, "logits/chosen": -1.3962390422821045, "logits/rejected": -1.4182277917861938, "logps/chosen": -163.5077667236328, "logps/rejected": -267.0746154785156, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.4592742919921875, "rewards/margins": 11.378170013427734, "rewards/rejected": -18.837444305419922, "step": 5592 }, { "epoch": 8.98, "learning_rate": 1.3872374157748712e-09, "logits/chosen": -1.4572205543518066, "logits/rejected": -1.5391861200332642, "logps/chosen": -95.77008056640625, "logps/rejected": -247.80026245117188, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.4822535514831543, "rewards/margins": 15.075722694396973, "rewards/rejected": -18.55797576904297, "step": 5593 }, { "epoch": 8.98, "learning_rate": 1.288149028933809e-09, "logits/chosen": -1.4385879039764404, "logits/rejected": -1.5150814056396484, "logps/chosen": -173.08328247070312, "logps/rejected": -323.7647399902344, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.58642578125, "rewards/margins": 15.04301643371582, "rewards/rejected": -24.629444122314453, "step": 5594 }, { "epoch": 8.98, "learning_rate": 1.1890606420927467e-09, "logits/chosen": -1.4460622072219849, "logits/rejected": -1.4188734292984009, "logps/chosen": -128.3372344970703, "logps/rejected": -273.07989501953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.00007963180542, "rewards/margins": 13.88518238067627, "rewards/rejected": -19.88526153564453, "step": 5595 }, { "epoch": 8.98, "learning_rate": 1.0899722552516843e-09, "logits/chosen": -1.4325439929962158, "logits/rejected": -1.466303825378418, "logps/chosen": -172.8763885498047, "logps/rejected": -296.3722839355469, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -10.317228317260742, "rewards/margins": 11.966876029968262, "rewards/rejected": -22.284103393554688, "step": 5596 }, { "epoch": 8.98, "learning_rate": 9.908838684106222e-10, "logits/chosen": -1.4646642208099365, "logits/rejected": -1.49484121799469, "logps/chosen": -133.38018798828125, "logps/rejected": -297.247314453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.331569671630859, "rewards/margins": 14.043962478637695, "rewards/rejected": -20.375532150268555, "step": 5597 }, { "epoch": 8.99, "learning_rate": 8.9179548156956e-10, "logits/chosen": -1.6434353590011597, "logits/rejected": -1.576925277709961, "logps/chosen": -180.9561767578125, "logps/rejected": -310.79766845703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.983611106872559, "rewards/margins": 14.025321960449219, "rewards/rejected": -22.008934020996094, "step": 5598 }, { "epoch": 8.99, "learning_rate": 7.927070947284978e-10, "logits/chosen": -1.6101654767990112, "logits/rejected": -1.574453353881836, "logps/chosen": -144.18386840820312, "logps/rejected": -272.75811767578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.9166460037231445, "rewards/margins": 13.731451034545898, "rewards/rejected": -18.648096084594727, "step": 5599 }, { "epoch": 8.99, "learning_rate": 6.936187078874356e-10, "logits/chosen": -1.3635761737823486, "logits/rejected": -1.3398789167404175, "logps/chosen": -156.15357971191406, "logps/rejected": -298.1269226074219, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.194400787353516, "rewards/margins": 14.788064002990723, "rewards/rejected": -19.982465744018555, "step": 5600 }, { "epoch": 8.99, "learning_rate": 5.945303210463733e-10, "logits/chosen": -1.3833613395690918, "logits/rejected": -1.353509545326233, "logps/chosen": -148.7933349609375, "logps/rejected": -310.07171630859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.010641098022461, "rewards/margins": 15.126819610595703, "rewards/rejected": -22.137460708618164, "step": 5601 }, { "epoch": 8.99, "learning_rate": 4.954419342053111e-10, "logits/chosen": -1.438132643699646, "logits/rejected": -1.4972431659698486, "logps/chosen": -138.6864776611328, "logps/rejected": -316.56048583984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.831529140472412, "rewards/margins": 17.135868072509766, "rewards/rejected": -22.967397689819336, "step": 5602 }, { "epoch": 8.99, "learning_rate": 3.963535473642489e-10, "logits/chosen": -1.5281355381011963, "logits/rejected": -1.458997130393982, "logps/chosen": -155.76519775390625, "logps/rejected": -291.20855712890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.240975379943848, "rewards/margins": 13.647465705871582, "rewards/rejected": -19.88844108581543, "step": 5603 }, { "epoch": 9.0, "learning_rate": 2.9726516052318667e-10, "logits/chosen": -1.405860185623169, "logits/rejected": -1.4780182838439941, "logps/chosen": -152.6331024169922, "logps/rejected": -338.17169189453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.931190490722656, "rewards/margins": 16.70195960998535, "rewards/rejected": -24.633150100708008, "step": 5604 }, { "epoch": 9.0, "learning_rate": 1.9817677368212446e-10, "logits/chosen": -1.6140244007110596, "logits/rejected": -1.5737364292144775, "logps/chosen": -160.79434204101562, "logps/rejected": -299.6746826171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.942219257354736, "rewards/margins": 16.059476852416992, "rewards/rejected": -21.00169563293457, "step": 5605 }, { "epoch": 9.0, "learning_rate": 9.908838684106223e-11, "logits/chosen": -1.5047757625579834, "logits/rejected": -1.5366731882095337, "logps/chosen": -186.2240447998047, "logps/rejected": -350.0401916503906, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -10.321247100830078, "rewards/margins": 15.944011688232422, "rewards/rejected": -26.265260696411133, "step": 5606 }, { "epoch": 9.0, "learning_rate": 0.0, "logits/chosen": -1.2633585929870605, "logits/rejected": -1.3158758878707886, "logps/chosen": -154.55526733398438, "logps/rejected": -312.0606994628906, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.210287094116211, "rewards/margins": 14.50362777709961, "rewards/rejected": -21.71391487121582, "step": 5607 }, { "epoch": 9.0, "step": 5607, "total_flos": 0.0, "train_loss": 0.12788055668324702, "train_runtime": 21079.0027, "train_samples_per_second": 8.509, "train_steps_per_second": 0.266 } ], "logging_steps": 1.0, "max_steps": 5607, "num_input_tokens_seen": 0, "num_train_epochs": 9, "save_steps": 10000, "total_flos": 0.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }