diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,78528 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 9.0, + "eval_steps": 500, + "global_step": 5607, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 8.9126559714795e-10, + "logits/chosen": -1.1435530185699463, + "logits/rejected": -1.1284090280532837, + "logps/chosen": -80.93795776367188, + "logps/rejected": -82.66487884521484, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.0, + "rewards/margins": 0.0, + "rewards/rejected": 0.0, + "step": 1 + }, + { + "epoch": 0.0, + "learning_rate": 1.7825311942959e-09, + "logits/chosen": -1.3172465562820435, + "logits/rejected": -1.3799258470535278, + "logps/chosen": -76.21844482421875, + "logps/rejected": -76.22967529296875, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.0, + "rewards/margins": 0.0, + "rewards/rejected": 0.0, + "step": 2 + }, + { + "epoch": 0.0, + "learning_rate": 2.67379679144385e-09, + "logits/chosen": -1.2846455574035645, + "logits/rejected": -1.2674322128295898, + "logps/chosen": -92.08033752441406, + "logps/rejected": -89.7175521850586, + "loss": 0.6921, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.0014805792598053813, + "rewards/margins": 0.005074121057987213, + "rewards/rejected": -0.003593540983274579, + "step": 3 + }, + { + "epoch": 0.01, + "learning_rate": 3.5650623885918e-09, + "logits/chosen": -1.1334583759307861, + "logits/rejected": -1.1685543060302734, + "logps/chosen": -65.57213592529297, + "logps/rejected": -87.30081939697266, + "loss": 0.6958, + "rewards/accuracies": 0.25, + "rewards/chosen": -0.005223465617746115, + "rewards/margins": -0.006298257503658533, + "rewards/rejected": 0.001074791420251131, + "step": 4 + }, + { + "epoch": 0.01, + "learning_rate": 4.45632798573975e-09, + "logits/chosen": -1.3780877590179443, + "logits/rejected": -1.3577934503555298, + "logps/chosen": -112.97721099853516, + "logps/rejected": -92.511474609375, + "loss": 0.695, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.011667252518236637, + "rewards/margins": 0.0011396417394280434, + "rewards/rejected": 0.01052760984748602, + "step": 5 + }, + { + "epoch": 0.01, + "learning_rate": 5.3475935828877e-09, + "logits/chosen": -1.1994566917419434, + "logits/rejected": -1.1853225231170654, + "logps/chosen": -91.11537170410156, + "logps/rejected": -77.11100769042969, + "loss": 0.697, + "rewards/accuracies": 0.25, + "rewards/chosen": 0.0005447386065497994, + "rewards/margins": -0.008729648776352406, + "rewards/rejected": 0.009274386800825596, + "step": 6 + }, + { + "epoch": 0.01, + "learning_rate": 6.2388591800356504e-09, + "logits/chosen": -1.214829921722412, + "logits/rejected": -1.1547781229019165, + "logps/chosen": -116.8848876953125, + "logps/rejected": -88.86708068847656, + "loss": 0.6952, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.013874244876205921, + "rewards/margins": -0.0020969377364963293, + "rewards/rejected": -0.011777305975556374, + "step": 7 + }, + { + "epoch": 0.01, + "learning_rate": 7.1301247771836e-09, + "logits/chosen": -1.3349050283432007, + "logits/rejected": -1.3133201599121094, + "logps/chosen": -92.63496398925781, + "logps/rejected": -84.50946807861328, + "loss": 0.6934, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.013919067569077015, + "rewards/margins": 0.01947193220257759, + "rewards/rejected": -0.005552864633500576, + "step": 8 + }, + { + "epoch": 0.01, + "learning_rate": 8.021390374331551e-09, + "logits/chosen": -1.3035023212432861, + "logits/rejected": -1.2244908809661865, + "logps/chosen": -87.8225326538086, + "logps/rejected": -96.7188491821289, + "loss": 0.6943, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.017471885308623314, + "rewards/margins": 0.034188367426395416, + "rewards/rejected": -0.016716480255126953, + "step": 9 + }, + { + "epoch": 0.02, + "learning_rate": 8.9126559714795e-09, + "logits/chosen": -1.2716120481491089, + "logits/rejected": -1.2810933589935303, + "logps/chosen": -75.09244537353516, + "logps/rejected": -81.57750701904297, + "loss": 0.6957, + "rewards/accuracies": 0.25, + "rewards/chosen": -0.02150268666446209, + "rewards/margins": -0.012680627405643463, + "rewards/rejected": -0.008822059258818626, + "step": 10 + }, + { + "epoch": 0.02, + "learning_rate": 9.803921568627451e-09, + "logits/chosen": -1.3337805271148682, + "logits/rejected": -1.3374552726745605, + "logps/chosen": -54.97137451171875, + "logps/rejected": -77.16277313232422, + "loss": 0.6864, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.017574407160282135, + "rewards/margins": 0.030472660437226295, + "rewards/rejected": -0.01289825513958931, + "step": 11 + }, + { + "epoch": 0.02, + "learning_rate": 1.06951871657754e-08, + "logits/chosen": -1.2354915142059326, + "logits/rejected": -1.2200746536254883, + "logps/chosen": -78.03058624267578, + "logps/rejected": -74.38961029052734, + "loss": 0.6921, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.02010498195886612, + "rewards/margins": 0.021007061004638672, + "rewards/rejected": -0.0009020804427564144, + "step": 12 + }, + { + "epoch": 0.02, + "learning_rate": 1.1586452762923352e-08, + "logits/chosen": -1.1696809530258179, + "logits/rejected": -1.224806308746338, + "logps/chosen": -53.55621337890625, + "logps/rejected": -65.93765258789062, + "loss": 0.6909, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.004014873411506414, + "rewards/margins": -0.00037641567178070545, + "rewards/rejected": 0.0043912893161177635, + "step": 13 + }, + { + "epoch": 0.02, + "learning_rate": 1.2477718360071301e-08, + "logits/chosen": -1.177738070487976, + "logits/rejected": -1.1873152256011963, + "logps/chosen": -81.28230285644531, + "logps/rejected": -85.71018981933594, + "loss": 0.6928, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.008526802062988281, + "rewards/margins": -0.023273276165127754, + "rewards/rejected": 0.014746475033462048, + "step": 14 + }, + { + "epoch": 0.02, + "learning_rate": 1.336898395721925e-08, + "logits/chosen": -1.131972312927246, + "logits/rejected": -1.2693361043930054, + "logps/chosen": -92.68744659423828, + "logps/rejected": -101.66392517089844, + "loss": 0.692, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.03864727169275284, + "rewards/margins": 0.026333998888731003, + "rewards/rejected": 0.012313270941376686, + "step": 15 + }, + { + "epoch": 0.03, + "learning_rate": 1.42602495543672e-08, + "logits/chosen": -1.3204665184020996, + "logits/rejected": -1.2761931419372559, + "logps/chosen": -95.74635314941406, + "logps/rejected": -91.4443588256836, + "loss": 0.6891, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.01901693269610405, + "rewards/margins": 0.03508748859167099, + "rewards/rejected": -0.01607055589556694, + "step": 16 + }, + { + "epoch": 0.03, + "learning_rate": 1.5151515151515152e-08, + "logits/chosen": -1.1829864978790283, + "logits/rejected": -1.2121195793151855, + "logps/chosen": -86.8672103881836, + "logps/rejected": -77.72029113769531, + "loss": 0.6946, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.03137397766113281, + "rewards/margins": 0.03347301483154297, + "rewards/rejected": -0.0020990371704101562, + "step": 17 + }, + { + "epoch": 0.03, + "learning_rate": 1.6042780748663103e-08, + "logits/chosen": -1.1455968618392944, + "logits/rejected": -1.1175220012664795, + "logps/chosen": -89.77186584472656, + "logps/rejected": -80.4815444946289, + "loss": 0.6904, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.02430114708840847, + "rewards/margins": -0.03043375164270401, + "rewards/rejected": 0.006132603157311678, + "step": 18 + }, + { + "epoch": 0.03, + "learning_rate": 1.693404634581105e-08, + "logits/chosen": -1.1125303506851196, + "logits/rejected": -1.0411198139190674, + "logps/chosen": -74.30352783203125, + "logps/rejected": -74.13288879394531, + "loss": 0.6939, + "rewards/accuracies": 0.25, + "rewards/chosen": -0.031328774988651276, + "rewards/margins": -0.026041794568300247, + "rewards/rejected": -0.005286979489028454, + "step": 19 + }, + { + "epoch": 0.03, + "learning_rate": 1.7825311942959e-08, + "logits/chosen": -1.3786427974700928, + "logits/rejected": -1.3799937963485718, + "logps/chosen": -120.43927764892578, + "logps/rejected": -118.28070068359375, + "loss": 0.6957, + "rewards/accuracies": 0.25, + "rewards/chosen": -0.031014632433652878, + "rewards/margins": -0.018782615661621094, + "rewards/rejected": -0.012232016772031784, + "step": 20 + }, + { + "epoch": 0.03, + "learning_rate": 1.8716577540106948e-08, + "logits/chosen": -1.072481632232666, + "logits/rejected": -1.051990270614624, + "logps/chosen": -73.76265716552734, + "logps/rejected": -83.66194152832031, + "loss": 0.6922, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.003286171006038785, + "rewards/margins": -0.0012664794921875, + "rewards/rejected": -0.0020196912810206413, + "step": 21 + }, + { + "epoch": 0.04, + "learning_rate": 1.9607843137254902e-08, + "logits/chosen": -1.2472436428070068, + "logits/rejected": -1.2484467029571533, + "logps/chosen": -73.8175277709961, + "logps/rejected": -82.17957305908203, + "loss": 0.694, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.00903787650167942, + "rewards/margins": 0.004723548889160156, + "rewards/rejected": 0.004314327612519264, + "step": 22 + }, + { + "epoch": 0.04, + "learning_rate": 2.0499108734402852e-08, + "logits/chosen": -1.4827728271484375, + "logits/rejected": -1.438743233680725, + "logps/chosen": -81.41081237792969, + "logps/rejected": -114.13521575927734, + "loss": 0.6919, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.013235283084213734, + "rewards/margins": 0.01791839674115181, + "rewards/rejected": -0.004683113191276789, + "step": 23 + }, + { + "epoch": 0.04, + "learning_rate": 2.13903743315508e-08, + "logits/chosen": -1.171872854232788, + "logits/rejected": -1.0910282135009766, + "logps/chosen": -112.17322540283203, + "logps/rejected": -94.95963287353516, + "loss": 0.6966, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.006474972702562809, + "rewards/margins": 0.018526744097471237, + "rewards/rejected": -0.02500171586871147, + "step": 24 + }, + { + "epoch": 0.04, + "learning_rate": 2.228163992869875e-08, + "logits/chosen": -1.2558165788650513, + "logits/rejected": -1.2542061805725098, + "logps/chosen": -81.30638122558594, + "logps/rejected": -77.05133056640625, + "loss": 0.6983, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.012335395440459251, + "rewards/margins": -0.027157213538885117, + "rewards/rejected": 0.014821816235780716, + "step": 25 + }, + { + "epoch": 0.04, + "learning_rate": 2.3172905525846704e-08, + "logits/chosen": -1.0553042888641357, + "logits/rejected": -1.1029515266418457, + "logps/chosen": -75.61978149414062, + "logps/rejected": -102.44906616210938, + "loss": 0.6915, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.014333153143525124, + "rewards/margins": 0.02329235151410103, + "rewards/rejected": -0.008959198370575905, + "step": 26 + }, + { + "epoch": 0.04, + "learning_rate": 2.406417112299465e-08, + "logits/chosen": -1.2646524906158447, + "logits/rejected": -1.263723611831665, + "logps/chosen": -69.84374237060547, + "logps/rejected": -64.16584014892578, + "loss": 0.689, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.03945665434002876, + "rewards/margins": 0.015945911407470703, + "rewards/rejected": 0.02351074293255806, + "step": 27 + }, + { + "epoch": 0.04, + "learning_rate": 2.4955436720142602e-08, + "logits/chosen": -1.2104555368423462, + "logits/rejected": -1.1757853031158447, + "logps/chosen": -74.42176818847656, + "logps/rejected": -86.26324462890625, + "loss": 0.6905, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.024415016174316406, + "rewards/margins": 0.026796627789735794, + "rewards/rejected": -0.002381610684096813, + "step": 28 + }, + { + "epoch": 0.05, + "learning_rate": 2.5846702317290552e-08, + "logits/chosen": -1.2878258228302002, + "logits/rejected": -1.409778118133545, + "logps/chosen": -72.87904357910156, + "logps/rejected": -105.05842590332031, + "loss": 0.6983, + "rewards/accuracies": 0.25, + "rewards/chosen": -0.0002737997565418482, + "rewards/margins": -0.029415415599942207, + "rewards/rejected": 0.029141617938876152, + "step": 29 + }, + { + "epoch": 0.05, + "learning_rate": 2.67379679144385e-08, + "logits/chosen": -1.0850152969360352, + "logits/rejected": -1.0075057744979858, + "logps/chosen": -66.09901428222656, + "logps/rejected": -76.13536834716797, + "loss": 0.6985, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.004696083255112171, + "rewards/margins": 0.0047134398482739925, + "rewards/rejected": -0.009409523569047451, + "step": 30 + }, + { + "epoch": 0.05, + "learning_rate": 2.7629233511586453e-08, + "logits/chosen": -1.023692011833191, + "logits/rejected": -0.9985321164131165, + "logps/chosen": -78.41415405273438, + "logps/rejected": -96.77940368652344, + "loss": 0.6927, + "rewards/accuracies": 0.25, + "rewards/chosen": 0.0017423636745661497, + "rewards/margins": 0.0020977025851607323, + "rewards/rejected": -0.0003553388814907521, + "step": 31 + }, + { + "epoch": 0.05, + "learning_rate": 2.85204991087344e-08, + "logits/chosen": -1.1439048051834106, + "logits/rejected": -1.19899320602417, + "logps/chosen": -88.75979614257812, + "logps/rejected": -94.55015563964844, + "loss": 0.6981, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.018172454088926315, + "rewards/margins": 0.0387752503156662, + "rewards/rejected": -0.020602799952030182, + "step": 32 + }, + { + "epoch": 0.05, + "learning_rate": 2.941176470588235e-08, + "logits/chosen": -1.3200393915176392, + "logits/rejected": -1.2867811918258667, + "logps/chosen": -92.84091186523438, + "logps/rejected": -73.74278259277344, + "loss": 0.6941, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.008177567273378372, + "rewards/margins": 0.01898803561925888, + "rewards/rejected": -0.010810472071170807, + "step": 33 + }, + { + "epoch": 0.05, + "learning_rate": 3.0303030303030305e-08, + "logits/chosen": -1.2860469818115234, + "logits/rejected": -1.176468849182129, + "logps/chosen": -79.335205078125, + "logps/rejected": -83.57208251953125, + "loss": 0.6928, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.0002735140733420849, + "rewards/margins": 0.018741607666015625, + "rewards/rejected": -0.019015122205018997, + "step": 34 + }, + { + "epoch": 0.06, + "learning_rate": 3.119429590017825e-08, + "logits/chosen": -1.3077772855758667, + "logits/rejected": -1.2369978427886963, + "logps/chosen": -94.12866973876953, + "logps/rejected": -87.97264862060547, + "loss": 0.6958, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.022930718958377838, + "rewards/margins": -0.02383594401180744, + "rewards/rejected": 0.0009052273817360401, + "step": 35 + }, + { + "epoch": 0.06, + "learning_rate": 3.2085561497326206e-08, + "logits/chosen": -1.5053722858428955, + "logits/rejected": -1.4844354391098022, + "logps/chosen": -88.12223052978516, + "logps/rejected": -94.58377075195312, + "loss": 0.6896, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.0020842552185058594, + "rewards/margins": 0.012209415435791016, + "rewards/rejected": -0.010125160217285156, + "step": 36 + }, + { + "epoch": 0.06, + "learning_rate": 3.297682709447415e-08, + "logits/chosen": -1.3753101825714111, + "logits/rejected": -1.4570457935333252, + "logps/chosen": -93.10310363769531, + "logps/rejected": -114.03001403808594, + "loss": 0.6922, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.012287425808608532, + "rewards/margins": 0.03860483318567276, + "rewards/rejected": -0.026317404583096504, + "step": 37 + }, + { + "epoch": 0.06, + "learning_rate": 3.38680926916221e-08, + "logits/chosen": -1.0623289346694946, + "logits/rejected": -1.0550472736358643, + "logps/chosen": -85.97282409667969, + "logps/rejected": -94.81553649902344, + "loss": 0.6915, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.01326742209494114, + "rewards/margins": -0.0038396830204874277, + "rewards/rejected": -0.00942773837596178, + "step": 38 + }, + { + "epoch": 0.06, + "learning_rate": 3.475935828877005e-08, + "logits/chosen": -1.1288137435913086, + "logits/rejected": -1.1795384883880615, + "logps/chosen": -68.82371520996094, + "logps/rejected": -62.92646026611328, + "loss": 0.6972, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.01655597612261772, + "rewards/margins": 0.009626006707549095, + "rewards/rejected": -0.026181982830166817, + "step": 39 + }, + { + "epoch": 0.06, + "learning_rate": 3.5650623885918e-08, + "logits/chosen": -1.2615855932235718, + "logits/rejected": -1.3178114891052246, + "logps/chosen": -75.31927490234375, + "logps/rejected": -70.06914520263672, + "loss": 0.6913, + "rewards/accuracies": 0.25, + "rewards/chosen": -0.007126140408217907, + "rewards/margins": -0.01494908519089222, + "rewards/rejected": 0.007822942920029163, + "step": 40 + }, + { + "epoch": 0.07, + "learning_rate": 3.654188948306595e-08, + "logits/chosen": -1.2270495891571045, + "logits/rejected": -1.2540949583053589, + "logps/chosen": -97.13208770751953, + "logps/rejected": -105.10755920410156, + "loss": 0.6936, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.020570946857333183, + "rewards/margins": -0.03218650817871094, + "rewards/rejected": 0.011615563184022903, + "step": 41 + }, + { + "epoch": 0.07, + "learning_rate": 3.7433155080213896e-08, + "logits/chosen": -1.099852442741394, + "logits/rejected": -1.232105016708374, + "logps/chosen": -88.85939025878906, + "logps/rejected": -95.95564270019531, + "loss": 0.6935, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.015971947461366653, + "rewards/margins": 0.0004159929230809212, + "rewards/rejected": 0.015555954538285732, + "step": 42 + }, + { + "epoch": 0.07, + "learning_rate": 3.832442067736185e-08, + "logits/chosen": -1.3121964931488037, + "logits/rejected": -1.3425496816635132, + "logps/chosen": -120.95159912109375, + "logps/rejected": -95.6594009399414, + "loss": 0.6957, + "rewards/accuracies": 0.25, + "rewards/chosen": -0.016383934766054153, + "rewards/margins": -0.013637255877256393, + "rewards/rejected": -0.0027466770261526108, + "step": 43 + }, + { + "epoch": 0.07, + "learning_rate": 3.9215686274509804e-08, + "logits/chosen": -1.1658923625946045, + "logits/rejected": -1.11716890335083, + "logps/chosen": -79.65956115722656, + "logps/rejected": -100.74508666992188, + "loss": 0.6936, + "rewards/accuracies": 0.5, + "rewards/chosen": 9.164831135421991e-05, + "rewards/margins": -0.014169787988066673, + "rewards/rejected": 0.014261436648666859, + "step": 44 + }, + { + "epoch": 0.07, + "learning_rate": 4.0106951871657754e-08, + "logits/chosen": -1.1392419338226318, + "logits/rejected": -1.1202974319458008, + "logps/chosen": -103.25138854980469, + "logps/rejected": -85.0802001953125, + "loss": 0.6896, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.01794738881289959, + "rewards/margins": 0.017765045166015625, + "rewards/rejected": 0.00018234271556138992, + "step": 45 + }, + { + "epoch": 0.07, + "learning_rate": 4.0998217468805705e-08, + "logits/chosen": -1.1125216484069824, + "logits/rejected": -1.1562517881393433, + "logps/chosen": -88.44237518310547, + "logps/rejected": -82.15657043457031, + "loss": 0.6945, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.008600997738540173, + "rewards/margins": -0.0014699934981763363, + "rewards/rejected": 0.010070991702377796, + "step": 46 + }, + { + "epoch": 0.08, + "learning_rate": 4.1889483065953655e-08, + "logits/chosen": -1.4318203926086426, + "logits/rejected": -1.4092633724212646, + "logps/chosen": -96.57569122314453, + "logps/rejected": -92.53756713867188, + "loss": 0.689, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.013475132174789906, + "rewards/margins": 0.009241009131073952, + "rewards/rejected": 0.0042341239750385284, + "step": 47 + }, + { + "epoch": 0.08, + "learning_rate": 4.27807486631016e-08, + "logits/chosen": -1.3281644582748413, + "logits/rejected": -1.2621127367019653, + "logps/chosen": -106.2555160522461, + "logps/rejected": -62.03329849243164, + "loss": 0.6884, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.01718731038272381, + "rewards/margins": 0.006827164441347122, + "rewards/rejected": -0.02401447296142578, + "step": 48 + }, + { + "epoch": 0.08, + "learning_rate": 4.367201426024955e-08, + "logits/chosen": -0.8626635074615479, + "logits/rejected": -0.9543948173522949, + "logps/chosen": -107.60574340820312, + "logps/rejected": -87.0836410522461, + "loss": 0.6903, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.00990066584199667, + "rewards/margins": -0.016774367541074753, + "rewards/rejected": 0.006873703561723232, + "step": 49 + }, + { + "epoch": 0.08, + "learning_rate": 4.45632798573975e-08, + "logits/chosen": -1.2819422483444214, + "logits/rejected": -1.2799181938171387, + "logps/chosen": -81.89445495605469, + "logps/rejected": -75.72430419921875, + "loss": 0.6953, + "rewards/accuracies": 0.25, + "rewards/chosen": -0.007124138064682484, + "rewards/margins": -0.01375818345695734, + "rewards/rejected": 0.006634045392274857, + "step": 50 + }, + { + "epoch": 0.08, + "learning_rate": 4.545454545454545e-08, + "logits/chosen": -1.1933066844940186, + "logits/rejected": -1.20717191696167, + "logps/chosen": -105.99375915527344, + "logps/rejected": -94.26728820800781, + "loss": 0.6908, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.05494384467601776, + "rewards/margins": 0.022523686289787292, + "rewards/rejected": 0.03242015838623047, + "step": 51 + }, + { + "epoch": 0.08, + "learning_rate": 4.634581105169341e-08, + "logits/chosen": -1.120680809020996, + "logits/rejected": -1.26053786277771, + "logps/chosen": -64.94696807861328, + "logps/rejected": -67.43417358398438, + "loss": 0.6895, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.018457412719726562, + "rewards/margins": 0.03609037399291992, + "rewards/rejected": -0.01763296127319336, + "step": 52 + }, + { + "epoch": 0.09, + "learning_rate": 4.723707664884135e-08, + "logits/chosen": -1.1840916872024536, + "logits/rejected": -1.2131097316741943, + "logps/chosen": -71.52344512939453, + "logps/rejected": -62.33109664916992, + "loss": 0.6948, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.019170094281435013, + "rewards/margins": 0.022496510297060013, + "rewards/rejected": -0.0033264162484556437, + "step": 53 + }, + { + "epoch": 0.09, + "learning_rate": 4.81283422459893e-08, + "logits/chosen": -1.202501654624939, + "logits/rejected": -1.224836826324463, + "logps/chosen": -72.78712463378906, + "logps/rejected": -66.86026000976562, + "loss": 0.7008, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.012299585156142712, + "rewards/margins": -0.004205942153930664, + "rewards/rejected": 0.01650552824139595, + "step": 54 + }, + { + "epoch": 0.09, + "learning_rate": 4.901960784313725e-08, + "logits/chosen": -1.2900843620300293, + "logits/rejected": -1.295175313949585, + "logps/chosen": -87.58349609375, + "logps/rejected": -96.93292999267578, + "loss": 0.691, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.0020314217545092106, + "rewards/margins": -0.009500597603619099, + "rewards/rejected": 0.011532019823789597, + "step": 55 + }, + { + "epoch": 0.09, + "learning_rate": 4.9910873440285203e-08, + "logits/chosen": -1.2029945850372314, + "logits/rejected": -1.1422687768936157, + "logps/chosen": -84.79794311523438, + "logps/rejected": -100.21464538574219, + "loss": 0.6949, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.006718254182487726, + "rewards/margins": -0.00048532476648688316, + "rewards/rejected": 0.007203578948974609, + "step": 56 + }, + { + "epoch": 0.09, + "learning_rate": 5.0802139037433154e-08, + "logits/chosen": -1.20074462890625, + "logits/rejected": -1.2153781652450562, + "logps/chosen": -79.45578002929688, + "logps/rejected": -78.18204498291016, + "loss": 0.697, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.0031892764382064342, + "rewards/margins": -0.0035653107333928347, + "rewards/rejected": 0.00037603359669446945, + "step": 57 + }, + { + "epoch": 0.09, + "learning_rate": 5.1693404634581104e-08, + "logits/chosen": -1.1200675964355469, + "logits/rejected": -1.1578197479248047, + "logps/chosen": -75.36161804199219, + "logps/rejected": -93.00215911865234, + "loss": 0.6916, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.005454349331557751, + "rewards/margins": 0.011344622820615768, + "rewards/rejected": -0.016798973083496094, + "step": 58 + }, + { + "epoch": 0.09, + "learning_rate": 5.258467023172905e-08, + "logits/chosen": -1.4972758293151855, + "logits/rejected": -1.517890214920044, + "logps/chosen": -73.85816192626953, + "logps/rejected": -91.58338928222656, + "loss": 0.6976, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.009135151281952858, + "rewards/margins": -0.0029885279946029186, + "rewards/rejected": -0.006146621890366077, + "step": 59 + }, + { + "epoch": 0.1, + "learning_rate": 5.3475935828877e-08, + "logits/chosen": -1.1440531015396118, + "logits/rejected": -1.2267813682556152, + "logps/chosen": -78.04267883300781, + "logps/rejected": -75.09068298339844, + "loss": 0.6968, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.0037278179079294205, + "rewards/margins": 0.0017625801265239716, + "rewards/rejected": -0.005490398034453392, + "step": 60 + }, + { + "epoch": 0.1, + "learning_rate": 5.4367201426024956e-08, + "logits/chosen": -1.3389140367507935, + "logits/rejected": -1.2966046333312988, + "logps/chosen": -111.0379867553711, + "logps/rejected": -92.76790618896484, + "loss": 0.6947, + "rewards/accuracies": 0.25, + "rewards/chosen": -0.009166527539491653, + "rewards/margins": -0.0020053863991051912, + "rewards/rejected": -0.007161140441894531, + "step": 61 + }, + { + "epoch": 0.1, + "learning_rate": 5.5258467023172907e-08, + "logits/chosen": -1.1153532266616821, + "logits/rejected": -1.08543860912323, + "logps/chosen": -73.2496337890625, + "logps/rejected": -69.35696411132812, + "loss": 0.6946, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.010410403832793236, + "rewards/margins": 0.0034862528555095196, + "rewards/rejected": -0.013896657153964043, + "step": 62 + }, + { + "epoch": 0.1, + "learning_rate": 5.614973262032086e-08, + "logits/chosen": -1.1505615711212158, + "logits/rejected": -1.1815423965454102, + "logps/chosen": -81.41874694824219, + "logps/rejected": -80.91473388671875, + "loss": 0.6928, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0011878013610839844, + "rewards/margins": 0.016660023480653763, + "rewards/rejected": -0.015472222119569778, + "step": 63 + }, + { + "epoch": 0.1, + "learning_rate": 5.70409982174688e-08, + "logits/chosen": -1.2964575290679932, + "logits/rejected": -1.2357640266418457, + "logps/chosen": -76.82340240478516, + "logps/rejected": -81.20909881591797, + "loss": 0.6943, + "rewards/accuracies": 0.25, + "rewards/chosen": -0.012250232510268688, + "rewards/margins": -0.016117095947265625, + "rewards/rejected": 0.003866863902658224, + "step": 64 + }, + { + "epoch": 0.1, + "learning_rate": 5.793226381461675e-08, + "logits/chosen": -1.2380067110061646, + "logits/rejected": -1.1993043422698975, + "logps/chosen": -84.76339721679688, + "logps/rejected": -101.11196899414062, + "loss": 0.696, + "rewards/accuracies": 0.25, + "rewards/chosen": 0.0003474232507869601, + "rewards/margins": -0.007950305938720703, + "rewards/rejected": 0.008297729305922985, + "step": 65 + }, + { + "epoch": 0.11, + "learning_rate": 5.88235294117647e-08, + "logits/chosen": -1.3593496084213257, + "logits/rejected": -1.4487707614898682, + "logps/chosen": -84.22575378417969, + "logps/rejected": -76.25640869140625, + "loss": 0.6904, + "rewards/accuracies": 0.25, + "rewards/chosen": -0.008716392330825329, + "rewards/margins": -0.010149955749511719, + "rewards/rejected": 0.0014335631858557463, + "step": 66 + }, + { + "epoch": 0.11, + "learning_rate": 5.971479500891265e-08, + "logits/chosen": -1.190024733543396, + "logits/rejected": -1.196122407913208, + "logps/chosen": -72.794677734375, + "logps/rejected": -77.54939270019531, + "loss": 0.6928, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.009924030862748623, + "rewards/margins": 8.163508027791977e-05, + "rewards/rejected": 0.009842395782470703, + "step": 67 + }, + { + "epoch": 0.11, + "learning_rate": 6.060606060606061e-08, + "logits/chosen": -1.1421548128128052, + "logits/rejected": -1.1087861061096191, + "logps/chosen": -97.81107330322266, + "logps/rejected": -84.20929718017578, + "loss": 0.6926, + "rewards/accuracies": 0.25, + "rewards/chosen": -0.005739974789321423, + "rewards/margins": -0.01303710974752903, + "rewards/rejected": 0.00729713449254632, + "step": 68 + }, + { + "epoch": 0.11, + "learning_rate": 6.149732620320855e-08, + "logits/chosen": -1.3280189037322998, + "logits/rejected": -1.3729376792907715, + "logps/chosen": -71.10903930664062, + "logps/rejected": -93.05481719970703, + "loss": 0.6936, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.004936314187943935, + "rewards/margins": 0.0032421117648482323, + "rewards/rejected": 0.0016942024230957031, + "step": 69 + }, + { + "epoch": 0.11, + "learning_rate": 6.23885918003565e-08, + "logits/chosen": -1.2318353652954102, + "logits/rejected": -1.2696576118469238, + "logps/chosen": -87.1533203125, + "logps/rejected": -79.95406341552734, + "loss": 0.6946, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.008516121655702591, + "rewards/margins": -0.006702518090605736, + "rewards/rejected": -0.0018136021681129932, + "step": 70 + }, + { + "epoch": 0.11, + "learning_rate": 6.327985739750445e-08, + "logits/chosen": -1.1851850748062134, + "logits/rejected": -1.1961557865142822, + "logps/chosen": -88.64912414550781, + "logps/rejected": -97.94415283203125, + "loss": 0.6984, + "rewards/accuracies": 0.25, + "rewards/chosen": -0.019295310601592064, + "rewards/margins": -0.014165686443448067, + "rewards/rejected": -0.00512962369248271, + "step": 71 + }, + { + "epoch": 0.12, + "learning_rate": 6.417112299465241e-08, + "logits/chosen": -1.3361687660217285, + "logits/rejected": -1.2918274402618408, + "logps/chosen": -72.51200866699219, + "logps/rejected": -94.09864044189453, + "loss": 0.6945, + "rewards/accuracies": 0.25, + "rewards/chosen": 0.002146434970200062, + "rewards/margins": -0.007274341303855181, + "rewards/rejected": 0.00942077673971653, + "step": 72 + }, + { + "epoch": 0.12, + "learning_rate": 6.506238859180036e-08, + "logits/chosen": -1.173933982849121, + "logits/rejected": -1.243645191192627, + "logps/chosen": -98.51423645019531, + "logps/rejected": -77.10397338867188, + "loss": 0.6948, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.013683319091796875, + "rewards/margins": 0.0037531850393861532, + "rewards/rejected": 0.009930133819580078, + "step": 73 + }, + { + "epoch": 0.12, + "learning_rate": 6.59536541889483e-08, + "logits/chosen": -1.320555329322815, + "logits/rejected": -1.2917400598526, + "logps/chosen": -100.91671752929688, + "logps/rejected": -86.16136169433594, + "loss": 0.6944, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.022674178704619408, + "rewards/margins": -0.011483382433652878, + "rewards/rejected": -0.01119079627096653, + "step": 74 + }, + { + "epoch": 0.12, + "learning_rate": 6.684491978609626e-08, + "logits/chosen": -1.1561110019683838, + "logits/rejected": -1.2179300785064697, + "logps/chosen": -76.69493103027344, + "logps/rejected": -73.7353286743164, + "loss": 0.7003, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.000678634736686945, + "rewards/margins": 0.00713272113353014, + "rewards/rejected": -0.006454085931181908, + "step": 75 + }, + { + "epoch": 0.12, + "learning_rate": 6.77361853832442e-08, + "logits/chosen": -0.9949007034301758, + "logits/rejected": -1.088796854019165, + "logps/chosen": -73.66630554199219, + "logps/rejected": -88.02740478515625, + "loss": 0.6931, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.016364479437470436, + "rewards/margins": 0.0032363897189497948, + "rewards/rejected": 0.013128089718520641, + "step": 76 + }, + { + "epoch": 0.12, + "learning_rate": 6.862745098039216e-08, + "logits/chosen": -1.1558034420013428, + "logits/rejected": -1.074524164199829, + "logps/chosen": -92.88443756103516, + "logps/rejected": -97.43186950683594, + "loss": 0.6871, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.012816810049116611, + "rewards/margins": 0.026808548718690872, + "rewards/rejected": -0.013991737738251686, + "step": 77 + }, + { + "epoch": 0.13, + "learning_rate": 6.95187165775401e-08, + "logits/chosen": -1.161596417427063, + "logits/rejected": -1.1057379245758057, + "logps/chosen": -111.82357788085938, + "logps/rejected": -101.24547576904297, + "loss": 0.69, + "rewards/accuracies": 0.25, + "rewards/chosen": -0.016997529193758965, + "rewards/margins": -0.002710532397031784, + "rewards/rejected": -0.014286995865404606, + "step": 78 + }, + { + "epoch": 0.13, + "learning_rate": 7.040998217468805e-08, + "logits/chosen": -1.1989147663116455, + "logits/rejected": -1.229402780532837, + "logps/chosen": -102.84278106689453, + "logps/rejected": -109.17547607421875, + "loss": 0.6888, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.0013053894508630037, + "rewards/margins": -0.008565520867705345, + "rewards/rejected": 0.007260132115334272, + "step": 79 + }, + { + "epoch": 0.13, + "learning_rate": 7.1301247771836e-08, + "logits/chosen": -1.3043824434280396, + "logits/rejected": -1.3470394611358643, + "logps/chosen": -101.53778076171875, + "logps/rejected": -98.79399871826172, + "loss": 0.7023, + "rewards/accuracies": 0.25, + "rewards/chosen": -0.026903916150331497, + "rewards/margins": -0.04309577867388725, + "rewards/rejected": 0.016191862523555756, + "step": 80 + }, + { + "epoch": 0.13, + "learning_rate": 7.219251336898395e-08, + "logits/chosen": -1.2793223857879639, + "logits/rejected": -1.3029398918151855, + "logps/chosen": -102.49290466308594, + "logps/rejected": -95.17884826660156, + "loss": 0.6945, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.008845615200698376, + "rewards/margins": 0.008823300711810589, + "rewards/rejected": -0.017668915912508965, + "step": 81 + }, + { + "epoch": 0.13, + "learning_rate": 7.30837789661319e-08, + "logits/chosen": -1.2792365550994873, + "logits/rejected": -1.343281626701355, + "logps/chosen": -83.31664276123047, + "logps/rejected": -81.52680969238281, + "loss": 0.6909, + "rewards/accuracies": 0.25, + "rewards/chosen": 0.00019426352810114622, + "rewards/margins": -0.009444141760468483, + "rewards/rejected": 0.009638404473662376, + "step": 82 + }, + { + "epoch": 0.13, + "learning_rate": 7.397504456327985e-08, + "logits/chosen": -1.191178798675537, + "logits/rejected": -1.2203766107559204, + "logps/chosen": -99.72430419921875, + "logps/rejected": -99.75408935546875, + "loss": 0.6925, + "rewards/accuracies": 0.25, + "rewards/chosen": -0.013767242431640625, + "rewards/margins": -0.015200234018266201, + "rewards/rejected": 0.0014329918194562197, + "step": 83 + }, + { + "epoch": 0.13, + "learning_rate": 7.486631016042779e-08, + "logits/chosen": -1.2354995012283325, + "logits/rejected": -1.320689082145691, + "logps/chosen": -83.50981140136719, + "logps/rejected": -93.85690307617188, + "loss": 0.6841, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.012112140655517578, + "rewards/margins": 0.022345637902617455, + "rewards/rejected": -0.010233497247099876, + "step": 84 + }, + { + "epoch": 0.14, + "learning_rate": 7.575757575757576e-08, + "logits/chosen": -1.0114192962646484, + "logits/rejected": -1.1006252765655518, + "logps/chosen": -61.200103759765625, + "logps/rejected": -76.0113525390625, + "loss": 0.6963, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.0027909281197935343, + "rewards/margins": 0.007663249969482422, + "rewards/rejected": -0.010454177856445312, + "step": 85 + }, + { + "epoch": 0.14, + "learning_rate": 7.66488413547237e-08, + "logits/chosen": -1.3288710117340088, + "logits/rejected": -1.2539799213409424, + "logps/chosen": -65.80592346191406, + "logps/rejected": -63.5286865234375, + "loss": 0.6875, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0201434139162302, + "rewards/margins": 0.023700810968875885, + "rewards/rejected": -0.0035573961213231087, + "step": 86 + }, + { + "epoch": 0.14, + "learning_rate": 7.754010695187166e-08, + "logits/chosen": -1.0129657983779907, + "logits/rejected": -1.0055696964263916, + "logps/chosen": -51.792137145996094, + "logps/rejected": -69.52151489257812, + "loss": 0.6963, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.014261627569794655, + "rewards/margins": -0.006102276034653187, + "rewards/rejected": -0.008159352466464043, + "step": 87 + }, + { + "epoch": 0.14, + "learning_rate": 7.843137254901961e-08, + "logits/chosen": -1.0171966552734375, + "logits/rejected": -0.9589600563049316, + "logps/chosen": -89.1555404663086, + "logps/rejected": -84.46793365478516, + "loss": 0.6917, + "rewards/accuracies": 0.25, + "rewards/chosen": -0.004412651062011719, + "rewards/margins": -0.008263587020337582, + "rewards/rejected": 0.0038509368896484375, + "step": 88 + }, + { + "epoch": 0.14, + "learning_rate": 7.932263814616755e-08, + "logits/chosen": -1.095560908317566, + "logits/rejected": -1.1174005270004272, + "logps/chosen": -84.72117614746094, + "logps/rejected": -98.22677612304688, + "loss": 0.6961, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.00022869138047099113, + "rewards/margins": 0.0016847611404955387, + "rewards/rejected": -0.0014560702256858349, + "step": 89 + }, + { + "epoch": 0.14, + "learning_rate": 8.021390374331551e-08, + "logits/chosen": -1.3894323110580444, + "logits/rejected": -1.3977967500686646, + "logps/chosen": -91.18753051757812, + "logps/rejected": -86.59329223632812, + "loss": 0.6908, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.036875344812870026, + "rewards/margins": 0.044654086232185364, + "rewards/rejected": -0.007778740022331476, + "step": 90 + }, + { + "epoch": 0.15, + "learning_rate": 8.110516934046345e-08, + "logits/chosen": -1.090181589126587, + "logits/rejected": -1.0431842803955078, + "logps/chosen": -72.19103240966797, + "logps/rejected": -67.673095703125, + "loss": 0.6954, + "rewards/accuracies": 0.25, + "rewards/chosen": 0.0002225879579782486, + "rewards/margins": -0.009957600384950638, + "rewards/rejected": 0.010180187411606312, + "step": 91 + }, + { + "epoch": 0.15, + "learning_rate": 8.199643493761141e-08, + "logits/chosen": -1.1848105192184448, + "logits/rejected": -1.1699137687683105, + "logps/chosen": -82.49169158935547, + "logps/rejected": -78.44300079345703, + "loss": 0.6897, + "rewards/accuracies": 0.25, + "rewards/chosen": -0.01538839377462864, + "rewards/margins": -0.03306303173303604, + "rewards/rejected": 0.017674636095762253, + "step": 92 + }, + { + "epoch": 0.15, + "learning_rate": 8.288770053475935e-08, + "logits/chosen": -1.3438172340393066, + "logits/rejected": -1.2754483222961426, + "logps/chosen": -78.58428192138672, + "logps/rejected": -101.5166244506836, + "loss": 0.6904, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.013726996257901192, + "rewards/margins": -0.02110939286649227, + "rewards/rejected": 0.007382392883300781, + "step": 93 + }, + { + "epoch": 0.15, + "learning_rate": 8.377896613190731e-08, + "logits/chosen": -1.277827262878418, + "logits/rejected": -1.3513429164886475, + "logps/chosen": -65.3743896484375, + "logps/rejected": -80.37142944335938, + "loss": 0.6947, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.005968189332634211, + "rewards/margins": -0.0009260177612304688, + "rewards/rejected": 0.006894207559525967, + "step": 94 + }, + { + "epoch": 0.15, + "learning_rate": 8.467023172905525e-08, + "logits/chosen": -1.2165343761444092, + "logits/rejected": -1.132431983947754, + "logps/chosen": -59.45248794555664, + "logps/rejected": -70.6431884765625, + "loss": 0.6902, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.014494800940155983, + "rewards/margins": -0.03155374526977539, + "rewards/rejected": 0.017058944329619408, + "step": 95 + }, + { + "epoch": 0.15, + "learning_rate": 8.55614973262032e-08, + "logits/chosen": -1.2969179153442383, + "logits/rejected": -1.3179165124893188, + "logps/chosen": -48.20478439331055, + "logps/rejected": -67.55217742919922, + "loss": 0.6981, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.010920858941972256, + "rewards/margins": -0.016322661191225052, + "rewards/rejected": 0.005401803180575371, + "step": 96 + }, + { + "epoch": 0.16, + "learning_rate": 8.645276292335116e-08, + "logits/chosen": -1.0689046382904053, + "logits/rejected": -1.1024280786514282, + "logps/chosen": -123.50025939941406, + "logps/rejected": -101.05804443359375, + "loss": 0.6934, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.021617889404296875, + "rewards/margins": 0.003183746710419655, + "rewards/rejected": 0.01843414269387722, + "step": 97 + }, + { + "epoch": 0.16, + "learning_rate": 8.73440285204991e-08, + "logits/chosen": -1.0501301288604736, + "logits/rejected": -1.1594352722167969, + "logps/chosen": -81.53727722167969, + "logps/rejected": -104.27888488769531, + "loss": 0.6938, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.006105995737016201, + "rewards/margins": -0.017554093152284622, + "rewards/rejected": 0.023660089820623398, + "step": 98 + }, + { + "epoch": 0.16, + "learning_rate": 8.823529411764706e-08, + "logits/chosen": -1.2999415397644043, + "logits/rejected": -1.309904932975769, + "logps/chosen": -68.355712890625, + "logps/rejected": -83.71822357177734, + "loss": 0.6954, + "rewards/accuracies": 0.25, + "rewards/chosen": -0.0024814605712890625, + "rewards/margins": -0.011721325106918812, + "rewards/rejected": 0.00923986453562975, + "step": 99 + }, + { + "epoch": 0.16, + "learning_rate": 8.9126559714795e-08, + "logits/chosen": -1.1235615015029907, + "logits/rejected": -1.137649416923523, + "logps/chosen": -94.91709899902344, + "logps/rejected": -105.24687194824219, + "loss": 0.6904, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.0030929564964026213, + "rewards/margins": 0.007976721972227097, + "rewards/rejected": -0.004883766174316406, + "step": 100 + }, + { + "epoch": 0.16, + "learning_rate": 9.001782531194294e-08, + "logits/chosen": -1.1797455549240112, + "logits/rejected": -1.1092474460601807, + "logps/chosen": -84.97235107421875, + "logps/rejected": -67.17857360839844, + "loss": 0.6977, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.017327498644590378, + "rewards/margins": 0.028324604034423828, + "rewards/rejected": -0.010997104458510876, + "step": 101 + }, + { + "epoch": 0.16, + "learning_rate": 9.09090909090909e-08, + "logits/chosen": -1.191285490989685, + "logits/rejected": -1.1985759735107422, + "logps/chosen": -82.893310546875, + "logps/rejected": -72.94395446777344, + "loss": 0.6897, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.007577705197036266, + "rewards/margins": 0.015474701300263405, + "rewards/rejected": -0.023052407428622246, + "step": 102 + }, + { + "epoch": 0.17, + "learning_rate": 9.180035650623885e-08, + "logits/chosen": -1.247119426727295, + "logits/rejected": -1.1441011428833008, + "logps/chosen": -72.40908813476562, + "logps/rejected": -86.26667785644531, + "loss": 0.699, + "rewards/accuracies": 0.25, + "rewards/chosen": 0.01435003336519003, + "rewards/margins": -0.0020779615733772516, + "rewards/rejected": 0.016427993774414062, + "step": 103 + }, + { + "epoch": 0.17, + "learning_rate": 9.269162210338682e-08, + "logits/chosen": -1.253834843635559, + "logits/rejected": -1.2230826616287231, + "logps/chosen": -60.051551818847656, + "logps/rejected": -66.02203369140625, + "loss": 0.6903, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.006067848764359951, + "rewards/margins": 0.010862255468964577, + "rewards/rejected": -0.004794406238943338, + "step": 104 + }, + { + "epoch": 0.17, + "learning_rate": 9.358288770053476e-08, + "logits/chosen": -1.2417004108428955, + "logits/rejected": -1.2845187187194824, + "logps/chosen": -71.45110321044922, + "logps/rejected": -75.50022888183594, + "loss": 0.6933, + "rewards/accuracies": 0.25, + "rewards/chosen": -0.0034451482351869345, + "rewards/margins": -0.006827355362474918, + "rewards/rejected": 0.0033822059631347656, + "step": 105 + }, + { + "epoch": 0.17, + "learning_rate": 9.44741532976827e-08, + "logits/chosen": -1.2560675144195557, + "logits/rejected": -1.3176476955413818, + "logps/chosen": -72.40788269042969, + "logps/rejected": -65.07300567626953, + "loss": 0.6931, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.007790183648467064, + "rewards/margins": 0.002993679838255048, + "rewards/rejected": 0.004796504974365234, + "step": 106 + }, + { + "epoch": 0.17, + "learning_rate": 9.536541889483066e-08, + "logits/chosen": -1.268344521522522, + "logits/rejected": -1.2234901189804077, + "logps/chosen": -96.00572204589844, + "logps/rejected": -87.59380340576172, + "loss": 0.695, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.010178660973906517, + "rewards/margins": -0.0005763047374784946, + "rewards/rejected": -0.00960235670208931, + "step": 107 + }, + { + "epoch": 0.17, + "learning_rate": 9.62566844919786e-08, + "logits/chosen": -0.9771088361740112, + "logits/rejected": -1.0272163152694702, + "logps/chosen": -89.22775268554688, + "logps/rejected": -75.48786163330078, + "loss": 0.6916, + "rewards/accuracies": 0.25, + "rewards/chosen": -0.006131744012236595, + "rewards/margins": -0.019008636474609375, + "rewards/rejected": 0.01287689246237278, + "step": 108 + }, + { + "epoch": 0.17, + "learning_rate": 9.714795008912656e-08, + "logits/chosen": -1.2823981046676636, + "logits/rejected": -1.2501758337020874, + "logps/chosen": -121.24838256835938, + "logps/rejected": -106.70596313476562, + "loss": 0.696, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.013632584363222122, + "rewards/margins": 0.009738731198012829, + "rewards/rejected": 0.003893852699548006, + "step": 109 + }, + { + "epoch": 0.18, + "learning_rate": 9.80392156862745e-08, + "logits/chosen": -1.1504831314086914, + "logits/rejected": -1.1097204685211182, + "logps/chosen": -58.84628677368164, + "logps/rejected": -70.464111328125, + "loss": 0.6953, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.01100912131369114, + "rewards/margins": -0.021300887688994408, + "rewards/rejected": 0.010291767306625843, + "step": 110 + }, + { + "epoch": 0.18, + "learning_rate": 9.893048128342246e-08, + "logits/chosen": -1.1563762426376343, + "logits/rejected": -1.16436767578125, + "logps/chosen": -86.93107604980469, + "logps/rejected": -77.52449035644531, + "loss": 0.6934, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.00881652906537056, + "rewards/margins": -0.02929544448852539, + "rewards/rejected": 0.02047891542315483, + "step": 111 + }, + { + "epoch": 0.18, + "learning_rate": 9.982174688057041e-08, + "logits/chosen": -1.1488101482391357, + "logits/rejected": -1.084565281867981, + "logps/chosen": -73.11860656738281, + "logps/rejected": -71.21179962158203, + "loss": 0.6944, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.018657494336366653, + "rewards/margins": -0.04843597859144211, + "rewards/rejected": 0.029778478667140007, + "step": 112 + }, + { + "epoch": 0.18, + "learning_rate": 1.0071301247771835e-07, + "logits/chosen": -1.1814026832580566, + "logits/rejected": -1.1518394947052002, + "logps/chosen": -73.14173889160156, + "logps/rejected": -64.669921875, + "loss": 0.6942, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.015314674004912376, + "rewards/margins": -0.011362170800566673, + "rewards/rejected": -0.003952503204345703, + "step": 113 + }, + { + "epoch": 0.18, + "learning_rate": 1.0160427807486631e-07, + "logits/chosen": -1.1952779293060303, + "logits/rejected": -1.2421295642852783, + "logps/chosen": -79.21939086914062, + "logps/rejected": -65.20329284667969, + "loss": 0.6868, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.002678298857063055, + "rewards/margins": 0.008234024979174137, + "rewards/rejected": -0.01091232243925333, + "step": 114 + }, + { + "epoch": 0.18, + "learning_rate": 1.0249554367201425e-07, + "logits/chosen": -1.3426727056503296, + "logits/rejected": -1.3321638107299805, + "logps/chosen": -90.40554809570312, + "logps/rejected": -81.71943664550781, + "loss": 0.6888, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0161882396787405, + "rewards/margins": 0.03692302852869034, + "rewards/rejected": -0.020734786987304688, + "step": 115 + }, + { + "epoch": 0.19, + "learning_rate": 1.0338680926916221e-07, + "logits/chosen": -1.1115459203720093, + "logits/rejected": -1.146256685256958, + "logps/chosen": -80.80537414550781, + "logps/rejected": -65.19287109375, + "loss": 0.6855, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.02475748211145401, + "rewards/margins": 0.018207073211669922, + "rewards/rejected": 0.0065504079684615135, + "step": 116 + }, + { + "epoch": 0.19, + "learning_rate": 1.0427807486631015e-07, + "logits/chosen": -1.0816545486450195, + "logits/rejected": -1.0843591690063477, + "logps/chosen": -73.26416015625, + "logps/rejected": -81.66397094726562, + "loss": 0.6944, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.027614975348114967, + "rewards/margins": 0.03924961015582085, + "rewards/rejected": -0.011634635739028454, + "step": 117 + }, + { + "epoch": 0.19, + "learning_rate": 1.051693404634581e-07, + "logits/chosen": -1.1614558696746826, + "logits/rejected": -1.1129220724105835, + "logps/chosen": -82.39322662353516, + "logps/rejected": -84.50247955322266, + "loss": 0.6958, + "rewards/accuracies": 0.25, + "rewards/chosen": -0.002945899497717619, + "rewards/margins": -0.011921119876205921, + "rewards/rejected": 0.008975219912827015, + "step": 118 + }, + { + "epoch": 0.19, + "learning_rate": 1.0606060606060605e-07, + "logits/chosen": -1.004603385925293, + "logits/rejected": -1.0580523014068604, + "logps/chosen": -87.60785675048828, + "logps/rejected": -82.76498413085938, + "loss": 0.6884, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.002479648683220148, + "rewards/margins": 0.006744003854691982, + "rewards/rejected": -0.0042643556371331215, + "step": 119 + }, + { + "epoch": 0.19, + "learning_rate": 1.06951871657754e-07, + "logits/chosen": -1.127279281616211, + "logits/rejected": -1.243972659111023, + "logps/chosen": -98.51547241210938, + "logps/rejected": -73.45686340332031, + "loss": 0.6975, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.006734561175107956, + "rewards/margins": 0.0008148197084665298, + "rewards/rejected": 0.005919742397964001, + "step": 120 + }, + { + "epoch": 0.19, + "learning_rate": 1.0784313725490195e-07, + "logits/chosen": -1.1682674884796143, + "logits/rejected": -1.2230191230773926, + "logps/chosen": -92.4317626953125, + "logps/rejected": -68.97154998779297, + "loss": 0.6926, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.014802360907196999, + "rewards/margins": -0.008460521697998047, + "rewards/rejected": -0.006341838743537664, + "step": 121 + }, + { + "epoch": 0.2, + "learning_rate": 1.0873440285204991e-07, + "logits/chosen": -1.3279443979263306, + "logits/rejected": -1.3496955633163452, + "logps/chosen": -96.37141418457031, + "logps/rejected": -83.9437255859375, + "loss": 0.6916, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.022973060607910156, + "rewards/margins": 0.009210778400301933, + "rewards/rejected": 0.013762283138930798, + "step": 122 + }, + { + "epoch": 0.2, + "learning_rate": 1.0962566844919786e-07, + "logits/chosen": -1.1670727729797363, + "logits/rejected": -1.1303013563156128, + "logps/chosen": -89.6489028930664, + "logps/rejected": -90.10624694824219, + "loss": 0.6922, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.006630897521972656, + "rewards/margins": 0.012310028076171875, + "rewards/rejected": -0.005679131019860506, + "step": 123 + }, + { + "epoch": 0.2, + "learning_rate": 1.1051693404634581e-07, + "logits/chosen": -1.1634020805358887, + "logits/rejected": -1.223006010055542, + "logps/chosen": -81.78547668457031, + "logps/rejected": -81.63375854492188, + "loss": 0.6909, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.011829186230897903, + "rewards/margins": 0.02196311764419079, + "rewards/rejected": -0.03379230573773384, + "step": 124 + }, + { + "epoch": 0.2, + "learning_rate": 1.1140819964349376e-07, + "logits/chosen": -1.1678276062011719, + "logits/rejected": -1.1843390464782715, + "logps/chosen": -73.96417236328125, + "logps/rejected": -79.01814270019531, + "loss": 0.6849, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.007498741615563631, + "rewards/margins": 0.01339798141270876, + "rewards/rejected": -0.005899238400161266, + "step": 125 + }, + { + "epoch": 0.2, + "learning_rate": 1.1229946524064171e-07, + "logits/chosen": -1.1589395999908447, + "logits/rejected": -1.1751755475997925, + "logps/chosen": -86.82635498046875, + "logps/rejected": -95.4218521118164, + "loss": 0.6969, + "rewards/accuracies": 0.25, + "rewards/chosen": -0.020419882610440254, + "rewards/margins": -0.01981792412698269, + "rewards/rejected": -0.0006019594147801399, + "step": 126 + }, + { + "epoch": 0.2, + "learning_rate": 1.1319073083778966e-07, + "logits/chosen": -1.6151647567749023, + "logits/rejected": -1.646388053894043, + "logps/chosen": -67.51033782958984, + "logps/rejected": -72.9522933959961, + "loss": 0.6859, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.022501755505800247, + "rewards/margins": 0.024559784680604935, + "rewards/rejected": -0.0020580291748046875, + "step": 127 + }, + { + "epoch": 0.21, + "learning_rate": 1.140819964349376e-07, + "logits/chosen": -1.1699700355529785, + "logits/rejected": -1.0814133882522583, + "logps/chosen": -91.83600616455078, + "logps/rejected": -86.32220458984375, + "loss": 0.6947, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.0037170411087572575, + "rewards/margins": -0.0029117583762854338, + "rewards/rejected": 0.006628799252212048, + "step": 128 + }, + { + "epoch": 0.21, + "learning_rate": 1.1497326203208556e-07, + "logits/chosen": -1.2395000457763672, + "logits/rejected": -1.1810842752456665, + "logps/chosen": -80.84342956542969, + "logps/rejected": -110.77255249023438, + "loss": 0.6907, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.03232545778155327, + "rewards/margins": -0.0032841686625033617, + "rewards/rejected": 0.03560962527990341, + "step": 129 + }, + { + "epoch": 0.21, + "learning_rate": 1.158645276292335e-07, + "logits/chosen": -1.1515140533447266, + "logits/rejected": -1.248138189315796, + "logps/chosen": -76.70220947265625, + "logps/rejected": -62.8144416809082, + "loss": 0.6872, + "rewards/accuracies": 0.25, + "rewards/chosen": -0.003283405676484108, + "rewards/margins": -0.0019655227661132812, + "rewards/rejected": -0.001317883376032114, + "step": 130 + }, + { + "epoch": 0.21, + "learning_rate": 1.1675579322638146e-07, + "logits/chosen": -1.4005471467971802, + "logits/rejected": -1.2750332355499268, + "logps/chosen": -89.06306457519531, + "logps/rejected": -93.75791931152344, + "loss": 0.6889, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.015990447252988815, + "rewards/margins": 0.026848316192626953, + "rewards/rejected": -0.010857868939638138, + "step": 131 + }, + { + "epoch": 0.21, + "learning_rate": 1.176470588235294e-07, + "logits/chosen": -1.3139766454696655, + "logits/rejected": -1.2992300987243652, + "logps/chosen": -76.49496459960938, + "logps/rejected": -103.74681091308594, + "loss": 0.6981, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.0029782294295728207, + "rewards/margins": -0.02903452143073082, + "rewards/rejected": 0.02605629153549671, + "step": 132 + }, + { + "epoch": 0.21, + "learning_rate": 1.1853832442067736e-07, + "logits/chosen": -1.0782837867736816, + "logits/rejected": -1.0972421169281006, + "logps/chosen": -102.93182373046875, + "logps/rejected": -106.13312530517578, + "loss": 0.6918, + "rewards/accuracies": 0.25, + "rewards/chosen": 0.003751564072445035, + "rewards/margins": -0.013335609808564186, + "rewards/rejected": 0.017087174579501152, + "step": 133 + }, + { + "epoch": 0.22, + "learning_rate": 1.194295900178253e-07, + "logits/chosen": -1.4395631551742554, + "logits/rejected": -1.402905821800232, + "logps/chosen": -75.32859802246094, + "logps/rejected": -78.72920227050781, + "loss": 0.6913, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.001145745161920786, + "rewards/margins": 0.00497551029548049, + "rewards/rejected": -0.003829765133559704, + "step": 134 + }, + { + "epoch": 0.22, + "learning_rate": 1.2032085561497325e-07, + "logits/chosen": -1.0842761993408203, + "logits/rejected": -1.086230993270874, + "logps/chosen": -96.27215576171875, + "logps/rejected": -92.43170928955078, + "loss": 0.6918, + "rewards/accuracies": 0.25, + "rewards/chosen": -0.004030800424516201, + "rewards/margins": -0.02459125779569149, + "rewards/rejected": 0.020560454577207565, + "step": 135 + }, + { + "epoch": 0.22, + "learning_rate": 1.2121212121212122e-07, + "logits/chosen": -1.1672528982162476, + "logits/rejected": -1.1845693588256836, + "logps/chosen": -85.19649505615234, + "logps/rejected": -89.12590026855469, + "loss": 0.6897, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.009124755859375, + "rewards/margins": 0.01982726901769638, + "rewards/rejected": -0.01070251502096653, + "step": 136 + }, + { + "epoch": 0.22, + "learning_rate": 1.2210338680926916e-07, + "logits/chosen": -1.1850414276123047, + "logits/rejected": -1.1530171632766724, + "logps/chosen": -98.41848754882812, + "logps/rejected": -68.85673522949219, + "loss": 0.6884, + "rewards/accuracies": 0.25, + "rewards/chosen": -6.999960169196129e-05, + "rewards/margins": 0.005049895960837603, + "rewards/rejected": -0.005119895562529564, + "step": 137 + }, + { + "epoch": 0.22, + "learning_rate": 1.229946524064171e-07, + "logits/chosen": -1.3228200674057007, + "logits/rejected": -1.2559106349945068, + "logps/chosen": -106.5719223022461, + "logps/rejected": -97.08259582519531, + "loss": 0.6932, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.003446007613092661, + "rewards/margins": 0.008224296383559704, + "rewards/rejected": -0.011670304462313652, + "step": 138 + }, + { + "epoch": 0.22, + "learning_rate": 1.2388591800356505e-07, + "logits/chosen": -1.1929320096969604, + "logits/rejected": -1.1778711080551147, + "logps/chosen": -112.883544921875, + "logps/rejected": -110.8670425415039, + "loss": 0.6946, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.00234222412109375, + "rewards/margins": 0.002154732123017311, + "rewards/rejected": 0.0001874919980764389, + "step": 139 + }, + { + "epoch": 0.22, + "learning_rate": 1.24777183600713e-07, + "logits/chosen": -1.2298780679702759, + "logits/rejected": -1.208550214767456, + "logps/chosen": -85.53558349609375, + "logps/rejected": -96.42930603027344, + "loss": 0.696, + "rewards/accuracies": 0.25, + "rewards/chosen": 0.014610577374696732, + "rewards/margins": -0.015222837217152119, + "rewards/rejected": 0.029833413660526276, + "step": 140 + }, + { + "epoch": 0.23, + "learning_rate": 1.2566844919786097e-07, + "logits/chosen": -1.1011197566986084, + "logits/rejected": -1.103609323501587, + "logps/chosen": -103.02680969238281, + "logps/rejected": -103.75904846191406, + "loss": 0.6966, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.012478637509047985, + "rewards/margins": 0.002953434595838189, + "rewards/rejected": -0.015432070940732956, + "step": 141 + }, + { + "epoch": 0.23, + "learning_rate": 1.265597147950089e-07, + "logits/chosen": -1.2788375616073608, + "logits/rejected": -1.321542739868164, + "logps/chosen": -78.17115020751953, + "logps/rejected": -81.06138610839844, + "loss": 0.6854, + "rewards/accuracies": 0.25, + "rewards/chosen": -0.0042650229297578335, + "rewards/margins": 0.014875983819365501, + "rewards/rejected": -0.019141007214784622, + "step": 142 + }, + { + "epoch": 0.23, + "learning_rate": 1.2745098039215685e-07, + "logits/chosen": -1.1079347133636475, + "logits/rejected": -1.0206882953643799, + "logps/chosen": -85.5785140991211, + "logps/rejected": -84.78573608398438, + "loss": 0.6971, + "rewards/accuracies": 0.25, + "rewards/chosen": -0.04109363630414009, + "rewards/margins": -0.0006904592737555504, + "rewards/rejected": -0.040403176099061966, + "step": 143 + }, + { + "epoch": 0.23, + "learning_rate": 1.2834224598930482e-07, + "logits/chosen": -1.1014010906219482, + "logits/rejected": -1.1406495571136475, + "logps/chosen": -60.63249206542969, + "logps/rejected": -55.480812072753906, + "loss": 0.694, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.00833749771118164, + "rewards/margins": -0.022405626252293587, + "rewards/rejected": 0.014068126678466797, + "step": 144 + }, + { + "epoch": 0.23, + "learning_rate": 1.2923351158645277e-07, + "logits/chosen": -1.2830541133880615, + "logits/rejected": -1.2623753547668457, + "logps/chosen": -80.148681640625, + "logps/rejected": -73.31366729736328, + "loss": 0.6918, + "rewards/accuracies": 0.25, + "rewards/chosen": 0.0032028192654252052, + "rewards/margins": 0.0021616918966174126, + "rewards/rejected": 0.0010411262046545744, + "step": 145 + }, + { + "epoch": 0.23, + "learning_rate": 1.301247771836007e-07, + "logits/chosen": -1.1449625492095947, + "logits/rejected": -1.1683745384216309, + "logps/chosen": -60.46405029296875, + "logps/rejected": -72.65692138671875, + "loss": 0.6816, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.03136143833398819, + "rewards/margins": 0.03643498569726944, + "rewards/rejected": -0.00507354736328125, + "step": 146 + }, + { + "epoch": 0.24, + "learning_rate": 1.3101604278074866e-07, + "logits/chosen": -1.2042714357376099, + "logits/rejected": -1.1847949028015137, + "logps/chosen": -74.31504821777344, + "logps/rejected": -75.55863952636719, + "loss": 0.6895, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.02252321131527424, + "rewards/margins": -0.02265758439898491, + "rewards/rejected": 0.00013437285088002682, + "step": 147 + }, + { + "epoch": 0.24, + "learning_rate": 1.319073083778966e-07, + "logits/chosen": -1.0576329231262207, + "logits/rejected": -1.2008721828460693, + "logps/chosen": -65.27671813964844, + "logps/rejected": -92.62938690185547, + "loss": 0.6899, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.021604442968964577, + "rewards/margins": -0.01308126375079155, + "rewards/rejected": -0.008523178286850452, + "step": 148 + }, + { + "epoch": 0.24, + "learning_rate": 1.3279857397504457e-07, + "logits/chosen": -1.107293725013733, + "logits/rejected": -1.2227246761322021, + "logps/chosen": -92.2590560913086, + "logps/rejected": -87.06993103027344, + "loss": 0.6874, + "rewards/accuracies": 0.25, + "rewards/chosen": 0.006343650631606579, + "rewards/margins": -0.0033267969265580177, + "rewards/rejected": 0.009670448489487171, + "step": 149 + }, + { + "epoch": 0.24, + "learning_rate": 1.3368983957219251e-07, + "logits/chosen": -1.2669398784637451, + "logits/rejected": -1.3035881519317627, + "logps/chosen": -75.93667602539062, + "logps/rejected": -74.04835510253906, + "loss": 0.686, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.003344536293298006, + "rewards/margins": -0.011771298944950104, + "rewards/rejected": 0.008426761254668236, + "step": 150 + }, + { + "epoch": 0.24, + "learning_rate": 1.3458110516934046e-07, + "logits/chosen": -1.2319573163986206, + "logits/rejected": -1.248245358467102, + "logps/chosen": -81.9886474609375, + "logps/rejected": -84.21199035644531, + "loss": 0.6851, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.005156421568244696, + "rewards/margins": 0.008423137478530407, + "rewards/rejected": -0.013579560443758965, + "step": 151 + }, + { + "epoch": 0.24, + "learning_rate": 1.354723707664884e-07, + "logits/chosen": -1.2663254737854004, + "logits/rejected": -1.3274998664855957, + "logps/chosen": -53.74374008178711, + "logps/rejected": -69.90965270996094, + "loss": 0.6895, + "rewards/accuracies": 0.25, + "rewards/chosen": 0.01172170601785183, + "rewards/margins": -0.01493387296795845, + "rewards/rejected": 0.02665557898581028, + "step": 152 + }, + { + "epoch": 0.25, + "learning_rate": 1.3636363636363635e-07, + "logits/chosen": -1.1918727159500122, + "logits/rejected": -1.2879536151885986, + "logps/chosen": -68.19717407226562, + "logps/rejected": -91.91706848144531, + "loss": 0.6936, + "rewards/accuracies": 0.25, + "rewards/chosen": 0.011574840173125267, + "rewards/margins": -0.014360904693603516, + "rewards/rejected": 0.025935746729373932, + "step": 153 + }, + { + "epoch": 0.25, + "learning_rate": 1.3725490196078432e-07, + "logits/chosen": -1.110097050666809, + "logits/rejected": -1.2036888599395752, + "logps/chosen": -92.6609878540039, + "logps/rejected": -96.52682495117188, + "loss": 0.6883, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.005514146760106087, + "rewards/margins": -0.006318665109574795, + "rewards/rejected": 0.011832811869680882, + "step": 154 + }, + { + "epoch": 0.25, + "learning_rate": 1.3814616755793226e-07, + "logits/chosen": -1.285487413406372, + "logits/rejected": -1.2902686595916748, + "logps/chosen": -72.99339294433594, + "logps/rejected": -64.3037109375, + "loss": 0.6868, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.04228544235229492, + "rewards/margins": 0.027436494827270508, + "rewards/rejected": 0.014848947525024414, + "step": 155 + }, + { + "epoch": 0.25, + "learning_rate": 1.390374331550802e-07, + "logits/chosen": -1.1897838115692139, + "logits/rejected": -1.25021493434906, + "logps/chosen": -77.29078674316406, + "logps/rejected": -78.77659606933594, + "loss": 0.6921, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.005116081330925226, + "rewards/margins": -0.005609511863440275, + "rewards/rejected": 0.010725593194365501, + "step": 156 + }, + { + "epoch": 0.25, + "learning_rate": 1.3992869875222815e-07, + "logits/chosen": -1.2299305200576782, + "logits/rejected": -1.2583080530166626, + "logps/chosen": -74.7567138671875, + "logps/rejected": -77.03506469726562, + "loss": 0.6913, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.0184447281062603, + "rewards/margins": 0.006376456934958696, + "rewards/rejected": 0.012068272568285465, + "step": 157 + }, + { + "epoch": 0.25, + "learning_rate": 1.408199643493761e-07, + "logits/chosen": -1.4284183979034424, + "logits/rejected": -1.4431934356689453, + "logps/chosen": -73.93280029296875, + "logps/rejected": -68.55723571777344, + "loss": 0.6844, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.0240675937384367, + "rewards/margins": 0.00622797105461359, + "rewards/rejected": 0.017839621752500534, + "step": 158 + }, + { + "epoch": 0.26, + "learning_rate": 1.4171122994652406e-07, + "logits/chosen": -0.9991000890731812, + "logits/rejected": -1.0306744575500488, + "logps/chosen": -80.68217468261719, + "logps/rejected": -76.83024597167969, + "loss": 0.6871, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.0027531625237315893, + "rewards/margins": 0.04543151706457138, + "rewards/rejected": -0.04818468168377876, + "step": 159 + }, + { + "epoch": 0.26, + "learning_rate": 1.42602495543672e-07, + "logits/chosen": -1.0936920642852783, + "logits/rejected": -1.1092602014541626, + "logps/chosen": -82.60357666015625, + "logps/rejected": -75.55970001220703, + "loss": 0.6886, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.010325146839022636, + "rewards/margins": 0.004160404670983553, + "rewards/rejected": 0.006164741702377796, + "step": 160 + }, + { + "epoch": 0.26, + "learning_rate": 1.4349376114081995e-07, + "logits/chosen": -1.2158175706863403, + "logits/rejected": -1.2169979810714722, + "logps/chosen": -128.269287109375, + "logps/rejected": -90.16124725341797, + "loss": 0.6914, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.0017871854361146688, + "rewards/margins": -0.004718400072306395, + "rewards/rejected": 0.00650558527559042, + "step": 161 + }, + { + "epoch": 0.26, + "learning_rate": 1.443850267379679e-07, + "logits/chosen": -1.1846238374710083, + "logits/rejected": -1.2020435333251953, + "logps/chosen": -80.48837280273438, + "logps/rejected": -78.94100952148438, + "loss": 0.6914, + "rewards/accuracies": 0.25, + "rewards/chosen": -0.009000301361083984, + "rewards/margins": -0.024142839014530182, + "rewards/rejected": 0.015142536722123623, + "step": 162 + }, + { + "epoch": 0.26, + "learning_rate": 1.4527629233511584e-07, + "logits/chosen": -1.2966161966323853, + "logits/rejected": -1.3049771785736084, + "logps/chosen": -104.6605224609375, + "logps/rejected": -100.4988784790039, + "loss": 0.6812, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.035912513732910156, + "rewards/margins": 0.05647735670208931, + "rewards/rejected": -0.020564841106534004, + "step": 163 + }, + { + "epoch": 0.26, + "learning_rate": 1.461675579322638e-07, + "logits/chosen": -1.2582217454910278, + "logits/rejected": -1.2453134059906006, + "logps/chosen": -78.0390396118164, + "logps/rejected": -84.03144836425781, + "loss": 0.6912, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.010477638803422451, + "rewards/margins": -0.0006960853934288025, + "rewards/rejected": 0.011173725128173828, + "step": 164 + }, + { + "epoch": 0.26, + "learning_rate": 1.4705882352941175e-07, + "logits/chosen": -1.3116538524627686, + "logits/rejected": -1.368645191192627, + "logps/chosen": -90.49311828613281, + "logps/rejected": -96.47917938232422, + "loss": 0.6953, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.014243889600038528, + "rewards/margins": -0.001439286395907402, + "rewards/rejected": 0.01568317413330078, + "step": 165 + }, + { + "epoch": 0.27, + "learning_rate": 1.479500891265597e-07, + "logits/chosen": -1.1102708578109741, + "logits/rejected": -1.0590384006500244, + "logps/chosen": -95.77940368652344, + "logps/rejected": -96.39000701904297, + "loss": 0.6821, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.02114238776266575, + "rewards/margins": 0.014655305072665215, + "rewards/rejected": 0.006487083155661821, + "step": 166 + }, + { + "epoch": 0.27, + "learning_rate": 1.4884135472370764e-07, + "logits/chosen": -1.2088526487350464, + "logits/rejected": -1.257607340812683, + "logps/chosen": -129.15087890625, + "logps/rejected": -122.53927612304688, + "loss": 0.6898, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.029584500938653946, + "rewards/margins": 0.0133909210562706, + "rewards/rejected": 0.016193581745028496, + "step": 167 + }, + { + "epoch": 0.27, + "learning_rate": 1.4973262032085558e-07, + "logits/chosen": -1.1217880249023438, + "logits/rejected": -1.1830638647079468, + "logps/chosen": -82.94430541992188, + "logps/rejected": -77.02546691894531, + "loss": 0.6972, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.018335916101932526, + "rewards/margins": 0.024320602416992188, + "rewards/rejected": -0.005984686780720949, + "step": 168 + }, + { + "epoch": 0.27, + "learning_rate": 1.5062388591800358e-07, + "logits/chosen": -1.0542296171188354, + "logits/rejected": -1.0406967401504517, + "logps/chosen": -96.65447998046875, + "logps/rejected": -117.56917572021484, + "loss": 0.6973, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.012562274001538754, + "rewards/margins": 0.0050829872488975525, + "rewards/rejected": 0.007479285821318626, + "step": 169 + }, + { + "epoch": 0.27, + "learning_rate": 1.5151515151515152e-07, + "logits/chosen": -1.0995701551437378, + "logits/rejected": -1.186861276626587, + "logps/chosen": -92.70673370361328, + "logps/rejected": -95.64176940917969, + "loss": 0.6934, + "rewards/accuracies": 0.25, + "rewards/chosen": -0.013466835021972656, + "rewards/margins": -0.0192292220890522, + "rewards/rejected": 0.0057623861357569695, + "step": 170 + }, + { + "epoch": 0.27, + "learning_rate": 1.5240641711229947e-07, + "logits/chosen": -1.1159003973007202, + "logits/rejected": -1.2103300094604492, + "logps/chosen": -81.66310119628906, + "logps/rejected": -78.21598815917969, + "loss": 0.7001, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.022819900885224342, + "rewards/margins": 0.0027945516631007195, + "rewards/rejected": 0.020025350153446198, + "step": 171 + }, + { + "epoch": 0.28, + "learning_rate": 1.532976827094474e-07, + "logits/chosen": -1.056949496269226, + "logits/rejected": -1.0529078245162964, + "logps/chosen": -90.16043853759766, + "logps/rejected": -96.05148315429688, + "loss": 0.6974, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.015632057562470436, + "rewards/margins": -0.0013263700529932976, + "rewards/rejected": 0.01695842668414116, + "step": 172 + }, + { + "epoch": 0.28, + "learning_rate": 1.5418894830659536e-07, + "logits/chosen": -1.3446035385131836, + "logits/rejected": -1.3823251724243164, + "logps/chosen": -87.43083190917969, + "logps/rejected": -95.5851821899414, + "loss": 0.6845, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.04805288463830948, + "rewards/margins": 0.046666622161865234, + "rewards/rejected": 0.0013862615451216698, + "step": 173 + }, + { + "epoch": 0.28, + "learning_rate": 1.5508021390374333e-07, + "logits/chosen": -1.210251808166504, + "logits/rejected": -1.1726438999176025, + "logps/chosen": -55.933528900146484, + "logps/rejected": -68.61833190917969, + "loss": 0.7022, + "rewards/accuracies": 0.25, + "rewards/chosen": -0.0025814054533839226, + "rewards/margins": -0.005895042791962624, + "rewards/rejected": 0.0033136368729174137, + "step": 174 + }, + { + "epoch": 0.28, + "learning_rate": 1.5597147950089127e-07, + "logits/chosen": -1.4685124158859253, + "logits/rejected": -1.3827660083770752, + "logps/chosen": -85.58157348632812, + "logps/rejected": -69.87470245361328, + "loss": 0.6856, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.009827231988310814, + "rewards/margins": 0.012124920263886452, + "rewards/rejected": -0.002297687344253063, + "step": 175 + }, + { + "epoch": 0.28, + "learning_rate": 1.5686274509803921e-07, + "logits/chosen": -1.3376049995422363, + "logits/rejected": -1.3044078350067139, + "logps/chosen": -86.311767578125, + "logps/rejected": -75.01780700683594, + "loss": 0.6901, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.02645111083984375, + "rewards/margins": 0.022428512573242188, + "rewards/rejected": 0.00402259873226285, + "step": 176 + }, + { + "epoch": 0.28, + "learning_rate": 1.5775401069518716e-07, + "logits/chosen": -1.31523859500885, + "logits/rejected": -1.2526075839996338, + "logps/chosen": -91.53955078125, + "logps/rejected": -114.09359741210938, + "loss": 0.6783, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.034424781799316406, + "rewards/margins": 0.0620538704097271, + "rewards/rejected": -0.02762908861041069, + "step": 177 + }, + { + "epoch": 0.29, + "learning_rate": 1.586452762923351e-07, + "logits/chosen": -1.0876498222351074, + "logits/rejected": -1.1826117038726807, + "logps/chosen": -82.99105072021484, + "logps/rejected": -74.619873046875, + "loss": 0.6859, + "rewards/accuracies": 0.25, + "rewards/chosen": -0.004779815673828125, + "rewards/margins": -0.030318640172481537, + "rewards/rejected": 0.02553882636129856, + "step": 178 + }, + { + "epoch": 0.29, + "learning_rate": 1.5953654188948307e-07, + "logits/chosen": -1.4762051105499268, + "logits/rejected": -1.4248936176300049, + "logps/chosen": -110.35680389404297, + "logps/rejected": -94.32610321044922, + "loss": 0.6858, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.02806110493838787, + "rewards/margins": -0.005148697644472122, + "rewards/rejected": -0.0229124054312706, + "step": 179 + }, + { + "epoch": 0.29, + "learning_rate": 1.6042780748663102e-07, + "logits/chosen": -1.441422462463379, + "logits/rejected": -1.3664965629577637, + "logps/chosen": -94.42696380615234, + "logps/rejected": -73.93683624267578, + "loss": 0.6817, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.022891998291015625, + "rewards/margins": 0.012346649542450905, + "rewards/rejected": 0.01054534874856472, + "step": 180 + }, + { + "epoch": 0.29, + "learning_rate": 1.6131907308377896e-07, + "logits/chosen": -0.9797192215919495, + "logits/rejected": -1.0084091424942017, + "logps/chosen": -76.98902893066406, + "logps/rejected": -98.45145416259766, + "loss": 0.688, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.01589355431497097, + "rewards/margins": 0.02487964741885662, + "rewards/rejected": -0.008986091241240501, + "step": 181 + }, + { + "epoch": 0.29, + "learning_rate": 1.622103386809269e-07, + "logits/chosen": -1.1448172330856323, + "logits/rejected": -1.2438055276870728, + "logps/chosen": -64.28092956542969, + "logps/rejected": -88.49763488769531, + "loss": 0.6867, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.002738380804657936, + "rewards/margins": 0.038314249366521835, + "rewards/rejected": -0.03557587042450905, + "step": 182 + }, + { + "epoch": 0.29, + "learning_rate": 1.6310160427807487e-07, + "logits/chosen": -1.2652349472045898, + "logits/rejected": -1.2993640899658203, + "logps/chosen": -72.77031707763672, + "logps/rejected": -72.89250183105469, + "loss": 0.6984, + "rewards/accuracies": 0.25, + "rewards/chosen": 0.023846054449677467, + "rewards/margins": -0.019249439239501953, + "rewards/rejected": 0.04309549555182457, + "step": 183 + }, + { + "epoch": 0.3, + "learning_rate": 1.6399286987522282e-07, + "logits/chosen": -1.1174217462539673, + "logits/rejected": -1.125874400138855, + "logps/chosen": -73.41419982910156, + "logps/rejected": -87.02188110351562, + "loss": 0.703, + "rewards/accuracies": 0.25, + "rewards/chosen": -0.002849388401955366, + "rewards/margins": -0.02789783477783203, + "rewards/rejected": 0.025048445910215378, + "step": 184 + }, + { + "epoch": 0.3, + "learning_rate": 1.6488413547237076e-07, + "logits/chosen": -1.5134884119033813, + "logits/rejected": -1.450934648513794, + "logps/chosen": -79.44047546386719, + "logps/rejected": -65.8511962890625, + "loss": 0.6909, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.013629722408950329, + "rewards/margins": 0.005207587033510208, + "rewards/rejected": 0.008422136306762695, + "step": 185 + }, + { + "epoch": 0.3, + "learning_rate": 1.657754010695187e-07, + "logits/chosen": -1.0555158853530884, + "logits/rejected": -1.0353024005889893, + "logps/chosen": -94.71896362304688, + "logps/rejected": -86.52894592285156, + "loss": 0.6861, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.019899560138583183, + "rewards/margins": -0.010300257243216038, + "rewards/rejected": -0.009599304758012295, + "step": 186 + }, + { + "epoch": 0.3, + "learning_rate": 1.6666666666666665e-07, + "logits/chosen": -1.2581530809402466, + "logits/rejected": -1.3141635656356812, + "logps/chosen": -112.1159439086914, + "logps/rejected": -97.25033569335938, + "loss": 0.6977, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.034465789794921875, + "rewards/margins": 0.04063625633716583, + "rewards/rejected": -0.006170463282614946, + "step": 187 + }, + { + "epoch": 0.3, + "learning_rate": 1.6755793226381462e-07, + "logits/chosen": -1.2355010509490967, + "logits/rejected": -1.249297857284546, + "logps/chosen": -91.64498901367188, + "logps/rejected": -89.41168212890625, + "loss": 0.6849, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.0010955813340842724, + "rewards/margins": 0.0016227737069129944, + "rewards/rejected": -0.0005271914415061474, + "step": 188 + }, + { + "epoch": 0.3, + "learning_rate": 1.6844919786096256e-07, + "logits/chosen": -1.3012874126434326, + "logits/rejected": -1.3630125522613525, + "logps/chosen": -68.51990509033203, + "logps/rejected": -76.87109375, + "loss": 0.6877, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.01358041726052761, + "rewards/margins": 0.016314316540956497, + "rewards/rejected": -0.002733898116275668, + "step": 189 + }, + { + "epoch": 0.3, + "learning_rate": 1.693404634581105e-07, + "logits/chosen": -1.170872449874878, + "logits/rejected": -1.1361101865768433, + "logps/chosen": -70.6655044555664, + "logps/rejected": -56.43096160888672, + "loss": 0.6799, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.012186527252197266, + "rewards/margins": 0.007533167488873005, + "rewards/rejected": 0.004653359763324261, + "step": 190 + }, + { + "epoch": 0.31, + "learning_rate": 1.7023172905525845e-07, + "logits/chosen": -1.0795990228652954, + "logits/rejected": -1.078825831413269, + "logps/chosen": -101.79287719726562, + "logps/rejected": -88.37467956542969, + "loss": 0.6974, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.008512306958436966, + "rewards/margins": -0.01216125674545765, + "rewards/rejected": 0.0036489497870206833, + "step": 191 + }, + { + "epoch": 0.31, + "learning_rate": 1.711229946524064e-07, + "logits/chosen": -1.2868682146072388, + "logits/rejected": -1.2432432174682617, + "logps/chosen": -91.89615631103516, + "logps/rejected": -83.5869369506836, + "loss": 0.6833, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.007503128610551357, + "rewards/margins": 0.020963573828339577, + "rewards/rejected": -0.013460446149110794, + "step": 192 + }, + { + "epoch": 0.31, + "learning_rate": 1.7201426024955437e-07, + "logits/chosen": -1.264981985092163, + "logits/rejected": -1.2010303735733032, + "logps/chosen": -95.26475524902344, + "logps/rejected": -82.73414611816406, + "loss": 0.6908, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.0174986831843853, + "rewards/margins": -0.0011507999151945114, + "rewards/rejected": 0.01864948309957981, + "step": 193 + }, + { + "epoch": 0.31, + "learning_rate": 1.729055258467023e-07, + "logits/chosen": -1.2113144397735596, + "logits/rejected": -0.978897213935852, + "logps/chosen": -68.5088119506836, + "logps/rejected": -91.72132873535156, + "loss": 0.6766, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.013660717755556107, + "rewards/margins": 0.027810953557491302, + "rewards/rejected": -0.014150239527225494, + "step": 194 + }, + { + "epoch": 0.31, + "learning_rate": 1.7379679144385025e-07, + "logits/chosen": -1.3174318075180054, + "logits/rejected": -1.4233349561691284, + "logps/chosen": -68.59219360351562, + "logps/rejected": -74.4993667602539, + "loss": 0.6817, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.01985139772295952, + "rewards/margins": -0.0039217001758515835, + "rewards/rejected": 0.02377310022711754, + "step": 195 + }, + { + "epoch": 0.31, + "learning_rate": 1.746880570409982e-07, + "logits/chosen": -1.145938754081726, + "logits/rejected": -1.253713607788086, + "logps/chosen": -77.18778228759766, + "logps/rejected": -89.86077117919922, + "loss": 0.6892, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.007975578308105469, + "rewards/margins": -0.0084991455078125, + "rewards/rejected": 0.0005235671997070312, + "step": 196 + }, + { + "epoch": 0.32, + "learning_rate": 1.7557932263814614e-07, + "logits/chosen": -1.2652584314346313, + "logits/rejected": -1.2274739742279053, + "logps/chosen": -99.92034912109375, + "logps/rejected": -104.35964965820312, + "loss": 0.6809, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.03284492343664169, + "rewards/margins": 0.017350386828184128, + "rewards/rejected": 0.015494538471102715, + "step": 197 + }, + { + "epoch": 0.32, + "learning_rate": 1.764705882352941e-07, + "logits/chosen": -1.2105274200439453, + "logits/rejected": -1.117488980293274, + "logps/chosen": -85.86508178710938, + "logps/rejected": -71.0420150756836, + "loss": 0.688, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.05680304020643234, + "rewards/margins": 0.10031509399414062, + "rewards/rejected": -0.04351206123828888, + "step": 198 + }, + { + "epoch": 0.32, + "learning_rate": 1.7736185383244206e-07, + "logits/chosen": -1.2283986806869507, + "logits/rejected": -1.256447672843933, + "logps/chosen": -71.98006439208984, + "logps/rejected": -80.81570434570312, + "loss": 0.6966, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.008759688585996628, + "rewards/margins": -0.031231118366122246, + "rewards/rejected": 0.02247142791748047, + "step": 199 + }, + { + "epoch": 0.32, + "learning_rate": 1.7825311942959e-07, + "logits/chosen": -1.295608401298523, + "logits/rejected": -1.3207356929779053, + "logps/chosen": -81.70811462402344, + "logps/rejected": -84.58729553222656, + "loss": 0.699, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.0452427864074707, + "rewards/margins": 0.024469852447509766, + "rewards/rejected": 0.020772933959960938, + "step": 200 + }, + { + "epoch": 0.32, + "learning_rate": 1.7914438502673794e-07, + "logits/chosen": -1.2896004915237427, + "logits/rejected": -1.2928847074508667, + "logps/chosen": -67.5517578125, + "logps/rejected": -78.02079010009766, + "loss": 0.689, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.048406124114990234, + "rewards/margins": 0.07088927924633026, + "rewards/rejected": -0.022483156993985176, + "step": 201 + }, + { + "epoch": 0.32, + "learning_rate": 1.800356506238859e-07, + "logits/chosen": -1.0671335458755493, + "logits/rejected": -1.0690761804580688, + "logps/chosen": -97.25962829589844, + "logps/rejected": -94.14231872558594, + "loss": 0.6864, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.0874597579240799, + "rewards/margins": 0.06820917129516602, + "rewards/rejected": 0.01925058290362358, + "step": 202 + }, + { + "epoch": 0.33, + "learning_rate": 1.8092691622103386e-07, + "logits/chosen": -1.3357504606246948, + "logits/rejected": -1.4083187580108643, + "logps/chosen": -69.22675323486328, + "logps/rejected": -78.70281219482422, + "loss": 0.6955, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.019612502306699753, + "rewards/margins": -0.02757863886654377, + "rewards/rejected": 0.007966136559844017, + "step": 203 + }, + { + "epoch": 0.33, + "learning_rate": 1.818181818181818e-07, + "logits/chosen": -1.154530644416809, + "logits/rejected": -1.1778064966201782, + "logps/chosen": -87.59091186523438, + "logps/rejected": -74.59123992919922, + "loss": 0.6681, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.014525031670928001, + "rewards/margins": -0.011475468054413795, + "rewards/rejected": -0.003049564315006137, + "step": 204 + }, + { + "epoch": 0.33, + "learning_rate": 1.8270944741532975e-07, + "logits/chosen": -1.2200852632522583, + "logits/rejected": -1.2569987773895264, + "logps/chosen": -96.88046264648438, + "logps/rejected": -106.29316711425781, + "loss": 0.6812, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07605037838220596, + "rewards/margins": 0.0842885971069336, + "rewards/rejected": -0.00823822058737278, + "step": 205 + }, + { + "epoch": 0.33, + "learning_rate": 1.836007130124777e-07, + "logits/chosen": -1.1513392925262451, + "logits/rejected": -1.0660691261291504, + "logps/chosen": -80.135009765625, + "logps/rejected": -76.30355072021484, + "loss": 0.686, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.055201053619384766, + "rewards/margins": 0.07844934612512589, + "rewards/rejected": -0.02324829250574112, + "step": 206 + }, + { + "epoch": 0.33, + "learning_rate": 1.8449197860962566e-07, + "logits/chosen": -1.354569435119629, + "logits/rejected": -1.2773513793945312, + "logps/chosen": -95.84893798828125, + "logps/rejected": -72.3054428100586, + "loss": 0.6895, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.03253040462732315, + "rewards/margins": 0.03406982496380806, + "rewards/rejected": -0.0015394208021461964, + "step": 207 + }, + { + "epoch": 0.33, + "learning_rate": 1.8538324420677363e-07, + "logits/chosen": -1.1984279155731201, + "logits/rejected": -1.2835437059402466, + "logps/chosen": -111.05964660644531, + "logps/rejected": -89.39425659179688, + "loss": 0.6852, + "rewards/accuracies": 0.25, + "rewards/chosen": 0.008302688598632812, + "rewards/margins": -0.014565277844667435, + "rewards/rejected": 0.022867966443300247, + "step": 208 + }, + { + "epoch": 0.34, + "learning_rate": 1.8627450980392158e-07, + "logits/chosen": -1.1748929023742676, + "logits/rejected": -1.1685056686401367, + "logps/chosen": -72.09039306640625, + "logps/rejected": -86.42749786376953, + "loss": 0.6857, + "rewards/accuracies": 0.25, + "rewards/chosen": 0.03194408491253853, + "rewards/margins": -0.02014312893152237, + "rewards/rejected": 0.0520872101187706, + "step": 209 + }, + { + "epoch": 0.34, + "learning_rate": 1.8716577540106952e-07, + "logits/chosen": -1.343977689743042, + "logits/rejected": -1.3213236331939697, + "logps/chosen": -88.28121948242188, + "logps/rejected": -115.24087524414062, + "loss": 0.7002, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.05695114657282829, + "rewards/margins": 0.029232025146484375, + "rewards/rejected": 0.02771911583840847, + "step": 210 + }, + { + "epoch": 0.34, + "learning_rate": 1.8805704099821746e-07, + "logits/chosen": -1.2223469018936157, + "logits/rejected": -1.1940040588378906, + "logps/chosen": -83.94541931152344, + "logps/rejected": -78.602783203125, + "loss": 0.6647, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.09268398582935333, + "rewards/margins": 0.11456432938575745, + "rewards/rejected": -0.021880339831113815, + "step": 211 + }, + { + "epoch": 0.34, + "learning_rate": 1.889483065953654e-07, + "logits/chosen": -1.275195837020874, + "logits/rejected": -1.2824195623397827, + "logps/chosen": -84.36309814453125, + "logps/rejected": -61.45875930786133, + "loss": 0.6943, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.0012431144714355469, + "rewards/margins": -0.0024353028275072575, + "rewards/rejected": 0.0011921883560717106, + "step": 212 + }, + { + "epoch": 0.34, + "learning_rate": 1.8983957219251338e-07, + "logits/chosen": -1.1856679916381836, + "logits/rejected": -1.1402510404586792, + "logps/chosen": -95.24775695800781, + "logps/rejected": -124.43846130371094, + "loss": 0.6958, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.04189911112189293, + "rewards/margins": 0.006594468839466572, + "rewards/rejected": 0.03530464321374893, + "step": 213 + }, + { + "epoch": 0.34, + "learning_rate": 1.9073083778966132e-07, + "logits/chosen": -1.3067560195922852, + "logits/rejected": -1.2862671613693237, + "logps/chosen": -66.29334259033203, + "logps/rejected": -68.76512145996094, + "loss": 0.6792, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.03160810470581055, + "rewards/margins": 0.04609499126672745, + "rewards/rejected": -0.014486884698271751, + "step": 214 + }, + { + "epoch": 0.35, + "learning_rate": 1.9162210338680927e-07, + "logits/chosen": -1.1827154159545898, + "logits/rejected": -1.270613431930542, + "logps/chosen": -84.06794738769531, + "logps/rejected": -67.85546875, + "loss": 0.685, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.03467101976275444, + "rewards/margins": 0.02241525799036026, + "rewards/rejected": 0.01225576363503933, + "step": 215 + }, + { + "epoch": 0.35, + "learning_rate": 1.925133689839572e-07, + "logits/chosen": -1.2252124547958374, + "logits/rejected": -1.2743034362792969, + "logps/chosen": -62.56760025024414, + "logps/rejected": -104.43107604980469, + "loss": 0.6895, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.021578025072813034, + "rewards/margins": 0.042513083666563034, + "rewards/rejected": -0.02093506045639515, + "step": 216 + }, + { + "epoch": 0.35, + "learning_rate": 1.9340463458110515e-07, + "logits/chosen": -1.067476511001587, + "logits/rejected": -1.0687108039855957, + "logps/chosen": -76.008544921875, + "logps/rejected": -88.04805755615234, + "loss": 0.6923, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.002370451809838414, + "rewards/margins": -0.018574140965938568, + "rewards/rejected": 0.016203688457608223, + "step": 217 + }, + { + "epoch": 0.35, + "learning_rate": 1.9429590017825312e-07, + "logits/chosen": -1.2216835021972656, + "logits/rejected": -1.1859307289123535, + "logps/chosen": -89.28216552734375, + "logps/rejected": -87.74323272705078, + "loss": 0.6903, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.027384568005800247, + "rewards/margins": 0.018771935254335403, + "rewards/rejected": 0.008612633682787418, + "step": 218 + }, + { + "epoch": 0.35, + "learning_rate": 1.9518716577540107e-07, + "logits/chosen": -1.101264476776123, + "logits/rejected": -1.1525955200195312, + "logps/chosen": -82.58341979980469, + "logps/rejected": -48.99835968017578, + "loss": 0.6804, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.015311813913285732, + "rewards/margins": -0.005624962039291859, + "rewards/rejected": 0.02093677595257759, + "step": 219 + }, + { + "epoch": 0.35, + "learning_rate": 1.96078431372549e-07, + "logits/chosen": -1.2214932441711426, + "logits/rejected": -1.1190643310546875, + "logps/chosen": -94.95525360107422, + "logps/rejected": -108.86164855957031, + "loss": 0.6786, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.09720917046070099, + "rewards/margins": 0.09963169693946838, + "rewards/rejected": -0.0024225234519690275, + "step": 220 + }, + { + "epoch": 0.35, + "learning_rate": 1.9696969696969696e-07, + "logits/chosen": -1.1900205612182617, + "logits/rejected": -1.2499765157699585, + "logps/chosen": -76.85565185546875, + "logps/rejected": -83.38502502441406, + "loss": 0.6805, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.06594543159008026, + "rewards/margins": 0.13332080841064453, + "rewards/rejected": -0.06737537682056427, + "step": 221 + }, + { + "epoch": 0.36, + "learning_rate": 1.9786096256684493e-07, + "logits/chosen": -1.19120192527771, + "logits/rejected": -1.2155377864837646, + "logps/chosen": -102.14087677001953, + "logps/rejected": -80.68405151367188, + "loss": 0.6878, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.02893543429672718, + "rewards/margins": 0.052164457738399506, + "rewards/rejected": -0.023229029029607773, + "step": 222 + }, + { + "epoch": 0.36, + "learning_rate": 1.9875222816399287e-07, + "logits/chosen": -1.2805598974227905, + "logits/rejected": -1.2869868278503418, + "logps/chosen": -75.24971771240234, + "logps/rejected": -82.32807922363281, + "loss": 0.6993, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.06184406578540802, + "rewards/margins": 0.03207845985889435, + "rewards/rejected": 0.029765605926513672, + "step": 223 + }, + { + "epoch": 0.36, + "learning_rate": 1.9964349376114081e-07, + "logits/chosen": -1.3543906211853027, + "logits/rejected": -1.321485996246338, + "logps/chosen": -89.03424072265625, + "logps/rejected": -89.04637908935547, + "loss": 0.6926, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.10586948692798615, + "rewards/margins": 0.07139110565185547, + "rewards/rejected": 0.03447837755084038, + "step": 224 + }, + { + "epoch": 0.36, + "learning_rate": 2.0053475935828876e-07, + "logits/chosen": -1.291060209274292, + "logits/rejected": -1.2525196075439453, + "logps/chosen": -109.12630462646484, + "logps/rejected": -113.05500030517578, + "loss": 0.6905, + "rewards/accuracies": 0.25, + "rewards/chosen": -0.03588543087244034, + "rewards/margins": -0.028558731079101562, + "rewards/rejected": -0.007326698396354914, + "step": 225 + }, + { + "epoch": 0.36, + "learning_rate": 2.014260249554367e-07, + "logits/chosen": -1.2375459671020508, + "logits/rejected": -1.1319429874420166, + "logps/chosen": -87.06940460205078, + "logps/rejected": -81.87649536132812, + "loss": 0.6879, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.06458874046802521, + "rewards/margins": 0.07568836212158203, + "rewards/rejected": -0.011099625378847122, + "step": 226 + }, + { + "epoch": 0.36, + "learning_rate": 2.0231729055258467e-07, + "logits/chosen": -1.2574338912963867, + "logits/rejected": -1.3508352041244507, + "logps/chosen": -51.461002349853516, + "logps/rejected": -52.147308349609375, + "loss": 0.689, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.017365265637636185, + "rewards/margins": 0.016486644744873047, + "rewards/rejected": 0.0008786208927631378, + "step": 227 + }, + { + "epoch": 0.37, + "learning_rate": 2.0320855614973262e-07, + "logits/chosen": -1.225588321685791, + "logits/rejected": -1.278594732284546, + "logps/chosen": -76.51856994628906, + "logps/rejected": -84.6484375, + "loss": 0.6834, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.011736584827303886, + "rewards/margins": -0.013005736283957958, + "rewards/rejected": 0.001269150641746819, + "step": 228 + }, + { + "epoch": 0.37, + "learning_rate": 2.0409982174688056e-07, + "logits/chosen": -1.2359657287597656, + "logits/rejected": -1.1987488269805908, + "logps/chosen": -97.26112365722656, + "logps/rejected": -100.06990051269531, + "loss": 0.6755, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.0042654043063521385, + "rewards/margins": 0.015516474843025208, + "rewards/rejected": -0.011251069605350494, + "step": 229 + }, + { + "epoch": 0.37, + "learning_rate": 2.049910873440285e-07, + "logits/chosen": -1.1559699773788452, + "logits/rejected": -1.2827870845794678, + "logps/chosen": -89.52205657958984, + "logps/rejected": -93.19667053222656, + "loss": 0.6874, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.04632082208991051, + "rewards/margins": 0.03583383560180664, + "rewards/rejected": 0.010486985556781292, + "step": 230 + }, + { + "epoch": 0.37, + "learning_rate": 2.0588235294117645e-07, + "logits/chosen": -1.2297463417053223, + "logits/rejected": -1.3464181423187256, + "logps/chosen": -92.26803588867188, + "logps/rejected": -95.75495147705078, + "loss": 0.6882, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.03621730953454971, + "rewards/margins": 0.10506764054298401, + "rewards/rejected": -0.0688503310084343, + "step": 231 + }, + { + "epoch": 0.37, + "learning_rate": 2.0677361853832442e-07, + "logits/chosen": -1.3085564374923706, + "logits/rejected": -1.1796925067901611, + "logps/chosen": -80.08882904052734, + "logps/rejected": -90.08299255371094, + "loss": 0.6822, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.054616548120975494, + "rewards/margins": 0.015880106016993523, + "rewards/rejected": 0.03873644024133682, + "step": 232 + }, + { + "epoch": 0.37, + "learning_rate": 2.0766488413547236e-07, + "logits/chosen": -1.3770502805709839, + "logits/rejected": -1.294053077697754, + "logps/chosen": -79.77741241455078, + "logps/rejected": -96.57077026367188, + "loss": 0.6851, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.0904666930437088, + "rewards/margins": 0.07377853244543076, + "rewards/rejected": 0.016688156872987747, + "step": 233 + }, + { + "epoch": 0.38, + "learning_rate": 2.085561497326203e-07, + "logits/chosen": -1.0940014123916626, + "logits/rejected": -1.0471937656402588, + "logps/chosen": -79.23580169677734, + "logps/rejected": -65.29253387451172, + "loss": 0.6696, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0256697665899992, + "rewards/margins": 0.04503040388226509, + "rewards/rejected": -0.01936063915491104, + "step": 234 + }, + { + "epoch": 0.38, + "learning_rate": 2.0944741532976825e-07, + "logits/chosen": -1.4373263120651245, + "logits/rejected": -1.3277997970581055, + "logps/chosen": -86.504638671875, + "logps/rejected": -94.64060974121094, + "loss": 0.6859, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.04969749599695206, + "rewards/margins": 0.05911875143647194, + "rewards/rejected": -0.009421253576874733, + "step": 235 + }, + { + "epoch": 0.38, + "learning_rate": 2.103386809269162e-07, + "logits/chosen": -1.0195074081420898, + "logits/rejected": -1.0604435205459595, + "logps/chosen": -73.08753967285156, + "logps/rejected": -78.03620147705078, + "loss": 0.6818, + "rewards/accuracies": 0.25, + "rewards/chosen": -0.02312946319580078, + "rewards/margins": -0.03335971757769585, + "rewards/rejected": 0.01023025531321764, + "step": 236 + }, + { + "epoch": 0.38, + "learning_rate": 2.1122994652406416e-07, + "logits/chosen": -1.3641736507415771, + "logits/rejected": -1.3585805892944336, + "logps/chosen": -80.21371459960938, + "logps/rejected": -113.59886169433594, + "loss": 0.6639, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.043053723871707916, + "rewards/margins": 0.07565870881080627, + "rewards/rejected": -0.03260498121380806, + "step": 237 + }, + { + "epoch": 0.38, + "learning_rate": 2.121212121212121e-07, + "logits/chosen": -1.2279131412506104, + "logits/rejected": -1.2762916088104248, + "logps/chosen": -65.65019226074219, + "logps/rejected": -71.62332153320312, + "loss": 0.6835, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.01965961419045925, + "rewards/margins": 0.02842998504638672, + "rewards/rejected": -0.008770370855927467, + "step": 238 + }, + { + "epoch": 0.38, + "learning_rate": 2.1301247771836005e-07, + "logits/chosen": -1.0042330026626587, + "logits/rejected": -1.0530705451965332, + "logps/chosen": -80.02039337158203, + "logps/rejected": -73.7850341796875, + "loss": 0.6769, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.06413745880126953, + "rewards/margins": 0.05847463756799698, + "rewards/rejected": 0.005662822630256414, + "step": 239 + }, + { + "epoch": 0.39, + "learning_rate": 2.13903743315508e-07, + "logits/chosen": -1.196266770362854, + "logits/rejected": -1.23978590965271, + "logps/chosen": -86.27023315429688, + "logps/rejected": -88.421875, + "loss": 0.6932, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.0023431777954101562, + "rewards/margins": 0.03253936767578125, + "rewards/rejected": -0.034882545471191406, + "step": 240 + }, + { + "epoch": 0.39, + "learning_rate": 2.1479500891265594e-07, + "logits/chosen": -1.13138747215271, + "logits/rejected": -1.0820896625518799, + "logps/chosen": -89.10614013671875, + "logps/rejected": -95.83501434326172, + "loss": 0.6848, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07065601646900177, + "rewards/margins": 0.05399303883314133, + "rewards/rejected": 0.01666298136115074, + "step": 241 + }, + { + "epoch": 0.39, + "learning_rate": 2.156862745098039e-07, + "logits/chosen": -1.1835781335830688, + "logits/rejected": -1.2211390733718872, + "logps/chosen": -105.93952941894531, + "logps/rejected": -112.36106872558594, + "loss": 0.6813, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.09962844103574753, + "rewards/margins": 0.11508216708898544, + "rewards/rejected": -0.015453722327947617, + "step": 242 + }, + { + "epoch": 0.39, + "learning_rate": 2.1657754010695188e-07, + "logits/chosen": -0.9580905437469482, + "logits/rejected": -0.9684072136878967, + "logps/chosen": -117.10250091552734, + "logps/rejected": -85.48649597167969, + "loss": 0.6938, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.06850758194923401, + "rewards/margins": 0.08076611161231995, + "rewards/rejected": -0.012258529663085938, + "step": 243 + }, + { + "epoch": 0.39, + "learning_rate": 2.1746880570409982e-07, + "logits/chosen": -1.2071748971939087, + "logits/rejected": -1.1654880046844482, + "logps/chosen": -114.59228515625, + "logps/rejected": -108.928955078125, + "loss": 0.6714, + "rewards/accuracies": 0.25, + "rewards/chosen": 0.06423740088939667, + "rewards/margins": 0.013858222402632236, + "rewards/rejected": 0.050379179418087006, + "step": 244 + }, + { + "epoch": 0.39, + "learning_rate": 2.1836007130124777e-07, + "logits/chosen": -1.4055743217468262, + "logits/rejected": -1.4316047430038452, + "logps/chosen": -83.58515930175781, + "logps/rejected": -83.21244812011719, + "loss": 0.6759, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.11131229251623154, + "rewards/margins": 0.12444982677698135, + "rewards/rejected": -0.013137530535459518, + "step": 245 + }, + { + "epoch": 0.39, + "learning_rate": 2.192513368983957e-07, + "logits/chosen": -1.5405527353286743, + "logits/rejected": -1.4525147676467896, + "logps/chosen": -87.57505798339844, + "logps/rejected": -94.3548583984375, + "loss": 0.6803, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.019840242341160774, + "rewards/margins": -0.0008686091750860214, + "rewards/rejected": -0.018971635028719902, + "step": 246 + }, + { + "epoch": 0.4, + "learning_rate": 2.2014260249554368e-07, + "logits/chosen": -1.1597777605056763, + "logits/rejected": -1.0557482242584229, + "logps/chosen": -87.28373718261719, + "logps/rejected": -108.77473449707031, + "loss": 0.6833, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.029638197273015976, + "rewards/margins": -0.017582416534423828, + "rewards/rejected": 0.047220610082149506, + "step": 247 + }, + { + "epoch": 0.4, + "learning_rate": 2.2103386809269163e-07, + "logits/chosen": -1.150583028793335, + "logits/rejected": -1.199609637260437, + "logps/chosen": -69.0417709350586, + "logps/rejected": -68.70387268066406, + "loss": 0.6839, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.007855510339140892, + "rewards/margins": -0.016690729185938835, + "rewards/rejected": 0.024546241387724876, + "step": 248 + }, + { + "epoch": 0.4, + "learning_rate": 2.2192513368983957e-07, + "logits/chosen": -1.2985551357269287, + "logits/rejected": -1.287369728088379, + "logps/chosen": -85.56787109375, + "logps/rejected": -83.8221206665039, + "loss": 0.6892, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0590452216565609, + "rewards/margins": 0.06892690807580948, + "rewards/rejected": -0.009881686419248581, + "step": 249 + }, + { + "epoch": 0.4, + "learning_rate": 2.2281639928698751e-07, + "logits/chosen": -1.2801727056503296, + "logits/rejected": -1.2373180389404297, + "logps/chosen": -82.9253158569336, + "logps/rejected": -94.94396209716797, + "loss": 0.6849, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.07767181098461151, + "rewards/margins": 0.09010696411132812, + "rewards/rejected": -0.012435151264071465, + "step": 250 + }, + { + "epoch": 0.4, + "learning_rate": 2.2370766488413546e-07, + "logits/chosen": -1.451326847076416, + "logits/rejected": -1.4487104415893555, + "logps/chosen": -70.09224700927734, + "logps/rejected": -81.06734466552734, + "loss": 0.669, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.06737308204174042, + "rewards/margins": 0.018830107524991035, + "rewards/rejected": 0.04854297637939453, + "step": 251 + }, + { + "epoch": 0.4, + "learning_rate": 2.2459893048128343e-07, + "logits/chosen": -1.2333167791366577, + "logits/rejected": -1.2645306587219238, + "logps/chosen": -106.37818908691406, + "logps/rejected": -86.23303985595703, + "loss": 0.6977, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.04568672180175781, + "rewards/margins": -0.08175735175609589, + "rewards/rejected": 0.03607063367962837, + "step": 252 + }, + { + "epoch": 0.41, + "learning_rate": 2.2549019607843137e-07, + "logits/chosen": -1.0072137117385864, + "logits/rejected": -1.0914777517318726, + "logps/chosen": -62.423851013183594, + "logps/rejected": -72.65961456298828, + "loss": 0.6843, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.0005303383804857731, + "rewards/margins": -0.03809156268835068, + "rewards/rejected": 0.03756122663617134, + "step": 253 + }, + { + "epoch": 0.41, + "learning_rate": 2.2638146167557932e-07, + "logits/chosen": -0.885297417640686, + "logits/rejected": -0.91676926612854, + "logps/chosen": -99.86686706542969, + "logps/rejected": -91.1577377319336, + "loss": 0.6999, + "rewards/accuracies": 0.25, + "rewards/chosen": 0.037349700927734375, + "rewards/margins": -0.0669986754655838, + "rewards/rejected": 0.10434836894273758, + "step": 254 + }, + { + "epoch": 0.41, + "learning_rate": 2.2727272727272726e-07, + "logits/chosen": -1.2678251266479492, + "logits/rejected": -1.3043278455734253, + "logps/chosen": -89.47602844238281, + "logps/rejected": -106.13329315185547, + "loss": 0.6728, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.05332384258508682, + "rewards/margins": 0.015943430364131927, + "rewards/rejected": 0.0373804084956646, + "step": 255 + }, + { + "epoch": 0.41, + "learning_rate": 2.281639928698752e-07, + "logits/chosen": -0.9590396881103516, + "logits/rejected": -1.0655536651611328, + "logps/chosen": -84.81697082519531, + "logps/rejected": -97.45384216308594, + "loss": 0.68, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.07383251190185547, + "rewards/margins": 0.040572166442871094, + "rewards/rejected": 0.033260345458984375, + "step": 256 + }, + { + "epoch": 0.41, + "learning_rate": 2.2905525846702317e-07, + "logits/chosen": -1.0711615085601807, + "logits/rejected": -1.066248893737793, + "logps/chosen": -85.9084243774414, + "logps/rejected": -90.72894287109375, + "loss": 0.6811, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.013261604122817516, + "rewards/margins": -0.05296192318201065, + "rewards/rejected": 0.06622352451086044, + "step": 257 + }, + { + "epoch": 0.41, + "learning_rate": 2.2994652406417112e-07, + "logits/chosen": -1.2024062871932983, + "logits/rejected": -1.1923305988311768, + "logps/chosen": -85.6546630859375, + "logps/rejected": -90.01422882080078, + "loss": 0.6926, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.12012015283107758, + "rewards/margins": 0.1374165564775467, + "rewards/rejected": -0.017296411097049713, + "step": 258 + }, + { + "epoch": 0.42, + "learning_rate": 2.3083778966131906e-07, + "logits/chosen": -1.3026809692382812, + "logits/rejected": -1.2740707397460938, + "logps/chosen": -85.69515991210938, + "logps/rejected": -86.67021942138672, + "loss": 0.6743, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.05926590412855148, + "rewards/margins": -0.0055942535400390625, + "rewards/rejected": 0.06486015021800995, + "step": 259 + }, + { + "epoch": 0.42, + "learning_rate": 2.31729055258467e-07, + "logits/chosen": -1.0968396663665771, + "logits/rejected": -1.1124399900436401, + "logps/chosen": -109.53699493408203, + "logps/rejected": -99.47398376464844, + "loss": 0.6889, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.10410556942224503, + "rewards/margins": 0.055524252355098724, + "rewards/rejected": 0.048581313341856, + "step": 260 + }, + { + "epoch": 0.42, + "learning_rate": 2.3262032085561498e-07, + "logits/chosen": -1.2599648237228394, + "logits/rejected": -1.2472862005233765, + "logps/chosen": -88.58628845214844, + "logps/rejected": -98.78579711914062, + "loss": 0.6807, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.09660253673791885, + "rewards/margins": 0.06635971367359161, + "rewards/rejected": 0.03024282492697239, + "step": 261 + }, + { + "epoch": 0.42, + "learning_rate": 2.3351158645276292e-07, + "logits/chosen": -1.2192515134811401, + "logits/rejected": -1.2429025173187256, + "logps/chosen": -113.75950622558594, + "logps/rejected": -97.70962524414062, + "loss": 0.6724, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.04426403343677521, + "rewards/margins": 0.05276508629322052, + "rewards/rejected": -0.008501052856445312, + "step": 262 + }, + { + "epoch": 0.42, + "learning_rate": 2.3440285204991086e-07, + "logits/chosen": -1.1820435523986816, + "logits/rejected": -1.3682022094726562, + "logps/chosen": -94.43738555908203, + "logps/rejected": -118.38250732421875, + "loss": 0.6761, + "rewards/accuracies": 0.25, + "rewards/chosen": 0.11442070454359055, + "rewards/margins": 0.11597920209169388, + "rewards/rejected": -0.0015584924258291721, + "step": 263 + }, + { + "epoch": 0.42, + "learning_rate": 2.352941176470588e-07, + "logits/chosen": -1.3031953573226929, + "logits/rejected": -1.3824700117111206, + "logps/chosen": -76.28455352783203, + "logps/rejected": -111.09174346923828, + "loss": 0.6683, + "rewards/accuracies": 0.25, + "rewards/chosen": 0.05090579763054848, + "rewards/margins": 0.027386857196688652, + "rewards/rejected": 0.023518944159150124, + "step": 264 + }, + { + "epoch": 0.43, + "learning_rate": 2.3618538324420675e-07, + "logits/chosen": -1.47735595703125, + "logits/rejected": -1.4064598083496094, + "logps/chosen": -91.46055603027344, + "logps/rejected": -90.98507690429688, + "loss": 0.6689, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.06539182364940643, + "rewards/margins": 0.05169401317834854, + "rewards/rejected": 0.01369781605899334, + "step": 265 + }, + { + "epoch": 0.43, + "learning_rate": 2.3707664884135472e-07, + "logits/chosen": -1.2291524410247803, + "logits/rejected": -1.2295732498168945, + "logps/chosen": -91.46742248535156, + "logps/rejected": -65.66752624511719, + "loss": 0.6616, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1306879073381424, + "rewards/margins": 0.10604839026927948, + "rewards/rejected": 0.024639511480927467, + "step": 266 + }, + { + "epoch": 0.43, + "learning_rate": 2.3796791443850267e-07, + "logits/chosen": -1.049019694328308, + "logits/rejected": -1.0370814800262451, + "logps/chosen": -85.65200805664062, + "logps/rejected": -127.39057159423828, + "loss": 0.6918, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.01296691969037056, + "rewards/margins": -0.15692175924777985, + "rewards/rejected": 0.1698886901140213, + "step": 267 + }, + { + "epoch": 0.43, + "learning_rate": 2.388591800356506e-07, + "logits/chosen": -1.2315694093704224, + "logits/rejected": -1.1849857568740845, + "logps/chosen": -75.49824523925781, + "logps/rejected": -76.63864135742188, + "loss": 0.6838, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.04204292595386505, + "rewards/margins": 0.009609700180590153, + "rewards/rejected": 0.03243322670459747, + "step": 268 + }, + { + "epoch": 0.43, + "learning_rate": 2.3975044563279855e-07, + "logits/chosen": -1.0717147588729858, + "logits/rejected": -1.1135541200637817, + "logps/chosen": -83.387939453125, + "logps/rejected": -69.6025619506836, + "loss": 0.6914, + "rewards/accuracies": 0.25, + "rewards/chosen": 0.02753486856818199, + "rewards/margins": -0.028724290430545807, + "rewards/rejected": 0.0562591589987278, + "step": 269 + }, + { + "epoch": 0.43, + "learning_rate": 2.406417112299465e-07, + "logits/chosen": -1.1316823959350586, + "logits/rejected": -1.161321997642517, + "logps/chosen": -76.32443237304688, + "logps/rejected": -73.42608642578125, + "loss": 0.6998, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.04347391426563263, + "rewards/margins": -0.016432952135801315, + "rewards/rejected": 0.059906862676143646, + "step": 270 + }, + { + "epoch": 0.43, + "learning_rate": 2.415329768270945e-07, + "logits/chosen": -1.2896268367767334, + "logits/rejected": -1.291379451751709, + "logps/chosen": -80.56018829345703, + "logps/rejected": -122.67534637451172, + "loss": 0.6727, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.14456282556056976, + "rewards/margins": 0.11899366229772568, + "rewards/rejected": 0.02556915022432804, + "step": 271 + }, + { + "epoch": 0.44, + "learning_rate": 2.4242424242424244e-07, + "logits/chosen": -1.3808834552764893, + "logits/rejected": -1.4008418321609497, + "logps/chosen": -81.40470123291016, + "logps/rejected": -71.37447357177734, + "loss": 0.6794, + "rewards/accuracies": 0.25, + "rewards/chosen": 0.04850349575281143, + "rewards/margins": -0.05490855872631073, + "rewards/rejected": 0.10341206192970276, + "step": 272 + }, + { + "epoch": 0.44, + "learning_rate": 2.433155080213904e-07, + "logits/chosen": -1.2501654624938965, + "logits/rejected": -1.1767557859420776, + "logps/chosen": -87.16156005859375, + "logps/rejected": -85.14739990234375, + "loss": 0.6687, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.10303746163845062, + "rewards/margins": 0.05751323327422142, + "rewards/rejected": 0.04552421718835831, + "step": 273 + }, + { + "epoch": 0.44, + "learning_rate": 2.4420677361853833e-07, + "logits/chosen": -1.3462412357330322, + "logits/rejected": -1.3435797691345215, + "logps/chosen": -101.33477783203125, + "logps/rejected": -82.9685287475586, + "loss": 0.6649, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.05540180206298828, + "rewards/margins": -0.03626727685332298, + "rewards/rejected": 0.09166907519102097, + "step": 274 + }, + { + "epoch": 0.44, + "learning_rate": 2.4509803921568627e-07, + "logits/chosen": -1.4355266094207764, + "logits/rejected": -1.392993688583374, + "logps/chosen": -66.99176025390625, + "logps/rejected": -71.37014770507812, + "loss": 0.6819, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.02662467770278454, + "rewards/margins": 0.04171285405755043, + "rewards/rejected": -0.015088176354765892, + "step": 275 + }, + { + "epoch": 0.44, + "learning_rate": 2.459893048128342e-07, + "logits/chosen": -1.0971918106079102, + "logits/rejected": -1.0950058698654175, + "logps/chosen": -113.23728942871094, + "logps/rejected": -85.44772338867188, + "loss": 0.668, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.16021136939525604, + "rewards/margins": 0.1308424025774002, + "rewards/rejected": 0.029368974268436432, + "step": 276 + }, + { + "epoch": 0.44, + "learning_rate": 2.4688057040998216e-07, + "logits/chosen": -1.1685456037521362, + "logits/rejected": -1.21990168094635, + "logps/chosen": -79.86454010009766, + "logps/rejected": -57.833099365234375, + "loss": 0.6648, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.11408253014087677, + "rewards/margins": 0.05703301727771759, + "rewards/rejected": 0.05704950913786888, + "step": 277 + }, + { + "epoch": 0.45, + "learning_rate": 2.477718360071301e-07, + "logits/chosen": -1.162726640701294, + "logits/rejected": -1.2013813257217407, + "logps/chosen": -76.55426788330078, + "logps/rejected": -92.88990783691406, + "loss": 0.6693, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.16482925415039062, + "rewards/margins": 0.10860061645507812, + "rewards/rejected": 0.0562286376953125, + "step": 278 + }, + { + "epoch": 0.45, + "learning_rate": 2.4866310160427805e-07, + "logits/chosen": -1.1953692436218262, + "logits/rejected": -1.2959855794906616, + "logps/chosen": -95.178466796875, + "logps/rejected": -104.57371520996094, + "loss": 0.6627, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.09443511813879013, + "rewards/margins": 0.02760467678308487, + "rewards/rejected": 0.06683044135570526, + "step": 279 + }, + { + "epoch": 0.45, + "learning_rate": 2.49554367201426e-07, + "logits/chosen": -1.2878360748291016, + "logits/rejected": -1.2261579036712646, + "logps/chosen": -65.76364135742188, + "logps/rejected": -74.13623046875, + "loss": 0.6734, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.062433626502752304, + "rewards/margins": 0.008893683552742004, + "rewards/rejected": 0.0535399429500103, + "step": 280 + }, + { + "epoch": 0.45, + "learning_rate": 2.5044563279857393e-07, + "logits/chosen": -1.1199045181274414, + "logits/rejected": -1.0260179042816162, + "logps/chosen": -99.21446228027344, + "logps/rejected": -110.10317993164062, + "loss": 0.6631, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.07889652252197266, + "rewards/margins": 0.08441983163356781, + "rewards/rejected": -0.005523303523659706, + "step": 281 + }, + { + "epoch": 0.45, + "learning_rate": 2.5133689839572193e-07, + "logits/chosen": -1.2737451791763306, + "logits/rejected": -1.3133862018585205, + "logps/chosen": -83.18331909179688, + "logps/rejected": -96.2452392578125, + "loss": 0.6879, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.03858843073248863, + "rewards/margins": -0.051953792572021484, + "rewards/rejected": 0.09054221957921982, + "step": 282 + }, + { + "epoch": 0.45, + "learning_rate": 2.522281639928699e-07, + "logits/chosen": -1.218468427658081, + "logits/rejected": -1.1898638010025024, + "logps/chosen": -75.5515365600586, + "logps/rejected": -73.00982666015625, + "loss": 0.6597, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.14914913475513458, + "rewards/margins": 0.10491552203893661, + "rewards/rejected": 0.04423360899090767, + "step": 283 + }, + { + "epoch": 0.46, + "learning_rate": 2.531194295900178e-07, + "logits/chosen": -1.0265485048294067, + "logits/rejected": -1.0492814779281616, + "logps/chosen": -77.20716857910156, + "logps/rejected": -76.20745849609375, + "loss": 0.6633, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.25422269105911255, + "rewards/margins": 0.11567412316799164, + "rewards/rejected": 0.1385485678911209, + "step": 284 + }, + { + "epoch": 0.46, + "learning_rate": 2.5401069518716576e-07, + "logits/chosen": -1.4330224990844727, + "logits/rejected": -1.5077335834503174, + "logps/chosen": -96.22386169433594, + "logps/rejected": -112.85494995117188, + "loss": 0.691, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.2253652662038803, + "rewards/margins": 0.19391365349292755, + "rewards/rejected": 0.03145160526037216, + "step": 285 + }, + { + "epoch": 0.46, + "learning_rate": 2.549019607843137e-07, + "logits/chosen": -1.150084376335144, + "logits/rejected": -1.0905041694641113, + "logps/chosen": -97.41041564941406, + "logps/rejected": -98.05892181396484, + "loss": 0.66, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.10761947929859161, + "rewards/margins": 0.08484935760498047, + "rewards/rejected": 0.022770121693611145, + "step": 286 + }, + { + "epoch": 0.46, + "learning_rate": 2.5579322638146165e-07, + "logits/chosen": -1.2912417650222778, + "logits/rejected": -1.329816222190857, + "logps/chosen": -83.36727905273438, + "logps/rejected": -68.7192153930664, + "loss": 0.6517, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.11727103590965271, + "rewards/margins": 0.07184983044862747, + "rewards/rejected": 0.04542122036218643, + "step": 287 + }, + { + "epoch": 0.46, + "learning_rate": 2.5668449197860965e-07, + "logits/chosen": -1.1300667524337769, + "logits/rejected": -1.2059800624847412, + "logps/chosen": -109.12397766113281, + "logps/rejected": -91.6461181640625, + "loss": 0.652, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.2241353988647461, + "rewards/margins": 0.09366723895072937, + "rewards/rejected": 0.13046817481517792, + "step": 288 + }, + { + "epoch": 0.46, + "learning_rate": 2.5757575757575754e-07, + "logits/chosen": -1.2609786987304688, + "logits/rejected": -1.2727422714233398, + "logps/chosen": -80.6039047241211, + "logps/rejected": -100.51264953613281, + "loss": 0.6749, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.16222628951072693, + "rewards/margins": 0.05164431780576706, + "rewards/rejected": 0.11058197915554047, + "step": 289 + }, + { + "epoch": 0.47, + "learning_rate": 2.5846702317290554e-07, + "logits/chosen": -1.2211748361587524, + "logits/rejected": -1.2957744598388672, + "logps/chosen": -81.94351196289062, + "logps/rejected": -65.82157897949219, + "loss": 0.6717, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.06339158862829208, + "rewards/margins": 0.019843099638819695, + "rewards/rejected": 0.04354849085211754, + "step": 290 + }, + { + "epoch": 0.47, + "learning_rate": 2.593582887700534e-07, + "logits/chosen": -1.1203752756118774, + "logits/rejected": -1.250995397567749, + "logps/chosen": -71.03523254394531, + "logps/rejected": -103.40093994140625, + "loss": 0.6876, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.07859501987695694, + "rewards/margins": -0.11592496186494827, + "rewards/rejected": 0.1945199966430664, + "step": 291 + }, + { + "epoch": 0.47, + "learning_rate": 2.602495543672014e-07, + "logits/chosen": -1.1564207077026367, + "logits/rejected": -1.2927607297897339, + "logps/chosen": -79.52952575683594, + "logps/rejected": -75.32966613769531, + "loss": 0.6662, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.16619624197483063, + "rewards/margins": 0.02776785008609295, + "rewards/rejected": 0.13842840492725372, + "step": 292 + }, + { + "epoch": 0.47, + "learning_rate": 2.6114081996434937e-07, + "logits/chosen": -1.1339128017425537, + "logits/rejected": -1.2188538312911987, + "logps/chosen": -68.42950439453125, + "logps/rejected": -67.3084487915039, + "loss": 0.6508, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.17174777388572693, + "rewards/margins": 0.14832183718681335, + "rewards/rejected": 0.023425960913300514, + "step": 293 + }, + { + "epoch": 0.47, + "learning_rate": 2.620320855614973e-07, + "logits/chosen": -1.2407300472259521, + "logits/rejected": -1.147329568862915, + "logps/chosen": -72.10598754882812, + "logps/rejected": -74.8037109375, + "loss": 0.6831, + "rewards/accuracies": 0.25, + "rewards/chosen": 0.06572581082582474, + "rewards/margins": -0.001997658982872963, + "rewards/rejected": 0.06772346049547195, + "step": 294 + }, + { + "epoch": 0.47, + "learning_rate": 2.6292335115864525e-07, + "logits/chosen": -0.969772458076477, + "logits/rejected": -1.02313232421875, + "logps/chosen": -109.25942993164062, + "logps/rejected": -96.70529174804688, + "loss": 0.6759, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.103376105427742, + "rewards/margins": -0.022210123017430305, + "rewards/rejected": 0.12558622658252716, + "step": 295 + }, + { + "epoch": 0.48, + "learning_rate": 2.638146167557932e-07, + "logits/chosen": -1.3709019422531128, + "logits/rejected": -1.260079264640808, + "logps/chosen": -88.28411865234375, + "logps/rejected": -86.83455657958984, + "loss": 0.67, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.13328608870506287, + "rewards/margins": 0.020631026476621628, + "rewards/rejected": 0.11265506595373154, + "step": 296 + }, + { + "epoch": 0.48, + "learning_rate": 2.6470588235294114e-07, + "logits/chosen": -1.2452988624572754, + "logits/rejected": -1.314401626586914, + "logps/chosen": -80.23633575439453, + "logps/rejected": -74.17669677734375, + "loss": 0.6598, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.09177089482545853, + "rewards/margins": -0.006086774170398712, + "rewards/rejected": 0.09785766154527664, + "step": 297 + }, + { + "epoch": 0.48, + "learning_rate": 2.6559714795008914e-07, + "logits/chosen": -1.3040896654129028, + "logits/rejected": -1.2823154926300049, + "logps/chosen": -78.94120025634766, + "logps/rejected": -89.09233856201172, + "loss": 0.6608, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.21634846925735474, + "rewards/margins": 0.3118218183517456, + "rewards/rejected": -0.09547338634729385, + "step": 298 + }, + { + "epoch": 0.48, + "learning_rate": 2.6648841354723703e-07, + "logits/chosen": -1.1973069906234741, + "logits/rejected": -1.2220531702041626, + "logps/chosen": -81.25874328613281, + "logps/rejected": -77.93314361572266, + "loss": 0.688, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.13577900826931, + "rewards/margins": 0.040225982666015625, + "rewards/rejected": 0.09555301815271378, + "step": 299 + }, + { + "epoch": 0.48, + "learning_rate": 2.6737967914438503e-07, + "logits/chosen": -1.2609388828277588, + "logits/rejected": -1.2270541191101074, + "logps/chosen": -77.33185577392578, + "logps/rejected": -84.50601196289062, + "loss": 0.678, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.15515795350074768, + "rewards/margins": 0.08373098075389862, + "rewards/rejected": 0.07142696529626846, + "step": 300 + }, + { + "epoch": 0.48, + "learning_rate": 2.6827094474153297e-07, + "logits/chosen": -0.9112197756767273, + "logits/rejected": -0.9008002877235413, + "logps/chosen": -105.34730529785156, + "logps/rejected": -96.0273208618164, + "loss": 0.6654, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.13273297250270844, + "rewards/margins": 0.11414032429456711, + "rewards/rejected": 0.01859264448285103, + "step": 301 + }, + { + "epoch": 0.48, + "learning_rate": 2.691622103386809e-07, + "logits/chosen": -1.1602048873901367, + "logits/rejected": -1.2361183166503906, + "logps/chosen": -69.30332946777344, + "logps/rejected": -59.33778762817383, + "loss": 0.6815, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.10628003627061844, + "rewards/margins": 0.03762922063469887, + "rewards/rejected": 0.06865081936120987, + "step": 302 + }, + { + "epoch": 0.49, + "learning_rate": 2.700534759358289e-07, + "logits/chosen": -1.2832103967666626, + "logits/rejected": -1.2536712884902954, + "logps/chosen": -87.33821868896484, + "logps/rejected": -83.83084106445312, + "loss": 0.6751, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.13247795403003693, + "rewards/margins": 0.062273457646369934, + "rewards/rejected": 0.07020450383424759, + "step": 303 + }, + { + "epoch": 0.49, + "learning_rate": 2.709447415329768e-07, + "logits/chosen": -1.4619874954223633, + "logits/rejected": -1.5157349109649658, + "logps/chosen": -89.15220642089844, + "logps/rejected": -93.2753677368164, + "loss": 0.6698, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.022847745567560196, + "rewards/margins": -0.009965900331735611, + "rewards/rejected": 0.03281364589929581, + "step": 304 + }, + { + "epoch": 0.49, + "learning_rate": 2.718360071301248e-07, + "logits/chosen": -1.135348916053772, + "logits/rejected": -1.1251702308654785, + "logps/chosen": -69.99653625488281, + "logps/rejected": -67.7619400024414, + "loss": 0.6858, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.12758751213550568, + "rewards/margins": 0.026541896164417267, + "rewards/rejected": 0.10104560852050781, + "step": 305 + }, + { + "epoch": 0.49, + "learning_rate": 2.727272727272727e-07, + "logits/chosen": -1.2959814071655273, + "logits/rejected": -1.3914355039596558, + "logps/chosen": -84.02043151855469, + "logps/rejected": -75.88203430175781, + "loss": 0.6495, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.09288692474365234, + "rewards/margins": 0.07419758290052414, + "rewards/rejected": 0.018689343705773354, + "step": 306 + }, + { + "epoch": 0.49, + "learning_rate": 2.736185383244207e-07, + "logits/chosen": -1.3438833951950073, + "logits/rejected": -1.2410920858383179, + "logps/chosen": -103.746337890625, + "logps/rejected": -95.27484893798828, + "loss": 0.7026, + "rewards/accuracies": 0.25, + "rewards/chosen": -0.032576750963926315, + "rewards/margins": -0.10241146385669708, + "rewards/rejected": 0.06983470916748047, + "step": 307 + }, + { + "epoch": 0.49, + "learning_rate": 2.7450980392156863e-07, + "logits/chosen": -1.1224687099456787, + "logits/rejected": -1.2203575372695923, + "logps/chosen": -100.82917785644531, + "logps/rejected": -104.21343231201172, + "loss": 0.6879, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.22812862694263458, + "rewards/margins": 0.18242168426513672, + "rewards/rejected": 0.045706942677497864, + "step": 308 + }, + { + "epoch": 0.5, + "learning_rate": 2.754010695187166e-07, + "logits/chosen": -1.0806572437286377, + "logits/rejected": -1.0251590013504028, + "logps/chosen": -88.9085693359375, + "logps/rejected": -99.7394790649414, + "loss": 0.6612, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.09342250227928162, + "rewards/margins": 0.040767572820186615, + "rewards/rejected": 0.0526549331843853, + "step": 309 + }, + { + "epoch": 0.5, + "learning_rate": 2.762923351158645e-07, + "logits/chosen": -1.1136000156402588, + "logits/rejected": -1.153560996055603, + "logps/chosen": -74.92514038085938, + "logps/rejected": -68.93941497802734, + "loss": 0.6677, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.06848563998937607, + "rewards/margins": -0.012454701587557793, + "rewards/rejected": 0.08094033598899841, + "step": 310 + }, + { + "epoch": 0.5, + "learning_rate": 2.7718360071301246e-07, + "logits/chosen": -1.2570738792419434, + "logits/rejected": -1.236356496810913, + "logps/chosen": -65.06344604492188, + "logps/rejected": -58.971641540527344, + "loss": 0.6608, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07606229931116104, + "rewards/margins": 0.06727390736341476, + "rewards/rejected": 0.008788393810391426, + "step": 311 + }, + { + "epoch": 0.5, + "learning_rate": 2.780748663101604e-07, + "logits/chosen": -1.2095311880111694, + "logits/rejected": -1.2186963558197021, + "logps/chosen": -104.74409484863281, + "logps/rejected": -83.13262939453125, + "loss": 0.647, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.31059589982032776, + "rewards/margins": 0.2990642488002777, + "rewards/rejected": 0.011531639844179153, + "step": 312 + }, + { + "epoch": 0.5, + "learning_rate": 2.789661319073084e-07, + "logits/chosen": -1.143808364868164, + "logits/rejected": -1.0819718837738037, + "logps/chosen": -67.19629669189453, + "logps/rejected": -77.13871002197266, + "loss": 0.6707, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.25130119919776917, + "rewards/margins": 0.24421121180057526, + "rewards/rejected": 0.007089996710419655, + "step": 313 + }, + { + "epoch": 0.5, + "learning_rate": 2.798573975044563e-07, + "logits/chosen": -1.1860305070877075, + "logits/rejected": -1.2017343044281006, + "logps/chosen": -72.12841033935547, + "logps/rejected": -87.95671081542969, + "loss": 0.6534, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.14963217079639435, + "rewards/margins": 0.12384997308254242, + "rewards/rejected": 0.025782205164432526, + "step": 314 + }, + { + "epoch": 0.51, + "learning_rate": 2.807486631016043e-07, + "logits/chosen": -1.1104300022125244, + "logits/rejected": -1.1325981616973877, + "logps/chosen": -75.87508392333984, + "logps/rejected": -100.69789123535156, + "loss": 0.6678, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.049971774220466614, + "rewards/margins": -0.07510032504796982, + "rewards/rejected": 0.12507209181785583, + "step": 315 + }, + { + "epoch": 0.51, + "learning_rate": 2.816399286987522e-07, + "logits/chosen": -1.3556714057922363, + "logits/rejected": -1.3343536853790283, + "logps/chosen": -94.25479888916016, + "logps/rejected": -94.40919494628906, + "loss": 0.684, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.28538981080055237, + "rewards/margins": 0.2286706119775772, + "rewards/rejected": 0.056719209998846054, + "step": 316 + }, + { + "epoch": 0.51, + "learning_rate": 2.825311942959002e-07, + "logits/chosen": -1.2946072816848755, + "logits/rejected": -1.2683013677597046, + "logps/chosen": -78.06330108642578, + "logps/rejected": -65.232421875, + "loss": 0.6474, + "rewards/accuracies": 0.25, + "rewards/chosen": -0.0706791877746582, + "rewards/margins": -0.03911658376455307, + "rewards/rejected": -0.03156261518597603, + "step": 317 + }, + { + "epoch": 0.51, + "learning_rate": 2.834224598930481e-07, + "logits/chosen": -1.3057444095611572, + "logits/rejected": -1.3305914402008057, + "logps/chosen": -67.47688293457031, + "logps/rejected": -77.79869079589844, + "loss": 0.6454, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3435845673084259, + "rewards/margins": 0.2820982336997986, + "rewards/rejected": 0.061486341059207916, + "step": 318 + }, + { + "epoch": 0.51, + "learning_rate": 2.8431372549019607e-07, + "logits/chosen": -1.2416703701019287, + "logits/rejected": -1.3033418655395508, + "logps/chosen": -88.55001068115234, + "logps/rejected": -70.91754913330078, + "loss": 0.6809, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.21004284918308258, + "rewards/margins": 0.22509776055812836, + "rewards/rejected": -0.015054893679916859, + "step": 319 + }, + { + "epoch": 0.51, + "learning_rate": 2.85204991087344e-07, + "logits/chosen": -1.1377530097961426, + "logits/rejected": -1.1824742555618286, + "logps/chosen": -73.46063232421875, + "logps/rejected": -100.76456451416016, + "loss": 0.6489, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.13749217987060547, + "rewards/margins": 0.0714053213596344, + "rewards/rejected": 0.06608686596155167, + "step": 320 + }, + { + "epoch": 0.52, + "learning_rate": 2.8609625668449196e-07, + "logits/chosen": -1.2777680158615112, + "logits/rejected": -1.2261133193969727, + "logps/chosen": -83.79942321777344, + "logps/rejected": -97.80702209472656, + "loss": 0.6428, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.0702640563249588, + "rewards/margins": 0.031407542526721954, + "rewards/rejected": 0.03885651007294655, + "step": 321 + }, + { + "epoch": 0.52, + "learning_rate": 2.869875222816399e-07, + "logits/chosen": -1.3164100646972656, + "logits/rejected": -1.3429471254348755, + "logps/chosen": -86.61787414550781, + "logps/rejected": -87.35304260253906, + "loss": 0.645, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.18511180579662323, + "rewards/margins": 0.09819040447473526, + "rewards/rejected": 0.08692140877246857, + "step": 322 + }, + { + "epoch": 0.52, + "learning_rate": 2.878787878787879e-07, + "logits/chosen": -1.2861645221710205, + "logits/rejected": -1.3452929258346558, + "logps/chosen": -82.19828033447266, + "logps/rejected": -93.90924072265625, + "loss": 0.6665, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.13947677612304688, + "rewards/margins": 0.037457745522260666, + "rewards/rejected": 0.10201903432607651, + "step": 323 + }, + { + "epoch": 0.52, + "learning_rate": 2.887700534759358e-07, + "logits/chosen": -1.378412127494812, + "logits/rejected": -1.3714053630828857, + "logps/chosen": -79.28540802001953, + "logps/rejected": -91.89629364013672, + "loss": 0.659, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.07279205322265625, + "rewards/margins": 0.10650424659252167, + "rewards/rejected": -0.03371219336986542, + "step": 324 + }, + { + "epoch": 0.52, + "learning_rate": 2.896613190730838e-07, + "logits/chosen": -0.9697253108024597, + "logits/rejected": -0.9202805757522583, + "logps/chosen": -92.09651184082031, + "logps/rejected": -92.39553833007812, + "loss": 0.6525, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.1783168762922287, + "rewards/margins": 0.07809047400951385, + "rewards/rejected": 0.10022640228271484, + "step": 325 + }, + { + "epoch": 0.52, + "learning_rate": 2.905525846702317e-07, + "logits/chosen": -1.0326021909713745, + "logits/rejected": -1.1439400911331177, + "logps/chosen": -91.49354553222656, + "logps/rejected": -96.96902465820312, + "loss": 0.6611, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.06546631455421448, + "rewards/margins": 0.0043199509382247925, + "rewards/rejected": 0.061146363615989685, + "step": 326 + }, + { + "epoch": 0.52, + "learning_rate": 2.9144385026737967e-07, + "logits/chosen": -1.3942519426345825, + "logits/rejected": -1.3838871717453003, + "logps/chosen": -107.79054260253906, + "logps/rejected": -100.28005981445312, + "loss": 0.6734, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.06261520087718964, + "rewards/margins": -0.008522801101207733, + "rewards/rejected": 0.07113800197839737, + "step": 327 + }, + { + "epoch": 0.53, + "learning_rate": 2.923351158645276e-07, + "logits/chosen": -1.0869139432907104, + "logits/rejected": -1.1671937704086304, + "logps/chosen": -86.10123443603516, + "logps/rejected": -65.01278686523438, + "loss": 0.6586, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.15183678269386292, + "rewards/margins": 0.17026816308498383, + "rewards/rejected": -0.018431376665830612, + "step": 328 + }, + { + "epoch": 0.53, + "learning_rate": 2.9322638146167556e-07, + "logits/chosen": -1.3306090831756592, + "logits/rejected": -1.2829090356826782, + "logps/chosen": -79.16413879394531, + "logps/rejected": -79.23770141601562, + "loss": 0.665, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.056937411427497864, + "rewards/margins": 0.04890613257884979, + "rewards/rejected": 0.008031275123357773, + "step": 329 + }, + { + "epoch": 0.53, + "learning_rate": 2.941176470588235e-07, + "logits/chosen": -1.2391971349716187, + "logits/rejected": -1.182995319366455, + "logps/chosen": -86.610595703125, + "logps/rejected": -89.02350616455078, + "loss": 0.6531, + "rewards/accuracies": 0.25, + "rewards/chosen": 0.0064332978799939156, + "rewards/margins": -0.06546249985694885, + "rewards/rejected": 0.0718957930803299, + "step": 330 + }, + { + "epoch": 0.53, + "learning_rate": 2.9500891265597145e-07, + "logits/chosen": -1.4625813961029053, + "logits/rejected": -1.3253648281097412, + "logps/chosen": -82.2672119140625, + "logps/rejected": -105.3332290649414, + "loss": 0.6673, + "rewards/accuracies": 0.25, + "rewards/chosen": 0.058600619435310364, + "rewards/margins": -0.005243297666311264, + "rewards/rejected": 0.06384392082691193, + "step": 331 + }, + { + "epoch": 0.53, + "learning_rate": 2.959001782531194e-07, + "logits/chosen": -1.2394137382507324, + "logits/rejected": -1.2723644971847534, + "logps/chosen": -77.5280990600586, + "logps/rejected": -77.68917083740234, + "loss": 0.6611, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.07767380774021149, + "rewards/margins": -0.0977436974644661, + "rewards/rejected": 0.02006988599896431, + "step": 332 + }, + { + "epoch": 0.53, + "learning_rate": 2.967914438502674e-07, + "logits/chosen": -1.2740813493728638, + "logits/rejected": -1.25852632522583, + "logps/chosen": -86.94034576416016, + "logps/rejected": -92.96407318115234, + "loss": 0.6438, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.15614834427833557, + "rewards/margins": 0.13373985886573792, + "rewards/rejected": 0.022408489137887955, + "step": 333 + }, + { + "epoch": 0.54, + "learning_rate": 2.976827094474153e-07, + "logits/chosen": -1.0233662128448486, + "logits/rejected": -1.0137670040130615, + "logps/chosen": -73.70561981201172, + "logps/rejected": -68.85248565673828, + "loss": 0.6815, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07474251091480255, + "rewards/margins": 0.10938635468482971, + "rewards/rejected": -0.03464384004473686, + "step": 334 + }, + { + "epoch": 0.54, + "learning_rate": 2.985739750445633e-07, + "logits/chosen": -1.344076156616211, + "logits/rejected": -1.356690526008606, + "logps/chosen": -74.8858642578125, + "logps/rejected": -87.20622253417969, + "loss": 0.6723, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.05453595891594887, + "rewards/margins": 0.09962186217308044, + "rewards/rejected": -0.045085906982421875, + "step": 335 + }, + { + "epoch": 0.54, + "learning_rate": 2.9946524064171117e-07, + "logits/chosen": -1.0446094274520874, + "logits/rejected": -1.1010466814041138, + "logps/chosen": -94.72644805908203, + "logps/rejected": -95.78164672851562, + "loss": 0.6672, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.2673872113227844, + "rewards/margins": 0.1685742437839508, + "rewards/rejected": 0.09881296008825302, + "step": 336 + }, + { + "epoch": 0.54, + "learning_rate": 3.0035650623885916e-07, + "logits/chosen": -1.3309125900268555, + "logits/rejected": -1.325650930404663, + "logps/chosen": -57.150413513183594, + "logps/rejected": -85.42903137207031, + "loss": 0.6582, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1457572877407074, + "rewards/margins": 0.17408093810081482, + "rewards/rejected": -0.02832365222275257, + "step": 337 + }, + { + "epoch": 0.54, + "learning_rate": 3.0124777183600716e-07, + "logits/chosen": -1.131293773651123, + "logits/rejected": -1.1094197034835815, + "logps/chosen": -70.03778076171875, + "logps/rejected": -114.54940795898438, + "loss": 0.6316, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.05447092279791832, + "rewards/margins": 0.07138299942016602, + "rewards/rejected": -0.016912082210183144, + "step": 338 + }, + { + "epoch": 0.54, + "learning_rate": 3.0213903743315505e-07, + "logits/chosen": -1.033536672592163, + "logits/rejected": -1.0619078874588013, + "logps/chosen": -104.26609802246094, + "logps/rejected": -97.17399597167969, + "loss": 0.6977, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.19521158933639526, + "rewards/margins": 0.09221534430980682, + "rewards/rejected": 0.10299625992774963, + "step": 339 + }, + { + "epoch": 0.55, + "learning_rate": 3.0303030303030305e-07, + "logits/chosen": -1.4197341203689575, + "logits/rejected": -1.4034355878829956, + "logps/chosen": -95.6905517578125, + "logps/rejected": -105.3212661743164, + "loss": 0.6722, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.05179291218519211, + "rewards/margins": 0.00024356693029403687, + "rewards/rejected": 0.05154934152960777, + "step": 340 + }, + { + "epoch": 0.55, + "learning_rate": 3.0392156862745094e-07, + "logits/chosen": -1.1902062892913818, + "logits/rejected": -1.191072702407837, + "logps/chosen": -62.47047424316406, + "logps/rejected": -75.2205810546875, + "loss": 0.6456, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.1127171516418457, + "rewards/margins": 0.1579171121120453, + "rewards/rejected": -0.04519996792078018, + "step": 341 + }, + { + "epoch": 0.55, + "learning_rate": 3.0481283422459894e-07, + "logits/chosen": -1.2813851833343506, + "logits/rejected": -1.3402589559555054, + "logps/chosen": -79.7896728515625, + "logps/rejected": -83.75751495361328, + "loss": 0.6686, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.039339445531368256, + "rewards/margins": -0.10005035996437073, + "rewards/rejected": 0.13938981294631958, + "step": 342 + }, + { + "epoch": 0.55, + "learning_rate": 3.057040998217469e-07, + "logits/chosen": -1.1492787599563599, + "logits/rejected": -1.1995255947113037, + "logps/chosen": -70.57642364501953, + "logps/rejected": -81.1024398803711, + "loss": 0.6584, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.14896260201931, + "rewards/margins": 0.16111516952514648, + "rewards/rejected": -0.012152578681707382, + "step": 343 + }, + { + "epoch": 0.55, + "learning_rate": 3.065953654188948e-07, + "logits/chosen": -1.135663628578186, + "logits/rejected": -1.2606675624847412, + "logps/chosen": -69.75550079345703, + "logps/rejected": -90.71636199951172, + "loss": 0.682, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.08117876201868057, + "rewards/margins": 0.11775828152894974, + "rewards/rejected": -0.03657951578497887, + "step": 344 + }, + { + "epoch": 0.55, + "learning_rate": 3.0748663101604277e-07, + "logits/chosen": -1.1516859531402588, + "logits/rejected": -1.1780242919921875, + "logps/chosen": -121.22928619384766, + "logps/rejected": -77.4291763305664, + "loss": 0.654, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.12420845031738281, + "rewards/margins": 0.0853872299194336, + "rewards/rejected": 0.03882122039794922, + "step": 345 + }, + { + "epoch": 0.56, + "learning_rate": 3.083778966131907e-07, + "logits/chosen": -1.165571928024292, + "logits/rejected": -1.129453420639038, + "logps/chosen": -85.010498046875, + "logps/rejected": -78.66081237792969, + "loss": 0.6905, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.11477203667163849, + "rewards/margins": -0.020890049636363983, + "rewards/rejected": 0.13566207885742188, + "step": 346 + }, + { + "epoch": 0.56, + "learning_rate": 3.0926916221033866e-07, + "logits/chosen": -1.1392394304275513, + "logits/rejected": -1.2081985473632812, + "logps/chosen": -111.85536193847656, + "logps/rejected": -112.08555603027344, + "loss": 0.6448, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.12098044902086258, + "rewards/margins": 0.10723160207271576, + "rewards/rejected": 0.013748839497566223, + "step": 347 + }, + { + "epoch": 0.56, + "learning_rate": 3.1016042780748665e-07, + "logits/chosen": -1.1994439363479614, + "logits/rejected": -1.2931379079818726, + "logps/chosen": -87.59042358398438, + "logps/rejected": -89.93650817871094, + "loss": 0.637, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.18579445779323578, + "rewards/margins": 0.2058069258928299, + "rewards/rejected": -0.020012473687529564, + "step": 348 + }, + { + "epoch": 0.56, + "learning_rate": 3.1105169340463454e-07, + "logits/chosen": -1.197122573852539, + "logits/rejected": -1.2733999490737915, + "logps/chosen": -96.97772216796875, + "logps/rejected": -108.40806579589844, + "loss": 0.6369, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.42491811513900757, + "rewards/margins": 0.33811426162719727, + "rewards/rejected": 0.08680381625890732, + "step": 349 + }, + { + "epoch": 0.56, + "learning_rate": 3.1194295900178254e-07, + "logits/chosen": -1.389410376548767, + "logits/rejected": -1.4083772897720337, + "logps/chosen": -79.16168212890625, + "logps/rejected": -74.05593872070312, + "loss": 0.6676, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.1258258819580078, + "rewards/margins": 0.1736762970685959, + "rewards/rejected": -0.04785041883587837, + "step": 350 + }, + { + "epoch": 0.56, + "learning_rate": 3.1283422459893043e-07, + "logits/chosen": -1.2818942070007324, + "logits/rejected": -1.251924991607666, + "logps/chosen": -100.95098876953125, + "logps/rejected": -86.05398559570312, + "loss": 0.6439, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.21145668625831604, + "rewards/margins": 0.4483541250228882, + "rewards/rejected": -0.23689746856689453, + "step": 351 + }, + { + "epoch": 0.57, + "learning_rate": 3.1372549019607843e-07, + "logits/chosen": -1.2242491245269775, + "logits/rejected": -1.2055611610412598, + "logps/chosen": -93.73908996582031, + "logps/rejected": -91.90907287597656, + "loss": 0.6419, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.2045745849609375, + "rewards/margins": 0.07897510379552841, + "rewards/rejected": 0.1255994737148285, + "step": 352 + }, + { + "epoch": 0.57, + "learning_rate": 3.1461675579322637e-07, + "logits/chosen": -1.1034070253372192, + "logits/rejected": -1.047081708908081, + "logps/chosen": -95.1728515625, + "logps/rejected": -87.18228149414062, + "loss": 0.6929, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.05260887369513512, + "rewards/margins": -0.03103962168097496, + "rewards/rejected": 0.08364849537611008, + "step": 353 + }, + { + "epoch": 0.57, + "learning_rate": 3.155080213903743e-07, + "logits/chosen": -1.3019390106201172, + "logits/rejected": -1.3352599143981934, + "logps/chosen": -92.92581176757812, + "logps/rejected": -76.58450317382812, + "loss": 0.6532, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.2540329694747925, + "rewards/margins": 0.22556601464748383, + "rewards/rejected": 0.028466984629631042, + "step": 354 + }, + { + "epoch": 0.57, + "learning_rate": 3.1639928698752226e-07, + "logits/chosen": -1.0122573375701904, + "logits/rejected": -1.0527276992797852, + "logps/chosen": -80.72872924804688, + "logps/rejected": -92.98417663574219, + "loss": 0.6332, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.10311374813318253, + "rewards/margins": 0.11633892357349396, + "rewards/rejected": -0.013225173577666283, + "step": 355 + }, + { + "epoch": 0.57, + "learning_rate": 3.172905525846702e-07, + "logits/chosen": -1.4666850566864014, + "logits/rejected": -1.3761698007583618, + "logps/chosen": -82.38252258300781, + "logps/rejected": -90.35575866699219, + "loss": 0.6527, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.08907909691333771, + "rewards/margins": 0.219102680683136, + "rewards/rejected": -0.3081817626953125, + "step": 356 + }, + { + "epoch": 0.57, + "learning_rate": 3.1818181818181815e-07, + "logits/chosen": -1.2391200065612793, + "logits/rejected": -1.1638773679733276, + "logps/chosen": -82.595703125, + "logps/rejected": -92.2404556274414, + "loss": 0.6453, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1964838057756424, + "rewards/margins": 0.11183013021945953, + "rewards/rejected": 0.08465366065502167, + "step": 357 + }, + { + "epoch": 0.57, + "learning_rate": 3.1907308377896615e-07, + "logits/chosen": -1.2415614128112793, + "logits/rejected": -1.3072352409362793, + "logps/chosen": -82.42324829101562, + "logps/rejected": -79.86811828613281, + "loss": 0.6147, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3321830928325653, + "rewards/margins": 0.3355535566806793, + "rewards/rejected": -0.003370479680597782, + "step": 358 + }, + { + "epoch": 0.58, + "learning_rate": 3.1996434937611404e-07, + "logits/chosen": -1.4379420280456543, + "logits/rejected": -1.4619537591934204, + "logps/chosen": -96.29267883300781, + "logps/rejected": -99.57647705078125, + "loss": 0.6722, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.10045166313648224, + "rewards/margins": -0.1403556764125824, + "rewards/rejected": 0.039904020726680756, + "step": 359 + }, + { + "epoch": 0.58, + "learning_rate": 3.2085561497326203e-07, + "logits/chosen": -1.025461196899414, + "logits/rejected": -0.9652552604675293, + "logps/chosen": -66.25369262695312, + "logps/rejected": -86.99246215820312, + "loss": 0.6554, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.09439974278211594, + "rewards/margins": 0.05792675167322159, + "rewards/rejected": 0.03647299110889435, + "step": 360 + }, + { + "epoch": 0.58, + "learning_rate": 3.2174688057041e-07, + "logits/chosen": -1.1521224975585938, + "logits/rejected": -1.2380375862121582, + "logps/chosen": -97.18840789794922, + "logps/rejected": -99.73968505859375, + "loss": 0.6752, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.04388751834630966, + "rewards/margins": -0.15880241990089417, + "rewards/rejected": 0.1149148941040039, + "step": 361 + }, + { + "epoch": 0.58, + "learning_rate": 3.226381461675579e-07, + "logits/chosen": -1.208679437637329, + "logits/rejected": -1.185349702835083, + "logps/chosen": -72.53080749511719, + "logps/rejected": -80.53642272949219, + "loss": 0.661, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2304014265537262, + "rewards/margins": 0.2845538258552551, + "rewards/rejected": -0.05415239557623863, + "step": 362 + }, + { + "epoch": 0.58, + "learning_rate": 3.2352941176470586e-07, + "logits/chosen": -1.336916446685791, + "logits/rejected": -1.3191590309143066, + "logps/chosen": -94.59489440917969, + "logps/rejected": -86.10430908203125, + "loss": 0.6473, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.11104259639978409, + "rewards/margins": 0.09010010212659836, + "rewards/rejected": -0.20114269852638245, + "step": 363 + }, + { + "epoch": 0.58, + "learning_rate": 3.244206773618538e-07, + "logits/chosen": -1.2220344543457031, + "logits/rejected": -1.233252763748169, + "logps/chosen": -68.93061828613281, + "logps/rejected": -66.35883331298828, + "loss": 0.6541, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.07871419191360474, + "rewards/margins": 0.13881589472293854, + "rewards/rejected": -0.0601017028093338, + "step": 364 + }, + { + "epoch": 0.59, + "learning_rate": 3.2531194295900175e-07, + "logits/chosen": -1.2429455518722534, + "logits/rejected": -1.1710143089294434, + "logps/chosen": -85.29800415039062, + "logps/rejected": -92.43489837646484, + "loss": 0.6475, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.0367710143327713, + "rewards/margins": -0.1528114378452301, + "rewards/rejected": 0.1160404235124588, + "step": 365 + }, + { + "epoch": 0.59, + "learning_rate": 3.2620320855614975e-07, + "logits/chosen": -1.3044629096984863, + "logits/rejected": -1.3333033323287964, + "logps/chosen": -68.10717010498047, + "logps/rejected": -60.813785552978516, + "loss": 0.6339, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2690240740776062, + "rewards/margins": 0.3610461354255676, + "rewards/rejected": -0.09202203899621964, + "step": 366 + }, + { + "epoch": 0.59, + "learning_rate": 3.2709447415329764e-07, + "logits/chosen": -1.2491205930709839, + "logits/rejected": -1.2234408855438232, + "logps/chosen": -89.05366516113281, + "logps/rejected": -91.87873840332031, + "loss": 0.6682, + "rewards/accuracies": 0.25, + "rewards/chosen": 0.10182952880859375, + "rewards/margins": -0.040028758347034454, + "rewards/rejected": 0.1418582946062088, + "step": 367 + }, + { + "epoch": 0.59, + "learning_rate": 3.2798573975044564e-07, + "logits/chosen": -1.2973251342773438, + "logits/rejected": -1.288782000541687, + "logps/chosen": -101.07843017578125, + "logps/rejected": -106.12442016601562, + "loss": 0.6715, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.15144634246826172, + "rewards/margins": -0.18129310011863708, + "rewards/rejected": 0.3327394425868988, + "step": 368 + }, + { + "epoch": 0.59, + "learning_rate": 3.2887700534759353e-07, + "logits/chosen": -1.32097589969635, + "logits/rejected": -1.3288911581039429, + "logps/chosen": -83.82524108886719, + "logps/rejected": -89.81623840332031, + "loss": 0.6705, + "rewards/accuracies": 0.25, + "rewards/chosen": 0.13176709413528442, + "rewards/margins": -0.039643287658691406, + "rewards/rejected": 0.17141036689281464, + "step": 369 + }, + { + "epoch": 0.59, + "learning_rate": 3.297682709447415e-07, + "logits/chosen": -1.2945737838745117, + "logits/rejected": -1.3219444751739502, + "logps/chosen": -81.30706787109375, + "logps/rejected": -67.80621337890625, + "loss": 0.6445, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.1486591398715973, + "rewards/margins": 0.10006695240736008, + "rewards/rejected": 0.04859218746423721, + "step": 370 + }, + { + "epoch": 0.6, + "learning_rate": 3.3065953654188947e-07, + "logits/chosen": -1.2011052370071411, + "logits/rejected": -1.3189787864685059, + "logps/chosen": -83.37198638916016, + "logps/rejected": -86.59442901611328, + "loss": 0.6368, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.03623456880450249, + "rewards/margins": 0.02062254212796688, + "rewards/rejected": 0.015612030401825905, + "step": 371 + }, + { + "epoch": 0.6, + "learning_rate": 3.315508021390374e-07, + "logits/chosen": -1.1101040840148926, + "logits/rejected": -1.2037853002548218, + "logps/chosen": -90.63269805908203, + "logps/rejected": -104.23745727539062, + "loss": 0.6407, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.013490297831594944, + "rewards/margins": 0.019208140671253204, + "rewards/rejected": -0.032698437571525574, + "step": 372 + }, + { + "epoch": 0.6, + "learning_rate": 3.3244206773618536e-07, + "logits/chosen": -1.4860928058624268, + "logits/rejected": -1.4265408515930176, + "logps/chosen": -55.30668640136719, + "logps/rejected": -53.76654052734375, + "loss": 0.6303, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.069188691675663, + "rewards/margins": -0.09972162544727325, + "rewards/rejected": 0.03053293190896511, + "step": 373 + }, + { + "epoch": 0.6, + "learning_rate": 3.333333333333333e-07, + "logits/chosen": -1.3011339902877808, + "logits/rejected": -1.298499345779419, + "logps/chosen": -85.10960388183594, + "logps/rejected": -76.63179016113281, + "loss": 0.6355, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.06693020462989807, + "rewards/margins": 0.14191971719264984, + "rewards/rejected": -0.07498951256275177, + "step": 374 + }, + { + "epoch": 0.6, + "learning_rate": 3.342245989304813e-07, + "logits/chosen": -1.3101189136505127, + "logits/rejected": -1.1973093748092651, + "logps/chosen": -88.72516632080078, + "logps/rejected": -98.17828369140625, + "loss": 0.6462, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.3494857847690582, + "rewards/margins": 0.3560778796672821, + "rewards/rejected": -0.006592085584998131, + "step": 375 + }, + { + "epoch": 0.6, + "learning_rate": 3.3511586452762924e-07, + "logits/chosen": -1.1811562776565552, + "logits/rejected": -1.166032314300537, + "logps/chosen": -76.493896484375, + "logps/rejected": -82.70040893554688, + "loss": 0.7138, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.057799145579338074, + "rewards/margins": 0.02832164615392685, + "rewards/rejected": 0.029477499425411224, + "step": 376 + }, + { + "epoch": 0.61, + "learning_rate": 3.360071301247772e-07, + "logits/chosen": -1.093819499015808, + "logits/rejected": -1.0885708332061768, + "logps/chosen": -69.27837371826172, + "logps/rejected": -78.768798828125, + "loss": 0.684, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.04837637022137642, + "rewards/margins": 0.1414116770029068, + "rewards/rejected": -0.09303531795740128, + "step": 377 + }, + { + "epoch": 0.61, + "learning_rate": 3.3689839572192513e-07, + "logits/chosen": -1.0821808576583862, + "logits/rejected": -1.220176100730896, + "logps/chosen": -86.40784454345703, + "logps/rejected": -76.07899475097656, + "loss": 0.608, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.01283950824290514, + "rewards/margins": 0.0928201675415039, + "rewards/rejected": -0.07998065650463104, + "step": 378 + }, + { + "epoch": 0.61, + "learning_rate": 3.3778966131907307e-07, + "logits/chosen": -1.3450623750686646, + "logits/rejected": -1.2655870914459229, + "logps/chosen": -86.39474487304688, + "logps/rejected": -88.1534194946289, + "loss": 0.6891, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.19252319633960724, + "rewards/margins": 0.10598678886890411, + "rewards/rejected": 0.08653640747070312, + "step": 379 + }, + { + "epoch": 0.61, + "learning_rate": 3.38680926916221e-07, + "logits/chosen": -1.100825548171997, + "logits/rejected": -1.1182647943496704, + "logps/chosen": -81.43254852294922, + "logps/rejected": -79.96047973632812, + "loss": 0.6812, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0992332473397255, + "rewards/margins": 0.2478349804878235, + "rewards/rejected": -0.1486017256975174, + "step": 380 + }, + { + "epoch": 0.61, + "learning_rate": 3.39572192513369e-07, + "logits/chosen": -1.1832740306854248, + "logits/rejected": -1.0460376739501953, + "logps/chosen": -110.68850708007812, + "logps/rejected": -83.01714324951172, + "loss": 0.6388, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.13920727372169495, + "rewards/margins": 0.2141868621110916, + "rewards/rejected": -0.07497958838939667, + "step": 381 + }, + { + "epoch": 0.61, + "learning_rate": 3.404634581105169e-07, + "logits/chosen": -1.2313740253448486, + "logits/rejected": -1.2706027030944824, + "logps/chosen": -67.66912078857422, + "logps/rejected": -82.19145965576172, + "loss": 0.6094, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.11870632320642471, + "rewards/margins": 0.3881695866584778, + "rewards/rejected": -0.2694632411003113, + "step": 382 + }, + { + "epoch": 0.61, + "learning_rate": 3.413547237076649e-07, + "logits/chosen": -1.3749035596847534, + "logits/rejected": -1.3463488817214966, + "logps/chosen": -87.93421936035156, + "logps/rejected": -92.43940734863281, + "loss": 0.6267, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.0707298293709755, + "rewards/margins": 0.17520791292190552, + "rewards/rejected": -0.24593773484230042, + "step": 383 + }, + { + "epoch": 0.62, + "learning_rate": 3.422459893048128e-07, + "logits/chosen": -1.3163602352142334, + "logits/rejected": -1.2924959659576416, + "logps/chosen": -81.60529327392578, + "logps/rejected": -74.29386138916016, + "loss": 0.6752, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.12190552055835724, + "rewards/margins": 0.3265104591846466, + "rewards/rejected": -0.20460492372512817, + "step": 384 + }, + { + "epoch": 0.62, + "learning_rate": 3.431372549019608e-07, + "logits/chosen": -1.3704231977462769, + "logits/rejected": -1.4214121103286743, + "logps/chosen": -97.04386901855469, + "logps/rejected": -94.88743591308594, + "loss": 0.6235, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1238035261631012, + "rewards/margins": 0.15635471045970917, + "rewards/rejected": -0.03255119174718857, + "step": 385 + }, + { + "epoch": 0.62, + "learning_rate": 3.4402852049910873e-07, + "logits/chosen": -1.38521409034729, + "logits/rejected": -1.3705888986587524, + "logps/chosen": -71.66741943359375, + "logps/rejected": -98.85029602050781, + "loss": 0.616, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.14586620032787323, + "rewards/margins": 0.3557409346103668, + "rewards/rejected": -0.2098747342824936, + "step": 386 + }, + { + "epoch": 0.62, + "learning_rate": 3.449197860962567e-07, + "logits/chosen": -1.3385391235351562, + "logits/rejected": -1.2553272247314453, + "logps/chosen": -94.73194885253906, + "logps/rejected": -103.12027740478516, + "loss": 0.6515, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.29380762577056885, + "rewards/margins": 0.25849783420562744, + "rewards/rejected": 0.035309791564941406, + "step": 387 + }, + { + "epoch": 0.62, + "learning_rate": 3.458110516934046e-07, + "logits/chosen": -1.2103568315505981, + "logits/rejected": -1.2251718044281006, + "logps/chosen": -68.93494415283203, + "logps/rejected": -112.6019287109375, + "loss": 0.7005, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.017379092052578926, + "rewards/margins": 0.02281179279088974, + "rewards/rejected": -0.0054327016696333885, + "step": 388 + }, + { + "epoch": 0.62, + "learning_rate": 3.4670231729055257e-07, + "logits/chosen": -1.2033101320266724, + "logits/rejected": -1.172431230545044, + "logps/chosen": -95.48100280761719, + "logps/rejected": -90.35955810546875, + "loss": 0.634, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.012022022157907486, + "rewards/margins": 0.08636283129453659, + "rewards/rejected": -0.09838485717773438, + "step": 389 + }, + { + "epoch": 0.63, + "learning_rate": 3.475935828877005e-07, + "logits/chosen": -1.1878324747085571, + "logits/rejected": -1.1600477695465088, + "logps/chosen": -117.0290298461914, + "logps/rejected": -121.62348937988281, + "loss": 0.6994, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.2388772964477539, + "rewards/margins": 0.18195724487304688, + "rewards/rejected": 0.05692005529999733, + "step": 390 + }, + { + "epoch": 0.63, + "learning_rate": 3.484848484848485e-07, + "logits/chosen": -1.1555097103118896, + "logits/rejected": -1.1768207550048828, + "logps/chosen": -65.32601928710938, + "logps/rejected": -85.47814178466797, + "loss": 0.6526, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.15381231904029846, + "rewards/margins": -0.003842446953058243, + "rewards/rejected": 0.1576547771692276, + "step": 391 + }, + { + "epoch": 0.63, + "learning_rate": 3.493761140819964e-07, + "logits/chosen": -1.2543988227844238, + "logits/rejected": -1.165090560913086, + "logps/chosen": -75.44717407226562, + "logps/rejected": -73.69781494140625, + "loss": 0.6549, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.17111997306346893, + "rewards/margins": 0.2362898886203766, + "rewards/rejected": -0.06516990065574646, + "step": 392 + }, + { + "epoch": 0.63, + "learning_rate": 3.502673796791444e-07, + "logits/chosen": -1.2911741733551025, + "logits/rejected": -1.3765945434570312, + "logps/chosen": -72.8716049194336, + "logps/rejected": -76.04766845703125, + "loss": 0.644, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09749536216259003, + "rewards/margins": 0.11990642547607422, + "rewards/rejected": -0.022411061450839043, + "step": 393 + }, + { + "epoch": 0.63, + "learning_rate": 3.511586452762923e-07, + "logits/chosen": -1.1218092441558838, + "logits/rejected": -1.1795790195465088, + "logps/chosen": -75.23165893554688, + "logps/rejected": -100.90333557128906, + "loss": 0.702, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.07753906399011612, + "rewards/margins": 0.23031121492385864, + "rewards/rejected": -0.15277214348316193, + "step": 394 + }, + { + "epoch": 0.63, + "learning_rate": 3.520499108734403e-07, + "logits/chosen": -1.2529432773590088, + "logits/rejected": -1.1057623624801636, + "logps/chosen": -87.33377075195312, + "logps/rejected": -77.89424896240234, + "loss": 0.6006, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.6743290424346924, + "rewards/margins": 0.674172043800354, + "rewards/rejected": 0.00015697465278208256, + "step": 395 + }, + { + "epoch": 0.64, + "learning_rate": 3.529411764705882e-07, + "logits/chosen": -1.3106813430786133, + "logits/rejected": -1.2821837663650513, + "logps/chosen": -105.39674377441406, + "logps/rejected": -103.61710357666016, + "loss": 0.5822, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.3019241392612457, + "rewards/margins": 0.19212952256202698, + "rewards/rejected": 0.10979461669921875, + "step": 396 + }, + { + "epoch": 0.64, + "learning_rate": 3.5383244206773617e-07, + "logits/chosen": -1.1724590063095093, + "logits/rejected": -1.095735788345337, + "logps/chosen": -110.49842834472656, + "logps/rejected": -98.04237365722656, + "loss": 0.606, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.21506786346435547, + "rewards/margins": 0.3884463906288147, + "rewards/rejected": -0.17337855696678162, + "step": 397 + }, + { + "epoch": 0.64, + "learning_rate": 3.547237076648841e-07, + "logits/chosen": -1.5231051445007324, + "logits/rejected": -1.5002015829086304, + "logps/chosen": -100.35980224609375, + "logps/rejected": -96.23614501953125, + "loss": 0.6705, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.15456752479076385, + "rewards/margins": -0.07866973429918289, + "rewards/rejected": -0.07589778304100037, + "step": 398 + }, + { + "epoch": 0.64, + "learning_rate": 3.5561497326203206e-07, + "logits/chosen": -1.1538028717041016, + "logits/rejected": -1.2506217956542969, + "logps/chosen": -95.0109634399414, + "logps/rejected": -84.59688568115234, + "loss": 0.6128, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.07230892777442932, + "rewards/margins": 0.1273467093706131, + "rewards/rejected": -0.05503778159618378, + "step": 399 + }, + { + "epoch": 0.64, + "learning_rate": 3.5650623885918e-07, + "logits/chosen": -1.2661176919937134, + "logits/rejected": -1.233147144317627, + "logps/chosen": -96.84146881103516, + "logps/rejected": -102.0416259765625, + "loss": 0.5738, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.21052856743335724, + "rewards/margins": 0.5785010457038879, + "rewards/rejected": -0.3679724633693695, + "step": 400 + }, + { + "epoch": 0.64, + "learning_rate": 3.57397504456328e-07, + "logits/chosen": -1.133629322052002, + "logits/rejected": -1.1447527408599854, + "logps/chosen": -92.78630065917969, + "logps/rejected": -103.00799560546875, + "loss": 0.5324, + "rewards/accuracies": 0.25, + "rewards/chosen": 0.054760172963142395, + "rewards/margins": -0.021310605108737946, + "rewards/rejected": 0.07607078552246094, + "step": 401 + }, + { + "epoch": 0.65, + "learning_rate": 3.582887700534759e-07, + "logits/chosen": -1.2577345371246338, + "logits/rejected": -1.1366872787475586, + "logps/chosen": -71.67396545410156, + "logps/rejected": -97.9654312133789, + "loss": 0.6239, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.5582008361816406, + "rewards/margins": 0.511823832988739, + "rewards/rejected": 0.04637698829174042, + "step": 402 + }, + { + "epoch": 0.65, + "learning_rate": 3.591800356506239e-07, + "logits/chosen": -1.2527836561203003, + "logits/rejected": -1.2571171522140503, + "logps/chosen": -102.89054870605469, + "logps/rejected": -101.0600357055664, + "loss": 0.6907, + "rewards/accuracies": 0.25, + "rewards/chosen": -0.1261884719133377, + "rewards/margins": -0.23575058579444885, + "rewards/rejected": 0.10956211388111115, + "step": 403 + }, + { + "epoch": 0.65, + "learning_rate": 3.600713012477718e-07, + "logits/chosen": -1.4421614408493042, + "logits/rejected": -1.4135040044784546, + "logps/chosen": -64.36327362060547, + "logps/rejected": -71.80448150634766, + "loss": 0.7028, + "rewards/accuracies": 0.25, + "rewards/chosen": -0.03604850918054581, + "rewards/margins": -0.05248910188674927, + "rewards/rejected": 0.016440588980913162, + "step": 404 + }, + { + "epoch": 0.65, + "learning_rate": 3.609625668449198e-07, + "logits/chosen": -1.1047401428222656, + "logits/rejected": -1.0041768550872803, + "logps/chosen": -77.09197998046875, + "logps/rejected": -82.11797332763672, + "loss": 0.6283, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.280493825674057, + "rewards/margins": 0.32525700330734253, + "rewards/rejected": -0.04476318508386612, + "step": 405 + }, + { + "epoch": 0.65, + "learning_rate": 3.618538324420677e-07, + "logits/chosen": -1.2029763460159302, + "logits/rejected": -1.2655439376831055, + "logps/chosen": -79.59333801269531, + "logps/rejected": -80.0947036743164, + "loss": 0.621, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.16619901359081268, + "rewards/margins": 0.24877768754959106, + "rewards/rejected": -0.08257866650819778, + "step": 406 + }, + { + "epoch": 0.65, + "learning_rate": 3.6274509803921566e-07, + "logits/chosen": -1.3102222681045532, + "logits/rejected": -1.3523752689361572, + "logps/chosen": -79.61971282958984, + "logps/rejected": -80.59629821777344, + "loss": 0.7123, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.017102908343076706, + "rewards/margins": 0.08415079116821289, + "rewards/rejected": -0.1012537032365799, + "step": 407 + }, + { + "epoch": 0.65, + "learning_rate": 3.636363636363636e-07, + "logits/chosen": -1.329397439956665, + "logits/rejected": -1.2707304954528809, + "logps/chosen": -99.12361907958984, + "logps/rejected": -82.02562713623047, + "loss": 0.7147, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.25493907928466797, + "rewards/margins": -0.4594806730747223, + "rewards/rejected": 0.20454159379005432, + "step": 408 + }, + { + "epoch": 0.66, + "learning_rate": 3.6452762923351155e-07, + "logits/chosen": -1.2714667320251465, + "logits/rejected": -1.2921520471572876, + "logps/chosen": -71.1310806274414, + "logps/rejected": -68.48872375488281, + "loss": 0.5946, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.1605023443698883, + "rewards/margins": 0.42172977328300476, + "rewards/rejected": -0.26122742891311646, + "step": 409 + }, + { + "epoch": 0.66, + "learning_rate": 3.654188948306595e-07, + "logits/chosen": -1.5152530670166016, + "logits/rejected": -1.5307106971740723, + "logps/chosen": -98.88082885742188, + "logps/rejected": -106.0846176147461, + "loss": 0.5964, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.06149701774120331, + "rewards/margins": 0.42133378982543945, + "rewards/rejected": -0.48283082246780396, + "step": 410 + }, + { + "epoch": 0.66, + "learning_rate": 3.663101604278075e-07, + "logits/chosen": -1.2989656925201416, + "logits/rejected": -1.331050157546997, + "logps/chosen": -90.549072265625, + "logps/rejected": -105.76593017578125, + "loss": 0.6343, + "rewards/accuracies": 0.25, + "rewards/chosen": -0.23155003786087036, + "rewards/margins": -0.1511373519897461, + "rewards/rejected": -0.08041267096996307, + "step": 411 + }, + { + "epoch": 0.66, + "learning_rate": 3.672014260249554e-07, + "logits/chosen": -1.0633184909820557, + "logits/rejected": -1.1093069314956665, + "logps/chosen": -78.18463134765625, + "logps/rejected": -68.89715576171875, + "loss": 0.6547, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.002037622034549713, + "rewards/margins": 0.14608974754810333, + "rewards/rejected": -0.14812736213207245, + "step": 412 + }, + { + "epoch": 0.66, + "learning_rate": 3.680926916221034e-07, + "logits/chosen": -1.3019121885299683, + "logits/rejected": -1.237338900566101, + "logps/chosen": -71.09500122070312, + "logps/rejected": -81.72965240478516, + "loss": 0.6335, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.18916082382202148, + "rewards/margins": 0.1384078860282898, + "rewards/rejected": 0.050752922892570496, + "step": 413 + }, + { + "epoch": 0.66, + "learning_rate": 3.689839572192513e-07, + "logits/chosen": -1.277834177017212, + "logits/rejected": -1.2826359272003174, + "logps/chosen": -66.15577697753906, + "logps/rejected": -72.5907974243164, + "loss": 0.706, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.0682130828499794, + "rewards/margins": 0.07889804989099503, + "rewards/rejected": -0.010684968903660774, + "step": 414 + }, + { + "epoch": 0.67, + "learning_rate": 3.6987522281639927e-07, + "logits/chosen": -1.239984393119812, + "logits/rejected": -1.2482647895812988, + "logps/chosen": -80.39286804199219, + "logps/rejected": -77.93550109863281, + "loss": 0.6302, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.028056718409061432, + "rewards/margins": -0.08626452833414078, + "rewards/rejected": 0.11432123184204102, + "step": 415 + }, + { + "epoch": 0.67, + "learning_rate": 3.7076648841354726e-07, + "logits/chosen": -1.0962185859680176, + "logits/rejected": -1.1565070152282715, + "logps/chosen": -110.23213195800781, + "logps/rejected": -97.54344177246094, + "loss": 0.6743, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.16205865144729614, + "rewards/margins": 0.3520887494087219, + "rewards/rejected": -0.19003009796142578, + "step": 416 + }, + { + "epoch": 0.67, + "learning_rate": 3.7165775401069515e-07, + "logits/chosen": -1.1722536087036133, + "logits/rejected": -1.2589123249053955, + "logps/chosen": -82.18974304199219, + "logps/rejected": -96.46875, + "loss": 0.6358, + "rewards/accuracies": 0.25, + "rewards/chosen": -0.019259454682469368, + "rewards/margins": -0.16217099130153656, + "rewards/rejected": 0.14291153848171234, + "step": 417 + }, + { + "epoch": 0.67, + "learning_rate": 3.7254901960784315e-07, + "logits/chosen": -1.235077142715454, + "logits/rejected": -1.342763066291809, + "logps/chosen": -78.13264465332031, + "logps/rejected": -71.77613830566406, + "loss": 0.6382, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07785630226135254, + "rewards/margins": 0.30776700377464294, + "rewards/rejected": -0.3856232464313507, + "step": 418 + }, + { + "epoch": 0.67, + "learning_rate": 3.7344028520499104e-07, + "logits/chosen": -0.9298986196517944, + "logits/rejected": -0.9937185049057007, + "logps/chosen": -85.10452270507812, + "logps/rejected": -75.92705535888672, + "loss": 0.6695, + "rewards/accuracies": 0.25, + "rewards/chosen": -0.20175619423389435, + "rewards/margins": -0.21358099579811096, + "rewards/rejected": 0.011824799701571465, + "step": 419 + }, + { + "epoch": 0.67, + "learning_rate": 3.7433155080213904e-07, + "logits/chosen": -1.2044453620910645, + "logits/rejected": -1.169341802597046, + "logps/chosen": -75.73751831054688, + "logps/rejected": -65.20587921142578, + "loss": 0.6217, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.1165313720703125, + "rewards/margins": 0.02438211441040039, + "rewards/rejected": -0.1409134864807129, + "step": 420 + }, + { + "epoch": 0.68, + "learning_rate": 3.75222816399287e-07, + "logits/chosen": -1.0531060695648193, + "logits/rejected": -1.1608250141143799, + "logps/chosen": -92.49887084960938, + "logps/rejected": -80.49767303466797, + "loss": 0.6618, + "rewards/accuracies": 0.25, + "rewards/chosen": -0.09879360347986221, + "rewards/margins": -0.23068523406982422, + "rewards/rejected": 0.1318916231393814, + "step": 421 + }, + { + "epoch": 0.68, + "learning_rate": 3.761140819964349e-07, + "logits/chosen": -1.084995150566101, + "logits/rejected": -1.0075992345809937, + "logps/chosen": -92.8894271850586, + "logps/rejected": -86.93257904052734, + "loss": 0.5995, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.343304842710495, + "rewards/margins": 0.4756888449192047, + "rewards/rejected": -0.1323840171098709, + "step": 422 + }, + { + "epoch": 0.68, + "learning_rate": 3.7700534759358287e-07, + "logits/chosen": -0.9901492595672607, + "logits/rejected": -1.0132852792739868, + "logps/chosen": -96.18614196777344, + "logps/rejected": -100.50067901611328, + "loss": 0.6225, + "rewards/accuracies": 0.25, + "rewards/chosen": 0.02076416090130806, + "rewards/margins": -0.06052512675523758, + "rewards/rejected": 0.08128928393125534, + "step": 423 + }, + { + "epoch": 0.68, + "learning_rate": 3.778966131907308e-07, + "logits/chosen": -1.2984669208526611, + "logits/rejected": -1.2945114374160767, + "logps/chosen": -97.54997253417969, + "logps/rejected": -99.01551055908203, + "loss": 0.5635, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.029336359351873398, + "rewards/margins": 0.4102731943130493, + "rewards/rejected": -0.38093680143356323, + "step": 424 + }, + { + "epoch": 0.68, + "learning_rate": 3.7878787878787876e-07, + "logits/chosen": -1.296586513519287, + "logits/rejected": -1.3621975183486938, + "logps/chosen": -63.68661117553711, + "logps/rejected": -65.74790954589844, + "loss": 0.6138, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.2702306807041168, + "rewards/margins": 0.534076988697052, + "rewards/rejected": -0.2638463079929352, + "step": 425 + }, + { + "epoch": 0.68, + "learning_rate": 3.7967914438502675e-07, + "logits/chosen": -1.231501579284668, + "logits/rejected": -1.154856562614441, + "logps/chosen": -101.69435119628906, + "logps/rejected": -89.25820922851562, + "loss": 0.6063, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.2861669659614563, + "rewards/margins": 0.4406183362007141, + "rewards/rejected": -0.1544513702392578, + "step": 426 + }, + { + "epoch": 0.69, + "learning_rate": 3.8057040998217465e-07, + "logits/chosen": -1.1180994510650635, + "logits/rejected": -1.1067962646484375, + "logps/chosen": -76.10966491699219, + "logps/rejected": -89.00008392333984, + "loss": 0.6789, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.046874530613422394, + "rewards/margins": 0.16661272943019867, + "rewards/rejected": -0.11973819881677628, + "step": 427 + }, + { + "epoch": 0.69, + "learning_rate": 3.8146167557932264e-07, + "logits/chosen": -1.363166332244873, + "logits/rejected": -1.322693943977356, + "logps/chosen": -83.13065338134766, + "logps/rejected": -82.67440795898438, + "loss": 0.6405, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.16470997035503387, + "rewards/margins": 0.32157471776008606, + "rewards/rejected": -0.156864732503891, + "step": 428 + }, + { + "epoch": 0.69, + "learning_rate": 3.8235294117647053e-07, + "logits/chosen": -1.310354471206665, + "logits/rejected": -1.2848073244094849, + "logps/chosen": -88.2528305053711, + "logps/rejected": -97.04901885986328, + "loss": 0.5913, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.14081469178199768, + "rewards/margins": 0.4230685234069824, + "rewards/rejected": -0.28225386142730713, + "step": 429 + }, + { + "epoch": 0.69, + "learning_rate": 3.8324420677361853e-07, + "logits/chosen": -1.2690279483795166, + "logits/rejected": -1.2807759046554565, + "logps/chosen": -89.16944885253906, + "logps/rejected": -83.23316192626953, + "loss": 0.5713, + "rewards/accuracies": 0.25, + "rewards/chosen": 0.016077138483524323, + "rewards/margins": 0.04096364974975586, + "rewards/rejected": -0.024886513128876686, + "step": 430 + }, + { + "epoch": 0.69, + "learning_rate": 3.841354723707665e-07, + "logits/chosen": -1.2887147665023804, + "logits/rejected": -1.280814528465271, + "logps/chosen": -120.57044219970703, + "logps/rejected": -81.70338439941406, + "loss": 0.6032, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.07986507564783096, + "rewards/margins": 0.07214917987585068, + "rewards/rejected": 0.007715891115367413, + "step": 431 + }, + { + "epoch": 0.69, + "learning_rate": 3.850267379679144e-07, + "logits/chosen": -1.2743338346481323, + "logits/rejected": -1.327607274055481, + "logps/chosen": -99.2079849243164, + "logps/rejected": -106.66618347167969, + "loss": 0.655, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1152595579624176, + "rewards/margins": 0.24007073044776917, + "rewards/rejected": -0.12481116503477097, + "step": 432 + }, + { + "epoch": 0.7, + "learning_rate": 3.8591800356506236e-07, + "logits/chosen": -1.2637909650802612, + "logits/rejected": -1.3247734308242798, + "logps/chosen": -92.75313568115234, + "logps/rejected": -121.42718505859375, + "loss": 0.725, + "rewards/accuracies": 0.25, + "rewards/chosen": -0.19493962824344635, + "rewards/margins": -0.2441082000732422, + "rewards/rejected": 0.04916858300566673, + "step": 433 + }, + { + "epoch": 0.7, + "learning_rate": 3.868092691622103e-07, + "logits/chosen": -1.3659807443618774, + "logits/rejected": -1.4013768434524536, + "logps/chosen": -65.14983367919922, + "logps/rejected": -70.91036987304688, + "loss": 0.5735, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2728325128555298, + "rewards/margins": 0.5080000162124634, + "rewards/rejected": -0.2351675182580948, + "step": 434 + }, + { + "epoch": 0.7, + "learning_rate": 3.8770053475935825e-07, + "logits/chosen": -1.2288713455200195, + "logits/rejected": -1.2812716960906982, + "logps/chosen": -89.29964447021484, + "logps/rejected": -83.95532989501953, + "loss": 0.6349, + "rewards/accuracies": 0.25, + "rewards/chosen": -0.029742244631052017, + "rewards/margins": -0.03181428462266922, + "rewards/rejected": 0.002072051167488098, + "step": 435 + }, + { + "epoch": 0.7, + "learning_rate": 3.8859180035650625e-07, + "logits/chosen": -1.312159538269043, + "logits/rejected": -1.2309988737106323, + "logps/chosen": -73.71180725097656, + "logps/rejected": -79.66343688964844, + "loss": 0.5641, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.4514661133289337, + "rewards/margins": 0.45228320360183716, + "rewards/rejected": -0.0008171088993549347, + "step": 436 + }, + { + "epoch": 0.7, + "learning_rate": 3.8948306595365414e-07, + "logits/chosen": -1.286069631576538, + "logits/rejected": -1.2134954929351807, + "logps/chosen": -97.17425537109375, + "logps/rejected": -81.6067886352539, + "loss": 0.6285, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.1706571727991104, + "rewards/margins": 0.040442466735839844, + "rewards/rejected": 0.13021469116210938, + "step": 437 + }, + { + "epoch": 0.7, + "learning_rate": 3.9037433155080213e-07, + "logits/chosen": -1.2278181314468384, + "logits/rejected": -1.2416651248931885, + "logps/chosen": -83.85308074951172, + "logps/rejected": -82.41248321533203, + "loss": 0.5662, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.0012120248284190893, + "rewards/margins": 0.1378270983695984, + "rewards/rejected": -0.13903912901878357, + "step": 438 + }, + { + "epoch": 0.7, + "learning_rate": 3.9126559714795e-07, + "logits/chosen": -1.307107925415039, + "logits/rejected": -1.2987736463546753, + "logps/chosen": -84.60057067871094, + "logps/rejected": -86.71941375732422, + "loss": 0.6738, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.07804012298583984, + "rewards/margins": 0.04080143943428993, + "rewards/rejected": -0.11884155124425888, + "step": 439 + }, + { + "epoch": 0.71, + "learning_rate": 3.92156862745098e-07, + "logits/chosen": -1.2865593433380127, + "logits/rejected": -1.2043914794921875, + "logps/chosen": -89.62144470214844, + "logps/rejected": -85.88352966308594, + "loss": 0.6085, + "rewards/accuracies": 0.25, + "rewards/chosen": -0.04045906662940979, + "rewards/margins": -0.09667320549488068, + "rewards/rejected": 0.056214142590761185, + "step": 440 + }, + { + "epoch": 0.71, + "learning_rate": 3.9304812834224597e-07, + "logits/chosen": -1.153688907623291, + "logits/rejected": -1.2368963956832886, + "logps/chosen": -65.02372741699219, + "logps/rejected": -88.77371978759766, + "loss": 0.6256, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.20344524085521698, + "rewards/margins": 0.6848279237747192, + "rewards/rejected": -0.48138266801834106, + "step": 441 + }, + { + "epoch": 0.71, + "learning_rate": 3.939393939393939e-07, + "logits/chosen": -1.4094884395599365, + "logits/rejected": -1.4219439029693604, + "logps/chosen": -44.74273681640625, + "logps/rejected": -56.57225036621094, + "loss": 0.6507, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0022291187196969986, + "rewards/margins": 0.09854794293642044, + "rewards/rejected": -0.096318818628788, + "step": 442 + }, + { + "epoch": 0.71, + "learning_rate": 3.9483065953654185e-07, + "logits/chosen": -1.425800085067749, + "logits/rejected": -1.3602354526519775, + "logps/chosen": -65.93795013427734, + "logps/rejected": -77.2974853515625, + "loss": 0.6193, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.1508585810661316, + "rewards/margins": 0.0631929486989975, + "rewards/rejected": -0.21405154466629028, + "step": 443 + }, + { + "epoch": 0.71, + "learning_rate": 3.9572192513368985e-07, + "logits/chosen": -1.198150634765625, + "logits/rejected": -1.2567341327667236, + "logps/chosen": -80.4825439453125, + "logps/rejected": -83.21296691894531, + "loss": 0.5793, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.08257760852575302, + "rewards/margins": 0.3871579170227051, + "rewards/rejected": -0.30458030104637146, + "step": 444 + }, + { + "epoch": 0.71, + "learning_rate": 3.9661319073083774e-07, + "logits/chosen": -1.2651132345199585, + "logits/rejected": -1.3281967639923096, + "logps/chosen": -93.98489379882812, + "logps/rejected": -116.48754119873047, + "loss": 0.6354, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.08730335533618927, + "rewards/margins": -0.015226557850837708, + "rewards/rejected": -0.07207679003477097, + "step": 445 + }, + { + "epoch": 0.72, + "learning_rate": 3.9750445632798574e-07, + "logits/chosen": -1.3390636444091797, + "logits/rejected": -1.1928832530975342, + "logps/chosen": -94.0273666381836, + "logps/rejected": -72.57819366455078, + "loss": 0.6415, + "rewards/accuracies": 0.25, + "rewards/chosen": 0.20120640099048615, + "rewards/margins": -0.01834353804588318, + "rewards/rejected": 0.21954993903636932, + "step": 446 + }, + { + "epoch": 0.72, + "learning_rate": 3.9839572192513363e-07, + "logits/chosen": -1.0589655637741089, + "logits/rejected": -1.0946158170700073, + "logps/chosen": -83.42378234863281, + "logps/rejected": -116.17475891113281, + "loss": 0.6579, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1473393440246582, + "rewards/margins": 0.4214257597923279, + "rewards/rejected": -0.2740863859653473, + "step": 447 + }, + { + "epoch": 0.72, + "learning_rate": 3.9928698752228163e-07, + "logits/chosen": -1.3913222551345825, + "logits/rejected": -1.452187418937683, + "logps/chosen": -98.67301940917969, + "logps/rejected": -95.60882568359375, + "loss": 0.5858, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.32782667875289917, + "rewards/margins": 0.39873600006103516, + "rewards/rejected": -0.07090931385755539, + "step": 448 + }, + { + "epoch": 0.72, + "learning_rate": 4.001782531194296e-07, + "logits/chosen": -1.4081518650054932, + "logits/rejected": -1.3985217809677124, + "logps/chosen": -71.8021469116211, + "logps/rejected": -99.19285583496094, + "loss": 0.5986, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.07844753563404083, + "rewards/margins": -0.020090483129024506, + "rewards/rejected": -0.05835704505443573, + "step": 449 + }, + { + "epoch": 0.72, + "learning_rate": 4.010695187165775e-07, + "logits/chosen": -1.290668249130249, + "logits/rejected": -1.3075284957885742, + "logps/chosen": -83.3858413696289, + "logps/rejected": -91.45824432373047, + "loss": 0.6105, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.022474385797977448, + "rewards/margins": -0.0019368194043636322, + "rewards/rejected": 0.024411197751760483, + "step": 450 + }, + { + "epoch": 0.72, + "learning_rate": 4.019607843137255e-07, + "logits/chosen": -1.4386577606201172, + "logits/rejected": -1.3995468616485596, + "logps/chosen": -87.14105987548828, + "logps/rejected": -82.3614273071289, + "loss": 0.5944, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.17357024550437927, + "rewards/margins": 0.18619516491889954, + "rewards/rejected": -0.012624932453036308, + "step": 451 + }, + { + "epoch": 0.73, + "learning_rate": 4.028520499108734e-07, + "logits/chosen": -1.0986660718917847, + "logits/rejected": -0.9879285097122192, + "logps/chosen": -84.15496063232422, + "logps/rejected": -115.26528930664062, + "loss": 0.5758, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.11674938350915909, + "rewards/margins": 0.00457992497831583, + "rewards/rejected": 0.11216945946216583, + "step": 452 + }, + { + "epoch": 0.73, + "learning_rate": 4.037433155080214e-07, + "logits/chosen": -1.1762040853500366, + "logits/rejected": -1.0985565185546875, + "logps/chosen": -100.03556823730469, + "logps/rejected": -88.69412231445312, + "loss": 0.5683, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.022872548550367355, + "rewards/margins": 0.1401207000017166, + "rewards/rejected": -0.11724815517663956, + "step": 453 + }, + { + "epoch": 0.73, + "learning_rate": 4.0463458110516934e-07, + "logits/chosen": -1.1346769332885742, + "logits/rejected": -1.0895377397537231, + "logps/chosen": -83.95707702636719, + "logps/rejected": -75.14948272705078, + "loss": 0.6249, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.09833468496799469, + "rewards/margins": 0.1491558849811554, + "rewards/rejected": -0.0508212111890316, + "step": 454 + }, + { + "epoch": 0.73, + "learning_rate": 4.055258467023173e-07, + "logits/chosen": -1.2797484397888184, + "logits/rejected": -1.2674553394317627, + "logps/chosen": -77.8741455078125, + "logps/rejected": -88.85834503173828, + "loss": 0.5555, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.013510417193174362, + "rewards/margins": 0.07482538372278214, + "rewards/rejected": -0.0883358046412468, + "step": 455 + }, + { + "epoch": 0.73, + "learning_rate": 4.0641711229946523e-07, + "logits/chosen": -1.3066165447235107, + "logits/rejected": -1.2400354146957397, + "logps/chosen": -78.11245727539062, + "logps/rejected": -90.78754425048828, + "loss": 0.6177, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.14486083388328552, + "rewards/margins": 0.484927773475647, + "rewards/rejected": -0.34006690979003906, + "step": 456 + }, + { + "epoch": 0.73, + "learning_rate": 4.073083778966132e-07, + "logits/chosen": -1.2984991073608398, + "logits/rejected": -1.298667073249817, + "logps/chosen": -104.656982421875, + "logps/rejected": -80.5057601928711, + "loss": 0.6831, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.021123699843883514, + "rewards/margins": -0.13160055875778198, + "rewards/rejected": 0.11047688126564026, + "step": 457 + }, + { + "epoch": 0.74, + "learning_rate": 4.081996434937611e-07, + "logits/chosen": -1.4500277042388916, + "logits/rejected": -1.5118764638900757, + "logps/chosen": -100.04800415039062, + "logps/rejected": -86.5777816772461, + "loss": 0.6482, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.019234277307987213, + "rewards/margins": 0.288046270608902, + "rewards/rejected": -0.3072805404663086, + "step": 458 + }, + { + "epoch": 0.74, + "learning_rate": 4.090909090909091e-07, + "logits/chosen": -1.3442444801330566, + "logits/rejected": -1.3969312906265259, + "logps/chosen": -114.2316665649414, + "logps/rejected": -106.92385864257812, + "loss": 0.6321, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.18799743056297302, + "rewards/margins": -0.05285205319523811, + "rewards/rejected": -0.1351453810930252, + "step": 459 + }, + { + "epoch": 0.74, + "learning_rate": 4.09982174688057e-07, + "logits/chosen": -1.203410267829895, + "logits/rejected": -1.133986473083496, + "logps/chosen": -95.15753936767578, + "logps/rejected": -83.20284271240234, + "loss": 0.5944, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.23127174377441406, + "rewards/margins": 0.298564076423645, + "rewards/rejected": -0.06729232519865036, + "step": 460 + }, + { + "epoch": 0.74, + "learning_rate": 4.10873440285205e-07, + "logits/chosen": -1.2983790636062622, + "logits/rejected": -1.2700847387313843, + "logps/chosen": -88.56631469726562, + "logps/rejected": -96.26264953613281, + "loss": 0.6372, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.2767552137374878, + "rewards/margins": 0.15458115935325623, + "rewards/rejected": 0.12217407673597336, + "step": 461 + }, + { + "epoch": 0.74, + "learning_rate": 4.117647058823529e-07, + "logits/chosen": -1.3383396863937378, + "logits/rejected": -1.2539695501327515, + "logps/chosen": -68.66460418701172, + "logps/rejected": -77.72669982910156, + "loss": 0.603, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.29955366253852844, + "rewards/margins": 0.4738039970397949, + "rewards/rejected": -0.17425031960010529, + "step": 462 + }, + { + "epoch": 0.74, + "learning_rate": 4.126559714795009e-07, + "logits/chosen": -1.2988406419754028, + "logits/rejected": -1.3252497911453247, + "logps/chosen": -89.51847076416016, + "logps/rejected": -86.02799987792969, + "loss": 0.5326, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.09732237458229065, + "rewards/margins": 0.30294695496559143, + "rewards/rejected": -0.20562458038330078, + "step": 463 + }, + { + "epoch": 0.74, + "learning_rate": 4.1354723707664884e-07, + "logits/chosen": -1.1332547664642334, + "logits/rejected": -1.0900863409042358, + "logps/chosen": -125.12483978271484, + "logps/rejected": -90.96456146240234, + "loss": 0.5653, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.039594076573848724, + "rewards/margins": 0.21002072095870972, + "rewards/rejected": -0.24961480498313904, + "step": 464 + }, + { + "epoch": 0.75, + "learning_rate": 4.144385026737968e-07, + "logits/chosen": -1.2806475162506104, + "logits/rejected": -1.2539416551589966, + "logps/chosen": -82.32113647460938, + "logps/rejected": -83.65682983398438, + "loss": 0.5596, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.14572773873806, + "rewards/margins": 0.6775773763656616, + "rewards/rejected": -0.5318496823310852, + "step": 465 + }, + { + "epoch": 0.75, + "learning_rate": 4.153297682709447e-07, + "logits/chosen": -1.2206542491912842, + "logits/rejected": -1.2182166576385498, + "logps/chosen": -72.96160125732422, + "logps/rejected": -65.20282745361328, + "loss": 0.6074, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.2063087671995163, + "rewards/margins": 0.09210057556629181, + "rewards/rejected": 0.1142081767320633, + "step": 466 + }, + { + "epoch": 0.75, + "learning_rate": 4.1622103386809267e-07, + "logits/chosen": -1.063786506652832, + "logits/rejected": -1.082587480545044, + "logps/chosen": -89.14126586914062, + "logps/rejected": -126.00691223144531, + "loss": 0.6159, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.02730713039636612, + "rewards/margins": 0.5163610577583313, + "rewards/rejected": -0.5436682105064392, + "step": 467 + }, + { + "epoch": 0.75, + "learning_rate": 4.171122994652406e-07, + "logits/chosen": -1.1326887607574463, + "logits/rejected": -1.1859641075134277, + "logps/chosen": -84.5179443359375, + "logps/rejected": -94.02557373046875, + "loss": 0.6589, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.23464013636112213, + "rewards/margins": 0.22856560349464417, + "rewards/rejected": 0.006074526347219944, + "step": 468 + }, + { + "epoch": 0.75, + "learning_rate": 4.180035650623886e-07, + "logits/chosen": -1.3083617687225342, + "logits/rejected": -1.2259063720703125, + "logps/chosen": -69.37739562988281, + "logps/rejected": -78.87149047851562, + "loss": 0.579, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.13563308119773865, + "rewards/margins": 0.20477294921875, + "rewards/rejected": -0.06913986057043076, + "step": 469 + }, + { + "epoch": 0.75, + "learning_rate": 4.188948306595365e-07, + "logits/chosen": -1.2209584712982178, + "logits/rejected": -1.1905081272125244, + "logps/chosen": -76.27886962890625, + "logps/rejected": -81.78018951416016, + "loss": 0.6121, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.16167010366916656, + "rewards/margins": 0.023904796689748764, + "rewards/rejected": 0.1377653181552887, + "step": 470 + }, + { + "epoch": 0.76, + "learning_rate": 4.197860962566845e-07, + "logits/chosen": -1.3911468982696533, + "logits/rejected": -1.398682713508606, + "logps/chosen": -60.501556396484375, + "logps/rejected": -68.39920806884766, + "loss": 0.6455, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.18640299141407013, + "rewards/margins": 0.2992064654827118, + "rewards/rejected": -0.11280346661806107, + "step": 471 + }, + { + "epoch": 0.76, + "learning_rate": 4.206773618538324e-07, + "logits/chosen": -1.116468906402588, + "logits/rejected": -1.1297643184661865, + "logps/chosen": -108.1712417602539, + "logps/rejected": -87.00677490234375, + "loss": 0.63, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.10710355639457703, + "rewards/margins": 0.3907243609428406, + "rewards/rejected": -0.4978279173374176, + "step": 472 + }, + { + "epoch": 0.76, + "learning_rate": 4.215686274509804e-07, + "logits/chosen": -1.5154005289077759, + "logits/rejected": -1.4778096675872803, + "logps/chosen": -63.15167999267578, + "logps/rejected": -72.02388000488281, + "loss": 0.5746, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.17897653579711914, + "rewards/margins": 0.13677901029586792, + "rewards/rejected": 0.042197518050670624, + "step": 473 + }, + { + "epoch": 0.76, + "learning_rate": 4.2245989304812833e-07, + "logits/chosen": -1.3389841318130493, + "logits/rejected": -1.2838752269744873, + "logps/chosen": -84.42916107177734, + "logps/rejected": -90.39540100097656, + "loss": 0.6403, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.012149907648563385, + "rewards/margins": 0.041347406804561615, + "rewards/rejected": -0.053497314453125, + "step": 474 + }, + { + "epoch": 0.76, + "learning_rate": 4.2335115864527627e-07, + "logits/chosen": -1.3151636123657227, + "logits/rejected": -1.2679822444915771, + "logps/chosen": -87.32825469970703, + "logps/rejected": -83.658203125, + "loss": 0.578, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.11957035958766937, + "rewards/margins": 0.06148138269782066, + "rewards/rejected": 0.05808897316455841, + "step": 475 + }, + { + "epoch": 0.76, + "learning_rate": 4.242424242424242e-07, + "logits/chosen": -1.190159797668457, + "logits/rejected": -1.178831696510315, + "logps/chosen": -95.5047607421875, + "logps/rejected": -105.34327697753906, + "loss": 0.5845, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.12953606247901917, + "rewards/margins": 0.1360061764717102, + "rewards/rejected": -0.265542209148407, + "step": 476 + }, + { + "epoch": 0.77, + "learning_rate": 4.2513368983957216e-07, + "logits/chosen": -1.153006672859192, + "logits/rejected": -1.1645163297653198, + "logps/chosen": -87.86785125732422, + "logps/rejected": -92.99889373779297, + "loss": 0.6725, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.5782763361930847, + "rewards/margins": 0.47999846935272217, + "rewards/rejected": 0.09827785938978195, + "step": 477 + }, + { + "epoch": 0.77, + "learning_rate": 4.260249554367201e-07, + "logits/chosen": -1.2326632738113403, + "logits/rejected": -1.2825582027435303, + "logps/chosen": -85.50484466552734, + "logps/rejected": -105.1346435546875, + "loss": 0.6395, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.2969025671482086, + "rewards/margins": 0.4198203682899475, + "rewards/rejected": -0.12291783839464188, + "step": 478 + }, + { + "epoch": 0.77, + "learning_rate": 4.269162210338681e-07, + "logits/chosen": -1.3497681617736816, + "logits/rejected": -1.4065210819244385, + "logps/chosen": -96.04811096191406, + "logps/rejected": -108.46571350097656, + "loss": 0.6808, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.006238652393221855, + "rewards/margins": 0.04013318940997124, + "rewards/rejected": -0.04637184366583824, + "step": 479 + }, + { + "epoch": 0.77, + "learning_rate": 4.27807486631016e-07, + "logits/chosen": -1.2886266708374023, + "logits/rejected": -1.2083921432495117, + "logps/chosen": -83.16029357910156, + "logps/rejected": -68.50463104248047, + "loss": 0.5767, + "rewards/accuracies": 0.25, + "rewards/chosen": -0.06715640425682068, + "rewards/margins": 0.0225057452917099, + "rewards/rejected": -0.08966217190027237, + "step": 480 + }, + { + "epoch": 0.77, + "learning_rate": 4.28698752228164e-07, + "logits/chosen": -1.2284700870513916, + "logits/rejected": -1.117881178855896, + "logps/chosen": -83.8816909790039, + "logps/rejected": -99.60755157470703, + "loss": 0.5359, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.27510881423950195, + "rewards/margins": 0.41564759612083435, + "rewards/rejected": -0.1405387818813324, + "step": 481 + }, + { + "epoch": 0.77, + "learning_rate": 4.295900178253119e-07, + "logits/chosen": -1.2234052419662476, + "logits/rejected": -1.2356905937194824, + "logps/chosen": -86.45711517333984, + "logps/rejected": -90.36863708496094, + "loss": 0.6247, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.03613615036010742, + "rewards/margins": -0.32101961970329285, + "rewards/rejected": 0.35715579986572266, + "step": 482 + }, + { + "epoch": 0.78, + "learning_rate": 4.304812834224599e-07, + "logits/chosen": -1.3215898275375366, + "logits/rejected": -1.2860819101333618, + "logps/chosen": -79.0875244140625, + "logps/rejected": -67.99415588378906, + "loss": 0.5611, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.11069746315479279, + "rewards/margins": 0.16057005524635315, + "rewards/rejected": -0.27126753330230713, + "step": 483 + }, + { + "epoch": 0.78, + "learning_rate": 4.313725490196078e-07, + "logits/chosen": -1.3516974449157715, + "logits/rejected": -1.4098927974700928, + "logps/chosen": -96.32000732421875, + "logps/rejected": -94.46757507324219, + "loss": 0.5992, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.020639605820178986, + "rewards/margins": 0.2952066659927368, + "rewards/rejected": -0.27456703782081604, + "step": 484 + }, + { + "epoch": 0.78, + "learning_rate": 4.3226381461675576e-07, + "logits/chosen": -1.4597803354263306, + "logits/rejected": -1.3645751476287842, + "logps/chosen": -74.60675811767578, + "logps/rejected": -71.40281677246094, + "loss": 0.5958, + "rewards/accuracies": 0.25, + "rewards/chosen": -0.046377092599868774, + "rewards/margins": -0.06542091071605682, + "rewards/rejected": 0.019043825566768646, + "step": 485 + }, + { + "epoch": 0.78, + "learning_rate": 4.3315508021390376e-07, + "logits/chosen": -1.166602373123169, + "logits/rejected": -1.2259540557861328, + "logps/chosen": -97.90666198730469, + "logps/rejected": -90.58246612548828, + "loss": 0.6174, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.3365577757358551, + "rewards/margins": 0.32069388031959534, + "rewards/rejected": -0.6572516560554504, + "step": 486 + }, + { + "epoch": 0.78, + "learning_rate": 4.3404634581105165e-07, + "logits/chosen": -1.1641950607299805, + "logits/rejected": -1.323347568511963, + "logps/chosen": -76.68212890625, + "logps/rejected": -97.8846206665039, + "loss": 0.6491, + "rewards/accuracies": 0.25, + "rewards/chosen": -0.09745101630687714, + "rewards/margins": -0.05226903408765793, + "rewards/rejected": -0.0451819933950901, + "step": 487 + }, + { + "epoch": 0.78, + "learning_rate": 4.3493761140819965e-07, + "logits/chosen": -1.320125699043274, + "logits/rejected": -1.314950942993164, + "logps/chosen": -67.85660552978516, + "logps/rejected": -74.83441162109375, + "loss": 0.6189, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.3528144061565399, + "rewards/margins": 0.3462958335876465, + "rewards/rejected": 0.006518557667732239, + "step": 488 + }, + { + "epoch": 0.78, + "learning_rate": 4.358288770053476e-07, + "logits/chosen": -1.3386001586914062, + "logits/rejected": -1.350630760192871, + "logps/chosen": -73.04529571533203, + "logps/rejected": -82.00672912597656, + "loss": 0.5815, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.06825485080480576, + "rewards/margins": 0.14499531686306, + "rewards/rejected": -0.21325016021728516, + "step": 489 + }, + { + "epoch": 0.79, + "learning_rate": 4.3672014260249554e-07, + "logits/chosen": -1.1427404880523682, + "logits/rejected": -1.177492380142212, + "logps/chosen": -99.52444458007812, + "logps/rejected": -101.83626556396484, + "loss": 0.6346, + "rewards/accuracies": 0.25, + "rewards/chosen": -0.08892592787742615, + "rewards/margins": -0.11826153099536896, + "rewards/rejected": 0.029335597530007362, + "step": 490 + }, + { + "epoch": 0.79, + "learning_rate": 4.376114081996435e-07, + "logits/chosen": -1.274001955986023, + "logits/rejected": -1.245922565460205, + "logps/chosen": -87.59429168701172, + "logps/rejected": -91.91871643066406, + "loss": 0.6426, + "rewards/accuracies": 0.25, + "rewards/chosen": -0.23058490455150604, + "rewards/margins": -0.1302284300327301, + "rewards/rejected": -0.10035648941993713, + "step": 491 + }, + { + "epoch": 0.79, + "learning_rate": 4.385026737967914e-07, + "logits/chosen": -1.1912789344787598, + "logits/rejected": -1.1602954864501953, + "logps/chosen": -88.62810516357422, + "logps/rejected": -87.14308166503906, + "loss": 0.5823, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.14948883652687073, + "rewards/margins": 0.4789545238018036, + "rewards/rejected": -0.32946568727493286, + "step": 492 + }, + { + "epoch": 0.79, + "learning_rate": 4.3939393939393937e-07, + "logits/chosen": -1.3624935150146484, + "logits/rejected": -1.2808935642242432, + "logps/chosen": -65.746337890625, + "logps/rejected": -74.42325592041016, + "loss": 0.5125, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2584618926048279, + "rewards/margins": 0.5056008696556091, + "rewards/rejected": -0.24713897705078125, + "step": 493 + }, + { + "epoch": 0.79, + "learning_rate": 4.4028520499108736e-07, + "logits/chosen": -1.2846308946609497, + "logits/rejected": -1.4656882286071777, + "logps/chosen": -77.66488647460938, + "logps/rejected": -58.457427978515625, + "loss": 0.61, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.055696919560432434, + "rewards/margins": 0.050402313470840454, + "rewards/rejected": 0.0052946098148822784, + "step": 494 + }, + { + "epoch": 0.79, + "learning_rate": 4.4117647058823526e-07, + "logits/chosen": -1.5594539642333984, + "logits/rejected": -1.508124589920044, + "logps/chosen": -87.69598388671875, + "logps/rejected": -106.2069091796875, + "loss": 0.5802, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.40858763456344604, + "rewards/margins": 1.0477604866027832, + "rewards/rejected": -0.6391727328300476, + "step": 495 + }, + { + "epoch": 0.8, + "learning_rate": 4.4206773618538325e-07, + "logits/chosen": -1.2237030267715454, + "logits/rejected": -1.2619823217391968, + "logps/chosen": -114.19792175292969, + "logps/rejected": -105.03025817871094, + "loss": 0.588, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.3114168345928192, + "rewards/margins": 0.5542125701904297, + "rewards/rejected": -0.24279576539993286, + "step": 496 + }, + { + "epoch": 0.8, + "learning_rate": 4.4295900178253114e-07, + "logits/chosen": -1.1107803583145142, + "logits/rejected": -1.1437745094299316, + "logps/chosen": -79.51960754394531, + "logps/rejected": -92.55284881591797, + "loss": 0.5983, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.10698748379945755, + "rewards/margins": -0.014848999679088593, + "rewards/rejected": -0.09213848412036896, + "step": 497 + }, + { + "epoch": 0.8, + "learning_rate": 4.4385026737967914e-07, + "logits/chosen": -1.3689204454421997, + "logits/rejected": -1.3806201219558716, + "logps/chosen": -76.36368560791016, + "logps/rejected": -90.34919738769531, + "loss": 0.5986, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.33824339509010315, + "rewards/margins": 0.469723641872406, + "rewards/rejected": -0.13148020207881927, + "step": 498 + }, + { + "epoch": 0.8, + "learning_rate": 4.447415329768271e-07, + "logits/chosen": -1.270858883857727, + "logits/rejected": -1.2354862689971924, + "logps/chosen": -82.68109130859375, + "logps/rejected": -65.69955444335938, + "loss": 0.7413, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.26749736070632935, + "rewards/margins": 0.40030500292778015, + "rewards/rejected": -0.1328076422214508, + "step": 499 + }, + { + "epoch": 0.8, + "learning_rate": 4.4563279857397503e-07, + "logits/chosen": -1.2757349014282227, + "logits/rejected": -1.2853527069091797, + "logps/chosen": -107.81135559082031, + "logps/rejected": -104.32655334472656, + "loss": 0.5196, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.06813926249742508, + "rewards/margins": 0.8135612607002258, + "rewards/rejected": -0.7454220056533813, + "step": 500 + }, + { + "epoch": 0.8, + "learning_rate": 4.4652406417112297e-07, + "logits/chosen": -1.386798620223999, + "logits/rejected": -1.4083800315856934, + "logps/chosen": -83.87106323242188, + "logps/rejected": -76.98448944091797, + "loss": 0.6505, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.43621501326560974, + "rewards/margins": 0.3472493290901184, + "rewards/rejected": 0.08896571397781372, + "step": 501 + }, + { + "epoch": 0.81, + "learning_rate": 4.474153297682709e-07, + "logits/chosen": -1.3509119749069214, + "logits/rejected": -1.3410181999206543, + "logps/chosen": -60.21222686767578, + "logps/rejected": -74.73516845703125, + "loss": 0.612, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.0664929449558258, + "rewards/margins": 0.11449499428272247, + "rewards/rejected": -0.048002053052186966, + "step": 502 + }, + { + "epoch": 0.81, + "learning_rate": 4.4830659536541886e-07, + "logits/chosen": -1.105777382850647, + "logits/rejected": -1.1056811809539795, + "logps/chosen": -98.85588836669922, + "logps/rejected": -118.79904174804688, + "loss": 0.7098, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.14117011427879333, + "rewards/margins": 0.1986784040927887, + "rewards/rejected": -0.057508282363414764, + "step": 503 + }, + { + "epoch": 0.81, + "learning_rate": 4.4919786096256686e-07, + "logits/chosen": -1.1480892896652222, + "logits/rejected": -1.2372040748596191, + "logps/chosen": -87.48677062988281, + "logps/rejected": -94.84223937988281, + "loss": 0.5946, + "rewards/accuracies": 0.25, + "rewards/chosen": 0.014048953540623188, + "rewards/margins": -0.17580872774124146, + "rewards/rejected": 0.18985766172409058, + "step": 504 + }, + { + "epoch": 0.81, + "learning_rate": 4.5008912655971475e-07, + "logits/chosen": -1.3914356231689453, + "logits/rejected": -1.3938919305801392, + "logps/chosen": -82.96173095703125, + "logps/rejected": -79.52250671386719, + "loss": 0.6397, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.06953773647546768, + "rewards/margins": 0.2171051800251007, + "rewards/rejected": -0.2866429388523102, + "step": 505 + }, + { + "epoch": 0.81, + "learning_rate": 4.5098039215686274e-07, + "logits/chosen": -1.2574098110198975, + "logits/rejected": -1.2677593231201172, + "logps/chosen": -96.27035522460938, + "logps/rejected": -87.74009704589844, + "loss": 0.6185, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.12315329909324646, + "rewards/margins": 0.24885998666286469, + "rewards/rejected": -0.37201327085494995, + "step": 506 + }, + { + "epoch": 0.81, + "learning_rate": 4.5187165775401064e-07, + "logits/chosen": -1.288081407546997, + "logits/rejected": -1.1755859851837158, + "logps/chosen": -55.842926025390625, + "logps/rejected": -82.43338012695312, + "loss": 0.7029, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.04859896004199982, + "rewards/margins": 0.24106062948703766, + "rewards/rejected": -0.28965961933135986, + "step": 507 + }, + { + "epoch": 0.82, + "learning_rate": 4.5276292335115863e-07, + "logits/chosen": -1.1611793041229248, + "logits/rejected": -1.0328072309494019, + "logps/chosen": -99.04428100585938, + "logps/rejected": -93.73161315917969, + "loss": 0.5782, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.11443749070167542, + "rewards/margins": 0.10516633093357086, + "rewards/rejected": 0.00927114486694336, + "step": 508 + }, + { + "epoch": 0.82, + "learning_rate": 4.536541889483066e-07, + "logits/chosen": -1.3150384426116943, + "logits/rejected": -1.319748878479004, + "logps/chosen": -106.81163787841797, + "logps/rejected": -120.40884399414062, + "loss": 0.5103, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.24300481379032135, + "rewards/margins": 1.0338271856307983, + "rewards/rejected": -0.7908223867416382, + "step": 509 + }, + { + "epoch": 0.82, + "learning_rate": 4.545454545454545e-07, + "logits/chosen": -1.28767728805542, + "logits/rejected": -1.2918866872787476, + "logps/chosen": -76.27578735351562, + "logps/rejected": -89.114501953125, + "loss": 0.6208, + "rewards/accuracies": 0.25, + "rewards/chosen": -0.05444173142313957, + "rewards/margins": -0.051602739840745926, + "rewards/rejected": -0.002838999032974243, + "step": 510 + }, + { + "epoch": 0.82, + "learning_rate": 4.5543672014260246e-07, + "logits/chosen": -1.1188105344772339, + "logits/rejected": -1.1183234453201294, + "logps/chosen": -77.97904205322266, + "logps/rejected": -84.43910217285156, + "loss": 0.6107, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.08819914609193802, + "rewards/margins": 0.1966385841369629, + "rewards/rejected": -0.10843944549560547, + "step": 511 + }, + { + "epoch": 0.82, + "learning_rate": 4.563279857397504e-07, + "logits/chosen": -1.0546026229858398, + "logits/rejected": -1.16685152053833, + "logps/chosen": -82.48338317871094, + "logps/rejected": -92.44326782226562, + "loss": 0.5062, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.06616096198558807, + "rewards/margins": 0.6463236212730408, + "rewards/rejected": -0.5801626443862915, + "step": 512 + }, + { + "epoch": 0.82, + "learning_rate": 4.5721925133689835e-07, + "logits/chosen": -1.4381133317947388, + "logits/rejected": -1.4179308414459229, + "logps/chosen": -76.12765502929688, + "logps/rejected": -98.17503356933594, + "loss": 0.6139, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.07124081254005432, + "rewards/margins": 0.17919884622097015, + "rewards/rejected": -0.2504396438598633, + "step": 513 + }, + { + "epoch": 0.83, + "learning_rate": 4.5811051693404635e-07, + "logits/chosen": -1.035050630569458, + "logits/rejected": -1.0544079542160034, + "logps/chosen": -69.63946533203125, + "logps/rejected": -98.0809326171875, + "loss": 0.6549, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.09673299640417099, + "rewards/margins": 0.23705987632274628, + "rewards/rejected": -0.14032688736915588, + "step": 514 + }, + { + "epoch": 0.83, + "learning_rate": 4.5900178253119424e-07, + "logits/chosen": -1.2437968254089355, + "logits/rejected": -1.3745887279510498, + "logps/chosen": -67.9451904296875, + "logps/rejected": -118.43597412109375, + "loss": 0.5989, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.0662335455417633, + "rewards/margins": 0.12917622923851013, + "rewards/rejected": -0.06294269859790802, + "step": 515 + }, + { + "epoch": 0.83, + "learning_rate": 4.5989304812834224e-07, + "logits/chosen": -1.3039069175720215, + "logits/rejected": -1.274573802947998, + "logps/chosen": -93.60668182373047, + "logps/rejected": -98.05294799804688, + "loss": 0.5838, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.21373215317726135, + "rewards/margins": 0.0997920036315918, + "rewards/rejected": -0.31352412700653076, + "step": 516 + }, + { + "epoch": 0.83, + "learning_rate": 4.6078431372549013e-07, + "logits/chosen": -0.9579216241836548, + "logits/rejected": -0.9002583622932434, + "logps/chosen": -81.09269714355469, + "logps/rejected": -103.15159606933594, + "loss": 0.6325, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.26695510745048523, + "rewards/margins": 0.07634346187114716, + "rewards/rejected": -0.3432985544204712, + "step": 517 + }, + { + "epoch": 0.83, + "learning_rate": 4.616755793226381e-07, + "logits/chosen": -1.107038974761963, + "logits/rejected": -1.0528725385665894, + "logps/chosen": -75.65821838378906, + "logps/rejected": -87.76038360595703, + "loss": 0.6774, + "rewards/accuracies": 0.25, + "rewards/chosen": 0.1812681257724762, + "rewards/margins": -0.12633362412452698, + "rewards/rejected": 0.3076017498970032, + "step": 518 + }, + { + "epoch": 0.83, + "learning_rate": 4.6256684491978607e-07, + "logits/chosen": -1.201785922050476, + "logits/rejected": -1.1985136270523071, + "logps/chosen": -80.35051727294922, + "logps/rejected": -81.10623168945312, + "loss": 0.5669, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.12764224410057068, + "rewards/margins": 0.30668601393699646, + "rewards/rejected": -0.17904376983642578, + "step": 519 + }, + { + "epoch": 0.83, + "learning_rate": 4.63458110516934e-07, + "logits/chosen": -1.3224291801452637, + "logits/rejected": -1.3507975339889526, + "logps/chosen": -71.86321258544922, + "logps/rejected": -90.33245849609375, + "loss": 0.4835, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.22387278079986572, + "rewards/margins": 1.1589363813400269, + "rewards/rejected": -0.9350636005401611, + "step": 520 + }, + { + "epoch": 0.84, + "learning_rate": 4.6434937611408196e-07, + "logits/chosen": -1.4065959453582764, + "logits/rejected": -1.3828284740447998, + "logps/chosen": -74.67182922363281, + "logps/rejected": -83.63604736328125, + "loss": 0.5226, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.20358523726463318, + "rewards/margins": 0.7563045024871826, + "rewards/rejected": -0.5527192950248718, + "step": 521 + }, + { + "epoch": 0.84, + "learning_rate": 4.6524064171122995e-07, + "logits/chosen": -1.3299577236175537, + "logits/rejected": -1.279381513595581, + "logps/chosen": -111.23765563964844, + "logps/rejected": -88.43760681152344, + "loss": 0.5822, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.03902149200439453, + "rewards/margins": 0.11511565744876862, + "rewards/rejected": -0.15413713455200195, + "step": 522 + }, + { + "epoch": 0.84, + "learning_rate": 4.6613190730837784e-07, + "logits/chosen": -1.1045217514038086, + "logits/rejected": -1.1174486875534058, + "logps/chosen": -77.10578918457031, + "logps/rejected": -99.78474426269531, + "loss": 0.5509, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.13691720366477966, + "rewards/margins": 0.6185317039489746, + "rewards/rejected": -0.48161450028419495, + "step": 523 + }, + { + "epoch": 0.84, + "learning_rate": 4.6702317290552584e-07, + "logits/chosen": -1.3873976469039917, + "logits/rejected": -1.359724521636963, + "logps/chosen": -97.50643920898438, + "logps/rejected": -103.14022827148438, + "loss": 0.5671, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14090441167354584, + "rewards/margins": 0.8121362924575806, + "rewards/rejected": -0.9530407190322876, + "step": 524 + }, + { + "epoch": 0.84, + "learning_rate": 4.679144385026738e-07, + "logits/chosen": -1.388555884361267, + "logits/rejected": -1.3609460592269897, + "logps/chosen": -66.62744140625, + "logps/rejected": -88.90987396240234, + "loss": 0.4806, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1254931390285492, + "rewards/margins": 1.0332814455032349, + "rewards/rejected": -0.9077882766723633, + "step": 525 + }, + { + "epoch": 0.84, + "learning_rate": 4.6880570409982173e-07, + "logits/chosen": -1.3501313924789429, + "logits/rejected": -1.3655818700790405, + "logps/chosen": -71.48239135742188, + "logps/rejected": -98.6288070678711, + "loss": 0.6287, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.5461432933807373, + "rewards/margins": 0.8678500652313232, + "rewards/rejected": -0.32170677185058594, + "step": 526 + }, + { + "epoch": 0.85, + "learning_rate": 4.696969696969697e-07, + "logits/chosen": -1.1225335597991943, + "logits/rejected": -1.2468353509902954, + "logps/chosen": -80.09088134765625, + "logps/rejected": -79.40737915039062, + "loss": 0.7199, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.026747509837150574, + "rewards/margins": 0.008074279874563217, + "rewards/rejected": -0.03482179343700409, + "step": 527 + }, + { + "epoch": 0.85, + "learning_rate": 4.705882352941176e-07, + "logits/chosen": -1.2061996459960938, + "logits/rejected": -1.1446453332901, + "logps/chosen": -61.434844970703125, + "logps/rejected": -86.4167251586914, + "loss": 0.5524, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.08185644447803497, + "rewards/margins": 0.4391217529773712, + "rewards/rejected": -0.35726529359817505, + "step": 528 + }, + { + "epoch": 0.85, + "learning_rate": 4.714795008912656e-07, + "logits/chosen": -1.2546424865722656, + "logits/rejected": -1.2719806432724, + "logps/chosen": -103.4587173461914, + "logps/rejected": -103.05526733398438, + "loss": 0.5637, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.47841280698776245, + "rewards/margins": 0.5294553637504578, + "rewards/rejected": -0.05104255676269531, + "step": 529 + }, + { + "epoch": 0.85, + "learning_rate": 4.723707664884135e-07, + "logits/chosen": -1.197046160697937, + "logits/rejected": -1.2775962352752686, + "logps/chosen": -81.32250213623047, + "logps/rejected": -108.00031280517578, + "loss": 0.5489, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.2760895788669586, + "rewards/margins": 0.4764505624771118, + "rewards/rejected": -0.200360968708992, + "step": 530 + }, + { + "epoch": 0.85, + "learning_rate": 4.732620320855615e-07, + "logits/chosen": -1.231143593788147, + "logits/rejected": -1.2688180208206177, + "logps/chosen": -74.77055358886719, + "logps/rejected": -90.42788696289062, + "loss": 0.6115, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.09370565414428711, + "rewards/margins": 0.414714515209198, + "rewards/rejected": -0.3210088908672333, + "step": 531 + }, + { + "epoch": 0.85, + "learning_rate": 4.7415329768270945e-07, + "logits/chosen": -1.4162225723266602, + "logits/rejected": -1.404780626296997, + "logps/chosen": -79.75340270996094, + "logps/rejected": -110.08251953125, + "loss": 0.6176, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.5088580846786499, + "rewards/margins": 0.2608366310596466, + "rewards/rejected": 0.24802151322364807, + "step": 532 + }, + { + "epoch": 0.86, + "learning_rate": 4.750445632798574e-07, + "logits/chosen": -1.1498849391937256, + "logits/rejected": -1.187833547592163, + "logps/chosen": -83.81990814208984, + "logps/rejected": -82.23490905761719, + "loss": 0.5914, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.22813062369823456, + "rewards/margins": -0.005987636744976044, + "rewards/rejected": -0.2221429944038391, + "step": 533 + }, + { + "epoch": 0.86, + "learning_rate": 4.7593582887700533e-07, + "logits/chosen": -1.3782057762145996, + "logits/rejected": -1.3437566757202148, + "logps/chosen": -74.18921661376953, + "logps/rejected": -73.58160400390625, + "loss": 0.5428, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.01146039366722107, + "rewards/margins": 0.22365587949752808, + "rewards/rejected": -0.23511627316474915, + "step": 534 + }, + { + "epoch": 0.86, + "learning_rate": 4.768270944741533e-07, + "logits/chosen": -1.072864055633545, + "logits/rejected": -1.0429401397705078, + "logps/chosen": -101.87779235839844, + "logps/rejected": -97.3675537109375, + "loss": 0.5175, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07676525413990021, + "rewards/margins": 0.931308925151825, + "rewards/rejected": -0.8545436859130859, + "step": 535 + }, + { + "epoch": 0.86, + "learning_rate": 4.777183600713012e-07, + "logits/chosen": -1.2275487184524536, + "logits/rejected": -1.269572138786316, + "logps/chosen": -85.5536117553711, + "logps/rejected": -92.15901184082031, + "loss": 0.5412, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.2822950482368469, + "rewards/margins": 0.10773582756519318, + "rewards/rejected": 0.17455920577049255, + "step": 536 + }, + { + "epoch": 0.86, + "learning_rate": 4.786096256684492e-07, + "logits/chosen": -1.298561692237854, + "logits/rejected": -1.2600250244140625, + "logps/chosen": -85.48323822021484, + "logps/rejected": -78.68019104003906, + "loss": 0.6211, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.07721023261547089, + "rewards/margins": 0.15014401078224182, + "rewards/rejected": -0.07293376326560974, + "step": 537 + }, + { + "epoch": 0.86, + "learning_rate": 4.795008912655971e-07, + "logits/chosen": -1.3352574110031128, + "logits/rejected": -1.2423317432403564, + "logps/chosen": -88.6736068725586, + "logps/rejected": -104.8981704711914, + "loss": 0.5699, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.4905332624912262, + "rewards/margins": 0.6380726099014282, + "rewards/rejected": -0.14753934741020203, + "step": 538 + }, + { + "epoch": 0.87, + "learning_rate": 4.803921568627451e-07, + "logits/chosen": -1.2144358158111572, + "logits/rejected": -1.2555122375488281, + "logps/chosen": -87.49639129638672, + "logps/rejected": -82.2857666015625, + "loss": 0.5641, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.036625102162361145, + "rewards/margins": 0.206405371427536, + "rewards/rejected": -0.16978025436401367, + "step": 539 + }, + { + "epoch": 0.87, + "learning_rate": 4.81283422459893e-07, + "logits/chosen": -1.3311662673950195, + "logits/rejected": -1.333019733428955, + "logps/chosen": -88.40093994140625, + "logps/rejected": -84.13224792480469, + "loss": 0.5598, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.12027893215417862, + "rewards/margins": 0.511608898639679, + "rewards/rejected": -0.39132997393608093, + "step": 540 + }, + { + "epoch": 0.87, + "learning_rate": 4.82174688057041e-07, + "logits/chosen": -1.214052677154541, + "logits/rejected": -1.1910885572433472, + "logps/chosen": -75.60552978515625, + "logps/rejected": -84.18815612792969, + "loss": 0.6806, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.13181447982788086, + "rewards/margins": 0.07297420501708984, + "rewards/rejected": -0.2047886699438095, + "step": 541 + }, + { + "epoch": 0.87, + "learning_rate": 4.83065953654189e-07, + "logits/chosen": -1.4014720916748047, + "logits/rejected": -1.3548715114593506, + "logps/chosen": -87.47981262207031, + "logps/rejected": -92.57257080078125, + "loss": 0.5512, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.16412773728370667, + "rewards/margins": 0.4658505618572235, + "rewards/rejected": -0.30172282457351685, + "step": 542 + }, + { + "epoch": 0.87, + "learning_rate": 4.839572192513369e-07, + "logits/chosen": -1.30021071434021, + "logits/rejected": -1.3385143280029297, + "logps/chosen": -73.56980895996094, + "logps/rejected": -86.39370727539062, + "loss": 0.588, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3292170763015747, + "rewards/margins": 0.5708808302879333, + "rewards/rejected": -0.24166375398635864, + "step": 543 + }, + { + "epoch": 0.87, + "learning_rate": 4.848484848484849e-07, + "logits/chosen": -1.225258231163025, + "logits/rejected": -1.2333037853240967, + "logps/chosen": -96.12332153320312, + "logps/rejected": -90.2834243774414, + "loss": 0.5256, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.18250027298927307, + "rewards/margins": 0.40183037519454956, + "rewards/rejected": -0.21933013200759888, + "step": 544 + }, + { + "epoch": 0.87, + "learning_rate": 4.857397504456328e-07, + "logits/chosen": -1.3764548301696777, + "logits/rejected": -1.3731576204299927, + "logps/chosen": -98.42424774169922, + "logps/rejected": -86.71283721923828, + "loss": 0.6197, + "rewards/accuracies": 0.25, + "rewards/chosen": 0.10954676568508148, + "rewards/margins": -0.16430866718292236, + "rewards/rejected": 0.27385538816452026, + "step": 545 + }, + { + "epoch": 0.88, + "learning_rate": 4.866310160427808e-07, + "logits/chosen": -1.5454630851745605, + "logits/rejected": -1.4743194580078125, + "logps/chosen": -107.26051330566406, + "logps/rejected": -84.01083374023438, + "loss": 0.5673, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.28673800826072693, + "rewards/margins": 0.26270198822021484, + "rewards/rejected": 0.024036023765802383, + "step": 546 + }, + { + "epoch": 0.88, + "learning_rate": 4.875222816399287e-07, + "logits/chosen": -1.2737960815429688, + "logits/rejected": -1.2832921743392944, + "logps/chosen": -77.96357727050781, + "logps/rejected": -88.15559387207031, + "loss": 0.4448, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.33258840441703796, + "rewards/margins": 1.1377196311950684, + "rewards/rejected": -0.8051311373710632, + "step": 547 + }, + { + "epoch": 0.88, + "learning_rate": 4.884135472370767e-07, + "logits/chosen": -1.3180967569351196, + "logits/rejected": -1.2780282497406006, + "logps/chosen": -96.52350616455078, + "logps/rejected": -94.44822692871094, + "loss": 0.4853, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4558217227458954, + "rewards/margins": 0.961876630783081, + "rewards/rejected": -0.5060548782348633, + "step": 548 + }, + { + "epoch": 0.88, + "learning_rate": 4.893048128342245e-07, + "logits/chosen": -1.2838759422302246, + "logits/rejected": -1.318559169769287, + "logps/chosen": -69.40345764160156, + "logps/rejected": -68.38792419433594, + "loss": 0.4806, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.30686864256858826, + "rewards/margins": 0.5407623648643494, + "rewards/rejected": -0.23389369249343872, + "step": 549 + }, + { + "epoch": 0.88, + "learning_rate": 4.901960784313725e-07, + "logits/chosen": -1.1323721408843994, + "logits/rejected": -1.198715090751648, + "logps/chosen": -77.53663635253906, + "logps/rejected": -86.59736633300781, + "loss": 0.6526, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.04195786267518997, + "rewards/margins": 0.4667506515979767, + "rewards/rejected": -0.4247927665710449, + "step": 550 + }, + { + "epoch": 0.88, + "learning_rate": 4.910873440285204e-07, + "logits/chosen": -1.294614315032959, + "logits/rejected": -1.3211073875427246, + "logps/chosen": -54.81311798095703, + "logps/rejected": -58.249305725097656, + "loss": 0.612, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.08941354602575302, + "rewards/margins": 0.586482048034668, + "rewards/rejected": -0.49706852436065674, + "step": 551 + }, + { + "epoch": 0.89, + "learning_rate": 4.919786096256684e-07, + "logits/chosen": -1.0424494743347168, + "logits/rejected": -1.069559931755066, + "logps/chosen": -66.43762969970703, + "logps/rejected": -77.0403060913086, + "loss": 0.524, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2138873189687729, + "rewards/margins": 0.795117974281311, + "rewards/rejected": -0.581230640411377, + "step": 552 + }, + { + "epoch": 0.89, + "learning_rate": 4.928698752228163e-07, + "logits/chosen": -1.2054654359817505, + "logits/rejected": -1.191928744316101, + "logps/chosen": -121.63521575927734, + "logps/rejected": -97.16558837890625, + "loss": 0.7453, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.17204265296459198, + "rewards/margins": 0.2710127830505371, + "rewards/rejected": -0.09897013008594513, + "step": 553 + }, + { + "epoch": 0.89, + "learning_rate": 4.937611408199643e-07, + "logits/chosen": -1.2801506519317627, + "logits/rejected": -1.238020658493042, + "logps/chosen": -95.95796203613281, + "logps/rejected": -96.977783203125, + "loss": 0.5683, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.06383419036865234, + "rewards/margins": 0.5699779987335205, + "rewards/rejected": -0.5061437487602234, + "step": 554 + }, + { + "epoch": 0.89, + "learning_rate": 4.946524064171122e-07, + "logits/chosen": -1.4404335021972656, + "logits/rejected": -1.452319622039795, + "logps/chosen": -89.66966247558594, + "logps/rejected": -91.53480529785156, + "loss": 0.5671, + "rewards/accuracies": 0.25, + "rewards/chosen": -0.16168269515037537, + "rewards/margins": -0.2626934051513672, + "rewards/rejected": 0.10101071000099182, + "step": 555 + }, + { + "epoch": 0.89, + "learning_rate": 4.955436720142602e-07, + "logits/chosen": -1.5597975254058838, + "logits/rejected": -1.5821168422698975, + "logps/chosen": -80.59342956542969, + "logps/rejected": -87.59696960449219, + "loss": 0.4188, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.21060650050640106, + "rewards/margins": 1.4811652898788452, + "rewards/rejected": -1.2705588340759277, + "step": 556 + }, + { + "epoch": 0.89, + "learning_rate": 4.964349376114082e-07, + "logits/chosen": -1.3771765232086182, + "logits/rejected": -1.3455736637115479, + "logps/chosen": -60.690284729003906, + "logps/rejected": -78.45352172851562, + "loss": 0.6013, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.20071697235107422, + "rewards/margins": 0.21986961364746094, + "rewards/rejected": -0.42058658599853516, + "step": 557 + }, + { + "epoch": 0.9, + "learning_rate": 4.973262032085561e-07, + "logits/chosen": -1.4628134965896606, + "logits/rejected": -1.364511489868164, + "logps/chosen": -86.15171813964844, + "logps/rejected": -101.25032043457031, + "loss": 0.6716, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.17940084636211395, + "rewards/margins": 0.5485048294067383, + "rewards/rejected": -0.7279056906700134, + "step": 558 + }, + { + "epoch": 0.9, + "learning_rate": 4.982174688057041e-07, + "logits/chosen": -1.33989679813385, + "logits/rejected": -1.3297724723815918, + "logps/chosen": -71.61750030517578, + "logps/rejected": -86.65336608886719, + "loss": 0.4819, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.39479389786720276, + "rewards/margins": 0.9868059158325195, + "rewards/rejected": -0.5920120477676392, + "step": 559 + }, + { + "epoch": 0.9, + "learning_rate": 4.99108734402852e-07, + "logits/chosen": -1.3494040966033936, + "logits/rejected": -1.4182546138763428, + "logps/chosen": -74.16643524169922, + "logps/rejected": -90.03691101074219, + "loss": 0.4271, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.23634515702724457, + "rewards/margins": 0.9433552026748657, + "rewards/rejected": -0.7070100903511047, + "step": 560 + }, + { + "epoch": 0.9, + "learning_rate": 5e-07, + "logits/chosen": -1.285981297492981, + "logits/rejected": -1.258286714553833, + "logps/chosen": -85.81561279296875, + "logps/rejected": -88.25396728515625, + "loss": 0.4903, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.04454001784324646, + "rewards/margins": 1.0039767026901245, + "rewards/rejected": -0.9594365954399109, + "step": 561 + }, + { + "epoch": 0.9, + "learning_rate": 4.999009116131589e-07, + "logits/chosen": -1.120640754699707, + "logits/rejected": -1.178164005279541, + "logps/chosen": -67.48035430908203, + "logps/rejected": -63.35462188720703, + "loss": 0.6207, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.2988602817058563, + "rewards/margins": 0.04768940806388855, + "rewards/rejected": -0.3465496897697449, + "step": 562 + }, + { + "epoch": 0.9, + "learning_rate": 4.998018232263179e-07, + "logits/chosen": -1.1995348930358887, + "logits/rejected": -1.1698591709136963, + "logps/chosen": -90.9691162109375, + "logps/rejected": -101.98523712158203, + "loss": 0.5705, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.30659523606300354, + "rewards/margins": -0.2975767254829407, + "rewards/rejected": -0.009018510580062866, + "step": 563 + }, + { + "epoch": 0.91, + "learning_rate": 4.997027348394768e-07, + "logits/chosen": -1.227642297744751, + "logits/rejected": -1.3569164276123047, + "logps/chosen": -88.36021423339844, + "logps/rejected": -85.75800323486328, + "loss": 0.5984, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.20611286163330078, + "rewards/margins": 0.4835100471973419, + "rewards/rejected": -0.27739715576171875, + "step": 564 + }, + { + "epoch": 0.91, + "learning_rate": 4.996036464526357e-07, + "logits/chosen": -1.4158629179000854, + "logits/rejected": -1.3493050336837769, + "logps/chosen": -73.33955383300781, + "logps/rejected": -78.94459533691406, + "loss": 0.5957, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.36537277698516846, + "rewards/margins": 0.15291184186935425, + "rewards/rejected": -0.5182846188545227, + "step": 565 + }, + { + "epoch": 0.91, + "learning_rate": 4.995045580657946e-07, + "logits/chosen": -1.0916895866394043, + "logits/rejected": -1.1476216316223145, + "logps/chosen": -86.02511596679688, + "logps/rejected": -115.76933288574219, + "loss": 0.6424, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.12086418271064758, + "rewards/margins": 0.6599949598312378, + "rewards/rejected": -0.5391308069229126, + "step": 566 + }, + { + "epoch": 0.91, + "learning_rate": 4.994054696789536e-07, + "logits/chosen": -1.2955822944641113, + "logits/rejected": -1.3777801990509033, + "logps/chosen": -80.52973175048828, + "logps/rejected": -117.38594818115234, + "loss": 0.5107, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.3260672688484192, + "rewards/margins": -0.2619982957839966, + "rewards/rejected": -0.0640689805150032, + "step": 567 + }, + { + "epoch": 0.91, + "learning_rate": 4.993063812921125e-07, + "logits/chosen": -1.1383839845657349, + "logits/rejected": -1.1670085191726685, + "logps/chosen": -91.63558959960938, + "logps/rejected": -112.77704620361328, + "loss": 0.4847, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3510518968105316, + "rewards/margins": 0.9902811646461487, + "rewards/rejected": -0.6392291784286499, + "step": 568 + }, + { + "epoch": 0.91, + "learning_rate": 4.992072929052714e-07, + "logits/chosen": -1.2374907732009888, + "logits/rejected": -1.2569301128387451, + "logps/chosen": -72.45689392089844, + "logps/rejected": -75.0589599609375, + "loss": 0.5812, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.5854387283325195, + "rewards/margins": -0.5143527984619141, + "rewards/rejected": -0.07108592987060547, + "step": 569 + }, + { + "epoch": 0.91, + "learning_rate": 4.991082045184305e-07, + "logits/chosen": -1.3399752378463745, + "logits/rejected": -1.2377240657806396, + "logps/chosen": -64.93412780761719, + "logps/rejected": -101.14253234863281, + "loss": 0.5135, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.18759813904762268, + "rewards/margins": 1.3550052642822266, + "rewards/rejected": -1.1674072742462158, + "step": 570 + }, + { + "epoch": 0.92, + "learning_rate": 4.990091161315894e-07, + "logits/chosen": -1.2700810432434082, + "logits/rejected": -1.336404800415039, + "logps/chosen": -94.3863754272461, + "logps/rejected": -107.42289733886719, + "loss": 0.6091, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6374274492263794, + "rewards/margins": 1.5782051086425781, + "rewards/rejected": -0.940777599811554, + "step": 571 + }, + { + "epoch": 0.92, + "learning_rate": 4.989100277447483e-07, + "logits/chosen": -1.3706709146499634, + "logits/rejected": -1.3005098104476929, + "logps/chosen": -87.9084701538086, + "logps/rejected": -95.61963653564453, + "loss": 0.4605, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.2238912731409073, + "rewards/margins": 0.42033764719963074, + "rewards/rejected": -0.19644641876220703, + "step": 572 + }, + { + "epoch": 0.92, + "learning_rate": 4.988109393579073e-07, + "logits/chosen": -0.9631893038749695, + "logits/rejected": -1.0513523817062378, + "logps/chosen": -92.02458190917969, + "logps/rejected": -70.5799560546875, + "loss": 0.5872, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.14784622192382812, + "rewards/margins": 0.40031006932258606, + "rewards/rejected": -0.5481562614440918, + "step": 573 + }, + { + "epoch": 0.92, + "learning_rate": 4.987118509710662e-07, + "logits/chosen": -1.2372655868530273, + "logits/rejected": -1.3549309968948364, + "logps/chosen": -76.55875396728516, + "logps/rejected": -95.23396301269531, + "loss": 0.6052, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.03763227164745331, + "rewards/margins": 0.33491814136505127, + "rewards/rejected": -0.29728585481643677, + "step": 574 + }, + { + "epoch": 0.92, + "learning_rate": 4.986127625842251e-07, + "logits/chosen": -1.4140807390213013, + "logits/rejected": -1.3425477743148804, + "logps/chosen": -82.0469970703125, + "logps/rejected": -94.49651336669922, + "loss": 0.5915, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.055813416838645935, + "rewards/margins": 0.1146366074681282, + "rewards/rejected": -0.17045000195503235, + "step": 575 + }, + { + "epoch": 0.92, + "learning_rate": 4.98513674197384e-07, + "logits/chosen": -1.3198412656784058, + "logits/rejected": -1.2619305849075317, + "logps/chosen": -88.65512084960938, + "logps/rejected": -97.06603240966797, + "loss": 0.5274, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.37089288234710693, + "rewards/margins": 0.4976494014263153, + "rewards/rejected": -0.8685423135757446, + "step": 576 + }, + { + "epoch": 0.93, + "learning_rate": 4.98414585810543e-07, + "logits/chosen": -1.2501246929168701, + "logits/rejected": -1.2402082681655884, + "logps/chosen": -76.59994506835938, + "logps/rejected": -73.78067016601562, + "loss": 0.5572, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.16381186246871948, + "rewards/margins": 0.4064028859138489, + "rewards/rejected": -0.242590993642807, + "step": 577 + }, + { + "epoch": 0.93, + "learning_rate": 4.983154974237019e-07, + "logits/chosen": -1.2636576890945435, + "logits/rejected": -1.3445930480957031, + "logps/chosen": -83.80538177490234, + "logps/rejected": -128.08084106445312, + "loss": 0.5559, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.4779362678527832, + "rewards/margins": 1.111436367034912, + "rewards/rejected": -0.6335000991821289, + "step": 578 + }, + { + "epoch": 0.93, + "learning_rate": 4.982164090368608e-07, + "logits/chosen": -1.0301737785339355, + "logits/rejected": -1.0386723279953003, + "logps/chosen": -78.90385437011719, + "logps/rejected": -89.29969787597656, + "loss": 0.4674, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.23647700250148773, + "rewards/margins": 0.30727100372314453, + "rewards/rejected": -0.070794016122818, + "step": 579 + }, + { + "epoch": 0.93, + "learning_rate": 4.981173206500197e-07, + "logits/chosen": -1.1910960674285889, + "logits/rejected": -1.2563642263412476, + "logps/chosen": -76.75785827636719, + "logps/rejected": -101.27616882324219, + "loss": 0.5398, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.16353550553321838, + "rewards/margins": 0.8526790738105774, + "rewards/rejected": -0.6891435384750366, + "step": 580 + }, + { + "epoch": 0.93, + "learning_rate": 4.980182322631787e-07, + "logits/chosen": -1.2392325401306152, + "logits/rejected": -1.295891284942627, + "logps/chosen": -85.30451965332031, + "logps/rejected": -77.27715301513672, + "loss": 0.544, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.3910151422023773, + "rewards/margins": 0.15746518969535828, + "rewards/rejected": -0.5484803318977356, + "step": 581 + }, + { + "epoch": 0.93, + "learning_rate": 4.979191438763377e-07, + "logits/chosen": -1.152367353439331, + "logits/rejected": -1.1962895393371582, + "logps/chosen": -72.00267791748047, + "logps/rejected": -93.83041381835938, + "loss": 0.498, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.2042720913887024, + "rewards/margins": 0.20998209714889526, + "rewards/rejected": -0.005710035562515259, + "step": 582 + }, + { + "epoch": 0.94, + "learning_rate": 4.978200554894967e-07, + "logits/chosen": -1.3009443283081055, + "logits/rejected": -1.2929425239562988, + "logps/chosen": -86.97715759277344, + "logps/rejected": -91.3478012084961, + "loss": 0.5083, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.18478050827980042, + "rewards/margins": 0.48371171951293945, + "rewards/rejected": -0.29893121123313904, + "step": 583 + }, + { + "epoch": 0.94, + "learning_rate": 4.977209671026556e-07, + "logits/chosen": -1.2644720077514648, + "logits/rejected": -1.2851758003234863, + "logps/chosen": -71.13385772705078, + "logps/rejected": -71.88487243652344, + "loss": 0.5852, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.2822767198085785, + "rewards/margins": 0.4247641861438751, + "rewards/rejected": -0.14248742163181305, + "step": 584 + }, + { + "epoch": 0.94, + "learning_rate": 4.976218787158145e-07, + "logits/chosen": -1.2103443145751953, + "logits/rejected": -1.1881924867630005, + "logps/chosen": -64.32135009765625, + "logps/rejected": -80.29042053222656, + "loss": 0.5376, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3851920962333679, + "rewards/margins": 0.863133430480957, + "rewards/rejected": -0.4779413342475891, + "step": 585 + }, + { + "epoch": 0.94, + "learning_rate": 4.975227903289734e-07, + "logits/chosen": -1.1768839359283447, + "logits/rejected": -1.179174542427063, + "logps/chosen": -59.19326400756836, + "logps/rejected": -56.490089416503906, + "loss": 0.4396, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3354972004890442, + "rewards/margins": 0.5555751323699951, + "rewards/rejected": -0.22007791697978973, + "step": 586 + }, + { + "epoch": 0.94, + "learning_rate": 4.974237019421323e-07, + "logits/chosen": -1.3507404327392578, + "logits/rejected": -1.393140196800232, + "logps/chosen": -76.04619598388672, + "logps/rejected": -76.75607299804688, + "loss": 0.5188, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.17205534875392914, + "rewards/margins": 0.7690553665161133, + "rewards/rejected": -0.5970000624656677, + "step": 587 + }, + { + "epoch": 0.94, + "learning_rate": 4.973246135552913e-07, + "logits/chosen": -1.1391477584838867, + "logits/rejected": -1.222862720489502, + "logps/chosen": -100.2994155883789, + "logps/rejected": -111.29771423339844, + "loss": 0.5536, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.1966579556465149, + "rewards/margins": 0.5500324368476868, + "rewards/rejected": -0.7466903924942017, + "step": 588 + }, + { + "epoch": 0.95, + "learning_rate": 4.972255251684502e-07, + "logits/chosen": -1.1883690357208252, + "logits/rejected": -1.1895211935043335, + "logps/chosen": -75.74125671386719, + "logps/rejected": -92.78091430664062, + "loss": 0.5756, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.057890601456165314, + "rewards/margins": 0.19253160059452057, + "rewards/rejected": -0.2504222095012665, + "step": 589 + }, + { + "epoch": 0.95, + "learning_rate": 4.971264367816091e-07, + "logits/chosen": -1.33736252784729, + "logits/rejected": -1.3468315601348877, + "logps/chosen": -56.19721984863281, + "logps/rejected": -89.37432861328125, + "loss": 0.5893, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.15139514207839966, + "rewards/margins": 0.7856346368789673, + "rewards/rejected": -0.6342394948005676, + "step": 590 + }, + { + "epoch": 0.95, + "learning_rate": 4.970273483947681e-07, + "logits/chosen": -1.25485098361969, + "logits/rejected": -1.2543526887893677, + "logps/chosen": -84.75244140625, + "logps/rejected": -89.346923828125, + "loss": 0.5758, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.17230664193630219, + "rewards/margins": 0.5664899945259094, + "rewards/rejected": -0.39418336749076843, + "step": 591 + }, + { + "epoch": 0.95, + "learning_rate": 4.96928260007927e-07, + "logits/chosen": -1.245758295059204, + "logits/rejected": -1.2876516580581665, + "logps/chosen": -95.44541931152344, + "logps/rejected": -98.16511535644531, + "loss": 0.5025, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.1914152204990387, + "rewards/margins": 1.2667051553726196, + "rewards/rejected": -1.0752899646759033, + "step": 592 + }, + { + "epoch": 0.95, + "learning_rate": 4.968291716210861e-07, + "logits/chosen": -1.4653078317642212, + "logits/rejected": -1.4944977760314941, + "logps/chosen": -77.44109344482422, + "logps/rejected": -86.93052673339844, + "loss": 0.6532, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.2208063155412674, + "rewards/margins": 0.4184355139732361, + "rewards/rejected": -0.1976291686296463, + "step": 593 + }, + { + "epoch": 0.95, + "learning_rate": 4.96730083234245e-07, + "logits/chosen": -1.2143566608428955, + "logits/rejected": -1.117998719215393, + "logps/chosen": -117.43280792236328, + "logps/rejected": -126.66216278076172, + "loss": 0.5213, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.17674694955348969, + "rewards/margins": 0.4255586564540863, + "rewards/rejected": -0.6023055911064148, + "step": 594 + }, + { + "epoch": 0.96, + "learning_rate": 4.966309948474039e-07, + "logits/chosen": -1.1985645294189453, + "logits/rejected": -1.18355131149292, + "logps/chosen": -88.03733825683594, + "logps/rejected": -98.13749694824219, + "loss": 0.614, + "rewards/accuracies": 0.25, + "rewards/chosen": -0.042481809854507446, + "rewards/margins": 0.20142421126365662, + "rewards/rejected": -0.24390602111816406, + "step": 595 + }, + { + "epoch": 0.96, + "learning_rate": 4.965319064605628e-07, + "logits/chosen": -1.3978559970855713, + "logits/rejected": -1.4010615348815918, + "logps/chosen": -105.0785903930664, + "logps/rejected": -115.11529541015625, + "loss": 0.5549, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.16706030070781708, + "rewards/margins": 0.5290287733078003, + "rewards/rejected": -0.361968457698822, + "step": 596 + }, + { + "epoch": 0.96, + "learning_rate": 4.964328180737217e-07, + "logits/chosen": -1.3503137826919556, + "logits/rejected": -1.3680930137634277, + "logps/chosen": -105.66533660888672, + "logps/rejected": -105.16546630859375, + "loss": 0.4967, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.04705962538719177, + "rewards/margins": 0.7302875518798828, + "rewards/rejected": -0.6832278966903687, + "step": 597 + }, + { + "epoch": 0.96, + "learning_rate": 4.963337296868807e-07, + "logits/chosen": -1.0336048603057861, + "logits/rejected": -1.0683064460754395, + "logps/chosen": -96.23306274414062, + "logps/rejected": -112.42273712158203, + "loss": 0.4875, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.15882760286331177, + "rewards/margins": 0.8142921924591064, + "rewards/rejected": -0.9731197357177734, + "step": 598 + }, + { + "epoch": 0.96, + "learning_rate": 4.962346413000396e-07, + "logits/chosen": -1.227872371673584, + "logits/rejected": -1.2946739196777344, + "logps/chosen": -73.75743865966797, + "logps/rejected": -83.07308959960938, + "loss": 0.4614, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.02288997173309326, + "rewards/margins": 1.1017094850540161, + "rewards/rejected": -1.0788193941116333, + "step": 599 + }, + { + "epoch": 0.96, + "learning_rate": 4.961355529131985e-07, + "logits/chosen": -1.2668145895004272, + "logits/rejected": -1.214590072631836, + "logps/chosen": -66.786865234375, + "logps/rejected": -93.34317779541016, + "loss": 0.472, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13436584174633026, + "rewards/margins": 0.6218290328979492, + "rewards/rejected": -0.7561948895454407, + "step": 600 + }, + { + "epoch": 0.96, + "learning_rate": 4.960364645263575e-07, + "logits/chosen": -1.1189699172973633, + "logits/rejected": -1.1388099193572998, + "logps/chosen": -78.88019561767578, + "logps/rejected": -94.41413879394531, + "loss": 0.598, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.1018582284450531, + "rewards/margins": 0.27284228801727295, + "rewards/rejected": -0.17098407447338104, + "step": 601 + }, + { + "epoch": 0.97, + "learning_rate": 4.959373761395164e-07, + "logits/chosen": -1.1796256303787231, + "logits/rejected": -1.3338419198989868, + "logps/chosen": -115.3305435180664, + "logps/rejected": -113.66812133789062, + "loss": 0.5184, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.11392012238502502, + "rewards/margins": 0.6475144624710083, + "rewards/rejected": -0.5335943102836609, + "step": 602 + }, + { + "epoch": 0.97, + "learning_rate": 4.958382877526754e-07, + "logits/chosen": -1.1492931842803955, + "logits/rejected": -1.1668894290924072, + "logps/chosen": -111.79397583007812, + "logps/rejected": -99.64217376708984, + "loss": 0.4899, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.05117340385913849, + "rewards/margins": 1.0088286399841309, + "rewards/rejected": -1.0600019693374634, + "step": 603 + }, + { + "epoch": 0.97, + "learning_rate": 4.957391993658343e-07, + "logits/chosen": -1.369917392730713, + "logits/rejected": -1.3833253383636475, + "logps/chosen": -91.1497573852539, + "logps/rejected": -89.06022644042969, + "loss": 0.5099, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.005936337634921074, + "rewards/margins": 0.7539305686950684, + "rewards/rejected": -0.7598669528961182, + "step": 604 + }, + { + "epoch": 0.97, + "learning_rate": 4.956401109789933e-07, + "logits/chosen": -1.218024492263794, + "logits/rejected": -1.1901224851608276, + "logps/chosen": -70.98052215576172, + "logps/rejected": -83.63078308105469, + "loss": 0.5684, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.20148998498916626, + "rewards/margins": 0.8203837871551514, + "rewards/rejected": -0.6188938617706299, + "step": 605 + }, + { + "epoch": 0.97, + "learning_rate": 4.955410225921522e-07, + "logits/chosen": -1.0874919891357422, + "logits/rejected": -1.0735647678375244, + "logps/chosen": -96.29199981689453, + "logps/rejected": -109.10118103027344, + "loss": 0.5126, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.012957759201526642, + "rewards/margins": 0.8108606338500977, + "rewards/rejected": -0.7979028820991516, + "step": 606 + }, + { + "epoch": 0.97, + "learning_rate": 4.954419342053111e-07, + "logits/chosen": -1.308809757232666, + "logits/rejected": -1.3217300176620483, + "logps/chosen": -84.76177978515625, + "logps/rejected": -103.04386901855469, + "loss": 0.6098, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.03036022186279297, + "rewards/margins": 0.5690226554870605, + "rewards/rejected": -0.5993828177452087, + "step": 607 + }, + { + "epoch": 0.98, + "learning_rate": 4.953428458184701e-07, + "logits/chosen": -1.3524197340011597, + "logits/rejected": -1.2285836935043335, + "logps/chosen": -65.55500793457031, + "logps/rejected": -73.93505859375, + "loss": 0.5174, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.008192157372832298, + "rewards/margins": 0.0657425969839096, + "rewards/rejected": -0.07393474876880646, + "step": 608 + }, + { + "epoch": 0.98, + "learning_rate": 4.95243757431629e-07, + "logits/chosen": -1.394136667251587, + "logits/rejected": -1.4213712215423584, + "logps/chosen": -92.72711181640625, + "logps/rejected": -100.61636352539062, + "loss": 0.6154, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.04054677486419678, + "rewards/margins": 0.5561317801475525, + "rewards/rejected": -0.5155849456787109, + "step": 609 + }, + { + "epoch": 0.98, + "learning_rate": 4.951446690447879e-07, + "logits/chosen": -1.3426814079284668, + "logits/rejected": -1.3937761783599854, + "logps/chosen": -110.43621826171875, + "logps/rejected": -112.39772033691406, + "loss": 0.4923, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.22036132216453552, + "rewards/margins": 1.0566819906234741, + "rewards/rejected": -0.836320698261261, + "step": 610 + }, + { + "epoch": 0.98, + "learning_rate": 4.950455806579469e-07, + "logits/chosen": -1.336385726928711, + "logits/rejected": -1.3817124366760254, + "logps/chosen": -55.61134338378906, + "logps/rejected": -67.29580688476562, + "loss": 0.5324, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.4319985508918762, + "rewards/margins": 0.3919748067855835, + "rewards/rejected": -0.8239734172821045, + "step": 611 + }, + { + "epoch": 0.98, + "learning_rate": 4.949464922711058e-07, + "logits/chosen": -1.2799365520477295, + "logits/rejected": -1.323362112045288, + "logps/chosen": -96.78672790527344, + "logps/rejected": -101.25515747070312, + "loss": 0.4555, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.012155160307884216, + "rewards/margins": 0.9761806130409241, + "rewards/rejected": -0.9640255570411682, + "step": 612 + }, + { + "epoch": 0.98, + "learning_rate": 4.948474038842647e-07, + "logits/chosen": -1.1801824569702148, + "logits/rejected": -1.1360538005828857, + "logps/chosen": -86.85538482666016, + "logps/rejected": -112.64627838134766, + "loss": 0.5193, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.23429737985134125, + "rewards/margins": 0.8688868284225464, + "rewards/rejected": -1.1031842231750488, + "step": 613 + }, + { + "epoch": 0.99, + "learning_rate": 4.947483154974237e-07, + "logits/chosen": -1.2512667179107666, + "logits/rejected": -1.3186497688293457, + "logps/chosen": -87.35343933105469, + "logps/rejected": -68.60981750488281, + "loss": 0.5044, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.11721611022949219, + "rewards/margins": 0.06015154719352722, + "rewards/rejected": 0.05706453323364258, + "step": 614 + }, + { + "epoch": 0.99, + "learning_rate": 4.946492271105826e-07, + "logits/chosen": -1.293550729751587, + "logits/rejected": -1.2342557907104492, + "logps/chosen": -98.51182556152344, + "logps/rejected": -101.82608795166016, + "loss": 0.4262, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.327775776386261, + "rewards/margins": 1.0352253913879395, + "rewards/rejected": -0.7074496746063232, + "step": 615 + }, + { + "epoch": 0.99, + "learning_rate": 4.945501387237415e-07, + "logits/chosen": -1.2498924732208252, + "logits/rejected": -1.210220456123352, + "logps/chosen": -84.36942291259766, + "logps/rejected": -113.22923278808594, + "loss": 0.3929, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.21220609545707703, + "rewards/margins": 1.5308270454406738, + "rewards/rejected": -1.318621039390564, + "step": 616 + }, + { + "epoch": 0.99, + "learning_rate": 4.944510503369005e-07, + "logits/chosen": -1.2861942052841187, + "logits/rejected": -1.324744462966919, + "logps/chosen": -92.80854797363281, + "logps/rejected": -88.98739624023438, + "loss": 0.5026, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.303274929523468, + "rewards/margins": 0.8929769992828369, + "rewards/rejected": -1.1962519884109497, + "step": 617 + }, + { + "epoch": 0.99, + "learning_rate": 4.943519619500594e-07, + "logits/chosen": -1.2218245267868042, + "logits/rejected": -1.1458569765090942, + "logps/chosen": -82.04283142089844, + "logps/rejected": -98.89848327636719, + "loss": 0.5439, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.23666420578956604, + "rewards/margins": 1.128169059753418, + "rewards/rejected": -1.3648332357406616, + "step": 618 + }, + { + "epoch": 0.99, + "learning_rate": 4.942528735632184e-07, + "logits/chosen": -1.181472659111023, + "logits/rejected": -1.2209782600402832, + "logps/chosen": -84.9579086303711, + "logps/rejected": -97.18292999267578, + "loss": 0.559, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.5308420658111572, + "rewards/margins": 0.18872787058353424, + "rewards/rejected": -0.7195700407028198, + "step": 619 + }, + { + "epoch": 1.0, + "learning_rate": 4.941537851763773e-07, + "logits/chosen": -1.118780493736267, + "logits/rejected": -1.217832088470459, + "logps/chosen": -84.7042236328125, + "logps/rejected": -59.18722152709961, + "loss": 0.5081, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.3175990879535675, + "rewards/margins": 0.09654952585697174, + "rewards/rejected": -0.41414862871170044, + "step": 620 + }, + { + "epoch": 1.0, + "learning_rate": 4.940546967895363e-07, + "logits/chosen": -1.3697404861450195, + "logits/rejected": -1.3122416734695435, + "logps/chosen": -80.81403350830078, + "logps/rejected": -74.36764526367188, + "loss": 0.4562, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.14627933502197266, + "rewards/margins": 1.3645817041397095, + "rewards/rejected": -1.2183023691177368, + "step": 621 + }, + { + "epoch": 1.0, + "learning_rate": 4.939556084026952e-07, + "logits/chosen": -1.4454662799835205, + "logits/rejected": -1.5150693655014038, + "logps/chosen": -113.60740661621094, + "logps/rejected": -124.25518798828125, + "loss": 0.548, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.06348437815904617, + "rewards/margins": 0.7040247321128845, + "rewards/rejected": -0.6405403017997742, + "step": 622 + }, + { + "epoch": 1.0, + "learning_rate": 4.938565200158541e-07, + "logits/chosen": -1.3987926244735718, + "logits/rejected": -1.3848011493682861, + "logps/chosen": -83.26103210449219, + "logps/rejected": -113.33751678466797, + "loss": 0.4615, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3207220137119293, + "rewards/margins": 0.8599616885185242, + "rewards/rejected": -0.5392396450042725, + "step": 623 + }, + { + "epoch": 1.0, + "learning_rate": 4.93757431629013e-07, + "logits/chosen": -1.1824086904525757, + "logits/rejected": -1.2046773433685303, + "logps/chosen": -108.81339263916016, + "logps/rejected": -105.2822265625, + "loss": 0.4835, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.25489044189453125, + "rewards/margins": 1.1353492736816406, + "rewards/rejected": -0.8804588317871094, + "step": 624 + }, + { + "epoch": 1.0, + "learning_rate": 4.93658343242172e-07, + "logits/chosen": -1.4204611778259277, + "logits/rejected": -1.465986967086792, + "logps/chosen": -67.87345123291016, + "logps/rejected": -94.70164489746094, + "loss": 0.4054, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3485986590385437, + "rewards/margins": 1.519789695739746, + "rewards/rejected": -1.1711910963058472, + "step": 625 + }, + { + "epoch": 1.0, + "learning_rate": 4.935592548553309e-07, + "logits/chosen": -1.3747793436050415, + "logits/rejected": -1.3640648126602173, + "logps/chosen": -127.38607788085938, + "logps/rejected": -107.25726318359375, + "loss": 0.4574, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.024099163711071014, + "rewards/margins": 1.0369360446929932, + "rewards/rejected": -1.0128369331359863, + "step": 626 + }, + { + "epoch": 1.01, + "learning_rate": 4.934601664684898e-07, + "logits/chosen": -1.4290785789489746, + "logits/rejected": -1.4127848148345947, + "logps/chosen": -85.94258880615234, + "logps/rejected": -93.57180786132812, + "loss": 0.4062, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.08154525607824326, + "rewards/margins": 0.78749680519104, + "rewards/rejected": -0.8690420389175415, + "step": 627 + }, + { + "epoch": 1.01, + "learning_rate": 4.933610780816487e-07, + "logits/chosen": -1.1691243648529053, + "logits/rejected": -1.1604645252227783, + "logps/chosen": -83.20337677001953, + "logps/rejected": -83.00849151611328, + "loss": 0.3516, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2836381793022156, + "rewards/margins": 0.9041110873222351, + "rewards/rejected": -0.6204729080200195, + "step": 628 + }, + { + "epoch": 1.01, + "learning_rate": 4.932619896948078e-07, + "logits/chosen": -1.3732610940933228, + "logits/rejected": -1.4009371995925903, + "logps/chosen": -82.5809555053711, + "logps/rejected": -114.51441192626953, + "loss": 0.3926, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.18428926169872284, + "rewards/margins": 0.7399064302444458, + "rewards/rejected": -0.5556171536445618, + "step": 629 + }, + { + "epoch": 1.01, + "learning_rate": 4.931629013079667e-07, + "logits/chosen": -1.0174378156661987, + "logits/rejected": -0.9966703057289124, + "logps/chosen": -77.53195190429688, + "logps/rejected": -89.54998016357422, + "loss": 0.5007, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.07600125670433044, + "rewards/margins": 0.30136364698410034, + "rewards/rejected": -0.3773649334907532, + "step": 630 + }, + { + "epoch": 1.01, + "learning_rate": 4.930638129211257e-07, + "logits/chosen": -1.2752962112426758, + "logits/rejected": -1.3282191753387451, + "logps/chosen": -74.81553649902344, + "logps/rejected": -70.57725524902344, + "loss": 0.3933, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.012401953339576721, + "rewards/margins": 0.6531705856323242, + "rewards/rejected": -0.6655725836753845, + "step": 631 + }, + { + "epoch": 1.01, + "learning_rate": 4.929647245342846e-07, + "logits/chosen": -1.1984384059906006, + "logits/rejected": -1.2730480432510376, + "logps/chosen": -67.83452606201172, + "logps/rejected": -85.81764221191406, + "loss": 0.4048, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1890590488910675, + "rewards/margins": 1.2360830307006836, + "rewards/rejected": -1.0470240116119385, + "step": 632 + }, + { + "epoch": 1.02, + "learning_rate": 4.928656361474435e-07, + "logits/chosen": -1.2535192966461182, + "logits/rejected": -1.2146508693695068, + "logps/chosen": -76.38894653320312, + "logps/rejected": -83.81744384765625, + "loss": 0.5053, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.008367255330085754, + "rewards/margins": 1.4612643718719482, + "rewards/rejected": -1.469631552696228, + "step": 633 + }, + { + "epoch": 1.02, + "learning_rate": 4.927665477606024e-07, + "logits/chosen": -1.3400647640228271, + "logits/rejected": -1.249745488166809, + "logps/chosen": -83.00265502929688, + "logps/rejected": -89.74468231201172, + "loss": 0.3128, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0014201141893863678, + "rewards/margins": 1.8063328266143799, + "rewards/rejected": -1.8049126863479614, + "step": 634 + }, + { + "epoch": 1.02, + "learning_rate": 4.926674593737614e-07, + "logits/chosen": -1.2703955173492432, + "logits/rejected": -1.246978998184204, + "logps/chosen": -72.07546997070312, + "logps/rejected": -92.34222412109375, + "loss": 0.4488, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.39483940601348877, + "rewards/margins": 1.74713134765625, + "rewards/rejected": -1.3522919416427612, + "step": 635 + }, + { + "epoch": 1.02, + "learning_rate": 4.925683709869203e-07, + "logits/chosen": -1.2539812326431274, + "logits/rejected": -1.2792292833328247, + "logps/chosen": -92.83717346191406, + "logps/rejected": -88.76817321777344, + "loss": 0.5116, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.2486865222454071, + "rewards/margins": 0.9229283332824707, + "rewards/rejected": -1.1716147661209106, + "step": 636 + }, + { + "epoch": 1.02, + "learning_rate": 4.924692826000792e-07, + "logits/chosen": -1.188881278038025, + "logits/rejected": -1.2930753231048584, + "logps/chosen": -93.86711120605469, + "logps/rejected": -114.76409912109375, + "loss": 0.3959, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.5074115991592407, + "rewards/margins": 0.9505544900894165, + "rewards/rejected": -1.4579660892486572, + "step": 637 + }, + { + "epoch": 1.02, + "learning_rate": 4.923701942132381e-07, + "logits/chosen": -1.2970342636108398, + "logits/rejected": -1.3914799690246582, + "logps/chosen": -71.81866455078125, + "logps/rejected": -85.15555572509766, + "loss": 0.3697, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.06648483872413635, + "rewards/margins": 1.2776933908462524, + "rewards/rejected": -1.3441780805587769, + "step": 638 + }, + { + "epoch": 1.03, + "learning_rate": 4.922711058263971e-07, + "logits/chosen": -1.4044644832611084, + "logits/rejected": -1.4443693161010742, + "logps/chosen": -85.48546600341797, + "logps/rejected": -113.81663513183594, + "loss": 0.4003, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14920464158058167, + "rewards/margins": 1.5218045711517334, + "rewards/rejected": -1.6710090637207031, + "step": 639 + }, + { + "epoch": 1.03, + "learning_rate": 4.92172017439556e-07, + "logits/chosen": -1.400816559791565, + "logits/rejected": -1.4438532590866089, + "logps/chosen": -80.0377197265625, + "logps/rejected": -99.41096496582031, + "loss": 0.4066, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.10934562236070633, + "rewards/margins": 1.4306946992874146, + "rewards/rejected": -1.3213491439819336, + "step": 640 + }, + { + "epoch": 1.03, + "learning_rate": 4.920729290527151e-07, + "logits/chosen": -1.279848337173462, + "logits/rejected": -1.2278246879577637, + "logps/chosen": -99.23127746582031, + "logps/rejected": -108.05778503417969, + "loss": 0.3572, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.013964645564556122, + "rewards/margins": 0.42387890815734863, + "rewards/rejected": -0.4378435015678406, + "step": 641 + }, + { + "epoch": 1.03, + "learning_rate": 4.91973840665874e-07, + "logits/chosen": -1.0680007934570312, + "logits/rejected": -1.0609631538391113, + "logps/chosen": -75.86782836914062, + "logps/rejected": -100.1400375366211, + "loss": 0.4444, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.4477252960205078, + "rewards/margins": 0.3204069137573242, + "rewards/rejected": -0.768132209777832, + "step": 642 + }, + { + "epoch": 1.03, + "learning_rate": 4.918747522790329e-07, + "logits/chosen": -1.1777209043502808, + "logits/rejected": -1.2381162643432617, + "logps/chosen": -60.29715347290039, + "logps/rejected": -86.87454986572266, + "loss": 0.3988, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.20973452925682068, + "rewards/margins": 1.9457286596298218, + "rewards/rejected": -1.7359941005706787, + "step": 643 + }, + { + "epoch": 1.03, + "learning_rate": 4.917756638921918e-07, + "logits/chosen": -1.3921170234680176, + "logits/rejected": -1.38982355594635, + "logps/chosen": -107.87478637695312, + "logps/rejected": -119.84930419921875, + "loss": 0.4759, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.18342742323875427, + "rewards/margins": 1.3088815212249756, + "rewards/rejected": -1.4923089742660522, + "step": 644 + }, + { + "epoch": 1.04, + "learning_rate": 4.916765755053507e-07, + "logits/chosen": -1.4214887619018555, + "logits/rejected": -1.3831188678741455, + "logps/chosen": -87.8571548461914, + "logps/rejected": -115.83609008789062, + "loss": 0.3721, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.04127369821071625, + "rewards/margins": 1.2303071022033691, + "rewards/rejected": -1.271580696105957, + "step": 645 + }, + { + "epoch": 1.04, + "learning_rate": 4.915774871185097e-07, + "logits/chosen": -1.3584530353546143, + "logits/rejected": -1.3286323547363281, + "logps/chosen": -102.18888854980469, + "logps/rejected": -116.63023376464844, + "loss": 0.3835, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.02864512801170349, + "rewards/margins": 1.191394567489624, + "rewards/rejected": -1.22003972530365, + "step": 646 + }, + { + "epoch": 1.04, + "learning_rate": 4.914783987316686e-07, + "logits/chosen": -1.2144412994384766, + "logits/rejected": -1.2142349481582642, + "logps/chosen": -70.0474853515625, + "logps/rejected": -70.82455444335938, + "loss": 0.5009, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.4862457811832428, + "rewards/margins": 0.018204249441623688, + "rewards/rejected": -0.5044500827789307, + "step": 647 + }, + { + "epoch": 1.04, + "learning_rate": 4.913793103448275e-07, + "logits/chosen": -1.4385855197906494, + "logits/rejected": -1.4576252698898315, + "logps/chosen": -71.42688751220703, + "logps/rejected": -107.62899780273438, + "loss": 0.5393, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.271390438079834, + "rewards/margins": 2.428725481033325, + "rewards/rejected": -2.157335042953491, + "step": 648 + }, + { + "epoch": 1.04, + "learning_rate": 4.912802219579865e-07, + "logits/chosen": -1.5344856977462769, + "logits/rejected": -1.4828660488128662, + "logps/chosen": -99.17671966552734, + "logps/rejected": -109.19749450683594, + "loss": 0.3673, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.36937353014945984, + "rewards/margins": 1.0891389846801758, + "rewards/rejected": -1.4585124254226685, + "step": 649 + }, + { + "epoch": 1.04, + "learning_rate": 4.911811335711454e-07, + "logits/chosen": -1.361491084098816, + "logits/rejected": -1.3482656478881836, + "logps/chosen": -73.83683013916016, + "logps/rejected": -90.90443420410156, + "loss": 0.3095, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.16256314516067505, + "rewards/margins": 2.009800434112549, + "rewards/rejected": -1.8472371101379395, + "step": 650 + }, + { + "epoch": 1.04, + "learning_rate": 4.910820451843043e-07, + "logits/chosen": -1.4432443380355835, + "logits/rejected": -1.5034996271133423, + "logps/chosen": -85.41169738769531, + "logps/rejected": -99.66044616699219, + "loss": 0.4121, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3063993453979492, + "rewards/margins": 1.8867002725601196, + "rewards/rejected": -1.5803008079528809, + "step": 651 + }, + { + "epoch": 1.05, + "learning_rate": 4.909829567974634e-07, + "logits/chosen": -1.2041844129562378, + "logits/rejected": -1.2665297985076904, + "logps/chosen": -76.08789825439453, + "logps/rejected": -79.43038940429688, + "loss": 0.4398, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.3352125883102417, + "rewards/margins": 0.8976166844367981, + "rewards/rejected": -1.2328293323516846, + "step": 652 + }, + { + "epoch": 1.05, + "learning_rate": 4.908838684106223e-07, + "logits/chosen": -1.3578230142593384, + "logits/rejected": -1.391003131866455, + "logps/chosen": -91.78827667236328, + "logps/rejected": -84.38101959228516, + "loss": 0.4628, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.6770832538604736, + "rewards/margins": 0.312582403421402, + "rewards/rejected": -0.9896656274795532, + "step": 653 + }, + { + "epoch": 1.05, + "learning_rate": 4.907847800237812e-07, + "logits/chosen": -1.421005368232727, + "logits/rejected": -1.4379866123199463, + "logps/chosen": -82.587890625, + "logps/rejected": -108.2442626953125, + "loss": 0.3911, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.9373767375946045, + "rewards/margins": 1.229305386543274, + "rewards/rejected": -2.166682004928589, + "step": 654 + }, + { + "epoch": 1.05, + "learning_rate": 4.906856916369401e-07, + "logits/chosen": -1.4695162773132324, + "logits/rejected": -1.4607484340667725, + "logps/chosen": -109.91849517822266, + "logps/rejected": -106.9648666381836, + "loss": 0.4322, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.7354822158813477, + "rewards/margins": 1.1511354446411133, + "rewards/rejected": -1.886617660522461, + "step": 655 + }, + { + "epoch": 1.05, + "learning_rate": 4.905866032500991e-07, + "logits/chosen": -1.3564175367355347, + "logits/rejected": -1.3141993284225464, + "logps/chosen": -82.68637084960938, + "logps/rejected": -95.49859619140625, + "loss": 0.3537, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.15954247117042542, + "rewards/margins": 1.99716055393219, + "rewards/rejected": -2.156702995300293, + "step": 656 + }, + { + "epoch": 1.05, + "learning_rate": 4.90487514863258e-07, + "logits/chosen": -1.421112298965454, + "logits/rejected": -1.484668254852295, + "logps/chosen": -73.33814239501953, + "logps/rejected": -107.28533172607422, + "loss": 0.3578, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.32613545656204224, + "rewards/margins": 1.340680718421936, + "rewards/rejected": -1.014545202255249, + "step": 657 + }, + { + "epoch": 1.06, + "learning_rate": 4.903884264764169e-07, + "logits/chosen": -1.3598849773406982, + "logits/rejected": -1.2592506408691406, + "logps/chosen": -81.93537139892578, + "logps/rejected": -85.50978088378906, + "loss": 0.3371, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09509792923927307, + "rewards/margins": 1.9154412746429443, + "rewards/rejected": -1.8203434944152832, + "step": 658 + }, + { + "epoch": 1.06, + "learning_rate": 4.902893380895759e-07, + "logits/chosen": -1.4744057655334473, + "logits/rejected": -1.4016293287277222, + "logps/chosen": -88.7152328491211, + "logps/rejected": -93.18165588378906, + "loss": 0.4269, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.3904764652252197, + "rewards/margins": 0.2449941784143448, + "rewards/rejected": -0.6354706287384033, + "step": 659 + }, + { + "epoch": 1.06, + "learning_rate": 4.901902497027348e-07, + "logits/chosen": -1.4940810203552246, + "logits/rejected": -1.533150553703308, + "logps/chosen": -92.45540618896484, + "logps/rejected": -104.93388366699219, + "loss": 0.456, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.16409960389137268, + "rewards/margins": 1.9249449968338013, + "rewards/rejected": -2.0890445709228516, + "step": 660 + }, + { + "epoch": 1.06, + "learning_rate": 4.900911613158937e-07, + "logits/chosen": -1.452685832977295, + "logits/rejected": -1.387607455253601, + "logps/chosen": -79.71768188476562, + "logps/rejected": -110.98922729492188, + "loss": 0.2936, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.08903427422046661, + "rewards/margins": 2.114292621612549, + "rewards/rejected": -2.0252583026885986, + "step": 661 + }, + { + "epoch": 1.06, + "learning_rate": 4.899920729290527e-07, + "logits/chosen": -1.307018518447876, + "logits/rejected": -1.2686035633087158, + "logps/chosen": -80.6418685913086, + "logps/rejected": -113.55203247070312, + "loss": 0.4011, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.06917839497327805, + "rewards/margins": 1.9063173532485962, + "rewards/rejected": -1.9754958152770996, + "step": 662 + }, + { + "epoch": 1.06, + "learning_rate": 4.898929845422117e-07, + "logits/chosen": -1.3047620058059692, + "logits/rejected": -1.375331163406372, + "logps/chosen": -93.12171173095703, + "logps/rejected": -87.462158203125, + "loss": 0.4294, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5945455431938171, + "rewards/margins": 1.3139636516571045, + "rewards/rejected": -0.7194180488586426, + "step": 663 + }, + { + "epoch": 1.07, + "learning_rate": 4.897938961553706e-07, + "logits/chosen": -1.4076361656188965, + "logits/rejected": -1.4384138584136963, + "logps/chosen": -68.136474609375, + "logps/rejected": -91.2838134765625, + "loss": 0.3755, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.08144272863864899, + "rewards/margins": 0.995979905128479, + "rewards/rejected": -0.9145371913909912, + "step": 664 + }, + { + "epoch": 1.07, + "learning_rate": 4.896948077685295e-07, + "logits/chosen": -1.3898680210113525, + "logits/rejected": -1.3664714097976685, + "logps/chosen": -75.07269287109375, + "logps/rejected": -97.89827728271484, + "loss": 0.4304, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.367051899433136, + "rewards/margins": 0.6964758038520813, + "rewards/rejected": -0.3294239044189453, + "step": 665 + }, + { + "epoch": 1.07, + "learning_rate": 4.895957193816884e-07, + "logits/chosen": -1.252596378326416, + "logits/rejected": -1.2643063068389893, + "logps/chosen": -86.65847778320312, + "logps/rejected": -91.89356994628906, + "loss": 0.3753, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4718170166015625, + "rewards/margins": 1.935595989227295, + "rewards/rejected": -1.4637789726257324, + "step": 666 + }, + { + "epoch": 1.07, + "learning_rate": 4.894966309948474e-07, + "logits/chosen": -1.3138413429260254, + "logits/rejected": -1.3359873294830322, + "logps/chosen": -63.763797760009766, + "logps/rejected": -105.08572387695312, + "loss": 0.4126, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.3451036214828491, + "rewards/margins": 1.1234796047210693, + "rewards/rejected": -0.7783759832382202, + "step": 667 + }, + { + "epoch": 1.07, + "learning_rate": 4.893975426080063e-07, + "logits/chosen": -1.3729325532913208, + "logits/rejected": -1.370099425315857, + "logps/chosen": -76.80646514892578, + "logps/rejected": -88.10457611083984, + "loss": 0.4534, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.47632887959480286, + "rewards/margins": 1.4175050258636475, + "rewards/rejected": -0.941176176071167, + "step": 668 + }, + { + "epoch": 1.07, + "learning_rate": 4.892984542211653e-07, + "logits/chosen": -1.577545166015625, + "logits/rejected": -1.4969347715377808, + "logps/chosen": -97.56498718261719, + "logps/rejected": -98.31114959716797, + "loss": 0.3843, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1538480669260025, + "rewards/margins": 1.37328040599823, + "rewards/rejected": -1.2194322347640991, + "step": 669 + }, + { + "epoch": 1.08, + "learning_rate": 4.891993658343242e-07, + "logits/chosen": -1.3017845153808594, + "logits/rejected": -1.3382878303527832, + "logps/chosen": -119.81527709960938, + "logps/rejected": -99.70899963378906, + "loss": 0.3721, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.1655813306570053, + "rewards/margins": 0.6841763854026794, + "rewards/rejected": -0.8497576713562012, + "step": 670 + }, + { + "epoch": 1.08, + "learning_rate": 4.891002774474831e-07, + "logits/chosen": -1.4028246402740479, + "logits/rejected": -1.3671778440475464, + "logps/chosen": -87.83415222167969, + "logps/rejected": -116.3924560546875, + "loss": 0.4801, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7481651902198792, + "rewards/margins": 2.4451420307159424, + "rewards/rejected": -3.193307399749756, + "step": 671 + }, + { + "epoch": 1.08, + "learning_rate": 4.890011890606421e-07, + "logits/chosen": -1.4041634798049927, + "logits/rejected": -1.3903625011444092, + "logps/chosen": -85.5989990234375, + "logps/rejected": -103.05985260009766, + "loss": 0.3319, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.04308643937110901, + "rewards/margins": 1.465963363647461, + "rewards/rejected": -1.5090497732162476, + "step": 672 + }, + { + "epoch": 1.08, + "learning_rate": 4.88902100673801e-07, + "logits/chosen": -1.2173091173171997, + "logits/rejected": -1.2083362340927124, + "logps/chosen": -74.22984313964844, + "logps/rejected": -99.57096862792969, + "loss": 0.2718, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.19407843053340912, + "rewards/margins": 1.3219690322875977, + "rewards/rejected": -1.516047477722168, + "step": 673 + }, + { + "epoch": 1.08, + "learning_rate": 4.888030122869599e-07, + "logits/chosen": -1.3940848112106323, + "logits/rejected": -1.3909956216812134, + "logps/chosen": -87.79124450683594, + "logps/rejected": -93.30967712402344, + "loss": 0.4066, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.3775016963481903, + "rewards/margins": 2.078174114227295, + "rewards/rejected": -1.7006725072860718, + "step": 674 + }, + { + "epoch": 1.08, + "learning_rate": 4.887039239001188e-07, + "logits/chosen": -1.1757735013961792, + "logits/rejected": -1.1456665992736816, + "logps/chosen": -64.52625274658203, + "logps/rejected": -74.41081237792969, + "loss": 0.3759, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.26755914092063904, + "rewards/margins": 0.6684736609458923, + "rewards/rejected": -0.4009144902229309, + "step": 675 + }, + { + "epoch": 1.09, + "learning_rate": 4.886048355132778e-07, + "logits/chosen": -1.4524002075195312, + "logits/rejected": -1.3943781852722168, + "logps/chosen": -106.05867004394531, + "logps/rejected": -111.13282775878906, + "loss": 0.5199, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.4834062457084656, + "rewards/margins": 0.5546987652778625, + "rewards/rejected": -1.0381050109863281, + "step": 676 + }, + { + "epoch": 1.09, + "learning_rate": 4.885057471264368e-07, + "logits/chosen": -1.3313775062561035, + "logits/rejected": -1.295607089996338, + "logps/chosen": -95.60672760009766, + "logps/rejected": -90.5439453125, + "loss": 0.4146, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.24198472499847412, + "rewards/margins": 0.6441235542297363, + "rewards/rejected": -0.8861082792282104, + "step": 677 + }, + { + "epoch": 1.09, + "learning_rate": 4.884066587395957e-07, + "logits/chosen": -1.4745087623596191, + "logits/rejected": -1.3941504955291748, + "logps/chosen": -110.55616760253906, + "logps/rejected": -112.32786560058594, + "loss": 0.4338, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.1369665265083313, + "rewards/margins": 1.4547393321990967, + "rewards/rejected": -1.3177727460861206, + "step": 678 + }, + { + "epoch": 1.09, + "learning_rate": 4.883075703527547e-07, + "logits/chosen": -1.2815626859664917, + "logits/rejected": -1.2682147026062012, + "logps/chosen": -94.577392578125, + "logps/rejected": -87.65025329589844, + "loss": 0.5877, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.19889946281909943, + "rewards/margins": 1.7685933113098145, + "rewards/rejected": -1.569693922996521, + "step": 679 + }, + { + "epoch": 1.09, + "learning_rate": 4.882084819659136e-07, + "logits/chosen": -1.2902193069458008, + "logits/rejected": -1.3339675664901733, + "logps/chosen": -51.09904479980469, + "logps/rejected": -75.3345947265625, + "loss": 0.3769, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4521670341491699, + "rewards/margins": 1.5573551654815674, + "rewards/rejected": -1.1051881313323975, + "step": 680 + }, + { + "epoch": 1.09, + "learning_rate": 4.881093935790725e-07, + "logits/chosen": -1.2462666034698486, + "logits/rejected": -1.2348511219024658, + "logps/chosen": -94.88175201416016, + "logps/rejected": -96.019287109375, + "loss": 0.4197, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.2923596203327179, + "rewards/margins": 1.2718596458435059, + "rewards/rejected": -1.5642192363739014, + "step": 681 + }, + { + "epoch": 1.09, + "learning_rate": 4.880103051922315e-07, + "logits/chosen": -1.392362356185913, + "logits/rejected": -1.3957639932632446, + "logps/chosen": -86.64036560058594, + "logps/rejected": -114.23690795898438, + "loss": 0.2885, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8077174425125122, + "rewards/margins": 3.3931140899658203, + "rewards/rejected": -2.5853967666625977, + "step": 682 + }, + { + "epoch": 1.1, + "learning_rate": 4.879112168053904e-07, + "logits/chosen": -1.217178463935852, + "logits/rejected": -1.2532049417495728, + "logps/chosen": -84.75413513183594, + "logps/rejected": -88.60211181640625, + "loss": 0.4016, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2749830484390259, + "rewards/margins": 0.44355031847953796, + "rewards/rejected": -0.16856727004051208, + "step": 683 + }, + { + "epoch": 1.1, + "learning_rate": 4.878121284185493e-07, + "logits/chosen": -1.4306700229644775, + "logits/rejected": -1.4092631340026855, + "logps/chosen": -91.0965576171875, + "logps/rejected": -120.12495422363281, + "loss": 0.3036, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8886934518814087, + "rewards/margins": 2.7132325172424316, + "rewards/rejected": -1.8245391845703125, + "step": 684 + }, + { + "epoch": 1.1, + "learning_rate": 4.877130400317082e-07, + "logits/chosen": -1.256531000137329, + "logits/rejected": -1.2331968545913696, + "logps/chosen": -98.4859390258789, + "logps/rejected": -111.16522979736328, + "loss": 0.3377, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.0002240985631942749, + "rewards/margins": 0.6413943767547607, + "rewards/rejected": -0.64117032289505, + "step": 685 + }, + { + "epoch": 1.1, + "learning_rate": 4.876139516448671e-07, + "logits/chosen": -1.4218237400054932, + "logits/rejected": -1.4343600273132324, + "logps/chosen": -74.59205627441406, + "logps/rejected": -100.2962646484375, + "loss": 0.4305, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3101362884044647, + "rewards/margins": 2.4496498107910156, + "rewards/rejected": -2.1395134925842285, + "step": 686 + }, + { + "epoch": 1.1, + "learning_rate": 4.875148632580262e-07, + "logits/chosen": -1.176518201828003, + "logits/rejected": -1.215410590171814, + "logps/chosen": -98.8648681640625, + "logps/rejected": -92.96239471435547, + "loss": 0.5288, + "rewards/accuracies": 0.25, + "rewards/chosen": -0.8273013830184937, + "rewards/margins": -0.33038386702537537, + "rewards/rejected": -0.49691757559776306, + "step": 687 + }, + { + "epoch": 1.1, + "learning_rate": 4.874157748711851e-07, + "logits/chosen": -1.3909225463867188, + "logits/rejected": -1.4012142419815063, + "logps/chosen": -71.40349578857422, + "logps/rejected": -101.88626098632812, + "loss": 0.5532, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2450089305639267, + "rewards/margins": 2.2021284103393555, + "rewards/rejected": -1.9571192264556885, + "step": 688 + }, + { + "epoch": 1.11, + "learning_rate": 4.87316686484344e-07, + "logits/chosen": -1.3780250549316406, + "logits/rejected": -1.4211817979812622, + "logps/chosen": -75.49307250976562, + "logps/rejected": -124.76383972167969, + "loss": 0.4276, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0431815385818481, + "rewards/margins": 3.264399528503418, + "rewards/rejected": -2.2212181091308594, + "step": 689 + }, + { + "epoch": 1.11, + "learning_rate": 4.87217598097503e-07, + "logits/chosen": -1.3870809078216553, + "logits/rejected": -1.3473308086395264, + "logps/chosen": -81.65586853027344, + "logps/rejected": -64.48933410644531, + "loss": 0.3918, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.19423332810401917, + "rewards/margins": 0.7103424072265625, + "rewards/rejected": -0.5161091089248657, + "step": 690 + }, + { + "epoch": 1.11, + "learning_rate": 4.871185097106619e-07, + "logits/chosen": -1.4294345378875732, + "logits/rejected": -1.4277026653289795, + "logps/chosen": -83.83518981933594, + "logps/rejected": -105.68745422363281, + "loss": 0.364, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7375525236129761, + "rewards/margins": 2.8126306533813477, + "rewards/rejected": -2.075078248977661, + "step": 691 + }, + { + "epoch": 1.11, + "learning_rate": 4.870194213238208e-07, + "logits/chosen": -1.3175357580184937, + "logits/rejected": -1.3599967956542969, + "logps/chosen": -75.39169311523438, + "logps/rejected": -116.61439514160156, + "loss": 0.2932, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8548386693000793, + "rewards/margins": 2.965169906616211, + "rewards/rejected": -2.1103315353393555, + "step": 692 + }, + { + "epoch": 1.11, + "learning_rate": 4.869203329369798e-07, + "logits/chosen": -1.2648781538009644, + "logits/rejected": -1.2773479223251343, + "logps/chosen": -93.60340118408203, + "logps/rejected": -115.82379913330078, + "loss": 0.3383, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.385244756937027, + "rewards/margins": 1.4317095279693604, + "rewards/rejected": -1.816954255104065, + "step": 693 + }, + { + "epoch": 1.11, + "learning_rate": 4.868212445501387e-07, + "logits/chosen": -1.431262731552124, + "logits/rejected": -1.3091509342193604, + "logps/chosen": -76.77781677246094, + "logps/rejected": -95.88572692871094, + "loss": 0.5651, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.17363770306110382, + "rewards/margins": 0.6252973079681396, + "rewards/rejected": -0.7989349961280823, + "step": 694 + }, + { + "epoch": 1.12, + "learning_rate": 4.867221561632976e-07, + "logits/chosen": -1.400439739227295, + "logits/rejected": -1.2975910902023315, + "logps/chosen": -108.42198181152344, + "logps/rejected": -76.13269805908203, + "loss": 0.4849, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.06289122998714447, + "rewards/margins": 0.8940479755401611, + "rewards/rejected": -0.8311567306518555, + "step": 695 + }, + { + "epoch": 1.12, + "learning_rate": 4.866230677764565e-07, + "logits/chosen": -1.283737301826477, + "logits/rejected": -1.26528000831604, + "logps/chosen": -84.72105407714844, + "logps/rejected": -93.80245971679688, + "loss": 0.5532, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.4540802240371704, + "rewards/margins": 0.8983899354934692, + "rewards/rejected": -1.3524702787399292, + "step": 696 + }, + { + "epoch": 1.12, + "learning_rate": 4.865239793896155e-07, + "logits/chosen": -1.3320884704589844, + "logits/rejected": -1.3314589262008667, + "logps/chosen": -95.37186431884766, + "logps/rejected": -122.7088851928711, + "loss": 0.4369, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.2376115918159485, + "rewards/margins": 3.0371224880218506, + "rewards/rejected": -3.2747340202331543, + "step": 697 + }, + { + "epoch": 1.12, + "learning_rate": 4.864248910027744e-07, + "logits/chosen": -1.435495376586914, + "logits/rejected": -1.4480663537979126, + "logps/chosen": -82.41349792480469, + "logps/rejected": -107.73414611816406, + "loss": 0.4134, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.08212928473949432, + "rewards/margins": 1.9673526287078857, + "rewards/rejected": -1.8852232694625854, + "step": 698 + }, + { + "epoch": 1.12, + "learning_rate": 4.863258026159334e-07, + "logits/chosen": -1.1918554306030273, + "logits/rejected": -1.1888558864593506, + "logps/chosen": -104.25653076171875, + "logps/rejected": -89.11030578613281, + "loss": 0.5198, + "rewards/accuracies": 0.25, + "rewards/chosen": -0.39893800020217896, + "rewards/margins": -0.38930776715278625, + "rewards/rejected": -0.009630199521780014, + "step": 699 + }, + { + "epoch": 1.12, + "learning_rate": 4.862267142290924e-07, + "logits/chosen": -1.2214057445526123, + "logits/rejected": -1.1729865074157715, + "logps/chosen": -90.03568267822266, + "logps/rejected": -96.5575180053711, + "loss": 0.3764, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.17320403456687927, + "rewards/margins": 1.5838011503219604, + "rewards/rejected": -1.757005214691162, + "step": 700 + }, + { + "epoch": 1.13, + "learning_rate": 4.861276258422513e-07, + "logits/chosen": -1.2873765230178833, + "logits/rejected": -1.4059195518493652, + "logps/chosen": -50.244667053222656, + "logps/rejected": -96.05281829833984, + "loss": 0.3495, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.19809409976005554, + "rewards/margins": 2.324190855026245, + "rewards/rejected": -2.1260969638824463, + "step": 701 + }, + { + "epoch": 1.13, + "learning_rate": 4.860285374554102e-07, + "logits/chosen": -1.367733120918274, + "logits/rejected": -1.3186416625976562, + "logps/chosen": -86.34362030029297, + "logps/rejected": -78.70965576171875, + "loss": 0.3492, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.4689810872077942, + "rewards/margins": 0.11503283679485321, + "rewards/rejected": -0.5840139389038086, + "step": 702 + }, + { + "epoch": 1.13, + "learning_rate": 4.859294490685692e-07, + "logits/chosen": -1.2739040851593018, + "logits/rejected": -1.280039668083191, + "logps/chosen": -93.02059936523438, + "logps/rejected": -112.9046630859375, + "loss": 0.3919, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.9253799319267273, + "rewards/margins": 0.37055206298828125, + "rewards/rejected": -1.2959319353103638, + "step": 703 + }, + { + "epoch": 1.13, + "learning_rate": 4.858303606817281e-07, + "logits/chosen": -1.3368730545043945, + "logits/rejected": -1.403881549835205, + "logps/chosen": -85.17813110351562, + "logps/rejected": -85.52774047851562, + "loss": 0.521, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.4710594415664673, + "rewards/margins": 0.19951912760734558, + "rewards/rejected": -0.6705785989761353, + "step": 704 + }, + { + "epoch": 1.13, + "learning_rate": 4.85731272294887e-07, + "logits/chosen": -1.4163870811462402, + "logits/rejected": -1.5107501745224, + "logps/chosen": -69.57830810546875, + "logps/rejected": -98.72921752929688, + "loss": 0.3689, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.27763184905052185, + "rewards/margins": 1.5279160737991333, + "rewards/rejected": -1.250284194946289, + "step": 705 + }, + { + "epoch": 1.13, + "learning_rate": 4.856321839080459e-07, + "logits/chosen": -1.386713981628418, + "logits/rejected": -1.3277173042297363, + "logps/chosen": -78.09333038330078, + "logps/rejected": -109.55380249023438, + "loss": 0.2667, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.12456148862838745, + "rewards/margins": 3.0154080390930176, + "rewards/rejected": -2.8908462524414062, + "step": 706 + }, + { + "epoch": 1.13, + "learning_rate": 4.855330955212049e-07, + "logits/chosen": -1.5303404331207275, + "logits/rejected": -1.4563004970550537, + "logps/chosen": -80.1597900390625, + "logps/rejected": -73.34475708007812, + "loss": 0.3769, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.30568239092826843, + "rewards/margins": 1.245052456855774, + "rewards/rejected": -0.9393701553344727, + "step": 707 + }, + { + "epoch": 1.14, + "learning_rate": 4.854340071343638e-07, + "logits/chosen": -1.3940812349319458, + "logits/rejected": -1.4239314794540405, + "logps/chosen": -79.12136840820312, + "logps/rejected": -128.3715057373047, + "loss": 0.3562, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12827616930007935, + "rewards/margins": 3.0281131267547607, + "rewards/rejected": -3.156388998031616, + "step": 708 + }, + { + "epoch": 1.14, + "learning_rate": 4.853349187475227e-07, + "logits/chosen": -1.3046540021896362, + "logits/rejected": -1.2640495300292969, + "logps/chosen": -82.0637435913086, + "logps/rejected": -120.984619140625, + "loss": 0.4317, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.2928811013698578, + "rewards/margins": 1.5113428831100464, + "rewards/rejected": -1.8042240142822266, + "step": 709 + }, + { + "epoch": 1.14, + "learning_rate": 4.852358303606818e-07, + "logits/chosen": -1.2987786531448364, + "logits/rejected": -1.2881207466125488, + "logps/chosen": -73.8106460571289, + "logps/rejected": -87.80175018310547, + "loss": 0.3021, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.16426116228103638, + "rewards/margins": 1.7342331409454346, + "rewards/rejected": -1.569972038269043, + "step": 710 + }, + { + "epoch": 1.14, + "learning_rate": 4.851367419738407e-07, + "logits/chosen": -1.2424931526184082, + "logits/rejected": -1.2251152992248535, + "logps/chosen": -70.90630340576172, + "logps/rejected": -89.6545639038086, + "loss": 0.3857, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.30844318866729736, + "rewards/margins": 0.7844092845916748, + "rewards/rejected": -0.47596609592437744, + "step": 711 + }, + { + "epoch": 1.14, + "learning_rate": 4.850376535869996e-07, + "logits/chosen": -1.456213355064392, + "logits/rejected": -1.458816647529602, + "logps/chosen": -89.06340026855469, + "logps/rejected": -99.77567291259766, + "loss": 0.4287, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.005985448136925697, + "rewards/margins": 0.8525654077529907, + "rewards/rejected": -0.8465799689292908, + "step": 712 + }, + { + "epoch": 1.14, + "learning_rate": 4.849385652001585e-07, + "logits/chosen": -1.5520946979522705, + "logits/rejected": -1.5076453685760498, + "logps/chosen": -78.24342346191406, + "logps/rejected": -76.43972778320312, + "loss": 0.3702, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6748777627944946, + "rewards/margins": 0.9726927280426025, + "rewards/rejected": -0.2978150248527527, + "step": 713 + }, + { + "epoch": 1.15, + "learning_rate": 4.848394768133175e-07, + "logits/chosen": -1.3569817543029785, + "logits/rejected": -1.497166395187378, + "logps/chosen": -64.95269775390625, + "logps/rejected": -121.00613403320312, + "loss": 0.3733, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.25779610872268677, + "rewards/margins": 2.6353683471679688, + "rewards/rejected": -2.3775722980499268, + "step": 714 + }, + { + "epoch": 1.15, + "learning_rate": 4.847403884264764e-07, + "logits/chosen": -1.472232699394226, + "logits/rejected": -1.4263136386871338, + "logps/chosen": -80.36674499511719, + "logps/rejected": -94.16943359375, + "loss": 0.2439, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.027488499879837036, + "rewards/margins": 2.399116039276123, + "rewards/rejected": -2.3716275691986084, + "step": 715 + }, + { + "epoch": 1.15, + "learning_rate": 4.846413000396353e-07, + "logits/chosen": -1.2371050119400024, + "logits/rejected": -1.3675442934036255, + "logps/chosen": -90.00377655029297, + "logps/rejected": -112.8480224609375, + "loss": 0.3803, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.35150033235549927, + "rewards/margins": 1.3933682441711426, + "rewards/rejected": -1.041867971420288, + "step": 716 + }, + { + "epoch": 1.15, + "learning_rate": 4.845422116527943e-07, + "logits/chosen": -1.357757806777954, + "logits/rejected": -1.3562750816345215, + "logps/chosen": -86.84135437011719, + "logps/rejected": -90.60419464111328, + "loss": 0.3426, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2319336235523224, + "rewards/margins": 2.5690128803253174, + "rewards/rejected": -2.3370792865753174, + "step": 717 + }, + { + "epoch": 1.15, + "learning_rate": 4.844431232659532e-07, + "logits/chosen": -1.2183332443237305, + "logits/rejected": -1.3116271495819092, + "logps/chosen": -84.6858139038086, + "logps/rejected": -136.48333740234375, + "loss": 0.4054, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.03499441593885422, + "rewards/margins": 0.6446696519851685, + "rewards/rejected": -0.6096751689910889, + "step": 718 + }, + { + "epoch": 1.15, + "learning_rate": 4.843440348791121e-07, + "logits/chosen": -1.2306610345840454, + "logits/rejected": -1.2731142044067383, + "logps/chosen": -83.11454772949219, + "logps/rejected": -108.62286376953125, + "loss": 0.36, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.3523496687412262, + "rewards/margins": 2.2580440044403076, + "rewards/rejected": -1.9056943655014038, + "step": 719 + }, + { + "epoch": 1.16, + "learning_rate": 4.842449464922711e-07, + "logits/chosen": -1.4083362817764282, + "logits/rejected": -1.3653844594955444, + "logps/chosen": -76.98210144042969, + "logps/rejected": -90.14613342285156, + "loss": 0.3235, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.06422042846679688, + "rewards/margins": 1.6898572444915771, + "rewards/rejected": -1.7540777921676636, + "step": 720 + }, + { + "epoch": 1.16, + "learning_rate": 4.8414585810543e-07, + "logits/chosen": -1.3067749738693237, + "logits/rejected": -1.4802623987197876, + "logps/chosen": -81.7290267944336, + "logps/rejected": -85.93888854980469, + "loss": 0.425, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1996994912624359, + "rewards/margins": 0.5706990361213684, + "rewards/rejected": -0.3709995150566101, + "step": 721 + }, + { + "epoch": 1.16, + "learning_rate": 4.84046769718589e-07, + "logits/chosen": -1.4746081829071045, + "logits/rejected": -1.416220784187317, + "logps/chosen": -86.321533203125, + "logps/rejected": -86.31310272216797, + "loss": 0.5195, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.39218634366989136, + "rewards/margins": 1.2685749530792236, + "rewards/rejected": -0.8763886094093323, + "step": 722 + }, + { + "epoch": 1.16, + "learning_rate": 4.839476813317479e-07, + "logits/chosen": -1.3782109022140503, + "logits/rejected": -1.3840434551239014, + "logps/chosen": -83.28592681884766, + "logps/rejected": -113.08039093017578, + "loss": 0.4118, + "rewards/accuracies": 0.25, + "rewards/chosen": 0.03958795964717865, + "rewards/margins": 1.1366941928863525, + "rewards/rejected": -1.0971062183380127, + "step": 723 + }, + { + "epoch": 1.16, + "learning_rate": 4.838485929449068e-07, + "logits/chosen": -1.4512611627578735, + "logits/rejected": -1.4973119497299194, + "logps/chosen": -99.79325866699219, + "logps/rejected": -109.58123779296875, + "loss": 0.4482, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.6222860813140869, + "rewards/margins": 0.7553225755691528, + "rewards/rejected": -1.3776085376739502, + "step": 724 + }, + { + "epoch": 1.16, + "learning_rate": 4.837495045580658e-07, + "logits/chosen": -1.3606019020080566, + "logits/rejected": -1.4003849029541016, + "logps/chosen": -80.1319351196289, + "logps/rejected": -93.48213958740234, + "loss": 0.4539, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.08363408595323563, + "rewards/margins": 1.8318675756454468, + "rewards/rejected": -1.7482335567474365, + "step": 725 + }, + { + "epoch": 1.17, + "learning_rate": 4.836504161712247e-07, + "logits/chosen": -1.26254403591156, + "logits/rejected": -1.254642367362976, + "logps/chosen": -77.28530883789062, + "logps/rejected": -97.5022964477539, + "loss": 0.2827, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.15811175107955933, + "rewards/margins": 1.9200843572616577, + "rewards/rejected": -1.7619726657867432, + "step": 726 + }, + { + "epoch": 1.17, + "learning_rate": 4.835513277843836e-07, + "logits/chosen": -1.1635406017303467, + "logits/rejected": -1.285835862159729, + "logps/chosen": -95.50721740722656, + "logps/rejected": -100.75607299804688, + "loss": 0.3475, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12072741985321045, + "rewards/margins": 1.947732925415039, + "rewards/rejected": -2.06846022605896, + "step": 727 + }, + { + "epoch": 1.17, + "learning_rate": 4.834522393975426e-07, + "logits/chosen": -1.3621854782104492, + "logits/rejected": -1.3827555179595947, + "logps/chosen": -81.45904541015625, + "logps/rejected": -118.23136901855469, + "loss": 0.2991, + "rewards/accuracies": 0.75, + "rewards/chosen": -1.1145493984222412, + "rewards/margins": 1.8331549167633057, + "rewards/rejected": -2.947704553604126, + "step": 728 + }, + { + "epoch": 1.17, + "learning_rate": 4.833531510107015e-07, + "logits/chosen": -1.478434681892395, + "logits/rejected": -1.5006532669067383, + "logps/chosen": -90.63253021240234, + "logps/rejected": -95.31001281738281, + "loss": 0.2861, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3823423385620117, + "rewards/margins": 1.9535698890686035, + "rewards/rejected": -1.5712274312973022, + "step": 729 + }, + { + "epoch": 1.17, + "learning_rate": 4.832540626238605e-07, + "logits/chosen": -1.4763373136520386, + "logits/rejected": -1.5043272972106934, + "logps/chosen": -87.62971496582031, + "logps/rejected": -98.61248779296875, + "loss": 0.3364, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5257824659347534, + "rewards/margins": 1.0554237365722656, + "rewards/rejected": -1.581206202507019, + "step": 730 + }, + { + "epoch": 1.17, + "learning_rate": 4.831549742370194e-07, + "logits/chosen": -1.2278738021850586, + "logits/rejected": -1.2407901287078857, + "logps/chosen": -81.0696792602539, + "logps/rejected": -122.22711944580078, + "loss": 0.2992, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.10796594619750977, + "rewards/margins": 2.862574815750122, + "rewards/rejected": -2.754608631134033, + "step": 731 + }, + { + "epoch": 1.17, + "learning_rate": 4.830558858501783e-07, + "logits/chosen": -1.3226670026779175, + "logits/rejected": -1.2967493534088135, + "logps/chosen": -86.84967041015625, + "logps/rejected": -93.37415313720703, + "loss": 0.4527, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.40712404251098633, + "rewards/margins": 0.32398176193237305, + "rewards/rejected": -0.7311058044433594, + "step": 732 + }, + { + "epoch": 1.18, + "learning_rate": 4.829567974633372e-07, + "logits/chosen": -1.3872661590576172, + "logits/rejected": -1.375831127166748, + "logps/chosen": -83.81224060058594, + "logps/rejected": -114.14056396484375, + "loss": 0.2801, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2387852817773819, + "rewards/margins": 1.705719232559204, + "rewards/rejected": -1.4669338464736938, + "step": 733 + }, + { + "epoch": 1.18, + "learning_rate": 4.828577090764962e-07, + "logits/chosen": -1.2816146612167358, + "logits/rejected": -1.2391327619552612, + "logps/chosen": -89.1363296508789, + "logps/rejected": -115.42158508300781, + "loss": 0.428, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.3459809124469757, + "rewards/margins": 1.4279472827911377, + "rewards/rejected": -1.773928165435791, + "step": 734 + }, + { + "epoch": 1.18, + "learning_rate": 4.827586206896552e-07, + "logits/chosen": -1.1272742748260498, + "logits/rejected": -1.1582305431365967, + "logps/chosen": -104.31429290771484, + "logps/rejected": -103.746826171875, + "loss": 0.317, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.6392549872398376, + "rewards/margins": 1.7178443670272827, + "rewards/rejected": -1.0785894393920898, + "step": 735 + }, + { + "epoch": 1.18, + "learning_rate": 4.826595323028141e-07, + "logits/chosen": -1.1992348432540894, + "logits/rejected": -1.1813983917236328, + "logps/chosen": -80.00708770751953, + "logps/rejected": -96.01605224609375, + "loss": 0.4723, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.6706594824790955, + "rewards/margins": 0.2200649231672287, + "rewards/rejected": -0.8907244205474854, + "step": 736 + }, + { + "epoch": 1.18, + "learning_rate": 4.82560443915973e-07, + "logits/chosen": -1.4851752519607544, + "logits/rejected": -1.4014997482299805, + "logps/chosen": -76.55154418945312, + "logps/rejected": -79.50753021240234, + "loss": 0.4668, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.16261833906173706, + "rewards/margins": 0.7433182001113892, + "rewards/rejected": -0.9059365391731262, + "step": 737 + }, + { + "epoch": 1.18, + "learning_rate": 4.82461355529132e-07, + "logits/chosen": -1.4084041118621826, + "logits/rejected": -1.4284309148788452, + "logps/chosen": -70.0961685180664, + "logps/rejected": -94.36293029785156, + "loss": 0.3101, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.687903642654419, + "rewards/margins": 0.8425151705741882, + "rewards/rejected": -0.15461158752441406, + "step": 738 + }, + { + "epoch": 1.19, + "learning_rate": 4.823622671422909e-07, + "logits/chosen": -1.4665236473083496, + "logits/rejected": -1.4735379219055176, + "logps/chosen": -80.98249816894531, + "logps/rejected": -108.79353332519531, + "loss": 0.4748, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.03623943775892258, + "rewards/margins": 1.6640636920928955, + "rewards/rejected": -1.627824306488037, + "step": 739 + }, + { + "epoch": 1.19, + "learning_rate": 4.822631787554499e-07, + "logits/chosen": -1.637945294380188, + "logits/rejected": -1.5040910243988037, + "logps/chosen": -77.62777709960938, + "logps/rejected": -110.36424255371094, + "loss": 0.3324, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11681614071130753, + "rewards/margins": 1.5027096271514893, + "rewards/rejected": -1.6195257902145386, + "step": 740 + }, + { + "epoch": 1.19, + "learning_rate": 4.821640903686088e-07, + "logits/chosen": -1.3603579998016357, + "logits/rejected": -1.4150307178497314, + "logps/chosen": -69.90480041503906, + "logps/rejected": -119.72610473632812, + "loss": 0.4112, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.39656582474708557, + "rewards/margins": 1.9443217515945435, + "rewards/rejected": -1.5477559566497803, + "step": 741 + }, + { + "epoch": 1.19, + "learning_rate": 4.820650019817677e-07, + "logits/chosen": -1.3286076784133911, + "logits/rejected": -1.3548285961151123, + "logps/chosen": -106.43323516845703, + "logps/rejected": -106.11851501464844, + "loss": 0.3127, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.34319305419921875, + "rewards/margins": 1.3729503154754639, + "rewards/rejected": -1.7161434888839722, + "step": 742 + }, + { + "epoch": 1.19, + "learning_rate": 4.819659135949266e-07, + "logits/chosen": -1.4347248077392578, + "logits/rejected": -1.4451441764831543, + "logps/chosen": -96.6881332397461, + "logps/rejected": -119.82936096191406, + "loss": 0.2813, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.06824912875890732, + "rewards/margins": 0.9606689214706421, + "rewards/rejected": -0.8924198150634766, + "step": 743 + }, + { + "epoch": 1.19, + "learning_rate": 4.818668252080855e-07, + "logits/chosen": -1.213517189025879, + "logits/rejected": -1.183061122894287, + "logps/chosen": -70.6455078125, + "logps/rejected": -121.3418197631836, + "loss": 0.316, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.22858218848705292, + "rewards/margins": 1.4289774894714355, + "rewards/rejected": -1.2003952264785767, + "step": 744 + }, + { + "epoch": 1.2, + "learning_rate": 4.817677368212446e-07, + "logits/chosen": -1.3739423751831055, + "logits/rejected": -1.3826394081115723, + "logps/chosen": -94.8553695678711, + "logps/rejected": -111.91023254394531, + "loss": 0.4128, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.13024768233299255, + "rewards/margins": 3.171170949935913, + "rewards/rejected": -3.0409233570098877, + "step": 745 + }, + { + "epoch": 1.2, + "learning_rate": 4.816686484344035e-07, + "logits/chosen": -1.3604732751846313, + "logits/rejected": -1.3502367734909058, + "logps/chosen": -84.6005630493164, + "logps/rejected": -127.40678405761719, + "loss": 0.2939, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.25329896807670593, + "rewards/margins": 2.860900402069092, + "rewards/rejected": -2.6076014041900635, + "step": 746 + }, + { + "epoch": 1.2, + "learning_rate": 4.815695600475624e-07, + "logits/chosen": -1.4921693801879883, + "logits/rejected": -1.4455829858779907, + "logps/chosen": -93.55008697509766, + "logps/rejected": -117.99861907958984, + "loss": 0.2904, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.18700505793094635, + "rewards/margins": 1.9049981832504272, + "rewards/rejected": -1.717993140220642, + "step": 747 + }, + { + "epoch": 1.2, + "learning_rate": 4.814704716607214e-07, + "logits/chosen": -1.4955112934112549, + "logits/rejected": -1.516878604888916, + "logps/chosen": -104.51922607421875, + "logps/rejected": -114.1079330444336, + "loss": 0.4066, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.31188470125198364, + "rewards/margins": 2.3178582191467285, + "rewards/rejected": -2.6297428607940674, + "step": 748 + }, + { + "epoch": 1.2, + "learning_rate": 4.813713832738803e-07, + "logits/chosen": -1.5547142028808594, + "logits/rejected": -1.6513943672180176, + "logps/chosen": -70.39974975585938, + "logps/rejected": -100.32977294921875, + "loss": 0.2914, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.27557528018951416, + "rewards/margins": 1.2797244787216187, + "rewards/rejected": -1.004149317741394, + "step": 749 + }, + { + "epoch": 1.2, + "learning_rate": 4.812722948870392e-07, + "logits/chosen": -1.3909766674041748, + "logits/rejected": -1.3985364437103271, + "logps/chosen": -89.81668853759766, + "logps/rejected": -110.48096466064453, + "loss": 0.3856, + "rewards/accuracies": 0.75, + "rewards/chosen": -1.041695237159729, + "rewards/margins": 0.54599928855896, + "rewards/rejected": -1.587694525718689, + "step": 750 + }, + { + "epoch": 1.21, + "learning_rate": 4.811732065001982e-07, + "logits/chosen": -1.3678425550460815, + "logits/rejected": -1.3341031074523926, + "logps/chosen": -85.22631072998047, + "logps/rejected": -88.07720947265625, + "loss": 0.3761, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.10841922461986542, + "rewards/margins": 1.0902758836746216, + "rewards/rejected": -1.1986950635910034, + "step": 751 + }, + { + "epoch": 1.21, + "learning_rate": 4.810741181133571e-07, + "logits/chosen": -1.3009955883026123, + "logits/rejected": -1.3190722465515137, + "logps/chosen": -53.975914001464844, + "logps/rejected": -87.44203186035156, + "loss": 0.1972, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13077707588672638, + "rewards/margins": 2.6516196727752686, + "rewards/rejected": -2.7823965549468994, + "step": 752 + }, + { + "epoch": 1.21, + "learning_rate": 4.80975029726516e-07, + "logits/chosen": -1.27962064743042, + "logits/rejected": -1.22838294506073, + "logps/chosen": -87.70652770996094, + "logps/rejected": -93.4171142578125, + "loss": 0.4433, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.08696909248828888, + "rewards/margins": 0.4725745618343353, + "rewards/rejected": -0.3856053948402405, + "step": 753 + }, + { + "epoch": 1.21, + "learning_rate": 4.808759413396749e-07, + "logits/chosen": -1.3934028148651123, + "logits/rejected": -1.393033742904663, + "logps/chosen": -123.2108154296875, + "logps/rejected": -137.65505981445312, + "loss": 0.4008, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14924241602420807, + "rewards/margins": 0.3563644587993622, + "rewards/rejected": -0.5056068897247314, + "step": 754 + }, + { + "epoch": 1.21, + "learning_rate": 4.807768529528339e-07, + "logits/chosen": -1.3339588642120361, + "logits/rejected": -1.345149278640747, + "logps/chosen": -101.16690063476562, + "logps/rejected": -103.3846664428711, + "loss": 0.3155, + "rewards/accuracies": 0.75, + "rewards/chosen": -1.3908159732818604, + "rewards/margins": 0.6748300194740295, + "rewards/rejected": -2.065645933151245, + "step": 755 + }, + { + "epoch": 1.21, + "learning_rate": 4.806777645659928e-07, + "logits/chosen": -1.4270007610321045, + "logits/rejected": -1.4061098098754883, + "logps/chosen": -64.88216400146484, + "logps/rejected": -113.3449935913086, + "loss": 0.282, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5908172726631165, + "rewards/margins": 1.6563498973846436, + "rewards/rejected": -1.0655326843261719, + "step": 756 + }, + { + "epoch": 1.22, + "learning_rate": 4.805786761791518e-07, + "logits/chosen": -1.4576385021209717, + "logits/rejected": -1.4271901845932007, + "logps/chosen": -102.80131530761719, + "logps/rejected": -102.6478042602539, + "loss": 0.3755, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.036965757608413696, + "rewards/margins": 1.8292450904846191, + "rewards/rejected": -1.8662108182907104, + "step": 757 + }, + { + "epoch": 1.22, + "learning_rate": 4.804795877923108e-07, + "logits/chosen": -1.4351778030395508, + "logits/rejected": -1.4006785154342651, + "logps/chosen": -91.8701171875, + "logps/rejected": -119.11117553710938, + "loss": 0.3684, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8073354363441467, + "rewards/margins": 1.7560611963272095, + "rewards/rejected": -2.563396692276001, + "step": 758 + }, + { + "epoch": 1.22, + "learning_rate": 4.803804994054697e-07, + "logits/chosen": -1.441806674003601, + "logits/rejected": -1.3243894577026367, + "logps/chosen": -91.26377868652344, + "logps/rejected": -104.00052642822266, + "loss": 0.3093, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.32737839221954346, + "rewards/margins": 1.348159670829773, + "rewards/rejected": -1.0207812786102295, + "step": 759 + }, + { + "epoch": 1.22, + "learning_rate": 4.802814110186286e-07, + "logits/chosen": -1.4768427610397339, + "logits/rejected": -1.5375205278396606, + "logps/chosen": -92.62492370605469, + "logps/rejected": -124.95454406738281, + "loss": 0.39, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.32660216093063354, + "rewards/margins": 1.3024914264678955, + "rewards/rejected": -1.6290936470031738, + "step": 760 + }, + { + "epoch": 1.22, + "learning_rate": 4.801823226317876e-07, + "logits/chosen": -1.4000916481018066, + "logits/rejected": -1.3609226942062378, + "logps/chosen": -104.15169525146484, + "logps/rejected": -120.63566589355469, + "loss": 0.5661, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0361316204071045, + "rewards/margins": 1.9480665922164917, + "rewards/rejected": -3.9841980934143066, + "step": 761 + }, + { + "epoch": 1.22, + "learning_rate": 4.800832342449465e-07, + "logits/chosen": -1.428407907485962, + "logits/rejected": -1.4114431142807007, + "logps/chosen": -56.21022415161133, + "logps/rejected": -96.48658752441406, + "loss": 0.3196, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4970060884952545, + "rewards/margins": 3.394813060760498, + "rewards/rejected": -2.8978071212768555, + "step": 762 + }, + { + "epoch": 1.22, + "learning_rate": 4.799841458581054e-07, + "logits/chosen": -1.4527724981307983, + "logits/rejected": -1.387725830078125, + "logps/chosen": -100.71971130371094, + "logps/rejected": -115.25833129882812, + "loss": 0.5031, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.7372692227363586, + "rewards/margins": 1.4218025207519531, + "rewards/rejected": -2.159071922302246, + "step": 763 + }, + { + "epoch": 1.23, + "learning_rate": 4.798850574712643e-07, + "logits/chosen": -1.3154606819152832, + "logits/rejected": -1.3589729070663452, + "logps/chosen": -97.50381469726562, + "logps/rejected": -115.63888549804688, + "loss": 0.4125, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.12407875806093216, + "rewards/margins": 0.3485240936279297, + "rewards/rejected": -0.47260284423828125, + "step": 764 + }, + { + "epoch": 1.23, + "learning_rate": 4.797859690844232e-07, + "logits/chosen": -1.2987507581710815, + "logits/rejected": -1.298435091972351, + "logps/chosen": -75.32613372802734, + "logps/rejected": -96.35774230957031, + "loss": 0.3798, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3852194845676422, + "rewards/margins": 1.3482251167297363, + "rewards/rejected": -0.9630056619644165, + "step": 765 + }, + { + "epoch": 1.23, + "learning_rate": 4.796868806975822e-07, + "logits/chosen": -1.399732232093811, + "logits/rejected": -1.440435767173767, + "logps/chosen": -89.42658996582031, + "logps/rejected": -86.78474426269531, + "loss": 0.4381, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1284601241350174, + "rewards/margins": 1.3479983806610107, + "rewards/rejected": -1.4764585494995117, + "step": 766 + }, + { + "epoch": 1.23, + "learning_rate": 4.795877923107411e-07, + "logits/chosen": -1.3685798645019531, + "logits/rejected": -1.4169092178344727, + "logps/chosen": -66.56224060058594, + "logps/rejected": -79.65753173828125, + "loss": 0.3493, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.2497176229953766, + "rewards/margins": 0.7261520624160767, + "rewards/rejected": -0.4764344394207001, + "step": 767 + }, + { + "epoch": 1.23, + "learning_rate": 4.794887039239001e-07, + "logits/chosen": -1.3172869682312012, + "logits/rejected": -1.3385329246520996, + "logps/chosen": -73.50481414794922, + "logps/rejected": -106.94889831542969, + "loss": 0.2853, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.029346227645874, + "rewards/margins": 1.350346565246582, + "rewards/rejected": -2.379692792892456, + "step": 768 + }, + { + "epoch": 1.23, + "learning_rate": 4.793896155370591e-07, + "logits/chosen": -1.2399847507476807, + "logits/rejected": -1.2732670307159424, + "logps/chosen": -82.31910705566406, + "logps/rejected": -112.87742614746094, + "loss": 0.3784, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09287625551223755, + "rewards/margins": 2.260348320007324, + "rewards/rejected": -2.353224277496338, + "step": 769 + }, + { + "epoch": 1.24, + "learning_rate": 4.79290527150218e-07, + "logits/chosen": -1.49796462059021, + "logits/rejected": -1.391014575958252, + "logps/chosen": -85.11933898925781, + "logps/rejected": -112.528076171875, + "loss": 0.2938, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12838402390480042, + "rewards/margins": 3.7660458087921143, + "rewards/rejected": -3.8944296836853027, + "step": 770 + }, + { + "epoch": 1.24, + "learning_rate": 4.791914387633769e-07, + "logits/chosen": -1.401871681213379, + "logits/rejected": -1.3404828310012817, + "logps/chosen": -105.64678955078125, + "logps/rejected": -113.98499298095703, + "loss": 0.3378, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8528120517730713, + "rewards/margins": 0.8839074969291687, + "rewards/rejected": -1.7367196083068848, + "step": 771 + }, + { + "epoch": 1.24, + "learning_rate": 4.790923503765359e-07, + "logits/chosen": -1.4404196739196777, + "logits/rejected": -1.4589464664459229, + "logps/chosen": -125.09049987792969, + "logps/rejected": -104.7862777709961, + "loss": 0.3652, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.3844785988330841, + "rewards/margins": 0.9573222398757935, + "rewards/rejected": -1.3418008089065552, + "step": 772 + }, + { + "epoch": 1.24, + "learning_rate": 4.789932619896948e-07, + "logits/chosen": -1.568264126777649, + "logits/rejected": -1.5459086894989014, + "logps/chosen": -82.12254333496094, + "logps/rejected": -100.47122192382812, + "loss": 0.4974, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.10537920892238617, + "rewards/margins": 1.7193565368652344, + "rewards/rejected": -1.613977313041687, + "step": 773 + }, + { + "epoch": 1.24, + "learning_rate": 4.788941736028537e-07, + "logits/chosen": -1.446671962738037, + "logits/rejected": -1.472130537033081, + "logps/chosen": -96.64337921142578, + "logps/rejected": -119.39864349365234, + "loss": 0.3869, + "rewards/accuracies": 0.75, + "rewards/chosen": -1.2387334108352661, + "rewards/margins": 1.7509739398956299, + "rewards/rejected": -2.9897074699401855, + "step": 774 + }, + { + "epoch": 1.24, + "learning_rate": 4.787950852160126e-07, + "logits/chosen": -1.2101062536239624, + "logits/rejected": -1.1693065166473389, + "logps/chosen": -88.53765106201172, + "logps/rejected": -89.48745727539062, + "loss": 0.4579, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.019217725843191147, + "rewards/margins": 1.8564460277557373, + "rewards/rejected": -1.8756637573242188, + "step": 775 + }, + { + "epoch": 1.25, + "learning_rate": 4.786959968291716e-07, + "logits/chosen": -1.3010807037353516, + "logits/rejected": -1.3451924324035645, + "logps/chosen": -91.60025024414062, + "logps/rejected": -106.26217651367188, + "loss": 0.3164, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.031602099537849426, + "rewards/margins": 2.002230644226074, + "rewards/rejected": -1.9706284999847412, + "step": 776 + }, + { + "epoch": 1.25, + "learning_rate": 4.785969084423305e-07, + "logits/chosen": -1.4144748449325562, + "logits/rejected": -1.3029708862304688, + "logps/chosen": -92.47920989990234, + "logps/rejected": -105.9443130493164, + "loss": 0.4098, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.5129703879356384, + "rewards/margins": 0.44194185733795166, + "rewards/rejected": 0.07102852314710617, + "step": 777 + }, + { + "epoch": 1.25, + "learning_rate": 4.784978200554895e-07, + "logits/chosen": -1.6324808597564697, + "logits/rejected": -1.603705644607544, + "logps/chosen": -100.6744613647461, + "logps/rejected": -106.129150390625, + "loss": 0.4851, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.9775441884994507, + "rewards/margins": 1.110002040863037, + "rewards/rejected": -2.0875461101531982, + "step": 778 + }, + { + "epoch": 1.25, + "learning_rate": 4.783987316686484e-07, + "logits/chosen": -1.4224759340286255, + "logits/rejected": -1.3920093774795532, + "logps/chosen": -108.40132141113281, + "logps/rejected": -145.1244354248047, + "loss": 0.3878, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.19152098894119263, + "rewards/margins": 3.070927381515503, + "rewards/rejected": -2.879406690597534, + "step": 779 + }, + { + "epoch": 1.25, + "learning_rate": 4.782996432818073e-07, + "logits/chosen": -1.336031436920166, + "logits/rejected": -1.286874532699585, + "logps/chosen": -79.22531127929688, + "logps/rejected": -83.72010803222656, + "loss": 0.302, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5669018030166626, + "rewards/margins": 1.361464500427246, + "rewards/rejected": -0.7945627570152283, + "step": 780 + }, + { + "epoch": 1.25, + "learning_rate": 4.782005548949663e-07, + "logits/chosen": -1.3366363048553467, + "logits/rejected": -1.3390461206436157, + "logps/chosen": -111.70911407470703, + "logps/rejected": -119.69509887695312, + "loss": 0.4836, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.3170236647129059, + "rewards/margins": 1.4456250667572021, + "rewards/rejected": -1.1286015510559082, + "step": 781 + }, + { + "epoch": 1.26, + "learning_rate": 4.781014665081253e-07, + "logits/chosen": -1.2851723432540894, + "logits/rejected": -1.2348060607910156, + "logps/chosen": -114.98124694824219, + "logps/rejected": -146.65357971191406, + "loss": 0.3465, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5482298135757446, + "rewards/margins": 2.464938163757324, + "rewards/rejected": -3.0131678581237793, + "step": 782 + }, + { + "epoch": 1.26, + "learning_rate": 4.780023781212842e-07, + "logits/chosen": -1.3472552299499512, + "logits/rejected": -1.3802173137664795, + "logps/chosen": -79.37167358398438, + "logps/rejected": -90.9935302734375, + "loss": 0.4883, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.3286859691143036, + "rewards/margins": 0.35907477140426636, + "rewards/rejected": -0.6877607107162476, + "step": 783 + }, + { + "epoch": 1.26, + "learning_rate": 4.779032897344431e-07, + "logits/chosen": -1.3687454462051392, + "logits/rejected": -1.3064881563186646, + "logps/chosen": -67.77781677246094, + "logps/rejected": -91.71720123291016, + "loss": 0.5268, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.24964535236358643, + "rewards/margins": 1.000247836112976, + "rewards/rejected": -0.7506026029586792, + "step": 784 + }, + { + "epoch": 1.26, + "learning_rate": 4.77804201347602e-07, + "logits/chosen": -1.2812796831130981, + "logits/rejected": -1.290711760520935, + "logps/chosen": -80.43958282470703, + "logps/rejected": -94.7020263671875, + "loss": 0.5236, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.04517107084393501, + "rewards/margins": 0.333005428314209, + "rewards/rejected": -0.2878343462944031, + "step": 785 + }, + { + "epoch": 1.26, + "learning_rate": 4.77705112960761e-07, + "logits/chosen": -1.2289416790008545, + "logits/rejected": -1.2923582792282104, + "logps/chosen": -95.5247802734375, + "logps/rejected": -114.56204986572266, + "loss": 0.3423, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09593506157398224, + "rewards/margins": 3.24476957321167, + "rewards/rejected": -3.148834705352783, + "step": 786 + }, + { + "epoch": 1.26, + "learning_rate": 4.776060245739199e-07, + "logits/chosen": -1.3092231750488281, + "logits/rejected": -1.3618532419204712, + "logps/chosen": -98.99152374267578, + "logps/rejected": -138.1865997314453, + "loss": 0.433, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.01213989220559597, + "rewards/margins": 2.2375378608703613, + "rewards/rejected": -2.225397825241089, + "step": 787 + }, + { + "epoch": 1.26, + "learning_rate": 4.775069361870789e-07, + "logits/chosen": -1.476326823234558, + "logits/rejected": -1.5099215507507324, + "logps/chosen": -86.89228820800781, + "logps/rejected": -93.55575561523438, + "loss": 0.2251, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.8330244421958923, + "rewards/margins": 1.0462112426757812, + "rewards/rejected": -1.8792357444763184, + "step": 788 + }, + { + "epoch": 1.27, + "learning_rate": 4.774078478002378e-07, + "logits/chosen": -1.4523626565933228, + "logits/rejected": -1.3699564933776855, + "logps/chosen": -80.46049499511719, + "logps/rejected": -99.6191635131836, + "loss": 0.2756, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1960269957780838, + "rewards/margins": 2.0651562213897705, + "rewards/rejected": -2.261183261871338, + "step": 789 + }, + { + "epoch": 1.27, + "learning_rate": 4.773087594133967e-07, + "logits/chosen": -1.428037405014038, + "logits/rejected": -1.3527019023895264, + "logps/chosen": -96.73248291015625, + "logps/rejected": -92.84661865234375, + "loss": 0.3244, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11638011038303375, + "rewards/margins": 0.6474985480308533, + "rewards/rejected": -0.763878583908081, + "step": 790 + }, + { + "epoch": 1.27, + "learning_rate": 4.772096710265556e-07, + "logits/chosen": -1.5963654518127441, + "logits/rejected": -1.5859627723693848, + "logps/chosen": -85.25105285644531, + "logps/rejected": -118.58192443847656, + "loss": 0.3557, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.040251344442367554, + "rewards/margins": 1.8689073324203491, + "rewards/rejected": -1.9091588258743286, + "step": 791 + }, + { + "epoch": 1.27, + "learning_rate": 4.771105826397145e-07, + "logits/chosen": -1.3078620433807373, + "logits/rejected": -1.2893251180648804, + "logps/chosen": -87.25511932373047, + "logps/rejected": -100.44789123535156, + "loss": 0.3867, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.2798977792263031, + "rewards/margins": 1.7422631978988647, + "rewards/rejected": -2.0221610069274902, + "step": 792 + }, + { + "epoch": 1.27, + "learning_rate": 4.770114942528736e-07, + "logits/chosen": -1.4090783596038818, + "logits/rejected": -1.4879786968231201, + "logps/chosen": -67.404052734375, + "logps/rejected": -100.92681884765625, + "loss": 0.329, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1649092733860016, + "rewards/margins": 1.3881254196166992, + "rewards/rejected": -1.5530345439910889, + "step": 793 + }, + { + "epoch": 1.27, + "learning_rate": 4.769124058660325e-07, + "logits/chosen": -1.4320560693740845, + "logits/rejected": -1.4101804494857788, + "logps/chosen": -84.84634399414062, + "logps/rejected": -102.83170318603516, + "loss": 0.2506, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.27835631370544434, + "rewards/margins": 2.1748709678649902, + "rewards/rejected": -1.8965147733688354, + "step": 794 + }, + { + "epoch": 1.28, + "learning_rate": 4.7681331747919143e-07, + "logits/chosen": -1.3854682445526123, + "logits/rejected": -1.4578721523284912, + "logps/chosen": -94.77523803710938, + "logps/rejected": -81.2493667602539, + "loss": 0.3842, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.55183345079422, + "rewards/margins": 0.6745489835739136, + "rewards/rejected": -1.2263824939727783, + "step": 795 + }, + { + "epoch": 1.28, + "learning_rate": 4.767142290923504e-07, + "logits/chosen": -1.3333821296691895, + "logits/rejected": -1.3415969610214233, + "logps/chosen": -94.12651824951172, + "logps/rejected": -84.15394592285156, + "loss": 0.4578, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.17549963295459747, + "rewards/margins": 2.398294448852539, + "rewards/rejected": -2.57379412651062, + "step": 796 + }, + { + "epoch": 1.28, + "learning_rate": 4.766151407055093e-07, + "logits/chosen": -1.2892152070999146, + "logits/rejected": -1.2784297466278076, + "logps/chosen": -95.19532012939453, + "logps/rejected": -88.51426696777344, + "loss": 0.4148, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.4015122354030609, + "rewards/margins": 0.2201913744211197, + "rewards/rejected": -0.6217036247253418, + "step": 797 + }, + { + "epoch": 1.28, + "learning_rate": 4.765160523186682e-07, + "logits/chosen": -1.4371483325958252, + "logits/rejected": -1.3843356370925903, + "logps/chosen": -94.23515319824219, + "logps/rejected": -100.93545532226562, + "loss": 0.2513, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.008195213973522186, + "rewards/margins": 0.8663699626922607, + "rewards/rejected": -0.8745651245117188, + "step": 798 + }, + { + "epoch": 1.28, + "learning_rate": 4.7641696393182716e-07, + "logits/chosen": -1.3883440494537354, + "logits/rejected": -1.328714370727539, + "logps/chosen": -91.39260864257812, + "logps/rejected": -101.96568298339844, + "loss": 0.3789, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7243200540542603, + "rewards/margins": 2.4840056896209717, + "rewards/rejected": -3.2083256244659424, + "step": 799 + }, + { + "epoch": 1.28, + "learning_rate": 4.7631787554498607e-07, + "logits/chosen": -1.4376999139785767, + "logits/rejected": -1.4248183965682983, + "logps/chosen": -111.62651062011719, + "logps/rejected": -126.81134033203125, + "loss": 0.3822, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5171934366226196, + "rewards/margins": 2.2485544681549072, + "rewards/rejected": -2.7657480239868164, + "step": 800 + }, + { + "epoch": 1.29, + "learning_rate": 4.762187871581451e-07, + "logits/chosen": -1.489363670349121, + "logits/rejected": -1.580581784248352, + "logps/chosen": -84.1202163696289, + "logps/rejected": -123.14726257324219, + "loss": 0.2969, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.4649463891983032, + "rewards/margins": 1.1847479343414307, + "rewards/rejected": -1.6496943235397339, + "step": 801 + }, + { + "epoch": 1.29, + "learning_rate": 4.76119698771304e-07, + "logits/chosen": -1.3916782140731812, + "logits/rejected": -1.3603460788726807, + "logps/chosen": -124.96878051757812, + "logps/rejected": -117.596435546875, + "loss": 0.2264, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.42084693908691406, + "rewards/margins": 1.2236419916152954, + "rewards/rejected": -1.6444889307022095, + "step": 802 + }, + { + "epoch": 1.29, + "learning_rate": 4.760206103844629e-07, + "logits/chosen": -1.5669660568237305, + "logits/rejected": -1.5764132738113403, + "logps/chosen": -80.19837951660156, + "logps/rejected": -101.46507263183594, + "loss": 0.4359, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.21972599625587463, + "rewards/margins": 1.2959332466125488, + "rewards/rejected": -1.515659213066101, + "step": 803 + }, + { + "epoch": 1.29, + "learning_rate": 4.7592152199762185e-07, + "logits/chosen": -1.4628448486328125, + "logits/rejected": -1.5344501733779907, + "logps/chosen": -79.56920623779297, + "logps/rejected": -119.68403625488281, + "loss": 0.2247, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.05152454972267151, + "rewards/margins": 3.940316677093506, + "rewards/rejected": -3.888792037963867, + "step": 804 + }, + { + "epoch": 1.29, + "learning_rate": 4.7582243361078076e-07, + "logits/chosen": -1.3094149827957153, + "logits/rejected": -1.3560928106307983, + "logps/chosen": -94.40289306640625, + "logps/rejected": -127.07909393310547, + "loss": 0.3588, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.11940211802721024, + "rewards/margins": 3.2833023071289062, + "rewards/rejected": -3.1638998985290527, + "step": 805 + }, + { + "epoch": 1.29, + "learning_rate": 4.7572334522393977e-07, + "logits/chosen": -1.4151484966278076, + "logits/rejected": -1.46943998336792, + "logps/chosen": -81.46760559082031, + "logps/rejected": -118.49510192871094, + "loss": 0.4325, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.13408049941062927, + "rewards/margins": 1.3974504470825195, + "rewards/rejected": -1.5315309762954712, + "step": 806 + }, + { + "epoch": 1.3, + "learning_rate": 4.756242568370987e-07, + "logits/chosen": -1.5905771255493164, + "logits/rejected": -1.5391255617141724, + "logps/chosen": -105.71222686767578, + "logps/rejected": -118.84329223632812, + "loss": 0.2266, + "rewards/accuracies": 0.75, + "rewards/chosen": -1.0375512838363647, + "rewards/margins": 2.1677026748657227, + "rewards/rejected": -3.205254077911377, + "step": 807 + }, + { + "epoch": 1.3, + "learning_rate": 4.755251684502576e-07, + "logits/chosen": -1.2241172790527344, + "logits/rejected": -1.2152931690216064, + "logps/chosen": -99.58531188964844, + "logps/rejected": -83.66168212890625, + "loss": 0.4019, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.31274864077568054, + "rewards/margins": 1.1582330465316772, + "rewards/rejected": -1.4709817171096802, + "step": 808 + }, + { + "epoch": 1.3, + "learning_rate": 4.7542608006341655e-07, + "logits/chosen": -1.3260515928268433, + "logits/rejected": -1.312002182006836, + "logps/chosen": -88.94812774658203, + "logps/rejected": -120.5614013671875, + "loss": 0.3252, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.9283734560012817, + "rewards/margins": 0.5633834004402161, + "rewards/rejected": -1.4917569160461426, + "step": 809 + }, + { + "epoch": 1.3, + "learning_rate": 4.7532699167657545e-07, + "logits/chosen": -1.2858850955963135, + "logits/rejected": -1.2942931652069092, + "logps/chosen": -96.72728729248047, + "logps/rejected": -92.34442138671875, + "loss": 0.3519, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.870337724685669, + "rewards/margins": 0.4562051296234131, + "rewards/rejected": -1.326542854309082, + "step": 810 + }, + { + "epoch": 1.3, + "learning_rate": 4.7522790328973447e-07, + "logits/chosen": -1.4587318897247314, + "logits/rejected": -1.4320892095565796, + "logps/chosen": -85.21399688720703, + "logps/rejected": -120.8559341430664, + "loss": 0.4321, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.17783355712890625, + "rewards/margins": 2.835641860961914, + "rewards/rejected": -3.0134754180908203, + "step": 811 + }, + { + "epoch": 1.3, + "learning_rate": 4.7512881490289337e-07, + "logits/chosen": -1.5556178092956543, + "logits/rejected": -1.5189120769500732, + "logps/chosen": -90.63643646240234, + "logps/rejected": -112.4111099243164, + "loss": 0.429, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0250928401947021, + "rewards/margins": 2.35780930519104, + "rewards/rejected": -3.3829023838043213, + "step": 812 + }, + { + "epoch": 1.3, + "learning_rate": 4.750297265160523e-07, + "logits/chosen": -1.4209907054901123, + "logits/rejected": -1.3812050819396973, + "logps/chosen": -97.9415054321289, + "logps/rejected": -124.76395416259766, + "loss": 0.2622, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.6255830526351929, + "rewards/margins": 3.5239171981811523, + "rewards/rejected": -4.149499893188477, + "step": 813 + }, + { + "epoch": 1.31, + "learning_rate": 4.7493063812921124e-07, + "logits/chosen": -1.4316115379333496, + "logits/rejected": -1.4794877767562866, + "logps/chosen": -100.68930053710938, + "logps/rejected": -102.78739166259766, + "loss": 0.3869, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.8867339491844177, + "rewards/margins": 1.838860273361206, + "rewards/rejected": -2.7255942821502686, + "step": 814 + }, + { + "epoch": 1.31, + "learning_rate": 4.7483154974237015e-07, + "logits/chosen": -1.349687933921814, + "logits/rejected": -1.2229033708572388, + "logps/chosen": -99.9083023071289, + "logps/rejected": -108.62406921386719, + "loss": 0.356, + "rewards/accuracies": 0.25, + "rewards/chosen": -0.7291482090950012, + "rewards/margins": 0.8065196871757507, + "rewards/rejected": -1.535667896270752, + "step": 815 + }, + { + "epoch": 1.31, + "learning_rate": 4.7473246135552916e-07, + "logits/chosen": -1.3574572801589966, + "logits/rejected": -1.313169240951538, + "logps/chosen": -70.1456298828125, + "logps/rejected": -88.99125671386719, + "loss": 0.3236, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9733759760856628, + "rewards/margins": 1.976133108139038, + "rewards/rejected": -2.9495091438293457, + "step": 816 + }, + { + "epoch": 1.31, + "learning_rate": 4.7463337296868807e-07, + "logits/chosen": -1.4330763816833496, + "logits/rejected": -1.4408726692199707, + "logps/chosen": -114.13641357421875, + "logps/rejected": -122.54708862304688, + "loss": 0.3337, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7275932431221008, + "rewards/margins": 3.0821681022644043, + "rewards/rejected": -3.8097615242004395, + "step": 817 + }, + { + "epoch": 1.31, + "learning_rate": 4.7453428458184697e-07, + "logits/chosen": -1.4795721769332886, + "logits/rejected": -1.4657690525054932, + "logps/chosen": -99.6626968383789, + "logps/rejected": -104.28457641601562, + "loss": 0.4687, + "rewards/accuracies": 0.75, + "rewards/chosen": -1.472090482711792, + "rewards/margins": 1.4098808765411377, + "rewards/rejected": -2.8819713592529297, + "step": 818 + }, + { + "epoch": 1.31, + "learning_rate": 4.7443519619500593e-07, + "logits/chosen": -1.4973992109298706, + "logits/rejected": -1.5581955909729004, + "logps/chosen": -104.68208312988281, + "logps/rejected": -133.37879943847656, + "loss": 0.4575, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1233278512954712, + "rewards/margins": 1.7829937934875488, + "rewards/rejected": -2.9063215255737305, + "step": 819 + }, + { + "epoch": 1.32, + "learning_rate": 4.7433610780816484e-07, + "logits/chosen": -1.4560948610305786, + "logits/rejected": -1.4262773990631104, + "logps/chosen": -99.50623321533203, + "logps/rejected": -126.90412902832031, + "loss": 0.3399, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.45924055576324463, + "rewards/margins": 3.5874392986297607, + "rewards/rejected": -4.046679973602295, + "step": 820 + }, + { + "epoch": 1.32, + "learning_rate": 4.742370194213238e-07, + "logits/chosen": -1.4778159856796265, + "logits/rejected": -1.4926472902297974, + "logps/chosen": -74.88053894042969, + "logps/rejected": -94.5679931640625, + "loss": 0.3512, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6245787143707275, + "rewards/margins": 0.8575637340545654, + "rewards/rejected": -1.4821425676345825, + "step": 821 + }, + { + "epoch": 1.32, + "learning_rate": 4.7413793103448276e-07, + "logits/chosen": -1.427869439125061, + "logits/rejected": -1.5102508068084717, + "logps/chosen": -88.70858001708984, + "logps/rejected": -106.18560791015625, + "loss": 0.4032, + "rewards/accuracies": 0.75, + "rewards/chosen": -1.2300591468811035, + "rewards/margins": 2.059913158416748, + "rewards/rejected": -3.2899725437164307, + "step": 822 + }, + { + "epoch": 1.32, + "learning_rate": 4.7403884264764167e-07, + "logits/chosen": -1.4518811702728271, + "logits/rejected": -1.4276622533798218, + "logps/chosen": -98.82361602783203, + "logps/rejected": -102.69120788574219, + "loss": 0.2692, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.9755848050117493, + "rewards/margins": 1.6921327114105225, + "rewards/rejected": -2.667717456817627, + "step": 823 + }, + { + "epoch": 1.32, + "learning_rate": 4.739397542608006e-07, + "logits/chosen": -1.5273962020874023, + "logits/rejected": -1.518336534500122, + "logps/chosen": -83.89656066894531, + "logps/rejected": -120.76232147216797, + "loss": 0.3174, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.40375664830207825, + "rewards/margins": 4.0516204833984375, + "rewards/rejected": -4.455377101898193, + "step": 824 + }, + { + "epoch": 1.32, + "learning_rate": 4.7384066587395953e-07, + "logits/chosen": -1.3329288959503174, + "logits/rejected": -1.3248867988586426, + "logps/chosen": -92.15515899658203, + "logps/rejected": -92.53300476074219, + "loss": 0.3749, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.3596939146518707, + "rewards/margins": 0.8146473169326782, + "rewards/rejected": -1.1743412017822266, + "step": 825 + }, + { + "epoch": 1.33, + "learning_rate": 4.737415774871185e-07, + "logits/chosen": -1.4141978025436401, + "logits/rejected": -1.4220844507217407, + "logps/chosen": -85.89578247070312, + "logps/rejected": -105.0001449584961, + "loss": 0.4503, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0263237953186035, + "rewards/margins": 1.174168348312378, + "rewards/rejected": -2.2004921436309814, + "step": 826 + }, + { + "epoch": 1.33, + "learning_rate": 4.7364248910027745e-07, + "logits/chosen": -1.2557504177093506, + "logits/rejected": -1.3233401775360107, + "logps/chosen": -86.15840148925781, + "logps/rejected": -125.18978881835938, + "loss": 0.3324, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.7103544473648071, + "rewards/margins": 1.6715140342712402, + "rewards/rejected": -2.381868362426758, + "step": 827 + }, + { + "epoch": 1.33, + "learning_rate": 4.7354340071343636e-07, + "logits/chosen": -1.5153026580810547, + "logits/rejected": -1.4926087856292725, + "logps/chosen": -86.0960693359375, + "logps/rejected": -102.19439697265625, + "loss": 0.3415, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.24425727128982544, + "rewards/margins": 1.7412840127944946, + "rewards/rejected": -1.9855413436889648, + "step": 828 + }, + { + "epoch": 1.33, + "learning_rate": 4.734443123265953e-07, + "logits/chosen": -1.4682657718658447, + "logits/rejected": -1.5269055366516113, + "logps/chosen": -89.58148193359375, + "logps/rejected": -123.80720520019531, + "loss": 0.2744, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.14977970719337463, + "rewards/margins": 3.2305266857147217, + "rewards/rejected": -3.08074688911438, + "step": 829 + }, + { + "epoch": 1.33, + "learning_rate": 4.733452239397542e-07, + "logits/chosen": -1.4432027339935303, + "logits/rejected": -1.4204756021499634, + "logps/chosen": -90.01183319091797, + "logps/rejected": -113.59738159179688, + "loss": 0.3498, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.07716560363769531, + "rewards/margins": 2.3963987827301025, + "rewards/rejected": -2.3192331790924072, + "step": 830 + }, + { + "epoch": 1.33, + "learning_rate": 4.7324613555291313e-07, + "logits/chosen": -1.361814022064209, + "logits/rejected": -1.3850769996643066, + "logps/chosen": -108.47561645507812, + "logps/rejected": -137.21133422851562, + "loss": 0.4797, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07563057541847229, + "rewards/margins": 2.7891335487365723, + "rewards/rejected": -2.8647642135620117, + "step": 831 + }, + { + "epoch": 1.34, + "learning_rate": 4.7314704716607214e-07, + "logits/chosen": -1.262779712677002, + "logits/rejected": -1.3241057395935059, + "logps/chosen": -95.19525909423828, + "logps/rejected": -104.71087646484375, + "loss": 0.3867, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.34044378995895386, + "rewards/margins": 0.7455928921699524, + "rewards/rejected": -1.0860366821289062, + "step": 832 + }, + { + "epoch": 1.34, + "learning_rate": 4.7304795877923105e-07, + "logits/chosen": -1.3406627178192139, + "logits/rejected": -1.2960156202316284, + "logps/chosen": -99.34365844726562, + "logps/rejected": -98.27256774902344, + "loss": 0.3434, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.1813381314277649, + "rewards/margins": 0.7208836674690247, + "rewards/rejected": -0.9022217988967896, + "step": 833 + }, + { + "epoch": 1.34, + "learning_rate": 4.7294887039239e-07, + "logits/chosen": -1.338902473449707, + "logits/rejected": -1.4231345653533936, + "logps/chosen": -98.46669006347656, + "logps/rejected": -107.97233581542969, + "loss": 0.3863, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6137487292289734, + "rewards/margins": 1.6740732192993164, + "rewards/rejected": -2.2878217697143555, + "step": 834 + }, + { + "epoch": 1.34, + "learning_rate": 4.728497820055489e-07, + "logits/chosen": -1.3430230617523193, + "logits/rejected": -1.348879337310791, + "logps/chosen": -88.46080017089844, + "logps/rejected": -117.81168365478516, + "loss": 0.2051, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6006889343261719, + "rewards/margins": 4.595475196838379, + "rewards/rejected": -5.196164608001709, + "step": 835 + }, + { + "epoch": 1.34, + "learning_rate": 4.727506936187078e-07, + "logits/chosen": -1.4564151763916016, + "logits/rejected": -1.3803036212921143, + "logps/chosen": -94.81198120117188, + "logps/rejected": -110.26583862304688, + "loss": 0.292, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.01928987354040146, + "rewards/margins": 2.689044713973999, + "rewards/rejected": -2.7083346843719482, + "step": 836 + }, + { + "epoch": 1.34, + "learning_rate": 4.7265160523186684e-07, + "logits/chosen": -1.4067498445510864, + "logits/rejected": -1.4680211544036865, + "logps/chosen": -79.41033935546875, + "logps/rejected": -118.60881042480469, + "loss": 0.4621, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9674829840660095, + "rewards/margins": 3.1892507076263428, + "rewards/rejected": -4.156733512878418, + "step": 837 + }, + { + "epoch": 1.35, + "learning_rate": 4.7255251684502574e-07, + "logits/chosen": -1.284503698348999, + "logits/rejected": -1.3900878429412842, + "logps/chosen": -80.24241638183594, + "logps/rejected": -128.83541870117188, + "loss": 0.212, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.18779459595680237, + "rewards/margins": 2.7223052978515625, + "rewards/rejected": -2.534510612487793, + "step": 838 + }, + { + "epoch": 1.35, + "learning_rate": 4.724534284581847e-07, + "logits/chosen": -1.1986953020095825, + "logits/rejected": -1.223322868347168, + "logps/chosen": -99.10812377929688, + "logps/rejected": -115.06632995605469, + "loss": 0.3217, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5027133226394653, + "rewards/margins": 1.7482997179031372, + "rewards/rejected": -2.2510130405426025, + "step": 839 + }, + { + "epoch": 1.35, + "learning_rate": 4.723543400713436e-07, + "logits/chosen": -1.1942161321640015, + "logits/rejected": -1.2155028581619263, + "logps/chosen": -99.63776397705078, + "logps/rejected": -135.48985290527344, + "loss": 0.3249, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.9701915979385376, + "rewards/margins": 1.749567985534668, + "rewards/rejected": -2.719759464263916, + "step": 840 + }, + { + "epoch": 1.35, + "learning_rate": 4.722552516845025e-07, + "logits/chosen": -1.3764268159866333, + "logits/rejected": -1.3348503112792969, + "logps/chosen": -84.84028625488281, + "logps/rejected": -101.60136413574219, + "loss": 0.3091, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2655399441719055, + "rewards/margins": 1.7303149700164795, + "rewards/rejected": -1.4647752046585083, + "step": 841 + }, + { + "epoch": 1.35, + "learning_rate": 4.721561632976615e-07, + "logits/chosen": -1.3331114053726196, + "logits/rejected": -1.3470219373703003, + "logps/chosen": -104.82176208496094, + "logps/rejected": -126.66482543945312, + "loss": 0.337, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.4801672101020813, + "rewards/margins": 3.138784170150757, + "rewards/rejected": -3.6189515590667725, + "step": 842 + }, + { + "epoch": 1.35, + "learning_rate": 4.7205707491082044e-07, + "logits/chosen": -1.4033527374267578, + "logits/rejected": -1.392135739326477, + "logps/chosen": -80.78723907470703, + "logps/rejected": -142.7035675048828, + "loss": 0.3103, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.08001328259706497, + "rewards/margins": 4.300027847290039, + "rewards/rejected": -4.220014572143555, + "step": 843 + }, + { + "epoch": 1.35, + "learning_rate": 4.719579865239794e-07, + "logits/chosen": -1.3801264762878418, + "logits/rejected": -1.4802361726760864, + "logps/chosen": -93.86898803710938, + "logps/rejected": -124.66912841796875, + "loss": 0.3826, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.04860353469848633, + "rewards/margins": 1.5310704708099365, + "rewards/rejected": -1.5796740055084229, + "step": 844 + }, + { + "epoch": 1.36, + "learning_rate": 4.718588981371383e-07, + "logits/chosen": -1.1968863010406494, + "logits/rejected": -1.1818851232528687, + "logps/chosen": -85.33483123779297, + "logps/rejected": -102.30375671386719, + "loss": 0.278, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.637770414352417, + "rewards/margins": 2.469513177871704, + "rewards/rejected": -3.107283592224121, + "step": 845 + }, + { + "epoch": 1.36, + "learning_rate": 4.717598097502972e-07, + "logits/chosen": -1.434950590133667, + "logits/rejected": -1.4414716958999634, + "logps/chosen": -89.59851837158203, + "logps/rejected": -128.78265380859375, + "loss": 0.2589, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.28020554780960083, + "rewards/margins": 2.49346923828125, + "rewards/rejected": -2.773674726486206, + "step": 846 + }, + { + "epoch": 1.36, + "learning_rate": 4.7166072136345617e-07, + "logits/chosen": -1.4361101388931274, + "logits/rejected": -1.4887644052505493, + "logps/chosen": -64.52383422851562, + "logps/rejected": -101.86747741699219, + "loss": 0.3651, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.4031468331813812, + "rewards/margins": 3.014817237854004, + "rewards/rejected": -3.417963981628418, + "step": 847 + }, + { + "epoch": 1.36, + "learning_rate": 4.7156163297661513e-07, + "logits/chosen": -1.3596985340118408, + "logits/rejected": -1.3542417287826538, + "logps/chosen": -81.66444396972656, + "logps/rejected": -84.52822875976562, + "loss": 0.4375, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.7115099430084229, + "rewards/margins": 0.5217678546905518, + "rewards/rejected": -1.2332777976989746, + "step": 848 + }, + { + "epoch": 1.36, + "learning_rate": 4.714625445897741e-07, + "logits/chosen": -1.4938799142837524, + "logits/rejected": -1.4043649435043335, + "logps/chosen": -102.88548278808594, + "logps/rejected": -112.03765869140625, + "loss": 0.3989, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.9157569408416748, + "rewards/margins": 0.7887274622917175, + "rewards/rejected": -1.704484462738037, + "step": 849 + }, + { + "epoch": 1.36, + "learning_rate": 4.71363456202933e-07, + "logits/chosen": -1.4704091548919678, + "logits/rejected": -1.51737642288208, + "logps/chosen": -104.00901794433594, + "logps/rejected": -109.41404724121094, + "loss": 0.3468, + "rewards/accuracies": 0.75, + "rewards/chosen": -1.6444734334945679, + "rewards/margins": 1.5698599815368652, + "rewards/rejected": -3.2143335342407227, + "step": 850 + }, + { + "epoch": 1.37, + "learning_rate": 4.712643678160919e-07, + "logits/chosen": -1.4539318084716797, + "logits/rejected": -1.4793615341186523, + "logps/chosen": -100.28477478027344, + "logps/rejected": -109.55717468261719, + "loss": 0.2531, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1670963317155838, + "rewards/margins": 1.9242877960205078, + "rewards/rejected": -2.091383934020996, + "step": 851 + }, + { + "epoch": 1.37, + "learning_rate": 4.7116527942925086e-07, + "logits/chosen": -1.5180578231811523, + "logits/rejected": -1.4551713466644287, + "logps/chosen": -82.52430725097656, + "logps/rejected": -100.09239959716797, + "loss": 0.2784, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.31973332166671753, + "rewards/margins": 2.3931357860565186, + "rewards/rejected": -2.712869167327881, + "step": 852 + }, + { + "epoch": 1.37, + "learning_rate": 4.710661910424098e-07, + "logits/chosen": -1.2889223098754883, + "logits/rejected": -1.2503786087036133, + "logps/chosen": -100.99467468261719, + "logps/rejected": -102.57280731201172, + "loss": 0.4632, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.447457492351532, + "rewards/margins": 1.597954273223877, + "rewards/rejected": -2.0454115867614746, + "step": 853 + }, + { + "epoch": 1.37, + "learning_rate": 4.709671026555688e-07, + "logits/chosen": -1.4053295850753784, + "logits/rejected": -1.5117361545562744, + "logps/chosen": -79.9698715209961, + "logps/rejected": -114.35211944580078, + "loss": 0.3274, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.36292287707328796, + "rewards/margins": 1.8638670444488525, + "rewards/rejected": -2.226789951324463, + "step": 854 + }, + { + "epoch": 1.37, + "learning_rate": 4.708680142687277e-07, + "logits/chosen": -1.518999695777893, + "logits/rejected": -1.445088505744934, + "logps/chosen": -100.81340026855469, + "logps/rejected": -114.19766235351562, + "loss": 0.4015, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.027595043182373, + "rewards/margins": 1.0249669551849365, + "rewards/rejected": -3.0525619983673096, + "step": 855 + }, + { + "epoch": 1.37, + "learning_rate": 4.707689258818866e-07, + "logits/chosen": -1.387768268585205, + "logits/rejected": -1.3432828187942505, + "logps/chosen": -84.4090805053711, + "logps/rejected": -107.17613220214844, + "loss": 0.6037, + "rewards/accuracies": 0.5, + "rewards/chosen": -1.4655961990356445, + "rewards/margins": 1.1840386390686035, + "rewards/rejected": -2.649634838104248, + "step": 856 + }, + { + "epoch": 1.38, + "learning_rate": 4.7066983749504556e-07, + "logits/chosen": -1.5120022296905518, + "logits/rejected": -1.5483226776123047, + "logps/chosen": -107.60599517822266, + "logps/rejected": -122.77335357666016, + "loss": 0.3014, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.149163842201233, + "rewards/margins": 1.7211452722549438, + "rewards/rejected": -2.8703091144561768, + "step": 857 + }, + { + "epoch": 1.38, + "learning_rate": 4.7057074910820446e-07, + "logits/chosen": -1.5409016609191895, + "logits/rejected": -1.5365424156188965, + "logps/chosen": -90.19618225097656, + "logps/rejected": -89.18219757080078, + "loss": 0.4012, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.931050181388855, + "rewards/margins": 0.15985578298568726, + "rewards/rejected": -1.0909059047698975, + "step": 858 + }, + { + "epoch": 1.38, + "learning_rate": 4.704716607213635e-07, + "logits/chosen": -1.4588663578033447, + "logits/rejected": -1.3916168212890625, + "logps/chosen": -108.2625732421875, + "logps/rejected": -132.9908905029297, + "loss": 0.3648, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6725414991378784, + "rewards/margins": 2.2511508464813232, + "rewards/rejected": -2.923692226409912, + "step": 859 + }, + { + "epoch": 1.38, + "learning_rate": 4.703725723345224e-07, + "logits/chosen": -1.47183096408844, + "logits/rejected": -1.4623241424560547, + "logps/chosen": -86.77767944335938, + "logps/rejected": -83.90248107910156, + "loss": 0.4381, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.20253248512744904, + "rewards/margins": 2.0272626876831055, + "rewards/rejected": -2.229794979095459, + "step": 860 + }, + { + "epoch": 1.38, + "learning_rate": 4.702734839476813e-07, + "logits/chosen": -1.457409143447876, + "logits/rejected": -1.4948185682296753, + "logps/chosen": -84.48971557617188, + "logps/rejected": -74.28926086425781, + "loss": 0.5545, + "rewards/accuracies": 0.25, + "rewards/chosen": -0.6398317217826843, + "rewards/margins": -0.026532717049121857, + "rewards/rejected": -0.6132989525794983, + "step": 861 + }, + { + "epoch": 1.38, + "learning_rate": 4.7017439556084025e-07, + "logits/chosen": -1.4775032997131348, + "logits/rejected": -1.4491175413131714, + "logps/chosen": -87.09159851074219, + "logps/rejected": -84.67426300048828, + "loss": 0.3967, + "rewards/accuracies": 0.5, + "rewards/chosen": -1.1899977922439575, + "rewards/margins": 0.10587304830551147, + "rewards/rejected": -1.2958707809448242, + "step": 862 + }, + { + "epoch": 1.39, + "learning_rate": 4.7007530717399916e-07, + "logits/chosen": -1.4812092781066895, + "logits/rejected": -1.5229347944259644, + "logps/chosen": -76.72782897949219, + "logps/rejected": -141.01931762695312, + "loss": 0.3109, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.020018011331558228, + "rewards/margins": 4.005414009094238, + "rewards/rejected": -3.985395908355713, + "step": 863 + }, + { + "epoch": 1.39, + "learning_rate": 4.699762187871581e-07, + "logits/chosen": -1.2668180465698242, + "logits/rejected": -1.4081354141235352, + "logps/chosen": -91.11351013183594, + "logps/rejected": -149.130859375, + "loss": 0.3815, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.40102118253707886, + "rewards/margins": 2.5794661045074463, + "rewards/rejected": -2.98048734664917, + "step": 864 + }, + { + "epoch": 1.39, + "learning_rate": 4.698771304003171e-07, + "logits/chosen": -1.6380810737609863, + "logits/rejected": -1.5794364213943481, + "logps/chosen": -91.00669860839844, + "logps/rejected": -99.55502319335938, + "loss": 0.249, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.11631183326244354, + "rewards/margins": 1.5324782133102417, + "rewards/rejected": -1.4161663055419922, + "step": 865 + }, + { + "epoch": 1.39, + "learning_rate": 4.69778042013476e-07, + "logits/chosen": -1.5631719827651978, + "logits/rejected": -1.5234401226043701, + "logps/chosen": -93.40147399902344, + "logps/rejected": -135.0677947998047, + "loss": 0.322, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.530066192150116, + "rewards/margins": 3.126030206680298, + "rewards/rejected": -3.6560962200164795, + "step": 866 + }, + { + "epoch": 1.39, + "learning_rate": 4.6967895362663494e-07, + "logits/chosen": -1.3415358066558838, + "logits/rejected": -1.4452779293060303, + "logps/chosen": -67.5946273803711, + "logps/rejected": -98.42156219482422, + "loss": 0.2048, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.2359386533498764, + "rewards/margins": 2.9021894931793213, + "rewards/rejected": -3.1381282806396484, + "step": 867 + }, + { + "epoch": 1.39, + "learning_rate": 4.6957986523979385e-07, + "logits/chosen": -1.4457942247390747, + "logits/rejected": -1.4557454586029053, + "logps/chosen": -74.195068359375, + "logps/rejected": -94.54876708984375, + "loss": 0.3586, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.38051164150238037, + "rewards/margins": 2.1703076362609863, + "rewards/rejected": -2.5508193969726562, + "step": 868 + }, + { + "epoch": 1.39, + "learning_rate": 4.694807768529528e-07, + "logits/chosen": -1.2388012409210205, + "logits/rejected": -1.1936167478561401, + "logps/chosen": -66.7254409790039, + "logps/rejected": -87.50965881347656, + "loss": 0.3173, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.6898751854896545, + "rewards/margins": 1.0155903100967407, + "rewards/rejected": -1.70546555519104, + "step": 869 + }, + { + "epoch": 1.4, + "learning_rate": 4.6938168846611177e-07, + "logits/chosen": -1.4142231941223145, + "logits/rejected": -1.3796236515045166, + "logps/chosen": -86.62666320800781, + "logps/rejected": -124.37645721435547, + "loss": 0.3945, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9362061023712158, + "rewards/margins": 2.50740122795105, + "rewards/rejected": -3.4436073303222656, + "step": 870 + }, + { + "epoch": 1.4, + "learning_rate": 4.692826000792707e-07, + "logits/chosen": -1.451601266860962, + "logits/rejected": -1.4383888244628906, + "logps/chosen": -94.58566284179688, + "logps/rejected": -90.20590209960938, + "loss": 0.3182, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.6215428113937378, + "rewards/margins": 0.9986903667449951, + "rewards/rejected": -1.6202332973480225, + "step": 871 + }, + { + "epoch": 1.4, + "learning_rate": 4.6918351169242964e-07, + "logits/chosen": -1.4720675945281982, + "logits/rejected": -1.484776496887207, + "logps/chosen": -93.87531280517578, + "logps/rejected": -101.55762481689453, + "loss": 0.3522, + "rewards/accuracies": 0.75, + "rewards/chosen": -1.0442681312561035, + "rewards/margins": 1.2048218250274658, + "rewards/rejected": -2.2490899562835693, + "step": 872 + }, + { + "epoch": 1.4, + "learning_rate": 4.6908442330558854e-07, + "logits/chosen": -1.3939129114151, + "logits/rejected": -1.4021170139312744, + "logps/chosen": -86.27481842041016, + "logps/rejected": -115.74496459960938, + "loss": 0.3233, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.906700849533081, + "rewards/margins": 1.3566888570785522, + "rewards/rejected": -2.2633895874023438, + "step": 873 + }, + { + "epoch": 1.4, + "learning_rate": 4.689853349187475e-07, + "logits/chosen": -1.4139723777770996, + "logits/rejected": -1.4083621501922607, + "logps/chosen": -113.32118225097656, + "logps/rejected": -113.8725357055664, + "loss": 0.4618, + "rewards/accuracies": 0.75, + "rewards/chosen": -1.456540822982788, + "rewards/margins": 1.6386935710906982, + "rewards/rejected": -3.0952343940734863, + "step": 874 + }, + { + "epoch": 1.4, + "learning_rate": 4.6888624653190646e-07, + "logits/chosen": -1.3117401599884033, + "logits/rejected": -1.3930554389953613, + "logps/chosen": -113.69795227050781, + "logps/rejected": -151.40768432617188, + "loss": 0.4017, + "rewards/accuracies": 0.5, + "rewards/chosen": -1.424972653388977, + "rewards/margins": 2.1442532539367676, + "rewards/rejected": -3.569225788116455, + "step": 875 + }, + { + "epoch": 1.41, + "learning_rate": 4.6878715814506537e-07, + "logits/chosen": -1.317976951599121, + "logits/rejected": -1.3304226398468018, + "logps/chosen": -90.28089904785156, + "logps/rejected": -96.00550079345703, + "loss": 0.2934, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.8654288649559021, + "rewards/margins": 1.6907366514205933, + "rewards/rejected": -2.5561654567718506, + "step": 876 + }, + { + "epoch": 1.41, + "learning_rate": 4.6868806975822433e-07, + "logits/chosen": -1.5312275886535645, + "logits/rejected": -1.5187056064605713, + "logps/chosen": -77.1458740234375, + "logps/rejected": -107.45299530029297, + "loss": 0.3769, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.10662470012903214, + "rewards/margins": 1.978196144104004, + "rewards/rejected": -1.8715713024139404, + "step": 877 + }, + { + "epoch": 1.41, + "learning_rate": 4.6858898137138324e-07, + "logits/chosen": -1.434421420097351, + "logits/rejected": -1.461845874786377, + "logps/chosen": -103.7794418334961, + "logps/rejected": -115.21441650390625, + "loss": 0.4336, + "rewards/accuracies": 0.75, + "rewards/chosen": -1.2939445972442627, + "rewards/margins": 1.128411054611206, + "rewards/rejected": -2.422355890274048, + "step": 878 + }, + { + "epoch": 1.41, + "learning_rate": 4.6848989298454214e-07, + "logits/chosen": -1.437088966369629, + "logits/rejected": -1.3501012325286865, + "logps/chosen": -107.9480209350586, + "logps/rejected": -109.37284088134766, + "loss": 0.2885, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.7518547177314758, + "rewards/margins": 1.5608041286468506, + "rewards/rejected": -2.3126587867736816, + "step": 879 + }, + { + "epoch": 1.41, + "learning_rate": 4.6839080459770116e-07, + "logits/chosen": -1.5032329559326172, + "logits/rejected": -1.4395248889923096, + "logps/chosen": -71.29470825195312, + "logps/rejected": -100.636962890625, + "loss": 0.3382, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.19406695663928986, + "rewards/margins": 2.7594258785247803, + "rewards/rejected": -2.9534926414489746, + "step": 880 + }, + { + "epoch": 1.41, + "learning_rate": 4.6829171621086006e-07, + "logits/chosen": -1.4943766593933105, + "logits/rejected": -1.4123082160949707, + "logps/chosen": -113.8065414428711, + "logps/rejected": -126.65861511230469, + "loss": 0.3207, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.3267940282821655, + "rewards/margins": 2.9686355590820312, + "rewards/rejected": -3.295429229736328, + "step": 881 + }, + { + "epoch": 1.42, + "learning_rate": 4.68192627824019e-07, + "logits/chosen": -1.5374174118041992, + "logits/rejected": -1.5144572257995605, + "logps/chosen": -85.26484680175781, + "logps/rejected": -116.28812408447266, + "loss": 0.5004, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1549571752548218, + "rewards/margins": 2.4557230472564697, + "rewards/rejected": -3.610680103302002, + "step": 882 + }, + { + "epoch": 1.42, + "learning_rate": 4.6809353943717793e-07, + "logits/chosen": -1.3149759769439697, + "logits/rejected": -1.2872226238250732, + "logps/chosen": -67.63557434082031, + "logps/rejected": -100.64028930664062, + "loss": 0.2515, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.043538182973861694, + "rewards/margins": 2.821659803390503, + "rewards/rejected": -2.7781214714050293, + "step": 883 + }, + { + "epoch": 1.42, + "learning_rate": 4.6799445105033684e-07, + "logits/chosen": -1.3905415534973145, + "logits/rejected": -1.2951328754425049, + "logps/chosen": -98.84854125976562, + "logps/rejected": -115.005126953125, + "loss": 0.4011, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.077976942062378, + "rewards/margins": 1.758192539215088, + "rewards/rejected": -2.836169481277466, + "step": 884 + }, + { + "epoch": 1.42, + "learning_rate": 4.6789536266349585e-07, + "logits/chosen": -1.3182439804077148, + "logits/rejected": -1.366581678390503, + "logps/chosen": -104.0657730102539, + "logps/rejected": -114.08162689208984, + "loss": 0.3041, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.47874563932418823, + "rewards/margins": 1.3076506853103638, + "rewards/rejected": -1.7863962650299072, + "step": 885 + }, + { + "epoch": 1.42, + "learning_rate": 4.6779627427665475e-07, + "logits/chosen": -1.375813603401184, + "logits/rejected": -1.4435150623321533, + "logps/chosen": -71.88319396972656, + "logps/rejected": -125.96277618408203, + "loss": 0.2617, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5722928047180176, + "rewards/margins": 3.961413860321045, + "rewards/rejected": -4.5337066650390625, + "step": 886 + }, + { + "epoch": 1.42, + "learning_rate": 4.676971858898137e-07, + "logits/chosen": -1.3260478973388672, + "logits/rejected": -1.3771055936813354, + "logps/chosen": -73.5811538696289, + "logps/rejected": -104.68587493896484, + "loss": 0.4953, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.9636367559432983, + "rewards/margins": 0.5769127011299133, + "rewards/rejected": -1.5405495166778564, + "step": 887 + }, + { + "epoch": 1.43, + "learning_rate": 4.675980975029726e-07, + "logits/chosen": -1.3535733222961426, + "logits/rejected": -1.3486523628234863, + "logps/chosen": -98.47283172607422, + "logps/rejected": -106.98872375488281, + "loss": 0.4608, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.37851354479789734, + "rewards/margins": 1.2820786237716675, + "rewards/rejected": -1.6605923175811768, + "step": 888 + }, + { + "epoch": 1.43, + "learning_rate": 4.6749900911613153e-07, + "logits/chosen": -1.3645515441894531, + "logits/rejected": -1.4006030559539795, + "logps/chosen": -128.36959838867188, + "logps/rejected": -154.19097900390625, + "loss": 0.347, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7762003540992737, + "rewards/margins": 2.6028060913085938, + "rewards/rejected": -3.3790063858032227, + "step": 889 + }, + { + "epoch": 1.43, + "learning_rate": 4.6739992072929054e-07, + "logits/chosen": -1.5391274690628052, + "logits/rejected": -1.5041810274124146, + "logps/chosen": -97.58425903320312, + "logps/rejected": -103.10050201416016, + "loss": 0.549, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.3001726269721985, + "rewards/margins": 1.5541646480560303, + "rewards/rejected": -1.8543373346328735, + "step": 890 + }, + { + "epoch": 1.43, + "learning_rate": 4.6730083234244945e-07, + "logits/chosen": -1.4837735891342163, + "logits/rejected": -1.430382251739502, + "logps/chosen": -85.65047454833984, + "logps/rejected": -117.6192626953125, + "loss": 0.1934, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5197610855102539, + "rewards/margins": 4.435118198394775, + "rewards/rejected": -4.954879283905029, + "step": 891 + }, + { + "epoch": 1.43, + "learning_rate": 4.672017439556084e-07, + "logits/chosen": -1.42747962474823, + "logits/rejected": -1.4147052764892578, + "logps/chosen": -90.48818969726562, + "logps/rejected": -90.69126892089844, + "loss": 0.2246, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.3176918029785156, + "rewards/margins": 1.1243195533752441, + "rewards/rejected": -1.4420113563537598, + "step": 892 + }, + { + "epoch": 1.43, + "learning_rate": 4.671026555687673e-07, + "logits/chosen": -1.5101330280303955, + "logits/rejected": -1.5449793338775635, + "logps/chosen": -123.83343505859375, + "logps/rejected": -119.34120178222656, + "loss": 0.3698, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.6401113867759705, + "rewards/margins": 2.1373038291931152, + "rewards/rejected": -2.7774152755737305, + "step": 893 + }, + { + "epoch": 1.43, + "learning_rate": 4.670035671819262e-07, + "logits/chosen": -1.3743782043457031, + "logits/rejected": -1.359755516052246, + "logps/chosen": -91.97722625732422, + "logps/rejected": -103.02430725097656, + "loss": 0.2837, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5685886144638062, + "rewards/margins": 1.3630508184432983, + "rewards/rejected": -1.9316394329071045, + "step": 894 + }, + { + "epoch": 1.44, + "learning_rate": 4.6690447879508523e-07, + "logits/chosen": -1.3846518993377686, + "logits/rejected": -1.4043997526168823, + "logps/chosen": -61.48664855957031, + "logps/rejected": -117.44918823242188, + "loss": 0.1898, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14072465896606445, + "rewards/margins": 4.380633354187012, + "rewards/rejected": -4.521358013153076, + "step": 895 + }, + { + "epoch": 1.44, + "learning_rate": 4.6680539040824414e-07, + "logits/chosen": -1.5434738397598267, + "logits/rejected": -1.4383676052093506, + "logps/chosen": -98.13920593261719, + "logps/rejected": -105.07804870605469, + "loss": 0.3408, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2275211662054062, + "rewards/margins": 3.0952024459838867, + "rewards/rejected": -2.8676812648773193, + "step": 896 + }, + { + "epoch": 1.44, + "learning_rate": 4.667063020214031e-07, + "logits/chosen": -1.438215732574463, + "logits/rejected": -1.40582275390625, + "logps/chosen": -66.88848876953125, + "logps/rejected": -95.39006042480469, + "loss": 0.2331, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5136940479278564, + "rewards/margins": 2.819331645965576, + "rewards/rejected": -3.3330259323120117, + "step": 897 + }, + { + "epoch": 1.44, + "learning_rate": 4.66607213634562e-07, + "logits/chosen": -1.4046623706817627, + "logits/rejected": -1.4286394119262695, + "logps/chosen": -101.06310272216797, + "logps/rejected": -115.10508728027344, + "loss": 0.3611, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.3098629117012024, + "rewards/margins": 1.7852933406829834, + "rewards/rejected": -2.09515643119812, + "step": 898 + }, + { + "epoch": 1.44, + "learning_rate": 4.665081252477209e-07, + "logits/chosen": -1.2360715866088867, + "logits/rejected": -1.307201623916626, + "logps/chosen": -93.83660888671875, + "logps/rejected": -114.37376403808594, + "loss": 0.3033, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.6931122541427612, + "rewards/margins": 1.3132197856903076, + "rewards/rejected": -2.0063319206237793, + "step": 899 + }, + { + "epoch": 1.44, + "learning_rate": 4.6640903686087993e-07, + "logits/chosen": -1.2921111583709717, + "logits/rejected": -1.2395963668823242, + "logps/chosen": -103.99440002441406, + "logps/rejected": -94.9100341796875, + "loss": 0.4323, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.040859609842300415, + "rewards/margins": 1.0148534774780273, + "rewards/rejected": -1.0557130575180054, + "step": 900 + }, + { + "epoch": 1.45, + "learning_rate": 4.6630994847403883e-07, + "logits/chosen": -1.406374454498291, + "logits/rejected": -1.4066002368927002, + "logps/chosen": -71.69100952148438, + "logps/rejected": -122.65707397460938, + "loss": 0.3685, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0006952248513698578, + "rewards/margins": 2.781062364578247, + "rewards/rejected": -2.7817575931549072, + "step": 901 + }, + { + "epoch": 1.45, + "learning_rate": 4.6621086008719774e-07, + "logits/chosen": -1.4664994478225708, + "logits/rejected": -1.4376964569091797, + "logps/chosen": -97.77851867675781, + "logps/rejected": -92.36683654785156, + "loss": 0.4842, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5444978475570679, + "rewards/margins": 1.086294174194336, + "rewards/rejected": -1.6307920217514038, + "step": 902 + }, + { + "epoch": 1.45, + "learning_rate": 4.661117717003567e-07, + "logits/chosen": -1.3745418787002563, + "logits/rejected": -1.368604063987732, + "logps/chosen": -81.39274597167969, + "logps/rejected": -96.85607147216797, + "loss": 0.3506, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.01571149379014969, + "rewards/margins": 0.7546843886375427, + "rewards/rejected": -0.770395815372467, + "step": 903 + }, + { + "epoch": 1.45, + "learning_rate": 4.660126833135156e-07, + "logits/chosen": -1.4186973571777344, + "logits/rejected": -1.4188430309295654, + "logps/chosen": -92.38774108886719, + "logps/rejected": -153.6502685546875, + "loss": 0.3403, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.26283854246139526, + "rewards/margins": 2.900681257247925, + "rewards/rejected": -3.163519859313965, + "step": 904 + }, + { + "epoch": 1.45, + "learning_rate": 4.6591359492667457e-07, + "logits/chosen": -1.501907467842102, + "logits/rejected": -1.4764553308486938, + "logps/chosen": -83.87001037597656, + "logps/rejected": -137.6434783935547, + "loss": 0.4696, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.34312400221824646, + "rewards/margins": 3.8961918354034424, + "rewards/rejected": -4.239315986633301, + "step": 905 + }, + { + "epoch": 1.45, + "learning_rate": 4.6581450653983353e-07, + "logits/chosen": -1.4294248819351196, + "logits/rejected": -1.403000831604004, + "logps/chosen": -89.98622131347656, + "logps/rejected": -127.34136199951172, + "loss": 0.3404, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.16145972907543182, + "rewards/margins": 4.4762678146362305, + "rewards/rejected": -4.314807891845703, + "step": 906 + }, + { + "epoch": 1.46, + "learning_rate": 4.6571541815299243e-07, + "logits/chosen": -1.4761618375778198, + "logits/rejected": -1.4380292892456055, + "logps/chosen": -77.11106872558594, + "logps/rejected": -148.62921142578125, + "loss": 0.216, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9934589862823486, + "rewards/margins": 3.18493914604187, + "rewards/rejected": -4.178398132324219, + "step": 907 + }, + { + "epoch": 1.46, + "learning_rate": 4.656163297661514e-07, + "logits/chosen": -1.4347518682479858, + "logits/rejected": -1.4876621961593628, + "logps/chosen": -81.31510162353516, + "logps/rejected": -120.35031127929688, + "loss": 0.2697, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.06777878850698471, + "rewards/margins": 2.143982172012329, + "rewards/rejected": -2.211760997772217, + "step": 908 + }, + { + "epoch": 1.46, + "learning_rate": 4.655172413793103e-07, + "logits/chosen": -1.3443711996078491, + "logits/rejected": -1.3774003982543945, + "logps/chosen": -79.68647766113281, + "logps/rejected": -100.96809387207031, + "loss": 0.2346, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4161483943462372, + "rewards/margins": 3.059497833251953, + "rewards/rejected": -2.6433496475219727, + "step": 909 + }, + { + "epoch": 1.46, + "learning_rate": 4.6541815299246926e-07, + "logits/chosen": -1.3228216171264648, + "logits/rejected": -1.344215989112854, + "logps/chosen": -98.95500946044922, + "logps/rejected": -113.42491149902344, + "loss": 0.4985, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.6723181009292603, + "rewards/margins": 1.2785285711288452, + "rewards/rejected": -0.6062105298042297, + "step": 910 + }, + { + "epoch": 1.46, + "learning_rate": 4.653190646056282e-07, + "logits/chosen": -1.356433391571045, + "logits/rejected": -1.2758326530456543, + "logps/chosen": -80.32989501953125, + "logps/rejected": -115.40151977539062, + "loss": 0.2239, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07580652832984924, + "rewards/margins": 4.578855514526367, + "rewards/rejected": -4.654662609100342, + "step": 911 + }, + { + "epoch": 1.46, + "learning_rate": 4.652199762187871e-07, + "logits/chosen": -1.5282936096191406, + "logits/rejected": -1.514829397201538, + "logps/chosen": -86.63407135009766, + "logps/rejected": -109.25668334960938, + "loss": 0.1867, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4665224552154541, + "rewards/margins": 1.9845013618469238, + "rewards/rejected": -1.5179787874221802, + "step": 912 + }, + { + "epoch": 1.47, + "learning_rate": 4.651208878319461e-07, + "logits/chosen": -1.5002914667129517, + "logits/rejected": -1.400328516960144, + "logps/chosen": -116.97653198242188, + "logps/rejected": -112.76649475097656, + "loss": 0.2876, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.48691269755363464, + "rewards/margins": 1.9900314807891846, + "rewards/rejected": -1.5031187534332275, + "step": 913 + }, + { + "epoch": 1.47, + "learning_rate": 4.65021799445105e-07, + "logits/chosen": -1.3893752098083496, + "logits/rejected": -1.3898335695266724, + "logps/chosen": -73.63349914550781, + "logps/rejected": -96.27476501464844, + "loss": 0.3311, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.45642921328544617, + "rewards/margins": 0.7621561884880066, + "rewards/rejected": -1.2185853719711304, + "step": 914 + }, + { + "epoch": 1.47, + "learning_rate": 4.6492271105826395e-07, + "logits/chosen": -1.3102054595947266, + "logits/rejected": -1.3869819641113281, + "logps/chosen": -95.91415405273438, + "logps/rejected": -108.11830139160156, + "loss": 0.3124, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.5101137757301331, + "rewards/margins": 2.0352354049682617, + "rewards/rejected": -1.5251215696334839, + "step": 915 + }, + { + "epoch": 1.47, + "learning_rate": 4.648236226714229e-07, + "logits/chosen": -1.4694966077804565, + "logits/rejected": -1.3997411727905273, + "logps/chosen": -77.66216278076172, + "logps/rejected": -111.53197479248047, + "loss": 0.3479, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3091737926006317, + "rewards/margins": 3.4702582359313965, + "rewards/rejected": -3.1610846519470215, + "step": 916 + }, + { + "epoch": 1.47, + "learning_rate": 4.647245342845818e-07, + "logits/chosen": -1.4848833084106445, + "logits/rejected": -1.4953184127807617, + "logps/chosen": -80.35623168945312, + "logps/rejected": -105.97962188720703, + "loss": 0.2591, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.056652069091796875, + "rewards/margins": 2.926330089569092, + "rewards/rejected": -2.9829821586608887, + "step": 917 + }, + { + "epoch": 1.47, + "learning_rate": 4.646254458977408e-07, + "logits/chosen": -1.2830451726913452, + "logits/rejected": -1.289259433746338, + "logps/chosen": -86.11398315429688, + "logps/rejected": -95.72228240966797, + "loss": 0.34, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.04075498506426811, + "rewards/margins": 1.557837724685669, + "rewards/rejected": -1.517082691192627, + "step": 918 + }, + { + "epoch": 1.48, + "learning_rate": 4.645263575108997e-07, + "logits/chosen": -1.3636250495910645, + "logits/rejected": -1.3746397495269775, + "logps/chosen": -97.27259826660156, + "logps/rejected": -105.15577697753906, + "loss": 0.5179, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.4429548382759094, + "rewards/margins": 0.5924179553985596, + "rewards/rejected": -1.0353728532791138, + "step": 919 + }, + { + "epoch": 1.48, + "learning_rate": 4.6442726912405865e-07, + "logits/chosen": -1.442186713218689, + "logits/rejected": -1.4289838075637817, + "logps/chosen": -69.01321411132812, + "logps/rejected": -110.20108032226562, + "loss": 0.24, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.039875805377960205, + "rewards/margins": 4.544281482696533, + "rewards/rejected": -4.584157466888428, + "step": 920 + }, + { + "epoch": 1.48, + "learning_rate": 4.6432818073721755e-07, + "logits/chosen": -1.2980499267578125, + "logits/rejected": -1.353243350982666, + "logps/chosen": -65.94352722167969, + "logps/rejected": -153.7466583251953, + "loss": 0.2812, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.20981617271900177, + "rewards/margins": 3.5718603134155273, + "rewards/rejected": -3.781676769256592, + "step": 921 + }, + { + "epoch": 1.48, + "learning_rate": 4.642290923503765e-07, + "logits/chosen": -1.4613715410232544, + "logits/rejected": -1.4368832111358643, + "logps/chosen": -90.64324951171875, + "logps/rejected": -137.30572509765625, + "loss": 0.3476, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.6795763969421387, + "rewards/margins": 3.4955599308013916, + "rewards/rejected": -4.175136089324951, + "step": 922 + }, + { + "epoch": 1.48, + "learning_rate": 4.6413000396353547e-07, + "logits/chosen": -1.3887336254119873, + "logits/rejected": -1.4102400541305542, + "logps/chosen": -73.77508544921875, + "logps/rejected": -97.16085052490234, + "loss": 0.3743, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.11372499167919159, + "rewards/margins": 1.213293194770813, + "rewards/rejected": -1.327018141746521, + "step": 923 + }, + { + "epoch": 1.48, + "learning_rate": 4.640309155766944e-07, + "logits/chosen": -1.537772536277771, + "logits/rejected": -1.4633550643920898, + "logps/chosen": -97.54913330078125, + "logps/rejected": -116.5672836303711, + "loss": 0.2697, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.035451143980026245, + "rewards/margins": 4.348438262939453, + "rewards/rejected": -4.383890151977539, + "step": 924 + }, + { + "epoch": 1.48, + "learning_rate": 4.6393182718985334e-07, + "logits/chosen": -1.4375576972961426, + "logits/rejected": -1.2967908382415771, + "logps/chosen": -108.53079223632812, + "logps/rejected": -88.81774139404297, + "loss": 0.3434, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.08600884675979614, + "rewards/margins": 0.5733117461204529, + "rewards/rejected": -0.659320592880249, + "step": 925 + }, + { + "epoch": 1.49, + "learning_rate": 4.6383273880301225e-07, + "logits/chosen": -1.3495575189590454, + "logits/rejected": -1.4659491777420044, + "logps/chosen": -97.14730072021484, + "logps/rejected": -124.65142822265625, + "loss": 0.2685, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0404483824968338, + "rewards/margins": 2.682218551635742, + "rewards/rejected": -2.7226672172546387, + "step": 926 + }, + { + "epoch": 1.49, + "learning_rate": 4.637336504161712e-07, + "logits/chosen": -1.3588322401046753, + "logits/rejected": -1.3224797248840332, + "logps/chosen": -65.47914123535156, + "logps/rejected": -100.37338256835938, + "loss": 0.5262, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.12276049703359604, + "rewards/margins": 1.8228716850280762, + "rewards/rejected": -1.7001111507415771, + "step": 927 + }, + { + "epoch": 1.49, + "learning_rate": 4.6363456202933017e-07, + "logits/chosen": -1.5068373680114746, + "logits/rejected": -1.6560909748077393, + "logps/chosen": -88.4123764038086, + "logps/rejected": -149.53627014160156, + "loss": 0.3686, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.44875484704971313, + "rewards/margins": 1.9377626180648804, + "rewards/rejected": -2.386517286300659, + "step": 928 + }, + { + "epoch": 1.49, + "learning_rate": 4.6353547364248907e-07, + "logits/chosen": -1.4852499961853027, + "logits/rejected": -1.3858520984649658, + "logps/chosen": -98.94664001464844, + "logps/rejected": -112.57235717773438, + "loss": 0.2965, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.155055433511734, + "rewards/margins": 2.3171205520629883, + "rewards/rejected": -2.4721760749816895, + "step": 929 + }, + { + "epoch": 1.49, + "learning_rate": 4.6343638525564803e-07, + "logits/chosen": -1.4822731018066406, + "logits/rejected": -1.5193202495574951, + "logps/chosen": -84.67195129394531, + "logps/rejected": -97.95703125, + "loss": 0.3186, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.11891861259937286, + "rewards/margins": 2.794870376586914, + "rewards/rejected": -2.6759514808654785, + "step": 930 + }, + { + "epoch": 1.49, + "learning_rate": 4.6333729686880694e-07, + "logits/chosen": -1.214950680732727, + "logits/rejected": -1.2424671649932861, + "logps/chosen": -78.85543060302734, + "logps/rejected": -117.67984771728516, + "loss": 0.3866, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.330367773771286, + "rewards/margins": 2.4168190956115723, + "rewards/rejected": -2.0864510536193848, + "step": 931 + }, + { + "epoch": 1.5, + "learning_rate": 4.632382084819659e-07, + "logits/chosen": -1.3984066247940063, + "logits/rejected": -1.469548225402832, + "logps/chosen": -93.10995483398438, + "logps/rejected": -119.76518249511719, + "loss": 0.3683, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.4178352355957031, + "rewards/margins": 3.8203067779541016, + "rewards/rejected": -4.238142013549805, + "step": 932 + }, + { + "epoch": 1.5, + "learning_rate": 4.6313912009512486e-07, + "logits/chosen": -1.4618090391159058, + "logits/rejected": -1.4438858032226562, + "logps/chosen": -65.69481658935547, + "logps/rejected": -107.59117126464844, + "loss": 0.3207, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.2945247292518616, + "rewards/margins": 1.9162434339523315, + "rewards/rejected": -1.6217188835144043, + "step": 933 + }, + { + "epoch": 1.5, + "learning_rate": 4.6304003170828376e-07, + "logits/chosen": -1.5085370540618896, + "logits/rejected": -1.4700191020965576, + "logps/chosen": -96.97554016113281, + "logps/rejected": -111.40365600585938, + "loss": 0.3341, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.6325744986534119, + "rewards/margins": 1.8233253955841064, + "rewards/rejected": -2.455899953842163, + "step": 934 + }, + { + "epoch": 1.5, + "learning_rate": 4.6294094332144267e-07, + "logits/chosen": -1.4889116287231445, + "logits/rejected": -1.5522984266281128, + "logps/chosen": -84.9996337890625, + "logps/rejected": -128.31568908691406, + "loss": 0.1234, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.18504923582077026, + "rewards/margins": 2.8658769130706787, + "rewards/rejected": -3.0509262084960938, + "step": 935 + }, + { + "epoch": 1.5, + "learning_rate": 4.6284185493460163e-07, + "logits/chosen": -1.457676649093628, + "logits/rejected": -1.517907738685608, + "logps/chosen": -61.2161750793457, + "logps/rejected": -110.05174255371094, + "loss": 0.2535, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.44081756472587585, + "rewards/margins": 3.097536325454712, + "rewards/rejected": -2.6567187309265137, + "step": 936 + }, + { + "epoch": 1.5, + "learning_rate": 4.627427665477606e-07, + "logits/chosen": -1.43813157081604, + "logits/rejected": -1.4654161930084229, + "logps/chosen": -67.3921890258789, + "logps/rejected": -99.4871597290039, + "loss": 0.3411, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.44057297706604004, + "rewards/margins": 2.2915494441986084, + "rewards/rejected": -2.7321224212646484, + "step": 937 + }, + { + "epoch": 1.51, + "learning_rate": 4.6264367816091955e-07, + "logits/chosen": -1.3067927360534668, + "logits/rejected": -1.2871670722961426, + "logps/chosen": -105.64652252197266, + "logps/rejected": -126.95310974121094, + "loss": 0.3251, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.34107863903045654, + "rewards/margins": 2.6164193153381348, + "rewards/rejected": -2.957498073577881, + "step": 938 + }, + { + "epoch": 1.51, + "learning_rate": 4.6254458977407846e-07, + "logits/chosen": -1.4242613315582275, + "logits/rejected": -1.4869288206100464, + "logps/chosen": -87.24775695800781, + "logps/rejected": -102.61793518066406, + "loss": 0.46, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.4714038372039795, + "rewards/margins": 1.0430142879486084, + "rewards/rejected": -0.5716104507446289, + "step": 939 + }, + { + "epoch": 1.51, + "learning_rate": 4.6244550138723736e-07, + "logits/chosen": -1.5490344762802124, + "logits/rejected": -1.5137293338775635, + "logps/chosen": -96.7865982055664, + "logps/rejected": -85.44868469238281, + "loss": 0.4011, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.7109324932098389, + "rewards/margins": 0.5258219242095947, + "rewards/rejected": -1.2367545366287231, + "step": 940 + }, + { + "epoch": 1.51, + "learning_rate": 4.623464130003963e-07, + "logits/chosen": -1.3627170324325562, + "logits/rejected": -1.3364777565002441, + "logps/chosen": -94.70975494384766, + "logps/rejected": -121.93834686279297, + "loss": 0.4101, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.049361519515514374, + "rewards/margins": 2.155473232269287, + "rewards/rejected": -2.106111764907837, + "step": 941 + }, + { + "epoch": 1.51, + "learning_rate": 4.6224732461355523e-07, + "logits/chosen": -1.5533548593521118, + "logits/rejected": -1.5177409648895264, + "logps/chosen": -85.73707580566406, + "logps/rejected": -104.2629623413086, + "loss": 0.2723, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.33773577213287354, + "rewards/margins": 0.7632884979248047, + "rewards/rejected": -1.1010242700576782, + "step": 942 + }, + { + "epoch": 1.51, + "learning_rate": 4.6214823622671424e-07, + "logits/chosen": -1.483729362487793, + "logits/rejected": -1.495360016822815, + "logps/chosen": -70.50445556640625, + "logps/rejected": -124.27365112304688, + "loss": 0.3316, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.3351503312587738, + "rewards/margins": 3.1213865280151367, + "rewards/rejected": -3.4565367698669434, + "step": 943 + }, + { + "epoch": 1.52, + "learning_rate": 4.6204914783987315e-07, + "logits/chosen": -1.4071054458618164, + "logits/rejected": -1.5064728260040283, + "logps/chosen": -81.556884765625, + "logps/rejected": -101.21453857421875, + "loss": 0.2191, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.7193591594696045, + "rewards/margins": 2.1385772228240967, + "rewards/rejected": -2.857936382293701, + "step": 944 + }, + { + "epoch": 1.52, + "learning_rate": 4.6195005945303206e-07, + "logits/chosen": -1.3786580562591553, + "logits/rejected": -1.4068266153335571, + "logps/chosen": -91.22728729248047, + "logps/rejected": -88.6808853149414, + "loss": 0.4014, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.7144365310668945, + "rewards/margins": 1.931282877922058, + "rewards/rejected": -2.645719528198242, + "step": 945 + }, + { + "epoch": 1.52, + "learning_rate": 4.61850971066191e-07, + "logits/chosen": -1.3399759531021118, + "logits/rejected": -1.2982051372528076, + "logps/chosen": -73.30511474609375, + "logps/rejected": -103.96459197998047, + "loss": 0.3539, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.23546436429023743, + "rewards/margins": 2.8628687858581543, + "rewards/rejected": -3.0983331203460693, + "step": 946 + }, + { + "epoch": 1.52, + "learning_rate": 4.617518826793499e-07, + "logits/chosen": -1.259671688079834, + "logits/rejected": -1.2187355756759644, + "logps/chosen": -83.87641906738281, + "logps/rejected": -96.2335205078125, + "loss": 0.4012, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.2932940721511841, + "rewards/margins": 1.050594449043274, + "rewards/rejected": -1.343888521194458, + "step": 947 + }, + { + "epoch": 1.52, + "learning_rate": 4.6165279429250894e-07, + "logits/chosen": -1.4927632808685303, + "logits/rejected": -1.4773638248443604, + "logps/chosen": -66.66869354248047, + "logps/rejected": -106.94563293457031, + "loss": 0.2935, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13209104537963867, + "rewards/margins": 3.3130054473876953, + "rewards/rejected": -3.445096969604492, + "step": 948 + }, + { + "epoch": 1.52, + "learning_rate": 4.6155370590566784e-07, + "logits/chosen": -1.4594792127609253, + "logits/rejected": -1.480252742767334, + "logps/chosen": -93.0942153930664, + "logps/rejected": -115.61190032958984, + "loss": 0.209, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.24139881134033203, + "rewards/margins": 0.8090328574180603, + "rewards/rejected": -1.050431728363037, + "step": 949 + }, + { + "epoch": 1.52, + "learning_rate": 4.6145461751882675e-07, + "logits/chosen": -1.384578824043274, + "logits/rejected": -1.375487208366394, + "logps/chosen": -110.3106689453125, + "logps/rejected": -130.86264038085938, + "loss": 0.3879, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5186079144477844, + "rewards/margins": 2.4797332286834717, + "rewards/rejected": -2.9983413219451904, + "step": 950 + }, + { + "epoch": 1.53, + "learning_rate": 4.613555291319857e-07, + "logits/chosen": -1.523979663848877, + "logits/rejected": -1.4815473556518555, + "logps/chosen": -101.7852554321289, + "logps/rejected": -112.23004150390625, + "loss": 0.4285, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.4888429343700409, + "rewards/margins": 2.2766075134277344, + "rewards/rejected": -2.7654504776000977, + "step": 951 + }, + { + "epoch": 1.53, + "learning_rate": 4.612564407451446e-07, + "logits/chosen": -1.4715187549591064, + "logits/rejected": -1.5455567836761475, + "logps/chosen": -99.2022705078125, + "logps/rejected": -112.26203155517578, + "loss": 0.3656, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.3115118145942688, + "rewards/margins": 2.956911087036133, + "rewards/rejected": -3.268423080444336, + "step": 952 + }, + { + "epoch": 1.53, + "learning_rate": 4.6115735235830363e-07, + "logits/chosen": -1.3547937870025635, + "logits/rejected": -1.3612223863601685, + "logps/chosen": -113.89076232910156, + "logps/rejected": -106.14749908447266, + "loss": 0.4605, + "rewards/accuracies": 0.5, + "rewards/chosen": -1.281350016593933, + "rewards/margins": 1.468174934387207, + "rewards/rejected": -2.7495250701904297, + "step": 953 + }, + { + "epoch": 1.53, + "learning_rate": 4.6105826397146254e-07, + "logits/chosen": -1.3937907218933105, + "logits/rejected": -1.3948326110839844, + "logps/chosen": -98.12734985351562, + "logps/rejected": -103.2861099243164, + "loss": 0.4333, + "rewards/accuracies": 0.5, + "rewards/chosen": -1.4036295413970947, + "rewards/margins": -0.6857084631919861, + "rewards/rejected": -0.7179210782051086, + "step": 954 + }, + { + "epoch": 1.53, + "learning_rate": 4.6095917558462144e-07, + "logits/chosen": -1.6271742582321167, + "logits/rejected": -1.4446191787719727, + "logps/chosen": -110.58407592773438, + "logps/rejected": -113.29644012451172, + "loss": 0.3598, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.1455158293247223, + "rewards/margins": 1.3364981412887573, + "rewards/rejected": -1.4820139408111572, + "step": 955 + }, + { + "epoch": 1.53, + "learning_rate": 4.608600871977804e-07, + "logits/chosen": -1.4119004011154175, + "logits/rejected": -1.4209461212158203, + "logps/chosen": -90.01239776611328, + "logps/rejected": -113.68350219726562, + "loss": 0.4242, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09090936928987503, + "rewards/margins": 2.229572057723999, + "rewards/rejected": -2.320481300354004, + "step": 956 + }, + { + "epoch": 1.54, + "learning_rate": 4.607609988109393e-07, + "logits/chosen": -1.4334943294525146, + "logits/rejected": -1.4541555643081665, + "logps/chosen": -84.09185791015625, + "logps/rejected": -125.57305908203125, + "loss": 0.3691, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.10397548973560333, + "rewards/margins": 4.778892517089844, + "rewards/rejected": -4.674917221069336, + "step": 957 + }, + { + "epoch": 1.54, + "learning_rate": 4.606619104240983e-07, + "logits/chosen": -1.390702247619629, + "logits/rejected": -1.426508903503418, + "logps/chosen": -97.09410095214844, + "logps/rejected": -145.97866821289062, + "loss": 0.3638, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.844190776348114, + "rewards/margins": 4.024328231811523, + "rewards/rejected": -4.868518829345703, + "step": 958 + }, + { + "epoch": 1.54, + "learning_rate": 4.6056282203725723e-07, + "logits/chosen": -1.6196069717407227, + "logits/rejected": -1.6169894933700562, + "logps/chosen": -94.96128845214844, + "logps/rejected": -112.91299438476562, + "loss": 0.4615, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.8193411827087402, + "rewards/margins": 1.3217518329620361, + "rewards/rejected": -2.1410927772521973, + "step": 959 + }, + { + "epoch": 1.54, + "learning_rate": 4.6046373365041614e-07, + "logits/chosen": -1.168076753616333, + "logits/rejected": -1.2507717609405518, + "logps/chosen": -102.6574935913086, + "logps/rejected": -117.6396484375, + "loss": 0.2856, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.5214269757270813, + "rewards/margins": 0.38820287585258484, + "rewards/rejected": -0.9096298217773438, + "step": 960 + }, + { + "epoch": 1.54, + "learning_rate": 4.603646452635751e-07, + "logits/chosen": -1.4920493364334106, + "logits/rejected": -1.4531677961349487, + "logps/chosen": -99.96607971191406, + "logps/rejected": -108.853515625, + "loss": 0.4082, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.6153227090835571, + "rewards/margins": 1.878732442855835, + "rewards/rejected": -2.4940550327301025, + "step": 961 + }, + { + "epoch": 1.54, + "learning_rate": 4.60265556876734e-07, + "logits/chosen": -1.429014801979065, + "logits/rejected": -1.380807876586914, + "logps/chosen": -89.24000549316406, + "logps/rejected": -89.98797607421875, + "loss": 0.4782, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.562167763710022, + "rewards/margins": 0.8800352215766907, + "rewards/rejected": -1.4422030448913574, + "step": 962 + }, + { + "epoch": 1.55, + "learning_rate": 4.60166468489893e-07, + "logits/chosen": -1.4058364629745483, + "logits/rejected": -1.370947003364563, + "logps/chosen": -59.75323486328125, + "logps/rejected": -95.22298431396484, + "loss": 0.3141, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6056379675865173, + "rewards/margins": 2.2762327194213867, + "rewards/rejected": -2.8818705081939697, + "step": 963 + }, + { + "epoch": 1.55, + "learning_rate": 4.600673801030519e-07, + "logits/chosen": -1.3904266357421875, + "logits/rejected": -1.3679680824279785, + "logps/chosen": -106.01644897460938, + "logps/rejected": -102.35601043701172, + "loss": 0.4476, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.6941536664962769, + "rewards/margins": 1.4509857892990112, + "rewards/rejected": -2.145139217376709, + "step": 964 + }, + { + "epoch": 1.55, + "learning_rate": 4.5996829171621083e-07, + "logits/chosen": -1.452620506286621, + "logits/rejected": -1.4555031061172485, + "logps/chosen": -88.37944793701172, + "logps/rejected": -132.0923309326172, + "loss": 0.3684, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.22034265100955963, + "rewards/margins": 4.042835235595703, + "rewards/rejected": -4.263177871704102, + "step": 965 + }, + { + "epoch": 1.55, + "learning_rate": 4.598692033293698e-07, + "logits/chosen": -1.443289041519165, + "logits/rejected": -1.413745641708374, + "logps/chosen": -107.01828002929688, + "logps/rejected": -123.83030700683594, + "loss": 0.2466, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.19925576448440552, + "rewards/margins": 0.5510685443878174, + "rewards/rejected": -0.7503242492675781, + "step": 966 + }, + { + "epoch": 1.55, + "learning_rate": 4.597701149425287e-07, + "logits/chosen": -1.2009878158569336, + "logits/rejected": -1.2387077808380127, + "logps/chosen": -98.01970672607422, + "logps/rejected": -121.35942077636719, + "loss": 0.3574, + "rewards/accuracies": 0.75, + "rewards/chosen": -1.3604060411453247, + "rewards/margins": 1.6319060325622559, + "rewards/rejected": -2.99231219291687, + "step": 967 + }, + { + "epoch": 1.55, + "learning_rate": 4.5967102655568766e-07, + "logits/chosen": -1.4834645986557007, + "logits/rejected": -1.4672602415084839, + "logps/chosen": -68.78179931640625, + "logps/rejected": -86.08404541015625, + "loss": 0.2592, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5369505286216736, + "rewards/margins": 2.483149528503418, + "rewards/rejected": -3.0200998783111572, + "step": 968 + }, + { + "epoch": 1.56, + "learning_rate": 4.595719381688466e-07, + "logits/chosen": -1.356366753578186, + "logits/rejected": -1.3418768644332886, + "logps/chosen": -88.74575805664062, + "logps/rejected": -148.3057403564453, + "loss": 0.3358, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.10303860902786255, + "rewards/margins": 4.134504318237305, + "rewards/rejected": -4.031466007232666, + "step": 969 + }, + { + "epoch": 1.56, + "learning_rate": 4.594728497820055e-07, + "logits/chosen": -1.3647425174713135, + "logits/rejected": -1.386062741279602, + "logps/chosen": -91.63006591796875, + "logps/rejected": -106.4744873046875, + "loss": 0.2851, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6428052186965942, + "rewards/margins": 1.6235640048980713, + "rewards/rejected": -2.266369342803955, + "step": 970 + }, + { + "epoch": 1.56, + "learning_rate": 4.593737613951645e-07, + "logits/chosen": -1.3042144775390625, + "logits/rejected": -1.3781462907791138, + "logps/chosen": -84.85135650634766, + "logps/rejected": -115.61234283447266, + "loss": 0.3928, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.24445706605911255, + "rewards/margins": 3.415691614151001, + "rewards/rejected": -3.171234607696533, + "step": 971 + }, + { + "epoch": 1.56, + "learning_rate": 4.592746730083234e-07, + "logits/chosen": -1.2134361267089844, + "logits/rejected": -1.3466843366622925, + "logps/chosen": -78.45404052734375, + "logps/rejected": -119.23548889160156, + "loss": 0.317, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0621427483856678, + "rewards/margins": 2.0400919914245605, + "rewards/rejected": -2.1022346019744873, + "step": 972 + }, + { + "epoch": 1.56, + "learning_rate": 4.591755846214823e-07, + "logits/chosen": -1.4419981241226196, + "logits/rejected": -1.4349675178527832, + "logps/chosen": -90.95451354980469, + "logps/rejected": -131.35958862304688, + "loss": 0.3428, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09131927043199539, + "rewards/margins": 2.6647560596466064, + "rewards/rejected": -2.756075620651245, + "step": 973 + }, + { + "epoch": 1.56, + "learning_rate": 4.590764962346413e-07, + "logits/chosen": -1.2763333320617676, + "logits/rejected": -1.3109709024429321, + "logps/chosen": -108.03046417236328, + "logps/rejected": -119.31826782226562, + "loss": 0.2601, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5908733606338501, + "rewards/margins": 0.7891178131103516, + "rewards/rejected": -1.379991054534912, + "step": 974 + }, + { + "epoch": 1.57, + "learning_rate": 4.589774078478002e-07, + "logits/chosen": -1.5588736534118652, + "logits/rejected": -1.598820686340332, + "logps/chosen": -88.26329040527344, + "logps/rejected": -136.84153747558594, + "loss": 0.4334, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.18850375711917877, + "rewards/margins": 3.8607687950134277, + "rewards/rejected": -4.049272537231445, + "step": 975 + }, + { + "epoch": 1.57, + "learning_rate": 4.588783194609592e-07, + "logits/chosen": -1.415099024772644, + "logits/rejected": -1.5621082782745361, + "logps/chosen": -83.30136108398438, + "logps/rejected": -128.627197265625, + "loss": 0.2912, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7063982486724854, + "rewards/margins": 3.1114447116851807, + "rewards/rejected": -3.817842960357666, + "step": 976 + }, + { + "epoch": 1.57, + "learning_rate": 4.587792310741181e-07, + "logits/chosen": -1.4861282110214233, + "logits/rejected": -1.4439729452133179, + "logps/chosen": -99.50328063964844, + "logps/rejected": -111.69810485839844, + "loss": 0.4274, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.8899587988853455, + "rewards/margins": 1.1180057525634766, + "rewards/rejected": -2.0079643726348877, + "step": 977 + }, + { + "epoch": 1.57, + "learning_rate": 4.58680142687277e-07, + "logits/chosen": -1.4032317399978638, + "logits/rejected": -1.4168193340301514, + "logps/chosen": -79.800048828125, + "logps/rejected": -120.53334045410156, + "loss": 0.3218, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2418707609176636, + "rewards/margins": 2.364065408706665, + "rewards/rejected": -3.605936050415039, + "step": 978 + }, + { + "epoch": 1.57, + "learning_rate": 4.58581054300436e-07, + "logits/chosen": -1.3687342405319214, + "logits/rejected": -1.4235602617263794, + "logps/chosen": -96.25276184082031, + "logps/rejected": -122.10169982910156, + "loss": 0.3405, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.47098851203918457, + "rewards/margins": 3.1929450035095215, + "rewards/rejected": -3.663933277130127, + "step": 979 + }, + { + "epoch": 1.57, + "learning_rate": 4.584819659135949e-07, + "logits/chosen": -1.5551085472106934, + "logits/rejected": -1.5704835653305054, + "logps/chosen": -90.42012023925781, + "logps/rejected": -130.96365356445312, + "loss": 0.3104, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8971171379089355, + "rewards/margins": 3.2032976150512695, + "rewards/rejected": -4.100414752960205, + "step": 980 + }, + { + "epoch": 1.57, + "learning_rate": 4.5838287752675387e-07, + "logits/chosen": -1.258089542388916, + "logits/rejected": -1.3572567701339722, + "logps/chosen": -100.72764587402344, + "logps/rejected": -139.6934814453125, + "loss": 0.293, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1437933444976807, + "rewards/margins": 3.6742823123931885, + "rewards/rejected": -4.818075656890869, + "step": 981 + }, + { + "epoch": 1.58, + "learning_rate": 4.582837891399128e-07, + "logits/chosen": -1.4393433332443237, + "logits/rejected": -1.5022224187850952, + "logps/chosen": -100.34344482421875, + "logps/rejected": -93.6104507446289, + "loss": 0.2943, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5749017596244812, + "rewards/margins": 1.9625037908554077, + "rewards/rejected": -2.537405490875244, + "step": 982 + }, + { + "epoch": 1.58, + "learning_rate": 4.581847007530717e-07, + "logits/chosen": -1.4920133352279663, + "logits/rejected": -1.4392980337142944, + "logps/chosen": -98.83134460449219, + "logps/rejected": -119.5103759765625, + "loss": 0.1781, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.44587594270706177, + "rewards/margins": 1.9595189094543457, + "rewards/rejected": -1.5136430263519287, + "step": 983 + }, + { + "epoch": 1.58, + "learning_rate": 4.5808561236623064e-07, + "logits/chosen": -1.3878389596939087, + "logits/rejected": -1.329796552658081, + "logps/chosen": -112.741455078125, + "logps/rejected": -135.997314453125, + "loss": 0.3187, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.3701050281524658, + "rewards/margins": 4.739919662475586, + "rewards/rejected": -5.110024452209473, + "step": 984 + }, + { + "epoch": 1.58, + "learning_rate": 4.579865239793896e-07, + "logits/chosen": -1.4800124168395996, + "logits/rejected": -1.4861066341400146, + "logps/chosen": -86.2996826171875, + "logps/rejected": -126.10321044921875, + "loss": 0.1605, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.127766415476799, + "rewards/margins": 3.2356300354003906, + "rewards/rejected": -3.363396644592285, + "step": 985 + }, + { + "epoch": 1.58, + "learning_rate": 4.5788743559254856e-07, + "logits/chosen": -1.4616198539733887, + "logits/rejected": -1.4643447399139404, + "logps/chosen": -74.71321868896484, + "logps/rejected": -131.0271453857422, + "loss": 0.2104, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.04787635803222656, + "rewards/margins": 5.066827774047852, + "rewards/rejected": -5.114704132080078, + "step": 986 + }, + { + "epoch": 1.58, + "learning_rate": 4.5778834720570747e-07, + "logits/chosen": -1.60130774974823, + "logits/rejected": -1.6248971223831177, + "logps/chosen": -80.91893005371094, + "logps/rejected": -140.90005493164062, + "loss": 0.3371, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.610609233379364, + "rewards/margins": 4.053956031799316, + "rewards/rejected": -3.4433469772338867, + "step": 987 + }, + { + "epoch": 1.59, + "learning_rate": 4.576892588188664e-07, + "logits/chosen": -1.4479496479034424, + "logits/rejected": -1.462864637374878, + "logps/chosen": -85.21322631835938, + "logps/rejected": -136.6701202392578, + "loss": 0.2859, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.45512983202934265, + "rewards/margins": 3.6391189098358154, + "rewards/rejected": -3.1839890480041504, + "step": 988 + }, + { + "epoch": 1.59, + "learning_rate": 4.5759017043202533e-07, + "logits/chosen": -1.487410068511963, + "logits/rejected": -1.4980055093765259, + "logps/chosen": -73.01914978027344, + "logps/rejected": -82.78298950195312, + "loss": 0.3762, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.2930563986301422, + "rewards/margins": 1.0506118535995483, + "rewards/rejected": -1.3436682224273682, + "step": 989 + }, + { + "epoch": 1.59, + "learning_rate": 4.574910820451843e-07, + "logits/chosen": -1.4658799171447754, + "logits/rejected": -1.4789515733718872, + "logps/chosen": -88.97076416015625, + "logps/rejected": -114.97970581054688, + "loss": 0.3333, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2526715397834778, + "rewards/margins": 2.98732852935791, + "rewards/rejected": -2.7346572875976562, + "step": 990 + }, + { + "epoch": 1.59, + "learning_rate": 4.5739199365834325e-07, + "logits/chosen": -1.4229050874710083, + "logits/rejected": -1.4959819316864014, + "logps/chosen": -75.70956420898438, + "logps/rejected": -132.67205810546875, + "loss": 0.2682, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.35063445568084717, + "rewards/margins": 5.046791076660156, + "rewards/rejected": -5.397425174713135, + "step": 991 + }, + { + "epoch": 1.59, + "learning_rate": 4.5729290527150216e-07, + "logits/chosen": -1.4041552543640137, + "logits/rejected": -1.3783067464828491, + "logps/chosen": -79.50238800048828, + "logps/rejected": -138.04954528808594, + "loss": 0.2417, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7246279120445251, + "rewards/margins": 4.209466934204102, + "rewards/rejected": -4.9340949058532715, + "step": 992 + }, + { + "epoch": 1.59, + "learning_rate": 4.5719381688466107e-07, + "logits/chosen": -1.435302495956421, + "logits/rejected": -1.4003387689590454, + "logps/chosen": -74.85476684570312, + "logps/rejected": -106.66015625, + "loss": 0.4571, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.062444210052490234, + "rewards/margins": 2.8713998794555664, + "rewards/rejected": -2.808955669403076, + "step": 993 + }, + { + "epoch": 1.6, + "learning_rate": 4.5709472849782003e-07, + "logits/chosen": -1.3543423414230347, + "logits/rejected": -1.394026517868042, + "logps/chosen": -82.84480285644531, + "logps/rejected": -95.69244384765625, + "loss": 0.2889, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.7850297093391418, + "rewards/margins": 1.327632188796997, + "rewards/rejected": -2.112661838531494, + "step": 994 + }, + { + "epoch": 1.6, + "learning_rate": 4.56995640110979e-07, + "logits/chosen": -1.4613194465637207, + "logits/rejected": -1.39666748046875, + "logps/chosen": -76.73270416259766, + "logps/rejected": -140.2431640625, + "loss": 0.428, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8633755445480347, + "rewards/margins": 5.6245927810668945, + "rewards/rejected": -4.76121711730957, + "step": 995 + }, + { + "epoch": 1.6, + "learning_rate": 4.5689655172413795e-07, + "logits/chosen": -1.471524715423584, + "logits/rejected": -1.4095906019210815, + "logps/chosen": -100.58306121826172, + "logps/rejected": -141.66012573242188, + "loss": 0.389, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.08394812047481537, + "rewards/margins": 2.3085222244262695, + "rewards/rejected": -2.392470598220825, + "step": 996 + }, + { + "epoch": 1.6, + "learning_rate": 4.5679746333729685e-07, + "logits/chosen": -1.5060317516326904, + "logits/rejected": -1.4719595909118652, + "logps/chosen": -84.05213928222656, + "logps/rejected": -137.65603637695312, + "loss": 0.2451, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.735716700553894, + "rewards/margins": 3.574162721633911, + "rewards/rejected": -2.8384461402893066, + "step": 997 + }, + { + "epoch": 1.6, + "learning_rate": 4.5669837495045576e-07, + "logits/chosen": -1.42974853515625, + "logits/rejected": -1.3278484344482422, + "logps/chosen": -87.77025604248047, + "logps/rejected": -115.01262664794922, + "loss": 0.291, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.41628727316856384, + "rewards/margins": 3.2024080753326416, + "rewards/rejected": -3.6186954975128174, + "step": 998 + }, + { + "epoch": 1.6, + "learning_rate": 4.565992865636147e-07, + "logits/chosen": -1.3749524354934692, + "logits/rejected": -1.3586478233337402, + "logps/chosen": -76.98604583740234, + "logps/rejected": -120.24432373046875, + "loss": 0.2958, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.298204243183136, + "rewards/margins": 2.4421188831329346, + "rewards/rejected": -2.1439146995544434, + "step": 999 + }, + { + "epoch": 1.61, + "learning_rate": 4.565001981767737e-07, + "logits/chosen": -1.36294686794281, + "logits/rejected": -1.303245186805725, + "logps/chosen": -72.94529724121094, + "logps/rejected": -125.28617858886719, + "loss": 0.2647, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5353199243545532, + "rewards/margins": 4.001642227172852, + "rewards/rejected": -3.466322422027588, + "step": 1000 + }, + { + "epoch": 1.61, + "learning_rate": 4.5640110978993264e-07, + "logits/chosen": -1.532013177871704, + "logits/rejected": -1.4546864032745361, + "logps/chosen": -91.13987731933594, + "logps/rejected": -114.92549896240234, + "loss": 0.3572, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.7886568903923035, + "rewards/margins": 1.5922943353652954, + "rewards/rejected": -2.380951404571533, + "step": 1001 + }, + { + "epoch": 1.61, + "learning_rate": 4.5630202140309155e-07, + "logits/chosen": -1.2984434366226196, + "logits/rejected": -1.2635446786880493, + "logps/chosen": -101.37415313720703, + "logps/rejected": -112.83323669433594, + "loss": 0.326, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.4112361967563629, + "rewards/margins": 1.6298748254776, + "rewards/rejected": -2.0411109924316406, + "step": 1002 + }, + { + "epoch": 1.61, + "learning_rate": 4.5620293301625045e-07, + "logits/chosen": -1.4084856510162354, + "logits/rejected": -1.409132957458496, + "logps/chosen": -72.82208251953125, + "logps/rejected": -97.28045654296875, + "loss": 0.3277, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09333716332912445, + "rewards/margins": 1.447910189628601, + "rewards/rejected": -1.3545730113983154, + "step": 1003 + }, + { + "epoch": 1.61, + "learning_rate": 4.561038446294094e-07, + "logits/chosen": -1.310517430305481, + "logits/rejected": -1.2920944690704346, + "logps/chosen": -108.4488754272461, + "logps/rejected": -112.8048095703125, + "loss": 0.3072, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.3556026816368103, + "rewards/margins": 0.9588367938995361, + "rewards/rejected": -1.3144394159317017, + "step": 1004 + }, + { + "epoch": 1.61, + "learning_rate": 4.560047562425683e-07, + "logits/chosen": -1.4287546873092651, + "logits/rejected": -1.437037706375122, + "logps/chosen": -84.99720764160156, + "logps/rejected": -121.86127471923828, + "loss": 0.2149, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1464085578918457, + "rewards/margins": 1.8218069076538086, + "rewards/rejected": -1.675398349761963, + "step": 1005 + }, + { + "epoch": 1.61, + "learning_rate": 4.559056678557273e-07, + "logits/chosen": -1.443516492843628, + "logits/rejected": -1.4030308723449707, + "logps/chosen": -88.12187194824219, + "logps/rejected": -108.17573547363281, + "loss": 0.3085, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.667259693145752, + "rewards/margins": 3.160517930984497, + "rewards/rejected": -3.82777738571167, + "step": 1006 + }, + { + "epoch": 1.62, + "learning_rate": 4.5580657946888624e-07, + "logits/chosen": -1.393242597579956, + "logits/rejected": -1.3325488567352295, + "logps/chosen": -110.41175842285156, + "logps/rejected": -116.48114013671875, + "loss": 0.442, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.030962742865085602, + "rewards/margins": 2.1447501182556152, + "rewards/rejected": -2.175712823867798, + "step": 1007 + }, + { + "epoch": 1.62, + "learning_rate": 4.5570749108204515e-07, + "logits/chosen": -1.478918194770813, + "logits/rejected": -1.3876875638961792, + "logps/chosen": -119.7712173461914, + "logps/rejected": -113.63029479980469, + "loss": 0.3797, + "rewards/accuracies": 0.75, + "rewards/chosen": -1.0962377786636353, + "rewards/margins": 2.2505481243133545, + "rewards/rejected": -3.3467857837677, + "step": 1008 + }, + { + "epoch": 1.62, + "learning_rate": 4.556084026952041e-07, + "logits/chosen": -1.4165499210357666, + "logits/rejected": -1.406442642211914, + "logps/chosen": -84.16569519042969, + "logps/rejected": -105.05816650390625, + "loss": 0.2229, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.06501388549804688, + "rewards/margins": 1.4303061962127686, + "rewards/rejected": -1.495320200920105, + "step": 1009 + }, + { + "epoch": 1.62, + "learning_rate": 4.55509314308363e-07, + "logits/chosen": -1.2490460872650146, + "logits/rejected": -1.2305936813354492, + "logps/chosen": -90.73574829101562, + "logps/rejected": -82.40900421142578, + "loss": 0.3891, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.1964359283447266, + "rewards/margins": 0.18889552354812622, + "rewards/rejected": -2.385331630706787, + "step": 1010 + }, + { + "epoch": 1.62, + "learning_rate": 4.5541022592152197e-07, + "logits/chosen": -1.3748347759246826, + "logits/rejected": -1.3894673585891724, + "logps/chosen": -72.97128295898438, + "logps/rejected": -137.94052124023438, + "loss": 0.1897, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.012136735022068024, + "rewards/margins": 4.159322738647461, + "rewards/rejected": -4.171459674835205, + "step": 1011 + }, + { + "epoch": 1.62, + "learning_rate": 4.5531113753468093e-07, + "logits/chosen": -1.637721300125122, + "logits/rejected": -1.6014314889907837, + "logps/chosen": -97.02202606201172, + "logps/rejected": -110.29731750488281, + "loss": 0.3532, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.16981297731399536, + "rewards/margins": 3.061370372772217, + "rewards/rejected": -2.8915576934814453, + "step": 1012 + }, + { + "epoch": 1.63, + "learning_rate": 4.5521204914783984e-07, + "logits/chosen": -1.5178476572036743, + "logits/rejected": -1.588550090789795, + "logps/chosen": -63.49580764770508, + "logps/rejected": -119.39292907714844, + "loss": 0.3599, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.2124195247888565, + "rewards/margins": 2.638204574584961, + "rewards/rejected": -2.4257850646972656, + "step": 1013 + }, + { + "epoch": 1.63, + "learning_rate": 4.551129607609988e-07, + "logits/chosen": -1.4542367458343506, + "logits/rejected": -1.4799245595932007, + "logps/chosen": -91.78561401367188, + "logps/rejected": -125.91111755371094, + "loss": 0.291, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.038205333054065704, + "rewards/margins": 2.898711919784546, + "rewards/rejected": -2.936917304992676, + "step": 1014 + }, + { + "epoch": 1.63, + "learning_rate": 4.550138723741577e-07, + "logits/chosen": -1.4961981773376465, + "logits/rejected": -1.5079925060272217, + "logps/chosen": -79.53777313232422, + "logps/rejected": -113.33588409423828, + "loss": 0.2163, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.9412255883216858, + "rewards/margins": 2.387908935546875, + "rewards/rejected": -3.329134464263916, + "step": 1015 + }, + { + "epoch": 1.63, + "learning_rate": 4.5491478398731667e-07, + "logits/chosen": -1.445568323135376, + "logits/rejected": -1.3724085092544556, + "logps/chosen": -87.11100769042969, + "logps/rejected": -107.66336059570312, + "loss": 0.4016, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.32490110397338867, + "rewards/margins": 2.8108139038085938, + "rewards/rejected": -3.1357150077819824, + "step": 1016 + }, + { + "epoch": 1.63, + "learning_rate": 4.548156956004756e-07, + "logits/chosen": -1.4367178678512573, + "logits/rejected": -1.3677453994750977, + "logps/chosen": -88.8550796508789, + "logps/rejected": -116.97498321533203, + "loss": 0.2565, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7192248702049255, + "rewards/margins": 5.035187244415283, + "rewards/rejected": -5.754411697387695, + "step": 1017 + }, + { + "epoch": 1.63, + "learning_rate": 4.5471660721363453e-07, + "logits/chosen": -1.4269119501113892, + "logits/rejected": -1.4527978897094727, + "logps/chosen": -86.99848937988281, + "logps/rejected": -138.4620361328125, + "loss": 0.4902, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1984400749206543, + "rewards/margins": 4.010796546936035, + "rewards/rejected": -4.209236145019531, + "step": 1018 + }, + { + "epoch": 1.64, + "learning_rate": 4.546175188267935e-07, + "logits/chosen": -1.3719547986984253, + "logits/rejected": -1.4726009368896484, + "logps/chosen": -97.5919189453125, + "logps/rejected": -150.20089721679688, + "loss": 0.2409, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.3679748475551605, + "rewards/margins": 2.9058403968811035, + "rewards/rejected": -3.273815393447876, + "step": 1019 + }, + { + "epoch": 1.64, + "learning_rate": 4.545184304399524e-07, + "logits/chosen": -1.5812867879867554, + "logits/rejected": -1.559905767440796, + "logps/chosen": -89.5531005859375, + "logps/rejected": -95.82762145996094, + "loss": 0.31, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.4610322117805481, + "rewards/margins": 2.7195839881896973, + "rewards/rejected": -3.1806159019470215, + "step": 1020 + }, + { + "epoch": 1.64, + "learning_rate": 4.544193420531113e-07, + "logits/chosen": -1.208458662033081, + "logits/rejected": -1.2733262777328491, + "logps/chosen": -69.1107177734375, + "logps/rejected": -101.6998291015625, + "loss": 0.415, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.005894392728805542, + "rewards/margins": 2.5334744453430176, + "rewards/rejected": -2.5393688678741455, + "step": 1021 + }, + { + "epoch": 1.64, + "learning_rate": 4.543202536662703e-07, + "logits/chosen": -1.2905462980270386, + "logits/rejected": -1.3901782035827637, + "logps/chosen": -69.9092025756836, + "logps/rejected": -118.09671020507812, + "loss": 0.2603, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.08307639509439468, + "rewards/margins": 3.8405025005340576, + "rewards/rejected": -3.7574262619018555, + "step": 1022 + }, + { + "epoch": 1.64, + "learning_rate": 4.542211652794292e-07, + "logits/chosen": -1.5063741207122803, + "logits/rejected": -1.5178205966949463, + "logps/chosen": -89.95638275146484, + "logps/rejected": -138.3484344482422, + "loss": 0.3532, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.10425510257482529, + "rewards/margins": 4.110670566558838, + "rewards/rejected": -4.214925765991211, + "step": 1023 + }, + { + "epoch": 1.64, + "learning_rate": 4.541220768925882e-07, + "logits/chosen": -1.5085582733154297, + "logits/rejected": -1.5812225341796875, + "logps/chosen": -84.1273193359375, + "logps/rejected": -91.5704574584961, + "loss": 0.2381, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.8850663900375366, + "rewards/margins": 1.9850561618804932, + "rewards/rejected": -2.8701224327087402, + "step": 1024 + }, + { + "epoch": 1.65, + "learning_rate": 4.540229885057471e-07, + "logits/chosen": -1.422958254814148, + "logits/rejected": -1.4511523246765137, + "logps/chosen": -96.16607666015625, + "logps/rejected": -127.87907409667969, + "loss": 0.3469, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7912813425064087, + "rewards/margins": 2.0947113037109375, + "rewards/rejected": -2.8859927654266357, + "step": 1025 + }, + { + "epoch": 1.65, + "learning_rate": 4.53923900118906e-07, + "logits/chosen": -1.431478500366211, + "logits/rejected": -1.3970046043395996, + "logps/chosen": -98.1852798461914, + "logps/rejected": -118.80416870117188, + "loss": 0.5227, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.6585584878921509, + "rewards/margins": 0.5556396245956421, + "rewards/rejected": -1.214198112487793, + "step": 1026 + }, + { + "epoch": 1.65, + "learning_rate": 4.53824811732065e-07, + "logits/chosen": -1.4372775554656982, + "logits/rejected": -1.54835045337677, + "logps/chosen": -106.04728698730469, + "logps/rejected": -134.54307556152344, + "loss": 0.4286, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.72065269947052, + "rewards/margins": 1.7566152811050415, + "rewards/rejected": -2.4772679805755615, + "step": 1027 + }, + { + "epoch": 1.65, + "learning_rate": 4.537257233452239e-07, + "logits/chosen": -1.4035193920135498, + "logits/rejected": -1.4397928714752197, + "logps/chosen": -88.20167541503906, + "logps/rejected": -132.9433135986328, + "loss": 0.1244, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.796866774559021, + "rewards/margins": 3.4757883548736572, + "rewards/rejected": -4.272655487060547, + "step": 1028 + }, + { + "epoch": 1.65, + "learning_rate": 4.536266349583829e-07, + "logits/chosen": -1.269483208656311, + "logits/rejected": -1.1745611429214478, + "logps/chosen": -79.4634780883789, + "logps/rejected": -109.49413299560547, + "loss": 0.2756, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6240548491477966, + "rewards/margins": 1.3824043273925781, + "rewards/rejected": -2.0064592361450195, + "step": 1029 + }, + { + "epoch": 1.65, + "learning_rate": 4.535275465715418e-07, + "logits/chosen": -1.4180091619491577, + "logits/rejected": -1.4124488830566406, + "logps/chosen": -88.13539123535156, + "logps/rejected": -142.025390625, + "loss": 0.1989, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1091129332780838, + "rewards/margins": 4.110642910003662, + "rewards/rejected": -4.21975564956665, + "step": 1030 + }, + { + "epoch": 1.65, + "learning_rate": 4.534284581847007e-07, + "logits/chosen": -1.4264159202575684, + "logits/rejected": -1.3508621454238892, + "logps/chosen": -99.21466827392578, + "logps/rejected": -127.93582153320312, + "loss": 0.2642, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.3012750744819641, + "rewards/margins": 4.089751243591309, + "rewards/rejected": -4.391026496887207, + "step": 1031 + }, + { + "epoch": 1.66, + "learning_rate": 4.533293697978597e-07, + "logits/chosen": -1.4626266956329346, + "logits/rejected": -1.4811855554580688, + "logps/chosen": -80.46440887451172, + "logps/rejected": -124.21766662597656, + "loss": 0.4178, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.02819681167602539, + "rewards/margins": 4.2998456954956055, + "rewards/rejected": -4.27164888381958, + "step": 1032 + }, + { + "epoch": 1.66, + "learning_rate": 4.532302814110186e-07, + "logits/chosen": -1.3586946725845337, + "logits/rejected": -1.3178521394729614, + "logps/chosen": -84.30183410644531, + "logps/rejected": -113.43319702148438, + "loss": 0.4027, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.047150999307632446, + "rewards/margins": 3.6903724670410156, + "rewards/rejected": -3.7375235557556152, + "step": 1033 + }, + { + "epoch": 1.66, + "learning_rate": 4.5313119302417757e-07, + "logits/chosen": -1.518182635307312, + "logits/rejected": -1.574660301208496, + "logps/chosen": -116.23222351074219, + "logps/rejected": -120.66960144042969, + "loss": 0.2081, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8400304317474365, + "rewards/margins": 2.0284104347229004, + "rewards/rejected": -2.868440866470337, + "step": 1034 + }, + { + "epoch": 1.66, + "learning_rate": 4.530321046373365e-07, + "logits/chosen": -1.4003864526748657, + "logits/rejected": -1.5168800354003906, + "logps/chosen": -89.8826904296875, + "logps/rejected": -133.73251342773438, + "loss": 0.3533, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.8977352976799011, + "rewards/margins": 3.55932879447937, + "rewards/rejected": -4.457064151763916, + "step": 1035 + }, + { + "epoch": 1.66, + "learning_rate": 4.529330162504954e-07, + "logits/chosen": -1.442535400390625, + "logits/rejected": -1.3254705667495728, + "logps/chosen": -72.97885131835938, + "logps/rejected": -102.29803466796875, + "loss": 0.2506, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.22098512947559357, + "rewards/margins": 2.9353365898132324, + "rewards/rejected": -3.1563215255737305, + "step": 1036 + }, + { + "epoch": 1.66, + "learning_rate": 4.528339278636544e-07, + "logits/chosen": -1.3702672719955444, + "logits/rejected": -1.4936965703964233, + "logps/chosen": -62.087730407714844, + "logps/rejected": -154.0878448486328, + "loss": 0.3268, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.31587573885917664, + "rewards/margins": 7.245948791503906, + "rewards/rejected": -6.930073261260986, + "step": 1037 + }, + { + "epoch": 1.67, + "learning_rate": 4.527348394768133e-07, + "logits/chosen": -1.4480103254318237, + "logits/rejected": -1.411965250968933, + "logps/chosen": -82.81105041503906, + "logps/rejected": -102.310791015625, + "loss": 0.3935, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.1755250245332718, + "rewards/margins": 1.6865530014038086, + "rewards/rejected": -1.8620779514312744, + "step": 1038 + }, + { + "epoch": 1.67, + "learning_rate": 4.5263575108997226e-07, + "logits/chosen": -1.4558117389678955, + "logits/rejected": -1.495263934135437, + "logps/chosen": -97.67805480957031, + "logps/rejected": -106.39985656738281, + "loss": 0.1979, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.482225626707077, + "rewards/margins": 4.91463565826416, + "rewards/rejected": -4.43241024017334, + "step": 1039 + }, + { + "epoch": 1.67, + "learning_rate": 4.5253666270313117e-07, + "logits/chosen": -1.5360808372497559, + "logits/rejected": -1.5772955417633057, + "logps/chosen": -86.48149108886719, + "logps/rejected": -139.98680114746094, + "loss": 0.3908, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.23683978617191315, + "rewards/margins": 3.8171653747558594, + "rewards/rejected": -4.0540056228637695, + "step": 1040 + }, + { + "epoch": 1.67, + "learning_rate": 4.524375743162901e-07, + "logits/chosen": -1.536886215209961, + "logits/rejected": -1.602403998374939, + "logps/chosen": -93.90997314453125, + "logps/rejected": -121.5748519897461, + "loss": 0.3027, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.40353500843048096, + "rewards/margins": 3.748650312423706, + "rewards/rejected": -3.3451151847839355, + "step": 1041 + }, + { + "epoch": 1.67, + "learning_rate": 4.523384859294491e-07, + "logits/chosen": -1.3762619495391846, + "logits/rejected": -1.3634415864944458, + "logps/chosen": -81.89701843261719, + "logps/rejected": -155.55929565429688, + "loss": 0.2559, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5206536650657654, + "rewards/margins": 5.212940692901611, + "rewards/rejected": -5.7335944175720215, + "step": 1042 + }, + { + "epoch": 1.67, + "learning_rate": 4.52239397542608e-07, + "logits/chosen": -1.452566146850586, + "logits/rejected": -1.4392071962356567, + "logps/chosen": -107.33621978759766, + "logps/rejected": -116.746826171875, + "loss": 0.2486, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.5756004452705383, + "rewards/margins": 1.6708297729492188, + "rewards/rejected": -2.2464301586151123, + "step": 1043 + }, + { + "epoch": 1.68, + "learning_rate": 4.521403091557669e-07, + "logits/chosen": -1.5514459609985352, + "logits/rejected": -1.47868013381958, + "logps/chosen": -96.85918426513672, + "logps/rejected": -128.58297729492188, + "loss": 0.3398, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.42612600326538086, + "rewards/margins": 3.6576123237609863, + "rewards/rejected": -4.083738327026367, + "step": 1044 + }, + { + "epoch": 1.68, + "learning_rate": 4.5204122076892586e-07, + "logits/chosen": -1.350089430809021, + "logits/rejected": -1.439801812171936, + "logps/chosen": -84.6789321899414, + "logps/rejected": -134.66452026367188, + "loss": 0.3003, + "rewards/accuracies": 0.75, + "rewards/chosen": -1.2945765256881714, + "rewards/margins": 3.2298054695129395, + "rewards/rejected": -4.524381637573242, + "step": 1045 + }, + { + "epoch": 1.68, + "learning_rate": 4.5194213238208477e-07, + "logits/chosen": -1.4133107662200928, + "logits/rejected": -1.3913521766662598, + "logps/chosen": -78.6299819946289, + "logps/rejected": -134.9194793701172, + "loss": 0.2404, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.20943795144557953, + "rewards/margins": 4.958282470703125, + "rewards/rejected": -5.167720317840576, + "step": 1046 + }, + { + "epoch": 1.68, + "learning_rate": 4.5184304399524373e-07, + "logits/chosen": -1.3453984260559082, + "logits/rejected": -1.3584482669830322, + "logps/chosen": -97.62211608886719, + "logps/rejected": -104.47257995605469, + "loss": 0.251, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.30606746673584, + "rewards/margins": 0.358056902885437, + "rewards/rejected": -2.6641244888305664, + "step": 1047 + }, + { + "epoch": 1.68, + "learning_rate": 4.517439556084027e-07, + "logits/chosen": -1.6222773790359497, + "logits/rejected": -1.5341486930847168, + "logps/chosen": -75.28490447998047, + "logps/rejected": -107.370361328125, + "loss": 0.2633, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.17127564549446106, + "rewards/margins": 3.4766008853912354, + "rewards/rejected": -3.305325508117676, + "step": 1048 + }, + { + "epoch": 1.68, + "learning_rate": 4.516448672215616e-07, + "logits/chosen": -1.5295909643173218, + "logits/rejected": -1.514620304107666, + "logps/chosen": -93.69155883789062, + "logps/rejected": -108.19889068603516, + "loss": 0.2554, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.36300355195999146, + "rewards/margins": 2.3641197681427, + "rewards/rejected": -2.0011165142059326, + "step": 1049 + }, + { + "epoch": 1.69, + "learning_rate": 4.5154577883472056e-07, + "logits/chosen": -1.283711552619934, + "logits/rejected": -1.3033336400985718, + "logps/chosen": -117.27137756347656, + "logps/rejected": -150.84112548828125, + "loss": 0.3192, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.18366223573684692, + "rewards/margins": 3.317580223083496, + "rewards/rejected": -3.133917808532715, + "step": 1050 + }, + { + "epoch": 1.69, + "learning_rate": 4.5144669044787946e-07, + "logits/chosen": -1.3378827571868896, + "logits/rejected": -1.3847885131835938, + "logps/chosen": -116.90763854980469, + "logps/rejected": -114.95821380615234, + "loss": 0.284, + "rewards/accuracies": 0.5, + "rewards/chosen": -1.2768419981002808, + "rewards/margins": 1.3386785984039307, + "rewards/rejected": -2.615520477294922, + "step": 1051 + }, + { + "epoch": 1.69, + "learning_rate": 4.513476020610384e-07, + "logits/chosen": -1.3551827669143677, + "logits/rejected": -1.3204442262649536, + "logps/chosen": -103.71885681152344, + "logps/rejected": -80.59847259521484, + "loss": 0.2533, + "rewards/accuracies": 0.5, + "rewards/chosen": -1.6585525274276733, + "rewards/margins": 0.158555269241333, + "rewards/rejected": -1.817107915878296, + "step": 1052 + }, + { + "epoch": 1.69, + "learning_rate": 4.512485136741974e-07, + "logits/chosen": -1.430235743522644, + "logits/rejected": -1.3302371501922607, + "logps/chosen": -100.59490966796875, + "logps/rejected": -123.57693481445312, + "loss": 0.2913, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6153760552406311, + "rewards/margins": 1.5174223184585571, + "rewards/rejected": -2.132798433303833, + "step": 1053 + }, + { + "epoch": 1.69, + "learning_rate": 4.511494252873563e-07, + "logits/chosen": -1.262465238571167, + "logits/rejected": -1.2192037105560303, + "logps/chosen": -86.59632110595703, + "logps/rejected": -132.90249633789062, + "loss": 0.2199, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5873657464981079, + "rewards/margins": 3.2408978939056396, + "rewards/rejected": -3.828263521194458, + "step": 1054 + }, + { + "epoch": 1.69, + "learning_rate": 4.5105033690051525e-07, + "logits/chosen": -1.5265709161758423, + "logits/rejected": -1.5036979913711548, + "logps/chosen": -88.03260040283203, + "logps/rejected": -95.89689636230469, + "loss": 0.18, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.35405415296554565, + "rewards/margins": 0.6474675536155701, + "rewards/rejected": -1.0015215873718262, + "step": 1055 + }, + { + "epoch": 1.7, + "learning_rate": 4.5095124851367416e-07, + "logits/chosen": -1.3549554347991943, + "logits/rejected": -1.325401782989502, + "logps/chosen": -92.337158203125, + "logps/rejected": -135.90496826171875, + "loss": 0.2754, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.36584606766700745, + "rewards/margins": 3.4960970878601074, + "rewards/rejected": -3.861943006515503, + "step": 1056 + }, + { + "epoch": 1.7, + "learning_rate": 4.508521601268331e-07, + "logits/chosen": -1.4413080215454102, + "logits/rejected": -1.4919180870056152, + "logps/chosen": -106.11381530761719, + "logps/rejected": -128.36026000976562, + "loss": 0.2685, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.9212981462478638, + "rewards/margins": 2.254791736602783, + "rewards/rejected": -3.1760897636413574, + "step": 1057 + }, + { + "epoch": 1.7, + "learning_rate": 4.507530717399921e-07, + "logits/chosen": -1.6301758289337158, + "logits/rejected": -1.5613455772399902, + "logps/chosen": -95.39458465576172, + "logps/rejected": -81.304443359375, + "loss": 0.295, + "rewards/accuracies": 0.25, + "rewards/chosen": -0.9442867636680603, + "rewards/margins": -0.02147933840751648, + "rewards/rejected": -0.9228074550628662, + "step": 1058 + }, + { + "epoch": 1.7, + "learning_rate": 4.50653983353151e-07, + "logits/chosen": -1.3225690126419067, + "logits/rejected": -1.3751764297485352, + "logps/chosen": -79.12251281738281, + "logps/rejected": -108.90084075927734, + "loss": 0.2922, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.512040376663208, + "rewards/margins": 2.5163044929504395, + "rewards/rejected": -3.0283448696136475, + "step": 1059 + }, + { + "epoch": 1.7, + "learning_rate": 4.5055489496630994e-07, + "logits/chosen": -1.4742083549499512, + "logits/rejected": -1.4695045948028564, + "logps/chosen": -74.79436492919922, + "logps/rejected": -96.78173828125, + "loss": 0.3838, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.04843759536743164, + "rewards/margins": 1.8436591625213623, + "rewards/rejected": -1.7952215671539307, + "step": 1060 + }, + { + "epoch": 1.7, + "learning_rate": 4.5045580657946885e-07, + "logits/chosen": -1.4232239723205566, + "logits/rejected": -1.4293829202651978, + "logps/chosen": -79.98274230957031, + "logps/rejected": -117.50627136230469, + "loss": 0.318, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.6750813722610474, + "rewards/margins": 1.3818950653076172, + "rewards/rejected": -2.056976556777954, + "step": 1061 + }, + { + "epoch": 1.7, + "learning_rate": 4.503567181926278e-07, + "logits/chosen": -1.6783498525619507, + "logits/rejected": -1.6045315265655518, + "logps/chosen": -125.43904113769531, + "logps/rejected": -85.48737335205078, + "loss": 0.5179, + "rewards/accuracies": 0.5, + "rewards/chosen": -1.3787710666656494, + "rewards/margins": -0.5639164447784424, + "rewards/rejected": -0.814854621887207, + "step": 1062 + }, + { + "epoch": 1.71, + "learning_rate": 4.5025762980578677e-07, + "logits/chosen": -1.379183292388916, + "logits/rejected": -1.3570401668548584, + "logps/chosen": -100.41577911376953, + "logps/rejected": -113.66314697265625, + "loss": 0.4148, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.2815616726875305, + "rewards/margins": 1.755793571472168, + "rewards/rejected": -2.0373551845550537, + "step": 1063 + }, + { + "epoch": 1.71, + "learning_rate": 4.501585414189457e-07, + "logits/chosen": -1.5282171964645386, + "logits/rejected": -1.4297924041748047, + "logps/chosen": -86.63506317138672, + "logps/rejected": -107.22927856445312, + "loss": 0.3617, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.9416707754135132, + "rewards/margins": 2.338803291320801, + "rewards/rejected": -3.2804737091064453, + "step": 1064 + }, + { + "epoch": 1.71, + "learning_rate": 4.5005945303210464e-07, + "logits/chosen": -1.4039726257324219, + "logits/rejected": -1.3784432411193848, + "logps/chosen": -76.39154052734375, + "logps/rejected": -98.84832763671875, + "loss": 0.2535, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8477097749710083, + "rewards/margins": 1.53617525100708, + "rewards/rejected": -2.383884906768799, + "step": 1065 + }, + { + "epoch": 1.71, + "learning_rate": 4.4996036464526354e-07, + "logits/chosen": -1.3545516729354858, + "logits/rejected": -1.3703863620758057, + "logps/chosen": -98.21018981933594, + "logps/rejected": -141.488525390625, + "loss": 0.303, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.16830329596996307, + "rewards/margins": 4.347773551940918, + "rewards/rejected": -4.516077041625977, + "step": 1066 + }, + { + "epoch": 1.71, + "learning_rate": 4.498612762584225e-07, + "logits/chosen": -1.5420668125152588, + "logits/rejected": -1.4028730392456055, + "logps/chosen": -104.7833251953125, + "logps/rejected": -111.3916244506836, + "loss": 0.2399, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2366269826889038, + "rewards/margins": 0.6806936264038086, + "rewards/rejected": -1.9173206090927124, + "step": 1067 + }, + { + "epoch": 1.71, + "learning_rate": 4.497621878715814e-07, + "logits/chosen": -1.4813660383224487, + "logits/rejected": -1.3661673069000244, + "logps/chosen": -105.32963562011719, + "logps/rejected": -100.39279174804688, + "loss": 0.331, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.45572999119758606, + "rewards/margins": 2.8203160762786865, + "rewards/rejected": -2.364586114883423, + "step": 1068 + }, + { + "epoch": 1.72, + "learning_rate": 4.4966309948474037e-07, + "logits/chosen": -1.4581712484359741, + "logits/rejected": -1.4627397060394287, + "logps/chosen": -76.64862823486328, + "logps/rejected": -90.83354187011719, + "loss": 0.3431, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.4006434381008148, + "rewards/margins": 2.1035637855529785, + "rewards/rejected": -2.504207134246826, + "step": 1069 + }, + { + "epoch": 1.72, + "learning_rate": 4.4956401109789933e-07, + "logits/chosen": -1.4435802698135376, + "logits/rejected": -1.5501172542572021, + "logps/chosen": -83.49974060058594, + "logps/rejected": -95.44267272949219, + "loss": 0.3087, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.327453851699829, + "rewards/margins": 1.3115978240966797, + "rewards/rejected": -2.639051914215088, + "step": 1070 + }, + { + "epoch": 1.72, + "learning_rate": 4.4946492271105824e-07, + "logits/chosen": -1.3069149255752563, + "logits/rejected": -1.3026816844940186, + "logps/chosen": -71.71449279785156, + "logps/rejected": -125.48612976074219, + "loss": 0.4668, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.8398980498313904, + "rewards/margins": 3.1339049339294434, + "rewards/rejected": -3.9738028049468994, + "step": 1071 + }, + { + "epoch": 1.72, + "learning_rate": 4.493658343242172e-07, + "logits/chosen": -1.4482446908950806, + "logits/rejected": -1.4602234363555908, + "logps/chosen": -82.65309143066406, + "logps/rejected": -105.39080810546875, + "loss": 0.2768, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.903878390789032, + "rewards/margins": 1.622797966003418, + "rewards/rejected": -2.5266764163970947, + "step": 1072 + }, + { + "epoch": 1.72, + "learning_rate": 4.492667459373761e-07, + "logits/chosen": -1.600611925125122, + "logits/rejected": -1.5449509620666504, + "logps/chosen": -92.88256072998047, + "logps/rejected": -121.6624984741211, + "loss": 0.3896, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.6486048102378845, + "rewards/margins": 2.363903522491455, + "rewards/rejected": -3.0125083923339844, + "step": 1073 + }, + { + "epoch": 1.72, + "learning_rate": 4.4916765755053506e-07, + "logits/chosen": -1.4748024940490723, + "logits/rejected": -1.4480395317077637, + "logps/chosen": -86.96412658691406, + "logps/rejected": -105.46617126464844, + "loss": 0.3819, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.39156973361968994, + "rewards/margins": 3.0855212211608887, + "rewards/rejected": -3.477090835571289, + "step": 1074 + }, + { + "epoch": 1.73, + "learning_rate": 4.49068569163694e-07, + "logits/chosen": -1.464053750038147, + "logits/rejected": -1.46418297290802, + "logps/chosen": -80.85247802734375, + "logps/rejected": -158.75973510742188, + "loss": 0.3946, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.06091576814651489, + "rewards/margins": 5.31303596496582, + "rewards/rejected": -5.252120018005371, + "step": 1075 + }, + { + "epoch": 1.73, + "learning_rate": 4.4896948077685293e-07, + "logits/chosen": -1.538384199142456, + "logits/rejected": -1.5664702653884888, + "logps/chosen": -114.67498779296875, + "logps/rejected": -130.79258728027344, + "loss": 0.4148, + "rewards/accuracies": 0.75, + "rewards/chosen": -1.3525482416152954, + "rewards/margins": 1.1498795747756958, + "rewards/rejected": -2.502427577972412, + "step": 1076 + }, + { + "epoch": 1.73, + "learning_rate": 4.4887039239001184e-07, + "logits/chosen": -1.5144250392913818, + "logits/rejected": -1.4580241441726685, + "logps/chosen": -81.31788635253906, + "logps/rejected": -114.04806518554688, + "loss": 0.232, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.20634660124778748, + "rewards/margins": 3.0916788578033447, + "rewards/rejected": -3.298025369644165, + "step": 1077 + }, + { + "epoch": 1.73, + "learning_rate": 4.487713040031708e-07, + "logits/chosen": -1.3794095516204834, + "logits/rejected": -1.3467565774917603, + "logps/chosen": -94.88206481933594, + "logps/rejected": -128.71141052246094, + "loss": 0.3459, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.6721189618110657, + "rewards/margins": 2.6404361724853516, + "rewards/rejected": -3.3125553131103516, + "step": 1078 + }, + { + "epoch": 1.73, + "learning_rate": 4.4867221561632975e-07, + "logits/chosen": -1.440730333328247, + "logits/rejected": -1.5799013376235962, + "logps/chosen": -68.32929229736328, + "logps/rejected": -132.81112670898438, + "loss": 0.2737, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.23945817351341248, + "rewards/margins": 3.8150885105133057, + "rewards/rejected": -4.054546356201172, + "step": 1079 + }, + { + "epoch": 1.73, + "learning_rate": 4.485731272294887e-07, + "logits/chosen": -1.590339183807373, + "logits/rejected": -1.6012974977493286, + "logps/chosen": -83.38775634765625, + "logps/rejected": -145.67654418945312, + "loss": 0.3173, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3507019281387329, + "rewards/margins": 5.332632541656494, + "rewards/rejected": -4.981930732727051, + "step": 1080 + }, + { + "epoch": 1.74, + "learning_rate": 4.484740388426476e-07, + "logits/chosen": -1.4874387979507446, + "logits/rejected": -1.4385697841644287, + "logps/chosen": -94.24652099609375, + "logps/rejected": -140.5498046875, + "loss": 0.2392, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8936440348625183, + "rewards/margins": 2.709390878677368, + "rewards/rejected": -3.6030349731445312, + "step": 1081 + }, + { + "epoch": 1.74, + "learning_rate": 4.4837495045580653e-07, + "logits/chosen": -1.470646619796753, + "logits/rejected": -1.4598760604858398, + "logps/chosen": -86.2561264038086, + "logps/rejected": -120.43228149414062, + "loss": 0.2968, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.6623983383178711, + "rewards/margins": 3.943162441253662, + "rewards/rejected": -4.605561256408691, + "step": 1082 + }, + { + "epoch": 1.74, + "learning_rate": 4.482758620689655e-07, + "logits/chosen": -1.332229495048523, + "logits/rejected": -1.4012945890426636, + "logps/chosen": -89.82843017578125, + "logps/rejected": -164.0804443359375, + "loss": 0.2837, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8525623679161072, + "rewards/margins": 3.3464608192443848, + "rewards/rejected": -4.199023246765137, + "step": 1083 + }, + { + "epoch": 1.74, + "learning_rate": 4.481767736821244e-07, + "logits/chosen": -1.4827896356582642, + "logits/rejected": -1.4563536643981934, + "logps/chosen": -75.58658599853516, + "logps/rejected": -99.70442962646484, + "loss": 0.5232, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.4295731782913208, + "rewards/margins": 2.659235715866089, + "rewards/rejected": -3.0888092517852783, + "step": 1084 + }, + { + "epoch": 1.74, + "learning_rate": 4.480776852952834e-07, + "logits/chosen": -1.4456613063812256, + "logits/rejected": -1.3833328485488892, + "logps/chosen": -109.15028381347656, + "logps/rejected": -126.31649780273438, + "loss": 0.4026, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.4069059491157532, + "rewards/margins": 4.313304901123047, + "rewards/rejected": -4.720211029052734, + "step": 1085 + }, + { + "epoch": 1.74, + "learning_rate": 4.479785969084423e-07, + "logits/chosen": -1.521033763885498, + "logits/rejected": -1.5559215545654297, + "logps/chosen": -70.56893920898438, + "logps/rejected": -145.7672576904297, + "loss": 0.2977, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.4125838577747345, + "rewards/margins": 4.364575386047363, + "rewards/rejected": -4.777159690856934, + "step": 1086 + }, + { + "epoch": 1.74, + "learning_rate": 4.478795085216012e-07, + "logits/chosen": -1.349228858947754, + "logits/rejected": -1.3225287199020386, + "logps/chosen": -99.99003601074219, + "logps/rejected": -121.83547973632812, + "loss": 0.2852, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.4755058288574219, + "rewards/margins": 2.7556769847869873, + "rewards/rejected": -3.231182813644409, + "step": 1087 + }, + { + "epoch": 1.75, + "learning_rate": 4.477804201347602e-07, + "logits/chosen": -1.381913185119629, + "logits/rejected": -1.4258053302764893, + "logps/chosen": -76.29012298583984, + "logps/rejected": -96.83831787109375, + "loss": 0.2796, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.4505760371685028, + "rewards/margins": 1.8614392280578613, + "rewards/rejected": -2.3120152950286865, + "step": 1088 + }, + { + "epoch": 1.75, + "learning_rate": 4.476813317479191e-07, + "logits/chosen": -1.4700889587402344, + "logits/rejected": -1.4731850624084473, + "logps/chosen": -94.60760498046875, + "logps/rejected": -131.19912719726562, + "loss": 0.239, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.4476810693740845, + "rewards/margins": 2.7072272300720215, + "rewards/rejected": -3.1549081802368164, + "step": 1089 + }, + { + "epoch": 1.75, + "learning_rate": 4.475822433610781e-07, + "logits/chosen": -1.578474760055542, + "logits/rejected": -1.491403579711914, + "logps/chosen": -69.76750946044922, + "logps/rejected": -112.67369079589844, + "loss": 0.1784, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.11113031208515167, + "rewards/margins": 4.865101337432861, + "rewards/rejected": -4.753970623016357, + "step": 1090 + }, + { + "epoch": 1.75, + "learning_rate": 4.47483154974237e-07, + "logits/chosen": -1.526633620262146, + "logits/rejected": -1.5314507484436035, + "logps/chosen": -86.86927032470703, + "logps/rejected": -106.52400970458984, + "loss": 0.331, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.5156723260879517, + "rewards/margins": 1.6470139026641846, + "rewards/rejected": -2.162686347961426, + "step": 1091 + }, + { + "epoch": 1.75, + "learning_rate": 4.473840665873959e-07, + "logits/chosen": -1.43513822555542, + "logits/rejected": -1.4202666282653809, + "logps/chosen": -79.26705932617188, + "logps/rejected": -104.52147674560547, + "loss": 0.2807, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.08364409953355789, + "rewards/margins": 3.00492000579834, + "rewards/rejected": -3.088563919067383, + "step": 1092 + }, + { + "epoch": 1.75, + "learning_rate": 4.472849782005549e-07, + "logits/chosen": -1.3463760614395142, + "logits/rejected": -1.3760218620300293, + "logps/chosen": -57.15502166748047, + "logps/rejected": -119.88204956054688, + "loss": 0.2827, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.16354818642139435, + "rewards/margins": 3.2702715396881104, + "rewards/rejected": -3.4338200092315674, + "step": 1093 + }, + { + "epoch": 1.76, + "learning_rate": 4.471858898137138e-07, + "logits/chosen": -1.5014312267303467, + "logits/rejected": -1.4830036163330078, + "logps/chosen": -107.1961669921875, + "logps/rejected": -95.06756591796875, + "loss": 0.2722, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.9419006705284119, + "rewards/margins": 1.2118113040924072, + "rewards/rejected": -2.153712034225464, + "step": 1094 + }, + { + "epoch": 1.76, + "learning_rate": 4.470868014268728e-07, + "logits/chosen": -1.4704844951629639, + "logits/rejected": -1.4628537893295288, + "logps/chosen": -92.28192138671875, + "logps/rejected": -91.93578338623047, + "loss": 0.3392, + "rewards/accuracies": 0.5, + "rewards/chosen": -1.0040804147720337, + "rewards/margins": 1.0410929918289185, + "rewards/rejected": -2.0451736450195312, + "step": 1095 + }, + { + "epoch": 1.76, + "learning_rate": 4.469877130400317e-07, + "logits/chosen": -1.3697460889816284, + "logits/rejected": -1.407015085220337, + "logps/chosen": -74.2623291015625, + "logps/rejected": -120.96308898925781, + "loss": 0.3164, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13908891379833221, + "rewards/margins": 3.4588427543640137, + "rewards/rejected": -3.5979321002960205, + "step": 1096 + }, + { + "epoch": 1.76, + "learning_rate": 4.468886246531906e-07, + "logits/chosen": -1.4322913885116577, + "logits/rejected": -1.4076693058013916, + "logps/chosen": -93.02253723144531, + "logps/rejected": -120.03521728515625, + "loss": 0.4141, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2982065677642822, + "rewards/margins": 2.507784605026245, + "rewards/rejected": -3.8059911727905273, + "step": 1097 + }, + { + "epoch": 1.76, + "learning_rate": 4.4678953626634957e-07, + "logits/chosen": -1.3977349996566772, + "logits/rejected": -1.3514240980148315, + "logps/chosen": -98.68428039550781, + "logps/rejected": -125.46593475341797, + "loss": 0.2221, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6498430967330933, + "rewards/margins": 3.013096809387207, + "rewards/rejected": -3.6629397869110107, + "step": 1098 + }, + { + "epoch": 1.76, + "learning_rate": 4.4669044787950847e-07, + "logits/chosen": -1.5610103607177734, + "logits/rejected": -1.5673413276672363, + "logps/chosen": -130.42335510253906, + "logps/rejected": -122.40464782714844, + "loss": 0.2406, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.40500009059906, + "rewards/margins": 1.704103708267212, + "rewards/rejected": -3.1091039180755615, + "step": 1099 + }, + { + "epoch": 1.77, + "learning_rate": 4.465913594926675e-07, + "logits/chosen": -1.437214970588684, + "logits/rejected": -1.3925405740737915, + "logps/chosen": -77.01715850830078, + "logps/rejected": -119.50595092773438, + "loss": 0.3254, + "rewards/accuracies": 0.75, + "rewards/chosen": -1.0707886219024658, + "rewards/margins": 2.7880611419677734, + "rewards/rejected": -3.8588497638702393, + "step": 1100 + }, + { + "epoch": 1.77, + "learning_rate": 4.464922711058264e-07, + "logits/chosen": -1.5650629997253418, + "logits/rejected": -1.4647763967514038, + "logps/chosen": -85.6757583618164, + "logps/rejected": -117.74117279052734, + "loss": 0.2368, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.123727798461914, + "rewards/margins": 2.451791763305664, + "rewards/rejected": -3.575519561767578, + "step": 1101 + }, + { + "epoch": 1.77, + "learning_rate": 4.463931827189853e-07, + "logits/chosen": -1.6730221509933472, + "logits/rejected": -1.6567529439926147, + "logps/chosen": -121.6965103149414, + "logps/rejected": -108.98847961425781, + "loss": 0.2056, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.5443521738052368, + "rewards/margins": 2.574545383453369, + "rewards/rejected": -3.1188974380493164, + "step": 1102 + }, + { + "epoch": 1.77, + "learning_rate": 4.4629409433214426e-07, + "logits/chosen": -1.2245882749557495, + "logits/rejected": -1.2482190132141113, + "logps/chosen": -101.35781860351562, + "logps/rejected": -124.62907409667969, + "loss": 0.1774, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7636957168579102, + "rewards/margins": 2.8759989738464355, + "rewards/rejected": -3.6396946907043457, + "step": 1103 + }, + { + "epoch": 1.77, + "learning_rate": 4.4619500594530317e-07, + "logits/chosen": -1.3786354064941406, + "logits/rejected": -1.4013890027999878, + "logps/chosen": -73.0827865600586, + "logps/rejected": -130.68959045410156, + "loss": 0.2194, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5975524187088013, + "rewards/margins": 4.1290435791015625, + "rewards/rejected": -4.726595878601074, + "step": 1104 + }, + { + "epoch": 1.77, + "learning_rate": 4.460959175584622e-07, + "logits/chosen": -1.4596422910690308, + "logits/rejected": -1.475716471672058, + "logps/chosen": -99.69544982910156, + "logps/rejected": -114.70996856689453, + "loss": 0.348, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.7219473123550415, + "rewards/margins": 1.53074312210083, + "rewards/rejected": -2.252690315246582, + "step": 1105 + }, + { + "epoch": 1.78, + "learning_rate": 4.459968291716211e-07, + "logits/chosen": -1.2893527746200562, + "logits/rejected": -1.3132071495056152, + "logps/chosen": -82.46931457519531, + "logps/rejected": -121.64779663085938, + "loss": 0.2415, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0110448598861694, + "rewards/margins": 4.144122123718262, + "rewards/rejected": -5.155167102813721, + "step": 1106 + }, + { + "epoch": 1.78, + "learning_rate": 4.4589774078478e-07, + "logits/chosen": -1.297234058380127, + "logits/rejected": -1.2998920679092407, + "logps/chosen": -83.27278900146484, + "logps/rejected": -108.21867370605469, + "loss": 0.3584, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07370337843894958, + "rewards/margins": 3.460516929626465, + "rewards/rejected": -3.5342204570770264, + "step": 1107 + }, + { + "epoch": 1.78, + "learning_rate": 4.4579865239793895e-07, + "logits/chosen": -1.4301910400390625, + "logits/rejected": -1.4035015106201172, + "logps/chosen": -92.59555053710938, + "logps/rejected": -123.40750885009766, + "loss": 0.1784, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.8181484341621399, + "rewards/margins": 1.2193174362182617, + "rewards/rejected": -2.037465810775757, + "step": 1108 + }, + { + "epoch": 1.78, + "learning_rate": 4.4569956401109786e-07, + "logits/chosen": -1.413170337677002, + "logits/rejected": -1.388722538948059, + "logps/chosen": -100.48464965820312, + "logps/rejected": -124.29618835449219, + "loss": 0.2461, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6696633100509644, + "rewards/margins": 2.691835641860962, + "rewards/rejected": -3.3614988327026367, + "step": 1109 + }, + { + "epoch": 1.78, + "learning_rate": 4.456004756242568e-07, + "logits/chosen": -1.4763774871826172, + "logits/rejected": -1.4763110876083374, + "logps/chosen": -75.666259765625, + "logps/rejected": -115.72643280029297, + "loss": 0.2558, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1455482542514801, + "rewards/margins": 3.333186626434326, + "rewards/rejected": -3.4787349700927734, + "step": 1110 + }, + { + "epoch": 1.78, + "learning_rate": 4.455013872374158e-07, + "logits/chosen": -1.5698230266571045, + "logits/rejected": -1.5455045700073242, + "logps/chosen": -126.57763671875, + "logps/rejected": -131.6348876953125, + "loss": 0.2967, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7747678756713867, + "rewards/margins": 1.7775055170059204, + "rewards/rejected": -2.5522732734680176, + "step": 1111 + }, + { + "epoch": 1.78, + "learning_rate": 4.454022988505747e-07, + "logits/chosen": -1.5321733951568604, + "logits/rejected": -1.5652269124984741, + "logps/chosen": -95.04378509521484, + "logps/rejected": -126.42357635498047, + "loss": 0.1919, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9762493371963501, + "rewards/margins": 2.4454522132873535, + "rewards/rejected": -3.421701669692993, + "step": 1112 + }, + { + "epoch": 1.79, + "learning_rate": 4.4530321046373365e-07, + "logits/chosen": -1.5584806203842163, + "logits/rejected": -1.5548278093338013, + "logps/chosen": -82.56033325195312, + "logps/rejected": -144.74102783203125, + "loss": 0.2406, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.48829445242881775, + "rewards/margins": 3.8273651599884033, + "rewards/rejected": -4.315659523010254, + "step": 1113 + }, + { + "epoch": 1.79, + "learning_rate": 4.4520412207689255e-07, + "logits/chosen": -1.426113486289978, + "logits/rejected": -1.440136194229126, + "logps/chosen": -111.581298828125, + "logps/rejected": -136.80992126464844, + "loss": 0.2704, + "rewards/accuracies": 0.5, + "rewards/chosen": -1.1083316802978516, + "rewards/margins": 1.2845232486724854, + "rewards/rejected": -2.392854928970337, + "step": 1114 + }, + { + "epoch": 1.79, + "learning_rate": 4.4510503369005146e-07, + "logits/chosen": -1.4045774936676025, + "logits/rejected": -1.3577958345413208, + "logps/chosen": -104.32609558105469, + "logps/rejected": -135.82655334472656, + "loss": 0.2046, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.26910191774368286, + "rewards/margins": 4.609500408172607, + "rewards/rejected": -4.34039831161499, + "step": 1115 + }, + { + "epoch": 1.79, + "learning_rate": 4.4500594530321047e-07, + "logits/chosen": -1.506330132484436, + "logits/rejected": -1.4744391441345215, + "logps/chosen": -82.18101501464844, + "logps/rejected": -106.8227310180664, + "loss": 0.4005, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5574297308921814, + "rewards/margins": 4.268188953399658, + "rewards/rejected": -3.710759162902832, + "step": 1116 + }, + { + "epoch": 1.79, + "learning_rate": 4.449068569163694e-07, + "logits/chosen": -1.369265079498291, + "logits/rejected": -1.416106104850769, + "logps/chosen": -100.55365753173828, + "logps/rejected": -112.09442901611328, + "loss": 0.2224, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.40224552154541, + "rewards/margins": 1.5883125066757202, + "rewards/rejected": -3.99055814743042, + "step": 1117 + }, + { + "epoch": 1.79, + "learning_rate": 4.4480776852952834e-07, + "logits/chosen": -1.5501878261566162, + "logits/rejected": -1.5113954544067383, + "logps/chosen": -109.77569580078125, + "logps/rejected": -131.84716796875, + "loss": 0.3382, + "rewards/accuracies": 0.75, + "rewards/chosen": -1.8075101375579834, + "rewards/margins": 1.9141361713409424, + "rewards/rejected": -3.721646308898926, + "step": 1118 + }, + { + "epoch": 1.8, + "learning_rate": 4.4470868014268725e-07, + "logits/chosen": -1.4656833410263062, + "logits/rejected": -1.4832531213760376, + "logps/chosen": -106.02562713623047, + "logps/rejected": -136.04916381835938, + "loss": 0.3436, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.14781494438648224, + "rewards/margins": 4.71645975112915, + "rewards/rejected": -4.5686445236206055, + "step": 1119 + }, + { + "epoch": 1.8, + "learning_rate": 4.4460959175584615e-07, + "logits/chosen": -1.4737012386322021, + "logits/rejected": -1.5346091985702515, + "logps/chosen": -101.71392822265625, + "logps/rejected": -116.49826049804688, + "loss": 0.1841, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7643362283706665, + "rewards/margins": 1.8072431087493896, + "rewards/rejected": -2.5715794563293457, + "step": 1120 + }, + { + "epoch": 1.8, + "learning_rate": 4.4451050336900516e-07, + "logits/chosen": -1.734053134918213, + "logits/rejected": -1.7957476377487183, + "logps/chosen": -79.16026306152344, + "logps/rejected": -130.35885620117188, + "loss": 0.2557, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.31450358033180237, + "rewards/margins": 5.348422050476074, + "rewards/rejected": -5.662926197052002, + "step": 1121 + }, + { + "epoch": 1.8, + "learning_rate": 4.4441141498216407e-07, + "logits/chosen": -1.588083028793335, + "logits/rejected": -1.5142014026641846, + "logps/chosen": -119.87068176269531, + "logps/rejected": -118.298095703125, + "loss": 0.2849, + "rewards/accuracies": 0.5, + "rewards/chosen": -1.2999893426895142, + "rewards/margins": 0.9374510645866394, + "rewards/rejected": -2.237440347671509, + "step": 1122 + }, + { + "epoch": 1.8, + "learning_rate": 4.4431232659532303e-07, + "logits/chosen": -1.384404182434082, + "logits/rejected": -1.4901758432388306, + "logps/chosen": -88.91020965576172, + "logps/rejected": -123.14022827148438, + "loss": 0.3611, + "rewards/accuracies": 0.75, + "rewards/chosen": -1.0145708322525024, + "rewards/margins": 2.124969244003296, + "rewards/rejected": -3.139539957046509, + "step": 1123 + }, + { + "epoch": 1.8, + "learning_rate": 4.4421323820848194e-07, + "logits/chosen": -1.3161647319793701, + "logits/rejected": -1.2584619522094727, + "logps/chosen": -125.53278350830078, + "logps/rejected": -118.26549530029297, + "loss": 0.442, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0059583187103271, + "rewards/margins": 3.3712921142578125, + "rewards/rejected": -4.377250671386719, + "step": 1124 + }, + { + "epoch": 1.81, + "learning_rate": 4.4411414982164085e-07, + "logits/chosen": -1.4043500423431396, + "logits/rejected": -1.3250643014907837, + "logps/chosen": -89.66082763671875, + "logps/rejected": -107.77639770507812, + "loss": 0.3072, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.16894254088401794, + "rewards/margins": 3.3816473484039307, + "rewards/rejected": -3.5505897998809814, + "step": 1125 + }, + { + "epoch": 1.81, + "learning_rate": 4.440150614347998e-07, + "logits/chosen": -1.3786492347717285, + "logits/rejected": -1.3569953441619873, + "logps/chosen": -131.62857055664062, + "logps/rejected": -144.17645263671875, + "loss": 0.2589, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.5294662714004517, + "rewards/margins": 2.5310564041137695, + "rewards/rejected": -3.0605227947235107, + "step": 1126 + }, + { + "epoch": 1.81, + "learning_rate": 4.4391597304795876e-07, + "logits/chosen": -1.3096436262130737, + "logits/rejected": -1.3075162172317505, + "logps/chosen": -104.18170928955078, + "logps/rejected": -109.2806167602539, + "loss": 0.4172, + "rewards/accuracies": 0.75, + "rewards/chosen": -1.0973542928695679, + "rewards/margins": 0.8495367765426636, + "rewards/rejected": -1.946890950202942, + "step": 1127 + }, + { + "epoch": 1.81, + "learning_rate": 4.438168846611177e-07, + "logits/chosen": -1.5556668043136597, + "logits/rejected": -1.526099681854248, + "logps/chosen": -89.51115417480469, + "logps/rejected": -119.6861801147461, + "loss": 0.2982, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2201475203037262, + "rewards/margins": 2.9350225925445557, + "rewards/rejected": -2.7148749828338623, + "step": 1128 + }, + { + "epoch": 1.81, + "learning_rate": 4.4371779627427663e-07, + "logits/chosen": -1.4071581363677979, + "logits/rejected": -1.3497520685195923, + "logps/chosen": -83.18069458007812, + "logps/rejected": -99.1688461303711, + "loss": 0.3109, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6145305037498474, + "rewards/margins": 2.515110492706299, + "rewards/rejected": -3.129640817642212, + "step": 1129 + }, + { + "epoch": 1.81, + "learning_rate": 4.4361870788743554e-07, + "logits/chosen": -1.6019237041473389, + "logits/rejected": -1.585770845413208, + "logps/chosen": -93.832763671875, + "logps/rejected": -140.34056091308594, + "loss": 0.3235, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.3641242980957031, + "rewards/margins": 4.994027137756348, + "rewards/rejected": -5.358151435852051, + "step": 1130 + }, + { + "epoch": 1.82, + "learning_rate": 4.435196195005945e-07, + "logits/chosen": -1.541964054107666, + "logits/rejected": -1.5506433248519897, + "logps/chosen": -98.80640411376953, + "logps/rejected": -125.52391052246094, + "loss": 0.412, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.6522318124771118, + "rewards/margins": 3.1384477615356445, + "rewards/rejected": -3.790679454803467, + "step": 1131 + }, + { + "epoch": 1.82, + "learning_rate": 4.4342053111375346e-07, + "logits/chosen": -1.3447757959365845, + "logits/rejected": -1.4185353517532349, + "logps/chosen": -129.2393341064453, + "logps/rejected": -111.12335205078125, + "loss": 0.4856, + "rewards/accuracies": 0.75, + "rewards/chosen": -1.0187926292419434, + "rewards/margins": 0.2227599322795868, + "rewards/rejected": -1.2415525913238525, + "step": 1132 + }, + { + "epoch": 1.82, + "learning_rate": 4.433214427269124e-07, + "logits/chosen": -1.3354644775390625, + "logits/rejected": -1.343255877494812, + "logps/chosen": -89.66532897949219, + "logps/rejected": -142.23681640625, + "loss": 0.21, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.41055622696876526, + "rewards/margins": 4.547921180725098, + "rewards/rejected": -4.958477973937988, + "step": 1133 + }, + { + "epoch": 1.82, + "learning_rate": 4.432223543400713e-07, + "logits/chosen": -1.4616037607192993, + "logits/rejected": -1.4725106954574585, + "logps/chosen": -87.1032485961914, + "logps/rejected": -111.62832641601562, + "loss": 0.1409, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14945009350776672, + "rewards/margins": 3.4181478023529053, + "rewards/rejected": -3.5675978660583496, + "step": 1134 + }, + { + "epoch": 1.82, + "learning_rate": 4.4312326595323023e-07, + "logits/chosen": -1.463274598121643, + "logits/rejected": -1.4280974864959717, + "logps/chosen": -103.09146118164062, + "logps/rejected": -118.93177032470703, + "loss": 0.2004, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.5732893943786621, + "rewards/margins": 1.032784104347229, + "rewards/rejected": -1.6060736179351807, + "step": 1135 + }, + { + "epoch": 1.82, + "learning_rate": 4.430241775663892e-07, + "logits/chosen": -1.410856008529663, + "logits/rejected": -1.511013150215149, + "logps/chosen": -104.16395568847656, + "logps/rejected": -124.48725128173828, + "loss": 0.3189, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.1084146574139595, + "rewards/margins": 2.943117380142212, + "rewards/rejected": -3.051532030105591, + "step": 1136 + }, + { + "epoch": 1.83, + "learning_rate": 4.4292508917954815e-07, + "logits/chosen": -1.3448143005371094, + "logits/rejected": -1.364131212234497, + "logps/chosen": -74.6818618774414, + "logps/rejected": -106.64903259277344, + "loss": 0.1943, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.185372456908226, + "rewards/margins": 1.9979119300842285, + "rewards/rejected": -2.183284282684326, + "step": 1137 + }, + { + "epoch": 1.83, + "learning_rate": 4.428260007927071e-07, + "logits/chosen": -1.6170040369033813, + "logits/rejected": -1.4395461082458496, + "logps/chosen": -121.81144714355469, + "logps/rejected": -112.10791015625, + "loss": 0.3195, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.08702030777931213, + "rewards/margins": 2.5545389652252197, + "rewards/rejected": -2.4675185680389404, + "step": 1138 + }, + { + "epoch": 1.83, + "learning_rate": 4.42726912405866e-07, + "logits/chosen": -1.4348466396331787, + "logits/rejected": -1.3767318725585938, + "logps/chosen": -87.33828735351562, + "logps/rejected": -127.47589111328125, + "loss": 0.2368, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.17514675855636597, + "rewards/margins": 4.8033671379089355, + "rewards/rejected": -4.978513717651367, + "step": 1139 + }, + { + "epoch": 1.83, + "learning_rate": 4.426278240190249e-07, + "logits/chosen": -1.611411213874817, + "logits/rejected": -1.6266846656799316, + "logps/chosen": -89.77645874023438, + "logps/rejected": -128.29222106933594, + "loss": 0.3166, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6310341954231262, + "rewards/margins": 3.7073488235473633, + "rewards/rejected": -4.338383197784424, + "step": 1140 + }, + { + "epoch": 1.83, + "learning_rate": 4.425287356321839e-07, + "logits/chosen": -1.3666199445724487, + "logits/rejected": -1.3877220153808594, + "logps/chosen": -74.13837432861328, + "logps/rejected": -108.85874938964844, + "loss": 0.2623, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5633593797683716, + "rewards/margins": 2.4729535579681396, + "rewards/rejected": -3.036313056945801, + "step": 1141 + }, + { + "epoch": 1.83, + "learning_rate": 4.4242964724534284e-07, + "logits/chosen": -1.2901839017868042, + "logits/rejected": -1.351101040840149, + "logps/chosen": -91.44749450683594, + "logps/rejected": -134.0978240966797, + "loss": 0.1988, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1326790750026703, + "rewards/margins": 3.1114661693573, + "rewards/rejected": -3.244145393371582, + "step": 1142 + }, + { + "epoch": 1.83, + "learning_rate": 4.423305588585018e-07, + "logits/chosen": -1.5362646579742432, + "logits/rejected": -1.5638731718063354, + "logps/chosen": -107.24700927734375, + "logps/rejected": -118.99673461914062, + "loss": 0.2757, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.27695101499557495, + "rewards/margins": 1.2578264474868774, + "rewards/rejected": -1.5347775220870972, + "step": 1143 + }, + { + "epoch": 1.84, + "learning_rate": 4.422314704716607e-07, + "logits/chosen": -1.5572936534881592, + "logits/rejected": -1.5107938051223755, + "logps/chosen": -87.86715698242188, + "logps/rejected": -157.091552734375, + "loss": 0.2193, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.053384698927402496, + "rewards/margins": 5.0714802742004395, + "rewards/rejected": -5.124865531921387, + "step": 1144 + }, + { + "epoch": 1.84, + "learning_rate": 4.421323820848196e-07, + "logits/chosen": -1.6188743114471436, + "logits/rejected": -1.6524907350540161, + "logps/chosen": -115.95286560058594, + "logps/rejected": -129.67098999023438, + "loss": 0.3088, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.3764764666557312, + "rewards/margins": 1.1846709251403809, + "rewards/rejected": -1.5611473321914673, + "step": 1145 + }, + { + "epoch": 1.84, + "learning_rate": 4.420332936979786e-07, + "logits/chosen": -1.4773540496826172, + "logits/rejected": -1.4369218349456787, + "logps/chosen": -92.95063018798828, + "logps/rejected": -131.6063690185547, + "loss": 0.2607, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1739906370639801, + "rewards/margins": 2.9875521659851074, + "rewards/rejected": -3.1615428924560547, + "step": 1146 + }, + { + "epoch": 1.84, + "learning_rate": 4.419342053111375e-07, + "logits/chosen": -1.4959989786148071, + "logits/rejected": -1.5219298601150513, + "logps/chosen": -113.57337951660156, + "logps/rejected": -128.2231903076172, + "loss": 0.2603, + "rewards/accuracies": 0.75, + "rewards/chosen": -1.2344778776168823, + "rewards/margins": 1.5401537418365479, + "rewards/rejected": -2.7746315002441406, + "step": 1147 + }, + { + "epoch": 1.84, + "learning_rate": 4.4183511692429644e-07, + "logits/chosen": -1.2300662994384766, + "logits/rejected": -1.2637790441513062, + "logps/chosen": -97.12615203857422, + "logps/rejected": -157.53573608398438, + "loss": 0.1482, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.4664745330810547, + "rewards/margins": 4.387078285217285, + "rewards/rejected": -4.85355281829834, + "step": 1148 + }, + { + "epoch": 1.84, + "learning_rate": 4.417360285374554e-07, + "logits/chosen": -1.4424107074737549, + "logits/rejected": -1.4952259063720703, + "logps/chosen": -64.2048568725586, + "logps/rejected": -137.88482666015625, + "loss": 0.1871, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.23397760093212128, + "rewards/margins": 5.420592308044434, + "rewards/rejected": -5.186614990234375, + "step": 1149 + }, + { + "epoch": 1.85, + "learning_rate": 4.416369401506143e-07, + "logits/chosen": -1.4054783582687378, + "logits/rejected": -1.4213998317718506, + "logps/chosen": -95.42218780517578, + "logps/rejected": -118.47805786132812, + "loss": 0.3047, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.11318788677453995, + "rewards/margins": 3.284956216812134, + "rewards/rejected": -3.1717681884765625, + "step": 1150 + }, + { + "epoch": 1.85, + "learning_rate": 4.4153785176377327e-07, + "logits/chosen": -1.436145544052124, + "logits/rejected": -1.408247709274292, + "logps/chosen": -104.36213684082031, + "logps/rejected": -110.92447662353516, + "loss": 0.3663, + "rewards/accuracies": 0.5, + "rewards/chosen": -1.4875376224517822, + "rewards/margins": 0.5683408379554749, + "rewards/rejected": -2.0558786392211914, + "step": 1151 + }, + { + "epoch": 1.85, + "learning_rate": 4.414387633769322e-07, + "logits/chosen": -1.4633475542068481, + "logits/rejected": -1.4694527387619019, + "logps/chosen": -103.59783935546875, + "logps/rejected": -109.9482421875, + "loss": 0.3141, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.0534193217754364, + "rewards/margins": 1.4282761812210083, + "rewards/rejected": -1.4816954135894775, + "step": 1152 + }, + { + "epoch": 1.85, + "learning_rate": 4.4133967499009114e-07, + "logits/chosen": -1.469909906387329, + "logits/rejected": -1.544114351272583, + "logps/chosen": -89.6146240234375, + "logps/rejected": -159.44137573242188, + "loss": 0.2757, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5161523222923279, + "rewards/margins": 5.252115249633789, + "rewards/rejected": -5.768267631530762, + "step": 1153 + }, + { + "epoch": 1.85, + "learning_rate": 4.412405866032501e-07, + "logits/chosen": -1.4402068853378296, + "logits/rejected": -1.442682147026062, + "logps/chosen": -84.98143768310547, + "logps/rejected": -106.58256530761719, + "loss": 0.246, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1329687088727951, + "rewards/margins": 1.7999500036239624, + "rewards/rejected": -1.9329187870025635, + "step": 1154 + }, + { + "epoch": 1.85, + "learning_rate": 4.41141498216409e-07, + "logits/chosen": -1.2598341703414917, + "logits/rejected": -1.2805542945861816, + "logps/chosen": -114.25552368164062, + "logps/rejected": -148.3730010986328, + "loss": 0.2338, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.3386014997959137, + "rewards/margins": 3.543799638748169, + "rewards/rejected": -3.88240122795105, + "step": 1155 + }, + { + "epoch": 1.86, + "learning_rate": 4.4104240982956796e-07, + "logits/chosen": -1.4567644596099854, + "logits/rejected": -1.436119556427002, + "logps/chosen": -99.5984115600586, + "logps/rejected": -150.86492919921875, + "loss": 0.1541, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6485260128974915, + "rewards/margins": 4.840339183807373, + "rewards/rejected": -5.488865375518799, + "step": 1156 + }, + { + "epoch": 1.86, + "learning_rate": 4.4094332144272687e-07, + "logits/chosen": -1.5326189994812012, + "logits/rejected": -1.4579757452011108, + "logps/chosen": -85.36509704589844, + "logps/rejected": -118.99436950683594, + "loss": 0.2472, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.22316496074199677, + "rewards/margins": 3.429727077484131, + "rewards/rejected": -3.6528921127319336, + "step": 1157 + }, + { + "epoch": 1.86, + "learning_rate": 4.4084423305588583e-07, + "logits/chosen": -1.4845690727233887, + "logits/rejected": -1.5604444742202759, + "logps/chosen": -90.93140411376953, + "logps/rejected": -116.77921295166016, + "loss": 0.435, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.044199369847774506, + "rewards/margins": 1.256335973739624, + "rewards/rejected": -1.3005354404449463, + "step": 1158 + }, + { + "epoch": 1.86, + "learning_rate": 4.407451446690448e-07, + "logits/chosen": -1.3148713111877441, + "logits/rejected": -1.3215723037719727, + "logps/chosen": -90.12710571289062, + "logps/rejected": -129.8048553466797, + "loss": 0.3258, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.5240349173545837, + "rewards/margins": 3.599539279937744, + "rewards/rejected": -4.123574256896973, + "step": 1159 + }, + { + "epoch": 1.86, + "learning_rate": 4.406460562822037e-07, + "logits/chosen": -1.5579198598861694, + "logits/rejected": -1.5163450241088867, + "logps/chosen": -81.92881774902344, + "logps/rejected": -103.28079986572266, + "loss": 0.3282, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.15590152144432068, + "rewards/margins": 1.3994874954223633, + "rewards/rejected": -1.555389165878296, + "step": 1160 + }, + { + "epoch": 1.86, + "learning_rate": 4.4054696789536266e-07, + "logits/chosen": -1.308337688446045, + "logits/rejected": -1.3262479305267334, + "logps/chosen": -92.2761459350586, + "logps/rejected": -116.37548065185547, + "loss": 0.2404, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.7341585159301758, + "rewards/margins": 2.045443058013916, + "rewards/rejected": -2.779601573944092, + "step": 1161 + }, + { + "epoch": 1.87, + "learning_rate": 4.4044787950852156e-07, + "logits/chosen": -1.3823343515396118, + "logits/rejected": -1.424808382987976, + "logps/chosen": -98.17412567138672, + "logps/rejected": -127.60133361816406, + "loss": 0.2772, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.3343179225921631, + "rewards/margins": 2.9977846145629883, + "rewards/rejected": -3.3321025371551514, + "step": 1162 + }, + { + "epoch": 1.87, + "learning_rate": 4.403487911216805e-07, + "logits/chosen": -1.581839680671692, + "logits/rejected": -1.5514025688171387, + "logps/chosen": -99.49407196044922, + "logps/rejected": -150.81179809570312, + "loss": 0.2488, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2450356483459473, + "rewards/margins": 4.203108787536621, + "rewards/rejected": -5.44814395904541, + "step": 1163 + }, + { + "epoch": 1.87, + "learning_rate": 4.402497027348395e-07, + "logits/chosen": -1.5067238807678223, + "logits/rejected": -1.4321720600128174, + "logps/chosen": -114.78170776367188, + "logps/rejected": -115.44966888427734, + "loss": 0.3481, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8183711767196655, + "rewards/margins": 2.0778887271881104, + "rewards/rejected": -2.8962597846984863, + "step": 1164 + }, + { + "epoch": 1.87, + "learning_rate": 4.401506143479984e-07, + "logits/chosen": -1.4995510578155518, + "logits/rejected": -1.4862397909164429, + "logps/chosen": -91.75689697265625, + "logps/rejected": -116.84862518310547, + "loss": 0.2571, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.09284163266420364, + "rewards/margins": 2.978724479675293, + "rewards/rejected": -3.071566104888916, + "step": 1165 + }, + { + "epoch": 1.87, + "learning_rate": 4.4005152596115735e-07, + "logits/chosen": -1.406922459602356, + "logits/rejected": -1.3857518434524536, + "logps/chosen": -106.92034149169922, + "logps/rejected": -117.32227325439453, + "loss": 0.2817, + "rewards/accuracies": 0.75, + "rewards/chosen": -1.2269392013549805, + "rewards/margins": 1.7019212245941162, + "rewards/rejected": -2.9288604259490967, + "step": 1166 + }, + { + "epoch": 1.87, + "learning_rate": 4.3995243757431626e-07, + "logits/chosen": -1.4643381834030151, + "logits/rejected": -1.4687392711639404, + "logps/chosen": -90.43661499023438, + "logps/rejected": -104.12216186523438, + "loss": 0.2751, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.6796445250511169, + "rewards/margins": 2.532625675201416, + "rewards/rejected": -3.2122702598571777, + "step": 1167 + }, + { + "epoch": 1.87, + "learning_rate": 4.3985334918747516e-07, + "logits/chosen": -1.3387072086334229, + "logits/rejected": -1.3641663789749146, + "logps/chosen": -75.56430053710938, + "logps/rejected": -97.12013244628906, + "loss": 0.2837, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5694966316223145, + "rewards/margins": 2.289414405822754, + "rewards/rejected": -2.8589110374450684, + "step": 1168 + }, + { + "epoch": 1.88, + "learning_rate": 4.397542608006342e-07, + "logits/chosen": -1.5141063928604126, + "logits/rejected": -1.517671823501587, + "logps/chosen": -88.77940368652344, + "logps/rejected": -114.4530029296875, + "loss": 0.377, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.45236361026763916, + "rewards/margins": 3.090421199798584, + "rewards/rejected": -3.5427846908569336, + "step": 1169 + }, + { + "epoch": 1.88, + "learning_rate": 4.396551724137931e-07, + "logits/chosen": -1.4767060279846191, + "logits/rejected": -1.534056305885315, + "logps/chosen": -90.92230224609375, + "logps/rejected": -134.12405395507812, + "loss": 0.2503, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.24561309814453125, + "rewards/margins": 4.372223854064941, + "rewards/rejected": -4.12661075592041, + "step": 1170 + }, + { + "epoch": 1.88, + "learning_rate": 4.3955608402695204e-07, + "logits/chosen": -1.469968557357788, + "logits/rejected": -1.4431248903274536, + "logps/chosen": -89.18746948242188, + "logps/rejected": -163.82220458984375, + "loss": 0.3064, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.21563082933425903, + "rewards/margins": 4.211465358734131, + "rewards/rejected": -3.9958345890045166, + "step": 1171 + }, + { + "epoch": 1.88, + "learning_rate": 4.3945699564011095e-07, + "logits/chosen": -1.3797200918197632, + "logits/rejected": -1.3380579948425293, + "logps/chosen": -101.0837173461914, + "logps/rejected": -100.41899108886719, + "loss": 0.2941, + "rewards/accuracies": 0.5, + "rewards/chosen": -1.1465388536453247, + "rewards/margins": 0.983749508857727, + "rewards/rejected": -2.1302883625030518, + "step": 1172 + }, + { + "epoch": 1.88, + "learning_rate": 4.3935790725326986e-07, + "logits/chosen": -1.4740022420883179, + "logits/rejected": -1.4680290222167969, + "logps/chosen": -92.69856262207031, + "logps/rejected": -111.7843246459961, + "loss": 0.3638, + "rewards/accuracies": 0.5, + "rewards/chosen": -1.9018385410308838, + "rewards/margins": 0.5402414798736572, + "rewards/rejected": -2.442080020904541, + "step": 1173 + }, + { + "epoch": 1.88, + "learning_rate": 4.3925881886642887e-07, + "logits/chosen": -1.492368221282959, + "logits/rejected": -1.536002516746521, + "logps/chosen": -96.66949462890625, + "logps/rejected": -135.04945373535156, + "loss": 0.2372, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.373558908700943, + "rewards/margins": 3.316476583480835, + "rewards/rejected": -3.690035581588745, + "step": 1174 + }, + { + "epoch": 1.89, + "learning_rate": 4.391597304795878e-07, + "logits/chosen": -1.6056745052337646, + "logits/rejected": -1.5846647024154663, + "logps/chosen": -96.71900939941406, + "logps/rejected": -144.92958068847656, + "loss": 0.2513, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.3814198672771454, + "rewards/margins": 3.488560438156128, + "rewards/rejected": -3.8699803352355957, + "step": 1175 + }, + { + "epoch": 1.89, + "learning_rate": 4.3906064209274673e-07, + "logits/chosen": -1.3526395559310913, + "logits/rejected": -1.2676095962524414, + "logps/chosen": -104.36775970458984, + "logps/rejected": -116.3917007446289, + "loss": 0.2284, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0459997653961182, + "rewards/margins": 1.393481731414795, + "rewards/rejected": -2.439481496810913, + "step": 1176 + }, + { + "epoch": 1.89, + "learning_rate": 4.3896155370590564e-07, + "logits/chosen": -1.3678573369979858, + "logits/rejected": -1.2836650609970093, + "logps/chosen": -77.66122436523438, + "logps/rejected": -131.73995971679688, + "loss": 0.2468, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6688933372497559, + "rewards/margins": 2.3928449153900146, + "rewards/rejected": -3.0617382526397705, + "step": 1177 + }, + { + "epoch": 1.89, + "learning_rate": 4.3886246531906455e-07, + "logits/chosen": -1.380350112915039, + "logits/rejected": -1.3832687139511108, + "logps/chosen": -94.66304016113281, + "logps/rejected": -120.02565002441406, + "loss": 0.2431, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.2853204905986786, + "rewards/margins": 3.1459436416625977, + "rewards/rejected": -3.4312639236450195, + "step": 1178 + }, + { + "epoch": 1.89, + "learning_rate": 4.3876337693222356e-07, + "logits/chosen": -1.4767565727233887, + "logits/rejected": -1.4750235080718994, + "logps/chosen": -60.376197814941406, + "logps/rejected": -127.66610717773438, + "loss": 0.1602, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.08595733344554901, + "rewards/margins": 5.693400859832764, + "rewards/rejected": -5.607443332672119, + "step": 1179 + }, + { + "epoch": 1.89, + "learning_rate": 4.3866428854538247e-07, + "logits/chosen": -1.5496578216552734, + "logits/rejected": -1.5842692852020264, + "logps/chosen": -82.91809844970703, + "logps/rejected": -116.15814208984375, + "loss": 0.354, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.4345642328262329, + "rewards/margins": 3.0495121479034424, + "rewards/rejected": -3.484076499938965, + "step": 1180 + }, + { + "epoch": 1.9, + "learning_rate": 4.3856520015854143e-07, + "logits/chosen": -1.4776942729949951, + "logits/rejected": -1.3530009984970093, + "logps/chosen": -89.12120056152344, + "logps/rejected": -132.88653564453125, + "loss": 0.2379, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.3749876320362091, + "rewards/margins": 4.288053512573242, + "rewards/rejected": -4.663041591644287, + "step": 1181 + }, + { + "epoch": 1.9, + "learning_rate": 4.3846611177170033e-07, + "logits/chosen": -1.582237720489502, + "logits/rejected": -1.5600926876068115, + "logps/chosen": -91.9615707397461, + "logps/rejected": -131.00454711914062, + "loss": 0.2929, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.6899434328079224, + "rewards/margins": 1.436309814453125, + "rewards/rejected": -2.126253128051758, + "step": 1182 + }, + { + "epoch": 1.9, + "learning_rate": 4.3836702338485924e-07, + "logits/chosen": -1.2876200675964355, + "logits/rejected": -1.5342828035354614, + "logps/chosen": -84.59527587890625, + "logps/rejected": -142.63845825195312, + "loss": 0.4152, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6641427874565125, + "rewards/margins": 5.849085807800293, + "rewards/rejected": -5.184943199157715, + "step": 1183 + }, + { + "epoch": 1.9, + "learning_rate": 4.3826793499801825e-07, + "logits/chosen": -1.3560165166854858, + "logits/rejected": -1.2364223003387451, + "logps/chosen": -90.56877136230469, + "logps/rejected": -119.66017150878906, + "loss": 0.2816, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.7133146524429321, + "rewards/margins": 2.9192657470703125, + "rewards/rejected": -3.632580280303955, + "step": 1184 + }, + { + "epoch": 1.9, + "learning_rate": 4.3816884661117716e-07, + "logits/chosen": -1.4050040245056152, + "logits/rejected": -1.3800404071807861, + "logps/chosen": -89.04590606689453, + "logps/rejected": -109.23188781738281, + "loss": 0.3454, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.45159685611724854, + "rewards/margins": 3.573042631149292, + "rewards/rejected": -4.02463960647583, + "step": 1185 + }, + { + "epoch": 1.9, + "learning_rate": 4.3806975822433607e-07, + "logits/chosen": -1.356734037399292, + "logits/rejected": -1.3993432521820068, + "logps/chosen": -81.81169891357422, + "logps/rejected": -96.60589599609375, + "loss": 0.327, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5225321650505066, + "rewards/margins": 1.9893995523452759, + "rewards/rejected": -2.511931896209717, + "step": 1186 + }, + { + "epoch": 1.91, + "learning_rate": 4.3797066983749503e-07, + "logits/chosen": -1.3335936069488525, + "logits/rejected": -1.2813397645950317, + "logps/chosen": -95.46517181396484, + "logps/rejected": -149.29734802246094, + "loss": 0.3013, + "rewards/accuracies": 0.75, + "rewards/chosen": -1.1299850940704346, + "rewards/margins": 3.518430709838867, + "rewards/rejected": -4.648416042327881, + "step": 1187 + }, + { + "epoch": 1.91, + "learning_rate": 4.3787158145065393e-07, + "logits/chosen": -1.3815181255340576, + "logits/rejected": -1.4207813739776611, + "logps/chosen": -103.36969757080078, + "logps/rejected": -113.4195556640625, + "loss": 0.3333, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.317679762840271, + "rewards/margins": 2.782815933227539, + "rewards/rejected": -3.1004958152770996, + "step": 1188 + }, + { + "epoch": 1.91, + "learning_rate": 4.377724930638129e-07, + "logits/chosen": -1.511907696723938, + "logits/rejected": -1.4634897708892822, + "logps/chosen": -98.75439453125, + "logps/rejected": -126.3021240234375, + "loss": 0.2392, + "rewards/accuracies": 0.75, + "rewards/chosen": -1.095939040184021, + "rewards/margins": 4.268267631530762, + "rewards/rejected": -5.3642072677612305, + "step": 1189 + }, + { + "epoch": 1.91, + "learning_rate": 4.3767340467697185e-07, + "logits/chosen": -1.5224276781082153, + "logits/rejected": -1.4237174987792969, + "logps/chosen": -63.86384201049805, + "logps/rejected": -131.6849822998047, + "loss": 0.1929, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8415687084197998, + "rewards/margins": 7.135977268218994, + "rewards/rejected": -6.294408798217773, + "step": 1190 + }, + { + "epoch": 1.91, + "learning_rate": 4.3757431629013076e-07, + "logits/chosen": -1.4060953855514526, + "logits/rejected": -1.4652597904205322, + "logps/chosen": -92.06958770751953, + "logps/rejected": -133.45664978027344, + "loss": 0.312, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.5821828246116638, + "rewards/margins": 3.153031349182129, + "rewards/rejected": -3.7352142333984375, + "step": 1191 + }, + { + "epoch": 1.91, + "learning_rate": 4.374752279032897e-07, + "logits/chosen": -1.48503839969635, + "logits/rejected": -1.4470348358154297, + "logps/chosen": -78.3155517578125, + "logps/rejected": -119.70085906982422, + "loss": 0.1562, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5490559339523315, + "rewards/margins": 4.884414196014404, + "rewards/rejected": -5.433470249176025, + "step": 1192 + }, + { + "epoch": 1.91, + "learning_rate": 4.3737613951644863e-07, + "logits/chosen": -1.419858455657959, + "logits/rejected": -1.4778980016708374, + "logps/chosen": -69.89869689941406, + "logps/rejected": -105.43055725097656, + "loss": 0.2846, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.38694679737091064, + "rewards/margins": 3.2332277297973633, + "rewards/rejected": -3.6201744079589844, + "step": 1193 + }, + { + "epoch": 1.92, + "learning_rate": 4.372770511296076e-07, + "logits/chosen": -1.4774479866027832, + "logits/rejected": -1.5088386535644531, + "logps/chosen": -97.91586303710938, + "logps/rejected": -120.28801727294922, + "loss": 0.1481, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6129424571990967, + "rewards/margins": 2.5898637771606445, + "rewards/rejected": -4.20280647277832, + "step": 1194 + }, + { + "epoch": 1.92, + "learning_rate": 4.3717796274276655e-07, + "logits/chosen": -1.2684803009033203, + "logits/rejected": -1.2789084911346436, + "logps/chosen": -92.2757568359375, + "logps/rejected": -139.07098388671875, + "loss": 0.221, + "rewards/accuracies": 0.75, + "rewards/chosen": -1.0744972229003906, + "rewards/margins": 1.691317081451416, + "rewards/rejected": -2.7658143043518066, + "step": 1195 + }, + { + "epoch": 1.92, + "learning_rate": 4.3707887435592545e-07, + "logits/chosen": -1.4931836128234863, + "logits/rejected": -1.477477788925171, + "logps/chosen": -125.53368377685547, + "logps/rejected": -124.0606460571289, + "loss": 0.2894, + "rewards/accuracies": 0.75, + "rewards/chosen": -1.3477426767349243, + "rewards/margins": 1.2647098302841187, + "rewards/rejected": -2.612452507019043, + "step": 1196 + }, + { + "epoch": 1.92, + "learning_rate": 4.369797859690844e-07, + "logits/chosen": -1.4589720964431763, + "logits/rejected": -1.4705286026000977, + "logps/chosen": -62.70899200439453, + "logps/rejected": -95.15110778808594, + "loss": 0.1515, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6232596635818481, + "rewards/margins": 4.047474384307861, + "rewards/rejected": -4.67073392868042, + "step": 1197 + }, + { + "epoch": 1.92, + "learning_rate": 4.368806975822433e-07, + "logits/chosen": -1.388379454612732, + "logits/rejected": -1.4100706577301025, + "logps/chosen": -112.4080810546875, + "logps/rejected": -113.77107238769531, + "loss": 0.2772, + "rewards/accuracies": 0.75, + "rewards/chosen": -1.108973503112793, + "rewards/margins": 2.375863552093506, + "rewards/rejected": -3.484837055206299, + "step": 1198 + }, + { + "epoch": 1.92, + "learning_rate": 4.367816091954023e-07, + "logits/chosen": -1.5462995767593384, + "logits/rejected": -1.634330153465271, + "logps/chosen": -110.84967803955078, + "logps/rejected": -161.56552124023438, + "loss": 0.2869, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7772539258003235, + "rewards/margins": 4.820531845092773, + "rewards/rejected": -5.597785949707031, + "step": 1199 + }, + { + "epoch": 1.93, + "learning_rate": 4.3668252080856124e-07, + "logits/chosen": -1.506972074508667, + "logits/rejected": -1.5112676620483398, + "logps/chosen": -85.52877807617188, + "logps/rejected": -126.54415130615234, + "loss": 0.2746, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.37539368867874146, + "rewards/margins": 3.2514519691467285, + "rewards/rejected": -3.626845598220825, + "step": 1200 + }, + { + "epoch": 1.93, + "learning_rate": 4.3658343242172015e-07, + "logits/chosen": -1.465179443359375, + "logits/rejected": -1.312491774559021, + "logps/chosen": -115.31640625, + "logps/rejected": -104.85160827636719, + "loss": 0.2729, + "rewards/accuracies": 0.75, + "rewards/chosen": -1.4553813934326172, + "rewards/margins": 0.5893468856811523, + "rewards/rejected": -2.0447282791137695, + "step": 1201 + }, + { + "epoch": 1.93, + "learning_rate": 4.364843440348791e-07, + "logits/chosen": -1.4024361371994019, + "logits/rejected": -1.3177199363708496, + "logps/chosen": -89.24357604980469, + "logps/rejected": -103.09367370605469, + "loss": 0.2277, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.541273295879364, + "rewards/margins": 1.818574070930481, + "rewards/rejected": -2.3598473072052, + "step": 1202 + }, + { + "epoch": 1.93, + "learning_rate": 4.36385255648038e-07, + "logits/chosen": -1.315090537071228, + "logits/rejected": -1.3472480773925781, + "logps/chosen": -95.97174072265625, + "logps/rejected": -118.57977294921875, + "loss": 0.214, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5579822063446045, + "rewards/margins": 1.6121611595153809, + "rewards/rejected": -2.1701436042785645, + "step": 1203 + }, + { + "epoch": 1.93, + "learning_rate": 4.3628616726119697e-07, + "logits/chosen": -1.5062263011932373, + "logits/rejected": -1.5360233783721924, + "logps/chosen": -99.39713287353516, + "logps/rejected": -120.14836120605469, + "loss": 0.2556, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6822240948677063, + "rewards/margins": 1.3009471893310547, + "rewards/rejected": -1.9831712245941162, + "step": 1204 + }, + { + "epoch": 1.93, + "learning_rate": 4.3618707887435593e-07, + "logits/chosen": -1.3749008178710938, + "logits/rejected": -1.3927260637283325, + "logps/chosen": -81.13267517089844, + "logps/rejected": -136.30914306640625, + "loss": 0.2492, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1035301685333252, + "rewards/margins": 3.7183332443237305, + "rewards/rejected": -4.821863651275635, + "step": 1205 + }, + { + "epoch": 1.94, + "learning_rate": 4.3608799048751484e-07, + "logits/chosen": -1.5020678043365479, + "logits/rejected": -1.5592821836471558, + "logps/chosen": -67.457763671875, + "logps/rejected": -144.3207244873047, + "loss": 0.2164, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.48146894574165344, + "rewards/margins": 5.109084129333496, + "rewards/rejected": -5.590552806854248, + "step": 1206 + }, + { + "epoch": 1.94, + "learning_rate": 4.359889021006738e-07, + "logits/chosen": -1.530219316482544, + "logits/rejected": -1.5946519374847412, + "logps/chosen": -94.7843246459961, + "logps/rejected": -160.2211456298828, + "loss": 0.2677, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0008361339569092, + "rewards/margins": 5.517521858215332, + "rewards/rejected": -6.518357753753662, + "step": 1207 + }, + { + "epoch": 1.94, + "learning_rate": 4.358898137138327e-07, + "logits/chosen": -1.4555699825286865, + "logits/rejected": -1.3856714963912964, + "logps/chosen": -110.25713348388672, + "logps/rejected": -133.6761474609375, + "loss": 0.3717, + "rewards/accuracies": 0.75, + "rewards/chosen": -1.7305872440338135, + "rewards/margins": 2.612600803375244, + "rewards/rejected": -4.343188285827637, + "step": 1208 + }, + { + "epoch": 1.94, + "learning_rate": 4.3579072532699167e-07, + "logits/chosen": -1.5927643775939941, + "logits/rejected": -1.5313929319381714, + "logps/chosen": -102.90740966796875, + "logps/rejected": -126.32066345214844, + "loss": 0.2574, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.581920862197876, + "rewards/margins": 3.2381186485290527, + "rewards/rejected": -3.8200392723083496, + "step": 1209 + }, + { + "epoch": 1.94, + "learning_rate": 4.3569163694015057e-07, + "logits/chosen": -1.4828330278396606, + "logits/rejected": -1.4520206451416016, + "logps/chosen": -87.55947875976562, + "logps/rejected": -82.96576690673828, + "loss": 0.2426, + "rewards/accuracies": 0.75, + "rewards/chosen": -1.3584098815917969, + "rewards/margins": 0.6665577292442322, + "rewards/rejected": -2.024967670440674, + "step": 1210 + }, + { + "epoch": 1.94, + "learning_rate": 4.3559254855330953e-07, + "logits/chosen": -1.4611014127731323, + "logits/rejected": -1.4348227977752686, + "logps/chosen": -62.47296905517578, + "logps/rejected": -110.3625717163086, + "loss": 0.2131, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.8339658379554749, + "rewards/margins": 2.628286123275757, + "rewards/rejected": -3.462251901626587, + "step": 1211 + }, + { + "epoch": 1.95, + "learning_rate": 4.354934601664685e-07, + "logits/chosen": -1.5113213062286377, + "logits/rejected": -1.5640305280685425, + "logps/chosen": -113.55604553222656, + "logps/rejected": -142.66705322265625, + "loss": 0.3585, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.8062220215797424, + "rewards/margins": 2.268556594848633, + "rewards/rejected": -3.0747785568237305, + "step": 1212 + }, + { + "epoch": 1.95, + "learning_rate": 4.353943717796274e-07, + "logits/chosen": -1.4309282302856445, + "logits/rejected": -1.4370200634002686, + "logps/chosen": -100.47332000732422, + "logps/rejected": -133.22781372070312, + "loss": 0.3919, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8284553289413452, + "rewards/margins": 3.3672025203704834, + "rewards/rejected": -4.195657730102539, + "step": 1213 + }, + { + "epoch": 1.95, + "learning_rate": 4.3529528339278636e-07, + "logits/chosen": -1.4369029998779297, + "logits/rejected": -1.4550859928131104, + "logps/chosen": -116.9339599609375, + "logps/rejected": -141.95718383789062, + "loss": 0.2986, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.682491660118103, + "rewards/margins": 4.178305625915527, + "rewards/rejected": -4.86079740524292, + "step": 1214 + }, + { + "epoch": 1.95, + "learning_rate": 4.3519619500594527e-07, + "logits/chosen": -1.616718053817749, + "logits/rejected": -1.6303207874298096, + "logps/chosen": -80.79972839355469, + "logps/rejected": -122.4210433959961, + "loss": 0.32, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.10389319062232971, + "rewards/margins": 4.316219806671143, + "rewards/rejected": -4.212326526641846, + "step": 1215 + }, + { + "epoch": 1.95, + "learning_rate": 4.350971066191042e-07, + "logits/chosen": -1.53462815284729, + "logits/rejected": -1.5679481029510498, + "logps/chosen": -105.13465881347656, + "logps/rejected": -108.72409057617188, + "loss": 0.256, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.2821117341518402, + "rewards/margins": 2.0878329277038574, + "rewards/rejected": -2.3699445724487305, + "step": 1216 + }, + { + "epoch": 1.95, + "learning_rate": 4.349980182322632e-07, + "logits/chosen": -1.534961223602295, + "logits/rejected": -1.487816572189331, + "logps/chosen": -88.59142303466797, + "logps/rejected": -139.10031127929688, + "loss": 0.205, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6139363646507263, + "rewards/margins": 5.768303394317627, + "rewards/rejected": -6.38223934173584, + "step": 1217 + }, + { + "epoch": 1.96, + "learning_rate": 4.348989298454221e-07, + "logits/chosen": -1.6388590335845947, + "logits/rejected": -1.641633152961731, + "logps/chosen": -85.38260650634766, + "logps/rejected": -139.23995971679688, + "loss": 0.1915, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1994098722934723, + "rewards/margins": 5.395689010620117, + "rewards/rejected": -5.196279525756836, + "step": 1218 + }, + { + "epoch": 1.96, + "learning_rate": 4.34799841458581e-07, + "logits/chosen": -1.5533031225204468, + "logits/rejected": -1.5126149654388428, + "logps/chosen": -86.78959655761719, + "logps/rejected": -99.01382446289062, + "loss": 0.3161, + "rewards/accuracies": 0.75, + "rewards/chosen": -1.265673041343689, + "rewards/margins": 2.393893003463745, + "rewards/rejected": -3.6595659255981445, + "step": 1219 + }, + { + "epoch": 1.96, + "learning_rate": 4.3470075307173996e-07, + "logits/chosen": -1.330951452255249, + "logits/rejected": -1.292251706123352, + "logps/chosen": -73.68235778808594, + "logps/rejected": -105.49578857421875, + "loss": 0.3047, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1640346348285675, + "rewards/margins": 2.724595069885254, + "rewards/rejected": -2.888629913330078, + "step": 1220 + }, + { + "epoch": 1.96, + "learning_rate": 4.346016646848989e-07, + "logits/chosen": -1.6905272006988525, + "logits/rejected": -1.6001838445663452, + "logps/chosen": -79.28993225097656, + "logps/rejected": -95.04454040527344, + "loss": 0.0938, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.012753300368785858, + "rewards/margins": 2.346951484680176, + "rewards/rejected": -2.3597049713134766, + "step": 1221 + }, + { + "epoch": 1.96, + "learning_rate": 4.345025762980579e-07, + "logits/chosen": -1.5398207902908325, + "logits/rejected": -1.6387784481048584, + "logps/chosen": -100.39404296875, + "logps/rejected": -117.72673797607422, + "loss": 0.3331, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.18828639388084412, + "rewards/margins": 2.2205238342285156, + "rewards/rejected": -2.4088103771209717, + "step": 1222 + }, + { + "epoch": 1.96, + "learning_rate": 4.344034879112168e-07, + "logits/chosen": -1.6562045812606812, + "logits/rejected": -1.6230638027191162, + "logps/chosen": -85.01177978515625, + "logps/rejected": -103.04752349853516, + "loss": 0.389, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.3874492943286896, + "rewards/margins": 1.6778422594070435, + "rewards/rejected": -2.0652916431427, + "step": 1223 + }, + { + "epoch": 1.96, + "learning_rate": 4.343043995243757e-07, + "logits/chosen": -1.2507611513137817, + "logits/rejected": -1.2746949195861816, + "logps/chosen": -98.89669036865234, + "logps/rejected": -119.34732055664062, + "loss": 0.4176, + "rewards/accuracies": 0.75, + "rewards/chosen": -1.4274120330810547, + "rewards/margins": 0.17870986461639404, + "rewards/rejected": -1.6061218976974487, + "step": 1224 + }, + { + "epoch": 1.97, + "learning_rate": 4.3420531113753465e-07, + "logits/chosen": -1.62452232837677, + "logits/rejected": -1.6041109561920166, + "logps/chosen": -105.58642578125, + "logps/rejected": -117.57750701904297, + "loss": 0.2538, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.2381969541311264, + "rewards/margins": 2.327038288116455, + "rewards/rejected": -2.5652356147766113, + "step": 1225 + }, + { + "epoch": 1.97, + "learning_rate": 4.341062227506936e-07, + "logits/chosen": -1.580676794052124, + "logits/rejected": -1.6270904541015625, + "logps/chosen": -84.481689453125, + "logps/rejected": -123.74541473388672, + "loss": 0.3911, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.6112198829650879, + "rewards/margins": 2.1724674701690674, + "rewards/rejected": -2.783687114715576, + "step": 1226 + }, + { + "epoch": 1.97, + "learning_rate": 4.3400713436385257e-07, + "logits/chosen": -1.458744764328003, + "logits/rejected": -1.4648215770721436, + "logps/chosen": -93.22073364257812, + "logps/rejected": -145.3765411376953, + "loss": 0.2707, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.7115075588226318, + "rewards/margins": 0.5977874994277954, + "rewards/rejected": -1.3092951774597168, + "step": 1227 + }, + { + "epoch": 1.97, + "learning_rate": 4.339080459770115e-07, + "logits/chosen": -1.544055461883545, + "logits/rejected": -1.4529588222503662, + "logps/chosen": -88.87449645996094, + "logps/rejected": -100.42530822753906, + "loss": 0.277, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1658649444580078, + "rewards/margins": 2.3713438510894775, + "rewards/rejected": -2.205479145050049, + "step": 1228 + }, + { + "epoch": 1.97, + "learning_rate": 4.338089575901704e-07, + "logits/chosen": -1.331592321395874, + "logits/rejected": -1.3773057460784912, + "logps/chosen": -93.46331787109375, + "logps/rejected": -120.49371337890625, + "loss": 0.328, + "rewards/accuracies": 0.75, + "rewards/chosen": -1.0815529823303223, + "rewards/margins": 2.121675968170166, + "rewards/rejected": -3.203228712081909, + "step": 1229 + }, + { + "epoch": 1.97, + "learning_rate": 4.3370986920332934e-07, + "logits/chosen": -1.3839352130889893, + "logits/rejected": -1.35260009765625, + "logps/chosen": -111.54293823242188, + "logps/rejected": -145.79348754882812, + "loss": 0.3156, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8885555267333984, + "rewards/margins": 3.0533628463745117, + "rewards/rejected": -3.94191837310791, + "step": 1230 + }, + { + "epoch": 1.98, + "learning_rate": 4.3361078081648825e-07, + "logits/chosen": -1.406173825263977, + "logits/rejected": -1.3496291637420654, + "logps/chosen": -83.7090072631836, + "logps/rejected": -130.5758056640625, + "loss": 0.2577, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.28057652711868286, + "rewards/margins": 4.063328742980957, + "rewards/rejected": -4.343904972076416, + "step": 1231 + }, + { + "epoch": 1.98, + "learning_rate": 4.3351169242964726e-07, + "logits/chosen": -1.454321026802063, + "logits/rejected": -1.42905855178833, + "logps/chosen": -100.27388000488281, + "logps/rejected": -111.19540405273438, + "loss": 0.285, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.24707776308059692, + "rewards/margins": 2.591421365737915, + "rewards/rejected": -2.838499069213867, + "step": 1232 + }, + { + "epoch": 1.98, + "learning_rate": 4.3341260404280617e-07, + "logits/chosen": -1.4066498279571533, + "logits/rejected": -1.4142098426818848, + "logps/chosen": -96.74811553955078, + "logps/rejected": -124.33453369140625, + "loss": 0.2701, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.17986297607421875, + "rewards/margins": 2.3474373817443848, + "rewards/rejected": -2.5273003578186035, + "step": 1233 + }, + { + "epoch": 1.98, + "learning_rate": 4.333135156559651e-07, + "logits/chosen": -1.3803982734680176, + "logits/rejected": -1.3716531991958618, + "logps/chosen": -71.3891830444336, + "logps/rejected": -121.86614227294922, + "loss": 0.3086, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.014169782400131226, + "rewards/margins": 3.4527339935302734, + "rewards/rejected": -3.4385643005371094, + "step": 1234 + }, + { + "epoch": 1.98, + "learning_rate": 4.3321442726912404e-07, + "logits/chosen": -1.467852234840393, + "logits/rejected": -1.502821922302246, + "logps/chosen": -84.32377624511719, + "logps/rejected": -120.20381164550781, + "loss": 0.3257, + "rewards/accuracies": 0.75, + "rewards/chosen": -1.8148152828216553, + "rewards/margins": 1.5343060493469238, + "rewards/rejected": -3.349121570587158, + "step": 1235 + }, + { + "epoch": 1.98, + "learning_rate": 4.3311533888228294e-07, + "logits/chosen": -1.5057395696640015, + "logits/rejected": -1.4680556058883667, + "logps/chosen": -60.75223922729492, + "logps/rejected": -116.8771743774414, + "loss": 0.2273, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.30038952827453613, + "rewards/margins": 4.097169876098633, + "rewards/rejected": -4.397559642791748, + "step": 1236 + }, + { + "epoch": 1.99, + "learning_rate": 4.3301625049544196e-07, + "logits/chosen": -1.5608302354812622, + "logits/rejected": -1.5874555110931396, + "logps/chosen": -67.79867553710938, + "logps/rejected": -128.5220184326172, + "loss": 0.2444, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.13367435336112976, + "rewards/margins": 5.033227920532227, + "rewards/rejected": -4.8995537757873535, + "step": 1237 + }, + { + "epoch": 1.99, + "learning_rate": 4.3291716210860086e-07, + "logits/chosen": -1.5044618844985962, + "logits/rejected": -1.5467292070388794, + "logps/chosen": -76.59243774414062, + "logps/rejected": -122.72437286376953, + "loss": 0.2551, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.15947704017162323, + "rewards/margins": 3.9455676078796387, + "rewards/rejected": -4.105044364929199, + "step": 1238 + }, + { + "epoch": 1.99, + "learning_rate": 4.3281807372175977e-07, + "logits/chosen": -1.3771075010299683, + "logits/rejected": -1.4101756811141968, + "logps/chosen": -118.90798950195312, + "logps/rejected": -124.225341796875, + "loss": 0.3909, + "rewards/accuracies": 0.5, + "rewards/chosen": -1.3187206983566284, + "rewards/margins": 2.3943827152252197, + "rewards/rejected": -3.7131035327911377, + "step": 1239 + }, + { + "epoch": 1.99, + "learning_rate": 4.3271898533491873e-07, + "logits/chosen": -1.5012879371643066, + "logits/rejected": -1.4926526546478271, + "logps/chosen": -91.9200439453125, + "logps/rejected": -124.88258361816406, + "loss": 0.2289, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.9201165437698364, + "rewards/margins": 1.5434389114379883, + "rewards/rejected": -2.4635555744171143, + "step": 1240 + }, + { + "epoch": 1.99, + "learning_rate": 4.3261989694807764e-07, + "logits/chosen": -1.5616192817687988, + "logits/rejected": -1.617734432220459, + "logps/chosen": -99.5982437133789, + "logps/rejected": -142.7492218017578, + "loss": 0.2113, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6395364999771118, + "rewards/margins": 2.485624074935913, + "rewards/rejected": -3.1251606941223145, + "step": 1241 + }, + { + "epoch": 1.99, + "learning_rate": 4.3252080856123665e-07, + "logits/chosen": -1.5555245876312256, + "logits/rejected": -1.4841028451919556, + "logps/chosen": -108.14250946044922, + "logps/rejected": -136.6805877685547, + "loss": 0.1811, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.2455122023820877, + "rewards/margins": 4.978642463684082, + "rewards/rejected": -5.224154472351074, + "step": 1242 + }, + { + "epoch": 2.0, + "learning_rate": 4.3242172017439556e-07, + "logits/chosen": -1.5388514995574951, + "logits/rejected": -1.5513019561767578, + "logps/chosen": -107.13815307617188, + "logps/rejected": -106.31368255615234, + "loss": 0.2627, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.4078955054283142, + "rewards/margins": 2.496858835220337, + "rewards/rejected": -2.904754638671875, + "step": 1243 + }, + { + "epoch": 2.0, + "learning_rate": 4.3232263178755446e-07, + "logits/chosen": -1.4790377616882324, + "logits/rejected": -1.4332636594772339, + "logps/chosen": -101.67962646484375, + "logps/rejected": -111.42196655273438, + "loss": 0.3392, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0826828479766846, + "rewards/margins": 1.8899434804916382, + "rewards/rejected": -2.972626209259033, + "step": 1244 + }, + { + "epoch": 2.0, + "learning_rate": 4.322235434007134e-07, + "logits/chosen": -1.527238368988037, + "logits/rejected": -1.5337402820587158, + "logps/chosen": -119.0438232421875, + "logps/rejected": -122.93560028076172, + "loss": 0.2947, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.01847878098487854, + "rewards/margins": 2.083226203918457, + "rewards/rejected": -2.1017048358917236, + "step": 1245 + }, + { + "epoch": 2.0, + "learning_rate": 4.3212445501387233e-07, + "logits/chosen": -1.484950065612793, + "logits/rejected": -1.4397361278533936, + "logps/chosen": -75.26400756835938, + "logps/rejected": -100.62810516357422, + "loss": 0.3457, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.28235310316085815, + "rewards/margins": 2.8017115592956543, + "rewards/rejected": -3.0840647220611572, + "step": 1246 + }, + { + "epoch": 2.0, + "learning_rate": 4.3202536662703134e-07, + "logits/chosen": -1.3811062574386597, + "logits/rejected": -1.4219859838485718, + "logps/chosen": -81.69984436035156, + "logps/rejected": -124.89698791503906, + "loss": 0.1273, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.2544810175895691, + "rewards/margins": 4.803673267364502, + "rewards/rejected": -5.058154106140137, + "step": 1247 + }, + { + "epoch": 2.0, + "learning_rate": 4.3192627824019025e-07, + "logits/chosen": -1.466765284538269, + "logits/rejected": -1.4616584777832031, + "logps/chosen": -93.19338989257812, + "logps/rejected": -127.35762023925781, + "loss": 0.1427, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0927833616733551, + "rewards/margins": 3.1084632873535156, + "rewards/rejected": -3.0156800746917725, + "step": 1248 + }, + { + "epoch": 2.0, + "learning_rate": 4.3182718985334916e-07, + "logits/chosen": -1.5544555187225342, + "logits/rejected": -1.5313024520874023, + "logps/chosen": -119.27314758300781, + "logps/rejected": -139.83743286132812, + "loss": 0.1927, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.22092323005199432, + "rewards/margins": 4.216795444488525, + "rewards/rejected": -4.437718391418457, + "step": 1249 + }, + { + "epoch": 2.01, + "learning_rate": 4.317281014665081e-07, + "logits/chosen": -1.5011337995529175, + "logits/rejected": -1.508955955505371, + "logps/chosen": -72.34799194335938, + "logps/rejected": -153.66970825195312, + "loss": 0.0893, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.15802927315235138, + "rewards/margins": 5.3063530921936035, + "rewards/rejected": -5.1483235359191895, + "step": 1250 + }, + { + "epoch": 2.01, + "learning_rate": 4.31629013079667e-07, + "logits/chosen": -1.3680545091629028, + "logits/rejected": -1.3004566431045532, + "logps/chosen": -115.002685546875, + "logps/rejected": -113.25267028808594, + "loss": 0.1608, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.21965044736862183, + "rewards/margins": 3.625779390335083, + "rewards/rejected": -3.4061288833618164, + "step": 1251 + }, + { + "epoch": 2.01, + "learning_rate": 4.31529924692826e-07, + "logits/chosen": -1.4229012727737427, + "logits/rejected": -1.4551069736480713, + "logps/chosen": -67.79412078857422, + "logps/rejected": -148.41510009765625, + "loss": 0.1167, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.22255849838256836, + "rewards/margins": 4.245892524719238, + "rewards/rejected": -4.023333549499512, + "step": 1252 + }, + { + "epoch": 2.01, + "learning_rate": 4.3143083630598494e-07, + "logits/chosen": -1.407235860824585, + "logits/rejected": -1.4739642143249512, + "logps/chosen": -87.83484649658203, + "logps/rejected": -133.2175750732422, + "loss": 0.2396, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1398205757141113, + "rewards/margins": 3.8166840076446533, + "rewards/rejected": -4.9565043449401855, + "step": 1253 + }, + { + "epoch": 2.01, + "learning_rate": 4.3133174791914385e-07, + "logits/chosen": -1.4445710182189941, + "logits/rejected": -1.44650137424469, + "logps/chosen": -81.70764923095703, + "logps/rejected": -110.63765716552734, + "loss": 0.106, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5410981774330139, + "rewards/margins": 3.733992099761963, + "rewards/rejected": -4.275090217590332, + "step": 1254 + }, + { + "epoch": 2.01, + "learning_rate": 4.312326595323028e-07, + "logits/chosen": -1.3969769477844238, + "logits/rejected": -1.4331176280975342, + "logps/chosen": -82.84854125976562, + "logps/rejected": -101.40031433105469, + "loss": 0.1919, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.522128701210022, + "rewards/margins": 2.6896324157714844, + "rewards/rejected": -3.211760997772217, + "step": 1255 + }, + { + "epoch": 2.02, + "learning_rate": 4.311335711454617e-07, + "logits/chosen": -1.4795727729797363, + "logits/rejected": -1.3479630947113037, + "logps/chosen": -102.34103393554688, + "logps/rejected": -118.60768127441406, + "loss": 0.1861, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.15835285186767578, + "rewards/margins": 3.472829580307007, + "rewards/rejected": -3.6311821937561035, + "step": 1256 + }, + { + "epoch": 2.02, + "learning_rate": 4.310344827586206e-07, + "logits/chosen": -1.5303661823272705, + "logits/rejected": -1.4470382928848267, + "logps/chosen": -103.25740051269531, + "logps/rejected": -124.37181854248047, + "loss": 0.3216, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.3342008590698242, + "rewards/margins": 3.673947811126709, + "rewards/rejected": -4.008148193359375, + "step": 1257 + }, + { + "epoch": 2.02, + "learning_rate": 4.3093539437177964e-07, + "logits/chosen": -1.4062355756759644, + "logits/rejected": -1.433314561843872, + "logps/chosen": -82.02986145019531, + "logps/rejected": -156.03985595703125, + "loss": 0.1258, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.26443159580230713, + "rewards/margins": 7.591645240783691, + "rewards/rejected": -7.327213764190674, + "step": 1258 + }, + { + "epoch": 2.02, + "learning_rate": 4.3083630598493854e-07, + "logits/chosen": -1.3208235502243042, + "logits/rejected": -1.4586138725280762, + "logps/chosen": -61.9752197265625, + "logps/rejected": -106.89537811279297, + "loss": 0.1496, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.14034700393676758, + "rewards/margins": 2.4393868446350098, + "rewards/rejected": -2.5797338485717773, + "step": 1259 + }, + { + "epoch": 2.02, + "learning_rate": 4.307372175980975e-07, + "logits/chosen": -1.5099453926086426, + "logits/rejected": -1.4955332279205322, + "logps/chosen": -66.140869140625, + "logps/rejected": -106.26004028320312, + "loss": 0.1196, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.16897733509540558, + "rewards/margins": 5.1604390144348145, + "rewards/rejected": -4.991461753845215, + "step": 1260 + }, + { + "epoch": 2.02, + "learning_rate": 4.306381292112564e-07, + "logits/chosen": -1.465770959854126, + "logits/rejected": -1.4089298248291016, + "logps/chosen": -99.04536437988281, + "logps/rejected": -128.20281982421875, + "loss": 0.1208, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.33492976427078247, + "rewards/margins": 4.085660457611084, + "rewards/rejected": -4.420590400695801, + "step": 1261 + }, + { + "epoch": 2.03, + "learning_rate": 4.305390408244153e-07, + "logits/chosen": -1.4141796827316284, + "logits/rejected": -1.4638724327087402, + "logps/chosen": -97.93463134765625, + "logps/rejected": -132.31378173828125, + "loss": 0.0929, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6094722747802734, + "rewards/margins": 3.4959359169006348, + "rewards/rejected": -5.105408191680908, + "step": 1262 + }, + { + "epoch": 2.03, + "learning_rate": 4.3043995243757433e-07, + "logits/chosen": -1.493220329284668, + "logits/rejected": -1.524265170097351, + "logps/chosen": -81.64726257324219, + "logps/rejected": -129.051513671875, + "loss": 0.1432, + "rewards/accuracies": 0.75, + "rewards/chosen": -1.677672028541565, + "rewards/margins": 2.242954730987549, + "rewards/rejected": -3.920626640319824, + "step": 1263 + }, + { + "epoch": 2.03, + "learning_rate": 4.3034086405073324e-07, + "logits/chosen": -1.5418639183044434, + "logits/rejected": -1.5842581987380981, + "logps/chosen": -83.81095886230469, + "logps/rejected": -98.02456665039062, + "loss": 0.1422, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.003964036703109741, + "rewards/margins": 2.7386045455932617, + "rewards/rejected": -2.734640598297119, + "step": 1264 + }, + { + "epoch": 2.03, + "learning_rate": 4.302417756638922e-07, + "logits/chosen": -1.5881929397583008, + "logits/rejected": -1.6196166276931763, + "logps/chosen": -88.98538208007812, + "logps/rejected": -112.26771545410156, + "loss": 0.2088, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.06040230393409729, + "rewards/margins": 2.9770991802215576, + "rewards/rejected": -3.037501811981201, + "step": 1265 + }, + { + "epoch": 2.03, + "learning_rate": 4.301426872770511e-07, + "logits/chosen": -1.5618680715560913, + "logits/rejected": -1.5513938665390015, + "logps/chosen": -77.24048614501953, + "logps/rejected": -121.65913391113281, + "loss": 0.1451, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.19279365241527557, + "rewards/margins": 5.798108100891113, + "rewards/rejected": -5.990901470184326, + "step": 1266 + }, + { + "epoch": 2.03, + "learning_rate": 4.3004359889021e-07, + "logits/chosen": -1.341871738433838, + "logits/rejected": -1.3327093124389648, + "logps/chosen": -83.49139404296875, + "logps/rejected": -128.03341674804688, + "loss": 0.0935, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3953245282173157, + "rewards/margins": 4.485246181488037, + "rewards/rejected": -4.089921951293945, + "step": 1267 + }, + { + "epoch": 2.04, + "learning_rate": 4.29944510503369e-07, + "logits/chosen": -1.5106936693191528, + "logits/rejected": -1.5004298686981201, + "logps/chosen": -88.78207397460938, + "logps/rejected": -149.87152099609375, + "loss": 0.0829, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.4655628502368927, + "rewards/margins": 6.004593372344971, + "rewards/rejected": -6.470156192779541, + "step": 1268 + }, + { + "epoch": 2.04, + "learning_rate": 4.2984542211652793e-07, + "logits/chosen": -1.6028718948364258, + "logits/rejected": -1.5930198431015015, + "logps/chosen": -80.30803680419922, + "logps/rejected": -100.2142333984375, + "loss": 0.2871, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9147788286209106, + "rewards/margins": 2.1014444828033447, + "rewards/rejected": -3.016223430633545, + "step": 1269 + }, + { + "epoch": 2.04, + "learning_rate": 4.297463337296869e-07, + "logits/chosen": -1.4755864143371582, + "logits/rejected": -1.4524469375610352, + "logps/chosen": -84.04154205322266, + "logps/rejected": -109.63292694091797, + "loss": 0.1721, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.31514739990234375, + "rewards/margins": 3.342761278152466, + "rewards/rejected": -3.6579086780548096, + "step": 1270 + }, + { + "epoch": 2.04, + "learning_rate": 4.296472453428458e-07, + "logits/chosen": -1.3040111064910889, + "logits/rejected": -1.4470059871673584, + "logps/chosen": -97.77716064453125, + "logps/rejected": -135.5336456298828, + "loss": 0.1121, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.46023786067962646, + "rewards/margins": 2.5814943313598633, + "rewards/rejected": -3.0417323112487793, + "step": 1271 + }, + { + "epoch": 2.04, + "learning_rate": 4.295481569560047e-07, + "logits/chosen": -1.6371796131134033, + "logits/rejected": -1.6856129169464111, + "logps/chosen": -73.14612579345703, + "logps/rejected": -111.6485824584961, + "loss": 0.1653, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6499501466751099, + "rewards/margins": 3.002352476119995, + "rewards/rejected": -3.6523025035858154, + "step": 1272 + }, + { + "epoch": 2.04, + "learning_rate": 4.2944906856916366e-07, + "logits/chosen": -1.5682568550109863, + "logits/rejected": -1.52036452293396, + "logps/chosen": -68.01469421386719, + "logps/rejected": -116.35298156738281, + "loss": 0.1507, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.010186254978179932, + "rewards/margins": 5.229900360107422, + "rewards/rejected": -5.240086555480957, + "step": 1273 + }, + { + "epoch": 2.04, + "learning_rate": 4.293499801823226e-07, + "logits/chosen": -1.3977530002593994, + "logits/rejected": -1.3857927322387695, + "logps/chosen": -107.8603286743164, + "logps/rejected": -125.48271179199219, + "loss": 0.266, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5130864977836609, + "rewards/margins": 2.9510769844055176, + "rewards/rejected": -3.4641637802124023, + "step": 1274 + }, + { + "epoch": 2.05, + "learning_rate": 4.292508917954816e-07, + "logits/chosen": -1.387150526046753, + "logits/rejected": -1.375797986984253, + "logps/chosen": -80.3109359741211, + "logps/rejected": -107.121337890625, + "loss": 0.2509, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.11880046129226685, + "rewards/margins": 2.9024014472961426, + "rewards/rejected": -2.7836012840270996, + "step": 1275 + }, + { + "epoch": 2.05, + "learning_rate": 4.291518034086405e-07, + "logits/chosen": -1.60823392868042, + "logits/rejected": -1.5337653160095215, + "logps/chosen": -66.01141357421875, + "logps/rejected": -98.07562255859375, + "loss": 0.2106, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6017864942550659, + "rewards/margins": 1.9843767881393433, + "rewards/rejected": -2.586163282394409, + "step": 1276 + }, + { + "epoch": 2.05, + "learning_rate": 4.290527150217994e-07, + "logits/chosen": -1.4426965713500977, + "logits/rejected": -1.488049864768982, + "logps/chosen": -89.1236801147461, + "logps/rejected": -109.98504638671875, + "loss": 0.1449, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.02813863568007946, + "rewards/margins": 2.3508238792419434, + "rewards/rejected": -2.378962516784668, + "step": 1277 + }, + { + "epoch": 2.05, + "learning_rate": 4.2895362663495835e-07, + "logits/chosen": -1.5795447826385498, + "logits/rejected": -1.6237201690673828, + "logps/chosen": -68.04721069335938, + "logps/rejected": -111.66845703125, + "loss": 0.098, + "rewards/accuracies": 0.75, + "rewards/chosen": -1.1038775444030762, + "rewards/margins": 2.7163329124450684, + "rewards/rejected": -3.8202106952667236, + "step": 1278 + }, + { + "epoch": 2.05, + "learning_rate": 4.288545382481173e-07, + "logits/chosen": -1.4876430034637451, + "logits/rejected": -1.550061821937561, + "logps/chosen": -78.9802017211914, + "logps/rejected": -119.94223022460938, + "loss": 0.1467, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.671481728553772, + "rewards/margins": 3.715240955352783, + "rewards/rejected": -4.386722564697266, + "step": 1279 + }, + { + "epoch": 2.05, + "learning_rate": 4.287554498612763e-07, + "logits/chosen": -1.4919487237930298, + "logits/rejected": -1.4765551090240479, + "logps/chosen": -92.02061462402344, + "logps/rejected": -138.22731018066406, + "loss": 0.1127, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7657157778739929, + "rewards/margins": 4.477447509765625, + "rewards/rejected": -5.243163585662842, + "step": 1280 + }, + { + "epoch": 2.06, + "learning_rate": 4.286563614744352e-07, + "logits/chosen": -1.5616093873977661, + "logits/rejected": -1.5489097833633423, + "logps/chosen": -107.42478942871094, + "logps/rejected": -136.08245849609375, + "loss": 0.1117, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.282527208328247, + "rewards/margins": 3.766399383544922, + "rewards/rejected": -5.048926830291748, + "step": 1281 + }, + { + "epoch": 2.06, + "learning_rate": 4.285572730875941e-07, + "logits/chosen": -1.4993157386779785, + "logits/rejected": -1.5647921562194824, + "logps/chosen": -90.16441345214844, + "logps/rejected": -150.462646484375, + "loss": 0.1052, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7772684693336487, + "rewards/margins": 4.079909324645996, + "rewards/rejected": -4.857177734375, + "step": 1282 + }, + { + "epoch": 2.06, + "learning_rate": 4.2845818470075305e-07, + "logits/chosen": -1.4780125617980957, + "logits/rejected": -1.4558207988739014, + "logps/chosen": -80.77523803710938, + "logps/rejected": -141.18577575683594, + "loss": 0.1572, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5898529887199402, + "rewards/margins": 3.213545322418213, + "rewards/rejected": -3.803398370742798, + "step": 1283 + }, + { + "epoch": 2.06, + "learning_rate": 4.28359096313912e-07, + "logits/chosen": -1.4974738359451294, + "logits/rejected": -1.572270393371582, + "logps/chosen": -117.81903076171875, + "logps/rejected": -140.6079559326172, + "loss": 0.1678, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7976266741752625, + "rewards/margins": 3.3109230995178223, + "rewards/rejected": -4.108550071716309, + "step": 1284 + }, + { + "epoch": 2.06, + "learning_rate": 4.2826000792707097e-07, + "logits/chosen": -1.6578664779663086, + "logits/rejected": -1.6431982517242432, + "logps/chosen": -82.29998016357422, + "logps/rejected": -149.7062225341797, + "loss": 0.0892, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1788790225982666, + "rewards/margins": 5.040714263916016, + "rewards/rejected": -6.219593524932861, + "step": 1285 + }, + { + "epoch": 2.06, + "learning_rate": 4.281609195402299e-07, + "logits/chosen": -1.647971510887146, + "logits/rejected": -1.7033724784851074, + "logps/chosen": -98.63599395751953, + "logps/rejected": -138.95445251464844, + "loss": 0.1032, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8130593299865723, + "rewards/margins": 3.6318981647491455, + "rewards/rejected": -4.444957733154297, + "step": 1286 + }, + { + "epoch": 2.07, + "learning_rate": 4.280618311533888e-07, + "logits/chosen": -1.4832065105438232, + "logits/rejected": -1.4377533197402954, + "logps/chosen": -90.29832458496094, + "logps/rejected": -111.72748565673828, + "loss": 0.114, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.8295157551765442, + "rewards/margins": 1.7472118139266968, + "rewards/rejected": -2.576727867126465, + "step": 1287 + }, + { + "epoch": 2.07, + "learning_rate": 4.2796274276654774e-07, + "logits/chosen": -1.6706310510635376, + "logits/rejected": -1.6082743406295776, + "logps/chosen": -108.583984375, + "logps/rejected": -155.32073974609375, + "loss": 0.163, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1076383590698242, + "rewards/margins": 4.575101852416992, + "rewards/rejected": -5.682740211486816, + "step": 1288 + }, + { + "epoch": 2.07, + "learning_rate": 4.2786365437970665e-07, + "logits/chosen": -1.553928017616272, + "logits/rejected": -1.464813470840454, + "logps/chosen": -114.1164321899414, + "logps/rejected": -138.7145233154297, + "loss": 0.1022, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1080217361450195, + "rewards/margins": 3.4946556091308594, + "rewards/rejected": -4.602676868438721, + "step": 1289 + }, + { + "epoch": 2.07, + "learning_rate": 4.277645659928656e-07, + "logits/chosen": -1.6050275564193726, + "logits/rejected": -1.5640316009521484, + "logps/chosen": -99.86495971679688, + "logps/rejected": -158.56036376953125, + "loss": 0.1845, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.66361403465271, + "rewards/margins": 5.470688819885254, + "rewards/rejected": -6.134303092956543, + "step": 1290 + }, + { + "epoch": 2.07, + "learning_rate": 4.2766547760602457e-07, + "logits/chosen": -1.4872000217437744, + "logits/rejected": -1.4102535247802734, + "logps/chosen": -79.47628784179688, + "logps/rejected": -114.87718963623047, + "loss": 0.1067, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.42906662821769714, + "rewards/margins": 3.4579083919525146, + "rewards/rejected": -3.886974811553955, + "step": 1291 + }, + { + "epoch": 2.07, + "learning_rate": 4.2756638921918347e-07, + "logits/chosen": -1.534097671508789, + "logits/rejected": -1.536102533340454, + "logps/chosen": -107.53334045410156, + "logps/rejected": -158.9542236328125, + "loss": 0.0703, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1069401502609253, + "rewards/margins": 4.953769683837891, + "rewards/rejected": -6.060709476470947, + "step": 1292 + }, + { + "epoch": 2.08, + "learning_rate": 4.2746730083234243e-07, + "logits/chosen": -1.6231398582458496, + "logits/rejected": -1.532738208770752, + "logps/chosen": -84.05986785888672, + "logps/rejected": -100.68190002441406, + "loss": 0.1366, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.3910371661186218, + "rewards/margins": 2.09564471244812, + "rewards/rejected": -2.4866819381713867, + "step": 1293 + }, + { + "epoch": 2.08, + "learning_rate": 4.2736821244550134e-07, + "logits/chosen": -1.509333610534668, + "logits/rejected": -1.4769201278686523, + "logps/chosen": -87.03996276855469, + "logps/rejected": -117.03504943847656, + "loss": 0.1587, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.3784305453300476, + "rewards/margins": 2.687852382659912, + "rewards/rejected": -3.0662829875946045, + "step": 1294 + }, + { + "epoch": 2.08, + "learning_rate": 4.272691240586603e-07, + "logits/chosen": -1.4786738157272339, + "logits/rejected": -1.4909909963607788, + "logps/chosen": -91.95360565185547, + "logps/rejected": -119.86714172363281, + "loss": 0.171, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3409894704818726, + "rewards/margins": 1.2484445571899414, + "rewards/rejected": -2.5894339084625244, + "step": 1295 + }, + { + "epoch": 2.08, + "learning_rate": 4.2717003567181926e-07, + "logits/chosen": -1.5702946186065674, + "logits/rejected": -1.5587682723999023, + "logps/chosen": -114.6760025024414, + "logps/rejected": -136.5312957763672, + "loss": 0.2047, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.44146645069122314, + "rewards/margins": 4.101672172546387, + "rewards/rejected": -4.54313850402832, + "step": 1296 + }, + { + "epoch": 2.08, + "learning_rate": 4.2707094728497817e-07, + "logits/chosen": -1.4429200887680054, + "logits/rejected": -1.4038357734680176, + "logps/chosen": -85.59593200683594, + "logps/rejected": -104.63298034667969, + "loss": 0.1582, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9341185092926025, + "rewards/margins": 3.3808820247650146, + "rewards/rejected": -4.315000534057617, + "step": 1297 + }, + { + "epoch": 2.08, + "learning_rate": 4.269718588981371e-07, + "logits/chosen": -1.5841290950775146, + "logits/rejected": -1.6385498046875, + "logps/chosen": -89.0302505493164, + "logps/rejected": -145.2225799560547, + "loss": 0.1178, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2459421157836914, + "rewards/margins": 3.768739700317383, + "rewards/rejected": -5.014681816101074, + "step": 1298 + }, + { + "epoch": 2.09, + "learning_rate": 4.2687277051129603e-07, + "logits/chosen": -1.495919942855835, + "logits/rejected": -1.5673154592514038, + "logps/chosen": -62.68367385864258, + "logps/rejected": -147.11851501464844, + "loss": 0.1061, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.22919194400310516, + "rewards/margins": 6.810031414031982, + "rewards/rejected": -7.039223670959473, + "step": 1299 + }, + { + "epoch": 2.09, + "learning_rate": 4.26773682124455e-07, + "logits/chosen": -1.636296272277832, + "logits/rejected": -1.5996400117874146, + "logps/chosen": -85.95165252685547, + "logps/rejected": -119.95809936523438, + "loss": 0.1003, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2713234424591064, + "rewards/margins": 4.2623114585876465, + "rewards/rejected": -5.533635139465332, + "step": 1300 + }, + { + "epoch": 2.09, + "learning_rate": 4.2667459373761395e-07, + "logits/chosen": -1.5026570558547974, + "logits/rejected": -1.4609479904174805, + "logps/chosen": -82.52363586425781, + "logps/rejected": -135.9720001220703, + "loss": 0.0776, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7348797917366028, + "rewards/margins": 4.531760215759277, + "rewards/rejected": -5.2666401863098145, + "step": 1301 + }, + { + "epoch": 2.09, + "learning_rate": 4.2657550535077286e-07, + "logits/chosen": -1.491819143295288, + "logits/rejected": -1.3917618989944458, + "logps/chosen": -98.51071166992188, + "logps/rejected": -115.27081298828125, + "loss": 0.1793, + "rewards/accuracies": 0.75, + "rewards/chosen": -1.411280632019043, + "rewards/margins": 1.1156457662582397, + "rewards/rejected": -2.5269265174865723, + "step": 1302 + }, + { + "epoch": 2.09, + "learning_rate": 4.264764169639318e-07, + "logits/chosen": -1.605468511581421, + "logits/rejected": -1.6631664037704468, + "logps/chosen": -71.73342895507812, + "logps/rejected": -161.34703063964844, + "loss": 0.1208, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.491355836391449, + "rewards/margins": 7.22337007522583, + "rewards/rejected": -7.714725971221924, + "step": 1303 + }, + { + "epoch": 2.09, + "learning_rate": 4.263773285770907e-07, + "logits/chosen": -1.6132770776748657, + "logits/rejected": -1.5890449285507202, + "logps/chosen": -102.54973602294922, + "logps/rejected": -139.01644897460938, + "loss": 0.2029, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4258519411087036, + "rewards/margins": 2.742027521133423, + "rewards/rejected": -4.167879104614258, + "step": 1304 + }, + { + "epoch": 2.09, + "learning_rate": 4.262782401902497e-07, + "logits/chosen": -1.5892622470855713, + "logits/rejected": -1.6467266082763672, + "logps/chosen": -101.92103576660156, + "logps/rejected": -179.23594665527344, + "loss": 0.0807, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5818711519241333, + "rewards/margins": 6.830059051513672, + "rewards/rejected": -7.411930084228516, + "step": 1305 + }, + { + "epoch": 2.1, + "learning_rate": 4.2617915180340865e-07, + "logits/chosen": -1.691285490989685, + "logits/rejected": -1.570646047592163, + "logps/chosen": -126.37004089355469, + "logps/rejected": -130.4638214111328, + "loss": 0.2313, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1759960651397705, + "rewards/margins": 2.188406229019165, + "rewards/rejected": -4.364401817321777, + "step": 1306 + }, + { + "epoch": 2.1, + "learning_rate": 4.2608006341656755e-07, + "logits/chosen": -1.681898832321167, + "logits/rejected": -1.7083895206451416, + "logps/chosen": -98.27877044677734, + "logps/rejected": -124.12835693359375, + "loss": 0.1044, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12323473393917084, + "rewards/margins": 2.5677223205566406, + "rewards/rejected": -2.6909570693969727, + "step": 1307 + }, + { + "epoch": 2.1, + "learning_rate": 4.259809750297265e-07, + "logits/chosen": -1.708587884902954, + "logits/rejected": -1.7394015789031982, + "logps/chosen": -112.41564178466797, + "logps/rejected": -131.631103515625, + "loss": 0.1044, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4892308712005615, + "rewards/margins": 2.9743919372558594, + "rewards/rejected": -4.463622570037842, + "step": 1308 + }, + { + "epoch": 2.1, + "learning_rate": 4.258818866428854e-07, + "logits/chosen": -1.49684476852417, + "logits/rejected": -1.5309290885925293, + "logps/chosen": -100.93248748779297, + "logps/rejected": -152.20045471191406, + "loss": 0.108, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3335492610931396, + "rewards/margins": 3.2699379920959473, + "rewards/rejected": -4.603487491607666, + "step": 1309 + }, + { + "epoch": 2.1, + "learning_rate": 4.257827982560443e-07, + "logits/chosen": -1.671911358833313, + "logits/rejected": -1.5683895349502563, + "logps/chosen": -106.39058685302734, + "logps/rejected": -133.99887084960938, + "loss": 0.1588, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1430447101593018, + "rewards/margins": 4.7854156494140625, + "rewards/rejected": -5.928460121154785, + "step": 1310 + }, + { + "epoch": 2.1, + "learning_rate": 4.2568370986920334e-07, + "logits/chosen": -1.5010700225830078, + "logits/rejected": -1.5616753101348877, + "logps/chosen": -96.9313735961914, + "logps/rejected": -132.6835479736328, + "loss": 0.1598, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3041895627975464, + "rewards/margins": 2.4039015769958496, + "rewards/rejected": -3.7080912590026855, + "step": 1311 + }, + { + "epoch": 2.11, + "learning_rate": 4.2558462148236225e-07, + "logits/chosen": -1.5929434299468994, + "logits/rejected": -1.7059326171875, + "logps/chosen": -70.94236755371094, + "logps/rejected": -128.29991149902344, + "loss": 0.0903, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6376080513000488, + "rewards/margins": 5.028254985809326, + "rewards/rejected": -5.665863037109375, + "step": 1312 + }, + { + "epoch": 2.11, + "learning_rate": 4.254855330955212e-07, + "logits/chosen": -1.5884292125701904, + "logits/rejected": -1.557417869567871, + "logps/chosen": -92.57377624511719, + "logps/rejected": -125.71702575683594, + "loss": 0.1674, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7722769975662231, + "rewards/margins": 3.0792806148529053, + "rewards/rejected": -4.851557731628418, + "step": 1313 + }, + { + "epoch": 2.11, + "learning_rate": 4.253864447086801e-07, + "logits/chosen": -1.4356108903884888, + "logits/rejected": -1.4120104312896729, + "logps/chosen": -83.44403076171875, + "logps/rejected": -164.661376953125, + "loss": 0.1642, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1125842332839966, + "rewards/margins": 5.615421295166016, + "rewards/rejected": -6.728005409240723, + "step": 1314 + }, + { + "epoch": 2.11, + "learning_rate": 4.25287356321839e-07, + "logits/chosen": -1.499813437461853, + "logits/rejected": -1.523715615272522, + "logps/chosen": -96.14567565917969, + "logps/rejected": -143.21331787109375, + "loss": 0.2052, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.221178650856018, + "rewards/margins": 4.39111328125, + "rewards/rejected": -5.612292289733887, + "step": 1315 + }, + { + "epoch": 2.11, + "learning_rate": 4.2518826793499803e-07, + "logits/chosen": -1.6569608449935913, + "logits/rejected": -1.7023682594299316, + "logps/chosen": -75.20372009277344, + "logps/rejected": -135.3825225830078, + "loss": 0.1672, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.04155281186103821, + "rewards/margins": 5.630866527557373, + "rewards/rejected": -5.672419548034668, + "step": 1316 + }, + { + "epoch": 2.11, + "learning_rate": 4.2508917954815694e-07, + "logits/chosen": -1.6912086009979248, + "logits/rejected": -1.6092019081115723, + "logps/chosen": -122.26627349853516, + "logps/rejected": -125.98919677734375, + "loss": 0.1749, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9559603929519653, + "rewards/margins": 5.213711261749268, + "rewards/rejected": -6.169672012329102, + "step": 1317 + }, + { + "epoch": 2.12, + "learning_rate": 4.249900911613159e-07, + "logits/chosen": -1.5831563472747803, + "logits/rejected": -1.6024799346923828, + "logps/chosen": -128.5894775390625, + "logps/rejected": -148.31863403320312, + "loss": 0.2064, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.634151816368103, + "rewards/margins": 3.184436321258545, + "rewards/rejected": -4.8185882568359375, + "step": 1318 + }, + { + "epoch": 2.12, + "learning_rate": 4.248910027744748e-07, + "logits/chosen": -1.5459542274475098, + "logits/rejected": -1.5303688049316406, + "logps/chosen": -107.01530456542969, + "logps/rejected": -172.8377685546875, + "loss": 0.2366, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4035950899124146, + "rewards/margins": 4.822530746459961, + "rewards/rejected": -6.226125240325928, + "step": 1319 + }, + { + "epoch": 2.12, + "learning_rate": 4.247919143876337e-07, + "logits/chosen": -1.5114527940750122, + "logits/rejected": -1.5106487274169922, + "logps/chosen": -113.87954711914062, + "logps/rejected": -150.77975463867188, + "loss": 0.113, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2779321670532227, + "rewards/margins": 4.351925849914551, + "rewards/rejected": -5.629858016967773, + "step": 1320 + }, + { + "epoch": 2.12, + "learning_rate": 4.246928260007927e-07, + "logits/chosen": -1.5606937408447266, + "logits/rejected": -1.5328292846679688, + "logps/chosen": -108.00250244140625, + "logps/rejected": -135.02781677246094, + "loss": 0.1357, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3235188722610474, + "rewards/margins": 2.9059031009674072, + "rewards/rejected": -4.229422092437744, + "step": 1321 + }, + { + "epoch": 2.12, + "learning_rate": 4.2459373761395163e-07, + "logits/chosen": -1.5690838098526, + "logits/rejected": -1.5560059547424316, + "logps/chosen": -114.4980697631836, + "logps/rejected": -148.616943359375, + "loss": 0.0875, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.988562822341919, + "rewards/margins": 3.3928020000457764, + "rewards/rejected": -4.381364822387695, + "step": 1322 + }, + { + "epoch": 2.12, + "learning_rate": 4.244946492271106e-07, + "logits/chosen": -1.6794424057006836, + "logits/rejected": -1.6560338735580444, + "logps/chosen": -98.9140853881836, + "logps/rejected": -128.63052368164062, + "loss": 0.0779, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.119200348854065, + "rewards/margins": 3.031686305999756, + "rewards/rejected": -4.150886535644531, + "step": 1323 + }, + { + "epoch": 2.13, + "learning_rate": 4.243955608402695e-07, + "logits/chosen": -1.6823468208312988, + "logits/rejected": -1.6544523239135742, + "logps/chosen": -70.92399597167969, + "logps/rejected": -110.52389526367188, + "loss": 0.1474, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7853749394416809, + "rewards/margins": 3.3389179706573486, + "rewards/rejected": -4.124292373657227, + "step": 1324 + }, + { + "epoch": 2.13, + "learning_rate": 4.242964724534284e-07, + "logits/chosen": -1.5590219497680664, + "logits/rejected": -1.5179688930511475, + "logps/chosen": -74.6555404663086, + "logps/rejected": -100.9283447265625, + "loss": 0.1318, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.44475603103637695, + "rewards/margins": 2.5158450603485107, + "rewards/rejected": -2.9606008529663086, + "step": 1325 + }, + { + "epoch": 2.13, + "learning_rate": 4.241973840665874e-07, + "logits/chosen": -1.4580512046813965, + "logits/rejected": -1.4559087753295898, + "logps/chosen": -101.34197235107422, + "logps/rejected": -115.62162780761719, + "loss": 0.1282, + "rewards/accuracies": 0.5, + "rewards/chosen": -1.6387038230895996, + "rewards/margins": 1.962522029876709, + "rewards/rejected": -3.6012260913848877, + "step": 1326 + }, + { + "epoch": 2.13, + "learning_rate": 4.240982956797463e-07, + "logits/chosen": -1.5342864990234375, + "logits/rejected": -1.4762542247772217, + "logps/chosen": -111.10257720947266, + "logps/rejected": -158.4173583984375, + "loss": 0.1687, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4809216260910034, + "rewards/margins": 6.160635471343994, + "rewards/rejected": -7.641557216644287, + "step": 1327 + }, + { + "epoch": 2.13, + "learning_rate": 4.2399920729290523e-07, + "logits/chosen": -1.7026541233062744, + "logits/rejected": -1.7147884368896484, + "logps/chosen": -93.839599609375, + "logps/rejected": -153.12818908691406, + "loss": 0.1134, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7958133816719055, + "rewards/margins": 6.794947147369385, + "rewards/rejected": -7.590760231018066, + "step": 1328 + }, + { + "epoch": 2.13, + "learning_rate": 4.239001189060642e-07, + "logits/chosen": -1.5107717514038086, + "logits/rejected": -1.4952237606048584, + "logps/chosen": -110.3841323852539, + "logps/rejected": -170.31298828125, + "loss": 0.1944, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7613985538482666, + "rewards/margins": 4.644292831420898, + "rewards/rejected": -6.405691623687744, + "step": 1329 + }, + { + "epoch": 2.13, + "learning_rate": 4.238010305192231e-07, + "logits/chosen": -1.5817116498947144, + "logits/rejected": -1.4830749034881592, + "logps/chosen": -73.76655578613281, + "logps/rejected": -111.94356536865234, + "loss": 0.0671, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.06894490122795105, + "rewards/margins": 5.265876293182373, + "rewards/rejected": -5.1969313621521, + "step": 1330 + }, + { + "epoch": 2.14, + "learning_rate": 4.237019421323821e-07, + "logits/chosen": -1.5164767503738403, + "logits/rejected": -1.6011509895324707, + "logps/chosen": -93.28781127929688, + "logps/rejected": -165.05960083007812, + "loss": 0.1573, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7919735312461853, + "rewards/margins": 6.57158899307251, + "rewards/rejected": -7.36356258392334, + "step": 1331 + }, + { + "epoch": 2.14, + "learning_rate": 4.23602853745541e-07, + "logits/chosen": -1.6472721099853516, + "logits/rejected": -1.6341397762298584, + "logps/chosen": -108.298583984375, + "logps/rejected": -110.4988021850586, + "loss": 0.2449, + "rewards/accuracies": 0.5, + "rewards/chosen": -1.889204978942871, + "rewards/margins": 0.37306874990463257, + "rewards/rejected": -2.2622737884521484, + "step": 1332 + }, + { + "epoch": 2.14, + "learning_rate": 4.235037653586999e-07, + "logits/chosen": -1.5981478691101074, + "logits/rejected": -1.6397006511688232, + "logps/chosen": -93.00361633300781, + "logps/rejected": -153.46026611328125, + "loss": 0.0952, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.598907470703125, + "rewards/margins": 5.169729232788086, + "rewards/rejected": -5.768636703491211, + "step": 1333 + }, + { + "epoch": 2.14, + "learning_rate": 4.234046769718589e-07, + "logits/chosen": -1.6387025117874146, + "logits/rejected": -1.5589203834533691, + "logps/chosen": -100.74415588378906, + "logps/rejected": -129.70848083496094, + "loss": 0.0555, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7530797719955444, + "rewards/margins": 4.149608135223389, + "rewards/rejected": -4.902688026428223, + "step": 1334 + }, + { + "epoch": 2.14, + "learning_rate": 4.233055885850178e-07, + "logits/chosen": -1.68950617313385, + "logits/rejected": -1.6537532806396484, + "logps/chosen": -87.95309448242188, + "logps/rejected": -136.2963409423828, + "loss": 0.172, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.2102571427822113, + "rewards/margins": 6.490574359893799, + "rewards/rejected": -6.700831413269043, + "step": 1335 + }, + { + "epoch": 2.14, + "learning_rate": 4.2320650019817675e-07, + "logits/chosen": -1.5810567140579224, + "logits/rejected": -1.687589168548584, + "logps/chosen": -80.44093322753906, + "logps/rejected": -113.61875915527344, + "loss": 0.0805, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.49664080142974854, + "rewards/margins": 3.9355032444000244, + "rewards/rejected": -4.4321441650390625, + "step": 1336 + }, + { + "epoch": 2.15, + "learning_rate": 4.231074118113357e-07, + "logits/chosen": -1.6273839473724365, + "logits/rejected": -1.614226222038269, + "logps/chosen": -84.90538024902344, + "logps/rejected": -124.92545318603516, + "loss": 0.0709, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.009976577013731003, + "rewards/margins": 4.270120143890381, + "rewards/rejected": -4.260143280029297, + "step": 1337 + }, + { + "epoch": 2.15, + "learning_rate": 4.230083234244946e-07, + "logits/chosen": -1.5504019260406494, + "logits/rejected": -1.4655872583389282, + "logps/chosen": -113.11661529541016, + "logps/rejected": -151.08946228027344, + "loss": 0.2112, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1648348569869995, + "rewards/margins": 3.0206949710845947, + "rewards/rejected": -4.185529708862305, + "step": 1338 + }, + { + "epoch": 2.15, + "learning_rate": 4.229092350376536e-07, + "logits/chosen": -1.428726077079773, + "logits/rejected": -1.3749570846557617, + "logps/chosen": -88.1656265258789, + "logps/rejected": -121.31806945800781, + "loss": 0.1139, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.4561257064342499, + "rewards/margins": 3.113236427307129, + "rewards/rejected": -3.569362163543701, + "step": 1339 + }, + { + "epoch": 2.15, + "learning_rate": 4.228101466508125e-07, + "logits/chosen": -1.45125412940979, + "logits/rejected": -1.37581467628479, + "logps/chosen": -117.9089584350586, + "logps/rejected": -139.68336486816406, + "loss": 0.1857, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8905314207077026, + "rewards/margins": 3.9694597721099854, + "rewards/rejected": -5.859991073608398, + "step": 1340 + }, + { + "epoch": 2.15, + "learning_rate": 4.2271105826397144e-07, + "logits/chosen": -1.6286014318466187, + "logits/rejected": -1.7025301456451416, + "logps/chosen": -84.77810668945312, + "logps/rejected": -163.83836364746094, + "loss": 0.1563, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.2818801999092102, + "rewards/margins": 7.251411437988281, + "rewards/rejected": -7.533291816711426, + "step": 1341 + }, + { + "epoch": 2.15, + "learning_rate": 4.226119698771304e-07, + "logits/chosen": -1.5373176336288452, + "logits/rejected": -1.6316320896148682, + "logps/chosen": -100.63174438476562, + "logps/rejected": -122.50738525390625, + "loss": 0.1647, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4930139780044556, + "rewards/margins": 1.776943564414978, + "rewards/rejected": -3.2699577808380127, + "step": 1342 + }, + { + "epoch": 2.16, + "learning_rate": 4.225128814902893e-07, + "logits/chosen": -1.5572986602783203, + "logits/rejected": -1.530056118965149, + "logps/chosen": -115.38034057617188, + "logps/rejected": -121.03761291503906, + "loss": 0.1052, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1698150634765625, + "rewards/margins": 2.0185537338256836, + "rewards/rejected": -3.188368797302246, + "step": 1343 + }, + { + "epoch": 2.16, + "learning_rate": 4.2241379310344827e-07, + "logits/chosen": -1.6170669794082642, + "logits/rejected": -1.69415283203125, + "logps/chosen": -69.53729248046875, + "logps/rejected": -144.02574157714844, + "loss": 0.0835, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.21852093935012817, + "rewards/margins": 4.8164896965026855, + "rewards/rejected": -5.035010814666748, + "step": 1344 + }, + { + "epoch": 2.16, + "learning_rate": 4.223147047166072e-07, + "logits/chosen": -1.5123857259750366, + "logits/rejected": -1.5019184350967407, + "logps/chosen": -111.23906707763672, + "logps/rejected": -149.93643188476562, + "loss": 0.2242, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6050783395767212, + "rewards/margins": 5.282061576843262, + "rewards/rejected": -5.887139797210693, + "step": 1345 + }, + { + "epoch": 2.16, + "learning_rate": 4.2221561632976614e-07, + "logits/chosen": -1.6199452877044678, + "logits/rejected": -1.6723341941833496, + "logps/chosen": -66.60102844238281, + "logps/rejected": -121.03868865966797, + "loss": 0.1255, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.14879541099071503, + "rewards/margins": 4.57734489440918, + "rewards/rejected": -4.428549289703369, + "step": 1346 + }, + { + "epoch": 2.16, + "learning_rate": 4.221165279429251e-07, + "logits/chosen": -1.4633084535598755, + "logits/rejected": -1.52935791015625, + "logps/chosen": -99.8571548461914, + "logps/rejected": -134.326416015625, + "loss": 0.0789, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8046347498893738, + "rewards/margins": 2.899245500564575, + "rewards/rejected": -3.7038803100585938, + "step": 1347 + }, + { + "epoch": 2.16, + "learning_rate": 4.22017439556084e-07, + "logits/chosen": -1.646421194076538, + "logits/rejected": -1.5971318483352661, + "logps/chosen": -98.25833892822266, + "logps/rejected": -145.51687622070312, + "loss": 0.077, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.6878421902656555, + "rewards/margins": 5.739898681640625, + "rewards/rejected": -6.427741050720215, + "step": 1348 + }, + { + "epoch": 2.17, + "learning_rate": 4.2191835116924296e-07, + "logits/chosen": -1.601574182510376, + "logits/rejected": -1.6203757524490356, + "logps/chosen": -94.61051940917969, + "logps/rejected": -190.1167755126953, + "loss": 0.0662, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6150786876678467, + "rewards/margins": 7.861876487731934, + "rewards/rejected": -9.47695541381836, + "step": 1349 + }, + { + "epoch": 2.17, + "learning_rate": 4.2181926278240187e-07, + "logits/chosen": -1.778205156326294, + "logits/rejected": -1.7022258043289185, + "logps/chosen": -102.38400268554688, + "logps/rejected": -130.54403686523438, + "loss": 0.0751, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.063835620880127, + "rewards/margins": 4.951713562011719, + "rewards/rejected": -6.0155487060546875, + "step": 1350 + }, + { + "epoch": 2.17, + "learning_rate": 4.2172017439556083e-07, + "logits/chosen": -1.4367897510528564, + "logits/rejected": -1.4633172750473022, + "logps/chosen": -111.20235443115234, + "logps/rejected": -157.0748291015625, + "loss": 0.1627, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6279869079589844, + "rewards/margins": 4.826987266540527, + "rewards/rejected": -6.454974174499512, + "step": 1351 + }, + { + "epoch": 2.17, + "learning_rate": 4.2162108600871974e-07, + "logits/chosen": -1.7334613800048828, + "logits/rejected": -1.5678167343139648, + "logps/chosen": -114.53622436523438, + "logps/rejected": -148.06655883789062, + "loss": 0.1574, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9566946625709534, + "rewards/margins": 3.5877747535705566, + "rewards/rejected": -4.544469833374023, + "step": 1352 + }, + { + "epoch": 2.17, + "learning_rate": 4.215219976218787e-07, + "logits/chosen": -1.3929617404937744, + "logits/rejected": -1.3696813583374023, + "logps/chosen": -90.74357604980469, + "logps/rejected": -132.06875610351562, + "loss": 0.2188, + "rewards/accuracies": 0.75, + "rewards/chosen": -1.7109330892562866, + "rewards/margins": 2.5364789962768555, + "rewards/rejected": -4.247411727905273, + "step": 1353 + }, + { + "epoch": 2.17, + "learning_rate": 4.2142290923503766e-07, + "logits/chosen": -1.6520874500274658, + "logits/rejected": -1.5476669073104858, + "logps/chosen": -93.54718017578125, + "logps/rejected": -111.56974792480469, + "loss": 0.1573, + "rewards/accuracies": 0.75, + "rewards/chosen": -1.2347793579101562, + "rewards/margins": 3.2729408740997314, + "rewards/rejected": -4.507720470428467, + "step": 1354 + }, + { + "epoch": 2.17, + "learning_rate": 4.2132382084819656e-07, + "logits/chosen": -1.7001110315322876, + "logits/rejected": -1.683689832687378, + "logps/chosen": -130.0028076171875, + "logps/rejected": -143.41941833496094, + "loss": 0.1303, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.154772996902466, + "rewards/margins": 2.82552433013916, + "rewards/rejected": -4.980297088623047, + "step": 1355 + }, + { + "epoch": 2.18, + "learning_rate": 4.212247324613555e-07, + "logits/chosen": -1.6617591381072998, + "logits/rejected": -1.6486166715621948, + "logps/chosen": -69.67792510986328, + "logps/rejected": -133.50640869140625, + "loss": 0.0995, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.32201167941093445, + "rewards/margins": 5.735640048980713, + "rewards/rejected": -6.057651519775391, + "step": 1356 + }, + { + "epoch": 2.18, + "learning_rate": 4.2112564407451443e-07, + "logits/chosen": -1.4548852443695068, + "logits/rejected": -1.4365870952606201, + "logps/chosen": -93.72889709472656, + "logps/rejected": -135.6934051513672, + "loss": 0.0622, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0648747682571411, + "rewards/margins": 4.195525646209717, + "rewards/rejected": -5.260400772094727, + "step": 1357 + }, + { + "epoch": 2.18, + "learning_rate": 4.210265556876734e-07, + "logits/chosen": -1.4290966987609863, + "logits/rejected": -1.601818323135376, + "logps/chosen": -83.42930603027344, + "logps/rejected": -147.025146484375, + "loss": 0.1952, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8801031112670898, + "rewards/margins": 3.3791656494140625, + "rewards/rejected": -4.259268760681152, + "step": 1358 + }, + { + "epoch": 2.18, + "learning_rate": 4.2092746730083235e-07, + "logits/chosen": -1.6270151138305664, + "logits/rejected": -1.6147451400756836, + "logps/chosen": -107.90476989746094, + "logps/rejected": -140.41925048828125, + "loss": 0.0612, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2110953330993652, + "rewards/margins": 2.666970729827881, + "rewards/rejected": -3.878066062927246, + "step": 1359 + }, + { + "epoch": 2.18, + "learning_rate": 4.2082837891399126e-07, + "logits/chosen": -1.4868501424789429, + "logits/rejected": -1.4638943672180176, + "logps/chosen": -117.21194458007812, + "logps/rejected": -138.11985778808594, + "loss": 0.1144, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6306129693984985, + "rewards/margins": 4.488436698913574, + "rewards/rejected": -6.119049549102783, + "step": 1360 + }, + { + "epoch": 2.18, + "learning_rate": 4.2072929052715016e-07, + "logits/chosen": -1.581730842590332, + "logits/rejected": -1.6036536693572998, + "logps/chosen": -82.90255737304688, + "logps/rejected": -142.49960327148438, + "loss": 0.1058, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.32142436504364014, + "rewards/margins": 5.161440849304199, + "rewards/rejected": -5.482865333557129, + "step": 1361 + }, + { + "epoch": 2.19, + "learning_rate": 4.206302021403091e-07, + "logits/chosen": -1.6582212448120117, + "logits/rejected": -1.734370470046997, + "logps/chosen": -98.61082458496094, + "logps/rejected": -156.35775756835938, + "loss": 0.0524, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8819622993469238, + "rewards/margins": 3.191786050796509, + "rewards/rejected": -4.0737481117248535, + "step": 1362 + }, + { + "epoch": 2.19, + "learning_rate": 4.205311137534681e-07, + "logits/chosen": -1.631563425064087, + "logits/rejected": -1.652240514755249, + "logps/chosen": -97.85725402832031, + "logps/rejected": -141.07749938964844, + "loss": 0.0752, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8314801454544067, + "rewards/margins": 4.55560302734375, + "rewards/rejected": -5.387083053588867, + "step": 1363 + }, + { + "epoch": 2.19, + "learning_rate": 4.2043202536662704e-07, + "logits/chosen": -1.7441623210906982, + "logits/rejected": -1.7486480474472046, + "logps/chosen": -106.75997161865234, + "logps/rejected": -176.14306640625, + "loss": 0.2211, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.407038927078247, + "rewards/margins": 5.091558456420898, + "rewards/rejected": -7.498597145080566, + "step": 1364 + }, + { + "epoch": 2.19, + "learning_rate": 4.2033293697978595e-07, + "logits/chosen": -1.6614198684692383, + "logits/rejected": -1.6448270082473755, + "logps/chosen": -105.43278503417969, + "logps/rejected": -165.24612426757812, + "loss": 0.0672, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2480764389038086, + "rewards/margins": 6.223727226257324, + "rewards/rejected": -7.471804141998291, + "step": 1365 + }, + { + "epoch": 2.19, + "learning_rate": 4.2023384859294486e-07, + "logits/chosen": -1.512107253074646, + "logits/rejected": -1.6087093353271484, + "logps/chosen": -89.76960754394531, + "logps/rejected": -187.8627166748047, + "loss": 0.2553, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3199329376220703, + "rewards/margins": 6.251929759979248, + "rewards/rejected": -7.57186222076416, + "step": 1366 + }, + { + "epoch": 2.19, + "learning_rate": 4.201347602061038e-07, + "logits/chosen": -1.4802409410476685, + "logits/rejected": -1.4524266719818115, + "logps/chosen": -125.40880584716797, + "logps/rejected": -145.90513610839844, + "loss": 0.1283, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.538398265838623, + "rewards/margins": 3.8400521278381348, + "rewards/rejected": -5.378450870513916, + "step": 1367 + }, + { + "epoch": 2.2, + "learning_rate": 4.200356718192628e-07, + "logits/chosen": -1.8189306259155273, + "logits/rejected": -1.862351417541504, + "logps/chosen": -96.68977355957031, + "logps/rejected": -121.31135559082031, + "loss": 0.1026, + "rewards/accuracies": 0.75, + "rewards/chosen": -1.1385712623596191, + "rewards/margins": 1.94465970993042, + "rewards/rejected": -3.083230972290039, + "step": 1368 + }, + { + "epoch": 2.2, + "learning_rate": 4.1993658343242173e-07, + "logits/chosen": -1.5251394510269165, + "logits/rejected": -1.5063621997833252, + "logps/chosen": -111.1533203125, + "logps/rejected": -132.92153930664062, + "loss": 0.1697, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.482330799102783, + "rewards/margins": 2.9730148315429688, + "rewards/rejected": -5.455345630645752, + "step": 1369 + }, + { + "epoch": 2.2, + "learning_rate": 4.1983749504558064e-07, + "logits/chosen": -1.7229578495025635, + "logits/rejected": -1.7039639949798584, + "logps/chosen": -87.02064514160156, + "logps/rejected": -140.60501098632812, + "loss": 0.122, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7793865203857422, + "rewards/margins": 4.140188217163086, + "rewards/rejected": -5.919575214385986, + "step": 1370 + }, + { + "epoch": 2.2, + "learning_rate": 4.1973840665873955e-07, + "logits/chosen": -1.6554880142211914, + "logits/rejected": -1.6555004119873047, + "logps/chosen": -94.99942016601562, + "logps/rejected": -143.53350830078125, + "loss": 0.1106, + "rewards/accuracies": 0.75, + "rewards/chosen": -1.3716769218444824, + "rewards/margins": 4.968088150024414, + "rewards/rejected": -6.339764595031738, + "step": 1371 + }, + { + "epoch": 2.2, + "learning_rate": 4.196393182718985e-07, + "logits/chosen": -1.681357979774475, + "logits/rejected": -1.7014377117156982, + "logps/chosen": -99.43440246582031, + "logps/rejected": -140.5343780517578, + "loss": 0.1715, + "rewards/accuracies": 0.75, + "rewards/chosen": -1.3499858379364014, + "rewards/margins": 3.8148181438446045, + "rewards/rejected": -5.164804458618164, + "step": 1372 + }, + { + "epoch": 2.2, + "learning_rate": 4.195402298850574e-07, + "logits/chosen": -1.665648341178894, + "logits/rejected": -1.7325962781906128, + "logps/chosen": -102.22709655761719, + "logps/rejected": -161.62277221679688, + "loss": 0.066, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0520827770233154, + "rewards/margins": 4.549915313720703, + "rewards/rejected": -6.601997375488281, + "step": 1373 + }, + { + "epoch": 2.21, + "learning_rate": 4.1944114149821643e-07, + "logits/chosen": -1.755373477935791, + "logits/rejected": -1.6984219551086426, + "logps/chosen": -102.77658081054688, + "logps/rejected": -141.46978759765625, + "loss": 0.0639, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.304837942123413, + "rewards/margins": 4.596761703491211, + "rewards/rejected": -5.901599884033203, + "step": 1374 + }, + { + "epoch": 2.21, + "learning_rate": 4.1934205311137533e-07, + "logits/chosen": -1.5537302494049072, + "logits/rejected": -1.5209190845489502, + "logps/chosen": -96.80744934082031, + "logps/rejected": -136.14761352539062, + "loss": 0.1563, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.9905681610107422, + "rewards/margins": 5.139268398284912, + "rewards/rejected": -6.129836559295654, + "step": 1375 + }, + { + "epoch": 2.21, + "learning_rate": 4.1924296472453424e-07, + "logits/chosen": -1.5534892082214355, + "logits/rejected": -1.5238069295883179, + "logps/chosen": -94.27818298339844, + "logps/rejected": -171.1336212158203, + "loss": 0.2713, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9838035106658936, + "rewards/margins": 8.661515235900879, + "rewards/rejected": -9.645318984985352, + "step": 1376 + }, + { + "epoch": 2.21, + "learning_rate": 4.191438763376932e-07, + "logits/chosen": -1.57021963596344, + "logits/rejected": -1.683388113975525, + "logps/chosen": -84.00981140136719, + "logps/rejected": -127.963134765625, + "loss": 0.124, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3468647003173828, + "rewards/margins": 3.8721721172332764, + "rewards/rejected": -5.219037055969238, + "step": 1377 + }, + { + "epoch": 2.21, + "learning_rate": 4.190447879508521e-07, + "logits/chosen": -1.649670958518982, + "logits/rejected": -1.554040789604187, + "logps/chosen": -118.57820129394531, + "logps/rejected": -154.29811096191406, + "loss": 0.0877, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2263187170028687, + "rewards/margins": 4.598365783691406, + "rewards/rejected": -5.8246846199035645, + "step": 1378 + }, + { + "epoch": 2.21, + "learning_rate": 4.189456995640111e-07, + "logits/chosen": -1.5287781953811646, + "logits/rejected": -1.5195878744125366, + "logps/chosen": -78.37095642089844, + "logps/rejected": -140.1475830078125, + "loss": 0.0885, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7970118522644043, + "rewards/margins": 5.096306800842285, + "rewards/rejected": -5.8933186531066895, + "step": 1379 + }, + { + "epoch": 2.22, + "learning_rate": 4.1884661117717003e-07, + "logits/chosen": -1.8367159366607666, + "logits/rejected": -1.797780156135559, + "logps/chosen": -96.09795379638672, + "logps/rejected": -142.07186889648438, + "loss": 0.0981, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1155853271484375, + "rewards/margins": 4.8792266845703125, + "rewards/rejected": -4.763641834259033, + "step": 1380 + }, + { + "epoch": 2.22, + "learning_rate": 4.1874752279032893e-07, + "logits/chosen": -1.683313250541687, + "logits/rejected": -1.6235566139221191, + "logps/chosen": -122.1700439453125, + "logps/rejected": -152.3981475830078, + "loss": 0.1572, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.255657196044922, + "rewards/margins": 3.869410514831543, + "rewards/rejected": -6.125067710876465, + "step": 1381 + }, + { + "epoch": 2.22, + "learning_rate": 4.186484344034879e-07, + "logits/chosen": -1.5765355825424194, + "logits/rejected": -1.6029249429702759, + "logps/chosen": -75.38615417480469, + "logps/rejected": -148.80088806152344, + "loss": 0.169, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.42776206135749817, + "rewards/margins": 7.808515548706055, + "rewards/rejected": -8.23627758026123, + "step": 1382 + }, + { + "epoch": 2.22, + "learning_rate": 4.185493460166468e-07, + "logits/chosen": -1.6555942296981812, + "logits/rejected": -1.5520952939987183, + "logps/chosen": -98.8029556274414, + "logps/rejected": -117.80635833740234, + "loss": 0.1483, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1686222106218338, + "rewards/margins": 4.035799026489258, + "rewards/rejected": -4.204421520233154, + "step": 1383 + }, + { + "epoch": 2.22, + "learning_rate": 4.184502576298058e-07, + "logits/chosen": -1.6027350425720215, + "logits/rejected": -1.5766801834106445, + "logps/chosen": -90.74070739746094, + "logps/rejected": -147.1011962890625, + "loss": 0.1556, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7096178531646729, + "rewards/margins": 4.580148696899414, + "rewards/rejected": -5.289766311645508, + "step": 1384 + }, + { + "epoch": 2.22, + "learning_rate": 4.183511692429647e-07, + "logits/chosen": -1.5872423648834229, + "logits/rejected": -1.633470892906189, + "logps/chosen": -100.7939224243164, + "logps/rejected": -150.63735961914062, + "loss": 0.1308, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1119298934936523, + "rewards/margins": 3.558668613433838, + "rewards/rejected": -5.67059850692749, + "step": 1385 + }, + { + "epoch": 2.22, + "learning_rate": 4.1825208085612363e-07, + "logits/chosen": -1.5905462503433228, + "logits/rejected": -1.619890809059143, + "logps/chosen": -100.66637420654297, + "logps/rejected": -133.8612518310547, + "loss": 0.1408, + "rewards/accuracies": 0.75, + "rewards/chosen": -1.5988945960998535, + "rewards/margins": 3.444685220718384, + "rewards/rejected": -5.043579578399658, + "step": 1386 + }, + { + "epoch": 2.23, + "learning_rate": 4.181529924692826e-07, + "logits/chosen": -1.5711575746536255, + "logits/rejected": -1.5867186784744263, + "logps/chosen": -121.06836700439453, + "logps/rejected": -148.47393798828125, + "loss": 0.0703, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0618984699249268, + "rewards/margins": 3.504014492034912, + "rewards/rejected": -4.565913200378418, + "step": 1387 + }, + { + "epoch": 2.23, + "learning_rate": 4.180539040824415e-07, + "logits/chosen": -1.5405067205429077, + "logits/rejected": -1.595107078552246, + "logps/chosen": -107.74894714355469, + "logps/rejected": -139.64031982421875, + "loss": 0.163, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4199810028076172, + "rewards/margins": 4.260577201843262, + "rewards/rejected": -5.680558204650879, + "step": 1388 + }, + { + "epoch": 2.23, + "learning_rate": 4.179548156956005e-07, + "logits/chosen": -1.633763074874878, + "logits/rejected": -1.6057320833206177, + "logps/chosen": -81.29264831542969, + "logps/rejected": -134.66732788085938, + "loss": 0.0886, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0964432954788208, + "rewards/margins": 5.743333339691162, + "rewards/rejected": -6.839776515960693, + "step": 1389 + }, + { + "epoch": 2.23, + "learning_rate": 4.178557273087594e-07, + "logits/chosen": -1.6215623617172241, + "logits/rejected": -1.693090558052063, + "logps/chosen": -108.61741638183594, + "logps/rejected": -176.4752655029297, + "loss": 0.0879, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.143627166748047, + "rewards/margins": 5.481383800506592, + "rewards/rejected": -9.625011444091797, + "step": 1390 + }, + { + "epoch": 2.23, + "learning_rate": 4.177566389219183e-07, + "logits/chosen": -1.7449121475219727, + "logits/rejected": -1.7119545936584473, + "logps/chosen": -89.17000579833984, + "logps/rejected": -146.4475860595703, + "loss": 0.1057, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7062932252883911, + "rewards/margins": 5.982682228088379, + "rewards/rejected": -7.6889753341674805, + "step": 1391 + }, + { + "epoch": 2.23, + "learning_rate": 4.176575505350773e-07, + "logits/chosen": -1.6247495412826538, + "logits/rejected": -1.6377195119857788, + "logps/chosen": -101.07286071777344, + "logps/rejected": -139.48452758789062, + "loss": 0.2497, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5493354797363281, + "rewards/margins": 3.958782911300659, + "rewards/rejected": -5.508118629455566, + "step": 1392 + }, + { + "epoch": 2.24, + "learning_rate": 4.175584621482362e-07, + "logits/chosen": -1.4251034259796143, + "logits/rejected": -1.4867981672286987, + "logps/chosen": -95.80135345458984, + "logps/rejected": -145.24655151367188, + "loss": 0.1444, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.892636299133301, + "rewards/margins": 3.496647357940674, + "rewards/rejected": -6.389283657073975, + "step": 1393 + }, + { + "epoch": 2.24, + "learning_rate": 4.174593737613952e-07, + "logits/chosen": -1.5205965042114258, + "logits/rejected": -1.600287914276123, + "logps/chosen": -97.33863830566406, + "logps/rejected": -143.8543243408203, + "loss": 0.1227, + "rewards/accuracies": 0.75, + "rewards/chosen": -1.9116039276123047, + "rewards/margins": 3.9765703678131104, + "rewards/rejected": -5.888174057006836, + "step": 1394 + }, + { + "epoch": 2.24, + "learning_rate": 4.173602853745541e-07, + "logits/chosen": -1.4192677736282349, + "logits/rejected": -1.4076875448226929, + "logps/chosen": -91.93575286865234, + "logps/rejected": -130.5423126220703, + "loss": 0.1014, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1891921758651733, + "rewards/margins": 3.532787322998047, + "rewards/rejected": -4.72197961807251, + "step": 1395 + }, + { + "epoch": 2.24, + "learning_rate": 4.17261196987713e-07, + "logits/chosen": -1.5313304662704468, + "logits/rejected": -1.5571908950805664, + "logps/chosen": -95.07061767578125, + "logps/rejected": -134.188232421875, + "loss": 0.0989, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.5752620697021484, + "rewards/margins": 3.2661354541778564, + "rewards/rejected": -5.841397285461426, + "step": 1396 + }, + { + "epoch": 2.24, + "learning_rate": 4.1716210860087197e-07, + "logits/chosen": -1.7214677333831787, + "logits/rejected": -1.8196989297866821, + "logps/chosen": -109.36639404296875, + "logps/rejected": -157.03326416015625, + "loss": 0.063, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.008872628211975, + "rewards/margins": 3.504770040512085, + "rewards/rejected": -4.51364278793335, + "step": 1397 + }, + { + "epoch": 2.24, + "learning_rate": 4.170630202140309e-07, + "logits/chosen": -1.5224758386611938, + "logits/rejected": -1.5698275566101074, + "logps/chosen": -61.47170639038086, + "logps/rejected": -197.53671264648438, + "loss": 0.1511, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.180962473154068, + "rewards/margins": 9.377080917358398, + "rewards/rejected": -9.558042526245117, + "step": 1398 + }, + { + "epoch": 2.25, + "learning_rate": 4.169639318271898e-07, + "logits/chosen": -1.7194334268569946, + "logits/rejected": -1.5725449323654175, + "logps/chosen": -115.42164611816406, + "logps/rejected": -126.91925048828125, + "loss": 0.1765, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8592162728309631, + "rewards/margins": 4.355167388916016, + "rewards/rejected": -5.214383602142334, + "step": 1399 + }, + { + "epoch": 2.25, + "learning_rate": 4.168648434403488e-07, + "logits/chosen": -1.621013879776001, + "logits/rejected": -1.6337149143218994, + "logps/chosen": -110.72764587402344, + "logps/rejected": -118.30073547363281, + "loss": 0.1178, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.193861961364746, + "rewards/margins": 1.8688832521438599, + "rewards/rejected": -4.062745094299316, + "step": 1400 + }, + { + "epoch": 2.25, + "learning_rate": 4.167657550535077e-07, + "logits/chosen": -1.5210278034210205, + "logits/rejected": -1.598259687423706, + "logps/chosen": -100.45932006835938, + "logps/rejected": -135.28759765625, + "loss": 0.1723, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1652535200119019, + "rewards/margins": 3.519408941268921, + "rewards/rejected": -4.684662342071533, + "step": 1401 + }, + { + "epoch": 2.25, + "learning_rate": 4.1666666666666667e-07, + "logits/chosen": -1.7496660947799683, + "logits/rejected": -1.799971342086792, + "logps/chosen": -97.34063720703125, + "logps/rejected": -124.48173522949219, + "loss": 0.1302, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.8495504856109619, + "rewards/margins": 1.5857621431350708, + "rewards/rejected": -2.435312509536743, + "step": 1402 + }, + { + "epoch": 2.25, + "learning_rate": 4.1656757827982557e-07, + "logits/chosen": -1.4571690559387207, + "logits/rejected": -1.4530446529388428, + "logps/chosen": -84.40528869628906, + "logps/rejected": -134.15277099609375, + "loss": 0.1221, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.4465563893318176, + "rewards/margins": 5.36118745803833, + "rewards/rejected": -5.807743549346924, + "step": 1403 + }, + { + "epoch": 2.25, + "learning_rate": 4.164684898929845e-07, + "logits/chosen": -1.5063141584396362, + "logits/rejected": -1.5741297006607056, + "logps/chosen": -84.13290405273438, + "logps/rejected": -156.12757873535156, + "loss": 0.1507, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5198636054992676, + "rewards/margins": 5.057949542999268, + "rewards/rejected": -5.577813148498535, + "step": 1404 + }, + { + "epoch": 2.26, + "learning_rate": 4.163694015061435e-07, + "logits/chosen": -1.695873498916626, + "logits/rejected": -1.6493498086929321, + "logps/chosen": -114.68113708496094, + "logps/rejected": -139.91079711914062, + "loss": 0.1752, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12272852659225464, + "rewards/margins": 5.852752685546875, + "rewards/rejected": -5.975481033325195, + "step": 1405 + }, + { + "epoch": 2.26, + "learning_rate": 4.162703131193024e-07, + "logits/chosen": -1.5643233060836792, + "logits/rejected": -1.5608785152435303, + "logps/chosen": -88.06491088867188, + "logps/rejected": -111.77442932128906, + "loss": 0.073, + "rewards/accuracies": 0.75, + "rewards/chosen": -1.3164451122283936, + "rewards/margins": 2.7446956634521484, + "rewards/rejected": -4.061140537261963, + "step": 1406 + }, + { + "epoch": 2.26, + "learning_rate": 4.1617122473246136e-07, + "logits/chosen": -1.6841155290603638, + "logits/rejected": -1.6822530031204224, + "logps/chosen": -109.94157409667969, + "logps/rejected": -145.05624389648438, + "loss": 0.1478, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2020177841186523, + "rewards/margins": 4.178867340087891, + "rewards/rejected": -5.380885124206543, + "step": 1407 + }, + { + "epoch": 2.26, + "learning_rate": 4.1607213634562027e-07, + "logits/chosen": -1.532741665840149, + "logits/rejected": -1.4998644590377808, + "logps/chosen": -126.99593353271484, + "logps/rejected": -160.82803344726562, + "loss": 0.1312, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1753928661346436, + "rewards/margins": 4.831905364990234, + "rewards/rejected": -7.007298469543457, + "step": 1408 + }, + { + "epoch": 2.26, + "learning_rate": 4.1597304795877917e-07, + "logits/chosen": -1.7240633964538574, + "logits/rejected": -1.694566011428833, + "logps/chosen": -105.62854766845703, + "logps/rejected": -159.24432373046875, + "loss": 0.1686, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.0441250801086426, + "rewards/margins": 3.712026834487915, + "rewards/rejected": -5.756152153015137, + "step": 1409 + }, + { + "epoch": 2.26, + "learning_rate": 4.158739595719382e-07, + "logits/chosen": -1.665038824081421, + "logits/rejected": -1.6948949098587036, + "logps/chosen": -113.11761474609375, + "logps/rejected": -156.4816131591797, + "loss": 0.1328, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.344834327697754, + "rewards/margins": 3.006448268890381, + "rewards/rejected": -5.351282119750977, + "step": 1410 + }, + { + "epoch": 2.26, + "learning_rate": 4.157748711850971e-07, + "logits/chosen": -1.5335407257080078, + "logits/rejected": -1.5148626565933228, + "logps/chosen": -99.43252563476562, + "logps/rejected": -125.01875305175781, + "loss": 0.1707, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.4898495376110077, + "rewards/margins": 3.6444034576416016, + "rewards/rejected": -4.134253025054932, + "step": 1411 + }, + { + "epoch": 2.27, + "learning_rate": 4.1567578279825605e-07, + "logits/chosen": -1.4480806589126587, + "logits/rejected": -1.53104829788208, + "logps/chosen": -94.06268310546875, + "logps/rejected": -155.015380859375, + "loss": 0.2428, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.587809443473816, + "rewards/margins": 5.697615623474121, + "rewards/rejected": -7.285425186157227, + "step": 1412 + }, + { + "epoch": 2.27, + "learning_rate": 4.1557669441141496e-07, + "logits/chosen": -1.770002841949463, + "logits/rejected": -1.7534785270690918, + "logps/chosen": -83.01763916015625, + "logps/rejected": -132.87599182128906, + "loss": 0.1144, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.35075482726097107, + "rewards/margins": 3.57771372795105, + "rewards/rejected": -3.928468704223633, + "step": 1413 + }, + { + "epoch": 2.27, + "learning_rate": 4.1547760602457387e-07, + "logits/chosen": -1.7614374160766602, + "logits/rejected": -1.7426267862319946, + "logps/chosen": -102.25765991210938, + "logps/rejected": -160.68905639648438, + "loss": 0.1015, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.43564948439598083, + "rewards/margins": 5.448760032653809, + "rewards/rejected": -5.8844099044799805, + "step": 1414 + }, + { + "epoch": 2.27, + "learning_rate": 4.153785176377328e-07, + "logits/chosen": -1.6535499095916748, + "logits/rejected": -1.721013069152832, + "logps/chosen": -108.98887634277344, + "logps/rejected": -138.59413146972656, + "loss": 0.1496, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.02699875831604, + "rewards/margins": 1.3645784854888916, + "rewards/rejected": -3.3915772438049316, + "step": 1415 + }, + { + "epoch": 2.27, + "learning_rate": 4.152794292508918e-07, + "logits/chosen": -1.5745267868041992, + "logits/rejected": -1.6279399394989014, + "logps/chosen": -72.76484680175781, + "logps/rejected": -119.44187927246094, + "loss": 0.1111, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.4794832766056061, + "rewards/margins": 4.1045002937316895, + "rewards/rejected": -4.583983421325684, + "step": 1416 + }, + { + "epoch": 2.27, + "learning_rate": 4.1518034086405074e-07, + "logits/chosen": -1.5368263721466064, + "logits/rejected": -1.414686679840088, + "logps/chosen": -117.1923828125, + "logps/rejected": -135.6180419921875, + "loss": 0.0899, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.29243677854537964, + "rewards/margins": 3.7475361824035645, + "rewards/rejected": -4.03997278213501, + "step": 1417 + }, + { + "epoch": 2.28, + "learning_rate": 4.1508125247720965e-07, + "logits/chosen": -1.74886953830719, + "logits/rejected": -1.717078685760498, + "logps/chosen": -87.514892578125, + "logps/rejected": -124.32083129882812, + "loss": 0.1136, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.277405858039856, + "rewards/margins": 4.973484039306641, + "rewards/rejected": -6.250889778137207, + "step": 1418 + }, + { + "epoch": 2.28, + "learning_rate": 4.1498216409036856e-07, + "logits/chosen": -1.7288612127304077, + "logits/rejected": -1.70843505859375, + "logps/chosen": -84.53071594238281, + "logps/rejected": -153.1609649658203, + "loss": 0.0463, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.19075021147727966, + "rewards/margins": 5.475516319274902, + "rewards/rejected": -5.666266918182373, + "step": 1419 + }, + { + "epoch": 2.28, + "learning_rate": 4.148830757035275e-07, + "logits/chosen": -1.6198716163635254, + "logits/rejected": -1.6274473667144775, + "logps/chosen": -72.00983428955078, + "logps/rejected": -164.80831909179688, + "loss": 0.0832, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.4486519694328308, + "rewards/margins": 6.212048053741455, + "rewards/rejected": -6.660699844360352, + "step": 1420 + }, + { + "epoch": 2.28, + "learning_rate": 4.147839873166865e-07, + "logits/chosen": -1.6145879030227661, + "logits/rejected": -1.5420849323272705, + "logps/chosen": -108.18470764160156, + "logps/rejected": -138.5733642578125, + "loss": 0.1778, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.508213222026825, + "rewards/margins": 3.697749137878418, + "rewards/rejected": -4.205962657928467, + "step": 1421 + }, + { + "epoch": 2.28, + "learning_rate": 4.1468489892984544e-07, + "logits/chosen": -1.624552607536316, + "logits/rejected": -1.5389845371246338, + "logps/chosen": -90.34996795654297, + "logps/rejected": -147.56124877929688, + "loss": 0.1521, + "rewards/accuracies": 0.75, + "rewards/chosen": -1.3849163055419922, + "rewards/margins": 1.8597416877746582, + "rewards/rejected": -3.2446579933166504, + "step": 1422 + }, + { + "epoch": 2.28, + "learning_rate": 4.1458581054300434e-07, + "logits/chosen": -1.6913083791732788, + "logits/rejected": -1.578614354133606, + "logps/chosen": -95.01055908203125, + "logps/rejected": -144.44972229003906, + "loss": 0.0748, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9704793691635132, + "rewards/margins": 3.2797422409057617, + "rewards/rejected": -4.250221252441406, + "step": 1423 + }, + { + "epoch": 2.29, + "learning_rate": 4.1448672215616325e-07, + "logits/chosen": -1.4310641288757324, + "logits/rejected": -1.529903531074524, + "logps/chosen": -72.06104278564453, + "logps/rejected": -167.68148803710938, + "loss": 0.1686, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4669650793075562, + "rewards/margins": 7.871921539306641, + "rewards/rejected": -9.338886260986328, + "step": 1424 + }, + { + "epoch": 2.29, + "learning_rate": 4.143876337693222e-07, + "logits/chosen": -1.7280652523040771, + "logits/rejected": -1.7508034706115723, + "logps/chosen": -87.68287658691406, + "logps/rejected": -149.13851928710938, + "loss": 0.1065, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5115649700164795, + "rewards/margins": 4.6422295570373535, + "rewards/rejected": -7.153794765472412, + "step": 1425 + }, + { + "epoch": 2.29, + "learning_rate": 4.1428854538248117e-07, + "logits/chosen": -1.707735300064087, + "logits/rejected": -1.7250267267227173, + "logps/chosen": -108.06665802001953, + "logps/rejected": -128.3871307373047, + "loss": 0.1406, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.103027582168579, + "rewards/margins": 2.5704457759857178, + "rewards/rejected": -3.673473358154297, + "step": 1426 + }, + { + "epoch": 2.29, + "learning_rate": 4.1418945699564013e-07, + "logits/chosen": -1.6493210792541504, + "logits/rejected": -1.652307152748108, + "logps/chosen": -84.65473175048828, + "logps/rejected": -128.85260009765625, + "loss": 0.081, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.577272653579712, + "rewards/margins": 4.614554405212402, + "rewards/rejected": -6.191826820373535, + "step": 1427 + }, + { + "epoch": 2.29, + "learning_rate": 4.1409036860879904e-07, + "logits/chosen": -1.5915358066558838, + "logits/rejected": -1.674373745918274, + "logps/chosen": -88.54325866699219, + "logps/rejected": -140.00729370117188, + "loss": 0.0925, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.864208221435547, + "rewards/margins": 3.250596046447754, + "rewards/rejected": -6.114804267883301, + "step": 1428 + }, + { + "epoch": 2.29, + "learning_rate": 4.1399128022195794e-07, + "logits/chosen": -1.512673020362854, + "logits/rejected": -1.4942142963409424, + "logps/chosen": -104.28711700439453, + "logps/rejected": -181.19386291503906, + "loss": 0.0973, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8862333297729492, + "rewards/margins": 7.505236625671387, + "rewards/rejected": -9.391469955444336, + "step": 1429 + }, + { + "epoch": 2.3, + "learning_rate": 4.138921918351169e-07, + "logits/chosen": -1.8132683038711548, + "logits/rejected": -1.8515625, + "logps/chosen": -108.30857849121094, + "logps/rejected": -155.1065216064453, + "loss": 0.1316, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.947566032409668, + "rewards/margins": 3.149091958999634, + "rewards/rejected": -5.096657752990723, + "step": 1430 + }, + { + "epoch": 2.3, + "learning_rate": 4.1379310344827586e-07, + "logits/chosen": -1.6492600440979004, + "logits/rejected": -1.6190731525421143, + "logps/chosen": -119.80763244628906, + "logps/rejected": -151.47560119628906, + "loss": 0.1341, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.244690179824829, + "rewards/margins": 5.1907854080200195, + "rewards/rejected": -6.435475826263428, + "step": 1431 + }, + { + "epoch": 2.3, + "learning_rate": 4.1369401506143477e-07, + "logits/chosen": -1.5136934518814087, + "logits/rejected": -1.4396708011627197, + "logps/chosen": -111.11389923095703, + "logps/rejected": -162.59791564941406, + "loss": 0.1467, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7716938257217407, + "rewards/margins": 3.139373540878296, + "rewards/rejected": -4.911067485809326, + "step": 1432 + }, + { + "epoch": 2.3, + "learning_rate": 4.1359492667459373e-07, + "logits/chosen": -1.5370800495147705, + "logits/rejected": -1.489763617515564, + "logps/chosen": -84.74977111816406, + "logps/rejected": -163.06277465820312, + "loss": 0.0929, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.17057204246521, + "rewards/margins": 5.720549583435059, + "rewards/rejected": -6.8911213874816895, + "step": 1433 + }, + { + "epoch": 2.3, + "learning_rate": 4.1349583828775264e-07, + "logits/chosen": -1.6201831102371216, + "logits/rejected": -1.6319034099578857, + "logps/chosen": -86.96894836425781, + "logps/rejected": -123.63758850097656, + "loss": 0.1033, + "rewards/accuracies": 0.75, + "rewards/chosen": -1.7245900630950928, + "rewards/margins": 4.917116165161133, + "rewards/rejected": -6.641706466674805, + "step": 1434 + }, + { + "epoch": 2.3, + "learning_rate": 4.133967499009116e-07, + "logits/chosen": -1.5964241027832031, + "logits/rejected": -1.6553490161895752, + "logps/chosen": -136.25421142578125, + "logps/rejected": -124.70254516601562, + "loss": 0.0593, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6843993663787842, + "rewards/margins": 3.7857375144958496, + "rewards/rejected": -4.470136642456055, + "step": 1435 + }, + { + "epoch": 2.3, + "learning_rate": 4.132976615140705e-07, + "logits/chosen": -1.5536327362060547, + "logits/rejected": -1.5600950717926025, + "logps/chosen": -87.00469207763672, + "logps/rejected": -123.5153579711914, + "loss": 0.0632, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.023163080215454, + "rewards/margins": 3.332223415374756, + "rewards/rejected": -4.355386257171631, + "step": 1436 + }, + { + "epoch": 2.31, + "learning_rate": 4.1319857312722946e-07, + "logits/chosen": -1.5688555240631104, + "logits/rejected": -1.5039918422698975, + "logps/chosen": -121.16756439208984, + "logps/rejected": -155.6558380126953, + "loss": 0.1189, + "rewards/accuracies": 0.75, + "rewards/chosen": -1.8975698947906494, + "rewards/margins": 3.4383363723754883, + "rewards/rejected": -5.335906505584717, + "step": 1437 + }, + { + "epoch": 2.31, + "learning_rate": 4.130994847403884e-07, + "logits/chosen": -1.684728741645813, + "logits/rejected": -1.6818584203720093, + "logps/chosen": -80.01072692871094, + "logps/rejected": -128.4375, + "loss": 0.0533, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.169456958770752, + "rewards/margins": 4.071681022644043, + "rewards/rejected": -5.241138458251953, + "step": 1438 + }, + { + "epoch": 2.31, + "learning_rate": 4.1300039635354733e-07, + "logits/chosen": -1.6396100521087646, + "logits/rejected": -1.66339111328125, + "logps/chosen": -76.57654571533203, + "logps/rejected": -125.15562438964844, + "loss": 0.1606, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.7067886590957642, + "rewards/margins": 4.442413330078125, + "rewards/rejected": -5.149201393127441, + "step": 1439 + }, + { + "epoch": 2.31, + "learning_rate": 4.129013079667063e-07, + "logits/chosen": -1.7070128917694092, + "logits/rejected": -1.6729650497436523, + "logps/chosen": -101.57573699951172, + "logps/rejected": -135.72735595703125, + "loss": 0.1842, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8113504648208618, + "rewards/margins": 3.8701703548431396, + "rewards/rejected": -4.681520938873291, + "step": 1440 + }, + { + "epoch": 2.31, + "learning_rate": 4.128022195798652e-07, + "logits/chosen": -1.675486445426941, + "logits/rejected": -1.647981882095337, + "logps/chosen": -146.17349243164062, + "logps/rejected": -184.56954956054688, + "loss": 0.1239, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5168029069900513, + "rewards/margins": 6.445296764373779, + "rewards/rejected": -7.962100028991699, + "step": 1441 + }, + { + "epoch": 2.31, + "learning_rate": 4.1270313119302416e-07, + "logits/chosen": -1.679695725440979, + "logits/rejected": -1.7266314029693604, + "logps/chosen": -98.33006286621094, + "logps/rejected": -138.36788940429688, + "loss": 0.2102, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8881416320800781, + "rewards/margins": 4.195207595825195, + "rewards/rejected": -5.083349227905273, + "step": 1442 + }, + { + "epoch": 2.32, + "learning_rate": 4.126040428061831e-07, + "logits/chosen": -1.6798985004425049, + "logits/rejected": -1.671850562095642, + "logps/chosen": -70.52931213378906, + "logps/rejected": -126.93759155273438, + "loss": 0.1237, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.42653489112854, + "rewards/margins": 5.238668441772461, + "rewards/rejected": -6.665203094482422, + "step": 1443 + }, + { + "epoch": 2.32, + "learning_rate": 4.12504954419342e-07, + "logits/chosen": -1.7151775360107422, + "logits/rejected": -1.6514531373977661, + "logps/chosen": -112.63961029052734, + "logps/rejected": -111.74440002441406, + "loss": 0.194, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2234121561050415, + "rewards/margins": 2.2096331119537354, + "rewards/rejected": -3.4330451488494873, + "step": 1444 + }, + { + "epoch": 2.32, + "learning_rate": 4.12405866032501e-07, + "logits/chosen": -1.5797570943832397, + "logits/rejected": -1.5617091655731201, + "logps/chosen": -88.10249328613281, + "logps/rejected": -141.6862030029297, + "loss": 0.1021, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.900860071182251, + "rewards/margins": 5.411687850952148, + "rewards/rejected": -6.312548637390137, + "step": 1445 + }, + { + "epoch": 2.32, + "learning_rate": 4.123067776456599e-07, + "logits/chosen": -1.6567573547363281, + "logits/rejected": -1.7103627920150757, + "logps/chosen": -161.1071319580078, + "logps/rejected": -195.2894287109375, + "loss": 0.0972, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.773641586303711, + "rewards/margins": 2.472651481628418, + "rewards/rejected": -5.246293067932129, + "step": 1446 + }, + { + "epoch": 2.32, + "learning_rate": 4.1220768925881885e-07, + "logits/chosen": -1.5796881914138794, + "logits/rejected": -1.5277118682861328, + "logps/chosen": -79.33329772949219, + "logps/rejected": -145.93910217285156, + "loss": 0.1282, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.31882575154304504, + "rewards/margins": 6.093873977661133, + "rewards/rejected": -6.4126996994018555, + "step": 1447 + }, + { + "epoch": 2.32, + "learning_rate": 4.121086008719778e-07, + "logits/chosen": -1.7332220077514648, + "logits/rejected": -1.7883186340332031, + "logps/chosen": -94.91268920898438, + "logps/rejected": -141.56381225585938, + "loss": 0.0995, + "rewards/accuracies": 0.75, + "rewards/chosen": -1.9615980386734009, + "rewards/margins": 3.266876459121704, + "rewards/rejected": -5.2284746170043945, + "step": 1448 + }, + { + "epoch": 2.33, + "learning_rate": 4.120095124851367e-07, + "logits/chosen": -1.6668601036071777, + "logits/rejected": -1.6362234354019165, + "logps/chosen": -116.24885559082031, + "logps/rejected": -142.1651153564453, + "loss": 0.1208, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7617164850234985, + "rewards/margins": 3.271109104156494, + "rewards/rejected": -5.032825469970703, + "step": 1449 + }, + { + "epoch": 2.33, + "learning_rate": 4.119104240982957e-07, + "logits/chosen": -1.4589879512786865, + "logits/rejected": -1.432215690612793, + "logps/chosen": -101.90811920166016, + "logps/rejected": -133.19284057617188, + "loss": 0.1882, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.986182689666748, + "rewards/margins": 3.918524742126465, + "rewards/rejected": -5.904707431793213, + "step": 1450 + }, + { + "epoch": 2.33, + "learning_rate": 4.118113357114546e-07, + "logits/chosen": -1.551537036895752, + "logits/rejected": -1.5290002822875977, + "logps/chosen": -106.24058532714844, + "logps/rejected": -132.0687255859375, + "loss": 0.1608, + "rewards/accuracies": 0.5, + "rewards/chosen": -1.8513425588607788, + "rewards/margins": 2.477311372756958, + "rewards/rejected": -4.328653812408447, + "step": 1451 + }, + { + "epoch": 2.33, + "learning_rate": 4.117122473246135e-07, + "logits/chosen": -1.5224637985229492, + "logits/rejected": -1.6093188524246216, + "logps/chosen": -83.37364196777344, + "logps/rejected": -208.93910217285156, + "loss": 0.1379, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.077101230621338, + "rewards/margins": 7.790935516357422, + "rewards/rejected": -8.868036270141602, + "step": 1452 + }, + { + "epoch": 2.33, + "learning_rate": 4.116131589377725e-07, + "logits/chosen": -1.4874601364135742, + "logits/rejected": -1.4890401363372803, + "logps/chosen": -85.50657653808594, + "logps/rejected": -141.89614868164062, + "loss": 0.063, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7631651163101196, + "rewards/margins": 5.255190849304199, + "rewards/rejected": -6.018355846405029, + "step": 1453 + }, + { + "epoch": 2.33, + "learning_rate": 4.115140705509314e-07, + "logits/chosen": -1.642543911933899, + "logits/rejected": -1.6349819898605347, + "logps/chosen": -89.24288940429688, + "logps/rejected": -109.95523071289062, + "loss": 0.2564, + "rewards/accuracies": 0.5, + "rewards/chosen": -1.783313512802124, + "rewards/margins": 2.202544689178467, + "rewards/rejected": -3.9858579635620117, + "step": 1454 + }, + { + "epoch": 2.34, + "learning_rate": 4.1141498216409037e-07, + "logits/chosen": -1.7209001779556274, + "logits/rejected": -1.7945139408111572, + "logps/chosen": -95.30863189697266, + "logps/rejected": -162.94439697265625, + "loss": 0.1491, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1124944686889648, + "rewards/margins": 4.865020275115967, + "rewards/rejected": -5.977514743804932, + "step": 1455 + }, + { + "epoch": 2.34, + "learning_rate": 4.113158937772493e-07, + "logits/chosen": -1.6660478115081787, + "logits/rejected": -1.663987159729004, + "logps/chosen": -98.72095489501953, + "logps/rejected": -167.54493713378906, + "loss": 0.128, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.956188440322876, + "rewards/margins": 7.595315933227539, + "rewards/rejected": -9.551504135131836, + "step": 1456 + }, + { + "epoch": 2.34, + "learning_rate": 4.112168053904082e-07, + "logits/chosen": -1.7016232013702393, + "logits/rejected": -1.6945881843566895, + "logps/chosen": -86.76763153076172, + "logps/rejected": -142.9833526611328, + "loss": 0.0912, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0745856761932373, + "rewards/margins": 5.578383922576904, + "rewards/rejected": -7.6529693603515625, + "step": 1457 + }, + { + "epoch": 2.34, + "learning_rate": 4.111177170035672e-07, + "logits/chosen": -1.3999191522598267, + "logits/rejected": -1.3795396089553833, + "logps/chosen": -97.30729675292969, + "logps/rejected": -111.57603454589844, + "loss": 0.1648, + "rewards/accuracies": 0.75, + "rewards/chosen": -1.916393756866455, + "rewards/margins": 1.9566725492477417, + "rewards/rejected": -3.8730661869049072, + "step": 1458 + }, + { + "epoch": 2.34, + "learning_rate": 4.110186286167261e-07, + "logits/chosen": -1.6614223718643188, + "logits/rejected": -1.6390676498413086, + "logps/chosen": -106.58448791503906, + "logps/rejected": -117.39623260498047, + "loss": 0.102, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.403724730014801, + "rewards/margins": 3.7722063064575195, + "rewards/rejected": -4.175930976867676, + "step": 1459 + }, + { + "epoch": 2.34, + "learning_rate": 4.1091954022988506e-07, + "logits/chosen": -1.7239326238632202, + "logits/rejected": -1.6126232147216797, + "logps/chosen": -114.9096908569336, + "logps/rejected": -142.51705932617188, + "loss": 0.0814, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0271408557891846, + "rewards/margins": 3.7078652381896973, + "rewards/rejected": -4.735005855560303, + "step": 1460 + }, + { + "epoch": 2.35, + "learning_rate": 4.1082045184304397e-07, + "logits/chosen": -1.831423282623291, + "logits/rejected": -1.7994070053100586, + "logps/chosen": -124.40634155273438, + "logps/rejected": -134.9991912841797, + "loss": 0.1319, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.2699074745178223, + "rewards/margins": 1.617745280265808, + "rewards/rejected": -3.88765287399292, + "step": 1461 + }, + { + "epoch": 2.35, + "learning_rate": 4.107213634562029e-07, + "logits/chosen": -1.5814533233642578, + "logits/rejected": -1.5658690929412842, + "logps/chosen": -85.51536560058594, + "logps/rejected": -163.7235107421875, + "loss": 0.159, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07296298444271088, + "rewards/margins": 7.300739288330078, + "rewards/rejected": -7.373702049255371, + "step": 1462 + }, + { + "epoch": 2.35, + "learning_rate": 4.106222750693619e-07, + "logits/chosen": -1.4326744079589844, + "logits/rejected": -1.4111225605010986, + "logps/chosen": -107.56144714355469, + "logps/rejected": -161.078857421875, + "loss": 0.2144, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5512489080429077, + "rewards/margins": 4.711457252502441, + "rewards/rejected": -5.262706279754639, + "step": 1463 + }, + { + "epoch": 2.35, + "learning_rate": 4.105231866825208e-07, + "logits/chosen": -1.6741074323654175, + "logits/rejected": -1.6438548564910889, + "logps/chosen": -115.98644256591797, + "logps/rejected": -145.0465087890625, + "loss": 0.1246, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5428389310836792, + "rewards/margins": 5.310937404632568, + "rewards/rejected": -6.853775978088379, + "step": 1464 + }, + { + "epoch": 2.35, + "learning_rate": 4.1042409829567975e-07, + "logits/chosen": -1.72212815284729, + "logits/rejected": -1.728501796722412, + "logps/chosen": -88.19808197021484, + "logps/rejected": -158.05972290039062, + "loss": 0.295, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.7325023412704468, + "rewards/margins": 6.568589687347412, + "rewards/rejected": -7.30109167098999, + "step": 1465 + }, + { + "epoch": 2.35, + "learning_rate": 4.1032500990883866e-07, + "logits/chosen": -1.624548316001892, + "logits/rejected": -1.6008844375610352, + "logps/chosen": -82.06725311279297, + "logps/rejected": -112.7343978881836, + "loss": 0.1092, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.7870576977729797, + "rewards/margins": 2.94651198387146, + "rewards/rejected": -3.733569860458374, + "step": 1466 + }, + { + "epoch": 2.35, + "learning_rate": 4.1022592152199757e-07, + "logits/chosen": -1.6599711179733276, + "logits/rejected": -1.743025779724121, + "logps/chosen": -72.84785461425781, + "logps/rejected": -179.69009399414062, + "loss": 0.1209, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.16680295765399933, + "rewards/margins": 8.748961448669434, + "rewards/rejected": -8.915763854980469, + "step": 1467 + }, + { + "epoch": 2.36, + "learning_rate": 4.101268331351566e-07, + "logits/chosen": -1.7131197452545166, + "logits/rejected": -1.700234293937683, + "logps/chosen": -102.97457122802734, + "logps/rejected": -181.13185119628906, + "loss": 0.1033, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9319212436676025, + "rewards/margins": 6.669839382171631, + "rewards/rejected": -8.601760864257812, + "step": 1468 + }, + { + "epoch": 2.36, + "learning_rate": 4.100277447483155e-07, + "logits/chosen": -1.6140414476394653, + "logits/rejected": -1.633556842803955, + "logps/chosen": -102.66398620605469, + "logps/rejected": -156.39764404296875, + "loss": 0.0896, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1847665309906006, + "rewards/margins": 4.14161491394043, + "rewards/rejected": -6.326381683349609, + "step": 1469 + }, + { + "epoch": 2.36, + "learning_rate": 4.099286563614744e-07, + "logits/chosen": -1.7232232093811035, + "logits/rejected": -1.725425124168396, + "logps/chosen": -96.1461181640625, + "logps/rejected": -121.1124038696289, + "loss": 0.063, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.30971431732177734, + "rewards/margins": 3.893807888031006, + "rewards/rejected": -4.203522682189941, + "step": 1470 + }, + { + "epoch": 2.36, + "learning_rate": 4.0982956797463335e-07, + "logits/chosen": -1.6902096271514893, + "logits/rejected": -1.5749232769012451, + "logps/chosen": -93.50181579589844, + "logps/rejected": -157.57406616210938, + "loss": 0.1107, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4387052059173584, + "rewards/margins": 5.998256683349609, + "rewards/rejected": -7.436962127685547, + "step": 1471 + }, + { + "epoch": 2.36, + "learning_rate": 4.0973047958779226e-07, + "logits/chosen": -1.8048862218856812, + "logits/rejected": -1.772298812866211, + "logps/chosen": -99.40337371826172, + "logps/rejected": -160.56207275390625, + "loss": 0.0816, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3581384420394897, + "rewards/margins": 5.388286590576172, + "rewards/rejected": -6.746425151824951, + "step": 1472 + }, + { + "epoch": 2.36, + "learning_rate": 4.096313912009513e-07, + "logits/chosen": -1.6528825759887695, + "logits/rejected": -1.6474099159240723, + "logps/chosen": -118.61327362060547, + "logps/rejected": -172.26715087890625, + "loss": 0.0872, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5727038383483887, + "rewards/margins": 3.4714102745056152, + "rewards/rejected": -5.044114112854004, + "step": 1473 + }, + { + "epoch": 2.37, + "learning_rate": 4.095323028141102e-07, + "logits/chosen": -1.633191704750061, + "logits/rejected": -1.5426292419433594, + "logps/chosen": -97.20809936523438, + "logps/rejected": -129.29098510742188, + "loss": 0.1139, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5058740973472595, + "rewards/margins": 4.510974407196045, + "rewards/rejected": -5.016848564147949, + "step": 1474 + }, + { + "epoch": 2.37, + "learning_rate": 4.094332144272691e-07, + "logits/chosen": -1.6834478378295898, + "logits/rejected": -1.6823031902313232, + "logps/chosen": -80.05238342285156, + "logps/rejected": -153.88021850585938, + "loss": 0.0771, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3101203441619873, + "rewards/margins": 6.712024211883545, + "rewards/rejected": -8.022144317626953, + "step": 1475 + }, + { + "epoch": 2.37, + "learning_rate": 4.0933412604042805e-07, + "logits/chosen": -1.536747932434082, + "logits/rejected": -1.472947120666504, + "logps/chosen": -100.44734954833984, + "logps/rejected": -152.25790405273438, + "loss": 0.1627, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2932469844818115, + "rewards/margins": 5.839528560638428, + "rewards/rejected": -7.132775783538818, + "step": 1476 + }, + { + "epoch": 2.37, + "learning_rate": 4.0923503765358695e-07, + "logits/chosen": -1.5412077903747559, + "logits/rejected": -1.5260741710662842, + "logps/chosen": -105.67164611816406, + "logps/rejected": -176.1360321044922, + "loss": 0.1176, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1501874923706055, + "rewards/margins": 6.643045425415039, + "rewards/rejected": -7.7932329177856445, + "step": 1477 + }, + { + "epoch": 2.37, + "learning_rate": 4.091359492667459e-07, + "logits/chosen": -1.7576298713684082, + "logits/rejected": -1.7769145965576172, + "logps/chosen": -94.69011688232422, + "logps/rejected": -133.70437622070312, + "loss": 0.1392, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.726377010345459, + "rewards/margins": 3.5425403118133545, + "rewards/rejected": -4.268917083740234, + "step": 1478 + }, + { + "epoch": 2.37, + "learning_rate": 4.0903686087990487e-07, + "logits/chosen": -1.511179804801941, + "logits/rejected": -1.5482220649719238, + "logps/chosen": -117.68193054199219, + "logps/rejected": -152.30963134765625, + "loss": 0.1088, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6891249418258667, + "rewards/margins": 4.555817604064941, + "rewards/rejected": -6.2449421882629395, + "step": 1479 + }, + { + "epoch": 2.38, + "learning_rate": 4.089377724930638e-07, + "logits/chosen": -1.6805524826049805, + "logits/rejected": -1.7040472030639648, + "logps/chosen": -125.67372131347656, + "logps/rejected": -134.75502014160156, + "loss": 0.1451, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8552608489990234, + "rewards/margins": 3.225457191467285, + "rewards/rejected": -5.080718040466309, + "step": 1480 + }, + { + "epoch": 2.38, + "learning_rate": 4.0883868410622274e-07, + "logits/chosen": -1.5982657670974731, + "logits/rejected": -1.601035714149475, + "logps/chosen": -90.72936248779297, + "logps/rejected": -137.61961364746094, + "loss": 0.1915, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2122385501861572, + "rewards/margins": 5.0202741622924805, + "rewards/rejected": -7.232512950897217, + "step": 1481 + }, + { + "epoch": 2.38, + "learning_rate": 4.0873959571938165e-07, + "logits/chosen": -1.5307133197784424, + "logits/rejected": -1.5169103145599365, + "logps/chosen": -114.79058837890625, + "logps/rejected": -134.68020629882812, + "loss": 0.1638, + "rewards/accuracies": 0.75, + "rewards/chosen": -1.518075942993164, + "rewards/margins": 3.731745481491089, + "rewards/rejected": -5.249821662902832, + "step": 1482 + }, + { + "epoch": 2.38, + "learning_rate": 4.086405073325406e-07, + "logits/chosen": -1.618375539779663, + "logits/rejected": -1.6335082054138184, + "logps/chosen": -100.37903594970703, + "logps/rejected": -151.8387451171875, + "loss": 0.0829, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6983686685562134, + "rewards/margins": 5.765535354614258, + "rewards/rejected": -7.463903427124023, + "step": 1483 + }, + { + "epoch": 2.38, + "learning_rate": 4.0854141894569957e-07, + "logits/chosen": -1.7722855806350708, + "logits/rejected": -1.7039296627044678, + "logps/chosen": -113.68299865722656, + "logps/rejected": -173.16778564453125, + "loss": 0.1118, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.16755028069019318, + "rewards/margins": 5.9268798828125, + "rewards/rejected": -6.094430446624756, + "step": 1484 + }, + { + "epoch": 2.38, + "learning_rate": 4.0844233055885847e-07, + "logits/chosen": -1.690535545349121, + "logits/rejected": -1.601304054260254, + "logps/chosen": -113.0234375, + "logps/rejected": -112.406005859375, + "loss": 0.0815, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5554728507995605, + "rewards/margins": 1.6039519309997559, + "rewards/rejected": -3.1594250202178955, + "step": 1485 + }, + { + "epoch": 2.39, + "learning_rate": 4.0834324217201743e-07, + "logits/chosen": -1.661498785018921, + "logits/rejected": -1.733902931213379, + "logps/chosen": -102.24685668945312, + "logps/rejected": -158.42613220214844, + "loss": 0.319, + "rewards/accuracies": 0.75, + "rewards/chosen": -1.7509816884994507, + "rewards/margins": 2.810352087020874, + "rewards/rejected": -4.561333656311035, + "step": 1486 + }, + { + "epoch": 2.39, + "learning_rate": 4.0824415378517634e-07, + "logits/chosen": -1.666690707206726, + "logits/rejected": -1.5835140943527222, + "logps/chosen": -109.46944427490234, + "logps/rejected": -117.82136535644531, + "loss": 0.1768, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.2317575365304947, + "rewards/margins": 2.9080750942230225, + "rewards/rejected": -2.6763179302215576, + "step": 1487 + }, + { + "epoch": 2.39, + "learning_rate": 4.081450653983353e-07, + "logits/chosen": -1.6321110725402832, + "logits/rejected": -1.6530343294143677, + "logps/chosen": -103.3187255859375, + "logps/rejected": -147.22511291503906, + "loss": 0.1528, + "rewards/accuracies": 0.75, + "rewards/chosen": -1.9517402648925781, + "rewards/margins": 3.8201441764831543, + "rewards/rejected": -5.771884441375732, + "step": 1488 + }, + { + "epoch": 2.39, + "learning_rate": 4.0804597701149426e-07, + "logits/chosen": -1.6847548484802246, + "logits/rejected": -1.6763641834259033, + "logps/chosen": -133.73110961914062, + "logps/rejected": -176.3251190185547, + "loss": 0.1663, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.761630058288574, + "rewards/margins": 3.6182892322540283, + "rewards/rejected": -6.379919528961182, + "step": 1489 + }, + { + "epoch": 2.39, + "learning_rate": 4.0794688862465317e-07, + "logits/chosen": -1.5901468992233276, + "logits/rejected": -1.6527047157287598, + "logps/chosen": -106.97029113769531, + "logps/rejected": -176.55593872070312, + "loss": 0.0289, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2518486976623535, + "rewards/margins": 4.839935779571533, + "rewards/rejected": -7.091784954071045, + "step": 1490 + }, + { + "epoch": 2.39, + "learning_rate": 4.078478002378121e-07, + "logits/chosen": -1.6851449012756348, + "logits/rejected": -1.5898983478546143, + "logps/chosen": -97.10938262939453, + "logps/rejected": -125.79306030273438, + "loss": 0.0587, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3101112842559814, + "rewards/margins": 4.370337963104248, + "rewards/rejected": -5.68044900894165, + "step": 1491 + }, + { + "epoch": 2.39, + "learning_rate": 4.0774871185097103e-07, + "logits/chosen": -1.6654438972473145, + "logits/rejected": -1.6475812196731567, + "logps/chosen": -91.38770294189453, + "logps/rejected": -156.69041442871094, + "loss": 0.0892, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9593333005905151, + "rewards/margins": 7.783817291259766, + "rewards/rejected": -8.743151664733887, + "step": 1492 + }, + { + "epoch": 2.4, + "learning_rate": 4.0764962346413e-07, + "logits/chosen": -1.764151930809021, + "logits/rejected": -1.6202425956726074, + "logps/chosen": -128.2392578125, + "logps/rejected": -116.54849243164062, + "loss": 0.1691, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8391079902648926, + "rewards/margins": 2.0576417446136475, + "rewards/rejected": -3.89674973487854, + "step": 1493 + }, + { + "epoch": 2.4, + "learning_rate": 4.0755053507728895e-07, + "logits/chosen": -1.6052769422531128, + "logits/rejected": -1.5756547451019287, + "logps/chosen": -77.28958129882812, + "logps/rejected": -151.672607421875, + "loss": 0.172, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.685112476348877, + "rewards/margins": 7.137723922729492, + "rewards/rejected": -7.822836875915527, + "step": 1494 + }, + { + "epoch": 2.4, + "learning_rate": 4.0745144669044786e-07, + "logits/chosen": -1.6648147106170654, + "logits/rejected": -1.687241792678833, + "logps/chosen": -78.1926040649414, + "logps/rejected": -118.32796478271484, + "loss": 0.0779, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.25719472765922546, + "rewards/margins": 3.386625289916992, + "rewards/rejected": -3.64382004737854, + "step": 1495 + }, + { + "epoch": 2.4, + "learning_rate": 4.073523583036068e-07, + "logits/chosen": -1.6595323085784912, + "logits/rejected": -1.6373927593231201, + "logps/chosen": -128.678466796875, + "logps/rejected": -138.0799560546875, + "loss": 0.169, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.9421684741973877, + "rewards/margins": 2.1058425903320312, + "rewards/rejected": -5.04801082611084, + "step": 1496 + }, + { + "epoch": 2.4, + "learning_rate": 4.072532699167657e-07, + "logits/chosen": -1.495861291885376, + "logits/rejected": -1.4829118251800537, + "logps/chosen": -92.12309265136719, + "logps/rejected": -134.8983154296875, + "loss": 0.0369, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7952005863189697, + "rewards/margins": 3.08030366897583, + "rewards/rejected": -4.875504493713379, + "step": 1497 + }, + { + "epoch": 2.4, + "learning_rate": 4.071541815299247e-07, + "logits/chosen": -1.5180261135101318, + "logits/rejected": -1.5774201154708862, + "logps/chosen": -90.67567443847656, + "logps/rejected": -143.63949584960938, + "loss": 0.0604, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9251878261566162, + "rewards/margins": 3.4453117847442627, + "rewards/rejected": -5.370499610900879, + "step": 1498 + }, + { + "epoch": 2.41, + "learning_rate": 4.070550931430836e-07, + "logits/chosen": -1.7677569389343262, + "logits/rejected": -1.7046844959259033, + "logps/chosen": -96.19807434082031, + "logps/rejected": -99.63695526123047, + "loss": 0.1367, + "rewards/accuracies": 0.75, + "rewards/chosen": -1.2505528926849365, + "rewards/margins": 2.849989414215088, + "rewards/rejected": -4.100542068481445, + "step": 1499 + }, + { + "epoch": 2.41, + "learning_rate": 4.0695600475624255e-07, + "logits/chosen": -1.64897620677948, + "logits/rejected": -1.632043719291687, + "logps/chosen": -88.86354064941406, + "logps/rejected": -126.76194763183594, + "loss": 0.1331, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0777499675750732, + "rewards/margins": 4.732696533203125, + "rewards/rejected": -5.810446262359619, + "step": 1500 + }, + { + "epoch": 2.41, + "learning_rate": 4.068569163694015e-07, + "logits/chosen": -1.5667603015899658, + "logits/rejected": -1.5248873233795166, + "logps/chosen": -118.380615234375, + "logps/rejected": -138.02365112304688, + "loss": 0.074, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8939696550369263, + "rewards/margins": 3.096045970916748, + "rewards/rejected": -4.990015983581543, + "step": 1501 + }, + { + "epoch": 2.41, + "learning_rate": 4.067578279825604e-07, + "logits/chosen": -1.642941951751709, + "logits/rejected": -1.5449742078781128, + "logps/chosen": -112.85333251953125, + "logps/rejected": -133.91156005859375, + "loss": 0.1717, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.91801118850708, + "rewards/margins": 2.5385048389434814, + "rewards/rejected": -5.456515789031982, + "step": 1502 + }, + { + "epoch": 2.41, + "learning_rate": 4.066587395957193e-07, + "logits/chosen": -1.5652997493743896, + "logits/rejected": -1.5232667922973633, + "logps/chosen": -96.29391479492188, + "logps/rejected": -159.99798583984375, + "loss": 0.0993, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9544934034347534, + "rewards/margins": 5.283965110778809, + "rewards/rejected": -7.238458633422852, + "step": 1503 + }, + { + "epoch": 2.41, + "learning_rate": 4.065596512088783e-07, + "logits/chosen": -1.6967377662658691, + "logits/rejected": -1.6151225566864014, + "logps/chosen": -99.17096710205078, + "logps/rejected": -114.93666076660156, + "loss": 0.0936, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2134594917297363, + "rewards/margins": 2.3923895359039307, + "rewards/rejected": -3.605849266052246, + "step": 1504 + }, + { + "epoch": 2.42, + "learning_rate": 4.0646056282203725e-07, + "logits/chosen": -1.7171893119812012, + "logits/rejected": -1.7381590604782104, + "logps/chosen": -100.01307678222656, + "logps/rejected": -123.69398498535156, + "loss": 0.0921, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7337982654571533, + "rewards/margins": 3.064535140991211, + "rewards/rejected": -4.798333168029785, + "step": 1505 + }, + { + "epoch": 2.42, + "learning_rate": 4.063614744351962e-07, + "logits/chosen": -1.5500264167785645, + "logits/rejected": -1.5413289070129395, + "logps/chosen": -78.89205169677734, + "logps/rejected": -127.20889282226562, + "loss": 0.142, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9197031855583191, + "rewards/margins": 5.1773247718811035, + "rewards/rejected": -6.097027778625488, + "step": 1506 + }, + { + "epoch": 2.42, + "learning_rate": 4.062623860483551e-07, + "logits/chosen": -1.619476079940796, + "logits/rejected": -1.653322696685791, + "logps/chosen": -95.01835632324219, + "logps/rejected": -124.08586120605469, + "loss": 0.1905, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3563987016677856, + "rewards/margins": 2.917402982711792, + "rewards/rejected": -4.273801803588867, + "step": 1507 + }, + { + "epoch": 2.42, + "learning_rate": 4.06163297661514e-07, + "logits/chosen": -1.720633625984192, + "logits/rejected": -1.7363874912261963, + "logps/chosen": -100.47887420654297, + "logps/rejected": -149.72454833984375, + "loss": 0.1443, + "rewards/accuracies": 0.75, + "rewards/chosen": -1.962073802947998, + "rewards/margins": 4.306936264038086, + "rewards/rejected": -6.269009590148926, + "step": 1508 + }, + { + "epoch": 2.42, + "learning_rate": 4.06064209274673e-07, + "logits/chosen": -1.7655538320541382, + "logits/rejected": -1.66748046875, + "logps/chosen": -87.0724868774414, + "logps/rejected": -117.3565673828125, + "loss": 0.0748, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.24088314175605774, + "rewards/margins": 4.770526885986328, + "rewards/rejected": -5.011410236358643, + "step": 1509 + }, + { + "epoch": 2.42, + "learning_rate": 4.0596512088783194e-07, + "logits/chosen": -1.5929373502731323, + "logits/rejected": -1.5808699131011963, + "logps/chosen": -117.0266342163086, + "logps/rejected": -132.70071411132812, + "loss": 0.2016, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.0866055488586426, + "rewards/margins": 1.559330701828003, + "rewards/rejected": -3.6459362506866455, + "step": 1510 + }, + { + "epoch": 2.43, + "learning_rate": 4.058660325009909e-07, + "logits/chosen": -1.553243637084961, + "logits/rejected": -1.6054189205169678, + "logps/chosen": -96.9155044555664, + "logps/rejected": -144.506591796875, + "loss": 0.1318, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.3989510536193848, + "rewards/margins": 3.78666090965271, + "rewards/rejected": -6.185612201690674, + "step": 1511 + }, + { + "epoch": 2.43, + "learning_rate": 4.057669441141498e-07, + "logits/chosen": -1.7128180265426636, + "logits/rejected": -1.725083351135254, + "logps/chosen": -79.0287857055664, + "logps/rejected": -120.95449829101562, + "loss": 0.1127, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.701920747756958, + "rewards/margins": 3.174788236618042, + "rewards/rejected": -3.876708984375, + "step": 1512 + }, + { + "epoch": 2.43, + "learning_rate": 4.056678557273087e-07, + "logits/chosen": -1.7242698669433594, + "logits/rejected": -1.7452905178070068, + "logps/chosen": -103.14733123779297, + "logps/rejected": -149.8619384765625, + "loss": 0.2453, + "rewards/accuracies": 0.75, + "rewards/chosen": -1.9115474224090576, + "rewards/margins": 4.32144832611084, + "rewards/rejected": -6.232995510101318, + "step": 1513 + }, + { + "epoch": 2.43, + "learning_rate": 4.0556876734046767e-07, + "logits/chosen": -1.4700727462768555, + "logits/rejected": -1.5499107837677002, + "logps/chosen": -82.6824951171875, + "logps/rejected": -136.1915283203125, + "loss": 0.1666, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9617751836776733, + "rewards/margins": 4.227591037750244, + "rewards/rejected": -5.189365863800049, + "step": 1514 + }, + { + "epoch": 2.43, + "learning_rate": 4.054696789536266e-07, + "logits/chosen": -1.6936482191085815, + "logits/rejected": -1.635155439376831, + "logps/chosen": -86.2714614868164, + "logps/rejected": -153.77500915527344, + "loss": 0.1891, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5294246673583984, + "rewards/margins": 6.518670558929443, + "rewards/rejected": -7.048095703125, + "step": 1515 + }, + { + "epoch": 2.43, + "learning_rate": 4.053705905667856e-07, + "logits/chosen": -1.6472275257110596, + "logits/rejected": -1.6376121044158936, + "logps/chosen": -84.46728515625, + "logps/rejected": -156.71446228027344, + "loss": 0.0646, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4644246101379395, + "rewards/margins": 7.579118728637695, + "rewards/rejected": -9.043543815612793, + "step": 1516 + }, + { + "epoch": 2.43, + "learning_rate": 4.052715021799445e-07, + "logits/chosen": -1.510190725326538, + "logits/rejected": -1.5585732460021973, + "logps/chosen": -98.6636734008789, + "logps/rejected": -164.86106872558594, + "loss": 0.1878, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7325698733329773, + "rewards/margins": 6.539140224456787, + "rewards/rejected": -7.2717108726501465, + "step": 1517 + }, + { + "epoch": 2.44, + "learning_rate": 4.051724137931034e-07, + "logits/chosen": -1.6351839303970337, + "logits/rejected": -1.476347804069519, + "logps/chosen": -123.76448059082031, + "logps/rejected": -155.6278076171875, + "loss": 0.0867, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7456703186035156, + "rewards/margins": 5.353232383728027, + "rewards/rejected": -6.098902702331543, + "step": 1518 + }, + { + "epoch": 2.44, + "learning_rate": 4.0507332540626236e-07, + "logits/chosen": -1.5301094055175781, + "logits/rejected": -1.5763418674468994, + "logps/chosen": -85.13336944580078, + "logps/rejected": -125.96330261230469, + "loss": 0.109, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7226637005805969, + "rewards/margins": 3.7251012325286865, + "rewards/rejected": -4.447764873504639, + "step": 1519 + }, + { + "epoch": 2.44, + "learning_rate": 4.0497423701942127e-07, + "logits/chosen": -1.6217589378356934, + "logits/rejected": -1.603546142578125, + "logps/chosen": -96.87796783447266, + "logps/rejected": -134.1571807861328, + "loss": 0.1388, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.7026685476303101, + "rewards/margins": 3.9957871437072754, + "rewards/rejected": -4.698455810546875, + "step": 1520 + }, + { + "epoch": 2.44, + "learning_rate": 4.048751486325803e-07, + "logits/chosen": -1.5741209983825684, + "logits/rejected": -1.7027390003204346, + "logps/chosen": -78.73188781738281, + "logps/rejected": -163.205078125, + "loss": 0.0536, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6702752113342285, + "rewards/margins": 6.014985084533691, + "rewards/rejected": -6.685259819030762, + "step": 1521 + }, + { + "epoch": 2.44, + "learning_rate": 4.047760602457392e-07, + "logits/chosen": -1.6477409601211548, + "logits/rejected": -1.5877058506011963, + "logps/chosen": -104.22853088378906, + "logps/rejected": -139.50656127929688, + "loss": 0.0361, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0314207077026367, + "rewards/margins": 4.046950817108154, + "rewards/rejected": -5.078371524810791, + "step": 1522 + }, + { + "epoch": 2.44, + "learning_rate": 4.046769718588981e-07, + "logits/chosen": -1.4774354696273804, + "logits/rejected": -1.472226858139038, + "logps/chosen": -113.31661987304688, + "logps/rejected": -162.3009796142578, + "loss": 0.0844, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.58207368850708, + "rewards/margins": 4.376163482666016, + "rewards/rejected": -6.958237648010254, + "step": 1523 + }, + { + "epoch": 2.45, + "learning_rate": 4.0457788347205706e-07, + "logits/chosen": -1.5766592025756836, + "logits/rejected": -1.4725664854049683, + "logps/chosen": -112.04310607910156, + "logps/rejected": -110.5008316040039, + "loss": 0.246, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.258474349975586, + "rewards/margins": 2.486903429031372, + "rewards/rejected": -4.745377540588379, + "step": 1524 + }, + { + "epoch": 2.45, + "learning_rate": 4.0447879508521596e-07, + "logits/chosen": -1.6960489749908447, + "logits/rejected": -1.6271402835845947, + "logps/chosen": -92.46038818359375, + "logps/rejected": -116.66616821289062, + "loss": 0.173, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8856247067451477, + "rewards/margins": 1.6981244087219238, + "rewards/rejected": -2.5837488174438477, + "step": 1525 + }, + { + "epoch": 2.45, + "learning_rate": 4.04379706698375e-07, + "logits/chosen": -1.7546565532684326, + "logits/rejected": -1.7680683135986328, + "logps/chosen": -84.20319366455078, + "logps/rejected": -147.97560119628906, + "loss": 0.1453, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.43491172790527344, + "rewards/margins": 6.219766616821289, + "rewards/rejected": -6.6546783447265625, + "step": 1526 + }, + { + "epoch": 2.45, + "learning_rate": 4.042806183115339e-07, + "logits/chosen": -1.7380907535552979, + "logits/rejected": -1.7533730268478394, + "logps/chosen": -85.42982482910156, + "logps/rejected": -141.46109008789062, + "loss": 0.1251, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5380926132202148, + "rewards/margins": 3.7549023628234863, + "rewards/rejected": -4.292995452880859, + "step": 1527 + }, + { + "epoch": 2.45, + "learning_rate": 4.041815299246928e-07, + "logits/chosen": -1.5744197368621826, + "logits/rejected": -1.5202808380126953, + "logps/chosen": -91.15573120117188, + "logps/rejected": -127.35403442382812, + "loss": 0.1009, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4105987548828125, + "rewards/margins": 5.134240627288818, + "rewards/rejected": -6.544839859008789, + "step": 1528 + }, + { + "epoch": 2.45, + "learning_rate": 4.0408244153785175e-07, + "logits/chosen": -1.639115571975708, + "logits/rejected": -1.7042914628982544, + "logps/chosen": -111.60499572753906, + "logps/rejected": -145.43470764160156, + "loss": 0.181, + "rewards/accuracies": 0.75, + "rewards/chosen": -1.4815019369125366, + "rewards/margins": 1.939723253250122, + "rewards/rejected": -3.4212253093719482, + "step": 1529 + }, + { + "epoch": 2.46, + "learning_rate": 4.0398335315101066e-07, + "logits/chosen": -1.4887815713882446, + "logits/rejected": -1.5208137035369873, + "logps/chosen": -101.59739685058594, + "logps/rejected": -123.59337615966797, + "loss": 0.1407, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.792190670967102, + "rewards/margins": 2.4735848903656006, + "rewards/rejected": -4.265775680541992, + "step": 1530 + }, + { + "epoch": 2.46, + "learning_rate": 4.0388426476416967e-07, + "logits/chosen": -1.7882249355316162, + "logits/rejected": -1.7644145488739014, + "logps/chosen": -90.46642303466797, + "logps/rejected": -140.2609405517578, + "loss": 0.2106, + "rewards/accuracies": 0.75, + "rewards/chosen": -1.1742628812789917, + "rewards/margins": 5.204369068145752, + "rewards/rejected": -6.378632068634033, + "step": 1531 + }, + { + "epoch": 2.46, + "learning_rate": 4.037851763773286e-07, + "logits/chosen": -1.7011228799819946, + "logits/rejected": -1.6157504320144653, + "logps/chosen": -75.59807586669922, + "logps/rejected": -139.660888671875, + "loss": 0.1558, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.05092087388038635, + "rewards/margins": 6.48737907409668, + "rewards/rejected": -6.538300037384033, + "step": 1532 + }, + { + "epoch": 2.46, + "learning_rate": 4.036860879904875e-07, + "logits/chosen": -1.6336381435394287, + "logits/rejected": -1.611140489578247, + "logps/chosen": -106.57609558105469, + "logps/rejected": -136.16159057617188, + "loss": 0.2473, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8734407424926758, + "rewards/margins": 2.5583033561706543, + "rewards/rejected": -4.431743621826172, + "step": 1533 + }, + { + "epoch": 2.46, + "learning_rate": 4.0358699960364644e-07, + "logits/chosen": -1.6688958406448364, + "logits/rejected": -1.6682106256484985, + "logps/chosen": -117.84190368652344, + "logps/rejected": -139.07240295410156, + "loss": 0.1183, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3286666870117188, + "rewards/margins": 3.041973114013672, + "rewards/rejected": -5.370639801025391, + "step": 1534 + }, + { + "epoch": 2.46, + "learning_rate": 4.0348791121680535e-07, + "logits/chosen": -1.5337060689926147, + "logits/rejected": -1.5028185844421387, + "logps/chosen": -83.20748901367188, + "logps/rejected": -148.07830810546875, + "loss": 0.1417, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.4818728268146515, + "rewards/margins": 6.372774600982666, + "rewards/rejected": -6.854646682739258, + "step": 1535 + }, + { + "epoch": 2.47, + "learning_rate": 4.0338882282996436e-07, + "logits/chosen": -1.5931715965270996, + "logits/rejected": -1.5353106260299683, + "logps/chosen": -105.6378402709961, + "logps/rejected": -162.3219451904297, + "loss": 0.0849, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3020728826522827, + "rewards/margins": 7.837519645690918, + "rewards/rejected": -9.139593124389648, + "step": 1536 + }, + { + "epoch": 2.47, + "learning_rate": 4.0328973444312327e-07, + "logits/chosen": -1.6676157712936401, + "logits/rejected": -1.6871238946914673, + "logps/chosen": -126.96955108642578, + "logps/rejected": -149.88645935058594, + "loss": 0.0715, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9541144371032715, + "rewards/margins": 4.077503681182861, + "rewards/rejected": -6.031618118286133, + "step": 1537 + }, + { + "epoch": 2.47, + "learning_rate": 4.031906460562822e-07, + "logits/chosen": -1.756999135017395, + "logits/rejected": -1.8051962852478027, + "logps/chosen": -88.68598937988281, + "logps/rejected": -173.6085662841797, + "loss": 0.1023, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5411515831947327, + "rewards/margins": 6.178904056549072, + "rewards/rejected": -6.72005558013916, + "step": 1538 + }, + { + "epoch": 2.47, + "learning_rate": 4.0309155766944114e-07, + "logits/chosen": -1.6535563468933105, + "logits/rejected": -1.626357913017273, + "logps/chosen": -81.6829833984375, + "logps/rejected": -101.99337005615234, + "loss": 0.0433, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5963294506072998, + "rewards/margins": 2.718825101852417, + "rewards/rejected": -3.315154552459717, + "step": 1539 + }, + { + "epoch": 2.47, + "learning_rate": 4.0299246928260004e-07, + "logits/chosen": -1.7715355157852173, + "logits/rejected": -1.717128038406372, + "logps/chosen": -104.78874969482422, + "logps/rejected": -124.2789535522461, + "loss": 0.104, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.814539909362793, + "rewards/margins": 2.5212225914001465, + "rewards/rejected": -4.3357625007629395, + "step": 1540 + }, + { + "epoch": 2.47, + "learning_rate": 4.0289338089575895e-07, + "logits/chosen": -1.700101375579834, + "logits/rejected": -1.6317527294158936, + "logps/chosen": -110.13068389892578, + "logps/rejected": -147.58824157714844, + "loss": 0.0831, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3511316776275635, + "rewards/margins": 5.090221405029297, + "rewards/rejected": -6.441352844238281, + "step": 1541 + }, + { + "epoch": 2.48, + "learning_rate": 4.0279429250891796e-07, + "logits/chosen": -1.9065167903900146, + "logits/rejected": -1.7883718013763428, + "logps/chosen": -86.64779663085938, + "logps/rejected": -135.6343231201172, + "loss": 0.0639, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.2539987564086914, + "rewards/margins": 6.186154365539551, + "rewards/rejected": -6.440153121948242, + "step": 1542 + }, + { + "epoch": 2.48, + "learning_rate": 4.0269520412207687e-07, + "logits/chosen": -1.6166367530822754, + "logits/rejected": -1.695024013519287, + "logps/chosen": -104.8846435546875, + "logps/rejected": -147.3314971923828, + "loss": 0.0752, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7993589639663696, + "rewards/margins": 5.564014434814453, + "rewards/rejected": -6.363373279571533, + "step": 1543 + }, + { + "epoch": 2.48, + "learning_rate": 4.0259611573523583e-07, + "logits/chosen": -1.5517076253890991, + "logits/rejected": -1.5266845226287842, + "logps/chosen": -87.93705749511719, + "logps/rejected": -170.57333374023438, + "loss": 0.0518, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6302905082702637, + "rewards/margins": 6.197275161743164, + "rewards/rejected": -5.566984176635742, + "step": 1544 + }, + { + "epoch": 2.48, + "learning_rate": 4.0249702734839474e-07, + "logits/chosen": -1.5856419801712036, + "logits/rejected": -1.6471059322357178, + "logps/chosen": -97.21983337402344, + "logps/rejected": -150.32290649414062, + "loss": 0.1302, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8491491675376892, + "rewards/margins": 4.633986949920654, + "rewards/rejected": -5.4831366539001465, + "step": 1545 + }, + { + "epoch": 2.48, + "learning_rate": 4.0239793896155364e-07, + "logits/chosen": -1.6190826892852783, + "logits/rejected": -1.5980753898620605, + "logps/chosen": -108.26102447509766, + "logps/rejected": -135.6266326904297, + "loss": 0.1354, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9202570915222168, + "rewards/margins": 3.27478289604187, + "rewards/rejected": -5.195040225982666, + "step": 1546 + }, + { + "epoch": 2.48, + "learning_rate": 4.0229885057471266e-07, + "logits/chosen": -1.6052682399749756, + "logits/rejected": -1.6060292720794678, + "logps/chosen": -93.7086181640625, + "logps/rejected": -164.75408935546875, + "loss": 0.0449, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.161731481552124, + "rewards/margins": 5.436088562011719, + "rewards/rejected": -6.597820281982422, + "step": 1547 + }, + { + "epoch": 2.48, + "learning_rate": 4.0219976218787156e-07, + "logits/chosen": -1.6968213319778442, + "logits/rejected": -1.6516337394714355, + "logps/chosen": -107.81195068359375, + "logps/rejected": -149.43711853027344, + "loss": 0.1159, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0313009023666382, + "rewards/margins": 6.073184967041016, + "rewards/rejected": -7.104485511779785, + "step": 1548 + }, + { + "epoch": 2.49, + "learning_rate": 4.021006738010305e-07, + "logits/chosen": -1.584862470626831, + "logits/rejected": -1.6257168054580688, + "logps/chosen": -91.61494445800781, + "logps/rejected": -134.11886596679688, + "loss": 0.1308, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.05718068778514862, + "rewards/margins": 3.3961164951324463, + "rewards/rejected": -3.4532973766326904, + "step": 1549 + }, + { + "epoch": 2.49, + "learning_rate": 4.0200158541418943e-07, + "logits/chosen": -1.5913203954696655, + "logits/rejected": -1.6108205318450928, + "logps/chosen": -82.05184936523438, + "logps/rejected": -151.38418579101562, + "loss": 0.05, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.029408574104309, + "rewards/margins": 6.295265197753906, + "rewards/rejected": -7.324673652648926, + "step": 1550 + }, + { + "epoch": 2.49, + "learning_rate": 4.0190249702734834e-07, + "logits/chosen": -1.6398818492889404, + "logits/rejected": -1.6251544952392578, + "logps/chosen": -71.79005432128906, + "logps/rejected": -116.27836608886719, + "loss": 0.1784, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.06548377871513367, + "rewards/margins": 4.368442058563232, + "rewards/rejected": -4.3029584884643555, + "step": 1551 + }, + { + "epoch": 2.49, + "learning_rate": 4.0180340864050735e-07, + "logits/chosen": -1.6087193489074707, + "logits/rejected": -1.5955302715301514, + "logps/chosen": -63.87066650390625, + "logps/rejected": -115.04846954345703, + "loss": 0.0361, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.12157170474529266, + "rewards/margins": 6.131241798400879, + "rewards/rejected": -6.009669303894043, + "step": 1552 + }, + { + "epoch": 2.49, + "learning_rate": 4.0170432025366626e-07, + "logits/chosen": -1.6494303941726685, + "logits/rejected": -1.742544412612915, + "logps/chosen": -86.73298645019531, + "logps/rejected": -138.45066833496094, + "loss": 0.1101, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.009513258934021, + "rewards/margins": 3.5808708667755127, + "rewards/rejected": -4.590384006500244, + "step": 1553 + }, + { + "epoch": 2.49, + "learning_rate": 4.016052318668252e-07, + "logits/chosen": -1.726935863494873, + "logits/rejected": -1.702909231185913, + "logps/chosen": -94.014892578125, + "logps/rejected": -174.6034393310547, + "loss": 0.1032, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0027711391448975, + "rewards/margins": 8.567899703979492, + "rewards/rejected": -9.570670127868652, + "step": 1554 + }, + { + "epoch": 2.5, + "learning_rate": 4.015061434799841e-07, + "logits/chosen": -1.4530725479125977, + "logits/rejected": -1.584232211112976, + "logps/chosen": -109.99172973632812, + "logps/rejected": -148.54454040527344, + "loss": 0.1122, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.4750698208808899, + "rewards/margins": 3.9654531478881836, + "rewards/rejected": -4.440523147583008, + "step": 1555 + }, + { + "epoch": 2.5, + "learning_rate": 4.0140705509314303e-07, + "logits/chosen": -1.640792965888977, + "logits/rejected": -1.6850590705871582, + "logps/chosen": -83.00566101074219, + "logps/rejected": -139.25643920898438, + "loss": 0.1624, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.630255937576294, + "rewards/margins": 4.776291370391846, + "rewards/rejected": -5.406547546386719, + "step": 1556 + }, + { + "epoch": 2.5, + "learning_rate": 4.01307966706302e-07, + "logits/chosen": -1.7018249034881592, + "logits/rejected": -1.6817395687103271, + "logps/chosen": -105.99757385253906, + "logps/rejected": -152.07693481445312, + "loss": 0.054, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6234740018844604, + "rewards/margins": 4.7787322998046875, + "rewards/rejected": -5.402205944061279, + "step": 1557 + }, + { + "epoch": 2.5, + "learning_rate": 4.0120887831946095e-07, + "logits/chosen": -1.4879838228225708, + "logits/rejected": -1.6117268800735474, + "logps/chosen": -99.0791015625, + "logps/rejected": -160.3154754638672, + "loss": 0.1127, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1313610076904297, + "rewards/margins": 4.499762058258057, + "rewards/rejected": -5.631122589111328, + "step": 1558 + }, + { + "epoch": 2.5, + "learning_rate": 4.011097899326199e-07, + "logits/chosen": -1.5913962125778198, + "logits/rejected": -1.5591298341751099, + "logps/chosen": -100.78785705566406, + "logps/rejected": -154.5647735595703, + "loss": 0.0565, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.278231382369995, + "rewards/margins": 4.117045879364014, + "rewards/rejected": -6.39527702331543, + "step": 1559 + }, + { + "epoch": 2.5, + "learning_rate": 4.010107015457788e-07, + "logits/chosen": -1.7632079124450684, + "logits/rejected": -1.7530444860458374, + "logps/chosen": -98.27313995361328, + "logps/rejected": -142.61573791503906, + "loss": 0.2421, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.828830361366272, + "rewards/margins": 5.477084636688232, + "rewards/rejected": -6.305914402008057, + "step": 1560 + }, + { + "epoch": 2.51, + "learning_rate": 4.009116131589377e-07, + "logits/chosen": -1.6196459531784058, + "logits/rejected": -1.6568164825439453, + "logps/chosen": -95.01111602783203, + "logps/rejected": -134.52291870117188, + "loss": 0.1845, + "rewards/accuracies": 0.75, + "rewards/chosen": -1.5717241764068604, + "rewards/margins": 2.791172504425049, + "rewards/rejected": -4.362896919250488, + "step": 1561 + }, + { + "epoch": 2.51, + "learning_rate": 4.008125247720967e-07, + "logits/chosen": -1.7564078569412231, + "logits/rejected": -1.62039315700531, + "logps/chosen": -119.79519653320312, + "logps/rejected": -136.345703125, + "loss": 0.2036, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.440758228302002, + "rewards/margins": 4.721499443054199, + "rewards/rejected": -6.162257671356201, + "step": 1562 + }, + { + "epoch": 2.51, + "learning_rate": 4.0071343638525564e-07, + "logits/chosen": -1.7026729583740234, + "logits/rejected": -1.648584008216858, + "logps/chosen": -91.40043640136719, + "logps/rejected": -147.43435668945312, + "loss": 0.0484, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.4549716114997864, + "rewards/margins": 6.302824020385742, + "rewards/rejected": -6.757795810699463, + "step": 1563 + }, + { + "epoch": 2.51, + "learning_rate": 4.006143479984146e-07, + "logits/chosen": -1.5825023651123047, + "logits/rejected": -1.497096061706543, + "logps/chosen": -100.11810302734375, + "logps/rejected": -157.0918426513672, + "loss": 0.0966, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6713564395904541, + "rewards/margins": 4.074806213378906, + "rewards/rejected": -4.746163368225098, + "step": 1564 + }, + { + "epoch": 2.51, + "learning_rate": 4.005152596115735e-07, + "logits/chosen": -1.6536504030227661, + "logits/rejected": -1.6612420082092285, + "logps/chosen": -108.87899780273438, + "logps/rejected": -137.2091064453125, + "loss": 0.0652, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7982808947563171, + "rewards/margins": 2.0841941833496094, + "rewards/rejected": -2.8824751377105713, + "step": 1565 + }, + { + "epoch": 2.51, + "learning_rate": 4.004161712247324e-07, + "logits/chosen": -1.727595329284668, + "logits/rejected": -1.6760400533676147, + "logps/chosen": -92.77307891845703, + "logps/rejected": -147.87680053710938, + "loss": 0.1908, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9082643389701843, + "rewards/margins": 6.341095924377441, + "rewards/rejected": -7.24936056137085, + "step": 1566 + }, + { + "epoch": 2.52, + "learning_rate": 4.003170828378914e-07, + "logits/chosen": -1.530552625656128, + "logits/rejected": -1.4957835674285889, + "logps/chosen": -151.33584594726562, + "logps/rejected": -131.0881805419922, + "loss": 0.1157, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8131237030029297, + "rewards/margins": 3.316512107849121, + "rewards/rejected": -4.129635810852051, + "step": 1567 + }, + { + "epoch": 2.52, + "learning_rate": 4.0021799445105033e-07, + "logits/chosen": -1.8309082984924316, + "logits/rejected": -1.774534821510315, + "logps/chosen": -98.18014526367188, + "logps/rejected": -150.80996704101562, + "loss": 0.123, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.214583158493042, + "rewards/margins": 5.488448619842529, + "rewards/rejected": -6.70303201675415, + "step": 1568 + }, + { + "epoch": 2.52, + "learning_rate": 4.001189060642093e-07, + "logits/chosen": -1.5470460653305054, + "logits/rejected": -1.5462857484817505, + "logps/chosen": -88.42259216308594, + "logps/rejected": -139.3072052001953, + "loss": 0.1608, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6979385614395142, + "rewards/margins": 4.841582775115967, + "rewards/rejected": -5.539521217346191, + "step": 1569 + }, + { + "epoch": 2.52, + "learning_rate": 4.000198176773682e-07, + "logits/chosen": -1.6583056449890137, + "logits/rejected": -1.6451210975646973, + "logps/chosen": -85.47271728515625, + "logps/rejected": -142.15225219726562, + "loss": 0.1357, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.659433901309967, + "rewards/margins": 5.212213039398193, + "rewards/rejected": -5.871646881103516, + "step": 1570 + }, + { + "epoch": 2.52, + "learning_rate": 3.999207292905271e-07, + "logits/chosen": -1.6847831010818481, + "logits/rejected": -1.674706220626831, + "logps/chosen": -99.36507415771484, + "logps/rejected": -184.79019165039062, + "loss": 0.0757, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8249037265777588, + "rewards/margins": 7.190060138702393, + "rewards/rejected": -9.01496410369873, + "step": 1571 + }, + { + "epoch": 2.52, + "learning_rate": 3.9982164090368607e-07, + "logits/chosen": -1.5266740322113037, + "logits/rejected": -1.620421290397644, + "logps/chosen": -88.97676086425781, + "logps/rejected": -176.86361694335938, + "loss": 0.0823, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2191855907440186, + "rewards/margins": 5.633896827697754, + "rewards/rejected": -7.853082180023193, + "step": 1572 + }, + { + "epoch": 2.52, + "learning_rate": 3.9972255251684503e-07, + "logits/chosen": -1.6284303665161133, + "logits/rejected": -1.575615406036377, + "logps/chosen": -93.06710815429688, + "logps/rejected": -181.22596740722656, + "loss": 0.2806, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2214094400405884, + "rewards/margins": 5.594162940979004, + "rewards/rejected": -6.8155717849731445, + "step": 1573 + }, + { + "epoch": 2.53, + "learning_rate": 3.9962346413000393e-07, + "logits/chosen": -1.6996428966522217, + "logits/rejected": -1.7534018754959106, + "logps/chosen": -101.12648010253906, + "logps/rejected": -206.93905639648438, + "loss": 0.1393, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5073946714401245, + "rewards/margins": 7.869297027587891, + "rewards/rejected": -9.376691818237305, + "step": 1574 + }, + { + "epoch": 2.53, + "learning_rate": 3.995243757431629e-07, + "logits/chosen": -1.8097803592681885, + "logits/rejected": -1.6685130596160889, + "logps/chosen": -90.11126708984375, + "logps/rejected": -117.04840850830078, + "loss": 0.2093, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.36917388439178467, + "rewards/margins": 4.064358234405518, + "rewards/rejected": -4.433532238006592, + "step": 1575 + }, + { + "epoch": 2.53, + "learning_rate": 3.994252873563218e-07, + "logits/chosen": -1.6094155311584473, + "logits/rejected": -1.6077075004577637, + "logps/chosen": -91.07878112792969, + "logps/rejected": -126.92086029052734, + "loss": 0.1041, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.519827961921692, + "rewards/margins": 2.472761392593384, + "rewards/rejected": -3.9925894737243652, + "step": 1576 + }, + { + "epoch": 2.53, + "learning_rate": 3.9932619896948076e-07, + "logits/chosen": -1.5651779174804688, + "logits/rejected": -1.532698392868042, + "logps/chosen": -94.1179428100586, + "logps/rejected": -179.32701110839844, + "loss": 0.088, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5506263971328735, + "rewards/margins": 6.633845806121826, + "rewards/rejected": -8.18447208404541, + "step": 1577 + }, + { + "epoch": 2.53, + "learning_rate": 3.9922711058263967e-07, + "logits/chosen": -1.5924263000488281, + "logits/rejected": -1.450516700744629, + "logps/chosen": -104.43159484863281, + "logps/rejected": -134.54637145996094, + "loss": 0.1261, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.454681396484375, + "rewards/margins": 4.193905353546143, + "rewards/rejected": -6.648587226867676, + "step": 1578 + }, + { + "epoch": 2.53, + "learning_rate": 3.9912802219579863e-07, + "logits/chosen": -1.4787063598632812, + "logits/rejected": -1.5085644721984863, + "logps/chosen": -71.24537658691406, + "logps/rejected": -135.27700805664062, + "loss": 0.1107, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.253676176071167, + "rewards/margins": 4.544859409332275, + "rewards/rejected": -5.7985358238220215, + "step": 1579 + }, + { + "epoch": 2.54, + "learning_rate": 3.990289338089576e-07, + "logits/chosen": -1.5970096588134766, + "logits/rejected": -1.558953881263733, + "logps/chosen": -121.43826293945312, + "logps/rejected": -143.6401824951172, + "loss": 0.0659, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.821876049041748, + "rewards/margins": 3.3712503910064697, + "rewards/rejected": -5.193126678466797, + "step": 1580 + }, + { + "epoch": 2.54, + "learning_rate": 3.989298454221165e-07, + "logits/chosen": -1.7156455516815186, + "logits/rejected": -1.7856324911117554, + "logps/chosen": -101.93797302246094, + "logps/rejected": -155.19033813476562, + "loss": 0.1442, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4950289726257324, + "rewards/margins": 3.09779953956604, + "rewards/rejected": -5.592828750610352, + "step": 1581 + }, + { + "epoch": 2.54, + "learning_rate": 3.9883075703527545e-07, + "logits/chosen": -1.8148295879364014, + "logits/rejected": -1.6176869869232178, + "logps/chosen": -110.08763885498047, + "logps/rejected": -125.24375915527344, + "loss": 0.0952, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5866113901138306, + "rewards/margins": 4.323555946350098, + "rewards/rejected": -5.910167217254639, + "step": 1582 + }, + { + "epoch": 2.54, + "learning_rate": 3.9873166864843436e-07, + "logits/chosen": -1.5242574214935303, + "logits/rejected": -1.649563193321228, + "logps/chosen": -66.35914611816406, + "logps/rejected": -134.16854858398438, + "loss": 0.0785, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6833280324935913, + "rewards/margins": 5.45789909362793, + "rewards/rejected": -6.141226768493652, + "step": 1583 + }, + { + "epoch": 2.54, + "learning_rate": 3.986325802615933e-07, + "logits/chosen": -1.5379817485809326, + "logits/rejected": -1.5086207389831543, + "logps/chosen": -78.02264404296875, + "logps/rejected": -144.61300659179688, + "loss": 0.0984, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.40447044372558594, + "rewards/margins": 6.240036487579346, + "rewards/rejected": -6.64450740814209, + "step": 1584 + }, + { + "epoch": 2.54, + "learning_rate": 3.985334918747523e-07, + "logits/chosen": -1.5179221630096436, + "logits/rejected": -1.6315691471099854, + "logps/chosen": -90.12229919433594, + "logps/rejected": -145.6233367919922, + "loss": 0.3098, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5580155849456787, + "rewards/margins": 2.5816168785095215, + "rewards/rejected": -4.139632701873779, + "step": 1585 + }, + { + "epoch": 2.55, + "learning_rate": 3.984344034879112e-07, + "logits/chosen": -1.600441813468933, + "logits/rejected": -1.620929479598999, + "logps/chosen": -110.63578796386719, + "logps/rejected": -170.987548828125, + "loss": 0.0912, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.124908447265625, + "rewards/margins": 5.0158891677856445, + "rewards/rejected": -6.140798091888428, + "step": 1586 + }, + { + "epoch": 2.55, + "learning_rate": 3.9833531510107015e-07, + "logits/chosen": -1.560804843902588, + "logits/rejected": -1.5974311828613281, + "logps/chosen": -79.38702392578125, + "logps/rejected": -143.84593200683594, + "loss": 0.1227, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.782480239868164, + "rewards/margins": 4.673681735992432, + "rewards/rejected": -6.456161975860596, + "step": 1587 + }, + { + "epoch": 2.55, + "learning_rate": 3.9823622671422905e-07, + "logits/chosen": -1.4683737754821777, + "logits/rejected": -1.4383587837219238, + "logps/chosen": -95.92796325683594, + "logps/rejected": -175.01858520507812, + "loss": 0.0487, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3718544244766235, + "rewards/margins": 7.066441535949707, + "rewards/rejected": -8.438295364379883, + "step": 1588 + }, + { + "epoch": 2.55, + "learning_rate": 3.98137138327388e-07, + "logits/chosen": -1.7322998046875, + "logits/rejected": -1.710734248161316, + "logps/chosen": -100.8972396850586, + "logps/rejected": -131.4492950439453, + "loss": 0.0557, + "rewards/accuracies": 0.75, + "rewards/chosen": -3.247551441192627, + "rewards/margins": 3.1347036361694336, + "rewards/rejected": -6.3822550773620605, + "step": 1589 + }, + { + "epoch": 2.55, + "learning_rate": 3.9803804994054697e-07, + "logits/chosen": -1.7330348491668701, + "logits/rejected": -1.7054024934768677, + "logps/chosen": -111.55364990234375, + "logps/rejected": -131.46124267578125, + "loss": 0.1887, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.4935653805732727, + "rewards/margins": 4.084343433380127, + "rewards/rejected": -4.577908992767334, + "step": 1590 + }, + { + "epoch": 2.55, + "learning_rate": 3.979389615537059e-07, + "logits/chosen": -1.8348705768585205, + "logits/rejected": -1.613537311553955, + "logps/chosen": -118.7841796875, + "logps/rejected": -124.66348266601562, + "loss": 0.1372, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.255164623260498, + "rewards/margins": 2.899994373321533, + "rewards/rejected": -5.155158996582031, + "step": 1591 + }, + { + "epoch": 2.56, + "learning_rate": 3.9783987316686484e-07, + "logits/chosen": -1.633135437965393, + "logits/rejected": -1.589152216911316, + "logps/chosen": -86.32500457763672, + "logps/rejected": -143.56466674804688, + "loss": 0.0894, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.27792778611183167, + "rewards/margins": 5.609485626220703, + "rewards/rejected": -5.887413501739502, + "step": 1592 + }, + { + "epoch": 2.56, + "learning_rate": 3.9774078478002375e-07, + "logits/chosen": -1.6260837316513062, + "logits/rejected": -1.5902106761932373, + "logps/chosen": -110.70323181152344, + "logps/rejected": -133.01734924316406, + "loss": 0.1557, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9546393156051636, + "rewards/margins": 3.7209715843200684, + "rewards/rejected": -4.6756110191345215, + "step": 1593 + }, + { + "epoch": 2.56, + "learning_rate": 3.976416963931827e-07, + "logits/chosen": -1.814721703529358, + "logits/rejected": -1.793402910232544, + "logps/chosen": -105.3807144165039, + "logps/rejected": -129.46099853515625, + "loss": 0.187, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.3332536220550537, + "rewards/margins": 2.9713120460510254, + "rewards/rejected": -5.304565906524658, + "step": 1594 + }, + { + "epoch": 2.56, + "learning_rate": 3.9754260800634167e-07, + "logits/chosen": -1.599279761314392, + "logits/rejected": -1.6641438007354736, + "logps/chosen": -120.28355407714844, + "logps/rejected": -186.97964477539062, + "loss": 0.0938, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.368260622024536, + "rewards/margins": 6.632912635803223, + "rewards/rejected": -9.001173973083496, + "step": 1595 + }, + { + "epoch": 2.56, + "learning_rate": 3.9744351961950057e-07, + "logits/chosen": -1.7605721950531006, + "logits/rejected": -1.779759168624878, + "logps/chosen": -87.25627136230469, + "logps/rejected": -174.69989013671875, + "loss": 0.0345, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6949288845062256, + "rewards/margins": 7.828545570373535, + "rewards/rejected": -8.523473739624023, + "step": 1596 + }, + { + "epoch": 2.56, + "learning_rate": 3.9734443123265953e-07, + "logits/chosen": -1.7103278636932373, + "logits/rejected": -1.6767277717590332, + "logps/chosen": -90.26966857910156, + "logps/rejected": -139.5058135986328, + "loss": 0.1485, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.27963343262672424, + "rewards/margins": 6.556266784667969, + "rewards/rejected": -6.835899353027344, + "step": 1597 + }, + { + "epoch": 2.57, + "learning_rate": 3.9724534284581844e-07, + "logits/chosen": -1.6779993772506714, + "logits/rejected": -1.6295992136001587, + "logps/chosen": -112.949951171875, + "logps/rejected": -138.36692810058594, + "loss": 0.0847, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.3521217405796051, + "rewards/margins": 3.686206579208374, + "rewards/rejected": -4.038328647613525, + "step": 1598 + }, + { + "epoch": 2.57, + "learning_rate": 3.9714625445897735e-07, + "logits/chosen": -1.8347086906433105, + "logits/rejected": -1.7368643283843994, + "logps/chosen": -117.81710815429688, + "logps/rejected": -162.89442443847656, + "loss": 0.2033, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3300954103469849, + "rewards/margins": 5.813445091247559, + "rewards/rejected": -7.143540382385254, + "step": 1599 + }, + { + "epoch": 2.57, + "learning_rate": 3.9704716607213636e-07, + "logits/chosen": -1.7208443880081177, + "logits/rejected": -1.6522516012191772, + "logps/chosen": -106.51007080078125, + "logps/rejected": -142.56048583984375, + "loss": 0.0683, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1910394430160522, + "rewards/margins": 4.712832450866699, + "rewards/rejected": -5.903872013092041, + "step": 1600 + }, + { + "epoch": 2.57, + "learning_rate": 3.9694807768529527e-07, + "logits/chosen": -1.5650928020477295, + "logits/rejected": -1.6778311729431152, + "logps/chosen": -113.66629028320312, + "logps/rejected": -183.86599731445312, + "loss": 0.0942, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6565715074539185, + "rewards/margins": 6.361757278442383, + "rewards/rejected": -8.018327713012695, + "step": 1601 + }, + { + "epoch": 2.57, + "learning_rate": 3.968489892984542e-07, + "logits/chosen": -1.7224628925323486, + "logits/rejected": -1.7024729251861572, + "logps/chosen": -136.17758178710938, + "logps/rejected": -168.82843017578125, + "loss": 0.0979, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.0839645862579346, + "rewards/margins": 3.2652587890625, + "rewards/rejected": -5.349223613739014, + "step": 1602 + }, + { + "epoch": 2.57, + "learning_rate": 3.9674990091161313e-07, + "logits/chosen": -1.631959080696106, + "logits/rejected": -1.5801258087158203, + "logps/chosen": -99.63860321044922, + "logps/rejected": -133.9796142578125, + "loss": 0.0752, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2639216184616089, + "rewards/margins": 3.943310022354126, + "rewards/rejected": -5.207231521606445, + "step": 1603 + }, + { + "epoch": 2.57, + "learning_rate": 3.9665081252477204e-07, + "logits/chosen": -1.6479599475860596, + "logits/rejected": -1.7490185499191284, + "logps/chosen": -70.61756134033203, + "logps/rejected": -124.90959167480469, + "loss": 0.0889, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9974884986877441, + "rewards/margins": 3.7811386585235596, + "rewards/rejected": -4.778626918792725, + "step": 1604 + }, + { + "epoch": 2.58, + "learning_rate": 3.9655172413793105e-07, + "logits/chosen": -1.6024096012115479, + "logits/rejected": -1.698488473892212, + "logps/chosen": -79.61404418945312, + "logps/rejected": -158.06600952148438, + "loss": 0.0832, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7447934150695801, + "rewards/margins": 5.869002342224121, + "rewards/rejected": -6.613795280456543, + "step": 1605 + }, + { + "epoch": 2.58, + "learning_rate": 3.9645263575108996e-07, + "logits/chosen": -1.7745715379714966, + "logits/rejected": -1.7692279815673828, + "logps/chosen": -107.55937194824219, + "logps/rejected": -131.77157592773438, + "loss": 0.2429, + "rewards/accuracies": 0.75, + "rewards/chosen": -1.4012285470962524, + "rewards/margins": 3.2497127056121826, + "rewards/rejected": -4.650940895080566, + "step": 1606 + }, + { + "epoch": 2.58, + "learning_rate": 3.963535473642489e-07, + "logits/chosen": -1.7383382320404053, + "logits/rejected": -1.7177146673202515, + "logps/chosen": -94.09730529785156, + "logps/rejected": -142.09109497070312, + "loss": 0.1086, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6904318928718567, + "rewards/margins": 4.734926223754883, + "rewards/rejected": -5.425358295440674, + "step": 1607 + }, + { + "epoch": 2.58, + "learning_rate": 3.962544589774078e-07, + "logits/chosen": -1.6292829513549805, + "logits/rejected": -1.6647508144378662, + "logps/chosen": -78.71209716796875, + "logps/rejected": -140.18771362304688, + "loss": 0.0711, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.3100743889808655, + "rewards/margins": 4.240592956542969, + "rewards/rejected": -4.550667762756348, + "step": 1608 + }, + { + "epoch": 2.58, + "learning_rate": 3.9615537059056673e-07, + "logits/chosen": -1.6707358360290527, + "logits/rejected": -1.6981303691864014, + "logps/chosen": -96.77862548828125, + "logps/rejected": -148.6749267578125, + "loss": 0.0509, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8654155731201172, + "rewards/margins": 5.460432052612305, + "rewards/rejected": -6.325847625732422, + "step": 1609 + }, + { + "epoch": 2.58, + "learning_rate": 3.9605628220372574e-07, + "logits/chosen": -1.7408658266067505, + "logits/rejected": -1.7753620147705078, + "logps/chosen": -87.6339111328125, + "logps/rejected": -158.48348999023438, + "loss": 0.0727, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9760258197784424, + "rewards/margins": 5.855222702026367, + "rewards/rejected": -6.831248760223389, + "step": 1610 + }, + { + "epoch": 2.59, + "learning_rate": 3.9595719381688465e-07, + "logits/chosen": -1.6654293537139893, + "logits/rejected": -1.7186510562896729, + "logps/chosen": -83.20941162109375, + "logps/rejected": -145.1309814453125, + "loss": 0.097, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.16288454830646515, + "rewards/margins": 7.108290195465088, + "rewards/rejected": -7.271175384521484, + "step": 1611 + }, + { + "epoch": 2.59, + "learning_rate": 3.9585810543004356e-07, + "logits/chosen": -1.8012669086456299, + "logits/rejected": -1.6597280502319336, + "logps/chosen": -108.53759765625, + "logps/rejected": -146.98770141601562, + "loss": 0.0999, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2267045974731445, + "rewards/margins": 6.570436477661133, + "rewards/rejected": -7.797141075134277, + "step": 1612 + }, + { + "epoch": 2.59, + "learning_rate": 3.957590170432025e-07, + "logits/chosen": -1.7769131660461426, + "logits/rejected": -1.8239631652832031, + "logps/chosen": -105.0603256225586, + "logps/rejected": -148.65687561035156, + "loss": 0.1884, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6336904764175415, + "rewards/margins": 4.147021293640137, + "rewards/rejected": -4.780711650848389, + "step": 1613 + }, + { + "epoch": 2.59, + "learning_rate": 3.956599286563614e-07, + "logits/chosen": -1.4476923942565918, + "logits/rejected": -1.4214524030685425, + "logps/chosen": -84.91102600097656, + "logps/rejected": -151.73471069335938, + "loss": 0.0657, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0513105392456055, + "rewards/margins": 5.665750503540039, + "rewards/rejected": -6.717061519622803, + "step": 1614 + }, + { + "epoch": 2.59, + "learning_rate": 3.9556084026952044e-07, + "logits/chosen": -1.619481086730957, + "logits/rejected": -1.5780150890350342, + "logps/chosen": -142.35537719726562, + "logps/rejected": -153.94061279296875, + "loss": 0.0751, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8274528980255127, + "rewards/margins": 4.383543968200684, + "rewards/rejected": -7.210997104644775, + "step": 1615 + }, + { + "epoch": 2.59, + "learning_rate": 3.9546175188267934e-07, + "logits/chosen": -1.7960684299468994, + "logits/rejected": -1.7529315948486328, + "logps/chosen": -100.99296569824219, + "logps/rejected": -152.49417114257812, + "loss": 0.0885, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2951539754867554, + "rewards/margins": 4.537778854370117, + "rewards/rejected": -5.83293342590332, + "step": 1616 + }, + { + "epoch": 2.6, + "learning_rate": 3.9536266349583825e-07, + "logits/chosen": -1.6651270389556885, + "logits/rejected": -1.661226749420166, + "logps/chosen": -103.29608154296875, + "logps/rejected": -140.9802703857422, + "loss": 0.1844, + "rewards/accuracies": 0.75, + "rewards/chosen": -1.9721553325653076, + "rewards/margins": 3.407167673110962, + "rewards/rejected": -5.3793230056762695, + "step": 1617 + }, + { + "epoch": 2.6, + "learning_rate": 3.952635751089972e-07, + "logits/chosen": -1.732515811920166, + "logits/rejected": -1.6859455108642578, + "logps/chosen": -113.89671325683594, + "logps/rejected": -159.95726013183594, + "loss": 0.2015, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.439382553100586, + "rewards/margins": 5.094992637634277, + "rewards/rejected": -6.534374713897705, + "step": 1618 + }, + { + "epoch": 2.6, + "learning_rate": 3.951644867221561e-07, + "logits/chosen": -1.601952075958252, + "logits/rejected": -1.5908163785934448, + "logps/chosen": -73.8998031616211, + "logps/rejected": -127.65390014648438, + "loss": 0.2232, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1103142499923706, + "rewards/margins": 4.05513334274292, + "rewards/rejected": -5.165447235107422, + "step": 1619 + }, + { + "epoch": 2.6, + "learning_rate": 3.950653983353151e-07, + "logits/chosen": -1.6120493412017822, + "logits/rejected": -1.571523666381836, + "logps/chosen": -109.36521911621094, + "logps/rejected": -146.45326232910156, + "loss": 0.2561, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.514857053756714, + "rewards/margins": 4.719993591308594, + "rewards/rejected": -7.2348504066467285, + "step": 1620 + }, + { + "epoch": 2.6, + "learning_rate": 3.9496630994847404e-07, + "logits/chosen": -1.6132532358169556, + "logits/rejected": -1.6651500463485718, + "logps/chosen": -120.26191711425781, + "logps/rejected": -191.63925170898438, + "loss": 0.0814, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6146912574768066, + "rewards/margins": 6.109132289886475, + "rewards/rejected": -8.723823547363281, + "step": 1621 + }, + { + "epoch": 2.6, + "learning_rate": 3.9486722156163294e-07, + "logits/chosen": -1.7061753273010254, + "logits/rejected": -1.670372724533081, + "logps/chosen": -89.03148651123047, + "logps/rejected": -149.909912109375, + "loss": 0.0304, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9480960369110107, + "rewards/margins": 5.918118000030518, + "rewards/rejected": -6.866213798522949, + "step": 1622 + }, + { + "epoch": 2.61, + "learning_rate": 3.947681331747919e-07, + "logits/chosen": -1.4570914506912231, + "logits/rejected": -1.540759563446045, + "logps/chosen": -87.44071197509766, + "logps/rejected": -191.045654296875, + "loss": 0.0438, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6355862617492676, + "rewards/margins": 6.893369674682617, + "rewards/rejected": -8.528956413269043, + "step": 1623 + }, + { + "epoch": 2.61, + "learning_rate": 3.946690447879508e-07, + "logits/chosen": -1.5185577869415283, + "logits/rejected": -1.449926495552063, + "logps/chosen": -83.57771301269531, + "logps/rejected": -137.5116729736328, + "loss": 0.0589, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6756622791290283, + "rewards/margins": 5.110085487365723, + "rewards/rejected": -6.785747528076172, + "step": 1624 + }, + { + "epoch": 2.61, + "learning_rate": 3.9456995640110977e-07, + "logits/chosen": -1.5396792888641357, + "logits/rejected": -1.5881719589233398, + "logps/chosen": -121.74185943603516, + "logps/rejected": -178.02171325683594, + "loss": 0.0906, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.811814308166504, + "rewards/margins": 5.857349395751953, + "rewards/rejected": -8.669163703918457, + "step": 1625 + }, + { + "epoch": 2.61, + "learning_rate": 3.9447086801426873e-07, + "logits/chosen": -1.6754686832427979, + "logits/rejected": -1.6324403285980225, + "logps/chosen": -100.95988464355469, + "logps/rejected": -142.52105712890625, + "loss": 0.2119, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5382416248321533, + "rewards/margins": 3.7480950355529785, + "rewards/rejected": -5.286336898803711, + "step": 1626 + }, + { + "epoch": 2.61, + "learning_rate": 3.9437177962742764e-07, + "logits/chosen": -1.6739939451217651, + "logits/rejected": -1.6295630931854248, + "logps/chosen": -110.15270233154297, + "logps/rejected": -150.19964599609375, + "loss": 0.1191, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1927270889282227, + "rewards/margins": 4.530815601348877, + "rewards/rejected": -6.7235426902771, + "step": 1627 + }, + { + "epoch": 2.61, + "learning_rate": 3.942726912405866e-07, + "logits/chosen": -1.6827847957611084, + "logits/rejected": -1.7039899826049805, + "logps/chosen": -100.11036682128906, + "logps/rejected": -111.03311157226562, + "loss": 0.111, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9681674242019653, + "rewards/margins": 1.950844168663025, + "rewards/rejected": -2.9190115928649902, + "step": 1628 + }, + { + "epoch": 2.61, + "learning_rate": 3.941736028537455e-07, + "logits/chosen": -1.7222727537155151, + "logits/rejected": -1.6499079465866089, + "logps/chosen": -112.16838073730469, + "logps/rejected": -167.1329803466797, + "loss": 0.0879, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.204603910446167, + "rewards/margins": 6.814640045166016, + "rewards/rejected": -8.019244194030762, + "step": 1629 + }, + { + "epoch": 2.62, + "learning_rate": 3.9407451446690446e-07, + "logits/chosen": -1.5573862791061401, + "logits/rejected": -1.5188207626342773, + "logps/chosen": -90.4585189819336, + "logps/rejected": -136.2434539794922, + "loss": 0.0486, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6311439275741577, + "rewards/margins": 2.859203338623047, + "rewards/rejected": -4.490347385406494, + "step": 1630 + }, + { + "epoch": 2.62, + "learning_rate": 3.939754260800634e-07, + "logits/chosen": -1.667012095451355, + "logits/rejected": -1.673702597618103, + "logps/chosen": -109.96087646484375, + "logps/rejected": -165.41712951660156, + "loss": 0.0994, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1286392211914062, + "rewards/margins": 6.292535781860352, + "rewards/rejected": -9.421175003051758, + "step": 1631 + }, + { + "epoch": 2.62, + "learning_rate": 3.9387633769322233e-07, + "logits/chosen": -1.6863280534744263, + "logits/rejected": -1.6358871459960938, + "logps/chosen": -101.52421569824219, + "logps/rejected": -137.13604736328125, + "loss": 0.2113, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.3501996994018555, + "rewards/margins": 4.562976360321045, + "rewards/rejected": -6.9131760597229, + "step": 1632 + }, + { + "epoch": 2.62, + "learning_rate": 3.937772493063813e-07, + "logits/chosen": -1.7107231616973877, + "logits/rejected": -1.6725667715072632, + "logps/chosen": -107.75257873535156, + "logps/rejected": -139.10394287109375, + "loss": 0.0953, + "rewards/accuracies": 0.75, + "rewards/chosen": -1.8070709705352783, + "rewards/margins": 3.5965301990509033, + "rewards/rejected": -5.403601169586182, + "step": 1633 + }, + { + "epoch": 2.62, + "learning_rate": 3.936781609195402e-07, + "logits/chosen": -1.7599291801452637, + "logits/rejected": -1.8071138858795166, + "logps/chosen": -117.89910888671875, + "logps/rejected": -148.1265411376953, + "loss": 0.3147, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7654922008514404, + "rewards/margins": 3.4320321083068848, + "rewards/rejected": -5.197524547576904, + "step": 1634 + }, + { + "epoch": 2.62, + "learning_rate": 3.9357907253269916e-07, + "logits/chosen": -1.8023946285247803, + "logits/rejected": -1.7016016244888306, + "logps/chosen": -109.1348876953125, + "logps/rejected": -132.32748413085938, + "loss": 0.0763, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.310671329498291, + "rewards/margins": 4.045626163482666, + "rewards/rejected": -5.356297492980957, + "step": 1635 + }, + { + "epoch": 2.63, + "learning_rate": 3.934799841458581e-07, + "logits/chosen": -1.6300444602966309, + "logits/rejected": -1.7276816368103027, + "logps/chosen": -87.09320831298828, + "logps/rejected": -163.82447814941406, + "loss": 0.1155, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5919895172119141, + "rewards/margins": 4.596068859100342, + "rewards/rejected": -5.188058376312256, + "step": 1636 + }, + { + "epoch": 2.63, + "learning_rate": 3.93380895759017e-07, + "logits/chosen": -1.719331979751587, + "logits/rejected": -1.8132953643798828, + "logps/chosen": -68.77217102050781, + "logps/rejected": -135.25283813476562, + "loss": 0.103, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9111250638961792, + "rewards/margins": 4.569169044494629, + "rewards/rejected": -5.480294227600098, + "step": 1637 + }, + { + "epoch": 2.63, + "learning_rate": 3.93281807372176e-07, + "logits/chosen": -1.542251706123352, + "logits/rejected": -1.569412112236023, + "logps/chosen": -95.82078552246094, + "logps/rejected": -145.8072967529297, + "loss": 0.1139, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13409289717674255, + "rewards/margins": 5.254614353179932, + "rewards/rejected": -5.388707160949707, + "step": 1638 + }, + { + "epoch": 2.63, + "learning_rate": 3.931827189853349e-07, + "logits/chosen": -1.65717351436615, + "logits/rejected": -1.6844940185546875, + "logps/chosen": -105.9080581665039, + "logps/rejected": -177.64984130859375, + "loss": 0.0697, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2233009338378906, + "rewards/margins": 6.619023323059082, + "rewards/rejected": -7.842324256896973, + "step": 1639 + }, + { + "epoch": 2.63, + "learning_rate": 3.9308363059849385e-07, + "logits/chosen": -1.7214059829711914, + "logits/rejected": -1.6457960605621338, + "logps/chosen": -112.06330871582031, + "logps/rejected": -136.50428771972656, + "loss": 0.1834, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.821772813796997, + "rewards/margins": 4.65543794631958, + "rewards/rejected": -6.4772114753723145, + "step": 1640 + }, + { + "epoch": 2.63, + "learning_rate": 3.9298454221165276e-07, + "logits/chosen": -1.6323341131210327, + "logits/rejected": -1.721718192100525, + "logps/chosen": -83.76187133789062, + "logps/rejected": -139.0671844482422, + "loss": 0.3235, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.34867894649505615, + "rewards/margins": 4.016147613525391, + "rewards/rejected": -4.364826679229736, + "step": 1641 + }, + { + "epoch": 2.64, + "learning_rate": 3.928854538248117e-07, + "logits/chosen": -1.7433383464813232, + "logits/rejected": -1.6081993579864502, + "logps/chosen": -105.66068267822266, + "logps/rejected": -129.79730224609375, + "loss": 0.2014, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.4628137946128845, + "rewards/margins": 4.714909076690674, + "rewards/rejected": -5.177722930908203, + "step": 1642 + }, + { + "epoch": 2.64, + "learning_rate": 3.927863654379707e-07, + "logits/chosen": -1.6566319465637207, + "logits/rejected": -1.6039808988571167, + "logps/chosen": -97.6040267944336, + "logps/rejected": -179.27122497558594, + "loss": 0.0803, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6683167815208435, + "rewards/margins": 7.4173383712768555, + "rewards/rejected": -8.085655212402344, + "step": 1643 + }, + { + "epoch": 2.64, + "learning_rate": 3.926872770511296e-07, + "logits/chosen": -1.64591646194458, + "logits/rejected": -1.6679062843322754, + "logps/chosen": -90.45459747314453, + "logps/rejected": -126.66781616210938, + "loss": 0.1631, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.577660083770752, + "rewards/margins": 4.666001796722412, + "rewards/rejected": -6.243661403656006, + "step": 1644 + }, + { + "epoch": 2.64, + "learning_rate": 3.925881886642885e-07, + "logits/chosen": -1.5339325666427612, + "logits/rejected": -1.5786104202270508, + "logps/chosen": -79.47030639648438, + "logps/rejected": -158.0842742919922, + "loss": 0.0704, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.11824533343315125, + "rewards/margins": 6.55366325378418, + "rewards/rejected": -6.435418128967285, + "step": 1645 + }, + { + "epoch": 2.64, + "learning_rate": 3.9248910027744745e-07, + "logits/chosen": -1.686988115310669, + "logits/rejected": -1.5854737758636475, + "logps/chosen": -86.6767807006836, + "logps/rejected": -138.66416931152344, + "loss": 0.081, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8170170187950134, + "rewards/margins": 4.332840919494629, + "rewards/rejected": -5.149857997894287, + "step": 1646 + }, + { + "epoch": 2.64, + "learning_rate": 3.923900118906064e-07, + "logits/chosen": -1.768094778060913, + "logits/rejected": -1.7337628602981567, + "logps/chosen": -66.6224136352539, + "logps/rejected": -152.81776428222656, + "loss": 0.1206, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2800143957138062, + "rewards/margins": 7.653653144836426, + "rewards/rejected": -8.933667182922363, + "step": 1647 + }, + { + "epoch": 2.65, + "learning_rate": 3.9229092350376537e-07, + "logits/chosen": -1.5408319234848022, + "logits/rejected": -1.6437922716140747, + "logps/chosen": -65.5640869140625, + "logps/rejected": -136.1582794189453, + "loss": 0.1353, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12592744827270508, + "rewards/margins": 4.883445739746094, + "rewards/rejected": -5.009373188018799, + "step": 1648 + }, + { + "epoch": 2.65, + "learning_rate": 3.921918351169243e-07, + "logits/chosen": -1.719387173652649, + "logits/rejected": -1.730057716369629, + "logps/chosen": -103.0885238647461, + "logps/rejected": -122.94817352294922, + "loss": 0.3114, + "rewards/accuracies": 0.75, + "rewards/chosen": -1.0969889163970947, + "rewards/margins": 1.1272996664047241, + "rewards/rejected": -2.2242887020111084, + "step": 1649 + }, + { + "epoch": 2.65, + "learning_rate": 3.920927467300832e-07, + "logits/chosen": -1.6782052516937256, + "logits/rejected": -1.6316736936569214, + "logps/chosen": -104.43545532226562, + "logps/rejected": -124.58712768554688, + "loss": 0.0916, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.75673246383667, + "rewards/margins": 2.613795042037964, + "rewards/rejected": -5.370527267456055, + "step": 1650 + }, + { + "epoch": 2.65, + "learning_rate": 3.9199365834324214e-07, + "logits/chosen": -1.6421464681625366, + "logits/rejected": -1.6272259950637817, + "logps/chosen": -107.66386413574219, + "logps/rejected": -150.99456787109375, + "loss": 0.0742, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6330490112304688, + "rewards/margins": 4.535783290863037, + "rewards/rejected": -6.168832302093506, + "step": 1651 + }, + { + "epoch": 2.65, + "learning_rate": 3.918945699564011e-07, + "logits/chosen": -1.7215654850006104, + "logits/rejected": -1.7525396347045898, + "logps/chosen": -118.01673126220703, + "logps/rejected": -156.47991943359375, + "loss": 0.1416, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.0125925540924072, + "rewards/margins": 3.54396915435791, + "rewards/rejected": -6.5565619468688965, + "step": 1652 + }, + { + "epoch": 2.65, + "learning_rate": 3.9179548156956006e-07, + "logits/chosen": -1.737659215927124, + "logits/rejected": -1.725931167602539, + "logps/chosen": -93.76824188232422, + "logps/rejected": -154.07403564453125, + "loss": 0.0539, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9895079135894775, + "rewards/margins": 6.329216003417969, + "rewards/rejected": -8.318724632263184, + "step": 1653 + }, + { + "epoch": 2.65, + "learning_rate": 3.9169639318271897e-07, + "logits/chosen": -1.5703163146972656, + "logits/rejected": -1.6204674243927002, + "logps/chosen": -68.430908203125, + "logps/rejected": -154.0151824951172, + "loss": 0.2045, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7096720337867737, + "rewards/margins": 7.791020393371582, + "rewards/rejected": -8.500692367553711, + "step": 1654 + }, + { + "epoch": 2.66, + "learning_rate": 3.915973047958779e-07, + "logits/chosen": -1.654592752456665, + "logits/rejected": -1.6313996315002441, + "logps/chosen": -102.76555633544922, + "logps/rejected": -154.89437866210938, + "loss": 0.049, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.35646411776542664, + "rewards/margins": 4.78443717956543, + "rewards/rejected": -5.140901565551758, + "step": 1655 + }, + { + "epoch": 2.66, + "learning_rate": 3.9149821640903683e-07, + "logits/chosen": -1.7689440250396729, + "logits/rejected": -1.6736929416656494, + "logps/chosen": -86.84187316894531, + "logps/rejected": -120.33537292480469, + "loss": 0.1224, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6355079412460327, + "rewards/margins": 3.3994030952453613, + "rewards/rejected": -4.034911155700684, + "step": 1656 + }, + { + "epoch": 2.66, + "learning_rate": 3.913991280221958e-07, + "logits/chosen": -1.5503085851669312, + "logits/rejected": -1.580747127532959, + "logps/chosen": -83.42243957519531, + "logps/rejected": -134.90499877929688, + "loss": 0.0931, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0180339813232422, + "rewards/margins": 4.432450771331787, + "rewards/rejected": -5.450484752655029, + "step": 1657 + }, + { + "epoch": 2.66, + "learning_rate": 3.9130003963535475e-07, + "logits/chosen": -1.7036112546920776, + "logits/rejected": -1.6509032249450684, + "logps/chosen": -73.76993560791016, + "logps/rejected": -139.8185272216797, + "loss": 0.1175, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5025233030319214, + "rewards/margins": 6.139401435852051, + "rewards/rejected": -6.641924858093262, + "step": 1658 + }, + { + "epoch": 2.66, + "learning_rate": 3.9120095124851366e-07, + "logits/chosen": -1.7205610275268555, + "logits/rejected": -1.6802823543548584, + "logps/chosen": -106.723876953125, + "logps/rejected": -154.71156311035156, + "loss": 0.1218, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1316072940826416, + "rewards/margins": 4.3703765869140625, + "rewards/rejected": -5.501983642578125, + "step": 1659 + }, + { + "epoch": 2.66, + "learning_rate": 3.9110186286167257e-07, + "logits/chosen": -1.7646477222442627, + "logits/rejected": -1.6980911493301392, + "logps/chosen": -108.64151763916016, + "logps/rejected": -150.76380920410156, + "loss": 0.0624, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2869582176208496, + "rewards/margins": 2.9378268718719482, + "rewards/rejected": -4.224785327911377, + "step": 1660 + }, + { + "epoch": 2.67, + "learning_rate": 3.9100277447483153e-07, + "logits/chosen": -1.604628086090088, + "logits/rejected": -1.5901622772216797, + "logps/chosen": -127.52660369873047, + "logps/rejected": -163.03546142578125, + "loss": 0.1586, + "rewards/accuracies": 0.75, + "rewards/chosen": -1.7445952892303467, + "rewards/margins": 4.948258876800537, + "rewards/rejected": -6.692853927612305, + "step": 1661 + }, + { + "epoch": 2.67, + "learning_rate": 3.9090368608799043e-07, + "logits/chosen": -1.60129714012146, + "logits/rejected": -1.5919628143310547, + "logps/chosen": -102.20618438720703, + "logps/rejected": -151.06893920898438, + "loss": 0.1045, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.35329896211624146, + "rewards/margins": 4.015526294708252, + "rewards/rejected": -4.368825435638428, + "step": 1662 + }, + { + "epoch": 2.67, + "learning_rate": 3.9080459770114945e-07, + "logits/chosen": -1.763448715209961, + "logits/rejected": -1.712659478187561, + "logps/chosen": -99.34208679199219, + "logps/rejected": -130.9228057861328, + "loss": 0.0889, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.3112873136997223, + "rewards/margins": 3.0040805339813232, + "rewards/rejected": -3.3153679370880127, + "step": 1663 + }, + { + "epoch": 2.67, + "learning_rate": 3.9070550931430835e-07, + "logits/chosen": -1.66643488407135, + "logits/rejected": -1.6019322872161865, + "logps/chosen": -107.14747619628906, + "logps/rejected": -113.06047058105469, + "loss": 0.2551, + "rewards/accuracies": 0.5, + "rewards/chosen": -1.734014868736267, + "rewards/margins": 2.1405088901519775, + "rewards/rejected": -3.874523639678955, + "step": 1664 + }, + { + "epoch": 2.67, + "learning_rate": 3.9060642092746726e-07, + "logits/chosen": -1.453410029411316, + "logits/rejected": -1.5284217596054077, + "logps/chosen": -62.22781753540039, + "logps/rejected": -102.10479736328125, + "loss": 0.3672, + "rewards/accuracies": 0.5, + "rewards/chosen": -1.4065132141113281, + "rewards/margins": 1.8811142444610596, + "rewards/rejected": -3.2876274585723877, + "step": 1665 + }, + { + "epoch": 2.67, + "learning_rate": 3.905073325406262e-07, + "logits/chosen": -1.6353001594543457, + "logits/rejected": -1.6529649496078491, + "logps/chosen": -105.6570816040039, + "logps/rejected": -144.5390625, + "loss": 0.1454, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5073972940444946, + "rewards/margins": 2.818286895751953, + "rewards/rejected": -4.325684070587158, + "step": 1666 + }, + { + "epoch": 2.68, + "learning_rate": 3.9040824415378513e-07, + "logits/chosen": -1.5287467241287231, + "logits/rejected": -1.507435917854309, + "logps/chosen": -96.50396728515625, + "logps/rejected": -127.56505584716797, + "loss": 0.0431, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.232048273086548, + "rewards/margins": 3.746950149536133, + "rewards/rejected": -5.97899866104126, + "step": 1667 + }, + { + "epoch": 2.68, + "learning_rate": 3.9030915576694414e-07, + "logits/chosen": -1.5859839916229248, + "logits/rejected": -1.5464189052581787, + "logps/chosen": -96.02423095703125, + "logps/rejected": -126.1764144897461, + "loss": 0.0788, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1699787378311157, + "rewards/margins": 3.4963724613189697, + "rewards/rejected": -4.666351318359375, + "step": 1668 + }, + { + "epoch": 2.68, + "learning_rate": 3.9021006738010305e-07, + "logits/chosen": -1.600571632385254, + "logits/rejected": -1.582901954650879, + "logps/chosen": -92.39420318603516, + "logps/rejected": -130.1382293701172, + "loss": 0.0754, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4282160997390747, + "rewards/margins": 3.8117406368255615, + "rewards/rejected": -5.239956855773926, + "step": 1669 + }, + { + "epoch": 2.68, + "learning_rate": 3.9011097899326195e-07, + "logits/chosen": -1.623052954673767, + "logits/rejected": -1.7397581338882446, + "logps/chosen": -90.87034606933594, + "logps/rejected": -141.79522705078125, + "loss": 0.0644, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7627394199371338, + "rewards/margins": 3.422524929046631, + "rewards/rejected": -5.185264587402344, + "step": 1670 + }, + { + "epoch": 2.68, + "learning_rate": 3.900118906064209e-07, + "logits/chosen": -1.4709466695785522, + "logits/rejected": -1.6833181381225586, + "logps/chosen": -81.34078216552734, + "logps/rejected": -166.14144897460938, + "loss": 0.0559, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.602898120880127, + "rewards/margins": 4.807818412780762, + "rewards/rejected": -6.4107160568237305, + "step": 1671 + }, + { + "epoch": 2.68, + "learning_rate": 3.899128022195798e-07, + "logits/chosen": -1.5289719104766846, + "logits/rejected": -1.4617083072662354, + "logps/chosen": -96.2445297241211, + "logps/rejected": -168.16632080078125, + "loss": 0.0421, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8574463129043579, + "rewards/margins": 7.282916069030762, + "rewards/rejected": -8.140362739562988, + "step": 1672 + }, + { + "epoch": 2.69, + "learning_rate": 3.8981371383273883e-07, + "logits/chosen": -1.6871217489242554, + "logits/rejected": -1.5598878860473633, + "logps/chosen": -122.20726776123047, + "logps/rejected": -152.1883544921875, + "loss": 0.2009, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.95392906665802, + "rewards/margins": 3.9447951316833496, + "rewards/rejected": -5.898724555969238, + "step": 1673 + }, + { + "epoch": 2.69, + "learning_rate": 3.8971462544589774e-07, + "logits/chosen": -1.5609062910079956, + "logits/rejected": -1.6250025033950806, + "logps/chosen": -62.397216796875, + "logps/rejected": -127.29566955566406, + "loss": 0.0648, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.802670955657959, + "rewards/margins": 4.677715301513672, + "rewards/rejected": -5.480386734008789, + "step": 1674 + }, + { + "epoch": 2.69, + "learning_rate": 3.8961553705905665e-07, + "logits/chosen": -1.766790509223938, + "logits/rejected": -1.5937047004699707, + "logps/chosen": -101.36880493164062, + "logps/rejected": -144.26031494140625, + "loss": 0.1347, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9222066402435303, + "rewards/margins": 4.550838470458984, + "rewards/rejected": -6.4730448722839355, + "step": 1675 + }, + { + "epoch": 2.69, + "learning_rate": 3.895164486722156e-07, + "logits/chosen": -1.495752215385437, + "logits/rejected": -1.5141485929489136, + "logps/chosen": -95.90216064453125, + "logps/rejected": -169.1507568359375, + "loss": 0.0714, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0071563720703125, + "rewards/margins": 6.518420219421387, + "rewards/rejected": -7.525576591491699, + "step": 1676 + }, + { + "epoch": 2.69, + "learning_rate": 3.894173602853745e-07, + "logits/chosen": -1.6498146057128906, + "logits/rejected": -1.5136171579360962, + "logps/chosen": -133.72706604003906, + "logps/rejected": -147.18910217285156, + "loss": 0.0928, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.643066644668579, + "rewards/margins": 4.318610668182373, + "rewards/rejected": -5.961677074432373, + "step": 1677 + }, + { + "epoch": 2.69, + "learning_rate": 3.893182718985335e-07, + "logits/chosen": -1.7261035442352295, + "logits/rejected": -1.6475547552108765, + "logps/chosen": -91.88957977294922, + "logps/rejected": -128.22500610351562, + "loss": 0.2556, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.905906319618225, + "rewards/margins": 4.8238372802734375, + "rewards/rejected": -6.729743957519531, + "step": 1678 + }, + { + "epoch": 2.7, + "learning_rate": 3.8921918351169243e-07, + "logits/chosen": -1.538452386856079, + "logits/rejected": -1.52724027633667, + "logps/chosen": -99.5289535522461, + "logps/rejected": -143.4537811279297, + "loss": 0.0986, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5178170204162598, + "rewards/margins": 4.421525955200195, + "rewards/rejected": -6.939342498779297, + "step": 1679 + }, + { + "epoch": 2.7, + "learning_rate": 3.8912009512485134e-07, + "logits/chosen": -1.6561017036437988, + "logits/rejected": -1.621412992477417, + "logps/chosen": -113.26424407958984, + "logps/rejected": -141.66610717773438, + "loss": 0.2159, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5307321548461914, + "rewards/margins": 5.143934726715088, + "rewards/rejected": -6.6746673583984375, + "step": 1680 + }, + { + "epoch": 2.7, + "learning_rate": 3.890210067380103e-07, + "logits/chosen": -1.7042752504348755, + "logits/rejected": -1.6139485836029053, + "logps/chosen": -109.80953979492188, + "logps/rejected": -154.41958618164062, + "loss": 0.0935, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8617050647735596, + "rewards/margins": 5.367158889770508, + "rewards/rejected": -7.228863716125488, + "step": 1681 + }, + { + "epoch": 2.7, + "learning_rate": 3.889219183511692e-07, + "logits/chosen": -1.649864673614502, + "logits/rejected": -1.688380241394043, + "logps/chosen": -115.98057556152344, + "logps/rejected": -138.5621337890625, + "loss": 0.0919, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.35208797454833984, + "rewards/margins": 3.658139228820801, + "rewards/rejected": -4.010227203369141, + "step": 1682 + }, + { + "epoch": 2.7, + "learning_rate": 3.888228299643281e-07, + "logits/chosen": -1.7194029092788696, + "logits/rejected": -1.638472557067871, + "logps/chosen": -126.67015075683594, + "logps/rejected": -183.37649536132812, + "loss": 0.124, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1979000568389893, + "rewards/margins": 5.530783653259277, + "rewards/rejected": -7.728683948516846, + "step": 1683 + }, + { + "epoch": 2.7, + "learning_rate": 3.887237415774871e-07, + "logits/chosen": -1.6808104515075684, + "logits/rejected": -1.8558576107025146, + "logps/chosen": -81.59571838378906, + "logps/rejected": -141.98764038085938, + "loss": 0.0889, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.3140886425971985, + "rewards/margins": 4.188577651977539, + "rewards/rejected": -4.502665996551514, + "step": 1684 + }, + { + "epoch": 2.7, + "learning_rate": 3.8862465319064603e-07, + "logits/chosen": -1.5889551639556885, + "logits/rejected": -1.7213510274887085, + "logps/chosen": -100.0901870727539, + "logps/rejected": -129.26759338378906, + "loss": 0.1015, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4790693521499634, + "rewards/margins": 2.9535489082336426, + "rewards/rejected": -4.432618618011475, + "step": 1685 + }, + { + "epoch": 2.71, + "learning_rate": 3.88525564803805e-07, + "logits/chosen": -1.6408355236053467, + "logits/rejected": -1.6618602275848389, + "logps/chosen": -124.66455841064453, + "logps/rejected": -138.0692901611328, + "loss": 0.0444, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2381107807159424, + "rewards/margins": 2.3944685459136963, + "rewards/rejected": -4.632579326629639, + "step": 1686 + }, + { + "epoch": 2.71, + "learning_rate": 3.884264764169639e-07, + "logits/chosen": -1.6937484741210938, + "logits/rejected": -1.676285982131958, + "logps/chosen": -92.24946594238281, + "logps/rejected": -126.63739013671875, + "loss": 0.1181, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.898006796836853, + "rewards/margins": 4.092273712158203, + "rewards/rejected": -4.9902801513671875, + "step": 1687 + }, + { + "epoch": 2.71, + "learning_rate": 3.883273880301228e-07, + "logits/chosen": -1.518448829650879, + "logits/rejected": -1.5355606079101562, + "logps/chosen": -94.283935546875, + "logps/rejected": -152.62081909179688, + "loss": 0.0572, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9616947174072266, + "rewards/margins": 5.596569061279297, + "rewards/rejected": -7.55826473236084, + "step": 1688 + }, + { + "epoch": 2.71, + "learning_rate": 3.882282996432818e-07, + "logits/chosen": -1.5508153438568115, + "logits/rejected": -1.5178205966949463, + "logps/chosen": -130.1893310546875, + "logps/rejected": -172.04115295410156, + "loss": 0.0864, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.773143768310547, + "rewards/margins": 3.7177200317382812, + "rewards/rejected": -6.490863800048828, + "step": 1689 + }, + { + "epoch": 2.71, + "learning_rate": 3.881292112564407e-07, + "logits/chosen": -1.8021749258041382, + "logits/rejected": -1.7691079378128052, + "logps/chosen": -77.19468688964844, + "logps/rejected": -165.72084045410156, + "loss": 0.1617, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9005052447319031, + "rewards/margins": 8.021329879760742, + "rewards/rejected": -7.120824813842773, + "step": 1690 + }, + { + "epoch": 2.71, + "learning_rate": 3.880301228695997e-07, + "logits/chosen": -1.6311252117156982, + "logits/rejected": -1.64441978931427, + "logps/chosen": -100.53656768798828, + "logps/rejected": -139.1445770263672, + "loss": 0.1033, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.4896967113018036, + "rewards/margins": 4.9500885009765625, + "rewards/rejected": -5.439785003662109, + "step": 1691 + }, + { + "epoch": 2.72, + "learning_rate": 3.879310344827586e-07, + "logits/chosen": -1.6958050727844238, + "logits/rejected": -1.670047640800476, + "logps/chosen": -92.33221435546875, + "logps/rejected": -122.91683959960938, + "loss": 0.2279, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3675527572631836, + "rewards/margins": 3.019754409790039, + "rewards/rejected": -4.387307167053223, + "step": 1692 + }, + { + "epoch": 2.72, + "learning_rate": 3.878319460959175e-07, + "logits/chosen": -1.7322396039962769, + "logits/rejected": -1.6827070713043213, + "logps/chosen": -93.61825561523438, + "logps/rejected": -102.63182067871094, + "loss": 0.2242, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5245802402496338, + "rewards/margins": 1.9215037822723389, + "rewards/rejected": -3.4460840225219727, + "step": 1693 + }, + { + "epoch": 2.72, + "learning_rate": 3.877328577090765e-07, + "logits/chosen": -1.6442080736160278, + "logits/rejected": -1.6633580923080444, + "logps/chosen": -77.29707336425781, + "logps/rejected": -166.5149383544922, + "loss": 0.0849, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8206380009651184, + "rewards/margins": 6.532628059387207, + "rewards/rejected": -7.35326623916626, + "step": 1694 + }, + { + "epoch": 2.72, + "learning_rate": 3.876337693222354e-07, + "logits/chosen": -1.5950648784637451, + "logits/rejected": -1.685260534286499, + "logps/chosen": -88.7186279296875, + "logps/rejected": -172.9910888671875, + "loss": 0.0596, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9035142660140991, + "rewards/margins": 6.522806167602539, + "rewards/rejected": -7.426320552825928, + "step": 1695 + }, + { + "epoch": 2.72, + "learning_rate": 3.875346809353944e-07, + "logits/chosen": -1.6567819118499756, + "logits/rejected": -1.6497957706451416, + "logps/chosen": -124.39613342285156, + "logps/rejected": -156.3587646484375, + "loss": 0.2054, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6777167320251465, + "rewards/margins": 4.277494430541992, + "rewards/rejected": -5.955211162567139, + "step": 1696 + }, + { + "epoch": 2.72, + "learning_rate": 3.874355925485533e-07, + "logits/chosen": -1.4963115453720093, + "logits/rejected": -1.4358872175216675, + "logps/chosen": -129.52008056640625, + "logps/rejected": -153.43161010742188, + "loss": 0.0708, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7815998792648315, + "rewards/margins": 5.206716537475586, + "rewards/rejected": -6.988316535949707, + "step": 1697 + }, + { + "epoch": 2.73, + "learning_rate": 3.873365041617122e-07, + "logits/chosen": -1.764592170715332, + "logits/rejected": -1.7095646858215332, + "logps/chosen": -96.08341979980469, + "logps/rejected": -143.89962768554688, + "loss": 0.1223, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9924697875976562, + "rewards/margins": 5.921590328216553, + "rewards/rejected": -6.914060592651367, + "step": 1698 + }, + { + "epoch": 2.73, + "learning_rate": 3.872374157748712e-07, + "logits/chosen": -1.684032917022705, + "logits/rejected": -1.6870431900024414, + "logps/chosen": -93.80326843261719, + "logps/rejected": -157.6763153076172, + "loss": 0.2543, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.31217020750045776, + "rewards/margins": 6.128981590270996, + "rewards/rejected": -6.441151142120361, + "step": 1699 + }, + { + "epoch": 2.73, + "learning_rate": 3.871383273880301e-07, + "logits/chosen": -1.7220619916915894, + "logits/rejected": -1.7787582874298096, + "logps/chosen": -55.565162658691406, + "logps/rejected": -136.3475799560547, + "loss": 0.0886, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.18339119851589203, + "rewards/margins": 6.834646224975586, + "rewards/rejected": -6.651254653930664, + "step": 1700 + }, + { + "epoch": 2.73, + "learning_rate": 3.8703923900118907e-07, + "logits/chosen": -1.6423935890197754, + "logits/rejected": -1.5884943008422852, + "logps/chosen": -105.61051177978516, + "logps/rejected": -122.33573913574219, + "loss": 0.1882, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2204642295837402, + "rewards/margins": 2.960963726043701, + "rewards/rejected": -4.181427955627441, + "step": 1701 + }, + { + "epoch": 2.73, + "learning_rate": 3.86940150614348e-07, + "logits/chosen": -1.5610558986663818, + "logits/rejected": -1.5710076093673706, + "logps/chosen": -91.81001281738281, + "logps/rejected": -132.9902801513672, + "loss": 0.2061, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9544978141784668, + "rewards/margins": 2.82768177986145, + "rewards/rejected": -4.782179832458496, + "step": 1702 + }, + { + "epoch": 2.73, + "learning_rate": 3.868410622275069e-07, + "logits/chosen": -1.7032796144485474, + "logits/rejected": -1.7363215684890747, + "logps/chosen": -110.2620849609375, + "logps/rejected": -154.64883422851562, + "loss": 0.1856, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5105397701263428, + "rewards/margins": 5.057180881500244, + "rewards/rejected": -6.567720890045166, + "step": 1703 + }, + { + "epoch": 2.74, + "learning_rate": 3.8674197384066584e-07, + "logits/chosen": -1.5895330905914307, + "logits/rejected": -1.633446216583252, + "logps/chosen": -114.31243133544922, + "logps/rejected": -168.3142547607422, + "loss": 0.0494, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.2529311180114746, + "rewards/margins": 3.871659994125366, + "rewards/rejected": -7.124590873718262, + "step": 1704 + }, + { + "epoch": 2.74, + "learning_rate": 3.866428854538248e-07, + "logits/chosen": -1.6556551456451416, + "logits/rejected": -1.563711166381836, + "logps/chosen": -111.71294403076172, + "logps/rejected": -148.7470703125, + "loss": 0.1023, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3790028095245361, + "rewards/margins": 5.243337631225586, + "rewards/rejected": -6.622340679168701, + "step": 1705 + }, + { + "epoch": 2.74, + "learning_rate": 3.8654379706698376e-07, + "logits/chosen": -1.7390375137329102, + "logits/rejected": -1.7449493408203125, + "logps/chosen": -94.03425598144531, + "logps/rejected": -149.23817443847656, + "loss": 0.0395, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2745352983474731, + "rewards/margins": 4.810523986816406, + "rewards/rejected": -6.085059642791748, + "step": 1706 + }, + { + "epoch": 2.74, + "learning_rate": 3.8644470868014267e-07, + "logits/chosen": -1.5817800760269165, + "logits/rejected": -1.6437551975250244, + "logps/chosen": -99.87609100341797, + "logps/rejected": -179.95062255859375, + "loss": 0.0666, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6728297472000122, + "rewards/margins": 6.5248212814331055, + "rewards/rejected": -8.197650909423828, + "step": 1707 + }, + { + "epoch": 2.74, + "learning_rate": 3.863456202933016e-07, + "logits/chosen": -1.5573639869689941, + "logits/rejected": -1.5173733234405518, + "logps/chosen": -126.54251098632812, + "logps/rejected": -177.5182647705078, + "loss": 0.1044, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6056582927703857, + "rewards/margins": 6.196355819702148, + "rewards/rejected": -8.802013397216797, + "step": 1708 + }, + { + "epoch": 2.74, + "learning_rate": 3.8624653190646054e-07, + "logits/chosen": -1.667578101158142, + "logits/rejected": -1.602408766746521, + "logps/chosen": -113.88560485839844, + "logps/rejected": -139.5880584716797, + "loss": 0.0591, + "rewards/accuracies": 0.75, + "rewards/chosen": -3.759606122970581, + "rewards/margins": 2.4520180225372314, + "rewards/rejected": -6.211623668670654, + "step": 1709 + }, + { + "epoch": 2.74, + "learning_rate": 3.861474435196195e-07, + "logits/chosen": -1.5796071290969849, + "logits/rejected": -1.5116478204727173, + "logps/chosen": -96.09815979003906, + "logps/rejected": -159.8899688720703, + "loss": 0.0489, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0038764476776123, + "rewards/margins": 6.6698317527771, + "rewards/rejected": -7.673708915710449, + "step": 1710 + }, + { + "epoch": 2.75, + "learning_rate": 3.8604835513277846e-07, + "logits/chosen": -1.691290259361267, + "logits/rejected": -1.672302007675171, + "logps/chosen": -96.06298828125, + "logps/rejected": -175.695068359375, + "loss": 0.0444, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5973865389823914, + "rewards/margins": 7.333518028259277, + "rewards/rejected": -7.930904388427734, + "step": 1711 + }, + { + "epoch": 2.75, + "learning_rate": 3.8594926674593736e-07, + "logits/chosen": -1.6229920387268066, + "logits/rejected": -1.6706695556640625, + "logps/chosen": -129.38009643554688, + "logps/rejected": -183.19676208496094, + "loss": 0.1434, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.332888603210449, + "rewards/margins": 4.283064842224121, + "rewards/rejected": -6.61595344543457, + "step": 1712 + }, + { + "epoch": 2.75, + "learning_rate": 3.8585017835909627e-07, + "logits/chosen": -1.5527560710906982, + "logits/rejected": -1.5401363372802734, + "logps/chosen": -107.97278594970703, + "logps/rejected": -136.66148376464844, + "loss": 0.0844, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9458701610565186, + "rewards/margins": 5.365225315093994, + "rewards/rejected": -7.311095714569092, + "step": 1713 + }, + { + "epoch": 2.75, + "learning_rate": 3.8575108997225523e-07, + "logits/chosen": -1.7033445835113525, + "logits/rejected": -1.746883511543274, + "logps/chosen": -113.42610168457031, + "logps/rejected": -152.87213134765625, + "loss": 0.0807, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8968216180801392, + "rewards/margins": 3.7497458457946777, + "rewards/rejected": -5.646567344665527, + "step": 1714 + }, + { + "epoch": 2.75, + "learning_rate": 3.856520015854142e-07, + "logits/chosen": -1.8013803958892822, + "logits/rejected": -1.7154548168182373, + "logps/chosen": -105.42173767089844, + "logps/rejected": -144.68460083007812, + "loss": 0.0458, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6395660042762756, + "rewards/margins": 6.105227470397949, + "rewards/rejected": -6.744793891906738, + "step": 1715 + }, + { + "epoch": 2.75, + "learning_rate": 3.855529131985731e-07, + "logits/chosen": -1.4797344207763672, + "logits/rejected": -1.5067167282104492, + "logps/chosen": -105.45658874511719, + "logps/rejected": -159.64166259765625, + "loss": 0.195, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.855002224445343, + "rewards/margins": 5.003995895385742, + "rewards/rejected": -5.8589982986450195, + "step": 1716 + }, + { + "epoch": 2.76, + "learning_rate": 3.8545382481173206e-07, + "logits/chosen": -1.6864482164382935, + "logits/rejected": -1.6967101097106934, + "logps/chosen": -96.51136779785156, + "logps/rejected": -127.41197967529297, + "loss": 0.0869, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3239071369171143, + "rewards/margins": 3.8827710151672363, + "rewards/rejected": -5.20667839050293, + "step": 1717 + }, + { + "epoch": 2.76, + "learning_rate": 3.8535473642489096e-07, + "logits/chosen": -1.5164958238601685, + "logits/rejected": -1.5359840393066406, + "logps/chosen": -103.20779418945312, + "logps/rejected": -129.1023406982422, + "loss": 0.077, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8564060926437378, + "rewards/margins": 4.21718692779541, + "rewards/rejected": -6.0735931396484375, + "step": 1718 + }, + { + "epoch": 2.76, + "learning_rate": 3.852556480380499e-07, + "logits/chosen": -1.6447176933288574, + "logits/rejected": -1.7807071208953857, + "logps/chosen": -72.67943572998047, + "logps/rejected": -135.22320556640625, + "loss": 0.0475, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6120203137397766, + "rewards/margins": 5.0758562088012695, + "rewards/rejected": -5.687876224517822, + "step": 1719 + }, + { + "epoch": 2.76, + "learning_rate": 3.8515655965120883e-07, + "logits/chosen": -1.7441954612731934, + "logits/rejected": -1.6575230360031128, + "logps/chosen": -117.86317443847656, + "logps/rejected": -157.60690307617188, + "loss": 0.0598, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.133054256439209, + "rewards/margins": 5.585491180419922, + "rewards/rejected": -7.718544960021973, + "step": 1720 + }, + { + "epoch": 2.76, + "learning_rate": 3.850574712643678e-07, + "logits/chosen": -1.6561346054077148, + "logits/rejected": -1.5754518508911133, + "logps/chosen": -94.59526062011719, + "logps/rejected": -139.864013671875, + "loss": 0.0341, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.570056915283203, + "rewards/margins": 5.326308250427246, + "rewards/rejected": -7.896364688873291, + "step": 1721 + }, + { + "epoch": 2.76, + "learning_rate": 3.8495838287752675e-07, + "logits/chosen": -1.9106751680374146, + "logits/rejected": -1.8608661890029907, + "logps/chosen": -107.29853820800781, + "logps/rejected": -155.08224487304688, + "loss": 0.1335, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.231898695230484, + "rewards/margins": 4.3463521003723145, + "rewards/rejected": -4.578250885009766, + "step": 1722 + }, + { + "epoch": 2.77, + "learning_rate": 3.8485929449068566e-07, + "logits/chosen": -1.8106595277786255, + "logits/rejected": -1.7741972208023071, + "logps/chosen": -123.20651245117188, + "logps/rejected": -169.97357177734375, + "loss": 0.122, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.031836748123169, + "rewards/margins": 4.454592227935791, + "rewards/rejected": -5.486429214477539, + "step": 1723 + }, + { + "epoch": 2.77, + "learning_rate": 3.847602061038446e-07, + "logits/chosen": -1.54625403881073, + "logits/rejected": -1.5438166856765747, + "logps/chosen": -85.87701416015625, + "logps/rejected": -156.4394989013672, + "loss": 0.1544, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.43809774518013, + "rewards/margins": 7.436060428619385, + "rewards/rejected": -7.874157905578613, + "step": 1724 + }, + { + "epoch": 2.77, + "learning_rate": 3.846611177170035e-07, + "logits/chosen": -1.6615509986877441, + "logits/rejected": -1.628709077835083, + "logps/chosen": -75.87600708007812, + "logps/rejected": -181.4813232421875, + "loss": 0.0576, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.061121970415115356, + "rewards/margins": 11.005786895751953, + "rewards/rejected": -11.06690788269043, + "step": 1725 + }, + { + "epoch": 2.77, + "learning_rate": 3.845620293301625e-07, + "logits/chosen": -1.639305591583252, + "logits/rejected": -1.5316998958587646, + "logps/chosen": -106.21957397460938, + "logps/rejected": -156.9407958984375, + "loss": 0.1703, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6505632400512695, + "rewards/margins": 6.693097114562988, + "rewards/rejected": -8.343660354614258, + "step": 1726 + }, + { + "epoch": 2.77, + "learning_rate": 3.8446294094332144e-07, + "logits/chosen": -1.551335096359253, + "logits/rejected": -1.5738424062728882, + "logps/chosen": -88.29304504394531, + "logps/rejected": -162.49609375, + "loss": 0.2563, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.515679657459259, + "rewards/margins": 5.855982780456543, + "rewards/rejected": -6.371662616729736, + "step": 1727 + }, + { + "epoch": 2.77, + "learning_rate": 3.8436385255648035e-07, + "logits/chosen": -1.5758488178253174, + "logits/rejected": -1.6274272203445435, + "logps/chosen": -89.54116821289062, + "logps/rejected": -137.7141876220703, + "loss": 0.081, + "rewards/accuracies": 0.75, + "rewards/chosen": -1.3884612321853638, + "rewards/margins": 2.479238986968994, + "rewards/rejected": -3.8677000999450684, + "step": 1728 + }, + { + "epoch": 2.78, + "learning_rate": 3.842647641696393e-07, + "logits/chosen": -1.672809362411499, + "logits/rejected": -1.6774704456329346, + "logps/chosen": -100.63175964355469, + "logps/rejected": -143.12005615234375, + "loss": 0.2362, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6171278953552246, + "rewards/margins": 4.297855854034424, + "rewards/rejected": -5.914984226226807, + "step": 1729 + }, + { + "epoch": 2.78, + "learning_rate": 3.841656757827982e-07, + "logits/chosen": -1.7278759479522705, + "logits/rejected": -1.850433588027954, + "logps/chosen": -76.5829086303711, + "logps/rejected": -156.71005249023438, + "loss": 0.0902, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.22703400254249573, + "rewards/margins": 5.895695686340332, + "rewards/rejected": -6.122729778289795, + "step": 1730 + }, + { + "epoch": 2.78, + "learning_rate": 3.840665873959572e-07, + "logits/chosen": -1.6309189796447754, + "logits/rejected": -1.7040116786956787, + "logps/chosen": -68.90851593017578, + "logps/rejected": -123.36503601074219, + "loss": 0.0945, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.26131296157836914, + "rewards/margins": 4.654858589172363, + "rewards/rejected": -4.916171550750732, + "step": 1731 + }, + { + "epoch": 2.78, + "learning_rate": 3.8396749900911614e-07, + "logits/chosen": -1.5577058792114258, + "logits/rejected": -1.5882019996643066, + "logps/chosen": -92.9170913696289, + "logps/rejected": -140.0607147216797, + "loss": 0.1521, + "rewards/accuracies": 0.5, + "rewards/chosen": -1.5169906616210938, + "rewards/margins": 4.608637809753418, + "rewards/rejected": -6.125628471374512, + "step": 1732 + }, + { + "epoch": 2.78, + "learning_rate": 3.8386841062227504e-07, + "logits/chosen": -1.7199769020080566, + "logits/rejected": -1.6817606687545776, + "logps/chosen": -118.91400909423828, + "logps/rejected": -149.94818115234375, + "loss": 0.3036, + "rewards/accuracies": 0.75, + "rewards/chosen": -1.3995593786239624, + "rewards/margins": 3.4038994312286377, + "rewards/rejected": -4.8034586906433105, + "step": 1733 + }, + { + "epoch": 2.78, + "learning_rate": 3.83769322235434e-07, + "logits/chosen": -1.7233734130859375, + "logits/rejected": -1.685361385345459, + "logps/chosen": -99.91777801513672, + "logps/rejected": -150.06031799316406, + "loss": 0.1402, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7346800565719604, + "rewards/margins": 4.67967414855957, + "rewards/rejected": -6.414353847503662, + "step": 1734 + }, + { + "epoch": 2.78, + "learning_rate": 3.836702338485929e-07, + "logits/chosen": -1.6008861064910889, + "logits/rejected": -1.5808324813842773, + "logps/chosen": -78.57119750976562, + "logps/rejected": -135.6722412109375, + "loss": 0.1447, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.34248214960098267, + "rewards/margins": 5.296980857849121, + "rewards/rejected": -5.639463424682617, + "step": 1735 + }, + { + "epoch": 2.79, + "learning_rate": 3.8357114546175187e-07, + "logits/chosen": -1.735021948814392, + "logits/rejected": -1.740487813949585, + "logps/chosen": -99.78807067871094, + "logps/rejected": -183.1121063232422, + "loss": 0.0523, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1788601279258728, + "rewards/margins": 7.770191192626953, + "rewards/rejected": -7.949051856994629, + "step": 1736 + }, + { + "epoch": 2.79, + "learning_rate": 3.8347205707491083e-07, + "logits/chosen": -1.50991690158844, + "logits/rejected": -1.532413125038147, + "logps/chosen": -93.66630554199219, + "logps/rejected": -142.24966430664062, + "loss": 0.1084, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6525890827178955, + "rewards/margins": 6.384730339050293, + "rewards/rejected": -8.03731918334961, + "step": 1737 + }, + { + "epoch": 2.79, + "learning_rate": 3.8337296868806974e-07, + "logits/chosen": -1.6704424619674683, + "logits/rejected": -1.6005035638809204, + "logps/chosen": -94.4825439453125, + "logps/rejected": -150.9242706298828, + "loss": 0.0509, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.3587244153022766, + "rewards/margins": 4.706463813781738, + "rewards/rejected": -5.065187931060791, + "step": 1738 + }, + { + "epoch": 2.79, + "learning_rate": 3.832738803012287e-07, + "logits/chosen": -1.788480520248413, + "logits/rejected": -1.7944239377975464, + "logps/chosen": -142.67686462402344, + "logps/rejected": -133.83688354492188, + "loss": 0.0747, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9966156482696533, + "rewards/margins": 1.9196555614471436, + "rewards/rejected": -3.9162709712982178, + "step": 1739 + }, + { + "epoch": 2.79, + "learning_rate": 3.831747919143876e-07, + "logits/chosen": -1.5799955129623413, + "logits/rejected": -1.623917818069458, + "logps/chosen": -68.50028991699219, + "logps/rejected": -186.34466552734375, + "loss": 0.0675, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7705042362213135, + "rewards/margins": 8.460528373718262, + "rewards/rejected": -9.231033325195312, + "step": 1740 + }, + { + "epoch": 2.79, + "learning_rate": 3.830757035275465e-07, + "logits/chosen": -1.5403428077697754, + "logits/rejected": -1.5773061513900757, + "logps/chosen": -108.82228088378906, + "logps/rejected": -149.12045288085938, + "loss": 0.1373, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8979717493057251, + "rewards/margins": 4.018382549285889, + "rewards/rejected": -4.916354179382324, + "step": 1741 + }, + { + "epoch": 2.8, + "learning_rate": 3.829766151407055e-07, + "logits/chosen": -1.7603864669799805, + "logits/rejected": -1.661365032196045, + "logps/chosen": -99.40763854980469, + "logps/rejected": -127.48089599609375, + "loss": 0.1884, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0572607517242432, + "rewards/margins": 3.4675674438476562, + "rewards/rejected": -4.5248284339904785, + "step": 1742 + }, + { + "epoch": 2.8, + "learning_rate": 3.8287752675386443e-07, + "logits/chosen": -1.6430116891860962, + "logits/rejected": -1.6068503856658936, + "logps/chosen": -74.22874450683594, + "logps/rejected": -125.27676391601562, + "loss": 0.0818, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0395439863204956, + "rewards/margins": 5.395846366882324, + "rewards/rejected": -4.356302738189697, + "step": 1743 + }, + { + "epoch": 2.8, + "learning_rate": 3.827784383670234e-07, + "logits/chosen": -1.824817419052124, + "logits/rejected": -1.8087174892425537, + "logps/chosen": -122.9127197265625, + "logps/rejected": -184.99200439453125, + "loss": 0.1434, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.604060173034668, + "rewards/margins": 3.8104066848754883, + "rewards/rejected": -6.414466857910156, + "step": 1744 + }, + { + "epoch": 2.8, + "learning_rate": 3.826793499801823e-07, + "logits/chosen": -1.6561815738677979, + "logits/rejected": -1.6514068841934204, + "logps/chosen": -101.97447967529297, + "logps/rejected": -167.2471923828125, + "loss": 0.1004, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2644758224487305, + "rewards/margins": 4.895671367645264, + "rewards/rejected": -7.160147666931152, + "step": 1745 + }, + { + "epoch": 2.8, + "learning_rate": 3.825802615933412e-07, + "logits/chosen": -1.7136898040771484, + "logits/rejected": -1.715869426727295, + "logps/chosen": -96.61660766601562, + "logps/rejected": -115.6915512084961, + "loss": 0.1511, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.109031915664673, + "rewards/margins": 2.5076656341552734, + "rewards/rejected": -4.616697788238525, + "step": 1746 + }, + { + "epoch": 2.8, + "learning_rate": 3.824811732065002e-07, + "logits/chosen": -1.6088203191757202, + "logits/rejected": -1.6484349966049194, + "logps/chosen": -92.48394012451172, + "logps/rejected": -148.11355590820312, + "loss": 0.0913, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6739041805267334, + "rewards/margins": 5.164997100830078, + "rewards/rejected": -6.838900566101074, + "step": 1747 + }, + { + "epoch": 2.81, + "learning_rate": 3.823820848196591e-07, + "logits/chosen": -1.6835582256317139, + "logits/rejected": -1.7297847270965576, + "logps/chosen": -81.65963745117188, + "logps/rejected": -142.24658203125, + "loss": 0.289, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.986751139163971, + "rewards/margins": 4.716616153717041, + "rewards/rejected": -5.703367233276367, + "step": 1748 + }, + { + "epoch": 2.81, + "learning_rate": 3.822829964328181e-07, + "logits/chosen": -1.593947410583496, + "logits/rejected": -1.5817863941192627, + "logps/chosen": -137.06143188476562, + "logps/rejected": -145.72488403320312, + "loss": 0.1489, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3429783582687378, + "rewards/margins": 3.469151496887207, + "rewards/rejected": -4.812129974365234, + "step": 1749 + }, + { + "epoch": 2.81, + "learning_rate": 3.82183908045977e-07, + "logits/chosen": -1.6540350914001465, + "logits/rejected": -1.5765328407287598, + "logps/chosen": -132.54258728027344, + "logps/rejected": -153.92657470703125, + "loss": 0.0623, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.8090574741363525, + "rewards/margins": 3.0394363403320312, + "rewards/rejected": -5.848493576049805, + "step": 1750 + }, + { + "epoch": 2.81, + "learning_rate": 3.820848196591359e-07, + "logits/chosen": -1.792435884475708, + "logits/rejected": -1.8211042881011963, + "logps/chosen": -98.53768920898438, + "logps/rejected": -152.32498168945312, + "loss": 0.2383, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8069242238998413, + "rewards/margins": 4.923348426818848, + "rewards/rejected": -6.7302727699279785, + "step": 1751 + }, + { + "epoch": 2.81, + "learning_rate": 3.819857312722949e-07, + "logits/chosen": -1.766941785812378, + "logits/rejected": -1.7398006916046143, + "logps/chosen": -104.31485748291016, + "logps/rejected": -147.36483764648438, + "loss": 0.0531, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5594552755355835, + "rewards/margins": 5.265119552612305, + "rewards/rejected": -6.8245744705200195, + "step": 1752 + }, + { + "epoch": 2.81, + "learning_rate": 3.818866428854538e-07, + "logits/chosen": -1.6534035205841064, + "logits/rejected": -1.556052327156067, + "logps/chosen": -91.64700317382812, + "logps/rejected": -126.10535430908203, + "loss": 0.1364, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.3308826684951782, + "rewards/margins": 2.6697590351104736, + "rewards/rejected": -3.0006415843963623, + "step": 1753 + }, + { + "epoch": 2.82, + "learning_rate": 3.817875544986127e-07, + "logits/chosen": -1.587235927581787, + "logits/rejected": -1.5724472999572754, + "logps/chosen": -85.00242614746094, + "logps/rejected": -128.0253448486328, + "loss": 0.0913, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7019557952880859, + "rewards/margins": 3.877869129180908, + "rewards/rejected": -4.579824447631836, + "step": 1754 + }, + { + "epoch": 2.82, + "learning_rate": 3.816884661117717e-07, + "logits/chosen": -1.7187707424163818, + "logits/rejected": -1.769547462463379, + "logps/chosen": -76.54315185546875, + "logps/rejected": -151.90740966796875, + "loss": 0.2259, + "rewards/accuracies": 0.75, + "rewards/chosen": -1.3100498914718628, + "rewards/margins": 6.1684136390686035, + "rewards/rejected": -7.478463172912598, + "step": 1755 + }, + { + "epoch": 2.82, + "learning_rate": 3.815893777249306e-07, + "logits/chosen": -1.5695879459381104, + "logits/rejected": -1.6588401794433594, + "logps/chosen": -86.45618438720703, + "logps/rejected": -131.11285400390625, + "loss": 0.2785, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.007049322128296, + "rewards/margins": 3.660254955291748, + "rewards/rejected": -4.667304515838623, + "step": 1756 + }, + { + "epoch": 2.82, + "learning_rate": 3.814902893380896e-07, + "logits/chosen": -1.6101922988891602, + "logits/rejected": -1.578542947769165, + "logps/chosen": -91.38618469238281, + "logps/rejected": -131.7822723388672, + "loss": 0.1204, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9607866406440735, + "rewards/margins": 3.6881613731384277, + "rewards/rejected": -4.648947715759277, + "step": 1757 + }, + { + "epoch": 2.82, + "learning_rate": 3.813912009512485e-07, + "logits/chosen": -1.8011674880981445, + "logits/rejected": -1.6972105503082275, + "logps/chosen": -111.57304382324219, + "logps/rejected": -144.32330322265625, + "loss": 0.2546, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2235825061798096, + "rewards/margins": 4.009917736053467, + "rewards/rejected": -6.233500003814697, + "step": 1758 + }, + { + "epoch": 2.82, + "learning_rate": 3.812921125644074e-07, + "logits/chosen": -1.5903282165527344, + "logits/rejected": -1.5985771417617798, + "logps/chosen": -51.76702880859375, + "logps/rejected": -118.87730407714844, + "loss": 0.0765, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.11593732237815857, + "rewards/margins": 5.692895889282227, + "rewards/rejected": -5.576958656311035, + "step": 1759 + }, + { + "epoch": 2.83, + "learning_rate": 3.811930241775664e-07, + "logits/chosen": -1.5937902927398682, + "logits/rejected": -1.6603013277053833, + "logps/chosen": -86.8178939819336, + "logps/rejected": -153.36459350585938, + "loss": 0.1647, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2910090684890747, + "rewards/margins": 5.230852127075195, + "rewards/rejected": -6.5218610763549805, + "step": 1760 + }, + { + "epoch": 2.83, + "learning_rate": 3.810939357907253e-07, + "logits/chosen": -1.7027860879898071, + "logits/rejected": -1.6819080114364624, + "logps/chosen": -118.70219421386719, + "logps/rejected": -144.90090942382812, + "loss": 0.078, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.6176843643188477, + "rewards/margins": 2.1665663719177246, + "rewards/rejected": -4.784250736236572, + "step": 1761 + }, + { + "epoch": 2.83, + "learning_rate": 3.809948474038843e-07, + "logits/chosen": -1.8001539707183838, + "logits/rejected": -1.8497732877731323, + "logps/chosen": -73.74909973144531, + "logps/rejected": -163.430419921875, + "loss": 0.1442, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6573901176452637, + "rewards/margins": 8.50106143951416, + "rewards/rejected": -9.158451080322266, + "step": 1762 + }, + { + "epoch": 2.83, + "learning_rate": 3.808957590170432e-07, + "logits/chosen": -1.753782868385315, + "logits/rejected": -1.7618824243545532, + "logps/chosen": -109.74986267089844, + "logps/rejected": -148.46038818359375, + "loss": 0.0906, + "rewards/accuracies": 0.75, + "rewards/chosen": -1.5627260208129883, + "rewards/margins": 3.018367290496826, + "rewards/rejected": -4.5810933113098145, + "step": 1763 + }, + { + "epoch": 2.83, + "learning_rate": 3.807966706302021e-07, + "logits/chosen": -1.6510804891586304, + "logits/rejected": -1.6993743181228638, + "logps/chosen": -126.80740356445312, + "logps/rejected": -150.28273010253906, + "loss": 0.1583, + "rewards/accuracies": 0.75, + "rewards/chosen": -1.643937110900879, + "rewards/margins": 2.1633071899414062, + "rewards/rejected": -3.8072447776794434, + "step": 1764 + }, + { + "epoch": 2.83, + "learning_rate": 3.8069758224336107e-07, + "logits/chosen": -1.7592511177062988, + "logits/rejected": -1.69386887550354, + "logps/chosen": -102.38301086425781, + "logps/rejected": -142.7652587890625, + "loss": 0.1877, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8317674994468689, + "rewards/margins": 4.902235507965088, + "rewards/rejected": -5.734002113342285, + "step": 1765 + }, + { + "epoch": 2.83, + "learning_rate": 3.8059849385652e-07, + "logits/chosen": -1.685221791267395, + "logits/rejected": -1.6991008520126343, + "logps/chosen": -84.57648468017578, + "logps/rejected": -166.98175048828125, + "loss": 0.1114, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5754833221435547, + "rewards/margins": 6.0096435546875, + "rewards/rejected": -8.585126876831055, + "step": 1766 + }, + { + "epoch": 2.84, + "learning_rate": 3.8049940546967893e-07, + "logits/chosen": -1.6797466278076172, + "logits/rejected": -1.6582083702087402, + "logps/chosen": -118.40974426269531, + "logps/rejected": -143.94381713867188, + "loss": 0.1118, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5311591625213623, + "rewards/margins": 2.851269245147705, + "rewards/rejected": -4.3824286460876465, + "step": 1767 + }, + { + "epoch": 2.84, + "learning_rate": 3.804003170828379e-07, + "logits/chosen": -1.6236010789871216, + "logits/rejected": -1.5500752925872803, + "logps/chosen": -74.86937713623047, + "logps/rejected": -99.06585693359375, + "loss": 0.1158, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5848981738090515, + "rewards/margins": 2.9403905868530273, + "rewards/rejected": -3.5252888202667236, + "step": 1768 + }, + { + "epoch": 2.84, + "learning_rate": 3.803012286959968e-07, + "logits/chosen": -1.5713002681732178, + "logits/rejected": -1.6217745542526245, + "logps/chosen": -113.93025207519531, + "logps/rejected": -190.17652893066406, + "loss": 0.0866, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.879446268081665, + "rewards/margins": 7.012668132781982, + "rewards/rejected": -9.892114639282227, + "step": 1769 + }, + { + "epoch": 2.84, + "learning_rate": 3.8020214030915576e-07, + "logits/chosen": -1.4542591571807861, + "logits/rejected": -1.4850891828536987, + "logps/chosen": -83.11414337158203, + "logps/rejected": -162.91851806640625, + "loss": 0.1201, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0939154624938965, + "rewards/margins": 6.6386237144470215, + "rewards/rejected": -7.732539176940918, + "step": 1770 + }, + { + "epoch": 2.84, + "learning_rate": 3.8010305192231467e-07, + "logits/chosen": -1.6708972454071045, + "logits/rejected": -1.7647781372070312, + "logps/chosen": -97.3715591430664, + "logps/rejected": -204.25997924804688, + "loss": 0.2406, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.127810478210449, + "rewards/margins": 7.065168380737305, + "rewards/rejected": -9.192978858947754, + "step": 1771 + }, + { + "epoch": 2.84, + "learning_rate": 3.8000396353547363e-07, + "logits/chosen": -1.6088831424713135, + "logits/rejected": -1.645223617553711, + "logps/chosen": -81.86747741699219, + "logps/rejected": -178.09152221679688, + "loss": 0.0553, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5065360069274902, + "rewards/margins": 7.853977203369141, + "rewards/rejected": -8.360513687133789, + "step": 1772 + }, + { + "epoch": 2.85, + "learning_rate": 3.799048751486326e-07, + "logits/chosen": -1.7509613037109375, + "logits/rejected": -1.7273695468902588, + "logps/chosen": -95.68939208984375, + "logps/rejected": -126.20100402832031, + "loss": 0.0886, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1065633296966553, + "rewards/margins": 3.528517484664917, + "rewards/rejected": -4.635080814361572, + "step": 1773 + }, + { + "epoch": 2.85, + "learning_rate": 3.798057867617915e-07, + "logits/chosen": -1.6854461431503296, + "logits/rejected": -1.6701722145080566, + "logps/chosen": -105.21220397949219, + "logps/rejected": -152.93569946289062, + "loss": 0.1845, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.337505578994751, + "rewards/margins": 4.891748905181885, + "rewards/rejected": -6.229255199432373, + "step": 1774 + }, + { + "epoch": 2.85, + "learning_rate": 3.7970669837495045e-07, + "logits/chosen": -1.7481276988983154, + "logits/rejected": -1.7513872385025024, + "logps/chosen": -75.9001693725586, + "logps/rejected": -120.05902099609375, + "loss": 0.1417, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9432734251022339, + "rewards/margins": 3.886230230331421, + "rewards/rejected": -4.829503536224365, + "step": 1775 + }, + { + "epoch": 2.85, + "learning_rate": 3.7960760998810936e-07, + "logits/chosen": -1.388261079788208, + "logits/rejected": -1.4001448154449463, + "logps/chosen": -100.7335433959961, + "logps/rejected": -162.1338348388672, + "loss": 0.1805, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.609378457069397, + "rewards/margins": 6.518288612365723, + "rewards/rejected": -8.127667427062988, + "step": 1776 + }, + { + "epoch": 2.85, + "learning_rate": 3.795085216012683e-07, + "logits/chosen": -1.66798734664917, + "logits/rejected": -1.5416667461395264, + "logps/chosen": -95.4244613647461, + "logps/rejected": -131.13430786132812, + "loss": 0.1188, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9205071926116943, + "rewards/margins": 4.767366409301758, + "rewards/rejected": -5.687872886657715, + "step": 1777 + }, + { + "epoch": 2.85, + "learning_rate": 3.794094332144273e-07, + "logits/chosen": -1.5265100002288818, + "logits/rejected": -1.5652284622192383, + "logps/chosen": -67.69842529296875, + "logps/rejected": -142.65243530273438, + "loss": 0.332, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.884462833404541, + "rewards/margins": 6.6511335372924805, + "rewards/rejected": -7.53559684753418, + "step": 1778 + }, + { + "epoch": 2.86, + "learning_rate": 3.793103448275862e-07, + "logits/chosen": -1.6109681129455566, + "logits/rejected": -1.6076509952545166, + "logps/chosen": -81.2899169921875, + "logps/rejected": -118.94900512695312, + "loss": 0.2052, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.9782448410987854, + "rewards/margins": 3.7560534477233887, + "rewards/rejected": -4.7342987060546875, + "step": 1779 + }, + { + "epoch": 2.86, + "learning_rate": 3.7921125644074515e-07, + "logits/chosen": -1.7216942310333252, + "logits/rejected": -1.7283964157104492, + "logps/chosen": -108.41961669921875, + "logps/rejected": -154.80162048339844, + "loss": 0.2486, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6344870328903198, + "rewards/margins": 4.183652400970459, + "rewards/rejected": -5.818139553070068, + "step": 1780 + }, + { + "epoch": 2.86, + "learning_rate": 3.7911216805390405e-07, + "logits/chosen": -1.6718034744262695, + "logits/rejected": -1.691701054573059, + "logps/chosen": -108.55213928222656, + "logps/rejected": -165.3509979248047, + "loss": 0.1214, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5856109857559204, + "rewards/margins": 5.235071659088135, + "rewards/rejected": -6.820682525634766, + "step": 1781 + }, + { + "epoch": 2.86, + "learning_rate": 3.79013079667063e-07, + "logits/chosen": -1.5459458827972412, + "logits/rejected": -1.5469999313354492, + "logps/chosen": -105.33699035644531, + "logps/rejected": -184.712158203125, + "loss": 0.1882, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.427011489868164, + "rewards/margins": 6.092480659484863, + "rewards/rejected": -8.519493103027344, + "step": 1782 + }, + { + "epoch": 2.86, + "learning_rate": 3.789139912802219e-07, + "logits/chosen": -1.6517573595046997, + "logits/rejected": -1.7072194814682007, + "logps/chosen": -105.84122467041016, + "logps/rejected": -138.7044677734375, + "loss": 0.0755, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0288937091827393, + "rewards/margins": 4.313884735107422, + "rewards/rejected": -5.342778205871582, + "step": 1783 + }, + { + "epoch": 2.86, + "learning_rate": 3.788149028933809e-07, + "logits/chosen": -1.7199172973632812, + "logits/rejected": -1.6935867071151733, + "logps/chosen": -105.45834350585938, + "logps/rejected": -164.40560913085938, + "loss": 0.1851, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9580499529838562, + "rewards/margins": 6.656195640563965, + "rewards/rejected": -7.614245414733887, + "step": 1784 + }, + { + "epoch": 2.87, + "learning_rate": 3.7871581450653984e-07, + "logits/chosen": -1.8847829103469849, + "logits/rejected": -1.6701092720031738, + "logps/chosen": -118.7100601196289, + "logps/rejected": -138.61398315429688, + "loss": 0.1633, + "rewards/accuracies": 0.75, + "rewards/chosen": -1.6034716367721558, + "rewards/margins": 3.640238046646118, + "rewards/rejected": -5.243709564208984, + "step": 1785 + }, + { + "epoch": 2.87, + "learning_rate": 3.7861672611969875e-07, + "logits/chosen": -1.8477497100830078, + "logits/rejected": -1.7315752506256104, + "logps/chosen": -135.99404907226562, + "logps/rejected": -122.90853881835938, + "loss": 0.1248, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9037501811981201, + "rewards/margins": 1.650345802307129, + "rewards/rejected": -3.554095983505249, + "step": 1786 + }, + { + "epoch": 2.87, + "learning_rate": 3.7851763773285765e-07, + "logits/chosen": -1.7583935260772705, + "logits/rejected": -1.8241909742355347, + "logps/chosen": -94.69377899169922, + "logps/rejected": -144.22958374023438, + "loss": 0.0688, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9883744716644287, + "rewards/margins": 3.6275508403778076, + "rewards/rejected": -5.615924835205078, + "step": 1787 + }, + { + "epoch": 2.87, + "learning_rate": 3.784185493460166e-07, + "logits/chosen": -1.6794418096542358, + "logits/rejected": -1.6705546379089355, + "logps/chosen": -115.58597564697266, + "logps/rejected": -154.1556396484375, + "loss": 0.0665, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2622792720794678, + "rewards/margins": 5.815978050231934, + "rewards/rejected": -8.07825756072998, + "step": 1788 + }, + { + "epoch": 2.87, + "learning_rate": 3.7831946095917557e-07, + "logits/chosen": -1.6077405214309692, + "logits/rejected": -1.6480562686920166, + "logps/chosen": -104.37975311279297, + "logps/rejected": -167.54733276367188, + "loss": 0.0409, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3206868171691895, + "rewards/margins": 5.607962608337402, + "rewards/rejected": -8.92864990234375, + "step": 1789 + }, + { + "epoch": 2.87, + "learning_rate": 3.7822037257233453e-07, + "logits/chosen": -1.7588698863983154, + "logits/rejected": -1.698569893836975, + "logps/chosen": -107.63155364990234, + "logps/rejected": -146.66319274902344, + "loss": 0.1297, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5554607510566711, + "rewards/margins": 6.162705898284912, + "rewards/rejected": -6.718166828155518, + "step": 1790 + }, + { + "epoch": 2.87, + "learning_rate": 3.7812128418549344e-07, + "logits/chosen": -1.5314441919326782, + "logits/rejected": -1.5583475828170776, + "logps/chosen": -106.9226303100586, + "logps/rejected": -159.56634521484375, + "loss": 0.0762, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.707304835319519, + "rewards/margins": 4.517581462860107, + "rewards/rejected": -6.224885940551758, + "step": 1791 + }, + { + "epoch": 2.88, + "learning_rate": 3.7802219579865235e-07, + "logits/chosen": -1.6321523189544678, + "logits/rejected": -1.662232756614685, + "logps/chosen": -100.90032958984375, + "logps/rejected": -142.9011688232422, + "loss": 0.1641, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7500076293945312, + "rewards/margins": 3.2798030376434326, + "rewards/rejected": -5.029810905456543, + "step": 1792 + }, + { + "epoch": 2.88, + "learning_rate": 3.779231074118113e-07, + "logits/chosen": -1.7939060926437378, + "logits/rejected": -1.7558579444885254, + "logps/chosen": -117.99163818359375, + "logps/rejected": -197.872802734375, + "loss": 0.1082, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3665931224823, + "rewards/margins": 7.062250137329102, + "rewards/rejected": -10.42884349822998, + "step": 1793 + }, + { + "epoch": 2.88, + "learning_rate": 3.7782401902497026e-07, + "logits/chosen": -1.657088041305542, + "logits/rejected": -1.6200149059295654, + "logps/chosen": -121.12468719482422, + "logps/rejected": -133.60653686523438, + "loss": 0.2105, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2447307109832764, + "rewards/margins": 2.37349009513855, + "rewards/rejected": -4.618220806121826, + "step": 1794 + }, + { + "epoch": 2.88, + "learning_rate": 3.777249306381292e-07, + "logits/chosen": -1.6224212646484375, + "logits/rejected": -1.6546275615692139, + "logps/chosen": -110.99869537353516, + "logps/rejected": -156.47677612304688, + "loss": 0.0354, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9481110572814941, + "rewards/margins": 6.501053810119629, + "rewards/rejected": -8.449165344238281, + "step": 1795 + }, + { + "epoch": 2.88, + "learning_rate": 3.7762584225128813e-07, + "logits/chosen": -1.6862252950668335, + "logits/rejected": -1.6314691305160522, + "logps/chosen": -116.3354263305664, + "logps/rejected": -163.01638793945312, + "loss": 0.0879, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.4760788381099701, + "rewards/margins": 6.421491622924805, + "rewards/rejected": -6.897570610046387, + "step": 1796 + }, + { + "epoch": 2.88, + "learning_rate": 3.7752675386444704e-07, + "logits/chosen": -1.6113824844360352, + "logits/rejected": -1.6036114692687988, + "logps/chosen": -95.64356231689453, + "logps/rejected": -121.98859405517578, + "loss": 0.2034, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3950374126434326, + "rewards/margins": 3.210289716720581, + "rewards/rejected": -5.605327129364014, + "step": 1797 + }, + { + "epoch": 2.89, + "learning_rate": 3.77427665477606e-07, + "logits/chosen": -1.622185468673706, + "logits/rejected": -1.546464443206787, + "logps/chosen": -104.50080871582031, + "logps/rejected": -163.09286499023438, + "loss": 0.051, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.16539974510669708, + "rewards/margins": 8.776033401489258, + "rewards/rejected": -8.94143295288086, + "step": 1798 + }, + { + "epoch": 2.89, + "learning_rate": 3.7732857709076496e-07, + "logits/chosen": -1.4777554273605347, + "logits/rejected": -1.5220603942871094, + "logps/chosen": -99.87274169921875, + "logps/rejected": -98.11062622070312, + "loss": 0.1556, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.034170150756836, + "rewards/margins": 0.34193193912506104, + "rewards/rejected": -3.3761019706726074, + "step": 1799 + }, + { + "epoch": 2.89, + "learning_rate": 3.772294887039239e-07, + "logits/chosen": -1.7738866806030273, + "logits/rejected": -1.6994258165359497, + "logps/chosen": -80.92544555664062, + "logps/rejected": -106.82350158691406, + "loss": 0.2368, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1291041374206543, + "rewards/margins": 4.073250770568848, + "rewards/rejected": -5.20235538482666, + "step": 1800 + }, + { + "epoch": 2.89, + "learning_rate": 3.771304003170828e-07, + "logits/chosen": -1.5356467962265015, + "logits/rejected": -1.6702868938446045, + "logps/chosen": -82.96348571777344, + "logps/rejected": -170.287841796875, + "loss": 0.1043, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6384035348892212, + "rewards/margins": 8.618170738220215, + "rewards/rejected": -9.256574630737305, + "step": 1801 + }, + { + "epoch": 2.89, + "learning_rate": 3.7703131193024173e-07, + "logits/chosen": -1.6392366886138916, + "logits/rejected": -1.5786510705947876, + "logps/chosen": -103.74851989746094, + "logps/rejected": -144.15748596191406, + "loss": 0.0316, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4483575820922852, + "rewards/margins": 4.92008113861084, + "rewards/rejected": -6.368438720703125, + "step": 1802 + }, + { + "epoch": 2.89, + "learning_rate": 3.769322235434007e-07, + "logits/chosen": -1.677764892578125, + "logits/rejected": -1.743652582168579, + "logps/chosen": -92.18203735351562, + "logps/rejected": -197.68563842773438, + "loss": 0.1996, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.710017204284668, + "rewards/margins": 9.06793212890625, + "rewards/rejected": -9.777950286865234, + "step": 1803 + }, + { + "epoch": 2.9, + "learning_rate": 3.768331351565596e-07, + "logits/chosen": -1.6745946407318115, + "logits/rejected": -1.691635251045227, + "logps/chosen": -120.13055419921875, + "logps/rejected": -166.5257568359375, + "loss": 0.098, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6820070743560791, + "rewards/margins": 5.661100387573242, + "rewards/rejected": -6.343107223510742, + "step": 1804 + }, + { + "epoch": 2.9, + "learning_rate": 3.767340467697186e-07, + "logits/chosen": -1.7669814825057983, + "logits/rejected": -1.7743035554885864, + "logps/chosen": -108.32340240478516, + "logps/rejected": -126.83101654052734, + "loss": 0.2007, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4231961965560913, + "rewards/margins": 2.030694007873535, + "rewards/rejected": -3.453890323638916, + "step": 1805 + }, + { + "epoch": 2.9, + "learning_rate": 3.766349583828775e-07, + "logits/chosen": -1.7148598432540894, + "logits/rejected": -1.6273465156555176, + "logps/chosen": -113.26337432861328, + "logps/rejected": -156.97796630859375, + "loss": 0.1076, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7466990947723389, + "rewards/margins": 4.992303371429443, + "rewards/rejected": -6.739002704620361, + "step": 1806 + }, + { + "epoch": 2.9, + "learning_rate": 3.765358699960364e-07, + "logits/chosen": -1.4694433212280273, + "logits/rejected": -1.5362205505371094, + "logps/chosen": -80.19744110107422, + "logps/rejected": -152.014404296875, + "loss": 0.0703, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7547639608383179, + "rewards/margins": 6.60331916809082, + "rewards/rejected": -7.358083248138428, + "step": 1807 + }, + { + "epoch": 2.9, + "learning_rate": 3.764367816091954e-07, + "logits/chosen": -1.7102206945419312, + "logits/rejected": -1.732323169708252, + "logps/chosen": -122.63440704345703, + "logps/rejected": -157.1143341064453, + "loss": 0.0685, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5858144760131836, + "rewards/margins": 5.267463207244873, + "rewards/rejected": -7.853277683258057, + "step": 1808 + }, + { + "epoch": 2.9, + "learning_rate": 3.763376932223543e-07, + "logits/chosen": -1.6916388273239136, + "logits/rejected": -1.693162441253662, + "logps/chosen": -92.6587142944336, + "logps/rejected": -134.86993408203125, + "loss": 0.1484, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8916627764701843, + "rewards/margins": 3.9587302207946777, + "rewards/rejected": -4.850393295288086, + "step": 1809 + }, + { + "epoch": 2.91, + "learning_rate": 3.762386048355133e-07, + "logits/chosen": -1.603738784790039, + "logits/rejected": -1.635077714920044, + "logps/chosen": -74.41727447509766, + "logps/rejected": -126.33573913574219, + "loss": 0.1966, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8321354389190674, + "rewards/margins": 4.7907280921936035, + "rewards/rejected": -5.622863292694092, + "step": 1810 + }, + { + "epoch": 2.91, + "learning_rate": 3.761395164486722e-07, + "logits/chosen": -1.7277610301971436, + "logits/rejected": -1.7426552772521973, + "logps/chosen": -103.33746337890625, + "logps/rejected": -169.03765869140625, + "loss": 0.0827, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.020691394805908, + "rewards/margins": 7.0620012283325195, + "rewards/rejected": -9.08269214630127, + "step": 1811 + }, + { + "epoch": 2.91, + "learning_rate": 3.760404280618311e-07, + "logits/chosen": -1.732521653175354, + "logits/rejected": -1.776577353477478, + "logps/chosen": -99.52017974853516, + "logps/rejected": -161.783203125, + "loss": 0.1416, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5220286846160889, + "rewards/margins": 7.633773326873779, + "rewards/rejected": -8.155801773071289, + "step": 1812 + }, + { + "epoch": 2.91, + "learning_rate": 3.759413396749901e-07, + "logits/chosen": -1.6191545724868774, + "logits/rejected": -1.5511865615844727, + "logps/chosen": -122.78934478759766, + "logps/rejected": -169.49647521972656, + "loss": 0.1041, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.176661252975464, + "rewards/margins": 6.4275102615356445, + "rewards/rejected": -8.604171752929688, + "step": 1813 + }, + { + "epoch": 2.91, + "learning_rate": 3.75842251288149e-07, + "logits/chosen": -1.8406273126602173, + "logits/rejected": -1.7240077257156372, + "logps/chosen": -115.03936004638672, + "logps/rejected": -144.81439208984375, + "loss": 0.0732, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1487236022949219, + "rewards/margins": 4.783183574676514, + "rewards/rejected": -5.9319071769714355, + "step": 1814 + }, + { + "epoch": 2.91, + "learning_rate": 3.75743162901308e-07, + "logits/chosen": -1.5763864517211914, + "logits/rejected": -1.5711785554885864, + "logps/chosen": -77.22325897216797, + "logps/rejected": -126.6904525756836, + "loss": 0.0703, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3793689012527466, + "rewards/margins": 5.253995418548584, + "rewards/rejected": -6.633364200592041, + "step": 1815 + }, + { + "epoch": 2.91, + "learning_rate": 3.756440745144669e-07, + "logits/chosen": -1.531294584274292, + "logits/rejected": -1.5590920448303223, + "logps/chosen": -93.43187713623047, + "logps/rejected": -157.0635223388672, + "loss": 0.113, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6699162125587463, + "rewards/margins": 5.294865608215332, + "rewards/rejected": -5.964782238006592, + "step": 1816 + }, + { + "epoch": 2.92, + "learning_rate": 3.755449861276258e-07, + "logits/chosen": -1.7886762619018555, + "logits/rejected": -1.7001674175262451, + "logps/chosen": -114.40238189697266, + "logps/rejected": -168.64208984375, + "loss": 0.1559, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7566229104995728, + "rewards/margins": 6.680775165557861, + "rewards/rejected": -7.4373979568481445, + "step": 1817 + }, + { + "epoch": 2.92, + "learning_rate": 3.7544589774078477e-07, + "logits/chosen": -1.7443194389343262, + "logits/rejected": -1.68293035030365, + "logps/chosen": -103.60906982421875, + "logps/rejected": -138.01133728027344, + "loss": 0.0534, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0838513374328613, + "rewards/margins": 4.983738422393799, + "rewards/rejected": -6.06758975982666, + "step": 1818 + }, + { + "epoch": 2.92, + "learning_rate": 3.753468093539437e-07, + "logits/chosen": -1.6506086587905884, + "logits/rejected": -1.6421537399291992, + "logps/chosen": -96.27152252197266, + "logps/rejected": -169.48428344726562, + "loss": 0.069, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3307204246520996, + "rewards/margins": 5.7270026206970215, + "rewards/rejected": -7.057723045349121, + "step": 1819 + }, + { + "epoch": 2.92, + "learning_rate": 3.752477209671027e-07, + "logits/chosen": -1.6307271718978882, + "logits/rejected": -1.655086636543274, + "logps/chosen": -86.7942123413086, + "logps/rejected": -129.18511962890625, + "loss": 0.1442, + "rewards/accuracies": 0.75, + "rewards/chosen": -1.605810284614563, + "rewards/margins": 4.8568115234375, + "rewards/rejected": -6.462621688842773, + "step": 1820 + }, + { + "epoch": 2.92, + "learning_rate": 3.751486325802616e-07, + "logits/chosen": -1.5474883317947388, + "logits/rejected": -1.5981955528259277, + "logps/chosen": -101.75738525390625, + "logps/rejected": -160.26876831054688, + "loss": 0.0981, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0146722793579102, + "rewards/margins": 7.573986530303955, + "rewards/rejected": -8.588659286499023, + "step": 1821 + }, + { + "epoch": 2.92, + "learning_rate": 3.750495441934205e-07, + "logits/chosen": -1.6465976238250732, + "logits/rejected": -1.6424157619476318, + "logps/chosen": -79.42121887207031, + "logps/rejected": -136.84591674804688, + "loss": 0.1349, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3404337167739868, + "rewards/margins": 7.089875221252441, + "rewards/rejected": -6.749441146850586, + "step": 1822 + }, + { + "epoch": 2.93, + "learning_rate": 3.7495045580657946e-07, + "logits/chosen": -1.749340295791626, + "logits/rejected": -1.7375813722610474, + "logps/chosen": -117.66568756103516, + "logps/rejected": -121.6031265258789, + "loss": 0.1332, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.396629810333252, + "rewards/margins": 1.7363895177841187, + "rewards/rejected": -4.13301944732666, + "step": 1823 + }, + { + "epoch": 2.93, + "learning_rate": 3.7485136741973837e-07, + "logits/chosen": -1.5258640050888062, + "logits/rejected": -1.5364243984222412, + "logps/chosen": -97.76651763916016, + "logps/rejected": -158.37901306152344, + "loss": 0.1586, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.404465436935425, + "rewards/margins": 5.261653423309326, + "rewards/rejected": -7.666119575500488, + "step": 1824 + }, + { + "epoch": 2.93, + "learning_rate": 3.747522790328973e-07, + "logits/chosen": -1.6214470863342285, + "logits/rejected": -1.754777193069458, + "logps/chosen": -70.96511840820312, + "logps/rejected": -122.2367935180664, + "loss": 0.1451, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.4130151867866516, + "rewards/margins": 1.8369543552398682, + "rewards/rejected": -2.249969482421875, + "step": 1825 + }, + { + "epoch": 2.93, + "learning_rate": 3.746531906460563e-07, + "logits/chosen": -1.5367629528045654, + "logits/rejected": -1.5843758583068848, + "logps/chosen": -101.90836334228516, + "logps/rejected": -170.86422729492188, + "loss": 0.15, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6052725315093994, + "rewards/margins": 3.9625608921051025, + "rewards/rejected": -5.567833423614502, + "step": 1826 + }, + { + "epoch": 2.93, + "learning_rate": 3.745541022592152e-07, + "logits/chosen": -1.6687405109405518, + "logits/rejected": -1.5846121311187744, + "logps/chosen": -110.18712615966797, + "logps/rejected": -141.54400634765625, + "loss": 0.0643, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6952368021011353, + "rewards/margins": 4.221129417419434, + "rewards/rejected": -5.9163665771484375, + "step": 1827 + }, + { + "epoch": 2.93, + "learning_rate": 3.7445501387237416e-07, + "logits/chosen": -1.565414309501648, + "logits/rejected": -1.597106695175171, + "logps/chosen": -116.12474060058594, + "logps/rejected": -152.18399047851562, + "loss": 0.1885, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.8293275833129883, + "rewards/margins": 3.52860164642334, + "rewards/rejected": -6.357929229736328, + "step": 1828 + }, + { + "epoch": 2.94, + "learning_rate": 3.7435592548553306e-07, + "logits/chosen": -1.5327577590942383, + "logits/rejected": -1.519581913948059, + "logps/chosen": -103.36714172363281, + "logps/rejected": -164.3252410888672, + "loss": 0.0963, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9687784910202026, + "rewards/margins": 5.351982116699219, + "rewards/rejected": -7.320760726928711, + "step": 1829 + }, + { + "epoch": 2.94, + "learning_rate": 3.7425683709869197e-07, + "logits/chosen": -1.6294317245483398, + "logits/rejected": -1.6953259706497192, + "logps/chosen": -103.53370666503906, + "logps/rejected": -159.46585083007812, + "loss": 0.1081, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6010355949401855, + "rewards/margins": 3.8423380851745605, + "rewards/rejected": -5.443373680114746, + "step": 1830 + }, + { + "epoch": 2.94, + "learning_rate": 3.74157748711851e-07, + "logits/chosen": -1.714198112487793, + "logits/rejected": -1.8112832307815552, + "logps/chosen": -107.547607421875, + "logps/rejected": -173.31353759765625, + "loss": 0.0647, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4005768299102783, + "rewards/margins": 4.298473358154297, + "rewards/rejected": -5.699049949645996, + "step": 1831 + }, + { + "epoch": 2.94, + "learning_rate": 3.740586603250099e-07, + "logits/chosen": -1.5286470651626587, + "logits/rejected": -1.4846584796905518, + "logps/chosen": -101.46790313720703, + "logps/rejected": -181.7661590576172, + "loss": 0.1304, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7079964876174927, + "rewards/margins": 8.209821701049805, + "rewards/rejected": -9.917818069458008, + "step": 1832 + }, + { + "epoch": 2.94, + "learning_rate": 3.7395957193816885e-07, + "logits/chosen": -1.7027385234832764, + "logits/rejected": -1.6326727867126465, + "logps/chosen": -94.35637664794922, + "logps/rejected": -123.62086486816406, + "loss": 0.1038, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1007566452026367, + "rewards/margins": 3.4199986457824707, + "rewards/rejected": -4.520754814147949, + "step": 1833 + }, + { + "epoch": 2.94, + "learning_rate": 3.7386048355132776e-07, + "logits/chosen": -1.676821231842041, + "logits/rejected": -1.6876411437988281, + "logps/chosen": -86.18657684326172, + "logps/rejected": -132.56622314453125, + "loss": 0.0933, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4426305294036865, + "rewards/margins": 2.5332581996917725, + "rewards/rejected": -3.975888729095459, + "step": 1834 + }, + { + "epoch": 2.95, + "learning_rate": 3.7376139516448666e-07, + "logits/chosen": -1.7392170429229736, + "logits/rejected": -1.6444953680038452, + "logps/chosen": -124.28804016113281, + "logps/rejected": -160.623291015625, + "loss": 0.1119, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2173526287078857, + "rewards/margins": 5.181742191314697, + "rewards/rejected": -7.39909553527832, + "step": 1835 + }, + { + "epoch": 2.95, + "learning_rate": 3.736623067776457e-07, + "logits/chosen": -1.5613404512405396, + "logits/rejected": -1.5801303386688232, + "logps/chosen": -105.79557800292969, + "logps/rejected": -168.4930877685547, + "loss": 0.1988, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.643540859222412, + "rewards/margins": 4.194326400756836, + "rewards/rejected": -6.83786678314209, + "step": 1836 + }, + { + "epoch": 2.95, + "learning_rate": 3.735632183908046e-07, + "logits/chosen": -1.7099552154541016, + "logits/rejected": -1.618041753768921, + "logps/chosen": -99.18416595458984, + "logps/rejected": -146.6209716796875, + "loss": 0.2075, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6705077886581421, + "rewards/margins": 5.2806715965271, + "rewards/rejected": -5.951179504394531, + "step": 1837 + }, + { + "epoch": 2.95, + "learning_rate": 3.7346413000396354e-07, + "logits/chosen": -1.7018520832061768, + "logits/rejected": -1.6403980255126953, + "logps/chosen": -106.02590942382812, + "logps/rejected": -110.58439636230469, + "loss": 0.1805, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9982521533966064, + "rewards/margins": 2.072617769241333, + "rewards/rejected": -4.0708699226379395, + "step": 1838 + }, + { + "epoch": 2.95, + "learning_rate": 3.7336504161712245e-07, + "logits/chosen": -1.6169241666793823, + "logits/rejected": -1.6344153881072998, + "logps/chosen": -97.32797241210938, + "logps/rejected": -165.24551391601562, + "loss": 0.0744, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4419846534729004, + "rewards/margins": 5.967092037200928, + "rewards/rejected": -7.409076690673828, + "step": 1839 + }, + { + "epoch": 2.95, + "learning_rate": 3.7326595323028136e-07, + "logits/chosen": -1.7090115547180176, + "logits/rejected": -1.647094488143921, + "logps/chosen": -113.08973693847656, + "logps/rejected": -174.51031494140625, + "loss": 0.0652, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9218418598175049, + "rewards/margins": 5.865823745727539, + "rewards/rejected": -7.787665367126465, + "step": 1840 + }, + { + "epoch": 2.96, + "learning_rate": 3.7316686484344037e-07, + "logits/chosen": -1.822242259979248, + "logits/rejected": -1.7936092615127563, + "logps/chosen": -100.0579605102539, + "logps/rejected": -107.98933410644531, + "loss": 0.1689, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3148176670074463, + "rewards/margins": 2.1842103004455566, + "rewards/rejected": -3.499027967453003, + "step": 1841 + }, + { + "epoch": 2.96, + "learning_rate": 3.730677764565993e-07, + "logits/chosen": -1.785272240638733, + "logits/rejected": -1.8058993816375732, + "logps/chosen": -97.46770477294922, + "logps/rejected": -163.251708984375, + "loss": 0.1495, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7274589538574219, + "rewards/margins": 6.838157653808594, + "rewards/rejected": -7.565616607666016, + "step": 1842 + }, + { + "epoch": 2.96, + "learning_rate": 3.7296868806975823e-07, + "logits/chosen": -1.6869449615478516, + "logits/rejected": -1.6811470985412598, + "logps/chosen": -93.52455139160156, + "logps/rejected": -158.1514129638672, + "loss": 0.047, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.242169976234436, + "rewards/margins": 5.656375885009766, + "rewards/rejected": -6.898545742034912, + "step": 1843 + }, + { + "epoch": 2.96, + "learning_rate": 3.7286959968291714e-07, + "logits/chosen": -1.5931137800216675, + "logits/rejected": -1.5227012634277344, + "logps/chosen": -133.9796142578125, + "logps/rejected": -131.98158264160156, + "loss": 0.1518, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.447665214538574, + "rewards/margins": 3.7390403747558594, + "rewards/rejected": -6.186705589294434, + "step": 1844 + }, + { + "epoch": 2.96, + "learning_rate": 3.7277051129607605e-07, + "logits/chosen": -1.5257635116577148, + "logits/rejected": -1.5521966218948364, + "logps/chosen": -123.79499816894531, + "logps/rejected": -154.1683807373047, + "loss": 0.2141, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.4627857208251953, + "rewards/margins": 3.3797600269317627, + "rewards/rejected": -5.842545986175537, + "step": 1845 + }, + { + "epoch": 2.96, + "learning_rate": 3.72671422909235e-07, + "logits/chosen": -1.4717140197753906, + "logits/rejected": -1.7383618354797363, + "logps/chosen": -78.05812072753906, + "logps/rejected": -209.42141723632812, + "loss": 0.1034, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4537895917892456, + "rewards/margins": 5.240690231323242, + "rewards/rejected": -6.694479942321777, + "step": 1846 + }, + { + "epoch": 2.96, + "learning_rate": 3.7257233452239397e-07, + "logits/chosen": -1.7382279634475708, + "logits/rejected": -1.6613696813583374, + "logps/chosen": -144.49481201171875, + "logps/rejected": -149.93467712402344, + "loss": 0.1187, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.299633741378784, + "rewards/margins": 3.491069793701172, + "rewards/rejected": -6.790703773498535, + "step": 1847 + }, + { + "epoch": 2.97, + "learning_rate": 3.7247324613555293e-07, + "logits/chosen": -1.625678539276123, + "logits/rejected": -1.6220862865447998, + "logps/chosen": -101.85519409179688, + "logps/rejected": -162.093017578125, + "loss": 0.0957, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8012583255767822, + "rewards/margins": 6.4540181159973145, + "rewards/rejected": -7.255276203155518, + "step": 1848 + }, + { + "epoch": 2.97, + "learning_rate": 3.7237415774871183e-07, + "logits/chosen": -1.6036503314971924, + "logits/rejected": -1.5726630687713623, + "logps/chosen": -107.09656524658203, + "logps/rejected": -185.03575134277344, + "loss": 0.2031, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.364143133163452, + "rewards/margins": 5.4613542556762695, + "rewards/rejected": -7.825497627258301, + "step": 1849 + }, + { + "epoch": 2.97, + "learning_rate": 3.7227506936187074e-07, + "logits/chosen": -1.684525966644287, + "logits/rejected": -1.7338085174560547, + "logps/chosen": -125.98589324951172, + "logps/rejected": -182.1653594970703, + "loss": 0.0452, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.374876022338867, + "rewards/margins": 4.519647598266602, + "rewards/rejected": -6.894523620605469, + "step": 1850 + }, + { + "epoch": 2.97, + "learning_rate": 3.721759809750297e-07, + "logits/chosen": -1.6773960590362549, + "logits/rejected": -1.6852432489395142, + "logps/chosen": -106.29759216308594, + "logps/rejected": -180.97177124023438, + "loss": 0.0799, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2513556480407715, + "rewards/margins": 5.7772536277771, + "rewards/rejected": -8.028609275817871, + "step": 1851 + }, + { + "epoch": 2.97, + "learning_rate": 3.7207689258818866e-07, + "logits/chosen": -1.6329138278961182, + "logits/rejected": -1.5798695087432861, + "logps/chosen": -97.25318145751953, + "logps/rejected": -138.33181762695312, + "loss": 0.0852, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4794821739196777, + "rewards/margins": 5.570281028747559, + "rewards/rejected": -7.049763202667236, + "step": 1852 + }, + { + "epoch": 2.97, + "learning_rate": 3.719778042013476e-07, + "logits/chosen": -1.5611621141433716, + "logits/rejected": -1.5081110000610352, + "logps/chosen": -107.39796447753906, + "logps/rejected": -133.10415649414062, + "loss": 0.1684, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.0686638355255127, + "rewards/margins": 3.479729175567627, + "rewards/rejected": -5.548393249511719, + "step": 1853 + }, + { + "epoch": 2.98, + "learning_rate": 3.7187871581450653e-07, + "logits/chosen": -1.5460736751556396, + "logits/rejected": -1.5407496690750122, + "logps/chosen": -115.52072143554688, + "logps/rejected": -149.84645080566406, + "loss": 0.1453, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8644421100616455, + "rewards/margins": 4.501913070678711, + "rewards/rejected": -6.366355895996094, + "step": 1854 + }, + { + "epoch": 2.98, + "learning_rate": 3.7177962742766543e-07, + "logits/chosen": -1.5871484279632568, + "logits/rejected": -1.5384513139724731, + "logps/chosen": -111.2124252319336, + "logps/rejected": -156.21543884277344, + "loss": 0.071, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4195386171340942, + "rewards/margins": 5.0207414627075195, + "rewards/rejected": -6.440280437469482, + "step": 1855 + }, + { + "epoch": 2.98, + "learning_rate": 3.716805390408244e-07, + "logits/chosen": -1.775153636932373, + "logits/rejected": -1.6535683870315552, + "logps/chosen": -100.07649993896484, + "logps/rejected": -129.04507446289062, + "loss": 0.0823, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.029498115181922913, + "rewards/margins": 4.148833274841309, + "rewards/rejected": -4.119335174560547, + "step": 1856 + }, + { + "epoch": 2.98, + "learning_rate": 3.7158145065398335e-07, + "logits/chosen": -1.6579911708831787, + "logits/rejected": -1.6191534996032715, + "logps/chosen": -88.49031066894531, + "logps/rejected": -123.34803771972656, + "loss": 0.0678, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1533992290496826, + "rewards/margins": 3.927980661392212, + "rewards/rejected": -5.0813798904418945, + "step": 1857 + }, + { + "epoch": 2.98, + "learning_rate": 3.714823622671423e-07, + "logits/chosen": -1.6502625942230225, + "logits/rejected": -1.6449239253997803, + "logps/chosen": -88.060302734375, + "logps/rejected": -115.62471008300781, + "loss": 0.0718, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8267252445220947, + "rewards/margins": 3.535227060317993, + "rewards/rejected": -4.361952304840088, + "step": 1858 + }, + { + "epoch": 2.98, + "learning_rate": 3.713832738803012e-07, + "logits/chosen": -1.6126909255981445, + "logits/rejected": -1.6365091800689697, + "logps/chosen": -86.65589904785156, + "logps/rejected": -137.86685180664062, + "loss": 0.0781, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.286224365234375, + "rewards/margins": 3.535869598388672, + "rewards/rejected": -4.822093963623047, + "step": 1859 + }, + { + "epoch": 2.99, + "learning_rate": 3.7128418549346013e-07, + "logits/chosen": -1.6097652912139893, + "logits/rejected": -1.6258894205093384, + "logps/chosen": -74.16486358642578, + "logps/rejected": -122.19659423828125, + "loss": 0.1459, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8506567478179932, + "rewards/margins": 4.871476173400879, + "rewards/rejected": -6.722132682800293, + "step": 1860 + }, + { + "epoch": 2.99, + "learning_rate": 3.711850971066191e-07, + "logits/chosen": -1.6427104473114014, + "logits/rejected": -1.5989556312561035, + "logps/chosen": -95.17898559570312, + "logps/rejected": -144.50433349609375, + "loss": 0.1138, + "rewards/accuracies": 0.75, + "rewards/chosen": -1.644016981124878, + "rewards/margins": 4.418154716491699, + "rewards/rejected": -6.062171459197998, + "step": 1861 + }, + { + "epoch": 2.99, + "learning_rate": 3.7108600871977805e-07, + "logits/chosen": -1.654820442199707, + "logits/rejected": -1.6354401111602783, + "logps/chosen": -87.06553649902344, + "logps/rejected": -145.74801635742188, + "loss": 0.0976, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.06065034866333, + "rewards/margins": 5.746522426605225, + "rewards/rejected": -7.807172775268555, + "step": 1862 + }, + { + "epoch": 2.99, + "learning_rate": 3.7098692033293695e-07, + "logits/chosen": -1.904773235321045, + "logits/rejected": -1.9184315204620361, + "logps/chosen": -122.90592956542969, + "logps/rejected": -192.02857971191406, + "loss": 0.0902, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.773087739944458, + "rewards/margins": 5.274674892425537, + "rewards/rejected": -8.047762870788574, + "step": 1863 + }, + { + "epoch": 2.99, + "learning_rate": 3.708878319460959e-07, + "logits/chosen": -1.712644100189209, + "logits/rejected": -1.6661410331726074, + "logps/chosen": -118.72415161132812, + "logps/rejected": -142.90203857421875, + "loss": 0.1228, + "rewards/accuracies": 0.75, + "rewards/chosen": -1.5256692171096802, + "rewards/margins": 3.928725242614746, + "rewards/rejected": -5.454394817352295, + "step": 1864 + }, + { + "epoch": 2.99, + "learning_rate": 3.707887435592548e-07, + "logits/chosen": -1.595664620399475, + "logits/rejected": -1.577643871307373, + "logps/chosen": -82.77230834960938, + "logps/rejected": -156.54432678222656, + "loss": 0.0913, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5633922815322876, + "rewards/margins": 7.7410688400268555, + "rewards/rejected": -9.304461479187012, + "step": 1865 + }, + { + "epoch": 3.0, + "learning_rate": 3.706896551724138e-07, + "logits/chosen": -1.680959701538086, + "logits/rejected": -1.6975562572479248, + "logps/chosen": -85.92871856689453, + "logps/rejected": -145.62026977539062, + "loss": 0.2369, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.231262445449829, + "rewards/margins": 5.183119773864746, + "rewards/rejected": -6.414382457733154, + "step": 1866 + }, + { + "epoch": 3.0, + "learning_rate": 3.705905667855727e-07, + "logits/chosen": -1.6391139030456543, + "logits/rejected": -1.6536986827850342, + "logps/chosen": -103.18669891357422, + "logps/rejected": -173.68785095214844, + "loss": 0.2614, + "rewards/accuracies": 0.75, + "rewards/chosen": -3.0394108295440674, + "rewards/margins": 5.660988807678223, + "rewards/rejected": -8.700399398803711, + "step": 1867 + }, + { + "epoch": 3.0, + "learning_rate": 3.7049147839873165e-07, + "logits/chosen": -1.7592315673828125, + "logits/rejected": -1.620063066482544, + "logps/chosen": -130.3143310546875, + "logps/rejected": -127.0625, + "loss": 0.1578, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.701396942138672, + "rewards/margins": 1.934197187423706, + "rewards/rejected": -4.635594367980957, + "step": 1868 + }, + { + "epoch": 3.0, + "learning_rate": 3.703923900118906e-07, + "logits/chosen": -1.558750033378601, + "logits/rejected": -1.6264644861221313, + "logps/chosen": -94.35810089111328, + "logps/rejected": -133.89646911621094, + "loss": 0.189, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9421181678771973, + "rewards/margins": 3.864265203475952, + "rewards/rejected": -5.8063836097717285, + "step": 1869 + }, + { + "epoch": 3.0, + "learning_rate": 3.702933016250495e-07, + "logits/chosen": -1.7621326446533203, + "logits/rejected": -1.806652545928955, + "logps/chosen": -99.29528045654297, + "logps/rejected": -171.24449157714844, + "loss": 0.0814, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0293467044830322, + "rewards/margins": 6.328563690185547, + "rewards/rejected": -7.35791015625, + "step": 1870 + }, + { + "epoch": 3.0, + "learning_rate": 3.7019421323820847e-07, + "logits/chosen": -1.5703579187393188, + "logits/rejected": -1.555256962776184, + "logps/chosen": -109.84393310546875, + "logps/rejected": -178.0146942138672, + "loss": 0.0125, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.639150619506836, + "rewards/margins": 7.076193809509277, + "rewards/rejected": -9.715344429016113, + "step": 1871 + }, + { + "epoch": 3.0, + "learning_rate": 3.700951248513674e-07, + "logits/chosen": -1.674302577972412, + "logits/rejected": -1.6335704326629639, + "logps/chosen": -94.05751037597656, + "logps/rejected": -139.27871704101562, + "loss": 0.0594, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4134095907211304, + "rewards/margins": 3.8592147827148438, + "rewards/rejected": -5.272624492645264, + "step": 1872 + }, + { + "epoch": 3.01, + "learning_rate": 3.6999603646452634e-07, + "logits/chosen": -1.599224328994751, + "logits/rejected": -1.6235761642456055, + "logps/chosen": -88.71532440185547, + "logps/rejected": -168.3770751953125, + "loss": 0.0542, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3061755895614624, + "rewards/margins": 7.155682563781738, + "rewards/rejected": -8.461857795715332, + "step": 1873 + }, + { + "epoch": 3.01, + "learning_rate": 3.698969480776853e-07, + "logits/chosen": -1.6920950412750244, + "logits/rejected": -1.7486002445220947, + "logps/chosen": -105.12274932861328, + "logps/rejected": -146.84295654296875, + "loss": 0.0446, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.425275206565857, + "rewards/margins": 4.147624969482422, + "rewards/rejected": -5.572900295257568, + "step": 1874 + }, + { + "epoch": 3.01, + "learning_rate": 3.697978596908442e-07, + "logits/chosen": -1.5562728643417358, + "logits/rejected": -1.4507569074630737, + "logps/chosen": -120.26089477539062, + "logps/rejected": -153.0841522216797, + "loss": 0.0216, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5183202028274536, + "rewards/margins": 7.01490592956543, + "rewards/rejected": -8.533226013183594, + "step": 1875 + }, + { + "epoch": 3.01, + "learning_rate": 3.6969877130400317e-07, + "logits/chosen": -1.648351788520813, + "logits/rejected": -1.6494344472885132, + "logps/chosen": -99.00519561767578, + "logps/rejected": -154.11337280273438, + "loss": 0.0513, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7693948149681091, + "rewards/margins": 7.378775596618652, + "rewards/rejected": -8.148170471191406, + "step": 1876 + }, + { + "epoch": 3.01, + "learning_rate": 3.6959968291716207e-07, + "logits/chosen": -1.6667989492416382, + "logits/rejected": -1.5795297622680664, + "logps/chosen": -109.90059661865234, + "logps/rejected": -164.38848876953125, + "loss": 0.0204, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.845070481300354, + "rewards/margins": 6.159598350524902, + "rewards/rejected": -8.004669189453125, + "step": 1877 + }, + { + "epoch": 3.01, + "learning_rate": 3.6950059453032103e-07, + "logits/chosen": -1.568537712097168, + "logits/rejected": -1.6190099716186523, + "logps/chosen": -81.86335754394531, + "logps/rejected": -141.58892822265625, + "loss": 0.0298, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5047804713249207, + "rewards/margins": 6.584012985229492, + "rewards/rejected": -7.088793754577637, + "step": 1878 + }, + { + "epoch": 3.02, + "learning_rate": 3.6940150614348e-07, + "logits/chosen": -1.637345552444458, + "logits/rejected": -1.6550883054733276, + "logps/chosen": -73.62660217285156, + "logps/rejected": -137.79254150390625, + "loss": 0.0292, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.406909465789795, + "rewards/margins": 6.262750625610352, + "rewards/rejected": -7.669660568237305, + "step": 1879 + }, + { + "epoch": 3.02, + "learning_rate": 3.693024177566389e-07, + "logits/chosen": -1.7395377159118652, + "logits/rejected": -1.7102490663528442, + "logps/chosen": -114.62059783935547, + "logps/rejected": -154.3107147216797, + "loss": 0.034, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4890029430389404, + "rewards/margins": 4.7981157302856445, + "rewards/rejected": -6.287118911743164, + "step": 1880 + }, + { + "epoch": 3.02, + "learning_rate": 3.6920332936979786e-07, + "logits/chosen": -1.5865892171859741, + "logits/rejected": -1.4998939037322998, + "logps/chosen": -114.85581970214844, + "logps/rejected": -165.06552124023438, + "loss": 0.0164, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.196744441986084, + "rewards/margins": 5.685003757476807, + "rewards/rejected": -6.881748199462891, + "step": 1881 + }, + { + "epoch": 3.02, + "learning_rate": 3.6910424098295677e-07, + "logits/chosen": -1.7192896604537964, + "logits/rejected": -1.7427091598510742, + "logps/chosen": -101.73321533203125, + "logps/rejected": -192.13999938964844, + "loss": 0.0166, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.506147623062134, + "rewards/margins": 6.993900299072266, + "rewards/rejected": -9.500048637390137, + "step": 1882 + }, + { + "epoch": 3.02, + "learning_rate": 3.6900515259611567e-07, + "logits/chosen": -1.5768084526062012, + "logits/rejected": -1.5447890758514404, + "logps/chosen": -92.79623413085938, + "logps/rejected": -143.77194213867188, + "loss": 0.0256, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6340055465698242, + "rewards/margins": 6.512142181396484, + "rewards/rejected": -7.146147727966309, + "step": 1883 + }, + { + "epoch": 3.02, + "learning_rate": 3.689060642092747e-07, + "logits/chosen": -1.6432958841323853, + "logits/rejected": -1.6737364530563354, + "logps/chosen": -119.97509765625, + "logps/rejected": -172.07504272460938, + "loss": 0.0538, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0013494491577148, + "rewards/margins": 6.485969066619873, + "rewards/rejected": -7.487318992614746, + "step": 1884 + }, + { + "epoch": 3.03, + "learning_rate": 3.688069758224336e-07, + "logits/chosen": -1.580551028251648, + "logits/rejected": -1.4564507007598877, + "logps/chosen": -122.84117126464844, + "logps/rejected": -156.1721954345703, + "loss": 0.035, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7995476722717285, + "rewards/margins": 4.610548973083496, + "rewards/rejected": -6.410096645355225, + "step": 1885 + }, + { + "epoch": 3.03, + "learning_rate": 3.6870788743559255e-07, + "logits/chosen": -1.5368965864181519, + "logits/rejected": -1.5106050968170166, + "logps/chosen": -81.06300354003906, + "logps/rejected": -120.94497680664062, + "loss": 0.0316, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6516866683959961, + "rewards/margins": 5.176954746246338, + "rewards/rejected": -5.828641414642334, + "step": 1886 + }, + { + "epoch": 3.03, + "learning_rate": 3.6860879904875146e-07, + "logits/chosen": -1.5713231563568115, + "logits/rejected": -1.5903701782226562, + "logps/chosen": -92.16338348388672, + "logps/rejected": -160.01678466796875, + "loss": 0.0188, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.40602970123291, + "rewards/margins": 6.362042427062988, + "rewards/rejected": -8.768072128295898, + "step": 1887 + }, + { + "epoch": 3.03, + "learning_rate": 3.6850971066191037e-07, + "logits/chosen": -1.8924490213394165, + "logits/rejected": -1.7989635467529297, + "logps/chosen": -118.22969818115234, + "logps/rejected": -151.23269653320312, + "loss": 0.0181, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7644065618515015, + "rewards/margins": 4.755973815917969, + "rewards/rejected": -6.520380020141602, + "step": 1888 + }, + { + "epoch": 3.03, + "learning_rate": 3.684106222750694e-07, + "logits/chosen": -1.5663931369781494, + "logits/rejected": -1.5189484357833862, + "logps/chosen": -83.39598083496094, + "logps/rejected": -126.71826171875, + "loss": 0.0357, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3664265871047974, + "rewards/margins": 3.816039562225342, + "rewards/rejected": -5.182466506958008, + "step": 1889 + }, + { + "epoch": 3.03, + "learning_rate": 3.683115338882283e-07, + "logits/chosen": -1.7923333644866943, + "logits/rejected": -1.8274815082550049, + "logps/chosen": -108.31106567382812, + "logps/rejected": -163.31524658203125, + "loss": 0.0374, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0676989555358887, + "rewards/margins": 4.607069969177246, + "rewards/rejected": -5.674768447875977, + "step": 1890 + }, + { + "epoch": 3.04, + "learning_rate": 3.6821244550138724e-07, + "logits/chosen": -1.6667779684066772, + "logits/rejected": -1.6493017673492432, + "logps/chosen": -105.67617797851562, + "logps/rejected": -148.6350555419922, + "loss": 0.0537, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.107917070388794, + "rewards/margins": 5.460620880126953, + "rewards/rejected": -6.568537712097168, + "step": 1891 + }, + { + "epoch": 3.04, + "learning_rate": 3.6811335711454615e-07, + "logits/chosen": -1.7100728750228882, + "logits/rejected": -1.6290254592895508, + "logps/chosen": -104.39954376220703, + "logps/rejected": -146.69679260253906, + "loss": 0.0123, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.34755825996398926, + "rewards/margins": 5.27253532409668, + "rewards/rejected": -5.620093822479248, + "step": 1892 + }, + { + "epoch": 3.04, + "learning_rate": 3.6801426872770506e-07, + "logits/chosen": -1.6277227401733398, + "logits/rejected": -1.5788722038269043, + "logps/chosen": -85.88333129882812, + "logps/rejected": -155.81558227539062, + "loss": 0.0107, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7997020483016968, + "rewards/margins": 5.331783294677734, + "rewards/rejected": -7.1314849853515625, + "step": 1893 + }, + { + "epoch": 3.04, + "learning_rate": 3.6791518034086407e-07, + "logits/chosen": -1.6799894571304321, + "logits/rejected": -1.692346215248108, + "logps/chosen": -80.70146942138672, + "logps/rejected": -136.03201293945312, + "loss": 0.0187, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8223770260810852, + "rewards/margins": 6.562894344329834, + "rewards/rejected": -7.3852715492248535, + "step": 1894 + }, + { + "epoch": 3.04, + "learning_rate": 3.67816091954023e-07, + "logits/chosen": -1.6415435075759888, + "logits/rejected": -1.5249104499816895, + "logps/chosen": -103.30278015136719, + "logps/rejected": -126.67869567871094, + "loss": 0.094, + "rewards/accuracies": 0.75, + "rewards/chosen": -1.2904064655303955, + "rewards/margins": 2.6864137649536133, + "rewards/rejected": -3.976820230484009, + "step": 1895 + }, + { + "epoch": 3.04, + "learning_rate": 3.677170035671819e-07, + "logits/chosen": -1.4976716041564941, + "logits/rejected": -1.5245343446731567, + "logps/chosen": -81.58033752441406, + "logps/rejected": -145.6389617919922, + "loss": 0.0491, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5117979049682617, + "rewards/margins": 5.154696464538574, + "rewards/rejected": -7.666494369506836, + "step": 1896 + }, + { + "epoch": 3.04, + "learning_rate": 3.6761791518034084e-07, + "logits/chosen": -1.7200483083724976, + "logits/rejected": -1.620421290397644, + "logps/chosen": -99.7545166015625, + "logps/rejected": -111.57674407958984, + "loss": 0.0511, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.4283807873725891, + "rewards/margins": 3.263841390609741, + "rewards/rejected": -3.6922223567962646, + "step": 1897 + }, + { + "epoch": 3.05, + "learning_rate": 3.6751882679349975e-07, + "logits/chosen": -1.6229528188705444, + "logits/rejected": -1.6154208183288574, + "logps/chosen": -96.90663146972656, + "logps/rejected": -154.357421875, + "loss": 0.0196, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6037845611572266, + "rewards/margins": 6.531915664672852, + "rewards/rejected": -8.135700225830078, + "step": 1898 + }, + { + "epoch": 3.05, + "learning_rate": 3.6741973840665876e-07, + "logits/chosen": -1.87437903881073, + "logits/rejected": -1.8073971271514893, + "logps/chosen": -110.45804595947266, + "logps/rejected": -170.51812744140625, + "loss": 0.0278, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1526765823364258, + "rewards/margins": 7.192652702331543, + "rewards/rejected": -8.345329284667969, + "step": 1899 + }, + { + "epoch": 3.05, + "learning_rate": 3.6732065001981767e-07, + "logits/chosen": -1.6587483882904053, + "logits/rejected": -1.5802404880523682, + "logps/chosen": -111.92886352539062, + "logps/rejected": -137.61181640625, + "loss": 0.0161, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6742111444473267, + "rewards/margins": 3.9953184127807617, + "rewards/rejected": -5.669529914855957, + "step": 1900 + }, + { + "epoch": 3.05, + "learning_rate": 3.672215616329766e-07, + "logits/chosen": -1.6973333358764648, + "logits/rejected": -1.6282687187194824, + "logps/chosen": -110.10041046142578, + "logps/rejected": -157.08056640625, + "loss": 0.0282, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.484410047531128, + "rewards/margins": 5.230213165283203, + "rewards/rejected": -7.71462345123291, + "step": 1901 + }, + { + "epoch": 3.05, + "learning_rate": 3.6712247324613554e-07, + "logits/chosen": -1.6041487455368042, + "logits/rejected": -1.7238198518753052, + "logps/chosen": -95.08317565917969, + "logps/rejected": -164.3568115234375, + "loss": 0.0533, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4543869495391846, + "rewards/margins": 3.182302951812744, + "rewards/rejected": -4.63668966293335, + "step": 1902 + }, + { + "epoch": 3.05, + "learning_rate": 3.6702338485929444e-07, + "logits/chosen": -1.6339761018753052, + "logits/rejected": -1.574198842048645, + "logps/chosen": -82.62008666992188, + "logps/rejected": -141.4514617919922, + "loss": 0.0268, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2512325048446655, + "rewards/margins": 5.288807392120361, + "rewards/rejected": -6.540039539337158, + "step": 1903 + }, + { + "epoch": 3.06, + "learning_rate": 3.6692429647245346e-07, + "logits/chosen": -1.6735442876815796, + "logits/rejected": -1.653478980064392, + "logps/chosen": -103.46018981933594, + "logps/rejected": -184.48081970214844, + "loss": 0.0214, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0918941497802734, + "rewards/margins": 7.652433395385742, + "rewards/rejected": -9.744327545166016, + "step": 1904 + }, + { + "epoch": 3.06, + "learning_rate": 3.6682520808561236e-07, + "logits/chosen": -1.7127230167388916, + "logits/rejected": -1.6702903509140015, + "logps/chosen": -99.74884033203125, + "logps/rejected": -158.19534301757812, + "loss": 0.061, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1497256755828857, + "rewards/margins": 5.801769256591797, + "rewards/rejected": -7.951494216918945, + "step": 1905 + }, + { + "epoch": 3.06, + "learning_rate": 3.6672611969877127e-07, + "logits/chosen": -1.7548975944519043, + "logits/rejected": -1.7401463985443115, + "logps/chosen": -98.95782470703125, + "logps/rejected": -155.8157958984375, + "loss": 0.0177, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.436131715774536, + "rewards/margins": 6.115757465362549, + "rewards/rejected": -8.551889419555664, + "step": 1906 + }, + { + "epoch": 3.06, + "learning_rate": 3.6662703131193023e-07, + "logits/chosen": -1.6533514261245728, + "logits/rejected": -1.760128378868103, + "logps/chosen": -118.9019546508789, + "logps/rejected": -162.83978271484375, + "loss": 0.0323, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.769501209259033, + "rewards/margins": 4.0259199142456055, + "rewards/rejected": -6.7954206466674805, + "step": 1907 + }, + { + "epoch": 3.06, + "learning_rate": 3.6652794292508914e-07, + "logits/chosen": -1.7508225440979004, + "logits/rejected": -1.8303509950637817, + "logps/chosen": -128.1343536376953, + "logps/rejected": -178.4276885986328, + "loss": 0.0274, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.702071189880371, + "rewards/margins": 4.746828556060791, + "rewards/rejected": -7.448899269104004, + "step": 1908 + }, + { + "epoch": 3.06, + "learning_rate": 3.664288545382481e-07, + "logits/chosen": -1.7263387441635132, + "logits/rejected": -1.7603390216827393, + "logps/chosen": -144.83370971679688, + "logps/rejected": -182.39764404296875, + "loss": 0.0603, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4596524238586426, + "rewards/margins": 4.986607074737549, + "rewards/rejected": -7.44625997543335, + "step": 1909 + }, + { + "epoch": 3.07, + "learning_rate": 3.6632976615140706e-07, + "logits/chosen": -1.8220547437667847, + "logits/rejected": -1.7363783121109009, + "logps/chosen": -102.36355590820312, + "logps/rejected": -185.99185180664062, + "loss": 0.0754, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8659863471984863, + "rewards/margins": 8.897928237915039, + "rewards/rejected": -10.763915061950684, + "step": 1910 + }, + { + "epoch": 3.07, + "learning_rate": 3.6623067776456596e-07, + "logits/chosen": -1.6040081977844238, + "logits/rejected": -1.6430306434631348, + "logps/chosen": -86.89701843261719, + "logps/rejected": -132.38198852539062, + "loss": 0.0105, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3414196968078613, + "rewards/margins": 5.169520854949951, + "rewards/rejected": -6.5109405517578125, + "step": 1911 + }, + { + "epoch": 3.07, + "learning_rate": 3.661315893777249e-07, + "logits/chosen": -1.688725471496582, + "logits/rejected": -1.7015926837921143, + "logps/chosen": -99.93431091308594, + "logps/rejected": -181.14259338378906, + "loss": 0.0479, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2033534049987793, + "rewards/margins": 5.790416240692139, + "rewards/rejected": -6.993769645690918, + "step": 1912 + }, + { + "epoch": 3.07, + "learning_rate": 3.6603250099088383e-07, + "logits/chosen": -1.7488212585449219, + "logits/rejected": -1.795146107673645, + "logps/chosen": -110.70306396484375, + "logps/rejected": -145.4078369140625, + "loss": 0.0159, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3580440282821655, + "rewards/margins": 4.888909339904785, + "rewards/rejected": -6.246953010559082, + "step": 1913 + }, + { + "epoch": 3.07, + "learning_rate": 3.659334126040428e-07, + "logits/chosen": -1.6552610397338867, + "logits/rejected": -1.662558913230896, + "logps/chosen": -99.43937683105469, + "logps/rejected": -187.4552459716797, + "loss": 0.0492, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2031972408294678, + "rewards/margins": 6.932158946990967, + "rewards/rejected": -9.135356903076172, + "step": 1914 + }, + { + "epoch": 3.07, + "learning_rate": 3.6583432421720175e-07, + "logits/chosen": -1.5724084377288818, + "logits/rejected": -1.692056655883789, + "logps/chosen": -113.00341796875, + "logps/rejected": -215.00323486328125, + "loss": 0.1127, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.779017210006714, + "rewards/margins": 5.822019577026367, + "rewards/rejected": -8.601036071777344, + "step": 1915 + }, + { + "epoch": 3.08, + "learning_rate": 3.6573523583036066e-07, + "logits/chosen": -1.7610795497894287, + "logits/rejected": -1.7060661315917969, + "logps/chosen": -127.24043273925781, + "logps/rejected": -183.40130615234375, + "loss": 0.0131, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5935006141662598, + "rewards/margins": 6.849026679992676, + "rewards/rejected": -9.442526817321777, + "step": 1916 + }, + { + "epoch": 3.08, + "learning_rate": 3.656361474435196e-07, + "logits/chosen": -1.7431848049163818, + "logits/rejected": -1.6702219247817993, + "logps/chosen": -95.4879150390625, + "logps/rejected": -168.80035400390625, + "loss": 0.0382, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5997374057769775, + "rewards/margins": 7.886066436767578, + "rewards/rejected": -8.485803604125977, + "step": 1917 + }, + { + "epoch": 3.08, + "learning_rate": 3.655370590566785e-07, + "logits/chosen": -1.7646522521972656, + "logits/rejected": -1.642569899559021, + "logps/chosen": -103.41246032714844, + "logps/rejected": -132.5789337158203, + "loss": 0.0992, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6851685047149658, + "rewards/margins": 4.765220642089844, + "rewards/rejected": -5.450389385223389, + "step": 1918 + }, + { + "epoch": 3.08, + "learning_rate": 3.654379706698375e-07, + "logits/chosen": -1.71641206741333, + "logits/rejected": -1.7225399017333984, + "logps/chosen": -117.28567504882812, + "logps/rejected": -180.14553833007812, + "loss": 0.045, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.6157400608062744, + "rewards/margins": 5.102658748626709, + "rewards/rejected": -8.718399047851562, + "step": 1919 + }, + { + "epoch": 3.08, + "learning_rate": 3.6533888228299644e-07, + "logits/chosen": -1.6854426860809326, + "logits/rejected": -1.7167248725891113, + "logps/chosen": -101.82542419433594, + "logps/rejected": -177.07296752929688, + "loss": 0.0369, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0744495391845703, + "rewards/margins": 7.362520217895508, + "rewards/rejected": -9.436969757080078, + "step": 1920 + }, + { + "epoch": 3.08, + "learning_rate": 3.6523979389615535e-07, + "logits/chosen": -1.6945387125015259, + "logits/rejected": -1.7681795358657837, + "logps/chosen": -131.51300048828125, + "logps/rejected": -178.84646606445312, + "loss": 0.0491, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.876291036605835, + "rewards/margins": 4.3888115882873535, + "rewards/rejected": -8.26510238647461, + "step": 1921 + }, + { + "epoch": 3.09, + "learning_rate": 3.651407055093143e-07, + "logits/chosen": -1.6427534818649292, + "logits/rejected": -1.6236960887908936, + "logps/chosen": -95.70513916015625, + "logps/rejected": -193.684326171875, + "loss": 0.0189, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1455950736999512, + "rewards/margins": 10.101436614990234, + "rewards/rejected": -11.247031211853027, + "step": 1922 + }, + { + "epoch": 3.09, + "learning_rate": 3.650416171224732e-07, + "logits/chosen": -1.6943484544754028, + "logits/rejected": -1.7048935890197754, + "logps/chosen": -82.8721694946289, + "logps/rejected": -149.16055297851562, + "loss": 0.0573, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3443591594696045, + "rewards/margins": 6.0919013023376465, + "rewards/rejected": -7.436261177062988, + "step": 1923 + }, + { + "epoch": 3.09, + "learning_rate": 3.649425287356322e-07, + "logits/chosen": -1.5582890510559082, + "logits/rejected": -1.5721954107284546, + "logps/chosen": -116.88792419433594, + "logps/rejected": -179.40170288085938, + "loss": 0.0661, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.71346378326416, + "rewards/margins": 6.799563407897949, + "rewards/rejected": -9.51302719116211, + "step": 1924 + }, + { + "epoch": 3.09, + "learning_rate": 3.6484344034879114e-07, + "logits/chosen": -1.7036488056182861, + "logits/rejected": -1.7554304599761963, + "logps/chosen": -103.85214233398438, + "logps/rejected": -205.3756561279297, + "loss": 0.027, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.534808397293091, + "rewards/margins": 7.539812088012695, + "rewards/rejected": -10.074620246887207, + "step": 1925 + }, + { + "epoch": 3.09, + "learning_rate": 3.6474435196195004e-07, + "logits/chosen": -1.5587064027786255, + "logits/rejected": -1.623528242111206, + "logps/chosen": -97.46810913085938, + "logps/rejected": -174.80950927734375, + "loss": 0.0318, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5073888301849365, + "rewards/margins": 7.149669647216797, + "rewards/rejected": -9.657058715820312, + "step": 1926 + }, + { + "epoch": 3.09, + "learning_rate": 3.64645263575109e-07, + "logits/chosen": -1.5745210647583008, + "logits/rejected": -1.5146420001983643, + "logps/chosen": -94.89555358886719, + "logps/rejected": -168.1523895263672, + "loss": 0.0295, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.169896364212036, + "rewards/margins": 8.105024337768555, + "rewards/rejected": -10.274920463562012, + "step": 1927 + }, + { + "epoch": 3.09, + "learning_rate": 3.645461751882679e-07, + "logits/chosen": -1.7285041809082031, + "logits/rejected": -1.730893850326538, + "logps/chosen": -118.09146118164062, + "logps/rejected": -132.76002502441406, + "loss": 0.043, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.500150442123413, + "rewards/margins": 3.345635414123535, + "rewards/rejected": -4.845786094665527, + "step": 1928 + }, + { + "epoch": 3.1, + "learning_rate": 3.6444708680142687e-07, + "logits/chosen": -1.7215481996536255, + "logits/rejected": -1.7563947439193726, + "logps/chosen": -115.05442810058594, + "logps/rejected": -214.53884887695312, + "loss": 0.0161, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7084250450134277, + "rewards/margins": 8.851237297058105, + "rewards/rejected": -10.559661865234375, + "step": 1929 + }, + { + "epoch": 3.1, + "learning_rate": 3.643479984145858e-07, + "logits/chosen": -1.717126727104187, + "logits/rejected": -1.6685242652893066, + "logps/chosen": -116.08157348632812, + "logps/rejected": -148.3933868408203, + "loss": 0.0196, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.075526475906372, + "rewards/margins": 5.374039173126221, + "rewards/rejected": -6.449565410614014, + "step": 1930 + }, + { + "epoch": 3.1, + "learning_rate": 3.6424891002774474e-07, + "logits/chosen": -1.665290117263794, + "logits/rejected": -1.6367485523223877, + "logps/chosen": -130.36013793945312, + "logps/rejected": -177.5913848876953, + "loss": 0.0236, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.711521625518799, + "rewards/margins": 6.114994049072266, + "rewards/rejected": -9.826516151428223, + "step": 1931 + }, + { + "epoch": 3.1, + "learning_rate": 3.641498216409037e-07, + "logits/chosen": -1.6750272512435913, + "logits/rejected": -1.644675612449646, + "logps/chosen": -98.60137939453125, + "logps/rejected": -120.58511352539062, + "loss": 0.0379, + "rewards/accuracies": 0.75, + "rewards/chosen": -1.7334991693496704, + "rewards/margins": 3.1803090572357178, + "rewards/rejected": -4.913808345794678, + "step": 1932 + }, + { + "epoch": 3.1, + "learning_rate": 3.640507332540626e-07, + "logits/chosen": -1.5439561605453491, + "logits/rejected": -1.6064728498458862, + "logps/chosen": -116.88392639160156, + "logps/rejected": -194.9021453857422, + "loss": 0.0249, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.370835065841675, + "rewards/margins": 6.219663619995117, + "rewards/rejected": -9.590497970581055, + "step": 1933 + }, + { + "epoch": 3.1, + "learning_rate": 3.639516448672215e-07, + "logits/chosen": -1.7127487659454346, + "logits/rejected": -1.7000490427017212, + "logps/chosen": -123.20008850097656, + "logps/rejected": -201.34494018554688, + "loss": 0.0377, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6485095024108887, + "rewards/margins": 7.709593772888184, + "rewards/rejected": -10.358102798461914, + "step": 1934 + }, + { + "epoch": 3.11, + "learning_rate": 3.6385255648038047e-07, + "logits/chosen": -1.46981680393219, + "logits/rejected": -1.5378495454788208, + "logps/chosen": -80.08000946044922, + "logps/rejected": -135.05169677734375, + "loss": 0.0383, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0323357582092285, + "rewards/margins": 5.417774677276611, + "rewards/rejected": -6.45011043548584, + "step": 1935 + }, + { + "epoch": 3.11, + "learning_rate": 3.6375346809353943e-07, + "logits/chosen": -1.7383027076721191, + "logits/rejected": -1.7675490379333496, + "logps/chosen": -124.0751953125, + "logps/rejected": -205.1046600341797, + "loss": 0.0206, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.445425033569336, + "rewards/margins": 5.516932487487793, + "rewards/rejected": -8.962356567382812, + "step": 1936 + }, + { + "epoch": 3.11, + "learning_rate": 3.636543797066984e-07, + "logits/chosen": -1.8075617551803589, + "logits/rejected": -1.8700815439224243, + "logps/chosen": -114.66032409667969, + "logps/rejected": -173.7946014404297, + "loss": 0.0607, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7023115158081055, + "rewards/margins": 4.775672912597656, + "rewards/rejected": -7.47798490524292, + "step": 1937 + }, + { + "epoch": 3.11, + "learning_rate": 3.635552913198573e-07, + "logits/chosen": -1.8957135677337646, + "logits/rejected": -1.8277348279953003, + "logps/chosen": -123.88986206054688, + "logps/rejected": -168.5408935546875, + "loss": 0.01, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.42256760597229, + "rewards/margins": 5.346343994140625, + "rewards/rejected": -8.768912315368652, + "step": 1938 + }, + { + "epoch": 3.11, + "learning_rate": 3.634562029330162e-07, + "logits/chosen": -1.6349012851715088, + "logits/rejected": -1.6503089666366577, + "logps/chosen": -84.57666778564453, + "logps/rejected": -171.06443786621094, + "loss": 0.0283, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8964742422103882, + "rewards/margins": 7.850900650024414, + "rewards/rejected": -8.74737548828125, + "step": 1939 + }, + { + "epoch": 3.11, + "learning_rate": 3.6335711454617516e-07, + "logits/chosen": -1.5477144718170166, + "logits/rejected": -1.6328811645507812, + "logps/chosen": -133.12603759765625, + "logps/rejected": -205.72183227539062, + "loss": 0.0201, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.266352891921997, + "rewards/margins": 7.226635932922363, + "rewards/rejected": -10.492989540100098, + "step": 1940 + }, + { + "epoch": 3.12, + "learning_rate": 3.632580261593341e-07, + "logits/chosen": -1.7440824508666992, + "logits/rejected": -1.8198513984680176, + "logps/chosen": -90.61750793457031, + "logps/rejected": -216.2113037109375, + "loss": 0.042, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.024347588419914246, + "rewards/margins": 12.375530242919922, + "rewards/rejected": -12.399877548217773, + "step": 1941 + }, + { + "epoch": 3.12, + "learning_rate": 3.631589377724931e-07, + "logits/chosen": -1.4591419696807861, + "logits/rejected": -1.4938799142837524, + "logps/chosen": -107.21939086914062, + "logps/rejected": -197.74029541015625, + "loss": 0.0267, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.802433967590332, + "rewards/margins": 8.58193302154541, + "rewards/rejected": -11.384367942810059, + "step": 1942 + }, + { + "epoch": 3.12, + "learning_rate": 3.63059849385652e-07, + "logits/chosen": -1.6882543563842773, + "logits/rejected": -1.7257355451583862, + "logps/chosen": -97.52288818359375, + "logps/rejected": -158.1234893798828, + "loss": 0.0159, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0082539319992065, + "rewards/margins": 5.645055294036865, + "rewards/rejected": -6.653309345245361, + "step": 1943 + }, + { + "epoch": 3.12, + "learning_rate": 3.629607609988109e-07, + "logits/chosen": -1.7466353178024292, + "logits/rejected": -1.7213224172592163, + "logps/chosen": -83.61992645263672, + "logps/rejected": -208.62783813476562, + "loss": 0.0061, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.03688819706439972, + "rewards/margins": 7.528218746185303, + "rewards/rejected": -7.491330146789551, + "step": 1944 + }, + { + "epoch": 3.12, + "learning_rate": 3.6286167261196985e-07, + "logits/chosen": -1.7166410684585571, + "logits/rejected": -1.6794699430465698, + "logps/chosen": -133.43780517578125, + "logps/rejected": -174.02767944335938, + "loss": 0.0229, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.8693768978118896, + "rewards/margins": 5.477082252502441, + "rewards/rejected": -9.346458435058594, + "step": 1945 + }, + { + "epoch": 3.12, + "learning_rate": 3.6276258422512876e-07, + "logits/chosen": -1.748315453529358, + "logits/rejected": -1.6389821767807007, + "logps/chosen": -127.88972473144531, + "logps/rejected": -150.11944580078125, + "loss": 0.026, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.0441665649414062, + "rewards/margins": 4.500893592834473, + "rewards/rejected": -7.545060157775879, + "step": 1946 + }, + { + "epoch": 3.13, + "learning_rate": 3.626634958382878e-07, + "logits/chosen": -1.6809368133544922, + "logits/rejected": -1.7901078462600708, + "logps/chosen": -77.57942199707031, + "logps/rejected": -152.99945068359375, + "loss": 0.0291, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8548580408096313, + "rewards/margins": 4.466367244720459, + "rewards/rejected": -6.321225643157959, + "step": 1947 + }, + { + "epoch": 3.13, + "learning_rate": 3.625644074514467e-07, + "logits/chosen": -1.7235873937606812, + "logits/rejected": -1.7226086854934692, + "logps/chosen": -78.4059829711914, + "logps/rejected": -128.07118225097656, + "loss": 0.022, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2847692966461182, + "rewards/margins": 4.415901184082031, + "rewards/rejected": -5.70067024230957, + "step": 1948 + }, + { + "epoch": 3.13, + "learning_rate": 3.624653190646056e-07, + "logits/chosen": -1.6852827072143555, + "logits/rejected": -1.6790168285369873, + "logps/chosen": -150.095703125, + "logps/rejected": -216.11949157714844, + "loss": 0.0327, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3690452575683594, + "rewards/margins": 7.016724586486816, + "rewards/rejected": -10.385769844055176, + "step": 1949 + }, + { + "epoch": 3.13, + "learning_rate": 3.6236623067776455e-07, + "logits/chosen": -1.823380947113037, + "logits/rejected": -1.6570295095443726, + "logps/chosen": -123.5429458618164, + "logps/rejected": -162.1097412109375, + "loss": 0.0224, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4376399517059326, + "rewards/margins": 5.464521408081055, + "rewards/rejected": -7.902161121368408, + "step": 1950 + }, + { + "epoch": 3.13, + "learning_rate": 3.6226714229092345e-07, + "logits/chosen": -1.6425719261169434, + "logits/rejected": -1.6099767684936523, + "logps/chosen": -141.56192016601562, + "logps/rejected": -216.4085693359375, + "loss": 0.0126, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.039567470550537, + "rewards/margins": 7.744546890258789, + "rewards/rejected": -12.784114837646484, + "step": 1951 + }, + { + "epoch": 3.13, + "learning_rate": 3.6216805390408247e-07, + "logits/chosen": -1.8270808458328247, + "logits/rejected": -1.8234386444091797, + "logps/chosen": -126.98995971679688, + "logps/rejected": -173.7485809326172, + "loss": 0.0285, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.884312152862549, + "rewards/margins": 3.3831474781036377, + "rewards/rejected": -6.267459869384766, + "step": 1952 + }, + { + "epoch": 3.13, + "learning_rate": 3.620689655172414e-07, + "logits/chosen": -1.5415632724761963, + "logits/rejected": -1.6515536308288574, + "logps/chosen": -111.42669677734375, + "logps/rejected": -182.29058837890625, + "loss": 0.0281, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.138338088989258, + "rewards/margins": 5.588000774383545, + "rewards/rejected": -8.726339340209961, + "step": 1953 + }, + { + "epoch": 3.14, + "learning_rate": 3.619698771304003e-07, + "logits/chosen": -1.5850507020950317, + "logits/rejected": -1.5958012342453003, + "logps/chosen": -116.91487884521484, + "logps/rejected": -195.30686950683594, + "loss": 0.0084, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.919752597808838, + "rewards/margins": 7.000920295715332, + "rewards/rejected": -10.920673370361328, + "step": 1954 + }, + { + "epoch": 3.14, + "learning_rate": 3.6187078874355924e-07, + "logits/chosen": -1.7460577487945557, + "logits/rejected": -1.7089674472808838, + "logps/chosen": -124.64060974121094, + "logps/rejected": -185.37905883789062, + "loss": 0.0116, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.035760879516602, + "rewards/margins": 6.420534133911133, + "rewards/rejected": -10.456295013427734, + "step": 1955 + }, + { + "epoch": 3.14, + "learning_rate": 3.6177170035671815e-07, + "logits/chosen": -1.6647393703460693, + "logits/rejected": -1.706511378288269, + "logps/chosen": -147.89768981933594, + "logps/rejected": -198.69247436523438, + "loss": 0.0148, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0797901153564453, + "rewards/margins": 5.071946144104004, + "rewards/rejected": -7.151736259460449, + "step": 1956 + }, + { + "epoch": 3.14, + "learning_rate": 3.6167261196987716e-07, + "logits/chosen": -1.6850024461746216, + "logits/rejected": -1.684111475944519, + "logps/chosen": -105.60826110839844, + "logps/rejected": -175.80361938476562, + "loss": 0.0335, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4007818698883057, + "rewards/margins": 7.041140079498291, + "rewards/rejected": -9.441922187805176, + "step": 1957 + }, + { + "epoch": 3.14, + "learning_rate": 3.6157352358303607e-07, + "logits/chosen": -1.5616878271102905, + "logits/rejected": -1.5326266288757324, + "logps/chosen": -68.1089096069336, + "logps/rejected": -115.59246826171875, + "loss": 0.0404, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.35430896282196045, + "rewards/margins": 4.422353744506836, + "rewards/rejected": -4.776662826538086, + "step": 1958 + }, + { + "epoch": 3.14, + "learning_rate": 3.61474435196195e-07, + "logits/chosen": -1.7346173524856567, + "logits/rejected": -1.7499836683273315, + "logps/chosen": -97.03572845458984, + "logps/rejected": -184.90524291992188, + "loss": 0.0171, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6955863237380981, + "rewards/margins": 8.96204948425293, + "rewards/rejected": -10.657635688781738, + "step": 1959 + }, + { + "epoch": 3.15, + "learning_rate": 3.6137534680935393e-07, + "logits/chosen": -1.772837519645691, + "logits/rejected": -1.7603347301483154, + "logps/chosen": -65.09259033203125, + "logps/rejected": -137.53564453125, + "loss": 0.066, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1786889135837555, + "rewards/margins": 7.673479080200195, + "rewards/rejected": -7.852168083190918, + "step": 1960 + }, + { + "epoch": 3.15, + "learning_rate": 3.6127625842251284e-07, + "logits/chosen": -1.5729438066482544, + "logits/rejected": -1.4959604740142822, + "logps/chosen": -86.2054443359375, + "logps/rejected": -184.04727172851562, + "loss": 0.0666, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12659834325313568, + "rewards/margins": 10.28763198852539, + "rewards/rejected": -10.414229393005371, + "step": 1961 + }, + { + "epoch": 3.15, + "learning_rate": 3.6117717003567185e-07, + "logits/chosen": -1.7164199352264404, + "logits/rejected": -1.6199469566345215, + "logps/chosen": -136.36248779296875, + "logps/rejected": -187.34881591796875, + "loss": 0.0096, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8692572116851807, + "rewards/margins": 6.836688995361328, + "rewards/rejected": -9.705946922302246, + "step": 1962 + }, + { + "epoch": 3.15, + "learning_rate": 3.6107808164883076e-07, + "logits/chosen": -1.7264463901519775, + "logits/rejected": -1.685098648071289, + "logps/chosen": -94.50041198730469, + "logps/rejected": -181.68499755859375, + "loss": 0.0166, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5757334232330322, + "rewards/margins": 8.881714820861816, + "rewards/rejected": -10.45744800567627, + "step": 1963 + }, + { + "epoch": 3.15, + "learning_rate": 3.6097899326198967e-07, + "logits/chosen": -1.6349058151245117, + "logits/rejected": -1.7028789520263672, + "logps/chosen": -102.79163360595703, + "logps/rejected": -198.27064514160156, + "loss": 0.0181, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6038267612457275, + "rewards/margins": 8.8462553024292, + "rewards/rejected": -11.450082778930664, + "step": 1964 + }, + { + "epoch": 3.15, + "learning_rate": 3.608799048751486e-07, + "logits/chosen": -1.7351312637329102, + "logits/rejected": -1.723825454711914, + "logps/chosen": -104.37217712402344, + "logps/rejected": -152.84677124023438, + "loss": 0.1197, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8030707836151123, + "rewards/margins": 4.907454490661621, + "rewards/rejected": -7.710525989532471, + "step": 1965 + }, + { + "epoch": 3.16, + "learning_rate": 3.6078081648830753e-07, + "logits/chosen": -1.638871192932129, + "logits/rejected": -1.560049295425415, + "logps/chosen": -110.66050720214844, + "logps/rejected": -142.51060485839844, + "loss": 0.0847, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7966654300689697, + "rewards/margins": 4.6570281982421875, + "rewards/rejected": -7.453693389892578, + "step": 1966 + }, + { + "epoch": 3.16, + "learning_rate": 3.6068172810146644e-07, + "logits/chosen": -1.6663594245910645, + "logits/rejected": -1.719827651977539, + "logps/chosen": -98.63490295410156, + "logps/rejected": -183.7691650390625, + "loss": 0.0283, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.237114906311035, + "rewards/margins": 6.978353500366211, + "rewards/rejected": -9.215468406677246, + "step": 1967 + }, + { + "epoch": 3.16, + "learning_rate": 3.6058263971462545e-07, + "logits/chosen": -1.6410138607025146, + "logits/rejected": -1.6698615550994873, + "logps/chosen": -114.33976745605469, + "logps/rejected": -172.77833557128906, + "loss": 0.0244, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.735752582550049, + "rewards/margins": 5.422980308532715, + "rewards/rejected": -8.158733367919922, + "step": 1968 + }, + { + "epoch": 3.16, + "learning_rate": 3.6048355132778436e-07, + "logits/chosen": -1.6621994972229004, + "logits/rejected": -1.6747206449508667, + "logps/chosen": -129.36508178710938, + "logps/rejected": -202.16140747070312, + "loss": 0.0203, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.641920328140259, + "rewards/margins": 5.4655938148498535, + "rewards/rejected": -9.107514381408691, + "step": 1969 + }, + { + "epoch": 3.16, + "learning_rate": 3.603844629409433e-07, + "logits/chosen": -1.7725540399551392, + "logits/rejected": -1.7223410606384277, + "logps/chosen": -84.36936950683594, + "logps/rejected": -165.73927307128906, + "loss": 0.0281, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.3146694302558899, + "rewards/margins": 7.3898396492004395, + "rewards/rejected": -7.704509735107422, + "step": 1970 + }, + { + "epoch": 3.16, + "learning_rate": 3.602853745541022e-07, + "logits/chosen": -1.573026180267334, + "logits/rejected": -1.6656849384307861, + "logps/chosen": -96.93243408203125, + "logps/rejected": -174.966064453125, + "loss": 0.1155, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.5613150596618652, + "rewards/margins": 6.243129730224609, + "rewards/rejected": -9.804445266723633, + "step": 1971 + }, + { + "epoch": 3.17, + "learning_rate": 3.6018628616726113e-07, + "logits/chosen": -1.7056617736816406, + "logits/rejected": -1.749609351158142, + "logps/chosen": -125.49136352539062, + "logps/rejected": -199.64938354492188, + "loss": 0.0226, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.734984874725342, + "rewards/margins": 6.830042839050293, + "rewards/rejected": -11.565028190612793, + "step": 1972 + }, + { + "epoch": 3.17, + "learning_rate": 3.6008719778042015e-07, + "logits/chosen": -1.6128945350646973, + "logits/rejected": -1.7019072771072388, + "logps/chosen": -108.23746490478516, + "logps/rejected": -201.17156982421875, + "loss": 0.0922, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.24153470993042, + "rewards/margins": 6.931545734405518, + "rewards/rejected": -10.173080444335938, + "step": 1973 + }, + { + "epoch": 3.17, + "learning_rate": 3.5998810939357905e-07, + "logits/chosen": -1.805866003036499, + "logits/rejected": -1.8104450702667236, + "logps/chosen": -106.85108947753906, + "logps/rejected": -187.06568908691406, + "loss": 0.0179, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.86531662940979, + "rewards/margins": 8.228012084960938, + "rewards/rejected": -10.093328475952148, + "step": 1974 + }, + { + "epoch": 3.17, + "learning_rate": 3.59889021006738e-07, + "logits/chosen": -1.631588101387024, + "logits/rejected": -1.6011061668395996, + "logps/chosen": -123.61515808105469, + "logps/rejected": -213.03077697753906, + "loss": 0.0505, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7904090881347656, + "rewards/margins": 8.409296989440918, + "rewards/rejected": -12.19970703125, + "step": 1975 + }, + { + "epoch": 3.17, + "learning_rate": 3.597899326198969e-07, + "logits/chosen": -1.624626636505127, + "logits/rejected": -1.605358362197876, + "logps/chosen": -117.13568878173828, + "logps/rejected": -158.8237762451172, + "loss": 0.0624, + "rewards/accuracies": 0.75, + "rewards/chosen": -3.0105838775634766, + "rewards/margins": 4.174419403076172, + "rewards/rejected": -7.185003280639648, + "step": 1976 + }, + { + "epoch": 3.17, + "learning_rate": 3.596908442330558e-07, + "logits/chosen": -1.8333104848861694, + "logits/rejected": -1.8280823230743408, + "logps/chosen": -108.53047943115234, + "logps/rejected": -177.74520874023438, + "loss": 0.0095, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5441758632659912, + "rewards/margins": 6.97042989730835, + "rewards/rejected": -8.514606475830078, + "step": 1977 + }, + { + "epoch": 3.17, + "learning_rate": 3.5959175584621484e-07, + "logits/chosen": -1.7583786249160767, + "logits/rejected": -1.7734776735305786, + "logps/chosen": -131.54200744628906, + "logps/rejected": -220.95298767089844, + "loss": 0.0061, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2204041481018066, + "rewards/margins": 7.116202354431152, + "rewards/rejected": -9.336606979370117, + "step": 1978 + }, + { + "epoch": 3.18, + "learning_rate": 3.5949266745937375e-07, + "logits/chosen": -1.510981798171997, + "logits/rejected": -1.580195426940918, + "logps/chosen": -71.58564758300781, + "logps/rejected": -175.59471130371094, + "loss": 0.0244, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4252915382385254, + "rewards/margins": 7.644321441650391, + "rewards/rejected": -9.069613456726074, + "step": 1979 + }, + { + "epoch": 3.18, + "learning_rate": 3.593935790725327e-07, + "logits/chosen": -1.5241928100585938, + "logits/rejected": -1.5479671955108643, + "logps/chosen": -95.62603759765625, + "logps/rejected": -178.81036376953125, + "loss": 0.0093, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6505924463272095, + "rewards/margins": 7.531933784484863, + "rewards/rejected": -9.182526588439941, + "step": 1980 + }, + { + "epoch": 3.18, + "learning_rate": 3.592944906856916e-07, + "logits/chosen": -1.7434297800064087, + "logits/rejected": -1.7364239692687988, + "logps/chosen": -100.96076202392578, + "logps/rejected": -164.77230834960938, + "loss": 0.0184, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4954776763916016, + "rewards/margins": 6.1485066413879395, + "rewards/rejected": -8.643983840942383, + "step": 1981 + }, + { + "epoch": 3.18, + "learning_rate": 3.591954022988505e-07, + "logits/chosen": -1.7545653581619263, + "logits/rejected": -1.7757402658462524, + "logps/chosen": -138.83709716796875, + "logps/rejected": -198.08169555664062, + "loss": 0.1579, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.481916904449463, + "rewards/margins": 6.665650844573975, + "rewards/rejected": -10.147567749023438, + "step": 1982 + }, + { + "epoch": 3.18, + "learning_rate": 3.5909631391200953e-07, + "logits/chosen": -1.5850155353546143, + "logits/rejected": -1.6293792724609375, + "logps/chosen": -117.47493743896484, + "logps/rejected": -218.18447875976562, + "loss": 0.0393, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.609663248062134, + "rewards/margins": 9.055994033813477, + "rewards/rejected": -11.665657997131348, + "step": 1983 + }, + { + "epoch": 3.18, + "learning_rate": 3.5899722552516844e-07, + "logits/chosen": -1.714773416519165, + "logits/rejected": -1.8290891647338867, + "logps/chosen": -86.82098388671875, + "logps/rejected": -173.24423217773438, + "loss": 0.0188, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.050837278366089, + "rewards/margins": 4.450195789337158, + "rewards/rejected": -6.501032829284668, + "step": 1984 + }, + { + "epoch": 3.19, + "learning_rate": 3.588981371383274e-07, + "logits/chosen": -1.7641794681549072, + "logits/rejected": -1.7192003726959229, + "logps/chosen": -132.29916381835938, + "logps/rejected": -158.6502685546875, + "loss": 0.0265, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4948902130126953, + "rewards/margins": 4.208545684814453, + "rewards/rejected": -6.703434944152832, + "step": 1985 + }, + { + "epoch": 3.19, + "learning_rate": 3.587990487514863e-07, + "logits/chosen": -1.6659367084503174, + "logits/rejected": -1.6605466604232788, + "logps/chosen": -111.20159149169922, + "logps/rejected": -207.7238006591797, + "loss": 0.0537, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.381382942199707, + "rewards/margins": 7.288689136505127, + "rewards/rejected": -9.670072555541992, + "step": 1986 + }, + { + "epoch": 3.19, + "learning_rate": 3.586999603646452e-07, + "logits/chosen": -1.7060295343399048, + "logits/rejected": -1.7931212186813354, + "logps/chosen": -126.69080352783203, + "logps/rejected": -210.21237182617188, + "loss": 0.0096, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.326317310333252, + "rewards/margins": 6.684223651885986, + "rewards/rejected": -9.010540962219238, + "step": 1987 + }, + { + "epoch": 3.19, + "learning_rate": 3.5860087197780417e-07, + "logits/chosen": -1.7053873538970947, + "logits/rejected": -1.739250898361206, + "logps/chosen": -106.90412902832031, + "logps/rejected": -183.924560546875, + "loss": 0.0147, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.281558036804199, + "rewards/margins": 7.322775840759277, + "rewards/rejected": -9.604333877563477, + "step": 1988 + }, + { + "epoch": 3.19, + "learning_rate": 3.5850178359096313e-07, + "logits/chosen": -1.678769826889038, + "logits/rejected": -1.5989044904708862, + "logps/chosen": -115.54940795898438, + "logps/rejected": -163.576416015625, + "loss": 0.0331, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7568955421447754, + "rewards/margins": 5.199860572814941, + "rewards/rejected": -8.956756591796875, + "step": 1989 + }, + { + "epoch": 3.19, + "learning_rate": 3.584026952041221e-07, + "logits/chosen": -1.6448458433151245, + "logits/rejected": -1.584925889968872, + "logps/chosen": -124.52859497070312, + "logps/rejected": -180.59735107421875, + "loss": 0.0107, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3303802013397217, + "rewards/margins": 5.922438621520996, + "rewards/rejected": -8.252819061279297, + "step": 1990 + }, + { + "epoch": 3.2, + "learning_rate": 3.58303606817281e-07, + "logits/chosen": -1.73057222366333, + "logits/rejected": -1.7992959022521973, + "logps/chosen": -99.14570617675781, + "logps/rejected": -202.18441772460938, + "loss": 0.0078, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8036044836044312, + "rewards/margins": 7.1999616622924805, + "rewards/rejected": -9.00356674194336, + "step": 1991 + }, + { + "epoch": 3.2, + "learning_rate": 3.582045184304399e-07, + "logits/chosen": -1.635919451713562, + "logits/rejected": -1.7112751007080078, + "logps/chosen": -92.94445037841797, + "logps/rejected": -181.21163940429688, + "loss": 0.0161, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7323309183120728, + "rewards/margins": 7.687793254852295, + "rewards/rejected": -9.420124053955078, + "step": 1992 + }, + { + "epoch": 3.2, + "learning_rate": 3.5810543004359886e-07, + "logits/chosen": -1.671657919883728, + "logits/rejected": -1.6368800401687622, + "logps/chosen": -136.6543426513672, + "logps/rejected": -147.5520477294922, + "loss": 0.0789, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.31401252746582, + "rewards/margins": 3.441311836242676, + "rewards/rejected": -7.755324840545654, + "step": 1993 + }, + { + "epoch": 3.2, + "learning_rate": 3.580063416567578e-07, + "logits/chosen": -1.6248106956481934, + "logits/rejected": -1.677144169807434, + "logps/chosen": -60.86071014404297, + "logps/rejected": -161.8345947265625, + "loss": 0.0103, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7413819432258606, + "rewards/margins": 8.462782859802246, + "rewards/rejected": -9.2041654586792, + "step": 1994 + }, + { + "epoch": 3.2, + "learning_rate": 3.579072532699168e-07, + "logits/chosen": -1.7373908758163452, + "logits/rejected": -1.693756341934204, + "logps/chosen": -116.62831115722656, + "logps/rejected": -173.82101440429688, + "loss": 0.0101, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8449108600616455, + "rewards/margins": 6.4041748046875, + "rewards/rejected": -9.249085426330566, + "step": 1995 + }, + { + "epoch": 3.2, + "learning_rate": 3.578081648830757e-07, + "logits/chosen": -1.6260175704956055, + "logits/rejected": -1.689884901046753, + "logps/chosen": -99.6757583618164, + "logps/rejected": -169.79754638671875, + "loss": 0.027, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9919350147247314, + "rewards/margins": 5.179461479187012, + "rewards/rejected": -8.171396255493164, + "step": 1996 + }, + { + "epoch": 3.21, + "learning_rate": 3.577090764962346e-07, + "logits/chosen": -1.6278773546218872, + "logits/rejected": -1.5755157470703125, + "logps/chosen": -140.30148315429688, + "logps/rejected": -187.70242309570312, + "loss": 0.0111, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.143270492553711, + "rewards/margins": 7.0506672859191895, + "rewards/rejected": -10.193938255310059, + "step": 1997 + }, + { + "epoch": 3.21, + "learning_rate": 3.5760998810939356e-07, + "logits/chosen": -1.6571062803268433, + "logits/rejected": -1.7408782243728638, + "logps/chosen": -119.50012969970703, + "logps/rejected": -171.8124542236328, + "loss": 0.014, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3525402545928955, + "rewards/margins": 5.181175231933594, + "rewards/rejected": -7.533716201782227, + "step": 1998 + }, + { + "epoch": 3.21, + "learning_rate": 3.575108997225525e-07, + "logits/chosen": -1.4913583993911743, + "logits/rejected": -1.5393571853637695, + "logps/chosen": -137.9495086669922, + "logps/rejected": -205.7068328857422, + "loss": 0.0182, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.592309951782227, + "rewards/margins": 5.664676666259766, + "rewards/rejected": -11.256987571716309, + "step": 1999 + }, + { + "epoch": 3.21, + "learning_rate": 3.574118113357115e-07, + "logits/chosen": -1.6846952438354492, + "logits/rejected": -1.597804069519043, + "logps/chosen": -153.3061981201172, + "logps/rejected": -180.0919952392578, + "loss": 0.0488, + "rewards/accuracies": 0.75, + "rewards/chosen": -4.228933811187744, + "rewards/margins": 3.2838704586029053, + "rewards/rejected": -7.51280403137207, + "step": 2000 + }, + { + "epoch": 3.21, + "learning_rate": 3.573127229488704e-07, + "logits/chosen": -1.4437700510025024, + "logits/rejected": -1.4576162099838257, + "logps/chosen": -113.46170806884766, + "logps/rejected": -228.89572143554688, + "loss": 0.0048, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1835033893585205, + "rewards/margins": 6.617332935333252, + "rewards/rejected": -9.800836563110352, + "step": 2001 + }, + { + "epoch": 3.21, + "learning_rate": 3.572136345620293e-07, + "logits/chosen": -1.7463536262512207, + "logits/rejected": -1.7170718908309937, + "logps/chosen": -123.6358642578125, + "logps/rejected": -190.8805694580078, + "loss": 0.0111, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3949577808380127, + "rewards/margins": 7.933981895446777, + "rewards/rejected": -10.328940391540527, + "step": 2002 + }, + { + "epoch": 3.22, + "learning_rate": 3.5711454617518825e-07, + "logits/chosen": -1.539290189743042, + "logits/rejected": -1.5693296194076538, + "logps/chosen": -116.95975494384766, + "logps/rejected": -194.82623291015625, + "loss": 0.0872, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3267292976379395, + "rewards/margins": 8.51527214050293, + "rewards/rejected": -10.842000961303711, + "step": 2003 + }, + { + "epoch": 3.22, + "learning_rate": 3.570154577883472e-07, + "logits/chosen": -1.698521375656128, + "logits/rejected": -1.6985746622085571, + "logps/chosen": -92.02882385253906, + "logps/rejected": -129.2461700439453, + "loss": 0.0554, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4635863304138184, + "rewards/margins": 3.5712997913360596, + "rewards/rejected": -6.034885883331299, + "step": 2004 + }, + { + "epoch": 3.22, + "learning_rate": 3.569163694015061e-07, + "logits/chosen": -1.75165855884552, + "logits/rejected": -1.7594168186187744, + "logps/chosen": -126.77601623535156, + "logps/rejected": -177.39732360839844, + "loss": 0.0451, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.361426591873169, + "rewards/margins": 5.126023292541504, + "rewards/rejected": -7.487449645996094, + "step": 2005 + }, + { + "epoch": 3.22, + "learning_rate": 3.568172810146651e-07, + "logits/chosen": -1.6680991649627686, + "logits/rejected": -1.6535958051681519, + "logps/chosen": -118.80451965332031, + "logps/rejected": -203.21603393554688, + "loss": 0.0261, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.221660852432251, + "rewards/margins": 7.459354400634766, + "rewards/rejected": -10.681015014648438, + "step": 2006 + }, + { + "epoch": 3.22, + "learning_rate": 3.56718192627824e-07, + "logits/chosen": -1.7694975137710571, + "logits/rejected": -1.6701669692993164, + "logps/chosen": -129.56613159179688, + "logps/rejected": -195.66006469726562, + "loss": 0.0169, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.583385467529297, + "rewards/margins": 7.497419834136963, + "rewards/rejected": -12.080804824829102, + "step": 2007 + }, + { + "epoch": 3.22, + "learning_rate": 3.5661910424098294e-07, + "logits/chosen": -1.7256627082824707, + "logits/rejected": -1.687364101409912, + "logps/chosen": -132.4951629638672, + "logps/rejected": -167.18020629882812, + "loss": 0.0249, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7311692237854004, + "rewards/margins": 4.674322128295898, + "rewards/rejected": -7.405491352081299, + "step": 2008 + }, + { + "epoch": 3.22, + "learning_rate": 3.5652001585414185e-07, + "logits/chosen": -1.6575243473052979, + "logits/rejected": -1.675766944885254, + "logps/chosen": -87.92431640625, + "logps/rejected": -159.44053649902344, + "loss": 0.0207, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9726520776748657, + "rewards/margins": 6.444147109985352, + "rewards/rejected": -7.416799545288086, + "step": 2009 + }, + { + "epoch": 3.23, + "learning_rate": 3.564209274673008e-07, + "logits/chosen": -1.6902023553848267, + "logits/rejected": -1.6616640090942383, + "logps/chosen": -123.70252227783203, + "logps/rejected": -195.3353271484375, + "loss": 0.0107, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8088459968566895, + "rewards/margins": 6.850536346435547, + "rewards/rejected": -9.659382820129395, + "step": 2010 + }, + { + "epoch": 3.23, + "learning_rate": 3.5632183908045977e-07, + "logits/chosen": -1.7516603469848633, + "logits/rejected": -1.7181164026260376, + "logps/chosen": -96.66256713867188, + "logps/rejected": -182.65757751464844, + "loss": 0.0471, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6014363765716553, + "rewards/margins": 8.602636337280273, + "rewards/rejected": -10.204072952270508, + "step": 2011 + }, + { + "epoch": 3.23, + "learning_rate": 3.562227506936187e-07, + "logits/chosen": -1.7618237733840942, + "logits/rejected": -1.7404905557632446, + "logps/chosen": -116.84169006347656, + "logps/rejected": -170.95925903320312, + "loss": 0.0269, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.472209930419922, + "rewards/margins": 5.421300411224365, + "rewards/rejected": -7.893510341644287, + "step": 2012 + }, + { + "epoch": 3.23, + "learning_rate": 3.5612366230677764e-07, + "logits/chosen": -1.8653661012649536, + "logits/rejected": -1.8255833387374878, + "logps/chosen": -124.89971923828125, + "logps/rejected": -186.58786010742188, + "loss": 0.0227, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7147672176361084, + "rewards/margins": 5.521827220916748, + "rewards/rejected": -9.236594200134277, + "step": 2013 + }, + { + "epoch": 3.23, + "learning_rate": 3.5602457391993654e-07, + "logits/chosen": -1.733572244644165, + "logits/rejected": -1.725052833557129, + "logps/chosen": -105.62425231933594, + "logps/rejected": -181.5944366455078, + "loss": 0.0128, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.207209825515747, + "rewards/margins": 7.438213348388672, + "rewards/rejected": -9.645423889160156, + "step": 2014 + }, + { + "epoch": 3.23, + "learning_rate": 3.559254855330955e-07, + "logits/chosen": -1.714752197265625, + "logits/rejected": -1.6862417459487915, + "logps/chosen": -136.73931884765625, + "logps/rejected": -188.9892578125, + "loss": 0.0138, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.014031410217285, + "rewards/margins": 6.40467643737793, + "rewards/rejected": -10.418707847595215, + "step": 2015 + }, + { + "epoch": 3.24, + "learning_rate": 3.5582639714625446e-07, + "logits/chosen": -1.7709604501724243, + "logits/rejected": -1.74626624584198, + "logps/chosen": -123.44064331054688, + "logps/rejected": -195.44447326660156, + "loss": 0.012, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0272455215454102, + "rewards/margins": 9.278097152709961, + "rewards/rejected": -10.305341720581055, + "step": 2016 + }, + { + "epoch": 3.24, + "learning_rate": 3.5572730875941337e-07, + "logits/chosen": -1.5063961744308472, + "logits/rejected": -1.6065428256988525, + "logps/chosen": -84.97294616699219, + "logps/rejected": -193.00779724121094, + "loss": 0.0503, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5014123916625977, + "rewards/margins": 7.171283721923828, + "rewards/rejected": -8.67269515991211, + "step": 2017 + }, + { + "epoch": 3.24, + "learning_rate": 3.5562822037257233e-07, + "logits/chosen": -1.7142449617385864, + "logits/rejected": -1.572502613067627, + "logps/chosen": -103.47219848632812, + "logps/rejected": -141.6439666748047, + "loss": 0.027, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7490415573120117, + "rewards/margins": 4.820174217224121, + "rewards/rejected": -6.569215774536133, + "step": 2018 + }, + { + "epoch": 3.24, + "learning_rate": 3.5552913198573124e-07, + "logits/chosen": -1.6510255336761475, + "logits/rejected": -1.6544229984283447, + "logps/chosen": -93.62248992919922, + "logps/rejected": -196.1938018798828, + "loss": 0.0234, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.779899001121521, + "rewards/margins": 8.541215896606445, + "rewards/rejected": -10.321115493774414, + "step": 2019 + }, + { + "epoch": 3.24, + "learning_rate": 3.554300435988902e-07, + "logits/chosen": -1.5723965167999268, + "logits/rejected": -1.5600991249084473, + "logps/chosen": -130.86013793945312, + "logps/rejected": -167.15878295898438, + "loss": 0.0271, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.158167839050293, + "rewards/margins": 4.35741662979126, + "rewards/rejected": -7.5155839920043945, + "step": 2020 + }, + { + "epoch": 3.24, + "learning_rate": 3.5533095521204916e-07, + "logits/chosen": -1.7211904525756836, + "logits/rejected": -1.6674188375473022, + "logps/chosen": -103.51260375976562, + "logps/rejected": -153.24549865722656, + "loss": 0.0424, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.587942361831665, + "rewards/margins": 4.041879653930664, + "rewards/rejected": -6.62982177734375, + "step": 2021 + }, + { + "epoch": 3.25, + "learning_rate": 3.5523186682520806e-07, + "logits/chosen": -1.8160046339035034, + "logits/rejected": -1.8378138542175293, + "logps/chosen": -110.88774108886719, + "logps/rejected": -141.93838500976562, + "loss": 0.0464, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.352081537246704, + "rewards/margins": 4.052038192749023, + "rewards/rejected": -5.404119491577148, + "step": 2022 + }, + { + "epoch": 3.25, + "learning_rate": 3.55132778438367e-07, + "logits/chosen": -1.6357476711273193, + "logits/rejected": -1.7729158401489258, + "logps/chosen": -90.22354125976562, + "logps/rejected": -144.4239959716797, + "loss": 0.0248, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.064680576324463, + "rewards/margins": 5.501731872558594, + "rewards/rejected": -7.566412925720215, + "step": 2023 + }, + { + "epoch": 3.25, + "learning_rate": 3.5503369005152593e-07, + "logits/chosen": -1.7298825979232788, + "logits/rejected": -1.7840473651885986, + "logps/chosen": -98.15716552734375, + "logps/rejected": -202.7715301513672, + "loss": 0.0146, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7900609970092773, + "rewards/margins": 8.75582504272461, + "rewards/rejected": -10.545886039733887, + "step": 2024 + }, + { + "epoch": 3.25, + "learning_rate": 3.549346016646849e-07, + "logits/chosen": -1.6559823751449585, + "logits/rejected": -1.631261944770813, + "logps/chosen": -68.30438995361328, + "logps/rejected": -154.02804565429688, + "loss": 0.0286, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9928219318389893, + "rewards/margins": 7.781617164611816, + "rewards/rejected": -8.774438858032227, + "step": 2025 + }, + { + "epoch": 3.25, + "learning_rate": 3.5483551327784385e-07, + "logits/chosen": -1.6812584400177002, + "logits/rejected": -1.660265326499939, + "logps/chosen": -138.43495178222656, + "logps/rejected": -183.21591186523438, + "loss": 0.0174, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.618306875228882, + "rewards/margins": 5.05946159362793, + "rewards/rejected": -8.67776870727539, + "step": 2026 + }, + { + "epoch": 3.25, + "learning_rate": 3.5473642489100276e-07, + "logits/chosen": -1.690529704093933, + "logits/rejected": -1.6533970832824707, + "logps/chosen": -116.55496215820312, + "logps/rejected": -195.35899353027344, + "loss": 0.0343, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.213809967041016, + "rewards/margins": 7.62592887878418, + "rewards/rejected": -11.839737892150879, + "step": 2027 + }, + { + "epoch": 3.26, + "learning_rate": 3.546373365041617e-07, + "logits/chosen": -1.5723062753677368, + "logits/rejected": -1.6559556722640991, + "logps/chosen": -84.3291015625, + "logps/rejected": -188.81622314453125, + "loss": 0.0078, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4844000339508057, + "rewards/margins": 9.447090148925781, + "rewards/rejected": -10.931489944458008, + "step": 2028 + }, + { + "epoch": 3.26, + "learning_rate": 3.545382481173206e-07, + "logits/chosen": -1.584083080291748, + "logits/rejected": -1.6116832494735718, + "logps/chosen": -107.50037384033203, + "logps/rejected": -186.1376495361328, + "loss": 0.0587, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5716426372528076, + "rewards/margins": 7.57182502746582, + "rewards/rejected": -10.143467903137207, + "step": 2029 + }, + { + "epoch": 3.26, + "learning_rate": 3.5443915973047953e-07, + "logits/chosen": -1.5774288177490234, + "logits/rejected": -1.6623879671096802, + "logps/chosen": -112.900390625, + "logps/rejected": -195.7937774658203, + "loss": 0.0488, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.689795970916748, + "rewards/margins": 6.9362382888793945, + "rewards/rejected": -10.626033782958984, + "step": 2030 + }, + { + "epoch": 3.26, + "learning_rate": 3.5434007134363854e-07, + "logits/chosen": -1.645391821861267, + "logits/rejected": -1.7327213287353516, + "logps/chosen": -124.74850463867188, + "logps/rejected": -217.0048828125, + "loss": 0.022, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.2198710441589355, + "rewards/margins": 7.360556602478027, + "rewards/rejected": -10.580427169799805, + "step": 2031 + }, + { + "epoch": 3.26, + "learning_rate": 3.5424098295679745e-07, + "logits/chosen": -1.651674509048462, + "logits/rejected": -1.7258096933364868, + "logps/chosen": -110.44580841064453, + "logps/rejected": -179.6291961669922, + "loss": 0.0171, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8107266426086426, + "rewards/margins": 4.630771636962891, + "rewards/rejected": -7.441497802734375, + "step": 2032 + }, + { + "epoch": 3.26, + "learning_rate": 3.541418945699564e-07, + "logits/chosen": -1.7522649765014648, + "logits/rejected": -1.744690179824829, + "logps/chosen": -124.28913879394531, + "logps/rejected": -196.3479766845703, + "loss": 0.0273, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.483734130859375, + "rewards/margins": 6.694919586181641, + "rewards/rejected": -10.178653717041016, + "step": 2033 + }, + { + "epoch": 3.26, + "learning_rate": 3.540428061831153e-07, + "logits/chosen": -1.6912932395935059, + "logits/rejected": -1.7010061740875244, + "logps/chosen": -99.73137664794922, + "logps/rejected": -167.2801513671875, + "loss": 0.0341, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.113145351409912, + "rewards/margins": 7.037293434143066, + "rewards/rejected": -9.150439262390137, + "step": 2034 + }, + { + "epoch": 3.27, + "learning_rate": 3.539437177962742e-07, + "logits/chosen": -1.5355944633483887, + "logits/rejected": -1.6451241970062256, + "logps/chosen": -73.27796173095703, + "logps/rejected": -204.64828491210938, + "loss": 0.024, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1976823806762695, + "rewards/margins": 10.020912170410156, + "rewards/rejected": -11.218594551086426, + "step": 2035 + }, + { + "epoch": 3.27, + "learning_rate": 3.5384462940943323e-07, + "logits/chosen": -1.619011402130127, + "logits/rejected": -1.646573781967163, + "logps/chosen": -122.8797607421875, + "logps/rejected": -204.96339416503906, + "loss": 0.0115, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.8439865112304688, + "rewards/margins": 6.5051703453063965, + "rewards/rejected": -10.349156379699707, + "step": 2036 + }, + { + "epoch": 3.27, + "learning_rate": 3.5374554102259214e-07, + "logits/chosen": -1.7068995237350464, + "logits/rejected": -1.7333273887634277, + "logps/chosen": -121.65911865234375, + "logps/rejected": -183.50997924804688, + "loss": 0.0232, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2939860820770264, + "rewards/margins": 6.232802391052246, + "rewards/rejected": -8.526788711547852, + "step": 2037 + }, + { + "epoch": 3.27, + "learning_rate": 3.5364645263575105e-07, + "logits/chosen": -1.7073216438293457, + "logits/rejected": -1.6960726976394653, + "logps/chosen": -98.9232177734375, + "logps/rejected": -140.88519287109375, + "loss": 0.0237, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3781697750091553, + "rewards/margins": 4.691478252410889, + "rewards/rejected": -7.069647789001465, + "step": 2038 + }, + { + "epoch": 3.27, + "learning_rate": 3.5354736424891e-07, + "logits/chosen": -1.5573463439941406, + "logits/rejected": -1.5077707767486572, + "logps/chosen": -79.8752670288086, + "logps/rejected": -148.6927032470703, + "loss": 0.013, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2021527290344238, + "rewards/margins": 8.196165084838867, + "rewards/rejected": -9.39831829071045, + "step": 2039 + }, + { + "epoch": 3.27, + "learning_rate": 3.534482758620689e-07, + "logits/chosen": -1.7919373512268066, + "logits/rejected": -1.7153916358947754, + "logps/chosen": -96.89486694335938, + "logps/rejected": -167.23121643066406, + "loss": 0.023, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8974096179008484, + "rewards/margins": 7.23689079284668, + "rewards/rejected": -8.134300231933594, + "step": 2040 + }, + { + "epoch": 3.28, + "learning_rate": 3.5334918747522793e-07, + "logits/chosen": -1.6336089372634888, + "logits/rejected": -1.7141127586364746, + "logps/chosen": -131.75634765625, + "logps/rejected": -195.8248291015625, + "loss": 0.0259, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.5211052894592285, + "rewards/margins": 5.545111656188965, + "rewards/rejected": -9.066216468811035, + "step": 2041 + }, + { + "epoch": 3.28, + "learning_rate": 3.5325009908838683e-07, + "logits/chosen": -1.7816723585128784, + "logits/rejected": -1.6934444904327393, + "logps/chosen": -138.10914611816406, + "logps/rejected": -190.49066162109375, + "loss": 0.0222, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.6673169136047363, + "rewards/margins": 6.743302345275879, + "rewards/rejected": -10.410619735717773, + "step": 2042 + }, + { + "epoch": 3.28, + "learning_rate": 3.5315101070154574e-07, + "logits/chosen": -1.7043250799179077, + "logits/rejected": -1.696839451789856, + "logps/chosen": -125.66586303710938, + "logps/rejected": -178.55821228027344, + "loss": 0.0519, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8805248737335205, + "rewards/margins": 4.60069465637207, + "rewards/rejected": -7.48121976852417, + "step": 2043 + }, + { + "epoch": 3.28, + "learning_rate": 3.530519223147047e-07, + "logits/chosen": -1.632690191268921, + "logits/rejected": -1.6675313711166382, + "logps/chosen": -96.94892883300781, + "logps/rejected": -198.15162658691406, + "loss": 0.0148, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.426142692565918, + "rewards/margins": 7.072009086608887, + "rewards/rejected": -8.498151779174805, + "step": 2044 + }, + { + "epoch": 3.28, + "learning_rate": 3.529528339278636e-07, + "logits/chosen": -1.8036823272705078, + "logits/rejected": -1.6965363025665283, + "logps/chosen": -89.57609558105469, + "logps/rejected": -133.85401916503906, + "loss": 0.0245, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8678112030029297, + "rewards/margins": 5.78618860244751, + "rewards/rejected": -6.654000282287598, + "step": 2045 + }, + { + "epoch": 3.28, + "learning_rate": 3.528537455410226e-07, + "logits/chosen": -1.5618705749511719, + "logits/rejected": -1.580300211906433, + "logps/chosen": -85.90895080566406, + "logps/rejected": -152.3944854736328, + "loss": 0.0328, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.465034008026123, + "rewards/margins": 5.579839706420898, + "rewards/rejected": -8.044873237609863, + "step": 2046 + }, + { + "epoch": 3.29, + "learning_rate": 3.5275465715418153e-07, + "logits/chosen": -1.5931611061096191, + "logits/rejected": -1.6863532066345215, + "logps/chosen": -101.43183898925781, + "logps/rejected": -163.30418395996094, + "loss": 0.0157, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.275968074798584, + "rewards/margins": 6.54271936416626, + "rewards/rejected": -8.818687438964844, + "step": 2047 + }, + { + "epoch": 3.29, + "learning_rate": 3.5265556876734043e-07, + "logits/chosen": -1.661088228225708, + "logits/rejected": -1.6374977827072144, + "logps/chosen": -118.02924346923828, + "logps/rejected": -200.5345458984375, + "loss": 0.0184, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.752305030822754, + "rewards/margins": 8.292831420898438, + "rewards/rejected": -12.045136451721191, + "step": 2048 + }, + { + "epoch": 3.29, + "learning_rate": 3.525564803804994e-07, + "logits/chosen": -1.5887510776519775, + "logits/rejected": -1.6357126235961914, + "logps/chosen": -85.42509460449219, + "logps/rejected": -175.87744140625, + "loss": 0.0069, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4510061740875244, + "rewards/margins": 7.103634357452393, + "rewards/rejected": -8.554640769958496, + "step": 2049 + }, + { + "epoch": 3.29, + "learning_rate": 3.524573919936583e-07, + "logits/chosen": -1.7295176982879639, + "logits/rejected": -1.6733283996582031, + "logps/chosen": -81.71739196777344, + "logps/rejected": -183.78436279296875, + "loss": 0.0147, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.04011097550392151, + "rewards/margins": 10.382695198059082, + "rewards/rejected": -10.422805786132812, + "step": 2050 + }, + { + "epoch": 3.29, + "learning_rate": 3.5235830360681726e-07, + "logits/chosen": -1.7350940704345703, + "logits/rejected": -1.6993513107299805, + "logps/chosen": -114.74260711669922, + "logps/rejected": -195.00222778320312, + "loss": 0.0134, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.893986701965332, + "rewards/margins": 8.711647987365723, + "rewards/rejected": -11.605634689331055, + "step": 2051 + }, + { + "epoch": 3.29, + "learning_rate": 3.522592152199762e-07, + "logits/chosen": -1.8325505256652832, + "logits/rejected": -1.7374728918075562, + "logps/chosen": -134.5269775390625, + "logps/rejected": -184.96864318847656, + "loss": 0.1428, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.158799648284912, + "rewards/margins": 8.088876724243164, + "rewards/rejected": -11.247675895690918, + "step": 2052 + }, + { + "epoch": 3.3, + "learning_rate": 3.5216012683313513e-07, + "logits/chosen": -1.6940503120422363, + "logits/rejected": -1.7987241744995117, + "logps/chosen": -97.38296508789062, + "logps/rejected": -199.442626953125, + "loss": 0.0398, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4544386863708496, + "rewards/margins": 7.868283748626709, + "rewards/rejected": -10.322722434997559, + "step": 2053 + }, + { + "epoch": 3.3, + "learning_rate": 3.520610384462941e-07, + "logits/chosen": -1.6412770748138428, + "logits/rejected": -1.7698283195495605, + "logps/chosen": -84.8344497680664, + "logps/rejected": -170.0762176513672, + "loss": 0.0196, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9975993633270264, + "rewards/margins": 6.987544059753418, + "rewards/rejected": -9.985143661499023, + "step": 2054 + }, + { + "epoch": 3.3, + "learning_rate": 3.51961950059453e-07, + "logits/chosen": -1.6144872903823853, + "logits/rejected": -1.6565890312194824, + "logps/chosen": -95.73251342773438, + "logps/rejected": -153.00527954101562, + "loss": 0.1312, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9153337478637695, + "rewards/margins": 4.173851013183594, + "rewards/rejected": -7.089184761047363, + "step": 2055 + }, + { + "epoch": 3.3, + "learning_rate": 3.5186286167261195e-07, + "logits/chosen": -1.5612393617630005, + "logits/rejected": -1.4966504573822021, + "logps/chosen": -142.116943359375, + "logps/rejected": -194.21397399902344, + "loss": 0.008, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.988341808319092, + "rewards/margins": 6.339868068695068, + "rewards/rejected": -12.32820987701416, + "step": 2056 + }, + { + "epoch": 3.3, + "learning_rate": 3.517637732857709e-07, + "logits/chosen": -1.5660104751586914, + "logits/rejected": -1.6873427629470825, + "logps/chosen": -114.38330078125, + "logps/rejected": -203.99484252929688, + "loss": 0.0109, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.250114679336548, + "rewards/margins": 7.195102691650391, + "rewards/rejected": -9.44521713256836, + "step": 2057 + }, + { + "epoch": 3.3, + "learning_rate": 3.516646848989298e-07, + "logits/chosen": -1.7787103652954102, + "logits/rejected": -1.6974260807037354, + "logps/chosen": -112.34767150878906, + "logps/rejected": -176.05419921875, + "loss": 0.0461, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.31773042678833, + "rewards/margins": 6.7927045822143555, + "rewards/rejected": -10.110435485839844, + "step": 2058 + }, + { + "epoch": 3.3, + "learning_rate": 3.515655965120888e-07, + "logits/chosen": -1.6437506675720215, + "logits/rejected": -1.699434757232666, + "logps/chosen": -131.1387176513672, + "logps/rejected": -206.95291137695312, + "loss": 0.0159, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9130077362060547, + "rewards/margins": 7.901460647583008, + "rewards/rejected": -10.814468383789062, + "step": 2059 + }, + { + "epoch": 3.31, + "learning_rate": 3.514665081252477e-07, + "logits/chosen": -1.7752704620361328, + "logits/rejected": -1.8196146488189697, + "logps/chosen": -135.15318298339844, + "logps/rejected": -199.51034545898438, + "loss": 0.0149, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.368276834487915, + "rewards/margins": 6.969776153564453, + "rewards/rejected": -10.338052749633789, + "step": 2060 + }, + { + "epoch": 3.31, + "learning_rate": 3.5136741973840665e-07, + "logits/chosen": -1.5261967182159424, + "logits/rejected": -1.5570764541625977, + "logps/chosen": -97.02388000488281, + "logps/rejected": -178.44711303710938, + "loss": 0.0241, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1986265182495117, + "rewards/margins": 6.216625213623047, + "rewards/rejected": -9.415250778198242, + "step": 2061 + }, + { + "epoch": 3.31, + "learning_rate": 3.512683313515656e-07, + "logits/chosen": -1.6163796186447144, + "logits/rejected": -1.5798059701919556, + "logps/chosen": -112.79469299316406, + "logps/rejected": -176.45101928710938, + "loss": 0.0138, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.4859495162963867, + "rewards/margins": 7.309941291809082, + "rewards/rejected": -10.795890808105469, + "step": 2062 + }, + { + "epoch": 3.31, + "learning_rate": 3.511692429647245e-07, + "logits/chosen": -1.7428224086761475, + "logits/rejected": -1.6137886047363281, + "logps/chosen": -147.28561401367188, + "logps/rejected": -182.53680419921875, + "loss": 0.0816, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.783318042755127, + "rewards/margins": 4.728327751159668, + "rewards/rejected": -8.511646270751953, + "step": 2063 + }, + { + "epoch": 3.31, + "learning_rate": 3.5107015457788347e-07, + "logits/chosen": -1.6979378461837769, + "logits/rejected": -1.7455346584320068, + "logps/chosen": -124.53958129882812, + "logps/rejected": -198.02040100097656, + "loss": 0.0431, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.078638553619385, + "rewards/margins": 5.82305908203125, + "rewards/rejected": -9.901698112487793, + "step": 2064 + }, + { + "epoch": 3.31, + "learning_rate": 3.509710661910424e-07, + "logits/chosen": -1.5711442232131958, + "logits/rejected": -1.5496519804000854, + "logps/chosen": -104.2340087890625, + "logps/rejected": -185.98040771484375, + "loss": 0.0099, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1871848106384277, + "rewards/margins": 7.526273250579834, + "rewards/rejected": -10.713458061218262, + "step": 2065 + }, + { + "epoch": 3.32, + "learning_rate": 3.5087197780420134e-07, + "logits/chosen": -1.5481290817260742, + "logits/rejected": -1.6790692806243896, + "logps/chosen": -102.08615112304688, + "logps/rejected": -188.74856567382812, + "loss": 0.0132, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.82808256149292, + "rewards/margins": 6.767024993896484, + "rewards/rejected": -9.595108032226562, + "step": 2066 + }, + { + "epoch": 3.32, + "learning_rate": 3.507728894173603e-07, + "logits/chosen": -1.4970184564590454, + "logits/rejected": -1.594588279724121, + "logps/chosen": -95.92796325683594, + "logps/rejected": -202.95010375976562, + "loss": 0.0159, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.183837890625, + "rewards/margins": 6.93454647064209, + "rewards/rejected": -10.11838436126709, + "step": 2067 + }, + { + "epoch": 3.32, + "learning_rate": 3.506738010305192e-07, + "logits/chosen": -1.615171194076538, + "logits/rejected": -1.5649290084838867, + "logps/chosen": -161.8868408203125, + "logps/rejected": -240.80953979492188, + "loss": 0.0087, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.24836540222168, + "rewards/margins": 6.343355178833008, + "rewards/rejected": -13.591720581054688, + "step": 2068 + }, + { + "epoch": 3.32, + "learning_rate": 3.5057471264367817e-07, + "logits/chosen": -1.659646987915039, + "logits/rejected": -1.6686872243881226, + "logps/chosen": -142.30947875976562, + "logps/rejected": -213.208740234375, + "loss": 0.0081, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.487746238708496, + "rewards/margins": 7.394679069519043, + "rewards/rejected": -11.882425308227539, + "step": 2069 + }, + { + "epoch": 3.32, + "learning_rate": 3.5047562425683707e-07, + "logits/chosen": -1.4991090297698975, + "logits/rejected": -1.5452336072921753, + "logps/chosen": -146.97239685058594, + "logps/rejected": -198.33534240722656, + "loss": 0.0283, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.92130708694458, + "rewards/margins": 5.071410179138184, + "rewards/rejected": -9.992717742919922, + "step": 2070 + }, + { + "epoch": 3.32, + "learning_rate": 3.5037653586999603e-07, + "logits/chosen": -1.6591300964355469, + "logits/rejected": -1.6487138271331787, + "logps/chosen": -104.8362045288086, + "logps/rejected": -162.5994873046875, + "loss": 0.0395, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3694262504577637, + "rewards/margins": 6.500641822814941, + "rewards/rejected": -9.870068550109863, + "step": 2071 + }, + { + "epoch": 3.33, + "learning_rate": 3.5027744748315494e-07, + "logits/chosen": -1.7570536136627197, + "logits/rejected": -1.7994208335876465, + "logps/chosen": -125.41072845458984, + "logps/rejected": -184.57676696777344, + "loss": 0.0163, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7761731147766113, + "rewards/margins": 6.0106329917907715, + "rewards/rejected": -9.7868070602417, + "step": 2072 + }, + { + "epoch": 3.33, + "learning_rate": 3.501783590963139e-07, + "logits/chosen": -1.6470446586608887, + "logits/rejected": -1.6384143829345703, + "logps/chosen": -147.060302734375, + "logps/rejected": -202.508544921875, + "loss": 0.0365, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.721890449523926, + "rewards/margins": 6.80593204498291, + "rewards/rejected": -10.527823448181152, + "step": 2073 + }, + { + "epoch": 3.33, + "learning_rate": 3.5007927070947286e-07, + "logits/chosen": -1.721962809562683, + "logits/rejected": -1.8580856323242188, + "logps/chosen": -134.58436584472656, + "logps/rejected": -221.06863403320312, + "loss": 0.0175, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.479290962219238, + "rewards/margins": 6.382335186004639, + "rewards/rejected": -10.861625671386719, + "step": 2074 + }, + { + "epoch": 3.33, + "learning_rate": 3.4998018232263177e-07, + "logits/chosen": -1.5944583415985107, + "logits/rejected": -1.6397796869277954, + "logps/chosen": -110.708984375, + "logps/rejected": -185.36746215820312, + "loss": 0.0259, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9464404582977295, + "rewards/margins": 6.239197731018066, + "rewards/rejected": -9.185638427734375, + "step": 2075 + }, + { + "epoch": 3.33, + "learning_rate": 3.4988109393579067e-07, + "logits/chosen": -1.6801296472549438, + "logits/rejected": -1.7167810201644897, + "logps/chosen": -123.9905776977539, + "logps/rejected": -195.42466735839844, + "loss": 0.0232, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.2716193199157715, + "rewards/margins": 7.981805324554443, + "rewards/rejected": -12.253424644470215, + "step": 2076 + }, + { + "epoch": 3.33, + "learning_rate": 3.4978200554894963e-07, + "logits/chosen": -1.6201092004776, + "logits/rejected": -1.60984468460083, + "logps/chosen": -148.92037963867188, + "logps/rejected": -210.60067749023438, + "loss": 0.0262, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.497320175170898, + "rewards/margins": 6.2794036865234375, + "rewards/rejected": -10.776723861694336, + "step": 2077 + }, + { + "epoch": 3.34, + "learning_rate": 3.496829171621086e-07, + "logits/chosen": -1.7650374174118042, + "logits/rejected": -1.5757511854171753, + "logps/chosen": -163.24435424804688, + "logps/rejected": -212.4429931640625, + "loss": 0.1855, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.880949020385742, + "rewards/margins": 6.788045883178711, + "rewards/rejected": -12.668994903564453, + "step": 2078 + }, + { + "epoch": 3.34, + "learning_rate": 3.4958382877526755e-07, + "logits/chosen": -1.6514322757720947, + "logits/rejected": -1.6147501468658447, + "logps/chosen": -125.0506591796875, + "logps/rejected": -193.29022216796875, + "loss": 0.0451, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.056339263916016, + "rewards/margins": 6.6867451667785645, + "rewards/rejected": -10.743084907531738, + "step": 2079 + }, + { + "epoch": 3.34, + "learning_rate": 3.4948474038842646e-07, + "logits/chosen": -1.6074570417404175, + "logits/rejected": -1.5741498470306396, + "logps/chosen": -84.68325805664062, + "logps/rejected": -184.94464111328125, + "loss": 0.0221, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3913419246673584, + "rewards/margins": 9.007447242736816, + "rewards/rejected": -10.398789405822754, + "step": 2080 + }, + { + "epoch": 3.34, + "learning_rate": 3.4938565200158537e-07, + "logits/chosen": -1.7041853666305542, + "logits/rejected": -1.6737600564956665, + "logps/chosen": -119.42922973632812, + "logps/rejected": -149.07240295410156, + "loss": 0.0247, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.012264728546143, + "rewards/margins": 3.462195634841919, + "rewards/rejected": -7.474460124969482, + "step": 2081 + }, + { + "epoch": 3.34, + "learning_rate": 3.492865636147443e-07, + "logits/chosen": -1.4731776714324951, + "logits/rejected": -1.544046401977539, + "logps/chosen": -110.99830627441406, + "logps/rejected": -197.74563598632812, + "loss": 0.045, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.954432964324951, + "rewards/margins": 6.247214317321777, + "rewards/rejected": -10.201647758483887, + "step": 2082 + }, + { + "epoch": 3.34, + "learning_rate": 3.491874752279033e-07, + "logits/chosen": -1.5051703453063965, + "logits/rejected": -1.4734195470809937, + "logps/chosen": -119.0909423828125, + "logps/rejected": -207.31082153320312, + "loss": 0.041, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.827057838439941, + "rewards/margins": 8.716464042663574, + "rewards/rejected": -13.543521881103516, + "step": 2083 + }, + { + "epoch": 3.35, + "learning_rate": 3.4908838684106224e-07, + "logits/chosen": -1.633575677871704, + "logits/rejected": -1.7270586490631104, + "logps/chosen": -133.06439208984375, + "logps/rejected": -203.20111083984375, + "loss": 0.0447, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.6094703674316406, + "rewards/margins": 7.152858257293701, + "rewards/rejected": -10.7623291015625, + "step": 2084 + }, + { + "epoch": 3.35, + "learning_rate": 3.4898929845422115e-07, + "logits/chosen": -1.7078940868377686, + "logits/rejected": -1.7678699493408203, + "logps/chosen": -101.42133331298828, + "logps/rejected": -194.13467407226562, + "loss": 0.0115, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.351909875869751, + "rewards/margins": 7.795727252960205, + "rewards/rejected": -10.147636413574219, + "step": 2085 + }, + { + "epoch": 3.35, + "learning_rate": 3.4889021006738006e-07, + "logits/chosen": -1.5811543464660645, + "logits/rejected": -1.719336986541748, + "logps/chosen": -116.04046630859375, + "logps/rejected": -228.47386169433594, + "loss": 0.0183, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.0201780796051025, + "rewards/margins": 7.708017826080322, + "rewards/rejected": -10.728196144104004, + "step": 2086 + }, + { + "epoch": 3.35, + "learning_rate": 3.48791121680539e-07, + "logits/chosen": -1.7652583122253418, + "logits/rejected": -1.8926763534545898, + "logps/chosen": -80.40861511230469, + "logps/rejected": -195.84689331054688, + "loss": 0.0566, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8227264881134033, + "rewards/margins": 10.153970718383789, + "rewards/rejected": -11.97669792175293, + "step": 2087 + }, + { + "epoch": 3.35, + "learning_rate": 3.48692033293698e-07, + "logits/chosen": -1.7359036207199097, + "logits/rejected": -1.6724848747253418, + "logps/chosen": -97.54639434814453, + "logps/rejected": -190.151123046875, + "loss": 0.01, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.19036602973938, + "rewards/margins": 8.324312210083008, + "rewards/rejected": -10.514678955078125, + "step": 2088 + }, + { + "epoch": 3.35, + "learning_rate": 3.4859294490685694e-07, + "logits/chosen": -1.6203641891479492, + "logits/rejected": -1.6291577816009521, + "logps/chosen": -77.82333374023438, + "logps/rejected": -155.30172729492188, + "loss": 0.0202, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0014326572418213, + "rewards/margins": 6.2169342041015625, + "rewards/rejected": -7.218366622924805, + "step": 2089 + }, + { + "epoch": 3.35, + "learning_rate": 3.4849385652001584e-07, + "logits/chosen": -1.6725049018859863, + "logits/rejected": -1.7031389474868774, + "logps/chosen": -104.24005126953125, + "logps/rejected": -203.46922302246094, + "loss": 0.0308, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.535928249359131, + "rewards/margins": 8.983169555664062, + "rewards/rejected": -11.519098281860352, + "step": 2090 + }, + { + "epoch": 3.36, + "learning_rate": 3.4839476813317475e-07, + "logits/chosen": -1.7080307006835938, + "logits/rejected": -1.7406466007232666, + "logps/chosen": -137.87539672851562, + "logps/rejected": -216.49749755859375, + "loss": 0.0311, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.034592151641846, + "rewards/margins": 7.497150897979736, + "rewards/rejected": -12.531743049621582, + "step": 2091 + }, + { + "epoch": 3.36, + "learning_rate": 3.482956797463337e-07, + "logits/chosen": -1.5447242259979248, + "logits/rejected": -1.5915106534957886, + "logps/chosen": -99.11773681640625, + "logps/rejected": -172.39376831054688, + "loss": 0.0044, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.975186347961426, + "rewards/margins": 7.174497127532959, + "rewards/rejected": -10.149683952331543, + "step": 2092 + }, + { + "epoch": 3.36, + "learning_rate": 3.481965913594926e-07, + "logits/chosen": -1.6917927265167236, + "logits/rejected": -1.6393178701400757, + "logps/chosen": -118.53850555419922, + "logps/rejected": -191.73538208007812, + "loss": 0.0461, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.934405565261841, + "rewards/margins": 8.804667472839355, + "rewards/rejected": -11.739073753356934, + "step": 2093 + }, + { + "epoch": 3.36, + "learning_rate": 3.4809750297265163e-07, + "logits/chosen": -1.5695432424545288, + "logits/rejected": -1.4879179000854492, + "logps/chosen": -142.67584228515625, + "logps/rejected": -213.89813232421875, + "loss": 0.011, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.528069972991943, + "rewards/margins": 7.256927967071533, + "rewards/rejected": -11.784997940063477, + "step": 2094 + }, + { + "epoch": 3.36, + "learning_rate": 3.4799841458581054e-07, + "logits/chosen": -1.6946247816085815, + "logits/rejected": -1.6985349655151367, + "logps/chosen": -139.14996337890625, + "logps/rejected": -219.64065551757812, + "loss": 0.0349, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.2215428352355957, + "rewards/margins": 7.263465404510498, + "rewards/rejected": -10.485008239746094, + "step": 2095 + }, + { + "epoch": 3.36, + "learning_rate": 3.4789932619896944e-07, + "logits/chosen": -1.7050718069076538, + "logits/rejected": -1.6117172241210938, + "logps/chosen": -135.0051727294922, + "logps/rejected": -194.73324584960938, + "loss": 0.0419, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.129521369934082, + "rewards/margins": 7.611749649047852, + "rewards/rejected": -10.741271018981934, + "step": 2096 + }, + { + "epoch": 3.37, + "learning_rate": 3.478002378121284e-07, + "logits/chosen": -1.8147413730621338, + "logits/rejected": -1.839887261390686, + "logps/chosen": -119.82239532470703, + "logps/rejected": -188.00216674804688, + "loss": 0.0106, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7494897842407227, + "rewards/margins": 6.676491737365723, + "rewards/rejected": -10.425981521606445, + "step": 2097 + }, + { + "epoch": 3.37, + "learning_rate": 3.477011494252873e-07, + "logits/chosen": -1.6380187273025513, + "logits/rejected": -1.5468883514404297, + "logps/chosen": -123.74588775634766, + "logps/rejected": -183.67068481445312, + "loss": 0.0332, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.301665782928467, + "rewards/margins": 9.571096420288086, + "rewards/rejected": -11.872761726379395, + "step": 2098 + }, + { + "epoch": 3.37, + "learning_rate": 3.476020610384463e-07, + "logits/chosen": -1.5602937936782837, + "logits/rejected": -1.6781938076019287, + "logps/chosen": -75.20357513427734, + "logps/rejected": -211.9456329345703, + "loss": 0.0242, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9074466228485107, + "rewards/margins": 9.166354179382324, + "rewards/rejected": -11.073801040649414, + "step": 2099 + }, + { + "epoch": 3.37, + "learning_rate": 3.4750297265160523e-07, + "logits/chosen": -1.6872003078460693, + "logits/rejected": -1.7494442462921143, + "logps/chosen": -96.90390014648438, + "logps/rejected": -193.1634521484375, + "loss": 0.0276, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.773188591003418, + "rewards/margins": 5.21782112121582, + "rewards/rejected": -7.991009712219238, + "step": 2100 + }, + { + "epoch": 3.37, + "learning_rate": 3.4740388426476414e-07, + "logits/chosen": -1.735888957977295, + "logits/rejected": -1.77731454372406, + "logps/chosen": -111.6415023803711, + "logps/rejected": -202.2088623046875, + "loss": 0.0161, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4343886375427246, + "rewards/margins": 9.205720901489258, + "rewards/rejected": -11.64011001586914, + "step": 2101 + }, + { + "epoch": 3.37, + "learning_rate": 3.473047958779231e-07, + "logits/chosen": -1.6401386260986328, + "logits/rejected": -1.64959716796875, + "logps/chosen": -116.19493865966797, + "logps/rejected": -205.484619140625, + "loss": 0.0886, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.709129571914673, + "rewards/margins": 8.487519264221191, + "rewards/rejected": -12.196647644042969, + "step": 2102 + }, + { + "epoch": 3.38, + "learning_rate": 3.47205707491082e-07, + "logits/chosen": -1.7416654825210571, + "logits/rejected": -1.7254853248596191, + "logps/chosen": -120.11166381835938, + "logps/rejected": -190.72552490234375, + "loss": 0.0232, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7783774137496948, + "rewards/margins": 8.362340927124023, + "rewards/rejected": -10.140718460083008, + "step": 2103 + }, + { + "epoch": 3.38, + "learning_rate": 3.47106619104241e-07, + "logits/chosen": -1.6619935035705566, + "logits/rejected": -1.6697955131530762, + "logps/chosen": -137.00656127929688, + "logps/rejected": -199.46624755859375, + "loss": 0.0429, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.244231939315796, + "rewards/margins": 5.712350845336914, + "rewards/rejected": -8.956583023071289, + "step": 2104 + }, + { + "epoch": 3.38, + "learning_rate": 3.470075307173999e-07, + "logits/chosen": -1.5250502824783325, + "logits/rejected": -1.5787010192871094, + "logps/chosen": -83.69013977050781, + "logps/rejected": -198.79531860351562, + "loss": 0.0483, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0251948833465576, + "rewards/margins": 9.062454223632812, + "rewards/rejected": -11.087648391723633, + "step": 2105 + }, + { + "epoch": 3.38, + "learning_rate": 3.4690844233055883e-07, + "logits/chosen": -1.5914771556854248, + "logits/rejected": -1.5557430982589722, + "logps/chosen": -98.78699493408203, + "logps/rejected": -163.16671752929688, + "loss": 0.0138, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5047211647033691, + "rewards/margins": 6.872321128845215, + "rewards/rejected": -8.377042770385742, + "step": 2106 + }, + { + "epoch": 3.38, + "learning_rate": 3.468093539437178e-07, + "logits/chosen": -1.7883110046386719, + "logits/rejected": -1.8109058141708374, + "logps/chosen": -104.06839752197266, + "logps/rejected": -201.0550994873047, + "loss": 0.0123, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9065914154052734, + "rewards/margins": 7.728572845458984, + "rewards/rejected": -10.635164260864258, + "step": 2107 + }, + { + "epoch": 3.38, + "learning_rate": 3.467102655568767e-07, + "logits/chosen": -1.7161736488342285, + "logits/rejected": -1.7161662578582764, + "logps/chosen": -111.885498046875, + "logps/rejected": -184.13526916503906, + "loss": 0.0154, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.637997031211853, + "rewards/margins": 7.011697769165039, + "rewards/rejected": -8.64969539642334, + "step": 2108 + }, + { + "epoch": 3.39, + "learning_rate": 3.466111771700356e-07, + "logits/chosen": -1.6591455936431885, + "logits/rejected": -1.6691287755966187, + "logps/chosen": -135.85447692871094, + "logps/rejected": -191.85086059570312, + "loss": 0.0121, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9727344512939453, + "rewards/margins": 5.6169891357421875, + "rewards/rejected": -8.589723587036133, + "step": 2109 + }, + { + "epoch": 3.39, + "learning_rate": 3.465120887831946e-07, + "logits/chosen": -1.702610731124878, + "logits/rejected": -1.603570818901062, + "logps/chosen": -93.38331604003906, + "logps/rejected": -196.10940551757812, + "loss": 0.0038, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3597171306610107, + "rewards/margins": 9.624948501586914, + "rewards/rejected": -10.984665870666504, + "step": 2110 + }, + { + "epoch": 3.39, + "learning_rate": 3.464130003963535e-07, + "logits/chosen": -1.5169848203659058, + "logits/rejected": -1.5099804401397705, + "logps/chosen": -130.1094970703125, + "logps/rejected": -196.49183654785156, + "loss": 0.1969, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.409634828567505, + "rewards/margins": 6.069133758544922, + "rewards/rejected": -9.478768348693848, + "step": 2111 + }, + { + "epoch": 3.39, + "learning_rate": 3.463139120095125e-07, + "logits/chosen": -1.6773943901062012, + "logits/rejected": -1.6937811374664307, + "logps/chosen": -87.21916198730469, + "logps/rejected": -186.1132049560547, + "loss": 0.0207, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.245816707611084, + "rewards/margins": 8.923694610595703, + "rewards/rejected": -10.169511795043945, + "step": 2112 + }, + { + "epoch": 3.39, + "learning_rate": 3.462148236226714e-07, + "logits/chosen": -1.559417963027954, + "logits/rejected": -1.6223130226135254, + "logps/chosen": -94.95071411132812, + "logps/rejected": -168.73936462402344, + "loss": 0.0084, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.944291353225708, + "rewards/margins": 6.240782737731934, + "rewards/rejected": -9.185073852539062, + "step": 2113 + }, + { + "epoch": 3.39, + "learning_rate": 3.461157352358303e-07, + "logits/chosen": -1.6678355932235718, + "logits/rejected": -1.6845722198486328, + "logps/chosen": -133.56356811523438, + "logps/rejected": -201.32028198242188, + "loss": 0.0191, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.0608415603637695, + "rewards/margins": 4.923059463500977, + "rewards/rejected": -8.983901023864746, + "step": 2114 + }, + { + "epoch": 3.39, + "learning_rate": 3.460166468489893e-07, + "logits/chosen": -1.569079875946045, + "logits/rejected": -1.6221654415130615, + "logps/chosen": -132.33856201171875, + "logps/rejected": -213.79824829101562, + "loss": 0.0051, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9205539226531982, + "rewards/margins": 9.98185920715332, + "rewards/rejected": -12.902413368225098, + "step": 2115 + }, + { + "epoch": 3.4, + "learning_rate": 3.459175584621482e-07, + "logits/chosen": -1.5844814777374268, + "logits/rejected": -1.6246367692947388, + "logps/chosen": -105.90972137451172, + "logps/rejected": -203.58642578125, + "loss": 0.0239, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0645363330841064, + "rewards/margins": 7.86533784866333, + "rewards/rejected": -9.9298734664917, + "step": 2116 + }, + { + "epoch": 3.4, + "learning_rate": 3.458184700753072e-07, + "logits/chosen": -1.746612548828125, + "logits/rejected": -1.6887919902801514, + "logps/chosen": -84.11842346191406, + "logps/rejected": -147.73593139648438, + "loss": 0.0368, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5091730356216431, + "rewards/margins": 5.924288749694824, + "rewards/rejected": -6.4334611892700195, + "step": 2117 + }, + { + "epoch": 3.4, + "learning_rate": 3.457193816884661e-07, + "logits/chosen": -1.6343305110931396, + "logits/rejected": -1.7002191543579102, + "logps/chosen": -110.46508026123047, + "logps/rejected": -196.3812713623047, + "loss": 0.0324, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3924858570098877, + "rewards/margins": 7.36801815032959, + "rewards/rejected": -10.760503768920898, + "step": 2118 + }, + { + "epoch": 3.4, + "learning_rate": 3.45620293301625e-07, + "logits/chosen": -1.7157305479049683, + "logits/rejected": -1.6764408349990845, + "logps/chosen": -131.21575927734375, + "logps/rejected": -170.7235107421875, + "loss": 0.0214, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7900822162628174, + "rewards/margins": 6.043600082397461, + "rewards/rejected": -9.833683013916016, + "step": 2119 + }, + { + "epoch": 3.4, + "learning_rate": 3.45521204914784e-07, + "logits/chosen": -1.7368850708007812, + "logits/rejected": -1.682816982269287, + "logps/chosen": -86.04376983642578, + "logps/rejected": -167.6602020263672, + "loss": 0.0315, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5637054443359375, + "rewards/margins": 8.701826095581055, + "rewards/rejected": -9.265531539916992, + "step": 2120 + }, + { + "epoch": 3.4, + "learning_rate": 3.454221165279429e-07, + "logits/chosen": -1.7683706283569336, + "logits/rejected": -1.7663694620132446, + "logps/chosen": -149.86444091796875, + "logps/rejected": -201.77944946289062, + "loss": 0.0717, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.168430805206299, + "rewards/margins": 4.790733814239502, + "rewards/rejected": -9.9591646194458, + "step": 2121 + }, + { + "epoch": 3.41, + "learning_rate": 3.4532302814110187e-07, + "logits/chosen": -1.4711833000183105, + "logits/rejected": -1.5797113180160522, + "logps/chosen": -106.86261749267578, + "logps/rejected": -177.88555908203125, + "loss": 0.0292, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.182180166244507, + "rewards/margins": 6.106150150299072, + "rewards/rejected": -9.288330078125, + "step": 2122 + }, + { + "epoch": 3.41, + "learning_rate": 3.452239397542608e-07, + "logits/chosen": -1.6761380434036255, + "logits/rejected": -1.6101338863372803, + "logps/chosen": -130.4086151123047, + "logps/rejected": -195.48348999023438, + "loss": 0.0163, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1683037281036377, + "rewards/margins": 6.8069868087768555, + "rewards/rejected": -9.97529125213623, + "step": 2123 + }, + { + "epoch": 3.41, + "learning_rate": 3.451248513674197e-07, + "logits/chosen": -1.6642192602157593, + "logits/rejected": -1.6988962888717651, + "logps/chosen": -123.58120727539062, + "logps/rejected": -211.19435119628906, + "loss": 0.0247, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4430480003356934, + "rewards/margins": 9.166884422302246, + "rewards/rejected": -11.609932899475098, + "step": 2124 + }, + { + "epoch": 3.41, + "learning_rate": 3.450257629805787e-07, + "logits/chosen": -1.730371117591858, + "logits/rejected": -1.6416380405426025, + "logps/chosen": -97.18882751464844, + "logps/rejected": -174.91757202148438, + "loss": 0.014, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8670603036880493, + "rewards/margins": 7.787100791931152, + "rewards/rejected": -9.65416145324707, + "step": 2125 + }, + { + "epoch": 3.41, + "learning_rate": 3.449266745937376e-07, + "logits/chosen": -1.6365939378738403, + "logits/rejected": -1.7273750305175781, + "logps/chosen": -116.5837173461914, + "logps/rejected": -201.31570434570312, + "loss": 0.0378, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.8996098041534424, + "rewards/margins": 7.166316509246826, + "rewards/rejected": -11.065925598144531, + "step": 2126 + }, + { + "epoch": 3.41, + "learning_rate": 3.4482758620689656e-07, + "logits/chosen": -1.6202144622802734, + "logits/rejected": -1.5693854093551636, + "logps/chosen": -142.09185791015625, + "logps/rejected": -213.19493103027344, + "loss": 0.0276, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.31125545501709, + "rewards/margins": 8.871237754821777, + "rewards/rejected": -12.182493209838867, + "step": 2127 + }, + { + "epoch": 3.42, + "learning_rate": 3.4472849782005547e-07, + "logits/chosen": -1.5818853378295898, + "logits/rejected": -1.6726527214050293, + "logps/chosen": -140.8394012451172, + "logps/rejected": -207.14654541015625, + "loss": 0.0077, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.689122200012207, + "rewards/margins": 4.647453308105469, + "rewards/rejected": -10.336575508117676, + "step": 2128 + }, + { + "epoch": 3.42, + "learning_rate": 3.446294094332144e-07, + "logits/chosen": -1.607424259185791, + "logits/rejected": -1.6349148750305176, + "logps/chosen": -85.53028106689453, + "logps/rejected": -153.42079162597656, + "loss": 0.0156, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7479677200317383, + "rewards/margins": 6.573788642883301, + "rewards/rejected": -8.321756362915039, + "step": 2129 + }, + { + "epoch": 3.42, + "learning_rate": 3.445303210463734e-07, + "logits/chosen": -1.6290106773376465, + "logits/rejected": -1.5881041288375854, + "logps/chosen": -172.427001953125, + "logps/rejected": -220.6194305419922, + "loss": 0.0316, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.804256916046143, + "rewards/margins": 7.604361534118652, + "rewards/rejected": -13.408618927001953, + "step": 2130 + }, + { + "epoch": 3.42, + "learning_rate": 3.444312326595323e-07, + "logits/chosen": -1.7298390865325928, + "logits/rejected": -1.7589638233184814, + "logps/chosen": -93.51658630371094, + "logps/rejected": -198.98550415039062, + "loss": 0.0204, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4621872901916504, + "rewards/margins": 8.678671836853027, + "rewards/rejected": -10.140859603881836, + "step": 2131 + }, + { + "epoch": 3.42, + "learning_rate": 3.4433214427269125e-07, + "logits/chosen": -1.7806942462921143, + "logits/rejected": -1.8234977722167969, + "logps/chosen": -117.62294006347656, + "logps/rejected": -206.7951202392578, + "loss": 0.0083, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.904381036758423, + "rewards/margins": 8.252351760864258, + "rewards/rejected": -11.156732559204102, + "step": 2132 + }, + { + "epoch": 3.42, + "learning_rate": 3.4423305588585016e-07, + "logits/chosen": -1.6317096948623657, + "logits/rejected": -1.5454776287078857, + "logps/chosen": -117.43798828125, + "logps/rejected": -140.1502685546875, + "loss": 0.0073, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5689504146575928, + "rewards/margins": 4.75799560546875, + "rewards/rejected": -7.326945781707764, + "step": 2133 + }, + { + "epoch": 3.43, + "learning_rate": 3.4413396749900907e-07, + "logits/chosen": -1.6589980125427246, + "logits/rejected": -1.6901204586029053, + "logps/chosen": -121.6462631225586, + "logps/rejected": -193.60992431640625, + "loss": 0.0119, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.958437204360962, + "rewards/margins": 8.330808639526367, + "rewards/rejected": -11.28924560546875, + "step": 2134 + }, + { + "epoch": 3.43, + "learning_rate": 3.4403487911216803e-07, + "logits/chosen": -1.6623107194900513, + "logits/rejected": -1.7446346282958984, + "logps/chosen": -120.94762420654297, + "logps/rejected": -222.643798828125, + "loss": 0.1269, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.342770576477051, + "rewards/margins": 8.552261352539062, + "rewards/rejected": -11.895031929016113, + "step": 2135 + }, + { + "epoch": 3.43, + "learning_rate": 3.43935790725327e-07, + "logits/chosen": -1.6347672939300537, + "logits/rejected": -1.6703367233276367, + "logps/chosen": -106.63505554199219, + "logps/rejected": -202.09063720703125, + "loss": 0.0761, + "rewards/accuracies": 0.75, + "rewards/chosen": -3.0990548133850098, + "rewards/margins": 7.805667400360107, + "rewards/rejected": -10.904723167419434, + "step": 2136 + }, + { + "epoch": 3.43, + "learning_rate": 3.4383670233848595e-07, + "logits/chosen": -1.7434719800949097, + "logits/rejected": -1.8070636987686157, + "logps/chosen": -110.2618637084961, + "logps/rejected": -199.59222412109375, + "loss": 0.111, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.5942676067352295, + "rewards/margins": 8.111776351928711, + "rewards/rejected": -11.70604419708252, + "step": 2137 + }, + { + "epoch": 3.43, + "learning_rate": 3.4373761395164485e-07, + "logits/chosen": -1.804626226425171, + "logits/rejected": -1.8199589252471924, + "logps/chosen": -138.87066650390625, + "logps/rejected": -195.74761962890625, + "loss": 0.1292, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.5542654991149902, + "rewards/margins": 6.609280109405518, + "rewards/rejected": -10.163545608520508, + "step": 2138 + }, + { + "epoch": 3.43, + "learning_rate": 3.4363852556480376e-07, + "logits/chosen": -1.6549772024154663, + "logits/rejected": -1.621967077255249, + "logps/chosen": -133.55752563476562, + "logps/rejected": -192.957275390625, + "loss": 0.0151, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.4377353191375732, + "rewards/margins": 5.737541198730469, + "rewards/rejected": -9.175275802612305, + "step": 2139 + }, + { + "epoch": 3.43, + "learning_rate": 3.435394371779627e-07, + "logits/chosen": -1.7712923288345337, + "logits/rejected": -1.8134711980819702, + "logps/chosen": -146.56890869140625, + "logps/rejected": -207.39730834960938, + "loss": 0.0142, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.399912357330322, + "rewards/margins": 6.217414379119873, + "rewards/rejected": -10.617326736450195, + "step": 2140 + }, + { + "epoch": 3.44, + "learning_rate": 3.434403487911217e-07, + "logits/chosen": -1.793640375137329, + "logits/rejected": -1.7274856567382812, + "logps/chosen": -115.07180786132812, + "logps/rejected": -169.2913055419922, + "loss": 0.1645, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.1477694511413574, + "rewards/margins": 7.205746650695801, + "rewards/rejected": -9.353515625, + "step": 2141 + }, + { + "epoch": 3.44, + "learning_rate": 3.4334126040428064e-07, + "logits/chosen": -1.4984748363494873, + "logits/rejected": -1.5911855697631836, + "logps/chosen": -84.72413635253906, + "logps/rejected": -171.7322998046875, + "loss": 0.0192, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6862950325012207, + "rewards/margins": 5.60990047454834, + "rewards/rejected": -8.296195030212402, + "step": 2142 + }, + { + "epoch": 3.44, + "learning_rate": 3.4324217201743955e-07, + "logits/chosen": -1.599632978439331, + "logits/rejected": -1.5431455373764038, + "logps/chosen": -152.15631103515625, + "logps/rejected": -175.05084228515625, + "loss": 0.0987, + "rewards/accuracies": 0.75, + "rewards/chosen": -3.8824384212493896, + "rewards/margins": 3.907371759414673, + "rewards/rejected": -7.7898101806640625, + "step": 2143 + }, + { + "epoch": 3.44, + "learning_rate": 3.4314308363059845e-07, + "logits/chosen": -1.4776054620742798, + "logits/rejected": -1.522964358329773, + "logps/chosen": -99.07505798339844, + "logps/rejected": -193.6692352294922, + "loss": 0.1172, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6232134103775024, + "rewards/margins": 9.079904556274414, + "rewards/rejected": -10.703117370605469, + "step": 2144 + }, + { + "epoch": 3.44, + "learning_rate": 3.430439952437574e-07, + "logits/chosen": -1.6149190664291382, + "logits/rejected": -1.6089062690734863, + "logps/chosen": -111.9411849975586, + "logps/rejected": -193.4600067138672, + "loss": 0.0299, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6378111839294434, + "rewards/margins": 7.838544845581055, + "rewards/rejected": -9.476356506347656, + "step": 2145 + }, + { + "epoch": 3.44, + "learning_rate": 3.429449068569164e-07, + "logits/chosen": -1.6998226642608643, + "logits/rejected": -1.805789589881897, + "logps/chosen": -94.07997131347656, + "logps/rejected": -194.38330078125, + "loss": 0.0141, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.320855140686035, + "rewards/margins": 7.899065017700195, + "rewards/rejected": -10.219921112060547, + "step": 2146 + }, + { + "epoch": 3.45, + "learning_rate": 3.428458184700753e-07, + "logits/chosen": -1.6409660577774048, + "logits/rejected": -1.6643823385238647, + "logps/chosen": -93.9280776977539, + "logps/rejected": -180.39480590820312, + "loss": 0.046, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.35299554467201233, + "rewards/margins": 7.9883623123168945, + "rewards/rejected": -8.341358184814453, + "step": 2147 + }, + { + "epoch": 3.45, + "learning_rate": 3.4274673008323424e-07, + "logits/chosen": -1.6759159564971924, + "logits/rejected": -1.6278804540634155, + "logps/chosen": -115.24301147460938, + "logps/rejected": -178.8355712890625, + "loss": 0.022, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.93955397605896, + "rewards/margins": 7.745891094207764, + "rewards/rejected": -9.685444831848145, + "step": 2148 + }, + { + "epoch": 3.45, + "learning_rate": 3.4264764169639315e-07, + "logits/chosen": -1.6501338481903076, + "logits/rejected": -1.7302364110946655, + "logps/chosen": -96.22069549560547, + "logps/rejected": -189.64724731445312, + "loss": 0.0091, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2895054817199707, + "rewards/margins": 8.564990997314453, + "rewards/rejected": -10.854496002197266, + "step": 2149 + }, + { + "epoch": 3.45, + "learning_rate": 3.425485533095521e-07, + "logits/chosen": -1.8039555549621582, + "logits/rejected": -1.6177948713302612, + "logps/chosen": -149.35165405273438, + "logps/rejected": -182.60836791992188, + "loss": 0.0378, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.2662148475646973, + "rewards/margins": 5.740684509277344, + "rewards/rejected": -9.006898880004883, + "step": 2150 + }, + { + "epoch": 3.45, + "learning_rate": 3.42449464922711e-07, + "logits/chosen": -1.7074477672576904, + "logits/rejected": -1.7142506837844849, + "logps/chosen": -91.75123596191406, + "logps/rejected": -152.29296875, + "loss": 0.081, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9018985033035278, + "rewards/margins": 6.245310306549072, + "rewards/rejected": -8.147209167480469, + "step": 2151 + }, + { + "epoch": 3.45, + "learning_rate": 3.4235037653586997e-07, + "logits/chosen": -1.5900598764419556, + "logits/rejected": -1.6221089363098145, + "logps/chosen": -119.46685028076172, + "logps/rejected": -194.05747985839844, + "loss": 0.051, + "rewards/accuracies": 0.75, + "rewards/chosen": -4.081146240234375, + "rewards/margins": 6.115410804748535, + "rewards/rejected": -10.19655704498291, + "step": 2152 + }, + { + "epoch": 3.46, + "learning_rate": 3.4225128814902893e-07, + "logits/chosen": -1.8479186296463013, + "logits/rejected": -1.8195433616638184, + "logps/chosen": -80.700927734375, + "logps/rejected": -155.3946533203125, + "loss": 0.0866, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9450441598892212, + "rewards/margins": 7.4348907470703125, + "rewards/rejected": -8.379934310913086, + "step": 2153 + }, + { + "epoch": 3.46, + "learning_rate": 3.4215219976218784e-07, + "logits/chosen": -1.7731728553771973, + "logits/rejected": -1.7697763442993164, + "logps/chosen": -130.59503173828125, + "logps/rejected": -223.90118408203125, + "loss": 0.0658, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.0572190284729004, + "rewards/margins": 9.36357307434082, + "rewards/rejected": -12.420791625976562, + "step": 2154 + }, + { + "epoch": 3.46, + "learning_rate": 3.420531113753468e-07, + "logits/chosen": -1.6587774753570557, + "logits/rejected": -1.6017616987228394, + "logps/chosen": -81.21226501464844, + "logps/rejected": -160.55992126464844, + "loss": 0.0046, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1182959079742432, + "rewards/margins": 9.271583557128906, + "rewards/rejected": -10.38987922668457, + "step": 2155 + }, + { + "epoch": 3.46, + "learning_rate": 3.419540229885057e-07, + "logits/chosen": -1.5914591550827026, + "logits/rejected": -1.5598258972167969, + "logps/chosen": -100.12600708007812, + "logps/rejected": -184.84909057617188, + "loss": 0.0255, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.230988025665283, + "rewards/margins": 7.8361358642578125, + "rewards/rejected": -10.067123413085938, + "step": 2156 + }, + { + "epoch": 3.46, + "learning_rate": 3.4185493460166467e-07, + "logits/chosen": -1.7248643636703491, + "logits/rejected": -1.703332781791687, + "logps/chosen": -73.2528305053711, + "logps/rejected": -125.4189682006836, + "loss": 0.0233, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.4913840591907501, + "rewards/margins": 5.6052398681640625, + "rewards/rejected": -6.09662389755249, + "step": 2157 + }, + { + "epoch": 3.46, + "learning_rate": 3.417558462148236e-07, + "logits/chosen": -1.6010689735412598, + "logits/rejected": -1.6112291812896729, + "logps/chosen": -99.1246109008789, + "logps/rejected": -206.788330078125, + "loss": 0.0264, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6860393285751343, + "rewards/margins": 10.67763900756836, + "rewards/rejected": -12.363677978515625, + "step": 2158 + }, + { + "epoch": 3.47, + "learning_rate": 3.4165675782798253e-07, + "logits/chosen": -1.7529363632202148, + "logits/rejected": -1.6900386810302734, + "logps/chosen": -150.697021484375, + "logps/rejected": -179.15399169921875, + "loss": 0.0159, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6851181983947754, + "rewards/margins": 5.068181991577148, + "rewards/rejected": -7.753300666809082, + "step": 2159 + }, + { + "epoch": 3.47, + "learning_rate": 3.415576694411415e-07, + "logits/chosen": -1.6619263887405396, + "logits/rejected": -1.7629179954528809, + "logps/chosen": -111.65821838378906, + "logps/rejected": -182.08128356933594, + "loss": 0.057, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.945007562637329, + "rewards/margins": 5.473406791687012, + "rewards/rejected": -7.418414115905762, + "step": 2160 + }, + { + "epoch": 3.47, + "learning_rate": 3.414585810543004e-07, + "logits/chosen": -1.8316186666488647, + "logits/rejected": -1.789556860923767, + "logps/chosen": -77.19055938720703, + "logps/rejected": -186.62420654296875, + "loss": 0.0382, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0987797975540161, + "rewards/margins": 10.91048812866211, + "rewards/rejected": -12.009267807006836, + "step": 2161 + }, + { + "epoch": 3.47, + "learning_rate": 3.4135949266745936e-07, + "logits/chosen": -1.777234435081482, + "logits/rejected": -1.779771327972412, + "logps/chosen": -109.99400329589844, + "logps/rejected": -183.2122802734375, + "loss": 0.0765, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4982142448425293, + "rewards/margins": 7.62421989440918, + "rewards/rejected": -10.122434616088867, + "step": 2162 + }, + { + "epoch": 3.47, + "learning_rate": 3.412604042806183e-07, + "logits/chosen": -1.716835856437683, + "logits/rejected": -1.6154719591140747, + "logps/chosen": -100.1056900024414, + "logps/rejected": -193.4738006591797, + "loss": 0.0118, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.131496548652649, + "rewards/margins": 10.311134338378906, + "rewards/rejected": -11.44262981414795, + "step": 2163 + }, + { + "epoch": 3.47, + "learning_rate": 3.411613158937772e-07, + "logits/chosen": -1.6234805583953857, + "logits/rejected": -1.625514030456543, + "logps/chosen": -108.8163070678711, + "logps/rejected": -195.77316284179688, + "loss": 0.0163, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2255983352661133, + "rewards/margins": 7.656390190124512, + "rewards/rejected": -9.881988525390625, + "step": 2164 + }, + { + "epoch": 3.48, + "learning_rate": 3.410622275069362e-07, + "logits/chosen": -1.7151618003845215, + "logits/rejected": -1.685014009475708, + "logps/chosen": -157.894775390625, + "logps/rejected": -198.50721740722656, + "loss": 0.0311, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.278841495513916, + "rewards/margins": 4.181025981903076, + "rewards/rejected": -9.459867477416992, + "step": 2165 + }, + { + "epoch": 3.48, + "learning_rate": 3.409631391200951e-07, + "logits/chosen": -1.7651591300964355, + "logits/rejected": -1.693076729774475, + "logps/chosen": -145.2887420654297, + "logps/rejected": -215.3388671875, + "loss": 0.0331, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.234164714813232, + "rewards/margins": 7.307461738586426, + "rewards/rejected": -11.5416259765625, + "step": 2166 + }, + { + "epoch": 3.48, + "learning_rate": 3.4086405073325405e-07, + "logits/chosen": -1.5455334186553955, + "logits/rejected": -1.5793042182922363, + "logps/chosen": -104.68943786621094, + "logps/rejected": -178.998046875, + "loss": 0.0203, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0828378200531006, + "rewards/margins": 8.346479415893555, + "rewards/rejected": -10.429317474365234, + "step": 2167 + }, + { + "epoch": 3.48, + "learning_rate": 3.40764962346413e-07, + "logits/chosen": -1.5963637828826904, + "logits/rejected": -1.6142406463623047, + "logps/chosen": -79.9174575805664, + "logps/rejected": -200.72665405273438, + "loss": 0.0504, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7019015550613403, + "rewards/margins": 11.260303497314453, + "rewards/rejected": -11.962204933166504, + "step": 2168 + }, + { + "epoch": 3.48, + "learning_rate": 3.406658739595719e-07, + "logits/chosen": -1.7945754528045654, + "logits/rejected": -1.6899678707122803, + "logps/chosen": -102.81196594238281, + "logps/rejected": -158.63314819335938, + "loss": 0.0327, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9191205501556396, + "rewards/margins": 6.1256937980651855, + "rewards/rejected": -8.044815063476562, + "step": 2169 + }, + { + "epoch": 3.48, + "learning_rate": 3.405667855727309e-07, + "logits/chosen": -1.601143717765808, + "logits/rejected": -1.599191427230835, + "logps/chosen": -93.4360580444336, + "logps/rejected": -151.4473876953125, + "loss": 0.038, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0266518592834473, + "rewards/margins": 6.015458106994629, + "rewards/rejected": -8.042110443115234, + "step": 2170 + }, + { + "epoch": 3.48, + "learning_rate": 3.404676971858898e-07, + "logits/chosen": -1.770904779434204, + "logits/rejected": -1.8050644397735596, + "logps/chosen": -117.44557189941406, + "logps/rejected": -191.75460815429688, + "loss": 0.0079, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.874706745147705, + "rewards/margins": 7.015566349029541, + "rewards/rejected": -9.890273094177246, + "step": 2171 + }, + { + "epoch": 3.49, + "learning_rate": 3.403686087990487e-07, + "logits/chosen": -1.5838853120803833, + "logits/rejected": -1.678159475326538, + "logps/chosen": -85.75621795654297, + "logps/rejected": -196.12625122070312, + "loss": 0.0606, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6742981672286987, + "rewards/margins": 8.555357933044434, + "rewards/rejected": -10.229656219482422, + "step": 2172 + }, + { + "epoch": 3.49, + "learning_rate": 3.402695204122077e-07, + "logits/chosen": -1.595556616783142, + "logits/rejected": -1.712563157081604, + "logps/chosen": -100.57102966308594, + "logps/rejected": -165.27557373046875, + "loss": 0.0172, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1026413440704346, + "rewards/margins": 5.061697959899902, + "rewards/rejected": -6.164339542388916, + "step": 2173 + }, + { + "epoch": 3.49, + "learning_rate": 3.401704320253666e-07, + "logits/chosen": -1.6338465213775635, + "logits/rejected": -1.655766487121582, + "logps/chosen": -139.91232299804688, + "logps/rejected": -162.613525390625, + "loss": 0.0559, + "rewards/accuracies": 0.75, + "rewards/chosen": -3.9903364181518555, + "rewards/margins": 2.745312213897705, + "rewards/rejected": -6.735648155212402, + "step": 2174 + }, + { + "epoch": 3.49, + "learning_rate": 3.4007134363852557e-07, + "logits/chosen": -1.583310604095459, + "logits/rejected": -1.509516954421997, + "logps/chosen": -129.80990600585938, + "logps/rejected": -156.01466369628906, + "loss": 0.029, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.5212085247039795, + "rewards/margins": 4.120067119598389, + "rewards/rejected": -7.641275405883789, + "step": 2175 + }, + { + "epoch": 3.49, + "learning_rate": 3.399722552516845e-07, + "logits/chosen": -1.7003259658813477, + "logits/rejected": -1.6756852865219116, + "logps/chosen": -118.69117736816406, + "logps/rejected": -203.1019287109375, + "loss": 0.0433, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8331968784332275, + "rewards/margins": 7.614658355712891, + "rewards/rejected": -9.447854995727539, + "step": 2176 + }, + { + "epoch": 3.49, + "learning_rate": 3.398731668648434e-07, + "logits/chosen": -1.5054864883422852, + "logits/rejected": -1.5412368774414062, + "logps/chosen": -99.67404174804688, + "logps/rejected": -192.83258056640625, + "loss": 0.0103, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4855995178222656, + "rewards/margins": 9.165367126464844, + "rewards/rejected": -11.65096664428711, + "step": 2177 + }, + { + "epoch": 3.5, + "learning_rate": 3.397740784780024e-07, + "logits/chosen": -1.7287299633026123, + "logits/rejected": -1.7606894969940186, + "logps/chosen": -76.34248352050781, + "logps/rejected": -149.45217895507812, + "loss": 0.0509, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8506460189819336, + "rewards/margins": 6.627985954284668, + "rewards/rejected": -7.478631973266602, + "step": 2178 + }, + { + "epoch": 3.5, + "learning_rate": 3.396749900911613e-07, + "logits/chosen": -1.6097536087036133, + "logits/rejected": -1.6574169397354126, + "logps/chosen": -114.58055114746094, + "logps/rejected": -158.13880920410156, + "loss": 0.0348, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5772671699523926, + "rewards/margins": 5.356046676635742, + "rewards/rejected": -7.933313846588135, + "step": 2179 + }, + { + "epoch": 3.5, + "learning_rate": 3.395759017043202e-07, + "logits/chosen": -1.6381224393844604, + "logits/rejected": -1.7310000658035278, + "logps/chosen": -125.16942596435547, + "logps/rejected": -201.40597534179688, + "loss": 0.0596, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.0556318759918213, + "rewards/margins": 6.587934494018555, + "rewards/rejected": -9.643567085266113, + "step": 2180 + }, + { + "epoch": 3.5, + "learning_rate": 3.3947681331747917e-07, + "logits/chosen": -1.6124820709228516, + "logits/rejected": -1.594954013824463, + "logps/chosen": -109.06853485107422, + "logps/rejected": -179.33193969726562, + "loss": 0.0081, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.205012321472168, + "rewards/margins": 6.765749931335449, + "rewards/rejected": -9.970762252807617, + "step": 2181 + }, + { + "epoch": 3.5, + "learning_rate": 3.393777249306381e-07, + "logits/chosen": -1.7686759233474731, + "logits/rejected": -1.670459508895874, + "logps/chosen": -133.2688751220703, + "logps/rejected": -222.00338745117188, + "loss": 0.063, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.007737874984741, + "rewards/margins": 9.152046203613281, + "rewards/rejected": -11.159785270690918, + "step": 2182 + }, + { + "epoch": 3.5, + "learning_rate": 3.392786365437971e-07, + "logits/chosen": -1.6169896125793457, + "logits/rejected": -1.6745312213897705, + "logps/chosen": -78.66304016113281, + "logps/rejected": -166.64132690429688, + "loss": 0.01, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6870551705360413, + "rewards/margins": 8.052303314208984, + "rewards/rejected": -8.739358901977539, + "step": 2183 + }, + { + "epoch": 3.51, + "learning_rate": 3.39179548156956e-07, + "logits/chosen": -1.4757592678070068, + "logits/rejected": -1.603187084197998, + "logps/chosen": -112.26322174072266, + "logps/rejected": -206.55282592773438, + "loss": 0.0311, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.689795970916748, + "rewards/margins": 5.225701332092285, + "rewards/rejected": -9.915496826171875, + "step": 2184 + }, + { + "epoch": 3.51, + "learning_rate": 3.390804597701149e-07, + "logits/chosen": -1.6145761013031006, + "logits/rejected": -1.5781753063201904, + "logps/chosen": -87.34968566894531, + "logps/rejected": -176.5531768798828, + "loss": 0.1058, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.6740260124206543, + "rewards/margins": 7.926778793334961, + "rewards/rejected": -10.600804328918457, + "step": 2185 + }, + { + "epoch": 3.51, + "learning_rate": 3.3898137138327386e-07, + "logits/chosen": -1.6855660676956177, + "logits/rejected": -1.730371117591858, + "logps/chosen": -108.11871337890625, + "logps/rejected": -203.3104248046875, + "loss": 0.0153, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.306955575942993, + "rewards/margins": 9.912744522094727, + "rewards/rejected": -12.21969985961914, + "step": 2186 + }, + { + "epoch": 3.51, + "learning_rate": 3.3888228299643277e-07, + "logits/chosen": -1.676460862159729, + "logits/rejected": -1.5703682899475098, + "logps/chosen": -101.76032257080078, + "logps/rejected": -182.075439453125, + "loss": 0.0075, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0002734661102295, + "rewards/margins": 8.948898315429688, + "rewards/rejected": -10.94917106628418, + "step": 2187 + }, + { + "epoch": 3.51, + "learning_rate": 3.387831946095918e-07, + "logits/chosen": -1.664189100265503, + "logits/rejected": -1.7848117351531982, + "logps/chosen": -116.78555297851562, + "logps/rejected": -207.26173400878906, + "loss": 0.0566, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7308411598205566, + "rewards/margins": 8.269793510437012, + "rewards/rejected": -12.000635147094727, + "step": 2188 + }, + { + "epoch": 3.51, + "learning_rate": 3.386841062227507e-07, + "logits/chosen": -1.7437033653259277, + "logits/rejected": -1.7369760274887085, + "logps/chosen": -136.3424072265625, + "logps/rejected": -218.85086059570312, + "loss": 0.0452, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.748770236968994, + "rewards/margins": 7.828930854797363, + "rewards/rejected": -11.577701568603516, + "step": 2189 + }, + { + "epoch": 3.52, + "learning_rate": 3.385850178359096e-07, + "logits/chosen": -1.6202707290649414, + "logits/rejected": -1.5667678117752075, + "logps/chosen": -120.86553192138672, + "logps/rejected": -196.01837158203125, + "loss": 0.0532, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.343974590301514, + "rewards/margins": 7.6044921875, + "rewards/rejected": -11.948466300964355, + "step": 2190 + }, + { + "epoch": 3.52, + "learning_rate": 3.3848592944906856e-07, + "logits/chosen": -1.5916898250579834, + "logits/rejected": -1.6580067873001099, + "logps/chosen": -98.63236999511719, + "logps/rejected": -202.04701232910156, + "loss": 0.0065, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.987713575363159, + "rewards/margins": 7.232316970825195, + "rewards/rejected": -10.220030784606934, + "step": 2191 + }, + { + "epoch": 3.52, + "learning_rate": 3.3838684106222746e-07, + "logits/chosen": -1.618975281715393, + "logits/rejected": -1.7239468097686768, + "logps/chosen": -115.52989959716797, + "logps/rejected": -212.44586181640625, + "loss": 0.0105, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.455399751663208, + "rewards/margins": 7.932098388671875, + "rewards/rejected": -10.387497901916504, + "step": 2192 + }, + { + "epoch": 3.52, + "learning_rate": 3.382877526753865e-07, + "logits/chosen": -1.7617734670639038, + "logits/rejected": -1.6921441555023193, + "logps/chosen": -125.24949645996094, + "logps/rejected": -224.56398010253906, + "loss": 0.0219, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.183316230773926, + "rewards/margins": 11.336982727050781, + "rewards/rejected": -14.520299911499023, + "step": 2193 + }, + { + "epoch": 3.52, + "learning_rate": 3.381886642885454e-07, + "logits/chosen": -1.779747724533081, + "logits/rejected": -1.7777910232543945, + "logps/chosen": -98.57221221923828, + "logps/rejected": -198.21597290039062, + "loss": 0.0065, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7811456918716431, + "rewards/margins": 10.550959587097168, + "rewards/rejected": -11.33210563659668, + "step": 2194 + }, + { + "epoch": 3.52, + "learning_rate": 3.380895759017043e-07, + "logits/chosen": -1.68574857711792, + "logits/rejected": -1.6739004850387573, + "logps/chosen": -96.83192443847656, + "logps/rejected": -220.6968994140625, + "loss": 0.008, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.274838447570801, + "rewards/margins": 11.156787872314453, + "rewards/rejected": -13.431626319885254, + "step": 2195 + }, + { + "epoch": 3.52, + "learning_rate": 3.3799048751486325e-07, + "logits/chosen": -1.6576963663101196, + "logits/rejected": -1.623194694519043, + "logps/chosen": -94.11833953857422, + "logps/rejected": -185.60928344726562, + "loss": 0.0281, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6137702465057373, + "rewards/margins": 7.65414571762085, + "rewards/rejected": -9.267916679382324, + "step": 2196 + }, + { + "epoch": 3.53, + "learning_rate": 3.3789139912802216e-07, + "logits/chosen": -1.6625865697860718, + "logits/rejected": -1.682747483253479, + "logps/chosen": -112.37965393066406, + "logps/rejected": -187.8845977783203, + "loss": 0.0071, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.299849987030029, + "rewards/margins": 6.7868804931640625, + "rewards/rejected": -12.08673095703125, + "step": 2197 + }, + { + "epoch": 3.53, + "learning_rate": 3.377923107411811e-07, + "logits/chosen": -1.5759074687957764, + "logits/rejected": -1.5447046756744385, + "logps/chosen": -136.9354248046875, + "logps/rejected": -204.7698974609375, + "loss": 0.0405, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.918977737426758, + "rewards/margins": 6.905411720275879, + "rewards/rejected": -10.824389457702637, + "step": 2198 + }, + { + "epoch": 3.53, + "learning_rate": 3.376932223543401e-07, + "logits/chosen": -1.5186904668807983, + "logits/rejected": -1.5501371622085571, + "logps/chosen": -113.7569580078125, + "logps/rejected": -191.30023193359375, + "loss": 0.0268, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.732248544692993, + "rewards/margins": 7.999310493469238, + "rewards/rejected": -10.731559753417969, + "step": 2199 + }, + { + "epoch": 3.53, + "learning_rate": 3.37594133967499e-07, + "logits/chosen": -1.6720471382141113, + "logits/rejected": -1.6990073919296265, + "logps/chosen": -95.16914367675781, + "logps/rejected": -185.11512756347656, + "loss": 0.0234, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4308176040649414, + "rewards/margins": 7.85296630859375, + "rewards/rejected": -10.283783912658691, + "step": 2200 + }, + { + "epoch": 3.53, + "learning_rate": 3.3749504558065794e-07, + "logits/chosen": -1.5515761375427246, + "logits/rejected": -1.7411341667175293, + "logps/chosen": -119.11207580566406, + "logps/rejected": -235.58932495117188, + "loss": 0.0147, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.843124866485596, + "rewards/margins": 8.674072265625, + "rewards/rejected": -13.517196655273438, + "step": 2201 + }, + { + "epoch": 3.53, + "learning_rate": 3.3739595719381685e-07, + "logits/chosen": -1.733351707458496, + "logits/rejected": -1.7337539196014404, + "logps/chosen": -97.0371322631836, + "logps/rejected": -170.8935546875, + "loss": 0.0263, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.75014066696167, + "rewards/margins": 7.784721374511719, + "rewards/rejected": -9.534862518310547, + "step": 2202 + }, + { + "epoch": 3.54, + "learning_rate": 3.372968688069758e-07, + "logits/chosen": -1.5356403589248657, + "logits/rejected": -1.596405267715454, + "logps/chosen": -132.21670532226562, + "logps/rejected": -191.58277893066406, + "loss": 0.0285, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.619069576263428, + "rewards/margins": 5.685756683349609, + "rewards/rejected": -10.304825782775879, + "step": 2203 + }, + { + "epoch": 3.54, + "learning_rate": 3.3719778042013477e-07, + "logits/chosen": -1.8015656471252441, + "logits/rejected": -1.842349886894226, + "logps/chosen": -143.68746948242188, + "logps/rejected": -231.2112274169922, + "loss": 0.053, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.13079309463501, + "rewards/margins": 8.553049087524414, + "rewards/rejected": -12.683842658996582, + "step": 2204 + }, + { + "epoch": 3.54, + "learning_rate": 3.370986920332937e-07, + "logits/chosen": -1.589627981185913, + "logits/rejected": -1.561662197113037, + "logps/chosen": -93.67778778076172, + "logps/rejected": -176.78030395507812, + "loss": 0.0119, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.328676223754883, + "rewards/margins": 6.6245317459106445, + "rewards/rejected": -8.953207969665527, + "step": 2205 + }, + { + "epoch": 3.54, + "learning_rate": 3.3699960364645264e-07, + "logits/chosen": -1.4915751218795776, + "logits/rejected": -1.5112242698669434, + "logps/chosen": -120.44245910644531, + "logps/rejected": -231.91175842285156, + "loss": 0.0118, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7680115699768066, + "rewards/margins": 8.288870811462402, + "rewards/rejected": -12.056882858276367, + "step": 2206 + }, + { + "epoch": 3.54, + "learning_rate": 3.3690051525961154e-07, + "logits/chosen": -1.7280364036560059, + "logits/rejected": -1.8369883298873901, + "logps/chosen": -88.6279067993164, + "logps/rejected": -156.9542694091797, + "loss": 0.028, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.675112247467041, + "rewards/margins": 6.126389026641846, + "rewards/rejected": -8.801501274108887, + "step": 2207 + }, + { + "epoch": 3.54, + "learning_rate": 3.368014268727705e-07, + "logits/chosen": -1.5656670331954956, + "logits/rejected": -1.603361964225769, + "logps/chosen": -99.00666046142578, + "logps/rejected": -164.24673461914062, + "loss": 0.0273, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.722357749938965, + "rewards/margins": 5.524351119995117, + "rewards/rejected": -8.246708869934082, + "step": 2208 + }, + { + "epoch": 3.55, + "learning_rate": 3.3670233848592946e-07, + "logits/chosen": -1.7547831535339355, + "logits/rejected": -1.7362349033355713, + "logps/chosen": -123.79874420166016, + "logps/rejected": -180.7222900390625, + "loss": 0.0517, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.750986337661743, + "rewards/margins": 5.894593238830566, + "rewards/rejected": -8.645580291748047, + "step": 2209 + }, + { + "epoch": 3.55, + "learning_rate": 3.3660325009908837e-07, + "logits/chosen": -1.5473390817642212, + "logits/rejected": -1.5781065225601196, + "logps/chosen": -119.60375213623047, + "logps/rejected": -187.90890502929688, + "loss": 0.018, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1592600345611572, + "rewards/margins": 7.436932563781738, + "rewards/rejected": -10.596192359924316, + "step": 2210 + }, + { + "epoch": 3.55, + "learning_rate": 3.3650416171224733e-07, + "logits/chosen": -1.6235580444335938, + "logits/rejected": -1.616828441619873, + "logps/chosen": -99.59192657470703, + "logps/rejected": -171.66177368164062, + "loss": 0.0472, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.604792356491089, + "rewards/margins": 6.3724565505981445, + "rewards/rejected": -8.977249145507812, + "step": 2211 + }, + { + "epoch": 3.55, + "learning_rate": 3.3640507332540624e-07, + "logits/chosen": -1.6397091150283813, + "logits/rejected": -1.5779242515563965, + "logps/chosen": -116.86198425292969, + "logps/rejected": -172.9431610107422, + "loss": 0.0096, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9910216331481934, + "rewards/margins": 6.268888473510742, + "rewards/rejected": -10.259909629821777, + "step": 2212 + }, + { + "epoch": 3.55, + "learning_rate": 3.363059849385652e-07, + "logits/chosen": -1.5800119638442993, + "logits/rejected": -1.5377910137176514, + "logps/chosen": -141.0878448486328, + "logps/rejected": -213.0290069580078, + "loss": 0.0297, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.608948230743408, + "rewards/margins": 8.761387825012207, + "rewards/rejected": -12.370336532592773, + "step": 2213 + }, + { + "epoch": 3.55, + "learning_rate": 3.362068965517241e-07, + "logits/chosen": -1.6727968454360962, + "logits/rejected": -1.7212371826171875, + "logps/chosen": -94.72151947021484, + "logps/rejected": -177.04563903808594, + "loss": 0.0062, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2064785957336426, + "rewards/margins": 7.931061744689941, + "rewards/rejected": -10.137540817260742, + "step": 2214 + }, + { + "epoch": 3.56, + "learning_rate": 3.3610780816488306e-07, + "logits/chosen": -1.7735649347305298, + "logits/rejected": -1.820259690284729, + "logps/chosen": -117.69949340820312, + "logps/rejected": -202.99038696289062, + "loss": 0.1164, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.507907390594482, + "rewards/margins": 6.491986274719238, + "rewards/rejected": -10.999894142150879, + "step": 2215 + }, + { + "epoch": 3.56, + "learning_rate": 3.36008719778042e-07, + "logits/chosen": -1.6771914958953857, + "logits/rejected": -1.6688172817230225, + "logps/chosen": -117.81047058105469, + "logps/rejected": -174.03167724609375, + "loss": 0.0709, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.7743401527404785, + "rewards/margins": 6.387953281402588, + "rewards/rejected": -11.16229248046875, + "step": 2216 + }, + { + "epoch": 3.56, + "learning_rate": 3.3590963139120093e-07, + "logits/chosen": -1.6570510864257812, + "logits/rejected": -1.7413548231124878, + "logps/chosen": -88.157958984375, + "logps/rejected": -215.14117431640625, + "loss": 0.0357, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.99481999874115, + "rewards/margins": 9.930700302124023, + "rewards/rejected": -11.925519943237305, + "step": 2217 + }, + { + "epoch": 3.56, + "learning_rate": 3.3581054300435984e-07, + "logits/chosen": -1.7173500061035156, + "logits/rejected": -1.690964698791504, + "logps/chosen": -138.04469299316406, + "logps/rejected": -204.36044311523438, + "loss": 0.0147, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.5717313289642334, + "rewards/margins": 7.983512878417969, + "rewards/rejected": -11.555244445800781, + "step": 2218 + }, + { + "epoch": 3.56, + "learning_rate": 3.357114546175188e-07, + "logits/chosen": -1.6061077117919922, + "logits/rejected": -1.6889772415161133, + "logps/chosen": -98.9211654663086, + "logps/rejected": -181.8084259033203, + "loss": 0.0151, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8415679931640625, + "rewards/margins": 6.557866096496582, + "rewards/rejected": -9.399434089660645, + "step": 2219 + }, + { + "epoch": 3.56, + "learning_rate": 3.3561236623067776e-07, + "logits/chosen": -1.533785343170166, + "logits/rejected": -1.6268900632858276, + "logps/chosen": -114.76138305664062, + "logps/rejected": -169.86744689941406, + "loss": 0.0556, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.4756481647491455, + "rewards/margins": 5.946053504943848, + "rewards/rejected": -9.421701431274414, + "step": 2220 + }, + { + "epoch": 3.57, + "learning_rate": 3.355132778438367e-07, + "logits/chosen": -1.7191898822784424, + "logits/rejected": -1.7779878377914429, + "logps/chosen": -116.95243072509766, + "logps/rejected": -203.89085388183594, + "loss": 0.0415, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.502088785171509, + "rewards/margins": 7.74766731262207, + "rewards/rejected": -11.249754905700684, + "step": 2221 + }, + { + "epoch": 3.57, + "learning_rate": 3.354141894569956e-07, + "logits/chosen": -1.7274562120437622, + "logits/rejected": -1.7726225852966309, + "logps/chosen": -129.01365661621094, + "logps/rejected": -201.13844299316406, + "loss": 0.0245, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.553653240203857, + "rewards/margins": 5.236922264099121, + "rewards/rejected": -10.790575981140137, + "step": 2222 + }, + { + "epoch": 3.57, + "learning_rate": 3.3531510107015453e-07, + "logits/chosen": -1.6511423587799072, + "logits/rejected": -1.6641439199447632, + "logps/chosen": -111.41622161865234, + "logps/rejected": -189.50990295410156, + "loss": 0.0397, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.651482343673706, + "rewards/margins": 8.005288124084473, + "rewards/rejected": -9.656770706176758, + "step": 2223 + }, + { + "epoch": 3.57, + "learning_rate": 3.352160126833135e-07, + "logits/chosen": -1.6874476671218872, + "logits/rejected": -1.5531210899353027, + "logps/chosen": -153.70712280273438, + "logps/rejected": -221.50775146484375, + "loss": 0.0251, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.06813907623291, + "rewards/margins": 8.35676383972168, + "rewards/rejected": -13.424901962280273, + "step": 2224 + }, + { + "epoch": 3.57, + "learning_rate": 3.3511692429647245e-07, + "logits/chosen": -1.6181848049163818, + "logits/rejected": -1.5689749717712402, + "logps/chosen": -119.84063720703125, + "logps/rejected": -175.09182739257812, + "loss": 0.0284, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7988457679748535, + "rewards/margins": 7.491373538970947, + "rewards/rejected": -10.2902193069458, + "step": 2225 + }, + { + "epoch": 3.57, + "learning_rate": 3.350178359096314e-07, + "logits/chosen": -1.5885916948318481, + "logits/rejected": -1.5322649478912354, + "logps/chosen": -90.09568786621094, + "logps/rejected": -179.17556762695312, + "loss": 0.0063, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4758234024047852, + "rewards/margins": 8.35731029510498, + "rewards/rejected": -9.833133697509766, + "step": 2226 + }, + { + "epoch": 3.57, + "learning_rate": 3.349187475227903e-07, + "logits/chosen": -1.6924326419830322, + "logits/rejected": -1.6814393997192383, + "logps/chosen": -99.2391357421875, + "logps/rejected": -131.8724822998047, + "loss": 0.0894, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8938478231430054, + "rewards/margins": 3.782175064086914, + "rewards/rejected": -5.676023006439209, + "step": 2227 + }, + { + "epoch": 3.58, + "learning_rate": 3.348196591359492e-07, + "logits/chosen": -1.709230661392212, + "logits/rejected": -1.7801133394241333, + "logps/chosen": -126.95674896240234, + "logps/rejected": -191.69265747070312, + "loss": 0.0662, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.022183656692505, + "rewards/margins": 6.344205379486084, + "rewards/rejected": -9.366389274597168, + "step": 2228 + }, + { + "epoch": 3.58, + "learning_rate": 3.347205707491082e-07, + "logits/chosen": -1.4913146495819092, + "logits/rejected": -1.4165433645248413, + "logps/chosen": -138.1383056640625, + "logps/rejected": -209.15576171875, + "loss": 0.0086, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7259693145751953, + "rewards/margins": 9.120784759521484, + "rewards/rejected": -11.846753120422363, + "step": 2229 + }, + { + "epoch": 3.58, + "learning_rate": 3.3462148236226714e-07, + "logits/chosen": -1.560220718383789, + "logits/rejected": -1.5830823183059692, + "logps/chosen": -111.1532974243164, + "logps/rejected": -198.36456298828125, + "loss": 0.0211, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7562687397003174, + "rewards/margins": 8.696372032165527, + "rewards/rejected": -10.452640533447266, + "step": 2230 + }, + { + "epoch": 3.58, + "learning_rate": 3.345223939754261e-07, + "logits/chosen": -1.6603989601135254, + "logits/rejected": -1.7132729291915894, + "logps/chosen": -139.78591918945312, + "logps/rejected": -170.65179443359375, + "loss": 0.012, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6671664714813232, + "rewards/margins": 3.617896318435669, + "rewards/rejected": -6.285062789916992, + "step": 2231 + }, + { + "epoch": 3.58, + "learning_rate": 3.34423305588585e-07, + "logits/chosen": -1.7766631841659546, + "logits/rejected": -1.7438440322875977, + "logps/chosen": -103.82136535644531, + "logps/rejected": -197.41551208496094, + "loss": 0.0034, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6042543649673462, + "rewards/margins": 8.577926635742188, + "rewards/rejected": -10.182181358337402, + "step": 2232 + }, + { + "epoch": 3.58, + "learning_rate": 3.343242172017439e-07, + "logits/chosen": -1.6675878763198853, + "logits/rejected": -1.6285542249679565, + "logps/chosen": -130.81967163085938, + "logps/rejected": -189.7350311279297, + "loss": 0.0098, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1990835666656494, + "rewards/margins": 6.658381462097168, + "rewards/rejected": -9.857464790344238, + "step": 2233 + }, + { + "epoch": 3.59, + "learning_rate": 3.342251288149029e-07, + "logits/chosen": -1.5268471240997314, + "logits/rejected": -1.6294515132904053, + "logps/chosen": -127.32374572753906, + "logps/rejected": -232.93873596191406, + "loss": 0.0118, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.314704418182373, + "rewards/margins": 8.128697395324707, + "rewards/rejected": -12.443401336669922, + "step": 2234 + }, + { + "epoch": 3.59, + "learning_rate": 3.341260404280618e-07, + "logits/chosen": -1.7119054794311523, + "logits/rejected": -1.6640980243682861, + "logps/chosen": -111.43547821044922, + "logps/rejected": -157.20758056640625, + "loss": 0.0282, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.134040594100952, + "rewards/margins": 6.149867534637451, + "rewards/rejected": -9.283907890319824, + "step": 2235 + }, + { + "epoch": 3.59, + "learning_rate": 3.340269520412208e-07, + "logits/chosen": -1.5928608179092407, + "logits/rejected": -1.5748846530914307, + "logps/chosen": -159.46542358398438, + "logps/rejected": -208.7386016845703, + "loss": 0.0322, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.980015754699707, + "rewards/margins": 5.318713665008545, + "rewards/rejected": -11.29872989654541, + "step": 2236 + }, + { + "epoch": 3.59, + "learning_rate": 3.339278636543797e-07, + "logits/chosen": -1.5073680877685547, + "logits/rejected": -1.5554091930389404, + "logps/chosen": -119.78199768066406, + "logps/rejected": -179.97445678710938, + "loss": 0.0192, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7307143211364746, + "rewards/margins": 6.590792655944824, + "rewards/rejected": -10.32150650024414, + "step": 2237 + }, + { + "epoch": 3.59, + "learning_rate": 3.338287752675386e-07, + "logits/chosen": -1.6201958656311035, + "logits/rejected": -1.6870075464248657, + "logps/chosen": -118.17042541503906, + "logps/rejected": -188.1943359375, + "loss": 0.033, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3671867847442627, + "rewards/margins": 6.514526844024658, + "rewards/rejected": -9.8817138671875, + "step": 2238 + }, + { + "epoch": 3.59, + "learning_rate": 3.3372968688069757e-07, + "logits/chosen": -1.6522932052612305, + "logits/rejected": -1.6521302461624146, + "logps/chosen": -117.29963684082031, + "logps/rejected": -162.43057250976562, + "loss": 0.0217, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.402805805206299, + "rewards/margins": 6.133418083190918, + "rewards/rejected": -9.536224365234375, + "step": 2239 + }, + { + "epoch": 3.6, + "learning_rate": 3.336305984938565e-07, + "logits/chosen": -1.8291852474212646, + "logits/rejected": -1.8003087043762207, + "logps/chosen": -99.58392333984375, + "logps/rejected": -214.86419677734375, + "loss": 0.0065, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9135282039642334, + "rewards/margins": 11.624619483947754, + "rewards/rejected": -13.538147926330566, + "step": 2240 + }, + { + "epoch": 3.6, + "learning_rate": 3.335315101070155e-07, + "logits/chosen": -1.5295642614364624, + "logits/rejected": -1.5127875804901123, + "logps/chosen": -128.41290283203125, + "logps/rejected": -181.0443115234375, + "loss": 0.013, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.075294017791748, + "rewards/margins": 6.431306838989258, + "rewards/rejected": -9.506601333618164, + "step": 2241 + }, + { + "epoch": 3.6, + "learning_rate": 3.334324217201744e-07, + "logits/chosen": -1.8065533638000488, + "logits/rejected": -1.7797086238861084, + "logps/chosen": -132.897216796875, + "logps/rejected": -226.3004913330078, + "loss": 0.104, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.293903350830078, + "rewards/margins": 9.431037902832031, + "rewards/rejected": -12.72494125366211, + "step": 2242 + }, + { + "epoch": 3.6, + "learning_rate": 3.333333333333333e-07, + "logits/chosen": -1.7383289337158203, + "logits/rejected": -1.7263422012329102, + "logps/chosen": -116.19664001464844, + "logps/rejected": -207.2869110107422, + "loss": 0.0391, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7998377084732056, + "rewards/margins": 10.29233169555664, + "rewards/rejected": -12.092168807983398, + "step": 2243 + }, + { + "epoch": 3.6, + "learning_rate": 3.3323424494649226e-07, + "logits/chosen": -1.4913913011550903, + "logits/rejected": -1.5624334812164307, + "logps/chosen": -101.63388061523438, + "logps/rejected": -189.13377380371094, + "loss": 0.0125, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.644965171813965, + "rewards/margins": 8.185795783996582, + "rewards/rejected": -11.830760955810547, + "step": 2244 + }, + { + "epoch": 3.6, + "learning_rate": 3.3313515655965117e-07, + "logits/chosen": -1.646781325340271, + "logits/rejected": -1.6731444597244263, + "logps/chosen": -112.21378326416016, + "logps/rejected": -220.83624267578125, + "loss": 0.0365, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.8901472091674805, + "rewards/margins": 9.389264106750488, + "rewards/rejected": -13.279411315917969, + "step": 2245 + }, + { + "epoch": 3.61, + "learning_rate": 3.330360681728102e-07, + "logits/chosen": -1.7876710891723633, + "logits/rejected": -1.8417720794677734, + "logps/chosen": -130.73916625976562, + "logps/rejected": -213.50161743164062, + "loss": 0.1048, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.2430739402771, + "rewards/margins": 7.426568984985352, + "rewards/rejected": -11.669642448425293, + "step": 2246 + }, + { + "epoch": 3.61, + "learning_rate": 3.329369797859691e-07, + "logits/chosen": -1.8185949325561523, + "logits/rejected": -1.741937279701233, + "logps/chosen": -110.82194519042969, + "logps/rejected": -183.70513916015625, + "loss": 0.019, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9620469808578491, + "rewards/margins": 8.994590759277344, + "rewards/rejected": -10.956637382507324, + "step": 2247 + }, + { + "epoch": 3.61, + "learning_rate": 3.32837891399128e-07, + "logits/chosen": -1.6111469268798828, + "logits/rejected": -1.5694628953933716, + "logps/chosen": -121.46887969970703, + "logps/rejected": -185.1714630126953, + "loss": 0.0117, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.657587766647339, + "rewards/margins": 7.651818752288818, + "rewards/rejected": -10.309406280517578, + "step": 2248 + }, + { + "epoch": 3.61, + "learning_rate": 3.3273880301228695e-07, + "logits/chosen": -1.6857022047042847, + "logits/rejected": -1.6532087326049805, + "logps/chosen": -125.72998046875, + "logps/rejected": -168.48797607421875, + "loss": 0.0076, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.4643027782440186, + "rewards/margins": 5.060924053192139, + "rewards/rejected": -8.525227546691895, + "step": 2249 + }, + { + "epoch": 3.61, + "learning_rate": 3.3263971462544586e-07, + "logits/chosen": -1.6912109851837158, + "logits/rejected": -1.6285903453826904, + "logps/chosen": -117.2468032836914, + "logps/rejected": -162.56112670898438, + "loss": 0.0187, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3808088302612305, + "rewards/margins": 5.63465690612793, + "rewards/rejected": -9.01546573638916, + "step": 2250 + }, + { + "epoch": 3.61, + "learning_rate": 3.3254062623860477e-07, + "logits/chosen": -1.6466959714889526, + "logits/rejected": -1.692476511001587, + "logps/chosen": -117.78041076660156, + "logps/rejected": -179.09754943847656, + "loss": 0.0434, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7616307735443115, + "rewards/margins": 5.663517951965332, + "rewards/rejected": -8.425148963928223, + "step": 2251 + }, + { + "epoch": 3.61, + "learning_rate": 3.324415378517638e-07, + "logits/chosen": -1.559828519821167, + "logits/rejected": -1.6599076986312866, + "logps/chosen": -105.68682098388672, + "logps/rejected": -194.6924591064453, + "loss": 0.0305, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4081475734710693, + "rewards/margins": 7.569893836975098, + "rewards/rejected": -9.97804069519043, + "step": 2252 + }, + { + "epoch": 3.62, + "learning_rate": 3.323424494649227e-07, + "logits/chosen": -1.7765926122665405, + "logits/rejected": -1.7600340843200684, + "logps/chosen": -95.71693420410156, + "logps/rejected": -213.9748992919922, + "loss": 0.0145, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0061025619506836, + "rewards/margins": 10.594067573547363, + "rewards/rejected": -12.600170135498047, + "step": 2253 + }, + { + "epoch": 3.62, + "learning_rate": 3.3224336107808165e-07, + "logits/chosen": -1.6996288299560547, + "logits/rejected": -1.7322828769683838, + "logps/chosen": -137.75784301757812, + "logps/rejected": -186.53627014160156, + "loss": 0.0183, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.0883371829986572, + "rewards/margins": 6.453249931335449, + "rewards/rejected": -9.541586875915527, + "step": 2254 + }, + { + "epoch": 3.62, + "learning_rate": 3.3214427269124055e-07, + "logits/chosen": -1.4822311401367188, + "logits/rejected": -1.4938368797302246, + "logps/chosen": -106.28611755371094, + "logps/rejected": -193.651611328125, + "loss": 0.068, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1427273750305176, + "rewards/margins": 8.771964073181152, + "rewards/rejected": -10.914691925048828, + "step": 2255 + }, + { + "epoch": 3.62, + "learning_rate": 3.3204518430439946e-07, + "logits/chosen": -1.6558715105056763, + "logits/rejected": -1.737040638923645, + "logps/chosen": -102.8006820678711, + "logps/rejected": -187.3025360107422, + "loss": 0.0212, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.4277501106262207, + "rewards/margins": 6.512253761291504, + "rewards/rejected": -9.940003395080566, + "step": 2256 + }, + { + "epoch": 3.62, + "learning_rate": 3.3194609591755847e-07, + "logits/chosen": -1.6246564388275146, + "logits/rejected": -1.6515123844146729, + "logps/chosen": -157.03048706054688, + "logps/rejected": -227.7128143310547, + "loss": 0.023, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.307823657989502, + "rewards/margins": 8.545865058898926, + "rewards/rejected": -12.85368824005127, + "step": 2257 + }, + { + "epoch": 3.62, + "learning_rate": 3.318470075307174e-07, + "logits/chosen": -1.52339768409729, + "logits/rejected": -1.5662024021148682, + "logps/chosen": -106.58567810058594, + "logps/rejected": -207.32452392578125, + "loss": 0.007, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.545586585998535, + "rewards/margins": 9.422029495239258, + "rewards/rejected": -12.967616081237793, + "step": 2258 + }, + { + "epoch": 3.63, + "learning_rate": 3.3174791914387634e-07, + "logits/chosen": -1.69525146484375, + "logits/rejected": -1.6240246295928955, + "logps/chosen": -111.90753173828125, + "logps/rejected": -142.46334838867188, + "loss": 0.0567, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4758071899414062, + "rewards/margins": 5.237011909484863, + "rewards/rejected": -7.712818622589111, + "step": 2259 + }, + { + "epoch": 3.63, + "learning_rate": 3.3164883075703525e-07, + "logits/chosen": -1.6090526580810547, + "logits/rejected": -1.6389533281326294, + "logps/chosen": -112.5648193359375, + "logps/rejected": -190.21910095214844, + "loss": 0.0047, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3296592235565186, + "rewards/margins": 7.295497894287109, + "rewards/rejected": -9.625157356262207, + "step": 2260 + }, + { + "epoch": 3.63, + "learning_rate": 3.3154974237019415e-07, + "logits/chosen": -1.6576104164123535, + "logits/rejected": -1.5804264545440674, + "logps/chosen": -131.62606811523438, + "logps/rejected": -161.01837158203125, + "loss": 0.0618, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.2749502658843994, + "rewards/margins": 6.147180557250977, + "rewards/rejected": -9.422130584716797, + "step": 2261 + }, + { + "epoch": 3.63, + "learning_rate": 3.3145065398335317e-07, + "logits/chosen": -1.590188980102539, + "logits/rejected": -1.5429344177246094, + "logps/chosen": -120.29436492919922, + "logps/rejected": -179.65237426757812, + "loss": 0.0151, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.312166213989258, + "rewards/margins": 6.210004806518555, + "rewards/rejected": -9.522171020507812, + "step": 2262 + }, + { + "epoch": 3.63, + "learning_rate": 3.3135156559651207e-07, + "logits/chosen": -1.6708226203918457, + "logits/rejected": -1.6554505825042725, + "logps/chosen": -122.57577514648438, + "logps/rejected": -201.19924926757812, + "loss": 0.0406, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.784712553024292, + "rewards/margins": 8.511367797851562, + "rewards/rejected": -11.296079635620117, + "step": 2263 + }, + { + "epoch": 3.63, + "learning_rate": 3.3125247720967103e-07, + "logits/chosen": -1.6910431385040283, + "logits/rejected": -1.6773285865783691, + "logps/chosen": -155.87718200683594, + "logps/rejected": -242.3700714111328, + "loss": 0.0712, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.475171089172363, + "rewards/margins": 7.212989807128906, + "rewards/rejected": -12.68816089630127, + "step": 2264 + }, + { + "epoch": 3.64, + "learning_rate": 3.3115338882282994e-07, + "logits/chosen": -1.8337926864624023, + "logits/rejected": -1.8796274662017822, + "logps/chosen": -94.67475891113281, + "logps/rejected": -220.69610595703125, + "loss": 0.0088, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0445568561553955, + "rewards/margins": 11.898097038269043, + "rewards/rejected": -13.94265365600586, + "step": 2265 + }, + { + "epoch": 3.64, + "learning_rate": 3.3105430043598885e-07, + "logits/chosen": -1.6743953227996826, + "logits/rejected": -1.6979258060455322, + "logps/chosen": -104.16490173339844, + "logps/rejected": -144.90725708007812, + "loss": 0.0677, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1384482383728027, + "rewards/margins": 4.224229335784912, + "rewards/rejected": -7.362677574157715, + "step": 2266 + }, + { + "epoch": 3.64, + "learning_rate": 3.3095521204914786e-07, + "logits/chosen": -1.5063830614089966, + "logits/rejected": -1.5557621717453003, + "logps/chosen": -81.17405700683594, + "logps/rejected": -172.343505859375, + "loss": 0.0561, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.417820930480957, + "rewards/margins": 7.09390926361084, + "rewards/rejected": -9.511731147766113, + "step": 2267 + }, + { + "epoch": 3.64, + "learning_rate": 3.3085612366230677e-07, + "logits/chosen": -1.5921920537948608, + "logits/rejected": -1.6130906343460083, + "logps/chosen": -129.16793823242188, + "logps/rejected": -191.0594482421875, + "loss": 0.0084, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.917766809463501, + "rewards/margins": 6.288747787475586, + "rewards/rejected": -10.206514358520508, + "step": 2268 + }, + { + "epoch": 3.64, + "learning_rate": 3.307570352754657e-07, + "logits/chosen": -1.450277328491211, + "logits/rejected": -1.6031291484832764, + "logps/chosen": -101.74950408935547, + "logps/rejected": -244.2327423095703, + "loss": 0.0153, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.2815794944763184, + "rewards/margins": 8.815677642822266, + "rewards/rejected": -12.097256660461426, + "step": 2269 + }, + { + "epoch": 3.64, + "learning_rate": 3.3065794688862463e-07, + "logits/chosen": -1.6818299293518066, + "logits/rejected": -1.6519874334335327, + "logps/chosen": -95.14582061767578, + "logps/rejected": -181.36199951171875, + "loss": 0.0017, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6170029640197754, + "rewards/margins": 9.817673683166504, + "rewards/rejected": -11.434676170349121, + "step": 2270 + }, + { + "epoch": 3.65, + "learning_rate": 3.3055885850178354e-07, + "logits/chosen": -1.5903297662734985, + "logits/rejected": -1.63703191280365, + "logps/chosen": -120.62324523925781, + "logps/rejected": -194.5399169921875, + "loss": 0.0333, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.434066295623779, + "rewards/margins": 6.3973612785339355, + "rewards/rejected": -10.831428527832031, + "step": 2271 + }, + { + "epoch": 3.65, + "learning_rate": 3.3045977011494255e-07, + "logits/chosen": -1.6517356634140015, + "logits/rejected": -1.7616267204284668, + "logps/chosen": -118.96813201904297, + "logps/rejected": -231.98248291015625, + "loss": 0.0254, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.4106855392456055, + "rewards/margins": 9.353771209716797, + "rewards/rejected": -12.764455795288086, + "step": 2272 + }, + { + "epoch": 3.65, + "learning_rate": 3.3036068172810146e-07, + "logits/chosen": -1.747129201889038, + "logits/rejected": -1.6903430223464966, + "logps/chosen": -147.5254364013672, + "logps/rejected": -197.29803466796875, + "loss": 0.0734, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9193177223205566, + "rewards/margins": 7.916950225830078, + "rewards/rejected": -10.836268424987793, + "step": 2273 + }, + { + "epoch": 3.65, + "learning_rate": 3.302615933412604e-07, + "logits/chosen": -1.4064290523529053, + "logits/rejected": -1.4918147325515747, + "logps/chosen": -108.15161895751953, + "logps/rejected": -197.32269287109375, + "loss": 0.0166, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2438278198242188, + "rewards/margins": 8.106475830078125, + "rewards/rejected": -10.350303649902344, + "step": 2274 + }, + { + "epoch": 3.65, + "learning_rate": 3.301625049544193e-07, + "logits/chosen": -1.8094675540924072, + "logits/rejected": -1.7576035261154175, + "logps/chosen": -134.0812530517578, + "logps/rejected": -151.92526245117188, + "loss": 0.0437, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.8651912212371826, + "rewards/margins": 3.761399269104004, + "rewards/rejected": -7.626590728759766, + "step": 2275 + }, + { + "epoch": 3.65, + "learning_rate": 3.3006341656757823e-07, + "logits/chosen": -1.651664137840271, + "logits/rejected": -1.6258695125579834, + "logps/chosen": -138.14804077148438, + "logps/rejected": -235.55526733398438, + "loss": 0.0986, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.085735321044922, + "rewards/margins": 9.14148998260498, + "rewards/rejected": -13.227226257324219, + "step": 2276 + }, + { + "epoch": 3.65, + "learning_rate": 3.299643281807372e-07, + "logits/chosen": -1.853964924812317, + "logits/rejected": -1.6839885711669922, + "logps/chosen": -142.66424560546875, + "logps/rejected": -221.62692260742188, + "loss": 0.0058, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.8068461418151855, + "rewards/margins": 10.608430862426758, + "rewards/rejected": -14.415277481079102, + "step": 2277 + }, + { + "epoch": 3.66, + "learning_rate": 3.2986523979389615e-07, + "logits/chosen": -1.6637663841247559, + "logits/rejected": -1.7623672485351562, + "logps/chosen": -107.79478454589844, + "logps/rejected": -235.3152313232422, + "loss": 0.0183, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.951864719390869, + "rewards/margins": 11.422028541564941, + "rewards/rejected": -14.373893737792969, + "step": 2278 + }, + { + "epoch": 3.66, + "learning_rate": 3.297661514070551e-07, + "logits/chosen": -1.6593060493469238, + "logits/rejected": -1.6115483045578003, + "logps/chosen": -164.79518127441406, + "logps/rejected": -188.31771850585938, + "loss": 0.0587, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.276126384735107, + "rewards/margins": 5.092874526977539, + "rewards/rejected": -10.369000434875488, + "step": 2279 + }, + { + "epoch": 3.66, + "learning_rate": 3.29667063020214e-07, + "logits/chosen": -1.6324045658111572, + "logits/rejected": -1.5869152545928955, + "logps/chosen": -113.5484848022461, + "logps/rejected": -168.6986083984375, + "loss": 0.0978, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.136707782745361, + "rewards/margins": 5.263484954833984, + "rewards/rejected": -9.400193214416504, + "step": 2280 + }, + { + "epoch": 3.66, + "learning_rate": 3.295679746333729e-07, + "logits/chosen": -1.819186806678772, + "logits/rejected": -1.7145427465438843, + "logps/chosen": -127.93392181396484, + "logps/rejected": -188.18165588378906, + "loss": 0.0123, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8150625228881836, + "rewards/margins": 8.453293800354004, + "rewards/rejected": -10.268356323242188, + "step": 2281 + }, + { + "epoch": 3.66, + "learning_rate": 3.294688862465319e-07, + "logits/chosen": -1.799998164176941, + "logits/rejected": -1.7497378587722778, + "logps/chosen": -117.31425476074219, + "logps/rejected": -199.05392456054688, + "loss": 0.0255, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.5268406867980957, + "rewards/margins": 8.482148170471191, + "rewards/rejected": -12.008987426757812, + "step": 2282 + }, + { + "epoch": 3.66, + "learning_rate": 3.2936979785969084e-07, + "logits/chosen": -1.7611639499664307, + "logits/rejected": -1.6965222358703613, + "logps/chosen": -135.53115844726562, + "logps/rejected": -242.65768432617188, + "loss": 0.1409, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.491403818130493, + "rewards/margins": 9.956501007080078, + "rewards/rejected": -13.447904586791992, + "step": 2283 + }, + { + "epoch": 3.67, + "learning_rate": 3.292707094728498e-07, + "logits/chosen": -1.9025449752807617, + "logits/rejected": -1.8628513813018799, + "logps/chosen": -82.82514953613281, + "logps/rejected": -191.51644897460938, + "loss": 0.0164, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.40048977732658386, + "rewards/margins": 11.050196647644043, + "rewards/rejected": -11.45068645477295, + "step": 2284 + }, + { + "epoch": 3.67, + "learning_rate": 3.291716210860087e-07, + "logits/chosen": -1.7823518514633179, + "logits/rejected": -1.6091548204421997, + "logps/chosen": -149.95021057128906, + "logps/rejected": -186.4630584716797, + "loss": 0.0696, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.614593505859375, + "rewards/margins": 5.507284164428711, + "rewards/rejected": -11.121877670288086, + "step": 2285 + }, + { + "epoch": 3.67, + "learning_rate": 3.290725326991676e-07, + "logits/chosen": -1.5468422174453735, + "logits/rejected": -1.511552333831787, + "logps/chosen": -126.0462646484375, + "logps/rejected": -197.65701293945312, + "loss": 0.0259, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.5411200523376465, + "rewards/margins": 6.903808116912842, + "rewards/rejected": -11.444928169250488, + "step": 2286 + }, + { + "epoch": 3.67, + "learning_rate": 3.289734443123266e-07, + "logits/chosen": -1.5872944593429565, + "logits/rejected": -1.6591380834579468, + "logps/chosen": -108.10122680664062, + "logps/rejected": -216.50051879882812, + "loss": 0.0067, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.340151309967041, + "rewards/margins": 10.292902946472168, + "rewards/rejected": -12.633054733276367, + "step": 2287 + }, + { + "epoch": 3.67, + "learning_rate": 3.2887435592548554e-07, + "logits/chosen": -1.6974329948425293, + "logits/rejected": -1.7717838287353516, + "logps/chosen": -111.73988342285156, + "logps/rejected": -168.21075439453125, + "loss": 0.0173, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5773797035217285, + "rewards/margins": 4.821328639984131, + "rewards/rejected": -7.398708343505859, + "step": 2288 + }, + { + "epoch": 3.67, + "learning_rate": 3.2877526753864444e-07, + "logits/chosen": -1.8239482641220093, + "logits/rejected": -1.8261752128601074, + "logps/chosen": -135.1286163330078, + "logps/rejected": -223.80990600585938, + "loss": 0.0303, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.992556571960449, + "rewards/margins": 8.393324851989746, + "rewards/rejected": -12.385881423950195, + "step": 2289 + }, + { + "epoch": 3.68, + "learning_rate": 3.286761791518034e-07, + "logits/chosen": -1.7461308240890503, + "logits/rejected": -1.6807665824890137, + "logps/chosen": -120.10870361328125, + "logps/rejected": -182.51837158203125, + "loss": 0.04, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.2903270721435547, + "rewards/margins": 6.979238510131836, + "rewards/rejected": -10.269564628601074, + "step": 2290 + }, + { + "epoch": 3.68, + "learning_rate": 3.285770907649623e-07, + "logits/chosen": -1.7494211196899414, + "logits/rejected": -1.6254916191101074, + "logps/chosen": -93.9747543334961, + "logps/rejected": -175.0828857421875, + "loss": 0.0135, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.22830913960933685, + "rewards/margins": 9.647789001464844, + "rewards/rejected": -9.876097679138184, + "step": 2291 + }, + { + "epoch": 3.68, + "learning_rate": 3.2847800237812127e-07, + "logits/chosen": -1.586530327796936, + "logits/rejected": -1.547570824623108, + "logps/chosen": -128.0208740234375, + "logps/rejected": -180.78468322753906, + "loss": 0.0065, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1164228916168213, + "rewards/margins": 6.2070393562316895, + "rewards/rejected": -9.32346248626709, + "step": 2292 + }, + { + "epoch": 3.68, + "learning_rate": 3.2837891399128023e-07, + "logits/chosen": -1.773360252380371, + "logits/rejected": -1.787628173828125, + "logps/chosen": -134.08721923828125, + "logps/rejected": -180.96107482910156, + "loss": 0.0097, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.66825008392334, + "rewards/margins": 5.4873127937316895, + "rewards/rejected": -9.155562400817871, + "step": 2293 + }, + { + "epoch": 3.68, + "learning_rate": 3.2827982560443914e-07, + "logits/chosen": -1.6673506498336792, + "logits/rejected": -1.4731078147888184, + "logps/chosen": -118.15168762207031, + "logps/rejected": -180.10006713867188, + "loss": 0.0205, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9660184383392334, + "rewards/margins": 7.075699806213379, + "rewards/rejected": -9.041719436645508, + "step": 2294 + }, + { + "epoch": 3.68, + "learning_rate": 3.281807372175981e-07, + "logits/chosen": -1.6660858392715454, + "logits/rejected": -1.7462584972381592, + "logps/chosen": -123.79605865478516, + "logps/rejected": -204.95999145507812, + "loss": 0.0347, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.447722911834717, + "rewards/margins": 8.043126106262207, + "rewards/rejected": -10.490849494934082, + "step": 2295 + }, + { + "epoch": 3.69, + "learning_rate": 3.28081648830757e-07, + "logits/chosen": -1.7386506795883179, + "logits/rejected": -1.7775291204452515, + "logps/chosen": -161.58148193359375, + "logps/rejected": -240.81842041015625, + "loss": 0.0442, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.1474785804748535, + "rewards/margins": 7.108044624328613, + "rewards/rejected": -13.255523681640625, + "step": 2296 + }, + { + "epoch": 3.69, + "learning_rate": 3.2798256044391596e-07, + "logits/chosen": -1.6366751194000244, + "logits/rejected": -1.6886268854141235, + "logps/chosen": -117.91315460205078, + "logps/rejected": -226.99070739746094, + "loss": 0.0267, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.180783748626709, + "rewards/margins": 8.522754669189453, + "rewards/rejected": -12.70353889465332, + "step": 2297 + }, + { + "epoch": 3.69, + "learning_rate": 3.2788347205707487e-07, + "logits/chosen": -1.714130163192749, + "logits/rejected": -1.8303064107894897, + "logps/chosen": -110.7461929321289, + "logps/rejected": -198.17526245117188, + "loss": 0.195, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9007807970046997, + "rewards/margins": 7.292250633239746, + "rewards/rejected": -9.193031311035156, + "step": 2298 + }, + { + "epoch": 3.69, + "learning_rate": 3.2778438367023383e-07, + "logits/chosen": -1.532355546951294, + "logits/rejected": -1.548356056213379, + "logps/chosen": -87.28816223144531, + "logps/rejected": -174.44683837890625, + "loss": 0.0337, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7970457077026367, + "rewards/margins": 7.539129257202148, + "rewards/rejected": -10.336174964904785, + "step": 2299 + }, + { + "epoch": 3.69, + "learning_rate": 3.276852952833928e-07, + "logits/chosen": -1.5283081531524658, + "logits/rejected": -1.6062949895858765, + "logps/chosen": -85.52682495117188, + "logps/rejected": -214.8189239501953, + "loss": 0.0069, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.180251121520996, + "rewards/margins": 9.345355987548828, + "rewards/rejected": -10.525606155395508, + "step": 2300 + }, + { + "epoch": 3.69, + "learning_rate": 3.275862068965517e-07, + "logits/chosen": -1.594812273979187, + "logits/rejected": -1.5928547382354736, + "logps/chosen": -127.20219421386719, + "logps/rejected": -210.6011962890625, + "loss": 0.0095, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.8816609382629395, + "rewards/margins": 8.342187881469727, + "rewards/rejected": -12.223848342895508, + "step": 2301 + }, + { + "epoch": 3.7, + "learning_rate": 3.2748711850971066e-07, + "logits/chosen": -1.6104925870895386, + "logits/rejected": -1.6324795484542847, + "logps/chosen": -87.98396301269531, + "logps/rejected": -168.87997436523438, + "loss": 0.0035, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2816630601882935, + "rewards/margins": 7.584209442138672, + "rewards/rejected": -8.865872383117676, + "step": 2302 + }, + { + "epoch": 3.7, + "learning_rate": 3.2738803012286956e-07, + "logits/chosen": -1.8444292545318604, + "logits/rejected": -1.828599214553833, + "logps/chosen": -121.47574615478516, + "logps/rejected": -187.11708068847656, + "loss": 0.0073, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.043778419494629, + "rewards/margins": 7.6925048828125, + "rewards/rejected": -10.736283302307129, + "step": 2303 + }, + { + "epoch": 3.7, + "learning_rate": 3.272889417360285e-07, + "logits/chosen": -1.6889960765838623, + "logits/rejected": -1.655486822128296, + "logps/chosen": -128.1184844970703, + "logps/rejected": -197.08477783203125, + "loss": 0.029, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.1832966804504395, + "rewards/margins": 8.031497955322266, + "rewards/rejected": -12.214795112609863, + "step": 2304 + }, + { + "epoch": 3.7, + "learning_rate": 3.271898533491875e-07, + "logits/chosen": -1.6603795289993286, + "logits/rejected": -1.6515603065490723, + "logps/chosen": -137.33851623535156, + "logps/rejected": -192.21627807617188, + "loss": 0.0518, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.764671802520752, + "rewards/margins": 7.3756608963012695, + "rewards/rejected": -11.14033317565918, + "step": 2305 + }, + { + "epoch": 3.7, + "learning_rate": 3.270907649623464e-07, + "logits/chosen": -1.6981981992721558, + "logits/rejected": -1.7338566780090332, + "logps/chosen": -129.8831787109375, + "logps/rejected": -206.4558868408203, + "loss": 0.023, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.410679340362549, + "rewards/margins": 5.309022903442383, + "rewards/rejected": -9.71970272064209, + "step": 2306 + }, + { + "epoch": 3.7, + "learning_rate": 3.2699167657550535e-07, + "logits/chosen": -1.7845847606658936, + "logits/rejected": -1.7620651721954346, + "logps/chosen": -98.66643524169922, + "logps/rejected": -210.71356201171875, + "loss": 0.0238, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.308903694152832, + "rewards/margins": 11.332841873168945, + "rewards/rejected": -13.641746520996094, + "step": 2307 + }, + { + "epoch": 3.7, + "learning_rate": 3.2689258818866426e-07, + "logits/chosen": -1.6928433179855347, + "logits/rejected": -1.788573145866394, + "logps/chosen": -103.06317138671875, + "logps/rejected": -209.43600463867188, + "loss": 0.0104, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.329413414001465, + "rewards/margins": 9.216878890991211, + "rewards/rejected": -11.546292304992676, + "step": 2308 + }, + { + "epoch": 3.71, + "learning_rate": 3.267934998018232e-07, + "logits/chosen": -1.7074053287506104, + "logits/rejected": -1.7633161544799805, + "logps/chosen": -112.15960693359375, + "logps/rejected": -237.8001708984375, + "loss": 0.0499, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1866791248321533, + "rewards/margins": 8.852712631225586, + "rewards/rejected": -12.03939151763916, + "step": 2309 + }, + { + "epoch": 3.71, + "learning_rate": 3.266944114149822e-07, + "logits/chosen": -1.6660215854644775, + "logits/rejected": -1.6516250371932983, + "logps/chosen": -135.48291015625, + "logps/rejected": -227.9437255859375, + "loss": 0.0053, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.86887788772583, + "rewards/margins": 9.827211380004883, + "rewards/rejected": -13.696089744567871, + "step": 2310 + }, + { + "epoch": 3.71, + "learning_rate": 3.265953230281411e-07, + "logits/chosen": -1.5858216285705566, + "logits/rejected": -1.658750295639038, + "logps/chosen": -112.45283508300781, + "logps/rejected": -191.83660888671875, + "loss": 0.0177, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0501890182495117, + "rewards/margins": 8.776137351989746, + "rewards/rejected": -9.826326370239258, + "step": 2311 + }, + { + "epoch": 3.71, + "learning_rate": 3.2649623464130004e-07, + "logits/chosen": -1.5980432033538818, + "logits/rejected": -1.601485252380371, + "logps/chosen": -140.13108825683594, + "logps/rejected": -220.96124267578125, + "loss": 0.0046, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.156899452209473, + "rewards/margins": 7.748292922973633, + "rewards/rejected": -11.905192375183105, + "step": 2312 + }, + { + "epoch": 3.71, + "learning_rate": 3.2639714625445895e-07, + "logits/chosen": -1.7252349853515625, + "logits/rejected": -1.8121204376220703, + "logps/chosen": -149.87677001953125, + "logps/rejected": -214.54954528808594, + "loss": 0.0134, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.47411584854126, + "rewards/margins": 7.312758445739746, + "rewards/rejected": -12.786874771118164, + "step": 2313 + }, + { + "epoch": 3.71, + "learning_rate": 3.2629805786761786e-07, + "logits/chosen": -1.6578776836395264, + "logits/rejected": -1.5784997940063477, + "logps/chosen": -134.31613159179688, + "logps/rejected": -183.14366149902344, + "loss": 0.012, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.6023056507110596, + "rewards/margins": 7.38063907623291, + "rewards/rejected": -10.982945442199707, + "step": 2314 + }, + { + "epoch": 3.72, + "learning_rate": 3.2619896948077687e-07, + "logits/chosen": -1.7434546947479248, + "logits/rejected": -1.677778720855713, + "logps/chosen": -145.8841552734375, + "logps/rejected": -226.1613311767578, + "loss": 0.0063, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.107231616973877, + "rewards/margins": 8.385185241699219, + "rewards/rejected": -13.492416381835938, + "step": 2315 + }, + { + "epoch": 3.72, + "learning_rate": 3.260998810939358e-07, + "logits/chosen": -1.6497191190719604, + "logits/rejected": -1.6381596326828003, + "logps/chosen": -101.49665069580078, + "logps/rejected": -187.69161987304688, + "loss": 0.0731, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0963196754455566, + "rewards/margins": 7.026187896728516, + "rewards/rejected": -9.12250804901123, + "step": 2316 + }, + { + "epoch": 3.72, + "learning_rate": 3.2600079270709474e-07, + "logits/chosen": -1.652151346206665, + "logits/rejected": -1.6550602912902832, + "logps/chosen": -114.57373046875, + "logps/rejected": -200.62557983398438, + "loss": 0.0258, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.6586036682128906, + "rewards/margins": 8.001544952392578, + "rewards/rejected": -11.660148620605469, + "step": 2317 + }, + { + "epoch": 3.72, + "learning_rate": 3.2590170432025364e-07, + "logits/chosen": -1.6246683597564697, + "logits/rejected": -1.5807461738586426, + "logps/chosen": -98.36683654785156, + "logps/rejected": -167.97842407226562, + "loss": 0.0286, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9241061210632324, + "rewards/margins": 6.956235885620117, + "rewards/rejected": -8.880341529846191, + "step": 2318 + }, + { + "epoch": 3.72, + "learning_rate": 3.2580261593341255e-07, + "logits/chosen": -1.5350431203842163, + "logits/rejected": -1.5612502098083496, + "logps/chosen": -113.13674926757812, + "logps/rejected": -195.98410034179688, + "loss": 0.0109, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.065746307373047, + "rewards/margins": 5.726138114929199, + "rewards/rejected": -9.791884422302246, + "step": 2319 + }, + { + "epoch": 3.72, + "learning_rate": 3.2570352754657156e-07, + "logits/chosen": -1.5567461252212524, + "logits/rejected": -1.6080724000930786, + "logps/chosen": -101.8628158569336, + "logps/rejected": -161.1023406982422, + "loss": 0.0129, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7515063285827637, + "rewards/margins": 6.31574010848999, + "rewards/rejected": -9.067246437072754, + "step": 2320 + }, + { + "epoch": 3.73, + "learning_rate": 3.2560443915973047e-07, + "logits/chosen": -1.7068179845809937, + "logits/rejected": -1.7468098402023315, + "logps/chosen": -119.6235122680664, + "logps/rejected": -190.67669677734375, + "loss": 0.0057, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3555235862731934, + "rewards/margins": 8.19668960571289, + "rewards/rejected": -11.552212715148926, + "step": 2321 + }, + { + "epoch": 3.73, + "learning_rate": 3.255053507728894e-07, + "logits/chosen": -1.6337318420410156, + "logits/rejected": -1.7015316486358643, + "logps/chosen": -114.07081604003906, + "logps/rejected": -207.4059600830078, + "loss": 0.0138, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1083133220672607, + "rewards/margins": 8.78349494934082, + "rewards/rejected": -11.891809463500977, + "step": 2322 + }, + { + "epoch": 3.73, + "learning_rate": 3.2540626238604833e-07, + "logits/chosen": -1.8247907161712646, + "logits/rejected": -1.716069221496582, + "logps/chosen": -124.78273010253906, + "logps/rejected": -208.68226623535156, + "loss": 0.0066, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1644246578216553, + "rewards/margins": 10.699517250061035, + "rewards/rejected": -13.863941192626953, + "step": 2323 + }, + { + "epoch": 3.73, + "learning_rate": 3.2530717399920724e-07, + "logits/chosen": -1.5708287954330444, + "logits/rejected": -1.6379966735839844, + "logps/chosen": -157.41062927246094, + "logps/rejected": -191.54852294921875, + "loss": 0.0937, + "rewards/accuracies": 0.75, + "rewards/chosen": -6.938906192779541, + "rewards/margins": 3.676086664199829, + "rewards/rejected": -10.61499309539795, + "step": 2324 + }, + { + "epoch": 3.73, + "learning_rate": 3.2520808561236625e-07, + "logits/chosen": -1.6069053411483765, + "logits/rejected": -1.6728436946868896, + "logps/chosen": -117.98716735839844, + "logps/rejected": -201.4302520751953, + "loss": 0.0237, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.329711675643921, + "rewards/margins": 7.865008354187012, + "rewards/rejected": -11.194720268249512, + "step": 2325 + }, + { + "epoch": 3.73, + "learning_rate": 3.2510899722552516e-07, + "logits/chosen": -1.5425291061401367, + "logits/rejected": -1.484020471572876, + "logps/chosen": -124.76675415039062, + "logps/rejected": -212.63079833984375, + "loss": 0.0163, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.8208587169647217, + "rewards/margins": 8.365591049194336, + "rewards/rejected": -12.18644905090332, + "step": 2326 + }, + { + "epoch": 3.74, + "learning_rate": 3.2500990883868407e-07, + "logits/chosen": -1.5872973203659058, + "logits/rejected": -1.5947365760803223, + "logps/chosen": -140.48973083496094, + "logps/rejected": -178.7237548828125, + "loss": 0.1116, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.980966567993164, + "rewards/margins": 5.018335342407227, + "rewards/rejected": -9.999300956726074, + "step": 2327 + }, + { + "epoch": 3.74, + "learning_rate": 3.2491082045184303e-07, + "logits/chosen": -1.6542290449142456, + "logits/rejected": -1.6306202411651611, + "logps/chosen": -119.72705078125, + "logps/rejected": -170.83331298828125, + "loss": 0.0138, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1729466915130615, + "rewards/margins": 6.113062858581543, + "rewards/rejected": -9.2860107421875, + "step": 2328 + }, + { + "epoch": 3.74, + "learning_rate": 3.2481173206500193e-07, + "logits/chosen": -1.6668219566345215, + "logits/rejected": -1.6958229541778564, + "logps/chosen": -94.63520812988281, + "logps/rejected": -160.02536010742188, + "loss": 0.1087, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.857431173324585, + "rewards/margins": 6.320005416870117, + "rewards/rejected": -8.177435874938965, + "step": 2329 + }, + { + "epoch": 3.74, + "learning_rate": 3.2471264367816095e-07, + "logits/chosen": -1.610055923461914, + "logits/rejected": -1.6626219749450684, + "logps/chosen": -134.24740600585938, + "logps/rejected": -247.36093139648438, + "loss": 0.0241, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.773460865020752, + "rewards/margins": 10.339125633239746, + "rewards/rejected": -15.11258602142334, + "step": 2330 + }, + { + "epoch": 3.74, + "learning_rate": 3.2461355529131985e-07, + "logits/chosen": -1.6237703561782837, + "logits/rejected": -1.675407886505127, + "logps/chosen": -112.81748962402344, + "logps/rejected": -183.80679321289062, + "loss": 0.0055, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.484699249267578, + "rewards/margins": 4.704216957092285, + "rewards/rejected": -8.188915252685547, + "step": 2331 + }, + { + "epoch": 3.74, + "learning_rate": 3.2451446690447876e-07, + "logits/chosen": -1.8585542440414429, + "logits/rejected": -1.794982671737671, + "logps/chosen": -112.9943618774414, + "logps/rejected": -195.46054077148438, + "loss": 0.0243, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6111416816711426, + "rewards/margins": 7.873619079589844, + "rewards/rejected": -10.484761238098145, + "step": 2332 + }, + { + "epoch": 3.74, + "learning_rate": 3.244153785176377e-07, + "logits/chosen": -1.6421979665756226, + "logits/rejected": -1.751068353652954, + "logps/chosen": -111.4361572265625, + "logps/rejected": -242.04322814941406, + "loss": 0.0039, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3762691020965576, + "rewards/margins": 9.660629272460938, + "rewards/rejected": -12.036897659301758, + "step": 2333 + }, + { + "epoch": 3.75, + "learning_rate": 3.2431629013079663e-07, + "logits/chosen": -1.7615444660186768, + "logits/rejected": -1.7758668661117554, + "logps/chosen": -86.07138061523438, + "logps/rejected": -232.09561157226562, + "loss": 0.0266, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0530905723571777, + "rewards/margins": 12.606305122375488, + "rewards/rejected": -14.659395217895508, + "step": 2334 + }, + { + "epoch": 3.75, + "learning_rate": 3.2421720174395564e-07, + "logits/chosen": -1.6577832698822021, + "logits/rejected": -1.6700713634490967, + "logps/chosen": -104.39529418945312, + "logps/rejected": -190.57630920410156, + "loss": 0.0593, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4315361976623535, + "rewards/margins": 7.798834800720215, + "rewards/rejected": -10.230371475219727, + "step": 2335 + }, + { + "epoch": 3.75, + "learning_rate": 3.2411811335711455e-07, + "logits/chosen": -1.7692406177520752, + "logits/rejected": -1.74700129032135, + "logps/chosen": -124.5422592163086, + "logps/rejected": -182.0872802734375, + "loss": 0.023, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.151364326477051, + "rewards/margins": 5.880071640014648, + "rewards/rejected": -10.0314359664917, + "step": 2336 + }, + { + "epoch": 3.75, + "learning_rate": 3.2401902497027345e-07, + "logits/chosen": -1.7526332139968872, + "logits/rejected": -1.7095459699630737, + "logps/chosen": -117.37751770019531, + "logps/rejected": -168.01138305664062, + "loss": 0.0578, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.846921920776367, + "rewards/margins": 5.821063995361328, + "rewards/rejected": -8.667986869812012, + "step": 2337 + }, + { + "epoch": 3.75, + "learning_rate": 3.239199365834324e-07, + "logits/chosen": -1.5605442523956299, + "logits/rejected": -1.5163935422897339, + "logps/chosen": -134.75376892089844, + "logps/rejected": -220.61788940429688, + "loss": 0.0047, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.277048110961914, + "rewards/margins": 8.188934326171875, + "rewards/rejected": -11.465982437133789, + "step": 2338 + }, + { + "epoch": 3.75, + "learning_rate": 3.238208481965913e-07, + "logits/chosen": -1.803006649017334, + "logits/rejected": -1.8004748821258545, + "logps/chosen": -111.09736633300781, + "logps/rejected": -191.33941650390625, + "loss": 0.0259, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6571481227874756, + "rewards/margins": 6.831193923950195, + "rewards/rejected": -9.48834228515625, + "step": 2339 + }, + { + "epoch": 3.76, + "learning_rate": 3.237217598097503e-07, + "logits/chosen": -1.6906901597976685, + "logits/rejected": -1.7215460538864136, + "logps/chosen": -122.63764953613281, + "logps/rejected": -232.069580078125, + "loss": 0.0186, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.855917453765869, + "rewards/margins": 10.589788436889648, + "rewards/rejected": -13.44570541381836, + "step": 2340 + }, + { + "epoch": 3.76, + "learning_rate": 3.2362267142290924e-07, + "logits/chosen": -1.758908987045288, + "logits/rejected": -1.7656493186950684, + "logps/chosen": -120.91925048828125, + "logps/rejected": -209.1841583251953, + "loss": 0.007, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3603930473327637, + "rewards/margins": 8.657021522521973, + "rewards/rejected": -11.017414093017578, + "step": 2341 + }, + { + "epoch": 3.76, + "learning_rate": 3.2352358303606815e-07, + "logits/chosen": -1.6986446380615234, + "logits/rejected": -1.7664356231689453, + "logps/chosen": -143.6707000732422, + "logps/rejected": -224.37762451171875, + "loss": 0.1141, + "rewards/accuracies": 0.75, + "rewards/chosen": -5.645977973937988, + "rewards/margins": 5.843169212341309, + "rewards/rejected": -11.489147186279297, + "step": 2342 + }, + { + "epoch": 3.76, + "learning_rate": 3.234244946492271e-07, + "logits/chosen": -1.698897123336792, + "logits/rejected": -1.7202891111373901, + "logps/chosen": -112.34027099609375, + "logps/rejected": -222.67843627929688, + "loss": 0.0246, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.553938388824463, + "rewards/margins": 10.97614574432373, + "rewards/rejected": -14.530084609985352, + "step": 2343 + }, + { + "epoch": 3.76, + "learning_rate": 3.23325406262386e-07, + "logits/chosen": -1.8231227397918701, + "logits/rejected": -1.698907494544983, + "logps/chosen": -145.13267517089844, + "logps/rejected": -189.7211151123047, + "loss": 0.0598, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.2401304244995117, + "rewards/margins": 7.391724586486816, + "rewards/rejected": -10.631855010986328, + "step": 2344 + }, + { + "epoch": 3.76, + "learning_rate": 3.2322631787554497e-07, + "logits/chosen": -1.6546614170074463, + "logits/rejected": -1.6445395946502686, + "logps/chosen": -156.4005889892578, + "logps/rejected": -213.60418701171875, + "loss": 0.0889, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.281661033630371, + "rewards/margins": 6.893617630004883, + "rewards/rejected": -11.175278663635254, + "step": 2345 + }, + { + "epoch": 3.77, + "learning_rate": 3.2312722948870393e-07, + "logits/chosen": -1.6953094005584717, + "logits/rejected": -1.758908987045288, + "logps/chosen": -119.35540771484375, + "logps/rejected": -227.37368774414062, + "loss": 0.0366, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.35043478012085, + "rewards/margins": 8.784727096557617, + "rewards/rejected": -13.135162353515625, + "step": 2346 + }, + { + "epoch": 3.77, + "learning_rate": 3.2302814110186284e-07, + "logits/chosen": -1.7744874954223633, + "logits/rejected": -1.7590372562408447, + "logps/chosen": -122.46107482910156, + "logps/rejected": -183.22793579101562, + "loss": 0.069, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.192976713180542, + "rewards/margins": 8.055132865905762, + "rewards/rejected": -10.248109817504883, + "step": 2347 + }, + { + "epoch": 3.77, + "learning_rate": 3.229290527150218e-07, + "logits/chosen": -1.6820406913757324, + "logits/rejected": -1.6836700439453125, + "logps/chosen": -101.22765350341797, + "logps/rejected": -187.42156982421875, + "loss": 0.0483, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.75233793258667, + "rewards/margins": 7.89308500289917, + "rewards/rejected": -10.645423889160156, + "step": 2348 + }, + { + "epoch": 3.77, + "learning_rate": 3.228299643281807e-07, + "logits/chosen": -1.5699925422668457, + "logits/rejected": -1.5580295324325562, + "logps/chosen": -111.20899200439453, + "logps/rejected": -191.59292602539062, + "loss": 0.0054, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.48868989944458, + "rewards/margins": 5.292327404022217, + "rewards/rejected": -8.781017303466797, + "step": 2349 + }, + { + "epoch": 3.77, + "learning_rate": 3.2273087594133967e-07, + "logits/chosen": -1.5493375062942505, + "logits/rejected": -1.673671007156372, + "logps/chosen": -111.25311279296875, + "logps/rejected": -224.96951293945312, + "loss": 0.059, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7368862628936768, + "rewards/margins": 8.661574363708496, + "rewards/rejected": -12.398460388183594, + "step": 2350 + }, + { + "epoch": 3.77, + "learning_rate": 3.226317875544986e-07, + "logits/chosen": -1.6881983280181885, + "logits/rejected": -1.6678149700164795, + "logps/chosen": -142.19288635253906, + "logps/rejected": -218.66885375976562, + "loss": 0.0355, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.889679431915283, + "rewards/margins": 7.520153999328613, + "rewards/rejected": -12.409832954406738, + "step": 2351 + }, + { + "epoch": 3.78, + "learning_rate": 3.2253269916765753e-07, + "logits/chosen": -1.6784650087356567, + "logits/rejected": -1.6658210754394531, + "logps/chosen": -100.40452575683594, + "logps/rejected": -190.6077117919922, + "loss": 0.0677, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.958136558532715, + "rewards/margins": 8.885456085205078, + "rewards/rejected": -11.843591690063477, + "step": 2352 + }, + { + "epoch": 3.78, + "learning_rate": 3.224336107808165e-07, + "logits/chosen": -1.7221271991729736, + "logits/rejected": -1.7038224935531616, + "logps/chosen": -140.16152954101562, + "logps/rejected": -236.28660583496094, + "loss": 0.0189, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.551783084869385, + "rewards/margins": 9.807587623596191, + "rewards/rejected": -14.359370231628418, + "step": 2353 + }, + { + "epoch": 3.78, + "learning_rate": 3.223345223939754e-07, + "logits/chosen": -1.620375633239746, + "logits/rejected": -1.6621818542480469, + "logps/chosen": -146.3239288330078, + "logps/rejected": -211.66464233398438, + "loss": 0.0346, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.47210693359375, + "rewards/margins": 6.051090240478516, + "rewards/rejected": -11.523197174072266, + "step": 2354 + }, + { + "epoch": 3.78, + "learning_rate": 3.2223543400713436e-07, + "logits/chosen": -1.57815420627594, + "logits/rejected": -1.5759004354476929, + "logps/chosen": -112.56422424316406, + "logps/rejected": -195.11215209960938, + "loss": 0.0324, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9592342376708984, + "rewards/margins": 6.746616840362549, + "rewards/rejected": -10.705850601196289, + "step": 2355 + }, + { + "epoch": 3.78, + "learning_rate": 3.221363456202933e-07, + "logits/chosen": -1.7194838523864746, + "logits/rejected": -1.7013369798660278, + "logps/chosen": -128.4029998779297, + "logps/rejected": -209.1298065185547, + "loss": 0.014, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.851563930511475, + "rewards/margins": 6.782806396484375, + "rewards/rejected": -11.634370803833008, + "step": 2356 + }, + { + "epoch": 3.78, + "learning_rate": 3.220372572334522e-07, + "logits/chosen": -1.5145419836044312, + "logits/rejected": -1.5702106952667236, + "logps/chosen": -101.69506072998047, + "logps/rejected": -219.94424438476562, + "loss": 0.0245, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3172786235809326, + "rewards/margins": 8.498668670654297, + "rewards/rejected": -10.815947532653809, + "step": 2357 + }, + { + "epoch": 3.78, + "learning_rate": 3.219381688466112e-07, + "logits/chosen": -1.6025053262710571, + "logits/rejected": -1.546595573425293, + "logps/chosen": -138.9676513671875, + "logps/rejected": -217.17454528808594, + "loss": 0.0383, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.587996482849121, + "rewards/margins": 8.27702808380127, + "rewards/rejected": -12.86502456665039, + "step": 2358 + }, + { + "epoch": 3.79, + "learning_rate": 3.218390804597701e-07, + "logits/chosen": -1.6256418228149414, + "logits/rejected": -1.726883053779602, + "logps/chosen": -103.54171752929688, + "logps/rejected": -241.27059936523438, + "loss": 0.0037, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.119443893432617, + "rewards/margins": 10.963056564331055, + "rewards/rejected": -14.082500457763672, + "step": 2359 + }, + { + "epoch": 3.79, + "learning_rate": 3.21739992072929e-07, + "logits/chosen": -1.4867441654205322, + "logits/rejected": -1.4229871034622192, + "logps/chosen": -117.802490234375, + "logps/rejected": -173.11228942871094, + "loss": 0.0657, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.639409065246582, + "rewards/margins": 6.203106880187988, + "rewards/rejected": -10.84251594543457, + "step": 2360 + }, + { + "epoch": 3.79, + "learning_rate": 3.2164090368608796e-07, + "logits/chosen": -1.6992586851119995, + "logits/rejected": -1.718916654586792, + "logps/chosen": -136.71060180664062, + "logps/rejected": -229.361083984375, + "loss": 0.0133, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.53383207321167, + "rewards/margins": 8.447216987609863, + "rewards/rejected": -12.981049537658691, + "step": 2361 + }, + { + "epoch": 3.79, + "learning_rate": 3.215418152992469e-07, + "logits/chosen": -1.5216670036315918, + "logits/rejected": -1.5689162015914917, + "logps/chosen": -120.87297058105469, + "logps/rejected": -182.3489227294922, + "loss": 0.0194, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8417654037475586, + "rewards/margins": 6.106678009033203, + "rewards/rejected": -8.948444366455078, + "step": 2362 + }, + { + "epoch": 3.79, + "learning_rate": 3.214427269124059e-07, + "logits/chosen": -1.6336767673492432, + "logits/rejected": -1.6985676288604736, + "logps/chosen": -121.23904418945312, + "logps/rejected": -194.4447479248047, + "loss": 0.0107, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.633866548538208, + "rewards/margins": 7.228724002838135, + "rewards/rejected": -9.862590789794922, + "step": 2363 + }, + { + "epoch": 3.79, + "learning_rate": 3.213436385255648e-07, + "logits/chosen": -1.7223989963531494, + "logits/rejected": -1.7175618410110474, + "logps/chosen": -145.15274047851562, + "logps/rejected": -217.1385498046875, + "loss": 0.0078, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.208574295043945, + "rewards/margins": 6.679978847503662, + "rewards/rejected": -12.888551712036133, + "step": 2364 + }, + { + "epoch": 3.8, + "learning_rate": 3.212445501387237e-07, + "logits/chosen": -1.4615838527679443, + "logits/rejected": -1.5103819370269775, + "logps/chosen": -134.48748779296875, + "logps/rejected": -164.2128448486328, + "loss": 0.0233, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3462653160095215, + "rewards/margins": 3.9434120655059814, + "rewards/rejected": -7.289677619934082, + "step": 2365 + }, + { + "epoch": 3.8, + "learning_rate": 3.2114546175188265e-07, + "logits/chosen": -1.710462212562561, + "logits/rejected": -1.6803940534591675, + "logps/chosen": -119.16873168945312, + "logps/rejected": -168.99615478515625, + "loss": 0.041, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.70656156539917, + "rewards/margins": 4.804478645324707, + "rewards/rejected": -8.511040687561035, + "step": 2366 + }, + { + "epoch": 3.8, + "learning_rate": 3.210463733650416e-07, + "logits/chosen": -1.8582797050476074, + "logits/rejected": -1.869641661643982, + "logps/chosen": -112.74285888671875, + "logps/rejected": -196.01907348632812, + "loss": 0.0167, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1690802574157715, + "rewards/margins": 5.020543575286865, + "rewards/rejected": -8.189623832702637, + "step": 2367 + }, + { + "epoch": 3.8, + "learning_rate": 3.2094728497820057e-07, + "logits/chosen": -1.6420475244522095, + "logits/rejected": -1.6821105480194092, + "logps/chosen": -101.19828796386719, + "logps/rejected": -136.92135620117188, + "loss": 0.0792, + "rewards/accuracies": 0.75, + "rewards/chosen": -3.9937186241149902, + "rewards/margins": 2.7031593322753906, + "rewards/rejected": -6.696877479553223, + "step": 2368 + }, + { + "epoch": 3.8, + "learning_rate": 3.208481965913595e-07, + "logits/chosen": -1.6301823854446411, + "logits/rejected": -1.7212028503417969, + "logps/chosen": -111.843505859375, + "logps/rejected": -192.34494018554688, + "loss": 0.0137, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7747948169708252, + "rewards/margins": 9.053443908691406, + "rewards/rejected": -10.828239440917969, + "step": 2369 + }, + { + "epoch": 3.8, + "learning_rate": 3.207491082045184e-07, + "logits/chosen": -1.6988943815231323, + "logits/rejected": -1.6619665622711182, + "logps/chosen": -116.53617858886719, + "logps/rejected": -160.88766479492188, + "loss": 0.0052, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.622095584869385, + "rewards/margins": 4.9193315505981445, + "rewards/rejected": -9.541427612304688, + "step": 2370 + }, + { + "epoch": 3.81, + "learning_rate": 3.2065001981767734e-07, + "logits/chosen": -1.588718056678772, + "logits/rejected": -1.6597381830215454, + "logps/chosen": -119.9001235961914, + "logps/rejected": -257.44061279296875, + "loss": 0.0123, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1289985179901123, + "rewards/margins": 8.635900497436523, + "rewards/rejected": -11.764899253845215, + "step": 2371 + }, + { + "epoch": 3.81, + "learning_rate": 3.205509314308363e-07, + "logits/chosen": -1.683728814125061, + "logits/rejected": -1.7181702852249146, + "logps/chosen": -114.10076141357422, + "logps/rejected": -177.61656188964844, + "loss": 0.0364, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.449475288391113, + "rewards/margins": 5.39668607711792, + "rewards/rejected": -9.846160888671875, + "step": 2372 + }, + { + "epoch": 3.81, + "learning_rate": 3.2045184304399526e-07, + "logits/chosen": -1.6036990880966187, + "logits/rejected": -1.5912317037582397, + "logps/chosen": -111.8170394897461, + "logps/rejected": -181.03515625, + "loss": 0.0071, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.137857437133789, + "rewards/margins": 8.951726913452148, + "rewards/rejected": -10.089585304260254, + "step": 2373 + }, + { + "epoch": 3.81, + "learning_rate": 3.2035275465715417e-07, + "logits/chosen": -1.5701097249984741, + "logits/rejected": -1.6142956018447876, + "logps/chosen": -108.41219329833984, + "logps/rejected": -192.8760986328125, + "loss": 0.0276, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.445307970046997, + "rewards/margins": 8.14690113067627, + "rewards/rejected": -10.592208862304688, + "step": 2374 + }, + { + "epoch": 3.81, + "learning_rate": 3.202536662703131e-07, + "logits/chosen": -1.700121521949768, + "logits/rejected": -1.7189815044403076, + "logps/chosen": -105.03556060791016, + "logps/rejected": -184.64761352539062, + "loss": 0.0627, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.426473379135132, + "rewards/margins": 8.022259712219238, + "rewards/rejected": -11.44873332977295, + "step": 2375 + }, + { + "epoch": 3.81, + "learning_rate": 3.2015457788347204e-07, + "logits/chosen": -1.6585737466812134, + "logits/rejected": -1.6547126770019531, + "logps/chosen": -146.72642517089844, + "logps/rejected": -190.4581298828125, + "loss": 0.0299, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.476183891296387, + "rewards/margins": 6.1036906242370605, + "rewards/rejected": -10.579874038696289, + "step": 2376 + }, + { + "epoch": 3.82, + "learning_rate": 3.2005548949663094e-07, + "logits/chosen": -1.567196249961853, + "logits/rejected": -1.6953275203704834, + "logps/chosen": -120.19379425048828, + "logps/rejected": -219.40170288085938, + "loss": 0.0147, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.877488613128662, + "rewards/margins": 8.766547203063965, + "rewards/rejected": -12.644036293029785, + "step": 2377 + }, + { + "epoch": 3.82, + "learning_rate": 3.1995640110978996e-07, + "logits/chosen": -1.5845746994018555, + "logits/rejected": -1.6153608560562134, + "logps/chosen": -104.49258422851562, + "logps/rejected": -210.97772216796875, + "loss": 0.0313, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.224562883377075, + "rewards/margins": 7.893970489501953, + "rewards/rejected": -11.118534088134766, + "step": 2378 + }, + { + "epoch": 3.82, + "learning_rate": 3.1985731272294886e-07, + "logits/chosen": -1.601705551147461, + "logits/rejected": -1.6573858261108398, + "logps/chosen": -129.14215087890625, + "logps/rejected": -196.4967041015625, + "loss": 0.0076, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.214423656463623, + "rewards/margins": 7.137455940246582, + "rewards/rejected": -11.351880073547363, + "step": 2379 + }, + { + "epoch": 3.82, + "learning_rate": 3.1975822433610777e-07, + "logits/chosen": -1.604797601699829, + "logits/rejected": -1.6751773357391357, + "logps/chosen": -97.13069152832031, + "logps/rejected": -149.95175170898438, + "loss": 0.0314, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.6037068367004395, + "rewards/margins": 3.871598243713379, + "rewards/rejected": -7.475305557250977, + "step": 2380 + }, + { + "epoch": 3.82, + "learning_rate": 3.1965913594926673e-07, + "logits/chosen": -1.6576404571533203, + "logits/rejected": -1.625542163848877, + "logps/chosen": -105.68020629882812, + "logps/rejected": -158.76905822753906, + "loss": 0.0741, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7899646759033203, + "rewards/margins": 5.787967205047607, + "rewards/rejected": -8.577932357788086, + "step": 2381 + }, + { + "epoch": 3.82, + "learning_rate": 3.1956004756242564e-07, + "logits/chosen": -1.6971545219421387, + "logits/rejected": -1.6002506017684937, + "logps/chosen": -133.5108184814453, + "logps/rejected": -222.41064453125, + "loss": 0.0187, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.55681037902832, + "rewards/margins": 7.985811710357666, + "rewards/rejected": -13.542622566223145, + "step": 2382 + }, + { + "epoch": 3.83, + "learning_rate": 3.1946095917558465e-07, + "logits/chosen": -1.6375170946121216, + "logits/rejected": -1.7156740427017212, + "logps/chosen": -113.85963439941406, + "logps/rejected": -235.859619140625, + "loss": 0.0172, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.086759328842163, + "rewards/margins": 9.886980056762695, + "rewards/rejected": -11.973739624023438, + "step": 2383 + }, + { + "epoch": 3.83, + "learning_rate": 3.1936187078874356e-07, + "logits/chosen": -1.6829006671905518, + "logits/rejected": -1.5552351474761963, + "logps/chosen": -140.96824645996094, + "logps/rejected": -206.91104125976562, + "loss": 0.0108, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9780287742614746, + "rewards/margins": 9.050844192504883, + "rewards/rejected": -12.028873443603516, + "step": 2384 + }, + { + "epoch": 3.83, + "learning_rate": 3.1926278240190246e-07, + "logits/chosen": -1.724962830543518, + "logits/rejected": -1.739586353302002, + "logps/chosen": -100.47004699707031, + "logps/rejected": -179.01663208007812, + "loss": 0.0087, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5453970432281494, + "rewards/margins": 7.040555000305176, + "rewards/rejected": -9.585952758789062, + "step": 2385 + }, + { + "epoch": 3.83, + "learning_rate": 3.191636940150614e-07, + "logits/chosen": -1.701669692993164, + "logits/rejected": -1.74045729637146, + "logps/chosen": -129.35650634765625, + "logps/rejected": -188.23065185546875, + "loss": 0.0567, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.787810325622559, + "rewards/margins": 6.5041728019714355, + "rewards/rejected": -11.291983604431152, + "step": 2386 + }, + { + "epoch": 3.83, + "learning_rate": 3.1906460562822033e-07, + "logits/chosen": -1.563277244567871, + "logits/rejected": -1.5770463943481445, + "logps/chosen": -85.0738296508789, + "logps/rejected": -186.9622802734375, + "loss": 0.0425, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8521764278411865, + "rewards/margins": 9.412817001342773, + "rewards/rejected": -10.264993667602539, + "step": 2387 + }, + { + "epoch": 3.83, + "learning_rate": 3.1896551724137934e-07, + "logits/chosen": -1.712384819984436, + "logits/rejected": -1.623170256614685, + "logps/chosen": -129.46151733398438, + "logps/rejected": -197.38265991210938, + "loss": 0.0256, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.055704593658447, + "rewards/margins": 7.686938285827637, + "rewards/rejected": -12.742643356323242, + "step": 2388 + }, + { + "epoch": 3.83, + "learning_rate": 3.1886642885453825e-07, + "logits/chosen": -1.5608283281326294, + "logits/rejected": -1.583633542060852, + "logps/chosen": -132.79287719726562, + "logps/rejected": -207.98831176757812, + "loss": 0.0126, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.2530317306518555, + "rewards/margins": 7.404496669769287, + "rewards/rejected": -10.657527923583984, + "step": 2389 + }, + { + "epoch": 3.84, + "learning_rate": 3.1876734046769716e-07, + "logits/chosen": -1.6924899816513062, + "logits/rejected": -1.6144723892211914, + "logps/chosen": -167.56300354003906, + "logps/rejected": -210.65643310546875, + "loss": 0.092, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.864516258239746, + "rewards/margins": 7.58135986328125, + "rewards/rejected": -12.445877075195312, + "step": 2390 + }, + { + "epoch": 3.84, + "learning_rate": 3.186682520808561e-07, + "logits/chosen": -1.5679552555084229, + "logits/rejected": -1.5654226541519165, + "logps/chosen": -121.96119689941406, + "logps/rejected": -176.71514892578125, + "loss": 0.0501, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.647735118865967, + "rewards/margins": 5.510859966278076, + "rewards/rejected": -10.158595085144043, + "step": 2391 + }, + { + "epoch": 3.84, + "learning_rate": 3.18569163694015e-07, + "logits/chosen": -1.6476850509643555, + "logits/rejected": -1.5574887990951538, + "logps/chosen": -96.47245025634766, + "logps/rejected": -156.91429138183594, + "loss": 0.0999, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9922966957092285, + "rewards/margins": 7.601269721984863, + "rewards/rejected": -9.593565940856934, + "step": 2392 + }, + { + "epoch": 3.84, + "learning_rate": 3.18470075307174e-07, + "logits/chosen": -1.51857328414917, + "logits/rejected": -1.5557715892791748, + "logps/chosen": -91.85493469238281, + "logps/rejected": -196.0801544189453, + "loss": 0.0111, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.68640398979187, + "rewards/margins": 9.26175308227539, + "rewards/rejected": -11.948156356811523, + "step": 2393 + }, + { + "epoch": 3.84, + "learning_rate": 3.1837098692033294e-07, + "logits/chosen": -1.6385716199874878, + "logits/rejected": -1.7108683586120605, + "logps/chosen": -122.09945678710938, + "logps/rejected": -212.04885864257812, + "loss": 0.0128, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8243677616119385, + "rewards/margins": 7.172274589538574, + "rewards/rejected": -9.996642112731934, + "step": 2394 + }, + { + "epoch": 3.84, + "learning_rate": 3.1827189853349185e-07, + "logits/chosen": -1.7196998596191406, + "logits/rejected": -1.7281506061553955, + "logps/chosen": -97.26560974121094, + "logps/rejected": -155.4130859375, + "loss": 0.0284, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.6287782192230225, + "rewards/margins": 5.586073875427246, + "rewards/rejected": -9.214852333068848, + "step": 2395 + }, + { + "epoch": 3.85, + "learning_rate": 3.181728101466508e-07, + "logits/chosen": -1.6173921823501587, + "logits/rejected": -1.5268261432647705, + "logps/chosen": -132.0015869140625, + "logps/rejected": -200.84890747070312, + "loss": 0.0241, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.302964210510254, + "rewards/margins": 8.486019134521484, + "rewards/rejected": -13.788982391357422, + "step": 2396 + }, + { + "epoch": 3.85, + "learning_rate": 3.180737217598097e-07, + "logits/chosen": -1.5480588674545288, + "logits/rejected": -1.5868220329284668, + "logps/chosen": -91.56251525878906, + "logps/rejected": -177.05752563476562, + "loss": 0.0135, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7758076190948486, + "rewards/margins": 6.836430072784424, + "rewards/rejected": -8.612237930297852, + "step": 2397 + }, + { + "epoch": 3.85, + "learning_rate": 3.179746333729686e-07, + "logits/chosen": -1.6000757217407227, + "logits/rejected": -1.7243598699569702, + "logps/chosen": -112.73287963867188, + "logps/rejected": -164.40501403808594, + "loss": 0.0503, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.013267517089844, + "rewards/margins": 3.5765650272369385, + "rewards/rejected": -7.589832305908203, + "step": 2398 + }, + { + "epoch": 3.85, + "learning_rate": 3.1787554498612764e-07, + "logits/chosen": -1.6200958490371704, + "logits/rejected": -1.592132568359375, + "logps/chosen": -101.67574310302734, + "logps/rejected": -186.0350341796875, + "loss": 0.0064, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1768546104431152, + "rewards/margins": 8.273351669311523, + "rewards/rejected": -11.450206756591797, + "step": 2399 + }, + { + "epoch": 3.85, + "learning_rate": 3.1777645659928654e-07, + "logits/chosen": -1.6390913724899292, + "logits/rejected": -1.641045331954956, + "logps/chosen": -95.40827178955078, + "logps/rejected": -186.0070343017578, + "loss": 0.0106, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.262139320373535, + "rewards/margins": 8.439342498779297, + "rewards/rejected": -10.701482772827148, + "step": 2400 + }, + { + "epoch": 3.85, + "learning_rate": 3.176773682124455e-07, + "logits/chosen": -1.6030129194259644, + "logits/rejected": -1.672055721282959, + "logps/chosen": -83.38811492919922, + "logps/rejected": -187.17803955078125, + "loss": 0.0754, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2446540594100952, + "rewards/margins": 7.308122634887695, + "rewards/rejected": -8.552777290344238, + "step": 2401 + }, + { + "epoch": 3.86, + "learning_rate": 3.175782798256044e-07, + "logits/chosen": -1.5027625560760498, + "logits/rejected": -1.535874843597412, + "logps/chosen": -106.12118530273438, + "logps/rejected": -182.5994873046875, + "loss": 0.0165, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.196977138519287, + "rewards/margins": 8.487594604492188, + "rewards/rejected": -11.684571266174316, + "step": 2402 + }, + { + "epoch": 3.86, + "learning_rate": 3.174791914387633e-07, + "logits/chosen": -1.6047086715698242, + "logits/rejected": -1.6685631275177002, + "logps/chosen": -110.91162109375, + "logps/rejected": -212.10720825195312, + "loss": 0.0426, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8099205493927, + "rewards/margins": 7.41293478012085, + "rewards/rejected": -10.222855567932129, + "step": 2403 + }, + { + "epoch": 3.86, + "learning_rate": 3.1738010305192233e-07, + "logits/chosen": -1.7531527280807495, + "logits/rejected": -1.7570133209228516, + "logps/chosen": -140.868408203125, + "logps/rejected": -181.33047485351562, + "loss": 0.0328, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.019111156463623, + "rewards/margins": 3.6669156551361084, + "rewards/rejected": -7.6860270500183105, + "step": 2404 + }, + { + "epoch": 3.86, + "learning_rate": 3.1728101466508124e-07, + "logits/chosen": -1.7270870208740234, + "logits/rejected": -1.601033329963684, + "logps/chosen": -180.95741271972656, + "logps/rejected": -210.2803497314453, + "loss": 0.0659, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.073790550231934, + "rewards/margins": 5.729896545410156, + "rewards/rejected": -10.80368709564209, + "step": 2405 + }, + { + "epoch": 3.86, + "learning_rate": 3.171819262782402e-07, + "logits/chosen": -1.7300307750701904, + "logits/rejected": -1.6788334846496582, + "logps/chosen": -128.97142028808594, + "logps/rejected": -178.462158203125, + "loss": 0.0354, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3873605728149414, + "rewards/margins": 4.844751358032227, + "rewards/rejected": -8.232111930847168, + "step": 2406 + }, + { + "epoch": 3.86, + "learning_rate": 3.170828378913991e-07, + "logits/chosen": -1.6013938188552856, + "logits/rejected": -1.6272945404052734, + "logps/chosen": -91.98778533935547, + "logps/rejected": -185.08042907714844, + "loss": 0.0252, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.304780960083008, + "rewards/margins": 8.771812438964844, + "rewards/rejected": -11.076593399047852, + "step": 2407 + }, + { + "epoch": 3.87, + "learning_rate": 3.16983749504558e-07, + "logits/chosen": -1.7244064807891846, + "logits/rejected": -1.61904776096344, + "logps/chosen": -113.40894317626953, + "logps/rejected": -199.55490112304688, + "loss": 0.0157, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7658300399780273, + "rewards/margins": 11.38885498046875, + "rewards/rejected": -13.154685020446777, + "step": 2408 + }, + { + "epoch": 3.87, + "learning_rate": 3.16884661117717e-07, + "logits/chosen": -1.6555628776550293, + "logits/rejected": -1.6733663082122803, + "logps/chosen": -101.77973937988281, + "logps/rejected": -181.6728057861328, + "loss": 0.0581, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8746888637542725, + "rewards/margins": 6.1852335929870605, + "rewards/rejected": -9.059922218322754, + "step": 2409 + }, + { + "epoch": 3.87, + "learning_rate": 3.1678557273087593e-07, + "logits/chosen": -1.7198377847671509, + "logits/rejected": -1.7709424495697021, + "logps/chosen": -138.9071807861328, + "logps/rejected": -242.89303588867188, + "loss": 0.0199, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.182053089141846, + "rewards/margins": 10.256742477416992, + "rewards/rejected": -15.43879508972168, + "step": 2410 + }, + { + "epoch": 3.87, + "learning_rate": 3.166864843440349e-07, + "logits/chosen": -1.7640382051467896, + "logits/rejected": -1.6901030540466309, + "logps/chosen": -107.20628356933594, + "logps/rejected": -206.52774047851562, + "loss": 0.0302, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.213637113571167, + "rewards/margins": 7.750899314880371, + "rewards/rejected": -10.964536666870117, + "step": 2411 + }, + { + "epoch": 3.87, + "learning_rate": 3.165873959571938e-07, + "logits/chosen": -1.7535653114318848, + "logits/rejected": -1.7580926418304443, + "logps/chosen": -138.08677673339844, + "logps/rejected": -189.32632446289062, + "loss": 0.0344, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3030920028686523, + "rewards/margins": 5.568613052368164, + "rewards/rejected": -8.871705055236816, + "step": 2412 + }, + { + "epoch": 3.87, + "learning_rate": 3.164883075703527e-07, + "logits/chosen": -1.5582382678985596, + "logits/rejected": -1.52957022190094, + "logps/chosen": -84.17105865478516, + "logps/rejected": -192.63201904296875, + "loss": 0.0098, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3845877647399902, + "rewards/margins": 7.85618782043457, + "rewards/rejected": -10.240775108337402, + "step": 2413 + }, + { + "epoch": 3.87, + "learning_rate": 3.163892191835117e-07, + "logits/chosen": -1.5962769985198975, + "logits/rejected": -1.613531231880188, + "logps/chosen": -98.41378784179688, + "logps/rejected": -196.9261932373047, + "loss": 0.0411, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0163331031799316, + "rewards/margins": 9.025720596313477, + "rewards/rejected": -11.04205322265625, + "step": 2414 + }, + { + "epoch": 3.88, + "learning_rate": 3.162901307966706e-07, + "logits/chosen": -1.6582759618759155, + "logits/rejected": -1.5894577503204346, + "logps/chosen": -111.92469787597656, + "logps/rejected": -187.90162658691406, + "loss": 0.0361, + "rewards/accuracies": 0.75, + "rewards/chosen": -3.4613871574401855, + "rewards/margins": 7.6016998291015625, + "rewards/rejected": -11.06308650970459, + "step": 2415 + }, + { + "epoch": 3.88, + "learning_rate": 3.161910424098296e-07, + "logits/chosen": -1.8397626876831055, + "logits/rejected": -1.8217613697052002, + "logps/chosen": -91.76139068603516, + "logps/rejected": -151.5057373046875, + "loss": 0.0482, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.956761360168457, + "rewards/margins": 5.563185214996338, + "rewards/rejected": -7.519946575164795, + "step": 2416 + }, + { + "epoch": 3.88, + "learning_rate": 3.160919540229885e-07, + "logits/chosen": -1.671904444694519, + "logits/rejected": -1.6388635635375977, + "logps/chosen": -125.98857879638672, + "logps/rejected": -209.8167724609375, + "loss": 0.0362, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.444940090179443, + "rewards/margins": 8.21549129486084, + "rewards/rejected": -12.660431861877441, + "step": 2417 + }, + { + "epoch": 3.88, + "learning_rate": 3.159928656361474e-07, + "logits/chosen": -1.7634985446929932, + "logits/rejected": -1.747597098350525, + "logps/chosen": -155.8622589111328, + "logps/rejected": -228.66717529296875, + "loss": 0.0584, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.420461654663086, + "rewards/margins": 7.944371700286865, + "rewards/rejected": -12.36483383178711, + "step": 2418 + }, + { + "epoch": 3.88, + "learning_rate": 3.1589377724930635e-07, + "logits/chosen": -1.8033969402313232, + "logits/rejected": -1.7551662921905518, + "logps/chosen": -125.44935607910156, + "logps/rejected": -194.17868041992188, + "loss": 0.0111, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.5067336559295654, + "rewards/margins": 8.15127182006836, + "rewards/rejected": -11.658005714416504, + "step": 2419 + }, + { + "epoch": 3.88, + "learning_rate": 3.157946888624653e-07, + "logits/chosen": -1.543353796005249, + "logits/rejected": -1.6781220436096191, + "logps/chosen": -101.99571228027344, + "logps/rejected": -217.25936889648438, + "loss": 0.075, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4381635189056396, + "rewards/margins": 7.805092811584473, + "rewards/rejected": -10.243256568908691, + "step": 2420 + }, + { + "epoch": 3.89, + "learning_rate": 3.156956004756243e-07, + "logits/chosen": -1.7035565376281738, + "logits/rejected": -1.5942926406860352, + "logps/chosen": -109.42706298828125, + "logps/rejected": -146.5576629638672, + "loss": 0.0362, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7436060905456543, + "rewards/margins": 5.860665321350098, + "rewards/rejected": -8.60427188873291, + "step": 2421 + }, + { + "epoch": 3.89, + "learning_rate": 3.155965120887832e-07, + "logits/chosen": -1.6925337314605713, + "logits/rejected": -1.6763392686843872, + "logps/chosen": -132.4579620361328, + "logps/rejected": -146.0277862548828, + "loss": 0.1227, + "rewards/accuracies": 0.75, + "rewards/chosen": -3.8759727478027344, + "rewards/margins": 2.685601234436035, + "rewards/rejected": -6.5615739822387695, + "step": 2422 + }, + { + "epoch": 3.89, + "learning_rate": 3.154974237019421e-07, + "logits/chosen": -1.609245777130127, + "logits/rejected": -1.595859169960022, + "logps/chosen": -116.06646728515625, + "logps/rejected": -199.58837890625, + "loss": 0.0106, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.944540500640869, + "rewards/margins": 7.1418609619140625, + "rewards/rejected": -10.08640193939209, + "step": 2423 + }, + { + "epoch": 3.89, + "learning_rate": 3.1539833531510105e-07, + "logits/chosen": -1.7667548656463623, + "logits/rejected": -1.790874719619751, + "logps/chosen": -104.67604064941406, + "logps/rejected": -163.30999755859375, + "loss": 0.0082, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7231812477111816, + "rewards/margins": 4.874203681945801, + "rewards/rejected": -7.597385406494141, + "step": 2424 + }, + { + "epoch": 3.89, + "learning_rate": 3.1529924692826e-07, + "logits/chosen": -1.6431548595428467, + "logits/rejected": -1.65325927734375, + "logps/chosen": -123.75814819335938, + "logps/rejected": -229.10060119628906, + "loss": 0.0102, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.299292802810669, + "rewards/margins": 11.158443450927734, + "rewards/rejected": -13.457735061645508, + "step": 2425 + }, + { + "epoch": 3.89, + "learning_rate": 3.1520015854141897e-07, + "logits/chosen": -1.630470633506775, + "logits/rejected": -1.7551894187927246, + "logps/chosen": -129.8077850341797, + "logps/rejected": -240.2442626953125, + "loss": 0.0412, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.040369987487793, + "rewards/margins": 8.132010459899902, + "rewards/rejected": -12.172380447387695, + "step": 2426 + }, + { + "epoch": 3.9, + "learning_rate": 3.151010701545779e-07, + "logits/chosen": -1.7194890975952148, + "logits/rejected": -1.8156936168670654, + "logps/chosen": -125.35464477539062, + "logps/rejected": -221.14886474609375, + "loss": 0.0072, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.378241539001465, + "rewards/margins": 7.97416353225708, + "rewards/rejected": -12.352405548095703, + "step": 2427 + }, + { + "epoch": 3.9, + "learning_rate": 3.150019817677368e-07, + "logits/chosen": -1.606913685798645, + "logits/rejected": -1.6412755250930786, + "logps/chosen": -88.26358032226562, + "logps/rejected": -188.1614227294922, + "loss": 0.027, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7785019874572754, + "rewards/margins": 9.391172409057617, + "rewards/rejected": -12.16967487335205, + "step": 2428 + }, + { + "epoch": 3.9, + "learning_rate": 3.1490289338089574e-07, + "logits/chosen": -1.6635327339172363, + "logits/rejected": -1.6889121532440186, + "logps/chosen": -119.38616943359375, + "logps/rejected": -192.55941772460938, + "loss": 0.0822, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.971400737762451, + "rewards/margins": 7.428398609161377, + "rewards/rejected": -11.399799346923828, + "step": 2429 + }, + { + "epoch": 3.9, + "learning_rate": 3.148038049940547e-07, + "logits/chosen": -1.522049903869629, + "logits/rejected": -1.67695951461792, + "logps/chosen": -149.18856811523438, + "logps/rejected": -261.0108642578125, + "loss": 0.0364, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.789420127868652, + "rewards/margins": 9.74087905883789, + "rewards/rejected": -15.530299186706543, + "step": 2430 + }, + { + "epoch": 3.9, + "learning_rate": 3.147047166072136e-07, + "logits/chosen": -1.5759978294372559, + "logits/rejected": -1.5609081983566284, + "logps/chosen": -92.87998962402344, + "logps/rejected": -138.20327758789062, + "loss": 0.0457, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9554953575134277, + "rewards/margins": 4.999082088470459, + "rewards/rejected": -6.954577445983887, + "step": 2431 + }, + { + "epoch": 3.9, + "learning_rate": 3.1460562822037257e-07, + "logits/chosen": -1.6905136108398438, + "logits/rejected": -1.6488957405090332, + "logps/chosen": -99.2413558959961, + "logps/rejected": -142.98231506347656, + "loss": 0.0346, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.011648416519165, + "rewards/margins": 4.788084983825684, + "rewards/rejected": -6.7997331619262695, + "step": 2432 + }, + { + "epoch": 3.91, + "learning_rate": 3.145065398335315e-07, + "logits/chosen": -1.65487802028656, + "logits/rejected": -1.7152732610702515, + "logps/chosen": -150.5223388671875, + "logps/rejected": -248.24354553222656, + "loss": 0.016, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.168529033660889, + "rewards/margins": 7.358080863952637, + "rewards/rejected": -11.526610374450684, + "step": 2433 + }, + { + "epoch": 3.91, + "learning_rate": 3.1440745144669043e-07, + "logits/chosen": -1.6715703010559082, + "logits/rejected": -1.6871179342269897, + "logps/chosen": -102.06703186035156, + "logps/rejected": -209.58905029296875, + "loss": 0.0306, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.3578950762748718, + "rewards/margins": 12.237202644348145, + "rewards/rejected": -12.595097541809082, + "step": 2434 + }, + { + "epoch": 3.91, + "learning_rate": 3.143083630598494e-07, + "logits/chosen": -1.440708041191101, + "logits/rejected": -1.5244152545928955, + "logps/chosen": -119.58341979980469, + "logps/rejected": -207.9298095703125, + "loss": 0.0624, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.5864901542663574, + "rewards/margins": 6.190683364868164, + "rewards/rejected": -9.777173042297363, + "step": 2435 + }, + { + "epoch": 3.91, + "learning_rate": 3.142092746730083e-07, + "logits/chosen": -1.6872646808624268, + "logits/rejected": -1.6220782995224, + "logps/chosen": -110.09986877441406, + "logps/rejected": -186.00332641601562, + "loss": 0.0428, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.355742931365967, + "rewards/margins": 9.229270935058594, + "rewards/rejected": -12.585014343261719, + "step": 2436 + }, + { + "epoch": 3.91, + "learning_rate": 3.1411018628616726e-07, + "logits/chosen": -1.753859043121338, + "logits/rejected": -1.7516181468963623, + "logps/chosen": -133.18873596191406, + "logps/rejected": -204.59027099609375, + "loss": 0.0334, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.848236560821533, + "rewards/margins": 6.3958258628845215, + "rewards/rejected": -11.244062423706055, + "step": 2437 + }, + { + "epoch": 3.91, + "learning_rate": 3.1401109789932617e-07, + "logits/chosen": -1.6625372171401978, + "logits/rejected": -1.650441288948059, + "logps/chosen": -110.43911743164062, + "logps/rejected": -194.53773498535156, + "loss": 0.0161, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.168846845626831, + "rewards/margins": 8.868406295776367, + "rewards/rejected": -11.037252426147461, + "step": 2438 + }, + { + "epoch": 3.91, + "learning_rate": 3.1391200951248513e-07, + "logits/chosen": -1.4720935821533203, + "logits/rejected": -1.5541760921478271, + "logps/chosen": -89.5753173828125, + "logps/rejected": -195.80728149414062, + "loss": 0.012, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.00138258934021, + "rewards/margins": 9.80927562713623, + "rewards/rejected": -11.81065845489502, + "step": 2439 + }, + { + "epoch": 3.92, + "learning_rate": 3.1381292112564403e-07, + "logits/chosen": -1.6719281673431396, + "logits/rejected": -1.6534889936447144, + "logps/chosen": -116.26509857177734, + "logps/rejected": -188.3282470703125, + "loss": 0.1016, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.4362869262695312, + "rewards/margins": 7.416840553283691, + "rewards/rejected": -10.853127479553223, + "step": 2440 + }, + { + "epoch": 3.92, + "learning_rate": 3.13713832738803e-07, + "logits/chosen": -1.7264108657836914, + "logits/rejected": -1.6836025714874268, + "logps/chosen": -129.27008056640625, + "logps/rejected": -184.9966583251953, + "loss": 0.0151, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.797067165374756, + "rewards/margins": 5.004114627838135, + "rewards/rejected": -8.80118179321289, + "step": 2441 + }, + { + "epoch": 3.92, + "learning_rate": 3.1361474435196195e-07, + "logits/chosen": -1.7945678234100342, + "logits/rejected": -1.785756230354309, + "logps/chosen": -109.13490295410156, + "logps/rejected": -221.2981414794922, + "loss": 0.0029, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3065757751464844, + "rewards/margins": 10.880393981933594, + "rewards/rejected": -13.186969757080078, + "step": 2442 + }, + { + "epoch": 3.92, + "learning_rate": 3.1351565596512086e-07, + "logits/chosen": -1.708480715751648, + "logits/rejected": -1.664196252822876, + "logps/chosen": -149.47067260742188, + "logps/rejected": -216.32342529296875, + "loss": 0.0064, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.7134270668029785, + "rewards/margins": 6.618880271911621, + "rewards/rejected": -11.332306861877441, + "step": 2443 + }, + { + "epoch": 3.92, + "learning_rate": 3.134165675782798e-07, + "logits/chosen": -1.6031821966171265, + "logits/rejected": -1.552596926689148, + "logps/chosen": -135.0286865234375, + "logps/rejected": -192.23204040527344, + "loss": 0.0144, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3780651092529297, + "rewards/margins": 7.481261253356934, + "rewards/rejected": -10.859326362609863, + "step": 2444 + }, + { + "epoch": 3.92, + "learning_rate": 3.1331747919143873e-07, + "logits/chosen": -1.6226836442947388, + "logits/rejected": -1.6078803539276123, + "logps/chosen": -132.01902770996094, + "logps/rejected": -198.5802001953125, + "loss": 0.0109, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.22147798538208, + "rewards/margins": 6.882156848907471, + "rewards/rejected": -11.10363483428955, + "step": 2445 + }, + { + "epoch": 3.93, + "learning_rate": 3.132183908045977e-07, + "logits/chosen": -1.6077642440795898, + "logits/rejected": -1.8075286149978638, + "logps/chosen": -98.47418212890625, + "logps/rejected": -247.69964599609375, + "loss": 0.0023, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.258549451828003, + "rewards/margins": 10.925354957580566, + "rewards/rejected": -14.183904647827148, + "step": 2446 + }, + { + "epoch": 3.93, + "learning_rate": 3.1311930241775665e-07, + "logits/chosen": -1.5728838443756104, + "logits/rejected": -1.4860873222351074, + "logps/chosen": -124.60508728027344, + "logps/rejected": -193.56524658203125, + "loss": 0.0059, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9942266941070557, + "rewards/margins": 8.412568092346191, + "rewards/rejected": -11.406794548034668, + "step": 2447 + }, + { + "epoch": 3.93, + "learning_rate": 3.1302021403091555e-07, + "logits/chosen": -1.6562812328338623, + "logits/rejected": -1.6881718635559082, + "logps/chosen": -113.64115142822266, + "logps/rejected": -195.18594360351562, + "loss": 0.0063, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3704047203063965, + "rewards/margins": 7.333744049072266, + "rewards/rejected": -10.70414924621582, + "step": 2448 + }, + { + "epoch": 3.93, + "learning_rate": 3.129211256440745e-07, + "logits/chosen": -1.5184731483459473, + "logits/rejected": -1.5802011489868164, + "logps/chosen": -76.17317962646484, + "logps/rejected": -223.9153594970703, + "loss": 0.0054, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3385045528411865, + "rewards/margins": 11.42319107055664, + "rewards/rejected": -12.761695861816406, + "step": 2449 + }, + { + "epoch": 3.93, + "learning_rate": 3.128220372572334e-07, + "logits/chosen": -1.6177641153335571, + "logits/rejected": -1.5687041282653809, + "logps/chosen": -89.2684326171875, + "logps/rejected": -168.19163513183594, + "loss": 0.0103, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2813916206359863, + "rewards/margins": 7.792001724243164, + "rewards/rejected": -10.073392868041992, + "step": 2450 + }, + { + "epoch": 3.93, + "learning_rate": 3.127229488703924e-07, + "logits/chosen": -1.6622493267059326, + "logits/rejected": -1.7670118808746338, + "logps/chosen": -111.56812286376953, + "logps/rejected": -242.7255859375, + "loss": 0.0914, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.558161735534668, + "rewards/margins": 9.724294662475586, + "rewards/rejected": -13.282455444335938, + "step": 2451 + }, + { + "epoch": 3.94, + "learning_rate": 3.1262386048355134e-07, + "logits/chosen": -1.6927717924118042, + "logits/rejected": -1.6943306922912598, + "logps/chosen": -116.58832550048828, + "logps/rejected": -190.60525512695312, + "loss": 0.0085, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0453872680664062, + "rewards/margins": 7.068394660949707, + "rewards/rejected": -9.113781929016113, + "step": 2452 + }, + { + "epoch": 3.94, + "learning_rate": 3.1252477209671025e-07, + "logits/chosen": -1.5557587146759033, + "logits/rejected": -1.5377393960952759, + "logps/chosen": -92.35574340820312, + "logps/rejected": -169.23231506347656, + "loss": 0.0462, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.37526273727417, + "rewards/margins": 8.486651420593262, + "rewards/rejected": -10.861913681030273, + "step": 2453 + }, + { + "epoch": 3.94, + "learning_rate": 3.124256837098692e-07, + "logits/chosen": -1.7196012735366821, + "logits/rejected": -1.7672115564346313, + "logps/chosen": -88.59476470947266, + "logps/rejected": -226.37001037597656, + "loss": 0.0233, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.635707378387451, + "rewards/margins": 12.937529563903809, + "rewards/rejected": -15.573236465454102, + "step": 2454 + }, + { + "epoch": 3.94, + "learning_rate": 3.123265953230281e-07, + "logits/chosen": -1.7803711891174316, + "logits/rejected": -1.7831135988235474, + "logps/chosen": -105.15370178222656, + "logps/rejected": -233.0884246826172, + "loss": 0.0153, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3229150772094727, + "rewards/margins": 12.12285041809082, + "rewards/rejected": -14.44576644897461, + "step": 2455 + }, + { + "epoch": 3.94, + "learning_rate": 3.1222750693618707e-07, + "logits/chosen": -1.6060926914215088, + "logits/rejected": -1.6118249893188477, + "logps/chosen": -116.92308044433594, + "logps/rejected": -154.93817138671875, + "loss": 0.0306, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.118552207946777, + "rewards/margins": 4.979379177093506, + "rewards/rejected": -9.097931861877441, + "step": 2456 + }, + { + "epoch": 3.94, + "learning_rate": 3.1212841854934603e-07, + "logits/chosen": -1.7049835920333862, + "logits/rejected": -1.7385197877883911, + "logps/chosen": -114.18585205078125, + "logps/rejected": -205.79281616210938, + "loss": 0.041, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.688788414001465, + "rewards/margins": 8.060035705566406, + "rewards/rejected": -11.748824119567871, + "step": 2457 + }, + { + "epoch": 3.95, + "learning_rate": 3.1202933016250494e-07, + "logits/chosen": -1.6754369735717773, + "logits/rejected": -1.7190947532653809, + "logps/chosen": -95.128173828125, + "logps/rejected": -186.01669311523438, + "loss": 0.0819, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3451223373413086, + "rewards/margins": 7.96246337890625, + "rewards/rejected": -10.307586669921875, + "step": 2458 + }, + { + "epoch": 3.95, + "learning_rate": 3.119302417756639e-07, + "logits/chosen": -1.6589767932891846, + "logits/rejected": -1.679428219795227, + "logps/chosen": -125.38641357421875, + "logps/rejected": -165.5476837158203, + "loss": 0.0373, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9961771965026855, + "rewards/margins": 5.228609085083008, + "rewards/rejected": -9.224785804748535, + "step": 2459 + }, + { + "epoch": 3.95, + "learning_rate": 3.118311533888228e-07, + "logits/chosen": -1.6189472675323486, + "logits/rejected": -1.5783114433288574, + "logps/chosen": -120.13316345214844, + "logps/rejected": -213.81002807617188, + "loss": 0.0046, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.497579574584961, + "rewards/margins": 8.854448318481445, + "rewards/rejected": -13.352027893066406, + "step": 2460 + }, + { + "epoch": 3.95, + "learning_rate": 3.117320650019817e-07, + "logits/chosen": -1.8136708736419678, + "logits/rejected": -1.8605544567108154, + "logps/chosen": -134.48548889160156, + "logps/rejected": -229.24208068847656, + "loss": 0.0135, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3923707008361816, + "rewards/margins": 9.32573127746582, + "rewards/rejected": -12.71810245513916, + "step": 2461 + }, + { + "epoch": 3.95, + "learning_rate": 3.116329766151407e-07, + "logits/chosen": -1.6419748067855835, + "logits/rejected": -1.7006802558898926, + "logps/chosen": -101.52938079833984, + "logps/rejected": -216.19224548339844, + "loss": 0.0058, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6780807971954346, + "rewards/margins": 10.293095588684082, + "rewards/rejected": -12.971175193786621, + "step": 2462 + }, + { + "epoch": 3.95, + "learning_rate": 3.1153388822829963e-07, + "logits/chosen": -1.6243489980697632, + "logits/rejected": -1.7193981409072876, + "logps/chosen": -78.58547973632812, + "logps/rejected": -191.18389892578125, + "loss": 0.0364, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1677660942077637, + "rewards/margins": 10.378124237060547, + "rewards/rejected": -11.545890808105469, + "step": 2463 + }, + { + "epoch": 3.96, + "learning_rate": 3.1143479984145854e-07, + "logits/chosen": -1.73092520236969, + "logits/rejected": -1.646431803703308, + "logps/chosen": -127.85552215576172, + "logps/rejected": -212.22242736816406, + "loss": 0.0138, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8785150051116943, + "rewards/margins": 10.299612045288086, + "rewards/rejected": -13.17812728881836, + "step": 2464 + }, + { + "epoch": 3.96, + "learning_rate": 3.113357114546175e-07, + "logits/chosen": -1.534735918045044, + "logits/rejected": -1.514582872390747, + "logps/chosen": -110.49726867675781, + "logps/rejected": -209.98178100585938, + "loss": 0.0713, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5886037349700928, + "rewards/margins": 9.025491714477539, + "rewards/rejected": -11.614095687866211, + "step": 2465 + }, + { + "epoch": 3.96, + "learning_rate": 3.112366230677764e-07, + "logits/chosen": -1.5980184078216553, + "logits/rejected": -1.7133891582489014, + "logps/chosen": -112.80120849609375, + "logps/rejected": -195.59133911132812, + "loss": 0.0411, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.245931386947632, + "rewards/margins": 6.2734270095825195, + "rewards/rejected": -9.51935863494873, + "step": 2466 + }, + { + "epoch": 3.96, + "learning_rate": 3.111375346809354e-07, + "logits/chosen": -1.7193608283996582, + "logits/rejected": -1.6247200965881348, + "logps/chosen": -125.89959716796875, + "logps/rejected": -194.40093994140625, + "loss": 0.0373, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3265254497528076, + "rewards/margins": 8.288331985473633, + "rewards/rejected": -10.614856719970703, + "step": 2467 + }, + { + "epoch": 3.96, + "learning_rate": 3.110384462940943e-07, + "logits/chosen": -1.5964524745941162, + "logits/rejected": -1.5829105377197266, + "logps/chosen": -160.55581665039062, + "logps/rejected": -263.0168762207031, + "loss": 0.0086, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3400487899780273, + "rewards/margins": 10.445079803466797, + "rewards/rejected": -13.785127639770508, + "step": 2468 + }, + { + "epoch": 3.96, + "learning_rate": 3.1093935790725323e-07, + "logits/chosen": -1.7840162515640259, + "logits/rejected": -1.729468822479248, + "logps/chosen": -157.90675354003906, + "logps/rejected": -183.84255981445312, + "loss": 0.0327, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7088396549224854, + "rewards/margins": 5.1570916175842285, + "rewards/rejected": -8.865931510925293, + "step": 2469 + }, + { + "epoch": 3.96, + "learning_rate": 3.108402695204122e-07, + "logits/chosen": -1.7639405727386475, + "logits/rejected": -1.6769871711730957, + "logps/chosen": -120.47565460205078, + "logps/rejected": -197.19239807128906, + "loss": 0.0072, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1731252670288086, + "rewards/margins": 10.451326370239258, + "rewards/rejected": -13.624451637268066, + "step": 2470 + }, + { + "epoch": 3.97, + "learning_rate": 3.107411811335711e-07, + "logits/chosen": -1.7406063079833984, + "logits/rejected": -1.876851201057434, + "logps/chosen": -101.6218490600586, + "logps/rejected": -201.0334930419922, + "loss": 0.0041, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.444594383239746, + "rewards/margins": 8.630941390991211, + "rewards/rejected": -11.07553482055664, + "step": 2471 + }, + { + "epoch": 3.97, + "learning_rate": 3.106420927467301e-07, + "logits/chosen": -1.5825328826904297, + "logits/rejected": -1.559051275253296, + "logps/chosen": -121.26351165771484, + "logps/rejected": -162.42938232421875, + "loss": 0.1091, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.874772548675537, + "rewards/margins": 4.640717506408691, + "rewards/rejected": -7.5154900550842285, + "step": 2472 + }, + { + "epoch": 3.97, + "learning_rate": 3.10543004359889e-07, + "logits/chosen": -1.6133747100830078, + "logits/rejected": -1.5922369956970215, + "logps/chosen": -106.62471008300781, + "logps/rejected": -200.8203887939453, + "loss": 0.0144, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.5301971435546875, + "rewards/margins": 7.611550807952881, + "rewards/rejected": -11.141748428344727, + "step": 2473 + }, + { + "epoch": 3.97, + "learning_rate": 3.104439159730479e-07, + "logits/chosen": -1.8177268505096436, + "logits/rejected": -1.7803781032562256, + "logps/chosen": -115.55880737304688, + "logps/rejected": -197.11056518554688, + "loss": 0.0068, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.6015684604644775, + "rewards/margins": 9.475107192993164, + "rewards/rejected": -13.076675415039062, + "step": 2474 + }, + { + "epoch": 3.97, + "learning_rate": 3.103448275862069e-07, + "logits/chosen": -1.5184355974197388, + "logits/rejected": -1.6421277523040771, + "logps/chosen": -95.86965942382812, + "logps/rejected": -188.05699157714844, + "loss": 0.0071, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.6089060306549072, + "rewards/margins": 6.7908430099487305, + "rewards/rejected": -10.399748802185059, + "step": 2475 + }, + { + "epoch": 3.97, + "learning_rate": 3.102457391993658e-07, + "logits/chosen": -1.582395076751709, + "logits/rejected": -1.5192985534667969, + "logps/chosen": -117.06130981445312, + "logps/rejected": -197.36050415039062, + "loss": 0.0176, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.6889541149139404, + "rewards/margins": 9.835277557373047, + "rewards/rejected": -13.52423095703125, + "step": 2476 + }, + { + "epoch": 3.98, + "learning_rate": 3.101466508125248e-07, + "logits/chosen": -1.6509050130844116, + "logits/rejected": -1.5921218395233154, + "logps/chosen": -179.4667510986328, + "logps/rejected": -206.12167358398438, + "loss": 0.0655, + "rewards/accuracies": 0.75, + "rewards/chosen": -6.0866594314575195, + "rewards/margins": 5.619956970214844, + "rewards/rejected": -11.706616401672363, + "step": 2477 + }, + { + "epoch": 3.98, + "learning_rate": 3.100475624256837e-07, + "logits/chosen": -1.6941401958465576, + "logits/rejected": -1.6584988832473755, + "logps/chosen": -139.13841247558594, + "logps/rejected": -207.24456787109375, + "loss": 0.0447, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.4464640617370605, + "rewards/margins": 7.151640892028809, + "rewards/rejected": -12.598104476928711, + "step": 2478 + }, + { + "epoch": 3.98, + "learning_rate": 3.099484740388426e-07, + "logits/chosen": -1.7294784784317017, + "logits/rejected": -1.7791050672531128, + "logps/chosen": -118.61955261230469, + "logps/rejected": -187.35745239257812, + "loss": 0.0157, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.437647581100464, + "rewards/margins": 6.815385341644287, + "rewards/rejected": -9.253032684326172, + "step": 2479 + }, + { + "epoch": 3.98, + "learning_rate": 3.098493856520016e-07, + "logits/chosen": -1.7609450817108154, + "logits/rejected": -1.7257410287857056, + "logps/chosen": -129.36959838867188, + "logps/rejected": -175.30084228515625, + "loss": 0.0153, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.990910768508911, + "rewards/margins": 6.790219783782959, + "rewards/rejected": -9.78113079071045, + "step": 2480 + }, + { + "epoch": 3.98, + "learning_rate": 3.097502972651605e-07, + "logits/chosen": -1.6195313930511475, + "logits/rejected": -1.5821988582611084, + "logps/chosen": -82.0247802734375, + "logps/rejected": -174.9648895263672, + "loss": 0.0049, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8905493021011353, + "rewards/margins": 9.528448104858398, + "rewards/rejected": -11.418996810913086, + "step": 2481 + }, + { + "epoch": 3.98, + "learning_rate": 3.0965120887831944e-07, + "logits/chosen": -1.475446105003357, + "logits/rejected": -1.5017861127853394, + "logps/chosen": -114.30125427246094, + "logps/rejected": -196.71163940429688, + "loss": 0.0217, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1271605491638184, + "rewards/margins": 7.872254371643066, + "rewards/rejected": -10.999414443969727, + "step": 2482 + }, + { + "epoch": 3.99, + "learning_rate": 3.095521204914784e-07, + "logits/chosen": -1.646718978881836, + "logits/rejected": -1.640291690826416, + "logps/chosen": -152.05514526367188, + "logps/rejected": -179.84446716308594, + "loss": 0.1014, + "rewards/accuracies": 0.75, + "rewards/chosen": -5.914675712585449, + "rewards/margins": 3.899415969848633, + "rewards/rejected": -9.814091682434082, + "step": 2483 + }, + { + "epoch": 3.99, + "learning_rate": 3.094530321046373e-07, + "logits/chosen": -1.6995790004730225, + "logits/rejected": -1.7138745784759521, + "logps/chosen": -102.91109466552734, + "logps/rejected": -197.96458435058594, + "loss": 0.1074, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.323108434677124, + "rewards/margins": 8.290371894836426, + "rewards/rejected": -11.613479614257812, + "step": 2484 + }, + { + "epoch": 3.99, + "learning_rate": 3.0935394371779627e-07, + "logits/chosen": -1.5202114582061768, + "logits/rejected": -1.5061296224594116, + "logps/chosen": -114.546142578125, + "logps/rejected": -204.1097412109375, + "loss": 0.0412, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.161726236343384, + "rewards/margins": 9.21014404296875, + "rewards/rejected": -12.371870040893555, + "step": 2485 + }, + { + "epoch": 3.99, + "learning_rate": 3.092548553309552e-07, + "logits/chosen": -1.584567666053772, + "logits/rejected": -1.721338152885437, + "logps/chosen": -106.00804138183594, + "logps/rejected": -220.09518432617188, + "loss": 0.0107, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6973540782928467, + "rewards/margins": 9.451199531555176, + "rewards/rejected": -12.148552894592285, + "step": 2486 + }, + { + "epoch": 3.99, + "learning_rate": 3.0915576694411414e-07, + "logits/chosen": -1.6046621799468994, + "logits/rejected": -1.5530221462249756, + "logps/chosen": -84.0755844116211, + "logps/rejected": -132.17782592773438, + "loss": 0.0174, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.125699043273926, + "rewards/margins": 5.624811172485352, + "rewards/rejected": -7.7505106925964355, + "step": 2487 + }, + { + "epoch": 3.99, + "learning_rate": 3.090566785572731e-07, + "logits/chosen": -1.7677592039108276, + "logits/rejected": -1.6597754955291748, + "logps/chosen": -123.61154174804688, + "logps/rejected": -169.79759216308594, + "loss": 0.0144, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.175506591796875, + "rewards/margins": 6.205968379974365, + "rewards/rejected": -9.381475448608398, + "step": 2488 + }, + { + "epoch": 4.0, + "learning_rate": 3.08957590170432e-07, + "logits/chosen": -1.6724743843078613, + "logits/rejected": -1.678788661956787, + "logps/chosen": -100.08451080322266, + "logps/rejected": -156.104736328125, + "loss": 0.0094, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8735365867614746, + "rewards/margins": 5.581778526306152, + "rewards/rejected": -8.455313682556152, + "step": 2489 + }, + { + "epoch": 4.0, + "learning_rate": 3.0885850178359096e-07, + "logits/chosen": -1.762566328048706, + "logits/rejected": -1.6823458671569824, + "logps/chosen": -133.17758178710938, + "logps/rejected": -243.6854248046875, + "loss": 0.0189, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.439713478088379, + "rewards/margins": 10.402105331420898, + "rewards/rejected": -14.841818809509277, + "step": 2490 + }, + { + "epoch": 4.0, + "learning_rate": 3.0875941339674987e-07, + "logits/chosen": -1.453844666481018, + "logits/rejected": -1.6031880378723145, + "logps/chosen": -95.93086242675781, + "logps/rejected": -197.94921875, + "loss": 0.0051, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.2596800327301025, + "rewards/margins": 8.086135864257812, + "rewards/rejected": -11.345815658569336, + "step": 2491 + }, + { + "epoch": 4.0, + "learning_rate": 3.0866032500990883e-07, + "logits/chosen": -1.6306859254837036, + "logits/rejected": -1.6718180179595947, + "logps/chosen": -112.61123657226562, + "logps/rejected": -188.1778564453125, + "loss": 0.0144, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.692707061767578, + "rewards/margins": 7.12523078918457, + "rewards/rejected": -9.817937850952148, + "step": 2492 + }, + { + "epoch": 4.0, + "learning_rate": 3.085612366230678e-07, + "logits/chosen": -1.72189199924469, + "logits/rejected": -1.7278311252593994, + "logps/chosen": -111.62344360351562, + "logps/rejected": -198.2715606689453, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5825027227401733, + "rewards/margins": 8.565530776977539, + "rewards/rejected": -10.14803409576416, + "step": 2493 + }, + { + "epoch": 4.0, + "learning_rate": 3.084621482362267e-07, + "logits/chosen": -1.66962730884552, + "logits/rejected": -1.698232650756836, + "logps/chosen": -107.85733795166016, + "logps/rejected": -216.23165893554688, + "loss": 0.0019, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.213002920150757, + "rewards/margins": 9.97922134399414, + "rewards/rejected": -12.19222354888916, + "step": 2494 + }, + { + "epoch": 4.0, + "learning_rate": 3.0836305984938566e-07, + "logits/chosen": -1.5406492948532104, + "logits/rejected": -1.5379347801208496, + "logps/chosen": -127.5925521850586, + "logps/rejected": -200.87403869628906, + "loss": 0.0039, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7536542415618896, + "rewards/margins": 8.005014419555664, + "rewards/rejected": -11.758668899536133, + "step": 2495 + }, + { + "epoch": 4.01, + "learning_rate": 3.0826397146254456e-07, + "logits/chosen": -1.7422574758529663, + "logits/rejected": -1.69615638256073, + "logps/chosen": -136.8681640625, + "logps/rejected": -218.6101837158203, + "loss": 0.0045, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.528923511505127, + "rewards/margins": 10.528447151184082, + "rewards/rejected": -15.057371139526367, + "step": 2496 + }, + { + "epoch": 4.01, + "learning_rate": 3.081648830757035e-07, + "logits/chosen": -1.842346429824829, + "logits/rejected": -1.825121521949768, + "logps/chosen": -145.908203125, + "logps/rejected": -221.43333435058594, + "loss": 0.0024, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.2306928634643555, + "rewards/margins": 8.66795825958252, + "rewards/rejected": -12.898651123046875, + "step": 2497 + }, + { + "epoch": 4.01, + "learning_rate": 3.080657946888625e-07, + "logits/chosen": -1.5246798992156982, + "logits/rejected": -1.5532732009887695, + "logps/chosen": -106.9865951538086, + "logps/rejected": -211.9129638671875, + "loss": 0.0053, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9941115379333496, + "rewards/margins": 8.991926193237305, + "rewards/rejected": -11.986037254333496, + "step": 2498 + }, + { + "epoch": 4.01, + "learning_rate": 3.079667063020214e-07, + "logits/chosen": -1.6735752820968628, + "logits/rejected": -1.6546440124511719, + "logps/chosen": -138.49008178710938, + "logps/rejected": -231.6451416015625, + "loss": 0.0024, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.962188243865967, + "rewards/margins": 8.148418426513672, + "rewards/rejected": -12.110607147216797, + "step": 2499 + }, + { + "epoch": 4.01, + "learning_rate": 3.0786761791518035e-07, + "logits/chosen": -1.8160932064056396, + "logits/rejected": -1.8021754026412964, + "logps/chosen": -149.5265350341797, + "logps/rejected": -199.94308471679688, + "loss": 0.0025, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.741860866546631, + "rewards/margins": 7.704150199890137, + "rewards/rejected": -12.44601058959961, + "step": 2500 + }, + { + "epoch": 4.01, + "learning_rate": 3.0776852952833926e-07, + "logits/chosen": -1.6432619094848633, + "logits/rejected": -1.6990611553192139, + "logps/chosen": -111.1497802734375, + "logps/rejected": -227.98007202148438, + "loss": 0.0034, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8291747570037842, + "rewards/margins": 10.840398788452148, + "rewards/rejected": -12.669573783874512, + "step": 2501 + }, + { + "epoch": 4.02, + "learning_rate": 3.0766944114149816e-07, + "logits/chosen": -1.659531831741333, + "logits/rejected": -1.6324594020843506, + "logps/chosen": -116.5029525756836, + "logps/rejected": -190.52862548828125, + "loss": 0.0078, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9654483795166016, + "rewards/margins": 8.976431846618652, + "rewards/rejected": -11.941880226135254, + "step": 2502 + }, + { + "epoch": 4.02, + "learning_rate": 3.075703527546571e-07, + "logits/chosen": -1.5678319931030273, + "logits/rejected": -1.6212395429611206, + "logps/chosen": -127.81037902832031, + "logps/rejected": -216.714599609375, + "loss": 0.012, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.440348148345947, + "rewards/margins": 9.187931060791016, + "rewards/rejected": -13.628279685974121, + "step": 2503 + }, + { + "epoch": 4.02, + "learning_rate": 3.074712643678161e-07, + "logits/chosen": -1.5374960899353027, + "logits/rejected": -1.654158353805542, + "logps/chosen": -93.89268493652344, + "logps/rejected": -195.35914611816406, + "loss": 0.0059, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.523164987564087, + "rewards/margins": 7.022997856140137, + "rewards/rejected": -10.546163558959961, + "step": 2504 + }, + { + "epoch": 4.02, + "learning_rate": 3.0737217598097504e-07, + "logits/chosen": -1.6651911735534668, + "logits/rejected": -1.695858359336853, + "logps/chosen": -147.72576904296875, + "logps/rejected": -205.76388549804688, + "loss": 0.0037, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.914487838745117, + "rewards/margins": 5.898893356323242, + "rewards/rejected": -11.81338119506836, + "step": 2505 + }, + { + "epoch": 4.02, + "learning_rate": 3.0727308759413395e-07, + "logits/chosen": -1.6496708393096924, + "logits/rejected": -1.627266764640808, + "logps/chosen": -122.88634490966797, + "logps/rejected": -203.31570434570312, + "loss": 0.002, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.353158473968506, + "rewards/margins": 7.399735927581787, + "rewards/rejected": -9.752894401550293, + "step": 2506 + }, + { + "epoch": 4.02, + "learning_rate": 3.0717399920729286e-07, + "logits/chosen": -1.657367467880249, + "logits/rejected": -1.6772427558898926, + "logps/chosen": -141.00572204589844, + "logps/rejected": -207.79397583007812, + "loss": 0.002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.019824028015137, + "rewards/margins": 7.1086812019348145, + "rewards/rejected": -12.128504753112793, + "step": 2507 + }, + { + "epoch": 4.03, + "learning_rate": 3.070749108204518e-07, + "logits/chosen": -1.492790699005127, + "logits/rejected": -1.6124016046524048, + "logps/chosen": -84.41968536376953, + "logps/rejected": -184.6190643310547, + "loss": 0.0024, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.242992401123047, + "rewards/margins": 6.907172203063965, + "rewards/rejected": -9.150163650512695, + "step": 2508 + }, + { + "epoch": 4.03, + "learning_rate": 3.069758224336108e-07, + "logits/chosen": -1.6366609334945679, + "logits/rejected": -1.5938897132873535, + "logps/chosen": -120.18865966796875, + "logps/rejected": -209.8963623046875, + "loss": 0.0025, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2974164485931396, + "rewards/margins": 7.387589931488037, + "rewards/rejected": -9.685007095336914, + "step": 2509 + }, + { + "epoch": 4.03, + "learning_rate": 3.0687673404676974e-07, + "logits/chosen": -1.6669700145721436, + "logits/rejected": -1.693543791770935, + "logps/chosen": -117.47119903564453, + "logps/rejected": -219.45687866210938, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.7207865715026855, + "rewards/margins": 9.309667587280273, + "rewards/rejected": -14.030454635620117, + "step": 2510 + }, + { + "epoch": 4.03, + "learning_rate": 3.0677764565992864e-07, + "logits/chosen": -1.686862826347351, + "logits/rejected": -1.8006712198257446, + "logps/chosen": -108.82182312011719, + "logps/rejected": -232.6983184814453, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.0155394077301025, + "rewards/margins": 11.246267318725586, + "rewards/rejected": -14.26180648803711, + "step": 2511 + }, + { + "epoch": 4.03, + "learning_rate": 3.0667855727308755e-07, + "logits/chosen": -1.659718632698059, + "logits/rejected": -1.6852625608444214, + "logps/chosen": -117.36539459228516, + "logps/rejected": -228.60592651367188, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8146252632141113, + "rewards/margins": 9.800826072692871, + "rewards/rejected": -12.61545181274414, + "step": 2512 + }, + { + "epoch": 4.03, + "learning_rate": 3.065794688862465e-07, + "logits/chosen": -1.6858441829681396, + "logits/rejected": -1.6066285371780396, + "logps/chosen": -141.23336791992188, + "logps/rejected": -219.733154296875, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.473121166229248, + "rewards/margins": 9.888525009155273, + "rewards/rejected": -14.361644744873047, + "step": 2513 + }, + { + "epoch": 4.04, + "learning_rate": 3.0648038049940547e-07, + "logits/chosen": -1.6965941190719604, + "logits/rejected": -1.67086660861969, + "logps/chosen": -114.09213256835938, + "logps/rejected": -214.55226135253906, + "loss": 0.0019, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.771657705307007, + "rewards/margins": 11.059677124023438, + "rewards/rejected": -14.831335067749023, + "step": 2514 + }, + { + "epoch": 4.04, + "learning_rate": 3.0638129211256443e-07, + "logits/chosen": -1.6366122961044312, + "logits/rejected": -1.6601407527923584, + "logps/chosen": -169.2470245361328, + "logps/rejected": -258.90533447265625, + "loss": 0.0053, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.989646911621094, + "rewards/margins": 9.243654251098633, + "rewards/rejected": -17.233301162719727, + "step": 2515 + }, + { + "epoch": 4.04, + "learning_rate": 3.0628220372572333e-07, + "logits/chosen": -1.6648001670837402, + "logits/rejected": -1.6920074224472046, + "logps/chosen": -119.31245422363281, + "logps/rejected": -224.4051513671875, + "loss": 0.0019, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9948253631591797, + "rewards/margins": 9.78790283203125, + "rewards/rejected": -13.78272819519043, + "step": 2516 + }, + { + "epoch": 4.04, + "learning_rate": 3.0618311533888224e-07, + "logits/chosen": -1.6737489700317383, + "logits/rejected": -1.7008857727050781, + "logps/chosen": -142.32736206054688, + "logps/rejected": -208.51507568359375, + "loss": 0.0025, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.365001678466797, + "rewards/margins": 7.777826309204102, + "rewards/rejected": -12.142827987670898, + "step": 2517 + }, + { + "epoch": 4.04, + "learning_rate": 3.060840269520412e-07, + "logits/chosen": -1.5795458555221558, + "logits/rejected": -1.6330132484436035, + "logps/chosen": -127.78146362304688, + "logps/rejected": -208.53094482421875, + "loss": 0.0023, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.783404588699341, + "rewards/margins": 7.175398826599121, + "rewards/rejected": -9.958803176879883, + "step": 2518 + }, + { + "epoch": 4.04, + "learning_rate": 3.0598493856520016e-07, + "logits/chosen": -1.5915993452072144, + "logits/rejected": -1.5115429162979126, + "logps/chosen": -94.33334350585938, + "logps/rejected": -171.54515075683594, + "loss": 0.0048, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5520060062408447, + "rewards/margins": 8.156268119812012, + "rewards/rejected": -9.708274841308594, + "step": 2519 + }, + { + "epoch": 4.04, + "learning_rate": 3.058858501783591e-07, + "logits/chosen": -1.5666792392730713, + "logits/rejected": -1.5621589422225952, + "logps/chosen": -111.59307861328125, + "logps/rejected": -227.45492553710938, + "loss": 0.0289, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.135239601135254, + "rewards/margins": 9.754109382629395, + "rewards/rejected": -13.889348983764648, + "step": 2520 + }, + { + "epoch": 4.05, + "learning_rate": 3.0578676179151803e-07, + "logits/chosen": -1.6642191410064697, + "logits/rejected": -1.6133276224136353, + "logps/chosen": -137.95394897460938, + "logps/rejected": -217.50267028808594, + "loss": 0.0019, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.247903347015381, + "rewards/margins": 8.80392074584961, + "rewards/rejected": -14.051824569702148, + "step": 2521 + }, + { + "epoch": 4.05, + "learning_rate": 3.0568767340467693e-07, + "logits/chosen": -1.551016092300415, + "logits/rejected": -1.655708909034729, + "logps/chosen": -144.3777313232422, + "logps/rejected": -258.6844787597656, + "loss": 0.0015, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.179811000823975, + "rewards/margins": 9.069574356079102, + "rewards/rejected": -16.249385833740234, + "step": 2522 + }, + { + "epoch": 4.05, + "learning_rate": 3.055885850178359e-07, + "logits/chosen": -1.7133549451828003, + "logits/rejected": -1.6551358699798584, + "logps/chosen": -68.69253540039062, + "logps/rejected": -158.48635864257812, + "loss": 0.0105, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8848367929458618, + "rewards/margins": 9.9399995803833, + "rewards/rejected": -10.824836730957031, + "step": 2523 + }, + { + "epoch": 4.05, + "learning_rate": 3.054894966309948e-07, + "logits/chosen": -1.6241545677185059, + "logits/rejected": -1.6490187644958496, + "logps/chosen": -118.51455688476562, + "logps/rejected": -221.5877685546875, + "loss": 0.0095, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.2495946884155273, + "rewards/margins": 9.885830879211426, + "rewards/rejected": -13.13542652130127, + "step": 2524 + }, + { + "epoch": 4.05, + "learning_rate": 3.053904082441538e-07, + "logits/chosen": -1.5949119329452515, + "logits/rejected": -1.5709125995635986, + "logps/chosen": -152.59283447265625, + "logps/rejected": -259.5760803222656, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.79520320892334, + "rewards/margins": 9.761048316955566, + "rewards/rejected": -14.556251525878906, + "step": 2525 + }, + { + "epoch": 4.05, + "learning_rate": 3.052913198573127e-07, + "logits/chosen": -1.6660544872283936, + "logits/rejected": -1.730514645576477, + "logps/chosen": -119.88259887695312, + "logps/rejected": -216.29183959960938, + "loss": 0.0017, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.487823486328125, + "rewards/margins": 9.465367317199707, + "rewards/rejected": -12.953190803527832, + "step": 2526 + }, + { + "epoch": 4.06, + "learning_rate": 3.0519223147047163e-07, + "logits/chosen": -1.7136340141296387, + "logits/rejected": -1.679704189300537, + "logps/chosen": -132.6939697265625, + "logps/rejected": -217.55227661132812, + "loss": 0.0063, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.650230407714844, + "rewards/margins": 8.351633071899414, + "rewards/rejected": -14.001862525939941, + "step": 2527 + }, + { + "epoch": 4.06, + "learning_rate": 3.050931430836306e-07, + "logits/chosen": -1.5165984630584717, + "logits/rejected": -1.5513417720794678, + "logps/chosen": -135.3675994873047, + "logps/rejected": -242.787109375, + "loss": 0.0191, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.577577590942383, + "rewards/margins": 9.986881256103516, + "rewards/rejected": -15.564458847045898, + "step": 2528 + }, + { + "epoch": 4.06, + "learning_rate": 3.049940546967895e-07, + "logits/chosen": -1.6378387212753296, + "logits/rejected": -1.6323111057281494, + "logps/chosen": -143.7834930419922, + "logps/rejected": -266.67132568359375, + "loss": 0.0018, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.5051000118255615, + "rewards/margins": 11.801870346069336, + "rewards/rejected": -15.306970596313477, + "step": 2529 + }, + { + "epoch": 4.06, + "learning_rate": 3.048949663099485e-07, + "logits/chosen": -1.5396989583969116, + "logits/rejected": -1.5699775218963623, + "logps/chosen": -104.88870239257812, + "logps/rejected": -198.44334411621094, + "loss": 0.0043, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.284257650375366, + "rewards/margins": 7.72358512878418, + "rewards/rejected": -11.007843017578125, + "step": 2530 + }, + { + "epoch": 4.06, + "learning_rate": 3.047958779231074e-07, + "logits/chosen": -1.7101026773452759, + "logits/rejected": -1.7011549472808838, + "logps/chosen": -150.27099609375, + "logps/rejected": -228.33152770996094, + "loss": 0.0024, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.2168498039245605, + "rewards/margins": 7.95550537109375, + "rewards/rejected": -13.172356605529785, + "step": 2531 + }, + { + "epoch": 4.06, + "learning_rate": 3.046967895362663e-07, + "logits/chosen": -1.6548919677734375, + "logits/rejected": -1.6244680881500244, + "logps/chosen": -157.78302001953125, + "logps/rejected": -287.2731018066406, + "loss": 0.0073, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.519103050231934, + "rewards/margins": 12.98355484008789, + "rewards/rejected": -18.50265884399414, + "step": 2532 + }, + { + "epoch": 4.07, + "learning_rate": 3.045977011494253e-07, + "logits/chosen": -1.5498991012573242, + "logits/rejected": -1.624609351158142, + "logps/chosen": -133.81573486328125, + "logps/rejected": -278.434814453125, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9922564029693604, + "rewards/margins": 14.223941802978516, + "rewards/rejected": -17.216197967529297, + "step": 2533 + }, + { + "epoch": 4.07, + "learning_rate": 3.044986127625842e-07, + "logits/chosen": -1.77744722366333, + "logits/rejected": -1.6740620136260986, + "logps/chosen": -131.01593017578125, + "logps/rejected": -225.44760131835938, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.649707794189453, + "rewards/margins": 10.70853042602539, + "rewards/rejected": -15.358236312866211, + "step": 2534 + }, + { + "epoch": 4.07, + "learning_rate": 3.0439952437574315e-07, + "logits/chosen": -1.4686803817749023, + "logits/rejected": -1.5744339227676392, + "logps/chosen": -82.48048400878906, + "logps/rejected": -192.45944213867188, + "loss": 0.0019, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.492063522338867, + "rewards/margins": 9.67403793334961, + "rewards/rejected": -12.166101455688477, + "step": 2535 + }, + { + "epoch": 4.07, + "learning_rate": 3.043004359889021e-07, + "logits/chosen": -1.5441677570343018, + "logits/rejected": -1.5157268047332764, + "logps/chosen": -123.55418395996094, + "logps/rejected": -191.66433715820312, + "loss": 0.0043, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.622587203979492, + "rewards/margins": 7.4192705154418945, + "rewards/rejected": -12.041857719421387, + "step": 2536 + }, + { + "epoch": 4.07, + "learning_rate": 3.04201347602061e-07, + "logits/chosen": -1.6571245193481445, + "logits/rejected": -1.6640400886535645, + "logps/chosen": -138.4080352783203, + "logps/rejected": -201.17044067382812, + "loss": 0.0304, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.673321723937988, + "rewards/margins": 6.3547892570495605, + "rewards/rejected": -12.028111457824707, + "step": 2537 + }, + { + "epoch": 4.07, + "learning_rate": 3.0410225921521997e-07, + "logits/chosen": -1.8054652214050293, + "logits/rejected": -1.6736958026885986, + "logps/chosen": -126.06246948242188, + "logps/rejected": -232.80490112304688, + "loss": 0.011, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.5187931060791016, + "rewards/margins": 10.52789306640625, + "rewards/rejected": -14.046686172485352, + "step": 2538 + }, + { + "epoch": 4.08, + "learning_rate": 3.040031708283789e-07, + "logits/chosen": -1.6055035591125488, + "logits/rejected": -1.6108424663543701, + "logps/chosen": -159.64442443847656, + "logps/rejected": -233.33419799804688, + "loss": 0.0083, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.387627601623535, + "rewards/margins": 6.720761775970459, + "rewards/rejected": -13.108388900756836, + "step": 2539 + }, + { + "epoch": 4.08, + "learning_rate": 3.039040824415378e-07, + "logits/chosen": -1.5428630113601685, + "logits/rejected": -1.5103716850280762, + "logps/chosen": -163.2515106201172, + "logps/rejected": -216.48638916015625, + "loss": 0.0045, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.328344345092773, + "rewards/margins": 6.713250160217285, + "rewards/rejected": -14.041594505310059, + "step": 2540 + }, + { + "epoch": 4.08, + "learning_rate": 3.038049940546968e-07, + "logits/chosen": -1.7772529125213623, + "logits/rejected": -1.683284878730774, + "logps/chosen": -148.89048767089844, + "logps/rejected": -216.6741943359375, + "loss": 0.0017, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.621286392211914, + "rewards/margins": 8.243154525756836, + "rewards/rejected": -12.86444091796875, + "step": 2541 + }, + { + "epoch": 4.08, + "learning_rate": 3.037059056678557e-07, + "logits/chosen": -1.5589646100997925, + "logits/rejected": -1.4745087623596191, + "logps/chosen": -138.81468200683594, + "logps/rejected": -225.5457305908203, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7542495727539062, + "rewards/margins": 8.419733047485352, + "rewards/rejected": -12.173982620239258, + "step": 2542 + }, + { + "epoch": 4.08, + "learning_rate": 3.0360681728101467e-07, + "logits/chosen": -1.6079130172729492, + "logits/rejected": -1.6610912084579468, + "logps/chosen": -130.51315307617188, + "logps/rejected": -242.71090698242188, + "loss": 0.0247, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.121193885803223, + "rewards/margins": 10.760823249816895, + "rewards/rejected": -14.882017135620117, + "step": 2543 + }, + { + "epoch": 4.08, + "learning_rate": 3.0350772889417357e-07, + "logits/chosen": -1.6654467582702637, + "logits/rejected": -1.7516627311706543, + "logps/chosen": -153.14715576171875, + "logps/rejected": -258.56219482421875, + "loss": 0.0229, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.577592372894287, + "rewards/margins": 10.29360580444336, + "rewards/rejected": -15.871198654174805, + "step": 2544 + }, + { + "epoch": 4.09, + "learning_rate": 3.034086405073325e-07, + "logits/chosen": -1.5852079391479492, + "logits/rejected": -1.5281107425689697, + "logps/chosen": -128.21902465820312, + "logps/rejected": -233.83935546875, + "loss": 0.0091, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.8453893661499023, + "rewards/margins": 11.467109680175781, + "rewards/rejected": -15.312499046325684, + "step": 2545 + }, + { + "epoch": 4.09, + "learning_rate": 3.033095521204915e-07, + "logits/chosen": -1.6739469766616821, + "logits/rejected": -1.6677422523498535, + "logps/chosen": -132.0171356201172, + "logps/rejected": -239.90591430664062, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.345278739929199, + "rewards/margins": 10.095596313476562, + "rewards/rejected": -14.440876007080078, + "step": 2546 + }, + { + "epoch": 4.09, + "learning_rate": 3.032104637336504e-07, + "logits/chosen": -1.6212173700332642, + "logits/rejected": -1.5505297183990479, + "logps/chosen": -120.74028778076172, + "logps/rejected": -227.01953125, + "loss": 0.0015, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.5874392986297607, + "rewards/margins": 10.901639938354492, + "rewards/rejected": -14.489079475402832, + "step": 2547 + }, + { + "epoch": 4.09, + "learning_rate": 3.0311137534680936e-07, + "logits/chosen": -1.5746961832046509, + "logits/rejected": -1.7024953365325928, + "logps/chosen": -115.47480773925781, + "logps/rejected": -196.77029418945312, + "loss": 0.0077, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.676499366760254, + "rewards/margins": 7.817974090576172, + "rewards/rejected": -10.494474411010742, + "step": 2548 + }, + { + "epoch": 4.09, + "learning_rate": 3.0301228695996827e-07, + "logits/chosen": -1.5974124670028687, + "logits/rejected": -1.5515084266662598, + "logps/chosen": -152.17413330078125, + "logps/rejected": -230.42422485351562, + "loss": 0.0019, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.098540306091309, + "rewards/margins": 9.398162841796875, + "rewards/rejected": -15.496703147888184, + "step": 2549 + }, + { + "epoch": 4.09, + "learning_rate": 3.0291319857312717e-07, + "logits/chosen": -1.6033787727355957, + "logits/rejected": -1.6426105499267578, + "logps/chosen": -121.32392883300781, + "logps/rejected": -262.5384521484375, + "loss": 0.0076, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.8127214908599854, + "rewards/margins": 13.885164260864258, + "rewards/rejected": -17.697885513305664, + "step": 2550 + }, + { + "epoch": 4.09, + "learning_rate": 3.028141101862862e-07, + "logits/chosen": -1.6033382415771484, + "logits/rejected": -1.685429334640503, + "logps/chosen": -126.35247802734375, + "logps/rejected": -243.22544860839844, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.649021625518799, + "rewards/margins": 11.712108612060547, + "rewards/rejected": -16.361129760742188, + "step": 2551 + }, + { + "epoch": 4.1, + "learning_rate": 3.027150217994451e-07, + "logits/chosen": -1.6666128635406494, + "logits/rejected": -1.738156795501709, + "logps/chosen": -122.02116394042969, + "logps/rejected": -259.8206787109375, + "loss": 0.002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.637908935546875, + "rewards/margins": 13.970306396484375, + "rewards/rejected": -18.60821533203125, + "step": 2552 + }, + { + "epoch": 4.1, + "learning_rate": 3.0261593341260405e-07, + "logits/chosen": -1.649677038192749, + "logits/rejected": -1.67061448097229, + "logps/chosen": -110.00238037109375, + "logps/rejected": -278.99560546875, + "loss": 0.0024, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8164026737213135, + "rewards/margins": 14.028656959533691, + "rewards/rejected": -16.845060348510742, + "step": 2553 + }, + { + "epoch": 4.1, + "learning_rate": 3.0251684502576296e-07, + "logits/chosen": -1.6582057476043701, + "logits/rejected": -1.6588548421859741, + "logps/chosen": -122.07439422607422, + "logps/rejected": -181.16204833984375, + "loss": 0.0042, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.212009906768799, + "rewards/margins": 6.352017879486084, + "rewards/rejected": -10.564027786254883, + "step": 2554 + }, + { + "epoch": 4.1, + "learning_rate": 3.0241775663892187e-07, + "logits/chosen": -1.634881615638733, + "logits/rejected": -1.5738052129745483, + "logps/chosen": -147.9556884765625, + "logps/rejected": -210.35430908203125, + "loss": 0.046, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.146516799926758, + "rewards/margins": 8.395234107971191, + "rewards/rejected": -12.54175090789795, + "step": 2555 + }, + { + "epoch": 4.1, + "learning_rate": 3.023186682520809e-07, + "logits/chosen": -1.6197564601898193, + "logits/rejected": -1.67807137966156, + "logps/chosen": -117.95576477050781, + "logps/rejected": -230.28277587890625, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.085933685302734, + "rewards/margins": 9.766239166259766, + "rewards/rejected": -13.852171897888184, + "step": 2556 + }, + { + "epoch": 4.1, + "learning_rate": 3.022195798652398e-07, + "logits/chosen": -1.6173584461212158, + "logits/rejected": -1.5965694189071655, + "logps/chosen": -187.7185516357422, + "logps/rejected": -282.83953857421875, + "loss": 0.0093, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.631080627441406, + "rewards/margins": 10.573331832885742, + "rewards/rejected": -17.20441246032715, + "step": 2557 + }, + { + "epoch": 4.11, + "learning_rate": 3.0212049147839875e-07, + "logits/chosen": -1.6408036947250366, + "logits/rejected": -1.7023413181304932, + "logps/chosen": -114.13460540771484, + "logps/rejected": -250.00772094726562, + "loss": 0.0024, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.787585973739624, + "rewards/margins": 11.493175506591797, + "rewards/rejected": -15.28076171875, + "step": 2558 + }, + { + "epoch": 4.11, + "learning_rate": 3.0202140309155765e-07, + "logits/chosen": -1.668805718421936, + "logits/rejected": -1.6198583841323853, + "logps/chosen": -131.968505859375, + "logps/rejected": -209.96658325195312, + "loss": 0.0122, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.04791259765625, + "rewards/margins": 9.493781089782715, + "rewards/rejected": -13.541693687438965, + "step": 2559 + }, + { + "epoch": 4.11, + "learning_rate": 3.0192231470471656e-07, + "logits/chosen": -1.6645163297653198, + "logits/rejected": -1.6628834009170532, + "logps/chosen": -147.61532592773438, + "logps/rejected": -259.4848937988281, + "loss": 0.0031, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.373167991638184, + "rewards/margins": 11.232403755187988, + "rewards/rejected": -16.605571746826172, + "step": 2560 + }, + { + "epoch": 4.11, + "learning_rate": 3.0182322631787557e-07, + "logits/chosen": -1.5379629135131836, + "logits/rejected": -1.588653326034546, + "logps/chosen": -122.39176940917969, + "logps/rejected": -205.7824249267578, + "loss": 0.0097, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.185626983642578, + "rewards/margins": 7.4109649658203125, + "rewards/rejected": -11.59659194946289, + "step": 2561 + }, + { + "epoch": 4.11, + "learning_rate": 3.017241379310345e-07, + "logits/chosen": -1.5906161069869995, + "logits/rejected": -1.5917664766311646, + "logps/chosen": -126.44559478759766, + "logps/rejected": -187.01210021972656, + "loss": 0.0038, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.393872261047363, + "rewards/margins": 7.085519313812256, + "rewards/rejected": -11.479392051696777, + "step": 2562 + }, + { + "epoch": 4.11, + "learning_rate": 3.0162504954419344e-07, + "logits/chosen": -1.789567470550537, + "logits/rejected": -1.7433922290802002, + "logps/chosen": -134.07528686523438, + "logps/rejected": -246.52505493164062, + "loss": 0.0079, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.736633777618408, + "rewards/margins": 10.419671058654785, + "rewards/rejected": -15.156304359436035, + "step": 2563 + }, + { + "epoch": 4.12, + "learning_rate": 3.0152596115735234e-07, + "logits/chosen": -1.5609800815582275, + "logits/rejected": -1.6218181848526, + "logps/chosen": -134.80758666992188, + "logps/rejected": -254.07171630859375, + "loss": 0.0019, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.735422611236572, + "rewards/margins": 9.650691986083984, + "rewards/rejected": -15.386113166809082, + "step": 2564 + }, + { + "epoch": 4.12, + "learning_rate": 3.0142687277051125e-07, + "logits/chosen": -1.7480415105819702, + "logits/rejected": -1.7085357904434204, + "logps/chosen": -103.86094665527344, + "logps/rejected": -210.26858520507812, + "loss": 0.011, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.009929895401001, + "rewards/margins": 10.821799278259277, + "rewards/rejected": -13.831729888916016, + "step": 2565 + }, + { + "epoch": 4.12, + "learning_rate": 3.013277843836702e-07, + "logits/chosen": -1.7534701824188232, + "logits/rejected": -1.7408908605575562, + "logps/chosen": -142.44049072265625, + "logps/rejected": -248.9718017578125, + "loss": 0.0084, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.976529598236084, + "rewards/margins": 10.983325004577637, + "rewards/rejected": -13.959854125976562, + "step": 2566 + }, + { + "epoch": 4.12, + "learning_rate": 3.0122869599682917e-07, + "logits/chosen": -1.7440170049667358, + "logits/rejected": -1.8171173334121704, + "logps/chosen": -104.28142547607422, + "logps/rejected": -216.24508666992188, + "loss": 0.0164, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.2942423820495605, + "rewards/margins": 10.07826042175293, + "rewards/rejected": -14.372503280639648, + "step": 2567 + }, + { + "epoch": 4.12, + "learning_rate": 3.0112960760998813e-07, + "logits/chosen": -1.7844855785369873, + "logits/rejected": -1.6897716522216797, + "logps/chosen": -159.11456298828125, + "logps/rejected": -208.8516387939453, + "loss": 0.0025, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.382807731628418, + "rewards/margins": 8.046588897705078, + "rewards/rejected": -12.429396629333496, + "step": 2568 + }, + { + "epoch": 4.12, + "learning_rate": 3.0103051922314704e-07, + "logits/chosen": -1.479152798652649, + "logits/rejected": -1.5450966358184814, + "logps/chosen": -117.27633666992188, + "logps/rejected": -198.5849609375, + "loss": 0.0098, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.834822177886963, + "rewards/margins": 6.878600120544434, + "rewards/rejected": -11.713421821594238, + "step": 2569 + }, + { + "epoch": 4.13, + "learning_rate": 3.0093143083630594e-07, + "logits/chosen": -1.7441937923431396, + "logits/rejected": -1.7946717739105225, + "logps/chosen": -140.7435302734375, + "logps/rejected": -257.0694580078125, + "loss": 0.002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.195028781890869, + "rewards/margins": 10.498964309692383, + "rewards/rejected": -15.693994522094727, + "step": 2570 + }, + { + "epoch": 4.13, + "learning_rate": 3.008323424494649e-07, + "logits/chosen": -1.685577154159546, + "logits/rejected": -1.693581461906433, + "logps/chosen": -149.2362518310547, + "logps/rejected": -243.56814575195312, + "loss": 0.006, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.435965538024902, + "rewards/margins": 8.835838317871094, + "rewards/rejected": -16.27180290222168, + "step": 2571 + }, + { + "epoch": 4.13, + "learning_rate": 3.0073325406262386e-07, + "logits/chosen": -1.717738389968872, + "logits/rejected": -1.6944881677627563, + "logps/chosen": -141.2256622314453, + "logps/rejected": -200.75100708007812, + "loss": 0.0165, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.498356819152832, + "rewards/margins": 6.390169143676758, + "rewards/rejected": -11.88852596282959, + "step": 2572 + }, + { + "epoch": 4.13, + "learning_rate": 3.0063416567578277e-07, + "logits/chosen": -1.6185858249664307, + "logits/rejected": -1.7014288902282715, + "logps/chosen": -161.23318481445312, + "logps/rejected": -283.177734375, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.844430923461914, + "rewards/margins": 10.8836669921875, + "rewards/rejected": -17.728097915649414, + "step": 2573 + }, + { + "epoch": 4.13, + "learning_rate": 3.0053507728894173e-07, + "logits/chosen": -1.5734784603118896, + "logits/rejected": -1.5832760334014893, + "logps/chosen": -123.55335235595703, + "logps/rejected": -213.60935974121094, + "loss": 0.0047, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.444847583770752, + "rewards/margins": 7.660919189453125, + "rewards/rejected": -12.105767250061035, + "step": 2574 + }, + { + "epoch": 4.13, + "learning_rate": 3.0043598890210064e-07, + "logits/chosen": -1.56904137134552, + "logits/rejected": -1.6897714138031006, + "logps/chosen": -118.52400207519531, + "logps/rejected": -236.43255615234375, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.912258625030518, + "rewards/margins": 8.800154685974121, + "rewards/rejected": -13.712413787841797, + "step": 2575 + }, + { + "epoch": 4.13, + "learning_rate": 3.003369005152596e-07, + "logits/chosen": -1.5309813022613525, + "logits/rejected": -1.566010594367981, + "logps/chosen": -102.79072570800781, + "logps/rejected": -212.0646209716797, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.0056135654449463, + "rewards/margins": 10.745924949645996, + "rewards/rejected": -13.75153923034668, + "step": 2576 + }, + { + "epoch": 4.14, + "learning_rate": 3.0023781212841856e-07, + "logits/chosen": -1.6338467597961426, + "logits/rejected": -1.6406478881835938, + "logps/chosen": -133.89312744140625, + "logps/rejected": -248.25576782226562, + "loss": 0.0034, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.517811298370361, + "rewards/margins": 11.816007614135742, + "rewards/rejected": -16.333818435668945, + "step": 2577 + }, + { + "epoch": 4.14, + "learning_rate": 3.0013872374157746e-07, + "logits/chosen": -1.4842942953109741, + "logits/rejected": -1.4959633350372314, + "logps/chosen": -86.3606948852539, + "logps/rejected": -172.97286987304688, + "loss": 0.0055, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5633327960968018, + "rewards/margins": 9.036480903625488, + "rewards/rejected": -11.599813461303711, + "step": 2578 + }, + { + "epoch": 4.14, + "learning_rate": 3.000396353547364e-07, + "logits/chosen": -1.5501203536987305, + "logits/rejected": -1.5455594062805176, + "logps/chosen": -126.81220245361328, + "logps/rejected": -241.97940063476562, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.425923824310303, + "rewards/margins": 10.954816818237305, + "rewards/rejected": -15.380741119384766, + "step": 2579 + }, + { + "epoch": 4.14, + "learning_rate": 2.9994054696789533e-07, + "logits/chosen": -1.6996294260025024, + "logits/rejected": -1.6044483184814453, + "logps/chosen": -164.78091430664062, + "logps/rejected": -219.7285919189453, + "loss": 0.0038, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.516902923583984, + "rewards/margins": 6.464900970458984, + "rewards/rejected": -12.981803894042969, + "step": 2580 + }, + { + "epoch": 4.14, + "learning_rate": 2.998414585810543e-07, + "logits/chosen": -1.5503407716751099, + "logits/rejected": -1.5917922258377075, + "logps/chosen": -103.84812927246094, + "logps/rejected": -227.26223754882812, + "loss": 0.0028, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.013312339782715, + "rewards/margins": 10.86255168914795, + "rewards/rejected": -14.875863075256348, + "step": 2581 + }, + { + "epoch": 4.14, + "learning_rate": 2.997423701942132e-07, + "logits/chosen": -1.5587021112442017, + "logits/rejected": -1.6382226943969727, + "logps/chosen": -113.75021362304688, + "logps/rejected": -195.59405517578125, + "loss": 0.0051, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.2157111167907715, + "rewards/margins": 7.25003719329834, + "rewards/rejected": -11.46574878692627, + "step": 2582 + }, + { + "epoch": 4.15, + "learning_rate": 2.9964328180737216e-07, + "logits/chosen": -1.6200093030929565, + "logits/rejected": -1.560253381729126, + "logps/chosen": -127.14789581298828, + "logps/rejected": -207.91180419921875, + "loss": 0.0038, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.836456298828125, + "rewards/margins": 9.17413330078125, + "rewards/rejected": -13.010589599609375, + "step": 2583 + }, + { + "epoch": 4.15, + "learning_rate": 2.995441934205311e-07, + "logits/chosen": -1.454539179801941, + "logits/rejected": -1.5354617834091187, + "logps/chosen": -117.69158935546875, + "logps/rejected": -240.65093994140625, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.230213165283203, + "rewards/margins": 11.456697463989258, + "rewards/rejected": -15.686910629272461, + "step": 2584 + }, + { + "epoch": 4.15, + "learning_rate": 2.9944510503369e-07, + "logits/chosen": -1.5963188409805298, + "logits/rejected": -1.7004926204681396, + "logps/chosen": -97.3895263671875, + "logps/rejected": -225.81631469726562, + "loss": 0.0023, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.391481399536133, + "rewards/margins": 9.83773422241211, + "rewards/rejected": -13.229215621948242, + "step": 2585 + }, + { + "epoch": 4.15, + "learning_rate": 2.99346016646849e-07, + "logits/chosen": -1.742470383644104, + "logits/rejected": -1.6578419208526611, + "logps/chosen": -168.33013916015625, + "logps/rejected": -247.6545867919922, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.170949935913086, + "rewards/margins": 10.295636177062988, + "rewards/rejected": -15.466585159301758, + "step": 2586 + }, + { + "epoch": 4.15, + "learning_rate": 2.992469282600079e-07, + "logits/chosen": -1.524503231048584, + "logits/rejected": -1.5889770984649658, + "logps/chosen": -132.502197265625, + "logps/rejected": -225.08746337890625, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.928086757659912, + "rewards/margins": 8.00860595703125, + "rewards/rejected": -13.936694145202637, + "step": 2587 + }, + { + "epoch": 4.15, + "learning_rate": 2.9914783987316685e-07, + "logits/chosen": -1.4620314836502075, + "logits/rejected": -1.5217745304107666, + "logps/chosen": -122.49758911132812, + "logps/rejected": -239.345458984375, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9297432899475098, + "rewards/margins": 10.555765151977539, + "rewards/rejected": -14.48550796508789, + "step": 2588 + }, + { + "epoch": 4.16, + "learning_rate": 2.990487514863258e-07, + "logits/chosen": -1.5210789442062378, + "logits/rejected": -1.50363028049469, + "logps/chosen": -140.1033172607422, + "logps/rejected": -264.50567626953125, + "loss": 0.0038, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.304030418395996, + "rewards/margins": 12.517658233642578, + "rewards/rejected": -18.82168960571289, + "step": 2589 + }, + { + "epoch": 4.16, + "learning_rate": 2.989496630994847e-07, + "logits/chosen": -1.472170352935791, + "logits/rejected": -1.5807462930679321, + "logps/chosen": -143.32870483398438, + "logps/rejected": -237.32826232910156, + "loss": 0.0026, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.140076637268066, + "rewards/margins": 8.737728118896484, + "rewards/rejected": -13.87780475616455, + "step": 2590 + }, + { + "epoch": 4.16, + "learning_rate": 2.988505747126437e-07, + "logits/chosen": -1.571116328239441, + "logits/rejected": -1.5420236587524414, + "logps/chosen": -163.24951171875, + "logps/rejected": -241.69207763671875, + "loss": 0.0018, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.0404887199401855, + "rewards/margins": 9.048639297485352, + "rewards/rejected": -15.089127540588379, + "step": 2591 + }, + { + "epoch": 4.16, + "learning_rate": 2.987514863258026e-07, + "logits/chosen": -1.679246425628662, + "logits/rejected": -1.574311375617981, + "logps/chosen": -127.45072174072266, + "logps/rejected": -221.72891235351562, + "loss": 0.0056, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7748565673828125, + "rewards/margins": 9.633292198181152, + "rewards/rejected": -13.408148765563965, + "step": 2592 + }, + { + "epoch": 4.16, + "learning_rate": 2.9865239793896154e-07, + "logits/chosen": -1.7165629863739014, + "logits/rejected": -1.619974136352539, + "logps/chosen": -116.19605255126953, + "logps/rejected": -204.73265075683594, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.4122250080108643, + "rewards/margins": 11.371054649353027, + "rewards/rejected": -14.783280372619629, + "step": 2593 + }, + { + "epoch": 4.16, + "learning_rate": 2.985533095521205e-07, + "logits/chosen": -1.7190942764282227, + "logits/rejected": -1.6454085111618042, + "logps/chosen": -168.05853271484375, + "logps/rejected": -259.9736633300781, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.330255031585693, + "rewards/margins": 10.43486213684082, + "rewards/rejected": -15.765117645263672, + "step": 2594 + }, + { + "epoch": 4.17, + "learning_rate": 2.984542211652794e-07, + "logits/chosen": -1.5647408962249756, + "logits/rejected": -1.561293125152588, + "logps/chosen": -119.53518676757812, + "logps/rejected": -240.32757568359375, + "loss": 0.0033, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.154478549957275, + "rewards/margins": 11.267559051513672, + "rewards/rejected": -15.422038078308105, + "step": 2595 + }, + { + "epoch": 4.17, + "learning_rate": 2.9835513277843837e-07, + "logits/chosen": -1.5635526180267334, + "logits/rejected": -1.5776234865188599, + "logps/chosen": -164.2215576171875, + "logps/rejected": -240.31536865234375, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.346957206726074, + "rewards/margins": 7.822229385375977, + "rewards/rejected": -14.16918659210205, + "step": 2596 + }, + { + "epoch": 4.17, + "learning_rate": 2.982560443915973e-07, + "logits/chosen": -1.662605881690979, + "logits/rejected": -1.648646593093872, + "logps/chosen": -122.19505310058594, + "logps/rejected": -172.97988891601562, + "loss": 0.0041, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.8155975341796875, + "rewards/margins": 5.74720573425293, + "rewards/rejected": -9.562803268432617, + "step": 2597 + }, + { + "epoch": 4.17, + "learning_rate": 2.9815695600475624e-07, + "logits/chosen": -1.5425796508789062, + "logits/rejected": -1.5020132064819336, + "logps/chosen": -148.14537048339844, + "logps/rejected": -241.3771514892578, + "loss": 0.002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.6546406745910645, + "rewards/margins": 9.21835994720459, + "rewards/rejected": -15.873001098632812, + "step": 2598 + }, + { + "epoch": 4.17, + "learning_rate": 2.980578676179152e-07, + "logits/chosen": -1.7624741792678833, + "logits/rejected": -1.7617411613464355, + "logps/chosen": -140.3846435546875, + "logps/rejected": -229.33656311035156, + "loss": 0.0015, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7605934143066406, + "rewards/margins": 10.07968807220459, + "rewards/rejected": -13.84028148651123, + "step": 2599 + }, + { + "epoch": 4.17, + "learning_rate": 2.979587792310741e-07, + "logits/chosen": -1.6464412212371826, + "logits/rejected": -1.6403224468231201, + "logps/chosen": -120.69850158691406, + "logps/rejected": -237.12161254882812, + "loss": 0.116, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.063785552978516, + "rewards/margins": 10.305391311645508, + "rewards/rejected": -14.369176864624023, + "step": 2600 + }, + { + "epoch": 4.17, + "learning_rate": 2.9785969084423306e-07, + "logits/chosen": -1.5858052968978882, + "logits/rejected": -1.5251717567443848, + "logps/chosen": -144.09173583984375, + "logps/rejected": -228.1300811767578, + "loss": 0.0031, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.2009968757629395, + "rewards/margins": 9.794378280639648, + "rewards/rejected": -14.995375633239746, + "step": 2601 + }, + { + "epoch": 4.18, + "learning_rate": 2.9776060245739197e-07, + "logits/chosen": -1.604098916053772, + "logits/rejected": -1.636525273323059, + "logps/chosen": -102.31597900390625, + "logps/rejected": -198.59017944335938, + "loss": 0.0032, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.367818832397461, + "rewards/margins": 10.49850082397461, + "rewards/rejected": -12.86631965637207, + "step": 2602 + }, + { + "epoch": 4.18, + "learning_rate": 2.976615140705509e-07, + "logits/chosen": -1.543642520904541, + "logits/rejected": -1.6041741371154785, + "logps/chosen": -129.02154541015625, + "logps/rejected": -228.4657745361328, + "loss": 0.0037, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8073549270629883, + "rewards/margins": 10.307844161987305, + "rewards/rejected": -13.11520004272461, + "step": 2603 + }, + { + "epoch": 4.18, + "learning_rate": 2.975624256837099e-07, + "logits/chosen": -1.4940179586410522, + "logits/rejected": -1.5145294666290283, + "logps/chosen": -173.17755126953125, + "logps/rejected": -246.92498779296875, + "loss": 0.0181, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.378036975860596, + "rewards/margins": 7.123447895050049, + "rewards/rejected": -14.501483917236328, + "step": 2604 + }, + { + "epoch": 4.18, + "learning_rate": 2.974633372968688e-07, + "logits/chosen": -1.712327241897583, + "logits/rejected": -1.733378291130066, + "logps/chosen": -105.77377319335938, + "logps/rejected": -227.8582763671875, + "loss": 0.0025, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1166789531707764, + "rewards/margins": 11.488521575927734, + "rewards/rejected": -14.605198860168457, + "step": 2605 + }, + { + "epoch": 4.18, + "learning_rate": 2.973642489100277e-07, + "logits/chosen": -1.5718108415603638, + "logits/rejected": -1.5769075155258179, + "logps/chosen": -128.98143005371094, + "logps/rejected": -208.38546752929688, + "loss": 0.0019, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.580320358276367, + "rewards/margins": 7.900373935699463, + "rewards/rejected": -12.480694770812988, + "step": 2606 + }, + { + "epoch": 4.18, + "learning_rate": 2.9726516052318666e-07, + "logits/chosen": -1.5935933589935303, + "logits/rejected": -1.6050814390182495, + "logps/chosen": -126.69969177246094, + "logps/rejected": -220.27598571777344, + "loss": 0.0147, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.5989503860473633, + "rewards/margins": 10.732498168945312, + "rewards/rejected": -14.33144760131836, + "step": 2607 + }, + { + "epoch": 4.19, + "learning_rate": 2.9716607213634557e-07, + "logits/chosen": -1.6414810419082642, + "logits/rejected": -1.768264889717102, + "logps/chosen": -141.17935180664062, + "logps/rejected": -279.7181091308594, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.9601593017578125, + "rewards/margins": 12.411981582641602, + "rewards/rejected": -17.372140884399414, + "step": 2608 + }, + { + "epoch": 4.19, + "learning_rate": 2.970669837495046e-07, + "logits/chosen": -1.7081685066223145, + "logits/rejected": -1.631910800933838, + "logps/chosen": -161.190185546875, + "logps/rejected": -223.7712860107422, + "loss": 0.0079, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.197986125946045, + "rewards/margins": 8.058090209960938, + "rewards/rejected": -13.25607681274414, + "step": 2609 + }, + { + "epoch": 4.19, + "learning_rate": 2.969678953626635e-07, + "logits/chosen": -1.671266794204712, + "logits/rejected": -1.652588129043579, + "logps/chosen": -107.98770141601562, + "logps/rejected": -214.56912231445312, + "loss": 0.02, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.11541748046875, + "rewards/margins": 9.896196365356445, + "rewards/rejected": -14.011613845825195, + "step": 2610 + }, + { + "epoch": 4.19, + "learning_rate": 2.968688069758224e-07, + "logits/chosen": -1.39606773853302, + "logits/rejected": -1.3900809288024902, + "logps/chosen": -125.17507934570312, + "logps/rejected": -218.57386779785156, + "loss": 0.0107, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.4314990043640137, + "rewards/margins": 9.602810859680176, + "rewards/rejected": -13.034310340881348, + "step": 2611 + }, + { + "epoch": 4.19, + "learning_rate": 2.9676971858898135e-07, + "logits/chosen": -1.6833913326263428, + "logits/rejected": -1.681294560432434, + "logps/chosen": -105.56851959228516, + "logps/rejected": -217.75962829589844, + "loss": 0.003, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.327134609222412, + "rewards/margins": 10.370965003967285, + "rewards/rejected": -12.698100090026855, + "step": 2612 + }, + { + "epoch": 4.19, + "learning_rate": 2.9667063020214026e-07, + "logits/chosen": -1.5989315509796143, + "logits/rejected": -1.4526102542877197, + "logps/chosen": -164.85728454589844, + "logps/rejected": -224.6356964111328, + "loss": 0.0038, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.553280830383301, + "rewards/margins": 8.810578346252441, + "rewards/rejected": -14.363859176635742, + "step": 2613 + }, + { + "epoch": 4.2, + "learning_rate": 2.965715418152993e-07, + "logits/chosen": -1.6716057062149048, + "logits/rejected": -1.6490294933319092, + "logps/chosen": -128.25872802734375, + "logps/rejected": -217.73248291015625, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.069178104400635, + "rewards/margins": 8.339736938476562, + "rewards/rejected": -15.408914566040039, + "step": 2614 + }, + { + "epoch": 4.2, + "learning_rate": 2.964724534284582e-07, + "logits/chosen": -1.4590779542922974, + "logits/rejected": -1.5756938457489014, + "logps/chosen": -129.34506225585938, + "logps/rejected": -227.45455932617188, + "loss": 0.0028, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.6346306800842285, + "rewards/margins": 8.044358253479004, + "rewards/rejected": -12.67898941040039, + "step": 2615 + }, + { + "epoch": 4.2, + "learning_rate": 2.963733650416171e-07, + "logits/chosen": -1.6908906698226929, + "logits/rejected": -1.766894817352295, + "logps/chosen": -113.42901611328125, + "logps/rejected": -248.98570251464844, + "loss": 0.0026, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.78451681137085, + "rewards/margins": 10.98194694519043, + "rewards/rejected": -15.766463279724121, + "step": 2616 + }, + { + "epoch": 4.2, + "learning_rate": 2.9627427665477605e-07, + "logits/chosen": -1.600558876991272, + "logits/rejected": -1.5202388763427734, + "logps/chosen": -146.3251190185547, + "logps/rejected": -227.97462463378906, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.558346748352051, + "rewards/margins": 8.172496795654297, + "rewards/rejected": -13.730842590332031, + "step": 2617 + }, + { + "epoch": 4.2, + "learning_rate": 2.9617518826793495e-07, + "logits/chosen": -1.592308759689331, + "logits/rejected": -1.6360368728637695, + "logps/chosen": -120.90931701660156, + "logps/rejected": -184.81846618652344, + "loss": 0.0025, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.654166221618652, + "rewards/margins": 6.997293472290039, + "rewards/rejected": -11.651460647583008, + "step": 2618 + }, + { + "epoch": 4.2, + "learning_rate": 2.9607609988109397e-07, + "logits/chosen": -1.6177361011505127, + "logits/rejected": -1.6399503946304321, + "logps/chosen": -214.5901641845703, + "logps/rejected": -306.0205078125, + "loss": 0.0026, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.040821075439453, + "rewards/margins": 8.940839767456055, + "rewards/rejected": -18.98166275024414, + "step": 2619 + }, + { + "epoch": 4.21, + "learning_rate": 2.959770114942529e-07, + "logits/chosen": -1.6378214359283447, + "logits/rejected": -1.6378552913665771, + "logps/chosen": -126.52810668945312, + "logps/rejected": -203.5341339111328, + "loss": 0.0046, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.536712884902954, + "rewards/margins": 8.249670028686523, + "rewards/rejected": -11.786382675170898, + "step": 2620 + }, + { + "epoch": 4.21, + "learning_rate": 2.958779231074118e-07, + "logits/chosen": -1.5713030099868774, + "logits/rejected": -1.5706346035003662, + "logps/chosen": -117.23066711425781, + "logps/rejected": -251.73605346679688, + "loss": 0.0025, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.104074001312256, + "rewards/margins": 13.040127754211426, + "rewards/rejected": -17.144201278686523, + "step": 2621 + }, + { + "epoch": 4.21, + "learning_rate": 2.9577883472057074e-07, + "logits/chosen": -1.5413975715637207, + "logits/rejected": -1.5913618803024292, + "logps/chosen": -183.6466827392578, + "logps/rejected": -268.5638122558594, + "loss": 0.0041, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.479085445404053, + "rewards/margins": 6.674108505249023, + "rewards/rejected": -14.153193473815918, + "step": 2622 + }, + { + "epoch": 4.21, + "learning_rate": 2.9567974633372965e-07, + "logits/chosen": -1.6313267946243286, + "logits/rejected": -1.6778851747512817, + "logps/chosen": -102.08729553222656, + "logps/rejected": -215.1317138671875, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.39048433303833, + "rewards/margins": 9.797775268554688, + "rewards/rejected": -13.18825912475586, + "step": 2623 + }, + { + "epoch": 4.21, + "learning_rate": 2.9558065794688866e-07, + "logits/chosen": -1.6312676668167114, + "logits/rejected": -1.629957914352417, + "logps/chosen": -145.45391845703125, + "logps/rejected": -226.21234130859375, + "loss": 0.0027, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.25387716293335, + "rewards/margins": 8.332586288452148, + "rewards/rejected": -13.586463928222656, + "step": 2624 + }, + { + "epoch": 4.21, + "learning_rate": 2.9548156956004757e-07, + "logits/chosen": -1.5536497831344604, + "logits/rejected": -1.6098591089248657, + "logps/chosen": -114.69577026367188, + "logps/rejected": -271.6200256347656, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.895942449569702, + "rewards/margins": 12.759693145751953, + "rewards/rejected": -16.655635833740234, + "step": 2625 + }, + { + "epoch": 4.22, + "learning_rate": 2.953824811732065e-07, + "logits/chosen": -1.6172168254852295, + "logits/rejected": -1.5654453039169312, + "logps/chosen": -114.69781494140625, + "logps/rejected": -217.96771240234375, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.6442770957946777, + "rewards/margins": 11.275846481323242, + "rewards/rejected": -14.920122146606445, + "step": 2626 + }, + { + "epoch": 4.22, + "learning_rate": 2.9528339278636543e-07, + "logits/chosen": -1.6785448789596558, + "logits/rejected": -1.7810044288635254, + "logps/chosen": -139.0729217529297, + "logps/rejected": -254.35446166992188, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.176102638244629, + "rewards/margins": 10.639910697937012, + "rewards/rejected": -15.816012382507324, + "step": 2627 + }, + { + "epoch": 4.22, + "learning_rate": 2.9518430439952434e-07, + "logits/chosen": -1.5462268590927124, + "logits/rejected": -1.58240807056427, + "logps/chosen": -100.69804382324219, + "logps/rejected": -193.3378143310547, + "loss": 0.0021, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.55835223197937, + "rewards/margins": 10.003695487976074, + "rewards/rejected": -12.562047004699707, + "step": 2628 + }, + { + "epoch": 4.22, + "learning_rate": 2.950852160126833e-07, + "logits/chosen": -1.6230099201202393, + "logits/rejected": -1.5950812101364136, + "logps/chosen": -141.2525177001953, + "logps/rejected": -268.20867919921875, + "loss": 0.003, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.783052444458008, + "rewards/margins": 11.954780578613281, + "rewards/rejected": -16.73783302307129, + "step": 2629 + }, + { + "epoch": 4.22, + "learning_rate": 2.9498612762584226e-07, + "logits/chosen": -1.4799995422363281, + "logits/rejected": -1.551307201385498, + "logps/chosen": -121.23284912109375, + "logps/rejected": -212.97344970703125, + "loss": 0.004, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.5722405910491943, + "rewards/margins": 8.336201667785645, + "rewards/rejected": -11.908441543579102, + "step": 2630 + }, + { + "epoch": 4.22, + "learning_rate": 2.9488703923900117e-07, + "logits/chosen": -1.402214765548706, + "logits/rejected": -1.4962234497070312, + "logps/chosen": -103.02789306640625, + "logps/rejected": -214.31927490234375, + "loss": 0.0082, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.5429563522338867, + "rewards/margins": 9.565313339233398, + "rewards/rejected": -13.108268737792969, + "step": 2631 + }, + { + "epoch": 4.22, + "learning_rate": 2.9478795085216013e-07, + "logits/chosen": -1.5335835218429565, + "logits/rejected": -1.4797885417938232, + "logps/chosen": -134.79937744140625, + "logps/rejected": -231.7010040283203, + "loss": 0.0037, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.693737983703613, + "rewards/margins": 9.09432601928711, + "rewards/rejected": -13.788064956665039, + "step": 2632 + }, + { + "epoch": 4.23, + "learning_rate": 2.9468886246531903e-07, + "logits/chosen": -1.4911197423934937, + "logits/rejected": -1.5144861936569214, + "logps/chosen": -139.337646484375, + "logps/rejected": -223.69522094726562, + "loss": 0.0024, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.8994245529174805, + "rewards/margins": 8.586593627929688, + "rewards/rejected": -13.486018180847168, + "step": 2633 + }, + { + "epoch": 4.23, + "learning_rate": 2.94589774078478e-07, + "logits/chosen": -1.4754489660263062, + "logits/rejected": -1.5762310028076172, + "logps/chosen": -145.02239990234375, + "logps/rejected": -256.0067443847656, + "loss": 0.0504, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.152637004852295, + "rewards/margins": 9.32961368560791, + "rewards/rejected": -14.482251167297363, + "step": 2634 + }, + { + "epoch": 4.23, + "learning_rate": 2.9449068569163695e-07, + "logits/chosen": -1.3841404914855957, + "logits/rejected": -1.5288054943084717, + "logps/chosen": -120.25679779052734, + "logps/rejected": -245.59136962890625, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.515930414199829, + "rewards/margins": 9.74242115020752, + "rewards/rejected": -13.258352279663086, + "step": 2635 + }, + { + "epoch": 4.23, + "learning_rate": 2.9439159730479586e-07, + "logits/chosen": -1.516653060913086, + "logits/rejected": -1.5398778915405273, + "logps/chosen": -156.48106384277344, + "logps/rejected": -294.3954772949219, + "loss": 0.0047, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.712270736694336, + "rewards/margins": 11.332633972167969, + "rewards/rejected": -18.044906616210938, + "step": 2636 + }, + { + "epoch": 4.23, + "learning_rate": 2.942925089179548e-07, + "logits/chosen": -1.5660320520401, + "logits/rejected": -1.6014647483825684, + "logps/chosen": -116.04778289794922, + "logps/rejected": -222.39625549316406, + "loss": 0.0031, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.5275158882141113, + "rewards/margins": 9.572059631347656, + "rewards/rejected": -13.099575996398926, + "step": 2637 + }, + { + "epoch": 4.23, + "learning_rate": 2.941934205311137e-07, + "logits/chosen": -1.4754986763000488, + "logits/rejected": -1.4315857887268066, + "logps/chosen": -145.16091918945312, + "logps/rejected": -218.84410095214844, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.126506805419922, + "rewards/margins": 8.765851974487305, + "rewards/rejected": -13.892358779907227, + "step": 2638 + }, + { + "epoch": 4.24, + "learning_rate": 2.940943321442727e-07, + "logits/chosen": -1.6263965368270874, + "logits/rejected": -1.635613203048706, + "logps/chosen": -99.49462890625, + "logps/rejected": -173.45802307128906, + "loss": 0.0116, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7284655570983887, + "rewards/margins": 7.282393455505371, + "rewards/rejected": -11.010858535766602, + "step": 2639 + }, + { + "epoch": 4.24, + "learning_rate": 2.9399524375743165e-07, + "logits/chosen": -1.5491924285888672, + "logits/rejected": -1.636959433555603, + "logps/chosen": -157.48828125, + "logps/rejected": -249.24038696289062, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.92471981048584, + "rewards/margins": 8.82001781463623, + "rewards/rejected": -14.74473762512207, + "step": 2640 + }, + { + "epoch": 4.24, + "learning_rate": 2.9389615537059055e-07, + "logits/chosen": -1.456390380859375, + "logits/rejected": -1.373331904411316, + "logps/chosen": -95.78446197509766, + "logps/rejected": -246.6943817138672, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.001149892807007, + "rewards/margins": 13.160087585449219, + "rewards/rejected": -15.161237716674805, + "step": 2641 + }, + { + "epoch": 4.24, + "learning_rate": 2.937970669837495e-07, + "logits/chosen": -1.4955196380615234, + "logits/rejected": -1.5138293504714966, + "logps/chosen": -123.02764129638672, + "logps/rejected": -233.41293334960938, + "loss": 0.0031, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.5924577713012695, + "rewards/margins": 10.42428207397461, + "rewards/rejected": -15.016739845275879, + "step": 2642 + }, + { + "epoch": 4.24, + "learning_rate": 2.936979785969084e-07, + "logits/chosen": -1.6056244373321533, + "logits/rejected": -1.5511646270751953, + "logps/chosen": -128.62457275390625, + "logps/rejected": -218.11398315429688, + "loss": 0.0053, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9791293144226074, + "rewards/margins": 9.845979690551758, + "rewards/rejected": -13.825109481811523, + "step": 2643 + }, + { + "epoch": 4.24, + "learning_rate": 2.935988902100673e-07, + "logits/chosen": -1.7495043277740479, + "logits/rejected": -1.7163230180740356, + "logps/chosen": -148.57826232910156, + "logps/rejected": -216.97166442871094, + "loss": 0.0086, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.505868434906006, + "rewards/margins": 7.896707057952881, + "rewards/rejected": -13.402575492858887, + "step": 2644 + }, + { + "epoch": 4.25, + "learning_rate": 2.934998018232263e-07, + "logits/chosen": -1.5667736530303955, + "logits/rejected": -1.420120120048523, + "logps/chosen": -136.36166381835938, + "logps/rejected": -195.2672576904297, + "loss": 0.0032, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5667595863342285, + "rewards/margins": 8.515351295471191, + "rewards/rejected": -11.082110404968262, + "step": 2645 + }, + { + "epoch": 4.25, + "learning_rate": 2.9340071343638525e-07, + "logits/chosen": -1.5338914394378662, + "logits/rejected": -1.452678918838501, + "logps/chosen": -129.51490783691406, + "logps/rejected": -164.97705078125, + "loss": 0.0074, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.491064071655273, + "rewards/margins": 5.408830642700195, + "rewards/rejected": -9.899894714355469, + "step": 2646 + }, + { + "epoch": 4.25, + "learning_rate": 2.933016250495442e-07, + "logits/chosen": -1.5446820259094238, + "logits/rejected": -1.6063745021820068, + "logps/chosen": -155.09190368652344, + "logps/rejected": -240.429931640625, + "loss": 0.0188, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.7944655418396, + "rewards/margins": 6.286945343017578, + "rewards/rejected": -13.08141040802002, + "step": 2647 + }, + { + "epoch": 4.25, + "learning_rate": 2.932025366627031e-07, + "logits/chosen": -1.551790714263916, + "logits/rejected": -1.574445366859436, + "logps/chosen": -131.30039978027344, + "logps/rejected": -252.68038940429688, + "loss": 0.0093, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.639746189117432, + "rewards/margins": 11.976330757141113, + "rewards/rejected": -16.616077423095703, + "step": 2648 + }, + { + "epoch": 4.25, + "learning_rate": 2.93103448275862e-07, + "logits/chosen": -1.4791717529296875, + "logits/rejected": -1.5024423599243164, + "logps/chosen": -141.19644165039062, + "logps/rejected": -222.9124755859375, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.0868330001831055, + "rewards/margins": 10.219091415405273, + "rewards/rejected": -15.305925369262695, + "step": 2649 + }, + { + "epoch": 4.25, + "learning_rate": 2.93004359889021e-07, + "logits/chosen": -1.52309250831604, + "logits/rejected": -1.6297481060028076, + "logps/chosen": -137.26234436035156, + "logps/rejected": -244.86785888671875, + "loss": 0.003, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.194770812988281, + "rewards/margins": 8.577580451965332, + "rewards/rejected": -13.77235221862793, + "step": 2650 + }, + { + "epoch": 4.26, + "learning_rate": 2.9290527150217994e-07, + "logits/chosen": -1.6286351680755615, + "logits/rejected": -1.6019634008407593, + "logps/chosen": -112.03473663330078, + "logps/rejected": -215.87948608398438, + "loss": 0.0891, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.6193580627441406, + "rewards/margins": 10.22908878326416, + "rewards/rejected": -13.8484468460083, + "step": 2651 + }, + { + "epoch": 4.26, + "learning_rate": 2.928061831153389e-07, + "logits/chosen": -1.4716320037841797, + "logits/rejected": -1.4713270664215088, + "logps/chosen": -133.9058837890625, + "logps/rejected": -208.60305786132812, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.601775646209717, + "rewards/margins": 9.07995891571045, + "rewards/rejected": -13.681734085083008, + "step": 2652 + }, + { + "epoch": 4.26, + "learning_rate": 2.927070947284978e-07, + "logits/chosen": -1.5490361452102661, + "logits/rejected": -1.5907597541809082, + "logps/chosen": -113.91609954833984, + "logps/rejected": -196.2628631591797, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.902942657470703, + "rewards/margins": 7.6152167320251465, + "rewards/rejected": -11.518158912658691, + "step": 2653 + }, + { + "epoch": 4.26, + "learning_rate": 2.926080063416567e-07, + "logits/chosen": -1.6606957912445068, + "logits/rejected": -1.67069673538208, + "logps/chosen": -138.542236328125, + "logps/rejected": -228.2842254638672, + "loss": 0.0015, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.081847667694092, + "rewards/margins": 8.938965797424316, + "rewards/rejected": -14.020814895629883, + "step": 2654 + }, + { + "epoch": 4.26, + "learning_rate": 2.9250891795481567e-07, + "logits/chosen": -1.7181811332702637, + "logits/rejected": -1.7411872148513794, + "logps/chosen": -94.76527404785156, + "logps/rejected": -194.21701049804688, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1614432334899902, + "rewards/margins": 9.150632858276367, + "rewards/rejected": -12.312076568603516, + "step": 2655 + }, + { + "epoch": 4.26, + "learning_rate": 2.9240982956797463e-07, + "logits/chosen": -1.6080694198608398, + "logits/rejected": -1.6008875370025635, + "logps/chosen": -138.1259307861328, + "logps/rejected": -229.15032958984375, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.360541820526123, + "rewards/margins": 8.205428123474121, + "rewards/rejected": -12.565970420837402, + "step": 2656 + }, + { + "epoch": 4.26, + "learning_rate": 2.923107411811336e-07, + "logits/chosen": -1.5129817724227905, + "logits/rejected": -1.6830817461013794, + "logps/chosen": -132.26953125, + "logps/rejected": -238.1708526611328, + "loss": 0.0026, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.526209354400635, + "rewards/margins": 9.413996696472168, + "rewards/rejected": -13.940205574035645, + "step": 2657 + }, + { + "epoch": 4.27, + "learning_rate": 2.922116527942925e-07, + "logits/chosen": -1.6018728017807007, + "logits/rejected": -1.7031590938568115, + "logps/chosen": -100.26914978027344, + "logps/rejected": -240.414794921875, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7641754150390625, + "rewards/margins": 11.135849952697754, + "rewards/rejected": -14.900023460388184, + "step": 2658 + }, + { + "epoch": 4.27, + "learning_rate": 2.921125644074514e-07, + "logits/chosen": -1.649741291999817, + "logits/rejected": -1.6461081504821777, + "logps/chosen": -184.20306396484375, + "logps/rejected": -252.39208984375, + "loss": 0.0024, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.027582168579102, + "rewards/margins": 7.404205799102783, + "rewards/rejected": -14.431788444519043, + "step": 2659 + }, + { + "epoch": 4.27, + "learning_rate": 2.9201347602061036e-07, + "logits/chosen": -1.4926331043243408, + "logits/rejected": -1.5130314826965332, + "logps/chosen": -138.96786499023438, + "logps/rejected": -208.95726013183594, + "loss": 0.0037, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.880455017089844, + "rewards/margins": 6.160386085510254, + "rewards/rejected": -12.040841102600098, + "step": 2660 + }, + { + "epoch": 4.27, + "learning_rate": 2.919143876337693e-07, + "logits/chosen": -1.600313425064087, + "logits/rejected": -1.5796160697937012, + "logps/chosen": -119.24134826660156, + "logps/rejected": -262.1336364746094, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.269399642944336, + "rewards/margins": 13.468500137329102, + "rewards/rejected": -17.737899780273438, + "step": 2661 + }, + { + "epoch": 4.27, + "learning_rate": 2.918152992469283e-07, + "logits/chosen": -1.609178066253662, + "logits/rejected": -1.552290439605713, + "logps/chosen": -130.94107055664062, + "logps/rejected": -240.32562255859375, + "loss": 0.0046, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.87797212600708, + "rewards/margins": 11.946358680725098, + "rewards/rejected": -15.82433032989502, + "step": 2662 + }, + { + "epoch": 4.27, + "learning_rate": 2.917162108600872e-07, + "logits/chosen": -1.5622427463531494, + "logits/rejected": -1.5343575477600098, + "logps/chosen": -132.42083740234375, + "logps/rejected": -228.65069580078125, + "loss": 0.0019, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.777279853820801, + "rewards/margins": 9.582925796508789, + "rewards/rejected": -14.360206604003906, + "step": 2663 + }, + { + "epoch": 4.28, + "learning_rate": 2.916171224732461e-07, + "logits/chosen": -1.6382904052734375, + "logits/rejected": -1.6905285120010376, + "logps/chosen": -91.34497833251953, + "logps/rejected": -185.57675170898438, + "loss": 0.003, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.5845413208007812, + "rewards/margins": 8.893877029418945, + "rewards/rejected": -12.478418350219727, + "step": 2664 + }, + { + "epoch": 4.28, + "learning_rate": 2.9151803408640506e-07, + "logits/chosen": -1.6895217895507812, + "logits/rejected": -1.6123977899551392, + "logps/chosen": -121.86882019042969, + "logps/rejected": -232.92303466796875, + "loss": 0.0034, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.559264898300171, + "rewards/margins": 11.711341857910156, + "rewards/rejected": -15.270607948303223, + "step": 2665 + }, + { + "epoch": 4.28, + "learning_rate": 2.9141894569956396e-07, + "logits/chosen": -1.761647343635559, + "logits/rejected": -1.6982779502868652, + "logps/chosen": -151.42587280273438, + "logps/rejected": -246.1985626220703, + "loss": 0.0099, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.919920444488525, + "rewards/margins": 10.464000701904297, + "rewards/rejected": -15.383922576904297, + "step": 2666 + }, + { + "epoch": 4.28, + "learning_rate": 2.91319857312723e-07, + "logits/chosen": -1.484360933303833, + "logits/rejected": -1.5196475982666016, + "logps/chosen": -109.69369506835938, + "logps/rejected": -205.6442413330078, + "loss": 0.005, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9167914390563965, + "rewards/margins": 8.419023513793945, + "rewards/rejected": -12.3358154296875, + "step": 2667 + }, + { + "epoch": 4.28, + "learning_rate": 2.912207689258819e-07, + "logits/chosen": -1.5144636631011963, + "logits/rejected": -1.4715849161148071, + "logps/chosen": -135.67945861816406, + "logps/rejected": -212.42593383789062, + "loss": 0.0036, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.233280181884766, + "rewards/margins": 8.244329452514648, + "rewards/rejected": -13.477609634399414, + "step": 2668 + }, + { + "epoch": 4.28, + "learning_rate": 2.911216805390408e-07, + "logits/chosen": -1.4240036010742188, + "logits/rejected": -1.5557358264923096, + "logps/chosen": -148.37576293945312, + "logps/rejected": -259.8450622558594, + "loss": 0.0024, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.360055923461914, + "rewards/margins": 9.257190704345703, + "rewards/rejected": -14.6172456741333, + "step": 2669 + }, + { + "epoch": 4.29, + "learning_rate": 2.9102259215219975e-07, + "logits/chosen": -1.5485775470733643, + "logits/rejected": -1.6267142295837402, + "logps/chosen": -129.58853149414062, + "logps/rejected": -246.10208129882812, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.617799758911133, + "rewards/margins": 7.321616172790527, + "rewards/rejected": -10.939414978027344, + "step": 2670 + }, + { + "epoch": 4.29, + "learning_rate": 2.9092350376535866e-07, + "logits/chosen": -1.755169153213501, + "logits/rejected": -1.7670153379440308, + "logps/chosen": -109.71975708007812, + "logps/rejected": -269.9612121582031, + "loss": 0.007, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9194440841674805, + "rewards/margins": 14.892406463623047, + "rewards/rejected": -17.81184959411621, + "step": 2671 + }, + { + "epoch": 4.29, + "learning_rate": 2.9082441537851767e-07, + "logits/chosen": -1.4579756259918213, + "logits/rejected": -1.4883406162261963, + "logps/chosen": -164.52708435058594, + "logps/rejected": -243.89007568359375, + "loss": 0.0019, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.527739524841309, + "rewards/margins": 9.14558219909668, + "rewards/rejected": -13.673321723937988, + "step": 2672 + }, + { + "epoch": 4.29, + "learning_rate": 2.907253269916766e-07, + "logits/chosen": -1.534324049949646, + "logits/rejected": -1.5034745931625366, + "logps/chosen": -135.1552276611328, + "logps/rejected": -257.4395751953125, + "loss": 0.0114, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7654848098754883, + "rewards/margins": 13.033981323242188, + "rewards/rejected": -16.79946517944336, + "step": 2673 + }, + { + "epoch": 4.29, + "learning_rate": 2.906262386048355e-07, + "logits/chosen": -1.5323336124420166, + "logits/rejected": -1.512731909751892, + "logps/chosen": -128.73117065429688, + "logps/rejected": -254.79498291015625, + "loss": 0.0075, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.788881778717041, + "rewards/margins": 12.37465763092041, + "rewards/rejected": -17.16353988647461, + "step": 2674 + }, + { + "epoch": 4.29, + "learning_rate": 2.9052715021799444e-07, + "logits/chosen": -1.7603925466537476, + "logits/rejected": -1.6577562093734741, + "logps/chosen": -123.3717041015625, + "logps/rejected": -151.15402221679688, + "loss": 0.0124, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.863552451133728, + "rewards/margins": 7.311705112457275, + "rewards/rejected": -9.175257682800293, + "step": 2675 + }, + { + "epoch": 4.3, + "learning_rate": 2.9042806183115335e-07, + "logits/chosen": -1.587792158126831, + "logits/rejected": -1.5856435298919678, + "logps/chosen": -118.47705841064453, + "logps/rejected": -229.99237060546875, + "loss": 0.005, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7816171646118164, + "rewards/margins": 9.666367530822754, + "rewards/rejected": -13.44798469543457, + "step": 2676 + }, + { + "epoch": 4.3, + "learning_rate": 2.903289734443123e-07, + "logits/chosen": -1.7034218311309814, + "logits/rejected": -1.71919846534729, + "logps/chosen": -109.50798797607422, + "logps/rejected": -209.8534698486328, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9680557250976562, + "rewards/margins": 10.340243339538574, + "rewards/rejected": -13.308300018310547, + "step": 2677 + }, + { + "epoch": 4.3, + "learning_rate": 2.9022988505747127e-07, + "logits/chosen": -1.6507941484451294, + "logits/rejected": -1.6460776329040527, + "logps/chosen": -137.9029998779297, + "logps/rejected": -254.79286193847656, + "loss": 0.0018, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.6089887619018555, + "rewards/margins": 11.018564224243164, + "rewards/rejected": -15.627553939819336, + "step": 2678 + }, + { + "epoch": 4.3, + "learning_rate": 2.901307966706302e-07, + "logits/chosen": -1.5858436822891235, + "logits/rejected": -1.6932224035263062, + "logps/chosen": -127.51107025146484, + "logps/rejected": -242.91448974609375, + "loss": 0.0042, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.389698028564453, + "rewards/margins": 9.750028610229492, + "rewards/rejected": -16.139726638793945, + "step": 2679 + }, + { + "epoch": 4.3, + "learning_rate": 2.9003170828378914e-07, + "logits/chosen": -1.5720397233963013, + "logits/rejected": -1.5701583623886108, + "logps/chosen": -121.60831451416016, + "logps/rejected": -254.4521942138672, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.35711669921875, + "rewards/margins": 11.709243774414062, + "rewards/rejected": -17.066360473632812, + "step": 2680 + }, + { + "epoch": 4.3, + "learning_rate": 2.8993261989694804e-07, + "logits/chosen": -1.5037003755569458, + "logits/rejected": -1.3988037109375, + "logps/chosen": -146.1622314453125, + "logps/rejected": -215.40614318847656, + "loss": 0.0035, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.389657974243164, + "rewards/margins": 10.063405990600586, + "rewards/rejected": -14.453062057495117, + "step": 2681 + }, + { + "epoch": 4.3, + "learning_rate": 2.8983353151010695e-07, + "logits/chosen": -1.532509684562683, + "logits/rejected": -1.5747215747833252, + "logps/chosen": -128.59323120117188, + "logps/rejected": -233.13461303710938, + "loss": 0.0031, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.497270107269287, + "rewards/margins": 10.917905807495117, + "rewards/rejected": -16.415176391601562, + "step": 2682 + }, + { + "epoch": 4.31, + "learning_rate": 2.8973444312326596e-07, + "logits/chosen": -1.641840934753418, + "logits/rejected": -1.6387147903442383, + "logps/chosen": -120.13775634765625, + "logps/rejected": -244.57659912109375, + "loss": 0.0019, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3169946670532227, + "rewards/margins": 13.085742950439453, + "rewards/rejected": -16.402738571166992, + "step": 2683 + }, + { + "epoch": 4.31, + "learning_rate": 2.8963535473642487e-07, + "logits/chosen": -1.5707632303237915, + "logits/rejected": -1.5770695209503174, + "logps/chosen": -120.60309600830078, + "logps/rejected": -213.22705078125, + "loss": 0.0499, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.438572883605957, + "rewards/margins": 9.825907707214355, + "rewards/rejected": -13.264481544494629, + "step": 2684 + }, + { + "epoch": 4.31, + "learning_rate": 2.8953626634958383e-07, + "logits/chosen": -1.4449576139450073, + "logits/rejected": -1.464577555656433, + "logps/chosen": -125.71534729003906, + "logps/rejected": -268.1496887207031, + "loss": 0.0034, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.71002721786499, + "rewards/margins": 11.611729621887207, + "rewards/rejected": -16.321758270263672, + "step": 2685 + }, + { + "epoch": 4.31, + "learning_rate": 2.8943717796274274e-07, + "logits/chosen": -1.7275912761688232, + "logits/rejected": -1.7570582628250122, + "logps/chosen": -140.91827392578125, + "logps/rejected": -243.6029052734375, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.284562110900879, + "rewards/margins": 8.819369316101074, + "rewards/rejected": -14.103931427001953, + "step": 2686 + }, + { + "epoch": 4.31, + "learning_rate": 2.8933808957590164e-07, + "logits/chosen": -1.4731565713882446, + "logits/rejected": -1.5452535152435303, + "logps/chosen": -127.87910461425781, + "logps/rejected": -200.79429626464844, + "loss": 0.018, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.215612411499023, + "rewards/margins": 6.443398475646973, + "rewards/rejected": -10.659010887145996, + "step": 2687 + }, + { + "epoch": 4.31, + "learning_rate": 2.8923900118906066e-07, + "logits/chosen": -1.526278018951416, + "logits/rejected": -1.6020742654800415, + "logps/chosen": -140.69908142089844, + "logps/rejected": -270.2707824707031, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.121748447418213, + "rewards/margins": 11.946529388427734, + "rewards/rejected": -19.06827735900879, + "step": 2688 + }, + { + "epoch": 4.32, + "learning_rate": 2.8913991280221956e-07, + "logits/chosen": -1.6483566761016846, + "logits/rejected": -1.6944273710250854, + "logps/chosen": -133.17637634277344, + "logps/rejected": -231.50006103515625, + "loss": 0.0039, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.576644420623779, + "rewards/margins": 9.614761352539062, + "rewards/rejected": -14.191404342651367, + "step": 2689 + }, + { + "epoch": 4.32, + "learning_rate": 2.890408244153785e-07, + "logits/chosen": -1.6106691360473633, + "logits/rejected": -1.6255688667297363, + "logps/chosen": -126.68218231201172, + "logps/rejected": -179.02589416503906, + "loss": 0.008, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.230224847793579, + "rewards/margins": 7.629091739654541, + "rewards/rejected": -10.8593168258667, + "step": 2690 + }, + { + "epoch": 4.32, + "learning_rate": 2.8894173602853743e-07, + "logits/chosen": -1.6046873331069946, + "logits/rejected": -1.6035925149917603, + "logps/chosen": -139.38291931152344, + "logps/rejected": -230.37042236328125, + "loss": 0.0076, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.214730739593506, + "rewards/margins": 9.895200729370117, + "rewards/rejected": -13.109931945800781, + "step": 2691 + }, + { + "epoch": 4.32, + "learning_rate": 2.8884264764169634e-07, + "logits/chosen": -1.5859174728393555, + "logits/rejected": -1.4609119892120361, + "logps/chosen": -181.66009521484375, + "logps/rejected": -233.13478088378906, + "loss": 0.005, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.891443729400635, + "rewards/margins": 6.592597961425781, + "rewards/rejected": -13.484041213989258, + "step": 2692 + }, + { + "epoch": 4.32, + "learning_rate": 2.8874355925485535e-07, + "logits/chosen": -1.6162762641906738, + "logits/rejected": -1.4732496738433838, + "logps/chosen": -148.00950622558594, + "logps/rejected": -233.1619415283203, + "loss": 0.004, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.830533981323242, + "rewards/margins": 10.208687782287598, + "rewards/rejected": -15.039223670959473, + "step": 2693 + }, + { + "epoch": 4.32, + "learning_rate": 2.8864447086801426e-07, + "logits/chosen": -1.6242276430130005, + "logits/rejected": -1.814460277557373, + "logps/chosen": -104.81797790527344, + "logps/rejected": -196.026123046875, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.424649238586426, + "rewards/margins": 7.702243328094482, + "rewards/rejected": -11.12689208984375, + "step": 2694 + }, + { + "epoch": 4.33, + "learning_rate": 2.885453824811732e-07, + "logits/chosen": -1.5530041456222534, + "logits/rejected": -1.4485989809036255, + "logps/chosen": -161.65785217285156, + "logps/rejected": -230.1293182373047, + "loss": 0.0105, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.401159763336182, + "rewards/margins": 9.867533683776855, + "rewards/rejected": -14.268693923950195, + "step": 2695 + }, + { + "epoch": 4.33, + "learning_rate": 2.884462940943321e-07, + "logits/chosen": -1.5990560054779053, + "logits/rejected": -1.5892136096954346, + "logps/chosen": -75.73429107666016, + "logps/rejected": -174.43191528320312, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1946336030960083, + "rewards/margins": 9.81296443939209, + "rewards/rejected": -11.007597923278809, + "step": 2696 + }, + { + "epoch": 4.33, + "learning_rate": 2.8834720570749103e-07, + "logits/chosen": -1.5873782634735107, + "logits/rejected": -1.5689404010772705, + "logps/chosen": -147.67227172851562, + "logps/rejected": -242.33389282226562, + "loss": 0.0044, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.765568256378174, + "rewards/margins": 11.271584510803223, + "rewards/rejected": -16.037153244018555, + "step": 2697 + }, + { + "epoch": 4.33, + "learning_rate": 2.8824811732065004e-07, + "logits/chosen": -1.5447468757629395, + "logits/rejected": -1.6158127784729004, + "logps/chosen": -147.7467498779297, + "logps/rejected": -248.13890075683594, + "loss": 0.0274, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.734414577484131, + "rewards/margins": 9.923940658569336, + "rewards/rejected": -14.658355712890625, + "step": 2698 + }, + { + "epoch": 4.33, + "learning_rate": 2.8814902893380895e-07, + "logits/chosen": -1.6217620372772217, + "logits/rejected": -1.6358007192611694, + "logps/chosen": -119.26307678222656, + "logps/rejected": -193.7361602783203, + "loss": 0.0062, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.936152935028076, + "rewards/margins": 6.994050979614258, + "rewards/rejected": -10.930203437805176, + "step": 2699 + }, + { + "epoch": 4.33, + "learning_rate": 2.880499405469679e-07, + "logits/chosen": -1.7457430362701416, + "logits/rejected": -1.7495160102844238, + "logps/chosen": -92.64984130859375, + "logps/rejected": -185.75360107421875, + "loss": 0.002, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1108267307281494, + "rewards/margins": 8.952719688415527, + "rewards/rejected": -11.063547134399414, + "step": 2700 + }, + { + "epoch": 4.34, + "learning_rate": 2.879508521601268e-07, + "logits/chosen": -1.689592957496643, + "logits/rejected": -1.6231684684753418, + "logps/chosen": -167.611083984375, + "logps/rejected": -251.2779998779297, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.3698248863220215, + "rewards/margins": 9.618505477905273, + "rewards/rejected": -15.988329887390137, + "step": 2701 + }, + { + "epoch": 4.34, + "learning_rate": 2.878517637732857e-07, + "logits/chosen": -1.5911628007888794, + "logits/rejected": -1.6211574077606201, + "logps/chosen": -123.07876586914062, + "logps/rejected": -247.23683166503906, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.087737560272217, + "rewards/margins": 10.925151824951172, + "rewards/rejected": -15.012889862060547, + "step": 2702 + }, + { + "epoch": 4.34, + "learning_rate": 2.8775267538644473e-07, + "logits/chosen": -1.7623565196990967, + "logits/rejected": -1.7019540071487427, + "logps/chosen": -115.10385131835938, + "logps/rejected": -217.34971618652344, + "loss": 0.0019, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.661360025405884, + "rewards/margins": 10.460493087768555, + "rewards/rejected": -14.12185287475586, + "step": 2703 + }, + { + "epoch": 4.34, + "learning_rate": 2.8765358699960364e-07, + "logits/chosen": -1.4666502475738525, + "logits/rejected": -1.465287446975708, + "logps/chosen": -130.3800506591797, + "logps/rejected": -219.73854064941406, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.500557899475098, + "rewards/margins": 9.050395011901855, + "rewards/rejected": -14.55095386505127, + "step": 2704 + }, + { + "epoch": 4.34, + "learning_rate": 2.875544986127626e-07, + "logits/chosen": -1.509737491607666, + "logits/rejected": -1.516382098197937, + "logps/chosen": -125.42378234863281, + "logps/rejected": -219.41705322265625, + "loss": 0.0049, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.027798652648926, + "rewards/margins": 9.60063362121582, + "rewards/rejected": -13.62843132019043, + "step": 2705 + }, + { + "epoch": 4.34, + "learning_rate": 2.874554102259215e-07, + "logits/chosen": -1.7992517948150635, + "logits/rejected": -1.6944160461425781, + "logps/chosen": -109.88286590576172, + "logps/rejected": -209.97201538085938, + "loss": 0.0039, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.0165963172912598, + "rewards/margins": 11.080083847045898, + "rewards/rejected": -14.096678733825684, + "step": 2706 + }, + { + "epoch": 4.35, + "learning_rate": 2.873563218390804e-07, + "logits/chosen": -1.5950607061386108, + "logits/rejected": -1.6680513620376587, + "logps/chosen": -129.33627319335938, + "logps/rejected": -234.19615173339844, + "loss": 0.0028, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1628258228302, + "rewards/margins": 10.332077026367188, + "rewards/rejected": -13.494901657104492, + "step": 2707 + }, + { + "epoch": 4.35, + "learning_rate": 2.872572334522394e-07, + "logits/chosen": -1.6945271492004395, + "logits/rejected": -1.7065246105194092, + "logps/chosen": -150.19375610351562, + "logps/rejected": -226.61611938476562, + "loss": 0.0041, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7568464279174805, + "rewards/margins": 9.798002243041992, + "rewards/rejected": -13.554847717285156, + "step": 2708 + }, + { + "epoch": 4.35, + "learning_rate": 2.8715814506539833e-07, + "logits/chosen": -1.7407609224319458, + "logits/rejected": -1.6673810482025146, + "logps/chosen": -171.8336181640625, + "logps/rejected": -236.2877960205078, + "loss": 0.0078, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.680035591125488, + "rewards/margins": 9.95743179321289, + "rewards/rejected": -15.637468338012695, + "step": 2709 + }, + { + "epoch": 4.35, + "learning_rate": 2.870590566785573e-07, + "logits/chosen": -1.5881246328353882, + "logits/rejected": -1.6654787063598633, + "logps/chosen": -120.13629150390625, + "logps/rejected": -242.16006469726562, + "loss": 0.036, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.322103500366211, + "rewards/margins": 9.560227394104004, + "rewards/rejected": -13.882329940795898, + "step": 2710 + }, + { + "epoch": 4.35, + "learning_rate": 2.869599682917162e-07, + "logits/chosen": -1.5644659996032715, + "logits/rejected": -1.5354502201080322, + "logps/chosen": -176.0826416015625, + "logps/rejected": -255.49224853515625, + "loss": 0.0045, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.151700019836426, + "rewards/margins": 8.524535179138184, + "rewards/rejected": -15.67623519897461, + "step": 2711 + }, + { + "epoch": 4.35, + "learning_rate": 2.868608799048751e-07, + "logits/chosen": -1.4831297397613525, + "logits/rejected": -1.5800228118896484, + "logps/chosen": -125.56117248535156, + "logps/rejected": -268.6870422363281, + "loss": 0.0034, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.863163471221924, + "rewards/margins": 12.56589126586914, + "rewards/rejected": -16.429054260253906, + "step": 2712 + }, + { + "epoch": 4.35, + "learning_rate": 2.8676179151803407e-07, + "logits/chosen": -1.7188048362731934, + "logits/rejected": -1.8612301349639893, + "logps/chosen": -113.70161437988281, + "logps/rejected": -264.95050048828125, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.367321968078613, + "rewards/margins": 13.113808631896973, + "rewards/rejected": -17.481130599975586, + "step": 2713 + }, + { + "epoch": 4.36, + "learning_rate": 2.8666270313119303e-07, + "logits/chosen": -1.57486093044281, + "logits/rejected": -1.533722162246704, + "logps/chosen": -110.47323608398438, + "logps/rejected": -252.98753356933594, + "loss": 0.0056, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.601809501647949, + "rewards/margins": 13.707547187805176, + "rewards/rejected": -17.309356689453125, + "step": 2714 + }, + { + "epoch": 4.36, + "learning_rate": 2.8656361474435193e-07, + "logits/chosen": -1.54884934425354, + "logits/rejected": -1.5179437398910522, + "logps/chosen": -117.19139099121094, + "logps/rejected": -229.16049194335938, + "loss": 0.0037, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.040502548217773, + "rewards/margins": 10.593888282775879, + "rewards/rejected": -14.634389877319336, + "step": 2715 + }, + { + "epoch": 4.36, + "learning_rate": 2.864645263575109e-07, + "logits/chosen": -1.3936482667922974, + "logits/rejected": -1.3749257326126099, + "logps/chosen": -137.77694702148438, + "logps/rejected": -229.060791015625, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.234468698501587, + "rewards/margins": 9.033712387084961, + "rewards/rejected": -12.268180847167969, + "step": 2716 + }, + { + "epoch": 4.36, + "learning_rate": 2.863654379706698e-07, + "logits/chosen": -1.5513296127319336, + "logits/rejected": -1.5265400409698486, + "logps/chosen": -121.88670349121094, + "logps/rejected": -242.40025329589844, + "loss": 0.0073, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.923501014709473, + "rewards/margins": 10.814271926879883, + "rewards/rejected": -16.737773895263672, + "step": 2717 + }, + { + "epoch": 4.36, + "learning_rate": 2.8626634958382876e-07, + "logits/chosen": -1.5112066268920898, + "logits/rejected": -1.5811978578567505, + "logps/chosen": -145.32713317871094, + "logps/rejected": -279.7803955078125, + "loss": 0.0019, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.312545299530029, + "rewards/margins": 13.130022048950195, + "rewards/rejected": -18.442567825317383, + "step": 2718 + }, + { + "epoch": 4.36, + "learning_rate": 2.861672611969877e-07, + "logits/chosen": -1.43589186668396, + "logits/rejected": -1.5404096841812134, + "logps/chosen": -105.24784851074219, + "logps/rejected": -215.0416717529297, + "loss": 0.0024, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.716059446334839, + "rewards/margins": 10.233918190002441, + "rewards/rejected": -13.94997787475586, + "step": 2719 + }, + { + "epoch": 4.37, + "learning_rate": 2.8606817281014663e-07, + "logits/chosen": -1.604697585105896, + "logits/rejected": -1.574940800666809, + "logps/chosen": -141.23324584960938, + "logps/rejected": -254.27012634277344, + "loss": 0.0019, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.838027477264404, + "rewards/margins": 9.745918273925781, + "rewards/rejected": -14.583945274353027, + "step": 2720 + }, + { + "epoch": 4.37, + "learning_rate": 2.859690844233056e-07, + "logits/chosen": -1.6449010372161865, + "logits/rejected": -1.6311697959899902, + "logps/chosen": -144.22784423828125, + "logps/rejected": -230.12008666992188, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.7527947425842285, + "rewards/margins": 8.402633666992188, + "rewards/rejected": -13.155428886413574, + "step": 2721 + }, + { + "epoch": 4.37, + "learning_rate": 2.858699960364645e-07, + "logits/chosen": -1.820873737335205, + "logits/rejected": -1.7669110298156738, + "logps/chosen": -102.68189239501953, + "logps/rejected": -216.33917236328125, + "loss": 0.0024, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.39007830619812, + "rewards/margins": 10.833419799804688, + "rewards/rejected": -14.223499298095703, + "step": 2722 + }, + { + "epoch": 4.37, + "learning_rate": 2.8577090764962345e-07, + "logits/chosen": -1.5361204147338867, + "logits/rejected": -1.6005632877349854, + "logps/chosen": -145.8508758544922, + "logps/rejected": -254.4680633544922, + "loss": 0.011, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.688516616821289, + "rewards/margins": 8.850040435791016, + "rewards/rejected": -14.538557052612305, + "step": 2723 + }, + { + "epoch": 4.37, + "learning_rate": 2.856718192627824e-07, + "logits/chosen": -1.5622220039367676, + "logits/rejected": -1.5475040674209595, + "logps/chosen": -131.94325256347656, + "logps/rejected": -197.24072265625, + "loss": 0.037, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.540065288543701, + "rewards/margins": 8.749982833862305, + "rewards/rejected": -12.290048599243164, + "step": 2724 + }, + { + "epoch": 4.37, + "learning_rate": 2.855727308759413e-07, + "logits/chosen": -1.6205891370773315, + "logits/rejected": -1.672995686531067, + "logps/chosen": -129.09080505371094, + "logps/rejected": -222.75900268554688, + "loss": 0.0079, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.210233211517334, + "rewards/margins": 9.428805351257324, + "rewards/rejected": -12.639039039611816, + "step": 2725 + }, + { + "epoch": 4.38, + "learning_rate": 2.854736424891003e-07, + "logits/chosen": -1.4986212253570557, + "logits/rejected": -1.488057255744934, + "logps/chosen": -172.02178955078125, + "logps/rejected": -246.6441650390625, + "loss": 0.0024, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.190101623535156, + "rewards/margins": 7.754901885986328, + "rewards/rejected": -14.945003509521484, + "step": 2726 + }, + { + "epoch": 4.38, + "learning_rate": 2.853745541022592e-07, + "logits/chosen": -1.6370593309402466, + "logits/rejected": -1.5723285675048828, + "logps/chosen": -162.03515625, + "logps/rejected": -211.7146453857422, + "loss": 0.0018, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.955228328704834, + "rewards/margins": 7.85566520690918, + "rewards/rejected": -13.810894012451172, + "step": 2727 + }, + { + "epoch": 4.38, + "learning_rate": 2.8527546571541815e-07, + "logits/chosen": -1.4537222385406494, + "logits/rejected": -1.5885872840881348, + "logps/chosen": -135.88449096679688, + "logps/rejected": -263.41925048828125, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.1324381828308105, + "rewards/margins": 11.385358810424805, + "rewards/rejected": -15.517796516418457, + "step": 2728 + }, + { + "epoch": 4.38, + "learning_rate": 2.8517637732857705e-07, + "logits/chosen": -1.5613166093826294, + "logits/rejected": -1.5919588804244995, + "logps/chosen": -101.88641357421875, + "logps/rejected": -179.1001739501953, + "loss": 0.0024, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.185787677764893, + "rewards/margins": 8.363441467285156, + "rewards/rejected": -12.549229621887207, + "step": 2729 + }, + { + "epoch": 4.38, + "learning_rate": 2.85077288941736e-07, + "logits/chosen": -1.6459912061691284, + "logits/rejected": -1.6963763236999512, + "logps/chosen": -121.8833236694336, + "logps/rejected": -254.6198272705078, + "loss": 0.0049, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.575231552124023, + "rewards/margins": 10.258979797363281, + "rewards/rejected": -14.834212303161621, + "step": 2730 + }, + { + "epoch": 4.38, + "learning_rate": 2.8497820055489497e-07, + "logits/chosen": -1.4920313358306885, + "logits/rejected": -1.5534296035766602, + "logps/chosen": -136.25750732421875, + "logps/rejected": -225.75994873046875, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.56334114074707, + "rewards/margins": 8.823334693908691, + "rewards/rejected": -13.386674880981445, + "step": 2731 + }, + { + "epoch": 4.39, + "learning_rate": 2.848791121680539e-07, + "logits/chosen": -1.5881024599075317, + "logits/rejected": -1.555364727973938, + "logps/chosen": -124.42652893066406, + "logps/rejected": -225.83660888671875, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8523130416870117, + "rewards/margins": 11.03785228729248, + "rewards/rejected": -13.890165328979492, + "step": 2732 + }, + { + "epoch": 4.39, + "learning_rate": 2.8478002378121284e-07, + "logits/chosen": -1.5217175483703613, + "logits/rejected": -1.4797708988189697, + "logps/chosen": -144.7264404296875, + "logps/rejected": -249.1586151123047, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.470658779144287, + "rewards/margins": 11.141939163208008, + "rewards/rejected": -17.612598419189453, + "step": 2733 + }, + { + "epoch": 4.39, + "learning_rate": 2.8468093539437175e-07, + "logits/chosen": -1.5542359352111816, + "logits/rejected": -1.586277723312378, + "logps/chosen": -150.18081665039062, + "logps/rejected": -232.5347137451172, + "loss": 0.0035, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.495151996612549, + "rewards/margins": 9.450002670288086, + "rewards/rejected": -14.945154190063477, + "step": 2734 + }, + { + "epoch": 4.39, + "learning_rate": 2.845818470075307e-07, + "logits/chosen": -1.7437036037445068, + "logits/rejected": -1.8106427192687988, + "logps/chosen": -127.38957977294922, + "logps/rejected": -236.47662353515625, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.075035572052002, + "rewards/margins": 10.382805824279785, + "rewards/rejected": -15.457841873168945, + "step": 2735 + }, + { + "epoch": 4.39, + "learning_rate": 2.8448275862068967e-07, + "logits/chosen": -1.5717424154281616, + "logits/rejected": -1.5086891651153564, + "logps/chosen": -127.60755920410156, + "logps/rejected": -240.640380859375, + "loss": 0.0028, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.413602828979492, + "rewards/margins": 12.253816604614258, + "rewards/rejected": -16.66741943359375, + "step": 2736 + }, + { + "epoch": 4.39, + "learning_rate": 2.8438367023384857e-07, + "logits/chosen": -1.7324081659317017, + "logits/rejected": -1.6997135877609253, + "logps/chosen": -156.7826690673828, + "logps/rejected": -249.06898498535156, + "loss": 0.0266, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.938941478729248, + "rewards/margins": 9.959733963012695, + "rewards/rejected": -14.898675918579102, + "step": 2737 + }, + { + "epoch": 4.39, + "learning_rate": 2.8428458184700753e-07, + "logits/chosen": -1.5725185871124268, + "logits/rejected": -1.603454828262329, + "logps/chosen": -94.85633850097656, + "logps/rejected": -222.597412109375, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.2955291271209717, + "rewards/margins": 12.444089889526367, + "rewards/rejected": -15.739618301391602, + "step": 2738 + }, + { + "epoch": 4.4, + "learning_rate": 2.8418549346016644e-07, + "logits/chosen": -1.5346653461456299, + "logits/rejected": -1.4956259727478027, + "logps/chosen": -201.06982421875, + "logps/rejected": -221.88934326171875, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.505808353424072, + "rewards/margins": 8.380111694335938, + "rewards/rejected": -14.885919570922852, + "step": 2739 + }, + { + "epoch": 4.4, + "learning_rate": 2.840864050733254e-07, + "logits/chosen": -1.5518558025360107, + "logits/rejected": -1.5157350301742554, + "logps/chosen": -159.24163818359375, + "logps/rejected": -228.64549255371094, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.71225643157959, + "rewards/margins": 7.522473335266113, + "rewards/rejected": -13.234729766845703, + "step": 2740 + }, + { + "epoch": 4.4, + "learning_rate": 2.8398731668648436e-07, + "logits/chosen": -1.533934235572815, + "logits/rejected": -1.554236888885498, + "logps/chosen": -142.6758575439453, + "logps/rejected": -250.65208435058594, + "loss": 0.0045, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.23792839050293, + "rewards/margins": 10.542596817016602, + "rewards/rejected": -16.78052520751953, + "step": 2741 + }, + { + "epoch": 4.4, + "learning_rate": 2.8388822829964327e-07, + "logits/chosen": -1.6511311531066895, + "logits/rejected": -1.665867567062378, + "logps/chosen": -135.06382751464844, + "logps/rejected": -261.1358337402344, + "loss": 0.0039, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.100952625274658, + "rewards/margins": 11.429085731506348, + "rewards/rejected": -16.53003692626953, + "step": 2742 + }, + { + "epoch": 4.4, + "learning_rate": 2.837891399128022e-07, + "logits/chosen": -1.6143620014190674, + "logits/rejected": -1.603949785232544, + "logps/chosen": -147.03831481933594, + "logps/rejected": -248.8500213623047, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.380857467651367, + "rewards/margins": 10.758564949035645, + "rewards/rejected": -17.139421463012695, + "step": 2743 + }, + { + "epoch": 4.4, + "learning_rate": 2.8369005152596113e-07, + "logits/chosen": -1.4233711957931519, + "logits/rejected": -1.5262531042099, + "logps/chosen": -106.28994750976562, + "logps/rejected": -229.238525390625, + "loss": 0.0015, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.5503110885620117, + "rewards/margins": 10.036842346191406, + "rewards/rejected": -13.587154388427734, + "step": 2744 + }, + { + "epoch": 4.41, + "learning_rate": 2.8359096313912004e-07, + "logits/chosen": -1.5898771286010742, + "logits/rejected": -1.5718083381652832, + "logps/chosen": -125.01554107666016, + "logps/rejected": -222.83021545410156, + "loss": 0.0055, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.853241920471191, + "rewards/margins": 9.736324310302734, + "rewards/rejected": -14.589567184448242, + "step": 2745 + }, + { + "epoch": 4.41, + "learning_rate": 2.8349187475227905e-07, + "logits/chosen": -1.6329039335250854, + "logits/rejected": -1.6574586629867554, + "logps/chosen": -95.35833740234375, + "logps/rejected": -217.6109619140625, + "loss": 0.0068, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5097174644470215, + "rewards/margins": 11.907732963562012, + "rewards/rejected": -14.417449951171875, + "step": 2746 + }, + { + "epoch": 4.41, + "learning_rate": 2.8339278636543796e-07, + "logits/chosen": -1.4765872955322266, + "logits/rejected": -1.6432390213012695, + "logps/chosen": -118.42501068115234, + "logps/rejected": -247.189208984375, + "loss": 0.0306, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.513876914978027, + "rewards/margins": 10.640029907226562, + "rewards/rejected": -15.153907775878906, + "step": 2747 + }, + { + "epoch": 4.41, + "learning_rate": 2.8329369797859687e-07, + "logits/chosen": -1.4298268556594849, + "logits/rejected": -1.4288899898529053, + "logps/chosen": -151.6299285888672, + "logps/rejected": -235.86044311523438, + "loss": 0.0117, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.147488594055176, + "rewards/margins": 9.905120849609375, + "rewards/rejected": -16.052610397338867, + "step": 2748 + }, + { + "epoch": 4.41, + "learning_rate": 2.831946095917558e-07, + "logits/chosen": -1.6230149269104004, + "logits/rejected": -1.6415226459503174, + "logps/chosen": -154.929931640625, + "logps/rejected": -254.8118133544922, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.146159648895264, + "rewards/margins": 10.277194023132324, + "rewards/rejected": -15.423354148864746, + "step": 2749 + }, + { + "epoch": 4.41, + "learning_rate": 2.8309552120491473e-07, + "logits/chosen": -1.5679807662963867, + "logits/rejected": -1.5540771484375, + "logps/chosen": -118.28207397460938, + "logps/rejected": -211.56150817871094, + "loss": 0.0065, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.544943809509277, + "rewards/margins": 9.553079605102539, + "rewards/rejected": -14.098023414611816, + "step": 2750 + }, + { + "epoch": 4.42, + "learning_rate": 2.8299643281807374e-07, + "logits/chosen": -1.5485495328903198, + "logits/rejected": -1.5943037271499634, + "logps/chosen": -120.41741943359375, + "logps/rejected": -180.13356018066406, + "loss": 0.0055, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.025323390960693, + "rewards/margins": 5.908749580383301, + "rewards/rejected": -9.934072494506836, + "step": 2751 + }, + { + "epoch": 4.42, + "learning_rate": 2.8289734443123265e-07, + "logits/chosen": -1.611480712890625, + "logits/rejected": -1.6532461643218994, + "logps/chosen": -115.31661987304688, + "logps/rejected": -247.1338653564453, + "loss": 0.0028, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.2026615142822266, + "rewards/margins": 12.903923034667969, + "rewards/rejected": -16.106582641601562, + "step": 2752 + }, + { + "epoch": 4.42, + "learning_rate": 2.8279825604439156e-07, + "logits/chosen": -1.5620131492614746, + "logits/rejected": -1.6490687131881714, + "logps/chosen": -79.9268569946289, + "logps/rejected": -223.0098876953125, + "loss": 0.0024, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.410628318786621, + "rewards/margins": 12.070353507995605, + "rewards/rejected": -14.480981826782227, + "step": 2753 + }, + { + "epoch": 4.42, + "learning_rate": 2.826991676575505e-07, + "logits/chosen": -1.4730966091156006, + "logits/rejected": -1.5045579671859741, + "logps/chosen": -144.82215881347656, + "logps/rejected": -273.5934753417969, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.425799369812012, + "rewards/margins": 12.714338302612305, + "rewards/rejected": -18.14013671875, + "step": 2754 + }, + { + "epoch": 4.42, + "learning_rate": 2.826000792707094e-07, + "logits/chosen": -1.6694135665893555, + "logits/rejected": -1.6621085405349731, + "logps/chosen": -131.1252899169922, + "logps/rejected": -217.86700439453125, + "loss": 0.0015, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.115210056304932, + "rewards/margins": 9.40616226196289, + "rewards/rejected": -13.521371841430664, + "step": 2755 + }, + { + "epoch": 4.42, + "learning_rate": 2.8250099088386844e-07, + "logits/chosen": -1.5563735961914062, + "logits/rejected": -1.5341675281524658, + "logps/chosen": -161.56570434570312, + "logps/rejected": -253.91421508789062, + "loss": 0.0045, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.335036754608154, + "rewards/margins": 10.296276092529297, + "rewards/rejected": -15.63131332397461, + "step": 2756 + }, + { + "epoch": 4.43, + "learning_rate": 2.8240190249702734e-07, + "logits/chosen": -1.5389516353607178, + "logits/rejected": -1.6099638938903809, + "logps/chosen": -91.54885864257812, + "logps/rejected": -168.38998413085938, + "loss": 0.0049, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.0507402420043945, + "rewards/margins": 6.907531261444092, + "rewards/rejected": -9.958271026611328, + "step": 2757 + }, + { + "epoch": 4.43, + "learning_rate": 2.8230281411018625e-07, + "logits/chosen": -1.571253776550293, + "logits/rejected": -1.5348806381225586, + "logps/chosen": -107.9016342163086, + "logps/rejected": -233.93258666992188, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.745862007141113, + "rewards/margins": 11.6283540725708, + "rewards/rejected": -16.374216079711914, + "step": 2758 + }, + { + "epoch": 4.43, + "learning_rate": 2.822037257233452e-07, + "logits/chosen": -1.495646595954895, + "logits/rejected": -1.5194401741027832, + "logps/chosen": -95.109619140625, + "logps/rejected": -205.4697265625, + "loss": 0.0048, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.696382999420166, + "rewards/margins": 10.568206787109375, + "rewards/rejected": -13.2645902633667, + "step": 2759 + }, + { + "epoch": 4.43, + "learning_rate": 2.821046373365041e-07, + "logits/chosen": -1.6217894554138184, + "logits/rejected": -1.6155112981796265, + "logps/chosen": -144.34454345703125, + "logps/rejected": -241.9854736328125, + "loss": 0.0281, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.128584861755371, + "rewards/margins": 10.77570629119873, + "rewards/rejected": -15.904291152954102, + "step": 2760 + }, + { + "epoch": 4.43, + "learning_rate": 2.8200554894966313e-07, + "logits/chosen": -1.5358809232711792, + "logits/rejected": -1.530653476715088, + "logps/chosen": -175.5430908203125, + "logps/rejected": -251.4400634765625, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.741790294647217, + "rewards/margins": 8.747354507446289, + "rewards/rejected": -16.489145278930664, + "step": 2761 + }, + { + "epoch": 4.43, + "learning_rate": 2.8190646056282204e-07, + "logits/chosen": -1.5708673000335693, + "logits/rejected": -1.5588667392730713, + "logps/chosen": -101.2484130859375, + "logps/rejected": -161.5988006591797, + "loss": 0.0037, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8774006366729736, + "rewards/margins": 5.936879634857178, + "rewards/rejected": -8.81428050994873, + "step": 2762 + }, + { + "epoch": 4.43, + "learning_rate": 2.8180737217598094e-07, + "logits/chosen": -1.5750653743743896, + "logits/rejected": -1.7654157876968384, + "logps/chosen": -86.88893127441406, + "logps/rejected": -180.5362091064453, + "loss": 0.0129, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4166572093963623, + "rewards/margins": 8.01063346862793, + "rewards/rejected": -10.427290916442871, + "step": 2763 + }, + { + "epoch": 4.44, + "learning_rate": 2.817082837891399e-07, + "logits/chosen": -1.4746253490447998, + "logits/rejected": -1.5653737783432007, + "logps/chosen": -132.5467529296875, + "logps/rejected": -225.18115234375, + "loss": 0.0023, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.933967590332031, + "rewards/margins": 8.254783630371094, + "rewards/rejected": -13.188752174377441, + "step": 2764 + }, + { + "epoch": 4.44, + "learning_rate": 2.816091954022988e-07, + "logits/chosen": -1.4548503160476685, + "logits/rejected": -1.5111383199691772, + "logps/chosen": -110.25473022460938, + "logps/rejected": -215.64654541015625, + "loss": 0.0032, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.585642337799072, + "rewards/margins": 9.538105010986328, + "rewards/rejected": -14.123748779296875, + "step": 2765 + }, + { + "epoch": 4.44, + "learning_rate": 2.815101070154578e-07, + "logits/chosen": -1.4512884616851807, + "logits/rejected": -1.4752033948898315, + "logps/chosen": -172.13363647460938, + "logps/rejected": -273.2087707519531, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.668320655822754, + "rewards/margins": 9.66629695892334, + "rewards/rejected": -18.334617614746094, + "step": 2766 + }, + { + "epoch": 4.44, + "learning_rate": 2.8141101862861673e-07, + "logits/chosen": -1.7221190929412842, + "logits/rejected": -1.7068355083465576, + "logps/chosen": -178.95968627929688, + "logps/rejected": -262.5660095214844, + "loss": 0.0015, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.734452247619629, + "rewards/margins": 9.113062858581543, + "rewards/rejected": -16.847515106201172, + "step": 2767 + }, + { + "epoch": 4.44, + "learning_rate": 2.8131193024177564e-07, + "logits/chosen": -1.625832200050354, + "logits/rejected": -1.6556293964385986, + "logps/chosen": -106.03610229492188, + "logps/rejected": -256.46746826171875, + "loss": 0.0153, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.392554998397827, + "rewards/margins": 13.48036003112793, + "rewards/rejected": -16.872915267944336, + "step": 2768 + }, + { + "epoch": 4.44, + "learning_rate": 2.812128418549346e-07, + "logits/chosen": -1.7671726942062378, + "logits/rejected": -1.7746357917785645, + "logps/chosen": -140.02615356445312, + "logps/rejected": -258.77069091796875, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.159049034118652, + "rewards/margins": 11.598835945129395, + "rewards/rejected": -16.757884979248047, + "step": 2769 + }, + { + "epoch": 4.45, + "learning_rate": 2.811137534680935e-07, + "logits/chosen": -1.6480728387832642, + "logits/rejected": -1.6215770244598389, + "logps/chosen": -168.7593994140625, + "logps/rejected": -234.2295379638672, + "loss": 0.0049, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.406893253326416, + "rewards/margins": 8.226319313049316, + "rewards/rejected": -14.63321304321289, + "step": 2770 + }, + { + "epoch": 4.45, + "learning_rate": 2.8101466508125246e-07, + "logits/chosen": -1.6111595630645752, + "logits/rejected": -1.6228020191192627, + "logps/chosen": -123.53425598144531, + "logps/rejected": -208.87274169921875, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9654159545898438, + "rewards/margins": 8.618648529052734, + "rewards/rejected": -12.584064483642578, + "step": 2771 + }, + { + "epoch": 4.45, + "learning_rate": 2.809155766944114e-07, + "logits/chosen": -1.6037147045135498, + "logits/rejected": -1.5263954401016235, + "logps/chosen": -154.48353576660156, + "logps/rejected": -235.903564453125, + "loss": 0.0018, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.21937370300293, + "rewards/margins": 8.961101531982422, + "rewards/rejected": -14.180475234985352, + "step": 2772 + }, + { + "epoch": 4.45, + "learning_rate": 2.8081648830757033e-07, + "logits/chosen": -1.701793909072876, + "logits/rejected": -1.7092843055725098, + "logps/chosen": -135.17901611328125, + "logps/rejected": -234.3446044921875, + "loss": 0.002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.532351493835449, + "rewards/margins": 10.55919361114502, + "rewards/rejected": -16.09154510498047, + "step": 2773 + }, + { + "epoch": 4.45, + "learning_rate": 2.807173999207293e-07, + "logits/chosen": -1.7039819955825806, + "logits/rejected": -1.682356834411621, + "logps/chosen": -150.87261962890625, + "logps/rejected": -263.63232421875, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.312885284423828, + "rewards/margins": 10.717589378356934, + "rewards/rejected": -16.030475616455078, + "step": 2774 + }, + { + "epoch": 4.45, + "learning_rate": 2.806183115338882e-07, + "logits/chosen": -1.4944771528244019, + "logits/rejected": -1.4120906591415405, + "logps/chosen": -136.0952911376953, + "logps/rejected": -193.75282287597656, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.084843635559082, + "rewards/margins": 7.954451560974121, + "rewards/rejected": -12.03929615020752, + "step": 2775 + }, + { + "epoch": 4.46, + "learning_rate": 2.8051922314704716e-07, + "logits/chosen": -1.5095566511154175, + "logits/rejected": -1.5478435754776, + "logps/chosen": -142.15118408203125, + "logps/rejected": -233.05300903320312, + "loss": 0.0074, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.958883285522461, + "rewards/margins": 10.36723804473877, + "rewards/rejected": -14.32612133026123, + "step": 2776 + }, + { + "epoch": 4.46, + "learning_rate": 2.804201347602061e-07, + "logits/chosen": -1.7724047899246216, + "logits/rejected": -1.7530958652496338, + "logps/chosen": -169.1248321533203, + "logps/rejected": -238.17800903320312, + "loss": 0.0017, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.316110134124756, + "rewards/margins": 9.206033706665039, + "rewards/rejected": -16.522144317626953, + "step": 2777 + }, + { + "epoch": 4.46, + "learning_rate": 2.80321046373365e-07, + "logits/chosen": -1.6659913063049316, + "logits/rejected": -1.5619697570800781, + "logps/chosen": -171.40823364257812, + "logps/rejected": -232.71527099609375, + "loss": 0.0021, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.649877548217773, + "rewards/margins": 8.75882339477539, + "rewards/rejected": -15.408700942993164, + "step": 2778 + }, + { + "epoch": 4.46, + "learning_rate": 2.80221957986524e-07, + "logits/chosen": -1.6000263690948486, + "logits/rejected": -1.5953128337860107, + "logps/chosen": -71.24349975585938, + "logps/rejected": -174.62022399902344, + "loss": 0.0146, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.678483247756958, + "rewards/margins": 10.27414608001709, + "rewards/rejected": -11.952629089355469, + "step": 2779 + }, + { + "epoch": 4.46, + "learning_rate": 2.801228695996829e-07, + "logits/chosen": -1.621415376663208, + "logits/rejected": -1.6565399169921875, + "logps/chosen": -108.86508178710938, + "logps/rejected": -225.0075225830078, + "loss": 0.0369, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.623643159866333, + "rewards/margins": 11.878439903259277, + "rewards/rejected": -14.502083778381348, + "step": 2780 + }, + { + "epoch": 4.46, + "learning_rate": 2.8002378121284185e-07, + "logits/chosen": -1.5521348714828491, + "logits/rejected": -1.5540755987167358, + "logps/chosen": -122.08818817138672, + "logps/rejected": -209.15716552734375, + "loss": 0.0015, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.168209075927734, + "rewards/margins": 9.919797897338867, + "rewards/rejected": -14.088006973266602, + "step": 2781 + }, + { + "epoch": 4.47, + "learning_rate": 2.799246928260008e-07, + "logits/chosen": -1.485342264175415, + "logits/rejected": -1.5466617345809937, + "logps/chosen": -119.37955474853516, + "logps/rejected": -229.3961181640625, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.773891448974609, + "rewards/margins": 9.49170970916748, + "rewards/rejected": -14.26560115814209, + "step": 2782 + }, + { + "epoch": 4.47, + "learning_rate": 2.798256044391597e-07, + "logits/chosen": -1.7207412719726562, + "logits/rejected": -1.7234901189804077, + "logps/chosen": -112.4862060546875, + "logps/rejected": -250.169189453125, + "loss": 0.0385, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.4771955013275146, + "rewards/margins": 13.739461898803711, + "rewards/rejected": -17.216655731201172, + "step": 2783 + }, + { + "epoch": 4.47, + "learning_rate": 2.797265160523187e-07, + "logits/chosen": -1.5213991403579712, + "logits/rejected": -1.4766119718551636, + "logps/chosen": -132.43447875976562, + "logps/rejected": -239.79600524902344, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.070225715637207, + "rewards/margins": 12.394888877868652, + "rewards/rejected": -16.46511459350586, + "step": 2784 + }, + { + "epoch": 4.47, + "learning_rate": 2.796274276654776e-07, + "logits/chosen": -1.503800630569458, + "logits/rejected": -1.4523800611495972, + "logps/chosen": -136.9077606201172, + "logps/rejected": -193.6627197265625, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.877272129058838, + "rewards/margins": 7.413882732391357, + "rewards/rejected": -11.291154861450195, + "step": 2785 + }, + { + "epoch": 4.47, + "learning_rate": 2.795283392786365e-07, + "logits/chosen": -1.669974684715271, + "logits/rejected": -1.6851468086242676, + "logps/chosen": -148.18927001953125, + "logps/rejected": -280.16302490234375, + "loss": 0.0042, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.377048015594482, + "rewards/margins": 11.962980270385742, + "rewards/rejected": -18.340028762817383, + "step": 2786 + }, + { + "epoch": 4.47, + "learning_rate": 2.794292508917955e-07, + "logits/chosen": -1.5625427961349487, + "logits/rejected": -1.616051435470581, + "logps/chosen": -129.7542266845703, + "logps/rejected": -241.31643676757812, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.4089882373809814, + "rewards/margins": 9.094388961791992, + "rewards/rejected": -12.503376960754395, + "step": 2787 + }, + { + "epoch": 4.48, + "learning_rate": 2.793301625049544e-07, + "logits/chosen": -1.5150152444839478, + "logits/rejected": -1.5254220962524414, + "logps/chosen": -126.76512145996094, + "logps/rejected": -200.1992950439453, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.936099052429199, + "rewards/margins": 6.220221042633057, + "rewards/rejected": -12.156320571899414, + "step": 2788 + }, + { + "epoch": 4.48, + "learning_rate": 2.7923107411811337e-07, + "logits/chosen": -1.6040432453155518, + "logits/rejected": -1.5642261505126953, + "logps/chosen": -133.92800903320312, + "logps/rejected": -247.1510467529297, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.699306964874268, + "rewards/margins": 11.202713966369629, + "rewards/rejected": -15.902021408081055, + "step": 2789 + }, + { + "epoch": 4.48, + "learning_rate": 2.791319857312723e-07, + "logits/chosen": -1.6042938232421875, + "logits/rejected": -1.5755186080932617, + "logps/chosen": -129.53164672851562, + "logps/rejected": -227.7854461669922, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.354366779327393, + "rewards/margins": 8.602972984313965, + "rewards/rejected": -12.957340240478516, + "step": 2790 + }, + { + "epoch": 4.48, + "learning_rate": 2.790328973444312e-07, + "logits/chosen": -1.451268196105957, + "logits/rejected": -1.488668441772461, + "logps/chosen": -188.19393920898438, + "logps/rejected": -242.9617919921875, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.885913848876953, + "rewards/margins": 7.035870552062988, + "rewards/rejected": -14.921785354614258, + "step": 2791 + }, + { + "epoch": 4.48, + "learning_rate": 2.7893380895759014e-07, + "logits/chosen": -1.5272724628448486, + "logits/rejected": -1.5402376651763916, + "logps/chosen": -98.49087524414062, + "logps/rejected": -179.5891876220703, + "loss": 0.0065, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.8125195503234863, + "rewards/margins": 8.231917381286621, + "rewards/rejected": -12.044437408447266, + "step": 2792 + }, + { + "epoch": 4.48, + "learning_rate": 2.788347205707491e-07, + "logits/chosen": -1.6101946830749512, + "logits/rejected": -1.5522842407226562, + "logps/chosen": -138.28851318359375, + "logps/rejected": -246.4190673828125, + "loss": 0.007, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.4755353927612305, + "rewards/margins": 11.168808937072754, + "rewards/rejected": -16.644344329833984, + "step": 2793 + }, + { + "epoch": 4.48, + "learning_rate": 2.7873563218390806e-07, + "logits/chosen": -1.4968459606170654, + "logits/rejected": -1.5565507411956787, + "logps/chosen": -93.6102294921875, + "logps/rejected": -239.70826721191406, + "loss": 0.0036, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5057034492492676, + "rewards/margins": 13.784570693969727, + "rewards/rejected": -15.290273666381836, + "step": 2794 + }, + { + "epoch": 4.49, + "learning_rate": 2.7863654379706697e-07, + "logits/chosen": -1.6463429927825928, + "logits/rejected": -1.6407725811004639, + "logps/chosen": -111.93770599365234, + "logps/rejected": -216.40350341796875, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.293461799621582, + "rewards/margins": 10.810312271118164, + "rewards/rejected": -13.10377311706543, + "step": 2795 + }, + { + "epoch": 4.49, + "learning_rate": 2.785374554102259e-07, + "logits/chosen": -1.4864040613174438, + "logits/rejected": -1.5323628187179565, + "logps/chosen": -81.9390640258789, + "logps/rejected": -202.75511169433594, + "loss": 0.008, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.584714412689209, + "rewards/margins": 10.030757904052734, + "rewards/rejected": -13.615471839904785, + "step": 2796 + }, + { + "epoch": 4.49, + "learning_rate": 2.7843836702338484e-07, + "logits/chosen": -1.541719913482666, + "logits/rejected": -1.476633071899414, + "logps/chosen": -150.97898864746094, + "logps/rejected": -239.44508361816406, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.847156047821045, + "rewards/margins": 9.204046249389648, + "rewards/rejected": -14.051202774047852, + "step": 2797 + }, + { + "epoch": 4.49, + "learning_rate": 2.783392786365438e-07, + "logits/chosen": -1.5616563558578491, + "logits/rejected": -1.5403021574020386, + "logps/chosen": -141.3931884765625, + "logps/rejected": -218.9105224609375, + "loss": 0.0035, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.778682708740234, + "rewards/margins": 8.907339096069336, + "rewards/rejected": -14.68602180480957, + "step": 2798 + }, + { + "epoch": 4.49, + "learning_rate": 2.7824019024970275e-07, + "logits/chosen": -1.640052318572998, + "logits/rejected": -1.658229947090149, + "logps/chosen": -129.95126342773438, + "logps/rejected": -240.1207733154297, + "loss": 0.0062, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.86325740814209, + "rewards/margins": 10.251483917236328, + "rewards/rejected": -15.114742279052734, + "step": 2799 + }, + { + "epoch": 4.49, + "learning_rate": 2.7814110186286166e-07, + "logits/chosen": -1.6252448558807373, + "logits/rejected": -1.5091874599456787, + "logps/chosen": -131.91546630859375, + "logps/rejected": -251.16970825195312, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.578756809234619, + "rewards/margins": 12.612921714782715, + "rewards/rejected": -17.191679000854492, + "step": 2800 + }, + { + "epoch": 4.5, + "learning_rate": 2.7804201347602057e-07, + "logits/chosen": -1.652876853942871, + "logits/rejected": -1.6234314441680908, + "logps/chosen": -162.2783203125, + "logps/rejected": -242.21786499023438, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.6338372230529785, + "rewards/margins": 9.332765579223633, + "rewards/rejected": -15.96660327911377, + "step": 2801 + }, + { + "epoch": 4.5, + "learning_rate": 2.7794292508917953e-07, + "logits/chosen": -1.5144388675689697, + "logits/rejected": -1.5487178564071655, + "logps/chosen": -115.75148010253906, + "logps/rejected": -250.61029052734375, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7632505893707275, + "rewards/margins": 11.381921768188477, + "rewards/rejected": -15.145173072814941, + "step": 2802 + }, + { + "epoch": 4.5, + "learning_rate": 2.778438367023385e-07, + "logits/chosen": -1.5801445245742798, + "logits/rejected": -1.5290191173553467, + "logps/chosen": -138.8501434326172, + "logps/rejected": -245.97216796875, + "loss": 0.0027, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.890955924987793, + "rewards/margins": 10.224698066711426, + "rewards/rejected": -16.11565399169922, + "step": 2803 + }, + { + "epoch": 4.5, + "learning_rate": 2.7774474831549745e-07, + "logits/chosen": -1.6285772323608398, + "logits/rejected": -1.6636097431182861, + "logps/chosen": -113.93743896484375, + "logps/rejected": -285.4978942871094, + "loss": 0.0021, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.404966354370117, + "rewards/margins": 14.759344100952148, + "rewards/rejected": -19.164310455322266, + "step": 2804 + }, + { + "epoch": 4.5, + "learning_rate": 2.7764565992865635e-07, + "logits/chosen": -1.502935528755188, + "logits/rejected": -1.5023832321166992, + "logps/chosen": -143.6563720703125, + "logps/rejected": -228.884521484375, + "loss": 0.0037, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.117403030395508, + "rewards/margins": 9.1888427734375, + "rewards/rejected": -13.306245803833008, + "step": 2805 + }, + { + "epoch": 4.5, + "learning_rate": 2.7754657154181526e-07, + "logits/chosen": -1.75993013381958, + "logits/rejected": -1.778637170791626, + "logps/chosen": -145.81350708007812, + "logps/rejected": -296.07513427734375, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.111900806427002, + "rewards/margins": 11.174005508422852, + "rewards/rejected": -16.285905838012695, + "step": 2806 + }, + { + "epoch": 4.51, + "learning_rate": 2.774474831549742e-07, + "logits/chosen": -1.5163995027542114, + "logits/rejected": -1.5313801765441895, + "logps/chosen": -134.6122283935547, + "logps/rejected": -243.01937866210938, + "loss": 0.0031, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.483554840087891, + "rewards/margins": 8.564491271972656, + "rewards/rejected": -14.048046112060547, + "step": 2807 + }, + { + "epoch": 4.51, + "learning_rate": 2.7734839476813313e-07, + "logits/chosen": -1.680015206336975, + "logits/rejected": -1.6355396509170532, + "logps/chosen": -132.52325439453125, + "logps/rejected": -249.52288818359375, + "loss": 0.0058, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.545040607452393, + "rewards/margins": 11.7825288772583, + "rewards/rejected": -17.32756996154785, + "step": 2808 + }, + { + "epoch": 4.51, + "learning_rate": 2.7724930638129214e-07, + "logits/chosen": -1.527362585067749, + "logits/rejected": -1.527822494506836, + "logps/chosen": -135.01019287109375, + "logps/rejected": -238.34754943847656, + "loss": 0.0039, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.903086185455322, + "rewards/margins": 10.757129669189453, + "rewards/rejected": -15.660216331481934, + "step": 2809 + }, + { + "epoch": 4.51, + "learning_rate": 2.7715021799445105e-07, + "logits/chosen": -1.6305440664291382, + "logits/rejected": -1.62916100025177, + "logps/chosen": -130.04013061523438, + "logps/rejected": -232.4702911376953, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.968426704406738, + "rewards/margins": 10.743631362915039, + "rewards/rejected": -15.712057113647461, + "step": 2810 + }, + { + "epoch": 4.51, + "learning_rate": 2.7705112960760995e-07, + "logits/chosen": -1.4590213298797607, + "logits/rejected": -1.4841420650482178, + "logps/chosen": -134.41177368164062, + "logps/rejected": -241.6845703125, + "loss": 0.0053, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.550762176513672, + "rewards/margins": 10.249332427978516, + "rewards/rejected": -14.800093650817871, + "step": 2811 + }, + { + "epoch": 4.51, + "learning_rate": 2.769520412207689e-07, + "logits/chosen": -1.4618349075317383, + "logits/rejected": -1.4450814723968506, + "logps/chosen": -171.9166717529297, + "logps/rejected": -247.81298828125, + "loss": 0.0064, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.798979759216309, + "rewards/margins": 7.979863166809082, + "rewards/rejected": -14.77884292602539, + "step": 2812 + }, + { + "epoch": 4.52, + "learning_rate": 2.768529528339278e-07, + "logits/chosen": -1.5947637557983398, + "logits/rejected": -1.5830657482147217, + "logps/chosen": -163.08323669433594, + "logps/rejected": -274.16064453125, + "loss": 0.0015, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.5760178565979, + "rewards/margins": 10.463120460510254, + "rewards/rejected": -17.039138793945312, + "step": 2813 + }, + { + "epoch": 4.52, + "learning_rate": 2.7675386444708683e-07, + "logits/chosen": -1.6015257835388184, + "logits/rejected": -1.5990798473358154, + "logps/chosen": -117.4588623046875, + "logps/rejected": -206.17495727539062, + "loss": 0.0046, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.0037994384765625, + "rewards/margins": 8.453954696655273, + "rewards/rejected": -12.457754135131836, + "step": 2814 + }, + { + "epoch": 4.52, + "learning_rate": 2.7665477606024574e-07, + "logits/chosen": -1.6255697011947632, + "logits/rejected": -1.6712092161178589, + "logps/chosen": -153.6128692626953, + "logps/rejected": -238.37274169921875, + "loss": 0.0064, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.809083938598633, + "rewards/margins": 8.362279891967773, + "rewards/rejected": -14.171364784240723, + "step": 2815 + }, + { + "epoch": 4.52, + "learning_rate": 2.7655568767340465e-07, + "logits/chosen": -1.6855837106704712, + "logits/rejected": -1.6779553890228271, + "logps/chosen": -155.612060546875, + "logps/rejected": -228.32540893554688, + "loss": 0.0031, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.5503058433532715, + "rewards/margins": 8.333115577697754, + "rewards/rejected": -13.883420944213867, + "step": 2816 + }, + { + "epoch": 4.52, + "learning_rate": 2.764565992865636e-07, + "logits/chosen": -1.5686019659042358, + "logits/rejected": -1.6002357006072998, + "logps/chosen": -113.88412475585938, + "logps/rejected": -247.1538543701172, + "loss": 0.0045, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.359396934509277, + "rewards/margins": 11.560690879821777, + "rewards/rejected": -15.920087814331055, + "step": 2817 + }, + { + "epoch": 4.52, + "learning_rate": 2.763575108997225e-07, + "logits/chosen": -1.5269384384155273, + "logits/rejected": -1.5655933618545532, + "logps/chosen": -115.63810729980469, + "logps/rejected": -225.92860412597656, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.826902151107788, + "rewards/margins": 10.578222274780273, + "rewards/rejected": -14.405123710632324, + "step": 2818 + }, + { + "epoch": 4.52, + "learning_rate": 2.762584225128815e-07, + "logits/chosen": -1.639686107635498, + "logits/rejected": -1.691918134689331, + "logps/chosen": -106.64309692382812, + "logps/rejected": -190.20960998535156, + "loss": 0.008, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.8653557300567627, + "rewards/margins": 7.256318092346191, + "rewards/rejected": -11.121673583984375, + "step": 2819 + }, + { + "epoch": 4.53, + "learning_rate": 2.7615933412604043e-07, + "logits/chosen": -1.5898807048797607, + "logits/rejected": -1.5956050157546997, + "logps/chosen": -82.0503158569336, + "logps/rejected": -174.1080780029297, + "loss": 0.0084, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.678748846054077, + "rewards/margins": 9.388021469116211, + "rewards/rejected": -12.06676959991455, + "step": 2820 + }, + { + "epoch": 4.53, + "learning_rate": 2.7606024573919934e-07, + "logits/chosen": -1.8119322061538696, + "logits/rejected": -1.7621175050735474, + "logps/chosen": -149.87088012695312, + "logps/rejected": -241.07542419433594, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.261667728424072, + "rewards/margins": 10.34338092803955, + "rewards/rejected": -16.60504913330078, + "step": 2821 + }, + { + "epoch": 4.53, + "learning_rate": 2.759611573523583e-07, + "logits/chosen": -1.72573983669281, + "logits/rejected": -1.7325003147125244, + "logps/chosen": -120.32891082763672, + "logps/rejected": -229.6142578125, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.298008441925049, + "rewards/margins": 10.989982604980469, + "rewards/rejected": -14.28799057006836, + "step": 2822 + }, + { + "epoch": 4.53, + "learning_rate": 2.758620689655172e-07, + "logits/chosen": -1.5940457582473755, + "logits/rejected": -1.5923705101013184, + "logps/chosen": -133.24681091308594, + "logps/rejected": -237.94903564453125, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.884000778198242, + "rewards/margins": 11.673494338989258, + "rewards/rejected": -14.5574951171875, + "step": 2823 + }, + { + "epoch": 4.53, + "learning_rate": 2.7576298057867617e-07, + "logits/chosen": -1.578063726425171, + "logits/rejected": -1.578145980834961, + "logps/chosen": -151.49685668945312, + "logps/rejected": -299.7729187011719, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.873994827270508, + "rewards/margins": 13.433101654052734, + "rewards/rejected": -19.307096481323242, + "step": 2824 + }, + { + "epoch": 4.53, + "learning_rate": 2.7566389219183513e-07, + "logits/chosen": -1.5385832786560059, + "logits/rejected": -1.5514661073684692, + "logps/chosen": -134.41766357421875, + "logps/rejected": -225.20236206054688, + "loss": 0.004, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.342463493347168, + "rewards/margins": 9.45561408996582, + "rewards/rejected": -13.798076629638672, + "step": 2825 + }, + { + "epoch": 4.54, + "learning_rate": 2.7556480380499403e-07, + "logits/chosen": -1.4231773614883423, + "logits/rejected": -1.5198866128921509, + "logps/chosen": -128.70538330078125, + "logps/rejected": -241.09848022460938, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.243760585784912, + "rewards/margins": 10.570501327514648, + "rewards/rejected": -15.814261436462402, + "step": 2826 + }, + { + "epoch": 4.54, + "learning_rate": 2.75465715418153e-07, + "logits/chosen": -1.5884253978729248, + "logits/rejected": -1.5305211544036865, + "logps/chosen": -137.478271484375, + "logps/rejected": -242.0609893798828, + "loss": 0.0117, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.5672197341918945, + "rewards/margins": 11.298229217529297, + "rewards/rejected": -15.865448951721191, + "step": 2827 + }, + { + "epoch": 4.54, + "learning_rate": 2.753666270313119e-07, + "logits/chosen": -1.6935991048812866, + "logits/rejected": -1.6324217319488525, + "logps/chosen": -160.7823944091797, + "logps/rejected": -268.3804016113281, + "loss": 0.0094, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.118954181671143, + "rewards/margins": 10.718652725219727, + "rewards/rejected": -16.837608337402344, + "step": 2828 + }, + { + "epoch": 4.54, + "learning_rate": 2.752675386444708e-07, + "logits/chosen": -1.4552879333496094, + "logits/rejected": -1.3670326471328735, + "logps/chosen": -145.2235870361328, + "logps/rejected": -207.7515869140625, + "loss": 0.0646, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.231058120727539, + "rewards/margins": 8.504594802856445, + "rewards/rejected": -13.7356538772583, + "step": 2829 + }, + { + "epoch": 4.54, + "learning_rate": 2.751684502576298e-07, + "logits/chosen": -1.5197253227233887, + "logits/rejected": -1.5053484439849854, + "logps/chosen": -138.58673095703125, + "logps/rejected": -217.93016052246094, + "loss": 0.0064, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.293963432312012, + "rewards/margins": 8.544078826904297, + "rewards/rejected": -12.838041305541992, + "step": 2830 + }, + { + "epoch": 4.54, + "learning_rate": 2.750693618707887e-07, + "logits/chosen": -1.445472002029419, + "logits/rejected": -1.414696455001831, + "logps/chosen": -152.7145538330078, + "logps/rejected": -229.15115356445312, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.331068992614746, + "rewards/margins": 9.836455345153809, + "rewards/rejected": -14.167524337768555, + "step": 2831 + }, + { + "epoch": 4.55, + "learning_rate": 2.749702734839477e-07, + "logits/chosen": -1.4078130722045898, + "logits/rejected": -1.39902663230896, + "logps/chosen": -142.72589111328125, + "logps/rejected": -253.47982788085938, + "loss": 0.0195, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.298447132110596, + "rewards/margins": 11.785785675048828, + "rewards/rejected": -17.084232330322266, + "step": 2832 + }, + { + "epoch": 4.55, + "learning_rate": 2.748711850971066e-07, + "logits/chosen": -1.6970798969268799, + "logits/rejected": -1.746498942375183, + "logps/chosen": -77.19947052001953, + "logps/rejected": -186.76528930664062, + "loss": 0.0023, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3104373216629028, + "rewards/margins": 11.600809097290039, + "rewards/rejected": -12.911247253417969, + "step": 2833 + }, + { + "epoch": 4.55, + "learning_rate": 2.747720967102655e-07, + "logits/chosen": -1.5782599449157715, + "logits/rejected": -1.6015715599060059, + "logps/chosen": -140.51531982421875, + "logps/rejected": -267.2781066894531, + "loss": 0.0176, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.273142337799072, + "rewards/margins": 12.333765983581543, + "rewards/rejected": -17.606908798217773, + "step": 2834 + }, + { + "epoch": 4.55, + "learning_rate": 2.746730083234245e-07, + "logits/chosen": -1.5432519912719727, + "logits/rejected": -1.5531351566314697, + "logps/chosen": -156.93751525878906, + "logps/rejected": -274.94085693359375, + "loss": 0.0109, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.438264846801758, + "rewards/margins": 12.945955276489258, + "rewards/rejected": -19.384220123291016, + "step": 2835 + }, + { + "epoch": 4.55, + "learning_rate": 2.745739199365834e-07, + "logits/chosen": -1.6199326515197754, + "logits/rejected": -1.6426987648010254, + "logps/chosen": -109.1934814453125, + "logps/rejected": -228.87843322753906, + "loss": 0.0021, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3846726417541504, + "rewards/margins": 11.644591331481934, + "rewards/rejected": -15.029263496398926, + "step": 2836 + }, + { + "epoch": 4.55, + "learning_rate": 2.744748315497424e-07, + "logits/chosen": -1.504603385925293, + "logits/rejected": -1.5654747486114502, + "logps/chosen": -109.61658477783203, + "logps/rejected": -218.8448028564453, + "loss": 0.0034, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.993145704269409, + "rewards/margins": 10.182534217834473, + "rewards/rejected": -14.175680160522461, + "step": 2837 + }, + { + "epoch": 4.56, + "learning_rate": 2.743757431629013e-07, + "logits/chosen": -1.4566230773925781, + "logits/rejected": -1.4841477870941162, + "logps/chosen": -133.1646270751953, + "logps/rejected": -249.279052734375, + "loss": 0.0027, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.156158924102783, + "rewards/margins": 10.0572509765625, + "rewards/rejected": -15.213409423828125, + "step": 2838 + }, + { + "epoch": 4.56, + "learning_rate": 2.742766547760602e-07, + "logits/chosen": -1.4440183639526367, + "logits/rejected": -1.5208971500396729, + "logps/chosen": -174.27093505859375, + "logps/rejected": -248.71119689941406, + "loss": 0.0023, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.084232330322266, + "rewards/margins": 8.9021635055542, + "rewards/rejected": -16.98639678955078, + "step": 2839 + }, + { + "epoch": 4.56, + "learning_rate": 2.741775663892192e-07, + "logits/chosen": -1.5303318500518799, + "logits/rejected": -1.55264413356781, + "logps/chosen": -139.18399047851562, + "logps/rejected": -280.086669921875, + "loss": 0.064, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.774323463439941, + "rewards/margins": 12.022619247436523, + "rewards/rejected": -16.79694175720215, + "step": 2840 + }, + { + "epoch": 4.56, + "learning_rate": 2.740784780023781e-07, + "logits/chosen": -1.4367369413375854, + "logits/rejected": -1.3810492753982544, + "logps/chosen": -102.03660583496094, + "logps/rejected": -197.992919921875, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2760202884674072, + "rewards/margins": 10.389532089233398, + "rewards/rejected": -12.665552139282227, + "step": 2841 + }, + { + "epoch": 4.56, + "learning_rate": 2.7397938961553707e-07, + "logits/chosen": -1.5954349040985107, + "logits/rejected": -1.542466640472412, + "logps/chosen": -125.31034851074219, + "logps/rejected": -214.36813354492188, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8865115642547607, + "rewards/margins": 9.81609058380127, + "rewards/rejected": -12.70260238647461, + "step": 2842 + }, + { + "epoch": 4.56, + "learning_rate": 2.73880301228696e-07, + "logits/chosen": -1.4792134761810303, + "logits/rejected": -1.4543198347091675, + "logps/chosen": -102.7669677734375, + "logps/rejected": -193.1697998046875, + "loss": 0.0024, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6447434425354004, + "rewards/margins": 9.630353927612305, + "rewards/rejected": -12.275097846984863, + "step": 2843 + }, + { + "epoch": 4.57, + "learning_rate": 2.737812128418549e-07, + "logits/chosen": -1.7056212425231934, + "logits/rejected": -1.6779396533966064, + "logps/chosen": -135.36651611328125, + "logps/rejected": -292.4979248046875, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.8712847232818604, + "rewards/margins": 16.85468101501465, + "rewards/rejected": -20.72596549987793, + "step": 2844 + }, + { + "epoch": 4.57, + "learning_rate": 2.736821244550139e-07, + "logits/chosen": -1.7758382558822632, + "logits/rejected": -1.6221508979797363, + "logps/chosen": -146.53912353515625, + "logps/rejected": -251.81283569335938, + "loss": 0.0019, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.665487766265869, + "rewards/margins": 11.406951904296875, + "rewards/rejected": -16.07244110107422, + "step": 2845 + }, + { + "epoch": 4.57, + "learning_rate": 2.735830360681728e-07, + "logits/chosen": -1.602111577987671, + "logits/rejected": -1.5715088844299316, + "logps/chosen": -145.44622802734375, + "logps/rejected": -295.92645263671875, + "loss": 0.1383, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.5456085205078125, + "rewards/margins": 13.601818084716797, + "rewards/rejected": -20.14742660522461, + "step": 2846 + }, + { + "epoch": 4.57, + "learning_rate": 2.7348394768133176e-07, + "logits/chosen": -1.419093132019043, + "logits/rejected": -1.4504579305648804, + "logps/chosen": -112.45354461669922, + "logps/rejected": -257.2138671875, + "loss": 0.007, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.76430082321167, + "rewards/margins": 13.21423053741455, + "rewards/rejected": -15.978532791137695, + "step": 2847 + }, + { + "epoch": 4.57, + "learning_rate": 2.7338485929449067e-07, + "logits/chosen": -1.6715598106384277, + "logits/rejected": -1.6332874298095703, + "logps/chosen": -120.75337219238281, + "logps/rejected": -242.3410186767578, + "loss": 0.0057, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.915994644165039, + "rewards/margins": 12.85696792602539, + "rewards/rejected": -15.77296257019043, + "step": 2848 + }, + { + "epoch": 4.57, + "learning_rate": 2.732857709076496e-07, + "logits/chosen": -1.5937896966934204, + "logits/rejected": -1.5393452644348145, + "logps/chosen": -140.9891815185547, + "logps/rejected": -214.26995849609375, + "loss": 0.0018, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.082625389099121, + "rewards/margins": 8.829866409301758, + "rewards/rejected": -12.912492752075195, + "step": 2849 + }, + { + "epoch": 4.57, + "learning_rate": 2.7318668252080854e-07, + "logits/chosen": -1.6953908205032349, + "logits/rejected": -1.6876412630081177, + "logps/chosen": -119.43608093261719, + "logps/rejected": -244.83712768554688, + "loss": 0.0077, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.935963153839111, + "rewards/margins": 11.491640090942383, + "rewards/rejected": -16.42760467529297, + "step": 2850 + }, + { + "epoch": 4.58, + "learning_rate": 2.730875941339675e-07, + "logits/chosen": -1.5645134449005127, + "logits/rejected": -1.5784215927124023, + "logps/chosen": -181.3961181640625, + "logps/rejected": -233.08697509765625, + "loss": 0.0056, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.7351531982421875, + "rewards/margins": 6.526765823364258, + "rewards/rejected": -14.261919021606445, + "step": 2851 + }, + { + "epoch": 4.58, + "learning_rate": 2.7298850574712646e-07, + "logits/chosen": -1.7069848775863647, + "logits/rejected": -1.7591235637664795, + "logps/chosen": -140.8182373046875, + "logps/rejected": -246.55023193359375, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.655781269073486, + "rewards/margins": 10.435622215270996, + "rewards/rejected": -15.091403007507324, + "step": 2852 + }, + { + "epoch": 4.58, + "learning_rate": 2.7288941736028536e-07, + "logits/chosen": -1.6918538808822632, + "logits/rejected": -1.64956796169281, + "logps/chosen": -131.4624786376953, + "logps/rejected": -240.11093139648438, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.597045421600342, + "rewards/margins": 10.032846450805664, + "rewards/rejected": -14.629892349243164, + "step": 2853 + }, + { + "epoch": 4.58, + "learning_rate": 2.7279032897344427e-07, + "logits/chosen": -1.5912437438964844, + "logits/rejected": -1.7224540710449219, + "logps/chosen": -122.45218658447266, + "logps/rejected": -244.23648071289062, + "loss": 0.0068, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.209161758422852, + "rewards/margins": 9.854090690612793, + "rewards/rejected": -14.063251495361328, + "step": 2854 + }, + { + "epoch": 4.58, + "learning_rate": 2.7269124058660323e-07, + "logits/chosen": -1.5638470649719238, + "logits/rejected": -1.6249359846115112, + "logps/chosen": -171.281982421875, + "logps/rejected": -249.23193359375, + "loss": 0.0234, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.5090508460998535, + "rewards/margins": 7.132864952087402, + "rewards/rejected": -13.641915321350098, + "step": 2855 + }, + { + "epoch": 4.58, + "learning_rate": 2.725921521997622e-07, + "logits/chosen": -1.694502353668213, + "logits/rejected": -1.57957124710083, + "logps/chosen": -171.33291625976562, + "logps/rejected": -241.36090087890625, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.534945011138916, + "rewards/margins": 8.704036712646484, + "rewards/rejected": -15.238981246948242, + "step": 2856 + }, + { + "epoch": 4.59, + "learning_rate": 2.724930638129211e-07, + "logits/chosen": -1.6113861799240112, + "logits/rejected": -1.5304720401763916, + "logps/chosen": -123.95651245117188, + "logps/rejected": -224.29103088378906, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.831267356872559, + "rewards/margins": 10.141507148742676, + "rewards/rejected": -14.972774505615234, + "step": 2857 + }, + { + "epoch": 4.59, + "learning_rate": 2.7239397542608006e-07, + "logits/chosen": -1.5323429107666016, + "logits/rejected": -1.5122230052947998, + "logps/chosen": -110.03152465820312, + "logps/rejected": -184.61599731445312, + "loss": 0.0156, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.055251121520996, + "rewards/margins": 7.018614292144775, + "rewards/rejected": -11.07386589050293, + "step": 2858 + }, + { + "epoch": 4.59, + "learning_rate": 2.7229488703923896e-07, + "logits/chosen": -1.5625205039978027, + "logits/rejected": -1.5995848178863525, + "logps/chosen": -130.38983154296875, + "logps/rejected": -244.61831665039062, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.221988677978516, + "rewards/margins": 9.722090721130371, + "rewards/rejected": -14.944079399108887, + "step": 2859 + }, + { + "epoch": 4.59, + "learning_rate": 2.721957986523979e-07, + "logits/chosen": -1.459826111793518, + "logits/rejected": -1.4898276329040527, + "logps/chosen": -178.95028686523438, + "logps/rejected": -288.26678466796875, + "loss": 0.0174, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.899082660675049, + "rewards/margins": 11.026535987854004, + "rewards/rejected": -17.925621032714844, + "step": 2860 + }, + { + "epoch": 4.59, + "learning_rate": 2.720967102655569e-07, + "logits/chosen": -1.5481373071670532, + "logits/rejected": -1.521531343460083, + "logps/chosen": -108.42938232421875, + "logps/rejected": -202.45663452148438, + "loss": 0.0018, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.783082962036133, + "rewards/margins": 9.706971168518066, + "rewards/rejected": -14.490053176879883, + "step": 2861 + }, + { + "epoch": 4.59, + "learning_rate": 2.719976218787158e-07, + "logits/chosen": -1.535512924194336, + "logits/rejected": -1.5648725032806396, + "logps/chosen": -115.46162414550781, + "logps/rejected": -214.9276885986328, + "loss": 0.0015, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.809177875518799, + "rewards/margins": 9.185504913330078, + "rewards/rejected": -13.994682312011719, + "step": 2862 + }, + { + "epoch": 4.6, + "learning_rate": 2.7189853349187475e-07, + "logits/chosen": -1.5587040185928345, + "logits/rejected": -1.577810525894165, + "logps/chosen": -116.78153991699219, + "logps/rejected": -230.13900756835938, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.005915641784668, + "rewards/margins": 10.214391708374023, + "rewards/rejected": -14.220308303833008, + "step": 2863 + }, + { + "epoch": 4.6, + "learning_rate": 2.7179944510503366e-07, + "logits/chosen": -1.769890308380127, + "logits/rejected": -1.6159653663635254, + "logps/chosen": -130.84564208984375, + "logps/rejected": -215.83831787109375, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.756534576416016, + "rewards/margins": 10.184228897094727, + "rewards/rejected": -14.940763473510742, + "step": 2864 + }, + { + "epoch": 4.6, + "learning_rate": 2.717003567181926e-07, + "logits/chosen": -1.5526883602142334, + "logits/rejected": -1.544137954711914, + "logps/chosen": -150.59523010253906, + "logps/rejected": -220.68824768066406, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.624057292938232, + "rewards/margins": 8.075841903686523, + "rewards/rejected": -13.699898719787598, + "step": 2865 + }, + { + "epoch": 4.6, + "learning_rate": 2.716012683313516e-07, + "logits/chosen": -1.6180590391159058, + "logits/rejected": -1.5881212949752808, + "logps/chosen": -143.15939331054688, + "logps/rejected": -254.510498046875, + "loss": 0.0056, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.065999984741211, + "rewards/margins": 11.864163398742676, + "rewards/rejected": -16.930164337158203, + "step": 2866 + }, + { + "epoch": 4.6, + "learning_rate": 2.715021799445105e-07, + "logits/chosen": -1.5364744663238525, + "logits/rejected": -1.4912023544311523, + "logps/chosen": -173.7873077392578, + "logps/rejected": -269.84869384765625, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.672622203826904, + "rewards/margins": 11.081594467163086, + "rewards/rejected": -18.75421714782715, + "step": 2867 + }, + { + "epoch": 4.6, + "learning_rate": 2.7140309155766944e-07, + "logits/chosen": -1.6385951042175293, + "logits/rejected": -1.6474331617355347, + "logps/chosen": -117.94232177734375, + "logps/rejected": -233.2362518310547, + "loss": 0.0123, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.4060778617858887, + "rewards/margins": 12.000121116638184, + "rewards/rejected": -15.406198501586914, + "step": 2868 + }, + { + "epoch": 4.61, + "learning_rate": 2.7130400317082835e-07, + "logits/chosen": -1.5424784421920776, + "logits/rejected": -1.4660046100616455, + "logps/chosen": -145.44784545898438, + "logps/rejected": -222.13406372070312, + "loss": 0.0074, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.257870674133301, + "rewards/margins": 9.107823371887207, + "rewards/rejected": -14.365694046020508, + "step": 2869 + }, + { + "epoch": 4.61, + "learning_rate": 2.712049147839873e-07, + "logits/chosen": -1.6024067401885986, + "logits/rejected": -1.5794553756713867, + "logps/chosen": -142.2131805419922, + "logps/rejected": -242.68667602539062, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.568624019622803, + "rewards/margins": 10.841888427734375, + "rewards/rejected": -16.410512924194336, + "step": 2870 + }, + { + "epoch": 4.61, + "learning_rate": 2.711058263971462e-07, + "logits/chosen": -1.594876766204834, + "logits/rejected": -1.6474511623382568, + "logps/chosen": -143.87496948242188, + "logps/rejected": -234.07223510742188, + "loss": 0.0023, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.096208572387695, + "rewards/margins": 8.206635475158691, + "rewards/rejected": -15.302844047546387, + "step": 2871 + }, + { + "epoch": 4.61, + "learning_rate": 2.710067380103052e-07, + "logits/chosen": -1.6903713941574097, + "logits/rejected": -1.6525278091430664, + "logps/chosen": -161.66152954101562, + "logps/rejected": -231.22991943359375, + "loss": 0.0021, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.024187088012695, + "rewards/margins": 8.099905014038086, + "rewards/rejected": -14.124092102050781, + "step": 2872 + }, + { + "epoch": 4.61, + "learning_rate": 2.7090764962346414e-07, + "logits/chosen": -1.5043197870254517, + "logits/rejected": -1.4559921026229858, + "logps/chosen": -83.07469940185547, + "logps/rejected": -178.99398803710938, + "loss": 0.0017, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0453598499298096, + "rewards/margins": 9.669290542602539, + "rewards/rejected": -10.714651107788086, + "step": 2873 + }, + { + "epoch": 4.61, + "learning_rate": 2.7080856123662304e-07, + "logits/chosen": -1.5894557237625122, + "logits/rejected": -1.5098575353622437, + "logps/chosen": -146.45852661132812, + "logps/rejected": -226.5121612548828, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.6977667808532715, + "rewards/margins": 8.523452758789062, + "rewards/rejected": -14.221220016479492, + "step": 2874 + }, + { + "epoch": 4.61, + "learning_rate": 2.70709472849782e-07, + "logits/chosen": -1.5694338083267212, + "logits/rejected": -1.5304968357086182, + "logps/chosen": -144.4697723388672, + "logps/rejected": -228.8848419189453, + "loss": 0.0029, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.247193813323975, + "rewards/margins": 9.654844284057617, + "rewards/rejected": -15.90203857421875, + "step": 2875 + }, + { + "epoch": 4.62, + "learning_rate": 2.706103844629409e-07, + "logits/chosen": -1.6360493898391724, + "logits/rejected": -1.6244531869888306, + "logps/chosen": -139.91123962402344, + "logps/rejected": -242.9866943359375, + "loss": 0.0036, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.894350051879883, + "rewards/margins": 10.541926383972168, + "rewards/rejected": -16.436277389526367, + "step": 2876 + }, + { + "epoch": 4.62, + "learning_rate": 2.7051129607609987e-07, + "logits/chosen": -1.5513970851898193, + "logits/rejected": -1.5425575971603394, + "logps/chosen": -133.17799377441406, + "logps/rejected": -226.77452087402344, + "loss": 0.0017, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3998589515686035, + "rewards/margins": 10.763910293579102, + "rewards/rejected": -14.163768768310547, + "step": 2877 + }, + { + "epoch": 4.62, + "learning_rate": 2.7041220768925883e-07, + "logits/chosen": -1.4514422416687012, + "logits/rejected": -1.461829423904419, + "logps/chosen": -142.50318908691406, + "logps/rejected": -257.77740478515625, + "loss": 0.0019, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.008760929107666, + "rewards/margins": 10.264333724975586, + "rewards/rejected": -17.273094177246094, + "step": 2878 + }, + { + "epoch": 4.62, + "learning_rate": 2.7031311930241774e-07, + "logits/chosen": -1.6614940166473389, + "logits/rejected": -1.5810109376907349, + "logps/chosen": -142.23460388183594, + "logps/rejected": -185.27137756347656, + "loss": 0.0024, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3465774059295654, + "rewards/margins": 8.810511589050293, + "rewards/rejected": -12.157089233398438, + "step": 2879 + }, + { + "epoch": 4.62, + "learning_rate": 2.702140309155767e-07, + "logits/chosen": -1.8086720705032349, + "logits/rejected": -1.8149009943008423, + "logps/chosen": -100.29417419433594, + "logps/rejected": -245.81729125976562, + "loss": 0.0026, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.788410186767578, + "rewards/margins": 14.36094856262207, + "rewards/rejected": -17.14935874938965, + "step": 2880 + }, + { + "epoch": 4.62, + "learning_rate": 2.701149425287356e-07, + "logits/chosen": -1.5031431913375854, + "logits/rejected": -1.559556245803833, + "logps/chosen": -149.8198699951172, + "logps/rejected": -262.16864013671875, + "loss": 0.0021, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.330438137054443, + "rewards/margins": 11.175887107849121, + "rewards/rejected": -16.506324768066406, + "step": 2881 + }, + { + "epoch": 4.63, + "learning_rate": 2.7001585414189456e-07, + "logits/chosen": -1.5649210214614868, + "logits/rejected": -1.5573844909667969, + "logps/chosen": -108.39559173583984, + "logps/rejected": -210.0255126953125, + "loss": 0.0028, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.747928619384766, + "rewards/margins": 9.190079689025879, + "rewards/rejected": -13.938007354736328, + "step": 2882 + }, + { + "epoch": 4.63, + "learning_rate": 2.699167657550535e-07, + "logits/chosen": -1.4670464992523193, + "logits/rejected": -1.5230388641357422, + "logps/chosen": -135.4256134033203, + "logps/rejected": -212.1370849609375, + "loss": 0.0037, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.514664173126221, + "rewards/margins": 6.9630608558654785, + "rewards/rejected": -12.477725982666016, + "step": 2883 + }, + { + "epoch": 4.63, + "learning_rate": 2.6981767736821243e-07, + "logits/chosen": -1.6632089614868164, + "logits/rejected": -1.7093381881713867, + "logps/chosen": -139.55984497070312, + "logps/rejected": -249.17184448242188, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.729571342468262, + "rewards/margins": 10.636819839477539, + "rewards/rejected": -17.366392135620117, + "step": 2884 + }, + { + "epoch": 4.63, + "learning_rate": 2.697185889813714e-07, + "logits/chosen": -1.6220649480819702, + "logits/rejected": -1.6062796115875244, + "logps/chosen": -192.724853515625, + "logps/rejected": -291.74371337890625, + "loss": 0.0053, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.636268615722656, + "rewards/margins": 11.107978820800781, + "rewards/rejected": -20.744247436523438, + "step": 2885 + }, + { + "epoch": 4.63, + "learning_rate": 2.696195005945303e-07, + "logits/chosen": -1.5526129007339478, + "logits/rejected": -1.4173314571380615, + "logps/chosen": -175.43580627441406, + "logps/rejected": -253.78579711914062, + "loss": 0.0019, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.003754615783691, + "rewards/margins": 9.556163787841797, + "rewards/rejected": -16.559917449951172, + "step": 2886 + }, + { + "epoch": 4.63, + "learning_rate": 2.6952041220768926e-07, + "logits/chosen": -1.5534238815307617, + "logits/rejected": -1.5591456890106201, + "logps/chosen": -149.20518493652344, + "logps/rejected": -273.7069091796875, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.393659591674805, + "rewards/margins": 12.216948509216309, + "rewards/rejected": -19.610607147216797, + "step": 2887 + }, + { + "epoch": 4.64, + "learning_rate": 2.694213238208482e-07, + "logits/chosen": -1.7168524265289307, + "logits/rejected": -1.7563542127609253, + "logps/chosen": -124.13566589355469, + "logps/rejected": -207.54307556152344, + "loss": 0.0086, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.677646636962891, + "rewards/margins": 7.961723804473877, + "rewards/rejected": -12.639370918273926, + "step": 2888 + }, + { + "epoch": 4.64, + "learning_rate": 2.693222354340071e-07, + "logits/chosen": -1.6165814399719238, + "logits/rejected": -1.6294846534729004, + "logps/chosen": -148.03492736816406, + "logps/rejected": -214.65997314453125, + "loss": 0.0049, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.220637321472168, + "rewards/margins": 7.983526229858398, + "rewards/rejected": -13.20416259765625, + "step": 2889 + }, + { + "epoch": 4.64, + "learning_rate": 2.6922314704716603e-07, + "logits/chosen": -1.6697485446929932, + "logits/rejected": -1.656581997871399, + "logps/chosen": -128.30319213867188, + "logps/rejected": -248.2773895263672, + "loss": 0.0018, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.145974159240723, + "rewards/margins": 11.773134231567383, + "rewards/rejected": -16.919109344482422, + "step": 2890 + }, + { + "epoch": 4.64, + "learning_rate": 2.69124058660325e-07, + "logits/chosen": -1.5598927736282349, + "logits/rejected": -1.5349242687225342, + "logps/chosen": -156.8033447265625, + "logps/rejected": -224.45506286621094, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.480134963989258, + "rewards/margins": 7.624922752380371, + "rewards/rejected": -14.105056762695312, + "step": 2891 + }, + { + "epoch": 4.64, + "learning_rate": 2.690249702734839e-07, + "logits/chosen": -1.573132038116455, + "logits/rejected": -1.5361766815185547, + "logps/chosen": -163.15284729003906, + "logps/rejected": -250.19525146484375, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.774960041046143, + "rewards/margins": 11.149232864379883, + "rewards/rejected": -16.924192428588867, + "step": 2892 + }, + { + "epoch": 4.64, + "learning_rate": 2.689258818866429e-07, + "logits/chosen": -1.6583707332611084, + "logits/rejected": -1.5696437358856201, + "logps/chosen": -129.41468811035156, + "logps/rejected": -206.2461395263672, + "loss": 0.0032, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.004560708999634, + "rewards/margins": 10.203919410705566, + "rewards/rejected": -13.208480834960938, + "step": 2893 + }, + { + "epoch": 4.65, + "learning_rate": 2.688267934998018e-07, + "logits/chosen": -1.6241576671600342, + "logits/rejected": -1.6229395866394043, + "logps/chosen": -182.19021606445312, + "logps/rejected": -275.8990478515625, + "loss": 0.0015, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.00061321258545, + "rewards/margins": 8.604902267456055, + "rewards/rejected": -18.605514526367188, + "step": 2894 + }, + { + "epoch": 4.65, + "learning_rate": 2.687277051129607e-07, + "logits/chosen": -1.6811754703521729, + "logits/rejected": -1.692021369934082, + "logps/chosen": -159.83892822265625, + "logps/rejected": -274.3501281738281, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.545848369598389, + "rewards/margins": 10.21853256225586, + "rewards/rejected": -16.764381408691406, + "step": 2895 + }, + { + "epoch": 4.65, + "learning_rate": 2.686286167261197e-07, + "logits/chosen": -1.6802849769592285, + "logits/rejected": -1.609713077545166, + "logps/chosen": -190.84652709960938, + "logps/rejected": -255.67349243164062, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.907395362854004, + "rewards/margins": 8.721273422241211, + "rewards/rejected": -15.628667831420898, + "step": 2896 + }, + { + "epoch": 4.65, + "learning_rate": 2.685295283392786e-07, + "logits/chosen": -1.6797481775283813, + "logits/rejected": -1.695957899093628, + "logps/chosen": -140.5084991455078, + "logps/rejected": -244.77548217773438, + "loss": 0.0037, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.539989471435547, + "rewards/margins": 7.949967384338379, + "rewards/rejected": -14.489956855773926, + "step": 2897 + }, + { + "epoch": 4.65, + "learning_rate": 2.684304399524376e-07, + "logits/chosen": -1.602318286895752, + "logits/rejected": -1.5384528636932373, + "logps/chosen": -174.68133544921875, + "logps/rejected": -252.36404418945312, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.19322395324707, + "rewards/margins": 10.038524627685547, + "rewards/rejected": -15.231748580932617, + "step": 2898 + }, + { + "epoch": 4.65, + "learning_rate": 2.683313515655965e-07, + "logits/chosen": -1.7264543771743774, + "logits/rejected": -1.7745482921600342, + "logps/chosen": -164.20077514648438, + "logps/rejected": -275.601318359375, + "loss": 0.0297, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.307490348815918, + "rewards/margins": 11.073887825012207, + "rewards/rejected": -15.381378173828125, + "step": 2899 + }, + { + "epoch": 4.65, + "learning_rate": 2.682322631787554e-07, + "logits/chosen": -1.4478037357330322, + "logits/rejected": -1.4360638856887817, + "logps/chosen": -134.5330352783203, + "logps/rejected": -243.22271728515625, + "loss": 0.0194, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.858177185058594, + "rewards/margins": 10.760713577270508, + "rewards/rejected": -15.618890762329102, + "step": 2900 + }, + { + "epoch": 4.66, + "learning_rate": 2.681331747919144e-07, + "logits/chosen": -1.4796507358551025, + "logits/rejected": -1.5287935733795166, + "logps/chosen": -152.68247985839844, + "logps/rejected": -261.4158630371094, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.208851337432861, + "rewards/margins": 11.347524642944336, + "rewards/rejected": -17.55637550354004, + "step": 2901 + }, + { + "epoch": 4.66, + "learning_rate": 2.680340864050733e-07, + "logits/chosen": -1.4666680097579956, + "logits/rejected": -1.5345869064331055, + "logps/chosen": -112.3341064453125, + "logps/rejected": -213.49148559570312, + "loss": 0.0018, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.441270351409912, + "rewards/margins": 10.046318054199219, + "rewards/rejected": -12.487588882446289, + "step": 2902 + }, + { + "epoch": 4.66, + "learning_rate": 2.679349980182323e-07, + "logits/chosen": -1.5045793056488037, + "logits/rejected": -1.527762532234192, + "logps/chosen": -116.43006896972656, + "logps/rejected": -226.27032470703125, + "loss": 0.0111, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.89715576171875, + "rewards/margins": 10.26179313659668, + "rewards/rejected": -14.158949851989746, + "step": 2903 + }, + { + "epoch": 4.66, + "learning_rate": 2.678359096313912e-07, + "logits/chosen": -1.523429274559021, + "logits/rejected": -1.551692247390747, + "logps/chosen": -147.62579345703125, + "logps/rejected": -227.86009216308594, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.701258659362793, + "rewards/margins": 8.856009483337402, + "rewards/rejected": -14.557268142700195, + "step": 2904 + }, + { + "epoch": 4.66, + "learning_rate": 2.677368212445501e-07, + "logits/chosen": -1.5607854127883911, + "logits/rejected": -1.4940048456192017, + "logps/chosen": -132.0556640625, + "logps/rejected": -235.2646484375, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.4093828201293945, + "rewards/margins": 10.782269477844238, + "rewards/rejected": -15.19165325164795, + "step": 2905 + }, + { + "epoch": 4.66, + "learning_rate": 2.6763773285770907e-07, + "logits/chosen": -1.5931551456451416, + "logits/rejected": -1.5749748945236206, + "logps/chosen": -150.5115966796875, + "logps/rejected": -248.45108032226562, + "loss": 0.0059, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.414941787719727, + "rewards/margins": 11.708484649658203, + "rewards/rejected": -16.123428344726562, + "step": 2906 + }, + { + "epoch": 4.67, + "learning_rate": 2.67538644470868e-07, + "logits/chosen": -1.6313732862472534, + "logits/rejected": -1.565626859664917, + "logps/chosen": -129.89486694335938, + "logps/rejected": -234.93191528320312, + "loss": 0.0046, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.043497085571289, + "rewards/margins": 10.83903694152832, + "rewards/rejected": -15.88253402709961, + "step": 2907 + }, + { + "epoch": 4.67, + "learning_rate": 2.67439556084027e-07, + "logits/chosen": -1.709572434425354, + "logits/rejected": -1.6631548404693604, + "logps/chosen": -108.17478942871094, + "logps/rejected": -178.11448669433594, + "loss": 0.0093, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.400628089904785, + "rewards/margins": 8.231270790100098, + "rewards/rejected": -11.631897926330566, + "step": 2908 + }, + { + "epoch": 4.67, + "learning_rate": 2.673404676971859e-07, + "logits/chosen": -1.6117336750030518, + "logits/rejected": -1.6395901441574097, + "logps/chosen": -108.18408966064453, + "logps/rejected": -208.12350463867188, + "loss": 0.0043, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.2281837463378906, + "rewards/margins": 10.018380165100098, + "rewards/rejected": -13.246563911437988, + "step": 2909 + }, + { + "epoch": 4.67, + "learning_rate": 2.672413793103448e-07, + "logits/chosen": -1.6330645084381104, + "logits/rejected": -1.645434856414795, + "logps/chosen": -200.32723999023438, + "logps/rejected": -242.7725830078125, + "loss": 0.0032, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.177993774414062, + "rewards/margins": 6.051130294799805, + "rewards/rejected": -15.2291259765625, + "step": 2910 + }, + { + "epoch": 4.67, + "learning_rate": 2.6714229092350376e-07, + "logits/chosen": -1.6660782098770142, + "logits/rejected": -1.5866376161575317, + "logps/chosen": -153.9830780029297, + "logps/rejected": -225.43341064453125, + "loss": 0.0109, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.0449676513671875, + "rewards/margins": 9.111007690429688, + "rewards/rejected": -14.155975341796875, + "step": 2911 + }, + { + "epoch": 4.67, + "learning_rate": 2.6704320253666267e-07, + "logits/chosen": -1.6977410316467285, + "logits/rejected": -1.6392161846160889, + "logps/chosen": -200.16798400878906, + "logps/rejected": -280.4103698730469, + "loss": 0.0116, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.172115325927734, + "rewards/margins": 11.860982894897461, + "rewards/rejected": -18.033098220825195, + "step": 2912 + }, + { + "epoch": 4.68, + "learning_rate": 2.6694411414982163e-07, + "logits/chosen": -1.4373106956481934, + "logits/rejected": -1.4683008193969727, + "logps/chosen": -106.68769836425781, + "logps/rejected": -212.5522003173828, + "loss": 0.0126, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.390641689300537, + "rewards/margins": 9.783185005187988, + "rewards/rejected": -14.173827171325684, + "step": 2913 + }, + { + "epoch": 4.68, + "learning_rate": 2.668450257629806e-07, + "logits/chosen": -1.5258934497833252, + "logits/rejected": -1.587598443031311, + "logps/chosen": -112.08844757080078, + "logps/rejected": -196.68472290039062, + "loss": 0.002, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.026933431625366, + "rewards/margins": 7.787805557250977, + "rewards/rejected": -10.814739227294922, + "step": 2914 + }, + { + "epoch": 4.68, + "learning_rate": 2.667459373761395e-07, + "logits/chosen": -1.7174715995788574, + "logits/rejected": -1.608305811882019, + "logps/chosen": -143.92864990234375, + "logps/rejected": -265.87652587890625, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.048043251037598, + "rewards/margins": 12.241436004638672, + "rewards/rejected": -17.289478302001953, + "step": 2915 + }, + { + "epoch": 4.68, + "learning_rate": 2.6664684898929845e-07, + "logits/chosen": -1.4923722743988037, + "logits/rejected": -1.517388939857483, + "logps/chosen": -155.4786376953125, + "logps/rejected": -259.5655517578125, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.3448381423950195, + "rewards/margins": 10.912919998168945, + "rewards/rejected": -18.25775909423828, + "step": 2916 + }, + { + "epoch": 4.68, + "learning_rate": 2.6654776060245736e-07, + "logits/chosen": -1.6469858884811401, + "logits/rejected": -1.6828852891921997, + "logps/chosen": -125.11383056640625, + "logps/rejected": -289.50384521484375, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.5388028621673584, + "rewards/margins": 15.57254409790039, + "rewards/rejected": -19.111347198486328, + "step": 2917 + }, + { + "epoch": 4.68, + "learning_rate": 2.664486722156163e-07, + "logits/chosen": -1.71649169921875, + "logits/rejected": -1.7645237445831299, + "logps/chosen": -90.726806640625, + "logps/rejected": -192.79583740234375, + "loss": 0.0026, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.869795560836792, + "rewards/margins": 9.187906265258789, + "rewards/rejected": -12.05770206451416, + "step": 2918 + }, + { + "epoch": 4.69, + "learning_rate": 2.663495838287753e-07, + "logits/chosen": -1.629927635192871, + "logits/rejected": -1.5398938655853271, + "logps/chosen": -146.66632080078125, + "logps/rejected": -240.93734741210938, + "loss": 0.0059, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.837042808532715, + "rewards/margins": 10.826396942138672, + "rewards/rejected": -16.663440704345703, + "step": 2919 + }, + { + "epoch": 4.69, + "learning_rate": 2.662504954419342e-07, + "logits/chosen": -1.5755157470703125, + "logits/rejected": -1.5550447702407837, + "logps/chosen": -149.80189514160156, + "logps/rejected": -263.40863037109375, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.717446327209473, + "rewards/margins": 10.858149528503418, + "rewards/rejected": -15.57559585571289, + "step": 2920 + }, + { + "epoch": 4.69, + "learning_rate": 2.6615140705509315e-07, + "logits/chosen": -1.5270607471466064, + "logits/rejected": -1.576663851737976, + "logps/chosen": -134.2341766357422, + "logps/rejected": -244.19442749023438, + "loss": 0.0094, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.020873546600342, + "rewards/margins": 9.274447441101074, + "rewards/rejected": -15.295321464538574, + "step": 2921 + }, + { + "epoch": 4.69, + "learning_rate": 2.6605231866825205e-07, + "logits/chosen": -1.6992183923721313, + "logits/rejected": -1.6777079105377197, + "logps/chosen": -109.28311157226562, + "logps/rejected": -192.36102294921875, + "loss": 0.0041, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.4933342933654785, + "rewards/margins": 7.887777328491211, + "rewards/rejected": -11.381111145019531, + "step": 2922 + }, + { + "epoch": 4.69, + "learning_rate": 2.65953230281411e-07, + "logits/chosen": -1.664992332458496, + "logits/rejected": -1.7715034484863281, + "logps/chosen": -166.52658081054688, + "logps/rejected": -287.16522216796875, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.854942798614502, + "rewards/margins": 10.135269165039062, + "rewards/rejected": -16.99021339416504, + "step": 2923 + }, + { + "epoch": 4.69, + "learning_rate": 2.6585414189456997e-07, + "logits/chosen": -1.5022164583206177, + "logits/rejected": -1.5000247955322266, + "logps/chosen": -124.38109588623047, + "logps/rejected": -245.6664276123047, + "loss": 0.0042, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.711190223693848, + "rewards/margins": 11.398712158203125, + "rewards/rejected": -16.10990333557129, + "step": 2924 + }, + { + "epoch": 4.7, + "learning_rate": 2.657550535077289e-07, + "logits/chosen": -1.6681286096572876, + "logits/rejected": -1.6769405603408813, + "logps/chosen": -165.5919647216797, + "logps/rejected": -297.0609130859375, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.970783233642578, + "rewards/margins": 14.066560745239258, + "rewards/rejected": -19.03734588623047, + "step": 2925 + }, + { + "epoch": 4.7, + "learning_rate": 2.6565596512088784e-07, + "logits/chosen": -1.56667959690094, + "logits/rejected": -1.6233766078948975, + "logps/chosen": -142.14007568359375, + "logps/rejected": -277.90521240234375, + "loss": 0.0021, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.964111804962158, + "rewards/margins": 12.068734169006348, + "rewards/rejected": -17.03284454345703, + "step": 2926 + }, + { + "epoch": 4.7, + "learning_rate": 2.6555687673404675e-07, + "logits/chosen": -1.6209644079208374, + "logits/rejected": -1.625811219215393, + "logps/chosen": -107.24798583984375, + "logps/rejected": -247.08096313476562, + "loss": 0.0083, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.125896453857422, + "rewards/margins": 11.901782989501953, + "rewards/rejected": -16.027681350708008, + "step": 2927 + }, + { + "epoch": 4.7, + "learning_rate": 2.6545778834720565e-07, + "logits/chosen": -1.5354163646697998, + "logits/rejected": -1.5703887939453125, + "logps/chosen": -148.10104370117188, + "logps/rejected": -250.90032958984375, + "loss": 0.0027, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.14177131652832, + "rewards/margins": 9.716143608093262, + "rewards/rejected": -14.857914924621582, + "step": 2928 + }, + { + "epoch": 4.7, + "learning_rate": 2.6535869996036467e-07, + "logits/chosen": -1.4321351051330566, + "logits/rejected": -1.4843884706497192, + "logps/chosen": -109.00495910644531, + "logps/rejected": -232.0357208251953, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.6745710372924805, + "rewards/margins": 11.093461990356445, + "rewards/rejected": -15.768033981323242, + "step": 2929 + }, + { + "epoch": 4.7, + "learning_rate": 2.6525961157352357e-07, + "logits/chosen": -1.562753677368164, + "logits/rejected": -1.6928423643112183, + "logps/chosen": -124.12287902832031, + "logps/rejected": -205.25027465820312, + "loss": 0.0032, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.199832916259766, + "rewards/margins": 5.243329048156738, + "rewards/rejected": -11.443161964416504, + "step": 2930 + }, + { + "epoch": 4.7, + "learning_rate": 2.6516052318668253e-07, + "logits/chosen": -1.681947112083435, + "logits/rejected": -1.5355989933013916, + "logps/chosen": -159.88783264160156, + "logps/rejected": -242.93641662597656, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.009876251220703, + "rewards/margins": 10.836859703063965, + "rewards/rejected": -15.846735000610352, + "step": 2931 + }, + { + "epoch": 4.71, + "learning_rate": 2.6506143479984144e-07, + "logits/chosen": -1.6634950637817383, + "logits/rejected": -1.7535076141357422, + "logps/chosen": -117.1104507446289, + "logps/rejected": -251.49745178222656, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6129586696624756, + "rewards/margins": 12.792705535888672, + "rewards/rejected": -15.405664443969727, + "step": 2932 + }, + { + "epoch": 4.71, + "learning_rate": 2.6496234641300035e-07, + "logits/chosen": -1.5390058755874634, + "logits/rejected": -1.5790014266967773, + "logps/chosen": -128.06729125976562, + "logps/rejected": -260.2852783203125, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.911489486694336, + "rewards/margins": 11.941197395324707, + "rewards/rejected": -15.85268783569336, + "step": 2933 + }, + { + "epoch": 4.71, + "learning_rate": 2.648632580261593e-07, + "logits/chosen": -1.6585723161697388, + "logits/rejected": -1.7001410722732544, + "logps/chosen": -96.82504272460938, + "logps/rejected": -256.3796691894531, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8496479988098145, + "rewards/margins": 14.918493270874023, + "rewards/rejected": -17.76814079284668, + "step": 2934 + }, + { + "epoch": 4.71, + "learning_rate": 2.6476416963931827e-07, + "logits/chosen": -1.634920358657837, + "logits/rejected": -1.530321478843689, + "logps/chosen": -163.0467987060547, + "logps/rejected": -232.03384399414062, + "loss": 0.0057, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.0554680824279785, + "rewards/margins": 7.967438220977783, + "rewards/rejected": -14.022905349731445, + "step": 2935 + }, + { + "epoch": 4.71, + "learning_rate": 2.646650812524772e-07, + "logits/chosen": -1.5610246658325195, + "logits/rejected": -1.5466043949127197, + "logps/chosen": -140.00904846191406, + "logps/rejected": -248.87921142578125, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.378490447998047, + "rewards/margins": 11.078807830810547, + "rewards/rejected": -16.457298278808594, + "step": 2936 + }, + { + "epoch": 4.71, + "learning_rate": 2.6456599286563613e-07, + "logits/chosen": -1.5065592527389526, + "logits/rejected": -1.5753366947174072, + "logps/chosen": -116.34737396240234, + "logps/rejected": -233.07809448242188, + "loss": 0.0019, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.8005051612854, + "rewards/margins": 10.016885757446289, + "rewards/rejected": -14.817390441894531, + "step": 2937 + }, + { + "epoch": 4.72, + "learning_rate": 2.6446690447879504e-07, + "logits/chosen": -1.5162394046783447, + "logits/rejected": -1.5769097805023193, + "logps/chosen": -131.25196838378906, + "logps/rejected": -261.5663757324219, + "loss": 0.003, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.10664176940918, + "rewards/margins": 10.131183624267578, + "rewards/rejected": -15.237825393676758, + "step": 2938 + }, + { + "epoch": 4.72, + "learning_rate": 2.64367816091954e-07, + "logits/chosen": -1.4345641136169434, + "logits/rejected": -1.496512532234192, + "logps/chosen": -78.43336486816406, + "logps/rejected": -202.10061645507812, + "loss": 0.0062, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0575406551361084, + "rewards/margins": 11.824251174926758, + "rewards/rejected": -13.881792068481445, + "step": 2939 + }, + { + "epoch": 4.72, + "learning_rate": 2.6426872770511296e-07, + "logits/chosen": -1.6593408584594727, + "logits/rejected": -1.756556749343872, + "logps/chosen": -135.34695434570312, + "logps/rejected": -281.85272216796875, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.294543743133545, + "rewards/margins": 12.589704513549805, + "rewards/rejected": -18.884246826171875, + "step": 2940 + }, + { + "epoch": 4.72, + "learning_rate": 2.641696393182719e-07, + "logits/chosen": -1.724341630935669, + "logits/rejected": -1.7301373481750488, + "logps/chosen": -128.88929748535156, + "logps/rejected": -220.2422332763672, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.602444648742676, + "rewards/margins": 10.830321311950684, + "rewards/rejected": -14.43276596069336, + "step": 2941 + }, + { + "epoch": 4.72, + "learning_rate": 2.640705509314308e-07, + "logits/chosen": -1.5199843645095825, + "logits/rejected": -1.509350061416626, + "logps/chosen": -100.95677947998047, + "logps/rejected": -186.08816528320312, + "loss": 0.003, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.161935329437256, + "rewards/margins": 9.148808479309082, + "rewards/rejected": -12.31074333190918, + "step": 2942 + }, + { + "epoch": 4.72, + "learning_rate": 2.6397146254458973e-07, + "logits/chosen": -1.6404558420181274, + "logits/rejected": -1.640014410018921, + "logps/chosen": -168.42388916015625, + "logps/rejected": -275.60650634765625, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.199614524841309, + "rewards/margins": 10.895272254943848, + "rewards/rejected": -19.094886779785156, + "step": 2943 + }, + { + "epoch": 4.73, + "learning_rate": 2.638723741577487e-07, + "logits/chosen": -1.6340628862380981, + "logits/rejected": -1.7327852249145508, + "logps/chosen": -91.6138916015625, + "logps/rejected": -293.92767333984375, + "loss": 0.0056, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.09183669090271, + "rewards/margins": 16.92431640625, + "rewards/rejected": -20.01615333557129, + "step": 2944 + }, + { + "epoch": 4.73, + "learning_rate": 2.6377328577090765e-07, + "logits/chosen": -1.6631691455841064, + "logits/rejected": -1.741714358329773, + "logps/chosen": -150.75234985351562, + "logps/rejected": -267.5704345703125, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.6404314041137695, + "rewards/margins": 11.15585708618164, + "rewards/rejected": -15.796287536621094, + "step": 2945 + }, + { + "epoch": 4.73, + "learning_rate": 2.636741973840666e-07, + "logits/chosen": -1.481337547302246, + "logits/rejected": -1.5540251731872559, + "logps/chosen": -150.5426788330078, + "logps/rejected": -297.3229675292969, + "loss": 0.0081, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.031569480895996, + "rewards/margins": 12.178277969360352, + "rewards/rejected": -20.209848403930664, + "step": 2946 + }, + { + "epoch": 4.73, + "learning_rate": 2.635751089972255e-07, + "logits/chosen": -1.6638230085372925, + "logits/rejected": -1.6340886354446411, + "logps/chosen": -166.31642150878906, + "logps/rejected": -262.2658996582031, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.866975784301758, + "rewards/margins": 10.196449279785156, + "rewards/rejected": -17.063425064086914, + "step": 2947 + }, + { + "epoch": 4.73, + "learning_rate": 2.634760206103844e-07, + "logits/chosen": -1.6198116540908813, + "logits/rejected": -1.7256009578704834, + "logps/chosen": -115.16364288330078, + "logps/rejected": -240.25595092773438, + "loss": 0.0696, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8089957237243652, + "rewards/margins": 11.751544952392578, + "rewards/rejected": -14.560540199279785, + "step": 2948 + }, + { + "epoch": 4.73, + "learning_rate": 2.633769322235434e-07, + "logits/chosen": -1.6263103485107422, + "logits/rejected": -1.6700454950332642, + "logps/chosen": -134.1862335205078, + "logps/rejected": -244.60009765625, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.5912861824035645, + "rewards/margins": 11.207610130310059, + "rewards/rejected": -15.798895835876465, + "step": 2949 + }, + { + "epoch": 4.74, + "learning_rate": 2.632778438367023e-07, + "logits/chosen": -1.6107457876205444, + "logits/rejected": -1.5127252340316772, + "logps/chosen": -149.2644500732422, + "logps/rejected": -246.7620391845703, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.013493061065674, + "rewards/margins": 10.808847427368164, + "rewards/rejected": -15.822341918945312, + "step": 2950 + }, + { + "epoch": 4.74, + "learning_rate": 2.631787554498613e-07, + "logits/chosen": -1.6662752628326416, + "logits/rejected": -1.6638222932815552, + "logps/chosen": -140.41502380371094, + "logps/rejected": -264.69879150390625, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3868589401245117, + "rewards/margins": 14.510576248168945, + "rewards/rejected": -16.897436141967773, + "step": 2951 + }, + { + "epoch": 4.74, + "learning_rate": 2.630796670630202e-07, + "logits/chosen": -1.6421881914138794, + "logits/rejected": -1.6611055135726929, + "logps/chosen": -165.55694580078125, + "logps/rejected": -294.27813720703125, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.808432579040527, + "rewards/margins": 12.045783042907715, + "rewards/rejected": -18.854215621948242, + "step": 2952 + }, + { + "epoch": 4.74, + "learning_rate": 2.629805786761791e-07, + "logits/chosen": -1.513806700706482, + "logits/rejected": -1.5880364179611206, + "logps/chosen": -131.44705200195312, + "logps/rejected": -268.2743225097656, + "loss": 0.0059, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.388998985290527, + "rewards/margins": 11.766688346862793, + "rewards/rejected": -18.15568733215332, + "step": 2953 + }, + { + "epoch": 4.74, + "learning_rate": 2.628814902893381e-07, + "logits/chosen": -1.6824309825897217, + "logits/rejected": -1.6920301914215088, + "logps/chosen": -109.23954010009766, + "logps/rejected": -204.2901611328125, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.154960870742798, + "rewards/margins": 10.380378723144531, + "rewards/rejected": -13.53533935546875, + "step": 2954 + }, + { + "epoch": 4.74, + "learning_rate": 2.62782401902497e-07, + "logits/chosen": -1.5776376724243164, + "logits/rejected": -1.6075630187988281, + "logps/chosen": -139.78448486328125, + "logps/rejected": -292.4078063964844, + "loss": 0.0804, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.433243751525879, + "rewards/margins": 15.481950759887695, + "rewards/rejected": -19.915193557739258, + "step": 2955 + }, + { + "epoch": 4.74, + "learning_rate": 2.62683313515656e-07, + "logits/chosen": -1.5523757934570312, + "logits/rejected": -1.5167943239212036, + "logps/chosen": -139.92617797851562, + "logps/rejected": -234.29151916503906, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.011055946350098, + "rewards/margins": 9.119413375854492, + "rewards/rejected": -15.130468368530273, + "step": 2956 + }, + { + "epoch": 4.75, + "learning_rate": 2.625842251288149e-07, + "logits/chosen": -1.5878188610076904, + "logits/rejected": -1.6367127895355225, + "logps/chosen": -154.1046142578125, + "logps/rejected": -263.8914794921875, + "loss": 0.0037, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.735781192779541, + "rewards/margins": 10.246749877929688, + "rewards/rejected": -17.98253059387207, + "step": 2957 + }, + { + "epoch": 4.75, + "learning_rate": 2.624851367419738e-07, + "logits/chosen": -1.533967137336731, + "logits/rejected": -1.580559253692627, + "logps/chosen": -135.9903564453125, + "logps/rejected": -261.99066162109375, + "loss": 0.0019, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.8494205474853516, + "rewards/margins": 12.925979614257812, + "rewards/rejected": -16.775400161743164, + "step": 2958 + }, + { + "epoch": 4.75, + "learning_rate": 2.6238604835513277e-07, + "logits/chosen": -1.5072414875030518, + "logits/rejected": -1.5882620811462402, + "logps/chosen": -115.62643432617188, + "logps/rejected": -230.82611083984375, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.8355181217193604, + "rewards/margins": 10.628847122192383, + "rewards/rejected": -14.464365005493164, + "step": 2959 + }, + { + "epoch": 4.75, + "learning_rate": 2.622869599682917e-07, + "logits/chosen": -1.8272441625595093, + "logits/rejected": -1.7548272609710693, + "logps/chosen": -122.36483764648438, + "logps/rejected": -245.96002197265625, + "loss": 0.0046, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.6312496662139893, + "rewards/margins": 13.059724807739258, + "rewards/rejected": -16.690975189208984, + "step": 2960 + }, + { + "epoch": 4.75, + "learning_rate": 2.6218787158145064e-07, + "logits/chosen": -1.64153254032135, + "logits/rejected": -1.686498999595642, + "logps/chosen": -115.31649017333984, + "logps/rejected": -223.97317504882812, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8216562271118164, + "rewards/margins": 11.755496978759766, + "rewards/rejected": -14.577152252197266, + "step": 2961 + }, + { + "epoch": 4.75, + "learning_rate": 2.620887831946096e-07, + "logits/chosen": -1.560295820236206, + "logits/rejected": -1.5732100009918213, + "logps/chosen": -137.75436401367188, + "logps/rejected": -205.2888641357422, + "loss": 0.0015, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.02023458480835, + "rewards/margins": 6.271869659423828, + "rewards/rejected": -12.292104721069336, + "step": 2962 + }, + { + "epoch": 4.76, + "learning_rate": 2.619896948077685e-07, + "logits/chosen": -1.5232536792755127, + "logits/rejected": -1.5038464069366455, + "logps/chosen": -149.11981201171875, + "logps/rejected": -255.08160400390625, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.597732067108154, + "rewards/margins": 12.18422794342041, + "rewards/rejected": -17.781959533691406, + "step": 2963 + }, + { + "epoch": 4.76, + "learning_rate": 2.6189060642092746e-07, + "logits/chosen": -1.4766042232513428, + "logits/rejected": -1.4662771224975586, + "logps/chosen": -143.96310424804688, + "logps/rejected": -264.7278747558594, + "loss": 0.0027, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.392165184020996, + "rewards/margins": 13.039006233215332, + "rewards/rejected": -18.431171417236328, + "step": 2964 + }, + { + "epoch": 4.76, + "learning_rate": 2.6179151803408637e-07, + "logits/chosen": -1.5099155902862549, + "logits/rejected": -1.4369797706604004, + "logps/chosen": -137.8896484375, + "logps/rejected": -193.15420532226562, + "loss": 0.0372, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.0526580810546875, + "rewards/margins": 7.801795959472656, + "rewards/rejected": -12.854454040527344, + "step": 2965 + }, + { + "epoch": 4.76, + "learning_rate": 2.6169242964724533e-07, + "logits/chosen": -1.532375454902649, + "logits/rejected": -1.5290477275848389, + "logps/chosen": -123.12327575683594, + "logps/rejected": -212.09608459472656, + "loss": 0.0039, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7397208213806152, + "rewards/margins": 8.42689323425293, + "rewards/rejected": -12.166614532470703, + "step": 2966 + }, + { + "epoch": 4.76, + "learning_rate": 2.615933412604043e-07, + "logits/chosen": -1.6975924968719482, + "logits/rejected": -1.7444579601287842, + "logps/chosen": -125.30848693847656, + "logps/rejected": -260.2738037109375, + "loss": 0.0156, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.951406478881836, + "rewards/margins": 11.768741607666016, + "rewards/rejected": -16.72014808654785, + "step": 2967 + }, + { + "epoch": 4.76, + "learning_rate": 2.614942528735632e-07, + "logits/chosen": -1.5837938785552979, + "logits/rejected": -1.6247901916503906, + "logps/chosen": -147.67364501953125, + "logps/rejected": -252.96434020996094, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.573224067687988, + "rewards/margins": 11.018562316894531, + "rewards/rejected": -16.591787338256836, + "step": 2968 + }, + { + "epoch": 4.77, + "learning_rate": 2.6139516448672216e-07, + "logits/chosen": -1.4666013717651367, + "logits/rejected": -1.503180742263794, + "logps/chosen": -158.0015106201172, + "logps/rejected": -280.8604736328125, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.912051200866699, + "rewards/margins": 11.598762512207031, + "rewards/rejected": -19.510814666748047, + "step": 2969 + }, + { + "epoch": 4.77, + "learning_rate": 2.6129607609988106e-07, + "logits/chosen": -1.5692720413208008, + "logits/rejected": -1.536184310913086, + "logps/chosen": -144.45391845703125, + "logps/rejected": -239.8967742919922, + "loss": 0.0056, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.885148525238037, + "rewards/margins": 9.141096115112305, + "rewards/rejected": -15.0262451171875, + "step": 2970 + }, + { + "epoch": 4.77, + "learning_rate": 2.6119698771303997e-07, + "logits/chosen": -1.5661554336547852, + "logits/rejected": -1.4158103466033936, + "logps/chosen": -127.92787170410156, + "logps/rejected": -180.58016967773438, + "loss": 0.01, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7636351585388184, + "rewards/margins": 8.442623138427734, + "rewards/rejected": -12.206257820129395, + "step": 2971 + }, + { + "epoch": 4.77, + "learning_rate": 2.61097899326199e-07, + "logits/chosen": -1.5915119647979736, + "logits/rejected": -1.5848171710968018, + "logps/chosen": -102.35797119140625, + "logps/rejected": -237.24099731445312, + "loss": 0.0026, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.321559190750122, + "rewards/margins": 13.124653816223145, + "rewards/rejected": -16.44621467590332, + "step": 2972 + }, + { + "epoch": 4.77, + "learning_rate": 2.609988109393579e-07, + "logits/chosen": -1.6077306270599365, + "logits/rejected": -1.5940797328948975, + "logps/chosen": -144.78851318359375, + "logps/rejected": -262.662841796875, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.638704299926758, + "rewards/margins": 10.334847450256348, + "rewards/rejected": -15.973552703857422, + "step": 2973 + }, + { + "epoch": 4.77, + "learning_rate": 2.6089972255251685e-07, + "logits/chosen": -1.488654375076294, + "logits/rejected": -1.5726031064987183, + "logps/chosen": -149.5167694091797, + "logps/rejected": -266.619140625, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.937635898590088, + "rewards/margins": 10.097967147827148, + "rewards/rejected": -17.035602569580078, + "step": 2974 + }, + { + "epoch": 4.78, + "learning_rate": 2.6080063416567576e-07, + "logits/chosen": -1.532511591911316, + "logits/rejected": -1.5266168117523193, + "logps/chosen": -149.94171142578125, + "logps/rejected": -258.4335021972656, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.099495887756348, + "rewards/margins": 11.029693603515625, + "rewards/rejected": -17.12919044494629, + "step": 2975 + }, + { + "epoch": 4.78, + "learning_rate": 2.6070154577883466e-07, + "logits/chosen": -1.648294448852539, + "logits/rejected": -1.6365549564361572, + "logps/chosen": -149.1060333251953, + "logps/rejected": -266.92181396484375, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.991412162780762, + "rewards/margins": 11.124512672424316, + "rewards/rejected": -16.115924835205078, + "step": 2976 + }, + { + "epoch": 4.78, + "learning_rate": 2.606024573919937e-07, + "logits/chosen": -1.5770782232284546, + "logits/rejected": -1.6465632915496826, + "logps/chosen": -94.91999053955078, + "logps/rejected": -186.91290283203125, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1803793907165527, + "rewards/margins": 8.724727630615234, + "rewards/rejected": -10.905107498168945, + "step": 2977 + }, + { + "epoch": 4.78, + "learning_rate": 2.605033690051526e-07, + "logits/chosen": -1.5647876262664795, + "logits/rejected": -1.6584553718566895, + "logps/chosen": -150.70741271972656, + "logps/rejected": -279.16900634765625, + "loss": 0.0117, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.360967636108398, + "rewards/margins": 9.65416145324707, + "rewards/rejected": -16.01512908935547, + "step": 2978 + }, + { + "epoch": 4.78, + "learning_rate": 2.6040428061831154e-07, + "logits/chosen": -1.657009482383728, + "logits/rejected": -1.6330184936523438, + "logps/chosen": -137.5081787109375, + "logps/rejected": -258.7147521972656, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.467405319213867, + "rewards/margins": 11.928424835205078, + "rewards/rejected": -17.395828247070312, + "step": 2979 + }, + { + "epoch": 4.78, + "learning_rate": 2.6030519223147045e-07, + "logits/chosen": -1.4449386596679688, + "logits/rejected": -1.4652687311172485, + "logps/chosen": -135.45867919921875, + "logps/rejected": -243.10995483398438, + "loss": 0.0024, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.1092448234558105, + "rewards/margins": 9.865264892578125, + "rewards/rejected": -14.974510192871094, + "step": 2980 + }, + { + "epoch": 4.78, + "learning_rate": 2.6020610384462936e-07, + "logits/chosen": -1.5515271425247192, + "logits/rejected": -1.6211881637573242, + "logps/chosen": -161.5952606201172, + "logps/rejected": -304.0499267578125, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.367401599884033, + "rewards/margins": 12.173785209655762, + "rewards/rejected": -19.541187286376953, + "step": 2981 + }, + { + "epoch": 4.79, + "learning_rate": 2.6010701545778837e-07, + "logits/chosen": -1.6486141681671143, + "logits/rejected": -1.6421289443969727, + "logps/chosen": -112.67147827148438, + "logps/rejected": -235.7434539794922, + "loss": 0.0093, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.381955146789551, + "rewards/margins": 12.286215782165527, + "rewards/rejected": -16.668170928955078, + "step": 2982 + }, + { + "epoch": 4.79, + "learning_rate": 2.600079270709473e-07, + "logits/chosen": -1.7242636680603027, + "logits/rejected": -1.617112636566162, + "logps/chosen": -180.59048461914062, + "logps/rejected": -255.0449676513672, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.2308454513549805, + "rewards/margins": 11.062152862548828, + "rewards/rejected": -15.292998313903809, + "step": 2983 + }, + { + "epoch": 4.79, + "learning_rate": 2.5990883868410624e-07, + "logits/chosen": -1.515058994293213, + "logits/rejected": -1.5850697755813599, + "logps/chosen": -163.87640380859375, + "logps/rejected": -280.37091064453125, + "loss": 0.0025, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.012068271636963, + "rewards/margins": 10.353845596313477, + "rewards/rejected": -17.365915298461914, + "step": 2984 + }, + { + "epoch": 4.79, + "learning_rate": 2.5980975029726514e-07, + "logits/chosen": -1.6814583539962769, + "logits/rejected": -1.6520367860794067, + "logps/chosen": -142.67295837402344, + "logps/rejected": -211.09759521484375, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.44337272644043, + "rewards/margins": 8.3430814743042, + "rewards/rejected": -13.786453247070312, + "step": 2985 + }, + { + "epoch": 4.79, + "learning_rate": 2.5971066191042405e-07, + "logits/chosen": -1.4846441745758057, + "logits/rejected": -1.4906529188156128, + "logps/chosen": -133.39564514160156, + "logps/rejected": -235.34683227539062, + "loss": 0.0027, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.352825164794922, + "rewards/margins": 9.272780418395996, + "rewards/rejected": -14.625605583190918, + "step": 2986 + }, + { + "epoch": 4.79, + "learning_rate": 2.5961157352358306e-07, + "logits/chosen": -1.691097378730774, + "logits/rejected": -1.6358839273452759, + "logps/chosen": -125.97467041015625, + "logps/rejected": -244.2049560546875, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.223735809326172, + "rewards/margins": 11.826028823852539, + "rewards/rejected": -16.049762725830078, + "step": 2987 + }, + { + "epoch": 4.8, + "learning_rate": 2.5951248513674197e-07, + "logits/chosen": -1.5661427974700928, + "logits/rejected": -1.6407376527786255, + "logps/chosen": -92.1572036743164, + "logps/rejected": -202.39401245117188, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.8801915645599365, + "rewards/margins": 8.6819429397583, + "rewards/rejected": -12.562134742736816, + "step": 2988 + }, + { + "epoch": 4.8, + "learning_rate": 2.5941339674990093e-07, + "logits/chosen": -1.5693097114562988, + "logits/rejected": -1.4564285278320312, + "logps/chosen": -166.66729736328125, + "logps/rejected": -248.87783813476562, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.3753461837768555, + "rewards/margins": 9.809085845947266, + "rewards/rejected": -17.184432983398438, + "step": 2989 + }, + { + "epoch": 4.8, + "learning_rate": 2.5931430836305984e-07, + "logits/chosen": -1.5660454034805298, + "logits/rejected": -1.6023552417755127, + "logps/chosen": -131.79315185546875, + "logps/rejected": -283.03082275390625, + "loss": 0.0021, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.028189659118652, + "rewards/margins": 13.342792510986328, + "rewards/rejected": -17.370981216430664, + "step": 2990 + }, + { + "epoch": 4.8, + "learning_rate": 2.5921521997621874e-07, + "logits/chosen": -1.718446969985962, + "logits/rejected": -1.7605522871017456, + "logps/chosen": -136.79595947265625, + "logps/rejected": -235.0123291015625, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.764071464538574, + "rewards/margins": 9.382318496704102, + "rewards/rejected": -15.146389961242676, + "step": 2991 + }, + { + "epoch": 4.8, + "learning_rate": 2.5911613158937775e-07, + "logits/chosen": -1.6609740257263184, + "logits/rejected": -1.645427942276001, + "logps/chosen": -163.3021697998047, + "logps/rejected": -208.85342407226562, + "loss": 0.0492, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.850152969360352, + "rewards/margins": 5.094761371612549, + "rewards/rejected": -12.944914817810059, + "step": 2992 + }, + { + "epoch": 4.8, + "learning_rate": 2.5901704320253666e-07, + "logits/chosen": -1.4926533699035645, + "logits/rejected": -1.5649020671844482, + "logps/chosen": -165.16346740722656, + "logps/rejected": -264.6737060546875, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.645424842834473, + "rewards/margins": 8.680688858032227, + "rewards/rejected": -16.326114654541016, + "step": 2993 + }, + { + "epoch": 4.81, + "learning_rate": 2.589179548156956e-07, + "logits/chosen": -1.5406733751296997, + "logits/rejected": -1.4640238285064697, + "logps/chosen": -179.0723876953125, + "logps/rejected": -264.95758056640625, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.898601531982422, + "rewards/margins": 9.88935661315918, + "rewards/rejected": -18.7879581451416, + "step": 2994 + }, + { + "epoch": 4.81, + "learning_rate": 2.5881886642885453e-07, + "logits/chosen": -1.5794203281402588, + "logits/rejected": -1.549370288848877, + "logps/chosen": -145.34156799316406, + "logps/rejected": -268.919677734375, + "loss": 0.0083, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.2300238609313965, + "rewards/margins": 13.306482315063477, + "rewards/rejected": -18.53650665283203, + "step": 2995 + }, + { + "epoch": 4.81, + "learning_rate": 2.5871977804201344e-07, + "logits/chosen": -1.5180045366287231, + "logits/rejected": -1.5880374908447266, + "logps/chosen": -124.54895782470703, + "logps/rejected": -241.7191925048828, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.959848403930664, + "rewards/margins": 11.811300277709961, + "rewards/rejected": -16.771148681640625, + "step": 2996 + }, + { + "epoch": 4.81, + "learning_rate": 2.586206896551724e-07, + "logits/chosen": -1.5037211179733276, + "logits/rejected": -1.5100979804992676, + "logps/chosen": -177.17922973632812, + "logps/rejected": -279.1216735839844, + "loss": 0.0051, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.742660522460938, + "rewards/margins": 8.819931983947754, + "rewards/rejected": -18.562593460083008, + "step": 2997 + }, + { + "epoch": 4.81, + "learning_rate": 2.5852160126833135e-07, + "logits/chosen": -1.5121715068817139, + "logits/rejected": -1.4797178506851196, + "logps/chosen": -184.1899871826172, + "logps/rejected": -294.52752685546875, + "loss": 0.0106, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.440917491912842, + "rewards/margins": 11.784178733825684, + "rewards/rejected": -18.225095748901367, + "step": 2998 + }, + { + "epoch": 4.81, + "learning_rate": 2.5842251288149026e-07, + "logits/chosen": -1.557069182395935, + "logits/rejected": -1.627794623374939, + "logps/chosen": -179.11473083496094, + "logps/rejected": -280.16876220703125, + "loss": 0.0018, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.144424438476562, + "rewards/margins": 9.128866195678711, + "rewards/rejected": -17.273290634155273, + "step": 2999 + }, + { + "epoch": 4.82, + "learning_rate": 2.583234244946492e-07, + "logits/chosen": -1.7662169933319092, + "logits/rejected": -1.7248973846435547, + "logps/chosen": -156.29934692382812, + "logps/rejected": -284.9810791015625, + "loss": 0.0083, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.899590969085693, + "rewards/margins": 14.615553855895996, + "rewards/rejected": -20.51514434814453, + "step": 3000 + }, + { + "epoch": 4.82, + "learning_rate": 2.5822433610780813e-07, + "logits/chosen": -1.5423063039779663, + "logits/rejected": -1.5523961782455444, + "logps/chosen": -120.17752075195312, + "logps/rejected": -212.08482360839844, + "loss": 0.012, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.666022300720215, + "rewards/margins": 9.611339569091797, + "rewards/rejected": -14.277361869812012, + "step": 3001 + }, + { + "epoch": 4.82, + "learning_rate": 2.581252477209671e-07, + "logits/chosen": -1.759448528289795, + "logits/rejected": -1.5822601318359375, + "logps/chosen": -204.325439453125, + "logps/rejected": -261.5441589355469, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.24658203125, + "rewards/margins": 10.5662202835083, + "rewards/rejected": -17.812803268432617, + "step": 3002 + }, + { + "epoch": 4.82, + "learning_rate": 2.5802615933412605e-07, + "logits/chosen": -1.4918349981307983, + "logits/rejected": -1.5515334606170654, + "logps/chosen": -139.88677978515625, + "logps/rejected": -287.52178955078125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.982763290405273, + "rewards/margins": 13.850737571716309, + "rewards/rejected": -19.8335018157959, + "step": 3003 + }, + { + "epoch": 4.82, + "learning_rate": 2.5792707094728495e-07, + "logits/chosen": -1.6262210607528687, + "logits/rejected": -1.5808684825897217, + "logps/chosen": -138.77532958984375, + "logps/rejected": -196.3770751953125, + "loss": 0.003, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.33355712890625, + "rewards/margins": 6.742087364196777, + "rewards/rejected": -13.075643539428711, + "step": 3004 + }, + { + "epoch": 4.82, + "learning_rate": 2.578279825604439e-07, + "logits/chosen": -1.5124845504760742, + "logits/rejected": -1.5999141931533813, + "logps/chosen": -87.28600311279297, + "logps/rejected": -279.16680908203125, + "loss": 0.0018, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.015707492828369, + "rewards/margins": 17.88571548461914, + "rewards/rejected": -19.90142250061035, + "step": 3005 + }, + { + "epoch": 4.83, + "learning_rate": 2.577288941736028e-07, + "logits/chosen": -1.452108383178711, + "logits/rejected": -1.4366297721862793, + "logps/chosen": -118.63349914550781, + "logps/rejected": -219.49671936035156, + "loss": 0.0081, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.10701322555542, + "rewards/margins": 10.924370765686035, + "rewards/rejected": -15.031383514404297, + "step": 3006 + }, + { + "epoch": 4.83, + "learning_rate": 2.576298057867618e-07, + "logits/chosen": -1.5253705978393555, + "logits/rejected": -1.5792113542556763, + "logps/chosen": -162.13113403320312, + "logps/rejected": -274.82452392578125, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.34441614151001, + "rewards/margins": 9.571182250976562, + "rewards/rejected": -16.915599822998047, + "step": 3007 + }, + { + "epoch": 4.83, + "learning_rate": 2.5753071739992074e-07, + "logits/chosen": -1.659886121749878, + "logits/rejected": -1.7633187770843506, + "logps/chosen": -169.87677001953125, + "logps/rejected": -271.030029296875, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.8277907371521, + "rewards/margins": 9.594008445739746, + "rewards/rejected": -16.421798706054688, + "step": 3008 + }, + { + "epoch": 4.83, + "learning_rate": 2.5743162901307965e-07, + "logits/chosen": -1.517781376838684, + "logits/rejected": -1.5624313354492188, + "logps/chosen": -144.9323272705078, + "logps/rejected": -307.3636169433594, + "loss": 0.0049, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.9975786209106445, + "rewards/margins": 14.590618133544922, + "rewards/rejected": -20.588197708129883, + "step": 3009 + }, + { + "epoch": 4.83, + "learning_rate": 2.573325406262386e-07, + "logits/chosen": -1.6240814924240112, + "logits/rejected": -1.6325764656066895, + "logps/chosen": -132.98953247070312, + "logps/rejected": -242.91131591796875, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.834216117858887, + "rewards/margins": 11.271158218383789, + "rewards/rejected": -16.105375289916992, + "step": 3010 + }, + { + "epoch": 4.83, + "learning_rate": 2.572334522393975e-07, + "logits/chosen": -1.4851702451705933, + "logits/rejected": -1.554124355316162, + "logps/chosen": -173.875732421875, + "logps/rejected": -307.4058837890625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.630066871643066, + "rewards/margins": 11.896696090698242, + "rewards/rejected": -19.526763916015625, + "step": 3011 + }, + { + "epoch": 4.83, + "learning_rate": 2.571343638525565e-07, + "logits/chosen": -1.7178688049316406, + "logits/rejected": -1.5797784328460693, + "logps/chosen": -135.68399047851562, + "logps/rejected": -201.8084716796875, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.931974411010742, + "rewards/margins": 9.028636932373047, + "rewards/rejected": -13.960610389709473, + "step": 3012 + }, + { + "epoch": 4.84, + "learning_rate": 2.570352754657154e-07, + "logits/chosen": -1.5271825790405273, + "logits/rejected": -1.5239899158477783, + "logps/chosen": -133.82327270507812, + "logps/rejected": -218.52940368652344, + "loss": 0.0032, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.884002685546875, + "rewards/margins": 9.102221488952637, + "rewards/rejected": -13.986224174499512, + "step": 3013 + }, + { + "epoch": 4.84, + "learning_rate": 2.5693618707887434e-07, + "logits/chosen": -1.7121046781539917, + "logits/rejected": -1.6954052448272705, + "logps/chosen": -143.54342651367188, + "logps/rejected": -242.92987060546875, + "loss": 0.0186, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.22504997253418, + "rewards/margins": 12.094991683959961, + "rewards/rejected": -16.320043563842773, + "step": 3014 + }, + { + "epoch": 4.84, + "learning_rate": 2.568370986920333e-07, + "logits/chosen": -1.4397704601287842, + "logits/rejected": -1.4956470727920532, + "logps/chosen": -97.32813262939453, + "logps/rejected": -176.48704528808594, + "loss": 0.0081, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.939892292022705, + "rewards/margins": 7.171698570251465, + "rewards/rejected": -10.111590385437012, + "step": 3015 + }, + { + "epoch": 4.84, + "learning_rate": 2.567380103051922e-07, + "logits/chosen": -1.3748772144317627, + "logits/rejected": -1.395900011062622, + "logps/chosen": -109.3741226196289, + "logps/rejected": -209.11856079101562, + "loss": 0.0598, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.669735431671143, + "rewards/margins": 10.947754859924316, + "rewards/rejected": -15.617490768432617, + "step": 3016 + }, + { + "epoch": 4.84, + "learning_rate": 2.5663892191835117e-07, + "logits/chosen": -1.6579629182815552, + "logits/rejected": -1.578538417816162, + "logps/chosen": -122.98030090332031, + "logps/rejected": -205.38848876953125, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.343793869018555, + "rewards/margins": 9.223575592041016, + "rewards/rejected": -13.56736946105957, + "step": 3017 + }, + { + "epoch": 4.84, + "learning_rate": 2.5653983353151007e-07, + "logits/chosen": -1.463202953338623, + "logits/rejected": -1.5099128484725952, + "logps/chosen": -138.76492309570312, + "logps/rejected": -275.33782958984375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.227639198303223, + "rewards/margins": 13.36764907836914, + "rewards/rejected": -18.595287322998047, + "step": 3018 + }, + { + "epoch": 4.85, + "learning_rate": 2.5644074514466903e-07, + "logits/chosen": -1.5126879215240479, + "logits/rejected": -1.449783205986023, + "logps/chosen": -192.20120239257812, + "logps/rejected": -288.2878112792969, + "loss": 0.0038, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.051506996154785, + "rewards/margins": 11.412683486938477, + "rewards/rejected": -20.464189529418945, + "step": 3019 + }, + { + "epoch": 4.85, + "learning_rate": 2.56341656757828e-07, + "logits/chosen": -1.810093641281128, + "logits/rejected": -1.6125891208648682, + "logps/chosen": -206.1083221435547, + "logps/rejected": -256.6120300292969, + "loss": 0.0096, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.839030265808105, + "rewards/margins": 8.256784439086914, + "rewards/rejected": -17.095815658569336, + "step": 3020 + }, + { + "epoch": 4.85, + "learning_rate": 2.562425683709869e-07, + "logits/chosen": -1.6111949682235718, + "logits/rejected": -1.557801365852356, + "logps/chosen": -137.77828979492188, + "logps/rejected": -282.5586853027344, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.428106784820557, + "rewards/margins": 14.518948554992676, + "rewards/rejected": -19.94705581665039, + "step": 3021 + }, + { + "epoch": 4.85, + "learning_rate": 2.5614347998414586e-07, + "logits/chosen": -1.4028807878494263, + "logits/rejected": -1.4797894954681396, + "logps/chosen": -160.17153930664062, + "logps/rejected": -229.8134765625, + "loss": 0.0074, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.1658453941345215, + "rewards/margins": 7.4520649909973145, + "rewards/rejected": -13.617910385131836, + "step": 3022 + }, + { + "epoch": 4.85, + "learning_rate": 2.5604439159730477e-07, + "logits/chosen": -1.4407119750976562, + "logits/rejected": -1.5702636241912842, + "logps/chosen": -109.42483520507812, + "logps/rejected": -258.8568115234375, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.915988922119141, + "rewards/margins": 11.449865341186523, + "rewards/rejected": -16.365854263305664, + "step": 3023 + }, + { + "epoch": 4.85, + "learning_rate": 2.559453032104637e-07, + "logits/chosen": -1.6092844009399414, + "logits/rejected": -1.4708192348480225, + "logps/chosen": -143.40869140625, + "logps/rejected": -225.58863830566406, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.504721164703369, + "rewards/margins": 10.053157806396484, + "rewards/rejected": -15.557880401611328, + "step": 3024 + }, + { + "epoch": 4.86, + "learning_rate": 2.558462148236227e-07, + "logits/chosen": -1.6969234943389893, + "logits/rejected": -1.6762382984161377, + "logps/chosen": -184.93289184570312, + "logps/rejected": -270.7356872558594, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.311636924743652, + "rewards/margins": 9.189972877502441, + "rewards/rejected": -16.501609802246094, + "step": 3025 + }, + { + "epoch": 4.86, + "learning_rate": 2.557471264367816e-07, + "logits/chosen": -1.5995945930480957, + "logits/rejected": -1.6474636793136597, + "logps/chosen": -154.37522888183594, + "logps/rejected": -291.111083984375, + "loss": 0.002, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.891350746154785, + "rewards/margins": 11.481247901916504, + "rewards/rejected": -19.372596740722656, + "step": 3026 + }, + { + "epoch": 4.86, + "learning_rate": 2.5564803804994055e-07, + "logits/chosen": -1.6266865730285645, + "logits/rejected": -1.6713911294937134, + "logps/chosen": -110.50544738769531, + "logps/rejected": -230.7946319580078, + "loss": 0.0707, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.195006370544434, + "rewards/margins": 9.61620044708252, + "rewards/rejected": -13.811206817626953, + "step": 3027 + }, + { + "epoch": 4.86, + "learning_rate": 2.5554894966309946e-07, + "logits/chosen": -1.6201668977737427, + "logits/rejected": -1.685059905052185, + "logps/chosen": -154.9766845703125, + "logps/rejected": -238.64572143554688, + "loss": 0.0019, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.651875495910645, + "rewards/margins": 5.77121639251709, + "rewards/rejected": -14.423091888427734, + "step": 3028 + }, + { + "epoch": 4.86, + "learning_rate": 2.554498612762584e-07, + "logits/chosen": -1.4411693811416626, + "logits/rejected": -1.4504756927490234, + "logps/chosen": -140.91592407226562, + "logps/rejected": -243.12066650390625, + "loss": 0.0226, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.666123390197754, + "rewards/margins": 11.04104995727539, + "rewards/rejected": -15.707172393798828, + "step": 3029 + }, + { + "epoch": 4.86, + "learning_rate": 2.553507728894174e-07, + "logits/chosen": -1.5778255462646484, + "logits/rejected": -1.510769248008728, + "logps/chosen": -174.35948181152344, + "logps/rejected": -249.70950317382812, + "loss": 0.0136, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.522197246551514, + "rewards/margins": 9.520999908447266, + "rewards/rejected": -17.043197631835938, + "step": 3030 + }, + { + "epoch": 4.87, + "learning_rate": 2.552516845025763e-07, + "logits/chosen": -1.5168817043304443, + "logits/rejected": -1.5514177083969116, + "logps/chosen": -133.1212615966797, + "logps/rejected": -287.5615234375, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.5749921798706055, + "rewards/margins": 11.86282730102539, + "rewards/rejected": -18.437820434570312, + "step": 3031 + }, + { + "epoch": 4.87, + "learning_rate": 2.551525961157352e-07, + "logits/chosen": -1.6411187648773193, + "logits/rejected": -1.613901138305664, + "logps/chosen": -192.70660400390625, + "logps/rejected": -277.80035400390625, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.916500091552734, + "rewards/margins": 9.471851348876953, + "rewards/rejected": -18.388355255126953, + "step": 3032 + }, + { + "epoch": 4.87, + "learning_rate": 2.5505350772889415e-07, + "logits/chosen": -1.4554873704910278, + "logits/rejected": -1.569277048110962, + "logps/chosen": -140.38502502441406, + "logps/rejected": -277.5528259277344, + "loss": 0.0019, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.462531089782715, + "rewards/margins": 11.665081977844238, + "rewards/rejected": -17.127614974975586, + "step": 3033 + }, + { + "epoch": 4.87, + "learning_rate": 2.5495441934205306e-07, + "logits/chosen": -1.4279760122299194, + "logits/rejected": -1.499062418937683, + "logps/chosen": -112.95838928222656, + "logps/rejected": -252.52569580078125, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.1023759841918945, + "rewards/margins": 13.043456077575684, + "rewards/rejected": -18.145832061767578, + "step": 3034 + }, + { + "epoch": 4.87, + "learning_rate": 2.5485533095521207e-07, + "logits/chosen": -1.3998849391937256, + "logits/rejected": -1.474881887435913, + "logps/chosen": -158.05711364746094, + "logps/rejected": -280.638671875, + "loss": 0.0043, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.19196891784668, + "rewards/margins": 10.856045722961426, + "rewards/rejected": -19.048015594482422, + "step": 3035 + }, + { + "epoch": 4.87, + "learning_rate": 2.54756242568371e-07, + "logits/chosen": -1.5152959823608398, + "logits/rejected": -1.4274942874908447, + "logps/chosen": -159.01747131347656, + "logps/rejected": -252.18788146972656, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.751352310180664, + "rewards/margins": 8.909814834594727, + "rewards/rejected": -15.66116714477539, + "step": 3036 + }, + { + "epoch": 4.87, + "learning_rate": 2.546571541815299e-07, + "logits/chosen": -1.5985699892044067, + "logits/rejected": -1.4947700500488281, + "logps/chosen": -137.47235107421875, + "logps/rejected": -236.36874389648438, + "loss": 0.0084, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.44924783706665, + "rewards/margins": 10.36996841430664, + "rewards/rejected": -16.819215774536133, + "step": 3037 + }, + { + "epoch": 4.88, + "learning_rate": 2.5455806579468885e-07, + "logits/chosen": -1.6482042074203491, + "logits/rejected": -1.6252140998840332, + "logps/chosen": -174.86434936523438, + "logps/rejected": -286.18310546875, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.444029808044434, + "rewards/margins": 12.65528678894043, + "rewards/rejected": -19.09931755065918, + "step": 3038 + }, + { + "epoch": 4.88, + "learning_rate": 2.5445897740784775e-07, + "logits/chosen": -1.4293930530548096, + "logits/rejected": -1.5777881145477295, + "logps/chosen": -109.97760772705078, + "logps/rejected": -268.6188049316406, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.18352746963501, + "rewards/margins": 11.854950904846191, + "rewards/rejected": -17.03847885131836, + "step": 3039 + }, + { + "epoch": 4.88, + "learning_rate": 2.5435988902100676e-07, + "logits/chosen": -1.6549938917160034, + "logits/rejected": -1.610329031944275, + "logps/chosen": -168.6908416748047, + "logps/rejected": -251.5948944091797, + "loss": 0.002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.269668102264404, + "rewards/margins": 10.21218204498291, + "rewards/rejected": -16.481849670410156, + "step": 3040 + }, + { + "epoch": 4.88, + "learning_rate": 2.5426080063416567e-07, + "logits/chosen": -1.6161553859710693, + "logits/rejected": -1.6414794921875, + "logps/chosen": -142.24404907226562, + "logps/rejected": -251.42674255371094, + "loss": 0.011, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.786394119262695, + "rewards/margins": 11.211397171020508, + "rewards/rejected": -17.997791290283203, + "step": 3041 + }, + { + "epoch": 4.88, + "learning_rate": 2.541617122473246e-07, + "logits/chosen": -1.543824553489685, + "logits/rejected": -1.6203733682632446, + "logps/chosen": -126.67439270019531, + "logps/rejected": -256.6875305175781, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.456245422363281, + "rewards/margins": 11.644412994384766, + "rewards/rejected": -17.100658416748047, + "step": 3042 + }, + { + "epoch": 4.88, + "learning_rate": 2.5406262386048354e-07, + "logits/chosen": -1.6935137510299683, + "logits/rejected": -1.623835563659668, + "logps/chosen": -130.92575073242188, + "logps/rejected": -236.81829833984375, + "loss": 0.0063, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.713956356048584, + "rewards/margins": 10.850776672363281, + "rewards/rejected": -16.564733505249023, + "step": 3043 + }, + { + "epoch": 4.89, + "learning_rate": 2.5396353547364245e-07, + "logits/chosen": -1.5924901962280273, + "logits/rejected": -1.7246167659759521, + "logps/chosen": -99.78510284423828, + "logps/rejected": -239.112060546875, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.228536605834961, + "rewards/margins": 11.721044540405273, + "rewards/rejected": -14.949579238891602, + "step": 3044 + }, + { + "epoch": 4.89, + "learning_rate": 2.5386444708680146e-07, + "logits/chosen": -1.4147179126739502, + "logits/rejected": -1.442866325378418, + "logps/chosen": -130.70849609375, + "logps/rejected": -265.4307861328125, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.769624710083008, + "rewards/margins": 13.195096969604492, + "rewards/rejected": -18.9647216796875, + "step": 3045 + }, + { + "epoch": 4.89, + "learning_rate": 2.5376535869996036e-07, + "logits/chosen": -1.7117120027542114, + "logits/rejected": -1.5665923357009888, + "logps/chosen": -181.68759155273438, + "logps/rejected": -293.5534362792969, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.962737083435059, + "rewards/margins": 13.143957138061523, + "rewards/rejected": -20.106693267822266, + "step": 3046 + }, + { + "epoch": 4.89, + "learning_rate": 2.5366627031311927e-07, + "logits/chosen": -1.6603600978851318, + "logits/rejected": -1.5208940505981445, + "logps/chosen": -176.27255249023438, + "logps/rejected": -254.94200134277344, + "loss": 0.0082, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.637332439422607, + "rewards/margins": 10.188783645629883, + "rewards/rejected": -17.826114654541016, + "step": 3047 + }, + { + "epoch": 4.89, + "learning_rate": 2.5356718192627823e-07, + "logits/chosen": -1.7850474119186401, + "logits/rejected": -1.6328308582305908, + "logps/chosen": -148.95382690429688, + "logps/rejected": -254.66424560546875, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.718789100646973, + "rewards/margins": 11.145008087158203, + "rewards/rejected": -17.863798141479492, + "step": 3048 + }, + { + "epoch": 4.89, + "learning_rate": 2.5346809353943714e-07, + "logits/chosen": -1.6109039783477783, + "logits/rejected": -1.5951831340789795, + "logps/chosen": -166.17770385742188, + "logps/rejected": -286.60345458984375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.492368698120117, + "rewards/margins": 9.838651657104492, + "rewards/rejected": -18.33102035522461, + "step": 3049 + }, + { + "epoch": 4.9, + "learning_rate": 2.5336900515259615e-07, + "logits/chosen": -1.6459803581237793, + "logits/rejected": -1.6789069175720215, + "logps/chosen": -148.41807556152344, + "logps/rejected": -291.2695007324219, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.048157691955566, + "rewards/margins": 13.181796073913574, + "rewards/rejected": -18.22995376586914, + "step": 3050 + }, + { + "epoch": 4.9, + "learning_rate": 2.5326991676575506e-07, + "logits/chosen": -1.4894603490829468, + "logits/rejected": -1.5766639709472656, + "logps/chosen": -112.33875274658203, + "logps/rejected": -255.47564697265625, + "loss": 0.0144, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.537836074829102, + "rewards/margins": 12.534717559814453, + "rewards/rejected": -17.072551727294922, + "step": 3051 + }, + { + "epoch": 4.9, + "learning_rate": 2.5317082837891396e-07, + "logits/chosen": -1.3636729717254639, + "logits/rejected": -1.4332247972488403, + "logps/chosen": -123.3023910522461, + "logps/rejected": -199.00674438476562, + "loss": 0.0212, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.502621650695801, + "rewards/margins": 6.467041969299316, + "rewards/rejected": -12.969663619995117, + "step": 3052 + }, + { + "epoch": 4.9, + "learning_rate": 2.530717399920729e-07, + "logits/chosen": -1.7122979164123535, + "logits/rejected": -1.6508651971817017, + "logps/chosen": -207.53216552734375, + "logps/rejected": -333.4276123046875, + "loss": 0.0018, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.89658260345459, + "rewards/margins": 14.763897895812988, + "rewards/rejected": -24.66048240661621, + "step": 3053 + }, + { + "epoch": 4.9, + "learning_rate": 2.5297265160523183e-07, + "logits/chosen": -1.7131376266479492, + "logits/rejected": -1.6878223419189453, + "logps/chosen": -187.15902709960938, + "logps/rejected": -286.279296875, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.35421371459961, + "rewards/margins": 10.467395782470703, + "rewards/rejected": -18.821609497070312, + "step": 3054 + }, + { + "epoch": 4.9, + "learning_rate": 2.5287356321839084e-07, + "logits/chosen": -1.4956250190734863, + "logits/rejected": -1.5285588502883911, + "logps/chosen": -134.54701232910156, + "logps/rejected": -308.81707763671875, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.661291122436523, + "rewards/margins": 16.509952545166016, + "rewards/rejected": -22.171241760253906, + "step": 3055 + }, + { + "epoch": 4.91, + "learning_rate": 2.5277447483154975e-07, + "logits/chosen": -1.5224061012268066, + "logits/rejected": -1.4577035903930664, + "logps/chosen": -159.84732055664062, + "logps/rejected": -223.0663299560547, + "loss": 0.005, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.8549113273620605, + "rewards/margins": 7.827574729919434, + "rewards/rejected": -12.682486534118652, + "step": 3056 + }, + { + "epoch": 4.91, + "learning_rate": 2.5267538644470866e-07, + "logits/chosen": -1.6337649822235107, + "logits/rejected": -1.6779203414916992, + "logps/chosen": -142.1373291015625, + "logps/rejected": -237.81472778320312, + "loss": 0.0038, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.467741966247559, + "rewards/margins": 9.072274208068848, + "rewards/rejected": -14.540017127990723, + "step": 3057 + }, + { + "epoch": 4.91, + "learning_rate": 2.525762980578676e-07, + "logits/chosen": -1.5525791645050049, + "logits/rejected": -1.5684441328048706, + "logps/chosen": -143.92800903320312, + "logps/rejected": -277.7154541015625, + "loss": 0.0031, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.978017330169678, + "rewards/margins": 13.036785125732422, + "rewards/rejected": -20.014802932739258, + "step": 3058 + }, + { + "epoch": 4.91, + "learning_rate": 2.524772096710265e-07, + "logits/chosen": -1.4951804876327515, + "logits/rejected": -1.4670066833496094, + "logps/chosen": -157.13235473632812, + "logps/rejected": -253.3062744140625, + "loss": 0.0017, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.798226833343506, + "rewards/margins": 9.68120288848877, + "rewards/rejected": -17.47943115234375, + "step": 3059 + }, + { + "epoch": 4.91, + "learning_rate": 2.523781212841855e-07, + "logits/chosen": -1.5410572290420532, + "logits/rejected": -1.6684672832489014, + "logps/chosen": -158.89663696289062, + "logps/rejected": -267.79168701171875, + "loss": 0.0031, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.571890830993652, + "rewards/margins": 9.328311920166016, + "rewards/rejected": -17.900203704833984, + "step": 3060 + }, + { + "epoch": 4.91, + "learning_rate": 2.5227903289734444e-07, + "logits/chosen": -1.4299260377883911, + "logits/rejected": -1.4791194200515747, + "logps/chosen": -153.26637268066406, + "logps/rejected": -262.45172119140625, + "loss": 0.0057, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.322373867034912, + "rewards/margins": 10.137031555175781, + "rewards/rejected": -17.45940589904785, + "step": 3061 + }, + { + "epoch": 4.91, + "learning_rate": 2.5217994451050335e-07, + "logits/chosen": -1.5761810541152954, + "logits/rejected": -1.6085141897201538, + "logps/chosen": -152.52017211914062, + "logps/rejected": -250.35157775878906, + "loss": 0.0027, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.0363569259643555, + "rewards/margins": 10.279949188232422, + "rewards/rejected": -15.316306114196777, + "step": 3062 + }, + { + "epoch": 4.92, + "learning_rate": 2.520808561236623e-07, + "logits/chosen": -1.7657877206802368, + "logits/rejected": -1.623388648033142, + "logps/chosen": -146.90713500976562, + "logps/rejected": -253.6886444091797, + "loss": 0.0018, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.065025329589844, + "rewards/margins": 13.297927856445312, + "rewards/rejected": -17.362953186035156, + "step": 3063 + }, + { + "epoch": 4.92, + "learning_rate": 2.519817677368212e-07, + "logits/chosen": -1.69261634349823, + "logits/rejected": -1.6406782865524292, + "logps/chosen": -117.64510345458984, + "logps/rejected": -219.14857482910156, + "loss": 0.0597, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.527890205383301, + "rewards/margins": 9.896730422973633, + "rewards/rejected": -14.42462158203125, + "step": 3064 + }, + { + "epoch": 4.92, + "learning_rate": 2.518826793499802e-07, + "logits/chosen": -1.7124931812286377, + "logits/rejected": -1.6435210704803467, + "logps/chosen": -129.6770782470703, + "logps/rejected": -241.09988403320312, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.890984535217285, + "rewards/margins": 11.25416374206543, + "rewards/rejected": -16.1451473236084, + "step": 3065 + }, + { + "epoch": 4.92, + "learning_rate": 2.5178359096313914e-07, + "logits/chosen": -1.517417311668396, + "logits/rejected": -1.5557595491409302, + "logps/chosen": -142.91761779785156, + "logps/rejected": -242.11032104492188, + "loss": 0.0061, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.149068832397461, + "rewards/margins": 9.622203826904297, + "rewards/rejected": -16.771272659301758, + "step": 3066 + }, + { + "epoch": 4.92, + "learning_rate": 2.5168450257629804e-07, + "logits/chosen": -1.6447687149047852, + "logits/rejected": -1.5586367845535278, + "logps/chosen": -136.85098266601562, + "logps/rejected": -191.77786254882812, + "loss": 0.0083, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.014993667602539, + "rewards/margins": 7.356808662414551, + "rewards/rejected": -11.37180233001709, + "step": 3067 + }, + { + "epoch": 4.92, + "learning_rate": 2.51585414189457e-07, + "logits/chosen": -1.4543451070785522, + "logits/rejected": -1.4207231998443604, + "logps/chosen": -144.015869140625, + "logps/rejected": -258.7178039550781, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.880503177642822, + "rewards/margins": 11.478341102600098, + "rewards/rejected": -16.358844757080078, + "step": 3068 + }, + { + "epoch": 4.93, + "learning_rate": 2.514863258026159e-07, + "logits/chosen": -1.8979496955871582, + "logits/rejected": -1.7937335968017578, + "logps/chosen": -98.01948547363281, + "logps/rejected": -261.7539367675781, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.608144760131836, + "rewards/margins": 17.445890426635742, + "rewards/rejected": -19.054035186767578, + "step": 3069 + }, + { + "epoch": 4.93, + "learning_rate": 2.513872374157748e-07, + "logits/chosen": -1.692335844039917, + "logits/rejected": -1.6906415224075317, + "logps/chosen": -140.7911834716797, + "logps/rejected": -231.30050659179688, + "loss": 0.0021, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.028026103973389, + "rewards/margins": 11.04559326171875, + "rewards/rejected": -16.073619842529297, + "step": 3070 + }, + { + "epoch": 4.93, + "learning_rate": 2.5128814902893383e-07, + "logits/chosen": -1.5596023797988892, + "logits/rejected": -1.5552492141723633, + "logps/chosen": -156.7205810546875, + "logps/rejected": -276.81292724609375, + "loss": 0.0017, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.1753339767456055, + "rewards/margins": 11.322607040405273, + "rewards/rejected": -17.497940063476562, + "step": 3071 + }, + { + "epoch": 4.93, + "learning_rate": 2.5118906064209274e-07, + "logits/chosen": -1.6315504312515259, + "logits/rejected": -1.6874337196350098, + "logps/chosen": -156.35267639160156, + "logps/rejected": -287.9909973144531, + "loss": 0.003, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.325174331665039, + "rewards/margins": 12.316265106201172, + "rewards/rejected": -19.641437530517578, + "step": 3072 + }, + { + "epoch": 4.93, + "learning_rate": 2.510899722552517e-07, + "logits/chosen": -1.615752100944519, + "logits/rejected": -1.6367475986480713, + "logps/chosen": -117.86549377441406, + "logps/rejected": -248.46163940429688, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.403353214263916, + "rewards/margins": 12.886713027954102, + "rewards/rejected": -15.29006576538086, + "step": 3073 + }, + { + "epoch": 4.93, + "learning_rate": 2.509908838684106e-07, + "logits/chosen": -1.4133230447769165, + "logits/rejected": -1.5059024095535278, + "logps/chosen": -133.52183532714844, + "logps/rejected": -250.16741943359375, + "loss": 0.0031, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.208264350891113, + "rewards/margins": 11.906686782836914, + "rewards/rejected": -17.114952087402344, + "step": 3074 + }, + { + "epoch": 4.94, + "learning_rate": 2.508917954815695e-07, + "logits/chosen": -1.4344881772994995, + "logits/rejected": -1.4670958518981934, + "logps/chosen": -136.4660186767578, + "logps/rejected": -248.94219970703125, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.163681507110596, + "rewards/margins": 10.29470443725586, + "rewards/rejected": -15.458386421203613, + "step": 3075 + }, + { + "epoch": 4.94, + "learning_rate": 2.5079270709472847e-07, + "logits/chosen": -1.5844206809997559, + "logits/rejected": -1.6508249044418335, + "logps/chosen": -107.33158874511719, + "logps/rejected": -238.4326171875, + "loss": 0.0018, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.270458221435547, + "rewards/margins": 12.17056655883789, + "rewards/rejected": -15.441024780273438, + "step": 3076 + }, + { + "epoch": 4.94, + "learning_rate": 2.5069361870788743e-07, + "logits/chosen": -1.5883867740631104, + "logits/rejected": -1.587373971939087, + "logps/chosen": -132.30503845214844, + "logps/rejected": -240.78802490234375, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.3818864822387695, + "rewards/margins": 9.410202980041504, + "rewards/rejected": -13.792089462280273, + "step": 3077 + }, + { + "epoch": 4.94, + "learning_rate": 2.505945303210464e-07, + "logits/chosen": -1.528652310371399, + "logits/rejected": -1.479478120803833, + "logps/chosen": -172.88731384277344, + "logps/rejected": -252.09652709960938, + "loss": 0.0118, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.779089450836182, + "rewards/margins": 10.40493392944336, + "rewards/rejected": -17.184022903442383, + "step": 3078 + }, + { + "epoch": 4.94, + "learning_rate": 2.504954419342053e-07, + "logits/chosen": -1.53297758102417, + "logits/rejected": -1.6328545808792114, + "logps/chosen": -175.34901428222656, + "logps/rejected": -298.1333312988281, + "loss": 0.0017, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.061513900756836, + "rewards/margins": 11.571731567382812, + "rewards/rejected": -18.63324546813965, + "step": 3079 + }, + { + "epoch": 4.94, + "learning_rate": 2.503963535473642e-07, + "logits/chosen": -1.4776356220245361, + "logits/rejected": -1.5513346195220947, + "logps/chosen": -140.17054748535156, + "logps/rejected": -319.95904541015625, + "loss": 0.0021, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.457078456878662, + "rewards/margins": 16.09861946105957, + "rewards/rejected": -22.55569839477539, + "step": 3080 + }, + { + "epoch": 4.95, + "learning_rate": 2.5029726516052316e-07, + "logits/chosen": -1.6960495710372925, + "logits/rejected": -1.7201521396636963, + "logps/chosen": -128.60205078125, + "logps/rejected": -244.00936889648438, + "loss": 0.0043, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.88749361038208, + "rewards/margins": 10.736011505126953, + "rewards/rejected": -14.623505592346191, + "step": 3081 + }, + { + "epoch": 4.95, + "learning_rate": 2.501981767736821e-07, + "logits/chosen": -1.6174514293670654, + "logits/rejected": -1.5619838237762451, + "logps/chosen": -160.61322021484375, + "logps/rejected": -235.08087158203125, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.998156547546387, + "rewards/margins": 9.564531326293945, + "rewards/rejected": -15.562688827514648, + "step": 3082 + }, + { + "epoch": 4.95, + "learning_rate": 2.500990883868411e-07, + "logits/chosen": -1.606747031211853, + "logits/rejected": -1.6287868022918701, + "logps/chosen": -118.26628112792969, + "logps/rejected": -237.82022094726562, + "loss": 0.0037, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9723987579345703, + "rewards/margins": 12.131949424743652, + "rewards/rejected": -16.104347229003906, + "step": 3083 + }, + { + "epoch": 4.95, + "learning_rate": 2.5e-07, + "logits/chosen": -1.5745983123779297, + "logits/rejected": -1.6349263191223145, + "logps/chosen": -145.8449249267578, + "logps/rejected": -267.9464111328125, + "loss": 0.0044, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.040528774261475, + "rewards/margins": 11.05945110321045, + "rewards/rejected": -16.099979400634766, + "step": 3084 + }, + { + "epoch": 4.95, + "learning_rate": 2.4990091161315895e-07, + "logits/chosen": -1.521039366722107, + "logits/rejected": -1.5049598217010498, + "logps/chosen": -160.49920654296875, + "logps/rejected": -269.1514587402344, + "loss": 0.0039, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.744072914123535, + "rewards/margins": 10.94176197052002, + "rewards/rejected": -18.685834884643555, + "step": 3085 + }, + { + "epoch": 4.95, + "learning_rate": 2.4980182322631786e-07, + "logits/chosen": -1.4584065675735474, + "logits/rejected": -1.5422462224960327, + "logps/chosen": -155.54901123046875, + "logps/rejected": -269.0795593261719, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.046666145324707, + "rewards/margins": 10.023353576660156, + "rewards/rejected": -17.070018768310547, + "step": 3086 + }, + { + "epoch": 4.96, + "learning_rate": 2.497027348394768e-07, + "logits/chosen": -1.7263514995574951, + "logits/rejected": -1.721433162689209, + "logps/chosen": -165.44935607910156, + "logps/rejected": -292.9507141113281, + "loss": 0.004, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.401544094085693, + "rewards/margins": 11.778488159179688, + "rewards/rejected": -18.18003273010254, + "step": 3087 + }, + { + "epoch": 4.96, + "learning_rate": 2.496036464526357e-07, + "logits/chosen": -1.4228614568710327, + "logits/rejected": -1.4179209470748901, + "logps/chosen": -168.57325744628906, + "logps/rejected": -268.0875244140625, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.321219444274902, + "rewards/margins": 10.325368881225586, + "rewards/rejected": -16.646589279174805, + "step": 3088 + }, + { + "epoch": 4.96, + "learning_rate": 2.495045580657947e-07, + "logits/chosen": -1.5707318782806396, + "logits/rejected": -1.5840909481048584, + "logps/chosen": -129.78872680664062, + "logps/rejected": -246.52664184570312, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.227991104125977, + "rewards/margins": 10.597474098205566, + "rewards/rejected": -15.825465202331543, + "step": 3089 + }, + { + "epoch": 4.96, + "learning_rate": 2.4940546967895364e-07, + "logits/chosen": -1.8096868991851807, + "logits/rejected": -1.7173882722854614, + "logps/chosen": -104.82066345214844, + "logps/rejected": -268.82269287109375, + "loss": 0.0059, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.754993438720703, + "rewards/margins": 15.436769485473633, + "rewards/rejected": -18.191761016845703, + "step": 3090 + }, + { + "epoch": 4.96, + "learning_rate": 2.4930638129211255e-07, + "logits/chosen": -1.6890531778335571, + "logits/rejected": -1.6280899047851562, + "logps/chosen": -141.97451782226562, + "logps/rejected": -248.74961853027344, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.2841033935546875, + "rewards/margins": 10.982544898986816, + "rewards/rejected": -17.266647338867188, + "step": 3091 + }, + { + "epoch": 4.96, + "learning_rate": 2.492072929052715e-07, + "logits/chosen": -1.4639781713485718, + "logits/rejected": -1.542650580406189, + "logps/chosen": -139.55494689941406, + "logps/rejected": -258.54522705078125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.171555995941162, + "rewards/margins": 10.85981559753418, + "rewards/rejected": -17.0313720703125, + "step": 3092 + }, + { + "epoch": 4.96, + "learning_rate": 2.491082045184304e-07, + "logits/chosen": -1.4393049478530884, + "logits/rejected": -1.4394692182540894, + "logps/chosen": -109.1115951538086, + "logps/rejected": -237.92742919921875, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.2762861251831055, + "rewards/margins": 12.328411102294922, + "rewards/rejected": -15.604696273803711, + "step": 3093 + }, + { + "epoch": 4.97, + "learning_rate": 2.490091161315894e-07, + "logits/chosen": -1.5235254764556885, + "logits/rejected": -1.5486503839492798, + "logps/chosen": -140.0886993408203, + "logps/rejected": -262.8493957519531, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.771365642547607, + "rewards/margins": 11.111190795898438, + "rewards/rejected": -16.882556915283203, + "step": 3094 + }, + { + "epoch": 4.97, + "learning_rate": 2.4891002774474833e-07, + "logits/chosen": -1.472780466079712, + "logits/rejected": -1.5682305097579956, + "logps/chosen": -120.42237854003906, + "logps/rejected": -265.95587158203125, + "loss": 0.0023, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.262857437133789, + "rewards/margins": 12.360076904296875, + "rewards/rejected": -18.62293243408203, + "step": 3095 + }, + { + "epoch": 4.97, + "learning_rate": 2.4881093935790724e-07, + "logits/chosen": -1.5604878664016724, + "logits/rejected": -1.561241626739502, + "logps/chosen": -153.3413848876953, + "logps/rejected": -270.0909729003906, + "loss": 0.0032, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.463719367980957, + "rewards/margins": 10.881559371948242, + "rewards/rejected": -17.345279693603516, + "step": 3096 + }, + { + "epoch": 4.97, + "learning_rate": 2.4871185097106615e-07, + "logits/chosen": -1.713822603225708, + "logits/rejected": -1.672666072845459, + "logps/chosen": -195.22149658203125, + "logps/rejected": -301.16943359375, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.990966796875, + "rewards/margins": 10.110091209411621, + "rewards/rejected": -20.101058959960938, + "step": 3097 + }, + { + "epoch": 4.97, + "learning_rate": 2.486127625842251e-07, + "logits/chosen": -1.3799023628234863, + "logits/rejected": -1.4090087413787842, + "logps/chosen": -103.22047424316406, + "logps/rejected": -253.007568359375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3250179290771484, + "rewards/margins": 14.3345365524292, + "rewards/rejected": -16.65955352783203, + "step": 3098 + }, + { + "epoch": 4.97, + "learning_rate": 2.4851367419738407e-07, + "logits/chosen": -1.4867222309112549, + "logits/rejected": -1.5101947784423828, + "logps/chosen": -170.1656036376953, + "logps/rejected": -292.2098388671875, + "loss": 0.0032, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.335787773132324, + "rewards/margins": 11.195863723754883, + "rewards/rejected": -19.53165054321289, + "step": 3099 + }, + { + "epoch": 4.98, + "learning_rate": 2.4841458581054303e-07, + "logits/chosen": -1.5421479940414429, + "logits/rejected": -1.5743883848190308, + "logps/chosen": -127.24839782714844, + "logps/rejected": -266.1440124511719, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.1333537101745605, + "rewards/margins": 12.162609100341797, + "rewards/rejected": -17.295961380004883, + "step": 3100 + }, + { + "epoch": 4.98, + "learning_rate": 2.4831549742370193e-07, + "logits/chosen": -1.5263954401016235, + "logits/rejected": -1.527968406677246, + "logps/chosen": -176.54725646972656, + "logps/rejected": -288.5918884277344, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.533895492553711, + "rewards/margins": 11.561506271362305, + "rewards/rejected": -19.095401763916016, + "step": 3101 + }, + { + "epoch": 4.98, + "learning_rate": 2.4821640903686084e-07, + "logits/chosen": -1.4578834772109985, + "logits/rejected": -1.5096769332885742, + "logps/chosen": -144.4181365966797, + "logps/rejected": -247.67747497558594, + "loss": 0.0069, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.759390830993652, + "rewards/margins": 10.894268035888672, + "rewards/rejected": -17.65365982055664, + "step": 3102 + }, + { + "epoch": 4.98, + "learning_rate": 2.481173206500198e-07, + "logits/chosen": -1.5571353435516357, + "logits/rejected": -1.5526766777038574, + "logps/chosen": -98.32770538330078, + "logps/rejected": -214.333740234375, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3344192504882812, + "rewards/margins": 11.841728210449219, + "rewards/rejected": -15.1761474609375, + "step": 3103 + }, + { + "epoch": 4.98, + "learning_rate": 2.4801823226317876e-07, + "logits/chosen": -1.5838717222213745, + "logits/rejected": -1.5972464084625244, + "logps/chosen": -125.76941680908203, + "logps/rejected": -217.18421936035156, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7811689376831055, + "rewards/margins": 9.681500434875488, + "rewards/rejected": -13.462669372558594, + "step": 3104 + }, + { + "epoch": 4.98, + "learning_rate": 2.479191438763377e-07, + "logits/chosen": -1.554275631904602, + "logits/rejected": -1.5663264989852905, + "logps/chosen": -133.95654296875, + "logps/rejected": -276.4942321777344, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.099637031555176, + "rewards/margins": 15.120672225952148, + "rewards/rejected": -19.22031021118164, + "step": 3105 + }, + { + "epoch": 4.99, + "learning_rate": 2.4782005548949663e-07, + "logits/chosen": -1.4723037481307983, + "logits/rejected": -1.4768097400665283, + "logps/chosen": -127.9981689453125, + "logps/rejected": -222.2070770263672, + "loss": 0.0267, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.291740894317627, + "rewards/margins": 9.419509887695312, + "rewards/rejected": -15.711250305175781, + "step": 3106 + }, + { + "epoch": 4.99, + "learning_rate": 2.4772096710265553e-07, + "logits/chosen": -1.4645708799362183, + "logits/rejected": -1.3986704349517822, + "logps/chosen": -138.61245727539062, + "logps/rejected": -245.0240478515625, + "loss": 0.0036, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.251101016998291, + "rewards/margins": 7.6906843185424805, + "rewards/rejected": -12.941784858703613, + "step": 3107 + }, + { + "epoch": 4.99, + "learning_rate": 2.476218787158145e-07, + "logits/chosen": -1.483332633972168, + "logits/rejected": -1.5682251453399658, + "logps/chosen": -150.23435974121094, + "logps/rejected": -256.00970458984375, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.571688175201416, + "rewards/margins": 10.647275924682617, + "rewards/rejected": -17.218965530395508, + "step": 3108 + }, + { + "epoch": 4.99, + "learning_rate": 2.4752279032897345e-07, + "logits/chosen": -1.5320026874542236, + "logits/rejected": -1.5542941093444824, + "logps/chosen": -143.69407653808594, + "logps/rejected": -227.49298095703125, + "loss": 0.0018, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.0731611251831055, + "rewards/margins": 8.607951164245605, + "rewards/rejected": -14.681112289428711, + "step": 3109 + }, + { + "epoch": 4.99, + "learning_rate": 2.4742370194213236e-07, + "logits/chosen": -1.537348985671997, + "logits/rejected": -1.5065922737121582, + "logps/chosen": -128.8089141845703, + "logps/rejected": -243.07215881347656, + "loss": 0.002, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.816633462905884, + "rewards/margins": 12.699992179870605, + "rewards/rejected": -16.516626358032227, + "step": 3110 + }, + { + "epoch": 4.99, + "learning_rate": 2.473246135552913e-07, + "logits/chosen": -1.612801432609558, + "logits/rejected": -1.576140284538269, + "logps/chosen": -156.64932250976562, + "logps/rejected": -247.78526306152344, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.467748641967773, + "rewards/margins": 10.777105331420898, + "rewards/rejected": -17.244853973388672, + "step": 3111 + }, + { + "epoch": 5.0, + "learning_rate": 2.4722552516845023e-07, + "logits/chosen": -1.402331829071045, + "logits/rejected": -1.4307838678359985, + "logps/chosen": -185.59922790527344, + "logps/rejected": -286.432861328125, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.13956356048584, + "rewards/margins": 10.205191612243652, + "rewards/rejected": -20.344755172729492, + "step": 3112 + }, + { + "epoch": 5.0, + "learning_rate": 2.471264367816092e-07, + "logits/chosen": -1.5006393194198608, + "logits/rejected": -1.5229551792144775, + "logps/chosen": -149.61106872558594, + "logps/rejected": -214.18585205078125, + "loss": 0.004, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.988677501678467, + "rewards/margins": 7.156899929046631, + "rewards/rejected": -13.145576477050781, + "step": 3113 + }, + { + "epoch": 5.0, + "learning_rate": 2.4702734839476815e-07, + "logits/chosen": -1.6205846071243286, + "logits/rejected": -1.659084677696228, + "logps/chosen": -144.57650756835938, + "logps/rejected": -253.15553283691406, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.137938022613525, + "rewards/margins": 12.348478317260742, + "rewards/rejected": -17.486417770385742, + "step": 3114 + }, + { + "epoch": 5.0, + "learning_rate": 2.4692826000792705e-07, + "logits/chosen": -1.7013664245605469, + "logits/rejected": -1.7275958061218262, + "logps/chosen": -157.40179443359375, + "logps/rejected": -309.5972900390625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.570490837097168, + "rewards/margins": 14.25887680053711, + "rewards/rejected": -19.829368591308594, + "step": 3115 + }, + { + "epoch": 5.0, + "learning_rate": 2.46829171621086e-07, + "logits/chosen": -1.6073191165924072, + "logits/rejected": -1.6952643394470215, + "logps/chosen": -169.89649963378906, + "logps/rejected": -324.5791320800781, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.033249855041504, + "rewards/margins": 13.300962448120117, + "rewards/rejected": -21.334213256835938, + "step": 3116 + }, + { + "epoch": 5.0, + "learning_rate": 2.467300832342449e-07, + "logits/chosen": -1.5690288543701172, + "logits/rejected": -1.5801321268081665, + "logps/chosen": -134.87632751464844, + "logps/rejected": -260.5616455078125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.494507789611816, + "rewards/margins": 12.489862442016602, + "rewards/rejected": -16.984371185302734, + "step": 3117 + }, + { + "epoch": 5.0, + "learning_rate": 2.466309948474039e-07, + "logits/chosen": -1.6771221160888672, + "logits/rejected": -1.586949110031128, + "logps/chosen": -149.57559204101562, + "logps/rejected": -286.3363037109375, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.784968852996826, + "rewards/margins": 13.636580467224121, + "rewards/rejected": -19.421550750732422, + "step": 3118 + }, + { + "epoch": 5.01, + "learning_rate": 2.4653190646056284e-07, + "logits/chosen": -1.5849571228027344, + "logits/rejected": -1.4959657192230225, + "logps/chosen": -126.88955688476562, + "logps/rejected": -252.347900390625, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.608795642852783, + "rewards/margins": 12.170234680175781, + "rewards/rejected": -17.779029846191406, + "step": 3119 + }, + { + "epoch": 5.01, + "learning_rate": 2.4643281807372175e-07, + "logits/chosen": -1.4078638553619385, + "logits/rejected": -1.3200838565826416, + "logps/chosen": -149.1171875, + "logps/rejected": -215.20152282714844, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.48276948928833, + "rewards/margins": 10.235958099365234, + "rewards/rejected": -14.718727111816406, + "step": 3120 + }, + { + "epoch": 5.01, + "learning_rate": 2.463337296868807e-07, + "logits/chosen": -1.5108827352523804, + "logits/rejected": -1.590162992477417, + "logps/chosen": -187.14810180664062, + "logps/rejected": -311.34747314453125, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.1807403564453125, + "rewards/margins": 12.703447341918945, + "rewards/rejected": -19.88418960571289, + "step": 3121 + }, + { + "epoch": 5.01, + "learning_rate": 2.462346413000396e-07, + "logits/chosen": -1.5147552490234375, + "logits/rejected": -1.5789105892181396, + "logps/chosen": -162.84564208984375, + "logps/rejected": -312.63934326171875, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.183581352233887, + "rewards/margins": 13.094157218933105, + "rewards/rejected": -22.277738571166992, + "step": 3122 + }, + { + "epoch": 5.01, + "learning_rate": 2.4613555291319857e-07, + "logits/chosen": -1.7397079467773438, + "logits/rejected": -1.6673675775527954, + "logps/chosen": -164.10562133789062, + "logps/rejected": -250.80450439453125, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.105806350708008, + "rewards/margins": 9.521023750305176, + "rewards/rejected": -15.6268310546875, + "step": 3123 + }, + { + "epoch": 5.01, + "learning_rate": 2.4603646452635753e-07, + "logits/chosen": -1.3734591007232666, + "logits/rejected": -1.4460797309875488, + "logps/chosen": -103.94938659667969, + "logps/rejected": -255.7218780517578, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.6443698406219482, + "rewards/margins": 13.684560775756836, + "rewards/rejected": -17.32893180847168, + "step": 3124 + }, + { + "epoch": 5.02, + "learning_rate": 2.4593737613951644e-07, + "logits/chosen": -1.668335199356079, + "logits/rejected": -1.5911206007003784, + "logps/chosen": -155.06602478027344, + "logps/rejected": -239.8800048828125, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.271856307983398, + "rewards/margins": 11.419143676757812, + "rewards/rejected": -15.690999984741211, + "step": 3125 + }, + { + "epoch": 5.02, + "learning_rate": 2.4583828775267535e-07, + "logits/chosen": -1.3719030618667603, + "logits/rejected": -1.3534754514694214, + "logps/chosen": -94.68092346191406, + "logps/rejected": -205.46533203125, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.412700653076172, + "rewards/margins": 10.408016204833984, + "rewards/rejected": -13.820716857910156, + "step": 3126 + }, + { + "epoch": 5.02, + "learning_rate": 2.457391993658343e-07, + "logits/chosen": -1.429132103919983, + "logits/rejected": -1.3759117126464844, + "logps/chosen": -134.21786499023438, + "logps/rejected": -209.40074157714844, + "loss": 0.002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.532031536102295, + "rewards/margins": 9.023947715759277, + "rewards/rejected": -13.55597972869873, + "step": 3127 + }, + { + "epoch": 5.02, + "learning_rate": 2.4564011097899327e-07, + "logits/chosen": -1.574343204498291, + "logits/rejected": -1.5807675123214722, + "logps/chosen": -96.25482177734375, + "logps/rejected": -245.55543518066406, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.575493574142456, + "rewards/margins": 14.908943176269531, + "rewards/rejected": -16.484437942504883, + "step": 3128 + }, + { + "epoch": 5.02, + "learning_rate": 2.4554102259215217e-07, + "logits/chosen": -1.4758803844451904, + "logits/rejected": -1.503057837486267, + "logps/chosen": -155.5498809814453, + "logps/rejected": -272.6719970703125, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.049112796783447, + "rewards/margins": 12.91306209564209, + "rewards/rejected": -17.962173461914062, + "step": 3129 + }, + { + "epoch": 5.02, + "learning_rate": 2.4544193420531113e-07, + "logits/chosen": -1.5561389923095703, + "logits/rejected": -1.5662267208099365, + "logps/chosen": -173.9281005859375, + "logps/rejected": -288.9751281738281, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.914882183074951, + "rewards/margins": 11.669995307922363, + "rewards/rejected": -18.584877014160156, + "step": 3130 + }, + { + "epoch": 5.03, + "learning_rate": 2.4534284581847004e-07, + "logits/chosen": -1.4983558654785156, + "logits/rejected": -1.3058159351348877, + "logps/chosen": -116.93765258789062, + "logps/rejected": -221.72483825683594, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.662283897399902, + "rewards/margins": 11.42985725402832, + "rewards/rejected": -16.09214210510254, + "step": 3131 + }, + { + "epoch": 5.03, + "learning_rate": 2.45243757431629e-07, + "logits/chosen": -1.4575530290603638, + "logits/rejected": -1.4805736541748047, + "logps/chosen": -105.72708129882812, + "logps/rejected": -241.04641723632812, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.6687145233154297, + "rewards/margins": 13.232151985168457, + "rewards/rejected": -16.90086555480957, + "step": 3132 + }, + { + "epoch": 5.03, + "learning_rate": 2.4514466904478796e-07, + "logits/chosen": -1.5933864116668701, + "logits/rejected": -1.6280900239944458, + "logps/chosen": -173.43997192382812, + "logps/rejected": -277.2003479003906, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.874416351318359, + "rewards/margins": 11.494421005249023, + "rewards/rejected": -17.368839263916016, + "step": 3133 + }, + { + "epoch": 5.03, + "learning_rate": 2.4504558065794687e-07, + "logits/chosen": -1.6775513887405396, + "logits/rejected": -1.6595275402069092, + "logps/chosen": -124.96160125732422, + "logps/rejected": -248.89614868164062, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.017516613006592, + "rewards/margins": 12.928281784057617, + "rewards/rejected": -17.945798873901367, + "step": 3134 + }, + { + "epoch": 5.03, + "learning_rate": 2.449464922711058e-07, + "logits/chosen": -1.4349302053451538, + "logits/rejected": -1.412517786026001, + "logps/chosen": -145.3555145263672, + "logps/rejected": -252.66746520996094, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.50351095199585, + "rewards/margins": 12.763410568237305, + "rewards/rejected": -17.266921997070312, + "step": 3135 + }, + { + "epoch": 5.03, + "learning_rate": 2.4484740388426473e-07, + "logits/chosen": -1.5984992980957031, + "logits/rejected": -1.5627403259277344, + "logps/chosen": -101.12865447998047, + "logps/rejected": -226.9209442138672, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.545706033706665, + "rewards/margins": 13.011831283569336, + "rewards/rejected": -16.557537078857422, + "step": 3136 + }, + { + "epoch": 5.04, + "learning_rate": 2.447483154974237e-07, + "logits/chosen": -1.5194011926651, + "logits/rejected": -1.4146113395690918, + "logps/chosen": -154.14649963378906, + "logps/rejected": -244.73233032226562, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.4118757247924805, + "rewards/margins": 10.063342094421387, + "rewards/rejected": -15.475218772888184, + "step": 3137 + }, + { + "epoch": 5.04, + "learning_rate": 2.4464922711058265e-07, + "logits/chosen": -1.675168514251709, + "logits/rejected": -1.678998351097107, + "logps/chosen": -119.29895782470703, + "logps/rejected": -259.0997314453125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.4724926948547363, + "rewards/margins": 12.605355262756348, + "rewards/rejected": -16.077848434448242, + "step": 3138 + }, + { + "epoch": 5.04, + "learning_rate": 2.4455013872374156e-07, + "logits/chosen": -1.3847206830978394, + "logits/rejected": -1.410377025604248, + "logps/chosen": -148.40048217773438, + "logps/rejected": -215.83322143554688, + "loss": 0.0026, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.791936874389648, + "rewards/margins": 8.232172012329102, + "rewards/rejected": -15.02410888671875, + "step": 3139 + }, + { + "epoch": 5.04, + "learning_rate": 2.444510503369005e-07, + "logits/chosen": -1.5075887441635132, + "logits/rejected": -1.5566884279251099, + "logps/chosen": -110.22516632080078, + "logps/rejected": -267.6278991699219, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.38134765625, + "rewards/margins": 14.499710083007812, + "rewards/rejected": -17.881057739257812, + "step": 3140 + }, + { + "epoch": 5.04, + "learning_rate": 2.443519619500594e-07, + "logits/chosen": -1.561842918395996, + "logits/rejected": -1.6149367094039917, + "logps/chosen": -141.79708862304688, + "logps/rejected": -279.7761535644531, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.632045745849609, + "rewards/margins": 11.58454704284668, + "rewards/rejected": -18.21659278869629, + "step": 3141 + }, + { + "epoch": 5.04, + "learning_rate": 2.442528735632184e-07, + "logits/chosen": -1.5129773616790771, + "logits/rejected": -1.5186468362808228, + "logps/chosen": -156.6436004638672, + "logps/rejected": -237.1956329345703, + "loss": 0.0028, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.661927223205566, + "rewards/margins": 9.545805931091309, + "rewards/rejected": -16.207731246948242, + "step": 3142 + }, + { + "epoch": 5.04, + "learning_rate": 2.4415378517637734e-07, + "logits/chosen": -1.4838292598724365, + "logits/rejected": -1.5774509906768799, + "logps/chosen": -154.13494873046875, + "logps/rejected": -323.2926330566406, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.614153861999512, + "rewards/margins": 15.523143768310547, + "rewards/rejected": -22.137298583984375, + "step": 3143 + }, + { + "epoch": 5.05, + "learning_rate": 2.4405469678953625e-07, + "logits/chosen": -1.5540493726730347, + "logits/rejected": -1.5812885761260986, + "logps/chosen": -133.6961669921875, + "logps/rejected": -245.48904418945312, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.887031555175781, + "rewards/margins": 12.682815551757812, + "rewards/rejected": -17.569847106933594, + "step": 3144 + }, + { + "epoch": 5.05, + "learning_rate": 2.439556084026952e-07, + "logits/chosen": -1.524163842201233, + "logits/rejected": -1.5160088539123535, + "logps/chosen": -142.4969482421875, + "logps/rejected": -287.4452819824219, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.722354412078857, + "rewards/margins": 12.59227466583252, + "rewards/rejected": -18.31462860107422, + "step": 3145 + }, + { + "epoch": 5.05, + "learning_rate": 2.438565200158541e-07, + "logits/chosen": -1.6073378324508667, + "logits/rejected": -1.6624040603637695, + "logps/chosen": -177.421875, + "logps/rejected": -341.3675842285156, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.220351219177246, + "rewards/margins": 14.256845474243164, + "rewards/rejected": -21.477197647094727, + "step": 3146 + }, + { + "epoch": 5.05, + "learning_rate": 2.437574316290131e-07, + "logits/chosen": -1.534429669380188, + "logits/rejected": -1.4994834661483765, + "logps/chosen": -172.43939208984375, + "logps/rejected": -264.2120666503906, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.800379276275635, + "rewards/margins": 10.000265121459961, + "rewards/rejected": -16.800643920898438, + "step": 3147 + }, + { + "epoch": 5.05, + "learning_rate": 2.43658343242172e-07, + "logits/chosen": -1.4677658081054688, + "logits/rejected": -1.421427607536316, + "logps/chosen": -163.04380798339844, + "logps/rejected": -293.80548095703125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.840329170227051, + "rewards/margins": 11.317394256591797, + "rewards/rejected": -19.157724380493164, + "step": 3148 + }, + { + "epoch": 5.05, + "learning_rate": 2.4355925485533094e-07, + "logits/chosen": -1.824153184890747, + "logits/rejected": -1.8157682418823242, + "logps/chosen": -125.73027038574219, + "logps/rejected": -284.9013977050781, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.916707515716553, + "rewards/margins": 14.998092651367188, + "rewards/rejected": -19.9148006439209, + "step": 3149 + }, + { + "epoch": 5.06, + "learning_rate": 2.434601664684899e-07, + "logits/chosen": -1.4629919528961182, + "logits/rejected": -1.646169662475586, + "logps/chosen": -135.14364624023438, + "logps/rejected": -273.2564697265625, + "loss": 0.0351, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.158356189727783, + "rewards/margins": 10.67152214050293, + "rewards/rejected": -16.829879760742188, + "step": 3150 + }, + { + "epoch": 5.06, + "learning_rate": 2.433610780816488e-07, + "logits/chosen": -1.498049259185791, + "logits/rejected": -1.4637588262557983, + "logps/chosen": -166.65927124023438, + "logps/rejected": -306.7194519042969, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.751962184906006, + "rewards/margins": 13.092897415161133, + "rewards/rejected": -19.844858169555664, + "step": 3151 + }, + { + "epoch": 5.06, + "learning_rate": 2.4326198969480777e-07, + "logits/chosen": -1.6863857507705688, + "logits/rejected": -1.7126166820526123, + "logps/chosen": -128.91854858398438, + "logps/rejected": -256.70367431640625, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.685506343841553, + "rewards/margins": 12.960786819458008, + "rewards/rejected": -17.646291732788086, + "step": 3152 + }, + { + "epoch": 5.06, + "learning_rate": 2.431629013079667e-07, + "logits/chosen": -1.6440143585205078, + "logits/rejected": -1.7044243812561035, + "logps/chosen": -142.89927673339844, + "logps/rejected": -303.29522705078125, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.680419921875, + "rewards/margins": 13.770174980163574, + "rewards/rejected": -19.45059585571289, + "step": 3153 + }, + { + "epoch": 5.06, + "learning_rate": 2.4306381292112564e-07, + "logits/chosen": -1.5221821069717407, + "logits/rejected": -1.5835154056549072, + "logps/chosen": -120.89131164550781, + "logps/rejected": -238.28048706054688, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.144491195678711, + "rewards/margins": 11.279291152954102, + "rewards/rejected": -16.423782348632812, + "step": 3154 + }, + { + "epoch": 5.06, + "learning_rate": 2.429647245342846e-07, + "logits/chosen": -1.554871678352356, + "logits/rejected": -1.6169588565826416, + "logps/chosen": -130.958740234375, + "logps/rejected": -251.32794189453125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.95173978805542, + "rewards/margins": 11.645973205566406, + "rewards/rejected": -16.597713470458984, + "step": 3155 + }, + { + "epoch": 5.07, + "learning_rate": 2.428656361474435e-07, + "logits/chosen": -1.6710537672042847, + "logits/rejected": -1.720177173614502, + "logps/chosen": -166.33872985839844, + "logps/rejected": -282.7784118652344, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.695423126220703, + "rewards/margins": 11.024748802185059, + "rewards/rejected": -15.720171928405762, + "step": 3156 + }, + { + "epoch": 5.07, + "learning_rate": 2.4276654776060246e-07, + "logits/chosen": -1.653134822845459, + "logits/rejected": -1.6333341598510742, + "logps/chosen": -141.50177001953125, + "logps/rejected": -273.343017578125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.305607318878174, + "rewards/margins": 13.651803970336914, + "rewards/rejected": -18.95741081237793, + "step": 3157 + }, + { + "epoch": 5.07, + "learning_rate": 2.4266745937376137e-07, + "logits/chosen": -1.4675874710083008, + "logits/rejected": -1.5357871055603027, + "logps/chosen": -145.43722534179688, + "logps/rejected": -272.02142333984375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.341757774353027, + "rewards/margins": 10.81241226196289, + "rewards/rejected": -17.154170989990234, + "step": 3158 + }, + { + "epoch": 5.07, + "learning_rate": 2.4256837098692033e-07, + "logits/chosen": -1.4958487749099731, + "logits/rejected": -1.5321637392044067, + "logps/chosen": -139.0830535888672, + "logps/rejected": -249.35081481933594, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.514171600341797, + "rewards/margins": 10.439618110656738, + "rewards/rejected": -16.95379066467285, + "step": 3159 + }, + { + "epoch": 5.07, + "learning_rate": 2.4246928260007924e-07, + "logits/chosen": -1.5937190055847168, + "logits/rejected": -1.5868417024612427, + "logps/chosen": -112.6384506225586, + "logps/rejected": -240.93423461914062, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.328816890716553, + "rewards/margins": 11.782520294189453, + "rewards/rejected": -16.111337661743164, + "step": 3160 + }, + { + "epoch": 5.07, + "learning_rate": 2.423701942132382e-07, + "logits/chosen": -1.6132572889328003, + "logits/rejected": -1.5710232257843018, + "logps/chosen": -167.9349822998047, + "logps/rejected": -280.2166748046875, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.472358703613281, + "rewards/margins": 12.534078598022461, + "rewards/rejected": -20.006437301635742, + "step": 3161 + }, + { + "epoch": 5.08, + "learning_rate": 2.4227110582639716e-07, + "logits/chosen": -1.548827886581421, + "logits/rejected": -1.486708164215088, + "logps/chosen": -129.19781494140625, + "logps/rejected": -243.1239013671875, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.169085502624512, + "rewards/margins": 11.44007396697998, + "rewards/rejected": -16.609159469604492, + "step": 3162 + }, + { + "epoch": 5.08, + "learning_rate": 2.4217201743955606e-07, + "logits/chosen": -1.4925241470336914, + "logits/rejected": -1.4897024631500244, + "logps/chosen": -171.30148315429688, + "logps/rejected": -314.03851318359375, + "loss": 0.0015, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.23686408996582, + "rewards/margins": 15.026387214660645, + "rewards/rejected": -22.26325225830078, + "step": 3163 + }, + { + "epoch": 5.08, + "learning_rate": 2.42072929052715e-07, + "logits/chosen": -1.6479902267456055, + "logits/rejected": -1.5054610967636108, + "logps/chosen": -151.65875244140625, + "logps/rejected": -258.36505126953125, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.161971092224121, + "rewards/margins": 13.31981372833252, + "rewards/rejected": -18.48178482055664, + "step": 3164 + }, + { + "epoch": 5.08, + "learning_rate": 2.4197384066587393e-07, + "logits/chosen": -1.6587005853652954, + "logits/rejected": -1.6956088542938232, + "logps/chosen": -125.80198669433594, + "logps/rejected": -257.9600524902344, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.259902000427246, + "rewards/margins": 13.361173629760742, + "rewards/rejected": -17.621074676513672, + "step": 3165 + }, + { + "epoch": 5.08, + "learning_rate": 2.418747522790329e-07, + "logits/chosen": -1.5346665382385254, + "logits/rejected": -1.5596517324447632, + "logps/chosen": -200.36203002929688, + "logps/rejected": -317.67291259765625, + "loss": 0.0087, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.520515441894531, + "rewards/margins": 10.853381156921387, + "rewards/rejected": -20.3738956451416, + "step": 3166 + }, + { + "epoch": 5.08, + "learning_rate": 2.417756638921918e-07, + "logits/chosen": -1.469420075416565, + "logits/rejected": -1.4556242227554321, + "logps/chosen": -131.2755584716797, + "logps/rejected": -303.41162109375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.084958076477051, + "rewards/margins": 13.508155822753906, + "rewards/rejected": -17.59311294555664, + "step": 3167 + }, + { + "epoch": 5.09, + "learning_rate": 2.4167657550535076e-07, + "logits/chosen": -1.4631619453430176, + "logits/rejected": -1.5311259031295776, + "logps/chosen": -145.0003662109375, + "logps/rejected": -245.78659057617188, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.472144603729248, + "rewards/margins": 9.785760879516602, + "rewards/rejected": -16.257904052734375, + "step": 3168 + }, + { + "epoch": 5.09, + "learning_rate": 2.415774871185097e-07, + "logits/chosen": -1.625868558883667, + "logits/rejected": -1.655190110206604, + "logps/chosen": -119.47862243652344, + "logps/rejected": -231.51852416992188, + "loss": 0.0023, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.8015151023864746, + "rewards/margins": 11.319512367248535, + "rewards/rejected": -15.121026992797852, + "step": 3169 + }, + { + "epoch": 5.09, + "learning_rate": 2.414783987316686e-07, + "logits/chosen": -1.4148235321044922, + "logits/rejected": -1.5776946544647217, + "logps/chosen": -112.34758758544922, + "logps/rejected": -229.85975646972656, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.333334445953369, + "rewards/margins": 9.78282642364502, + "rewards/rejected": -14.116161346435547, + "step": 3170 + }, + { + "epoch": 5.09, + "learning_rate": 2.413793103448276e-07, + "logits/chosen": -1.6155585050582886, + "logits/rejected": -1.5806667804718018, + "logps/chosen": -170.17526245117188, + "logps/rejected": -273.2729797363281, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.624184608459473, + "rewards/margins": 11.053913116455078, + "rewards/rejected": -17.678096771240234, + "step": 3171 + }, + { + "epoch": 5.09, + "learning_rate": 2.412802219579865e-07, + "logits/chosen": -1.6264030933380127, + "logits/rejected": -1.5830748081207275, + "logps/chosen": -146.99046325683594, + "logps/rejected": -300.71966552734375, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.727875232696533, + "rewards/margins": 15.240476608276367, + "rewards/rejected": -20.968353271484375, + "step": 3172 + }, + { + "epoch": 5.09, + "learning_rate": 2.4118113357114545e-07, + "logits/chosen": -1.4874294996261597, + "logits/rejected": -1.6085609197616577, + "logps/chosen": -158.98423767089844, + "logps/rejected": -268.0056457519531, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.468179225921631, + "rewards/margins": 10.704697608947754, + "rewards/rejected": -18.172876358032227, + "step": 3173 + }, + { + "epoch": 5.09, + "learning_rate": 2.410820451843044e-07, + "logits/chosen": -1.5545432567596436, + "logits/rejected": -1.6085259914398193, + "logps/chosen": -108.6632308959961, + "logps/rejected": -287.2519226074219, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9405882358551025, + "rewards/margins": 15.286620140075684, + "rewards/rejected": -19.227209091186523, + "step": 3174 + }, + { + "epoch": 5.1, + "learning_rate": 2.409829567974633e-07, + "logits/chosen": -1.6072496175765991, + "logits/rejected": -1.5307139158248901, + "logps/chosen": -168.88690185546875, + "logps/rejected": -244.57989501953125, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.488372802734375, + "rewards/margins": 8.880040168762207, + "rewards/rejected": -15.368412971496582, + "step": 3175 + }, + { + "epoch": 5.1, + "learning_rate": 2.408838684106223e-07, + "logits/chosen": -1.3584026098251343, + "logits/rejected": -1.3968713283538818, + "logps/chosen": -149.29000854492188, + "logps/rejected": -283.7102355957031, + "loss": 0.0099, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.038351058959961, + "rewards/margins": 13.755119323730469, + "rewards/rejected": -20.79347038269043, + "step": 3176 + }, + { + "epoch": 5.1, + "learning_rate": 2.407847800237812e-07, + "logits/chosen": -1.6401853561401367, + "logits/rejected": -1.5146300792694092, + "logps/chosen": -199.17617797851562, + "logps/rejected": -282.709716796875, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.502864360809326, + "rewards/margins": 10.344626426696777, + "rewards/rejected": -17.847490310668945, + "step": 3177 + }, + { + "epoch": 5.1, + "learning_rate": 2.4068569163694014e-07, + "logits/chosen": -1.6261147260665894, + "logits/rejected": -1.6783071756362915, + "logps/chosen": -195.93594360351562, + "logps/rejected": -296.75115966796875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.813990592956543, + "rewards/margins": 9.98478889465332, + "rewards/rejected": -16.798778533935547, + "step": 3178 + }, + { + "epoch": 5.1, + "learning_rate": 2.405866032500991e-07, + "logits/chosen": -1.5799000263214111, + "logits/rejected": -1.5825241804122925, + "logps/chosen": -116.3267593383789, + "logps/rejected": -216.80186462402344, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.04426383972168, + "rewards/margins": 9.95551872253418, + "rewards/rejected": -13.999783515930176, + "step": 3179 + }, + { + "epoch": 5.1, + "learning_rate": 2.40487514863258e-07, + "logits/chosen": -1.7790751457214355, + "logits/rejected": -1.7287636995315552, + "logps/chosen": -162.95701599121094, + "logps/rejected": -292.09710693359375, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.054365158081055, + "rewards/margins": 14.684898376464844, + "rewards/rejected": -21.73926544189453, + "step": 3180 + }, + { + "epoch": 5.11, + "learning_rate": 2.4038842647641697e-07, + "logits/chosen": -1.5564777851104736, + "logits/rejected": -1.4983683824539185, + "logps/chosen": -153.91122436523438, + "logps/rejected": -304.90020751953125, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.46242094039917, + "rewards/margins": 13.035486221313477, + "rewards/rejected": -18.497905731201172, + "step": 3181 + }, + { + "epoch": 5.11, + "learning_rate": 2.402893380895759e-07, + "logits/chosen": -1.5475441217422485, + "logits/rejected": -1.5356594324111938, + "logps/chosen": -147.68112182617188, + "logps/rejected": -248.908935546875, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.369869232177734, + "rewards/margins": 9.40334415435791, + "rewards/rejected": -16.773212432861328, + "step": 3182 + }, + { + "epoch": 5.11, + "learning_rate": 2.4019024970273484e-07, + "logits/chosen": -1.5567400455474854, + "logits/rejected": -1.5901281833648682, + "logps/chosen": -165.19375610351562, + "logps/rejected": -299.53887939453125, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.687152862548828, + "rewards/margins": 11.854867935180664, + "rewards/rejected": -19.542020797729492, + "step": 3183 + }, + { + "epoch": 5.11, + "learning_rate": 2.400911613158938e-07, + "logits/chosen": -1.5024412870407104, + "logits/rejected": -1.550827980041504, + "logps/chosen": -143.9988555908203, + "logps/rejected": -267.4727783203125, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.880146026611328, + "rewards/margins": 11.272326469421387, + "rewards/rejected": -17.15247344970703, + "step": 3184 + }, + { + "epoch": 5.11, + "learning_rate": 2.399920729290527e-07, + "logits/chosen": -1.5089281797409058, + "logits/rejected": -1.4820448160171509, + "logps/chosen": -104.75028228759766, + "logps/rejected": -213.5843505859375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3720955848693848, + "rewards/margins": 10.765411376953125, + "rewards/rejected": -14.137507438659668, + "step": 3185 + }, + { + "epoch": 5.11, + "learning_rate": 2.398929845422116e-07, + "logits/chosen": -1.5917717218399048, + "logits/rejected": -1.5431129932403564, + "logps/chosen": -97.7753677368164, + "logps/rejected": -204.78872680664062, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.330921173095703, + "rewards/margins": 11.401549339294434, + "rewards/rejected": -13.732471466064453, + "step": 3186 + }, + { + "epoch": 5.12, + "learning_rate": 2.3979389615537057e-07, + "logits/chosen": -1.54288649559021, + "logits/rejected": -1.52110755443573, + "logps/chosen": -131.3324737548828, + "logps/rejected": -243.46820068359375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9985809326171875, + "rewards/margins": 11.441302299499512, + "rewards/rejected": -15.439882278442383, + "step": 3187 + }, + { + "epoch": 5.12, + "learning_rate": 2.3969480776852953e-07, + "logits/chosen": -1.559395432472229, + "logits/rejected": -1.570784330368042, + "logps/chosen": -137.42257690429688, + "logps/rejected": -250.772216796875, + "loss": 0.0015, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.311646938323975, + "rewards/margins": 11.314321517944336, + "rewards/rejected": -16.62596893310547, + "step": 3188 + }, + { + "epoch": 5.12, + "learning_rate": 2.3959571938168843e-07, + "logits/chosen": -1.4227893352508545, + "logits/rejected": -1.4221336841583252, + "logps/chosen": -161.3375244140625, + "logps/rejected": -251.66964721679688, + "loss": 0.0054, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.020692825317383, + "rewards/margins": 9.012049674987793, + "rewards/rejected": -17.03274154663086, + "step": 3189 + }, + { + "epoch": 5.12, + "learning_rate": 2.394966309948474e-07, + "logits/chosen": -1.6116386651992798, + "logits/rejected": -1.6298054456710815, + "logps/chosen": -136.89144897460938, + "logps/rejected": -299.12750244140625, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.895498275756836, + "rewards/margins": 15.831296920776367, + "rewards/rejected": -20.726795196533203, + "step": 3190 + }, + { + "epoch": 5.12, + "learning_rate": 2.393975426080063e-07, + "logits/chosen": -1.624500036239624, + "logits/rejected": -1.7062586545944214, + "logps/chosen": -158.8345947265625, + "logps/rejected": -289.94500732421875, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.207781791687012, + "rewards/margins": 12.679265975952148, + "rewards/rejected": -18.887046813964844, + "step": 3191 + }, + { + "epoch": 5.12, + "learning_rate": 2.3929845422116526e-07, + "logits/chosen": -1.6003546714782715, + "logits/rejected": -1.613721489906311, + "logps/chosen": -154.72366333007812, + "logps/rejected": -287.6579895019531, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.323756217956543, + "rewards/margins": 13.252193450927734, + "rewards/rejected": -19.575950622558594, + "step": 3192 + }, + { + "epoch": 5.13, + "learning_rate": 2.391993658343242e-07, + "logits/chosen": -1.550842046737671, + "logits/rejected": -1.6385729312896729, + "logps/chosen": -100.74845886230469, + "logps/rejected": -266.0341796875, + "loss": 0.0018, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.272961139678955, + "rewards/margins": 14.451205253601074, + "rewards/rejected": -16.724166870117188, + "step": 3193 + }, + { + "epoch": 5.13, + "learning_rate": 2.3910027744748313e-07, + "logits/chosen": -1.4961446523666382, + "logits/rejected": -1.4985783100128174, + "logps/chosen": -145.59654235839844, + "logps/rejected": -288.4313049316406, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.940141677856445, + "rewards/margins": 14.661964416503906, + "rewards/rejected": -21.60210609436035, + "step": 3194 + }, + { + "epoch": 5.13, + "learning_rate": 2.390011890606421e-07, + "logits/chosen": -1.419656753540039, + "logits/rejected": -1.4520695209503174, + "logps/chosen": -123.42374420166016, + "logps/rejected": -256.1627197265625, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.6006455421447754, + "rewards/margins": 13.859405517578125, + "rewards/rejected": -17.460050582885742, + "step": 3195 + }, + { + "epoch": 5.13, + "learning_rate": 2.38902100673801e-07, + "logits/chosen": -1.5750820636749268, + "logits/rejected": -1.584234595298767, + "logps/chosen": -126.82999420166016, + "logps/rejected": -229.6516571044922, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.250291347503662, + "rewards/margins": 10.041524887084961, + "rewards/rejected": -14.291815757751465, + "step": 3196 + }, + { + "epoch": 5.13, + "learning_rate": 2.3880301228695995e-07, + "logits/chosen": -1.646533727645874, + "logits/rejected": -1.6222277879714966, + "logps/chosen": -158.57931518554688, + "logps/rejected": -277.1866455078125, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.46047306060791, + "rewards/margins": 12.738115310668945, + "rewards/rejected": -20.19858741760254, + "step": 3197 + }, + { + "epoch": 5.13, + "learning_rate": 2.387039239001189e-07, + "logits/chosen": -1.5066620111465454, + "logits/rejected": -1.5057357549667358, + "logps/chosen": -148.2268829345703, + "logps/rejected": -294.7750244140625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.161953449249268, + "rewards/margins": 14.586830139160156, + "rewards/rejected": -21.748783111572266, + "step": 3198 + }, + { + "epoch": 5.13, + "learning_rate": 2.386048355132778e-07, + "logits/chosen": -1.6287343502044678, + "logits/rejected": -1.6164053678512573, + "logps/chosen": -164.13414001464844, + "logps/rejected": -249.60653686523438, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.410094738006592, + "rewards/margins": 10.765108108520508, + "rewards/rejected": -17.17520523071289, + "step": 3199 + }, + { + "epoch": 5.14, + "learning_rate": 2.385057471264368e-07, + "logits/chosen": -1.4799693822860718, + "logits/rejected": -1.5283031463623047, + "logps/chosen": -146.6845703125, + "logps/rejected": -269.7674560546875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.749364852905273, + "rewards/margins": 11.234390258789062, + "rewards/rejected": -16.983755111694336, + "step": 3200 + }, + { + "epoch": 5.14, + "learning_rate": 2.3840665873959571e-07, + "logits/chosen": -1.5180721282958984, + "logits/rejected": -1.5383292436599731, + "logps/chosen": -140.6108856201172, + "logps/rejected": -295.345458984375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.321572303771973, + "rewards/margins": 13.186603546142578, + "rewards/rejected": -18.508174896240234, + "step": 3201 + }, + { + "epoch": 5.14, + "learning_rate": 2.3830757035275465e-07, + "logits/chosen": -1.6529362201690674, + "logits/rejected": -1.6601307392120361, + "logps/chosen": -145.5390625, + "logps/rejected": -277.162841796875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.232569694519043, + "rewards/margins": 13.778791427612305, + "rewards/rejected": -19.01136016845703, + "step": 3202 + }, + { + "epoch": 5.14, + "learning_rate": 2.3820848196591358e-07, + "logits/chosen": -1.4580551385879517, + "logits/rejected": -1.5134916305541992, + "logps/chosen": -106.58941650390625, + "logps/rejected": -242.60333251953125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.5802881717681885, + "rewards/margins": 10.833939552307129, + "rewards/rejected": -14.414228439331055, + "step": 3203 + }, + { + "epoch": 5.14, + "learning_rate": 2.3810939357907254e-07, + "logits/chosen": -1.5074307918548584, + "logits/rejected": -1.5751277208328247, + "logps/chosen": -147.79502868652344, + "logps/rejected": -249.8145751953125, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.987096786499023, + "rewards/margins": 10.71384334564209, + "rewards/rejected": -16.700939178466797, + "step": 3204 + }, + { + "epoch": 5.14, + "learning_rate": 2.3801030519223145e-07, + "logits/chosen": -1.5912379026412964, + "logits/rejected": -1.605468988418579, + "logps/chosen": -127.42286682128906, + "logps/rejected": -254.30679321289062, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.925098896026611, + "rewards/margins": 12.603862762451172, + "rewards/rejected": -17.528961181640625, + "step": 3205 + }, + { + "epoch": 5.15, + "learning_rate": 2.3791121680539038e-07, + "logits/chosen": -1.432645320892334, + "logits/rejected": -1.4523851871490479, + "logps/chosen": -142.0499267578125, + "logps/rejected": -309.50653076171875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.040424823760986, + "rewards/margins": 14.45701789855957, + "rewards/rejected": -20.4974422454834, + "step": 3206 + }, + { + "epoch": 5.15, + "learning_rate": 2.3781212841854934e-07, + "logits/chosen": -1.5784976482391357, + "logits/rejected": -1.5892794132232666, + "logps/chosen": -84.11479949951172, + "logps/rejected": -244.76019287109375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9428043365478516, + "rewards/margins": 15.518158912658691, + "rewards/rejected": -17.460962295532227, + "step": 3207 + }, + { + "epoch": 5.15, + "learning_rate": 2.3771304003170827e-07, + "logits/chosen": -1.5341260433197021, + "logits/rejected": -1.5225542783737183, + "logps/chosen": -169.13433837890625, + "logps/rejected": -288.4042663574219, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.703637599945068, + "rewards/margins": 12.756125450134277, + "rewards/rejected": -19.459762573242188, + "step": 3208 + }, + { + "epoch": 5.15, + "learning_rate": 2.3761395164486723e-07, + "logits/chosen": -1.478395938873291, + "logits/rejected": -1.5561151504516602, + "logps/chosen": -131.5529022216797, + "logps/rejected": -290.9166564941406, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7935383319854736, + "rewards/margins": 15.696136474609375, + "rewards/rejected": -19.489673614501953, + "step": 3209 + }, + { + "epoch": 5.15, + "learning_rate": 2.3751486325802614e-07, + "logits/chosen": -1.643330454826355, + "logits/rejected": -1.7135322093963623, + "logps/chosen": -144.00119018554688, + "logps/rejected": -237.15234375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.963705062866211, + "rewards/margins": 10.236708641052246, + "rewards/rejected": -16.200414657592773, + "step": 3210 + }, + { + "epoch": 5.15, + "learning_rate": 2.3741577487118507e-07, + "logits/chosen": -1.4808939695358276, + "logits/rejected": -1.4823921918869019, + "logps/chosen": -152.72988891601562, + "logps/rejected": -240.95889282226562, + "loss": 0.007, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.707036972045898, + "rewards/margins": 9.11284351348877, + "rewards/rejected": -15.819881439208984, + "step": 3211 + }, + { + "epoch": 5.16, + "learning_rate": 2.3731668648434403e-07, + "logits/chosen": -1.6992356777191162, + "logits/rejected": -1.7742265462875366, + "logps/chosen": -107.19584655761719, + "logps/rejected": -208.1270751953125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9430060386657715, + "rewards/margins": 10.682143211364746, + "rewards/rejected": -13.625149726867676, + "step": 3212 + }, + { + "epoch": 5.16, + "learning_rate": 2.3721759809750297e-07, + "logits/chosen": -1.4266363382339478, + "logits/rejected": -1.4660193920135498, + "logps/chosen": -150.39674377441406, + "logps/rejected": -326.59552001953125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.167367935180664, + "rewards/margins": 17.413372039794922, + "rewards/rejected": -23.58074188232422, + "step": 3213 + }, + { + "epoch": 5.16, + "learning_rate": 2.371185097106619e-07, + "logits/chosen": -1.6089842319488525, + "logits/rejected": -1.5871546268463135, + "logps/chosen": -114.25248718261719, + "logps/rejected": -219.93336486816406, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.5139923095703125, + "rewards/margins": 11.068422317504883, + "rewards/rejected": -14.582414627075195, + "step": 3214 + }, + { + "epoch": 5.16, + "learning_rate": 2.3701942132382083e-07, + "logits/chosen": -1.4985086917877197, + "logits/rejected": -1.544895052909851, + "logps/chosen": -203.76171875, + "logps/rejected": -313.0286865234375, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.600740432739258, + "rewards/margins": 10.468510627746582, + "rewards/rejected": -20.069250106811523, + "step": 3215 + }, + { + "epoch": 5.16, + "learning_rate": 2.3692033293697977e-07, + "logits/chosen": -1.5833156108856201, + "logits/rejected": -1.6952776908874512, + "logps/chosen": -115.45599365234375, + "logps/rejected": -243.67604064941406, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.594620704650879, + "rewards/margins": 11.863938331604004, + "rewards/rejected": -17.458559036254883, + "step": 3216 + }, + { + "epoch": 5.16, + "learning_rate": 2.3682124455013873e-07, + "logits/chosen": -1.4744211435317993, + "logits/rejected": -1.4402027130126953, + "logps/chosen": -122.39469909667969, + "logps/rejected": -213.79937744140625, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.751378059387207, + "rewards/margins": 10.528996467590332, + "rewards/rejected": -14.280374526977539, + "step": 3217 + }, + { + "epoch": 5.17, + "learning_rate": 2.3672215616329766e-07, + "logits/chosen": -1.4203299283981323, + "logits/rejected": -1.5032073259353638, + "logps/chosen": -117.93722534179688, + "logps/rejected": -296.6234130859375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.513025283813477, + "rewards/margins": 15.677573204040527, + "rewards/rejected": -21.190597534179688, + "step": 3218 + }, + { + "epoch": 5.17, + "learning_rate": 2.3662306777645657e-07, + "logits/chosen": -1.5693271160125732, + "logits/rejected": -1.5620808601379395, + "logps/chosen": -161.18385314941406, + "logps/rejected": -257.6756591796875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.115326881408691, + "rewards/margins": 10.601192474365234, + "rewards/rejected": -16.71651840209961, + "step": 3219 + }, + { + "epoch": 5.17, + "learning_rate": 2.3652397938961553e-07, + "logits/chosen": -1.5126863718032837, + "logits/rejected": -1.5029302835464478, + "logps/chosen": -132.8124237060547, + "logps/rejected": -271.66888427734375, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.430583477020264, + "rewards/margins": 13.23404598236084, + "rewards/rejected": -18.664630889892578, + "step": 3220 + }, + { + "epoch": 5.17, + "learning_rate": 2.3642489100277446e-07, + "logits/chosen": -1.591031551361084, + "logits/rejected": -1.5691275596618652, + "logps/chosen": -119.74256896972656, + "logps/rejected": -261.3751220703125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.776947975158691, + "rewards/margins": 14.37158489227295, + "rewards/rejected": -19.14853286743164, + "step": 3221 + }, + { + "epoch": 5.17, + "learning_rate": 2.3632580261593342e-07, + "logits/chosen": -1.4939556121826172, + "logits/rejected": -1.4509109258651733, + "logps/chosen": -126.8677978515625, + "logps/rejected": -199.62594604492188, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.6359381675720215, + "rewards/margins": 7.349055767059326, + "rewards/rejected": -11.984993934631348, + "step": 3222 + }, + { + "epoch": 5.17, + "learning_rate": 2.3622671422909235e-07, + "logits/chosen": -1.5984020233154297, + "logits/rejected": -1.5163812637329102, + "logps/chosen": -205.803466796875, + "logps/rejected": -278.2143859863281, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.173091888427734, + "rewards/margins": 9.750670433044434, + "rewards/rejected": -18.923763275146484, + "step": 3223 + }, + { + "epoch": 5.17, + "learning_rate": 2.3612762584225126e-07, + "logits/chosen": -1.5635510683059692, + "logits/rejected": -1.5466606616973877, + "logps/chosen": -132.71099853515625, + "logps/rejected": -317.65966796875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.270294666290283, + "rewards/margins": 17.413549423217773, + "rewards/rejected": -21.6838436126709, + "step": 3224 + }, + { + "epoch": 5.18, + "learning_rate": 2.3602853745541022e-07, + "logits/chosen": -1.6111106872558594, + "logits/rejected": -1.6414375305175781, + "logps/chosen": -124.83035278320312, + "logps/rejected": -252.02389526367188, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.641130447387695, + "rewards/margins": 13.044724464416504, + "rewards/rejected": -17.685855865478516, + "step": 3225 + }, + { + "epoch": 5.18, + "learning_rate": 2.3592944906856915e-07, + "logits/chosen": -1.4124680757522583, + "logits/rejected": -1.4615193605422974, + "logps/chosen": -167.5802459716797, + "logps/rejected": -281.37481689453125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.54211950302124, + "rewards/margins": 11.974138259887695, + "rewards/rejected": -18.516258239746094, + "step": 3226 + }, + { + "epoch": 5.18, + "learning_rate": 2.3583036068172809e-07, + "logits/chosen": -1.5986031293869019, + "logits/rejected": -1.6610945463180542, + "logps/chosen": -141.59658813476562, + "logps/rejected": -285.4009704589844, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.51434326171875, + "rewards/margins": 14.436666488647461, + "rewards/rejected": -18.95100975036621, + "step": 3227 + }, + { + "epoch": 5.18, + "learning_rate": 2.3573127229488705e-07, + "logits/chosen": -1.4598937034606934, + "logits/rejected": -1.487381935119629, + "logps/chosen": -182.51158142089844, + "logps/rejected": -309.7664794921875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.192888259887695, + "rewards/margins": 11.046586990356445, + "rewards/rejected": -19.23947525024414, + "step": 3228 + }, + { + "epoch": 5.18, + "learning_rate": 2.3563218390804595e-07, + "logits/chosen": -1.6894848346710205, + "logits/rejected": -1.6161319017410278, + "logps/chosen": -169.37271118164062, + "logps/rejected": -266.62249755859375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.891951084136963, + "rewards/margins": 11.862787246704102, + "rewards/rejected": -18.754737854003906, + "step": 3229 + }, + { + "epoch": 5.18, + "learning_rate": 2.355330955212049e-07, + "logits/chosen": -1.5411924123764038, + "logits/rejected": -1.6398751735687256, + "logps/chosen": -165.51388549804688, + "logps/rejected": -267.7999267578125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.744457244873047, + "rewards/margins": 10.326869010925293, + "rewards/rejected": -18.071327209472656, + "step": 3230 + }, + { + "epoch": 5.19, + "learning_rate": 2.3543400713436385e-07, + "logits/chosen": -1.5037921667099, + "logits/rejected": -1.5333102941513062, + "logps/chosen": -149.53155517578125, + "logps/rejected": -303.3439025878906, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.761889934539795, + "rewards/margins": 13.212581634521484, + "rewards/rejected": -19.974472045898438, + "step": 3231 + }, + { + "epoch": 5.19, + "learning_rate": 2.3533491874752278e-07, + "logits/chosen": -1.675283670425415, + "logits/rejected": -1.6726597547531128, + "logps/chosen": -108.67935180664062, + "logps/rejected": -214.2249298095703, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9023025035858154, + "rewards/margins": 11.04350757598877, + "rewards/rejected": -13.945810317993164, + "step": 3232 + }, + { + "epoch": 5.19, + "learning_rate": 2.3523583036068174e-07, + "logits/chosen": -1.4788655042648315, + "logits/rejected": -1.407546043395996, + "logps/chosen": -163.31475830078125, + "logps/rejected": -283.85540771484375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.411073684692383, + "rewards/margins": 11.93958854675293, + "rewards/rejected": -18.350662231445312, + "step": 3233 + }, + { + "epoch": 5.19, + "learning_rate": 2.3513674197384065e-07, + "logits/chosen": -1.5954835414886475, + "logits/rejected": -1.5625483989715576, + "logps/chosen": -134.38287353515625, + "logps/rejected": -241.89967346191406, + "loss": 0.0097, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.399716377258301, + "rewards/margins": 9.627535820007324, + "rewards/rejected": -16.027252197265625, + "step": 3234 + }, + { + "epoch": 5.19, + "learning_rate": 2.3503765358699958e-07, + "logits/chosen": -1.6475552320480347, + "logits/rejected": -1.6721241474151611, + "logps/chosen": -176.6290740966797, + "logps/rejected": -307.8644104003906, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.776479244232178, + "rewards/margins": 12.946535110473633, + "rewards/rejected": -20.72301483154297, + "step": 3235 + }, + { + "epoch": 5.19, + "learning_rate": 2.3493856520015854e-07, + "logits/chosen": -1.3998464345932007, + "logits/rejected": -1.5255343914031982, + "logps/chosen": -152.8020477294922, + "logps/rejected": -280.48681640625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.6975202560424805, + "rewards/margins": 11.225238800048828, + "rewards/rejected": -17.922760009765625, + "step": 3236 + }, + { + "epoch": 5.2, + "learning_rate": 2.3483947681331747e-07, + "logits/chosen": -1.594093918800354, + "logits/rejected": -1.7372207641601562, + "logps/chosen": -108.70181274414062, + "logps/rejected": -230.80320739746094, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.338521480560303, + "rewards/margins": 11.556549072265625, + "rewards/rejected": -15.895071029663086, + "step": 3237 + }, + { + "epoch": 5.2, + "learning_rate": 2.347403884264764e-07, + "logits/chosen": -1.476464033126831, + "logits/rejected": -1.5167592763900757, + "logps/chosen": -186.84690856933594, + "logps/rejected": -275.9740295410156, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.133797645568848, + "rewards/margins": 9.393763542175293, + "rewards/rejected": -18.52756118774414, + "step": 3238 + }, + { + "epoch": 5.2, + "learning_rate": 2.3464130003963534e-07, + "logits/chosen": -1.7026022672653198, + "logits/rejected": -1.6584250926971436, + "logps/chosen": -160.26150512695312, + "logps/rejected": -283.57635498046875, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.77739143371582, + "rewards/margins": 13.933647155761719, + "rewards/rejected": -18.71103858947754, + "step": 3239 + }, + { + "epoch": 5.2, + "learning_rate": 2.3454221165279427e-07, + "logits/chosen": -1.472076177597046, + "logits/rejected": -1.4352281093597412, + "logps/chosen": -167.53164672851562, + "logps/rejected": -320.10174560546875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.890683650970459, + "rewards/margins": 14.155416488647461, + "rewards/rejected": -22.046100616455078, + "step": 3240 + }, + { + "epoch": 5.2, + "learning_rate": 2.3444312326595323e-07, + "logits/chosen": -1.5412604808807373, + "logits/rejected": -1.453109860420227, + "logps/chosen": -185.27133178710938, + "logps/rejected": -314.72479248046875, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.390541076660156, + "rewards/margins": 13.547540664672852, + "rewards/rejected": -22.938081741333008, + "step": 3241 + }, + { + "epoch": 5.2, + "learning_rate": 2.3434403487911216e-07, + "logits/chosen": -1.6711206436157227, + "logits/rejected": -1.6967322826385498, + "logps/chosen": -148.71234130859375, + "logps/rejected": -305.2937316894531, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.829867362976074, + "rewards/margins": 13.141830444335938, + "rewards/rejected": -19.971698760986328, + "step": 3242 + }, + { + "epoch": 5.21, + "learning_rate": 2.3424494649227107e-07, + "logits/chosen": -1.63736891746521, + "logits/rejected": -1.6895978450775146, + "logps/chosen": -155.7957000732422, + "logps/rejected": -304.1769714355469, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.213360786437988, + "rewards/margins": 14.222009658813477, + "rewards/rejected": -22.43536949157715, + "step": 3243 + }, + { + "epoch": 5.21, + "learning_rate": 2.3414585810543003e-07, + "logits/chosen": -1.5346466302871704, + "logits/rejected": -1.4475499391555786, + "logps/chosen": -156.03607177734375, + "logps/rejected": -272.4963684082031, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.2712812423706055, + "rewards/margins": 11.385296821594238, + "rewards/rejected": -17.656578063964844, + "step": 3244 + }, + { + "epoch": 5.21, + "learning_rate": 2.3404676971858896e-07, + "logits/chosen": -1.5307530164718628, + "logits/rejected": -1.6006977558135986, + "logps/chosen": -132.73867797851562, + "logps/rejected": -228.21435546875, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.018735885620117, + "rewards/margins": 8.67677116394043, + "rewards/rejected": -13.695507049560547, + "step": 3245 + }, + { + "epoch": 5.21, + "learning_rate": 2.3394768133174792e-07, + "logits/chosen": -1.3633646965026855, + "logits/rejected": -1.4136271476745605, + "logps/chosen": -132.14773559570312, + "logps/rejected": -255.9661865234375, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.652203559875488, + "rewards/margins": 10.587190628051758, + "rewards/rejected": -17.239395141601562, + "step": 3246 + }, + { + "epoch": 5.21, + "learning_rate": 2.3384859294490686e-07, + "logits/chosen": -1.5207713842391968, + "logits/rejected": -1.5364586114883423, + "logps/chosen": -125.44482421875, + "logps/rejected": -233.26730346679688, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.150960922241211, + "rewards/margins": 9.541621208190918, + "rewards/rejected": -14.692583084106445, + "step": 3247 + }, + { + "epoch": 5.21, + "learning_rate": 2.3374950455806576e-07, + "logits/chosen": -1.5898964405059814, + "logits/rejected": -1.6817491054534912, + "logps/chosen": -130.79428100585938, + "logps/rejected": -329.93597412109375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.431640625, + "rewards/margins": 16.04582405090332, + "rewards/rejected": -22.477462768554688, + "step": 3248 + }, + { + "epoch": 5.22, + "learning_rate": 2.3365041617122472e-07, + "logits/chosen": -1.7000527381896973, + "logits/rejected": -1.6941851377487183, + "logps/chosen": -140.56613159179688, + "logps/rejected": -214.4590606689453, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.589258193969727, + "rewards/margins": 9.570714950561523, + "rewards/rejected": -14.159974098205566, + "step": 3249 + }, + { + "epoch": 5.22, + "learning_rate": 2.3355132778438366e-07, + "logits/chosen": -1.5193397998809814, + "logits/rejected": -1.5657575130462646, + "logps/chosen": -185.35009765625, + "logps/rejected": -344.8037414550781, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.23292350769043, + "rewards/margins": 15.275766372680664, + "rewards/rejected": -24.508689880371094, + "step": 3250 + }, + { + "epoch": 5.22, + "learning_rate": 2.3345223939754262e-07, + "logits/chosen": -1.5152623653411865, + "logits/rejected": -1.607582926750183, + "logps/chosen": -165.06732177734375, + "logps/rejected": -300.64239501953125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.504597663879395, + "rewards/margins": 12.122743606567383, + "rewards/rejected": -21.627342224121094, + "step": 3251 + }, + { + "epoch": 5.22, + "learning_rate": 2.3335315101070155e-07, + "logits/chosen": -1.5840299129486084, + "logits/rejected": -1.6357758045196533, + "logps/chosen": -127.7243881225586, + "logps/rejected": -240.23248291015625, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.148174285888672, + "rewards/margins": 10.319061279296875, + "rewards/rejected": -15.467235565185547, + "step": 3252 + }, + { + "epoch": 5.22, + "learning_rate": 2.3325406262386046e-07, + "logits/chosen": -1.5520362854003906, + "logits/rejected": -1.5606131553649902, + "logps/chosen": -146.765380859375, + "logps/rejected": -264.03167724609375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.288313865661621, + "rewards/margins": 11.18571662902832, + "rewards/rejected": -16.474029541015625, + "step": 3253 + }, + { + "epoch": 5.22, + "learning_rate": 2.3315497423701942e-07, + "logits/chosen": -1.418746829032898, + "logits/rejected": -1.3575607538223267, + "logps/chosen": -156.94210815429688, + "logps/rejected": -277.6868896484375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.608823299407959, + "rewards/margins": 12.016447067260742, + "rewards/rejected": -19.62527084350586, + "step": 3254 + }, + { + "epoch": 5.22, + "learning_rate": 2.3305588585017835e-07, + "logits/chosen": -1.7951960563659668, + "logits/rejected": -1.7447463274002075, + "logps/chosen": -178.4369354248047, + "logps/rejected": -295.24310302734375, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.495842456817627, + "rewards/margins": 12.618724822998047, + "rewards/rejected": -18.114566802978516, + "step": 3255 + }, + { + "epoch": 5.23, + "learning_rate": 2.3295679746333728e-07, + "logits/chosen": -1.7019855976104736, + "logits/rejected": -1.6987783908843994, + "logps/chosen": -137.5904083251953, + "logps/rejected": -266.77313232421875, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.693784713745117, + "rewards/margins": 13.427701950073242, + "rewards/rejected": -18.12148666381836, + "step": 3256 + }, + { + "epoch": 5.23, + "learning_rate": 2.3285770907649622e-07, + "logits/chosen": -1.5488239526748657, + "logits/rejected": -1.5518592596054077, + "logps/chosen": -161.21530151367188, + "logps/rejected": -262.62506103515625, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.895386219024658, + "rewards/margins": 11.002251625061035, + "rewards/rejected": -17.89763641357422, + "step": 3257 + }, + { + "epoch": 5.23, + "learning_rate": 2.3275862068965515e-07, + "logits/chosen": -1.467268466949463, + "logits/rejected": -1.4266059398651123, + "logps/chosen": -167.54747009277344, + "logps/rejected": -259.21722412109375, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.52095365524292, + "rewards/margins": 10.596217155456543, + "rewards/rejected": -17.117170333862305, + "step": 3258 + }, + { + "epoch": 5.23, + "learning_rate": 2.326595323028141e-07, + "logits/chosen": -1.352489948272705, + "logits/rejected": -1.4461692571640015, + "logps/chosen": -150.4564666748047, + "logps/rejected": -260.1969909667969, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.805381774902344, + "rewards/margins": 8.863666534423828, + "rewards/rejected": -15.669048309326172, + "step": 3259 + }, + { + "epoch": 5.23, + "learning_rate": 2.3256044391597304e-07, + "logits/chosen": -1.535402774810791, + "logits/rejected": -1.5595383644104004, + "logps/chosen": -118.28038024902344, + "logps/rejected": -230.89389038085938, + "loss": 0.003, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.7228240966796875, + "rewards/margins": 10.820110321044922, + "rewards/rejected": -15.54293441772461, + "step": 3260 + }, + { + "epoch": 5.23, + "learning_rate": 2.3246135552913198e-07, + "logits/chosen": -1.5762367248535156, + "logits/rejected": -1.5843279361724854, + "logps/chosen": -178.55323791503906, + "logps/rejected": -325.1698913574219, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.951789379119873, + "rewards/margins": 13.694095611572266, + "rewards/rejected": -21.645885467529297, + "step": 3261 + }, + { + "epoch": 5.24, + "learning_rate": 2.323622671422909e-07, + "logits/chosen": -1.4012043476104736, + "logits/rejected": -1.6242129802703857, + "logps/chosen": -112.91183471679688, + "logps/rejected": -305.6136169433594, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.708033800125122, + "rewards/margins": 13.885345458984375, + "rewards/rejected": -17.593379974365234, + "step": 3262 + }, + { + "epoch": 5.24, + "learning_rate": 2.3226317875544984e-07, + "logits/chosen": -1.7395079135894775, + "logits/rejected": -1.6762055158615112, + "logps/chosen": -124.63433837890625, + "logps/rejected": -242.65408325195312, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.523141384124756, + "rewards/margins": 12.231035232543945, + "rewards/rejected": -16.75417709350586, + "step": 3263 + }, + { + "epoch": 5.24, + "learning_rate": 2.3216409036860878e-07, + "logits/chosen": -1.611687183380127, + "logits/rejected": -1.5990633964538574, + "logps/chosen": -154.560546875, + "logps/rejected": -262.45916748046875, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.212143898010254, + "rewards/margins": 10.791460990905762, + "rewards/rejected": -17.003604888916016, + "step": 3264 + }, + { + "epoch": 5.24, + "learning_rate": 2.3206500198176774e-07, + "logits/chosen": -1.494474172592163, + "logits/rejected": -1.493428349494934, + "logps/chosen": -182.43023681640625, + "logps/rejected": -310.861328125, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.06014347076416, + "rewards/margins": 11.876724243164062, + "rewards/rejected": -19.93686866760254, + "step": 3265 + }, + { + "epoch": 5.24, + "learning_rate": 2.3196591359492667e-07, + "logits/chosen": -1.4659819602966309, + "logits/rejected": -1.4336625337600708, + "logps/chosen": -120.85904693603516, + "logps/rejected": -222.517578125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.353159427642822, + "rewards/margins": 10.461543083190918, + "rewards/rejected": -14.814702987670898, + "step": 3266 + }, + { + "epoch": 5.24, + "learning_rate": 2.318668252080856e-07, + "logits/chosen": -1.7432674169540405, + "logits/rejected": -1.617167592048645, + "logps/chosen": -130.8292236328125, + "logps/rejected": -244.19369506835938, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7796995639801025, + "rewards/margins": 14.482709884643555, + "rewards/rejected": -16.262409210205078, + "step": 3267 + }, + { + "epoch": 5.25, + "learning_rate": 2.3176773682124454e-07, + "logits/chosen": -1.6015013456344604, + "logits/rejected": -1.5804815292358398, + "logps/chosen": -110.40737915039062, + "logps/rejected": -279.0521240234375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.332005023956299, + "rewards/margins": 15.928768157958984, + "rewards/rejected": -20.260772705078125, + "step": 3268 + }, + { + "epoch": 5.25, + "learning_rate": 2.3166864843440347e-07, + "logits/chosen": -1.5537872314453125, + "logits/rejected": -1.5661128759384155, + "logps/chosen": -180.87454223632812, + "logps/rejected": -294.96343994140625, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.87488842010498, + "rewards/margins": 11.79098129272461, + "rewards/rejected": -20.665868759155273, + "step": 3269 + }, + { + "epoch": 5.25, + "learning_rate": 2.3156956004756243e-07, + "logits/chosen": -1.4591339826583862, + "logits/rejected": -1.4485584497451782, + "logps/chosen": -162.53550720214844, + "logps/rejected": -280.2856140136719, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.929163455963135, + "rewards/margins": 10.754241943359375, + "rewards/rejected": -18.68340492248535, + "step": 3270 + }, + { + "epoch": 5.25, + "learning_rate": 2.3147047166072134e-07, + "logits/chosen": -1.390628457069397, + "logits/rejected": -1.3830832242965698, + "logps/chosen": -157.52175903320312, + "logps/rejected": -244.1839599609375, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.661186218261719, + "rewards/margins": 9.477179527282715, + "rewards/rejected": -16.138364791870117, + "step": 3271 + }, + { + "epoch": 5.25, + "learning_rate": 2.313713832738803e-07, + "logits/chosen": -1.5687041282653809, + "logits/rejected": -1.5763561725616455, + "logps/chosen": -173.70741271972656, + "logps/rejected": -299.7364807128906, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.241046905517578, + "rewards/margins": 12.588224411010742, + "rewards/rejected": -20.829273223876953, + "step": 3272 + }, + { + "epoch": 5.25, + "learning_rate": 2.3127229488703923e-07, + "logits/chosen": -1.5640531778335571, + "logits/rejected": -1.5544629096984863, + "logps/chosen": -110.02836608886719, + "logps/rejected": -266.82366943359375, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.2608039379119873, + "rewards/margins": 14.549251556396484, + "rewards/rejected": -17.810054779052734, + "step": 3273 + }, + { + "epoch": 5.26, + "learning_rate": 2.3117320650019816e-07, + "logits/chosen": -1.558677077293396, + "logits/rejected": -1.617550015449524, + "logps/chosen": -112.341552734375, + "logps/rejected": -240.3922576904297, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.679274559020996, + "rewards/margins": 12.593742370605469, + "rewards/rejected": -17.27301788330078, + "step": 3274 + }, + { + "epoch": 5.26, + "learning_rate": 2.3107411811335712e-07, + "logits/chosen": -1.719848394393921, + "logits/rejected": -1.7204039096832275, + "logps/chosen": -124.46070861816406, + "logps/rejected": -249.36705017089844, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.4637322425842285, + "rewards/margins": 12.323979377746582, + "rewards/rejected": -16.78771209716797, + "step": 3275 + }, + { + "epoch": 5.26, + "learning_rate": 2.3097502972651603e-07, + "logits/chosen": -1.5134086608886719, + "logits/rejected": -1.4789170026779175, + "logps/chosen": -203.6083984375, + "logps/rejected": -327.28729248046875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.652441024780273, + "rewards/margins": 13.799589157104492, + "rewards/rejected": -21.452028274536133, + "step": 3276 + }, + { + "epoch": 5.26, + "learning_rate": 2.3087594133967496e-07, + "logits/chosen": -1.5415900945663452, + "logits/rejected": -1.581908941268921, + "logps/chosen": -188.88739013671875, + "logps/rejected": -316.2205505371094, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.665101051330566, + "rewards/margins": 13.70997428894043, + "rewards/rejected": -20.37507438659668, + "step": 3277 + }, + { + "epoch": 5.26, + "learning_rate": 2.3077685295283392e-07, + "logits/chosen": -1.400320053100586, + "logits/rejected": -1.4474568367004395, + "logps/chosen": -176.33660888671875, + "logps/rejected": -345.6932373046875, + "loss": 0.0031, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.29930305480957, + "rewards/margins": 17.116024017333984, + "rewards/rejected": -25.415327072143555, + "step": 3278 + }, + { + "epoch": 5.26, + "learning_rate": 2.3067776456599286e-07, + "logits/chosen": -1.5643625259399414, + "logits/rejected": -1.5363796949386597, + "logps/chosen": -122.2890396118164, + "logps/rejected": -227.96897888183594, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.6713435649871826, + "rewards/margins": 10.427351951599121, + "rewards/rejected": -14.098694801330566, + "step": 3279 + }, + { + "epoch": 5.26, + "learning_rate": 2.3057867617915181e-07, + "logits/chosen": -1.6216700077056885, + "logits/rejected": -1.6071879863739014, + "logps/chosen": -162.56187438964844, + "logps/rejected": -268.1200256347656, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.406813621520996, + "rewards/margins": 11.757715225219727, + "rewards/rejected": -19.16452980041504, + "step": 3280 + }, + { + "epoch": 5.27, + "learning_rate": 2.3047958779231072e-07, + "logits/chosen": -1.5049550533294678, + "logits/rejected": -1.495296835899353, + "logps/chosen": -151.47744750976562, + "logps/rejected": -236.39984130859375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.343379020690918, + "rewards/margins": 10.43460464477539, + "rewards/rejected": -15.777982711791992, + "step": 3281 + }, + { + "epoch": 5.27, + "learning_rate": 2.3038049940546966e-07, + "logits/chosen": -1.5928688049316406, + "logits/rejected": -1.6381444931030273, + "logps/chosen": -136.7577667236328, + "logps/rejected": -288.31103515625, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.79954719543457, + "rewards/margins": 14.656883239746094, + "rewards/rejected": -20.456430435180664, + "step": 3282 + }, + { + "epoch": 5.27, + "learning_rate": 2.3028141101862861e-07, + "logits/chosen": -1.490057349205017, + "logits/rejected": -1.4614628553390503, + "logps/chosen": -175.01210021972656, + "logps/rejected": -289.3257141113281, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.845943450927734, + "rewards/margins": 10.557852745056152, + "rewards/rejected": -20.40379524230957, + "step": 3283 + }, + { + "epoch": 5.27, + "learning_rate": 2.3018232263178755e-07, + "logits/chosen": -1.465447187423706, + "logits/rejected": -1.5683213472366333, + "logps/chosen": -190.9565887451172, + "logps/rejected": -349.9540710449219, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.824747085571289, + "rewards/margins": 14.203786849975586, + "rewards/rejected": -23.028533935546875, + "step": 3284 + }, + { + "epoch": 5.27, + "learning_rate": 2.300832342449465e-07, + "logits/chosen": -1.5052553415298462, + "logits/rejected": -1.6110550165176392, + "logps/chosen": -166.7972869873047, + "logps/rejected": -287.06744384765625, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.260326385498047, + "rewards/margins": 9.954858779907227, + "rewards/rejected": -18.215185165405273, + "step": 3285 + }, + { + "epoch": 5.27, + "learning_rate": 2.2998414585810541e-07, + "logits/chosen": -1.7910115718841553, + "logits/rejected": -1.7335350513458252, + "logps/chosen": -130.99227905273438, + "logps/rejected": -274.59332275390625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3881120681762695, + "rewards/margins": 15.901726722717285, + "rewards/rejected": -19.289838790893555, + "step": 3286 + }, + { + "epoch": 5.28, + "learning_rate": 2.2988505747126435e-07, + "logits/chosen": -1.38985276222229, + "logits/rejected": -1.3994165658950806, + "logps/chosen": -148.76405334472656, + "logps/rejected": -247.40203857421875, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.58608341217041, + "rewards/margins": 10.773872375488281, + "rewards/rejected": -17.359956741333008, + "step": 3287 + }, + { + "epoch": 5.28, + "learning_rate": 2.297859690844233e-07, + "logits/chosen": -1.48069167137146, + "logits/rejected": -1.5046736001968384, + "logps/chosen": -182.3841552734375, + "logps/rejected": -284.04144287109375, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.171463966369629, + "rewards/margins": 9.217697143554688, + "rewards/rejected": -18.389162063598633, + "step": 3288 + }, + { + "epoch": 5.28, + "learning_rate": 2.2968688069758224e-07, + "logits/chosen": -1.4783912897109985, + "logits/rejected": -1.5058367252349854, + "logps/chosen": -161.37045288085938, + "logps/rejected": -275.2249755859375, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.754576683044434, + "rewards/margins": 10.274942398071289, + "rewards/rejected": -17.02952003479004, + "step": 3289 + }, + { + "epoch": 5.28, + "learning_rate": 2.2958779231074115e-07, + "logits/chosen": -1.4675097465515137, + "logits/rejected": -1.4226959943771362, + "logps/chosen": -129.98219299316406, + "logps/rejected": -293.51397705078125, + "loss": 0.0112, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.891878128051758, + "rewards/margins": 16.738704681396484, + "rewards/rejected": -21.63058090209961, + "step": 3290 + }, + { + "epoch": 5.28, + "learning_rate": 2.294887039239001e-07, + "logits/chosen": -1.6181025505065918, + "logits/rejected": -1.6557501554489136, + "logps/chosen": -128.70790100097656, + "logps/rejected": -269.06646728515625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.766424655914307, + "rewards/margins": 12.692357063293457, + "rewards/rejected": -17.458782196044922, + "step": 3291 + }, + { + "epoch": 5.28, + "learning_rate": 2.2938961553705904e-07, + "logits/chosen": -1.40916109085083, + "logits/rejected": -1.4052858352661133, + "logps/chosen": -168.75111389160156, + "logps/rejected": -291.683349609375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.780033111572266, + "rewards/margins": 11.931514739990234, + "rewards/rejected": -20.7115478515625, + "step": 3292 + }, + { + "epoch": 5.29, + "learning_rate": 2.29290527150218e-07, + "logits/chosen": -1.6158010959625244, + "logits/rejected": -1.530199646949768, + "logps/chosen": -202.82101440429688, + "logps/rejected": -296.4802551269531, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.339792251586914, + "rewards/margins": 9.82237434387207, + "rewards/rejected": -20.162166595458984, + "step": 3293 + }, + { + "epoch": 5.29, + "learning_rate": 2.2919143876337693e-07, + "logits/chosen": -1.493280291557312, + "logits/rejected": -1.451041579246521, + "logps/chosen": -163.53591918945312, + "logps/rejected": -268.64324951171875, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.058167457580566, + "rewards/margins": 10.893571853637695, + "rewards/rejected": -18.951740264892578, + "step": 3294 + }, + { + "epoch": 5.29, + "learning_rate": 2.2909235037653584e-07, + "logits/chosen": -1.4288734197616577, + "logits/rejected": -1.4758449792861938, + "logps/chosen": -130.56301879882812, + "logps/rejected": -293.0477294921875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.947011470794678, + "rewards/margins": 15.635238647460938, + "rewards/rejected": -20.58224868774414, + "step": 3295 + }, + { + "epoch": 5.29, + "learning_rate": 2.289932619896948e-07, + "logits/chosen": -1.3653998374938965, + "logits/rejected": -1.3507616519927979, + "logps/chosen": -113.7938003540039, + "logps/rejected": -212.2369384765625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.329197406768799, + "rewards/margins": 10.41024398803711, + "rewards/rejected": -13.739441871643066, + "step": 3296 + }, + { + "epoch": 5.29, + "learning_rate": 2.2889417360285373e-07, + "logits/chosen": -1.4972128868103027, + "logits/rejected": -1.5469497442245483, + "logps/chosen": -116.20575714111328, + "logps/rejected": -253.90692138671875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.379452705383301, + "rewards/margins": 13.03973388671875, + "rewards/rejected": -17.419187545776367, + "step": 3297 + }, + { + "epoch": 5.29, + "learning_rate": 2.2879508521601267e-07, + "logits/chosen": -1.5097559690475464, + "logits/rejected": -1.5235953330993652, + "logps/chosen": -132.9793701171875, + "logps/rejected": -266.6614685058594, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.667145252227783, + "rewards/margins": 12.524120330810547, + "rewards/rejected": -17.191265106201172, + "step": 3298 + }, + { + "epoch": 5.3, + "learning_rate": 2.2869599682917163e-07, + "logits/chosen": -1.48417329788208, + "logits/rejected": -1.510541558265686, + "logps/chosen": -118.65602111816406, + "logps/rejected": -273.9678955078125, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.223136901855469, + "rewards/margins": 13.71442699432373, + "rewards/rejected": -18.937564849853516, + "step": 3299 + }, + { + "epoch": 5.3, + "learning_rate": 2.2859690844233053e-07, + "logits/chosen": -1.5556857585906982, + "logits/rejected": -1.5986441373825073, + "logps/chosen": -148.67242431640625, + "logps/rejected": -272.2933044433594, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.443939685821533, + "rewards/margins": 11.895475387573242, + "rewards/rejected": -18.339414596557617, + "step": 3300 + }, + { + "epoch": 5.3, + "learning_rate": 2.284978200554895e-07, + "logits/chosen": -1.5792648792266846, + "logits/rejected": -1.5814098119735718, + "logps/chosen": -186.92202758789062, + "logps/rejected": -259.39453125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.63750171661377, + "rewards/margins": 8.415603637695312, + "rewards/rejected": -17.0531063079834, + "step": 3301 + }, + { + "epoch": 5.3, + "learning_rate": 2.2839873166864843e-07, + "logits/chosen": -1.500571370124817, + "logits/rejected": -1.6020872592926025, + "logps/chosen": -161.57269287109375, + "logps/rejected": -272.75360107421875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.336462020874023, + "rewards/margins": 8.724156379699707, + "rewards/rejected": -16.060617446899414, + "step": 3302 + }, + { + "epoch": 5.3, + "learning_rate": 2.2829964328180736e-07, + "logits/chosen": -1.4412658214569092, + "logits/rejected": -1.4683027267456055, + "logps/chosen": -155.39122009277344, + "logps/rejected": -253.5810546875, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.24833869934082, + "rewards/margins": 9.23845386505127, + "rewards/rejected": -15.486793518066406, + "step": 3303 + }, + { + "epoch": 5.3, + "learning_rate": 2.2820055489496632e-07, + "logits/chosen": -1.4774980545043945, + "logits/rejected": -1.5155560970306396, + "logps/chosen": -173.79515075683594, + "logps/rejected": -273.3815002441406, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.15631103515625, + "rewards/margins": 10.717728614807129, + "rewards/rejected": -18.874038696289062, + "step": 3304 + }, + { + "epoch": 5.3, + "learning_rate": 2.2810146650812523e-07, + "logits/chosen": -1.6362597942352295, + "logits/rejected": -1.7329810857772827, + "logps/chosen": -136.3558807373047, + "logps/rejected": -292.86810302734375, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.342553615570068, + "rewards/margins": 15.292057037353516, + "rewards/rejected": -20.63460922241211, + "step": 3305 + }, + { + "epoch": 5.31, + "learning_rate": 2.2800237812128416e-07, + "logits/chosen": -1.347977638244629, + "logits/rejected": -1.3817092180252075, + "logps/chosen": -137.6536865234375, + "logps/rejected": -270.13775634765625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.370110034942627, + "rewards/margins": 12.63882827758789, + "rewards/rejected": -20.00893783569336, + "step": 3306 + }, + { + "epoch": 5.31, + "learning_rate": 2.2790328973444312e-07, + "logits/chosen": -1.4882045984268188, + "logits/rejected": -1.5241202116012573, + "logps/chosen": -156.85360717773438, + "logps/rejected": -248.65489196777344, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.829144477844238, + "rewards/margins": 9.813946723937988, + "rewards/rejected": -16.643091201782227, + "step": 3307 + }, + { + "epoch": 5.31, + "learning_rate": 2.2780420134760205e-07, + "logits/chosen": -1.5645530223846436, + "logits/rejected": -1.551581621170044, + "logps/chosen": -143.82723999023438, + "logps/rejected": -332.50396728515625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.892045021057129, + "rewards/margins": 15.889039039611816, + "rewards/rejected": -23.781082153320312, + "step": 3308 + }, + { + "epoch": 5.31, + "learning_rate": 2.2770511296076099e-07, + "logits/chosen": -1.5151004791259766, + "logits/rejected": -1.5553202629089355, + "logps/chosen": -153.21485900878906, + "logps/rejected": -297.355224609375, + "loss": 0.0017, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.613072395324707, + "rewards/margins": 12.98960018157959, + "rewards/rejected": -19.60267448425293, + "step": 3309 + }, + { + "epoch": 5.31, + "learning_rate": 2.2760602457391992e-07, + "logits/chosen": -1.5750819444656372, + "logits/rejected": -1.5507875680923462, + "logps/chosen": -178.33319091796875, + "logps/rejected": -288.76751708984375, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.701860427856445, + "rewards/margins": 12.18509292602539, + "rewards/rejected": -18.88695526123047, + "step": 3310 + }, + { + "epoch": 5.31, + "learning_rate": 2.2750693618707885e-07, + "logits/chosen": -1.577398419380188, + "logits/rejected": -1.6510770320892334, + "logps/chosen": -143.89920043945312, + "logps/rejected": -282.8096923828125, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.01813268661499, + "rewards/margins": 12.017876625061035, + "rewards/rejected": -18.036008834838867, + "step": 3311 + }, + { + "epoch": 5.32, + "learning_rate": 2.274078478002378e-07, + "logits/chosen": -1.4647969007492065, + "logits/rejected": -1.3879203796386719, + "logps/chosen": -145.91766357421875, + "logps/rejected": -298.03466796875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.840178489685059, + "rewards/margins": 14.308417320251465, + "rewards/rejected": -20.148595809936523, + "step": 3312 + }, + { + "epoch": 5.32, + "learning_rate": 2.2730875941339675e-07, + "logits/chosen": -1.6105878353118896, + "logits/rejected": -1.4899859428405762, + "logps/chosen": -164.84814453125, + "logps/rejected": -248.66580200195312, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.969851016998291, + "rewards/margins": 9.671026229858398, + "rewards/rejected": -15.640876770019531, + "step": 3313 + }, + { + "epoch": 5.32, + "learning_rate": 2.2720967102655565e-07, + "logits/chosen": -1.5811818838119507, + "logits/rejected": -1.5612133741378784, + "logps/chosen": -126.66638946533203, + "logps/rejected": -225.18374633789062, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.562431812286377, + "rewards/margins": 9.958690643310547, + "rewards/rejected": -13.521121978759766, + "step": 3314 + }, + { + "epoch": 5.32, + "learning_rate": 2.271105826397146e-07, + "logits/chosen": -1.5350260734558105, + "logits/rejected": -1.6077206134796143, + "logps/chosen": -140.8316650390625, + "logps/rejected": -253.14756774902344, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.808531761169434, + "rewards/margins": 10.061297416687012, + "rewards/rejected": -15.869829177856445, + "step": 3315 + }, + { + "epoch": 5.32, + "learning_rate": 2.2701149425287355e-07, + "logits/chosen": -1.4854471683502197, + "logits/rejected": -1.525792121887207, + "logps/chosen": -120.77369689941406, + "logps/rejected": -205.78564453125, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.336245059967041, + "rewards/margins": 9.537599563598633, + "rewards/rejected": -13.873845100402832, + "step": 3316 + }, + { + "epoch": 5.32, + "learning_rate": 2.269124058660325e-07, + "logits/chosen": -1.4358478784561157, + "logits/rejected": -1.447367787361145, + "logps/chosen": -103.92425537109375, + "logps/rejected": -211.43685913085938, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.216926097869873, + "rewards/margins": 10.131386756896973, + "rewards/rejected": -14.348312377929688, + "step": 3317 + }, + { + "epoch": 5.33, + "learning_rate": 2.2681331747919144e-07, + "logits/chosen": -1.545941948890686, + "logits/rejected": -1.5655145645141602, + "logps/chosen": -157.71240234375, + "logps/rejected": -271.8997802734375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.9868927001953125, + "rewards/margins": 10.784299850463867, + "rewards/rejected": -17.771190643310547, + "step": 3318 + }, + { + "epoch": 5.33, + "learning_rate": 2.2671422909235035e-07, + "logits/chosen": -1.6806466579437256, + "logits/rejected": -1.6708253622055054, + "logps/chosen": -87.8006591796875, + "logps/rejected": -243.98867797851562, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2052860260009766, + "rewards/margins": 15.323360443115234, + "rewards/rejected": -16.52864646911621, + "step": 3319 + }, + { + "epoch": 5.33, + "learning_rate": 2.266151407055093e-07, + "logits/chosen": -1.5885224342346191, + "logits/rejected": -1.6605273485183716, + "logps/chosen": -158.06655883789062, + "logps/rejected": -338.20062255859375, + "loss": 0.002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.112833023071289, + "rewards/margins": 16.720754623413086, + "rewards/rejected": -22.833587646484375, + "step": 3320 + }, + { + "epoch": 5.33, + "learning_rate": 2.2651605231866824e-07, + "logits/chosen": -1.7238097190856934, + "logits/rejected": -1.7660444974899292, + "logps/chosen": -116.2686767578125, + "logps/rejected": -262.0578918457031, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.963369369506836, + "rewards/margins": 12.433982849121094, + "rewards/rejected": -17.39735221862793, + "step": 3321 + }, + { + "epoch": 5.33, + "learning_rate": 2.264169639318272e-07, + "logits/chosen": -1.6300324201583862, + "logits/rejected": -1.4522960186004639, + "logps/chosen": -135.105712890625, + "logps/rejected": -253.45008850097656, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.38510799407959, + "rewards/margins": 12.70014762878418, + "rewards/rejected": -18.085254669189453, + "step": 3322 + }, + { + "epoch": 5.33, + "learning_rate": 2.2631787554498613e-07, + "logits/chosen": -1.5349348783493042, + "logits/rejected": -1.4331884384155273, + "logps/chosen": -149.43663024902344, + "logps/rejected": -251.2551727294922, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.056065559387207, + "rewards/margins": 10.902688980102539, + "rewards/rejected": -17.958755493164062, + "step": 3323 + }, + { + "epoch": 5.34, + "learning_rate": 2.2621878715814504e-07, + "logits/chosen": -1.3630272150039673, + "logits/rejected": -1.4950703382492065, + "logps/chosen": -111.91983795166016, + "logps/rejected": -217.37673950195312, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.111433506011963, + "rewards/margins": 8.157143592834473, + "rewards/rejected": -13.268577575683594, + "step": 3324 + }, + { + "epoch": 5.34, + "learning_rate": 2.26119698771304e-07, + "logits/chosen": -1.632229208946228, + "logits/rejected": -1.62516188621521, + "logps/chosen": -192.82925415039062, + "logps/rejected": -296.07208251953125, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.844401359558105, + "rewards/margins": 10.255441665649414, + "rewards/rejected": -19.099842071533203, + "step": 3325 + }, + { + "epoch": 5.34, + "learning_rate": 2.2602061038446293e-07, + "logits/chosen": -1.453197717666626, + "logits/rejected": -1.4036836624145508, + "logps/chosen": -120.9522933959961, + "logps/rejected": -197.45651245117188, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7538535594940186, + "rewards/margins": 10.840166091918945, + "rewards/rejected": -13.594019889831543, + "step": 3326 + }, + { + "epoch": 5.34, + "learning_rate": 2.2592152199762187e-07, + "logits/chosen": -1.5256881713867188, + "logits/rejected": -1.5507906675338745, + "logps/chosen": -180.89517211914062, + "logps/rejected": -300.45220947265625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.152420043945312, + "rewards/margins": 12.529157638549805, + "rewards/rejected": -20.681577682495117, + "step": 3327 + }, + { + "epoch": 5.34, + "learning_rate": 2.258224336107808e-07, + "logits/chosen": -1.5061918497085571, + "logits/rejected": -1.462681770324707, + "logps/chosen": -138.4984130859375, + "logps/rejected": -234.9783935546875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.132136344909668, + "rewards/margins": 10.839895248413086, + "rewards/rejected": -16.97203254699707, + "step": 3328 + }, + { + "epoch": 5.34, + "learning_rate": 2.2572334522393973e-07, + "logits/chosen": -1.6498697996139526, + "logits/rejected": -1.6011707782745361, + "logps/chosen": -154.8911590576172, + "logps/rejected": -286.8303527832031, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.219493865966797, + "rewards/margins": 13.911413192749023, + "rewards/rejected": -20.13090705871582, + "step": 3329 + }, + { + "epoch": 5.35, + "learning_rate": 2.256242568370987e-07, + "logits/chosen": -1.5793354511260986, + "logits/rejected": -1.532224178314209, + "logps/chosen": -171.15084838867188, + "logps/rejected": -269.3011779785156, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.45323371887207, + "rewards/margins": 11.509300231933594, + "rewards/rejected": -18.962533950805664, + "step": 3330 + }, + { + "epoch": 5.35, + "learning_rate": 2.2552516845025762e-07, + "logits/chosen": -1.5102906227111816, + "logits/rejected": -1.5350855588912964, + "logps/chosen": -136.7614288330078, + "logps/rejected": -304.9424743652344, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.676676273345947, + "rewards/margins": 15.80157470703125, + "rewards/rejected": -20.478252410888672, + "step": 3331 + }, + { + "epoch": 5.35, + "learning_rate": 2.2542608006341656e-07, + "logits/chosen": -1.4463659524917603, + "logits/rejected": -1.5209327936172485, + "logps/chosen": -152.08547973632812, + "logps/rejected": -315.5830993652344, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.7533159255981445, + "rewards/margins": 15.747629165649414, + "rewards/rejected": -22.500946044921875, + "step": 3332 + }, + { + "epoch": 5.35, + "learning_rate": 2.253269916765755e-07, + "logits/chosen": -1.4326118230819702, + "logits/rejected": -1.605055332183838, + "logps/chosen": -161.41836547851562, + "logps/rejected": -299.6729431152344, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.34447717666626, + "rewards/margins": 11.521932601928711, + "rewards/rejected": -18.866409301757812, + "step": 3333 + }, + { + "epoch": 5.35, + "learning_rate": 2.2522790328973442e-07, + "logits/chosen": -1.535721778869629, + "logits/rejected": -1.5774413347244263, + "logps/chosen": -132.92913818359375, + "logps/rejected": -272.3597717285156, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.416486740112305, + "rewards/margins": 11.906148910522461, + "rewards/rejected": -17.322635650634766, + "step": 3334 + }, + { + "epoch": 5.35, + "learning_rate": 2.2512881490289338e-07, + "logits/chosen": -1.570178508758545, + "logits/rejected": -1.5657563209533691, + "logps/chosen": -172.29144287109375, + "logps/rejected": -257.20306396484375, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.493678092956543, + "rewards/margins": 9.694555282592773, + "rewards/rejected": -17.188232421875, + "step": 3335 + }, + { + "epoch": 5.35, + "learning_rate": 2.2502972651605232e-07, + "logits/chosen": -1.4509042501449585, + "logits/rejected": -1.4092247486114502, + "logps/chosen": -153.49996948242188, + "logps/rejected": -304.203857421875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.7667236328125, + "rewards/margins": 14.15746784210205, + "rewards/rejected": -19.924190521240234, + "step": 3336 + }, + { + "epoch": 5.36, + "learning_rate": 2.2493063812921125e-07, + "logits/chosen": -1.5317697525024414, + "logits/rejected": -1.4868426322937012, + "logps/chosen": -129.99703979492188, + "logps/rejected": -251.9051513671875, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.795592308044434, + "rewards/margins": 11.148192405700684, + "rewards/rejected": -16.943784713745117, + "step": 3337 + }, + { + "epoch": 5.36, + "learning_rate": 2.2483154974237018e-07, + "logits/chosen": -1.457454800605774, + "logits/rejected": -1.4756230115890503, + "logps/chosen": -123.6118392944336, + "logps/rejected": -240.3975830078125, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.904645919799805, + "rewards/margins": 12.0093355178833, + "rewards/rejected": -16.913982391357422, + "step": 3338 + }, + { + "epoch": 5.36, + "learning_rate": 2.2473246135552912e-07, + "logits/chosen": -1.5437859296798706, + "logits/rejected": -1.5696723461151123, + "logps/chosen": -152.26199340820312, + "logps/rejected": -246.85086059570312, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.5242486000061035, + "rewards/margins": 9.850549697875977, + "rewards/rejected": -14.374797821044922, + "step": 3339 + }, + { + "epoch": 5.36, + "learning_rate": 2.2463337296868805e-07, + "logits/chosen": -1.3714799880981445, + "logits/rejected": -1.3958139419555664, + "logps/chosen": -184.04798889160156, + "logps/rejected": -267.7174072265625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.70742654800415, + "rewards/margins": 10.010367393493652, + "rewards/rejected": -17.71779441833496, + "step": 3340 + }, + { + "epoch": 5.36, + "learning_rate": 2.24534284581847e-07, + "logits/chosen": -1.5415242910385132, + "logits/rejected": -1.6627039909362793, + "logps/chosen": -139.56666564941406, + "logps/rejected": -238.7611083984375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.5672101974487305, + "rewards/margins": 8.37545394897461, + "rewards/rejected": -14.942663192749023, + "step": 3341 + }, + { + "epoch": 5.36, + "learning_rate": 2.2443519619500592e-07, + "logits/chosen": -1.5591256618499756, + "logits/rejected": -1.5583850145339966, + "logps/chosen": -144.3818359375, + "logps/rejected": -284.83392333984375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.635990619659424, + "rewards/margins": 12.929494857788086, + "rewards/rejected": -19.56548500061035, + "step": 3342 + }, + { + "epoch": 5.37, + "learning_rate": 2.2433610780816488e-07, + "logits/chosen": -1.592528223991394, + "logits/rejected": -1.6109066009521484, + "logps/chosen": -139.62033081054688, + "logps/rejected": -276.4849548339844, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.375699043273926, + "rewards/margins": 14.04610824584961, + "rewards/rejected": -20.421810150146484, + "step": 3343 + }, + { + "epoch": 5.37, + "learning_rate": 2.242370194213238e-07, + "logits/chosen": -1.5474252700805664, + "logits/rejected": -1.449514627456665, + "logps/chosen": -135.19134521484375, + "logps/rejected": -255.0718994140625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.3810648918151855, + "rewards/margins": 13.859562873840332, + "rewards/rejected": -18.24062728881836, + "step": 3344 + }, + { + "epoch": 5.37, + "learning_rate": 2.2413793103448274e-07, + "logits/chosen": -1.6842137575149536, + "logits/rejected": -1.5948195457458496, + "logps/chosen": -186.7830810546875, + "logps/rejected": -284.5519104003906, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.71574592590332, + "rewards/margins": 12.599804878234863, + "rewards/rejected": -20.315549850463867, + "step": 3345 + }, + { + "epoch": 5.37, + "learning_rate": 2.240388426476417e-07, + "logits/chosen": -1.698282241821289, + "logits/rejected": -1.620221495628357, + "logps/chosen": -108.21202087402344, + "logps/rejected": -239.95144653320312, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.161227226257324, + "rewards/margins": 14.208375930786133, + "rewards/rejected": -16.36960220336914, + "step": 3346 + }, + { + "epoch": 5.37, + "learning_rate": 2.239397542608006e-07, + "logits/chosen": -1.5395092964172363, + "logits/rejected": -1.532173752784729, + "logps/chosen": -155.18951416015625, + "logps/rejected": -272.2019348144531, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.746330738067627, + "rewards/margins": 11.576675415039062, + "rewards/rejected": -18.32300567626953, + "step": 3347 + }, + { + "epoch": 5.37, + "learning_rate": 2.2384066587395954e-07, + "logits/chosen": -1.3876769542694092, + "logits/rejected": -1.449293851852417, + "logps/chosen": -125.07403564453125, + "logps/rejected": -278.3925476074219, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.222963809967041, + "rewards/margins": 14.931577682495117, + "rewards/rejected": -20.154542922973633, + "step": 3348 + }, + { + "epoch": 5.38, + "learning_rate": 2.237415774871185e-07, + "logits/chosen": -1.5666778087615967, + "logits/rejected": -1.711693525314331, + "logps/chosen": -117.53593444824219, + "logps/rejected": -291.260986328125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.911811828613281, + "rewards/margins": 14.627108573913574, + "rewards/rejected": -19.53891944885254, + "step": 3349 + }, + { + "epoch": 5.38, + "learning_rate": 2.2364248910027744e-07, + "logits/chosen": -1.5801458358764648, + "logits/rejected": -1.568089485168457, + "logps/chosen": -134.3922119140625, + "logps/rejected": -234.49005126953125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.527836322784424, + "rewards/margins": 9.966581344604492, + "rewards/rejected": -15.494417190551758, + "step": 3350 + }, + { + "epoch": 5.38, + "learning_rate": 2.235434007134364e-07, + "logits/chosen": -1.5634105205535889, + "logits/rejected": -1.5351967811584473, + "logps/chosen": -162.6669464111328, + "logps/rejected": -270.06829833984375, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.583613395690918, + "rewards/margins": 9.931751251220703, + "rewards/rejected": -16.515365600585938, + "step": 3351 + }, + { + "epoch": 5.38, + "learning_rate": 2.234443123265953e-07, + "logits/chosen": -1.4940297603607178, + "logits/rejected": -1.4565410614013672, + "logps/chosen": -152.0593719482422, + "logps/rejected": -296.40203857421875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.479157447814941, + "rewards/margins": 14.992426872253418, + "rewards/rejected": -22.47158432006836, + "step": 3352 + }, + { + "epoch": 5.38, + "learning_rate": 2.2334522393975424e-07, + "logits/chosen": -1.7206963300704956, + "logits/rejected": -1.6227232217788696, + "logps/chosen": -114.64385223388672, + "logps/rejected": -263.2828369140625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.159984588623047, + "rewards/margins": 16.36581802368164, + "rewards/rejected": -19.525802612304688, + "step": 3353 + }, + { + "epoch": 5.38, + "learning_rate": 2.232461355529132e-07, + "logits/chosen": -1.463364601135254, + "logits/rejected": -1.5254769325256348, + "logps/chosen": -131.56166076660156, + "logps/rejected": -277.1054992675781, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.043187141418457, + "rewards/margins": 15.634376525878906, + "rewards/rejected": -19.67756462097168, + "step": 3354 + }, + { + "epoch": 5.39, + "learning_rate": 2.2314704716607213e-07, + "logits/chosen": -1.4703330993652344, + "logits/rejected": -1.524598240852356, + "logps/chosen": -139.65371704101562, + "logps/rejected": -243.3048095703125, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.039375305175781, + "rewards/margins": 10.573387145996094, + "rewards/rejected": -16.612762451171875, + "step": 3355 + }, + { + "epoch": 5.39, + "learning_rate": 2.230479587792311e-07, + "logits/chosen": -1.6625964641571045, + "logits/rejected": -1.7274631261825562, + "logps/chosen": -116.28390502929688, + "logps/rejected": -272.6272277832031, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.50934362411499, + "rewards/margins": 13.706849098205566, + "rewards/rejected": -18.21619415283203, + "step": 3356 + }, + { + "epoch": 5.39, + "learning_rate": 2.2294887039239e-07, + "logits/chosen": -1.6670743227005005, + "logits/rejected": -1.5316647291183472, + "logps/chosen": -139.34652709960938, + "logps/rejected": -230.95863342285156, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.874160289764404, + "rewards/margins": 11.127359390258789, + "rewards/rejected": -17.00151824951172, + "step": 3357 + }, + { + "epoch": 5.39, + "learning_rate": 2.2284978200554893e-07, + "logits/chosen": -1.5382909774780273, + "logits/rejected": -1.529942274093628, + "logps/chosen": -200.63597106933594, + "logps/rejected": -310.177978515625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.582544326782227, + "rewards/margins": 11.499267578125, + "rewards/rejected": -21.081811904907227, + "step": 3358 + }, + { + "epoch": 5.39, + "learning_rate": 2.227506936187079e-07, + "logits/chosen": -1.546960473060608, + "logits/rejected": -1.5958765745162964, + "logps/chosen": -113.99314880371094, + "logps/rejected": -274.0147705078125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.952789783477783, + "rewards/margins": 15.033554077148438, + "rewards/rejected": -18.986343383789062, + "step": 3359 + }, + { + "epoch": 5.39, + "learning_rate": 2.2265160523186682e-07, + "logits/chosen": -1.4956220388412476, + "logits/rejected": -1.5152796506881714, + "logps/chosen": -118.70171356201172, + "logps/rejected": -261.7405090332031, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.6020045280456543, + "rewards/margins": 13.936685562133789, + "rewards/rejected": -17.5386905670166, + "step": 3360 + }, + { + "epoch": 5.39, + "learning_rate": 2.2255251684502573e-07, + "logits/chosen": -1.526985764503479, + "logits/rejected": -1.555748462677002, + "logps/chosen": -133.33059692382812, + "logps/rejected": -251.51255798339844, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9240264892578125, + "rewards/margins": 11.591777801513672, + "rewards/rejected": -15.515804290771484, + "step": 3361 + }, + { + "epoch": 5.4, + "learning_rate": 2.224534284581847e-07, + "logits/chosen": -1.614539623260498, + "logits/rejected": -1.5123636722564697, + "logps/chosen": -118.60287475585938, + "logps/rejected": -211.76608276367188, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.677467346191406, + "rewards/margins": 9.764123916625977, + "rewards/rejected": -14.441591262817383, + "step": 3362 + }, + { + "epoch": 5.4, + "learning_rate": 2.2235434007134362e-07, + "logits/chosen": -1.566989779472351, + "logits/rejected": -1.6183035373687744, + "logps/chosen": -134.1474609375, + "logps/rejected": -233.4180908203125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.1238274574279785, + "rewards/margins": 9.82984733581543, + "rewards/rejected": -14.95367431640625, + "step": 3363 + }, + { + "epoch": 5.4, + "learning_rate": 2.2225525168450258e-07, + "logits/chosen": -1.4607737064361572, + "logits/rejected": -1.4881620407104492, + "logps/chosen": -179.74595642089844, + "logps/rejected": -278.3017578125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.919157981872559, + "rewards/margins": 10.873269081115723, + "rewards/rejected": -18.79242706298828, + "step": 3364 + }, + { + "epoch": 5.4, + "learning_rate": 2.2215616329766152e-07, + "logits/chosen": -1.6124848127365112, + "logits/rejected": -1.5343328714370728, + "logps/chosen": -121.9565200805664, + "logps/rejected": -217.14706420898438, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.396571636199951, + "rewards/margins": 11.568666458129883, + "rewards/rejected": -14.965237617492676, + "step": 3365 + }, + { + "epoch": 5.4, + "learning_rate": 2.2205707491082042e-07, + "logits/chosen": -1.52131187915802, + "logits/rejected": -1.5269343852996826, + "logps/chosen": -127.14079284667969, + "logps/rejected": -254.12628173828125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.8454363346099854, + "rewards/margins": 12.024837493896484, + "rewards/rejected": -15.870275497436523, + "step": 3366 + }, + { + "epoch": 5.4, + "learning_rate": 2.2195798652397938e-07, + "logits/chosen": -1.460913896560669, + "logits/rejected": -1.490901231765747, + "logps/chosen": -169.38668823242188, + "logps/rejected": -343.76806640625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.101324081420898, + "rewards/margins": 15.524415016174316, + "rewards/rejected": -23.6257381439209, + "step": 3367 + }, + { + "epoch": 5.41, + "learning_rate": 2.2185889813713832e-07, + "logits/chosen": -1.4364376068115234, + "logits/rejected": -1.5023207664489746, + "logps/chosen": -183.91632080078125, + "logps/rejected": -288.2415771484375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.191518783569336, + "rewards/margins": 11.386292457580566, + "rewards/rejected": -19.57781219482422, + "step": 3368 + }, + { + "epoch": 5.41, + "learning_rate": 2.2175980975029725e-07, + "logits/chosen": -1.3790723085403442, + "logits/rejected": -1.3920319080352783, + "logps/chosen": -102.01871490478516, + "logps/rejected": -176.1423797607422, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.574155330657959, + "rewards/margins": 9.471451759338379, + "rewards/rejected": -12.045607566833496, + "step": 3369 + }, + { + "epoch": 5.41, + "learning_rate": 2.216607213634562e-07, + "logits/chosen": -1.7128570079803467, + "logits/rejected": -1.7457858324050903, + "logps/chosen": -173.56561279296875, + "logps/rejected": -302.0688171386719, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.877683162689209, + "rewards/margins": 11.865898132324219, + "rewards/rejected": -19.743579864501953, + "step": 3370 + }, + { + "epoch": 5.41, + "learning_rate": 2.2156163297661512e-07, + "logits/chosen": -1.5667210817337036, + "logits/rejected": -1.5284806489944458, + "logps/chosen": -147.80776977539062, + "logps/rejected": -247.23788452148438, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.01568603515625, + "rewards/margins": 10.655046463012695, + "rewards/rejected": -16.670734405517578, + "step": 3371 + }, + { + "epoch": 5.41, + "learning_rate": 2.2146254458977408e-07, + "logits/chosen": -1.5296783447265625, + "logits/rejected": -1.5212018489837646, + "logps/chosen": -149.3108673095703, + "logps/rejected": -262.4295654296875, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.820113182067871, + "rewards/margins": 11.58735466003418, + "rewards/rejected": -18.407466888427734, + "step": 3372 + }, + { + "epoch": 5.41, + "learning_rate": 2.21363456202933e-07, + "logits/chosen": -1.7003076076507568, + "logits/rejected": -1.7299786806106567, + "logps/chosen": -130.5372772216797, + "logps/rejected": -283.408203125, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.891129970550537, + "rewards/margins": 14.459957122802734, + "rewards/rejected": -20.35108757019043, + "step": 3373 + }, + { + "epoch": 5.42, + "learning_rate": 2.2126436781609194e-07, + "logits/chosen": -1.4702881574630737, + "logits/rejected": -1.5102828741073608, + "logps/chosen": -106.86442565917969, + "logps/rejected": -254.77456665039062, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.383804798126221, + "rewards/margins": 13.272889137268066, + "rewards/rejected": -17.656692504882812, + "step": 3374 + }, + { + "epoch": 5.42, + "learning_rate": 2.211652794292509e-07, + "logits/chosen": -1.532680869102478, + "logits/rejected": -1.4243810176849365, + "logps/chosen": -155.78079223632812, + "logps/rejected": -237.3238525390625, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.9549431800842285, + "rewards/margins": 10.20772933959961, + "rewards/rejected": -16.162673950195312, + "step": 3375 + }, + { + "epoch": 5.42, + "learning_rate": 2.210661910424098e-07, + "logits/chosen": -1.5506293773651123, + "logits/rejected": -1.6120247840881348, + "logps/chosen": -150.59664916992188, + "logps/rejected": -238.9454345703125, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.4619221687316895, + "rewards/margins": 9.693292617797852, + "rewards/rejected": -16.15521240234375, + "step": 3376 + }, + { + "epoch": 5.42, + "learning_rate": 2.2096710265556874e-07, + "logits/chosen": -1.7855098247528076, + "logits/rejected": -1.7643678188323975, + "logps/chosen": -144.03543090820312, + "logps/rejected": -274.50439453125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.230337142944336, + "rewards/margins": 13.725303649902344, + "rewards/rejected": -18.95564079284668, + "step": 3377 + }, + { + "epoch": 5.42, + "learning_rate": 2.208680142687277e-07, + "logits/chosen": -1.5035109519958496, + "logits/rejected": -1.5759987831115723, + "logps/chosen": -171.74893188476562, + "logps/rejected": -282.68402099609375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.193686485290527, + "rewards/margins": 10.870843887329102, + "rewards/rejected": -20.064529418945312, + "step": 3378 + }, + { + "epoch": 5.42, + "learning_rate": 2.2076892588188663e-07, + "logits/chosen": -1.6473997831344604, + "logits/rejected": -1.6807217597961426, + "logps/chosen": -119.12995910644531, + "logps/rejected": -251.85171508789062, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.423239707946777, + "rewards/margins": 12.765886306762695, + "rewards/rejected": -17.189125061035156, + "step": 3379 + }, + { + "epoch": 5.43, + "learning_rate": 2.2066983749504557e-07, + "logits/chosen": -1.4059085845947266, + "logits/rejected": -1.4500528573989868, + "logps/chosen": -150.53305053710938, + "logps/rejected": -272.1643371582031, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.726877212524414, + "rewards/margins": 11.987201690673828, + "rewards/rejected": -19.714078903198242, + "step": 3380 + }, + { + "epoch": 5.43, + "learning_rate": 2.205707491082045e-07, + "logits/chosen": -1.5820019245147705, + "logits/rejected": -1.513116717338562, + "logps/chosen": -145.5394744873047, + "logps/rejected": -277.168212890625, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.813573837280273, + "rewards/margins": 13.872626304626465, + "rewards/rejected": -18.686199188232422, + "step": 3381 + }, + { + "epoch": 5.43, + "learning_rate": 2.2047166072136343e-07, + "logits/chosen": -1.5477066040039062, + "logits/rejected": -1.5952719449996948, + "logps/chosen": -167.15444946289062, + "logps/rejected": -288.15570068359375, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.680694580078125, + "rewards/margins": 11.664008140563965, + "rewards/rejected": -19.344701766967773, + "step": 3382 + }, + { + "epoch": 5.43, + "learning_rate": 2.203725723345224e-07, + "logits/chosen": -1.4422056674957275, + "logits/rejected": -1.5392370223999023, + "logps/chosen": -135.5591278076172, + "logps/rejected": -247.91094970703125, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.057971954345703, + "rewards/margins": 10.212977409362793, + "rewards/rejected": -15.27094841003418, + "step": 3383 + }, + { + "epoch": 5.43, + "learning_rate": 2.2027348394768133e-07, + "logits/chosen": -1.5374940633773804, + "logits/rejected": -1.5760115385055542, + "logps/chosen": -101.42708587646484, + "logps/rejected": -266.1417236328125, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.863496780395508, + "rewards/margins": 14.370357513427734, + "rewards/rejected": -18.233856201171875, + "step": 3384 + }, + { + "epoch": 5.43, + "learning_rate": 2.2017439556084026e-07, + "logits/chosen": -1.435502290725708, + "logits/rejected": -1.3635164499282837, + "logps/chosen": -165.69476318359375, + "logps/rejected": -265.2212219238281, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.772959232330322, + "rewards/margins": 10.885974884033203, + "rewards/rejected": -18.658935546875, + "step": 3385 + }, + { + "epoch": 5.43, + "learning_rate": 2.200753071739992e-07, + "logits/chosen": -1.5999939441680908, + "logits/rejected": -1.6163662672042847, + "logps/chosen": -138.12277221679688, + "logps/rejected": -288.14154052734375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.978157997131348, + "rewards/margins": 14.016080856323242, + "rewards/rejected": -19.994237899780273, + "step": 3386 + }, + { + "epoch": 5.44, + "learning_rate": 2.1997621878715813e-07, + "logits/chosen": -1.5740634202957153, + "logits/rejected": -1.4953091144561768, + "logps/chosen": -147.4095001220703, + "logps/rejected": -285.9583435058594, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.825496673583984, + "rewards/margins": 13.267680168151855, + "rewards/rejected": -19.093175888061523, + "step": 3387 + }, + { + "epoch": 5.44, + "learning_rate": 2.198771304003171e-07, + "logits/chosen": -1.474001169204712, + "logits/rejected": -1.532721996307373, + "logps/chosen": -150.21087646484375, + "logps/rejected": -290.87530517578125, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.657354831695557, + "rewards/margins": 14.362232208251953, + "rewards/rejected": -20.01958656311035, + "step": 3388 + }, + { + "epoch": 5.44, + "learning_rate": 2.1977804201347602e-07, + "logits/chosen": -1.3892403841018677, + "logits/rejected": -1.4526026248931885, + "logps/chosen": -138.7139892578125, + "logps/rejected": -288.3580627441406, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.710198879241943, + "rewards/margins": 14.914694786071777, + "rewards/rejected": -20.624893188476562, + "step": 3389 + }, + { + "epoch": 5.44, + "learning_rate": 2.1967895362663493e-07, + "logits/chosen": -1.4090297222137451, + "logits/rejected": -1.4123625755310059, + "logps/chosen": -151.28958129882812, + "logps/rejected": -292.968017578125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.033416748046875, + "rewards/margins": 13.450350761413574, + "rewards/rejected": -20.483768463134766, + "step": 3390 + }, + { + "epoch": 5.44, + "learning_rate": 2.195798652397939e-07, + "logits/chosen": -1.6812785863876343, + "logits/rejected": -1.715536117553711, + "logps/chosen": -125.8580322265625, + "logps/rejected": -242.32733154296875, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.532454490661621, + "rewards/margins": 10.984025955200195, + "rewards/rejected": -15.5164794921875, + "step": 3391 + }, + { + "epoch": 5.44, + "learning_rate": 2.1948077685295282e-07, + "logits/chosen": -1.5406553745269775, + "logits/rejected": -1.5125868320465088, + "logps/chosen": -150.23541259765625, + "logps/rejected": -261.8678894042969, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.892866611480713, + "rewards/margins": 11.752792358398438, + "rewards/rejected": -16.645660400390625, + "step": 3392 + }, + { + "epoch": 5.45, + "learning_rate": 2.1938168846611178e-07, + "logits/chosen": -1.5411978960037231, + "logits/rejected": -1.4925578832626343, + "logps/chosen": -209.87864685058594, + "logps/rejected": -335.8096618652344, + "loss": 0.0038, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.847379684448242, + "rewards/margins": 13.121818542480469, + "rewards/rejected": -22.96919822692871, + "step": 3393 + }, + { + "epoch": 5.45, + "learning_rate": 2.1928260007927071e-07, + "logits/chosen": -1.47858726978302, + "logits/rejected": -1.4731569290161133, + "logps/chosen": -172.7291717529297, + "logps/rejected": -258.4764099121094, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.767223834991455, + "rewards/margins": 9.968217849731445, + "rewards/rejected": -17.735441207885742, + "step": 3394 + }, + { + "epoch": 5.45, + "learning_rate": 2.1918351169242962e-07, + "logits/chosen": -1.4767916202545166, + "logits/rejected": -1.5431993007659912, + "logps/chosen": -162.16171264648438, + "logps/rejected": -347.032958984375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.341720581054688, + "rewards/margins": 14.830513000488281, + "rewards/rejected": -23.17223358154297, + "step": 3395 + }, + { + "epoch": 5.45, + "learning_rate": 2.1908442330558858e-07, + "logits/chosen": -1.5759713649749756, + "logits/rejected": -1.590254306793213, + "logps/chosen": -228.2081298828125, + "logps/rejected": -327.6415710449219, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.776154518127441, + "rewards/margins": 10.774214744567871, + "rewards/rejected": -20.55036735534668, + "step": 3396 + }, + { + "epoch": 5.45, + "learning_rate": 2.1898533491874751e-07, + "logits/chosen": -1.4583839178085327, + "logits/rejected": -1.531585931777954, + "logps/chosen": -159.20938110351562, + "logps/rejected": -274.0878601074219, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.6368913650512695, + "rewards/margins": 11.348533630371094, + "rewards/rejected": -17.985424041748047, + "step": 3397 + }, + { + "epoch": 5.45, + "learning_rate": 2.1888624653190645e-07, + "logits/chosen": -1.6541659832000732, + "logits/rejected": -1.6589959859848022, + "logps/chosen": -125.59602355957031, + "logps/rejected": -249.15585327148438, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.201717376708984, + "rewards/margins": 12.439949035644531, + "rewards/rejected": -17.641666412353516, + "step": 3398 + }, + { + "epoch": 5.46, + "learning_rate": 2.1878715814506538e-07, + "logits/chosen": -1.5422694683074951, + "logits/rejected": -1.558213710784912, + "logps/chosen": -98.25377655029297, + "logps/rejected": -233.5749969482422, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1022322177886963, + "rewards/margins": 14.079442024230957, + "rewards/rejected": -17.18167495727539, + "step": 3399 + }, + { + "epoch": 5.46, + "learning_rate": 2.1868806975822431e-07, + "logits/chosen": -1.5851092338562012, + "logits/rejected": -1.5383484363555908, + "logps/chosen": -117.46080017089844, + "logps/rejected": -273.9396057128906, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9035251140594482, + "rewards/margins": 14.74166202545166, + "rewards/rejected": -18.645187377929688, + "step": 3400 + }, + { + "epoch": 5.46, + "learning_rate": 2.1858898137138327e-07, + "logits/chosen": -1.4096810817718506, + "logits/rejected": -1.4691529273986816, + "logps/chosen": -188.3643798828125, + "logps/rejected": -331.3023681640625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.140613555908203, + "rewards/margins": 13.731876373291016, + "rewards/rejected": -22.87248992919922, + "step": 3401 + }, + { + "epoch": 5.46, + "learning_rate": 2.184898929845422e-07, + "logits/chosen": -1.6868270635604858, + "logits/rejected": -1.5930402278900146, + "logps/chosen": -161.64739990234375, + "logps/rejected": -291.76275634765625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.854515075683594, + "rewards/margins": 15.682470321655273, + "rewards/rejected": -21.536985397338867, + "step": 3402 + }, + { + "epoch": 5.46, + "learning_rate": 2.1839080459770114e-07, + "logits/chosen": -1.3915770053863525, + "logits/rejected": -1.3953169584274292, + "logps/chosen": -97.4369125366211, + "logps/rejected": -196.7766571044922, + "loss": 0.0017, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.489727020263672, + "rewards/margins": 9.68229866027832, + "rewards/rejected": -14.172026634216309, + "step": 3403 + }, + { + "epoch": 5.46, + "learning_rate": 2.1829171621086007e-07, + "logits/chosen": -1.530609130859375, + "logits/rejected": -1.564410924911499, + "logps/chosen": -197.47984313964844, + "logps/rejected": -284.98614501953125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.315011024475098, + "rewards/margins": 9.376608848571777, + "rewards/rejected": -18.691619873046875, + "step": 3404 + }, + { + "epoch": 5.47, + "learning_rate": 2.18192627824019e-07, + "logits/chosen": -1.5130506753921509, + "logits/rejected": -1.541816234588623, + "logps/chosen": -169.7334747314453, + "logps/rejected": -274.76068115234375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.375558853149414, + "rewards/margins": 10.44033432006836, + "rewards/rejected": -18.815893173217773, + "step": 3405 + }, + { + "epoch": 5.47, + "learning_rate": 2.1809353943717797e-07, + "logits/chosen": -1.4647538661956787, + "logits/rejected": -1.5671467781066895, + "logps/chosen": -118.93898010253906, + "logps/rejected": -298.5596923828125, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.821059226989746, + "rewards/margins": 14.004740715026855, + "rewards/rejected": -19.825801849365234, + "step": 3406 + }, + { + "epoch": 5.47, + "learning_rate": 2.179944510503369e-07, + "logits/chosen": -1.4423624277114868, + "logits/rejected": -1.4711577892303467, + "logps/chosen": -145.74716186523438, + "logps/rejected": -269.322998046875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.104604721069336, + "rewards/margins": 10.80618667602539, + "rewards/rejected": -16.910791397094727, + "step": 3407 + }, + { + "epoch": 5.47, + "learning_rate": 2.1789536266349583e-07, + "logits/chosen": -1.4702683687210083, + "logits/rejected": -1.4496498107910156, + "logps/chosen": -127.46072387695312, + "logps/rejected": -237.20611572265625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.806929588317871, + "rewards/margins": 12.100058555603027, + "rewards/rejected": -16.9069881439209, + "step": 3408 + }, + { + "epoch": 5.47, + "learning_rate": 2.1779627427665477e-07, + "logits/chosen": -1.4614458084106445, + "logits/rejected": -1.4422098398208618, + "logps/chosen": -150.91543579101562, + "logps/rejected": -260.36627197265625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.6605634689331055, + "rewards/margins": 10.555765151977539, + "rewards/rejected": -17.216327667236328, + "step": 3409 + }, + { + "epoch": 5.47, + "learning_rate": 2.176971858898137e-07, + "logits/chosen": -1.4418106079101562, + "logits/rejected": -1.4834110736846924, + "logps/chosen": -162.11143493652344, + "logps/rejected": -253.22183227539062, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.522298812866211, + "rewards/margins": 8.737313270568848, + "rewards/rejected": -17.259613037109375, + "step": 3410 + }, + { + "epoch": 5.48, + "learning_rate": 2.1759809750297263e-07, + "logits/chosen": -1.3720133304595947, + "logits/rejected": -1.4534249305725098, + "logps/chosen": -148.52078247070312, + "logps/rejected": -326.36737060546875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.168240070343018, + "rewards/margins": 16.55620574951172, + "rewards/rejected": -22.724445343017578, + "step": 3411 + }, + { + "epoch": 5.48, + "learning_rate": 2.174990091161316e-07, + "logits/chosen": -1.443222999572754, + "logits/rejected": -1.4689271450042725, + "logps/chosen": -117.4993896484375, + "logps/rejected": -213.51210021972656, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.965090274810791, + "rewards/margins": 10.409432411193848, + "rewards/rejected": -15.37452220916748, + "step": 3412 + }, + { + "epoch": 5.48, + "learning_rate": 2.173999207292905e-07, + "logits/chosen": -1.5441243648529053, + "logits/rejected": -1.501905083656311, + "logps/chosen": -133.42288208007812, + "logps/rejected": -249.25625610351562, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.117092132568359, + "rewards/margins": 13.081611633300781, + "rewards/rejected": -17.19870376586914, + "step": 3413 + }, + { + "epoch": 5.48, + "learning_rate": 2.1730083234244946e-07, + "logits/chosen": -1.524592638015747, + "logits/rejected": -1.4863003492355347, + "logps/chosen": -102.32524108886719, + "logps/rejected": -273.52764892578125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.379734516143799, + "rewards/margins": 15.495105743408203, + "rewards/rejected": -18.874839782714844, + "step": 3414 + }, + { + "epoch": 5.48, + "learning_rate": 2.172017439556084e-07, + "logits/chosen": -1.3711071014404297, + "logits/rejected": -1.5472300052642822, + "logps/chosen": -155.7696533203125, + "logps/rejected": -295.6031799316406, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.725275993347168, + "rewards/margins": 12.450275421142578, + "rewards/rejected": -20.17555046081543, + "step": 3415 + }, + { + "epoch": 5.48, + "learning_rate": 2.1710265556876733e-07, + "logits/chosen": -1.3435040712356567, + "logits/rejected": -1.3740893602371216, + "logps/chosen": -107.17675018310547, + "logps/rejected": -239.00540161132812, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.672761917114258, + "rewards/margins": 11.973121643066406, + "rewards/rejected": -16.64588165283203, + "step": 3416 + }, + { + "epoch": 5.48, + "learning_rate": 2.1700356718192629e-07, + "logits/chosen": -1.3675215244293213, + "logits/rejected": -1.3907420635223389, + "logps/chosen": -145.81846618652344, + "logps/rejected": -292.1810302734375, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.917076110839844, + "rewards/margins": 14.353343963623047, + "rewards/rejected": -21.270421981811523, + "step": 3417 + }, + { + "epoch": 5.49, + "learning_rate": 2.169044787950852e-07, + "logits/chosen": -1.454232931137085, + "logits/rejected": -1.4496525526046753, + "logps/chosen": -192.88731384277344, + "logps/rejected": -301.1120910644531, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.94334602355957, + "rewards/margins": 9.59316635131836, + "rewards/rejected": -19.53651237487793, + "step": 3418 + }, + { + "epoch": 5.49, + "learning_rate": 2.1680539040824413e-07, + "logits/chosen": -1.4443333148956299, + "logits/rejected": -1.5949188470840454, + "logps/chosen": -163.4739990234375, + "logps/rejected": -267.27728271484375, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.709090232849121, + "rewards/margins": 8.831838607788086, + "rewards/rejected": -16.540929794311523, + "step": 3419 + }, + { + "epoch": 5.49, + "learning_rate": 2.1670630202140309e-07, + "logits/chosen": -1.4827373027801514, + "logits/rejected": -1.494735598564148, + "logps/chosen": -114.93074035644531, + "logps/rejected": -264.31060791015625, + "loss": 0.0059, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.187319755554199, + "rewards/margins": 13.390203475952148, + "rewards/rejected": -17.577524185180664, + "step": 3420 + }, + { + "epoch": 5.49, + "learning_rate": 2.1660721363456202e-07, + "logits/chosen": -1.5603173971176147, + "logits/rejected": -1.5269737243652344, + "logps/chosen": -126.33602905273438, + "logps/rejected": -245.9889373779297, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.3714752197265625, + "rewards/margins": 11.540634155273438, + "rewards/rejected": -16.912109375, + "step": 3421 + }, + { + "epoch": 5.49, + "learning_rate": 2.1650812524772098e-07, + "logits/chosen": -1.3887673616409302, + "logits/rejected": -1.4665441513061523, + "logps/chosen": -154.67999267578125, + "logps/rejected": -293.91424560546875, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.805274486541748, + "rewards/margins": 13.488457679748535, + "rewards/rejected": -20.293731689453125, + "step": 3422 + }, + { + "epoch": 5.49, + "learning_rate": 2.1640903686087989e-07, + "logits/chosen": -1.5458590984344482, + "logits/rejected": -1.56844162940979, + "logps/chosen": -158.26779174804688, + "logps/rejected": -304.8733215332031, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.423677444458008, + "rewards/margins": 14.188054084777832, + "rewards/rejected": -21.611732482910156, + "step": 3423 + }, + { + "epoch": 5.5, + "learning_rate": 2.1630994847403882e-07, + "logits/chosen": -1.4092642068862915, + "logits/rejected": -1.566103219985962, + "logps/chosen": -139.1046600341797, + "logps/rejected": -322.3624572753906, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.243317604064941, + "rewards/margins": 14.497196197509766, + "rewards/rejected": -20.74051284790039, + "step": 3424 + }, + { + "epoch": 5.5, + "learning_rate": 2.1621086008719778e-07, + "logits/chosen": -1.4919686317443848, + "logits/rejected": -1.5348799228668213, + "logps/chosen": -135.2732391357422, + "logps/rejected": -253.70298767089844, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.664247512817383, + "rewards/margins": 11.335546493530273, + "rewards/rejected": -17.999794006347656, + "step": 3425 + }, + { + "epoch": 5.5, + "learning_rate": 2.161117717003567e-07, + "logits/chosen": -1.484498381614685, + "logits/rejected": -1.5397371053695679, + "logps/chosen": -141.9759063720703, + "logps/rejected": -303.1224365234375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.133430004119873, + "rewards/margins": 14.223925590515137, + "rewards/rejected": -20.35735511779785, + "step": 3426 + }, + { + "epoch": 5.5, + "learning_rate": 2.1601268331351567e-07, + "logits/chosen": -1.4503648281097412, + "logits/rejected": -1.4295748472213745, + "logps/chosen": -160.02755737304688, + "logps/rejected": -266.31207275390625, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.710765838623047, + "rewards/margins": 11.482879638671875, + "rewards/rejected": -18.193645477294922, + "step": 3427 + }, + { + "epoch": 5.5, + "learning_rate": 2.1591359492667458e-07, + "logits/chosen": -1.5500283241271973, + "logits/rejected": -1.6177650690078735, + "logps/chosen": -143.53424072265625, + "logps/rejected": -283.0515441894531, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.169302463531494, + "rewards/margins": 13.649847984313965, + "rewards/rejected": -19.819149017333984, + "step": 3428 + }, + { + "epoch": 5.5, + "learning_rate": 2.158145065398335e-07, + "logits/chosen": -1.4650323390960693, + "logits/rejected": -1.4889445304870605, + "logps/chosen": -169.26470947265625, + "logps/rejected": -296.1282958984375, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.847884178161621, + "rewards/margins": 12.830238342285156, + "rewards/rejected": -19.67812156677246, + "step": 3429 + }, + { + "epoch": 5.51, + "learning_rate": 2.1571541815299247e-07, + "logits/chosen": -1.5286608934402466, + "logits/rejected": -1.6418735980987549, + "logps/chosen": -188.9846649169922, + "logps/rejected": -297.3423156738281, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.809849739074707, + "rewards/margins": 9.70708179473877, + "rewards/rejected": -18.51693344116211, + "step": 3430 + }, + { + "epoch": 5.51, + "learning_rate": 2.156163297661514e-07, + "logits/chosen": -1.4271926879882812, + "logits/rejected": -1.3851617574691772, + "logps/chosen": -161.28384399414062, + "logps/rejected": -250.90151977539062, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.6663994789123535, + "rewards/margins": 12.147462844848633, + "rewards/rejected": -16.813861846923828, + "step": 3431 + }, + { + "epoch": 5.51, + "learning_rate": 2.155172413793103e-07, + "logits/chosen": -1.7137434482574463, + "logits/rejected": -1.7398827075958252, + "logps/chosen": -157.59165954589844, + "logps/rejected": -268.9953308105469, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.546286582946777, + "rewards/margins": 11.670187950134277, + "rewards/rejected": -18.216474533081055, + "step": 3432 + }, + { + "epoch": 5.51, + "learning_rate": 2.1541815299246927e-07, + "logits/chosen": -1.534991979598999, + "logits/rejected": -1.4968020915985107, + "logps/chosen": -105.90400695800781, + "logps/rejected": -226.97796630859375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.785414695739746, + "rewards/margins": 12.468313217163086, + "rewards/rejected": -16.25372886657715, + "step": 3433 + }, + { + "epoch": 5.51, + "learning_rate": 2.153190646056282e-07, + "logits/chosen": -1.4106501340866089, + "logits/rejected": -1.4255609512329102, + "logps/chosen": -176.66073608398438, + "logps/rejected": -274.36572265625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.97189712524414, + "rewards/margins": 10.777088165283203, + "rewards/rejected": -19.748985290527344, + "step": 3434 + }, + { + "epoch": 5.51, + "learning_rate": 2.1521997621878716e-07, + "logits/chosen": -1.7300175428390503, + "logits/rejected": -1.6854029893875122, + "logps/chosen": -128.77200317382812, + "logps/rejected": -273.62225341796875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.551894187927246, + "rewards/margins": 14.43295669555664, + "rewards/rejected": -19.984851837158203, + "step": 3435 + }, + { + "epoch": 5.52, + "learning_rate": 2.151208878319461e-07, + "logits/chosen": -1.4227174520492554, + "logits/rejected": -1.4231430292129517, + "logps/chosen": -144.3121337890625, + "logps/rejected": -311.919189453125, + "loss": 0.002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.6403489112854, + "rewards/margins": 16.390661239624023, + "rewards/rejected": -22.031009674072266, + "step": 3436 + }, + { + "epoch": 5.52, + "learning_rate": 2.15021799445105e-07, + "logits/chosen": -1.440816879272461, + "logits/rejected": -1.4553241729736328, + "logps/chosen": -114.87913513183594, + "logps/rejected": -227.59336853027344, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.729645252227783, + "rewards/margins": 11.623116493225098, + "rewards/rejected": -15.352761268615723, + "step": 3437 + }, + { + "epoch": 5.52, + "learning_rate": 2.1492271105826396e-07, + "logits/chosen": -1.4858475923538208, + "logits/rejected": -1.4217169284820557, + "logps/chosen": -209.4885711669922, + "logps/rejected": -283.1610412597656, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.446978569030762, + "rewards/margins": 9.776723861694336, + "rewards/rejected": -18.223703384399414, + "step": 3438 + }, + { + "epoch": 5.52, + "learning_rate": 2.148236226714229e-07, + "logits/chosen": -1.5462822914123535, + "logits/rejected": -1.4937914609909058, + "logps/chosen": -163.20535278320312, + "logps/rejected": -295.1076354980469, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.032345771789551, + "rewards/margins": 12.072263717651367, + "rewards/rejected": -18.1046085357666, + "step": 3439 + }, + { + "epoch": 5.52, + "learning_rate": 2.1472453428458183e-07, + "logits/chosen": -1.561997413635254, + "logits/rejected": -1.5506079196929932, + "logps/chosen": -162.46746826171875, + "logps/rejected": -277.029296875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.701944351196289, + "rewards/margins": 12.85633659362793, + "rewards/rejected": -19.55828094482422, + "step": 3440 + }, + { + "epoch": 5.52, + "learning_rate": 2.146254458977408e-07, + "logits/chosen": -1.5318663120269775, + "logits/rejected": -1.5665340423583984, + "logps/chosen": -160.5145263671875, + "logps/rejected": -339.3807373046875, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.1625776290893555, + "rewards/margins": 16.76846694946289, + "rewards/rejected": -23.931041717529297, + "step": 3441 + }, + { + "epoch": 5.52, + "learning_rate": 2.145263575108997e-07, + "logits/chosen": -1.5023705959320068, + "logits/rejected": -1.4268518686294556, + "logps/chosen": -231.8016815185547, + "logps/rejected": -264.9580383300781, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.721844673156738, + "rewards/margins": 8.513740539550781, + "rewards/rejected": -17.235586166381836, + "step": 3442 + }, + { + "epoch": 5.53, + "learning_rate": 2.1442726912405866e-07, + "logits/chosen": -1.5287936925888062, + "logits/rejected": -1.4619197845458984, + "logps/chosen": -182.6705780029297, + "logps/rejected": -293.1990966796875, + "loss": 0.0056, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.571776390075684, + "rewards/margins": 12.777178764343262, + "rewards/rejected": -21.348955154418945, + "step": 3443 + }, + { + "epoch": 5.53, + "learning_rate": 2.143281807372176e-07, + "logits/chosen": -1.4586869478225708, + "logits/rejected": -1.497658371925354, + "logps/chosen": -149.40089416503906, + "logps/rejected": -286.95098876953125, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.645305633544922, + "rewards/margins": 13.105929374694824, + "rewards/rejected": -19.75123405456543, + "step": 3444 + }, + { + "epoch": 5.53, + "learning_rate": 2.1422909235037652e-07, + "logits/chosen": -1.6375882625579834, + "logits/rejected": -1.662219524383545, + "logps/chosen": -119.6922607421875, + "logps/rejected": -257.5961608886719, + "loss": 0.0018, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.347027778625488, + "rewards/margins": 13.006986618041992, + "rewards/rejected": -17.354015350341797, + "step": 3445 + }, + { + "epoch": 5.53, + "learning_rate": 2.1413000396353548e-07, + "logits/chosen": -1.3750144243240356, + "logits/rejected": -1.518882393836975, + "logps/chosen": -113.49573516845703, + "logps/rejected": -245.63491821289062, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.284154891967773, + "rewards/margins": 10.61517333984375, + "rewards/rejected": -15.899328231811523, + "step": 3446 + }, + { + "epoch": 5.53, + "learning_rate": 2.140309155766944e-07, + "logits/chosen": -1.420674443244934, + "logits/rejected": -1.4245469570159912, + "logps/chosen": -146.1182861328125, + "logps/rejected": -256.2110595703125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.880023002624512, + "rewards/margins": 10.483912467956543, + "rewards/rejected": -17.363935470581055, + "step": 3447 + }, + { + "epoch": 5.53, + "learning_rate": 2.1393182718985332e-07, + "logits/chosen": -1.4224376678466797, + "logits/rejected": -1.4667400121688843, + "logps/chosen": -157.93873596191406, + "logps/rejected": -319.5220642089844, + "loss": 0.0034, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.863391876220703, + "rewards/margins": 14.624561309814453, + "rewards/rejected": -22.487951278686523, + "step": 3448 + }, + { + "epoch": 5.54, + "learning_rate": 2.1383273880301228e-07, + "logits/chosen": -1.5504735708236694, + "logits/rejected": -1.6136361360549927, + "logps/chosen": -126.59565734863281, + "logps/rejected": -279.4231262207031, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.845088958740234, + "rewards/margins": 12.714363098144531, + "rewards/rejected": -18.559452056884766, + "step": 3449 + }, + { + "epoch": 5.54, + "learning_rate": 2.1373365041617122e-07, + "logits/chosen": -1.475582480430603, + "logits/rejected": -1.4415696859359741, + "logps/chosen": -133.4188995361328, + "logps/rejected": -290.6744079589844, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.813361644744873, + "rewards/margins": 16.151216506958008, + "rewards/rejected": -20.964580535888672, + "step": 3450 + }, + { + "epoch": 5.54, + "learning_rate": 2.1363456202933015e-07, + "logits/chosen": -1.4117300510406494, + "logits/rejected": -1.483047604560852, + "logps/chosen": -152.42173767089844, + "logps/rejected": -312.80426025390625, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.858860015869141, + "rewards/margins": 15.626880645751953, + "rewards/rejected": -22.485740661621094, + "step": 3451 + }, + { + "epoch": 5.54, + "learning_rate": 2.1353547364248908e-07, + "logits/chosen": -1.4515260457992554, + "logits/rejected": -1.4625074863433838, + "logps/chosen": -136.45709228515625, + "logps/rejected": -269.1407470703125, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.452543258666992, + "rewards/margins": 12.845173835754395, + "rewards/rejected": -18.29771614074707, + "step": 3452 + }, + { + "epoch": 5.54, + "learning_rate": 2.1343638525564802e-07, + "logits/chosen": -1.5901306867599487, + "logits/rejected": -1.6348779201507568, + "logps/chosen": -230.0763397216797, + "logps/rejected": -355.6812744140625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.871137619018555, + "rewards/margins": 10.222611427307129, + "rewards/rejected": -22.09375, + "step": 3453 + }, + { + "epoch": 5.54, + "learning_rate": 2.1333729686880698e-07, + "logits/chosen": -1.4135034084320068, + "logits/rejected": -1.4534860849380493, + "logps/chosen": -115.4327392578125, + "logps/rejected": -229.9302978515625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.28120756149292, + "rewards/margins": 10.383345603942871, + "rewards/rejected": -15.664552688598633, + "step": 3454 + }, + { + "epoch": 5.55, + "learning_rate": 2.132382084819659e-07, + "logits/chosen": -1.545456886291504, + "logits/rejected": -1.5549765825271606, + "logps/chosen": -154.99961853027344, + "logps/rejected": -304.4394836425781, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.052010536193848, + "rewards/margins": 12.902388572692871, + "rewards/rejected": -20.95439910888672, + "step": 3455 + }, + { + "epoch": 5.55, + "learning_rate": 2.1313912009512484e-07, + "logits/chosen": -1.513218641281128, + "logits/rejected": -1.4968972206115723, + "logps/chosen": -169.7577667236328, + "logps/rejected": -305.21429443359375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.250058174133301, + "rewards/margins": 14.359582901000977, + "rewards/rejected": -20.60964012145996, + "step": 3456 + }, + { + "epoch": 5.55, + "learning_rate": 2.1304003170828378e-07, + "logits/chosen": -1.4491702318191528, + "logits/rejected": -1.4875330924987793, + "logps/chosen": -135.46507263183594, + "logps/rejected": -266.0128173828125, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.584984302520752, + "rewards/margins": 12.297099113464355, + "rewards/rejected": -16.882083892822266, + "step": 3457 + }, + { + "epoch": 5.55, + "learning_rate": 2.129409433214427e-07, + "logits/chosen": -1.4234471321105957, + "logits/rejected": -1.5615057945251465, + "logps/chosen": -165.02728271484375, + "logps/rejected": -291.1048278808594, + "loss": 0.002, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.628448486328125, + "rewards/margins": 10.694999694824219, + "rewards/rejected": -19.323448181152344, + "step": 3458 + }, + { + "epoch": 5.55, + "learning_rate": 2.1284185493460167e-07, + "logits/chosen": -1.5023926496505737, + "logits/rejected": -1.5145243406295776, + "logps/chosen": -166.86395263671875, + "logps/rejected": -322.5313720703125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.474535942077637, + "rewards/margins": 13.365673065185547, + "rewards/rejected": -21.840208053588867, + "step": 3459 + }, + { + "epoch": 5.55, + "learning_rate": 2.127427665477606e-07, + "logits/chosen": -1.4995983839035034, + "logits/rejected": -1.6318519115447998, + "logps/chosen": -147.55908203125, + "logps/rejected": -263.0432434082031, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.746371746063232, + "rewards/margins": 9.30437183380127, + "rewards/rejected": -17.050743103027344, + "step": 3460 + }, + { + "epoch": 5.56, + "learning_rate": 2.126436781609195e-07, + "logits/chosen": -1.4209668636322021, + "logits/rejected": -1.4657387733459473, + "logps/chosen": -199.90377807617188, + "logps/rejected": -326.2291259765625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.882694244384766, + "rewards/margins": 12.913928031921387, + "rewards/rejected": -22.79662322998047, + "step": 3461 + }, + { + "epoch": 5.56, + "learning_rate": 2.1254458977407847e-07, + "logits/chosen": -1.510312557220459, + "logits/rejected": -1.526473045349121, + "logps/chosen": -104.86387634277344, + "logps/rejected": -236.08953857421875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.114480495452881, + "rewards/margins": 12.498226165771484, + "rewards/rejected": -16.612707138061523, + "step": 3462 + }, + { + "epoch": 5.56, + "learning_rate": 2.124455013872374e-07, + "logits/chosen": -1.6254152059555054, + "logits/rejected": -1.5807161331176758, + "logps/chosen": -172.88888549804688, + "logps/rejected": -278.616943359375, + "loss": 0.0075, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.6392130851745605, + "rewards/margins": 12.036291122436523, + "rewards/rejected": -18.67550277709961, + "step": 3463 + }, + { + "epoch": 5.56, + "learning_rate": 2.1234641300039636e-07, + "logits/chosen": -1.580838918685913, + "logits/rejected": -1.449442744255066, + "logps/chosen": -167.49075317382812, + "logps/rejected": -256.189453125, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.328787803649902, + "rewards/margins": 10.541651725769043, + "rewards/rejected": -17.870439529418945, + "step": 3464 + }, + { + "epoch": 5.56, + "learning_rate": 2.122473246135553e-07, + "logits/chosen": -1.467173457145691, + "logits/rejected": -1.42405366897583, + "logps/chosen": -128.48675537109375, + "logps/rejected": -219.92221069335938, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.125523567199707, + "rewards/margins": 10.216520309448242, + "rewards/rejected": -14.34204387664795, + "step": 3465 + }, + { + "epoch": 5.56, + "learning_rate": 2.121482362267142e-07, + "logits/chosen": -1.536937952041626, + "logits/rejected": -1.5816075801849365, + "logps/chosen": -120.34854125976562, + "logps/rejected": -264.47314453125, + "loss": 0.003, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.967096328735352, + "rewards/margins": 13.329809188842773, + "rewards/rejected": -18.296905517578125, + "step": 3466 + }, + { + "epoch": 5.57, + "learning_rate": 2.1204914783987316e-07, + "logits/chosen": -1.5219027996063232, + "logits/rejected": -1.5078930854797363, + "logps/chosen": -163.04747009277344, + "logps/rejected": -305.27471923828125, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.313136100769043, + "rewards/margins": 13.732112884521484, + "rewards/rejected": -21.045249938964844, + "step": 3467 + }, + { + "epoch": 5.57, + "learning_rate": 2.119500594530321e-07, + "logits/chosen": -1.588708758354187, + "logits/rejected": -1.538028359413147, + "logps/chosen": -175.19378662109375, + "logps/rejected": -268.5478820800781, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.663297653198242, + "rewards/margins": 9.261184692382812, + "rewards/rejected": -17.924482345581055, + "step": 3468 + }, + { + "epoch": 5.57, + "learning_rate": 2.1185097106619106e-07, + "logits/chosen": -1.5705335140228271, + "logits/rejected": -1.5216724872589111, + "logps/chosen": -165.31146240234375, + "logps/rejected": -282.43084716796875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.5694098472595215, + "rewards/margins": 12.201087951660156, + "rewards/rejected": -18.770496368408203, + "step": 3469 + }, + { + "epoch": 5.57, + "learning_rate": 2.1175188267934996e-07, + "logits/chosen": -1.480363368988037, + "logits/rejected": -1.4656383991241455, + "logps/chosen": -190.98443603515625, + "logps/rejected": -305.0986328125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.11679458618164, + "rewards/margins": 12.317111015319824, + "rewards/rejected": -21.43390464782715, + "step": 3470 + }, + { + "epoch": 5.57, + "learning_rate": 2.116527942925089e-07, + "logits/chosen": -1.619957685470581, + "logits/rejected": -1.546367883682251, + "logps/chosen": -153.62782287597656, + "logps/rejected": -268.7706298828125, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.356766700744629, + "rewards/margins": 12.042521476745605, + "rewards/rejected": -18.399288177490234, + "step": 3471 + }, + { + "epoch": 5.57, + "learning_rate": 2.1155370590566785e-07, + "logits/chosen": -1.6157139539718628, + "logits/rejected": -1.5353957414627075, + "logps/chosen": -129.51522827148438, + "logps/rejected": -270.95965576171875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.2077975273132324, + "rewards/margins": 14.3507661819458, + "rewards/rejected": -17.558563232421875, + "step": 3472 + }, + { + "epoch": 5.57, + "learning_rate": 2.114546175188268e-07, + "logits/chosen": -1.411192536354065, + "logits/rejected": -1.4063540697097778, + "logps/chosen": -172.2589111328125, + "logps/rejected": -272.8524475097656, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.989190101623535, + "rewards/margins": 10.653397560119629, + "rewards/rejected": -18.642587661743164, + "step": 3473 + }, + { + "epoch": 5.58, + "learning_rate": 2.1135552913198572e-07, + "logits/chosen": -1.598246693611145, + "logits/rejected": -1.4570465087890625, + "logps/chosen": -151.0003204345703, + "logps/rejected": -236.34561157226562, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.301996231079102, + "rewards/margins": 10.25024127960205, + "rewards/rejected": -16.55223846435547, + "step": 3474 + }, + { + "epoch": 5.58, + "learning_rate": 2.1125644074514465e-07, + "logits/chosen": -1.6418222188949585, + "logits/rejected": -1.6030536890029907, + "logps/chosen": -101.15557098388672, + "logps/rejected": -223.5531463623047, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.030444622039795, + "rewards/margins": 13.175436019897461, + "rewards/rejected": -16.205881118774414, + "step": 3475 + }, + { + "epoch": 5.58, + "learning_rate": 2.111573523583036e-07, + "logits/chosen": -1.572303056716919, + "logits/rejected": -1.4535349607467651, + "logps/chosen": -183.78753662109375, + "logps/rejected": -268.50311279296875, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.888599395751953, + "rewards/margins": 10.309011459350586, + "rewards/rejected": -19.19761085510254, + "step": 3476 + }, + { + "epoch": 5.58, + "learning_rate": 2.1105826397146255e-07, + "logits/chosen": -1.3945050239562988, + "logits/rejected": -1.413754940032959, + "logps/chosen": -154.4713134765625, + "logps/rejected": -276.90478515625, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.282293319702148, + "rewards/margins": 10.809566497802734, + "rewards/rejected": -19.091859817504883, + "step": 3477 + }, + { + "epoch": 5.58, + "learning_rate": 2.1095917558462148e-07, + "logits/chosen": -1.4268770217895508, + "logits/rejected": -1.377671241760254, + "logps/chosen": -149.87142944335938, + "logps/rejected": -231.5007781982422, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.529449462890625, + "rewards/margins": 10.307394027709961, + "rewards/rejected": -16.836843490600586, + "step": 3478 + }, + { + "epoch": 5.58, + "learning_rate": 2.1086008719778041e-07, + "logits/chosen": -1.524371862411499, + "logits/rejected": -1.5602476596832275, + "logps/chosen": -153.4820098876953, + "logps/rejected": -267.6202697753906, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.067951202392578, + "rewards/margins": 9.18103313446045, + "rewards/rejected": -17.24898338317871, + "step": 3479 + }, + { + "epoch": 5.59, + "learning_rate": 2.1076099881093935e-07, + "logits/chosen": -1.5817943811416626, + "logits/rejected": -1.513641595840454, + "logps/chosen": -177.39862060546875, + "logps/rejected": -269.17144775390625, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.720231056213379, + "rewards/margins": 11.374757766723633, + "rewards/rejected": -19.094989776611328, + "step": 3480 + }, + { + "epoch": 5.59, + "learning_rate": 2.1066191042409828e-07, + "logits/chosen": -1.506537675857544, + "logits/rejected": -1.5210570096969604, + "logps/chosen": -141.04534912109375, + "logps/rejected": -242.78671264648438, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.220089912414551, + "rewards/margins": 9.612577438354492, + "rewards/rejected": -14.83266830444336, + "step": 3481 + }, + { + "epoch": 5.59, + "learning_rate": 2.1056282203725721e-07, + "logits/chosen": -1.555442452430725, + "logits/rejected": -1.6271581649780273, + "logps/chosen": -144.35475158691406, + "logps/rejected": -296.9488830566406, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.988823890686035, + "rewards/margins": 14.041683197021484, + "rewards/rejected": -20.030508041381836, + "step": 3482 + }, + { + "epoch": 5.59, + "learning_rate": 2.1046373365041617e-07, + "logits/chosen": -1.6058940887451172, + "logits/rejected": -1.6205238103866577, + "logps/chosen": -147.2259063720703, + "logps/rejected": -277.75469970703125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.05565071105957, + "rewards/margins": 14.026470184326172, + "rewards/rejected": -20.082120895385742, + "step": 3483 + }, + { + "epoch": 5.59, + "learning_rate": 2.1036464526357508e-07, + "logits/chosen": -1.467818021774292, + "logits/rejected": -1.5783430337905884, + "logps/chosen": -159.65179443359375, + "logps/rejected": -281.60931396484375, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.15143871307373, + "rewards/margins": 11.02839183807373, + "rewards/rejected": -19.179832458496094, + "step": 3484 + }, + { + "epoch": 5.59, + "learning_rate": 2.1026555687673404e-07, + "logits/chosen": -1.5809314250946045, + "logits/rejected": -1.5349500179290771, + "logps/chosen": -151.61505126953125, + "logps/rejected": -244.9976348876953, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.910013198852539, + "rewards/margins": 11.858074188232422, + "rewards/rejected": -17.76808738708496, + "step": 3485 + }, + { + "epoch": 5.6, + "learning_rate": 2.1016646848989297e-07, + "logits/chosen": -1.4671688079833984, + "logits/rejected": -1.4165728092193604, + "logps/chosen": -161.30531311035156, + "logps/rejected": -294.05560302734375, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.514042377471924, + "rewards/margins": 12.113029479980469, + "rewards/rejected": -19.627071380615234, + "step": 3486 + }, + { + "epoch": 5.6, + "learning_rate": 2.100673801030519e-07, + "logits/chosen": -1.4841006994247437, + "logits/rejected": -1.4929653406143188, + "logps/chosen": -147.3690185546875, + "logps/rejected": -285.8001708984375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.313442230224609, + "rewards/margins": 12.737858772277832, + "rewards/rejected": -19.051300048828125, + "step": 3487 + }, + { + "epoch": 5.6, + "learning_rate": 2.0996829171621087e-07, + "logits/chosen": -1.4345812797546387, + "logits/rejected": -1.5191748142242432, + "logps/chosen": -161.46340942382812, + "logps/rejected": -255.54258728027344, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.819353103637695, + "rewards/margins": 9.715951919555664, + "rewards/rejected": -16.53530502319336, + "step": 3488 + }, + { + "epoch": 5.6, + "learning_rate": 2.0986920332936977e-07, + "logits/chosen": -1.441930890083313, + "logits/rejected": -1.4702404737472534, + "logps/chosen": -170.82797241210938, + "logps/rejected": -305.4510498046875, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.844261646270752, + "rewards/margins": 11.98133659362793, + "rewards/rejected": -19.825599670410156, + "step": 3489 + }, + { + "epoch": 5.6, + "learning_rate": 2.097701149425287e-07, + "logits/chosen": -1.537670612335205, + "logits/rejected": -1.4510598182678223, + "logps/chosen": -163.748779296875, + "logps/rejected": -318.516845703125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.953666687011719, + "rewards/margins": 14.541647911071777, + "rewards/rejected": -21.495315551757812, + "step": 3490 + }, + { + "epoch": 5.6, + "learning_rate": 2.0967102655568767e-07, + "logits/chosen": -1.622631549835205, + "logits/rejected": -1.6390390396118164, + "logps/chosen": -141.270263671875, + "logps/rejected": -254.4559326171875, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.802359580993652, + "rewards/margins": 11.843047142028809, + "rewards/rejected": -16.64540672302246, + "step": 3491 + }, + { + "epoch": 5.61, + "learning_rate": 2.095719381688466e-07, + "logits/chosen": -1.449061393737793, + "logits/rejected": -1.5777435302734375, + "logps/chosen": -128.67315673828125, + "logps/rejected": -268.0546875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.2907233238220215, + "rewards/margins": 11.80470085144043, + "rewards/rejected": -18.09542465209961, + "step": 3492 + }, + { + "epoch": 5.61, + "learning_rate": 2.0947284978200556e-07, + "logits/chosen": -1.5823407173156738, + "logits/rejected": -1.550660490989685, + "logps/chosen": -146.87701416015625, + "logps/rejected": -268.1518859863281, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.771166801452637, + "rewards/margins": 12.822327613830566, + "rewards/rejected": -19.593494415283203, + "step": 3493 + }, + { + "epoch": 5.61, + "learning_rate": 2.0937376139516447e-07, + "logits/chosen": -1.622446060180664, + "logits/rejected": -1.6589446067810059, + "logps/chosen": -86.09461212158203, + "logps/rejected": -205.93997192382812, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3844799995422363, + "rewards/margins": 11.903199195861816, + "rewards/rejected": -14.287679672241211, + "step": 3494 + }, + { + "epoch": 5.61, + "learning_rate": 2.092746730083234e-07, + "logits/chosen": -1.5718985795974731, + "logits/rejected": -1.599583387374878, + "logps/chosen": -170.6344757080078, + "logps/rejected": -314.8659973144531, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.866886138916016, + "rewards/margins": 12.410504341125488, + "rewards/rejected": -21.277389526367188, + "step": 3495 + }, + { + "epoch": 5.61, + "learning_rate": 2.0917558462148236e-07, + "logits/chosen": -1.3862360715866089, + "logits/rejected": -1.3888206481933594, + "logps/chosen": -169.1226043701172, + "logps/rejected": -307.270263671875, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.87095832824707, + "rewards/margins": 14.130465507507324, + "rewards/rejected": -22.001422882080078, + "step": 3496 + }, + { + "epoch": 5.61, + "learning_rate": 2.090764962346413e-07, + "logits/chosen": -1.4592256546020508, + "logits/rejected": -1.5071525573730469, + "logps/chosen": -178.7654266357422, + "logps/rejected": -272.4892578125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.426994323730469, + "rewards/margins": 10.035850524902344, + "rewards/rejected": -18.462844848632812, + "step": 3497 + }, + { + "epoch": 5.61, + "learning_rate": 2.0897740784780025e-07, + "logits/chosen": -1.4971421957015991, + "logits/rejected": -1.5510032176971436, + "logps/chosen": -171.96389770507812, + "logps/rejected": -257.8359375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.277139663696289, + "rewards/margins": 8.066900253295898, + "rewards/rejected": -16.344039916992188, + "step": 3498 + }, + { + "epoch": 5.62, + "learning_rate": 2.0887831946095916e-07, + "logits/chosen": -1.6488255262374878, + "logits/rejected": -1.6917957067489624, + "logps/chosen": -128.809814453125, + "logps/rejected": -297.54876708984375, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.061434745788574, + "rewards/margins": 15.731962203979492, + "rewards/rejected": -20.79339599609375, + "step": 3499 + }, + { + "epoch": 5.62, + "learning_rate": 2.087792310741181e-07, + "logits/chosen": -1.657825231552124, + "logits/rejected": -1.7020796537399292, + "logps/chosen": -128.25462341308594, + "logps/rejected": -291.19573974609375, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.090444564819336, + "rewards/margins": 14.861099243164062, + "rewards/rejected": -19.9515438079834, + "step": 3500 + }, + { + "epoch": 5.62, + "learning_rate": 2.0868014268727705e-07, + "logits/chosen": -1.5991452932357788, + "logits/rejected": -1.637681007385254, + "logps/chosen": -113.05462646484375, + "logps/rejected": -255.38168334960938, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.677969932556152, + "rewards/margins": 14.873306274414062, + "rewards/rejected": -19.5512752532959, + "step": 3501 + }, + { + "epoch": 5.62, + "learning_rate": 2.0858105430043599e-07, + "logits/chosen": -1.6034021377563477, + "logits/rejected": -1.5931682586669922, + "logps/chosen": -156.61996459960938, + "logps/rejected": -280.9715270996094, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.966739654541016, + "rewards/margins": 12.928670883178711, + "rewards/rejected": -19.895410537719727, + "step": 3502 + }, + { + "epoch": 5.62, + "learning_rate": 2.084819659135949e-07, + "logits/chosen": -1.457820177078247, + "logits/rejected": -1.4800223112106323, + "logps/chosen": -121.35319519042969, + "logps/rejected": -276.4391174316406, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.449268817901611, + "rewards/margins": 15.169504165649414, + "rewards/rejected": -20.618772506713867, + "step": 3503 + }, + { + "epoch": 5.62, + "learning_rate": 2.0838287752675385e-07, + "logits/chosen": -1.5091395378112793, + "logits/rejected": -1.5735511779785156, + "logps/chosen": -142.26956176757812, + "logps/rejected": -306.8866271972656, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.00269889831543, + "rewards/margins": 14.74393081665039, + "rewards/rejected": -20.74662971496582, + "step": 3504 + }, + { + "epoch": 5.63, + "learning_rate": 2.0828378913991279e-07, + "logits/chosen": -1.378591537475586, + "logits/rejected": -1.3285752534866333, + "logps/chosen": -144.63565063476562, + "logps/rejected": -275.3168029785156, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.038185119628906, + "rewards/margins": 12.782560348510742, + "rewards/rejected": -19.82074546813965, + "step": 3505 + }, + { + "epoch": 5.63, + "learning_rate": 2.0818470075307175e-07, + "logits/chosen": -1.5825040340423584, + "logits/rejected": -1.580885410308838, + "logps/chosen": -142.89547729492188, + "logps/rejected": -253.44285583496094, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.151176452636719, + "rewards/margins": 11.703563690185547, + "rewards/rejected": -17.854740142822266, + "step": 3506 + }, + { + "epoch": 5.63, + "learning_rate": 2.0808561236623068e-07, + "logits/chosen": -1.4873921871185303, + "logits/rejected": -1.5253534317016602, + "logps/chosen": -143.24313354492188, + "logps/rejected": -292.96026611328125, + "loss": 0.0025, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.160441875457764, + "rewards/margins": 13.942279815673828, + "rewards/rejected": -19.10272216796875, + "step": 3507 + }, + { + "epoch": 5.63, + "learning_rate": 2.0798652397938959e-07, + "logits/chosen": -1.5862098932266235, + "logits/rejected": -1.6957588195800781, + "logps/chosen": -176.39666748046875, + "logps/rejected": -360.6898193359375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.487279415130615, + "rewards/margins": 15.480062484741211, + "rewards/rejected": -22.967342376708984, + "step": 3508 + }, + { + "epoch": 5.63, + "learning_rate": 2.0788743559254855e-07, + "logits/chosen": -1.5357627868652344, + "logits/rejected": -1.5881835222244263, + "logps/chosen": -128.56817626953125, + "logps/rejected": -267.74334716796875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.929163455963135, + "rewards/margins": 12.324718475341797, + "rewards/rejected": -18.253883361816406, + "step": 3509 + }, + { + "epoch": 5.63, + "learning_rate": 2.0778834720570748e-07, + "logits/chosen": -1.6373027563095093, + "logits/rejected": -1.5733379125595093, + "logps/chosen": -176.00518798828125, + "logps/rejected": -265.058349609375, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.064338684082031, + "rewards/margins": 10.770269393920898, + "rewards/rejected": -17.83460807800293, + "step": 3510 + }, + { + "epoch": 5.64, + "learning_rate": 2.076892588188664e-07, + "logits/chosen": -1.5358843803405762, + "logits/rejected": -1.4863566160202026, + "logps/chosen": -166.96798706054688, + "logps/rejected": -229.59681701660156, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.249853134155273, + "rewards/margins": 9.163719177246094, + "rewards/rejected": -15.413572311401367, + "step": 3511 + }, + { + "epoch": 5.64, + "learning_rate": 2.0759017043202537e-07, + "logits/chosen": -1.497086763381958, + "logits/rejected": -1.5779889822006226, + "logps/chosen": -169.27078247070312, + "logps/rejected": -297.73388671875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.500387191772461, + "rewards/margins": 12.184248924255371, + "rewards/rejected": -20.68463706970215, + "step": 3512 + }, + { + "epoch": 5.64, + "learning_rate": 2.0749108204518428e-07, + "logits/chosen": -1.445956826210022, + "logits/rejected": -1.4788063764572144, + "logps/chosen": -161.59393310546875, + "logps/rejected": -267.75799560546875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.866689682006836, + "rewards/margins": 10.481036186218262, + "rewards/rejected": -19.34772491455078, + "step": 3513 + }, + { + "epoch": 5.64, + "learning_rate": 2.0739199365834324e-07, + "logits/chosen": -1.5390739440917969, + "logits/rejected": -1.5803804397583008, + "logps/chosen": -155.83084106445312, + "logps/rejected": -246.75985717773438, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.244839191436768, + "rewards/margins": 9.831026077270508, + "rewards/rejected": -17.075864791870117, + "step": 3514 + }, + { + "epoch": 5.64, + "learning_rate": 2.0729290527150217e-07, + "logits/chosen": -1.5421901941299438, + "logits/rejected": -1.5358848571777344, + "logps/chosen": -141.08447265625, + "logps/rejected": -272.2650451660156, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.407374858856201, + "rewards/margins": 12.155393600463867, + "rewards/rejected": -17.562768936157227, + "step": 3515 + }, + { + "epoch": 5.64, + "learning_rate": 2.071938168846611e-07, + "logits/chosen": -1.6914560794830322, + "logits/rejected": -1.6321383714675903, + "logps/chosen": -138.86932373046875, + "logps/rejected": -233.4820098876953, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.154097080230713, + "rewards/margins": 11.60377025604248, + "rewards/rejected": -15.757867813110352, + "step": 3516 + }, + { + "epoch": 5.65, + "learning_rate": 2.0709472849782007e-07, + "logits/chosen": -1.65492582321167, + "logits/rejected": -1.6905113458633423, + "logps/chosen": -139.6899871826172, + "logps/rejected": -260.28607177734375, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.599881649017334, + "rewards/margins": 12.029863357543945, + "rewards/rejected": -18.629745483398438, + "step": 3517 + }, + { + "epoch": 5.65, + "learning_rate": 2.0699564011097897e-07, + "logits/chosen": -1.6711056232452393, + "logits/rejected": -1.6952325105667114, + "logps/chosen": -163.0201416015625, + "logps/rejected": -262.00250244140625, + "loss": 0.0044, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.483802795410156, + "rewards/margins": 8.725163459777832, + "rewards/rejected": -16.208967208862305, + "step": 3518 + }, + { + "epoch": 5.65, + "learning_rate": 2.0689655172413793e-07, + "logits/chosen": -1.4953594207763672, + "logits/rejected": -1.4675028324127197, + "logps/chosen": -204.3321990966797, + "logps/rejected": -318.9854431152344, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.17468547821045, + "rewards/margins": 12.923084259033203, + "rewards/rejected": -23.097766876220703, + "step": 3519 + }, + { + "epoch": 5.65, + "learning_rate": 2.0679746333729687e-07, + "logits/chosen": -1.7349953651428223, + "logits/rejected": -1.6611180305480957, + "logps/chosen": -137.07705688476562, + "logps/rejected": -262.3623046875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.503540515899658, + "rewards/margins": 14.570038795471191, + "rewards/rejected": -19.073579788208008, + "step": 3520 + }, + { + "epoch": 5.65, + "learning_rate": 2.066983749504558e-07, + "logits/chosen": -1.4682340621948242, + "logits/rejected": -1.4893825054168701, + "logps/chosen": -140.5184326171875, + "logps/rejected": -271.0100402832031, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.596077919006348, + "rewards/margins": 13.537010192871094, + "rewards/rejected": -18.133087158203125, + "step": 3521 + }, + { + "epoch": 5.65, + "learning_rate": 2.0659928656361473e-07, + "logits/chosen": -1.505333662033081, + "logits/rejected": -1.4636342525482178, + "logps/chosen": -196.52276611328125, + "logps/rejected": -318.2760009765625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.679302215576172, + "rewards/margins": 11.629175186157227, + "rewards/rejected": -22.30847930908203, + "step": 3522 + }, + { + "epoch": 5.65, + "learning_rate": 2.0650019817677366e-07, + "logits/chosen": -1.37777841091156, + "logits/rejected": -1.4127016067504883, + "logps/chosen": -99.77792358398438, + "logps/rejected": -234.60333251953125, + "loss": 0.002, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.4538702964782715, + "rewards/margins": 11.31854248046875, + "rewards/rejected": -14.772412300109863, + "step": 3523 + }, + { + "epoch": 5.66, + "learning_rate": 2.064011097899326e-07, + "logits/chosen": -1.631746768951416, + "logits/rejected": -1.5850729942321777, + "logps/chosen": -141.7529296875, + "logps/rejected": -258.9488830566406, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.887248992919922, + "rewards/margins": 11.788074493408203, + "rewards/rejected": -18.675325393676758, + "step": 3524 + }, + { + "epoch": 5.66, + "learning_rate": 2.0630202140309156e-07, + "logits/chosen": -1.5112628936767578, + "logits/rejected": -1.6130366325378418, + "logps/chosen": -154.795654296875, + "logps/rejected": -316.2657470703125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.618958473205566, + "rewards/margins": 13.794021606445312, + "rewards/rejected": -20.412979125976562, + "step": 3525 + }, + { + "epoch": 5.66, + "learning_rate": 2.062029330162505e-07, + "logits/chosen": -1.6076500415802002, + "logits/rejected": -1.5037975311279297, + "logps/chosen": -154.53564453125, + "logps/rejected": -241.74484252929688, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.068292617797852, + "rewards/margins": 10.463736534118652, + "rewards/rejected": -16.53203010559082, + "step": 3526 + }, + { + "epoch": 5.66, + "learning_rate": 2.0610384462940942e-07, + "logits/chosen": -1.461061716079712, + "logits/rejected": -1.525355339050293, + "logps/chosen": -155.22055053710938, + "logps/rejected": -325.6090087890625, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.9952239990234375, + "rewards/margins": 14.975289344787598, + "rewards/rejected": -22.97051429748535, + "step": 3527 + }, + { + "epoch": 5.66, + "learning_rate": 2.0600475624256836e-07, + "logits/chosen": -1.7046408653259277, + "logits/rejected": -1.6654388904571533, + "logps/chosen": -144.3090362548828, + "logps/rejected": -273.2762145996094, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.3456645011901855, + "rewards/margins": 13.871570587158203, + "rewards/rejected": -18.217233657836914, + "step": 3528 + }, + { + "epoch": 5.66, + "learning_rate": 2.059056678557273e-07, + "logits/chosen": -1.5195531845092773, + "logits/rejected": -1.5716814994812012, + "logps/chosen": -154.99191284179688, + "logps/rejected": -286.3543395996094, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.913886070251465, + "rewards/margins": 11.889086723327637, + "rewards/rejected": -19.802974700927734, + "step": 3529 + }, + { + "epoch": 5.67, + "learning_rate": 2.0580657946888625e-07, + "logits/chosen": -1.6514383554458618, + "logits/rejected": -1.5940759181976318, + "logps/chosen": -149.56546020507812, + "logps/rejected": -269.6033630371094, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.9260735511779785, + "rewards/margins": 12.607810020446777, + "rewards/rejected": -20.533884048461914, + "step": 3530 + }, + { + "epoch": 5.67, + "learning_rate": 2.0570749108204518e-07, + "logits/chosen": -1.796550989151001, + "logits/rejected": -1.7530274391174316, + "logps/chosen": -125.2088851928711, + "logps/rejected": -291.35943603515625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.228306770324707, + "rewards/margins": 16.907188415527344, + "rewards/rejected": -21.135496139526367, + "step": 3531 + }, + { + "epoch": 5.67, + "learning_rate": 2.056084026952041e-07, + "logits/chosen": -1.4930200576782227, + "logits/rejected": -1.5153943300247192, + "logps/chosen": -124.77980041503906, + "logps/rejected": -284.3247375488281, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.6466524600982666, + "rewards/margins": 15.05573844909668, + "rewards/rejected": -18.702390670776367, + "step": 3532 + }, + { + "epoch": 5.67, + "learning_rate": 2.0550931430836305e-07, + "logits/chosen": -1.4678759574890137, + "logits/rejected": -1.4246290922164917, + "logps/chosen": -170.02784729003906, + "logps/rejected": -297.3772277832031, + "loss": 0.0027, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.877201557159424, + "rewards/margins": 13.11614990234375, + "rewards/rejected": -20.993350982666016, + "step": 3533 + }, + { + "epoch": 5.67, + "learning_rate": 2.0541022592152198e-07, + "logits/chosen": -1.5578639507293701, + "logits/rejected": -1.4947177171707153, + "logps/chosen": -165.95391845703125, + "logps/rejected": -293.1918640136719, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.643244743347168, + "rewards/margins": 13.28227424621582, + "rewards/rejected": -20.925518035888672, + "step": 3534 + }, + { + "epoch": 5.67, + "learning_rate": 2.0531113753468094e-07, + "logits/chosen": -1.3211886882781982, + "logits/rejected": -1.3503947257995605, + "logps/chosen": -160.87661743164062, + "logps/rejected": -283.2664794921875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.231307029724121, + "rewards/margins": 12.0439453125, + "rewards/rejected": -20.275251388549805, + "step": 3535 + }, + { + "epoch": 5.68, + "learning_rate": 2.0521204914783988e-07, + "logits/chosen": -1.5581319332122803, + "logits/rejected": -1.4653578996658325, + "logps/chosen": -200.51364135742188, + "logps/rejected": -304.32989501953125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.42978286743164, + "rewards/margins": 13.421276092529297, + "rewards/rejected": -22.851058959960938, + "step": 3536 + }, + { + "epoch": 5.68, + "learning_rate": 2.0511296076099878e-07, + "logits/chosen": -1.618722915649414, + "logits/rejected": -1.517574667930603, + "logps/chosen": -185.39898681640625, + "logps/rejected": -268.9796447753906, + "loss": 0.0092, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.109620094299316, + "rewards/margins": 9.910978317260742, + "rewards/rejected": -18.020599365234375, + "step": 3537 + }, + { + "epoch": 5.68, + "learning_rate": 2.0501387237415774e-07, + "logits/chosen": -1.4455080032348633, + "logits/rejected": -1.4987713098526, + "logps/chosen": -118.1763687133789, + "logps/rejected": -295.4964599609375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.477498531341553, + "rewards/margins": 13.800302505493164, + "rewards/rejected": -19.277801513671875, + "step": 3538 + }, + { + "epoch": 5.68, + "learning_rate": 2.0491478398731668e-07, + "logits/chosen": -1.4148435592651367, + "logits/rejected": -1.5270719528198242, + "logps/chosen": -178.26426696777344, + "logps/rejected": -302.45404052734375, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.34427547454834, + "rewards/margins": 11.370718002319336, + "rewards/rejected": -19.71499252319336, + "step": 3539 + }, + { + "epoch": 5.68, + "learning_rate": 2.0481569560047564e-07, + "logits/chosen": -1.7418153285980225, + "logits/rejected": -1.7663133144378662, + "logps/chosen": -133.5062255859375, + "logps/rejected": -294.9941711425781, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.562310695648193, + "rewards/margins": 14.592042922973633, + "rewards/rejected": -19.154354095458984, + "step": 3540 + }, + { + "epoch": 5.68, + "learning_rate": 2.0471660721363454e-07, + "logits/chosen": -1.4859404563903809, + "logits/rejected": -1.4931477308273315, + "logps/chosen": -155.93174743652344, + "logps/rejected": -336.06915283203125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.504201889038086, + "rewards/margins": 15.88814640045166, + "rewards/rejected": -21.392349243164062, + "step": 3541 + }, + { + "epoch": 5.69, + "learning_rate": 2.0461751882679348e-07, + "logits/chosen": -1.682798981666565, + "logits/rejected": -1.7489967346191406, + "logps/chosen": -149.91802978515625, + "logps/rejected": -301.7533874511719, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.797520637512207, + "rewards/margins": 14.831779479980469, + "rewards/rejected": -21.62929916381836, + "step": 3542 + }, + { + "epoch": 5.69, + "learning_rate": 2.0451843043995244e-07, + "logits/chosen": -1.6320478916168213, + "logits/rejected": -1.7101695537567139, + "logps/chosen": -131.40391540527344, + "logps/rejected": -258.38043212890625, + "loss": 0.0213, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.378139495849609, + "rewards/margins": 10.418229103088379, + "rewards/rejected": -16.796369552612305, + "step": 3543 + }, + { + "epoch": 5.69, + "learning_rate": 2.0441934205311137e-07, + "logits/chosen": -1.4227283000946045, + "logits/rejected": -1.4669923782348633, + "logps/chosen": -138.54624938964844, + "logps/rejected": -262.70660400390625, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.704582214355469, + "rewards/margins": 11.912574768066406, + "rewards/rejected": -18.617156982421875, + "step": 3544 + }, + { + "epoch": 5.69, + "learning_rate": 2.043202536662703e-07, + "logits/chosen": -1.4847792387008667, + "logits/rejected": -1.5021198987960815, + "logps/chosen": -139.29275512695312, + "logps/rejected": -274.85467529296875, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.649136066436768, + "rewards/margins": 12.453357696533203, + "rewards/rejected": -18.102493286132812, + "step": 3545 + }, + { + "epoch": 5.69, + "learning_rate": 2.0422116527942924e-07, + "logits/chosen": -1.5052356719970703, + "logits/rejected": -1.5876795053482056, + "logps/chosen": -179.66453552246094, + "logps/rejected": -342.5696716308594, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.686628341674805, + "rewards/margins": 15.56988525390625, + "rewards/rejected": -24.256515502929688, + "step": 3546 + }, + { + "epoch": 5.69, + "learning_rate": 2.0412207689258817e-07, + "logits/chosen": -1.6352779865264893, + "logits/rejected": -1.648259162902832, + "logps/chosen": -131.49856567382812, + "logps/rejected": -241.78872680664062, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.069862365722656, + "rewards/margins": 9.054788589477539, + "rewards/rejected": -14.124650955200195, + "step": 3547 + }, + { + "epoch": 5.7, + "learning_rate": 2.0402298850574713e-07, + "logits/chosen": -1.5092244148254395, + "logits/rejected": -1.491392970085144, + "logps/chosen": -183.5706787109375, + "logps/rejected": -337.9241943359375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.956581115722656, + "rewards/margins": 15.629890441894531, + "rewards/rejected": -24.58647346496582, + "step": 3548 + }, + { + "epoch": 5.7, + "learning_rate": 2.0392390011890606e-07, + "logits/chosen": -1.5024819374084473, + "logits/rejected": -1.5552245378494263, + "logps/chosen": -174.82614135742188, + "logps/rejected": -292.9357604980469, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.744539260864258, + "rewards/margins": 11.926436424255371, + "rewards/rejected": -20.670974731445312, + "step": 3549 + }, + { + "epoch": 5.7, + "learning_rate": 2.03824811732065e-07, + "logits/chosen": -1.511600136756897, + "logits/rejected": -1.4875361919403076, + "logps/chosen": -143.91891479492188, + "logps/rejected": -229.28689575195312, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.312620162963867, + "rewards/margins": 10.142881393432617, + "rewards/rejected": -15.455500602722168, + "step": 3550 + }, + { + "epoch": 5.7, + "learning_rate": 2.0372572334522393e-07, + "logits/chosen": -1.5917384624481201, + "logits/rejected": -1.5776280164718628, + "logps/chosen": -167.51870727539062, + "logps/rejected": -301.0332946777344, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.939592361450195, + "rewards/margins": 12.207817077636719, + "rewards/rejected": -21.14740753173828, + "step": 3551 + }, + { + "epoch": 5.7, + "learning_rate": 2.0362663495838286e-07, + "logits/chosen": -1.4983164072036743, + "logits/rejected": -1.5237936973571777, + "logps/chosen": -182.20559692382812, + "logps/rejected": -276.1044921875, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.436517715454102, + "rewards/margins": 9.365097045898438, + "rewards/rejected": -17.80161476135254, + "step": 3552 + }, + { + "epoch": 5.7, + "learning_rate": 2.035275465715418e-07, + "logits/chosen": -1.561061978340149, + "logits/rejected": -1.7600593566894531, + "logps/chosen": -133.84555053710938, + "logps/rejected": -296.4178466796875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.517006874084473, + "rewards/margins": 13.70553970336914, + "rewards/rejected": -19.222545623779297, + "step": 3553 + }, + { + "epoch": 5.7, + "learning_rate": 2.0342845818470076e-07, + "logits/chosen": -1.5922131538391113, + "logits/rejected": -1.557830810546875, + "logps/chosen": -131.55929565429688, + "logps/rejected": -245.2205047607422, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.651397228240967, + "rewards/margins": 13.379343032836914, + "rewards/rejected": -18.03074073791504, + "step": 3554 + }, + { + "epoch": 5.71, + "learning_rate": 2.0332936979785966e-07, + "logits/chosen": -1.6778231859207153, + "logits/rejected": -1.6266074180603027, + "logps/chosen": -157.13607788085938, + "logps/rejected": -244.11932373046875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.587542533874512, + "rewards/margins": 11.425036430358887, + "rewards/rejected": -18.0125789642334, + "step": 3555 + }, + { + "epoch": 5.71, + "learning_rate": 2.0323028141101862e-07, + "logits/chosen": -1.4737540483474731, + "logits/rejected": -1.456387996673584, + "logps/chosen": -151.18467712402344, + "logps/rejected": -244.69520568847656, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.683345317840576, + "rewards/margins": 10.568519592285156, + "rewards/rejected": -17.25186538696289, + "step": 3556 + }, + { + "epoch": 5.71, + "learning_rate": 2.0313119302417756e-07, + "logits/chosen": -1.5176188945770264, + "logits/rejected": -1.5028632879257202, + "logps/chosen": -130.75633239746094, + "logps/rejected": -246.44522094726562, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.476804256439209, + "rewards/margins": 12.84494400024414, + "rewards/rejected": -18.321746826171875, + "step": 3557 + }, + { + "epoch": 5.71, + "learning_rate": 2.030321046373365e-07, + "logits/chosen": -1.426805019378662, + "logits/rejected": -1.3686909675598145, + "logps/chosen": -209.47300720214844, + "logps/rejected": -308.56365966796875, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.321763038635254, + "rewards/margins": 11.223033905029297, + "rewards/rejected": -22.544795989990234, + "step": 3558 + }, + { + "epoch": 5.71, + "learning_rate": 2.0293301625049545e-07, + "logits/chosen": -1.4640378952026367, + "logits/rejected": -1.4723306894302368, + "logps/chosen": -159.8562469482422, + "logps/rejected": -295.4896545410156, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.02308177947998, + "rewards/margins": 12.408321380615234, + "rewards/rejected": -20.43140411376953, + "step": 3559 + }, + { + "epoch": 5.71, + "learning_rate": 2.0283392786365436e-07, + "logits/chosen": -1.5524799823760986, + "logits/rejected": -1.5302510261535645, + "logps/chosen": -133.75259399414062, + "logps/rejected": -239.54159545898438, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.304057598114014, + "rewards/margins": 12.450199127197266, + "rewards/rejected": -16.754257202148438, + "step": 3560 + }, + { + "epoch": 5.72, + "learning_rate": 2.027348394768133e-07, + "logits/chosen": -1.521172285079956, + "logits/rejected": -1.488037347793579, + "logps/chosen": -159.90667724609375, + "logps/rejected": -288.56658935546875, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.059093475341797, + "rewards/margins": 12.773754119873047, + "rewards/rejected": -19.832847595214844, + "step": 3561 + }, + { + "epoch": 5.72, + "learning_rate": 2.0263575108997225e-07, + "logits/chosen": -1.5032345056533813, + "logits/rejected": -1.551843285560608, + "logps/chosen": -191.45289611816406, + "logps/rejected": -288.1676330566406, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.208745956420898, + "rewards/margins": 10.742368698120117, + "rewards/rejected": -19.951114654541016, + "step": 3562 + }, + { + "epoch": 5.72, + "learning_rate": 2.0253666270313118e-07, + "logits/chosen": -1.448955774307251, + "logits/rejected": -1.506296157836914, + "logps/chosen": -203.14108276367188, + "logps/rejected": -345.15936279296875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.56435775756836, + "rewards/margins": 13.397268295288086, + "rewards/rejected": -23.961627960205078, + "step": 3563 + }, + { + "epoch": 5.72, + "learning_rate": 2.0243757431629014e-07, + "logits/chosen": -1.6489144563674927, + "logits/rejected": -1.580488920211792, + "logps/chosen": -143.65866088867188, + "logps/rejected": -266.9210510253906, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.636787414550781, + "rewards/margins": 11.612483024597168, + "rewards/rejected": -16.249271392822266, + "step": 3564 + }, + { + "epoch": 5.72, + "learning_rate": 2.0233848592944905e-07, + "logits/chosen": -1.566999077796936, + "logits/rejected": -1.6450825929641724, + "logps/chosen": -120.28274536132812, + "logps/rejected": -287.6656799316406, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.104822158813477, + "rewards/margins": 14.038522720336914, + "rewards/rejected": -19.143346786499023, + "step": 3565 + }, + { + "epoch": 5.72, + "learning_rate": 2.0223939754260798e-07, + "logits/chosen": -1.7184793949127197, + "logits/rejected": -1.5953909158706665, + "logps/chosen": -149.8682098388672, + "logps/rejected": -301.1786193847656, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.588178634643555, + "rewards/margins": 15.959988594055176, + "rewards/rejected": -22.548168182373047, + "step": 3566 + }, + { + "epoch": 5.73, + "learning_rate": 2.0214030915576694e-07, + "logits/chosen": -1.4647021293640137, + "logits/rejected": -1.4719187021255493, + "logps/chosen": -123.21796417236328, + "logps/rejected": -296.1027526855469, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.9264678955078125, + "rewards/margins": 14.839374542236328, + "rewards/rejected": -19.765840530395508, + "step": 3567 + }, + { + "epoch": 5.73, + "learning_rate": 2.0204122076892588e-07, + "logits/chosen": -1.4788576364517212, + "logits/rejected": -1.4537010192871094, + "logps/chosen": -156.90538024902344, + "logps/rejected": -242.5503387451172, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.611600875854492, + "rewards/margins": 10.983524322509766, + "rewards/rejected": -17.595125198364258, + "step": 3568 + }, + { + "epoch": 5.73, + "learning_rate": 2.0194213238208483e-07, + "logits/chosen": -1.5989553928375244, + "logits/rejected": -1.6376492977142334, + "logps/chosen": -140.93154907226562, + "logps/rejected": -272.79827880859375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.4215826988220215, + "rewards/margins": 13.255082130432129, + "rewards/rejected": -18.676664352416992, + "step": 3569 + }, + { + "epoch": 5.73, + "learning_rate": 2.0184304399524374e-07, + "logits/chosen": -1.5587729215621948, + "logits/rejected": -1.4989063739776611, + "logps/chosen": -152.70803833007812, + "logps/rejected": -228.9523468017578, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.8674845695495605, + "rewards/margins": 9.062484741210938, + "rewards/rejected": -15.929969787597656, + "step": 3570 + }, + { + "epoch": 5.73, + "learning_rate": 2.0174395560840267e-07, + "logits/chosen": -1.6701334714889526, + "logits/rejected": -1.6112020015716553, + "logps/chosen": -154.34176635742188, + "logps/rejected": -262.3481750488281, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.533101558685303, + "rewards/margins": 11.809993743896484, + "rewards/rejected": -19.343093872070312, + "step": 3571 + }, + { + "epoch": 5.73, + "learning_rate": 2.0164486722156163e-07, + "logits/chosen": -1.393770456314087, + "logits/rejected": -1.3855311870574951, + "logps/chosen": -90.99893188476562, + "logps/rejected": -216.751220703125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.2704715728759766, + "rewards/margins": 11.286284446716309, + "rewards/rejected": -14.556756019592285, + "step": 3572 + }, + { + "epoch": 5.74, + "learning_rate": 2.0154577883472057e-07, + "logits/chosen": -1.6137773990631104, + "logits/rejected": -1.5613099336624146, + "logps/chosen": -143.21726989746094, + "logps/rejected": -284.9044189453125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.160017967224121, + "rewards/margins": 13.069473266601562, + "rewards/rejected": -19.2294921875, + "step": 3573 + }, + { + "epoch": 5.74, + "learning_rate": 2.0144669044787947e-07, + "logits/chosen": -1.4302728176116943, + "logits/rejected": -1.4839001893997192, + "logps/chosen": -125.02999114990234, + "logps/rejected": -260.8192443847656, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.321097373962402, + "rewards/margins": 12.836235046386719, + "rewards/rejected": -19.157331466674805, + "step": 3574 + }, + { + "epoch": 5.74, + "learning_rate": 2.0134760206103843e-07, + "logits/chosen": -1.527632713317871, + "logits/rejected": -1.4656946659088135, + "logps/chosen": -156.17684936523438, + "logps/rejected": -244.519287109375, + "loss": 0.0017, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.164624214172363, + "rewards/margins": 11.137093544006348, + "rewards/rejected": -17.30171775817871, + "step": 3575 + }, + { + "epoch": 5.74, + "learning_rate": 2.0124851367419737e-07, + "logits/chosen": -1.6314806938171387, + "logits/rejected": -1.5746362209320068, + "logps/chosen": -169.90769958496094, + "logps/rejected": -289.0606994628906, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.0435791015625, + "rewards/margins": 13.864806175231934, + "rewards/rejected": -21.90838623046875, + "step": 3576 + }, + { + "epoch": 5.74, + "learning_rate": 2.0114942528735633e-07, + "logits/chosen": -1.4886690378189087, + "logits/rejected": -1.40162992477417, + "logps/chosen": -172.57791137695312, + "logps/rejected": -285.21942138671875, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.035948753356934, + "rewards/margins": 12.426420211791992, + "rewards/rejected": -20.46236801147461, + "step": 3577 + }, + { + "epoch": 5.74, + "learning_rate": 2.0105033690051526e-07, + "logits/chosen": -1.5235354900360107, + "logits/rejected": -1.5458662509918213, + "logps/chosen": -154.47250366210938, + "logps/rejected": -296.79498291015625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.3569416999816895, + "rewards/margins": 13.459123611450195, + "rewards/rejected": -19.816064834594727, + "step": 3578 + }, + { + "epoch": 5.74, + "learning_rate": 2.0095124851367417e-07, + "logits/chosen": -1.5485100746154785, + "logits/rejected": -1.5467251539230347, + "logps/chosen": -165.16693115234375, + "logps/rejected": -258.6487731933594, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.763125419616699, + "rewards/margins": 10.396384239196777, + "rewards/rejected": -18.159509658813477, + "step": 3579 + }, + { + "epoch": 5.75, + "learning_rate": 2.0085216012683313e-07, + "logits/chosen": -1.4398720264434814, + "logits/rejected": -1.4736427068710327, + "logps/chosen": -115.2834701538086, + "logps/rejected": -223.43472290039062, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.911824703216553, + "rewards/margins": 10.584640502929688, + "rewards/rejected": -15.496465682983398, + "step": 3580 + }, + { + "epoch": 5.75, + "learning_rate": 2.0075307173999206e-07, + "logits/chosen": -1.3699381351470947, + "logits/rejected": -1.3879597187042236, + "logps/chosen": -129.5419464111328, + "logps/rejected": -257.3197021484375, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.5684921741485596, + "rewards/margins": 13.051033973693848, + "rewards/rejected": -16.619525909423828, + "step": 3581 + }, + { + "epoch": 5.75, + "learning_rate": 2.00653983353151e-07, + "logits/chosen": -1.3750466108322144, + "logits/rejected": -1.3528680801391602, + "logps/chosen": -170.72450256347656, + "logps/rejected": -265.4868469238281, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.602771759033203, + "rewards/margins": 9.813447952270508, + "rewards/rejected": -19.41621971130371, + "step": 3582 + }, + { + "epoch": 5.75, + "learning_rate": 2.0055489496630995e-07, + "logits/chosen": -1.434492588043213, + "logits/rejected": -1.5546616315841675, + "logps/chosen": -147.2637939453125, + "logps/rejected": -249.18231201171875, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.574392318725586, + "rewards/margins": 9.400993347167969, + "rewards/rejected": -15.975385665893555, + "step": 3583 + }, + { + "epoch": 5.75, + "learning_rate": 2.0045580657946886e-07, + "logits/chosen": -1.3554716110229492, + "logits/rejected": -1.402051568031311, + "logps/chosen": -97.26850891113281, + "logps/rejected": -176.31997680664062, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.11417293548584, + "rewards/margins": 7.249265670776367, + "rewards/rejected": -11.363438606262207, + "step": 3584 + }, + { + "epoch": 5.75, + "learning_rate": 2.0035671819262782e-07, + "logits/chosen": -1.5690315961837769, + "logits/rejected": -1.5253278017044067, + "logps/chosen": -111.10417175292969, + "logps/rejected": -216.83543395996094, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.784228324890137, + "rewards/margins": 10.298759460449219, + "rewards/rejected": -15.082986831665039, + "step": 3585 + }, + { + "epoch": 5.76, + "learning_rate": 2.0025762980578675e-07, + "logits/chosen": -1.5699938535690308, + "logits/rejected": -1.663338303565979, + "logps/chosen": -157.43161010742188, + "logps/rejected": -290.7130432128906, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.756935119628906, + "rewards/margins": 14.020074844360352, + "rewards/rejected": -19.777009963989258, + "step": 3586 + }, + { + "epoch": 5.76, + "learning_rate": 2.001585414189457e-07, + "logits/chosen": -1.4872403144836426, + "logits/rejected": -1.4863290786743164, + "logps/chosen": -145.29481506347656, + "logps/rejected": -290.0386962890625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.069206237792969, + "rewards/margins": 12.496539115905762, + "rewards/rejected": -17.565746307373047, + "step": 3587 + }, + { + "epoch": 5.76, + "learning_rate": 2.0005945303210465e-07, + "logits/chosen": -1.5600666999816895, + "logits/rejected": -1.621584415435791, + "logps/chosen": -157.46954345703125, + "logps/rejected": -285.50616455078125, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.9017181396484375, + "rewards/margins": 12.87321662902832, + "rewards/rejected": -19.774934768676758, + "step": 3588 + }, + { + "epoch": 5.76, + "learning_rate": 1.9996036464526355e-07, + "logits/chosen": -1.4330542087554932, + "logits/rejected": -1.4750866889953613, + "logps/chosen": -139.40362548828125, + "logps/rejected": -251.46456909179688, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9308011531829834, + "rewards/margins": 14.118204116821289, + "rewards/rejected": -18.04900550842285, + "step": 3589 + }, + { + "epoch": 5.76, + "learning_rate": 1.9986127625842251e-07, + "logits/chosen": -1.4769009351730347, + "logits/rejected": -1.4740387201309204, + "logps/chosen": -171.4402618408203, + "logps/rejected": -332.0131530761719, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.355722427368164, + "rewards/margins": 15.581747055053711, + "rewards/rejected": -22.937469482421875, + "step": 3590 + }, + { + "epoch": 5.76, + "learning_rate": 1.9976218787158145e-07, + "logits/chosen": -1.3216586112976074, + "logits/rejected": -1.3692569732666016, + "logps/chosen": -147.22802734375, + "logps/rejected": -273.540771484375, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.8810834884643555, + "rewards/margins": 12.605966567993164, + "rewards/rejected": -18.487049102783203, + "step": 3591 + }, + { + "epoch": 5.77, + "learning_rate": 1.9966309948474038e-07, + "logits/chosen": -1.4093987941741943, + "logits/rejected": -1.4458301067352295, + "logps/chosen": -118.55331420898438, + "logps/rejected": -260.40362548828125, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.255422592163086, + "rewards/margins": 13.546026229858398, + "rewards/rejected": -17.801448822021484, + "step": 3592 + }, + { + "epoch": 5.77, + "learning_rate": 1.9956401109789931e-07, + "logits/chosen": -1.6516507863998413, + "logits/rejected": -1.542681336402893, + "logps/chosen": -129.18846130371094, + "logps/rejected": -242.06375122070312, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.248739242553711, + "rewards/margins": 13.694120407104492, + "rewards/rejected": -18.942859649658203, + "step": 3593 + }, + { + "epoch": 5.77, + "learning_rate": 1.9946492271105825e-07, + "logits/chosen": -1.3624695539474487, + "logits/rejected": -1.46234130859375, + "logps/chosen": -190.02490234375, + "logps/rejected": -313.30657958984375, + "loss": 0.0056, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.15498161315918, + "rewards/margins": 10.451542854309082, + "rewards/rejected": -20.606525421142578, + "step": 3594 + }, + { + "epoch": 5.77, + "learning_rate": 1.9936583432421718e-07, + "logits/chosen": -1.4925742149353027, + "logits/rejected": -1.4886584281921387, + "logps/chosen": -114.5026626586914, + "logps/rejected": -253.30343627929688, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.153810501098633, + "rewards/margins": 13.177797317504883, + "rewards/rejected": -17.331607818603516, + "step": 3595 + }, + { + "epoch": 5.77, + "learning_rate": 1.9926674593737614e-07, + "logits/chosen": -1.6086325645446777, + "logits/rejected": -1.5818456411361694, + "logps/chosen": -158.46426391601562, + "logps/rejected": -276.2382507324219, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.729948997497559, + "rewards/margins": 12.530540466308594, + "rewards/rejected": -19.260488510131836, + "step": 3596 + }, + { + "epoch": 5.77, + "learning_rate": 1.9916765755053507e-07, + "logits/chosen": -1.6947975158691406, + "logits/rejected": -1.6810553073883057, + "logps/chosen": -162.68609619140625, + "logps/rejected": -284.0870056152344, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.401661396026611, + "rewards/margins": 12.530789375305176, + "rewards/rejected": -17.932451248168945, + "step": 3597 + }, + { + "epoch": 5.78, + "learning_rate": 1.99068569163694e-07, + "logits/chosen": -1.5132685899734497, + "logits/rejected": -1.4850399494171143, + "logps/chosen": -169.82498168945312, + "logps/rejected": -252.5393524169922, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.1982526779174805, + "rewards/margins": 9.479042053222656, + "rewards/rejected": -16.67729377746582, + "step": 3598 + }, + { + "epoch": 5.78, + "learning_rate": 1.9896948077685294e-07, + "logits/chosen": -1.6742961406707764, + "logits/rejected": -1.6924819946289062, + "logps/chosen": -152.41909790039062, + "logps/rejected": -282.9547424316406, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.786500930786133, + "rewards/margins": 12.395954132080078, + "rewards/rejected": -20.18245506286621, + "step": 3599 + }, + { + "epoch": 5.78, + "learning_rate": 1.9887039239001187e-07, + "logits/chosen": -1.5375986099243164, + "logits/rejected": -1.4762905836105347, + "logps/chosen": -169.60302734375, + "logps/rejected": -293.0714111328125, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.97305154800415, + "rewards/margins": 12.60361099243164, + "rewards/rejected": -19.576662063598633, + "step": 3600 + }, + { + "epoch": 5.78, + "learning_rate": 1.9877130400317083e-07, + "logits/chosen": -1.5752952098846436, + "logits/rejected": -1.5008842945098877, + "logps/chosen": -167.08627319335938, + "logps/rejected": -307.57672119140625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.409596920013428, + "rewards/margins": 15.092230796813965, + "rewards/rejected": -21.501827239990234, + "step": 3601 + }, + { + "epoch": 5.78, + "learning_rate": 1.9867221561632977e-07, + "logits/chosen": -1.5248076915740967, + "logits/rejected": -1.5527385473251343, + "logps/chosen": -149.1284942626953, + "logps/rejected": -289.356201171875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.969615936279297, + "rewards/margins": 11.953279495239258, + "rewards/rejected": -18.922895431518555, + "step": 3602 + }, + { + "epoch": 5.78, + "learning_rate": 1.9857312722948867e-07, + "logits/chosen": -1.551891565322876, + "logits/rejected": -1.6286375522613525, + "logps/chosen": -105.75352478027344, + "logps/rejected": -249.41807556152344, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.346821308135986, + "rewards/margins": 11.970518112182617, + "rewards/rejected": -16.317338943481445, + "step": 3603 + }, + { + "epoch": 5.78, + "learning_rate": 1.9847403884264763e-07, + "logits/chosen": -1.411435842514038, + "logits/rejected": -1.3770455121994019, + "logps/chosen": -119.30325317382812, + "logps/rejected": -247.80270385742188, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.738180637359619, + "rewards/margins": 13.475936889648438, + "rewards/rejected": -18.21411895751953, + "step": 3604 + }, + { + "epoch": 5.79, + "learning_rate": 1.9837495045580657e-07, + "logits/chosen": -1.4738895893096924, + "logits/rejected": -1.5098522901535034, + "logps/chosen": -99.53736877441406, + "logps/rejected": -299.56439208984375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.749724864959717, + "rewards/margins": 17.422269821166992, + "rewards/rejected": -21.171993255615234, + "step": 3605 + }, + { + "epoch": 5.79, + "learning_rate": 1.9827586206896553e-07, + "logits/chosen": -1.5424823760986328, + "logits/rejected": -1.6310869455337524, + "logps/chosen": -140.87510681152344, + "logps/rejected": -335.4617614746094, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.027637004852295, + "rewards/margins": 16.25531578063965, + "rewards/rejected": -22.2829532623291, + "step": 3606 + }, + { + "epoch": 5.79, + "learning_rate": 1.9817677368212446e-07, + "logits/chosen": -1.4546661376953125, + "logits/rejected": -1.4626210927963257, + "logps/chosen": -133.47866821289062, + "logps/rejected": -208.24957275390625, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.985434532165527, + "rewards/margins": 7.077291488647461, + "rewards/rejected": -14.062725067138672, + "step": 3607 + }, + { + "epoch": 5.79, + "learning_rate": 1.9807768529528337e-07, + "logits/chosen": -1.678086519241333, + "logits/rejected": -1.7727010250091553, + "logps/chosen": -126.27224731445312, + "logps/rejected": -266.5982360839844, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.389190673828125, + "rewards/margins": 11.338318824768066, + "rewards/rejected": -16.727510452270508, + "step": 3608 + }, + { + "epoch": 5.79, + "learning_rate": 1.9797859690844233e-07, + "logits/chosen": -1.4443756341934204, + "logits/rejected": -1.4090099334716797, + "logps/chosen": -144.97027587890625, + "logps/rejected": -251.01841735839844, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.2147979736328125, + "rewards/margins": 12.241732597351074, + "rewards/rejected": -18.45652961730957, + "step": 3609 + }, + { + "epoch": 5.79, + "learning_rate": 1.9787950852160126e-07, + "logits/chosen": -1.4543309211730957, + "logits/rejected": -1.4037752151489258, + "logps/chosen": -167.0087890625, + "logps/rejected": -271.51153564453125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.51118278503418, + "rewards/margins": 10.784629821777344, + "rewards/rejected": -19.295812606811523, + "step": 3610 + }, + { + "epoch": 5.8, + "learning_rate": 1.9778042013476022e-07, + "logits/chosen": -1.5626275539398193, + "logits/rejected": -1.539595127105713, + "logps/chosen": -151.12904357910156, + "logps/rejected": -303.9042663574219, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.0249552726745605, + "rewards/margins": 15.471617698669434, + "rewards/rejected": -22.496572494506836, + "step": 3611 + }, + { + "epoch": 5.8, + "learning_rate": 1.9768133174791913e-07, + "logits/chosen": -1.5154834985733032, + "logits/rejected": -1.525287389755249, + "logps/chosen": -132.39305114746094, + "logps/rejected": -232.89024353027344, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.786574840545654, + "rewards/margins": 11.740911483764648, + "rewards/rejected": -16.52748680114746, + "step": 3612 + }, + { + "epoch": 5.8, + "learning_rate": 1.9758224336107806e-07, + "logits/chosen": -1.495429277420044, + "logits/rejected": -1.5334311723709106, + "logps/chosen": -133.42051696777344, + "logps/rejected": -297.0221862792969, + "loss": 0.0034, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.421781063079834, + "rewards/margins": 16.473379135131836, + "rewards/rejected": -20.895160675048828, + "step": 3613 + }, + { + "epoch": 5.8, + "learning_rate": 1.9748315497423702e-07, + "logits/chosen": -1.494077205657959, + "logits/rejected": -1.473947286605835, + "logps/chosen": -173.30856323242188, + "logps/rejected": -254.0756072998047, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.575965881347656, + "rewards/margins": 9.75881576538086, + "rewards/rejected": -15.334782600402832, + "step": 3614 + }, + { + "epoch": 5.8, + "learning_rate": 1.9738406658739595e-07, + "logits/chosen": -1.507444143295288, + "logits/rejected": -1.4891562461853027, + "logps/chosen": -119.53681945800781, + "logps/rejected": -268.8651428222656, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.380403995513916, + "rewards/margins": 12.298852920532227, + "rewards/rejected": -14.6792573928833, + "step": 3615 + }, + { + "epoch": 5.8, + "learning_rate": 1.9728497820055489e-07, + "logits/chosen": -1.4732081890106201, + "logits/rejected": -1.4758204221725464, + "logps/chosen": -149.52557373046875, + "logps/rejected": -294.8367919921875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.379432678222656, + "rewards/margins": 15.880510330200195, + "rewards/rejected": -22.25994110107422, + "step": 3616 + }, + { + "epoch": 5.81, + "learning_rate": 1.9718588981371382e-07, + "logits/chosen": -1.6422940492630005, + "logits/rejected": -1.5283619165420532, + "logps/chosen": -149.61883544921875, + "logps/rejected": -244.26026916503906, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.03014612197876, + "rewards/margins": 10.754767417907715, + "rewards/rejected": -16.784914016723633, + "step": 3617 + }, + { + "epoch": 5.81, + "learning_rate": 1.9708680142687275e-07, + "logits/chosen": -1.6985548734664917, + "logits/rejected": -1.6211954355239868, + "logps/chosen": -146.80267333984375, + "logps/rejected": -214.2506866455078, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.056077480316162, + "rewards/margins": 9.950145721435547, + "rewards/rejected": -15.00622272491455, + "step": 3618 + }, + { + "epoch": 5.81, + "learning_rate": 1.969877130400317e-07, + "logits/chosen": -1.5220632553100586, + "logits/rejected": -1.5187170505523682, + "logps/chosen": -132.83258056640625, + "logps/rejected": -265.8354187011719, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.797036170959473, + "rewards/margins": 13.89424991607666, + "rewards/rejected": -18.691286087036133, + "step": 3619 + }, + { + "epoch": 5.81, + "learning_rate": 1.9688862465319064e-07, + "logits/chosen": -1.4496593475341797, + "logits/rejected": -1.6414544582366943, + "logps/chosen": -132.57351684570312, + "logps/rejected": -276.1432189941406, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.7112555503845215, + "rewards/margins": 12.481143951416016, + "rewards/rejected": -19.192399978637695, + "step": 3620 + }, + { + "epoch": 5.81, + "learning_rate": 1.9678953626634958e-07, + "logits/chosen": -1.602809190750122, + "logits/rejected": -1.5953152179718018, + "logps/chosen": -141.86465454101562, + "logps/rejected": -280.22393798828125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.632419109344482, + "rewards/margins": 12.905250549316406, + "rewards/rejected": -17.537670135498047, + "step": 3621 + }, + { + "epoch": 5.81, + "learning_rate": 1.966904478795085e-07, + "logits/chosen": -1.7487375736236572, + "logits/rejected": -1.7852073907852173, + "logps/chosen": -143.2958221435547, + "logps/rejected": -298.2388916015625, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.60183572769165, + "rewards/margins": 15.003372192382812, + "rewards/rejected": -20.605207443237305, + "step": 3622 + }, + { + "epoch": 5.82, + "learning_rate": 1.9659135949266744e-07, + "logits/chosen": -1.5541441440582275, + "logits/rejected": -1.5739750862121582, + "logps/chosen": -148.06402587890625, + "logps/rejected": -284.72100830078125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.945123672485352, + "rewards/margins": 11.871484756469727, + "rewards/rejected": -17.816608428955078, + "step": 3623 + }, + { + "epoch": 5.82, + "learning_rate": 1.9649227110582638e-07, + "logits/chosen": -1.610907793045044, + "logits/rejected": -1.553051233291626, + "logps/chosen": -125.56085205078125, + "logps/rejected": -265.0923156738281, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.031960964202881, + "rewards/margins": 15.170647621154785, + "rewards/rejected": -19.202608108520508, + "step": 3624 + }, + { + "epoch": 5.82, + "learning_rate": 1.9639318271898534e-07, + "logits/chosen": -1.6155335903167725, + "logits/rejected": -1.5425313711166382, + "logps/chosen": -137.10391235351562, + "logps/rejected": -272.3021240234375, + "loss": 0.0117, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.423020362854004, + "rewards/margins": 13.866828918457031, + "rewards/rejected": -19.28984832763672, + "step": 3625 + }, + { + "epoch": 5.82, + "learning_rate": 1.9629409433214424e-07, + "logits/chosen": -1.5763707160949707, + "logits/rejected": -1.6264281272888184, + "logps/chosen": -173.49237060546875, + "logps/rejected": -329.8870544433594, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.510921001434326, + "rewards/margins": 15.485235214233398, + "rewards/rejected": -22.996156692504883, + "step": 3626 + }, + { + "epoch": 5.82, + "learning_rate": 1.961950059453032e-07, + "logits/chosen": -1.566690444946289, + "logits/rejected": -1.5727636814117432, + "logps/chosen": -159.57525634765625, + "logps/rejected": -280.1453552246094, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.94136905670166, + "rewards/margins": 12.805856704711914, + "rewards/rejected": -18.74722671508789, + "step": 3627 + }, + { + "epoch": 5.82, + "learning_rate": 1.9609591755846214e-07, + "logits/chosen": -1.448369026184082, + "logits/rejected": -1.4360079765319824, + "logps/chosen": -146.82530212402344, + "logps/rejected": -259.76483154296875, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.333584785461426, + "rewards/margins": 11.439241409301758, + "rewards/rejected": -17.772825241088867, + "step": 3628 + }, + { + "epoch": 5.83, + "learning_rate": 1.9599682917162107e-07, + "logits/chosen": -1.416291356086731, + "logits/rejected": -1.5277870893478394, + "logps/chosen": -137.57135009765625, + "logps/rejected": -278.9080505371094, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.08071756362915, + "rewards/margins": 11.568477630615234, + "rewards/rejected": -18.64919662475586, + "step": 3629 + }, + { + "epoch": 5.83, + "learning_rate": 1.9589774078478003e-07, + "logits/chosen": -1.5619186162948608, + "logits/rejected": -1.5831950902938843, + "logps/chosen": -133.29714965820312, + "logps/rejected": -243.6216278076172, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.843698024749756, + "rewards/margins": 11.363057136535645, + "rewards/rejected": -17.206754684448242, + "step": 3630 + }, + { + "epoch": 5.83, + "learning_rate": 1.9579865239793894e-07, + "logits/chosen": -1.5447971820831299, + "logits/rejected": -1.5462377071380615, + "logps/chosen": -201.35948181152344, + "logps/rejected": -301.2703857421875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.162944793701172, + "rewards/margins": 12.206296920776367, + "rewards/rejected": -21.36924171447754, + "step": 3631 + }, + { + "epoch": 5.83, + "learning_rate": 1.956995640110979e-07, + "logits/chosen": -1.4950284957885742, + "logits/rejected": -1.4623119831085205, + "logps/chosen": -180.0721435546875, + "logps/rejected": -306.3988342285156, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.075178146362305, + "rewards/margins": 13.221765518188477, + "rewards/rejected": -22.29694366455078, + "step": 3632 + }, + { + "epoch": 5.83, + "learning_rate": 1.9560047562425683e-07, + "logits/chosen": -1.5311946868896484, + "logits/rejected": -1.535646915435791, + "logps/chosen": -118.03160858154297, + "logps/rejected": -251.7478485107422, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.309359073638916, + "rewards/margins": 13.63560676574707, + "rewards/rejected": -17.944965362548828, + "step": 3633 + }, + { + "epoch": 5.83, + "learning_rate": 1.9550138723741576e-07, + "logits/chosen": -1.4413695335388184, + "logits/rejected": -1.496425986289978, + "logps/chosen": -118.56185913085938, + "logps/rejected": -259.22149658203125, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.958490371704102, + "rewards/margins": 11.972829818725586, + "rewards/rejected": -17.931320190429688, + "step": 3634 + }, + { + "epoch": 5.83, + "learning_rate": 1.9540229885057472e-07, + "logits/chosen": -1.647402048110962, + "logits/rejected": -1.6106317043304443, + "logps/chosen": -169.7397918701172, + "logps/rejected": -284.4171447753906, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.414337635040283, + "rewards/margins": 11.571904182434082, + "rewards/rejected": -17.986242294311523, + "step": 3635 + }, + { + "epoch": 5.84, + "learning_rate": 1.9530321046373363e-07, + "logits/chosen": -1.547605276107788, + "logits/rejected": -1.4955909252166748, + "logps/chosen": -149.25592041015625, + "logps/rejected": -274.1378173828125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.790032386779785, + "rewards/margins": 14.382854461669922, + "rewards/rejected": -20.172887802124023, + "step": 3636 + }, + { + "epoch": 5.84, + "learning_rate": 1.9520412207689256e-07, + "logits/chosen": -1.4827961921691895, + "logits/rejected": -1.5378875732421875, + "logps/chosen": -151.05050659179688, + "logps/rejected": -268.6923522949219, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.258273124694824, + "rewards/margins": 12.2664794921875, + "rewards/rejected": -19.524751663208008, + "step": 3637 + }, + { + "epoch": 5.84, + "learning_rate": 1.9510503369005152e-07, + "logits/chosen": -1.5171899795532227, + "logits/rejected": -1.4746599197387695, + "logps/chosen": -124.65941619873047, + "logps/rejected": -221.1080322265625, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.475859642028809, + "rewards/margins": 9.939740180969238, + "rewards/rejected": -15.41559886932373, + "step": 3638 + }, + { + "epoch": 5.84, + "learning_rate": 1.9500594530321046e-07, + "logits/chosen": -1.435643196105957, + "logits/rejected": -1.4645709991455078, + "logps/chosen": -186.7717742919922, + "logps/rejected": -279.43536376953125, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.460721015930176, + "rewards/margins": 11.204614639282227, + "rewards/rejected": -19.665334701538086, + "step": 3639 + }, + { + "epoch": 5.84, + "learning_rate": 1.9490685691636942e-07, + "logits/chosen": -1.5706042051315308, + "logits/rejected": -1.5340220928192139, + "logps/chosen": -154.95777893066406, + "logps/rejected": -301.5893249511719, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.094677925109863, + "rewards/margins": 14.367963790893555, + "rewards/rejected": -21.462642669677734, + "step": 3640 + }, + { + "epoch": 5.84, + "learning_rate": 1.9480776852952832e-07, + "logits/chosen": -1.5330665111541748, + "logits/rejected": -1.5439238548278809, + "logps/chosen": -143.248046875, + "logps/rejected": -311.9872741699219, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.508798599243164, + "rewards/margins": 15.64439582824707, + "rewards/rejected": -22.153194427490234, + "step": 3641 + }, + { + "epoch": 5.85, + "learning_rate": 1.9470868014268726e-07, + "logits/chosen": -1.4373807907104492, + "logits/rejected": -1.450153112411499, + "logps/chosen": -164.4884490966797, + "logps/rejected": -312.8091735839844, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.459832191467285, + "rewards/margins": 13.822973251342773, + "rewards/rejected": -21.282806396484375, + "step": 3642 + }, + { + "epoch": 5.85, + "learning_rate": 1.9460959175584622e-07, + "logits/chosen": -1.4590200185775757, + "logits/rejected": -1.49994695186615, + "logps/chosen": -95.1679458618164, + "logps/rejected": -213.40316772460938, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.255184650421143, + "rewards/margins": 10.410913467407227, + "rewards/rejected": -14.666098594665527, + "step": 3643 + }, + { + "epoch": 5.85, + "learning_rate": 1.9451050336900515e-07, + "logits/chosen": -1.4650955200195312, + "logits/rejected": -1.5128268003463745, + "logps/chosen": -136.38409423828125, + "logps/rejected": -273.6250305175781, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.704941749572754, + "rewards/margins": 13.200605392456055, + "rewards/rejected": -18.905546188354492, + "step": 3644 + }, + { + "epoch": 5.85, + "learning_rate": 1.9441141498216406e-07, + "logits/chosen": -1.4618892669677734, + "logits/rejected": -1.44507896900177, + "logps/chosen": -161.7688446044922, + "logps/rejected": -305.83111572265625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.389949321746826, + "rewards/margins": 13.65894889831543, + "rewards/rejected": -21.04889678955078, + "step": 3645 + }, + { + "epoch": 5.85, + "learning_rate": 1.9431232659532302e-07, + "logits/chosen": -1.5288586616516113, + "logits/rejected": -1.511027216911316, + "logps/chosen": -152.18124389648438, + "logps/rejected": -247.59426879882812, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.090428829193115, + "rewards/margins": 10.249648094177246, + "rewards/rejected": -17.340076446533203, + "step": 3646 + }, + { + "epoch": 5.85, + "learning_rate": 1.9421323820848195e-07, + "logits/chosen": -1.5149869918823242, + "logits/rejected": -1.5595722198486328, + "logps/chosen": -137.8832550048828, + "logps/rejected": -250.62841796875, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.474795818328857, + "rewards/margins": 10.082210540771484, + "rewards/rejected": -15.557004928588867, + "step": 3647 + }, + { + "epoch": 5.86, + "learning_rate": 1.941141498216409e-07, + "logits/chosen": -1.517293930053711, + "logits/rejected": -1.6003823280334473, + "logps/chosen": -95.16696166992188, + "logps/rejected": -228.02438354492188, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.010128021240234, + "rewards/margins": 12.445441246032715, + "rewards/rejected": -17.455570220947266, + "step": 3648 + }, + { + "epoch": 5.86, + "learning_rate": 1.9401506143479984e-07, + "logits/chosen": -1.5968083143234253, + "logits/rejected": -1.6257691383361816, + "logps/chosen": -166.9114227294922, + "logps/rejected": -249.9268035888672, + "loss": 0.0028, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.440021514892578, + "rewards/margins": 9.112191200256348, + "rewards/rejected": -15.552213668823242, + "step": 3649 + }, + { + "epoch": 5.86, + "learning_rate": 1.9391597304795875e-07, + "logits/chosen": -1.4860155582427979, + "logits/rejected": -1.4595417976379395, + "logps/chosen": -172.56219482421875, + "logps/rejected": -299.3880615234375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.659389495849609, + "rewards/margins": 13.330337524414062, + "rewards/rejected": -19.989727020263672, + "step": 3650 + }, + { + "epoch": 5.86, + "learning_rate": 1.938168846611177e-07, + "logits/chosen": -1.610443115234375, + "logits/rejected": -1.5963492393493652, + "logps/chosen": -213.02243041992188, + "logps/rejected": -296.08905029296875, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.291985511779785, + "rewards/margins": 10.119683265686035, + "rewards/rejected": -20.411670684814453, + "step": 3651 + }, + { + "epoch": 5.86, + "learning_rate": 1.9371779627427664e-07, + "logits/chosen": -1.4166561365127563, + "logits/rejected": -1.424612045288086, + "logps/chosen": -164.54742431640625, + "logps/rejected": -253.05850219726562, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.7008256912231445, + "rewards/margins": 10.451868057250977, + "rewards/rejected": -15.152694702148438, + "step": 3652 + }, + { + "epoch": 5.86, + "learning_rate": 1.936187078874356e-07, + "logits/chosen": -1.494945764541626, + "logits/rejected": -1.4699807167053223, + "logps/chosen": -113.80967712402344, + "logps/rejected": -236.53164672851562, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9342713356018066, + "rewards/margins": 12.6946439743042, + "rewards/rejected": -15.628915786743164, + "step": 3653 + }, + { + "epoch": 5.87, + "learning_rate": 1.9351961950059454e-07, + "logits/chosen": -1.5003175735473633, + "logits/rejected": -1.5379233360290527, + "logps/chosen": -133.9944610595703, + "logps/rejected": -280.30853271484375, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.822778701782227, + "rewards/margins": 13.631128311157227, + "rewards/rejected": -19.453907012939453, + "step": 3654 + }, + { + "epoch": 5.87, + "learning_rate": 1.9342053111375344e-07, + "logits/chosen": -1.4672565460205078, + "logits/rejected": -1.4937043190002441, + "logps/chosen": -106.48299407958984, + "logps/rejected": -238.42361450195312, + "loss": 0.0039, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.253215312957764, + "rewards/margins": 12.468277931213379, + "rewards/rejected": -16.721492767333984, + "step": 3655 + }, + { + "epoch": 5.87, + "learning_rate": 1.933214427269124e-07, + "logits/chosen": -1.408714771270752, + "logits/rejected": -1.3810824155807495, + "logps/chosen": -119.79273223876953, + "logps/rejected": -220.61209106445312, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9259769916534424, + "rewards/margins": 11.27649211883545, + "rewards/rejected": -15.202468872070312, + "step": 3656 + }, + { + "epoch": 5.87, + "learning_rate": 1.9322235434007134e-07, + "logits/chosen": -1.6185362339019775, + "logits/rejected": -1.5941388607025146, + "logps/chosen": -144.82681274414062, + "logps/rejected": -278.14581298828125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.164248466491699, + "rewards/margins": 14.599987030029297, + "rewards/rejected": -18.764236450195312, + "step": 3657 + }, + { + "epoch": 5.87, + "learning_rate": 1.9312326595323027e-07, + "logits/chosen": -1.4566502571105957, + "logits/rejected": -1.5016978979110718, + "logps/chosen": -123.39091491699219, + "logps/rejected": -259.015869140625, + "loss": 0.0146, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.765225410461426, + "rewards/margins": 11.899807929992676, + "rewards/rejected": -15.665034294128418, + "step": 3658 + }, + { + "epoch": 5.87, + "learning_rate": 1.9302417756638923e-07, + "logits/chosen": -1.5461838245391846, + "logits/rejected": -1.5293848514556885, + "logps/chosen": -161.35906982421875, + "logps/rejected": -276.5901184082031, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.117895126342773, + "rewards/margins": 11.426451683044434, + "rewards/rejected": -18.54434585571289, + "step": 3659 + }, + { + "epoch": 5.87, + "learning_rate": 1.9292508917954814e-07, + "logits/chosen": -1.4827799797058105, + "logits/rejected": -1.5103849172592163, + "logps/chosen": -117.5271987915039, + "logps/rejected": -210.06454467773438, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.808136463165283, + "rewards/margins": 9.750738143920898, + "rewards/rejected": -13.55887508392334, + "step": 3660 + }, + { + "epoch": 5.88, + "learning_rate": 1.928260007927071e-07, + "logits/chosen": -1.718045711517334, + "logits/rejected": -1.7224940061569214, + "logps/chosen": -150.13800048828125, + "logps/rejected": -280.9176330566406, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.734304428100586, + "rewards/margins": 12.752914428710938, + "rewards/rejected": -19.487220764160156, + "step": 3661 + }, + { + "epoch": 5.88, + "learning_rate": 1.9272691240586603e-07, + "logits/chosen": -1.4959279298782349, + "logits/rejected": -1.5190191268920898, + "logps/chosen": -126.38053131103516, + "logps/rejected": -247.49526977539062, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.780041217803955, + "rewards/margins": 11.220012664794922, + "rewards/rejected": -16.00005340576172, + "step": 3662 + }, + { + "epoch": 5.88, + "learning_rate": 1.9262782401902496e-07, + "logits/chosen": -1.5342084169387817, + "logits/rejected": -1.5700995922088623, + "logps/chosen": -152.32943725585938, + "logps/rejected": -258.2336120605469, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.056337356567383, + "rewards/margins": 9.165003776550293, + "rewards/rejected": -16.221342086791992, + "step": 3663 + }, + { + "epoch": 5.88, + "learning_rate": 1.925287356321839e-07, + "logits/chosen": -1.5052680969238281, + "logits/rejected": -1.4243559837341309, + "logps/chosen": -163.4620819091797, + "logps/rejected": -302.07861328125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.449522495269775, + "rewards/margins": 14.556560516357422, + "rewards/rejected": -20.006084442138672, + "step": 3664 + }, + { + "epoch": 5.88, + "learning_rate": 1.9242964724534283e-07, + "logits/chosen": -1.5509437322616577, + "logits/rejected": -1.5314981937408447, + "logps/chosen": -165.66082763671875, + "logps/rejected": -253.30783081054688, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.470024585723877, + "rewards/margins": 10.727234840393066, + "rewards/rejected": -18.1972599029541, + "step": 3665 + }, + { + "epoch": 5.88, + "learning_rate": 1.9233055885850176e-07, + "logits/chosen": -1.3682401180267334, + "logits/rejected": -1.3855705261230469, + "logps/chosen": -125.54808807373047, + "logps/rejected": -240.92471313476562, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.1726155281066895, + "rewards/margins": 10.08011245727539, + "rewards/rejected": -16.252729415893555, + "step": 3666 + }, + { + "epoch": 5.89, + "learning_rate": 1.9223147047166072e-07, + "logits/chosen": -1.5226454734802246, + "logits/rejected": -1.6500730514526367, + "logps/chosen": -98.17153930664062, + "logps/rejected": -284.37322998046875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7687339782714844, + "rewards/margins": 17.142288208007812, + "rewards/rejected": -20.911022186279297, + "step": 3667 + }, + { + "epoch": 5.89, + "learning_rate": 1.9213238208481965e-07, + "logits/chosen": -1.5499145984649658, + "logits/rejected": -1.5094082355499268, + "logps/chosen": -136.2602996826172, + "logps/rejected": -241.84165954589844, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.06522274017334, + "rewards/margins": 11.144933700561523, + "rewards/rejected": -15.21015739440918, + "step": 3668 + }, + { + "epoch": 5.89, + "learning_rate": 1.920332936979786e-07, + "logits/chosen": -1.6371996402740479, + "logits/rejected": -1.5926542282104492, + "logps/chosen": -160.00906372070312, + "logps/rejected": -304.69354248046875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.27596664428711, + "rewards/margins": 13.391912460327148, + "rewards/rejected": -21.667877197265625, + "step": 3669 + }, + { + "epoch": 5.89, + "learning_rate": 1.9193420531113752e-07, + "logits/chosen": -1.6718637943267822, + "logits/rejected": -1.6634516716003418, + "logps/chosen": -135.7847900390625, + "logps/rejected": -252.88002014160156, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.889223098754883, + "rewards/margins": 11.005735397338867, + "rewards/rejected": -17.89495849609375, + "step": 3670 + }, + { + "epoch": 5.89, + "learning_rate": 1.9183511692429645e-07, + "logits/chosen": -1.7969212532043457, + "logits/rejected": -1.824304461479187, + "logps/chosen": -126.70066833496094, + "logps/rejected": -228.4222412109375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.722947835922241, + "rewards/margins": 10.630352020263672, + "rewards/rejected": -14.353300094604492, + "step": 3671 + }, + { + "epoch": 5.89, + "learning_rate": 1.9173602853745541e-07, + "logits/chosen": -1.4347548484802246, + "logits/rejected": -1.4165340662002563, + "logps/chosen": -145.5503692626953, + "logps/rejected": -241.5057830810547, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.195026397705078, + "rewards/margins": 10.51089859008789, + "rewards/rejected": -16.70592498779297, + "step": 3672 + }, + { + "epoch": 5.9, + "learning_rate": 1.9163694015061435e-07, + "logits/chosen": -1.5520538091659546, + "logits/rejected": -1.6382880210876465, + "logps/chosen": -86.64100646972656, + "logps/rejected": -248.48851013183594, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.385003089904785, + "rewards/margins": 14.836771011352539, + "rewards/rejected": -17.221773147583008, + "step": 3673 + }, + { + "epoch": 5.9, + "learning_rate": 1.9153785176377325e-07, + "logits/chosen": -1.4207098484039307, + "logits/rejected": -1.5153276920318604, + "logps/chosen": -119.36077880859375, + "logps/rejected": -229.57427978515625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.647098541259766, + "rewards/margins": 9.655750274658203, + "rewards/rejected": -14.302848815917969, + "step": 3674 + }, + { + "epoch": 5.9, + "learning_rate": 1.9143876337693221e-07, + "logits/chosen": -1.6695656776428223, + "logits/rejected": -1.6403167247772217, + "logps/chosen": -120.75398254394531, + "logps/rejected": -219.77392578125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.281494617462158, + "rewards/margins": 10.195404052734375, + "rewards/rejected": -14.476898193359375, + "step": 3675 + }, + { + "epoch": 5.9, + "learning_rate": 1.9133967499009115e-07, + "logits/chosen": -1.6315786838531494, + "logits/rejected": -1.559051752090454, + "logps/chosen": -141.34146118164062, + "logps/rejected": -296.9576416015625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.48838996887207, + "rewards/margins": 16.799072265625, + "rewards/rejected": -22.28746223449707, + "step": 3676 + }, + { + "epoch": 5.9, + "learning_rate": 1.912405866032501e-07, + "logits/chosen": -1.480517864227295, + "logits/rejected": -1.4801418781280518, + "logps/chosen": -130.97279357910156, + "logps/rejected": -255.55825805664062, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.76218843460083, + "rewards/margins": 12.43912124633789, + "rewards/rejected": -18.201309204101562, + "step": 3677 + }, + { + "epoch": 5.9, + "learning_rate": 1.9114149821640904e-07, + "logits/chosen": -1.534497857093811, + "logits/rejected": -1.3863554000854492, + "logps/chosen": -140.65335083007812, + "logps/rejected": -246.45889282226562, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.218197345733643, + "rewards/margins": 13.02921199798584, + "rewards/rejected": -17.24740982055664, + "step": 3678 + }, + { + "epoch": 5.91, + "learning_rate": 1.9104240982956795e-07, + "logits/chosen": -1.6438411474227905, + "logits/rejected": -1.5998400449752808, + "logps/chosen": -171.29721069335938, + "logps/rejected": -342.979736328125, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.31832218170166, + "rewards/margins": 17.253503799438477, + "rewards/rejected": -24.571826934814453, + "step": 3679 + }, + { + "epoch": 5.91, + "learning_rate": 1.909433214427269e-07, + "logits/chosen": -1.4763121604919434, + "logits/rejected": -1.4671536684036255, + "logps/chosen": -125.75518798828125, + "logps/rejected": -302.0433349609375, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.461373329162598, + "rewards/margins": 15.335487365722656, + "rewards/rejected": -20.796859741210938, + "step": 3680 + }, + { + "epoch": 5.91, + "learning_rate": 1.9084423305588584e-07, + "logits/chosen": -1.5870596170425415, + "logits/rejected": -1.6089247465133667, + "logps/chosen": -130.16603088378906, + "logps/rejected": -265.1653137207031, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.111386299133301, + "rewards/margins": 12.33742904663086, + "rewards/rejected": -18.448814392089844, + "step": 3681 + }, + { + "epoch": 5.91, + "learning_rate": 1.907451446690448e-07, + "logits/chosen": -1.5631225109100342, + "logits/rejected": -1.4993486404418945, + "logps/chosen": -164.56576538085938, + "logps/rejected": -276.1475830078125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.375594139099121, + "rewards/margins": 12.361285209655762, + "rewards/rejected": -19.736879348754883, + "step": 3682 + }, + { + "epoch": 5.91, + "learning_rate": 1.906460562822037e-07, + "logits/chosen": -1.4620574712753296, + "logits/rejected": -1.5107136964797974, + "logps/chosen": -144.11215209960938, + "logps/rejected": -261.0520935058594, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.4463043212890625, + "rewards/margins": 10.59670639038086, + "rewards/rejected": -16.043010711669922, + "step": 3683 + }, + { + "epoch": 5.91, + "learning_rate": 1.9054696789536264e-07, + "logits/chosen": -1.5434565544128418, + "logits/rejected": -1.5131157636642456, + "logps/chosen": -162.0755615234375, + "logps/rejected": -280.32537841796875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.071712493896484, + "rewards/margins": 9.292452812194824, + "rewards/rejected": -17.364164352416992, + "step": 3684 + }, + { + "epoch": 5.91, + "learning_rate": 1.904478795085216e-07, + "logits/chosen": -1.5239708423614502, + "logits/rejected": -1.6967506408691406, + "logps/chosen": -70.85276794433594, + "logps/rejected": -293.915771484375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4649735689163208, + "rewards/margins": 16.974592208862305, + "rewards/rejected": -18.43956756591797, + "step": 3685 + }, + { + "epoch": 5.92, + "learning_rate": 1.9034879112168053e-07, + "logits/chosen": -1.4462329149246216, + "logits/rejected": -1.5220086574554443, + "logps/chosen": -135.77420043945312, + "logps/rejected": -254.41748046875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.89843225479126, + "rewards/margins": 13.211665153503418, + "rewards/rejected": -18.110097885131836, + "step": 3686 + }, + { + "epoch": 5.92, + "learning_rate": 1.9024970273483947e-07, + "logits/chosen": -1.4700506925582886, + "logits/rejected": -1.5044233798980713, + "logps/chosen": -117.62751007080078, + "logps/rejected": -211.74563598632812, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.7619452476501465, + "rewards/margins": 8.892574310302734, + "rewards/rejected": -14.654519081115723, + "step": 3687 + }, + { + "epoch": 5.92, + "learning_rate": 1.901506143479984e-07, + "logits/chosen": -1.5958352088928223, + "logits/rejected": -1.6465210914611816, + "logps/chosen": -104.88279724121094, + "logps/rejected": -243.6307373046875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.036955833435059, + "rewards/margins": 11.254576683044434, + "rewards/rejected": -15.291532516479492, + "step": 3688 + }, + { + "epoch": 5.92, + "learning_rate": 1.9005152596115733e-07, + "logits/chosen": -1.5979094505310059, + "logits/rejected": -1.6012042760849, + "logps/chosen": -128.72796630859375, + "logps/rejected": -266.0513916015625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.283300399780273, + "rewards/margins": 13.259130477905273, + "rewards/rejected": -17.542430877685547, + "step": 3689 + }, + { + "epoch": 5.92, + "learning_rate": 1.899524375743163e-07, + "logits/chosen": -1.3955384492874146, + "logits/rejected": -1.4877187013626099, + "logps/chosen": -146.68392944335938, + "logps/rejected": -308.4697265625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.643572807312012, + "rewards/margins": 13.967140197753906, + "rewards/rejected": -21.610713958740234, + "step": 3690 + }, + { + "epoch": 5.92, + "learning_rate": 1.8985334918747523e-07, + "logits/chosen": -1.5422056913375854, + "logits/rejected": -1.5991989374160767, + "logps/chosen": -152.45521545410156, + "logps/rejected": -292.18634033203125, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.650496482849121, + "rewards/margins": 12.444392204284668, + "rewards/rejected": -20.09488868713379, + "step": 3691 + }, + { + "epoch": 5.93, + "learning_rate": 1.8975426080063416e-07, + "logits/chosen": -1.3720598220825195, + "logits/rejected": -1.3632543087005615, + "logps/chosen": -152.57733154296875, + "logps/rejected": -301.2712707519531, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.584931373596191, + "rewards/margins": 14.315500259399414, + "rewards/rejected": -20.900432586669922, + "step": 3692 + }, + { + "epoch": 5.93, + "learning_rate": 1.896551724137931e-07, + "logits/chosen": -1.5509374141693115, + "logits/rejected": -1.6073428392410278, + "logps/chosen": -133.9862060546875, + "logps/rejected": -251.84368896484375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.5873799324035645, + "rewards/margins": 11.93701457977295, + "rewards/rejected": -17.524394989013672, + "step": 3693 + }, + { + "epoch": 5.93, + "learning_rate": 1.8955608402695203e-07, + "logits/chosen": -1.4050536155700684, + "logits/rejected": -1.5354934930801392, + "logps/chosen": -138.06346130371094, + "logps/rejected": -300.16064453125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.3258538246154785, + "rewards/margins": 13.810397148132324, + "rewards/rejected": -20.13625144958496, + "step": 3694 + }, + { + "epoch": 5.93, + "learning_rate": 1.8945699564011096e-07, + "logits/chosen": -1.4550800323486328, + "logits/rejected": -1.3998072147369385, + "logps/chosen": -143.1812286376953, + "logps/rejected": -234.50949096679688, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.388744831085205, + "rewards/margins": 10.256027221679688, + "rewards/rejected": -15.644771575927734, + "step": 3695 + }, + { + "epoch": 5.93, + "learning_rate": 1.8935790725326992e-07, + "logits/chosen": -1.4175169467926025, + "logits/rejected": -1.393728256225586, + "logps/chosen": -192.8778839111328, + "logps/rejected": -348.37835693359375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.646796226501465, + "rewards/margins": 15.695621490478516, + "rewards/rejected": -25.342418670654297, + "step": 3696 + }, + { + "epoch": 5.93, + "learning_rate": 1.8925881886642883e-07, + "logits/chosen": -1.768151044845581, + "logits/rejected": -1.7857685089111328, + "logps/chosen": -162.59762573242188, + "logps/rejected": -300.15850830078125, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.62718677520752, + "rewards/margins": 14.317544937133789, + "rewards/rejected": -22.944730758666992, + "step": 3697 + }, + { + "epoch": 5.94, + "learning_rate": 1.8915973047958779e-07, + "logits/chosen": -1.513951301574707, + "logits/rejected": -1.6251705884933472, + "logps/chosen": -162.78033447265625, + "logps/rejected": -295.7638854980469, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.491299629211426, + "rewards/margins": 11.151388168334961, + "rewards/rejected": -19.642688751220703, + "step": 3698 + }, + { + "epoch": 5.94, + "learning_rate": 1.8906064209274672e-07, + "logits/chosen": -1.6087629795074463, + "logits/rejected": -1.6428990364074707, + "logps/chosen": -188.2069854736328, + "logps/rejected": -284.1829528808594, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.806699752807617, + "rewards/margins": 9.564703941345215, + "rewards/rejected": -19.37140464782715, + "step": 3699 + }, + { + "epoch": 5.94, + "learning_rate": 1.8896155370590565e-07, + "logits/chosen": -1.6553736925125122, + "logits/rejected": -1.729343295097351, + "logps/chosen": -113.10751342773438, + "logps/rejected": -262.05474853515625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.107659339904785, + "rewards/margins": 12.38688850402832, + "rewards/rejected": -17.49454689025879, + "step": 3700 + }, + { + "epoch": 5.94, + "learning_rate": 1.888624653190646e-07, + "logits/chosen": -1.5415149927139282, + "logits/rejected": -1.4571938514709473, + "logps/chosen": -130.62001037597656, + "logps/rejected": -203.699951171875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.10054349899292, + "rewards/margins": 9.422486305236816, + "rewards/rejected": -13.523029327392578, + "step": 3701 + }, + { + "epoch": 5.94, + "learning_rate": 1.8876337693222352e-07, + "logits/chosen": -1.4082000255584717, + "logits/rejected": -1.4240295886993408, + "logps/chosen": -161.91326904296875, + "logps/rejected": -336.9073486328125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.498258590698242, + "rewards/margins": 13.28285026550293, + "rewards/rejected": -21.781108856201172, + "step": 3702 + }, + { + "epoch": 5.94, + "learning_rate": 1.8866428854538248e-07, + "logits/chosen": -1.522631049156189, + "logits/rejected": -1.4284687042236328, + "logps/chosen": -173.02447509765625, + "logps/rejected": -318.6395568847656, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.221850395202637, + "rewards/margins": 15.330917358398438, + "rewards/rejected": -22.55276870727539, + "step": 3703 + }, + { + "epoch": 5.95, + "learning_rate": 1.885652001585414e-07, + "logits/chosen": -1.666113257408142, + "logits/rejected": -1.6611957550048828, + "logps/chosen": -127.99026489257812, + "logps/rejected": -303.2962951660156, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.5711822509765625, + "rewards/margins": 16.306671142578125, + "rewards/rejected": -21.877853393554688, + "step": 3704 + }, + { + "epoch": 5.95, + "learning_rate": 1.8846611177170035e-07, + "logits/chosen": -1.4214556217193604, + "logits/rejected": -1.4787708520889282, + "logps/chosen": -105.6366958618164, + "logps/rejected": -219.59039306640625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.811261177062988, + "rewards/margins": 10.935802459716797, + "rewards/rejected": -15.747063636779785, + "step": 3705 + }, + { + "epoch": 5.95, + "learning_rate": 1.883670233848593e-07, + "logits/chosen": -1.4125428199768066, + "logits/rejected": -1.4424316883087158, + "logps/chosen": -157.37298583984375, + "logps/rejected": -254.4681854248047, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.305351257324219, + "rewards/margins": 10.276167869567871, + "rewards/rejected": -18.581520080566406, + "step": 3706 + }, + { + "epoch": 5.95, + "learning_rate": 1.882679349980182e-07, + "logits/chosen": -1.4438930749893188, + "logits/rejected": -1.4940712451934814, + "logps/chosen": -191.05059814453125, + "logps/rejected": -313.15069580078125, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.298832893371582, + "rewards/margins": 10.93721866607666, + "rewards/rejected": -21.236053466796875, + "step": 3707 + }, + { + "epoch": 5.95, + "learning_rate": 1.8816884661117715e-07, + "logits/chosen": -1.570009469985962, + "logits/rejected": -1.6258060932159424, + "logps/chosen": -149.80404663085938, + "logps/rejected": -272.12823486328125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.608193397521973, + "rewards/margins": 12.463358879089355, + "rewards/rejected": -19.071552276611328, + "step": 3708 + }, + { + "epoch": 5.95, + "learning_rate": 1.880697582243361e-07, + "logits/chosen": -1.5564316511154175, + "logits/rejected": -1.5848227739334106, + "logps/chosen": -189.29261779785156, + "logps/rejected": -337.5676574707031, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.573407173156738, + "rewards/margins": 14.258111000061035, + "rewards/rejected": -23.83151626586914, + "step": 3709 + }, + { + "epoch": 5.96, + "learning_rate": 1.8797066983749504e-07, + "logits/chosen": -1.4031949043273926, + "logits/rejected": -1.4441360235214233, + "logps/chosen": -135.97903442382812, + "logps/rejected": -287.9762878417969, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.494509220123291, + "rewards/margins": 13.647260665893555, + "rewards/rejected": -21.141769409179688, + "step": 3710 + }, + { + "epoch": 5.96, + "learning_rate": 1.87871581450654e-07, + "logits/chosen": -1.4672036170959473, + "logits/rejected": -1.4632892608642578, + "logps/chosen": -132.6333770751953, + "logps/rejected": -283.39324951171875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.481267929077148, + "rewards/margins": 14.675756454467773, + "rewards/rejected": -20.15702247619629, + "step": 3711 + }, + { + "epoch": 5.96, + "learning_rate": 1.877724930638129e-07, + "logits/chosen": -1.4450477361679077, + "logits/rejected": -1.4160045385360718, + "logps/chosen": -164.26168823242188, + "logps/rejected": -286.7809143066406, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.212534427642822, + "rewards/margins": 11.673108100891113, + "rewards/rejected": -18.885643005371094, + "step": 3712 + }, + { + "epoch": 5.96, + "learning_rate": 1.8767340467697184e-07, + "logits/chosen": -1.6563515663146973, + "logits/rejected": -1.5344302654266357, + "logps/chosen": -189.4044189453125, + "logps/rejected": -272.1693420410156, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.401962280273438, + "rewards/margins": 9.75802993774414, + "rewards/rejected": -18.159992218017578, + "step": 3713 + }, + { + "epoch": 5.96, + "learning_rate": 1.875743162901308e-07, + "logits/chosen": -1.5040847063064575, + "logits/rejected": -1.5975149869918823, + "logps/chosen": -70.62644958496094, + "logps/rejected": -211.5277099609375, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9613145589828491, + "rewards/margins": 13.232431411743164, + "rewards/rejected": -14.193745613098145, + "step": 3714 + }, + { + "epoch": 5.96, + "learning_rate": 1.8747522790328973e-07, + "logits/chosen": -1.7836558818817139, + "logits/rejected": -1.7876358032226562, + "logps/chosen": -140.72097778320312, + "logps/rejected": -275.6371154785156, + "loss": 0.0073, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.797496795654297, + "rewards/margins": 12.669290542602539, + "rewards/rejected": -19.466787338256836, + "step": 3715 + }, + { + "epoch": 5.96, + "learning_rate": 1.8737613951644864e-07, + "logits/chosen": -1.3768072128295898, + "logits/rejected": -1.465212345123291, + "logps/chosen": -120.96431732177734, + "logps/rejected": -258.19769287109375, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.034911632537842, + "rewards/margins": 13.804450988769531, + "rewards/rejected": -17.83936309814453, + "step": 3716 + }, + { + "epoch": 5.97, + "learning_rate": 1.872770511296076e-07, + "logits/chosen": -1.611348271369934, + "logits/rejected": -1.667280912399292, + "logps/chosen": -136.48165893554688, + "logps/rejected": -288.8954772949219, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.08405876159668, + "rewards/margins": 13.650394439697266, + "rewards/rejected": -19.734453201293945, + "step": 3717 + }, + { + "epoch": 5.97, + "learning_rate": 1.8717796274276653e-07, + "logits/chosen": -1.481740117073059, + "logits/rejected": -1.5134330987930298, + "logps/chosen": -158.48883056640625, + "logps/rejected": -293.87384033203125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.975423336029053, + "rewards/margins": 13.202312469482422, + "rewards/rejected": -21.177734375, + "step": 3718 + }, + { + "epoch": 5.97, + "learning_rate": 1.870788743559255e-07, + "logits/chosen": -1.4430646896362305, + "logits/rejected": -1.4120030403137207, + "logps/chosen": -150.5356903076172, + "logps/rejected": -245.22471618652344, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.788219451904297, + "rewards/margins": 10.5567045211792, + "rewards/rejected": -16.344924926757812, + "step": 3719 + }, + { + "epoch": 5.97, + "learning_rate": 1.8697978596908442e-07, + "logits/chosen": -1.7909808158874512, + "logits/rejected": -1.6334832906723022, + "logps/chosen": -137.17445373535156, + "logps/rejected": -226.52706909179688, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.884033441543579, + "rewards/margins": 11.848114967346191, + "rewards/rejected": -15.732149124145508, + "step": 3720 + }, + { + "epoch": 5.97, + "learning_rate": 1.8688069758224333e-07, + "logits/chosen": -1.4211233854293823, + "logits/rejected": -1.3814936876296997, + "logps/chosen": -160.5302734375, + "logps/rejected": -254.46755981445312, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.037004470825195, + "rewards/margins": 10.84640121459961, + "rewards/rejected": -17.883405685424805, + "step": 3721 + }, + { + "epoch": 5.97, + "learning_rate": 1.867816091954023e-07, + "logits/chosen": -1.7150602340698242, + "logits/rejected": -1.7019472122192383, + "logps/chosen": -92.93966674804688, + "logps/rejected": -230.4935302734375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.899407386779785, + "rewards/margins": 13.690093040466309, + "rewards/rejected": -16.589500427246094, + "step": 3722 + }, + { + "epoch": 5.98, + "learning_rate": 1.8668252080856122e-07, + "logits/chosen": -1.502805233001709, + "logits/rejected": -1.478081226348877, + "logps/chosen": -189.28366088867188, + "logps/rejected": -265.9151916503906, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.784960746765137, + "rewards/margins": 9.927692413330078, + "rewards/rejected": -18.71265411376953, + "step": 3723 + }, + { + "epoch": 5.98, + "learning_rate": 1.8658343242172018e-07, + "logits/chosen": -1.3867504596710205, + "logits/rejected": -1.4207792282104492, + "logps/chosen": -157.76779174804688, + "logps/rejected": -289.5389709472656, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.68863582611084, + "rewards/margins": 13.308408737182617, + "rewards/rejected": -19.99704360961914, + "step": 3724 + }, + { + "epoch": 5.98, + "learning_rate": 1.8648434403487912e-07, + "logits/chosen": -1.5385690927505493, + "logits/rejected": -1.6557109355926514, + "logps/chosen": -131.52041625976562, + "logps/rejected": -232.51535034179688, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.508660316467285, + "rewards/margins": 9.426185607910156, + "rewards/rejected": -13.934846878051758, + "step": 3725 + }, + { + "epoch": 5.98, + "learning_rate": 1.8638525564803802e-07, + "logits/chosen": -1.5688905715942383, + "logits/rejected": -1.5362516641616821, + "logps/chosen": -133.5271453857422, + "logps/rejected": -264.12646484375, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7925689220428467, + "rewards/margins": 13.396677017211914, + "rewards/rejected": -17.189245223999023, + "step": 3726 + }, + { + "epoch": 5.98, + "learning_rate": 1.8628616726119698e-07, + "logits/chosen": -1.4343740940093994, + "logits/rejected": -1.400930643081665, + "logps/chosen": -132.9620819091797, + "logps/rejected": -227.93939208984375, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.864546298980713, + "rewards/margins": 10.637269973754883, + "rewards/rejected": -16.501815795898438, + "step": 3727 + }, + { + "epoch": 5.98, + "learning_rate": 1.8618707887435592e-07, + "logits/chosen": -1.3522822856903076, + "logits/rejected": -1.3627744913101196, + "logps/chosen": -138.448974609375, + "logps/rejected": -287.5068359375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.474243640899658, + "rewards/margins": 14.87516975402832, + "rewards/rejected": -21.349414825439453, + "step": 3728 + }, + { + "epoch": 5.99, + "learning_rate": 1.8608799048751485e-07, + "logits/chosen": -1.4551266431808472, + "logits/rejected": -1.5144580602645874, + "logps/chosen": -114.57350158691406, + "logps/rejected": -240.2957305908203, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.49277400970459, + "rewards/margins": 12.307703018188477, + "rewards/rejected": -16.80047607421875, + "step": 3729 + }, + { + "epoch": 5.99, + "learning_rate": 1.859889021006738e-07, + "logits/chosen": -1.5020544528961182, + "logits/rejected": -1.59376060962677, + "logps/chosen": -114.01325225830078, + "logps/rejected": -259.8193359375, + "loss": 0.0023, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.326720714569092, + "rewards/margins": 12.606695175170898, + "rewards/rejected": -16.93341636657715, + "step": 3730 + }, + { + "epoch": 5.99, + "learning_rate": 1.8588981371383272e-07, + "logits/chosen": -1.5785584449768066, + "logits/rejected": -1.6342432498931885, + "logps/chosen": -106.4214096069336, + "logps/rejected": -249.5433807373047, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.555849313735962, + "rewards/margins": 14.302323341369629, + "rewards/rejected": -17.858173370361328, + "step": 3731 + }, + { + "epoch": 5.99, + "learning_rate": 1.8579072532699168e-07, + "logits/chosen": -1.6108336448669434, + "logits/rejected": -1.5623013973236084, + "logps/chosen": -180.12229919433594, + "logps/rejected": -306.132080078125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.040301322937012, + "rewards/margins": 13.10683822631836, + "rewards/rejected": -22.147140502929688, + "step": 3732 + }, + { + "epoch": 5.99, + "learning_rate": 1.856916369401506e-07, + "logits/chosen": -1.4907872676849365, + "logits/rejected": -1.4656487703323364, + "logps/chosen": -159.18826293945312, + "logps/rejected": -247.97947692871094, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.089438438415527, + "rewards/margins": 9.549057006835938, + "rewards/rejected": -16.63849639892578, + "step": 3733 + }, + { + "epoch": 5.99, + "learning_rate": 1.8559254855330954e-07, + "logits/chosen": -1.4108601808547974, + "logits/rejected": -1.500807762145996, + "logps/chosen": -174.00119018554688, + "logps/rejected": -308.46392822265625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.421490669250488, + "rewards/margins": 12.244840621948242, + "rewards/rejected": -20.666330337524414, + "step": 3734 + }, + { + "epoch": 6.0, + "learning_rate": 1.8549346016646848e-07, + "logits/chosen": -1.6357122659683228, + "logits/rejected": -1.6525511741638184, + "logps/chosen": -130.7125701904297, + "logps/rejected": -286.3031005859375, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.685513973236084, + "rewards/margins": 15.708707809448242, + "rewards/rejected": -20.394222259521484, + "step": 3735 + }, + { + "epoch": 6.0, + "learning_rate": 1.853943717796274e-07, + "logits/chosen": -1.631971836090088, + "logits/rejected": -1.62613046169281, + "logps/chosen": -99.45304107666016, + "logps/rejected": -230.4906005859375, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7393112182617188, + "rewards/margins": 13.323470115661621, + "rewards/rejected": -16.062782287597656, + "step": 3736 + }, + { + "epoch": 6.0, + "learning_rate": 1.8529528339278634e-07, + "logits/chosen": -1.47713041305542, + "logits/rejected": -1.5451579093933105, + "logps/chosen": -101.16068267822266, + "logps/rejected": -283.6063232421875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.585237503051758, + "rewards/margins": 16.273935317993164, + "rewards/rejected": -19.859172821044922, + "step": 3737 + }, + { + "epoch": 6.0, + "learning_rate": 1.851961950059453e-07, + "logits/chosen": -1.330141544342041, + "logits/rejected": -1.283247709274292, + "logps/chosen": -111.42694854736328, + "logps/rejected": -242.72128295898438, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.7566046714782715, + "rewards/margins": 12.802552223205566, + "rewards/rejected": -17.55915641784668, + "step": 3738 + }, + { + "epoch": 6.0, + "learning_rate": 1.8509710661910424e-07, + "logits/chosen": -1.5390558242797852, + "logits/rejected": -1.5887528657913208, + "logps/chosen": -117.5335693359375, + "logps/rejected": -225.11244201660156, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.619745254516602, + "rewards/margins": 10.816580772399902, + "rewards/rejected": -15.436326026916504, + "step": 3739 + }, + { + "epoch": 6.0, + "learning_rate": 1.8499801823226317e-07, + "logits/chosen": -1.416367769241333, + "logits/rejected": -1.4028477668762207, + "logps/chosen": -173.96469116210938, + "logps/rejected": -277.3204040527344, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.947136878967285, + "rewards/margins": 11.761013984680176, + "rewards/rejected": -19.708148956298828, + "step": 3740 + }, + { + "epoch": 6.0, + "learning_rate": 1.848989298454221e-07, + "logits/chosen": -1.531615138053894, + "logits/rejected": -1.5191782712936401, + "logps/chosen": -199.8966522216797, + "logps/rejected": -321.79681396484375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.083995819091797, + "rewards/margins": 11.639716148376465, + "rewards/rejected": -19.723711013793945, + "step": 3741 + }, + { + "epoch": 6.01, + "learning_rate": 1.8479984145858104e-07, + "logits/chosen": -1.3982784748077393, + "logits/rejected": -1.4397295713424683, + "logps/chosen": -174.62762451171875, + "logps/rejected": -293.93682861328125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.023435592651367, + "rewards/margins": 10.839275360107422, + "rewards/rejected": -18.862712860107422, + "step": 3742 + }, + { + "epoch": 6.01, + "learning_rate": 1.8470075307174e-07, + "logits/chosen": -1.5266352891921997, + "logits/rejected": -1.481297254562378, + "logps/chosen": -171.38177490234375, + "logps/rejected": -277.15167236328125, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.255051612854004, + "rewards/margins": 12.699668884277344, + "rewards/rejected": -19.95471954345703, + "step": 3743 + }, + { + "epoch": 6.01, + "learning_rate": 1.8460166468489893e-07, + "logits/chosen": -1.4513261318206787, + "logits/rejected": -1.3418900966644287, + "logps/chosen": -112.23099517822266, + "logps/rejected": -219.62103271484375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.6433634757995605, + "rewards/margins": 11.723655700683594, + "rewards/rejected": -16.367019653320312, + "step": 3744 + }, + { + "epoch": 6.01, + "learning_rate": 1.8450257629805784e-07, + "logits/chosen": -1.5302023887634277, + "logits/rejected": -1.5946837663650513, + "logps/chosen": -184.11492919921875, + "logps/rejected": -329.04974365234375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.555567741394043, + "rewards/margins": 13.98332405090332, + "rewards/rejected": -22.53889274597168, + "step": 3745 + }, + { + "epoch": 6.01, + "learning_rate": 1.844034879112168e-07, + "logits/chosen": -1.4342551231384277, + "logits/rejected": -1.43612539768219, + "logps/chosen": -146.6827392578125, + "logps/rejected": -276.5037841796875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.963329315185547, + "rewards/margins": 13.246003150939941, + "rewards/rejected": -19.209333419799805, + "step": 3746 + }, + { + "epoch": 6.01, + "learning_rate": 1.8430439952437573e-07, + "logits/chosen": -1.627333402633667, + "logits/rejected": -1.6453521251678467, + "logps/chosen": -180.25430297851562, + "logps/rejected": -292.6415100097656, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.190729141235352, + "rewards/margins": 12.59901237487793, + "rewards/rejected": -19.78974151611328, + "step": 3747 + }, + { + "epoch": 6.02, + "learning_rate": 1.842053111375347e-07, + "logits/chosen": -1.514379620552063, + "logits/rejected": -1.5775344371795654, + "logps/chosen": -159.56065368652344, + "logps/rejected": -275.2501525878906, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.024033069610596, + "rewards/margins": 12.119817733764648, + "rewards/rejected": -18.14385223388672, + "step": 3748 + }, + { + "epoch": 6.02, + "learning_rate": 1.8410622275069362e-07, + "logits/chosen": -1.6145408153533936, + "logits/rejected": -1.6594067811965942, + "logps/chosen": -147.41680908203125, + "logps/rejected": -264.0226745605469, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.344820499420166, + "rewards/margins": 11.834607124328613, + "rewards/rejected": -18.179428100585938, + "step": 3749 + }, + { + "epoch": 6.02, + "learning_rate": 1.8400713436385253e-07, + "logits/chosen": -1.5392974615097046, + "logits/rejected": -1.4972853660583496, + "logps/chosen": -140.9880828857422, + "logps/rejected": -273.8529052734375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.352421283721924, + "rewards/margins": 13.943973541259766, + "rewards/rejected": -19.29639434814453, + "step": 3750 + }, + { + "epoch": 6.02, + "learning_rate": 1.839080459770115e-07, + "logits/chosen": -1.449488878250122, + "logits/rejected": -1.4488788843154907, + "logps/chosen": -138.74526977539062, + "logps/rejected": -281.5375061035156, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.07131290435791, + "rewards/margins": 13.863448143005371, + "rewards/rejected": -19.93476104736328, + "step": 3751 + }, + { + "epoch": 6.02, + "learning_rate": 1.8380895759017042e-07, + "logits/chosen": -1.4665435552597046, + "logits/rejected": -1.4294648170471191, + "logps/chosen": -194.15631103515625, + "logps/rejected": -345.0496826171875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.602141380310059, + "rewards/margins": 14.402270317077637, + "rewards/rejected": -25.004413604736328, + "step": 3752 + }, + { + "epoch": 6.02, + "learning_rate": 1.8370986920332938e-07, + "logits/chosen": -1.4537912607192993, + "logits/rejected": -1.4255924224853516, + "logps/chosen": -143.8022003173828, + "logps/rejected": -306.625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.235991477966309, + "rewards/margins": 16.183080673217773, + "rewards/rejected": -22.4190731048584, + "step": 3753 + }, + { + "epoch": 6.03, + "learning_rate": 1.836107808164883e-07, + "logits/chosen": -1.4513514041900635, + "logits/rejected": -1.4844778776168823, + "logps/chosen": -201.3562469482422, + "logps/rejected": -301.7613525390625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.75457763671875, + "rewards/margins": 10.705495834350586, + "rewards/rejected": -20.460073471069336, + "step": 3754 + }, + { + "epoch": 6.03, + "learning_rate": 1.8351169242964722e-07, + "logits/chosen": -1.5252188444137573, + "logits/rejected": -1.5071762800216675, + "logps/chosen": -102.25727844238281, + "logps/rejected": -226.20709228515625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.541853904724121, + "rewards/margins": 11.987116813659668, + "rewards/rejected": -15.528970718383789, + "step": 3755 + }, + { + "epoch": 6.03, + "learning_rate": 1.8341260404280618e-07, + "logits/chosen": -1.4276235103607178, + "logits/rejected": -1.478003740310669, + "logps/chosen": -132.8735809326172, + "logps/rejected": -267.139892578125, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.851090431213379, + "rewards/margins": 12.239778518676758, + "rewards/rejected": -18.09086799621582, + "step": 3756 + }, + { + "epoch": 6.03, + "learning_rate": 1.8331351565596512e-07, + "logits/chosen": -1.4930830001831055, + "logits/rejected": -1.5063660144805908, + "logps/chosen": -183.04066467285156, + "logps/rejected": -371.712646484375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.156694412231445, + "rewards/margins": 19.094629287719727, + "rewards/rejected": -28.251323699951172, + "step": 3757 + }, + { + "epoch": 6.03, + "learning_rate": 1.8321442726912405e-07, + "logits/chosen": -1.3763052225112915, + "logits/rejected": -1.3766800165176392, + "logps/chosen": -143.50381469726562, + "logps/rejected": -308.6813049316406, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.9523091316223145, + "rewards/margins": 14.761186599731445, + "rewards/rejected": -20.713497161865234, + "step": 3758 + }, + { + "epoch": 6.03, + "learning_rate": 1.8311533888228298e-07, + "logits/chosen": -1.4609462022781372, + "logits/rejected": -1.3504738807678223, + "logps/chosen": -192.33236694335938, + "logps/rejected": -284.9546813964844, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.8056058883667, + "rewards/margins": 11.831807136535645, + "rewards/rejected": -21.637413024902344, + "step": 3759 + }, + { + "epoch": 6.04, + "learning_rate": 1.8301625049544192e-07, + "logits/chosen": -1.4034686088562012, + "logits/rejected": -1.3783164024353027, + "logps/chosen": -142.7501220703125, + "logps/rejected": -271.149169921875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.015439987182617, + "rewards/margins": 13.045120239257812, + "rewards/rejected": -21.060558319091797, + "step": 3760 + }, + { + "epoch": 6.04, + "learning_rate": 1.8291716210860087e-07, + "logits/chosen": -1.4516793489456177, + "logits/rejected": -1.446109414100647, + "logps/chosen": -118.95420837402344, + "logps/rejected": -247.193359375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.4847025871276855, + "rewards/margins": 12.702291488647461, + "rewards/rejected": -17.186992645263672, + "step": 3761 + }, + { + "epoch": 6.04, + "learning_rate": 1.828180737217598e-07, + "logits/chosen": -1.489023208618164, + "logits/rejected": -1.478100061416626, + "logps/chosen": -172.7230682373047, + "logps/rejected": -331.7967529296875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.193109512329102, + "rewards/margins": 15.603076934814453, + "rewards/rejected": -23.796188354492188, + "step": 3762 + }, + { + "epoch": 6.04, + "learning_rate": 1.8271898533491874e-07, + "logits/chosen": -1.4038270711898804, + "logits/rejected": -1.3796181678771973, + "logps/chosen": -192.1802978515625, + "logps/rejected": -323.2943115234375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.976802825927734, + "rewards/margins": 13.481634140014648, + "rewards/rejected": -22.458438873291016, + "step": 3763 + }, + { + "epoch": 6.04, + "learning_rate": 1.8261989694807767e-07, + "logits/chosen": -1.3714425563812256, + "logits/rejected": -1.3603813648223877, + "logps/chosen": -186.00930786132812, + "logps/rejected": -314.2300109863281, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.517593383789062, + "rewards/margins": 12.490150451660156, + "rewards/rejected": -23.00774383544922, + "step": 3764 + }, + { + "epoch": 6.04, + "learning_rate": 1.825208085612366e-07, + "logits/chosen": -1.4109392166137695, + "logits/rejected": -1.4348992109298706, + "logps/chosen": -164.87130737304688, + "logps/rejected": -298.5825500488281, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.826113700866699, + "rewards/margins": 11.549124717712402, + "rewards/rejected": -19.3752384185791, + "step": 3765 + }, + { + "epoch": 6.04, + "learning_rate": 1.8242172017439557e-07, + "logits/chosen": -1.4163470268249512, + "logits/rejected": -1.4462199211120605, + "logps/chosen": -134.70814514160156, + "logps/rejected": -297.186767578125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.378488540649414, + "rewards/margins": 13.460439682006836, + "rewards/rejected": -19.838926315307617, + "step": 3766 + }, + { + "epoch": 6.05, + "learning_rate": 1.823226317875545e-07, + "logits/chosen": -1.3906621932983398, + "logits/rejected": -1.4663701057434082, + "logps/chosen": -124.69303894042969, + "logps/rejected": -259.7684326171875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.260793209075928, + "rewards/margins": 12.29456901550293, + "rewards/rejected": -17.555362701416016, + "step": 3767 + }, + { + "epoch": 6.05, + "learning_rate": 1.8222354340071343e-07, + "logits/chosen": -1.4665334224700928, + "logits/rejected": -1.4844856262207031, + "logps/chosen": -171.92459106445312, + "logps/rejected": -289.804931640625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.954721450805664, + "rewards/margins": 12.6197509765625, + "rewards/rejected": -20.574472427368164, + "step": 3768 + }, + { + "epoch": 6.05, + "learning_rate": 1.8212445501387237e-07, + "logits/chosen": -1.4873765707015991, + "logits/rejected": -1.5165445804595947, + "logps/chosen": -131.38307189941406, + "logps/rejected": -315.9477844238281, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.881532669067383, + "rewards/margins": 17.7265682220459, + "rewards/rejected": -22.60810089111328, + "step": 3769 + }, + { + "epoch": 6.05, + "learning_rate": 1.820253666270313e-07, + "logits/chosen": -1.3193511962890625, + "logits/rejected": -1.30250084400177, + "logps/chosen": -154.16888427734375, + "logps/rejected": -279.6081237792969, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.555691719055176, + "rewards/margins": 14.110661506652832, + "rewards/rejected": -20.666353225708008, + "step": 3770 + }, + { + "epoch": 6.05, + "learning_rate": 1.8192627824019023e-07, + "logits/chosen": -1.4352095127105713, + "logits/rejected": -1.4608900547027588, + "logps/chosen": -222.8261260986328, + "logps/rejected": -380.8035583496094, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.399138450622559, + "rewards/margins": 15.254426956176758, + "rewards/rejected": -26.653564453125, + "step": 3771 + }, + { + "epoch": 6.05, + "learning_rate": 1.818271898533492e-07, + "logits/chosen": -1.486783742904663, + "logits/rejected": -1.5051430463790894, + "logps/chosen": -181.663330078125, + "logps/rejected": -328.8226623535156, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.14644718170166, + "rewards/margins": 15.112361907958984, + "rewards/rejected": -24.25881004333496, + "step": 3772 + }, + { + "epoch": 6.06, + "learning_rate": 1.817281014665081e-07, + "logits/chosen": -1.6808695793151855, + "logits/rejected": -1.7473810911178589, + "logps/chosen": -102.59202575683594, + "logps/rejected": -265.6480407714844, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7721221446990967, + "rewards/margins": 17.1637020111084, + "rewards/rejected": -18.93582534790039, + "step": 3773 + }, + { + "epoch": 6.06, + "learning_rate": 1.8162901307966706e-07, + "logits/chosen": -1.567201852798462, + "logits/rejected": -1.5589690208435059, + "logps/chosen": -188.40087890625, + "logps/rejected": -312.92010498046875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.422811508178711, + "rewards/margins": 13.399160385131836, + "rewards/rejected": -21.821971893310547, + "step": 3774 + }, + { + "epoch": 6.06, + "learning_rate": 1.81529924692826e-07, + "logits/chosen": -1.5052729845046997, + "logits/rejected": -1.5610016584396362, + "logps/chosen": -131.29763793945312, + "logps/rejected": -264.26446533203125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.0080084800720215, + "rewards/margins": 12.385486602783203, + "rewards/rejected": -18.393495559692383, + "step": 3775 + }, + { + "epoch": 6.06, + "learning_rate": 1.8143083630598493e-07, + "logits/chosen": -1.4382182359695435, + "logits/rejected": -1.4311517477035522, + "logps/chosen": -133.76930236816406, + "logps/rejected": -263.8323974609375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.2685956954956055, + "rewards/margins": 12.965446472167969, + "rewards/rejected": -18.23404312133789, + "step": 3776 + }, + { + "epoch": 6.06, + "learning_rate": 1.813317479191439e-07, + "logits/chosen": -1.6239570379257202, + "logits/rejected": -1.516526460647583, + "logps/chosen": -118.94258117675781, + "logps/rejected": -227.5891571044922, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.5296781063079834, + "rewards/margins": 13.924795150756836, + "rewards/rejected": -17.4544734954834, + "step": 3777 + }, + { + "epoch": 6.06, + "learning_rate": 1.812326595323028e-07, + "logits/chosen": -1.6953494548797607, + "logits/rejected": -1.6409553289413452, + "logps/chosen": -123.7352294921875, + "logps/rejected": -266.2259826660156, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.8071422576904297, + "rewards/margins": 14.900186538696289, + "rewards/rejected": -18.70732879638672, + "step": 3778 + }, + { + "epoch": 6.07, + "learning_rate": 1.8113357114546173e-07, + "logits/chosen": -1.4740943908691406, + "logits/rejected": -1.454007625579834, + "logps/chosen": -214.21633911132812, + "logps/rejected": -263.9981384277344, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.605491638183594, + "rewards/margins": 8.986871719360352, + "rewards/rejected": -18.592363357543945, + "step": 3779 + }, + { + "epoch": 6.07, + "learning_rate": 1.810344827586207e-07, + "logits/chosen": -1.4661208391189575, + "logits/rejected": -1.4076242446899414, + "logps/chosen": -134.00616455078125, + "logps/rejected": -237.57586669921875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.082403182983398, + "rewards/margins": 10.657125473022461, + "rewards/rejected": -15.73952865600586, + "step": 3780 + }, + { + "epoch": 6.07, + "learning_rate": 1.8093539437177962e-07, + "logits/chosen": -1.4575575590133667, + "logits/rejected": -1.4409706592559814, + "logps/chosen": -167.14247131347656, + "logps/rejected": -276.9107360839844, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.19277286529541, + "rewards/margins": 12.372063636779785, + "rewards/rejected": -19.564836502075195, + "step": 3781 + }, + { + "epoch": 6.07, + "learning_rate": 1.8083630598493858e-07, + "logits/chosen": -1.4474996328353882, + "logits/rejected": -1.465004801750183, + "logps/chosen": -124.71369171142578, + "logps/rejected": -230.5960693359375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.554933547973633, + "rewards/margins": 10.66431999206543, + "rewards/rejected": -15.219253540039062, + "step": 3782 + }, + { + "epoch": 6.07, + "learning_rate": 1.807372175980975e-07, + "logits/chosen": -1.6190226078033447, + "logits/rejected": -1.642683982849121, + "logps/chosen": -140.6440887451172, + "logps/rejected": -270.41217041015625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.868729591369629, + "rewards/margins": 12.8851900100708, + "rewards/rejected": -18.753921508789062, + "step": 3783 + }, + { + "epoch": 6.07, + "learning_rate": 1.8063812921125642e-07, + "logits/chosen": -1.631064534187317, + "logits/rejected": -1.549789309501648, + "logps/chosen": -160.88836669921875, + "logps/rejected": -257.59588623046875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.184457778930664, + "rewards/margins": 11.752429008483887, + "rewards/rejected": -18.936887741088867, + "step": 3784 + }, + { + "epoch": 6.08, + "learning_rate": 1.8053904082441538e-07, + "logits/chosen": -1.5913455486297607, + "logits/rejected": -1.6091259717941284, + "logps/chosen": -139.75454711914062, + "logps/rejected": -296.818359375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.965237140655518, + "rewards/margins": 15.649551391601562, + "rewards/rejected": -20.614788055419922, + "step": 3785 + }, + { + "epoch": 6.08, + "learning_rate": 1.804399524375743e-07, + "logits/chosen": -1.7018628120422363, + "logits/rejected": -1.7536125183105469, + "logps/chosen": -125.75643920898438, + "logps/rejected": -266.48870849609375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.68771505355835, + "rewards/margins": 13.164011001586914, + "rewards/rejected": -17.851726531982422, + "step": 3786 + }, + { + "epoch": 6.08, + "learning_rate": 1.8034086405073322e-07, + "logits/chosen": -1.6061551570892334, + "logits/rejected": -1.542169451713562, + "logps/chosen": -114.40037536621094, + "logps/rejected": -213.57174682617188, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.734830856323242, + "rewards/margins": 10.017226219177246, + "rewards/rejected": -14.752057075500488, + "step": 3787 + }, + { + "epoch": 6.08, + "learning_rate": 1.8024177566389218e-07, + "logits/chosen": -1.5183043479919434, + "logits/rejected": -1.526430368423462, + "logps/chosen": -138.40643310546875, + "logps/rejected": -249.5294189453125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.439443588256836, + "rewards/margins": 10.88719367980957, + "rewards/rejected": -16.326637268066406, + "step": 3788 + }, + { + "epoch": 6.08, + "learning_rate": 1.801426872770511e-07, + "logits/chosen": -1.565469741821289, + "logits/rejected": -1.547842264175415, + "logps/chosen": -157.80914306640625, + "logps/rejected": -293.969970703125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.375960350036621, + "rewards/margins": 14.547316551208496, + "rewards/rejected": -19.923276901245117, + "step": 3789 + }, + { + "epoch": 6.08, + "learning_rate": 1.8004359889021007e-07, + "logits/chosen": -1.4169995784759521, + "logits/rejected": -1.4991260766983032, + "logps/chosen": -136.2613525390625, + "logps/rejected": -282.9382019042969, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.45950984954834, + "rewards/margins": 12.52802848815918, + "rewards/rejected": -18.987539291381836, + "step": 3790 + }, + { + "epoch": 6.09, + "learning_rate": 1.79944510503369e-07, + "logits/chosen": -1.6300346851348877, + "logits/rejected": -1.5992354154586792, + "logps/chosen": -178.6389923095703, + "logps/rejected": -275.06304931640625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.132242202758789, + "rewards/margins": 12.213111877441406, + "rewards/rejected": -20.345354080200195, + "step": 3791 + }, + { + "epoch": 6.09, + "learning_rate": 1.798454221165279e-07, + "logits/chosen": -1.393763542175293, + "logits/rejected": -1.3956931829452515, + "logps/chosen": -129.41075134277344, + "logps/rejected": -238.068115234375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.418262958526611, + "rewards/margins": 10.327736854553223, + "rewards/rejected": -16.746000289916992, + "step": 3792 + }, + { + "epoch": 6.09, + "learning_rate": 1.7974633372968687e-07, + "logits/chosen": -1.34010648727417, + "logits/rejected": -1.3275750875473022, + "logps/chosen": -160.78604125976562, + "logps/rejected": -345.16265869140625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.877038478851318, + "rewards/margins": 15.498299598693848, + "rewards/rejected": -23.375335693359375, + "step": 3793 + }, + { + "epoch": 6.09, + "learning_rate": 1.796472453428458e-07, + "logits/chosen": -1.6587071418762207, + "logits/rejected": -1.628845453262329, + "logps/chosen": -134.6777801513672, + "logps/rejected": -306.2210998535156, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.81648588180542, + "rewards/margins": 17.395549774169922, + "rewards/rejected": -22.212034225463867, + "step": 3794 + }, + { + "epoch": 6.09, + "learning_rate": 1.7954815695600477e-07, + "logits/chosen": -1.4521899223327637, + "logits/rejected": -1.5008140802383423, + "logps/chosen": -157.00741577148438, + "logps/rejected": -260.797119140625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.5496063232421875, + "rewards/margins": 9.704809188842773, + "rewards/rejected": -17.25441551208496, + "step": 3795 + }, + { + "epoch": 6.09, + "learning_rate": 1.794490685691637e-07, + "logits/chosen": -1.637709140777588, + "logits/rejected": -1.6971737146377563, + "logps/chosen": -124.56082153320312, + "logps/rejected": -297.3476257324219, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.865278244018555, + "rewards/margins": 14.160666465759277, + "rewards/rejected": -19.02594566345215, + "step": 3796 + }, + { + "epoch": 6.09, + "learning_rate": 1.793499801823226e-07, + "logits/chosen": -1.5442612171173096, + "logits/rejected": -1.5449774265289307, + "logps/chosen": -166.0533447265625, + "logps/rejected": -279.19281005859375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.236565589904785, + "rewards/margins": 12.470453262329102, + "rewards/rejected": -19.70701789855957, + "step": 3797 + }, + { + "epoch": 6.1, + "learning_rate": 1.7925089179548157e-07, + "logits/chosen": -1.595421314239502, + "logits/rejected": -1.4901849031448364, + "logps/chosen": -127.79541015625, + "logps/rejected": -301.0218505859375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.283175468444824, + "rewards/margins": 17.78931427001953, + "rewards/rejected": -22.072490692138672, + "step": 3798 + }, + { + "epoch": 6.1, + "learning_rate": 1.791518034086405e-07, + "logits/chosen": -1.565866470336914, + "logits/rejected": -1.6001019477844238, + "logps/chosen": -175.692626953125, + "logps/rejected": -291.1026611328125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.593716621398926, + "rewards/margins": 11.742859840393066, + "rewards/rejected": -21.336576461791992, + "step": 3799 + }, + { + "epoch": 6.1, + "learning_rate": 1.7905271502179943e-07, + "logits/chosen": -1.5721890926361084, + "logits/rejected": -1.567267656326294, + "logps/chosen": -129.5963592529297, + "logps/rejected": -238.68072509765625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.484330654144287, + "rewards/margins": 11.897600173950195, + "rewards/rejected": -17.381929397583008, + "step": 3800 + }, + { + "epoch": 6.1, + "learning_rate": 1.789536266349584e-07, + "logits/chosen": -1.536003828048706, + "logits/rejected": -1.5603069067001343, + "logps/chosen": -179.0333251953125, + "logps/rejected": -325.583251953125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.69306468963623, + "rewards/margins": 13.500709533691406, + "rewards/rejected": -23.19377326965332, + "step": 3801 + }, + { + "epoch": 6.1, + "learning_rate": 1.788545382481173e-07, + "logits/chosen": -1.4298464059829712, + "logits/rejected": -1.49652099609375, + "logps/chosen": -157.45657348632812, + "logps/rejected": -278.4204406738281, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.556663513183594, + "rewards/margins": 12.258509635925293, + "rewards/rejected": -19.815174102783203, + "step": 3802 + }, + { + "epoch": 6.1, + "learning_rate": 1.7875544986127626e-07, + "logits/chosen": -1.4853955507278442, + "logits/rejected": -1.476299524307251, + "logps/chosen": -166.4022979736328, + "logps/rejected": -293.12030029296875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.654758453369141, + "rewards/margins": 12.98419189453125, + "rewards/rejected": -20.63895034790039, + "step": 3803 + }, + { + "epoch": 6.11, + "learning_rate": 1.786563614744352e-07, + "logits/chosen": -1.247530221939087, + "logits/rejected": -1.2877860069274902, + "logps/chosen": -218.80303955078125, + "logps/rejected": -361.0220031738281, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.733563423156738, + "rewards/margins": 12.186948776245117, + "rewards/rejected": -24.920513153076172, + "step": 3804 + }, + { + "epoch": 6.11, + "learning_rate": 1.7855727308759413e-07, + "logits/chosen": -1.4932072162628174, + "logits/rejected": -1.4531999826431274, + "logps/chosen": -172.45372009277344, + "logps/rejected": -285.5954284667969, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.857060432434082, + "rewards/margins": 12.19033432006836, + "rewards/rejected": -20.047393798828125, + "step": 3805 + }, + { + "epoch": 6.11, + "learning_rate": 1.7845818470075306e-07, + "logits/chosen": -1.482393503189087, + "logits/rejected": -1.4890785217285156, + "logps/chosen": -171.34042358398438, + "logps/rejected": -306.6715393066406, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.051907539367676, + "rewards/margins": 13.611867904663086, + "rewards/rejected": -21.663776397705078, + "step": 3806 + }, + { + "epoch": 6.11, + "learning_rate": 1.78359096313912e-07, + "logits/chosen": -1.423534870147705, + "logits/rejected": -1.3842170238494873, + "logps/chosen": -167.61651611328125, + "logps/rejected": -268.9778137207031, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.304862022399902, + "rewards/margins": 11.43362045288086, + "rewards/rejected": -18.738483428955078, + "step": 3807 + }, + { + "epoch": 6.11, + "learning_rate": 1.7826000792707093e-07, + "logits/chosen": -1.6707885265350342, + "logits/rejected": -1.70903742313385, + "logps/chosen": -123.28440856933594, + "logps/rejected": -261.91973876953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9392216205596924, + "rewards/margins": 14.484914779663086, + "rewards/rejected": -18.424137115478516, + "step": 3808 + }, + { + "epoch": 6.11, + "learning_rate": 1.7816091954022988e-07, + "logits/chosen": -1.35280442237854, + "logits/rejected": -1.3350253105163574, + "logps/chosen": -186.35333251953125, + "logps/rejected": -282.67352294921875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.204326152801514, + "rewards/margins": 11.649652481079102, + "rewards/rejected": -18.853979110717773, + "step": 3809 + }, + { + "epoch": 6.12, + "learning_rate": 1.7806183115338882e-07, + "logits/chosen": -1.4358875751495361, + "logits/rejected": -1.3967214822769165, + "logps/chosen": -162.30902099609375, + "logps/rejected": -274.95941162109375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.812281131744385, + "rewards/margins": 11.479654312133789, + "rewards/rejected": -19.291934967041016, + "step": 3810 + }, + { + "epoch": 6.12, + "learning_rate": 1.7796274276654775e-07, + "logits/chosen": -1.3384654521942139, + "logits/rejected": -1.3976777791976929, + "logps/chosen": -173.08551025390625, + "logps/rejected": -281.0780029296875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.944849491119385, + "rewards/margins": 9.928544044494629, + "rewards/rejected": -17.87339210510254, + "step": 3811 + }, + { + "epoch": 6.12, + "learning_rate": 1.7786365437970668e-07, + "logits/chosen": -1.444828987121582, + "logits/rejected": -1.5061047077178955, + "logps/chosen": -137.6788787841797, + "logps/rejected": -297.1807861328125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.707231521606445, + "rewards/margins": 14.911031723022461, + "rewards/rejected": -21.61826515197754, + "step": 3812 + }, + { + "epoch": 6.12, + "learning_rate": 1.7776456599286562e-07, + "logits/chosen": -1.6114637851715088, + "logits/rejected": -1.6083928346633911, + "logps/chosen": -150.7445068359375, + "logps/rejected": -300.4595947265625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.256232261657715, + "rewards/margins": 15.430974006652832, + "rewards/rejected": -23.687206268310547, + "step": 3813 + }, + { + "epoch": 6.12, + "learning_rate": 1.7766547760602458e-07, + "logits/chosen": -1.4876816272735596, + "logits/rejected": -1.3876979351043701, + "logps/chosen": -152.75875854492188, + "logps/rejected": -270.2669677734375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.008551120758057, + "rewards/margins": 12.534966468811035, + "rewards/rejected": -18.54351806640625, + "step": 3814 + }, + { + "epoch": 6.12, + "learning_rate": 1.775663892191835e-07, + "logits/chosen": -1.4808738231658936, + "logits/rejected": -1.575512170791626, + "logps/chosen": -154.97482299804688, + "logps/rejected": -297.31329345703125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.233509063720703, + "rewards/margins": 12.483366012573242, + "rewards/rejected": -19.716875076293945, + "step": 3815 + }, + { + "epoch": 6.13, + "learning_rate": 1.7746730083234244e-07, + "logits/chosen": -1.4333171844482422, + "logits/rejected": -1.3958699703216553, + "logps/chosen": -170.19345092773438, + "logps/rejected": -254.851806640625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.281806945800781, + "rewards/margins": 10.081523895263672, + "rewards/rejected": -18.363330841064453, + "step": 3816 + }, + { + "epoch": 6.13, + "learning_rate": 1.7736821244550138e-07, + "logits/chosen": -1.5037004947662354, + "logits/rejected": -1.4921587705612183, + "logps/chosen": -172.55136108398438, + "logps/rejected": -278.90374755859375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.565716743469238, + "rewards/margins": 11.258262634277344, + "rewards/rejected": -19.823978424072266, + "step": 3817 + }, + { + "epoch": 6.13, + "learning_rate": 1.772691240586603e-07, + "logits/chosen": -1.6077229976654053, + "logits/rejected": -1.565847635269165, + "logps/chosen": -123.74120330810547, + "logps/rejected": -240.32485961914062, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.144407272338867, + "rewards/margins": 12.720708847045898, + "rewards/rejected": -17.865116119384766, + "step": 3818 + }, + { + "epoch": 6.13, + "learning_rate": 1.7717003567181927e-07, + "logits/chosen": -1.4853519201278687, + "logits/rejected": -1.4600698947906494, + "logps/chosen": -131.24649047851562, + "logps/rejected": -269.6234130859375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.49221658706665, + "rewards/margins": 12.163769721984863, + "rewards/rejected": -17.655986785888672, + "step": 3819 + }, + { + "epoch": 6.13, + "learning_rate": 1.770709472849782e-07, + "logits/chosen": -1.5571489334106445, + "logits/rejected": -1.5601773262023926, + "logps/chosen": -147.54766845703125, + "logps/rejected": -326.118408203125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.4067535400390625, + "rewards/margins": 17.37704086303711, + "rewards/rejected": -24.783794403076172, + "step": 3820 + }, + { + "epoch": 6.13, + "learning_rate": 1.769718588981371e-07, + "logits/chosen": -1.619027853012085, + "logits/rejected": -1.6156718730926514, + "logps/chosen": -124.7366943359375, + "logps/rejected": -259.7955627441406, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9635443687438965, + "rewards/margins": 14.071791648864746, + "rewards/rejected": -18.035335540771484, + "step": 3821 + }, + { + "epoch": 6.13, + "learning_rate": 1.7687277051129607e-07, + "logits/chosen": -1.4566301107406616, + "logits/rejected": -1.4034392833709717, + "logps/chosen": -154.65065002441406, + "logps/rejected": -242.01780700683594, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.770689964294434, + "rewards/margins": 9.75728988647461, + "rewards/rejected": -16.52798080444336, + "step": 3822 + }, + { + "epoch": 6.14, + "learning_rate": 1.76773682124455e-07, + "logits/chosen": -1.4636826515197754, + "logits/rejected": -1.5505468845367432, + "logps/chosen": -121.57402038574219, + "logps/rejected": -320.09912109375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.0760722160339355, + "rewards/margins": 16.59515380859375, + "rewards/rejected": -22.671226501464844, + "step": 3823 + }, + { + "epoch": 6.14, + "learning_rate": 1.7667459373761396e-07, + "logits/chosen": -1.6819249391555786, + "logits/rejected": -1.6390478610992432, + "logps/chosen": -153.12631225585938, + "logps/rejected": -253.43814086914062, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.84613037109375, + "rewards/margins": 11.02760124206543, + "rewards/rejected": -17.87373161315918, + "step": 3824 + }, + { + "epoch": 6.14, + "learning_rate": 1.7657550535077287e-07, + "logits/chosen": -1.5726685523986816, + "logits/rejected": -1.5228798389434814, + "logps/chosen": -170.64163208007812, + "logps/rejected": -267.82781982421875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.40782356262207, + "rewards/margins": 12.538034439086914, + "rewards/rejected": -18.945858001708984, + "step": 3825 + }, + { + "epoch": 6.14, + "learning_rate": 1.764764169639318e-07, + "logits/chosen": -1.5259493589401245, + "logits/rejected": -1.5667859315872192, + "logps/chosen": -169.5206756591797, + "logps/rejected": -290.6390075683594, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.270910263061523, + "rewards/margins": 11.483455657958984, + "rewards/rejected": -20.75436782836914, + "step": 3826 + }, + { + "epoch": 6.14, + "learning_rate": 1.7637732857709076e-07, + "logits/chosen": -1.2963566780090332, + "logits/rejected": -1.3839125633239746, + "logps/chosen": -178.90089416503906, + "logps/rejected": -299.0060729980469, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.238103866577148, + "rewards/margins": 11.606868743896484, + "rewards/rejected": -21.844970703125, + "step": 3827 + }, + { + "epoch": 6.14, + "learning_rate": 1.762782401902497e-07, + "logits/chosen": -1.5130980014801025, + "logits/rejected": -1.4904916286468506, + "logps/chosen": -120.67831420898438, + "logps/rejected": -322.59869384765625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3660995960235596, + "rewards/margins": 18.8270320892334, + "rewards/rejected": -22.193130493164062, + "step": 3828 + }, + { + "epoch": 6.15, + "learning_rate": 1.7617915180340863e-07, + "logits/chosen": -1.5276298522949219, + "logits/rejected": -1.4329999685287476, + "logps/chosen": -106.82864379882812, + "logps/rejected": -215.99310302734375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.975137948989868, + "rewards/margins": 12.048977851867676, + "rewards/rejected": -15.024116516113281, + "step": 3829 + }, + { + "epoch": 6.15, + "learning_rate": 1.7608006341656756e-07, + "logits/chosen": -1.5999598503112793, + "logits/rejected": -1.5721654891967773, + "logps/chosen": -191.3624267578125, + "logps/rejected": -279.52374267578125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.868209838867188, + "rewards/margins": 10.693227767944336, + "rewards/rejected": -19.561439514160156, + "step": 3830 + }, + { + "epoch": 6.15, + "learning_rate": 1.759809750297265e-07, + "logits/chosen": -1.4275267124176025, + "logits/rejected": -1.4615769386291504, + "logps/chosen": -165.47213745117188, + "logps/rejected": -292.7216796875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.581279754638672, + "rewards/margins": 12.478967666625977, + "rewards/rejected": -21.06024742126465, + "step": 3831 + }, + { + "epoch": 6.15, + "learning_rate": 1.7588188664288546e-07, + "logits/chosen": -1.4849669933319092, + "logits/rejected": -1.504443645477295, + "logps/chosen": -128.66592407226562, + "logps/rejected": -311.8310852050781, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.076352596282959, + "rewards/margins": 16.04985809326172, + "rewards/rejected": -22.126209259033203, + "step": 3832 + }, + { + "epoch": 6.15, + "learning_rate": 1.757827982560444e-07, + "logits/chosen": -1.3865934610366821, + "logits/rejected": -1.4249476194381714, + "logps/chosen": -121.48272705078125, + "logps/rejected": -279.328125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.088095188140869, + "rewards/margins": 15.226882934570312, + "rewards/rejected": -21.314977645874023, + "step": 3833 + }, + { + "epoch": 6.15, + "learning_rate": 1.7568370986920332e-07, + "logits/chosen": -1.3261303901672363, + "logits/rejected": -1.4245729446411133, + "logps/chosen": -176.65623474121094, + "logps/rejected": -327.67803955078125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.250896453857422, + "rewards/margins": 13.696477890014648, + "rewards/rejected": -22.947372436523438, + "step": 3834 + }, + { + "epoch": 6.16, + "learning_rate": 1.7558462148236226e-07, + "logits/chosen": -1.3696670532226562, + "logits/rejected": -1.3856624364852905, + "logps/chosen": -152.82598876953125, + "logps/rejected": -300.609130859375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.825052738189697, + "rewards/margins": 13.269974708557129, + "rewards/rejected": -21.095027923583984, + "step": 3835 + }, + { + "epoch": 6.16, + "learning_rate": 1.754855330955212e-07, + "logits/chosen": -1.3844093084335327, + "logits/rejected": -1.4814257621765137, + "logps/chosen": -166.3373565673828, + "logps/rejected": -326.5030517578125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.192693710327148, + "rewards/margins": 12.815999031066895, + "rewards/rejected": -22.00869369506836, + "step": 3836 + }, + { + "epoch": 6.16, + "learning_rate": 1.7538644470868015e-07, + "logits/chosen": -1.4280104637145996, + "logits/rejected": -1.4718190431594849, + "logps/chosen": -113.91871643066406, + "logps/rejected": -299.0685729980469, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.845567226409912, + "rewards/margins": 16.911596298217773, + "rewards/rejected": -21.757164001464844, + "step": 3837 + }, + { + "epoch": 6.16, + "learning_rate": 1.7528735632183908e-07, + "logits/chosen": -1.4824315309524536, + "logits/rejected": -1.5338945388793945, + "logps/chosen": -162.718994140625, + "logps/rejected": -291.10479736328125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.8893866539001465, + "rewards/margins": 13.124232292175293, + "rewards/rejected": -21.01361846923828, + "step": 3838 + }, + { + "epoch": 6.16, + "learning_rate": 1.7518826793499802e-07, + "logits/chosen": -1.4497954845428467, + "logits/rejected": -1.522520661354065, + "logps/chosen": -120.57606506347656, + "logps/rejected": -287.828125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.103967189788818, + "rewards/margins": 15.613201141357422, + "rewards/rejected": -21.7171688079834, + "step": 3839 + }, + { + "epoch": 6.16, + "learning_rate": 1.7508917954815695e-07, + "logits/chosen": -1.7610411643981934, + "logits/rejected": -1.661388874053955, + "logps/chosen": -115.68321228027344, + "logps/rejected": -240.237060546875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.477254629135132, + "rewards/margins": 14.764564514160156, + "rewards/rejected": -18.241819381713867, + "step": 3840 + }, + { + "epoch": 6.17, + "learning_rate": 1.7499009116131588e-07, + "logits/chosen": -1.6221367120742798, + "logits/rejected": -1.5923718214035034, + "logps/chosen": -161.3482666015625, + "logps/rejected": -290.1473388671875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.769899368286133, + "rewards/margins": 12.521329879760742, + "rewards/rejected": -17.291229248046875, + "step": 3841 + }, + { + "epoch": 6.17, + "learning_rate": 1.7489100277447482e-07, + "logits/chosen": -1.569892406463623, + "logits/rejected": -1.509198546409607, + "logps/chosen": -171.5223388671875, + "logps/rejected": -302.9300537109375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.834332466125488, + "rewards/margins": 13.204143524169922, + "rewards/rejected": -22.038475036621094, + "step": 3842 + }, + { + "epoch": 6.17, + "learning_rate": 1.7479191438763378e-07, + "logits/chosen": -1.4206053018569946, + "logits/rejected": -1.3526794910430908, + "logps/chosen": -166.36354064941406, + "logps/rejected": -270.6518859863281, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.2524919509887695, + "rewards/margins": 11.531538009643555, + "rewards/rejected": -18.78403091430664, + "step": 3843 + }, + { + "epoch": 6.17, + "learning_rate": 1.7469282600079268e-07, + "logits/chosen": -1.7332799434661865, + "logits/rejected": -1.626183032989502, + "logps/chosen": -157.85369873046875, + "logps/rejected": -270.10540771484375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.139922618865967, + "rewards/margins": 12.709182739257812, + "rewards/rejected": -19.849105834960938, + "step": 3844 + }, + { + "epoch": 6.17, + "learning_rate": 1.7459373761395164e-07, + "logits/chosen": -1.5969452857971191, + "logits/rejected": -1.657511830329895, + "logps/chosen": -77.7225570678711, + "logps/rejected": -218.53013610839844, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.833327293395996, + "rewards/margins": 11.985757827758789, + "rewards/rejected": -14.819086074829102, + "step": 3845 + }, + { + "epoch": 6.17, + "learning_rate": 1.7449464922711058e-07, + "logits/chosen": -1.428108811378479, + "logits/rejected": -1.4289007186889648, + "logps/chosen": -128.5738525390625, + "logps/rejected": -279.18505859375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.6427388191223145, + "rewards/margins": 14.539169311523438, + "rewards/rejected": -21.181907653808594, + "step": 3846 + }, + { + "epoch": 6.17, + "learning_rate": 1.743955608402695e-07, + "logits/chosen": -1.3786137104034424, + "logits/rejected": -1.4462168216705322, + "logps/chosen": -150.94070434570312, + "logps/rejected": -269.56561279296875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.0785064697265625, + "rewards/margins": 11.051803588867188, + "rewards/rejected": -16.13031005859375, + "step": 3847 + }, + { + "epoch": 6.18, + "learning_rate": 1.7429647245342847e-07, + "logits/chosen": -1.3843207359313965, + "logits/rejected": -1.4085087776184082, + "logps/chosen": -158.68716430664062, + "logps/rejected": -268.3040771484375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.329726219177246, + "rewards/margins": 11.531207084655762, + "rewards/rejected": -17.860933303833008, + "step": 3848 + }, + { + "epoch": 6.18, + "learning_rate": 1.7419738406658738e-07, + "logits/chosen": -1.519884467124939, + "logits/rejected": -1.5395328998565674, + "logps/chosen": -141.9871826171875, + "logps/rejected": -303.6024169921875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.050459861755371, + "rewards/margins": 15.11154556274414, + "rewards/rejected": -21.162006378173828, + "step": 3849 + }, + { + "epoch": 6.18, + "learning_rate": 1.740982956797463e-07, + "logits/chosen": -1.6215970516204834, + "logits/rejected": -1.629342794418335, + "logps/chosen": -132.2390594482422, + "logps/rejected": -319.824951171875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.919148921966553, + "rewards/margins": 18.122648239135742, + "rewards/rejected": -23.04179573059082, + "step": 3850 + }, + { + "epoch": 6.18, + "learning_rate": 1.7399920729290527e-07, + "logits/chosen": -1.3492869138717651, + "logits/rejected": -1.4705023765563965, + "logps/chosen": -113.3492431640625, + "logps/rejected": -282.7255859375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.74180269241333, + "rewards/margins": 14.137002944946289, + "rewards/rejected": -17.878807067871094, + "step": 3851 + }, + { + "epoch": 6.18, + "learning_rate": 1.739001189060642e-07, + "logits/chosen": -1.5944342613220215, + "logits/rejected": -1.5429421663284302, + "logps/chosen": -168.31809997558594, + "logps/rejected": -282.19879150390625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.379705429077148, + "rewards/margins": 13.792737007141113, + "rewards/rejected": -21.172443389892578, + "step": 3852 + }, + { + "epoch": 6.18, + "learning_rate": 1.7380103051922316e-07, + "logits/chosen": -1.5550401210784912, + "logits/rejected": -1.5876328945159912, + "logps/chosen": -154.95199584960938, + "logps/rejected": -270.5619201660156, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.908344268798828, + "rewards/margins": 11.830986022949219, + "rewards/rejected": -19.739328384399414, + "step": 3853 + }, + { + "epoch": 6.19, + "learning_rate": 1.7370194213238207e-07, + "logits/chosen": -1.4398596286773682, + "logits/rejected": -1.4562010765075684, + "logps/chosen": -154.61407470703125, + "logps/rejected": -276.36370849609375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.429623603820801, + "rewards/margins": 12.411519050598145, + "rewards/rejected": -19.841142654418945, + "step": 3854 + }, + { + "epoch": 6.19, + "learning_rate": 1.73602853745541e-07, + "logits/chosen": -1.593056321144104, + "logits/rejected": -1.6533660888671875, + "logps/chosen": -93.23754119873047, + "logps/rejected": -282.664794921875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.56699800491333, + "rewards/margins": 15.884660720825195, + "rewards/rejected": -18.451658248901367, + "step": 3855 + }, + { + "epoch": 6.19, + "learning_rate": 1.7350376535869996e-07, + "logits/chosen": -1.5050660371780396, + "logits/rejected": -1.4875307083129883, + "logps/chosen": -121.5550765991211, + "logps/rejected": -259.6245422363281, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.520781517028809, + "rewards/margins": 12.151273727416992, + "rewards/rejected": -16.672054290771484, + "step": 3856 + }, + { + "epoch": 6.19, + "learning_rate": 1.734046769718589e-07, + "logits/chosen": -1.551447868347168, + "logits/rejected": -1.5426613092422485, + "logps/chosen": -138.09239196777344, + "logps/rejected": -255.3394012451172, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.444283485412598, + "rewards/margins": 12.174798011779785, + "rewards/rejected": -17.619081497192383, + "step": 3857 + }, + { + "epoch": 6.19, + "learning_rate": 1.733055885850178e-07, + "logits/chosen": -1.4740409851074219, + "logits/rejected": -1.5714890956878662, + "logps/chosen": -132.3528594970703, + "logps/rejected": -300.00994873046875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.720621109008789, + "rewards/margins": 13.848098754882812, + "rewards/rejected": -19.5687198638916, + "step": 3858 + }, + { + "epoch": 6.19, + "learning_rate": 1.7320650019817676e-07, + "logits/chosen": -1.5807878971099854, + "logits/rejected": -1.7224830389022827, + "logps/chosen": -126.27462768554688, + "logps/rejected": -300.0312805175781, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.346314430236816, + "rewards/margins": 14.826430320739746, + "rewards/rejected": -20.172744750976562, + "step": 3859 + }, + { + "epoch": 6.2, + "learning_rate": 1.731074118113357e-07, + "logits/chosen": -1.4225494861602783, + "logits/rejected": -1.4204446077346802, + "logps/chosen": -153.34326171875, + "logps/rejected": -288.9552917480469, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.690119743347168, + "rewards/margins": 13.606597900390625, + "rewards/rejected": -22.296716690063477, + "step": 3860 + }, + { + "epoch": 6.2, + "learning_rate": 1.7300832342449465e-07, + "logits/chosen": -1.3409863710403442, + "logits/rejected": -1.3816914558410645, + "logps/chosen": -154.36805725097656, + "logps/rejected": -302.51080322265625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.967069625854492, + "rewards/margins": 15.24156379699707, + "rewards/rejected": -21.208633422851562, + "step": 3861 + }, + { + "epoch": 6.2, + "learning_rate": 1.729092350376536e-07, + "logits/chosen": -1.5778148174285889, + "logits/rejected": -1.6799161434173584, + "logps/chosen": -134.7712860107422, + "logps/rejected": -282.6299133300781, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.987733840942383, + "rewards/margins": 12.59692668914795, + "rewards/rejected": -18.584659576416016, + "step": 3862 + }, + { + "epoch": 6.2, + "learning_rate": 1.728101466508125e-07, + "logits/chosen": -1.4984519481658936, + "logits/rejected": -1.533626675605774, + "logps/chosen": -170.5453643798828, + "logps/rejected": -349.5455322265625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.560235023498535, + "rewards/margins": 16.893150329589844, + "rewards/rejected": -25.453386306762695, + "step": 3863 + }, + { + "epoch": 6.2, + "learning_rate": 1.7271105826397145e-07, + "logits/chosen": -1.533313512802124, + "logits/rejected": -1.5506547689437866, + "logps/chosen": -145.4617156982422, + "logps/rejected": -272.6354064941406, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.333415985107422, + "rewards/margins": 13.494709014892578, + "rewards/rejected": -19.828123092651367, + "step": 3864 + }, + { + "epoch": 6.2, + "learning_rate": 1.726119698771304e-07, + "logits/chosen": -1.4255099296569824, + "logits/rejected": -1.4249063730239868, + "logps/chosen": -164.3316192626953, + "logps/rejected": -298.12640380859375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.051913261413574, + "rewards/margins": 11.631453514099121, + "rewards/rejected": -19.683364868164062, + "step": 3865 + }, + { + "epoch": 6.21, + "learning_rate": 1.7251288149028935e-07, + "logits/chosen": -1.6056209802627563, + "logits/rejected": -1.5921669006347656, + "logps/chosen": -166.289306640625, + "logps/rejected": -338.6816101074219, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.280487537384033, + "rewards/margins": 18.513002395629883, + "rewards/rejected": -24.79349136352539, + "step": 3866 + }, + { + "epoch": 6.21, + "learning_rate": 1.7241379310344828e-07, + "logits/chosen": -1.4909693002700806, + "logits/rejected": -1.5521025657653809, + "logps/chosen": -138.48324584960938, + "logps/rejected": -320.2838439941406, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.431232929229736, + "rewards/margins": 15.430791854858398, + "rewards/rejected": -20.862024307250977, + "step": 3867 + }, + { + "epoch": 6.21, + "learning_rate": 1.723147047166072e-07, + "logits/chosen": -1.3606196641921997, + "logits/rejected": -1.3225680589675903, + "logps/chosen": -129.82752990722656, + "logps/rejected": -262.2251281738281, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.352655410766602, + "rewards/margins": 12.738580703735352, + "rewards/rejected": -18.091236114501953, + "step": 3868 + }, + { + "epoch": 6.21, + "learning_rate": 1.7221561632976615e-07, + "logits/chosen": -1.5962382555007935, + "logits/rejected": -1.5871429443359375, + "logps/chosen": -185.88157653808594, + "logps/rejected": -323.19512939453125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.974701881408691, + "rewards/margins": 13.709521293640137, + "rewards/rejected": -23.684221267700195, + "step": 3869 + }, + { + "epoch": 6.21, + "learning_rate": 1.7211652794292508e-07, + "logits/chosen": -1.541239619255066, + "logits/rejected": -1.604701280593872, + "logps/chosen": -128.6729736328125, + "logps/rejected": -254.98373413085938, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.761159420013428, + "rewards/margins": 12.612956047058105, + "rewards/rejected": -17.374114990234375, + "step": 3870 + }, + { + "epoch": 6.21, + "learning_rate": 1.7201743955608401e-07, + "logits/chosen": -1.4594465494155884, + "logits/rejected": -1.4292292594909668, + "logps/chosen": -167.54849243164062, + "logps/rejected": -311.68603515625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.87504768371582, + "rewards/margins": 13.862178802490234, + "rewards/rejected": -21.737226486206055, + "step": 3871 + }, + { + "epoch": 6.22, + "learning_rate": 1.7191835116924297e-07, + "logits/chosen": -1.5023512840270996, + "logits/rejected": -1.4889732599258423, + "logps/chosen": -201.88916015625, + "logps/rejected": -302.737060546875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.768281936645508, + "rewards/margins": 11.471170425415039, + "rewards/rejected": -20.239452362060547, + "step": 3872 + }, + { + "epoch": 6.22, + "learning_rate": 1.7181926278240188e-07, + "logits/chosen": -1.4305771589279175, + "logits/rejected": -1.4483516216278076, + "logps/chosen": -173.68829345703125, + "logps/rejected": -284.6599426269531, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.438737869262695, + "rewards/margins": 10.638248443603516, + "rewards/rejected": -20.07698631286621, + "step": 3873 + }, + { + "epoch": 6.22, + "learning_rate": 1.7172017439556084e-07, + "logits/chosen": -1.4120426177978516, + "logits/rejected": -1.456087350845337, + "logps/chosen": -190.09632873535156, + "logps/rejected": -312.9935302734375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.297791481018066, + "rewards/margins": 13.136938095092773, + "rewards/rejected": -21.434730529785156, + "step": 3874 + }, + { + "epoch": 6.22, + "learning_rate": 1.7162108600871977e-07, + "logits/chosen": -1.5439200401306152, + "logits/rejected": -1.552311897277832, + "logps/chosen": -135.64334106445312, + "logps/rejected": -289.7987365722656, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.463281631469727, + "rewards/margins": 13.073899269104004, + "rewards/rejected": -18.537181854248047, + "step": 3875 + }, + { + "epoch": 6.22, + "learning_rate": 1.715219976218787e-07, + "logits/chosen": -1.4085543155670166, + "logits/rejected": -1.4225096702575684, + "logps/chosen": -185.99681091308594, + "logps/rejected": -295.7558288574219, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.702444076538086, + "rewards/margins": 12.530111312866211, + "rewards/rejected": -21.232555389404297, + "step": 3876 + }, + { + "epoch": 6.22, + "learning_rate": 1.7142290923503764e-07, + "logits/chosen": -1.4050679206848145, + "logits/rejected": -1.3458149433135986, + "logps/chosen": -142.16769409179688, + "logps/rejected": -274.2831115722656, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.059338092803955, + "rewards/margins": 13.936020851135254, + "rewards/rejected": -19.995359420776367, + "step": 3877 + }, + { + "epoch": 6.22, + "learning_rate": 1.7132382084819657e-07, + "logits/chosen": -1.5970219373703003, + "logits/rejected": -1.5252021551132202, + "logps/chosen": -166.8612060546875, + "logps/rejected": -263.92755126953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.00337028503418, + "rewards/margins": 9.398751258850098, + "rewards/rejected": -17.402122497558594, + "step": 3878 + }, + { + "epoch": 6.23, + "learning_rate": 1.712247324613555e-07, + "logits/chosen": -1.6325063705444336, + "logits/rejected": -1.5764509439468384, + "logps/chosen": -189.055908203125, + "logps/rejected": -307.99609375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.103551864624023, + "rewards/margins": 12.151839256286621, + "rewards/rejected": -21.25539207458496, + "step": 3879 + }, + { + "epoch": 6.23, + "learning_rate": 1.7112564407451447e-07, + "logits/chosen": -1.4125529527664185, + "logits/rejected": -1.434945821762085, + "logps/chosen": -159.01956176757812, + "logps/rejected": -320.3140869140625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.397619724273682, + "rewards/margins": 15.68539047241211, + "rewards/rejected": -23.083011627197266, + "step": 3880 + }, + { + "epoch": 6.23, + "learning_rate": 1.710265556876734e-07, + "logits/chosen": -1.524092674255371, + "logits/rejected": -1.5119178295135498, + "logps/chosen": -145.8486785888672, + "logps/rejected": -270.34722900390625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.972131729125977, + "rewards/margins": 13.597867012023926, + "rewards/rejected": -19.56999969482422, + "step": 3881 + }, + { + "epoch": 6.23, + "learning_rate": 1.7092746730083233e-07, + "logits/chosen": -1.3804686069488525, + "logits/rejected": -1.5149834156036377, + "logps/chosen": -129.577880859375, + "logps/rejected": -311.12353515625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.484848976135254, + "rewards/margins": 16.10519790649414, + "rewards/rejected": -21.59004783630371, + "step": 3882 + }, + { + "epoch": 6.23, + "learning_rate": 1.7082837891399127e-07, + "logits/chosen": -1.5789371728897095, + "logits/rejected": -1.486254334449768, + "logps/chosen": -185.633056640625, + "logps/rejected": -255.0836181640625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.563197135925293, + "rewards/margins": 10.21501350402832, + "rewards/rejected": -17.77821159362793, + "step": 3883 + }, + { + "epoch": 6.23, + "learning_rate": 1.707292905271502e-07, + "logits/chosen": -1.425544023513794, + "logits/rejected": -1.48406183719635, + "logps/chosen": -189.84178161621094, + "logps/rejected": -351.216552734375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.790539741516113, + "rewards/margins": 14.423883438110352, + "rewards/rejected": -25.21442222595215, + "step": 3884 + }, + { + "epoch": 6.24, + "learning_rate": 1.7063020214030916e-07, + "logits/chosen": -1.458839774131775, + "logits/rejected": -1.460540533065796, + "logps/chosen": -163.9482421875, + "logps/rejected": -285.7064208984375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.216795921325684, + "rewards/margins": 12.228906631469727, + "rewards/rejected": -19.445703506469727, + "step": 3885 + }, + { + "epoch": 6.24, + "learning_rate": 1.705311137534681e-07, + "logits/chosen": -1.6150254011154175, + "logits/rejected": -1.6892033815383911, + "logps/chosen": -123.8370361328125, + "logps/rejected": -274.9578857421875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.753222465515137, + "rewards/margins": 12.837717056274414, + "rewards/rejected": -18.590938568115234, + "step": 3886 + }, + { + "epoch": 6.24, + "learning_rate": 1.7043202536662703e-07, + "logits/chosen": -1.391291856765747, + "logits/rejected": -1.3539625406265259, + "logps/chosen": -127.42044830322266, + "logps/rejected": -272.901123046875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.314912796020508, + "rewards/margins": 12.438775062561035, + "rewards/rejected": -17.75368881225586, + "step": 3887 + }, + { + "epoch": 6.24, + "learning_rate": 1.7033293697978596e-07, + "logits/chosen": -1.5950490236282349, + "logits/rejected": -1.594017505645752, + "logps/chosen": -170.873291015625, + "logps/rejected": -333.9078063964844, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.738178253173828, + "rewards/margins": 16.04368019104004, + "rewards/rejected": -23.781858444213867, + "step": 3888 + }, + { + "epoch": 6.24, + "learning_rate": 1.702338485929449e-07, + "logits/chosen": -1.5385055541992188, + "logits/rejected": -1.5058715343475342, + "logps/chosen": -127.00202178955078, + "logps/rejected": -270.83782958984375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.5927042961120605, + "rewards/margins": 14.673528671264648, + "rewards/rejected": -21.266233444213867, + "step": 3889 + }, + { + "epoch": 6.24, + "learning_rate": 1.7013476020610385e-07, + "logits/chosen": -1.736159324645996, + "logits/rejected": -1.6818408966064453, + "logps/chosen": -115.66167449951172, + "logps/rejected": -251.61285400390625, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6863386631011963, + "rewards/margins": 15.092650413513184, + "rewards/rejected": -17.778989791870117, + "step": 3890 + }, + { + "epoch": 6.25, + "learning_rate": 1.7003567181926279e-07, + "logits/chosen": -1.5683491230010986, + "logits/rejected": -1.4765475988388062, + "logps/chosen": -140.60000610351562, + "logps/rejected": -256.1197814941406, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.914010047912598, + "rewards/margins": 13.672219276428223, + "rewards/rejected": -19.58622932434082, + "step": 3891 + }, + { + "epoch": 6.25, + "learning_rate": 1.699365834324217e-07, + "logits/chosen": -1.3702197074890137, + "logits/rejected": -1.3703851699829102, + "logps/chosen": -139.6952362060547, + "logps/rejected": -263.6590881347656, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.659933090209961, + "rewards/margins": 13.055978775024414, + "rewards/rejected": -17.715911865234375, + "step": 3892 + }, + { + "epoch": 6.25, + "learning_rate": 1.6983749504558065e-07, + "logits/chosen": -1.6215869188308716, + "logits/rejected": -1.663212776184082, + "logps/chosen": -133.451171875, + "logps/rejected": -266.4822998046875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.3023176193237305, + "rewards/margins": 11.696677207946777, + "rewards/rejected": -17.998992919921875, + "step": 3893 + }, + { + "epoch": 6.25, + "learning_rate": 1.6973840665873959e-07, + "logits/chosen": -1.4693995714187622, + "logits/rejected": -1.4857287406921387, + "logps/chosen": -118.60004425048828, + "logps/rejected": -237.61465454101562, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.585785388946533, + "rewards/margins": 11.622251510620117, + "rewards/rejected": -16.208036422729492, + "step": 3894 + }, + { + "epoch": 6.25, + "learning_rate": 1.6963931827189855e-07, + "logits/chosen": -1.48971426486969, + "logits/rejected": -1.6138503551483154, + "logps/chosen": -170.00245666503906, + "logps/rejected": -338.73504638671875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.606276988983154, + "rewards/margins": 13.955498695373535, + "rewards/rejected": -21.56177520751953, + "step": 3895 + }, + { + "epoch": 6.25, + "learning_rate": 1.6954022988505745e-07, + "logits/chosen": -1.476684808731079, + "logits/rejected": -1.6425307989120483, + "logps/chosen": -91.8194808959961, + "logps/rejected": -278.4526062011719, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.8845624923706055, + "rewards/margins": 14.39467716217041, + "rewards/rejected": -18.279239654541016, + "step": 3896 + }, + { + "epoch": 6.26, + "learning_rate": 1.6944114149821639e-07, + "logits/chosen": -1.4746347665786743, + "logits/rejected": -1.5137865543365479, + "logps/chosen": -89.16888427734375, + "logps/rejected": -217.9373016357422, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.4629018306732178, + "rewards/margins": 11.086264610290527, + "rewards/rejected": -14.549165725708008, + "step": 3897 + }, + { + "epoch": 6.26, + "learning_rate": 1.6934205311137535e-07, + "logits/chosen": -1.2870584726333618, + "logits/rejected": -1.3494127988815308, + "logps/chosen": -130.896484375, + "logps/rejected": -246.9204559326172, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.605616569519043, + "rewards/margins": 11.094234466552734, + "rewards/rejected": -16.699851989746094, + "step": 3898 + }, + { + "epoch": 6.26, + "learning_rate": 1.6924296472453428e-07, + "logits/chosen": -1.4696335792541504, + "logits/rejected": -1.5489413738250732, + "logps/chosen": -209.91419982910156, + "logps/rejected": -321.0644836425781, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.179826736450195, + "rewards/margins": 11.845887184143066, + "rewards/rejected": -22.025712966918945, + "step": 3899 + }, + { + "epoch": 6.26, + "learning_rate": 1.6914387633769324e-07, + "logits/chosen": -1.456642746925354, + "logits/rejected": -1.4238017797470093, + "logps/chosen": -225.08555603027344, + "logps/rejected": -308.24456787109375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.017528533935547, + "rewards/margins": 10.307282447814941, + "rewards/rejected": -22.324811935424805, + "step": 3900 + }, + { + "epoch": 6.26, + "learning_rate": 1.6904478795085215e-07, + "logits/chosen": -1.25442373752594, + "logits/rejected": -1.2574689388275146, + "logps/chosen": -132.80120849609375, + "logps/rejected": -235.2710723876953, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.650886535644531, + "rewards/margins": 10.778369903564453, + "rewards/rejected": -15.429256439208984, + "step": 3901 + }, + { + "epoch": 6.26, + "learning_rate": 1.6894569956401108e-07, + "logits/chosen": -1.5512027740478516, + "logits/rejected": -1.565185785293579, + "logps/chosen": -134.906982421875, + "logps/rejected": -309.6687316894531, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.782958030700684, + "rewards/margins": 17.268020629882812, + "rewards/rejected": -22.050979614257812, + "step": 3902 + }, + { + "epoch": 6.26, + "learning_rate": 1.6884661117717004e-07, + "logits/chosen": -1.3824589252471924, + "logits/rejected": -1.445134162902832, + "logps/chosen": -129.37356567382812, + "logps/rejected": -245.88743591308594, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.811347007751465, + "rewards/margins": 10.565105438232422, + "rewards/rejected": -17.376453399658203, + "step": 3903 + }, + { + "epoch": 6.27, + "learning_rate": 1.6874752279032897e-07, + "logits/chosen": -1.5099104642868042, + "logits/rejected": -1.5295369625091553, + "logps/chosen": -184.1702423095703, + "logps/rejected": -344.0736083984375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.89108657836914, + "rewards/margins": 15.398740768432617, + "rewards/rejected": -24.289825439453125, + "step": 3904 + }, + { + "epoch": 6.27, + "learning_rate": 1.686484344034879e-07, + "logits/chosen": -1.4581186771392822, + "logits/rejected": -1.42510187625885, + "logps/chosen": -186.78285217285156, + "logps/rejected": -342.8776550292969, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.103338241577148, + "rewards/margins": 15.993819236755371, + "rewards/rejected": -25.097156524658203, + "step": 3905 + }, + { + "epoch": 6.27, + "learning_rate": 1.6854934601664684e-07, + "logits/chosen": -1.7315924167633057, + "logits/rejected": -1.5733197927474976, + "logps/chosen": -116.51252746582031, + "logps/rejected": -276.1841735839844, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.995081663131714, + "rewards/margins": 18.008350372314453, + "rewards/rejected": -21.003433227539062, + "step": 3906 + }, + { + "epoch": 6.27, + "learning_rate": 1.6845025762980577e-07, + "logits/chosen": -1.6040890216827393, + "logits/rejected": -1.6402450799942017, + "logps/chosen": -124.61642456054688, + "logps/rejected": -312.6434326171875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.101325511932373, + "rewards/margins": 16.236629486083984, + "rewards/rejected": -22.337955474853516, + "step": 3907 + }, + { + "epoch": 6.27, + "learning_rate": 1.6835116924296473e-07, + "logits/chosen": -1.4962958097457886, + "logits/rejected": -1.4931763410568237, + "logps/chosen": -158.1457061767578, + "logps/rejected": -273.2548828125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.5857343673706055, + "rewards/margins": 11.426156044006348, + "rewards/rejected": -18.011890411376953, + "step": 3908 + }, + { + "epoch": 6.27, + "learning_rate": 1.6825208085612366e-07, + "logits/chosen": -1.490328073501587, + "logits/rejected": -1.5195735692977905, + "logps/chosen": -167.98883056640625, + "logps/rejected": -320.948974609375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.061930656433105, + "rewards/margins": 14.700002670288086, + "rewards/rejected": -22.761932373046875, + "step": 3909 + }, + { + "epoch": 6.28, + "learning_rate": 1.681529924692826e-07, + "logits/chosen": -1.3981870412826538, + "logits/rejected": -1.430159568786621, + "logps/chosen": -161.3581085205078, + "logps/rejected": -314.7900390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.326447486877441, + "rewards/margins": 13.712295532226562, + "rewards/rejected": -22.038742065429688, + "step": 3910 + }, + { + "epoch": 6.28, + "learning_rate": 1.6805390408244153e-07, + "logits/chosen": -1.4249391555786133, + "logits/rejected": -1.4937368631362915, + "logps/chosen": -159.84837341308594, + "logps/rejected": -299.9945373535156, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.913923740386963, + "rewards/margins": 12.096325874328613, + "rewards/rejected": -20.010250091552734, + "step": 3911 + }, + { + "epoch": 6.28, + "learning_rate": 1.6795481569560046e-07, + "logits/chosen": -1.6382650136947632, + "logits/rejected": -1.5445024967193604, + "logps/chosen": -143.21714782714844, + "logps/rejected": -254.938232421875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.424627304077148, + "rewards/margins": 13.045330047607422, + "rewards/rejected": -19.46995735168457, + "step": 3912 + }, + { + "epoch": 6.28, + "learning_rate": 1.678557273087594e-07, + "logits/chosen": -1.5665897130966187, + "logits/rejected": -1.5277615785598755, + "logps/chosen": -159.82220458984375, + "logps/rejected": -305.2598876953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.5474982261657715, + "rewards/margins": 14.1263427734375, + "rewards/rejected": -20.67384147644043, + "step": 3913 + }, + { + "epoch": 6.28, + "learning_rate": 1.6775663892191836e-07, + "logits/chosen": -1.4943060874938965, + "logits/rejected": -1.5069241523742676, + "logps/chosen": -152.5938262939453, + "logps/rejected": -310.50787353515625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.281498908996582, + "rewards/margins": 15.097932815551758, + "rewards/rejected": -23.379432678222656, + "step": 3914 + }, + { + "epoch": 6.28, + "learning_rate": 1.6765755053507726e-07, + "logits/chosen": -1.4406414031982422, + "logits/rejected": -1.4236328601837158, + "logps/chosen": -184.52505493164062, + "logps/rejected": -298.0910339355469, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.425711631774902, + "rewards/margins": 12.35720443725586, + "rewards/rejected": -22.782917022705078, + "step": 3915 + }, + { + "epoch": 6.29, + "learning_rate": 1.6755846214823622e-07, + "logits/chosen": -1.3292018175125122, + "logits/rejected": -1.3749730587005615, + "logps/chosen": -123.0317611694336, + "logps/rejected": -237.62698364257812, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.623045921325684, + "rewards/margins": 11.66877555847168, + "rewards/rejected": -17.291820526123047, + "step": 3916 + }, + { + "epoch": 6.29, + "learning_rate": 1.6745937376139516e-07, + "logits/chosen": -1.6437163352966309, + "logits/rejected": -1.5841041803359985, + "logps/chosen": -143.4715118408203, + "logps/rejected": -281.08355712890625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.558477401733398, + "rewards/margins": 14.997993469238281, + "rewards/rejected": -19.55647087097168, + "step": 3917 + }, + { + "epoch": 6.29, + "learning_rate": 1.673602853745541e-07, + "logits/chosen": -1.5948247909545898, + "logits/rejected": -1.5530383586883545, + "logps/chosen": -180.7176055908203, + "logps/rejected": -302.14093017578125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.7121453285217285, + "rewards/margins": 12.731134414672852, + "rewards/rejected": -20.443279266357422, + "step": 3918 + }, + { + "epoch": 6.29, + "learning_rate": 1.6726119698771305e-07, + "logits/chosen": -1.4951684474945068, + "logits/rejected": -1.5472091436386108, + "logps/chosen": -175.42410278320312, + "logps/rejected": -320.24981689453125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.012097358703613, + "rewards/margins": 14.288057327270508, + "rewards/rejected": -23.300155639648438, + "step": 3919 + }, + { + "epoch": 6.29, + "learning_rate": 1.6716210860087196e-07, + "logits/chosen": -1.5128087997436523, + "logits/rejected": -1.5726499557495117, + "logps/chosen": -167.81069946289062, + "logps/rejected": -287.0572204589844, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.458122253417969, + "rewards/margins": 12.17203426361084, + "rewards/rejected": -19.630157470703125, + "step": 3920 + }, + { + "epoch": 6.29, + "learning_rate": 1.670630202140309e-07, + "logits/chosen": -1.329018473625183, + "logits/rejected": -1.4045355319976807, + "logps/chosen": -153.38424682617188, + "logps/rejected": -323.7459716796875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.477097511291504, + "rewards/margins": 16.50042724609375, + "rewards/rejected": -24.97752571105957, + "step": 3921 + }, + { + "epoch": 6.3, + "learning_rate": 1.6696393182718985e-07, + "logits/chosen": -1.5834708213806152, + "logits/rejected": -1.5859671831130981, + "logps/chosen": -145.66639709472656, + "logps/rejected": -271.28167724609375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.670893669128418, + "rewards/margins": 12.54137134552002, + "rewards/rejected": -19.212265014648438, + "step": 3922 + }, + { + "epoch": 6.3, + "learning_rate": 1.6686484344034878e-07, + "logits/chosen": -1.6217231750488281, + "logits/rejected": -1.6505992412567139, + "logps/chosen": -110.48384094238281, + "logps/rejected": -298.868408203125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.155902624130249, + "rewards/margins": 18.67673110961914, + "rewards/rejected": -21.832632064819336, + "step": 3923 + }, + { + "epoch": 6.3, + "learning_rate": 1.6676575505350774e-07, + "logits/chosen": -1.5178585052490234, + "logits/rejected": -1.5125021934509277, + "logps/chosen": -121.4353256225586, + "logps/rejected": -232.28118896484375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.116846561431885, + "rewards/margins": 11.216753005981445, + "rewards/rejected": -16.333600997924805, + "step": 3924 + }, + { + "epoch": 6.3, + "learning_rate": 1.6666666666666665e-07, + "logits/chosen": -1.5145236253738403, + "logits/rejected": -1.5426770448684692, + "logps/chosen": -135.06344604492188, + "logps/rejected": -297.96112060546875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.359985828399658, + "rewards/margins": 16.958215713500977, + "rewards/rejected": -22.31820297241211, + "step": 3925 + }, + { + "epoch": 6.3, + "learning_rate": 1.6656757827982558e-07, + "logits/chosen": -1.436886191368103, + "logits/rejected": -1.4327298402786255, + "logps/chosen": -178.8482208251953, + "logps/rejected": -289.58514404296875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.92324447631836, + "rewards/margins": 11.526580810546875, + "rewards/rejected": -20.449825286865234, + "step": 3926 + }, + { + "epoch": 6.3, + "learning_rate": 1.6646848989298454e-07, + "logits/chosen": -1.497796654701233, + "logits/rejected": -1.4908064603805542, + "logps/chosen": -131.18984985351562, + "logps/rejected": -303.9706115722656, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9991021156311035, + "rewards/margins": 18.290143966674805, + "rewards/rejected": -22.289247512817383, + "step": 3927 + }, + { + "epoch": 6.3, + "learning_rate": 1.6636940150614348e-07, + "logits/chosen": -1.45273756980896, + "logits/rejected": -1.4951268434524536, + "logps/chosen": -150.1935577392578, + "logps/rejected": -305.368408203125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.11789608001709, + "rewards/margins": 14.827159881591797, + "rewards/rejected": -20.945056915283203, + "step": 3928 + }, + { + "epoch": 6.31, + "learning_rate": 1.6627031311930238e-07, + "logits/chosen": -1.380833387374878, + "logits/rejected": -1.5213100910186768, + "logps/chosen": -174.23814392089844, + "logps/rejected": -327.9368896484375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.808857917785645, + "rewards/margins": 13.467377662658691, + "rewards/rejected": -22.276235580444336, + "step": 3929 + }, + { + "epoch": 6.31, + "learning_rate": 1.6617122473246134e-07, + "logits/chosen": -1.6637816429138184, + "logits/rejected": -1.7057750225067139, + "logps/chosen": -122.02420043945312, + "logps/rejected": -287.966796875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.301971912384033, + "rewards/margins": 15.0834321975708, + "rewards/rejected": -19.385404586791992, + "step": 3930 + }, + { + "epoch": 6.31, + "learning_rate": 1.6607213634562028e-07, + "logits/chosen": -1.4318759441375732, + "logits/rejected": -1.402531385421753, + "logps/chosen": -158.71206665039062, + "logps/rejected": -289.5340270996094, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.177924156188965, + "rewards/margins": 13.949573516845703, + "rewards/rejected": -21.127498626708984, + "step": 3931 + }, + { + "epoch": 6.31, + "learning_rate": 1.6597304795877924e-07, + "logits/chosen": -1.4782382249832153, + "logits/rejected": -1.518707275390625, + "logps/chosen": -105.34712219238281, + "logps/rejected": -249.3628692626953, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1274826526641846, + "rewards/margins": 14.668456077575684, + "rewards/rejected": -17.79593849182129, + "step": 3932 + }, + { + "epoch": 6.31, + "learning_rate": 1.6587395957193817e-07, + "logits/chosen": -1.48872971534729, + "logits/rejected": -1.4784202575683594, + "logps/chosen": -179.91456604003906, + "logps/rejected": -286.9005432128906, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.385456085205078, + "rewards/margins": 12.12592601776123, + "rewards/rejected": -20.511383056640625, + "step": 3933 + }, + { + "epoch": 6.31, + "learning_rate": 1.6577487118509708e-07, + "logits/chosen": -1.4260241985321045, + "logits/rejected": -1.441740870475769, + "logps/chosen": -159.15692138671875, + "logps/rejected": -304.0753173828125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.682707786560059, + "rewards/margins": 13.278423309326172, + "rewards/rejected": -21.961132049560547, + "step": 3934 + }, + { + "epoch": 6.32, + "learning_rate": 1.6567578279825604e-07, + "logits/chosen": -1.565319538116455, + "logits/rejected": -1.4701380729675293, + "logps/chosen": -147.08578491210938, + "logps/rejected": -257.8539733886719, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.302391529083252, + "rewards/margins": 11.434797286987305, + "rewards/rejected": -17.7371883392334, + "step": 3935 + }, + { + "epoch": 6.32, + "learning_rate": 1.6557669441141497e-07, + "logits/chosen": -1.5070892572402954, + "logits/rejected": -1.46585214138031, + "logps/chosen": -124.07930755615234, + "logps/rejected": -259.25347900390625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.764210224151611, + "rewards/margins": 11.769156455993652, + "rewards/rejected": -17.533367156982422, + "step": 3936 + }, + { + "epoch": 6.32, + "learning_rate": 1.6547760602457393e-07, + "logits/chosen": -1.5915930271148682, + "logits/rejected": -1.6002469062805176, + "logps/chosen": -114.32086944580078, + "logps/rejected": -242.3919219970703, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.834458351135254, + "rewards/margins": 12.412710189819336, + "rewards/rejected": -17.247169494628906, + "step": 3937 + }, + { + "epoch": 6.32, + "learning_rate": 1.6537851763773286e-07, + "logits/chosen": -1.4663176536560059, + "logits/rejected": -1.6317639350891113, + "logps/chosen": -112.09112548828125, + "logps/rejected": -309.00482177734375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.963474750518799, + "rewards/margins": 16.44464111328125, + "rewards/rejected": -21.408117294311523, + "step": 3938 + }, + { + "epoch": 6.32, + "learning_rate": 1.6527942925089177e-07, + "logits/chosen": -1.5561463832855225, + "logits/rejected": -1.4748085737228394, + "logps/chosen": -141.75355529785156, + "logps/rejected": -241.1177978515625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.80802059173584, + "rewards/margins": 12.3397216796875, + "rewards/rejected": -18.147743225097656, + "step": 3939 + }, + { + "epoch": 6.32, + "learning_rate": 1.6518034086405073e-07, + "logits/chosen": -1.6014814376831055, + "logits/rejected": -1.5534241199493408, + "logps/chosen": -176.32872009277344, + "logps/rejected": -268.36962890625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.164235591888428, + "rewards/margins": 10.997298240661621, + "rewards/rejected": -18.16153335571289, + "step": 3940 + }, + { + "epoch": 6.33, + "learning_rate": 1.6508125247720966e-07, + "logits/chosen": -1.4826384782791138, + "logits/rejected": -1.4709956645965576, + "logps/chosen": -157.76043701171875, + "logps/rejected": -242.96405029296875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.429962635040283, + "rewards/margins": 10.02396297454834, + "rewards/rejected": -16.45392608642578, + "step": 3941 + }, + { + "epoch": 6.33, + "learning_rate": 1.649821640903686e-07, + "logits/chosen": -1.4272725582122803, + "logits/rejected": -1.4178071022033691, + "logps/chosen": -145.05706787109375, + "logps/rejected": -226.4466552734375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.9261980056762695, + "rewards/margins": 9.814224243164062, + "rewards/rejected": -15.740421295166016, + "step": 3942 + }, + { + "epoch": 6.33, + "learning_rate": 1.6488307570352756e-07, + "logits/chosen": -1.5481877326965332, + "logits/rejected": -1.6056452989578247, + "logps/chosen": -133.4686279296875, + "logps/rejected": -259.7863464355469, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.713413238525391, + "rewards/margins": 12.394735336303711, + "rewards/rejected": -18.108150482177734, + "step": 3943 + }, + { + "epoch": 6.33, + "learning_rate": 1.6478398731668646e-07, + "logits/chosen": -1.5274676084518433, + "logits/rejected": -1.5501234531402588, + "logps/chosen": -101.38319396972656, + "logps/rejected": -283.90545654296875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.120100498199463, + "rewards/margins": 15.812801361083984, + "rewards/rejected": -18.932903289794922, + "step": 3944 + }, + { + "epoch": 6.33, + "learning_rate": 1.6468489892984542e-07, + "logits/chosen": -1.5590604543685913, + "logits/rejected": -1.5858144760131836, + "logps/chosen": -216.06515502929688, + "logps/rejected": -325.2770080566406, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.338653564453125, + "rewards/margins": 11.537188529968262, + "rewards/rejected": -21.87584114074707, + "step": 3945 + }, + { + "epoch": 6.33, + "learning_rate": 1.6458581054300436e-07, + "logits/chosen": -1.4349849224090576, + "logits/rejected": -1.3805030584335327, + "logps/chosen": -159.8118896484375, + "logps/rejected": -278.40655517578125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.1569132804870605, + "rewards/margins": 13.383237838745117, + "rewards/rejected": -20.540149688720703, + "step": 3946 + }, + { + "epoch": 6.34, + "learning_rate": 1.644867221561633e-07, + "logits/chosen": -1.4467719793319702, + "logits/rejected": -1.4682390689849854, + "logps/chosen": -111.69586181640625, + "logps/rejected": -245.0198974609375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.848292827606201, + "rewards/margins": 12.602740287780762, + "rewards/rejected": -17.451032638549805, + "step": 3947 + }, + { + "epoch": 6.34, + "learning_rate": 1.6438763376932222e-07, + "logits/chosen": -1.3886165618896484, + "logits/rejected": -1.4153711795806885, + "logps/chosen": -192.13400268554688, + "logps/rejected": -336.62481689453125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.283824920654297, + "rewards/margins": 15.424585342407227, + "rewards/rejected": -24.70840835571289, + "step": 3948 + }, + { + "epoch": 6.34, + "learning_rate": 1.6428854538248116e-07, + "logits/chosen": -1.4544893503189087, + "logits/rejected": -1.4096252918243408, + "logps/chosen": -153.1943359375, + "logps/rejected": -261.0718078613281, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.217142105102539, + "rewards/margins": 11.858192443847656, + "rewards/rejected": -19.075334548950195, + "step": 3949 + }, + { + "epoch": 6.34, + "learning_rate": 1.6418945699564012e-07, + "logits/chosen": -1.611348032951355, + "logits/rejected": -1.585326075553894, + "logps/chosen": -171.95423889160156, + "logps/rejected": -287.021240234375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.607560634613037, + "rewards/margins": 13.285688400268555, + "rewards/rejected": -20.89324951171875, + "step": 3950 + }, + { + "epoch": 6.34, + "learning_rate": 1.6409036860879905e-07, + "logits/chosen": -1.574178695678711, + "logits/rejected": -1.5552211999893188, + "logps/chosen": -143.1689453125, + "logps/rejected": -275.5705871582031, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.878445625305176, + "rewards/margins": 10.498218536376953, + "rewards/rejected": -16.376663208007812, + "step": 3951 + }, + { + "epoch": 6.34, + "learning_rate": 1.6399128022195798e-07, + "logits/chosen": -1.5309865474700928, + "logits/rejected": -1.4454176425933838, + "logps/chosen": -177.5360870361328, + "logps/rejected": -267.66070556640625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.446547508239746, + "rewards/margins": 9.913538932800293, + "rewards/rejected": -19.36008644104004, + "step": 3952 + }, + { + "epoch": 6.35, + "learning_rate": 1.6389219183511691e-07, + "logits/chosen": -1.5138297080993652, + "logits/rejected": -1.5664706230163574, + "logps/chosen": -178.10263061523438, + "logps/rejected": -329.0377197265625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.158344268798828, + "rewards/margins": 14.226865768432617, + "rewards/rejected": -22.385210037231445, + "step": 3953 + }, + { + "epoch": 6.35, + "learning_rate": 1.6379310344827585e-07, + "logits/chosen": -1.4848296642303467, + "logits/rejected": -1.5652869939804077, + "logps/chosen": -138.16136169433594, + "logps/rejected": -281.2196044921875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.452284812927246, + "rewards/margins": 11.854347229003906, + "rewards/rejected": -18.306631088256836, + "step": 3954 + }, + { + "epoch": 6.35, + "learning_rate": 1.6369401506143478e-07, + "logits/chosen": -1.3937268257141113, + "logits/rejected": -1.448549747467041, + "logps/chosen": -178.87010192871094, + "logps/rejected": -356.12335205078125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.499481201171875, + "rewards/margins": 16.344602584838867, + "rewards/rejected": -24.844083786010742, + "step": 3955 + }, + { + "epoch": 6.35, + "learning_rate": 1.6359492667459374e-07, + "logits/chosen": -1.5862387418746948, + "logits/rejected": -1.5898258686065674, + "logps/chosen": -114.17906951904297, + "logps/rejected": -295.2752685546875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.248535633087158, + "rewards/margins": 15.324241638183594, + "rewards/rejected": -19.572776794433594, + "step": 3956 + }, + { + "epoch": 6.35, + "learning_rate": 1.6349583828775267e-07, + "logits/chosen": -1.5324729681015015, + "logits/rejected": -1.4619789123535156, + "logps/chosen": -141.83250427246094, + "logps/rejected": -239.22146606445312, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.866727352142334, + "rewards/margins": 13.088116645812988, + "rewards/rejected": -16.954843521118164, + "step": 3957 + }, + { + "epoch": 6.35, + "learning_rate": 1.633967499009116e-07, + "logits/chosen": -1.4563406705856323, + "logits/rejected": -1.4612839221954346, + "logps/chosen": -146.20460510253906, + "logps/rejected": -230.49400329589844, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.711292743682861, + "rewards/margins": 9.491861343383789, + "rewards/rejected": -16.203155517578125, + "step": 3958 + }, + { + "epoch": 6.35, + "learning_rate": 1.6329766151407054e-07, + "logits/chosen": -1.4298436641693115, + "logits/rejected": -1.4685190916061401, + "logps/chosen": -176.33087158203125, + "logps/rejected": -347.7360534667969, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.961612701416016, + "rewards/margins": 17.022851943969727, + "rewards/rejected": -23.984464645385742, + "step": 3959 + }, + { + "epoch": 6.36, + "learning_rate": 1.6319857312722947e-07, + "logits/chosen": -1.5172276496887207, + "logits/rejected": -1.4890596866607666, + "logps/chosen": -135.53721618652344, + "logps/rejected": -273.04595947265625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.017271518707275, + "rewards/margins": 13.089927673339844, + "rewards/rejected": -19.10719871520996, + "step": 3960 + }, + { + "epoch": 6.36, + "learning_rate": 1.6309948474038843e-07, + "logits/chosen": -1.5351402759552002, + "logits/rejected": -1.5822473764419556, + "logps/chosen": -145.30728149414062, + "logps/rejected": -269.07012939453125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.862299919128418, + "rewards/margins": 10.376681327819824, + "rewards/rejected": -16.238981246948242, + "step": 3961 + }, + { + "epoch": 6.36, + "learning_rate": 1.6300039635354737e-07, + "logits/chosen": -1.5119330883026123, + "logits/rejected": -1.5082218647003174, + "logps/chosen": -172.72796630859375, + "logps/rejected": -268.2436828613281, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.157577514648438, + "rewards/margins": 9.139463424682617, + "rewards/rejected": -17.297040939331055, + "step": 3962 + }, + { + "epoch": 6.36, + "learning_rate": 1.6290130796670627e-07, + "logits/chosen": -1.5119835138320923, + "logits/rejected": -1.6388754844665527, + "logps/chosen": -128.8704071044922, + "logps/rejected": -314.309326171875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.851696491241455, + "rewards/margins": 14.602123260498047, + "rewards/rejected": -20.453819274902344, + "step": 3963 + }, + { + "epoch": 6.36, + "learning_rate": 1.6280221957986523e-07, + "logits/chosen": -1.4335370063781738, + "logits/rejected": -1.4531058073043823, + "logps/chosen": -151.99932861328125, + "logps/rejected": -330.26531982421875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.935831546783447, + "rewards/margins": 15.584542274475098, + "rewards/rejected": -22.520374298095703, + "step": 3964 + }, + { + "epoch": 6.36, + "learning_rate": 1.6270313119302417e-07, + "logits/chosen": -1.4839730262756348, + "logits/rejected": -1.3839322328567505, + "logps/chosen": -175.63381958007812, + "logps/rejected": -276.9841003417969, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.180705070495605, + "rewards/margins": 10.646291732788086, + "rewards/rejected": -19.826995849609375, + "step": 3965 + }, + { + "epoch": 6.37, + "learning_rate": 1.6260404280618313e-07, + "logits/chosen": -1.4569215774536133, + "logits/rejected": -1.4089760780334473, + "logps/chosen": -145.66085815429688, + "logps/rejected": -295.6203308105469, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.313427925109863, + "rewards/margins": 14.913666725158691, + "rewards/rejected": -21.227096557617188, + "step": 3966 + }, + { + "epoch": 6.37, + "learning_rate": 1.6250495441934203e-07, + "logits/chosen": -1.4601564407348633, + "logits/rejected": -1.5214871168136597, + "logps/chosen": -131.26988220214844, + "logps/rejected": -272.28802490234375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.458736419677734, + "rewards/margins": 11.883935928344727, + "rewards/rejected": -18.342674255371094, + "step": 3967 + }, + { + "epoch": 6.37, + "learning_rate": 1.6240586603250097e-07, + "logits/chosen": -1.6175827980041504, + "logits/rejected": -1.6038904190063477, + "logps/chosen": -107.5924301147461, + "logps/rejected": -237.25375366210938, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.65548038482666, + "rewards/margins": 11.880311965942383, + "rewards/rejected": -16.53579330444336, + "step": 3968 + }, + { + "epoch": 6.37, + "learning_rate": 1.6230677764565993e-07, + "logits/chosen": -1.7689862251281738, + "logits/rejected": -1.7836031913757324, + "logps/chosen": -114.25425720214844, + "logps/rejected": -237.8696746826172, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.137484550476074, + "rewards/margins": 12.622108459472656, + "rewards/rejected": -16.759592056274414, + "step": 3969 + }, + { + "epoch": 6.37, + "learning_rate": 1.6220768925881886e-07, + "logits/chosen": -1.4086711406707764, + "logits/rejected": -1.5081077814102173, + "logps/chosen": -102.00456237792969, + "logps/rejected": -270.527099609375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.5692057609558105, + "rewards/margins": 14.846660614013672, + "rewards/rejected": -18.41586685180664, + "step": 3970 + }, + { + "epoch": 6.37, + "learning_rate": 1.6210860087197782e-07, + "logits/chosen": -1.608017086982727, + "logits/rejected": -1.5650662183761597, + "logps/chosen": -123.24845123291016, + "logps/rejected": -234.94717407226562, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.47219181060791, + "rewards/margins": 11.062385559082031, + "rewards/rejected": -15.534577369689941, + "step": 3971 + }, + { + "epoch": 6.38, + "learning_rate": 1.6200951248513673e-07, + "logits/chosen": -1.3778172731399536, + "logits/rejected": -1.3590917587280273, + "logps/chosen": -222.862060546875, + "logps/rejected": -283.87274169921875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.502344131469727, + "rewards/margins": 11.53364372253418, + "rewards/rejected": -20.035987854003906, + "step": 3972 + }, + { + "epoch": 6.38, + "learning_rate": 1.6191042409829566e-07, + "logits/chosen": -1.4982316493988037, + "logits/rejected": -1.539750337600708, + "logps/chosen": -172.0172119140625, + "logps/rejected": -275.61785888671875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.788986206054688, + "rewards/margins": 10.190176010131836, + "rewards/rejected": -18.97916030883789, + "step": 3973 + }, + { + "epoch": 6.38, + "learning_rate": 1.6181133571145462e-07, + "logits/chosen": -1.5428799390792847, + "logits/rejected": -1.6092513799667358, + "logps/chosen": -155.87754821777344, + "logps/rejected": -292.10845947265625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.423941135406494, + "rewards/margins": 13.171009063720703, + "rewards/rejected": -20.594951629638672, + "step": 3974 + }, + { + "epoch": 6.38, + "learning_rate": 1.6171224732461355e-07, + "logits/chosen": -1.6777634620666504, + "logits/rejected": -1.6639420986175537, + "logps/chosen": -100.64932250976562, + "logps/rejected": -224.90467834472656, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.968264579772949, + "rewards/margins": 13.40709114074707, + "rewards/rejected": -16.375356674194336, + "step": 3975 + }, + { + "epoch": 6.38, + "learning_rate": 1.6161315893777249e-07, + "logits/chosen": -1.5678623914718628, + "logits/rejected": -1.4481014013290405, + "logps/chosen": -185.88197326660156, + "logps/rejected": -327.3393249511719, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.950716972351074, + "rewards/margins": 15.109739303588867, + "rewards/rejected": -24.060455322265625, + "step": 3976 + }, + { + "epoch": 6.38, + "learning_rate": 1.6151407055093142e-07, + "logits/chosen": -1.292104721069336, + "logits/rejected": -1.4341113567352295, + "logps/chosen": -117.68405151367188, + "logps/rejected": -257.08154296875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.303684711456299, + "rewards/margins": 11.20142650604248, + "rewards/rejected": -16.505111694335938, + "step": 3977 + }, + { + "epoch": 6.39, + "learning_rate": 1.6141498216409035e-07, + "logits/chosen": -1.4918079376220703, + "logits/rejected": -1.518143653869629, + "logps/chosen": -148.5343780517578, + "logps/rejected": -254.49838256835938, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.138147354125977, + "rewards/margins": 11.770766258239746, + "rewards/rejected": -18.908912658691406, + "step": 3978 + }, + { + "epoch": 6.39, + "learning_rate": 1.613158937772493e-07, + "logits/chosen": -1.4118938446044922, + "logits/rejected": -1.4329826831817627, + "logps/chosen": -180.9456787109375, + "logps/rejected": -298.9212646484375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.040372848510742, + "rewards/margins": 11.520695686340332, + "rewards/rejected": -20.56106948852539, + "step": 3979 + }, + { + "epoch": 6.39, + "learning_rate": 1.6121680539040825e-07, + "logits/chosen": -1.545224666595459, + "logits/rejected": -1.5012035369873047, + "logps/chosen": -200.25958251953125, + "logps/rejected": -320.175048828125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.988940238952637, + "rewards/margins": 13.523780822753906, + "rewards/rejected": -22.512720108032227, + "step": 3980 + }, + { + "epoch": 6.39, + "learning_rate": 1.6111771700356718e-07, + "logits/chosen": -1.3925974369049072, + "logits/rejected": -1.3957159519195557, + "logps/chosen": -145.08267211914062, + "logps/rejected": -282.7617492675781, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.291065692901611, + "rewards/margins": 13.710073471069336, + "rewards/rejected": -21.001140594482422, + "step": 3981 + }, + { + "epoch": 6.39, + "learning_rate": 1.610186286167261e-07, + "logits/chosen": -1.4944243431091309, + "logits/rejected": -1.430525779724121, + "logps/chosen": -148.34033203125, + "logps/rejected": -259.8709716796875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.749449253082275, + "rewards/margins": 10.940183639526367, + "rewards/rejected": -16.689632415771484, + "step": 3982 + }, + { + "epoch": 6.39, + "learning_rate": 1.6091954022988505e-07, + "logits/chosen": -1.4707145690917969, + "logits/rejected": -1.4026367664337158, + "logps/chosen": -148.54312133789062, + "logps/rejected": -274.0921630859375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.918565273284912, + "rewards/margins": 13.038726806640625, + "rewards/rejected": -18.957292556762695, + "step": 3983 + }, + { + "epoch": 6.39, + "learning_rate": 1.6082045184304398e-07, + "logits/chosen": -1.5322589874267578, + "logits/rejected": -1.5631089210510254, + "logps/chosen": -140.72003173828125, + "logps/rejected": -275.0437316894531, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.445150375366211, + "rewards/margins": 11.071027755737305, + "rewards/rejected": -16.516178131103516, + "step": 3984 + }, + { + "epoch": 6.4, + "learning_rate": 1.6072136345620294e-07, + "logits/chosen": -1.5657658576965332, + "logits/rejected": -1.5092768669128418, + "logps/chosen": -127.70248413085938, + "logps/rejected": -248.90765380859375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.499255657196045, + "rewards/margins": 11.999900817871094, + "rewards/rejected": -17.499156951904297, + "step": 3985 + }, + { + "epoch": 6.4, + "learning_rate": 1.6062227506936185e-07, + "logits/chosen": -1.6717333793640137, + "logits/rejected": -1.696704626083374, + "logps/chosen": -141.57635498046875, + "logps/rejected": -305.54833984375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.044940948486328, + "rewards/margins": 13.380005836486816, + "rewards/rejected": -20.424945831298828, + "step": 3986 + }, + { + "epoch": 6.4, + "learning_rate": 1.605231866825208e-07, + "logits/chosen": -1.471925973892212, + "logits/rejected": -1.5221645832061768, + "logps/chosen": -142.48593139648438, + "logps/rejected": -301.0586853027344, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.977481365203857, + "rewards/margins": 14.57772159576416, + "rewards/rejected": -20.55520248413086, + "step": 3987 + }, + { + "epoch": 6.4, + "learning_rate": 1.6042409829567974e-07, + "logits/chosen": -1.5294488668441772, + "logits/rejected": -1.5482138395309448, + "logps/chosen": -137.36618041992188, + "logps/rejected": -303.8057556152344, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.090366840362549, + "rewards/margins": 15.792388916015625, + "rewards/rejected": -21.88275718688965, + "step": 3988 + }, + { + "epoch": 6.4, + "learning_rate": 1.6032500990883867e-07, + "logits/chosen": -1.5439040660858154, + "logits/rejected": -1.522796392440796, + "logps/chosen": -115.33815002441406, + "logps/rejected": -263.8655090332031, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.630532741546631, + "rewards/margins": 15.538803100585938, + "rewards/rejected": -19.169336318969727, + "step": 3989 + }, + { + "epoch": 6.4, + "learning_rate": 1.6022592152199763e-07, + "logits/chosen": -1.5632652044296265, + "logits/rejected": -1.5821970701217651, + "logps/chosen": -128.12454223632812, + "logps/rejected": -242.43075561523438, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.349673271179199, + "rewards/margins": 10.831616401672363, + "rewards/rejected": -16.181289672851562, + "step": 3990 + }, + { + "epoch": 6.41, + "learning_rate": 1.6012683313515654e-07, + "logits/chosen": -1.566260576248169, + "logits/rejected": -1.5405153036117554, + "logps/chosen": -152.32485961914062, + "logps/rejected": -299.2727966308594, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.996333599090576, + "rewards/margins": 13.206554412841797, + "rewards/rejected": -19.20288848876953, + "step": 3991 + }, + { + "epoch": 6.41, + "learning_rate": 1.6002774474831547e-07, + "logits/chosen": -1.4987488985061646, + "logits/rejected": -1.5142943859100342, + "logps/chosen": -152.9492645263672, + "logps/rejected": -286.52044677734375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.817915439605713, + "rewards/margins": 11.753351211547852, + "rewards/rejected": -18.571266174316406, + "step": 3992 + }, + { + "epoch": 6.41, + "learning_rate": 1.5992865636147443e-07, + "logits/chosen": -1.6702743768692017, + "logits/rejected": -1.6214627027511597, + "logps/chosen": -175.078369140625, + "logps/rejected": -304.1716613769531, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.049171447753906, + "rewards/margins": 12.150520324707031, + "rewards/rejected": -21.199691772460938, + "step": 3993 + }, + { + "epoch": 6.41, + "learning_rate": 1.5982956797463337e-07, + "logits/chosen": -1.4068892002105713, + "logits/rejected": -1.4163830280303955, + "logps/chosen": -198.59129333496094, + "logps/rejected": -272.71990966796875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.383121490478516, + "rewards/margins": 9.940868377685547, + "rewards/rejected": -18.323989868164062, + "step": 3994 + }, + { + "epoch": 6.41, + "learning_rate": 1.5973047958779233e-07, + "logits/chosen": -1.4574804306030273, + "logits/rejected": -1.456333041191101, + "logps/chosen": -125.31588745117188, + "logps/rejected": -244.9595947265625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.521605014801025, + "rewards/margins": 12.06488037109375, + "rewards/rejected": -18.586484909057617, + "step": 3995 + }, + { + "epoch": 6.41, + "learning_rate": 1.5963139120095123e-07, + "logits/chosen": -1.5615702867507935, + "logits/rejected": -1.5320665836334229, + "logps/chosen": -133.40367126464844, + "logps/rejected": -290.703369140625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.839057445526123, + "rewards/margins": 15.359874725341797, + "rewards/rejected": -19.198932647705078, + "step": 3996 + }, + { + "epoch": 6.42, + "learning_rate": 1.5953230281411017e-07, + "logits/chosen": -1.5317665338516235, + "logits/rejected": -1.5759742259979248, + "logps/chosen": -117.11602783203125, + "logps/rejected": -281.5647888183594, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.161654472351074, + "rewards/margins": 15.053659439086914, + "rewards/rejected": -20.215312957763672, + "step": 3997 + }, + { + "epoch": 6.42, + "learning_rate": 1.5943321442726913e-07, + "logits/chosen": -1.7259312868118286, + "logits/rejected": -1.6679203510284424, + "logps/chosen": -125.95059967041016, + "logps/rejected": -220.51663208007812, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.294445276260376, + "rewards/margins": 11.433616638183594, + "rewards/rejected": -14.728063583374023, + "step": 3998 + }, + { + "epoch": 6.42, + "learning_rate": 1.5933412604042806e-07, + "logits/chosen": -1.532376766204834, + "logits/rejected": -1.4637141227722168, + "logps/chosen": -159.4670867919922, + "logps/rejected": -284.4428405761719, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.34735631942749, + "rewards/margins": 14.270535469055176, + "rewards/rejected": -20.617891311645508, + "step": 3999 + }, + { + "epoch": 6.42, + "learning_rate": 1.59235037653587e-07, + "logits/chosen": -1.3065749406814575, + "logits/rejected": -1.374037742614746, + "logps/chosen": -185.95437622070312, + "logps/rejected": -316.81842041015625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.636619567871094, + "rewards/margins": 12.462785720825195, + "rewards/rejected": -22.09940528869629, + "step": 4000 + }, + { + "epoch": 6.42, + "learning_rate": 1.5913594926674592e-07, + "logits/chosen": -1.4686524868011475, + "logits/rejected": -1.4706889390945435, + "logps/chosen": -166.93557739257812, + "logps/rejected": -268.86920166015625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.190255641937256, + "rewards/margins": 11.974369049072266, + "rewards/rejected": -18.16462516784668, + "step": 4001 + }, + { + "epoch": 6.42, + "learning_rate": 1.5903686087990486e-07, + "logits/chosen": -1.6474428176879883, + "logits/rejected": -1.6152622699737549, + "logps/chosen": -112.57962036132812, + "logps/rejected": -200.76976013183594, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.4474239349365234, + "rewards/margins": 9.468438148498535, + "rewards/rejected": -12.915862083435059, + "step": 4002 + }, + { + "epoch": 6.43, + "learning_rate": 1.5893777249306382e-07, + "logits/chosen": -1.3121598958969116, + "logits/rejected": -1.2661166191101074, + "logps/chosen": -159.44471740722656, + "logps/rejected": -260.6114501953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.206964492797852, + "rewards/margins": 10.608610153198242, + "rewards/rejected": -18.815574645996094, + "step": 4003 + }, + { + "epoch": 6.43, + "learning_rate": 1.5883868410622275e-07, + "logits/chosen": -1.524505376815796, + "logits/rejected": -1.5568066835403442, + "logps/chosen": -173.30853271484375, + "logps/rejected": -287.29364013671875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.556703567504883, + "rewards/margins": 11.73339557647705, + "rewards/rejected": -19.290098190307617, + "step": 4004 + }, + { + "epoch": 6.43, + "learning_rate": 1.5873959571938166e-07, + "logits/chosen": -1.6058626174926758, + "logits/rejected": -1.6432983875274658, + "logps/chosen": -105.3158950805664, + "logps/rejected": -257.5553283691406, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.105947971343994, + "rewards/margins": 14.854936599731445, + "rewards/rejected": -17.96088409423828, + "step": 4005 + }, + { + "epoch": 6.43, + "learning_rate": 1.5864050733254062e-07, + "logits/chosen": -1.551438570022583, + "logits/rejected": -1.4922043085098267, + "logps/chosen": -148.701416015625, + "logps/rejected": -332.36407470703125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.61543607711792, + "rewards/margins": 17.985177993774414, + "rewards/rejected": -24.600614547729492, + "step": 4006 + }, + { + "epoch": 6.43, + "learning_rate": 1.5854141894569955e-07, + "logits/chosen": -1.6053153276443481, + "logits/rejected": -1.6445326805114746, + "logps/chosen": -116.2775650024414, + "logps/rejected": -265.2764892578125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.607358694076538, + "rewards/margins": 15.314004898071289, + "rewards/rejected": -18.921363830566406, + "step": 4007 + }, + { + "epoch": 6.43, + "learning_rate": 1.584423305588585e-07, + "logits/chosen": -1.5090774297714233, + "logits/rejected": -1.4739888906478882, + "logps/chosen": -187.30592346191406, + "logps/rejected": -284.43609619140625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.676107406616211, + "rewards/margins": 10.218493461608887, + "rewards/rejected": -17.89459991455078, + "step": 4008 + }, + { + "epoch": 6.43, + "learning_rate": 1.5834324217201744e-07, + "logits/chosen": -1.5496875047683716, + "logits/rejected": -1.6941678524017334, + "logps/chosen": -114.19149017333984, + "logps/rejected": -323.5517883300781, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.632122039794922, + "rewards/margins": 18.16196060180664, + "rewards/rejected": -21.794084548950195, + "step": 4009 + }, + { + "epoch": 6.44, + "learning_rate": 1.5824415378517635e-07, + "logits/chosen": -1.5770862102508545, + "logits/rejected": -1.4732118844985962, + "logps/chosen": -176.50079345703125, + "logps/rejected": -294.6792907714844, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.269203186035156, + "rewards/margins": 13.095417022705078, + "rewards/rejected": -20.364620208740234, + "step": 4010 + }, + { + "epoch": 6.44, + "learning_rate": 1.581450653983353e-07, + "logits/chosen": -1.3086860179901123, + "logits/rejected": -1.4644720554351807, + "logps/chosen": -128.7637176513672, + "logps/rejected": -292.11627197265625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.257585048675537, + "rewards/margins": 15.899805068969727, + "rewards/rejected": -20.157392501831055, + "step": 4011 + }, + { + "epoch": 6.44, + "learning_rate": 1.5804597701149424e-07, + "logits/chosen": -1.4771032333374023, + "logits/rejected": -1.5301387310028076, + "logps/chosen": -144.66453552246094, + "logps/rejected": -275.156494140625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.2535858154296875, + "rewards/margins": 11.568961143493652, + "rewards/rejected": -17.822547912597656, + "step": 4012 + }, + { + "epoch": 6.44, + "learning_rate": 1.5794688862465318e-07, + "logits/chosen": -1.446357011795044, + "logits/rejected": -1.4339394569396973, + "logps/chosen": -149.5625, + "logps/rejected": -276.4503173828125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.790907859802246, + "rewards/margins": 12.779481887817383, + "rewards/rejected": -20.570388793945312, + "step": 4013 + }, + { + "epoch": 6.44, + "learning_rate": 1.5784780023781214e-07, + "logits/chosen": -1.5044715404510498, + "logits/rejected": -1.5016742944717407, + "logps/chosen": -145.66880798339844, + "logps/rejected": -282.7096862792969, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.762472629547119, + "rewards/margins": 15.048627853393555, + "rewards/rejected": -20.811100006103516, + "step": 4014 + }, + { + "epoch": 6.44, + "learning_rate": 1.5774871185097104e-07, + "logits/chosen": -1.552590250968933, + "logits/rejected": -1.563828945159912, + "logps/chosen": -165.28549194335938, + "logps/rejected": -318.8038330078125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.110502243041992, + "rewards/margins": 13.313064575195312, + "rewards/rejected": -20.423566818237305, + "step": 4015 + }, + { + "epoch": 6.45, + "learning_rate": 1.5764962346413e-07, + "logits/chosen": -1.4708999395370483, + "logits/rejected": -1.5288752317428589, + "logps/chosen": -159.75576782226562, + "logps/rejected": -312.43658447265625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.523534774780273, + "rewards/margins": 14.518341064453125, + "rewards/rejected": -21.041873931884766, + "step": 4016 + }, + { + "epoch": 6.45, + "learning_rate": 1.5755053507728894e-07, + "logits/chosen": -1.7209053039550781, + "logits/rejected": -1.6415555477142334, + "logps/chosen": -174.29444885253906, + "logps/rejected": -298.5415954589844, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.518221855163574, + "rewards/margins": 14.24942398071289, + "rewards/rejected": -21.76764678955078, + "step": 4017 + }, + { + "epoch": 6.45, + "learning_rate": 1.5745144669044787e-07, + "logits/chosen": -1.4763753414154053, + "logits/rejected": -1.5161622762680054, + "logps/chosen": -177.55010986328125, + "logps/rejected": -301.70379638671875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.10258674621582, + "rewards/margins": 12.763797760009766, + "rewards/rejected": -21.866384506225586, + "step": 4018 + }, + { + "epoch": 6.45, + "learning_rate": 1.573523583036068e-07, + "logits/chosen": -1.665682315826416, + "logits/rejected": -1.625770092010498, + "logps/chosen": -141.99594116210938, + "logps/rejected": -303.4227294921875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.150576114654541, + "rewards/margins": 15.851202011108398, + "rewards/rejected": -22.00177764892578, + "step": 4019 + }, + { + "epoch": 6.45, + "learning_rate": 1.5725326991676574e-07, + "logits/chosen": -1.4847831726074219, + "logits/rejected": -1.5872048139572144, + "logps/chosen": -103.22293090820312, + "logps/rejected": -257.4953918457031, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7639405727386475, + "rewards/margins": 14.22868537902832, + "rewards/rejected": -17.992626190185547, + "step": 4020 + }, + { + "epoch": 6.45, + "learning_rate": 1.571541815299247e-07, + "logits/chosen": -1.4240381717681885, + "logits/rejected": -1.5253708362579346, + "logps/chosen": -129.3914031982422, + "logps/rejected": -274.4068908691406, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.696744918823242, + "rewards/margins": 11.765289306640625, + "rewards/rejected": -16.462034225463867, + "step": 4021 + }, + { + "epoch": 6.46, + "learning_rate": 1.5705509314308363e-07, + "logits/chosen": -1.449000358581543, + "logits/rejected": -1.3978806734085083, + "logps/chosen": -192.55979919433594, + "logps/rejected": -288.3559875488281, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.606399536132812, + "rewards/margins": 10.967217445373535, + "rewards/rejected": -20.57361602783203, + "step": 4022 + }, + { + "epoch": 6.46, + "learning_rate": 1.5695600475624256e-07, + "logits/chosen": -1.492967128753662, + "logits/rejected": -1.5154919624328613, + "logps/chosen": -139.0252227783203, + "logps/rejected": -258.66302490234375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.300014972686768, + "rewards/margins": 11.301677703857422, + "rewards/rejected": -16.601694107055664, + "step": 4023 + }, + { + "epoch": 6.46, + "learning_rate": 1.568569163694015e-07, + "logits/chosen": -1.489736795425415, + "logits/rejected": -1.5334579944610596, + "logps/chosen": -112.01498413085938, + "logps/rejected": -231.52047729492188, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.152523040771484, + "rewards/margins": 12.57978630065918, + "rewards/rejected": -17.732309341430664, + "step": 4024 + }, + { + "epoch": 6.46, + "learning_rate": 1.5675782798256043e-07, + "logits/chosen": -1.4706202745437622, + "logits/rejected": -1.4792487621307373, + "logps/chosen": -150.43295288085938, + "logps/rejected": -300.1558837890625, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.121355056762695, + "rewards/margins": 14.558577537536621, + "rewards/rejected": -22.679931640625, + "step": 4025 + }, + { + "epoch": 6.46, + "learning_rate": 1.5665873959571936e-07, + "logits/chosen": -1.5216423273086548, + "logits/rejected": -1.5291874408721924, + "logps/chosen": -172.5982208251953, + "logps/rejected": -260.7652587890625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.815979957580566, + "rewards/margins": 9.141763687133789, + "rewards/rejected": -17.957744598388672, + "step": 4026 + }, + { + "epoch": 6.46, + "learning_rate": 1.5655965120887832e-07, + "logits/chosen": -1.5779879093170166, + "logits/rejected": -1.6116265058517456, + "logps/chosen": -142.5040283203125, + "logps/rejected": -300.6974182128906, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.42648983001709, + "rewards/margins": 14.707590103149414, + "rewards/rejected": -21.13408088684082, + "step": 4027 + }, + { + "epoch": 6.47, + "learning_rate": 1.5646056282203726e-07, + "logits/chosen": -1.3537213802337646, + "logits/rejected": -1.4076082706451416, + "logps/chosen": -125.388671875, + "logps/rejected": -259.47393798828125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.980767726898193, + "rewards/margins": 10.933026313781738, + "rewards/rejected": -15.913793563842773, + "step": 4028 + }, + { + "epoch": 6.47, + "learning_rate": 1.563614744351962e-07, + "logits/chosen": -1.4563075304031372, + "logits/rejected": -1.4627364873886108, + "logps/chosen": -179.6787109375, + "logps/rejected": -298.1599426269531, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.388933181762695, + "rewards/margins": 11.938668251037598, + "rewards/rejected": -21.327600479125977, + "step": 4029 + }, + { + "epoch": 6.47, + "learning_rate": 1.5626238604835512e-07, + "logits/chosen": -1.4612109661102295, + "logits/rejected": -1.3519361019134521, + "logps/chosen": -139.62327575683594, + "logps/rejected": -259.12493896484375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.951837062835693, + "rewards/margins": 13.576677322387695, + "rewards/rejected": -18.528514862060547, + "step": 4030 + }, + { + "epoch": 6.47, + "learning_rate": 1.5616329766151406e-07, + "logits/chosen": -1.4441955089569092, + "logits/rejected": -1.3917382955551147, + "logps/chosen": -172.54931640625, + "logps/rejected": -298.00079345703125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.140151977539062, + "rewards/margins": 12.766448020935059, + "rewards/rejected": -20.906600952148438, + "step": 4031 + }, + { + "epoch": 6.47, + "learning_rate": 1.5606420927467302e-07, + "logits/chosen": -1.4717050790786743, + "logits/rejected": -1.5717570781707764, + "logps/chosen": -142.377197265625, + "logps/rejected": -342.005859375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.0678558349609375, + "rewards/margins": 17.64043617248535, + "rewards/rejected": -24.70829200744629, + "step": 4032 + }, + { + "epoch": 6.47, + "learning_rate": 1.5596512088783195e-07, + "logits/chosen": -1.472227692604065, + "logits/rejected": -1.4111366271972656, + "logps/chosen": -137.22311401367188, + "logps/rejected": -272.8728942871094, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.061628341674805, + "rewards/margins": 13.836725234985352, + "rewards/rejected": -18.898353576660156, + "step": 4033 + }, + { + "epoch": 6.48, + "learning_rate": 1.5586603250099086e-07, + "logits/chosen": -1.4909038543701172, + "logits/rejected": -1.5268927812576294, + "logps/chosen": -138.73800659179688, + "logps/rejected": -240.01583862304688, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.46785306930542, + "rewards/margins": 11.053812026977539, + "rewards/rejected": -16.521663665771484, + "step": 4034 + }, + { + "epoch": 6.48, + "learning_rate": 1.5576694411414982e-07, + "logits/chosen": -1.5176796913146973, + "logits/rejected": -1.519277572631836, + "logps/chosen": -126.01438903808594, + "logps/rejected": -243.8826904296875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.158909320831299, + "rewards/margins": 12.317508697509766, + "rewards/rejected": -17.47641944885254, + "step": 4035 + }, + { + "epoch": 6.48, + "learning_rate": 1.5566785572730875e-07, + "logits/chosen": -1.6301202774047852, + "logits/rejected": -1.6603573560714722, + "logps/chosen": -148.1241455078125, + "logps/rejected": -303.43212890625, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.5341596603393555, + "rewards/margins": 15.131271362304688, + "rewards/rejected": -22.66543197631836, + "step": 4036 + }, + { + "epoch": 6.48, + "learning_rate": 1.555687673404677e-07, + "logits/chosen": -1.689089298248291, + "logits/rejected": -1.7518136501312256, + "logps/chosen": -124.66378021240234, + "logps/rejected": -313.168212890625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.377783298492432, + "rewards/margins": 17.17529296875, + "rewards/rejected": -22.553077697753906, + "step": 4037 + }, + { + "epoch": 6.48, + "learning_rate": 1.5546967895362662e-07, + "logits/chosen": -1.5309021472930908, + "logits/rejected": -1.4685280323028564, + "logps/chosen": -150.35133361816406, + "logps/rejected": -261.2171936035156, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.955092430114746, + "rewards/margins": 12.160360336303711, + "rewards/rejected": -17.115453720092773, + "step": 4038 + }, + { + "epoch": 6.48, + "learning_rate": 1.5537059056678555e-07, + "logits/chosen": -1.2937068939208984, + "logits/rejected": -1.3220818042755127, + "logps/chosen": -162.06298828125, + "logps/rejected": -320.28204345703125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.562356948852539, + "rewards/margins": 13.811516761779785, + "rewards/rejected": -22.37387466430664, + "step": 4039 + }, + { + "epoch": 6.48, + "learning_rate": 1.552715021799445e-07, + "logits/chosen": -1.4887566566467285, + "logits/rejected": -1.5064687728881836, + "logps/chosen": -153.98260498046875, + "logps/rejected": -304.3556823730469, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.184513092041016, + "rewards/margins": 13.11319351196289, + "rewards/rejected": -21.297706604003906, + "step": 4040 + }, + { + "epoch": 6.49, + "learning_rate": 1.5517241379310344e-07, + "logits/chosen": -1.5476994514465332, + "logits/rejected": -1.522373914718628, + "logps/chosen": -164.2434539794922, + "logps/rejected": -269.0760803222656, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.634079933166504, + "rewards/margins": 11.179290771484375, + "rewards/rejected": -17.813371658325195, + "step": 4041 + }, + { + "epoch": 6.49, + "learning_rate": 1.550733254062624e-07, + "logits/chosen": -1.6039087772369385, + "logits/rejected": -1.54339599609375, + "logps/chosen": -166.3028106689453, + "logps/rejected": -300.0264892578125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.200139999389648, + "rewards/margins": 14.276904106140137, + "rewards/rejected": -21.47704315185547, + "step": 4042 + }, + { + "epoch": 6.49, + "learning_rate": 1.549742370194213e-07, + "logits/chosen": -1.4729117155075073, + "logits/rejected": -1.5165952444076538, + "logps/chosen": -165.81039428710938, + "logps/rejected": -275.4357604980469, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.7823805809021, + "rewards/margins": 10.663763046264648, + "rewards/rejected": -16.446144104003906, + "step": 4043 + }, + { + "epoch": 6.49, + "learning_rate": 1.5487514863258024e-07, + "logits/chosen": -1.5489170551300049, + "logits/rejected": -1.6288915872573853, + "logps/chosen": -118.94019317626953, + "logps/rejected": -269.0135498046875, + "loss": 0.0023, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.367690563201904, + "rewards/margins": 13.569025039672852, + "rewards/rejected": -18.936716079711914, + "step": 4044 + }, + { + "epoch": 6.49, + "learning_rate": 1.547760602457392e-07, + "logits/chosen": -1.3427296876907349, + "logits/rejected": -1.3488569259643555, + "logps/chosen": -162.4305419921875, + "logps/rejected": -309.95404052734375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.4087958335876465, + "rewards/margins": 13.870898246765137, + "rewards/rejected": -21.27969741821289, + "step": 4045 + }, + { + "epoch": 6.49, + "learning_rate": 1.5467697185889814e-07, + "logits/chosen": -1.4743199348449707, + "logits/rejected": -1.5243568420410156, + "logps/chosen": -214.33221435546875, + "logps/rejected": -340.5472412109375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.968498229980469, + "rewards/margins": 14.186691284179688, + "rewards/rejected": -25.155187606811523, + "step": 4046 + }, + { + "epoch": 6.5, + "learning_rate": 1.5457788347205707e-07, + "logits/chosen": -1.6100554466247559, + "logits/rejected": -1.6321802139282227, + "logps/chosen": -186.58619689941406, + "logps/rejected": -293.6015319824219, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.803191184997559, + "rewards/margins": 11.488643646240234, + "rewards/rejected": -20.29183578491211, + "step": 4047 + }, + { + "epoch": 6.5, + "learning_rate": 1.54478795085216e-07, + "logits/chosen": -1.6969642639160156, + "logits/rejected": -1.6392288208007812, + "logps/chosen": -138.90847778320312, + "logps/rejected": -251.98297119140625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.196267127990723, + "rewards/margins": 13.48394775390625, + "rewards/rejected": -18.68021583557129, + "step": 4048 + }, + { + "epoch": 6.5, + "learning_rate": 1.5437970669837494e-07, + "logits/chosen": -1.5088942050933838, + "logits/rejected": -1.4974995851516724, + "logps/chosen": -177.2664031982422, + "logps/rejected": -277.2903137207031, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.642128944396973, + "rewards/margins": 11.434252738952637, + "rewards/rejected": -19.07638168334961, + "step": 4049 + }, + { + "epoch": 6.5, + "learning_rate": 1.542806183115339e-07, + "logits/chosen": -1.6334421634674072, + "logits/rejected": -1.6535859107971191, + "logps/chosen": -123.62342834472656, + "logps/rejected": -270.6128845214844, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.040425777435303, + "rewards/margins": 14.677518844604492, + "rewards/rejected": -19.717945098876953, + "step": 4050 + }, + { + "epoch": 6.5, + "learning_rate": 1.5418152992469283e-07, + "logits/chosen": -1.4406675100326538, + "logits/rejected": -1.5025980472564697, + "logps/chosen": -159.02328491210938, + "logps/rejected": -284.29351806640625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.819018363952637, + "rewards/margins": 11.620731353759766, + "rewards/rejected": -19.43975067138672, + "step": 4051 + }, + { + "epoch": 6.5, + "learning_rate": 1.5408244153785176e-07, + "logits/chosen": -1.4954683780670166, + "logits/rejected": -1.480470895767212, + "logps/chosen": -96.87015533447266, + "logps/rejected": -189.7866668701172, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.303051948547363, + "rewards/margins": 10.316515922546387, + "rewards/rejected": -14.61956787109375, + "step": 4052 + }, + { + "epoch": 6.51, + "learning_rate": 1.539833531510107e-07, + "logits/chosen": -1.4738945960998535, + "logits/rejected": -1.3929920196533203, + "logps/chosen": -161.80072021484375, + "logps/rejected": -246.00262451171875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.863292694091797, + "rewards/margins": 9.966072082519531, + "rewards/rejected": -16.829364776611328, + "step": 4053 + }, + { + "epoch": 6.51, + "learning_rate": 1.5388426476416963e-07, + "logits/chosen": -1.489832878112793, + "logits/rejected": -1.4833807945251465, + "logps/chosen": -150.83251953125, + "logps/rejected": -315.46734619140625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.621740341186523, + "rewards/margins": 15.58474063873291, + "rewards/rejected": -22.20648193359375, + "step": 4054 + }, + { + "epoch": 6.51, + "learning_rate": 1.5378517637732856e-07, + "logits/chosen": -1.4445433616638184, + "logits/rejected": -1.4277299642562866, + "logps/chosen": -139.4583740234375, + "logps/rejected": -234.72232055664062, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.540168762207031, + "rewards/margins": 9.233020782470703, + "rewards/rejected": -16.773189544677734, + "step": 4055 + }, + { + "epoch": 6.51, + "learning_rate": 1.5368608799048752e-07, + "logits/chosen": -1.3780791759490967, + "logits/rejected": -1.4367531538009644, + "logps/chosen": -128.8372802734375, + "logps/rejected": -298.2812194824219, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.486569404602051, + "rewards/margins": 14.67315673828125, + "rewards/rejected": -20.159725189208984, + "step": 4056 + }, + { + "epoch": 6.51, + "learning_rate": 1.5358699960364643e-07, + "logits/chosen": -1.6670149564743042, + "logits/rejected": -1.6376612186431885, + "logps/chosen": -144.72723388671875, + "logps/rejected": -283.223876953125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.177809715270996, + "rewards/margins": 11.866500854492188, + "rewards/rejected": -18.0443115234375, + "step": 4057 + }, + { + "epoch": 6.51, + "learning_rate": 1.534879112168054e-07, + "logits/chosen": -1.538050889968872, + "logits/rejected": -1.5637664794921875, + "logps/chosen": -131.35787963867188, + "logps/rejected": -249.2568359375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.585724830627441, + "rewards/margins": 11.512473106384277, + "rewards/rejected": -17.09819793701172, + "step": 4058 + }, + { + "epoch": 6.52, + "learning_rate": 1.5338882282996432e-07, + "logits/chosen": -1.600117564201355, + "logits/rejected": -1.5869152545928955, + "logps/chosen": -195.26974487304688, + "logps/rejected": -318.6341552734375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.631392478942871, + "rewards/margins": 10.820945739746094, + "rewards/rejected": -20.45233917236328, + "step": 4059 + }, + { + "epoch": 6.52, + "learning_rate": 1.5328973444312325e-07, + "logits/chosen": -1.3947941064834595, + "logits/rejected": -1.4391862154006958, + "logps/chosen": -173.4237518310547, + "logps/rejected": -300.8558044433594, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.204452514648438, + "rewards/margins": 11.748929977416992, + "rewards/rejected": -19.95338249206543, + "step": 4060 + }, + { + "epoch": 6.52, + "learning_rate": 1.5319064605628221e-07, + "logits/chosen": -1.5086286067962646, + "logits/rejected": -1.5597150325775146, + "logps/chosen": -141.82708740234375, + "logps/rejected": -287.0211181640625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.535424709320068, + "rewards/margins": 14.22792911529541, + "rewards/rejected": -19.76335334777832, + "step": 4061 + }, + { + "epoch": 6.52, + "learning_rate": 1.5309155766944112e-07, + "logits/chosen": -1.4420535564422607, + "logits/rejected": -1.4773969650268555, + "logps/chosen": -214.10601806640625, + "logps/rejected": -335.39935302734375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.91952133178711, + "rewards/margins": 11.909290313720703, + "rewards/rejected": -24.828811645507812, + "step": 4062 + }, + { + "epoch": 6.52, + "learning_rate": 1.5299246928260008e-07, + "logits/chosen": -1.5383051633834839, + "logits/rejected": -1.5570423603057861, + "logps/chosen": -158.11825561523438, + "logps/rejected": -324.3089904785156, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.321404933929443, + "rewards/margins": 14.512482643127441, + "rewards/rejected": -21.833887100219727, + "step": 4063 + }, + { + "epoch": 6.52, + "learning_rate": 1.5289338089575901e-07, + "logits/chosen": -1.5520732402801514, + "logits/rejected": -1.4799624681472778, + "logps/chosen": -187.21121215820312, + "logps/rejected": -288.5232238769531, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.288106918334961, + "rewards/margins": 12.356675148010254, + "rewards/rejected": -20.6447811126709, + "step": 4064 + }, + { + "epoch": 6.52, + "learning_rate": 1.5279429250891795e-07, + "logits/chosen": -1.6449767351150513, + "logits/rejected": -1.653568983078003, + "logps/chosen": -109.162353515625, + "logps/rejected": -272.7698974609375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.447150230407715, + "rewards/margins": 15.514894485473633, + "rewards/rejected": -19.962045669555664, + "step": 4065 + }, + { + "epoch": 6.53, + "learning_rate": 1.526952041220769e-07, + "logits/chosen": -1.513624668121338, + "logits/rejected": -1.4873899221420288, + "logps/chosen": -190.31919860839844, + "logps/rejected": -299.04449462890625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.963915824890137, + "rewards/margins": 12.532449722290039, + "rewards/rejected": -22.49636459350586, + "step": 4066 + }, + { + "epoch": 6.53, + "learning_rate": 1.5259611573523581e-07, + "logits/chosen": -1.4888205528259277, + "logits/rejected": -1.4714902639389038, + "logps/chosen": -132.14393615722656, + "logps/rejected": -272.19549560546875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.42944860458374, + "rewards/margins": 14.731949806213379, + "rewards/rejected": -19.16139793395996, + "step": 4067 + }, + { + "epoch": 6.53, + "learning_rate": 1.5249702734839475e-07, + "logits/chosen": -1.4932118654251099, + "logits/rejected": -1.5197771787643433, + "logps/chosen": -181.98719787597656, + "logps/rejected": -314.7780456542969, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.27634048461914, + "rewards/margins": 11.719573020935059, + "rewards/rejected": -22.995914459228516, + "step": 4068 + }, + { + "epoch": 6.53, + "learning_rate": 1.523979389615537e-07, + "logits/chosen": -1.3459826707839966, + "logits/rejected": -1.3479771614074707, + "logps/chosen": -158.1329345703125, + "logps/rejected": -305.89825439453125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.458131790161133, + "rewards/margins": 15.924132347106934, + "rewards/rejected": -23.382265090942383, + "step": 4069 + }, + { + "epoch": 6.53, + "learning_rate": 1.5229885057471264e-07, + "logits/chosen": -1.656179666519165, + "logits/rejected": -1.5793451070785522, + "logps/chosen": -136.218017578125, + "logps/rejected": -279.0203857421875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.587588310241699, + "rewards/margins": 15.342615127563477, + "rewards/rejected": -19.93020248413086, + "step": 4070 + }, + { + "epoch": 6.53, + "learning_rate": 1.5219976218787157e-07, + "logits/chosen": -1.486596703529358, + "logits/rejected": -1.4575188159942627, + "logps/chosen": -194.72317504882812, + "logps/rejected": -316.1368103027344, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.970039367675781, + "rewards/margins": 11.540298461914062, + "rewards/rejected": -22.510337829589844, + "step": 4071 + }, + { + "epoch": 6.54, + "learning_rate": 1.521006738010305e-07, + "logits/chosen": -1.6316734552383423, + "logits/rejected": -1.604292631149292, + "logps/chosen": -144.06222534179688, + "logps/rejected": -271.86846923828125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.008913993835449, + "rewards/margins": 13.145017623901367, + "rewards/rejected": -18.1539306640625, + "step": 4072 + }, + { + "epoch": 6.54, + "learning_rate": 1.5200158541418944e-07, + "logits/chosen": -1.4934431314468384, + "logits/rejected": -1.6216177940368652, + "logps/chosen": -147.292724609375, + "logps/rejected": -301.3812255859375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.668780326843262, + "rewards/margins": 12.563833236694336, + "rewards/rejected": -19.232614517211914, + "step": 4073 + }, + { + "epoch": 6.54, + "learning_rate": 1.519024970273484e-07, + "logits/chosen": -1.4712704420089722, + "logits/rejected": -1.5283141136169434, + "logps/chosen": -149.1527099609375, + "logps/rejected": -333.89715576171875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.419144153594971, + "rewards/margins": 16.183475494384766, + "rewards/rejected": -22.60262107849121, + "step": 4074 + }, + { + "epoch": 6.54, + "learning_rate": 1.5180340864050733e-07, + "logits/chosen": -1.4666672945022583, + "logits/rejected": -1.3869317770004272, + "logps/chosen": -198.39854431152344, + "logps/rejected": -312.58642578125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.85908317565918, + "rewards/margins": 12.615459442138672, + "rewards/rejected": -21.47454071044922, + "step": 4075 + }, + { + "epoch": 6.54, + "learning_rate": 1.5170432025366624e-07, + "logits/chosen": -1.402453064918518, + "logits/rejected": -1.4854819774627686, + "logps/chosen": -170.8695068359375, + "logps/rejected": -291.66143798828125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.4193878173828125, + "rewards/margins": 13.234356880187988, + "rewards/rejected": -19.653745651245117, + "step": 4076 + }, + { + "epoch": 6.54, + "learning_rate": 1.516052318668252e-07, + "logits/chosen": -1.5971767902374268, + "logits/rejected": -1.6214932203292847, + "logps/chosen": -145.2372589111328, + "logps/rejected": -332.22235107421875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.7430572509765625, + "rewards/margins": 18.325904846191406, + "rewards/rejected": -24.06896209716797, + "step": 4077 + }, + { + "epoch": 6.55, + "learning_rate": 1.5150614347998413e-07, + "logits/chosen": -1.3945674896240234, + "logits/rejected": -1.521774411201477, + "logps/chosen": -163.29364013671875, + "logps/rejected": -310.1119384765625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.146615982055664, + "rewards/margins": 11.157669067382812, + "rewards/rejected": -18.304283142089844, + "step": 4078 + }, + { + "epoch": 6.55, + "learning_rate": 1.514070550931431e-07, + "logits/chosen": -1.662732720375061, + "logits/rejected": -1.5629010200500488, + "logps/chosen": -178.56961059570312, + "logps/rejected": -281.796142578125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.30190372467041, + "rewards/margins": 11.85798168182373, + "rewards/rejected": -20.15988540649414, + "step": 4079 + }, + { + "epoch": 6.55, + "learning_rate": 1.5130796670630203e-07, + "logits/chosen": -1.5804085731506348, + "logits/rejected": -1.6488217115402222, + "logps/chosen": -158.18911743164062, + "logps/rejected": -359.8983459472656, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.0494842529296875, + "rewards/margins": 18.54588508605957, + "rewards/rejected": -25.59537124633789, + "step": 4080 + }, + { + "epoch": 6.55, + "learning_rate": 1.5120887831946093e-07, + "logits/chosen": -1.449872612953186, + "logits/rejected": -1.4460430145263672, + "logps/chosen": -179.75961303710938, + "logps/rejected": -301.960205078125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.485612869262695, + "rewards/margins": 12.297904014587402, + "rewards/rejected": -21.78351593017578, + "step": 4081 + }, + { + "epoch": 6.55, + "learning_rate": 1.511097899326199e-07, + "logits/chosen": -1.6274003982543945, + "logits/rejected": -1.6389713287353516, + "logps/chosen": -121.28640747070312, + "logps/rejected": -210.23715209960938, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.806861877441406, + "rewards/margins": 9.546662330627441, + "rewards/rejected": -15.353523254394531, + "step": 4082 + }, + { + "epoch": 6.55, + "learning_rate": 1.5101070154577883e-07, + "logits/chosen": -1.6186316013336182, + "logits/rejected": -1.6266202926635742, + "logps/chosen": -162.55934143066406, + "logps/rejected": -295.6706237792969, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.261623382568359, + "rewards/margins": 13.612985610961914, + "rewards/rejected": -20.874608993530273, + "step": 4083 + }, + { + "epoch": 6.56, + "learning_rate": 1.5091161315893779e-07, + "logits/chosen": -1.4036301374435425, + "logits/rejected": -1.3807857036590576, + "logps/chosen": -158.0211639404297, + "logps/rejected": -255.51791381835938, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.10399341583252, + "rewards/margins": 9.913474082946777, + "rewards/rejected": -18.017467498779297, + "step": 4084 + }, + { + "epoch": 6.56, + "learning_rate": 1.5081252477209672e-07, + "logits/chosen": -1.5111534595489502, + "logits/rejected": -1.5831298828125, + "logps/chosen": -170.0465850830078, + "logps/rejected": -313.25994873046875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.435107231140137, + "rewards/margins": 14.866000175476074, + "rewards/rejected": -22.30110740661621, + "step": 4085 + }, + { + "epoch": 6.56, + "learning_rate": 1.5071343638525563e-07, + "logits/chosen": -1.3186919689178467, + "logits/rejected": -1.4023411273956299, + "logps/chosen": -167.7193145751953, + "logps/rejected": -302.72174072265625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.198563575744629, + "rewards/margins": 11.965446472167969, + "rewards/rejected": -21.16400909423828, + "step": 4086 + }, + { + "epoch": 6.56, + "learning_rate": 1.5061434799841459e-07, + "logits/chosen": -1.3928310871124268, + "logits/rejected": -1.4552295207977295, + "logps/chosen": -172.41641235351562, + "logps/rejected": -307.2332458496094, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.563188552856445, + "rewards/margins": 12.907790184020996, + "rewards/rejected": -22.470979690551758, + "step": 4087 + }, + { + "epoch": 6.56, + "learning_rate": 1.5051525961157352e-07, + "logits/chosen": -1.4398659467697144, + "logits/rejected": -1.4436283111572266, + "logps/chosen": -153.02853393554688, + "logps/rejected": -318.7279052734375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.009366035461426, + "rewards/margins": 16.107200622558594, + "rewards/rejected": -23.116565704345703, + "step": 4088 + }, + { + "epoch": 6.56, + "learning_rate": 1.5041617122473245e-07, + "logits/chosen": -1.6115996837615967, + "logits/rejected": -1.7055580615997314, + "logps/chosen": -162.7047882080078, + "logps/rejected": -306.8726501464844, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.268516540527344, + "rewards/margins": 13.849541664123535, + "rewards/rejected": -21.118059158325195, + "step": 4089 + }, + { + "epoch": 6.57, + "learning_rate": 1.5031708283789139e-07, + "logits/chosen": -1.4757345914840698, + "logits/rejected": -1.4621689319610596, + "logps/chosen": -150.90625, + "logps/rejected": -258.958984375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.189865589141846, + "rewards/margins": 12.185379028320312, + "rewards/rejected": -19.375244140625, + "step": 4090 + }, + { + "epoch": 6.57, + "learning_rate": 1.5021799445105032e-07, + "logits/chosen": -1.5603598356246948, + "logits/rejected": -1.5140317678451538, + "logps/chosen": -156.30186462402344, + "logps/rejected": -231.10037231445312, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.396376609802246, + "rewards/margins": 10.294145584106445, + "rewards/rejected": -15.690522193908691, + "step": 4091 + }, + { + "epoch": 6.57, + "learning_rate": 1.5011890606420928e-07, + "logits/chosen": -1.413588047027588, + "logits/rejected": -1.2986944913864136, + "logps/chosen": -180.11880493164062, + "logps/rejected": -262.81036376953125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.319664478302002, + "rewards/margins": 11.70711612701416, + "rewards/rejected": -18.02678108215332, + "step": 4092 + }, + { + "epoch": 6.57, + "learning_rate": 1.500198176773682e-07, + "logits/chosen": -1.4531843662261963, + "logits/rejected": -1.3659489154815674, + "logps/chosen": -119.97425842285156, + "logps/rejected": -222.18466186523438, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.700887680053711, + "rewards/margins": 11.426825523376465, + "rewards/rejected": -16.12771224975586, + "step": 4093 + }, + { + "epoch": 6.57, + "learning_rate": 1.4992072929052715e-07, + "logits/chosen": -1.6835366487503052, + "logits/rejected": -1.7321363687515259, + "logps/chosen": -103.9294662475586, + "logps/rejected": -245.88499450683594, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.15220832824707, + "rewards/margins": 12.703938484191895, + "rewards/rejected": -17.85614776611328, + "step": 4094 + }, + { + "epoch": 6.57, + "learning_rate": 1.4982164090368608e-07, + "logits/chosen": -1.5938084125518799, + "logits/rejected": -1.6410586833953857, + "logps/chosen": -104.45947265625, + "logps/rejected": -233.14706420898438, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.787874221801758, + "rewards/margins": 12.836455345153809, + "rewards/rejected": -15.624329566955566, + "step": 4095 + }, + { + "epoch": 6.57, + "learning_rate": 1.49722552516845e-07, + "logits/chosen": -1.443957805633545, + "logits/rejected": -1.399722695350647, + "logps/chosen": -211.9832000732422, + "logps/rejected": -328.7348327636719, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.599187850952148, + "rewards/margins": 12.750417709350586, + "rewards/rejected": -24.349605560302734, + "step": 4096 + }, + { + "epoch": 6.58, + "learning_rate": 1.4962346413000395e-07, + "logits/chosen": -1.5091696977615356, + "logits/rejected": -1.5460914373397827, + "logps/chosen": -150.3516082763672, + "logps/rejected": -354.9713439941406, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.331852436065674, + "rewards/margins": 16.30389976501465, + "rewards/rejected": -23.635751724243164, + "step": 4097 + }, + { + "epoch": 6.58, + "learning_rate": 1.495243757431629e-07, + "logits/chosen": -1.6088656187057495, + "logits/rejected": -1.556983232498169, + "logps/chosen": -138.19561767578125, + "logps/rejected": -232.7463836669922, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.46855354309082, + "rewards/margins": 9.872434616088867, + "rewards/rejected": -16.340988159179688, + "step": 4098 + }, + { + "epoch": 6.58, + "learning_rate": 1.4942528735632184e-07, + "logits/chosen": -1.49601411819458, + "logits/rejected": -1.5659170150756836, + "logps/chosen": -168.0802459716797, + "logps/rejected": -371.47625732421875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.519537925720215, + "rewards/margins": 17.123958587646484, + "rewards/rejected": -24.643497467041016, + "step": 4099 + }, + { + "epoch": 6.58, + "learning_rate": 1.4932619896948077e-07, + "logits/chosen": -1.5118989944458008, + "logits/rejected": -1.6016643047332764, + "logps/chosen": -153.66831970214844, + "logps/rejected": -301.02783203125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.66386604309082, + "rewards/margins": 12.948326110839844, + "rewards/rejected": -19.612192153930664, + "step": 4100 + }, + { + "epoch": 6.58, + "learning_rate": 1.492271105826397e-07, + "logits/chosen": -1.5455269813537598, + "logits/rejected": -1.5489541292190552, + "logps/chosen": -155.40951538085938, + "logps/rejected": -368.84222412109375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.031145095825195, + "rewards/margins": 19.048681259155273, + "rewards/rejected": -27.07982635498047, + "step": 4101 + }, + { + "epoch": 6.58, + "learning_rate": 1.4912802219579864e-07, + "logits/chosen": -1.5789958238601685, + "logits/rejected": -1.489863395690918, + "logps/chosen": -150.97787475585938, + "logps/rejected": -268.7757568359375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.071887493133545, + "rewards/margins": 11.74664306640625, + "rewards/rejected": -18.818531036376953, + "step": 4102 + }, + { + "epoch": 6.59, + "learning_rate": 1.490289338089576e-07, + "logits/chosen": -1.4339970350265503, + "logits/rejected": -1.5039293766021729, + "logps/chosen": -168.73056030273438, + "logps/rejected": -342.98095703125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.63376235961914, + "rewards/margins": 15.128793716430664, + "rewards/rejected": -23.762554168701172, + "step": 4103 + }, + { + "epoch": 6.59, + "learning_rate": 1.4892984542211653e-07, + "logits/chosen": -1.4057669639587402, + "logits/rejected": -1.3966867923736572, + "logps/chosen": -149.01832580566406, + "logps/rejected": -310.9473876953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.307614326477051, + "rewards/margins": 14.647039413452148, + "rewards/rejected": -21.954654693603516, + "step": 4104 + }, + { + "epoch": 6.59, + "learning_rate": 1.4883075703527544e-07, + "logits/chosen": -1.5777184963226318, + "logits/rejected": -1.578404188156128, + "logps/chosen": -150.27523803710938, + "logps/rejected": -271.3721618652344, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.108552932739258, + "rewards/margins": 13.089954376220703, + "rewards/rejected": -18.19850730895996, + "step": 4105 + }, + { + "epoch": 6.59, + "learning_rate": 1.487316686484344e-07, + "logits/chosen": -1.4395304918289185, + "logits/rejected": -1.4709645509719849, + "logps/chosen": -184.512939453125, + "logps/rejected": -345.3472900390625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.956774711608887, + "rewards/margins": 15.761384963989258, + "rewards/rejected": -23.71816062927246, + "step": 4106 + }, + { + "epoch": 6.59, + "learning_rate": 1.4863258026159333e-07, + "logits/chosen": -1.5246232748031616, + "logits/rejected": -1.5974068641662598, + "logps/chosen": -172.21853637695312, + "logps/rejected": -309.82281494140625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.148319244384766, + "rewards/margins": 12.023696899414062, + "rewards/rejected": -20.172016143798828, + "step": 4107 + }, + { + "epoch": 6.59, + "learning_rate": 1.485334918747523e-07, + "logits/chosen": -1.4852598905563354, + "logits/rejected": -1.509317398071289, + "logps/chosen": -129.41629028320312, + "logps/rejected": -277.6058044433594, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.9687604904174805, + "rewards/margins": 13.778006553649902, + "rewards/rejected": -19.746767044067383, + "step": 4108 + }, + { + "epoch": 6.6, + "learning_rate": 1.484344034879112e-07, + "logits/chosen": -1.654557466506958, + "logits/rejected": -1.4429893493652344, + "logps/chosen": -187.88986206054688, + "logps/rejected": -286.07305908203125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.8853440284729, + "rewards/margins": 12.055037498474121, + "rewards/rejected": -19.94038200378418, + "step": 4109 + }, + { + "epoch": 6.6, + "learning_rate": 1.4833531510107013e-07, + "logits/chosen": -1.4232083559036255, + "logits/rejected": -1.4196951389312744, + "logps/chosen": -146.98289489746094, + "logps/rejected": -317.016845703125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.373187065124512, + "rewards/margins": 15.869359016418457, + "rewards/rejected": -22.24254608154297, + "step": 4110 + }, + { + "epoch": 6.6, + "learning_rate": 1.482362267142291e-07, + "logits/chosen": -1.5808310508728027, + "logits/rejected": -1.611970067024231, + "logps/chosen": -165.60556030273438, + "logps/rejected": -296.2220764160156, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.541987419128418, + "rewards/margins": 12.11770248413086, + "rewards/rejected": -21.659690856933594, + "step": 4111 + }, + { + "epoch": 6.6, + "learning_rate": 1.4813713832738802e-07, + "logits/chosen": -1.382578730583191, + "logits/rejected": -1.4101051092147827, + "logps/chosen": -147.51280212402344, + "logps/rejected": -266.1624755859375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.309831619262695, + "rewards/margins": 11.702223777770996, + "rewards/rejected": -18.012054443359375, + "step": 4112 + }, + { + "epoch": 6.6, + "learning_rate": 1.4803804994054698e-07, + "logits/chosen": -1.5783956050872803, + "logits/rejected": -1.5444945096969604, + "logps/chosen": -158.3048095703125, + "logps/rejected": -318.23974609375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.591848373413086, + "rewards/margins": 14.775527954101562, + "rewards/rejected": -22.36737632751465, + "step": 4113 + }, + { + "epoch": 6.6, + "learning_rate": 1.479389615537059e-07, + "logits/chosen": -1.3988580703735352, + "logits/rejected": -1.3380420207977295, + "logps/chosen": -152.75042724609375, + "logps/rejected": -267.7724304199219, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.2040300369262695, + "rewards/margins": 12.473662376403809, + "rewards/rejected": -18.677692413330078, + "step": 4114 + }, + { + "epoch": 6.61, + "learning_rate": 1.4783987316686482e-07, + "logits/chosen": -1.3291654586791992, + "logits/rejected": -1.3710399866104126, + "logps/chosen": -142.30471801757812, + "logps/rejected": -276.33740234375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.024691581726074, + "rewards/margins": 12.783974647521973, + "rewards/rejected": -19.808666229248047, + "step": 4115 + }, + { + "epoch": 6.61, + "learning_rate": 1.4774078478002378e-07, + "logits/chosen": -1.3804129362106323, + "logits/rejected": -1.3978275060653687, + "logps/chosen": -162.5071563720703, + "logps/rejected": -303.33172607421875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.255181312561035, + "rewards/margins": 15.716211318969727, + "rewards/rejected": -23.971393585205078, + "step": 4116 + }, + { + "epoch": 6.61, + "learning_rate": 1.4764169639318272e-07, + "logits/chosen": -1.419332504272461, + "logits/rejected": -1.376801609992981, + "logps/chosen": -190.6568603515625, + "logps/rejected": -288.55035400390625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.518251419067383, + "rewards/margins": 12.45156478881836, + "rewards/rejected": -20.96981430053711, + "step": 4117 + }, + { + "epoch": 6.61, + "learning_rate": 1.4754260800634165e-07, + "logits/chosen": -1.331827163696289, + "logits/rejected": -1.3622570037841797, + "logps/chosen": -158.91969299316406, + "logps/rejected": -313.927490234375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.457273006439209, + "rewards/margins": 14.844523429870605, + "rewards/rejected": -22.30179786682129, + "step": 4118 + }, + { + "epoch": 6.61, + "learning_rate": 1.4744351961950058e-07, + "logits/chosen": -1.5200949907302856, + "logits/rejected": -1.5151524543762207, + "logps/chosen": -166.9207763671875, + "logps/rejected": -307.7278747558594, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.971203804016113, + "rewards/margins": 12.824392318725586, + "rewards/rejected": -21.795597076416016, + "step": 4119 + }, + { + "epoch": 6.61, + "learning_rate": 1.4734443123265952e-07, + "logits/chosen": -1.5693577527999878, + "logits/rejected": -1.6414839029312134, + "logps/chosen": -148.14251708984375, + "logps/rejected": -297.39166259765625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.220607280731201, + "rewards/margins": 14.073882102966309, + "rewards/rejected": -20.29448890686035, + "step": 4120 + }, + { + "epoch": 6.61, + "learning_rate": 1.4724534284581848e-07, + "logits/chosen": -1.5770797729492188, + "logits/rejected": -1.7559382915496826, + "logps/chosen": -108.04955291748047, + "logps/rejected": -325.7701416015625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.621438980102539, + "rewards/margins": 17.024879455566406, + "rewards/rejected": -21.646320343017578, + "step": 4121 + }, + { + "epoch": 6.62, + "learning_rate": 1.471462544589774e-07, + "logits/chosen": -1.4643566608428955, + "logits/rejected": -1.6133437156677246, + "logps/chosen": -144.56851196289062, + "logps/rejected": -264.08685302734375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.319293022155762, + "rewards/margins": 10.51630973815918, + "rewards/rejected": -17.835601806640625, + "step": 4122 + }, + { + "epoch": 6.62, + "learning_rate": 1.4704716607213634e-07, + "logits/chosen": -1.3060307502746582, + "logits/rejected": -1.3858160972595215, + "logps/chosen": -98.84954833984375, + "logps/rejected": -248.50494384765625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.014519691467285, + "rewards/margins": 11.676782608032227, + "rewards/rejected": -16.691303253173828, + "step": 4123 + }, + { + "epoch": 6.62, + "learning_rate": 1.4694807768529528e-07, + "logits/chosen": -1.4652916193008423, + "logits/rejected": -1.6324361562728882, + "logps/chosen": -135.28973388671875, + "logps/rejected": -299.8081359863281, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.020556449890137, + "rewards/margins": 13.206077575683594, + "rewards/rejected": -19.226633071899414, + "step": 4124 + }, + { + "epoch": 6.62, + "learning_rate": 1.468489892984542e-07, + "logits/chosen": -1.4585589170455933, + "logits/rejected": -1.4125738143920898, + "logps/chosen": -181.2947540283203, + "logps/rejected": -271.39752197265625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.106605529785156, + "rewards/margins": 11.24235725402832, + "rewards/rejected": -18.348962783813477, + "step": 4125 + }, + { + "epoch": 6.62, + "learning_rate": 1.4674990091161314e-07, + "logits/chosen": -1.4602010250091553, + "logits/rejected": -1.5112550258636475, + "logps/chosen": -167.30206298828125, + "logps/rejected": -303.95208740234375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.99468994140625, + "rewards/margins": 12.562808990478516, + "rewards/rejected": -21.557498931884766, + "step": 4126 + }, + { + "epoch": 6.62, + "learning_rate": 1.466508125247721e-07, + "logits/chosen": -1.4752020835876465, + "logits/rejected": -1.4603357315063477, + "logps/chosen": -166.50213623046875, + "logps/rejected": -265.3182373046875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.309196472167969, + "rewards/margins": 10.920965194702148, + "rewards/rejected": -19.230159759521484, + "step": 4127 + }, + { + "epoch": 6.63, + "learning_rate": 1.46551724137931e-07, + "logits/chosen": -1.3355724811553955, + "logits/rejected": -1.4242204427719116, + "logps/chosen": -181.42282104492188, + "logps/rejected": -313.04248046875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.133359909057617, + "rewards/margins": 11.921483993530273, + "rewards/rejected": -21.05484390258789, + "step": 4128 + }, + { + "epoch": 6.63, + "learning_rate": 1.4645263575108997e-07, + "logits/chosen": -1.6381499767303467, + "logits/rejected": -1.5911481380462646, + "logps/chosen": -126.77091979980469, + "logps/rejected": -274.7130126953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.499006271362305, + "rewards/margins": 14.977409362792969, + "rewards/rejected": -20.47641372680664, + "step": 4129 + }, + { + "epoch": 6.63, + "learning_rate": 1.463535473642489e-07, + "logits/chosen": -1.4138033390045166, + "logits/rejected": -1.3972196578979492, + "logps/chosen": -146.31008911132812, + "logps/rejected": -288.8357849121094, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.472864627838135, + "rewards/margins": 12.610782623291016, + "rewards/rejected": -19.083646774291992, + "step": 4130 + }, + { + "epoch": 6.63, + "learning_rate": 1.4625445897740784e-07, + "logits/chosen": -1.5702593326568604, + "logits/rejected": -1.590135097503662, + "logps/chosen": -179.53326416015625, + "logps/rejected": -298.007568359375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.500025749206543, + "rewards/margins": 12.267621994018555, + "rewards/rejected": -20.767650604248047, + "step": 4131 + }, + { + "epoch": 6.63, + "learning_rate": 1.461553705905668e-07, + "logits/chosen": -1.5654537677764893, + "logits/rejected": -1.4965026378631592, + "logps/chosen": -148.61912536621094, + "logps/rejected": -263.8389892578125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.723934650421143, + "rewards/margins": 11.422367095947266, + "rewards/rejected": -18.14630126953125, + "step": 4132 + }, + { + "epoch": 6.63, + "learning_rate": 1.460562822037257e-07, + "logits/chosen": -1.454143762588501, + "logits/rejected": -1.3970427513122559, + "logps/chosen": -205.63394165039062, + "logps/rejected": -342.7855224609375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.57720947265625, + "rewards/margins": 14.704434394836426, + "rewards/rejected": -25.28164291381836, + "step": 4133 + }, + { + "epoch": 6.64, + "learning_rate": 1.4595719381688466e-07, + "logits/chosen": -1.4308161735534668, + "logits/rejected": -1.4358233213424683, + "logps/chosen": -133.0418243408203, + "logps/rejected": -281.9250793457031, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.332579612731934, + "rewards/margins": 13.451087951660156, + "rewards/rejected": -19.783666610717773, + "step": 4134 + }, + { + "epoch": 6.64, + "learning_rate": 1.458581054300436e-07, + "logits/chosen": -1.460931658744812, + "logits/rejected": -1.3953906297683716, + "logps/chosen": -192.30526733398438, + "logps/rejected": -313.3667907714844, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.678272247314453, + "rewards/margins": 13.712261199951172, + "rewards/rejected": -22.390533447265625, + "step": 4135 + }, + { + "epoch": 6.64, + "learning_rate": 1.4575901704320253e-07, + "logits/chosen": -1.280988097190857, + "logits/rejected": -1.250014066696167, + "logps/chosen": -172.99005126953125, + "logps/rejected": -299.4040832519531, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.70810317993164, + "rewards/margins": 14.445243835449219, + "rewards/rejected": -23.153348922729492, + "step": 4136 + }, + { + "epoch": 6.64, + "learning_rate": 1.456599286563615e-07, + "logits/chosen": -1.5811346769332886, + "logits/rejected": -1.5825557708740234, + "logps/chosen": -171.8234405517578, + "logps/rejected": -286.1657409667969, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.561553001403809, + "rewards/margins": 12.74410629272461, + "rewards/rejected": -19.305660247802734, + "step": 4137 + }, + { + "epoch": 6.64, + "learning_rate": 1.455608402695204e-07, + "logits/chosen": -1.5619128942489624, + "logits/rejected": -1.517148733139038, + "logps/chosen": -94.3139419555664, + "logps/rejected": -207.98068237304688, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6813533306121826, + "rewards/margins": 12.425191879272461, + "rewards/rejected": -15.106544494628906, + "step": 4138 + }, + { + "epoch": 6.64, + "learning_rate": 1.4546175188267933e-07, + "logits/chosen": -1.5387370586395264, + "logits/rejected": -1.416195034980774, + "logps/chosen": -192.57049560546875, + "logps/rejected": -277.26348876953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.053545951843262, + "rewards/margins": 11.418782234191895, + "rewards/rejected": -18.472328186035156, + "step": 4139 + }, + { + "epoch": 6.65, + "learning_rate": 1.453626634958383e-07, + "logits/chosen": -1.4890508651733398, + "logits/rejected": -1.486991286277771, + "logps/chosen": -104.88322448730469, + "logps/rejected": -262.5455322265625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.8885533809661865, + "rewards/margins": 15.526912689208984, + "rewards/rejected": -19.41546630859375, + "step": 4140 + }, + { + "epoch": 6.65, + "learning_rate": 1.4526357510899722e-07, + "logits/chosen": -1.6078287363052368, + "logits/rejected": -1.6309047937393188, + "logps/chosen": -118.92379760742188, + "logps/rejected": -261.51959228515625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7476558685302734, + "rewards/margins": 14.800752639770508, + "rewards/rejected": -18.54840850830078, + "step": 4141 + }, + { + "epoch": 6.65, + "learning_rate": 1.4516448672215616e-07, + "logits/chosen": -1.4068337678909302, + "logits/rejected": -1.4641025066375732, + "logps/chosen": -141.02725219726562, + "logps/rejected": -280.63897705078125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.012684345245361, + "rewards/margins": 12.930212020874023, + "rewards/rejected": -18.94289779663086, + "step": 4142 + }, + { + "epoch": 6.65, + "learning_rate": 1.450653983353151e-07, + "logits/chosen": -1.5320560932159424, + "logits/rejected": -1.583310604095459, + "logps/chosen": -135.03936767578125, + "logps/rejected": -280.48828125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.52979040145874, + "rewards/margins": 14.588117599487305, + "rewards/rejected": -20.117908477783203, + "step": 4143 + }, + { + "epoch": 6.65, + "learning_rate": 1.4496630994847402e-07, + "logits/chosen": -1.513519287109375, + "logits/rejected": -1.4528483152389526, + "logps/chosen": -211.60166931152344, + "logps/rejected": -330.83905029296875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.44937515258789, + "rewards/margins": 13.099647521972656, + "rewards/rejected": -22.549022674560547, + "step": 4144 + }, + { + "epoch": 6.65, + "learning_rate": 1.4486722156163298e-07, + "logits/chosen": -1.5248932838439941, + "logits/rejected": -1.5117985010147095, + "logps/chosen": -163.4835968017578, + "logps/rejected": -303.4458312988281, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.524850845336914, + "rewards/margins": 13.669477462768555, + "rewards/rejected": -22.19432830810547, + "step": 4145 + }, + { + "epoch": 6.65, + "learning_rate": 1.4476813317479191e-07, + "logits/chosen": -1.3018198013305664, + "logits/rejected": -1.2976622581481934, + "logps/chosen": -190.58021545410156, + "logps/rejected": -329.042236328125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.94289779663086, + "rewards/margins": 12.605820655822754, + "rewards/rejected": -23.548717498779297, + "step": 4146 + }, + { + "epoch": 6.66, + "learning_rate": 1.4466904478795082e-07, + "logits/chosen": -1.4294718503952026, + "logits/rejected": -1.4945263862609863, + "logps/chosen": -167.09747314453125, + "logps/rejected": -332.243896484375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.193537712097168, + "rewards/margins": 13.761260986328125, + "rewards/rejected": -21.954795837402344, + "step": 4147 + }, + { + "epoch": 6.66, + "learning_rate": 1.4456995640110978e-07, + "logits/chosen": -1.2887673377990723, + "logits/rejected": -1.3931349515914917, + "logps/chosen": -119.58167266845703, + "logps/rejected": -279.794921875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.480682373046875, + "rewards/margins": 14.623363494873047, + "rewards/rejected": -19.104045867919922, + "step": 4148 + }, + { + "epoch": 6.66, + "learning_rate": 1.4447086801426871e-07, + "logits/chosen": -1.628179907798767, + "logits/rejected": -1.6781283617019653, + "logps/chosen": -122.92680358886719, + "logps/rejected": -259.0716552734375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9362754821777344, + "rewards/margins": 13.408138275146484, + "rewards/rejected": -17.34441375732422, + "step": 4149 + }, + { + "epoch": 6.66, + "learning_rate": 1.4437177962742767e-07, + "logits/chosen": -1.4635975360870361, + "logits/rejected": -1.3925254344940186, + "logps/chosen": -206.1355438232422, + "logps/rejected": -343.0355529785156, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.737168312072754, + "rewards/margins": 15.306827545166016, + "rewards/rejected": -26.043996810913086, + "step": 4150 + }, + { + "epoch": 6.66, + "learning_rate": 1.442726912405866e-07, + "logits/chosen": -1.5318611860275269, + "logits/rejected": -1.5216847658157349, + "logps/chosen": -177.15521240234375, + "logps/rejected": -267.67376708984375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.672640800476074, + "rewards/margins": 9.44110107421875, + "rewards/rejected": -18.11374282836914, + "step": 4151 + }, + { + "epoch": 6.66, + "learning_rate": 1.4417360285374551e-07, + "logits/chosen": -1.4890094995498657, + "logits/rejected": -1.5268622636795044, + "logps/chosen": -187.4859619140625, + "logps/rejected": -339.1991271972656, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.167099952697754, + "rewards/margins": 13.180408477783203, + "rewards/rejected": -23.34750747680664, + "step": 4152 + }, + { + "epoch": 6.67, + "learning_rate": 1.4407451446690447e-07, + "logits/chosen": -1.5438661575317383, + "logits/rejected": -1.5583442449569702, + "logps/chosen": -117.7491455078125, + "logps/rejected": -252.84800720214844, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.544641494750977, + "rewards/margins": 13.375804901123047, + "rewards/rejected": -17.920446395874023, + "step": 4153 + }, + { + "epoch": 6.67, + "learning_rate": 1.439754260800634e-07, + "logits/chosen": -1.5880905389785767, + "logits/rejected": -1.5045795440673828, + "logps/chosen": -162.87344360351562, + "logps/rejected": -261.3790283203125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.113718032836914, + "rewards/margins": 11.206376075744629, + "rewards/rejected": -17.320093154907227, + "step": 4154 + }, + { + "epoch": 6.67, + "learning_rate": 1.4387633769322237e-07, + "logits/chosen": -1.6531448364257812, + "logits/rejected": -1.6122077703475952, + "logps/chosen": -144.02215576171875, + "logps/rejected": -290.75042724609375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.697759628295898, + "rewards/margins": 14.717501640319824, + "rewards/rejected": -20.41526222229004, + "step": 4155 + }, + { + "epoch": 6.67, + "learning_rate": 1.437772493063813e-07, + "logits/chosen": -1.3774375915527344, + "logits/rejected": -1.505420207977295, + "logps/chosen": -149.7346649169922, + "logps/rejected": -306.6710205078125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.263072490692139, + "rewards/margins": 14.269231796264648, + "rewards/rejected": -21.532304763793945, + "step": 4156 + }, + { + "epoch": 6.67, + "learning_rate": 1.436781609195402e-07, + "logits/chosen": -1.4557645320892334, + "logits/rejected": -1.4548401832580566, + "logps/chosen": -188.98538208007812, + "logps/rejected": -335.6321105957031, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.310656547546387, + "rewards/margins": 14.009450912475586, + "rewards/rejected": -24.320106506347656, + "step": 4157 + }, + { + "epoch": 6.67, + "learning_rate": 1.4357907253269917e-07, + "logits/chosen": -1.4381978511810303, + "logits/rejected": -1.3880447149276733, + "logps/chosen": -154.64810180664062, + "logps/rejected": -276.7188415527344, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.377471923828125, + "rewards/margins": 11.651687622070312, + "rewards/rejected": -19.029159545898438, + "step": 4158 + }, + { + "epoch": 6.68, + "learning_rate": 1.434799841458581e-07, + "logits/chosen": -1.67477285861969, + "logits/rejected": -1.7095811367034912, + "logps/chosen": -119.07569885253906, + "logps/rejected": -299.6015625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.857611656188965, + "rewards/margins": 16.669761657714844, + "rewards/rejected": -21.527372360229492, + "step": 4159 + }, + { + "epoch": 6.68, + "learning_rate": 1.4338089575901703e-07, + "logits/chosen": -1.6132476329803467, + "logits/rejected": -1.6084840297698975, + "logps/chosen": -158.45413208007812, + "logps/rejected": -317.97760009765625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.818764686584473, + "rewards/margins": 15.344642639160156, + "rewards/rejected": -23.163410186767578, + "step": 4160 + }, + { + "epoch": 6.68, + "learning_rate": 1.4328180737217597e-07, + "logits/chosen": -1.3431742191314697, + "logits/rejected": -1.4128085374832153, + "logps/chosen": -168.3163604736328, + "logps/rejected": -318.6066589355469, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.184720039367676, + "rewards/margins": 14.864900588989258, + "rewards/rejected": -22.049619674682617, + "step": 4161 + }, + { + "epoch": 6.68, + "learning_rate": 1.431827189853349e-07, + "logits/chosen": -1.73689866065979, + "logits/rejected": -1.636826992034912, + "logps/chosen": -138.83328247070312, + "logps/rejected": -261.7895202636719, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.030681133270264, + "rewards/margins": 14.110193252563477, + "rewards/rejected": -19.1408748626709, + "step": 4162 + }, + { + "epoch": 6.68, + "learning_rate": 1.4308363059849386e-07, + "logits/chosen": -1.4595438241958618, + "logits/rejected": -1.5064449310302734, + "logps/chosen": -159.46665954589844, + "logps/rejected": -298.0342102050781, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.28271198272705, + "rewards/margins": 12.710628509521484, + "rewards/rejected": -20.99334144592285, + "step": 4163 + }, + { + "epoch": 6.68, + "learning_rate": 1.429845422116528e-07, + "logits/chosen": -1.6640092134475708, + "logits/rejected": -1.6700853109359741, + "logps/chosen": -154.46194458007812, + "logps/rejected": -273.7508544921875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.393726825714111, + "rewards/margins": 12.04390811920166, + "rewards/rejected": -18.43763542175293, + "step": 4164 + }, + { + "epoch": 6.69, + "learning_rate": 1.4288545382481173e-07, + "logits/chosen": -1.547052264213562, + "logits/rejected": -1.5542913675308228, + "logps/chosen": -187.65841674804688, + "logps/rejected": -341.79925537109375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.650487899780273, + "rewards/margins": 15.992619514465332, + "rewards/rejected": -24.643108367919922, + "step": 4165 + }, + { + "epoch": 6.69, + "learning_rate": 1.4278636543797066e-07, + "logits/chosen": -1.3973982334136963, + "logits/rejected": -1.4237322807312012, + "logps/chosen": -160.01058959960938, + "logps/rejected": -298.11907958984375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.063535690307617, + "rewards/margins": 12.777791976928711, + "rewards/rejected": -19.841327667236328, + "step": 4166 + }, + { + "epoch": 6.69, + "learning_rate": 1.426872770511296e-07, + "logits/chosen": -1.5940396785736084, + "logits/rejected": -1.5663610696792603, + "logps/chosen": -144.07498168945312, + "logps/rejected": -296.02740478515625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.173338890075684, + "rewards/margins": 14.756559371948242, + "rewards/rejected": -20.92989730834961, + "step": 4167 + }, + { + "epoch": 6.69, + "learning_rate": 1.4258818866428853e-07, + "logits/chosen": -1.42523193359375, + "logits/rejected": -1.3911864757537842, + "logps/chosen": -133.39654541015625, + "logps/rejected": -233.8380889892578, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.8040876388549805, + "rewards/margins": 11.721803665161133, + "rewards/rejected": -17.525890350341797, + "step": 4168 + }, + { + "epoch": 6.69, + "learning_rate": 1.4248910027744749e-07, + "logits/chosen": -1.4825286865234375, + "logits/rejected": -1.350489854812622, + "logps/chosen": -195.60696411132812, + "logps/rejected": -265.55499267578125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.542814254760742, + "rewards/margins": 11.295408248901367, + "rewards/rejected": -19.83822250366211, + "step": 4169 + }, + { + "epoch": 6.69, + "learning_rate": 1.4239001189060642e-07, + "logits/chosen": -1.6233148574829102, + "logits/rejected": -1.6303476095199585, + "logps/chosen": -145.54916381835938, + "logps/rejected": -308.0374450683594, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.140379905700684, + "rewards/margins": 16.378568649291992, + "rewards/rejected": -21.51894760131836, + "step": 4170 + }, + { + "epoch": 6.7, + "learning_rate": 1.4229092350376535e-07, + "logits/chosen": -1.5391818284988403, + "logits/rejected": -1.5154540538787842, + "logps/chosen": -154.34364318847656, + "logps/rejected": -301.5252380371094, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.073291778564453, + "rewards/margins": 14.543349266052246, + "rewards/rejected": -20.616641998291016, + "step": 4171 + }, + { + "epoch": 6.7, + "learning_rate": 1.4219183511692429e-07, + "logits/chosen": -1.488121747970581, + "logits/rejected": -1.400504469871521, + "logps/chosen": -142.19029235839844, + "logps/rejected": -311.00469970703125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.470901012420654, + "rewards/margins": 16.786785125732422, + "rewards/rejected": -23.2576847076416, + "step": 4172 + }, + { + "epoch": 6.7, + "learning_rate": 1.4209274673008322e-07, + "logits/chosen": -1.5550674200057983, + "logits/rejected": -1.5109450817108154, + "logps/chosen": -130.66900634765625, + "logps/rejected": -273.69476318359375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.430903434753418, + "rewards/margins": 15.731758117675781, + "rewards/rejected": -20.162660598754883, + "step": 4173 + }, + { + "epoch": 6.7, + "learning_rate": 1.4199365834324218e-07, + "logits/chosen": -1.4124763011932373, + "logits/rejected": -1.4424049854278564, + "logps/chosen": -142.48397827148438, + "logps/rejected": -247.60086059570312, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.08627986907959, + "rewards/margins": 12.552963256835938, + "rewards/rejected": -17.639244079589844, + "step": 4174 + }, + { + "epoch": 6.7, + "learning_rate": 1.418945699564011e-07, + "logits/chosen": -1.4049806594848633, + "logits/rejected": -1.4243189096450806, + "logps/chosen": -188.0469207763672, + "logps/rejected": -330.2589111328125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.740144729614258, + "rewards/margins": 10.832058906555176, + "rewards/rejected": -21.57220458984375, + "step": 4175 + }, + { + "epoch": 6.7, + "learning_rate": 1.4179548156956002e-07, + "logits/chosen": -1.6565531492233276, + "logits/rejected": -1.7107784748077393, + "logps/chosen": -136.58639526367188, + "logps/rejected": -307.6034240722656, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.976609706878662, + "rewards/margins": 17.073862075805664, + "rewards/rejected": -22.05047035217285, + "step": 4176 + }, + { + "epoch": 6.7, + "learning_rate": 1.4169639318271898e-07, + "logits/chosen": -1.566839575767517, + "logits/rejected": -1.6329126358032227, + "logps/chosen": -106.99298095703125, + "logps/rejected": -292.76776123046875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.8332653045654297, + "rewards/margins": 18.02234649658203, + "rewards/rejected": -21.855613708496094, + "step": 4177 + }, + { + "epoch": 6.71, + "learning_rate": 1.415973047958779e-07, + "logits/chosen": -1.628218650817871, + "logits/rejected": -1.558458924293518, + "logps/chosen": -152.14390563964844, + "logps/rejected": -279.933837890625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.546247959136963, + "rewards/margins": 14.633050918579102, + "rewards/rejected": -20.179298400878906, + "step": 4178 + }, + { + "epoch": 6.71, + "learning_rate": 1.4149821640903687e-07, + "logits/chosen": -1.5350837707519531, + "logits/rejected": -1.5468717813491821, + "logps/chosen": -203.40269470214844, + "logps/rejected": -345.151123046875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.929265975952148, + "rewards/margins": 14.341625213623047, + "rewards/rejected": -23.270893096923828, + "step": 4179 + }, + { + "epoch": 6.71, + "learning_rate": 1.4139912802219578e-07, + "logits/chosen": -1.3904941082000732, + "logits/rejected": -1.4066163301467896, + "logps/chosen": -118.10150146484375, + "logps/rejected": -252.86570739746094, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.503152370452881, + "rewards/margins": 12.195387840270996, + "rewards/rejected": -17.69853973388672, + "step": 4180 + }, + { + "epoch": 6.71, + "learning_rate": 1.413000396353547e-07, + "logits/chosen": -1.712864637374878, + "logits/rejected": -1.6083701848983765, + "logps/chosen": -178.15542602539062, + "logps/rejected": -300.3887939453125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.919681549072266, + "rewards/margins": 14.121657371520996, + "rewards/rejected": -22.041339874267578, + "step": 4181 + }, + { + "epoch": 6.71, + "learning_rate": 1.4120095124851367e-07, + "logits/chosen": -1.4576765298843384, + "logits/rejected": -1.5443769693374634, + "logps/chosen": -159.44192504882812, + "logps/rejected": -333.700439453125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.9225640296936035, + "rewards/margins": 17.705093383789062, + "rewards/rejected": -23.627656936645508, + "step": 4182 + }, + { + "epoch": 6.71, + "learning_rate": 1.411018628616726e-07, + "logits/chosen": -1.525565266609192, + "logits/rejected": -1.4863661527633667, + "logps/chosen": -176.59548950195312, + "logps/rejected": -347.2818908691406, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.023497581481934, + "rewards/margins": 15.380790710449219, + "rewards/rejected": -24.40428924560547, + "step": 4183 + }, + { + "epoch": 6.72, + "learning_rate": 1.4100277447483157e-07, + "logits/chosen": -1.5409024953842163, + "logits/rejected": -1.554563045501709, + "logps/chosen": -144.6818084716797, + "logps/rejected": -256.3326416015625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.2471771240234375, + "rewards/margins": 13.475988388061523, + "rewards/rejected": -18.72316551208496, + "step": 4184 + }, + { + "epoch": 6.72, + "learning_rate": 1.4090368608799047e-07, + "logits/chosen": -1.5026435852050781, + "logits/rejected": -1.5118991136550903, + "logps/chosen": -161.51602172851562, + "logps/rejected": -296.0475769042969, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.294412612915039, + "rewards/margins": 13.584609985351562, + "rewards/rejected": -21.8790225982666, + "step": 4185 + }, + { + "epoch": 6.72, + "learning_rate": 1.408045977011494e-07, + "logits/chosen": -1.3960626125335693, + "logits/rejected": -1.485150694847107, + "logps/chosen": -182.22268676757812, + "logps/rejected": -351.0351867675781, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.836528778076172, + "rewards/margins": 16.27189826965332, + "rewards/rejected": -26.10842514038086, + "step": 4186 + }, + { + "epoch": 6.72, + "learning_rate": 1.4070550931430837e-07, + "logits/chosen": -1.7648355960845947, + "logits/rejected": -1.7423571348190308, + "logps/chosen": -99.65127563476562, + "logps/rejected": -260.027587890625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.537397861480713, + "rewards/margins": 15.914102554321289, + "rewards/rejected": -17.451499938964844, + "step": 4187 + }, + { + "epoch": 6.72, + "learning_rate": 1.406064209274673e-07, + "logits/chosen": -1.6282294988632202, + "logits/rejected": -1.5688549280166626, + "logps/chosen": -147.1982421875, + "logps/rejected": -273.07373046875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.969296455383301, + "rewards/margins": 12.682306289672852, + "rewards/rejected": -18.65160369873047, + "step": 4188 + }, + { + "epoch": 6.72, + "learning_rate": 1.4050733254062623e-07, + "logits/chosen": -1.6054474115371704, + "logits/rejected": -1.4427701234817505, + "logps/chosen": -175.18902587890625, + "logps/rejected": -264.67578125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.52860689163208, + "rewards/margins": 11.988675117492676, + "rewards/rejected": -18.51728057861328, + "step": 4189 + }, + { + "epoch": 6.73, + "learning_rate": 1.4040824415378517e-07, + "logits/chosen": -1.4576466083526611, + "logits/rejected": -1.4606050252914429, + "logps/chosen": -96.95005798339844, + "logps/rejected": -286.34759521484375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3959801197052, + "rewards/margins": 18.908754348754883, + "rewards/rejected": -22.30473518371582, + "step": 4190 + }, + { + "epoch": 6.73, + "learning_rate": 1.403091557669441e-07, + "logits/chosen": -1.4272748231887817, + "logits/rejected": -1.3189821243286133, + "logps/chosen": -179.88076782226562, + "logps/rejected": -301.047607421875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.990093231201172, + "rewards/margins": 12.916566848754883, + "rewards/rejected": -21.906660079956055, + "step": 4191 + }, + { + "epoch": 6.73, + "learning_rate": 1.4021006738010306e-07, + "logits/chosen": -1.3107473850250244, + "logits/rejected": -1.4377899169921875, + "logps/chosen": -126.6053466796875, + "logps/rejected": -317.40521240234375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.537108898162842, + "rewards/margins": 16.6176700592041, + "rewards/rejected": -22.1547794342041, + "step": 4192 + }, + { + "epoch": 6.73, + "learning_rate": 1.40110978993262e-07, + "logits/chosen": -1.4734348058700562, + "logits/rejected": -1.4387104511260986, + "logps/chosen": -133.36573791503906, + "logps/rejected": -292.3707275390625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.8218231201171875, + "rewards/margins": 14.826764106750488, + "rewards/rejected": -20.648588180541992, + "step": 4193 + }, + { + "epoch": 6.73, + "learning_rate": 1.4001189060642092e-07, + "logits/chosen": -1.344437599182129, + "logits/rejected": -1.2959506511688232, + "logps/chosen": -141.65847778320312, + "logps/rejected": -263.14752197265625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.050051689147949, + "rewards/margins": 12.05284595489502, + "rewards/rejected": -18.10289764404297, + "step": 4194 + }, + { + "epoch": 6.73, + "learning_rate": 1.3991280221957986e-07, + "logits/chosen": -1.5837688446044922, + "logits/rejected": -1.6362450122833252, + "logps/chosen": -164.06332397460938, + "logps/rejected": -358.36041259765625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.501842498779297, + "rewards/margins": 17.540061950683594, + "rewards/rejected": -24.04190444946289, + "step": 4195 + }, + { + "epoch": 6.74, + "learning_rate": 1.398137138327388e-07, + "logits/chosen": -1.6112642288208008, + "logits/rejected": -1.573140025138855, + "logps/chosen": -164.20797729492188, + "logps/rejected": -275.2939453125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.3556671142578125, + "rewards/margins": 12.353065490722656, + "rewards/rejected": -19.70873260498047, + "step": 4196 + }, + { + "epoch": 6.74, + "learning_rate": 1.3971462544589775e-07, + "logits/chosen": -1.4641635417938232, + "logits/rejected": -1.4328367710113525, + "logps/chosen": -159.75201416015625, + "logps/rejected": -280.0947265625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.2330827713012695, + "rewards/margins": 13.1507568359375, + "rewards/rejected": -20.383840560913086, + "step": 4197 + }, + { + "epoch": 6.74, + "learning_rate": 1.3961553705905668e-07, + "logits/chosen": -1.5324251651763916, + "logits/rejected": -1.5155521631240845, + "logps/chosen": -109.0118179321289, + "logps/rejected": -241.6544647216797, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.1436262130737305, + "rewards/margins": 13.517034530639648, + "rewards/rejected": -17.660659790039062, + "step": 4198 + }, + { + "epoch": 6.74, + "learning_rate": 1.395164486722156e-07, + "logits/chosen": -1.5373634099960327, + "logits/rejected": -1.570500135421753, + "logps/chosen": -195.3729248046875, + "logps/rejected": -345.05096435546875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.909837245941162, + "rewards/margins": 14.379840850830078, + "rewards/rejected": -22.289676666259766, + "step": 4199 + }, + { + "epoch": 6.74, + "learning_rate": 1.3941736028537455e-07, + "logits/chosen": -1.4142670631408691, + "logits/rejected": -1.3718929290771484, + "logps/chosen": -176.2945556640625, + "logps/rejected": -275.48077392578125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.357383728027344, + "rewards/margins": 11.141830444335938, + "rewards/rejected": -19.49921417236328, + "step": 4200 + }, + { + "epoch": 6.74, + "learning_rate": 1.3931827189853348e-07, + "logits/chosen": -1.416426181793213, + "logits/rejected": -1.3661353588104248, + "logps/chosen": -180.42660522460938, + "logps/rejected": -301.6062927246094, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.546095848083496, + "rewards/margins": 12.149117469787598, + "rewards/rejected": -20.695213317871094, + "step": 4201 + }, + { + "epoch": 6.74, + "learning_rate": 1.3921918351169242e-07, + "logits/chosen": -1.502349853515625, + "logits/rejected": -1.5710465908050537, + "logps/chosen": -165.03587341308594, + "logps/rejected": -269.8828125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.48328971862793, + "rewards/margins": 12.111289978027344, + "rewards/rejected": -19.594579696655273, + "step": 4202 + }, + { + "epoch": 6.75, + "learning_rate": 1.3912009512485138e-07, + "logits/chosen": -1.471245288848877, + "logits/rejected": -1.5619478225708008, + "logps/chosen": -199.80055236816406, + "logps/rejected": -381.7022399902344, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.83605670928955, + "rewards/margins": 15.710522651672363, + "rewards/rejected": -25.54657745361328, + "step": 4203 + }, + { + "epoch": 6.75, + "learning_rate": 1.3902100673801028e-07, + "logits/chosen": -1.6255221366882324, + "logits/rejected": -1.5882902145385742, + "logps/chosen": -114.98757934570312, + "logps/rejected": -282.79827880859375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.2930526733398438, + "rewards/margins": 17.85187530517578, + "rewards/rejected": -21.144927978515625, + "step": 4204 + }, + { + "epoch": 6.75, + "learning_rate": 1.3892191835116924e-07, + "logits/chosen": -1.355288028717041, + "logits/rejected": -1.3818347454071045, + "logps/chosen": -84.55691528320312, + "logps/rejected": -225.534423828125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8436410427093506, + "rewards/margins": 12.789389610290527, + "rewards/rejected": -15.633030891418457, + "step": 4205 + }, + { + "epoch": 6.75, + "learning_rate": 1.3882282996432818e-07, + "logits/chosen": -1.4046738147735596, + "logits/rejected": -1.3920999765396118, + "logps/chosen": -169.51559448242188, + "logps/rejected": -317.60003662109375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.968656539916992, + "rewards/margins": 14.347142219543457, + "rewards/rejected": -23.315799713134766, + "step": 4206 + }, + { + "epoch": 6.75, + "learning_rate": 1.387237415774871e-07, + "logits/chosen": -1.3879774808883667, + "logits/rejected": -1.3791463375091553, + "logps/chosen": -185.00335693359375, + "logps/rejected": -296.15008544921875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.037274360656738, + "rewards/margins": 11.37918472290039, + "rewards/rejected": -20.416458129882812, + "step": 4207 + }, + { + "epoch": 6.75, + "learning_rate": 1.3862465319064607e-07, + "logits/chosen": -1.5228712558746338, + "logits/rejected": -1.5581618547439575, + "logps/chosen": -145.5684356689453, + "logps/rejected": -322.1065673828125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.948997497558594, + "rewards/margins": 15.489208221435547, + "rewards/rejected": -21.43820571899414, + "step": 4208 + }, + { + "epoch": 6.76, + "learning_rate": 1.3852556480380498e-07, + "logits/chosen": -1.4421032667160034, + "logits/rejected": -1.477669596672058, + "logps/chosen": -199.07562255859375, + "logps/rejected": -302.7087097167969, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.027076721191406, + "rewards/margins": 9.495296478271484, + "rewards/rejected": -20.52237319946289, + "step": 4209 + }, + { + "epoch": 6.76, + "learning_rate": 1.384264764169639e-07, + "logits/chosen": -1.542765498161316, + "logits/rejected": -1.5292882919311523, + "logps/chosen": -128.90863037109375, + "logps/rejected": -249.88949584960938, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.555241823196411, + "rewards/margins": 13.459095001220703, + "rewards/rejected": -16.01433563232422, + "step": 4210 + }, + { + "epoch": 6.76, + "learning_rate": 1.3832738803012287e-07, + "logits/chosen": -1.3053468465805054, + "logits/rejected": -1.4114680290222168, + "logps/chosen": -185.9967498779297, + "logps/rejected": -381.86553955078125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.74640941619873, + "rewards/margins": 14.839594841003418, + "rewards/rejected": -23.58600425720215, + "step": 4211 + }, + { + "epoch": 6.76, + "learning_rate": 1.382282996432818e-07, + "logits/chosen": -1.5004873275756836, + "logits/rejected": -1.3799601793289185, + "logps/chosen": -159.48114013671875, + "logps/rejected": -239.1923370361328, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.310884475708008, + "rewards/margins": 12.190966606140137, + "rewards/rejected": -17.501850128173828, + "step": 4212 + }, + { + "epoch": 6.76, + "learning_rate": 1.3812921125644074e-07, + "logits/chosen": -1.6431342363357544, + "logits/rejected": -1.6274620294570923, + "logps/chosen": -152.58880615234375, + "logps/rejected": -253.568603515625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.372913360595703, + "rewards/margins": 11.662158012390137, + "rewards/rejected": -18.035072326660156, + "step": 4213 + }, + { + "epoch": 6.76, + "learning_rate": 1.3803012286959967e-07, + "logits/chosen": -1.406298041343689, + "logits/rejected": -1.4026992321014404, + "logps/chosen": -152.84335327148438, + "logps/rejected": -295.94561767578125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.797030925750732, + "rewards/margins": 14.592645645141602, + "rewards/rejected": -21.389677047729492, + "step": 4214 + }, + { + "epoch": 6.77, + "learning_rate": 1.379310344827586e-07, + "logits/chosen": -1.3578693866729736, + "logits/rejected": -1.3330128192901611, + "logps/chosen": -178.56866455078125, + "logps/rejected": -332.51531982421875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.198840141296387, + "rewards/margins": 17.167245864868164, + "rewards/rejected": -25.366086959838867, + "step": 4215 + }, + { + "epoch": 6.77, + "learning_rate": 1.3783194609591756e-07, + "logits/chosen": -1.646689534187317, + "logits/rejected": -1.7635502815246582, + "logps/chosen": -121.24423217773438, + "logps/rejected": -271.32440185546875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.927326202392578, + "rewards/margins": 14.422897338867188, + "rewards/rejected": -19.350223541259766, + "step": 4216 + }, + { + "epoch": 6.77, + "learning_rate": 1.377328577090765e-07, + "logits/chosen": -1.3439942598342896, + "logits/rejected": -1.313960075378418, + "logps/chosen": -142.451416015625, + "logps/rejected": -256.6858215332031, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.165729522705078, + "rewards/margins": 12.596446990966797, + "rewards/rejected": -18.762176513671875, + "step": 4217 + }, + { + "epoch": 6.77, + "learning_rate": 1.376337693222354e-07, + "logits/chosen": -1.4488309621810913, + "logits/rejected": -1.455206274986267, + "logps/chosen": -124.57482147216797, + "logps/rejected": -352.192138671875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.6294145584106445, + "rewards/margins": 20.568099975585938, + "rewards/rejected": -26.1975154876709, + "step": 4218 + }, + { + "epoch": 6.77, + "learning_rate": 1.3753468093539436e-07, + "logits/chosen": -1.481250286102295, + "logits/rejected": -1.5213878154754639, + "logps/chosen": -92.03909301757812, + "logps/rejected": -273.85400390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.907594919204712, + "rewards/margins": 17.696212768554688, + "rewards/rejected": -19.603809356689453, + "step": 4219 + }, + { + "epoch": 6.77, + "learning_rate": 1.374355925485533e-07, + "logits/chosen": -1.4872835874557495, + "logits/rejected": -1.4210989475250244, + "logps/chosen": -131.97666931152344, + "logps/rejected": -246.2215118408203, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.808626174926758, + "rewards/margins": 12.157038688659668, + "rewards/rejected": -17.965665817260742, + "step": 4220 + }, + { + "epoch": 6.78, + "learning_rate": 1.3733650416171226e-07, + "logits/chosen": -1.4906489849090576, + "logits/rejected": -1.5348397493362427, + "logps/chosen": -178.25982666015625, + "logps/rejected": -323.1091003417969, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.397759914398193, + "rewards/margins": 14.381185531616211, + "rewards/rejected": -21.778945922851562, + "step": 4221 + }, + { + "epoch": 6.78, + "learning_rate": 1.372374157748712e-07, + "logits/chosen": -1.367074728012085, + "logits/rejected": -1.4018521308898926, + "logps/chosen": -159.77740478515625, + "logps/rejected": -316.6710205078125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.5631303787231445, + "rewards/margins": 14.684538841247559, + "rewards/rejected": -22.247669219970703, + "step": 4222 + }, + { + "epoch": 6.78, + "learning_rate": 1.371383273880301e-07, + "logits/chosen": -1.2958322763442993, + "logits/rejected": -1.341019868850708, + "logps/chosen": -139.02597045898438, + "logps/rejected": -310.98291015625, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.969642639160156, + "rewards/margins": 14.775911331176758, + "rewards/rejected": -21.745555877685547, + "step": 4223 + }, + { + "epoch": 6.78, + "learning_rate": 1.3703923900118906e-07, + "logits/chosen": -1.4638042449951172, + "logits/rejected": -1.5520609617233276, + "logps/chosen": -174.69342041015625, + "logps/rejected": -304.9420166015625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.791754245758057, + "rewards/margins": 12.500545501708984, + "rewards/rejected": -19.292301177978516, + "step": 4224 + }, + { + "epoch": 6.78, + "learning_rate": 1.36940150614348e-07, + "logits/chosen": -1.3869901895523071, + "logits/rejected": -1.4309580326080322, + "logps/chosen": -160.62710571289062, + "logps/rejected": -301.4296875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.455282211303711, + "rewards/margins": 12.66466999053955, + "rewards/rejected": -20.119953155517578, + "step": 4225 + }, + { + "epoch": 6.78, + "learning_rate": 1.3684106222750695e-07, + "logits/chosen": -1.607903242111206, + "logits/rejected": -1.6546087265014648, + "logps/chosen": -92.54154968261719, + "logps/rejected": -250.07321166992188, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.969646692276001, + "rewards/margins": 14.390527725219727, + "rewards/rejected": -17.360172271728516, + "step": 4226 + }, + { + "epoch": 6.78, + "learning_rate": 1.3674197384066588e-07, + "logits/chosen": -1.4558486938476562, + "logits/rejected": -1.5134347677230835, + "logps/chosen": -147.99209594726562, + "logps/rejected": -322.4406433105469, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.608140468597412, + "rewards/margins": 15.668767929077148, + "rewards/rejected": -20.27690887451172, + "step": 4227 + }, + { + "epoch": 6.79, + "learning_rate": 1.366428854538248e-07, + "logits/chosen": -1.452075481414795, + "logits/rejected": -1.4377785921096802, + "logps/chosen": -221.9173126220703, + "logps/rejected": -350.39691162109375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -13.757213592529297, + "rewards/margins": 11.450274467468262, + "rewards/rejected": -25.207489013671875, + "step": 4228 + }, + { + "epoch": 6.79, + "learning_rate": 1.3654379706698375e-07, + "logits/chosen": -1.5230522155761719, + "logits/rejected": -1.609006404876709, + "logps/chosen": -186.92562866210938, + "logps/rejected": -365.34649658203125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.43431282043457, + "rewards/margins": 16.729055404663086, + "rewards/rejected": -26.163368225097656, + "step": 4229 + }, + { + "epoch": 6.79, + "learning_rate": 1.3644470868014268e-07, + "logits/chosen": -1.4394872188568115, + "logits/rejected": -1.4731945991516113, + "logps/chosen": -126.47897338867188, + "logps/rejected": -329.49090576171875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.604137897491455, + "rewards/margins": 18.451519012451172, + "rewards/rejected": -23.0556583404541, + "step": 4230 + }, + { + "epoch": 6.79, + "learning_rate": 1.3634562029330162e-07, + "logits/chosen": -1.4288427829742432, + "logits/rejected": -1.3802759647369385, + "logps/chosen": -125.47401428222656, + "logps/rejected": -231.2393798828125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.3236517906188965, + "rewards/margins": 12.081584930419922, + "rewards/rejected": -16.405237197875977, + "step": 4231 + }, + { + "epoch": 6.79, + "learning_rate": 1.3624653190646055e-07, + "logits/chosen": -1.4611788988113403, + "logits/rejected": -1.5623860359191895, + "logps/chosen": -125.67829132080078, + "logps/rejected": -285.93658447265625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.990811824798584, + "rewards/margins": 15.615158081054688, + "rewards/rejected": -20.60597038269043, + "step": 4232 + }, + { + "epoch": 6.79, + "learning_rate": 1.3614744351961948e-07, + "logits/chosen": -1.3753681182861328, + "logits/rejected": -1.3948185443878174, + "logps/chosen": -190.4010772705078, + "logps/rejected": -320.72198486328125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.330679893493652, + "rewards/margins": 12.345712661743164, + "rewards/rejected": -22.676393508911133, + "step": 4233 + }, + { + "epoch": 6.8, + "learning_rate": 1.3604835513277844e-07, + "logits/chosen": -1.4113807678222656, + "logits/rejected": -1.339273452758789, + "logps/chosen": -203.28053283691406, + "logps/rejected": -315.11187744140625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.20056438446045, + "rewards/margins": 11.58791732788086, + "rewards/rejected": -21.788480758666992, + "step": 4234 + }, + { + "epoch": 6.8, + "learning_rate": 1.3594926674593738e-07, + "logits/chosen": -1.5670324563980103, + "logits/rejected": -1.6419540643692017, + "logps/chosen": -109.01220703125, + "logps/rejected": -258.7642822265625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.8818726539611816, + "rewards/margins": 12.2427978515625, + "rewards/rejected": -16.124670028686523, + "step": 4235 + }, + { + "epoch": 6.8, + "learning_rate": 1.358501783590963e-07, + "logits/chosen": -1.4653048515319824, + "logits/rejected": -1.5415207147598267, + "logps/chosen": -133.68951416015625, + "logps/rejected": -271.817138671875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.913015365600586, + "rewards/margins": 12.05225944519043, + "rewards/rejected": -18.965274810791016, + "step": 4236 + }, + { + "epoch": 6.8, + "learning_rate": 1.3575108997225524e-07, + "logits/chosen": -1.3659536838531494, + "logits/rejected": -1.342930793762207, + "logps/chosen": -117.63931274414062, + "logps/rejected": -216.10165405273438, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.422656536102295, + "rewards/margins": 11.153700828552246, + "rewards/rejected": -15.576356887817383, + "step": 4237 + }, + { + "epoch": 6.8, + "learning_rate": 1.3565200158541418e-07, + "logits/chosen": -1.392491102218628, + "logits/rejected": -1.4203555583953857, + "logps/chosen": -156.10675048828125, + "logps/rejected": -298.53973388671875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.859453201293945, + "rewards/margins": 13.579512596130371, + "rewards/rejected": -20.438966751098633, + "step": 4238 + }, + { + "epoch": 6.8, + "learning_rate": 1.355529131985731e-07, + "logits/chosen": -1.444943904876709, + "logits/rejected": -1.3559720516204834, + "logps/chosen": -185.278076171875, + "logps/rejected": -307.0498046875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.274322509765625, + "rewards/margins": 13.988917350769043, + "rewards/rejected": -22.263240814208984, + "step": 4239 + }, + { + "epoch": 6.81, + "learning_rate": 1.3545382481173207e-07, + "logits/chosen": -1.5708236694335938, + "logits/rejected": -1.545567512512207, + "logps/chosen": -139.77146911621094, + "logps/rejected": -270.19512939453125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.674892902374268, + "rewards/margins": 13.2164306640625, + "rewards/rejected": -18.89132308959961, + "step": 4240 + }, + { + "epoch": 6.81, + "learning_rate": 1.35354736424891e-07, + "logits/chosen": -1.382473349571228, + "logits/rejected": -1.5197217464447021, + "logps/chosen": -89.27389526367188, + "logps/rejected": -238.52078247070312, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.6011040210723877, + "rewards/margins": 13.426082611083984, + "rewards/rejected": -17.02718734741211, + "step": 4241 + }, + { + "epoch": 6.81, + "learning_rate": 1.3525564803804993e-07, + "logits/chosen": -1.5791114568710327, + "logits/rejected": -1.6141993999481201, + "logps/chosen": -192.59625244140625, + "logps/rejected": -286.80072021484375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.338299751281738, + "rewards/margins": 10.783578872680664, + "rewards/rejected": -20.121877670288086, + "step": 4242 + }, + { + "epoch": 6.81, + "learning_rate": 1.3515655965120887e-07, + "logits/chosen": -1.5115331411361694, + "logits/rejected": -1.51179039478302, + "logps/chosen": -171.96302795410156, + "logps/rejected": -334.5011291503906, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.919258117675781, + "rewards/margins": 16.53603744506836, + "rewards/rejected": -24.45529556274414, + "step": 4243 + }, + { + "epoch": 6.81, + "learning_rate": 1.350574712643678e-07, + "logits/chosen": -1.4239180088043213, + "logits/rejected": -1.4461281299591064, + "logps/chosen": -102.62583923339844, + "logps/rejected": -223.0908966064453, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.8368186950683594, + "rewards/margins": 12.051172256469727, + "rewards/rejected": -15.887989044189453, + "step": 4244 + }, + { + "epoch": 6.81, + "learning_rate": 1.3495838287752676e-07, + "logits/chosen": -1.4536802768707275, + "logits/rejected": -1.442291498184204, + "logps/chosen": -135.4903564453125, + "logps/rejected": -233.75701904296875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.6245832443237305, + "rewards/margins": 10.887350082397461, + "rewards/rejected": -16.511934280395508, + "step": 4245 + }, + { + "epoch": 6.82, + "learning_rate": 1.348592944906857e-07, + "logits/chosen": -1.4449113607406616, + "logits/rejected": -1.4207689762115479, + "logps/chosen": -229.13827514648438, + "logps/rejected": -321.19879150390625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.323939323425293, + "rewards/margins": 12.243558883666992, + "rewards/rejected": -23.56749725341797, + "step": 4246 + }, + { + "epoch": 6.82, + "learning_rate": 1.3476020610384463e-07, + "logits/chosen": -1.307510256767273, + "logits/rejected": -1.3112220764160156, + "logps/chosen": -165.0018310546875, + "logps/rejected": -280.0767517089844, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.487719535827637, + "rewards/margins": 13.49551773071289, + "rewards/rejected": -20.983238220214844, + "step": 4247 + }, + { + "epoch": 6.82, + "learning_rate": 1.3466111771700356e-07, + "logits/chosen": -1.545485258102417, + "logits/rejected": -1.4272127151489258, + "logps/chosen": -216.63613891601562, + "logps/rejected": -303.2686767578125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.721135139465332, + "rewards/margins": 11.529053688049316, + "rewards/rejected": -22.25018882751465, + "step": 4248 + }, + { + "epoch": 6.82, + "learning_rate": 1.345620293301625e-07, + "logits/chosen": -1.4033474922180176, + "logits/rejected": -1.3280503749847412, + "logps/chosen": -159.3905029296875, + "logps/rejected": -256.2696533203125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.223450183868408, + "rewards/margins": 11.601327896118164, + "rewards/rejected": -17.824777603149414, + "step": 4249 + }, + { + "epoch": 6.82, + "learning_rate": 1.3446294094332145e-07, + "logits/chosen": -1.4835851192474365, + "logits/rejected": -1.4732414484024048, + "logps/chosen": -155.80023193359375, + "logps/rejected": -312.1395263671875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.359098434448242, + "rewards/margins": 14.757181167602539, + "rewards/rejected": -22.11627960205078, + "step": 4250 + }, + { + "epoch": 6.82, + "learning_rate": 1.3436385255648036e-07, + "logits/chosen": -1.4709765911102295, + "logits/rejected": -1.4556918144226074, + "logps/chosen": -158.65200805664062, + "logps/rejected": -336.28106689453125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.515510559082031, + "rewards/margins": 16.66599464416504, + "rewards/rejected": -23.18150520324707, + "step": 4251 + }, + { + "epoch": 6.83, + "learning_rate": 1.342647641696393e-07, + "logits/chosen": -1.579154372215271, + "logits/rejected": -1.5992822647094727, + "logps/chosen": -145.5520782470703, + "logps/rejected": -269.52178955078125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.703699588775635, + "rewards/margins": 12.847330093383789, + "rewards/rejected": -18.551029205322266, + "step": 4252 + }, + { + "epoch": 6.83, + "learning_rate": 1.3416567578279825e-07, + "logits/chosen": -1.5302876234054565, + "logits/rejected": -1.4907629489898682, + "logps/chosen": -167.96246337890625, + "logps/rejected": -247.34103393554688, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.5380659103393555, + "rewards/margins": 9.099124908447266, + "rewards/rejected": -16.637189865112305, + "step": 4253 + }, + { + "epoch": 6.83, + "learning_rate": 1.340665873959572e-07, + "logits/chosen": -1.4703125953674316, + "logits/rejected": -1.5003622770309448, + "logps/chosen": -110.11369323730469, + "logps/rejected": -228.05165100097656, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.230956554412842, + "rewards/margins": 11.44955062866211, + "rewards/rejected": -16.68050765991211, + "step": 4254 + }, + { + "epoch": 6.83, + "learning_rate": 1.3396749900911615e-07, + "logits/chosen": -1.5509713888168335, + "logits/rejected": -1.5155510902404785, + "logps/chosen": -115.55642700195312, + "logps/rejected": -217.81393432617188, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.608709335327148, + "rewards/margins": 11.265159606933594, + "rewards/rejected": -15.873870849609375, + "step": 4255 + }, + { + "epoch": 6.83, + "learning_rate": 1.3386841062227505e-07, + "logits/chosen": -1.3940749168395996, + "logits/rejected": -1.4406899213790894, + "logps/chosen": -180.99130249023438, + "logps/rejected": -315.6795349121094, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.061578750610352, + "rewards/margins": 12.910501480102539, + "rewards/rejected": -22.97208023071289, + "step": 4256 + }, + { + "epoch": 6.83, + "learning_rate": 1.33769322235434e-07, + "logits/chosen": -1.4535274505615234, + "logits/rejected": -1.528532862663269, + "logps/chosen": -157.77195739746094, + "logps/rejected": -293.68701171875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.224557876586914, + "rewards/margins": 11.541576385498047, + "rewards/rejected": -19.76613426208496, + "step": 4257 + }, + { + "epoch": 6.83, + "learning_rate": 1.3367023384859295e-07, + "logits/chosen": -1.4985743761062622, + "logits/rejected": -1.4585516452789307, + "logps/chosen": -206.73086547851562, + "logps/rejected": -338.92230224609375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.333765029907227, + "rewards/margins": 14.604330062866211, + "rewards/rejected": -25.938095092773438, + "step": 4258 + }, + { + "epoch": 6.84, + "learning_rate": 1.3357114546175188e-07, + "logits/chosen": -1.5046782493591309, + "logits/rejected": -1.495187759399414, + "logps/chosen": -160.5237579345703, + "logps/rejected": -296.5344543457031, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.188738822937012, + "rewards/margins": 13.967012405395508, + "rewards/rejected": -21.155752182006836, + "step": 4259 + }, + { + "epoch": 6.84, + "learning_rate": 1.3347205707491081e-07, + "logits/chosen": -1.5589667558670044, + "logits/rejected": -1.6188794374465942, + "logps/chosen": -135.3613739013672, + "logps/rejected": -298.66302490234375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.690541744232178, + "rewards/margins": 13.391782760620117, + "rewards/rejected": -20.082324981689453, + "step": 4260 + }, + { + "epoch": 6.84, + "learning_rate": 1.3337296868806975e-07, + "logits/chosen": -1.4349784851074219, + "logits/rejected": -1.4999220371246338, + "logps/chosen": -150.71524047851562, + "logps/rejected": -334.1357727050781, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.589204788208008, + "rewards/margins": 17.417346954345703, + "rewards/rejected": -25.006553649902344, + "step": 4261 + }, + { + "epoch": 6.84, + "learning_rate": 1.3327388030122868e-07, + "logits/chosen": -1.5153224468231201, + "logits/rejected": -1.3695610761642456, + "logps/chosen": -179.51065063476562, + "logps/rejected": -256.43719482421875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.449282169342041, + "rewards/margins": 11.090581893920898, + "rewards/rejected": -18.53986358642578, + "step": 4262 + }, + { + "epoch": 6.84, + "learning_rate": 1.3317479191438764e-07, + "logits/chosen": -1.5446449518203735, + "logits/rejected": -1.5583688020706177, + "logps/chosen": -142.17074584960938, + "logps/rejected": -327.21966552734375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.682271480560303, + "rewards/margins": 17.637310028076172, + "rewards/rejected": -23.319583892822266, + "step": 4263 + }, + { + "epoch": 6.84, + "learning_rate": 1.3307570352754657e-07, + "logits/chosen": -1.3670862913131714, + "logits/rejected": -1.4265691041946411, + "logps/chosen": -183.40567016601562, + "logps/rejected": -296.88665771484375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.634556770324707, + "rewards/margins": 11.373229026794434, + "rewards/rejected": -21.00778579711914, + "step": 4264 + }, + { + "epoch": 6.85, + "learning_rate": 1.329766151407055e-07, + "logits/chosen": -1.535935401916504, + "logits/rejected": -1.5516026020050049, + "logps/chosen": -174.38327026367188, + "logps/rejected": -303.5965270996094, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.910573959350586, + "rewards/margins": 12.454301834106445, + "rewards/rejected": -21.36487579345703, + "step": 4265 + }, + { + "epoch": 6.85, + "learning_rate": 1.3287752675386444e-07, + "logits/chosen": -1.4564298391342163, + "logits/rejected": -1.4800231456756592, + "logps/chosen": -149.65760803222656, + "logps/rejected": -288.11639404296875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.360375881195068, + "rewards/margins": 13.698225975036621, + "rewards/rejected": -21.05860137939453, + "step": 4266 + }, + { + "epoch": 6.85, + "learning_rate": 1.3277843836702337e-07, + "logits/chosen": -1.3553333282470703, + "logits/rejected": -1.3605999946594238, + "logps/chosen": -191.07833862304688, + "logps/rejected": -290.35400390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.556795120239258, + "rewards/margins": 11.366944313049316, + "rewards/rejected": -20.923738479614258, + "step": 4267 + }, + { + "epoch": 6.85, + "learning_rate": 1.3267934998018233e-07, + "logits/chosen": -1.498841404914856, + "logits/rejected": -1.5227713584899902, + "logps/chosen": -143.8974609375, + "logps/rejected": -300.13287353515625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.49031400680542, + "rewards/margins": 15.746574401855469, + "rewards/rejected": -22.236888885498047, + "step": 4268 + }, + { + "epoch": 6.85, + "learning_rate": 1.3258026159334127e-07, + "logits/chosen": -1.5693347454071045, + "logits/rejected": -1.6507294178009033, + "logps/chosen": -120.5625, + "logps/rejected": -295.6064453125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.028434753417969, + "rewards/margins": 14.605969429016113, + "rewards/rejected": -19.6344051361084, + "step": 4269 + }, + { + "epoch": 6.85, + "learning_rate": 1.3248117320650017e-07, + "logits/chosen": -1.4581916332244873, + "logits/rejected": -1.4435745477676392, + "logps/chosen": -106.74722290039062, + "logps/rejected": -270.0750732421875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.826873540878296, + "rewards/margins": 17.50199317932129, + "rewards/rejected": -20.32886505126953, + "step": 4270 + }, + { + "epoch": 6.86, + "learning_rate": 1.3238208481965913e-07, + "logits/chosen": -1.4398397207260132, + "logits/rejected": -1.4245291948318481, + "logps/chosen": -154.8601531982422, + "logps/rejected": -303.6233825683594, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.262284755706787, + "rewards/margins": 13.917516708374023, + "rewards/rejected": -20.17980194091797, + "step": 4271 + }, + { + "epoch": 6.86, + "learning_rate": 1.3228299643281807e-07, + "logits/chosen": -1.5410716533660889, + "logits/rejected": -1.569474220275879, + "logps/chosen": -159.18771362304688, + "logps/rejected": -357.85089111328125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.405728340148926, + "rewards/margins": 18.594573974609375, + "rewards/rejected": -26.000303268432617, + "step": 4272 + }, + { + "epoch": 6.86, + "learning_rate": 1.32183908045977e-07, + "logits/chosen": -1.4270234107971191, + "logits/rejected": -1.537653923034668, + "logps/chosen": -176.86102294921875, + "logps/rejected": -385.6090087890625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.321099281311035, + "rewards/margins": 18.798484802246094, + "rewards/rejected": -28.119586944580078, + "step": 4273 + }, + { + "epoch": 6.86, + "learning_rate": 1.3208481965913596e-07, + "logits/chosen": -1.4427878856658936, + "logits/rejected": -1.4544813632965088, + "logps/chosen": -224.9772491455078, + "logps/rejected": -332.191650390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.838093757629395, + "rewards/margins": 12.031434059143066, + "rewards/rejected": -24.86952781677246, + "step": 4274 + }, + { + "epoch": 6.86, + "learning_rate": 1.3198573127229487e-07, + "logits/chosen": -1.4862805604934692, + "logits/rejected": -1.5148365497589111, + "logps/chosen": -123.48849487304688, + "logps/rejected": -270.2886962890625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.962425231933594, + "rewards/margins": 13.866817474365234, + "rewards/rejected": -19.829242706298828, + "step": 4275 + }, + { + "epoch": 6.86, + "learning_rate": 1.3188664288545383e-07, + "logits/chosen": -1.6777883768081665, + "logits/rejected": -1.7043771743774414, + "logps/chosen": -166.06011962890625, + "logps/rejected": -279.2103271484375, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.672287940979004, + "rewards/margins": 11.566886901855469, + "rewards/rejected": -19.239173889160156, + "step": 4276 + }, + { + "epoch": 6.87, + "learning_rate": 1.3178755449861276e-07, + "logits/chosen": -1.5696409940719604, + "logits/rejected": -1.4400540590286255, + "logps/chosen": -153.15509033203125, + "logps/rejected": -281.15252685546875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.518252372741699, + "rewards/margins": 13.754487037658691, + "rewards/rejected": -20.27273941040039, + "step": 4277 + }, + { + "epoch": 6.87, + "learning_rate": 1.316884661117717e-07, + "logits/chosen": -1.5224648714065552, + "logits/rejected": -1.5814319849014282, + "logps/chosen": -164.186279296875, + "logps/rejected": -332.18701171875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.82908821105957, + "rewards/margins": 16.91836166381836, + "rewards/rejected": -23.747451782226562, + "step": 4278 + }, + { + "epoch": 6.87, + "learning_rate": 1.3158937772493065e-07, + "logits/chosen": -1.4290246963500977, + "logits/rejected": -1.5582456588745117, + "logps/chosen": -179.80572509765625, + "logps/rejected": -322.2653503417969, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.820511817932129, + "rewards/margins": 13.470190048217773, + "rewards/rejected": -20.290700912475586, + "step": 4279 + }, + { + "epoch": 6.87, + "learning_rate": 1.3149028933808956e-07, + "logits/chosen": -1.3676645755767822, + "logits/rejected": -1.4288015365600586, + "logps/chosen": -137.02755737304688, + "logps/rejected": -278.9418640136719, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.417818546295166, + "rewards/margins": 13.079514503479004, + "rewards/rejected": -17.497333526611328, + "step": 4280 + }, + { + "epoch": 6.87, + "learning_rate": 1.313912009512485e-07, + "logits/chosen": -1.5556520223617554, + "logits/rejected": -1.583845853805542, + "logps/chosen": -151.59805297851562, + "logps/rejected": -315.99078369140625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.469965934753418, + "rewards/margins": 16.242353439331055, + "rewards/rejected": -23.712318420410156, + "step": 4281 + }, + { + "epoch": 6.87, + "learning_rate": 1.3129211256440745e-07, + "logits/chosen": -1.4821794033050537, + "logits/rejected": -1.5267000198364258, + "logps/chosen": -147.068115234375, + "logps/rejected": -283.7849426269531, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.342219829559326, + "rewards/margins": 13.891756057739258, + "rewards/rejected": -20.233976364135742, + "step": 4282 + }, + { + "epoch": 6.87, + "learning_rate": 1.3119302417756639e-07, + "logits/chosen": -1.4784932136535645, + "logits/rejected": -1.4943057298660278, + "logps/chosen": -181.0413360595703, + "logps/rejected": -347.7564697265625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.350299835205078, + "rewards/margins": 17.27113914489746, + "rewards/rejected": -24.62143898010254, + "step": 4283 + }, + { + "epoch": 6.88, + "learning_rate": 1.3109393579072532e-07, + "logits/chosen": -1.5137200355529785, + "logits/rejected": -1.5007898807525635, + "logps/chosen": -168.63641357421875, + "logps/rejected": -321.08251953125, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.49238109588623, + "rewards/margins": 15.637548446655273, + "rewards/rejected": -24.129928588867188, + "step": 4284 + }, + { + "epoch": 6.88, + "learning_rate": 1.3099484740388425e-07, + "logits/chosen": -1.5510516166687012, + "logits/rejected": -1.587266206741333, + "logps/chosen": -187.2799530029297, + "logps/rejected": -286.5812072753906, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.653657913208008, + "rewards/margins": 10.423924446105957, + "rewards/rejected": -20.07758331298828, + "step": 4285 + }, + { + "epoch": 6.88, + "learning_rate": 1.3089575901704319e-07, + "logits/chosen": -1.480712890625, + "logits/rejected": -1.5313217639923096, + "logps/chosen": -172.68942260742188, + "logps/rejected": -315.8013916015625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.25301742553711, + "rewards/margins": 13.626751899719238, + "rewards/rejected": -22.87976837158203, + "step": 4286 + }, + { + "epoch": 6.88, + "learning_rate": 1.3079667063020214e-07, + "logits/chosen": -1.2362608909606934, + "logits/rejected": -1.2863179445266724, + "logps/chosen": -183.6317138671875, + "logps/rejected": -311.90960693359375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.576515197753906, + "rewards/margins": 12.326824188232422, + "rewards/rejected": -22.903339385986328, + "step": 4287 + }, + { + "epoch": 6.88, + "learning_rate": 1.3069758224336108e-07, + "logits/chosen": -1.6772159337997437, + "logits/rejected": -1.5387132167816162, + "logps/chosen": -189.92189025878906, + "logps/rejected": -291.9770202636719, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.35258960723877, + "rewards/margins": 12.875459671020508, + "rewards/rejected": -21.228050231933594, + "step": 4288 + }, + { + "epoch": 6.88, + "learning_rate": 1.3059849385651999e-07, + "logits/chosen": -1.4387080669403076, + "logits/rejected": -1.4397437572479248, + "logps/chosen": -168.4374237060547, + "logps/rejected": -298.67498779296875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.485949993133545, + "rewards/margins": 13.225630760192871, + "rewards/rejected": -20.711580276489258, + "step": 4289 + }, + { + "epoch": 6.89, + "learning_rate": 1.3049940546967894e-07, + "logits/chosen": -1.4883170127868652, + "logits/rejected": -1.5089118480682373, + "logps/chosen": -139.19073486328125, + "logps/rejected": -274.90606689453125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.884763240814209, + "rewards/margins": 13.900800704956055, + "rewards/rejected": -18.78556251525879, + "step": 4290 + }, + { + "epoch": 6.89, + "learning_rate": 1.3040031708283788e-07, + "logits/chosen": -1.3994731903076172, + "logits/rejected": -1.412219524383545, + "logps/chosen": -146.13217163085938, + "logps/rejected": -291.40997314453125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.722890377044678, + "rewards/margins": 14.221756935119629, + "rewards/rejected": -20.94464683532715, + "step": 4291 + }, + { + "epoch": 6.89, + "learning_rate": 1.3030122869599684e-07, + "logits/chosen": -1.533659815788269, + "logits/rejected": -1.4114477634429932, + "logps/chosen": -177.42849731445312, + "logps/rejected": -267.626708984375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.664575576782227, + "rewards/margins": 11.954486846923828, + "rewards/rejected": -18.619062423706055, + "step": 4292 + }, + { + "epoch": 6.89, + "learning_rate": 1.3020214030915577e-07, + "logits/chosen": -1.726412057876587, + "logits/rejected": -1.7339627742767334, + "logps/chosen": -141.17657470703125, + "logps/rejected": -296.1455078125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.869499206542969, + "rewards/margins": 15.45513916015625, + "rewards/rejected": -20.32463836669922, + "step": 4293 + }, + { + "epoch": 6.89, + "learning_rate": 1.3010305192231468e-07, + "logits/chosen": -1.3734432458877563, + "logits/rejected": -1.3948428630828857, + "logps/chosen": -152.02963256835938, + "logps/rejected": -266.4960021972656, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.83348274230957, + "rewards/margins": 10.864480972290039, + "rewards/rejected": -18.69796371459961, + "step": 4294 + }, + { + "epoch": 6.89, + "learning_rate": 1.3000396353547364e-07, + "logits/chosen": -1.3678107261657715, + "logits/rejected": -1.3610135316848755, + "logps/chosen": -134.41851806640625, + "logps/rejected": -231.99363708496094, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.509337425231934, + "rewards/margins": 10.38669490814209, + "rewards/rejected": -16.896032333374023, + "step": 4295 + }, + { + "epoch": 6.9, + "learning_rate": 1.2990487514863257e-07, + "logits/chosen": -1.5657483339309692, + "logits/rejected": -1.5451221466064453, + "logps/chosen": -171.8986358642578, + "logps/rejected": -254.52366638183594, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.943211555480957, + "rewards/margins": 10.141676902770996, + "rewards/rejected": -17.084888458251953, + "step": 4296 + }, + { + "epoch": 6.9, + "learning_rate": 1.2980578676179153e-07, + "logits/chosen": -1.5095562934875488, + "logits/rejected": -1.5189958810806274, + "logps/chosen": -187.65733337402344, + "logps/rejected": -300.6346435546875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.892082214355469, + "rewards/margins": 10.896815299987793, + "rewards/rejected": -20.788896560668945, + "step": 4297 + }, + { + "epoch": 6.9, + "learning_rate": 1.2970669837495046e-07, + "logits/chosen": -1.4297704696655273, + "logits/rejected": -1.5709483623504639, + "logps/chosen": -140.33506774902344, + "logps/rejected": -298.2570495605469, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.668919086456299, + "rewards/margins": 14.307233810424805, + "rewards/rejected": -19.976152420043945, + "step": 4298 + }, + { + "epoch": 6.9, + "learning_rate": 1.2960760998810937e-07, + "logits/chosen": -1.5177892446517944, + "logits/rejected": -1.499434471130371, + "logps/chosen": -177.0868377685547, + "logps/rejected": -306.70391845703125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.092121124267578, + "rewards/margins": 11.351507186889648, + "rewards/rejected": -21.44363021850586, + "step": 4299 + }, + { + "epoch": 6.9, + "learning_rate": 1.2950852160126833e-07, + "logits/chosen": -1.4757041931152344, + "logits/rejected": -1.55293607711792, + "logps/chosen": -146.1461944580078, + "logps/rejected": -279.0918884277344, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.655509948730469, + "rewards/margins": 12.780374526977539, + "rewards/rejected": -19.435884475708008, + "step": 4300 + }, + { + "epoch": 6.9, + "learning_rate": 1.2940943321442726e-07, + "logits/chosen": -1.355355978012085, + "logits/rejected": -1.3110828399658203, + "logps/chosen": -184.12118530273438, + "logps/rejected": -266.32025146484375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.04542064666748, + "rewards/margins": 11.658212661743164, + "rewards/rejected": -20.703632354736328, + "step": 4301 + }, + { + "epoch": 6.91, + "learning_rate": 1.293103448275862e-07, + "logits/chosen": -1.4735065698623657, + "logits/rejected": -1.5677225589752197, + "logps/chosen": -109.21278381347656, + "logps/rejected": -265.9952392578125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.959303379058838, + "rewards/margins": 15.024749755859375, + "rewards/rejected": -18.984052658081055, + "step": 4302 + }, + { + "epoch": 6.91, + "learning_rate": 1.2921125644074513e-07, + "logits/chosen": -1.5612730979919434, + "logits/rejected": -1.5712697505950928, + "logps/chosen": -172.47418212890625, + "logps/rejected": -312.5321044921875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.856637001037598, + "rewards/margins": 12.654653549194336, + "rewards/rejected": -21.51129150390625, + "step": 4303 + }, + { + "epoch": 6.91, + "learning_rate": 1.2911216805390406e-07, + "logits/chosen": -1.587221622467041, + "logits/rejected": -1.4515109062194824, + "logps/chosen": -147.28927612304688, + "logps/rejected": -213.03211975097656, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9463725090026855, + "rewards/margins": 9.342229843139648, + "rewards/rejected": -13.288601875305176, + "step": 4304 + }, + { + "epoch": 6.91, + "learning_rate": 1.2901307966706302e-07, + "logits/chosen": -1.5290552377700806, + "logits/rejected": -1.5231764316558838, + "logps/chosen": -134.39151000976562, + "logps/rejected": -247.91705322265625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.591496467590332, + "rewards/margins": 11.982635498046875, + "rewards/rejected": -17.57413101196289, + "step": 4305 + }, + { + "epoch": 6.91, + "learning_rate": 1.2891399128022196e-07, + "logits/chosen": -1.4264874458312988, + "logits/rejected": -1.5338096618652344, + "logps/chosen": -129.68983459472656, + "logps/rejected": -262.00665283203125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.54251766204834, + "rewards/margins": 11.844867706298828, + "rewards/rejected": -18.38738441467285, + "step": 4306 + }, + { + "epoch": 6.91, + "learning_rate": 1.288149028933809e-07, + "logits/chosen": -1.4937124252319336, + "logits/rejected": -1.5156177282333374, + "logps/chosen": -137.41441345214844, + "logps/rejected": -246.71856689453125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.466919422149658, + "rewards/margins": 10.736380577087402, + "rewards/rejected": -17.20330047607422, + "step": 4307 + }, + { + "epoch": 6.91, + "learning_rate": 1.2871581450653982e-07, + "logits/chosen": -1.4568756818771362, + "logits/rejected": -1.5030415058135986, + "logps/chosen": -133.63563537597656, + "logps/rejected": -259.2521667480469, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.009183883666992, + "rewards/margins": 12.545990943908691, + "rewards/rejected": -17.555173873901367, + "step": 4308 + }, + { + "epoch": 6.92, + "learning_rate": 1.2861672611969876e-07, + "logits/chosen": -1.5248723030090332, + "logits/rejected": -1.5386236906051636, + "logps/chosen": -112.57725524902344, + "logps/rejected": -220.40223693847656, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.225460052490234, + "rewards/margins": 11.656986236572266, + "rewards/rejected": -15.8824462890625, + "step": 4309 + }, + { + "epoch": 6.92, + "learning_rate": 1.285176377328577e-07, + "logits/chosen": -1.6561203002929688, + "logits/rejected": -1.697479486465454, + "logps/chosen": -177.25717163085938, + "logps/rejected": -313.2477722167969, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.710463523864746, + "rewards/margins": 12.630510330200195, + "rewards/rejected": -21.340972900390625, + "step": 4310 + }, + { + "epoch": 6.92, + "learning_rate": 1.2841854934601665e-07, + "logits/chosen": -1.560349464416504, + "logits/rejected": -1.6322051286697388, + "logps/chosen": -132.02638244628906, + "logps/rejected": -271.97515869140625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.4519453048706055, + "rewards/margins": 11.839178085327148, + "rewards/rejected": -18.291122436523438, + "step": 4311 + }, + { + "epoch": 6.92, + "learning_rate": 1.2831946095917558e-07, + "logits/chosen": -1.5629547834396362, + "logits/rejected": -1.5957180261611938, + "logps/chosen": -137.21560668945312, + "logps/rejected": -317.1037292480469, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.305415153503418, + "rewards/margins": 17.662899017333984, + "rewards/rejected": -22.96831512451172, + "step": 4312 + }, + { + "epoch": 6.92, + "learning_rate": 1.2822037257233452e-07, + "logits/chosen": -1.3525371551513672, + "logits/rejected": -1.3652621507644653, + "logps/chosen": -126.0892562866211, + "logps/rejected": -316.083984375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.30812931060791, + "rewards/margins": 15.535308837890625, + "rewards/rejected": -21.84343910217285, + "step": 4313 + }, + { + "epoch": 6.92, + "learning_rate": 1.2812128418549345e-07, + "logits/chosen": -1.408422827720642, + "logits/rejected": -1.4256047010421753, + "logps/chosen": -90.45665740966797, + "logps/rejected": -205.7488555908203, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.566164493560791, + "rewards/margins": 11.031644821166992, + "rewards/rejected": -14.597808837890625, + "step": 4314 + }, + { + "epoch": 6.93, + "learning_rate": 1.2802219579865238e-07, + "logits/chosen": -1.4226073026657104, + "logits/rejected": -1.50832200050354, + "logps/chosen": -167.0977783203125, + "logps/rejected": -366.845947265625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.75607442855835, + "rewards/margins": 18.29708480834961, + "rewards/rejected": -25.053159713745117, + "step": 4315 + }, + { + "epoch": 6.93, + "learning_rate": 1.2792310741181134e-07, + "logits/chosen": -1.470255732536316, + "logits/rejected": -1.5280423164367676, + "logps/chosen": -135.12840270996094, + "logps/rejected": -342.0127868652344, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.1385722160339355, + "rewards/margins": 18.15222930908203, + "rewards/rejected": -24.290802001953125, + "step": 4316 + }, + { + "epoch": 6.93, + "learning_rate": 1.2782401902497028e-07, + "logits/chosen": -1.347651481628418, + "logits/rejected": -1.3877893686294556, + "logps/chosen": -210.9666748046875, + "logps/rejected": -377.32489013671875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.988554000854492, + "rewards/margins": 16.125385284423828, + "rewards/rejected": -28.11393928527832, + "step": 4317 + }, + { + "epoch": 6.93, + "learning_rate": 1.277249306381292e-07, + "logits/chosen": -1.4457354545593262, + "logits/rejected": -1.427058458328247, + "logps/chosen": -183.82545471191406, + "logps/rejected": -357.3561096191406, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.28513240814209, + "rewards/margins": 17.351966857910156, + "rewards/rejected": -26.637102127075195, + "step": 4318 + }, + { + "epoch": 6.93, + "learning_rate": 1.2762584225128814e-07, + "logits/chosen": -1.5396779775619507, + "logits/rejected": -1.484226107597351, + "logps/chosen": -130.5384521484375, + "logps/rejected": -268.9083557128906, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.09043025970459, + "rewards/margins": 13.801069259643555, + "rewards/rejected": -18.89150047302246, + "step": 4319 + }, + { + "epoch": 6.93, + "learning_rate": 1.2752675386444708e-07, + "logits/chosen": -1.5914158821105957, + "logits/rejected": -1.642514705657959, + "logps/chosen": -103.25065612792969, + "logps/rejected": -293.6171875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.964869499206543, + "rewards/margins": 16.868938446044922, + "rewards/rejected": -20.83380699157715, + "step": 4320 + }, + { + "epoch": 6.94, + "learning_rate": 1.2742766547760604e-07, + "logits/chosen": -1.6004083156585693, + "logits/rejected": -1.5024765729904175, + "logps/chosen": -153.00497436523438, + "logps/rejected": -227.97891235351562, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.593244552612305, + "rewards/margins": 11.049426078796387, + "rewards/rejected": -15.642671585083008, + "step": 4321 + }, + { + "epoch": 6.94, + "learning_rate": 1.2732857709076494e-07, + "logits/chosen": -1.3689305782318115, + "logits/rejected": -1.3575849533081055, + "logps/chosen": -141.92520141601562, + "logps/rejected": -275.8426208496094, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.579128742218018, + "rewards/margins": 12.354674339294434, + "rewards/rejected": -18.93380355834961, + "step": 4322 + }, + { + "epoch": 6.94, + "learning_rate": 1.2722948870392388e-07, + "logits/chosen": -1.484875202178955, + "logits/rejected": -1.5846595764160156, + "logps/chosen": -152.639404296875, + "logps/rejected": -341.92633056640625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.637639999389648, + "rewards/margins": 15.9112548828125, + "rewards/rejected": -23.54889488220215, + "step": 4323 + }, + { + "epoch": 6.94, + "learning_rate": 1.2713040031708284e-07, + "logits/chosen": -1.7022486925125122, + "logits/rejected": -1.5783485174179077, + "logps/chosen": -141.59970092773438, + "logps/rejected": -228.56942749023438, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.71903657913208, + "rewards/margins": 11.14383316040039, + "rewards/rejected": -16.862869262695312, + "step": 4324 + }, + { + "epoch": 6.94, + "learning_rate": 1.2703131193024177e-07, + "logits/chosen": -1.399747371673584, + "logits/rejected": -1.4299784898757935, + "logps/chosen": -141.03260803222656, + "logps/rejected": -263.9141845703125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.242031574249268, + "rewards/margins": 12.130402565002441, + "rewards/rejected": -18.372434616088867, + "step": 4325 + }, + { + "epoch": 6.94, + "learning_rate": 1.2693222354340073e-07, + "logits/chosen": -1.4708397388458252, + "logits/rejected": -1.4839723110198975, + "logps/chosen": -140.30972290039062, + "logps/rejected": -280.33154296875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.224983215332031, + "rewards/margins": 11.686664581298828, + "rewards/rejected": -16.91164779663086, + "step": 4326 + }, + { + "epoch": 6.95, + "learning_rate": 1.2683313515655964e-07, + "logits/chosen": -1.4596989154815674, + "logits/rejected": -1.4362398386001587, + "logps/chosen": -134.68399047851562, + "logps/rejected": -299.4694519042969, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.061861991882324, + "rewards/margins": 16.057004928588867, + "rewards/rejected": -22.118867874145508, + "step": 4327 + }, + { + "epoch": 6.95, + "learning_rate": 1.2673404676971857e-07, + "logits/chosen": -1.4489200115203857, + "logits/rejected": -1.400987148284912, + "logps/chosen": -193.5735321044922, + "logps/rejected": -334.8089599609375, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.789441108703613, + "rewards/margins": 11.954316139221191, + "rewards/rejected": -22.743757247924805, + "step": 4328 + }, + { + "epoch": 6.95, + "learning_rate": 1.2663495838287753e-07, + "logits/chosen": -1.6088558435440063, + "logits/rejected": -1.6177834272384644, + "logps/chosen": -172.08392333984375, + "logps/rejected": -326.7933349609375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.902764320373535, + "rewards/margins": 15.188146591186523, + "rewards/rejected": -22.090909957885742, + "step": 4329 + }, + { + "epoch": 6.95, + "learning_rate": 1.2653586999603646e-07, + "logits/chosen": -1.5794850587844849, + "logits/rejected": -1.5611016750335693, + "logps/chosen": -143.60308837890625, + "logps/rejected": -281.58123779296875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.910326957702637, + "rewards/margins": 13.70084285736084, + "rewards/rejected": -20.61117172241211, + "step": 4330 + }, + { + "epoch": 6.95, + "learning_rate": 1.2643678160919542e-07, + "logits/chosen": -1.5582289695739746, + "logits/rejected": -1.5649431943893433, + "logps/chosen": -139.22743225097656, + "logps/rejected": -269.9653015136719, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.808687210083008, + "rewards/margins": 12.504372596740723, + "rewards/rejected": -18.313060760498047, + "step": 4331 + }, + { + "epoch": 6.95, + "learning_rate": 1.2633769322235433e-07, + "logits/chosen": -1.4214506149291992, + "logits/rejected": -1.4490200281143188, + "logps/chosen": -159.44528198242188, + "logps/rejected": -338.112548828125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.098174095153809, + "rewards/margins": 17.26534080505371, + "rewards/rejected": -25.363513946533203, + "step": 4332 + }, + { + "epoch": 6.96, + "learning_rate": 1.2623860483551326e-07, + "logits/chosen": -1.6215322017669678, + "logits/rejected": -1.7945780754089355, + "logps/chosen": -133.97959899902344, + "logps/rejected": -317.5301208496094, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.092343330383301, + "rewards/margins": 14.559404373168945, + "rewards/rejected": -21.651748657226562, + "step": 4333 + }, + { + "epoch": 6.96, + "learning_rate": 1.2613951644867222e-07, + "logits/chosen": -1.4846603870391846, + "logits/rejected": -1.4378643035888672, + "logps/chosen": -202.65493774414062, + "logps/rejected": -334.9845886230469, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.635741233825684, + "rewards/margins": 12.923158645629883, + "rewards/rejected": -24.55889892578125, + "step": 4334 + }, + { + "epoch": 6.96, + "learning_rate": 1.2604042806183116e-07, + "logits/chosen": -1.4485752582550049, + "logits/rejected": -1.4352654218673706, + "logps/chosen": -128.20318603515625, + "logps/rejected": -233.1246337890625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.599601745605469, + "rewards/margins": 12.369684219360352, + "rewards/rejected": -16.96928596496582, + "step": 4335 + }, + { + "epoch": 6.96, + "learning_rate": 1.259413396749901e-07, + "logits/chosen": -1.427125096321106, + "logits/rejected": -1.3672806024551392, + "logps/chosen": -118.27519226074219, + "logps/rejected": -234.66537475585938, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7877249717712402, + "rewards/margins": 12.478940963745117, + "rewards/rejected": -16.266666412353516, + "step": 4336 + }, + { + "epoch": 6.96, + "learning_rate": 1.2584225128814902e-07, + "logits/chosen": -1.602966070175171, + "logits/rejected": -1.5214741230010986, + "logps/chosen": -119.90373229980469, + "logps/rejected": -223.2362823486328, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9289188385009766, + "rewards/margins": 11.311025619506836, + "rewards/rejected": -15.239944458007812, + "step": 4337 + }, + { + "epoch": 6.96, + "learning_rate": 1.2574316290130795e-07, + "logits/chosen": -1.444711685180664, + "logits/rejected": -1.460920810699463, + "logps/chosen": -116.18070983886719, + "logps/rejected": -261.0518493652344, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.299787521362305, + "rewards/margins": 14.132219314575195, + "rewards/rejected": -19.432008743286133, + "step": 4338 + }, + { + "epoch": 6.96, + "learning_rate": 1.2564407451446691e-07, + "logits/chosen": -1.3033758401870728, + "logits/rejected": -1.299605369567871, + "logps/chosen": -166.93743896484375, + "logps/rejected": -289.3072204589844, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.856400489807129, + "rewards/margins": 12.895968437194824, + "rewards/rejected": -21.752368927001953, + "step": 4339 + }, + { + "epoch": 6.97, + "learning_rate": 1.2554498612762585e-07, + "logits/chosen": -1.6564733982086182, + "logits/rejected": -1.5853700637817383, + "logps/chosen": -181.61209106445312, + "logps/rejected": -296.44329833984375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.322128772735596, + "rewards/margins": 12.886022567749023, + "rewards/rejected": -20.208152770996094, + "step": 4340 + }, + { + "epoch": 6.97, + "learning_rate": 1.2544589774078475e-07, + "logits/chosen": -1.477905511856079, + "logits/rejected": -1.4226627349853516, + "logps/chosen": -220.5298614501953, + "logps/rejected": -335.6328430175781, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.493106842041016, + "rewards/margins": 12.343870162963867, + "rewards/rejected": -23.836977005004883, + "step": 4341 + }, + { + "epoch": 6.97, + "learning_rate": 1.2534680935394371e-07, + "logits/chosen": -1.3785821199417114, + "logits/rejected": -1.3501948118209839, + "logps/chosen": -114.79023742675781, + "logps/rejected": -308.3306579589844, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.111201763153076, + "rewards/margins": 17.29815673828125, + "rewards/rejected": -21.409358978271484, + "step": 4342 + }, + { + "epoch": 6.97, + "learning_rate": 1.2524772096710265e-07, + "logits/chosen": -1.528933048248291, + "logits/rejected": -1.5162949562072754, + "logps/chosen": -170.25083923339844, + "logps/rejected": -305.92376708984375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.048604011535645, + "rewards/margins": 11.975053787231445, + "rewards/rejected": -21.023658752441406, + "step": 4343 + }, + { + "epoch": 6.97, + "learning_rate": 1.2514863258026158e-07, + "logits/chosen": -1.5402898788452148, + "logits/rejected": -1.5932040214538574, + "logps/chosen": -165.48687744140625, + "logps/rejected": -303.7412109375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.816098213195801, + "rewards/margins": 14.347036361694336, + "rewards/rejected": -21.163135528564453, + "step": 4344 + }, + { + "epoch": 6.97, + "learning_rate": 1.2504954419342054e-07, + "logits/chosen": -1.4314227104187012, + "logits/rejected": -1.4015283584594727, + "logps/chosen": -213.84091186523438, + "logps/rejected": -314.7798156738281, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.597672462463379, + "rewards/margins": 11.267631530761719, + "rewards/rejected": -21.865304946899414, + "step": 4345 + }, + { + "epoch": 6.98, + "learning_rate": 1.2495045580657947e-07, + "logits/chosen": -1.320091962814331, + "logits/rejected": -1.3571906089782715, + "logps/chosen": -205.00474548339844, + "logps/rejected": -309.4711608886719, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.23813247680664, + "rewards/margins": 11.156259536743164, + "rewards/rejected": -22.394392013549805, + "step": 4346 + }, + { + "epoch": 6.98, + "learning_rate": 1.248513674197384e-07, + "logits/chosen": -1.286195158958435, + "logits/rejected": -1.3818988800048828, + "logps/chosen": -175.76976013183594, + "logps/rejected": -306.3787841796875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.991697311401367, + "rewards/margins": 11.727690696716309, + "rewards/rejected": -21.71938705444336, + "step": 4347 + }, + { + "epoch": 6.98, + "learning_rate": 1.2475227903289734e-07, + "logits/chosen": -1.7449885606765747, + "logits/rejected": -1.6435647010803223, + "logps/chosen": -199.27691650390625, + "logps/rejected": -307.28863525390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.85352897644043, + "rewards/margins": 12.099481582641602, + "rewards/rejected": -20.95301055908203, + "step": 4348 + }, + { + "epoch": 6.98, + "learning_rate": 1.2465319064605627e-07, + "logits/chosen": -1.4709646701812744, + "logits/rejected": -1.450405240058899, + "logps/chosen": -119.30654907226562, + "logps/rejected": -275.74298095703125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.484562873840332, + "rewards/margins": 15.570978164672852, + "rewards/rejected": -20.0555419921875, + "step": 4349 + }, + { + "epoch": 6.98, + "learning_rate": 1.245541022592152e-07, + "logits/chosen": -1.3718750476837158, + "logits/rejected": -1.5239137411117554, + "logps/chosen": -149.04898071289062, + "logps/rejected": -298.96209716796875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.040124893188477, + "rewards/margins": 13.255836486816406, + "rewards/rejected": -20.295961380004883, + "step": 4350 + }, + { + "epoch": 6.98, + "learning_rate": 1.2445501387237417e-07, + "logits/chosen": -1.6435092687606812, + "logits/rejected": -1.6289594173431396, + "logps/chosen": -109.72781372070312, + "logps/rejected": -258.74896240234375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.06679105758667, + "rewards/margins": 14.82215404510498, + "rewards/rejected": -18.888944625854492, + "step": 4351 + }, + { + "epoch": 6.99, + "learning_rate": 1.2435592548553307e-07, + "logits/chosen": -1.5125677585601807, + "logits/rejected": -1.5900278091430664, + "logps/chosen": -97.42155456542969, + "logps/rejected": -249.88551330566406, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1825666427612305, + "rewards/margins": 15.161263465881348, + "rewards/rejected": -18.34383201599121, + "step": 4352 + }, + { + "epoch": 6.99, + "learning_rate": 1.2425683709869203e-07, + "logits/chosen": -1.654233694076538, + "logits/rejected": -1.5677802562713623, + "logps/chosen": -162.11654663085938, + "logps/rejected": -277.99517822265625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.9748992919921875, + "rewards/margins": 12.15892505645752, + "rewards/rejected": -18.133825302124023, + "step": 4353 + }, + { + "epoch": 6.99, + "learning_rate": 1.2415774871185097e-07, + "logits/chosen": -1.6477930545806885, + "logits/rejected": -1.6572380065917969, + "logps/chosen": -142.06248474121094, + "logps/rejected": -260.0127258300781, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.362393856048584, + "rewards/margins": 12.69320297241211, + "rewards/rejected": -18.05559730529785, + "step": 4354 + }, + { + "epoch": 6.99, + "learning_rate": 1.240586603250099e-07, + "logits/chosen": -1.4674665927886963, + "logits/rejected": -1.4407474994659424, + "logps/chosen": -184.73048400878906, + "logps/rejected": -277.995361328125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.985604763031006, + "rewards/margins": 9.408061981201172, + "rewards/rejected": -17.393667221069336, + "step": 4355 + }, + { + "epoch": 6.99, + "learning_rate": 1.2395957193816886e-07, + "logits/chosen": -1.5035059452056885, + "logits/rejected": -1.4764820337295532, + "logps/chosen": -152.93182373046875, + "logps/rejected": -286.5018005371094, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.285207748413086, + "rewards/margins": 14.789018630981445, + "rewards/rejected": -21.07422637939453, + "step": 4356 + }, + { + "epoch": 6.99, + "learning_rate": 1.2386048355132777e-07, + "logits/chosen": -1.6303181648254395, + "logits/rejected": -1.4925563335418701, + "logps/chosen": -178.75704956054688, + "logps/rejected": -275.66058349609375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.514385223388672, + "rewards/margins": 11.352544784545898, + "rewards/rejected": -19.86693000793457, + "step": 4357 + }, + { + "epoch": 7.0, + "learning_rate": 1.2376139516448673e-07, + "logits/chosen": -1.5762104988098145, + "logits/rejected": -1.630784511566162, + "logps/chosen": -189.03720092773438, + "logps/rejected": -342.8877258300781, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.583969116210938, + "rewards/margins": 13.548315048217773, + "rewards/rejected": -22.132282257080078, + "step": 4358 + }, + { + "epoch": 7.0, + "learning_rate": 1.2366230677764566e-07, + "logits/chosen": -1.611122965812683, + "logits/rejected": -1.5208609104156494, + "logps/chosen": -158.3083038330078, + "logps/rejected": -267.5928955078125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.909217834472656, + "rewards/margins": 12.562777519226074, + "rewards/rejected": -18.471996307373047, + "step": 4359 + }, + { + "epoch": 7.0, + "learning_rate": 1.235632183908046e-07, + "logits/chosen": -1.3326228857040405, + "logits/rejected": -1.3369784355163574, + "logps/chosen": -186.82571411132812, + "logps/rejected": -320.1168212890625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.14082145690918, + "rewards/margins": 12.48708724975586, + "rewards/rejected": -23.627910614013672, + "step": 4360 + }, + { + "epoch": 7.0, + "learning_rate": 1.2346413000396353e-07, + "logits/chosen": -1.4784719944000244, + "logits/rejected": -1.506305456161499, + "logps/chosen": -101.01057434082031, + "logps/rejected": -257.0434265136719, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9965596199035645, + "rewards/margins": 15.007376670837402, + "rewards/rejected": -19.003936767578125, + "step": 4361 + }, + { + "epoch": 7.0, + "learning_rate": 1.2336504161712246e-07, + "logits/chosen": -1.3592658042907715, + "logits/rejected": -1.3227920532226562, + "logps/chosen": -185.77207946777344, + "logps/rejected": -303.3753662109375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.643695831298828, + "rewards/margins": 13.453834533691406, + "rewards/rejected": -22.097530364990234, + "step": 4362 + }, + { + "epoch": 7.0, + "learning_rate": 1.2326595323028142e-07, + "logits/chosen": -1.6914420127868652, + "logits/rejected": -1.7723102569580078, + "logps/chosen": -156.48257446289062, + "logps/rejected": -307.8924255371094, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.793942451477051, + "rewards/margins": 12.395276069641113, + "rewards/rejected": -20.189218521118164, + "step": 4363 + }, + { + "epoch": 7.0, + "learning_rate": 1.2316686484344035e-07, + "logits/chosen": -1.6555708646774292, + "logits/rejected": -1.6472747325897217, + "logps/chosen": -147.65478515625, + "logps/rejected": -276.0013732910156, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.3416218757629395, + "rewards/margins": 13.329360008239746, + "rewards/rejected": -18.67098045349121, + "step": 4364 + }, + { + "epoch": 7.01, + "learning_rate": 1.2306777645659929e-07, + "logits/chosen": -1.5685001611709595, + "logits/rejected": -1.5460562705993652, + "logps/chosen": -140.03431701660156, + "logps/rejected": -287.5618591308594, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.312833309173584, + "rewards/margins": 15.868837356567383, + "rewards/rejected": -22.181671142578125, + "step": 4365 + }, + { + "epoch": 7.01, + "learning_rate": 1.2296868806975822e-07, + "logits/chosen": -1.3017094135284424, + "logits/rejected": -1.4309937953948975, + "logps/chosen": -156.5979766845703, + "logps/rejected": -308.9305419921875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.258066177368164, + "rewards/margins": 12.527042388916016, + "rewards/rejected": -19.78510856628418, + "step": 4366 + }, + { + "epoch": 7.01, + "learning_rate": 1.2286959968291715e-07, + "logits/chosen": -1.4879162311553955, + "logits/rejected": -1.4898526668548584, + "logps/chosen": -132.4640655517578, + "logps/rejected": -280.7765197753906, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.587507247924805, + "rewards/margins": 12.685235977172852, + "rewards/rejected": -19.272743225097656, + "step": 4367 + }, + { + "epoch": 7.01, + "learning_rate": 1.2277051129607609e-07, + "logits/chosen": -1.458630084991455, + "logits/rejected": -1.4607023000717163, + "logps/chosen": -167.19024658203125, + "logps/rejected": -323.1876220703125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.481583595275879, + "rewards/margins": 15.827878952026367, + "rewards/rejected": -23.30946159362793, + "step": 4368 + }, + { + "epoch": 7.01, + "learning_rate": 1.2267142290923502e-07, + "logits/chosen": -1.4201716184616089, + "logits/rejected": -1.4372873306274414, + "logps/chosen": -188.40719604492188, + "logps/rejected": -338.65093994140625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.276450157165527, + "rewards/margins": 13.215478897094727, + "rewards/rejected": -23.491928100585938, + "step": 4369 + }, + { + "epoch": 7.01, + "learning_rate": 1.2257233452239398e-07, + "logits/chosen": -1.6339762210845947, + "logits/rejected": -1.6343148946762085, + "logps/chosen": -160.24766540527344, + "logps/rejected": -297.3423156738281, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.3611321449279785, + "rewards/margins": 13.183692932128906, + "rewards/rejected": -19.544824600219727, + "step": 4370 + }, + { + "epoch": 7.02, + "learning_rate": 1.224732461355529e-07, + "logits/chosen": -1.626706600189209, + "logits/rejected": -1.6109113693237305, + "logps/chosen": -139.7855224609375, + "logps/rejected": -265.82562255859375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.05205774307251, + "rewards/margins": 13.116009712219238, + "rewards/rejected": -18.168067932128906, + "step": 4371 + }, + { + "epoch": 7.02, + "learning_rate": 1.2237415774871185e-07, + "logits/chosen": -1.365592122077942, + "logits/rejected": -1.3639887571334839, + "logps/chosen": -155.46438598632812, + "logps/rejected": -315.6642150878906, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.844658374786377, + "rewards/margins": 13.99251937866211, + "rewards/rejected": -20.837177276611328, + "step": 4372 + }, + { + "epoch": 7.02, + "learning_rate": 1.2227506936187078e-07, + "logits/chosen": -1.5425270795822144, + "logits/rejected": -1.5028343200683594, + "logps/chosen": -163.34988403320312, + "logps/rejected": -322.7779541015625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.515161037445068, + "rewards/margins": 16.696697235107422, + "rewards/rejected": -24.211856842041016, + "step": 4373 + }, + { + "epoch": 7.02, + "learning_rate": 1.221759809750297e-07, + "logits/chosen": -1.6626607179641724, + "logits/rejected": -1.540974497795105, + "logps/chosen": -186.1359100341797, + "logps/rejected": -257.8954772949219, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.79967737197876, + "rewards/margins": 11.212881088256836, + "rewards/rejected": -18.012557983398438, + "step": 4374 + }, + { + "epoch": 7.02, + "learning_rate": 1.2207689258818867e-07, + "logits/chosen": -1.6337803602218628, + "logits/rejected": -1.6138213872909546, + "logps/chosen": -110.348388671875, + "logps/rejected": -243.9624786376953, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.222269058227539, + "rewards/margins": 13.77242374420166, + "rewards/rejected": -17.994693756103516, + "step": 4375 + }, + { + "epoch": 7.02, + "learning_rate": 1.219778042013476e-07, + "logits/chosen": -1.4480528831481934, + "logits/rejected": -1.3929768800735474, + "logps/chosen": -160.10691833496094, + "logps/rejected": -260.9598693847656, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.675724029541016, + "rewards/margins": 12.73061752319336, + "rewards/rejected": -19.406341552734375, + "step": 4376 + }, + { + "epoch": 7.03, + "learning_rate": 1.2187871581450654e-07, + "logits/chosen": -1.445618748664856, + "logits/rejected": -1.5290457010269165, + "logps/chosen": -130.90147399902344, + "logps/rejected": -333.85650634765625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.311690807342529, + "rewards/margins": 19.463947296142578, + "rewards/rejected": -24.7756404876709, + "step": 4377 + }, + { + "epoch": 7.03, + "learning_rate": 1.2177962742766547e-07, + "logits/chosen": -1.6218866109848022, + "logits/rejected": -1.61871337890625, + "logps/chosen": -141.46035766601562, + "logps/rejected": -334.3531188964844, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.819666862487793, + "rewards/margins": 18.168058395385742, + "rewards/rejected": -23.98772621154785, + "step": 4378 + }, + { + "epoch": 7.03, + "learning_rate": 1.216805390408244e-07, + "logits/chosen": -1.4919884204864502, + "logits/rejected": -1.5171438455581665, + "logps/chosen": -97.53219604492188, + "logps/rejected": -223.1673126220703, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.256082534790039, + "rewards/margins": 11.25899887084961, + "rewards/rejected": -15.515082359313965, + "step": 4379 + }, + { + "epoch": 7.03, + "learning_rate": 1.2158145065398334e-07, + "logits/chosen": -1.6179909706115723, + "logits/rejected": -1.6266682147979736, + "logps/chosen": -167.18922424316406, + "logps/rejected": -297.3836975097656, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.91915225982666, + "rewards/margins": 12.567896842956543, + "rewards/rejected": -19.487049102783203, + "step": 4380 + }, + { + "epoch": 7.03, + "learning_rate": 1.214823622671423e-07, + "logits/chosen": -1.332798719406128, + "logits/rejected": -1.320021152496338, + "logps/chosen": -180.3543243408203, + "logps/rejected": -294.85595703125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.139558792114258, + "rewards/margins": 12.574420928955078, + "rewards/rejected": -21.71398162841797, + "step": 4381 + }, + { + "epoch": 7.03, + "learning_rate": 1.2138327388030123e-07, + "logits/chosen": -1.5753852128982544, + "logits/rejected": -1.4763171672821045, + "logps/chosen": -106.01918029785156, + "logps/rejected": -228.9042205810547, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.0765833854675293, + "rewards/margins": 12.449565887451172, + "rewards/rejected": -15.52614974975586, + "step": 4382 + }, + { + "epoch": 7.04, + "learning_rate": 1.2128418549346017e-07, + "logits/chosen": -1.3377175331115723, + "logits/rejected": -1.4172230958938599, + "logps/chosen": -197.42666625976562, + "logps/rejected": -293.0606689453125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.457674026489258, + "rewards/margins": 11.102187156677246, + "rewards/rejected": -19.559860229492188, + "step": 4383 + }, + { + "epoch": 7.04, + "learning_rate": 1.211850971066191e-07, + "logits/chosen": -1.42937171459198, + "logits/rejected": -1.427314043045044, + "logps/chosen": -189.89483642578125, + "logps/rejected": -370.8692321777344, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.784836769104004, + "rewards/margins": 16.446590423583984, + "rewards/rejected": -26.231430053710938, + "step": 4384 + }, + { + "epoch": 7.04, + "learning_rate": 1.2108600871977803e-07, + "logits/chosen": -1.4280948638916016, + "logits/rejected": -1.3845053911209106, + "logps/chosen": -171.34970092773438, + "logps/rejected": -299.17620849609375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.193657875061035, + "rewards/margins": 12.318739891052246, + "rewards/rejected": -19.51239776611328, + "step": 4385 + }, + { + "epoch": 7.04, + "learning_rate": 1.2098692033293696e-07, + "logits/chosen": -1.370217204093933, + "logits/rejected": -1.4574098587036133, + "logps/chosen": -110.87786865234375, + "logps/rejected": -256.34674072265625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.341951370239258, + "rewards/margins": 12.749320983886719, + "rewards/rejected": -17.09127426147461, + "step": 4386 + }, + { + "epoch": 7.04, + "learning_rate": 1.208878319460959e-07, + "logits/chosen": -1.4369972944259644, + "logits/rejected": -1.4989745616912842, + "logps/chosen": -172.84030151367188, + "logps/rejected": -303.0500183105469, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.561979293823242, + "rewards/margins": 10.936203002929688, + "rewards/rejected": -20.49818229675293, + "step": 4387 + }, + { + "epoch": 7.04, + "learning_rate": 1.2078874355925486e-07, + "logits/chosen": -1.5658349990844727, + "logits/rejected": -1.5637575387954712, + "logps/chosen": -146.93243408203125, + "logps/rejected": -295.1549377441406, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.028261184692383, + "rewards/margins": 14.471406936645508, + "rewards/rejected": -20.49966812133789, + "step": 4388 + }, + { + "epoch": 7.04, + "learning_rate": 1.206896551724138e-07, + "logits/chosen": -1.497483730316162, + "logits/rejected": -1.5429902076721191, + "logps/chosen": -118.52787780761719, + "logps/rejected": -274.13720703125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.54818058013916, + "rewards/margins": 15.234678268432617, + "rewards/rejected": -19.782859802246094, + "step": 4389 + }, + { + "epoch": 7.05, + "learning_rate": 1.2059056678557272e-07, + "logits/chosen": -1.4976643323898315, + "logits/rejected": -1.5186039209365845, + "logps/chosen": -186.392578125, + "logps/rejected": -350.80767822265625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.995890617370605, + "rewards/margins": 16.163089752197266, + "rewards/rejected": -25.158981323242188, + "step": 4390 + }, + { + "epoch": 7.05, + "learning_rate": 1.2049147839873166e-07, + "logits/chosen": -1.3440977334976196, + "logits/rejected": -1.3671637773513794, + "logps/chosen": -172.62954711914062, + "logps/rejected": -312.9458312988281, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.042278289794922, + "rewards/margins": 13.587515830993652, + "rewards/rejected": -22.62979507446289, + "step": 4391 + }, + { + "epoch": 7.05, + "learning_rate": 1.203923900118906e-07, + "logits/chosen": -1.5549403429031372, + "logits/rejected": -1.4701956510543823, + "logps/chosen": -162.96522521972656, + "logps/rejected": -286.3807067871094, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.806276321411133, + "rewards/margins": 13.089628219604492, + "rewards/rejected": -21.895904541015625, + "step": 4392 + }, + { + "epoch": 7.05, + "learning_rate": 1.2029330162504955e-07, + "logits/chosen": -1.329545497894287, + "logits/rejected": -1.3512823581695557, + "logps/chosen": -166.4410400390625, + "logps/rejected": -306.2109375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.095890998840332, + "rewards/margins": 13.523273468017578, + "rewards/rejected": -21.619165420532227, + "step": 4393 + }, + { + "epoch": 7.05, + "learning_rate": 1.2019421323820848e-07, + "logits/chosen": -1.3834015130996704, + "logits/rejected": -1.3561965227127075, + "logps/chosen": -189.90713500976562, + "logps/rejected": -316.8287353515625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.710467338562012, + "rewards/margins": 12.092619895935059, + "rewards/rejected": -22.80308723449707, + "step": 4394 + }, + { + "epoch": 7.05, + "learning_rate": 1.2009512485136742e-07, + "logits/chosen": -1.4336225986480713, + "logits/rejected": -1.4905768632888794, + "logps/chosen": -199.7512969970703, + "logps/rejected": -318.2454528808594, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.809566497802734, + "rewards/margins": 12.135251998901367, + "rewards/rejected": -23.9448184967041, + "step": 4395 + }, + { + "epoch": 7.06, + "learning_rate": 1.1999603646452635e-07, + "logits/chosen": -1.3141175508499146, + "logits/rejected": -1.464215874671936, + "logps/chosen": -155.70541381835938, + "logps/rejected": -278.99810791015625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.354961395263672, + "rewards/margins": 12.077619552612305, + "rewards/rejected": -19.432580947875977, + "step": 4396 + }, + { + "epoch": 7.06, + "learning_rate": 1.1989694807768528e-07, + "logits/chosen": -1.4785109758377075, + "logits/rejected": -1.395127296447754, + "logps/chosen": -235.4036102294922, + "logps/rejected": -297.4437561035156, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.262883186340332, + "rewards/margins": 13.017287254333496, + "rewards/rejected": -22.28017234802246, + "step": 4397 + }, + { + "epoch": 7.06, + "learning_rate": 1.1979785969084422e-07, + "logits/chosen": -1.6896998882293701, + "logits/rejected": -1.651196002960205, + "logps/chosen": -170.74319458007812, + "logps/rejected": -334.74078369140625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.166314125061035, + "rewards/margins": 16.158979415893555, + "rewards/rejected": -22.325292587280273, + "step": 4398 + }, + { + "epoch": 7.06, + "learning_rate": 1.1969877130400315e-07, + "logits/chosen": -1.4963104724884033, + "logits/rejected": -1.5063445568084717, + "logps/chosen": -113.156982421875, + "logps/rejected": -248.58995056152344, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.673783302307129, + "rewards/margins": 13.906438827514648, + "rewards/rejected": -18.58022117614746, + "step": 4399 + }, + { + "epoch": 7.06, + "learning_rate": 1.195996829171621e-07, + "logits/chosen": -1.4310479164123535, + "logits/rejected": -1.4770886898040771, + "logps/chosen": -152.43527221679688, + "logps/rejected": -301.9744567871094, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.1401872634887695, + "rewards/margins": 13.811334609985352, + "rewards/rejected": -20.951520919799805, + "step": 4400 + }, + { + "epoch": 7.06, + "learning_rate": 1.1950059453032104e-07, + "logits/chosen": -1.3650128841400146, + "logits/rejected": -1.4018361568450928, + "logps/chosen": -181.78424072265625, + "logps/rejected": -295.04132080078125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.376157760620117, + "rewards/margins": 11.968466758728027, + "rewards/rejected": -21.34462547302246, + "step": 4401 + }, + { + "epoch": 7.07, + "learning_rate": 1.1940150614347998e-07, + "logits/chosen": -1.4893929958343506, + "logits/rejected": -1.5523968935012817, + "logps/chosen": -127.37936401367188, + "logps/rejected": -279.45147705078125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.672035217285156, + "rewards/margins": 14.61146354675293, + "rewards/rejected": -20.283498764038086, + "step": 4402 + }, + { + "epoch": 7.07, + "learning_rate": 1.193024177566389e-07, + "logits/chosen": -1.4192248582839966, + "logits/rejected": -1.5713485479354858, + "logps/chosen": -147.90444946289062, + "logps/rejected": -307.06976318359375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.014726161956787, + "rewards/margins": 14.839646339416504, + "rewards/rejected": -20.854373931884766, + "step": 4403 + }, + { + "epoch": 7.07, + "learning_rate": 1.1920332936979786e-07, + "logits/chosen": -1.5117084980010986, + "logits/rejected": -1.498105764389038, + "logps/chosen": -209.72364807128906, + "logps/rejected": -310.7216796875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.816614151000977, + "rewards/margins": 11.283796310424805, + "rewards/rejected": -22.100412368774414, + "step": 4404 + }, + { + "epoch": 7.07, + "learning_rate": 1.1910424098295679e-07, + "logits/chosen": -1.4139434099197388, + "logits/rejected": -1.3381391763687134, + "logps/chosen": -136.28648376464844, + "logps/rejected": -291.208740234375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.7991180419921875, + "rewards/margins": 15.359137535095215, + "rewards/rejected": -21.158254623413086, + "step": 4405 + }, + { + "epoch": 7.07, + "learning_rate": 1.1900515259611572e-07, + "logits/chosen": -1.3613402843475342, + "logits/rejected": -1.3988492488861084, + "logps/chosen": -144.20407104492188, + "logps/rejected": -268.828857421875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.536291599273682, + "rewards/margins": 11.424067497253418, + "rewards/rejected": -17.960359573364258, + "step": 4406 + }, + { + "epoch": 7.07, + "learning_rate": 1.1890606420927467e-07, + "logits/chosen": -1.5548901557922363, + "logits/rejected": -1.5583322048187256, + "logps/chosen": -184.01783752441406, + "logps/rejected": -354.754638671875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.044439315795898, + "rewards/margins": 15.768331527709961, + "rewards/rejected": -26.81277084350586, + "step": 4407 + }, + { + "epoch": 7.08, + "learning_rate": 1.1880697582243362e-07, + "logits/chosen": -1.4849333763122559, + "logits/rejected": -1.4338558912277222, + "logps/chosen": -192.2312774658203, + "logps/rejected": -341.52923583984375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.298298835754395, + "rewards/margins": 14.357309341430664, + "rewards/rejected": -23.655609130859375, + "step": 4408 + }, + { + "epoch": 7.08, + "learning_rate": 1.1870788743559254e-07, + "logits/chosen": -1.361864686012268, + "logits/rejected": -1.4718658924102783, + "logps/chosen": -179.06112670898438, + "logps/rejected": -349.86395263671875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.71994686126709, + "rewards/margins": 15.661255836486816, + "rewards/rejected": -24.381202697753906, + "step": 4409 + }, + { + "epoch": 7.08, + "learning_rate": 1.1860879904875148e-07, + "logits/chosen": -1.431876540184021, + "logits/rejected": -1.3553695678710938, + "logps/chosen": -172.84556579589844, + "logps/rejected": -266.1935119628906, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.005773544311523, + "rewards/margins": 11.218879699707031, + "rewards/rejected": -19.224655151367188, + "step": 4410 + }, + { + "epoch": 7.08, + "learning_rate": 1.1850971066191042e-07, + "logits/chosen": -1.5858781337738037, + "logits/rejected": -1.5668963193893433, + "logps/chosen": -136.26177978515625, + "logps/rejected": -264.7826232910156, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.194519996643066, + "rewards/margins": 13.416681289672852, + "rewards/rejected": -18.6112003326416, + "step": 4411 + }, + { + "epoch": 7.08, + "learning_rate": 1.1841062227506936e-07, + "logits/chosen": -1.453546404838562, + "logits/rejected": -1.4401999711990356, + "logps/chosen": -160.61709594726562, + "logps/rejected": -277.25213623046875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.445626258850098, + "rewards/margins": 12.429258346557617, + "rewards/rejected": -20.87488555908203, + "step": 4412 + }, + { + "epoch": 7.08, + "learning_rate": 1.1831153388822828e-07, + "logits/chosen": -1.479432225227356, + "logits/rejected": -1.487130880355835, + "logps/chosen": -161.880615234375, + "logps/rejected": -320.90679931640625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.643210411071777, + "rewards/margins": 14.85353946685791, + "rewards/rejected": -22.496749877929688, + "step": 4413 + }, + { + "epoch": 7.09, + "learning_rate": 1.1821244550138723e-07, + "logits/chosen": -1.5551575422286987, + "logits/rejected": -1.5682353973388672, + "logps/chosen": -166.21414184570312, + "logps/rejected": -285.204345703125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.92892599105835, + "rewards/margins": 12.734193801879883, + "rewards/rejected": -19.66312026977539, + "step": 4414 + }, + { + "epoch": 7.09, + "learning_rate": 1.1811335711454618e-07, + "logits/chosen": -1.4179699420928955, + "logits/rejected": -1.3936232328414917, + "logps/chosen": -106.32705688476562, + "logps/rejected": -307.7656555175781, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.2879066467285156, + "rewards/margins": 19.634695053100586, + "rewards/rejected": -22.9226016998291, + "step": 4415 + }, + { + "epoch": 7.09, + "learning_rate": 1.1801426872770511e-07, + "logits/chosen": -1.5107581615447998, + "logits/rejected": -1.4787640571594238, + "logps/chosen": -165.77395629882812, + "logps/rejected": -344.2603759765625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.408445358276367, + "rewards/margins": 16.36794662475586, + "rewards/rejected": -24.776390075683594, + "step": 4416 + }, + { + "epoch": 7.09, + "learning_rate": 1.1791518034086404e-07, + "logits/chosen": -1.5861022472381592, + "logits/rejected": -1.6524176597595215, + "logps/chosen": -174.70314025878906, + "logps/rejected": -336.43536376953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.959985256195068, + "rewards/margins": 15.298704147338867, + "rewards/rejected": -22.258689880371094, + "step": 4417 + }, + { + "epoch": 7.09, + "learning_rate": 1.1781609195402298e-07, + "logits/chosen": -1.385389804840088, + "logits/rejected": -1.4482723474502563, + "logps/chosen": -161.1041259765625, + "logps/rejected": -311.85662841796875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.588863372802734, + "rewards/margins": 13.352644920349121, + "rewards/rejected": -20.941509246826172, + "step": 4418 + }, + { + "epoch": 7.09, + "learning_rate": 1.1771700356718192e-07, + "logits/chosen": -1.5862985849380493, + "logits/rejected": -1.560420274734497, + "logps/chosen": -156.8106689453125, + "logps/rejected": -259.51141357421875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.413906097412109, + "rewards/margins": 11.655350685119629, + "rewards/rejected": -19.069255828857422, + "step": 4419 + }, + { + "epoch": 7.09, + "learning_rate": 1.1761791518034087e-07, + "logits/chosen": -1.4887361526489258, + "logits/rejected": -1.4792457818984985, + "logps/chosen": -152.6226806640625, + "logps/rejected": -276.1060485839844, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.357394695281982, + "rewards/margins": 12.943361282348633, + "rewards/rejected": -19.300756454467773, + "step": 4420 + }, + { + "epoch": 7.1, + "learning_rate": 1.1751882679349979e-07, + "logits/chosen": -1.483611822128296, + "logits/rejected": -1.482194185256958, + "logps/chosen": -162.638916015625, + "logps/rejected": -275.14410400390625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.554872512817383, + "rewards/margins": 11.56513786315918, + "rewards/rejected": -18.120010375976562, + "step": 4421 + }, + { + "epoch": 7.1, + "learning_rate": 1.1741973840665874e-07, + "logits/chosen": -1.6419750452041626, + "logits/rejected": -1.6320631504058838, + "logps/chosen": -102.1863021850586, + "logps/rejected": -221.02142333984375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.883676767349243, + "rewards/margins": 12.313340187072754, + "rewards/rejected": -16.197017669677734, + "step": 4422 + }, + { + "epoch": 7.1, + "learning_rate": 1.1732065001981767e-07, + "logits/chosen": -1.4688994884490967, + "logits/rejected": -1.5430920124053955, + "logps/chosen": -202.72364807128906, + "logps/rejected": -332.2009582519531, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.985612869262695, + "rewards/margins": 12.8300142288208, + "rewards/rejected": -22.815628051757812, + "step": 4423 + }, + { + "epoch": 7.1, + "learning_rate": 1.1722156163297662e-07, + "logits/chosen": -1.364062786102295, + "logits/rejected": -1.5183019638061523, + "logps/chosen": -152.93630981445312, + "logps/rejected": -335.76849365234375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.837353706359863, + "rewards/margins": 14.4450101852417, + "rewards/rejected": -23.282363891601562, + "step": 4424 + }, + { + "epoch": 7.1, + "learning_rate": 1.1712247324613554e-07, + "logits/chosen": -1.3889567852020264, + "logits/rejected": -1.402087688446045, + "logps/chosen": -204.19203186035156, + "logps/rejected": -337.25201416015625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.569656372070312, + "rewards/margins": 13.781377792358398, + "rewards/rejected": -25.351036071777344, + "step": 4425 + }, + { + "epoch": 7.1, + "learning_rate": 1.1702338485929448e-07, + "logits/chosen": -1.6021469831466675, + "logits/rejected": -1.6306980848312378, + "logps/chosen": -137.90576171875, + "logps/rejected": -277.25836181640625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.114010810852051, + "rewards/margins": 12.51942253112793, + "rewards/rejected": -17.633432388305664, + "step": 4426 + }, + { + "epoch": 7.11, + "learning_rate": 1.1692429647245343e-07, + "logits/chosen": -1.4601731300354004, + "logits/rejected": -1.3676235675811768, + "logps/chosen": -186.82691955566406, + "logps/rejected": -293.4724426269531, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.33127498626709, + "rewards/margins": 12.503325462341309, + "rewards/rejected": -21.83460235595703, + "step": 4427 + }, + { + "epoch": 7.11, + "learning_rate": 1.1682520808561236e-07, + "logits/chosen": -1.4145084619522095, + "logits/rejected": -1.4080500602722168, + "logps/chosen": -139.5091094970703, + "logps/rejected": -256.03863525390625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.859859466552734, + "rewards/margins": 11.615273475646973, + "rewards/rejected": -17.47513198852539, + "step": 4428 + }, + { + "epoch": 7.11, + "learning_rate": 1.1672611969877131e-07, + "logits/chosen": -1.5423989295959473, + "logits/rejected": -1.5693622827529907, + "logps/chosen": -101.80856323242188, + "logps/rejected": -274.6204833984375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9585373401641846, + "rewards/margins": 15.323705673217773, + "rewards/rejected": -19.282241821289062, + "step": 4429 + }, + { + "epoch": 7.11, + "learning_rate": 1.1662703131193023e-07, + "logits/chosen": -1.4712005853652954, + "logits/rejected": -1.6331952810287476, + "logps/chosen": -149.66722106933594, + "logps/rejected": -301.5667419433594, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.093400001525879, + "rewards/margins": 14.721809387207031, + "rewards/rejected": -21.815208435058594, + "step": 4430 + }, + { + "epoch": 7.11, + "learning_rate": 1.1652794292508918e-07, + "logits/chosen": -1.374825119972229, + "logits/rejected": -1.3486108779907227, + "logps/chosen": -130.68577575683594, + "logps/rejected": -262.4138488769531, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.213830947875977, + "rewards/margins": 12.66818618774414, + "rewards/rejected": -18.882017135620117, + "step": 4431 + }, + { + "epoch": 7.11, + "learning_rate": 1.1642885453824811e-07, + "logits/chosen": -1.582627773284912, + "logits/rejected": -1.585771918296814, + "logps/chosen": -150.88136291503906, + "logps/rejected": -252.6105194091797, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.622832298278809, + "rewards/margins": 10.834003448486328, + "rewards/rejected": -18.45683479309082, + "step": 4432 + }, + { + "epoch": 7.12, + "learning_rate": 1.1632976615140705e-07, + "logits/chosen": -1.3234968185424805, + "logits/rejected": -1.2927043437957764, + "logps/chosen": -132.90679931640625, + "logps/rejected": -291.08648681640625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.712241172790527, + "rewards/margins": 15.087831497192383, + "rewards/rejected": -19.800073623657227, + "step": 4433 + }, + { + "epoch": 7.12, + "learning_rate": 1.1623067776456599e-07, + "logits/chosen": -1.4405003786087036, + "logits/rejected": -1.4827449321746826, + "logps/chosen": -151.06802368164062, + "logps/rejected": -291.8225402832031, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.084744930267334, + "rewards/margins": 13.969551086425781, + "rewards/rejected": -19.054296493530273, + "step": 4434 + }, + { + "epoch": 7.12, + "learning_rate": 1.1613158937772492e-07, + "logits/chosen": -1.3239552974700928, + "logits/rejected": -1.3017215728759766, + "logps/chosen": -158.8923797607422, + "logps/rejected": -295.18505859375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.426975727081299, + "rewards/margins": 14.278682708740234, + "rewards/rejected": -20.705657958984375, + "step": 4435 + }, + { + "epoch": 7.12, + "learning_rate": 1.1603250099088387e-07, + "logits/chosen": -1.5890172719955444, + "logits/rejected": -1.6501963138580322, + "logps/chosen": -148.562744140625, + "logps/rejected": -280.26556396484375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.415689468383789, + "rewards/margins": 11.724197387695312, + "rewards/rejected": -18.1398868560791, + "step": 4436 + }, + { + "epoch": 7.12, + "learning_rate": 1.159334126040428e-07, + "logits/chosen": -1.3769185543060303, + "logits/rejected": -1.4375079870224, + "logps/chosen": -161.35206604003906, + "logps/rejected": -271.6094665527344, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.676819324493408, + "rewards/margins": 12.75632381439209, + "rewards/rejected": -19.433141708374023, + "step": 4437 + }, + { + "epoch": 7.12, + "learning_rate": 1.1583432421720173e-07, + "logits/chosen": -1.525823950767517, + "logits/rejected": -1.4857747554779053, + "logps/chosen": -136.2823486328125, + "logps/rejected": -261.8940124511719, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.6173248291015625, + "rewards/margins": 12.202953338623047, + "rewards/rejected": -17.82027816772461, + "step": 4438 + }, + { + "epoch": 7.13, + "learning_rate": 1.1573523583036067e-07, + "logits/chosen": -1.457177996635437, + "logits/rejected": -1.4627056121826172, + "logps/chosen": -195.02987670898438, + "logps/rejected": -339.2269592285156, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.36465072631836, + "rewards/margins": 14.331276893615723, + "rewards/rejected": -23.6959285736084, + "step": 4439 + }, + { + "epoch": 7.13, + "learning_rate": 1.1563614744351961e-07, + "logits/chosen": -1.6132800579071045, + "logits/rejected": -1.7027281522750854, + "logps/chosen": -108.86050415039062, + "logps/rejected": -281.8426513671875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.172152042388916, + "rewards/margins": 15.87280559539795, + "rewards/rejected": -20.044958114624023, + "step": 4440 + }, + { + "epoch": 7.13, + "learning_rate": 1.1553705905667856e-07, + "logits/chosen": -1.4645763635635376, + "logits/rejected": -1.3885095119476318, + "logps/chosen": -174.92025756835938, + "logps/rejected": -311.6620178222656, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.319334030151367, + "rewards/margins": 13.426764488220215, + "rewards/rejected": -22.746097564697266, + "step": 4441 + }, + { + "epoch": 7.13, + "learning_rate": 1.1543797066983748e-07, + "logits/chosen": -1.557429552078247, + "logits/rejected": -1.6758471727371216, + "logps/chosen": -111.71353912353516, + "logps/rejected": -316.83148193359375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.401368618011475, + "rewards/margins": 17.210872650146484, + "rewards/rejected": -21.612241744995117, + "step": 4442 + }, + { + "epoch": 7.13, + "learning_rate": 1.1533888228299643e-07, + "logits/chosen": -1.47796630859375, + "logits/rejected": -1.521164059638977, + "logps/chosen": -150.79031372070312, + "logps/rejected": -311.0825500488281, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.764964580535889, + "rewards/margins": 14.160481452941895, + "rewards/rejected": -21.925445556640625, + "step": 4443 + }, + { + "epoch": 7.13, + "learning_rate": 1.1523979389615536e-07, + "logits/chosen": -1.5690498352050781, + "logits/rejected": -1.5780504941940308, + "logps/chosen": -163.36813354492188, + "logps/rejected": -302.6620788574219, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.280340194702148, + "rewards/margins": 12.635034561157227, + "rewards/rejected": -18.915374755859375, + "step": 4444 + }, + { + "epoch": 7.13, + "learning_rate": 1.1514070550931431e-07, + "logits/chosen": -1.4979910850524902, + "logits/rejected": -1.5226258039474487, + "logps/chosen": -178.41664123535156, + "logps/rejected": -296.42340087890625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.071675300598145, + "rewards/margins": 11.806652069091797, + "rewards/rejected": -21.878326416015625, + "step": 4445 + }, + { + "epoch": 7.14, + "learning_rate": 1.1504161712247325e-07, + "logits/chosen": -1.3546698093414307, + "logits/rejected": -1.3058072328567505, + "logps/chosen": -142.22572326660156, + "logps/rejected": -278.24273681640625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.914264678955078, + "rewards/margins": 12.88447093963623, + "rewards/rejected": -19.798736572265625, + "step": 4446 + }, + { + "epoch": 7.14, + "learning_rate": 1.1494252873563217e-07, + "logits/chosen": -1.361534833908081, + "logits/rejected": -1.3801745176315308, + "logps/chosen": -154.43310546875, + "logps/rejected": -275.89190673828125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.062540054321289, + "rewards/margins": 12.86067008972168, + "rewards/rejected": -19.92321014404297, + "step": 4447 + }, + { + "epoch": 7.14, + "learning_rate": 1.1484344034879112e-07, + "logits/chosen": -1.4785490036010742, + "logits/rejected": -1.4345271587371826, + "logps/chosen": -151.44174194335938, + "logps/rejected": -259.41326904296875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.118020057678223, + "rewards/margins": 10.922743797302246, + "rewards/rejected": -18.04076385498047, + "step": 4448 + }, + { + "epoch": 7.14, + "learning_rate": 1.1474435196195005e-07, + "logits/chosen": -1.505431890487671, + "logits/rejected": -1.5208663940429688, + "logps/chosen": -181.7689971923828, + "logps/rejected": -360.4417724609375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.3892822265625, + "rewards/margins": 18.06121826171875, + "rewards/rejected": -27.450502395629883, + "step": 4449 + }, + { + "epoch": 7.14, + "learning_rate": 1.14645263575109e-07, + "logits/chosen": -1.317400336265564, + "logits/rejected": -1.4054421186447144, + "logps/chosen": -116.97425079345703, + "logps/rejected": -286.3268127441406, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.883481979370117, + "rewards/margins": 15.518569946289062, + "rewards/rejected": -20.40205192565918, + "step": 4450 + }, + { + "epoch": 7.14, + "learning_rate": 1.1454617518826792e-07, + "logits/chosen": -1.444591760635376, + "logits/rejected": -1.3637341260910034, + "logps/chosen": -205.36611938476562, + "logps/rejected": -295.0605163574219, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.934293746948242, + "rewards/margins": 11.134123802185059, + "rewards/rejected": -21.068416595458984, + "step": 4451 + }, + { + "epoch": 7.15, + "learning_rate": 1.1444708680142687e-07, + "logits/chosen": -1.4698246717453003, + "logits/rejected": -1.4161556959152222, + "logps/chosen": -171.14166259765625, + "logps/rejected": -298.9524230957031, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.6643500328063965, + "rewards/margins": 12.31596565246582, + "rewards/rejected": -19.980316162109375, + "step": 4452 + }, + { + "epoch": 7.15, + "learning_rate": 1.1434799841458581e-07, + "logits/chosen": -1.4975136518478394, + "logits/rejected": -1.5147759914398193, + "logps/chosen": -137.17042541503906, + "logps/rejected": -250.16195678710938, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.0959625244140625, + "rewards/margins": 12.604036331176758, + "rewards/rejected": -18.69999885559082, + "step": 4453 + }, + { + "epoch": 7.15, + "learning_rate": 1.1424891002774475e-07, + "logits/chosen": -1.633681058883667, + "logits/rejected": -1.658503770828247, + "logps/chosen": -152.09393310546875, + "logps/rejected": -329.01788330078125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.7407097816467285, + "rewards/margins": 18.049171447753906, + "rewards/rejected": -23.789880752563477, + "step": 4454 + }, + { + "epoch": 7.15, + "learning_rate": 1.1414982164090368e-07, + "logits/chosen": -1.5273635387420654, + "logits/rejected": -1.6087315082550049, + "logps/chosen": -157.17156982421875, + "logps/rejected": -362.447021484375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.445289134979248, + "rewards/margins": 18.399507522583008, + "rewards/rejected": -24.84479522705078, + "step": 4455 + }, + { + "epoch": 7.15, + "learning_rate": 1.1405073325406261e-07, + "logits/chosen": -1.5470829010009766, + "logits/rejected": -1.5684939622879028, + "logps/chosen": -148.16543579101562, + "logps/rejected": -272.06072998046875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.978500843048096, + "rewards/margins": 12.296247482299805, + "rewards/rejected": -18.274749755859375, + "step": 4456 + }, + { + "epoch": 7.15, + "learning_rate": 1.1395164486722156e-07, + "logits/chosen": -1.3739691972732544, + "logits/rejected": -1.4242299795150757, + "logps/chosen": -168.30699157714844, + "logps/rejected": -318.8625183105469, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.307641983032227, + "rewards/margins": 14.911429405212402, + "rewards/rejected": -22.219070434570312, + "step": 4457 + }, + { + "epoch": 7.16, + "learning_rate": 1.1385255648038049e-07, + "logits/chosen": -1.441756010055542, + "logits/rejected": -1.4965020418167114, + "logps/chosen": -154.40444946289062, + "logps/rejected": -286.1380615234375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.041849613189697, + "rewards/margins": 13.107115745544434, + "rewards/rejected": -20.14896583557129, + "step": 4458 + }, + { + "epoch": 7.16, + "learning_rate": 1.1375346809353943e-07, + "logits/chosen": -1.5607435703277588, + "logits/rejected": -1.6428611278533936, + "logps/chosen": -122.37406921386719, + "logps/rejected": -306.259033203125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.905029296875, + "rewards/margins": 15.576781272888184, + "rewards/rejected": -20.481809616088867, + "step": 4459 + }, + { + "epoch": 7.16, + "learning_rate": 1.1365437970669837e-07, + "logits/chosen": -1.4708340167999268, + "logits/rejected": -1.3857134580612183, + "logps/chosen": -152.0325469970703, + "logps/rejected": -266.1288146972656, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.03642463684082, + "rewards/margins": 13.444053649902344, + "rewards/rejected": -18.480478286743164, + "step": 4460 + }, + { + "epoch": 7.16, + "learning_rate": 1.135552913198573e-07, + "logits/chosen": -1.6505342721939087, + "logits/rejected": -1.7063852548599243, + "logps/chosen": -103.31838989257812, + "logps/rejected": -240.4877166748047, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.947434663772583, + "rewards/margins": 12.046475410461426, + "rewards/rejected": -15.99390983581543, + "step": 4461 + }, + { + "epoch": 7.16, + "learning_rate": 1.1345620293301625e-07, + "logits/chosen": -1.5462641716003418, + "logits/rejected": -1.6134413480758667, + "logps/chosen": -108.82008361816406, + "logps/rejected": -249.1681365966797, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.5604941844940186, + "rewards/margins": 14.273246765136719, + "rewards/rejected": -17.833740234375, + "step": 4462 + }, + { + "epoch": 7.16, + "learning_rate": 1.1335711454617517e-07, + "logits/chosen": -1.4984015226364136, + "logits/rejected": -1.5033825635910034, + "logps/chosen": -168.34661865234375, + "logps/rejected": -249.28280639648438, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.406966686248779, + "rewards/margins": 10.36064338684082, + "rewards/rejected": -17.767610549926758, + "step": 4463 + }, + { + "epoch": 7.17, + "learning_rate": 1.1325802615933412e-07, + "logits/chosen": -1.5690394639968872, + "logits/rejected": -1.6479684114456177, + "logps/chosen": -137.28273010253906, + "logps/rejected": -326.61102294921875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.465676307678223, + "rewards/margins": 18.75271224975586, + "rewards/rejected": -24.218387603759766, + "step": 4464 + }, + { + "epoch": 7.17, + "learning_rate": 1.1315893777249307e-07, + "logits/chosen": -1.5465962886810303, + "logits/rejected": -1.5554571151733398, + "logps/chosen": -168.92828369140625, + "logps/rejected": -280.0191955566406, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.085268020629883, + "rewards/margins": 12.743964195251465, + "rewards/rejected": -20.829233169555664, + "step": 4465 + }, + { + "epoch": 7.17, + "learning_rate": 1.13059849385652e-07, + "logits/chosen": -1.4660253524780273, + "logits/rejected": -1.5401372909545898, + "logps/chosen": -173.46290588378906, + "logps/rejected": -332.5445556640625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.247304916381836, + "rewards/margins": 12.407906532287598, + "rewards/rejected": -21.655210494995117, + "step": 4466 + }, + { + "epoch": 7.17, + "learning_rate": 1.1296076099881093e-07, + "logits/chosen": -1.730126142501831, + "logits/rejected": -1.6602599620819092, + "logps/chosen": -120.71408081054688, + "logps/rejected": -280.5617370605469, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.68569278717041, + "rewards/margins": 16.34003448486328, + "rewards/rejected": -20.025728225708008, + "step": 4467 + }, + { + "epoch": 7.17, + "learning_rate": 1.1286167261196987e-07, + "logits/chosen": -1.5424342155456543, + "logits/rejected": -1.4818556308746338, + "logps/chosen": -131.2424774169922, + "logps/rejected": -276.4385681152344, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.558210849761963, + "rewards/margins": 14.857831001281738, + "rewards/rejected": -19.41604232788086, + "step": 4468 + }, + { + "epoch": 7.17, + "learning_rate": 1.1276258422512881e-07, + "logits/chosen": -1.384629249572754, + "logits/rejected": -1.3572701215744019, + "logps/chosen": -177.1447296142578, + "logps/rejected": -312.53900146484375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.79460620880127, + "rewards/margins": 13.560791015625, + "rewards/rejected": -23.355398178100586, + "step": 4469 + }, + { + "epoch": 7.17, + "learning_rate": 1.1266349583828775e-07, + "logits/chosen": -1.5451263189315796, + "logits/rejected": -1.4686702489852905, + "logps/chosen": -172.42152404785156, + "logps/rejected": -265.1445617675781, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.076458930969238, + "rewards/margins": 11.794326782226562, + "rewards/rejected": -18.870784759521484, + "step": 4470 + }, + { + "epoch": 7.18, + "learning_rate": 1.1256440745144669e-07, + "logits/chosen": -1.4290134906768799, + "logits/rejected": -1.5221740007400513, + "logps/chosen": -165.48565673828125, + "logps/rejected": -345.3985900878906, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.863882064819336, + "rewards/margins": 15.052719116210938, + "rewards/rejected": -23.916603088378906, + "step": 4471 + }, + { + "epoch": 7.18, + "learning_rate": 1.1246531906460563e-07, + "logits/chosen": -1.431606411933899, + "logits/rejected": -1.4714363813400269, + "logps/chosen": -162.3533935546875, + "logps/rejected": -279.32879638671875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.669551849365234, + "rewards/margins": 11.912199020385742, + "rewards/rejected": -20.581750869750977, + "step": 4472 + }, + { + "epoch": 7.18, + "learning_rate": 1.1236623067776456e-07, + "logits/chosen": -1.490502119064331, + "logits/rejected": -1.43410325050354, + "logps/chosen": -199.19113159179688, + "logps/rejected": -330.6953430175781, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.520463943481445, + "rewards/margins": 13.581968307495117, + "rewards/rejected": -24.102432250976562, + "step": 4473 + }, + { + "epoch": 7.18, + "learning_rate": 1.122671422909235e-07, + "logits/chosen": -1.7095839977264404, + "logits/rejected": -1.6550215482711792, + "logps/chosen": -169.77423095703125, + "logps/rejected": -300.9315490722656, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.238997936248779, + "rewards/margins": 15.138381958007812, + "rewards/rejected": -21.37738037109375, + "step": 4474 + }, + { + "epoch": 7.18, + "learning_rate": 1.1216805390408244e-07, + "logits/chosen": -1.405106544494629, + "logits/rejected": -1.5599141120910645, + "logps/chosen": -174.13082885742188, + "logps/rejected": -302.0238952636719, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.711543083190918, + "rewards/margins": 11.395574569702148, + "rewards/rejected": -21.10711669921875, + "step": 4475 + }, + { + "epoch": 7.18, + "learning_rate": 1.1206896551724137e-07, + "logits/chosen": -1.4798306226730347, + "logits/rejected": -1.4681893587112427, + "logps/chosen": -149.54212951660156, + "logps/rejected": -281.2727966308594, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.086391925811768, + "rewards/margins": 13.407508850097656, + "rewards/rejected": -19.493900299072266, + "step": 4476 + }, + { + "epoch": 7.19, + "learning_rate": 1.119698771304003e-07, + "logits/chosen": -1.405415654182434, + "logits/rejected": -1.3924766778945923, + "logps/chosen": -178.48556518554688, + "logps/rejected": -271.23748779296875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.524254322052002, + "rewards/margins": 9.85496711730957, + "rewards/rejected": -17.379222869873047, + "step": 4477 + }, + { + "epoch": 7.19, + "learning_rate": 1.1187078874355925e-07, + "logits/chosen": -1.5242599248886108, + "logits/rejected": -1.5426831245422363, + "logps/chosen": -139.65513610839844, + "logps/rejected": -240.63101196289062, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.065365791320801, + "rewards/margins": 9.095728874206543, + "rewards/rejected": -15.161094665527344, + "step": 4478 + }, + { + "epoch": 7.19, + "learning_rate": 1.117717003567182e-07, + "logits/chosen": -1.589646339416504, + "logits/rejected": -1.5648841857910156, + "logps/chosen": -112.52920532226562, + "logps/rejected": -248.1256866455078, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.782210350036621, + "rewards/margins": 13.190191268920898, + "rewards/rejected": -17.972400665283203, + "step": 4479 + }, + { + "epoch": 7.19, + "learning_rate": 1.1167261196987712e-07, + "logits/chosen": -1.4224445819854736, + "logits/rejected": -1.4717936515808105, + "logps/chosen": -177.76217651367188, + "logps/rejected": -301.51837158203125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.192911148071289, + "rewards/margins": 12.901500701904297, + "rewards/rejected": -23.094411849975586, + "step": 4480 + }, + { + "epoch": 7.19, + "learning_rate": 1.1157352358303606e-07, + "logits/chosen": -1.455580472946167, + "logits/rejected": -1.4779707193374634, + "logps/chosen": -194.71875, + "logps/rejected": -322.84454345703125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.99976634979248, + "rewards/margins": 13.660626411437988, + "rewards/rejected": -23.66039276123047, + "step": 4481 + }, + { + "epoch": 7.19, + "learning_rate": 1.11474435196195e-07, + "logits/chosen": -1.551810383796692, + "logits/rejected": -1.49295175075531, + "logps/chosen": -214.46377563476562, + "logps/rejected": -323.32861328125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.0935697555542, + "rewards/margins": 11.321246147155762, + "rewards/rejected": -21.41481590270996, + "step": 4482 + }, + { + "epoch": 7.2, + "learning_rate": 1.1137534680935394e-07, + "logits/chosen": -1.5343575477600098, + "logits/rejected": -1.5455467700958252, + "logps/chosen": -111.05654907226562, + "logps/rejected": -263.84039306640625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.057274341583252, + "rewards/margins": 14.446259498596191, + "rewards/rejected": -18.50353240966797, + "step": 4483 + }, + { + "epoch": 7.2, + "learning_rate": 1.1127625842251286e-07, + "logits/chosen": -1.4335224628448486, + "logits/rejected": -1.347228765487671, + "logps/chosen": -180.06182861328125, + "logps/rejected": -308.5184631347656, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.961424827575684, + "rewards/margins": 14.306244850158691, + "rewards/rejected": -23.267671585083008, + "step": 4484 + }, + { + "epoch": 7.2, + "learning_rate": 1.1117717003567181e-07, + "logits/chosen": -1.3963103294372559, + "logits/rejected": -1.3596796989440918, + "logps/chosen": -158.6099853515625, + "logps/rejected": -295.9410400390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.79271125793457, + "rewards/margins": 13.601800918579102, + "rewards/rejected": -20.394512176513672, + "step": 4485 + }, + { + "epoch": 7.2, + "learning_rate": 1.1107808164883076e-07, + "logits/chosen": -1.5625072717666626, + "logits/rejected": -1.4849281311035156, + "logps/chosen": -125.5058364868164, + "logps/rejected": -224.4513397216797, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.8290112018585205, + "rewards/margins": 12.431656837463379, + "rewards/rejected": -16.26066780090332, + "step": 4486 + }, + { + "epoch": 7.2, + "learning_rate": 1.1097899326198969e-07, + "logits/chosen": -1.3261325359344482, + "logits/rejected": -1.3719425201416016, + "logps/chosen": -182.37612915039062, + "logps/rejected": -298.97308349609375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.615578651428223, + "rewards/margins": 13.039054870605469, + "rewards/rejected": -20.654632568359375, + "step": 4487 + }, + { + "epoch": 7.2, + "learning_rate": 1.1087990487514862e-07, + "logits/chosen": -1.6193662881851196, + "logits/rejected": -1.5166277885437012, + "logps/chosen": -166.0712890625, + "logps/rejected": -274.9893798828125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.84495735168457, + "rewards/margins": 13.286054611206055, + "rewards/rejected": -20.131011962890625, + "step": 4488 + }, + { + "epoch": 7.21, + "learning_rate": 1.1078081648830756e-07, + "logits/chosen": -1.5503712892532349, + "logits/rejected": -1.5378947257995605, + "logps/chosen": -179.97206115722656, + "logps/rejected": -331.73040771484375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.248779296875, + "rewards/margins": 15.138498306274414, + "rewards/rejected": -23.387279510498047, + "step": 4489 + }, + { + "epoch": 7.21, + "learning_rate": 1.106817281014665e-07, + "logits/chosen": -1.5703132152557373, + "logits/rejected": -1.5212407112121582, + "logps/chosen": -226.54873657226562, + "logps/rejected": -354.0195007324219, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.438102722167969, + "rewards/margins": 14.573719024658203, + "rewards/rejected": -25.011821746826172, + "step": 4490 + }, + { + "epoch": 7.21, + "learning_rate": 1.1058263971462545e-07, + "logits/chosen": -1.420419454574585, + "logits/rejected": -1.3829654455184937, + "logps/chosen": -158.77938842773438, + "logps/rejected": -287.09698486328125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.895576477050781, + "rewards/margins": 12.402054786682129, + "rewards/rejected": -20.297630310058594, + "step": 4491 + }, + { + "epoch": 7.21, + "learning_rate": 1.1048355132778437e-07, + "logits/chosen": -1.5890192985534668, + "logits/rejected": -1.5488535165786743, + "logps/chosen": -175.7840576171875, + "logps/rejected": -326.34149169921875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.55239200592041, + "rewards/margins": 14.421974182128906, + "rewards/rejected": -21.974365234375, + "step": 4492 + }, + { + "epoch": 7.21, + "learning_rate": 1.1038446294094332e-07, + "logits/chosen": -1.4882782697677612, + "logits/rejected": -1.5441523790359497, + "logps/chosen": -176.9685821533203, + "logps/rejected": -279.03997802734375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.820552825927734, + "rewards/margins": 9.516609191894531, + "rewards/rejected": -18.337162017822266, + "step": 4493 + }, + { + "epoch": 7.21, + "learning_rate": 1.1028537455410225e-07, + "logits/chosen": -1.631833553314209, + "logits/rejected": -1.568556308746338, + "logps/chosen": -205.90176391601562, + "logps/rejected": -330.54571533203125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.624021530151367, + "rewards/margins": 14.169875144958496, + "rewards/rejected": -24.793895721435547, + "step": 4494 + }, + { + "epoch": 7.22, + "learning_rate": 1.101862861672612e-07, + "logits/chosen": -1.4587687253952026, + "logits/rejected": -1.5807727575302124, + "logps/chosen": -122.28618621826172, + "logps/rejected": -273.742919921875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.651030540466309, + "rewards/margins": 13.114721298217773, + "rewards/rejected": -18.765750885009766, + "step": 4495 + }, + { + "epoch": 7.22, + "learning_rate": 1.1008719778042013e-07, + "logits/chosen": -1.5782413482666016, + "logits/rejected": -1.4885382652282715, + "logps/chosen": -177.33740234375, + "logps/rejected": -298.45166015625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.096373081207275, + "rewards/margins": 15.4207763671875, + "rewards/rejected": -22.517148971557617, + "step": 4496 + }, + { + "epoch": 7.22, + "learning_rate": 1.0998810939357906e-07, + "logits/chosen": -1.474774718284607, + "logits/rejected": -1.5084195137023926, + "logps/chosen": -148.48782348632812, + "logps/rejected": -286.3673095703125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.353923797607422, + "rewards/margins": 14.272510528564453, + "rewards/rejected": -20.626434326171875, + "step": 4497 + }, + { + "epoch": 7.22, + "learning_rate": 1.0988902100673801e-07, + "logits/chosen": -1.3779891729354858, + "logits/rejected": -1.3686630725860596, + "logps/chosen": -164.83956909179688, + "logps/rejected": -302.1667175292969, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.558714866638184, + "rewards/margins": 13.544612884521484, + "rewards/rejected": -22.103328704833984, + "step": 4498 + }, + { + "epoch": 7.22, + "learning_rate": 1.0978993261989694e-07, + "logits/chosen": -1.6238688230514526, + "logits/rejected": -1.6401951313018799, + "logps/chosen": -126.90721893310547, + "logps/rejected": -264.90911865234375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.5198121070861816, + "rewards/margins": 13.504267692565918, + "rewards/rejected": -17.024080276489258, + "step": 4499 + }, + { + "epoch": 7.22, + "learning_rate": 1.0969084423305589e-07, + "logits/chosen": -1.564940094947815, + "logits/rejected": -1.527721881866455, + "logps/chosen": -115.49974060058594, + "logps/rejected": -226.72396850585938, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.110440254211426, + "rewards/margins": 13.147483825683594, + "rewards/rejected": -16.257925033569336, + "step": 4500 + }, + { + "epoch": 7.22, + "learning_rate": 1.0959175584621481e-07, + "logits/chosen": -1.4791845083236694, + "logits/rejected": -1.4481521844863892, + "logps/chosen": -136.6319580078125, + "logps/rejected": -267.668701171875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.383074760437012, + "rewards/margins": 14.415462493896484, + "rewards/rejected": -19.798538208007812, + "step": 4501 + }, + { + "epoch": 7.23, + "learning_rate": 1.0949266745937376e-07, + "logits/chosen": -1.544517993927002, + "logits/rejected": -1.621119499206543, + "logps/chosen": -180.95120239257812, + "logps/rejected": -306.50030517578125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.7985258102417, + "rewards/margins": 12.58233642578125, + "rewards/rejected": -21.380861282348633, + "step": 4502 + }, + { + "epoch": 7.23, + "learning_rate": 1.0939357907253269e-07, + "logits/chosen": -1.4296298027038574, + "logits/rejected": -1.4860605001449585, + "logps/chosen": -148.98703002929688, + "logps/rejected": -318.3758239746094, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.293027400970459, + "rewards/margins": 15.708171844482422, + "rewards/rejected": -23.00119972229004, + "step": 4503 + }, + { + "epoch": 7.23, + "learning_rate": 1.0929449068569164e-07, + "logits/chosen": -1.4504823684692383, + "logits/rejected": -1.4094116687774658, + "logps/chosen": -193.43287658691406, + "logps/rejected": -336.18341064453125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.603006362915039, + "rewards/margins": 14.809040069580078, + "rewards/rejected": -25.412046432495117, + "step": 4504 + }, + { + "epoch": 7.23, + "learning_rate": 1.0919540229885057e-07, + "logits/chosen": -1.5396747589111328, + "logits/rejected": -1.5017549991607666, + "logps/chosen": -142.2953643798828, + "logps/rejected": -299.8862609863281, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.979361534118652, + "rewards/margins": 15.074559211730957, + "rewards/rejected": -22.05392074584961, + "step": 4505 + }, + { + "epoch": 7.23, + "learning_rate": 1.090963139120095e-07, + "logits/chosen": -1.7103101015090942, + "logits/rejected": -1.7669146060943604, + "logps/chosen": -91.86190795898438, + "logps/rejected": -255.84701538085938, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5695533752441406, + "rewards/margins": 14.744503021240234, + "rewards/rejected": -17.314058303833008, + "step": 4506 + }, + { + "epoch": 7.23, + "learning_rate": 1.0899722552516845e-07, + "logits/chosen": -1.5399473905563354, + "logits/rejected": -1.6394548416137695, + "logps/chosen": -163.10610961914062, + "logps/rejected": -372.36083984375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.315756320953369, + "rewards/margins": 17.667261123657227, + "rewards/rejected": -23.983016967773438, + "step": 4507 + }, + { + "epoch": 7.24, + "learning_rate": 1.0889813713832738e-07, + "logits/chosen": -1.704698920249939, + "logits/rejected": -1.7729016542434692, + "logps/chosen": -154.2325439453125, + "logps/rejected": -288.46533203125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.4761810302734375, + "rewards/margins": 13.711715698242188, + "rewards/rejected": -19.187896728515625, + "step": 4508 + }, + { + "epoch": 7.24, + "learning_rate": 1.0879904875148632e-07, + "logits/chosen": -1.4802751541137695, + "logits/rejected": -1.4764491319656372, + "logps/chosen": -143.41993713378906, + "logps/rejected": -267.24481201171875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.018106937408447, + "rewards/margins": 13.078242301940918, + "rewards/rejected": -19.096349716186523, + "step": 4509 + }, + { + "epoch": 7.24, + "learning_rate": 1.0869996036464525e-07, + "logits/chosen": -1.4748517274856567, + "logits/rejected": -1.3933707475662231, + "logps/chosen": -166.08929443359375, + "logps/rejected": -261.2649230957031, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.192656993865967, + "rewards/margins": 11.087749481201172, + "rewards/rejected": -18.280405044555664, + "step": 4510 + }, + { + "epoch": 7.24, + "learning_rate": 1.086008719778042e-07, + "logits/chosen": -1.3483836650848389, + "logits/rejected": -1.3886488676071167, + "logps/chosen": -167.26980590820312, + "logps/rejected": -262.3427734375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.425558090209961, + "rewards/margins": 10.493950843811035, + "rewards/rejected": -17.919509887695312, + "step": 4511 + }, + { + "epoch": 7.24, + "learning_rate": 1.0850178359096314e-07, + "logits/chosen": -1.359569787979126, + "logits/rejected": -1.4648995399475098, + "logps/chosen": -172.8695526123047, + "logps/rejected": -305.9479675292969, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.800233840942383, + "rewards/margins": 12.48349380493164, + "rewards/rejected": -21.283727645874023, + "step": 4512 + }, + { + "epoch": 7.24, + "learning_rate": 1.0840269520412206e-07, + "logits/chosen": -1.3028745651245117, + "logits/rejected": -1.342869520187378, + "logps/chosen": -170.48007202148438, + "logps/rejected": -326.51397705078125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.185457706451416, + "rewards/margins": 17.376605987548828, + "rewards/rejected": -23.562063217163086, + "step": 4513 + }, + { + "epoch": 7.25, + "learning_rate": 1.0830360681728101e-07, + "logits/chosen": -1.5547959804534912, + "logits/rejected": -1.6583147048950195, + "logps/chosen": -120.38232421875, + "logps/rejected": -299.99188232421875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.359068870544434, + "rewards/margins": 15.136687278747559, + "rewards/rejected": -20.495756149291992, + "step": 4514 + }, + { + "epoch": 7.25, + "learning_rate": 1.0820451843043994e-07, + "logits/chosen": -1.3409557342529297, + "logits/rejected": -1.2920868396759033, + "logps/chosen": -109.53652954101562, + "logps/rejected": -222.67977905273438, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.969639778137207, + "rewards/margins": 11.308141708374023, + "rewards/rejected": -16.277782440185547, + "step": 4515 + }, + { + "epoch": 7.25, + "learning_rate": 1.0810543004359889e-07, + "logits/chosen": -1.3986499309539795, + "logits/rejected": -1.46505868434906, + "logps/chosen": -168.3353271484375, + "logps/rejected": -339.884765625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.171112060546875, + "rewards/margins": 16.213525772094727, + "rewards/rejected": -24.38463592529297, + "step": 4516 + }, + { + "epoch": 7.25, + "learning_rate": 1.0800634165675784e-07, + "logits/chosen": -1.5528454780578613, + "logits/rejected": -1.6383026838302612, + "logps/chosen": -190.4738006591797, + "logps/rejected": -296.63037109375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.604737281799316, + "rewards/margins": 9.557619094848633, + "rewards/rejected": -19.162355422973633, + "step": 4517 + }, + { + "epoch": 7.25, + "learning_rate": 1.0790725326991676e-07, + "logits/chosen": -1.3121531009674072, + "logits/rejected": -1.3651115894317627, + "logps/chosen": -168.50955200195312, + "logps/rejected": -315.0152282714844, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.960724830627441, + "rewards/margins": 14.044177055358887, + "rewards/rejected": -22.004901885986328, + "step": 4518 + }, + { + "epoch": 7.25, + "learning_rate": 1.078081648830757e-07, + "logits/chosen": -1.3662840127944946, + "logits/rejected": -1.4389960765838623, + "logps/chosen": -175.65724182128906, + "logps/rejected": -295.2848815917969, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.204470634460449, + "rewards/margins": 12.96906852722168, + "rewards/rejected": -20.173540115356445, + "step": 4519 + }, + { + "epoch": 7.26, + "learning_rate": 1.0770907649623464e-07, + "logits/chosen": -1.3955167531967163, + "logits/rejected": -1.3213417530059814, + "logps/chosen": -153.7684326171875, + "logps/rejected": -293.072509765625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.33896017074585, + "rewards/margins": 15.154254913330078, + "rewards/rejected": -21.493213653564453, + "step": 4520 + }, + { + "epoch": 7.26, + "learning_rate": 1.0760998810939358e-07, + "logits/chosen": -1.6157830953598022, + "logits/rejected": -1.6400020122528076, + "logps/chosen": -174.51461791992188, + "logps/rejected": -297.8363952636719, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.644630432128906, + "rewards/margins": 11.267107009887695, + "rewards/rejected": -19.9117374420166, + "step": 4521 + }, + { + "epoch": 7.26, + "learning_rate": 1.075108997225525e-07, + "logits/chosen": -1.5451730489730835, + "logits/rejected": -1.4743618965148926, + "logps/chosen": -195.43272399902344, + "logps/rejected": -328.095947265625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.728737831115723, + "rewards/margins": 14.080169677734375, + "rewards/rejected": -23.80890655517578, + "step": 4522 + }, + { + "epoch": 7.26, + "learning_rate": 1.0741181133571145e-07, + "logits/chosen": -1.3475868701934814, + "logits/rejected": -1.2962396144866943, + "logps/chosen": -149.30039978027344, + "logps/rejected": -262.15008544921875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.170733451843262, + "rewards/margins": 12.130914688110352, + "rewards/rejected": -20.301647186279297, + "step": 4523 + }, + { + "epoch": 7.26, + "learning_rate": 1.073127229488704e-07, + "logits/chosen": -1.3454378843307495, + "logits/rejected": -1.4265892505645752, + "logps/chosen": -117.50833129882812, + "logps/rejected": -308.58428955078125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.8180058002471924, + "rewards/margins": 17.139118194580078, + "rewards/rejected": -20.957124710083008, + "step": 4524 + }, + { + "epoch": 7.26, + "learning_rate": 1.0721363456202933e-07, + "logits/chosen": -1.5613948106765747, + "logits/rejected": -1.6173979043960571, + "logps/chosen": -141.45318603515625, + "logps/rejected": -304.76995849609375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.831787586212158, + "rewards/margins": 15.147156715393066, + "rewards/rejected": -20.978944778442383, + "step": 4525 + }, + { + "epoch": 7.26, + "learning_rate": 1.0711454617518826e-07, + "logits/chosen": -1.2983345985412598, + "logits/rejected": -1.3483057022094727, + "logps/chosen": -174.48638916015625, + "logps/rejected": -277.8638610839844, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.02206039428711, + "rewards/margins": 11.738786697387695, + "rewards/rejected": -20.760847091674805, + "step": 4526 + }, + { + "epoch": 7.27, + "learning_rate": 1.070154577883472e-07, + "logits/chosen": -1.3234343528747559, + "logits/rejected": -1.3763048648834229, + "logps/chosen": -176.82061767578125, + "logps/rejected": -349.3812255859375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.587871551513672, + "rewards/margins": 15.314958572387695, + "rewards/rejected": -24.902830123901367, + "step": 4527 + }, + { + "epoch": 7.27, + "learning_rate": 1.0691636940150614e-07, + "logits/chosen": -1.5075929164886475, + "logits/rejected": -1.4846445322036743, + "logps/chosen": -150.1634521484375, + "logps/rejected": -278.40765380859375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.550859451293945, + "rewards/margins": 14.760583877563477, + "rewards/rejected": -20.311443328857422, + "step": 4528 + }, + { + "epoch": 7.27, + "learning_rate": 1.0681728101466507e-07, + "logits/chosen": -1.4691309928894043, + "logits/rejected": -1.4017503261566162, + "logps/chosen": -137.88058471679688, + "logps/rejected": -253.79092407226562, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.19529914855957, + "rewards/margins": 12.694158554077148, + "rewards/rejected": -18.88945770263672, + "step": 4529 + }, + { + "epoch": 7.27, + "learning_rate": 1.0671819262782401e-07, + "logits/chosen": -1.5551271438598633, + "logits/rejected": -1.5933513641357422, + "logps/chosen": -121.57408905029297, + "logps/rejected": -287.60858154296875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.675888538360596, + "rewards/margins": 14.336928367614746, + "rewards/rejected": -20.0128173828125, + "step": 4530 + }, + { + "epoch": 7.27, + "learning_rate": 1.0661910424098295e-07, + "logits/chosen": -1.58342444896698, + "logits/rejected": -1.6011972427368164, + "logps/chosen": -161.8117218017578, + "logps/rejected": -294.4410095214844, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.957898139953613, + "rewards/margins": 13.859831809997559, + "rewards/rejected": -20.817729949951172, + "step": 4531 + }, + { + "epoch": 7.27, + "learning_rate": 1.0652001585414189e-07, + "logits/chosen": -1.4810792207717896, + "logits/rejected": -1.5193748474121094, + "logps/chosen": -138.24905395507812, + "logps/rejected": -298.7330627441406, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.781042098999023, + "rewards/margins": 14.71879768371582, + "rewards/rejected": -22.499839782714844, + "step": 4532 + }, + { + "epoch": 7.28, + "learning_rate": 1.0642092746730083e-07, + "logits/chosen": -1.4313104152679443, + "logits/rejected": -1.4391809701919556, + "logps/chosen": -171.56979370117188, + "logps/rejected": -276.6543884277344, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.163615226745605, + "rewards/margins": 11.0728120803833, + "rewards/rejected": -19.236427307128906, + "step": 4533 + }, + { + "epoch": 7.28, + "learning_rate": 1.0632183908045975e-07, + "logits/chosen": -1.324269413948059, + "logits/rejected": -1.2632946968078613, + "logps/chosen": -132.20773315429688, + "logps/rejected": -264.88067626953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.345478534698486, + "rewards/margins": 13.314757347106934, + "rewards/rejected": -18.660236358642578, + "step": 4534 + }, + { + "epoch": 7.28, + "learning_rate": 1.062227506936187e-07, + "logits/chosen": -1.344736933708191, + "logits/rejected": -1.4119423627853394, + "logps/chosen": -148.13734436035156, + "logps/rejected": -285.03277587890625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.324408054351807, + "rewards/margins": 12.96264362335205, + "rewards/rejected": -19.287052154541016, + "step": 4535 + }, + { + "epoch": 7.28, + "learning_rate": 1.0612366230677765e-07, + "logits/chosen": -1.5363234281539917, + "logits/rejected": -1.5740801095962524, + "logps/chosen": -166.61517333984375, + "logps/rejected": -314.04296875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.606089115142822, + "rewards/margins": 12.983720779418945, + "rewards/rejected": -20.589811325073242, + "step": 4536 + }, + { + "epoch": 7.28, + "learning_rate": 1.0602457391993658e-07, + "logits/chosen": -1.5413486957550049, + "logits/rejected": -1.5191783905029297, + "logps/chosen": -132.48916625976562, + "logps/rejected": -321.44219970703125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.856533050537109, + "rewards/margins": 16.75694465637207, + "rewards/rejected": -22.61347770690918, + "step": 4537 + }, + { + "epoch": 7.28, + "learning_rate": 1.0592548553309553e-07, + "logits/chosen": -1.3387703895568848, + "logits/rejected": -1.4038896560668945, + "logps/chosen": -167.7861785888672, + "logps/rejected": -330.4862060546875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.973254680633545, + "rewards/margins": 15.654348373413086, + "rewards/rejected": -23.627605438232422, + "step": 4538 + }, + { + "epoch": 7.29, + "learning_rate": 1.0582639714625445e-07, + "logits/chosen": -1.5405657291412354, + "logits/rejected": -1.5298421382904053, + "logps/chosen": -200.7737579345703, + "logps/rejected": -303.3057556152344, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.965378761291504, + "rewards/margins": 12.612499237060547, + "rewards/rejected": -21.577877044677734, + "step": 4539 + }, + { + "epoch": 7.29, + "learning_rate": 1.057273087594134e-07, + "logits/chosen": -1.3281426429748535, + "logits/rejected": -1.3116447925567627, + "logps/chosen": -148.94332885742188, + "logps/rejected": -285.2530212402344, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.964744567871094, + "rewards/margins": 12.591866493225098, + "rewards/rejected": -20.556612014770508, + "step": 4540 + }, + { + "epoch": 7.29, + "learning_rate": 1.0562822037257233e-07, + "logits/chosen": -1.510664701461792, + "logits/rejected": -1.4371854066848755, + "logps/chosen": -196.8003387451172, + "logps/rejected": -310.15850830078125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.019740104675293, + "rewards/margins": 12.42959213256836, + "rewards/rejected": -21.44933319091797, + "step": 4541 + }, + { + "epoch": 7.29, + "learning_rate": 1.0552913198573127e-07, + "logits/chosen": -1.7364600896835327, + "logits/rejected": -1.7145445346832275, + "logps/chosen": -147.9008026123047, + "logps/rejected": -271.12860107421875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.881474018096924, + "rewards/margins": 14.076520919799805, + "rewards/rejected": -17.95799446105957, + "step": 4542 + }, + { + "epoch": 7.29, + "learning_rate": 1.0543004359889021e-07, + "logits/chosen": -1.549738883972168, + "logits/rejected": -1.4874011278152466, + "logps/chosen": -200.76097106933594, + "logps/rejected": -288.91455078125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.07199764251709, + "rewards/margins": 11.517528533935547, + "rewards/rejected": -20.58952522277832, + "step": 4543 + }, + { + "epoch": 7.29, + "learning_rate": 1.0533095521204914e-07, + "logits/chosen": -1.4343700408935547, + "logits/rejected": -1.4605865478515625, + "logps/chosen": -199.68930053710938, + "logps/rejected": -370.81854248046875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.872848510742188, + "rewards/margins": 15.93410587310791, + "rewards/rejected": -24.80695343017578, + "step": 4544 + }, + { + "epoch": 7.3, + "learning_rate": 1.0523186682520809e-07, + "logits/chosen": -1.3847874402999878, + "logits/rejected": -1.3588982820510864, + "logps/chosen": -146.98306274414062, + "logps/rejected": -278.8884582519531, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.362154006958008, + "rewards/margins": 13.137701034545898, + "rewards/rejected": -19.499855041503906, + "step": 4545 + }, + { + "epoch": 7.3, + "learning_rate": 1.0513277843836702e-07, + "logits/chosen": -1.456251621246338, + "logits/rejected": -1.4585338830947876, + "logps/chosen": -116.31632995605469, + "logps/rejected": -228.25286865234375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.375673532485962, + "rewards/margins": 12.413328170776367, + "rewards/rejected": -15.78900146484375, + "step": 4546 + }, + { + "epoch": 7.3, + "learning_rate": 1.0503369005152595e-07, + "logits/chosen": -1.5672576427459717, + "logits/rejected": -1.5324070453643799, + "logps/chosen": -163.9290771484375, + "logps/rejected": -300.9716796875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.088903427124023, + "rewards/margins": 14.493488311767578, + "rewards/rejected": -21.5823917388916, + "step": 4547 + }, + { + "epoch": 7.3, + "learning_rate": 1.0493460166468489e-07, + "logits/chosen": -1.5024091005325317, + "logits/rejected": -1.496657371520996, + "logps/chosen": -190.49754333496094, + "logps/rejected": -340.88275146484375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.279966354370117, + "rewards/margins": 16.8936824798584, + "rewards/rejected": -25.173648834228516, + "step": 4548 + }, + { + "epoch": 7.3, + "learning_rate": 1.0483551327784383e-07, + "logits/chosen": -1.5514261722564697, + "logits/rejected": -1.46091890335083, + "logps/chosen": -193.9055938720703, + "logps/rejected": -278.6861267089844, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.841743469238281, + "rewards/margins": 11.92888355255127, + "rewards/rejected": -18.770626068115234, + "step": 4549 + }, + { + "epoch": 7.3, + "learning_rate": 1.0473642489100278e-07, + "logits/chosen": -1.5595101118087769, + "logits/rejected": -1.544304370880127, + "logps/chosen": -182.76483154296875, + "logps/rejected": -342.438232421875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.267621994018555, + "rewards/margins": 15.532920837402344, + "rewards/rejected": -23.80054473876953, + "step": 4550 + }, + { + "epoch": 7.3, + "learning_rate": 1.046373365041617e-07, + "logits/chosen": -1.5047022104263306, + "logits/rejected": -1.5998475551605225, + "logps/chosen": -145.42124938964844, + "logps/rejected": -298.83013916015625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.309586524963379, + "rewards/margins": 13.188789367675781, + "rewards/rejected": -20.498374938964844, + "step": 4551 + }, + { + "epoch": 7.31, + "learning_rate": 1.0453824811732065e-07, + "logits/chosen": -1.7066656351089478, + "logits/rejected": -1.7606239318847656, + "logps/chosen": -80.27839660644531, + "logps/rejected": -244.3646697998047, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7832143306732178, + "rewards/margins": 15.396220207214355, + "rewards/rejected": -17.179433822631836, + "step": 4552 + }, + { + "epoch": 7.31, + "learning_rate": 1.0443915973047958e-07, + "logits/chosen": -1.4586191177368164, + "logits/rejected": -1.4785007238388062, + "logps/chosen": -126.30056762695312, + "logps/rejected": -260.38916015625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.39576530456543, + "rewards/margins": 12.397372245788574, + "rewards/rejected": -18.793136596679688, + "step": 4553 + }, + { + "epoch": 7.31, + "learning_rate": 1.0434007134363853e-07, + "logits/chosen": -1.6960012912750244, + "logits/rejected": -1.6539280414581299, + "logps/chosen": -200.55091857910156, + "logps/rejected": -312.7001647949219, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.723010063171387, + "rewards/margins": 11.762835502624512, + "rewards/rejected": -21.4858455657959, + "step": 4554 + }, + { + "epoch": 7.31, + "learning_rate": 1.0424098295679745e-07, + "logits/chosen": -1.4755817651748657, + "logits/rejected": -1.5005830526351929, + "logps/chosen": -157.73162841796875, + "logps/rejected": -268.2693786621094, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.081024169921875, + "rewards/margins": 11.098738670349121, + "rewards/rejected": -19.179763793945312, + "step": 4555 + }, + { + "epoch": 7.31, + "learning_rate": 1.0414189456995639e-07, + "logits/chosen": -1.4595191478729248, + "logits/rejected": -1.4058254957199097, + "logps/chosen": -147.6494140625, + "logps/rejected": -278.7900390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.261093616485596, + "rewards/margins": 13.954341888427734, + "rewards/rejected": -19.215436935424805, + "step": 4556 + }, + { + "epoch": 7.31, + "learning_rate": 1.0404280618311534e-07, + "logits/chosen": -1.476191759109497, + "logits/rejected": -1.4674546718597412, + "logps/chosen": -93.52493286132812, + "logps/rejected": -237.82281494140625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.2178454399108887, + "rewards/margins": 14.413999557495117, + "rewards/rejected": -17.63184356689453, + "step": 4557 + }, + { + "epoch": 7.32, + "learning_rate": 1.0394371779627427e-07, + "logits/chosen": -1.6202484369277954, + "logits/rejected": -1.5381954908370972, + "logps/chosen": -220.19317626953125, + "logps/rejected": -355.7698974609375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.304471015930176, + "rewards/margins": 14.927286148071289, + "rewards/rejected": -27.23175621032715, + "step": 4558 + }, + { + "epoch": 7.32, + "learning_rate": 1.038446294094332e-07, + "logits/chosen": -1.4706439971923828, + "logits/rejected": -1.5008037090301514, + "logps/chosen": -156.0191650390625, + "logps/rejected": -329.9837951660156, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.74043083190918, + "rewards/margins": 16.88787269592285, + "rewards/rejected": -23.62830352783203, + "step": 4559 + }, + { + "epoch": 7.32, + "learning_rate": 1.0374554102259214e-07, + "logits/chosen": -1.6279335021972656, + "logits/rejected": -1.5935752391815186, + "logps/chosen": -160.97047424316406, + "logps/rejected": -302.80126953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.261077880859375, + "rewards/margins": 14.293556213378906, + "rewards/rejected": -21.55463409423828, + "step": 4560 + }, + { + "epoch": 7.32, + "learning_rate": 1.0364645263575109e-07, + "logits/chosen": -1.4218772649765015, + "logits/rejected": -1.4288113117218018, + "logps/chosen": -162.15872192382812, + "logps/rejected": -343.804443359375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.785725593566895, + "rewards/margins": 16.83348846435547, + "rewards/rejected": -25.619213104248047, + "step": 4561 + }, + { + "epoch": 7.32, + "learning_rate": 1.0354736424891003e-07, + "logits/chosen": -1.6451842784881592, + "logits/rejected": -1.48774254322052, + "logps/chosen": -175.45468139648438, + "logps/rejected": -301.7014465332031, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.250351905822754, + "rewards/margins": 15.493841171264648, + "rewards/rejected": -21.744192123413086, + "step": 4562 + }, + { + "epoch": 7.32, + "learning_rate": 1.0344827586206897e-07, + "logits/chosen": -1.5643247365951538, + "logits/rejected": -1.5147581100463867, + "logps/chosen": -183.40036010742188, + "logps/rejected": -298.45184326171875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.422131538391113, + "rewards/margins": 13.737112998962402, + "rewards/rejected": -23.159244537353516, + "step": 4563 + }, + { + "epoch": 7.33, + "learning_rate": 1.033491874752279e-07, + "logits/chosen": -1.5338215827941895, + "logits/rejected": -1.4537558555603027, + "logps/chosen": -145.86233520507812, + "logps/rejected": -272.96905517578125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.023224830627441, + "rewards/margins": 13.257987976074219, + "rewards/rejected": -19.281213760375977, + "step": 4564 + }, + { + "epoch": 7.33, + "learning_rate": 1.0325009908838683e-07, + "logits/chosen": -1.290743112564087, + "logits/rejected": -1.3378586769104004, + "logps/chosen": -159.01620483398438, + "logps/rejected": -281.5267333984375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.535933017730713, + "rewards/margins": 12.898370742797852, + "rewards/rejected": -20.434303283691406, + "step": 4565 + }, + { + "epoch": 7.33, + "learning_rate": 1.0315101070154578e-07, + "logits/chosen": -1.4255578517913818, + "logits/rejected": -1.515138030052185, + "logps/chosen": -172.68563842773438, + "logps/rejected": -323.5512390136719, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.20240592956543, + "rewards/margins": 13.632437705993652, + "rewards/rejected": -23.834842681884766, + "step": 4566 + }, + { + "epoch": 7.33, + "learning_rate": 1.0305192231470471e-07, + "logits/chosen": -1.6219537258148193, + "logits/rejected": -1.6204873323440552, + "logps/chosen": -101.15103912353516, + "logps/rejected": -214.45249938964844, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5746665000915527, + "rewards/margins": 11.231271743774414, + "rewards/rejected": -13.805936813354492, + "step": 4567 + }, + { + "epoch": 7.33, + "learning_rate": 1.0295283392786365e-07, + "logits/chosen": -1.4064929485321045, + "logits/rejected": -1.454853892326355, + "logps/chosen": -147.9895782470703, + "logps/rejected": -322.1719970703125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.673306465148926, + "rewards/margins": 14.65631103515625, + "rewards/rejected": -21.32961654663086, + "step": 4568 + }, + { + "epoch": 7.33, + "learning_rate": 1.0285374554102259e-07, + "logits/chosen": -1.3610339164733887, + "logits/rejected": -1.3926249742507935, + "logps/chosen": -171.81619262695312, + "logps/rejected": -315.53363037109375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.26201343536377, + "rewards/margins": 13.780435562133789, + "rewards/rejected": -22.042449951171875, + "step": 4569 + }, + { + "epoch": 7.34, + "learning_rate": 1.0275465715418153e-07, + "logits/chosen": -1.5401015281677246, + "logits/rejected": -1.4695796966552734, + "logps/chosen": -177.1994171142578, + "logps/rejected": -292.0560607910156, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.4734416007995605, + "rewards/margins": 15.219650268554688, + "rewards/rejected": -20.693090438842773, + "step": 4570 + }, + { + "epoch": 7.34, + "learning_rate": 1.0265556876734047e-07, + "logits/chosen": -1.5871025323867798, + "logits/rejected": -1.5312435626983643, + "logps/chosen": -101.41727447509766, + "logps/rejected": -265.82562255859375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.975416660308838, + "rewards/margins": 15.130887985229492, + "rewards/rejected": -18.106304168701172, + "step": 4571 + }, + { + "epoch": 7.34, + "learning_rate": 1.0255648038049939e-07, + "logits/chosen": -1.5037200450897217, + "logits/rejected": -1.5501196384429932, + "logps/chosen": -165.11265563964844, + "logps/rejected": -298.3482666015625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.323375701904297, + "rewards/margins": 12.058636665344238, + "rewards/rejected": -20.38201332092285, + "step": 4572 + }, + { + "epoch": 7.34, + "learning_rate": 1.0245739199365834e-07, + "logits/chosen": -1.3772530555725098, + "logits/rejected": -1.414858341217041, + "logps/chosen": -145.29922485351562, + "logps/rejected": -272.51239013671875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.534767150878906, + "rewards/margins": 12.956623077392578, + "rewards/rejected": -19.491390228271484, + "step": 4573 + }, + { + "epoch": 7.34, + "learning_rate": 1.0235830360681727e-07, + "logits/chosen": -1.5829859972000122, + "logits/rejected": -1.6160728931427002, + "logps/chosen": -99.80282592773438, + "logps/rejected": -242.61538696289062, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.256227731704712, + "rewards/margins": 13.576536178588867, + "rewards/rejected": -16.832763671875, + "step": 4574 + }, + { + "epoch": 7.34, + "learning_rate": 1.0225921521997622e-07, + "logits/chosen": -1.4290008544921875, + "logits/rejected": -1.4821441173553467, + "logps/chosen": -185.411865234375, + "logps/rejected": -335.80096435546875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.4811372756958, + "rewards/margins": 14.69566535949707, + "rewards/rejected": -24.176803588867188, + "step": 4575 + }, + { + "epoch": 7.35, + "learning_rate": 1.0216012683313515e-07, + "logits/chosen": -1.4473015069961548, + "logits/rejected": -1.419636607170105, + "logps/chosen": -185.844482421875, + "logps/rejected": -322.7942199707031, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.750572204589844, + "rewards/margins": 13.919647216796875, + "rewards/rejected": -22.67021942138672, + "step": 4576 + }, + { + "epoch": 7.35, + "learning_rate": 1.0206103844629408e-07, + "logits/chosen": -1.3833993673324585, + "logits/rejected": -1.477108120918274, + "logps/chosen": -124.96116638183594, + "logps/rejected": -262.325439453125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.016386032104492, + "rewards/margins": 11.366668701171875, + "rewards/rejected": -17.383054733276367, + "step": 4577 + }, + { + "epoch": 7.35, + "learning_rate": 1.0196195005945303e-07, + "logits/chosen": -1.5807570219039917, + "logits/rejected": -1.6865369081497192, + "logps/chosen": -147.71170043945312, + "logps/rejected": -309.3811340332031, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.04744815826416, + "rewards/margins": 14.710670471191406, + "rewards/rejected": -21.75811767578125, + "step": 4578 + }, + { + "epoch": 7.35, + "learning_rate": 1.0186286167261196e-07, + "logits/chosen": -1.4335203170776367, + "logits/rejected": -1.3448935747146606, + "logps/chosen": -186.50392150878906, + "logps/rejected": -309.90118408203125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.206847190856934, + "rewards/margins": 14.435318946838379, + "rewards/rejected": -24.642166137695312, + "step": 4579 + }, + { + "epoch": 7.35, + "learning_rate": 1.017637732857709e-07, + "logits/chosen": -1.5852086544036865, + "logits/rejected": -1.5058685541152954, + "logps/chosen": -183.62774658203125, + "logps/rejected": -300.19390869140625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.709893226623535, + "rewards/margins": 13.424311637878418, + "rewards/rejected": -21.13420295715332, + "step": 4580 + }, + { + "epoch": 7.35, + "learning_rate": 1.0166468489892983e-07, + "logits/chosen": -1.6255788803100586, + "logits/rejected": -1.6128435134887695, + "logps/chosen": -114.23258972167969, + "logps/rejected": -239.925537109375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.200597763061523, + "rewards/margins": 12.705343246459961, + "rewards/rejected": -16.905941009521484, + "step": 4581 + }, + { + "epoch": 7.35, + "learning_rate": 1.0156559651208878e-07, + "logits/chosen": -1.5831925868988037, + "logits/rejected": -1.6156798601150513, + "logps/chosen": -143.03834533691406, + "logps/rejected": -267.6253662109375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.068941593170166, + "rewards/margins": 11.544445037841797, + "rewards/rejected": -17.613386154174805, + "step": 4582 + }, + { + "epoch": 7.36, + "learning_rate": 1.0146650812524772e-07, + "logits/chosen": -1.5583727359771729, + "logits/rejected": -1.6269680261611938, + "logps/chosen": -118.80663299560547, + "logps/rejected": -235.78518676757812, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.354904651641846, + "rewards/margins": 11.892274856567383, + "rewards/rejected": -16.247180938720703, + "step": 4583 + }, + { + "epoch": 7.36, + "learning_rate": 1.0136741973840664e-07, + "logits/chosen": -1.5381572246551514, + "logits/rejected": -1.6095712184906006, + "logps/chosen": -170.47564697265625, + "logps/rejected": -301.60028076171875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.293197154998779, + "rewards/margins": 14.034934997558594, + "rewards/rejected": -21.32813262939453, + "step": 4584 + }, + { + "epoch": 7.36, + "learning_rate": 1.0126833135156559e-07, + "logits/chosen": -1.5749778747558594, + "logits/rejected": -1.5365815162658691, + "logps/chosen": -151.7555694580078, + "logps/rejected": -274.4482727050781, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.050795078277588, + "rewards/margins": 13.092572212219238, + "rewards/rejected": -20.143367767333984, + "step": 4585 + }, + { + "epoch": 7.36, + "learning_rate": 1.0116924296472452e-07, + "logits/chosen": -1.4875764846801758, + "logits/rejected": -1.4846076965332031, + "logps/chosen": -118.90524291992188, + "logps/rejected": -280.4741516113281, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.814988613128662, + "rewards/margins": 15.438146591186523, + "rewards/rejected": -20.253135681152344, + "step": 4586 + }, + { + "epoch": 7.36, + "learning_rate": 1.0107015457788347e-07, + "logits/chosen": -1.3939523696899414, + "logits/rejected": -1.5155441761016846, + "logps/chosen": -137.75546264648438, + "logps/rejected": -325.9117126464844, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.974702835083008, + "rewards/margins": 15.131844520568848, + "rewards/rejected": -21.106548309326172, + "step": 4587 + }, + { + "epoch": 7.36, + "learning_rate": 1.0097106619104242e-07, + "logits/chosen": -1.5683947801589966, + "logits/rejected": -1.6654645204544067, + "logps/chosen": -132.42120361328125, + "logps/rejected": -314.8275146484375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.167524814605713, + "rewards/margins": 16.78534698486328, + "rewards/rejected": -20.952869415283203, + "step": 4588 + }, + { + "epoch": 7.37, + "learning_rate": 1.0087197780420134e-07, + "logits/chosen": -1.4684202671051025, + "logits/rejected": -1.4627599716186523, + "logps/chosen": -105.77696228027344, + "logps/rejected": -261.33148193359375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.4249725341796875, + "rewards/margins": 15.401926040649414, + "rewards/rejected": -18.82689666748047, + "step": 4589 + }, + { + "epoch": 7.37, + "learning_rate": 1.0077288941736028e-07, + "logits/chosen": -1.491817831993103, + "logits/rejected": -1.4655892848968506, + "logps/chosen": -122.58031463623047, + "logps/rejected": -235.76451110839844, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.757542610168457, + "rewards/margins": 11.746440887451172, + "rewards/rejected": -16.503984451293945, + "step": 4590 + }, + { + "epoch": 7.37, + "learning_rate": 1.0067380103051922e-07, + "logits/chosen": -1.2601001262664795, + "logits/rejected": -1.276187539100647, + "logps/chosen": -138.05221557617188, + "logps/rejected": -254.05972290039062, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.337490081787109, + "rewards/margins": 11.425405502319336, + "rewards/rejected": -17.762895584106445, + "step": 4591 + }, + { + "epoch": 7.37, + "learning_rate": 1.0057471264367816e-07, + "logits/chosen": -1.4975332021713257, + "logits/rejected": -1.372803807258606, + "logps/chosen": -228.57313537597656, + "logps/rejected": -294.83441162109375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.978250503540039, + "rewards/margins": 13.643387794494629, + "rewards/rejected": -21.62163734436035, + "step": 4592 + }, + { + "epoch": 7.37, + "learning_rate": 1.0047562425683708e-07, + "logits/chosen": -1.3848469257354736, + "logits/rejected": -1.416968584060669, + "logps/chosen": -145.54074096679688, + "logps/rejected": -272.03192138671875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.087255477905273, + "rewards/margins": 11.05277156829834, + "rewards/rejected": -19.140026092529297, + "step": 4593 + }, + { + "epoch": 7.37, + "learning_rate": 1.0037653586999603e-07, + "logits/chosen": -1.6508409976959229, + "logits/rejected": -1.6827691793441772, + "logps/chosen": -155.68209838867188, + "logps/rejected": -290.50628662109375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.884305953979492, + "rewards/margins": 13.210468292236328, + "rewards/rejected": -20.094776153564453, + "step": 4594 + }, + { + "epoch": 7.38, + "learning_rate": 1.0027744748315498e-07, + "logits/chosen": -1.4949091672897339, + "logits/rejected": -1.6461219787597656, + "logps/chosen": -78.65393829345703, + "logps/rejected": -254.05239868164062, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7837982177734375, + "rewards/margins": 15.541844367980957, + "rewards/rejected": -17.325641632080078, + "step": 4595 + }, + { + "epoch": 7.38, + "learning_rate": 1.0017835909631391e-07, + "logits/chosen": -1.311041235923767, + "logits/rejected": -1.318776249885559, + "logps/chosen": -196.93667602539062, + "logps/rejected": -361.8273010253906, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.591144561767578, + "rewards/margins": 16.265777587890625, + "rewards/rejected": -26.856922149658203, + "step": 4596 + }, + { + "epoch": 7.38, + "learning_rate": 1.0007927070947284e-07, + "logits/chosen": -1.6042587757110596, + "logits/rejected": -1.419244647026062, + "logps/chosen": -133.34585571289062, + "logps/rejected": -225.91256713867188, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.113045692443848, + "rewards/margins": 11.112420082092285, + "rewards/rejected": -16.225465774536133, + "step": 4597 + }, + { + "epoch": 7.38, + "learning_rate": 9.998018232263178e-08, + "logits/chosen": -1.3483706712722778, + "logits/rejected": -1.3393628597259521, + "logps/chosen": -123.93716430664062, + "logps/rejected": -263.3916320800781, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.5801825523376465, + "rewards/margins": 15.11819839477539, + "rewards/rejected": -19.698379516601562, + "step": 4598 + }, + { + "epoch": 7.38, + "learning_rate": 9.988109393579072e-08, + "logits/chosen": -1.6670691967010498, + "logits/rejected": -1.6913690567016602, + "logps/chosen": -131.0694580078125, + "logps/rejected": -269.1467590332031, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.425271034240723, + "rewards/margins": 13.69382095336914, + "rewards/rejected": -19.119091033935547, + "step": 4599 + }, + { + "epoch": 7.38, + "learning_rate": 9.978200554894966e-08, + "logits/chosen": -1.654115080833435, + "logits/rejected": -1.6241618394851685, + "logps/chosen": -159.9171142578125, + "logps/rejected": -323.47332763671875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.278756141662598, + "rewards/margins": 15.391847610473633, + "rewards/rejected": -22.670604705810547, + "step": 4600 + }, + { + "epoch": 7.39, + "learning_rate": 9.968291716210859e-08, + "logits/chosen": -1.4803158044815063, + "logits/rejected": -1.5173786878585815, + "logps/chosen": -179.74053955078125, + "logps/rejected": -300.43267822265625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.462064743041992, + "rewards/margins": 12.35085678100586, + "rewards/rejected": -20.81292152404785, + "step": 4601 + }, + { + "epoch": 7.39, + "learning_rate": 9.958382877526754e-08, + "logits/chosen": -1.4616618156433105, + "logits/rejected": -1.4953250885009766, + "logps/chosen": -110.71243286132812, + "logps/rejected": -305.7099609375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.52284049987793, + "rewards/margins": 18.742788314819336, + "rewards/rejected": -23.2656307220459, + "step": 4602 + }, + { + "epoch": 7.39, + "learning_rate": 9.948474038842647e-08, + "logits/chosen": -1.4002530574798584, + "logits/rejected": -1.476383924484253, + "logps/chosen": -145.38536071777344, + "logps/rejected": -284.5558776855469, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.075552940368652, + "rewards/margins": 13.55805492401123, + "rewards/rejected": -19.633609771728516, + "step": 4603 + }, + { + "epoch": 7.39, + "learning_rate": 9.938565200158542e-08, + "logits/chosen": -1.3701430559158325, + "logits/rejected": -1.4366319179534912, + "logps/chosen": -102.60385131835938, + "logps/rejected": -224.62979125976562, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.866495370864868, + "rewards/margins": 11.594937324523926, + "rewards/rejected": -15.461433410644531, + "step": 4604 + }, + { + "epoch": 7.39, + "learning_rate": 9.928656361474434e-08, + "logits/chosen": -1.5001806020736694, + "logits/rejected": -1.4414132833480835, + "logps/chosen": -103.25643920898438, + "logps/rejected": -272.20819091796875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9184024333953857, + "rewards/margins": 14.442022323608398, + "rewards/rejected": -18.360424041748047, + "step": 4605 + }, + { + "epoch": 7.39, + "learning_rate": 9.918747522790328e-08, + "logits/chosen": -1.6813652515411377, + "logits/rejected": -1.6542198657989502, + "logps/chosen": -127.0543212890625, + "logps/rejected": -271.75347900390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.4273295402526855, + "rewards/margins": 15.61043930053711, + "rewards/rejected": -20.037769317626953, + "step": 4606 + }, + { + "epoch": 7.39, + "learning_rate": 9.908838684106223e-08, + "logits/chosen": -1.5757476091384888, + "logits/rejected": -1.6741224527359009, + "logps/chosen": -169.7532196044922, + "logps/rejected": -359.2912902832031, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.272216796875, + "rewards/margins": 17.02829360961914, + "rewards/rejected": -25.30051040649414, + "step": 4607 + }, + { + "epoch": 7.4, + "learning_rate": 9.898929845422116e-08, + "logits/chosen": -1.4953793287277222, + "logits/rejected": -1.4364008903503418, + "logps/chosen": -158.53517150878906, + "logps/rejected": -277.48822021484375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.075345993041992, + "rewards/margins": 13.118927955627441, + "rewards/rejected": -20.19427490234375, + "step": 4608 + }, + { + "epoch": 7.4, + "learning_rate": 9.889021006738011e-08, + "logits/chosen": -1.6366379261016846, + "logits/rejected": -1.6755564212799072, + "logps/chosen": -107.71150970458984, + "logps/rejected": -275.9061279296875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7229197025299072, + "rewards/margins": 16.717426300048828, + "rewards/rejected": -20.440345764160156, + "step": 4609 + }, + { + "epoch": 7.4, + "learning_rate": 9.879112168053903e-08, + "logits/chosen": -1.3992807865142822, + "logits/rejected": -1.3670930862426758, + "logps/chosen": -183.82237243652344, + "logps/rejected": -285.89202880859375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.305797576904297, + "rewards/margins": 11.150449752807617, + "rewards/rejected": -20.456247329711914, + "step": 4610 + }, + { + "epoch": 7.4, + "learning_rate": 9.869203329369798e-08, + "logits/chosen": -1.4467887878417969, + "logits/rejected": -1.3532929420471191, + "logps/chosen": -162.7723846435547, + "logps/rejected": -272.2936096191406, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.876744270324707, + "rewards/margins": 11.827997207641602, + "rewards/rejected": -18.704742431640625, + "step": 4611 + }, + { + "epoch": 7.4, + "learning_rate": 9.859294490685691e-08, + "logits/chosen": -1.5649526119232178, + "logits/rejected": -1.5034645795822144, + "logps/chosen": -159.1440887451172, + "logps/rejected": -271.9767150878906, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.303107261657715, + "rewards/margins": 13.57874870300293, + "rewards/rejected": -20.88185691833496, + "step": 4612 + }, + { + "epoch": 7.4, + "learning_rate": 9.849385652001586e-08, + "logits/chosen": -1.5682395696640015, + "logits/rejected": -1.6176481246948242, + "logps/chosen": -174.67330932617188, + "logps/rejected": -280.3492431640625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.383489608764648, + "rewards/margins": 11.456134796142578, + "rewards/rejected": -17.839624404907227, + "step": 4613 + }, + { + "epoch": 7.41, + "learning_rate": 9.839476813317479e-08, + "logits/chosen": -1.3550658226013184, + "logits/rejected": -1.3531510829925537, + "logps/chosen": -174.87286376953125, + "logps/rejected": -314.5924072265625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.458209991455078, + "rewards/margins": 14.773473739624023, + "rewards/rejected": -24.2316837310791, + "step": 4614 + }, + { + "epoch": 7.41, + "learning_rate": 9.829567974633372e-08, + "logits/chosen": -1.4645625352859497, + "logits/rejected": -1.3775349855422974, + "logps/chosen": -148.8585205078125, + "logps/rejected": -305.76904296875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.895204067230225, + "rewards/margins": 15.411626815795898, + "rewards/rejected": -21.30683135986328, + "step": 4615 + }, + { + "epoch": 7.41, + "learning_rate": 9.819659135949267e-08, + "logits/chosen": -1.417275309562683, + "logits/rejected": -1.3251230716705322, + "logps/chosen": -191.51023864746094, + "logps/rejected": -289.1645202636719, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.665304183959961, + "rewards/margins": 11.91787338256836, + "rewards/rejected": -21.58317756652832, + "step": 4616 + }, + { + "epoch": 7.41, + "learning_rate": 9.80975029726516e-08, + "logits/chosen": -1.4717594385147095, + "logits/rejected": -1.4660168886184692, + "logps/chosen": -109.50291442871094, + "logps/rejected": -238.7255401611328, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.930408477783203, + "rewards/margins": 12.910100936889648, + "rewards/rejected": -17.840511322021484, + "step": 4617 + }, + { + "epoch": 7.41, + "learning_rate": 9.799841458581054e-08, + "logits/chosen": -1.477567195892334, + "logits/rejected": -1.6141666173934937, + "logps/chosen": -111.9709701538086, + "logps/rejected": -303.62213134765625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.842939853668213, + "rewards/margins": 16.392250061035156, + "rewards/rejected": -21.23518943786621, + "step": 4618 + }, + { + "epoch": 7.41, + "learning_rate": 9.789932619896947e-08, + "logits/chosen": -1.5361912250518799, + "logits/rejected": -1.5514413118362427, + "logps/chosen": -175.3197784423828, + "logps/rejected": -304.64739990234375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.306539535522461, + "rewards/margins": 12.898799896240234, + "rewards/rejected": -23.205337524414062, + "step": 4619 + }, + { + "epoch": 7.42, + "learning_rate": 9.780023781212842e-08, + "logits/chosen": -1.5513657331466675, + "logits/rejected": -1.5928735733032227, + "logps/chosen": -168.78424072265625, + "logps/rejected": -338.4950866699219, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.974270820617676, + "rewards/margins": 14.25711727142334, + "rewards/rejected": -23.231388092041016, + "step": 4620 + }, + { + "epoch": 7.42, + "learning_rate": 9.770114942528736e-08, + "logits/chosen": -1.4783188104629517, + "logits/rejected": -1.5972596406936646, + "logps/chosen": -127.89452362060547, + "logps/rejected": -333.6496887207031, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.299160003662109, + "rewards/margins": 16.15605926513672, + "rewards/rejected": -21.455219268798828, + "step": 4621 + }, + { + "epoch": 7.42, + "learning_rate": 9.760206103844628e-08, + "logits/chosen": -1.4474598169326782, + "logits/rejected": -1.3437678813934326, + "logps/chosen": -174.2814178466797, + "logps/rejected": -284.5703430175781, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.705958366394043, + "rewards/margins": 12.75230598449707, + "rewards/rejected": -21.45826530456543, + "step": 4622 + }, + { + "epoch": 7.42, + "learning_rate": 9.750297265160523e-08, + "logits/chosen": -1.3862318992614746, + "logits/rejected": -1.3684009313583374, + "logps/chosen": -134.7038116455078, + "logps/rejected": -281.9936218261719, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.6053466796875, + "rewards/margins": 14.550690650939941, + "rewards/rejected": -20.156036376953125, + "step": 4623 + }, + { + "epoch": 7.42, + "learning_rate": 9.740388426476416e-08, + "logits/chosen": -1.5838134288787842, + "logits/rejected": -1.5664482116699219, + "logps/chosen": -149.15097045898438, + "logps/rejected": -253.12823486328125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.008848667144775, + "rewards/margins": 10.788122177124023, + "rewards/rejected": -17.796972274780273, + "step": 4624 + }, + { + "epoch": 7.42, + "learning_rate": 9.730479587792311e-08, + "logits/chosen": -1.4348199367523193, + "logits/rejected": -1.44424569606781, + "logps/chosen": -168.58038330078125, + "logps/rejected": -305.0420227050781, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.621783256530762, + "rewards/margins": 13.990252494812012, + "rewards/rejected": -21.612035751342773, + "step": 4625 + }, + { + "epoch": 7.43, + "learning_rate": 9.720570749108203e-08, + "logits/chosen": -1.4955791234970093, + "logits/rejected": -1.6274482011795044, + "logps/chosen": -126.95660400390625, + "logps/rejected": -294.79156494140625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.744326591491699, + "rewards/margins": 15.909370422363281, + "rewards/rejected": -22.653696060180664, + "step": 4626 + }, + { + "epoch": 7.43, + "learning_rate": 9.710661910424097e-08, + "logits/chosen": -1.4449772834777832, + "logits/rejected": -1.4239524602890015, + "logps/chosen": -168.65762329101562, + "logps/rejected": -297.3428955078125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.982611179351807, + "rewards/margins": 13.137177467346191, + "rewards/rejected": -21.119789123535156, + "step": 4627 + }, + { + "epoch": 7.43, + "learning_rate": 9.700753071739992e-08, + "logits/chosen": -1.5636733770370483, + "logits/rejected": -1.4990777969360352, + "logps/chosen": -140.17294311523438, + "logps/rejected": -226.644775390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.183783531188965, + "rewards/margins": 11.086920738220215, + "rewards/rejected": -16.270706176757812, + "step": 4628 + }, + { + "epoch": 7.43, + "learning_rate": 9.690844233055885e-08, + "logits/chosen": -1.3013708591461182, + "logits/rejected": -1.412848711013794, + "logps/chosen": -108.52095031738281, + "logps/rejected": -237.7718963623047, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.855538845062256, + "rewards/margins": 11.589658737182617, + "rewards/rejected": -17.44519805908203, + "step": 4629 + }, + { + "epoch": 7.43, + "learning_rate": 9.68093539437178e-08, + "logits/chosen": -1.6717712879180908, + "logits/rejected": -1.498887300491333, + "logps/chosen": -186.2487335205078, + "logps/rejected": -296.9094543457031, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.403076648712158, + "rewards/margins": 13.507612228393555, + "rewards/rejected": -20.910690307617188, + "step": 4630 + }, + { + "epoch": 7.43, + "learning_rate": 9.671026555687672e-08, + "logits/chosen": -1.4179446697235107, + "logits/rejected": -1.4262242317199707, + "logps/chosen": -173.87115478515625, + "logps/rejected": -334.72308349609375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.667671203613281, + "rewards/margins": 15.975678443908691, + "rewards/rejected": -24.64335060119629, + "step": 4631 + }, + { + "epoch": 7.43, + "learning_rate": 9.661117717003567e-08, + "logits/chosen": -1.3390785455703735, + "logits/rejected": -1.355980396270752, + "logps/chosen": -175.20938110351562, + "logps/rejected": -320.1448669433594, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.567395210266113, + "rewards/margins": 14.981678009033203, + "rewards/rejected": -22.549074172973633, + "step": 4632 + }, + { + "epoch": 7.44, + "learning_rate": 9.651208878319461e-08, + "logits/chosen": -1.5016354322433472, + "logits/rejected": -1.4794961214065552, + "logps/chosen": -147.59799194335938, + "logps/rejected": -302.3895568847656, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.613769054412842, + "rewards/margins": 15.715124130249023, + "rewards/rejected": -21.328893661499023, + "step": 4633 + }, + { + "epoch": 7.44, + "learning_rate": 9.641300039635355e-08, + "logits/chosen": -1.6364526748657227, + "logits/rejected": -1.6110318899154663, + "logps/chosen": -111.86907958984375, + "logps/rejected": -275.453125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.459457874298096, + "rewards/margins": 15.862634658813477, + "rewards/rejected": -20.322092056274414, + "step": 4634 + }, + { + "epoch": 7.44, + "learning_rate": 9.631391200951248e-08, + "logits/chosen": -1.3207837343215942, + "logits/rejected": -1.3478920459747314, + "logps/chosen": -166.1696319580078, + "logps/rejected": -393.1435241699219, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.172045707702637, + "rewards/margins": 18.62868881225586, + "rewards/rejected": -28.800737380981445, + "step": 4635 + }, + { + "epoch": 7.44, + "learning_rate": 9.621482362267141e-08, + "logits/chosen": -1.4288808107376099, + "logits/rejected": -1.482408046722412, + "logps/chosen": -145.3209686279297, + "logps/rejected": -267.8291931152344, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.433206558227539, + "rewards/margins": 13.070757865905762, + "rewards/rejected": -18.503963470458984, + "step": 4636 + }, + { + "epoch": 7.44, + "learning_rate": 9.611573523583036e-08, + "logits/chosen": -1.5962858200073242, + "logits/rejected": -1.4455064535140991, + "logps/chosen": -180.2833709716797, + "logps/rejected": -231.6917724609375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.222241401672363, + "rewards/margins": 8.987388610839844, + "rewards/rejected": -16.209630966186523, + "step": 4637 + }, + { + "epoch": 7.44, + "learning_rate": 9.60166468489893e-08, + "logits/chosen": -1.5753028392791748, + "logits/rejected": -1.536102056503296, + "logps/chosen": -137.254638671875, + "logps/rejected": -265.11932373046875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.316536903381348, + "rewards/margins": 13.005054473876953, + "rewards/rejected": -18.321592330932617, + "step": 4638 + }, + { + "epoch": 7.45, + "learning_rate": 9.591755846214823e-08, + "logits/chosen": -1.4976897239685059, + "logits/rejected": -1.5179641246795654, + "logps/chosen": -126.60063934326172, + "logps/rejected": -255.5934295654297, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.141974449157715, + "rewards/margins": 12.695898056030273, + "rewards/rejected": -17.837873458862305, + "step": 4639 + }, + { + "epoch": 7.45, + "learning_rate": 9.581847007530717e-08, + "logits/chosen": -1.5731432437896729, + "logits/rejected": -1.598272442817688, + "logps/chosen": -165.76541137695312, + "logps/rejected": -300.1859130859375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.973592758178711, + "rewards/margins": 13.029263496398926, + "rewards/rejected": -22.002857208251953, + "step": 4640 + }, + { + "epoch": 7.45, + "learning_rate": 9.571938168846611e-08, + "logits/chosen": -1.3958719968795776, + "logits/rejected": -1.4455864429473877, + "logps/chosen": -132.0583038330078, + "logps/rejected": -252.60824584960938, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.716833591461182, + "rewards/margins": 11.505600929260254, + "rewards/rejected": -16.222434997558594, + "step": 4641 + }, + { + "epoch": 7.45, + "learning_rate": 9.562029330162505e-08, + "logits/chosen": -1.5008289813995361, + "logits/rejected": -1.5104775428771973, + "logps/chosen": -186.15951538085938, + "logps/rejected": -286.67608642578125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.699739933013916, + "rewards/margins": 11.210260391235352, + "rewards/rejected": -18.90999984741211, + "step": 4642 + }, + { + "epoch": 7.45, + "learning_rate": 9.552120491478397e-08, + "logits/chosen": -1.3095237016677856, + "logits/rejected": -1.3937842845916748, + "logps/chosen": -186.666015625, + "logps/rejected": -317.0845947265625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.532760620117188, + "rewards/margins": 13.011085510253906, + "rewards/rejected": -21.543846130371094, + "step": 4643 + }, + { + "epoch": 7.45, + "learning_rate": 9.542211652794292e-08, + "logits/chosen": -1.5666929483413696, + "logits/rejected": -1.516608715057373, + "logps/chosen": -166.32217407226562, + "logps/rejected": -276.203369140625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.768231391906738, + "rewards/margins": 13.957283973693848, + "rewards/rejected": -20.725515365600586, + "step": 4644 + }, + { + "epoch": 7.46, + "learning_rate": 9.532302814110185e-08, + "logits/chosen": -1.5471251010894775, + "logits/rejected": -1.5631380081176758, + "logps/chosen": -158.32333374023438, + "logps/rejected": -342.7694396972656, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.357734203338623, + "rewards/margins": 17.52845001220703, + "rewards/rejected": -24.886184692382812, + "step": 4645 + }, + { + "epoch": 7.46, + "learning_rate": 9.52239397542608e-08, + "logits/chosen": -1.4224404096603394, + "logits/rejected": -1.3877859115600586, + "logps/chosen": -137.76551818847656, + "logps/rejected": -232.7071990966797, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.137988567352295, + "rewards/margins": 9.450699806213379, + "rewards/rejected": -15.588687896728516, + "step": 4646 + }, + { + "epoch": 7.46, + "learning_rate": 9.512485136741973e-08, + "logits/chosen": -1.5170836448669434, + "logits/rejected": -1.5308327674865723, + "logps/chosen": -153.4864501953125, + "logps/rejected": -290.47711181640625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.165126800537109, + "rewards/margins": 13.684173583984375, + "rewards/rejected": -19.849302291870117, + "step": 4647 + }, + { + "epoch": 7.46, + "learning_rate": 9.502576298057867e-08, + "logits/chosen": -1.4724810123443604, + "logits/rejected": -1.4672783613204956, + "logps/chosen": -119.09646606445312, + "logps/rejected": -225.70132446289062, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.696094989776611, + "rewards/margins": 11.542621612548828, + "rewards/rejected": -16.23871612548828, + "step": 4648 + }, + { + "epoch": 7.46, + "learning_rate": 9.492667459373761e-08, + "logits/chosen": -1.3102924823760986, + "logits/rejected": -1.3554913997650146, + "logps/chosen": -131.0306396484375, + "logps/rejected": -330.9505310058594, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.248379707336426, + "rewards/margins": 17.606327056884766, + "rewards/rejected": -22.854707717895508, + "step": 4649 + }, + { + "epoch": 7.46, + "learning_rate": 9.482758620689655e-08, + "logits/chosen": -1.3546383380889893, + "logits/rejected": -1.3326917886734009, + "logps/chosen": -162.3204345703125, + "logps/rejected": -338.7189025878906, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.16411828994751, + "rewards/margins": 15.890613555908203, + "rewards/rejected": -23.054729461669922, + "step": 4650 + }, + { + "epoch": 7.47, + "learning_rate": 9.472849782005548e-08, + "logits/chosen": -1.4516221284866333, + "logits/rejected": -1.443242073059082, + "logps/chosen": -147.52035522460938, + "logps/rejected": -269.9925842285156, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.060882091522217, + "rewards/margins": 13.360878944396973, + "rewards/rejected": -18.421762466430664, + "step": 4651 + }, + { + "epoch": 7.47, + "learning_rate": 9.462940943321441e-08, + "logits/chosen": -1.6977810859680176, + "logits/rejected": -1.6491694450378418, + "logps/chosen": -170.45297241210938, + "logps/rejected": -310.3915100097656, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.163542747497559, + "rewards/margins": 14.743598937988281, + "rewards/rejected": -20.907142639160156, + "step": 4652 + }, + { + "epoch": 7.47, + "learning_rate": 9.453032104637336e-08, + "logits/chosen": -1.3077198266983032, + "logits/rejected": -1.35025954246521, + "logps/chosen": -185.76576232910156, + "logps/rejected": -339.12957763671875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.258760452270508, + "rewards/margins": 14.317686080932617, + "rewards/rejected": -22.576446533203125, + "step": 4653 + }, + { + "epoch": 7.47, + "learning_rate": 9.44312326595323e-08, + "logits/chosen": -1.4999805688858032, + "logits/rejected": -1.495664358139038, + "logps/chosen": -143.48851013183594, + "logps/rejected": -346.6123962402344, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.809131622314453, + "rewards/margins": 17.835792541503906, + "rewards/rejected": -25.64492416381836, + "step": 4654 + }, + { + "epoch": 7.47, + "learning_rate": 9.433214427269124e-08, + "logits/chosen": -1.5503978729248047, + "logits/rejected": -1.6688485145568848, + "logps/chosen": -143.42144775390625, + "logps/rejected": -311.3262939453125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.694539546966553, + "rewards/margins": 15.807502746582031, + "rewards/rejected": -22.502044677734375, + "step": 4655 + }, + { + "epoch": 7.47, + "learning_rate": 9.423305588585017e-08, + "logits/chosen": -1.3424460887908936, + "logits/rejected": -1.3649423122406006, + "logps/chosen": -167.7622833251953, + "logps/rejected": -306.54412841796875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.154219627380371, + "rewards/margins": 13.961280822753906, + "rewards/rejected": -22.115501403808594, + "step": 4656 + }, + { + "epoch": 7.48, + "learning_rate": 9.41339674990091e-08, + "logits/chosen": -1.5832605361938477, + "logits/rejected": -1.7056794166564941, + "logps/chosen": -139.80445861816406, + "logps/rejected": -296.126708984375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.785496711730957, + "rewards/margins": 13.614591598510742, + "rewards/rejected": -20.400089263916016, + "step": 4657 + }, + { + "epoch": 7.48, + "learning_rate": 9.403487911216805e-08, + "logits/chosen": -1.399991512298584, + "logits/rejected": -1.3709068298339844, + "logps/chosen": -164.457275390625, + "logps/rejected": -290.4913635253906, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.010276794433594, + "rewards/margins": 14.605002403259277, + "rewards/rejected": -21.615278244018555, + "step": 4658 + }, + { + "epoch": 7.48, + "learning_rate": 9.3935790725327e-08, + "logits/chosen": -1.437023639678955, + "logits/rejected": -1.4438631534576416, + "logps/chosen": -140.11593627929688, + "logps/rejected": -306.06536865234375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.04295015335083, + "rewards/margins": 14.356986999511719, + "rewards/rejected": -20.39993667602539, + "step": 4659 + }, + { + "epoch": 7.48, + "learning_rate": 9.383670233848592e-08, + "logits/chosen": -1.4173215627670288, + "logits/rejected": -1.488646388053894, + "logps/chosen": -130.05393981933594, + "logps/rejected": -271.047607421875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.705996990203857, + "rewards/margins": 12.873019218444824, + "rewards/rejected": -18.579015731811523, + "step": 4660 + }, + { + "epoch": 7.48, + "learning_rate": 9.373761395164487e-08, + "logits/chosen": -1.446407675743103, + "logits/rejected": -1.4857710599899292, + "logps/chosen": -201.12258911132812, + "logps/rejected": -314.4423828125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.791808128356934, + "rewards/margins": 11.492267608642578, + "rewards/rejected": -22.284076690673828, + "step": 4661 + }, + { + "epoch": 7.48, + "learning_rate": 9.36385255648038e-08, + "logits/chosen": -1.5105905532836914, + "logits/rejected": -1.5299694538116455, + "logps/chosen": -121.16903686523438, + "logps/rejected": -276.4168701171875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.181404113769531, + "rewards/margins": 15.038162231445312, + "rewards/rejected": -20.219566345214844, + "step": 4662 + }, + { + "epoch": 7.48, + "learning_rate": 9.353943717796275e-08, + "logits/chosen": -1.5034154653549194, + "logits/rejected": -1.458743691444397, + "logps/chosen": -163.081787109375, + "logps/rejected": -304.5079650878906, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.122138023376465, + "rewards/margins": 14.280546188354492, + "rewards/rejected": -21.40268325805664, + "step": 4663 + }, + { + "epoch": 7.49, + "learning_rate": 9.344034879112167e-08, + "logits/chosen": -1.5436289310455322, + "logits/rejected": -1.6150435209274292, + "logps/chosen": -159.5426025390625, + "logps/rejected": -298.88885498046875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.384900093078613, + "rewards/margins": 12.12484359741211, + "rewards/rejected": -20.509742736816406, + "step": 4664 + }, + { + "epoch": 7.49, + "learning_rate": 9.334126040428061e-08, + "logits/chosen": -1.4268789291381836, + "logits/rejected": -1.3405101299285889, + "logps/chosen": -200.993408203125, + "logps/rejected": -367.042236328125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.7235689163208, + "rewards/margins": 17.049978256225586, + "rewards/rejected": -26.773548126220703, + "step": 4665 + }, + { + "epoch": 7.49, + "learning_rate": 9.324217201743956e-08, + "logits/chosen": -1.51914381980896, + "logits/rejected": -1.5844841003417969, + "logps/chosen": -96.27718353271484, + "logps/rejected": -315.5326843261719, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3314037322998047, + "rewards/margins": 20.46729850769043, + "rewards/rejected": -23.798702239990234, + "step": 4666 + }, + { + "epoch": 7.49, + "learning_rate": 9.314308363059849e-08, + "logits/chosen": -1.3720576763153076, + "logits/rejected": -1.437596082687378, + "logps/chosen": -160.74143981933594, + "logps/rejected": -363.5211181640625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.011680603027344, + "rewards/margins": 17.78326988220215, + "rewards/rejected": -25.79494857788086, + "step": 4667 + }, + { + "epoch": 7.49, + "learning_rate": 9.304399524375743e-08, + "logits/chosen": -1.465597152709961, + "logits/rejected": -1.490572214126587, + "logps/chosen": -196.37115478515625, + "logps/rejected": -312.5265197753906, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.090531349182129, + "rewards/margins": 11.958608627319336, + "rewards/rejected": -22.04914093017578, + "step": 4668 + }, + { + "epoch": 7.49, + "learning_rate": 9.294490685691636e-08, + "logits/chosen": -1.370262861251831, + "logits/rejected": -1.3424370288848877, + "logps/chosen": -175.2178955078125, + "logps/rejected": -306.3891906738281, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.667069435119629, + "rewards/margins": 14.629913330078125, + "rewards/rejected": -23.296981811523438, + "step": 4669 + }, + { + "epoch": 7.5, + "learning_rate": 9.28458184700753e-08, + "logits/chosen": -1.6989083290100098, + "logits/rejected": -1.7191755771636963, + "logps/chosen": -161.3614959716797, + "logps/rejected": -319.7138671875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.547299861907959, + "rewards/margins": 14.450780868530273, + "rewards/rejected": -21.99808120727539, + "step": 4670 + }, + { + "epoch": 7.5, + "learning_rate": 9.274673008323424e-08, + "logits/chosen": -1.4273133277893066, + "logits/rejected": -1.3939707279205322, + "logps/chosen": -128.52688598632812, + "logps/rejected": -289.8624572753906, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.2614359855651855, + "rewards/margins": 14.206296920776367, + "rewards/rejected": -20.467735290527344, + "step": 4671 + }, + { + "epoch": 7.5, + "learning_rate": 9.264764169639317e-08, + "logits/chosen": -1.4797279834747314, + "logits/rejected": -1.4723173379898071, + "logps/chosen": -208.32713317871094, + "logps/rejected": -352.598876953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.506843566894531, + "rewards/margins": 14.681365966796875, + "rewards/rejected": -24.188209533691406, + "step": 4672 + }, + { + "epoch": 7.5, + "learning_rate": 9.254855330955212e-08, + "logits/chosen": -1.5180813074111938, + "logits/rejected": -1.5404460430145264, + "logps/chosen": -185.95999145507812, + "logps/rejected": -327.08819580078125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.46059513092041, + "rewards/margins": 14.201869010925293, + "rewards/rejected": -22.662464141845703, + "step": 4673 + }, + { + "epoch": 7.5, + "learning_rate": 9.244946492271105e-08, + "logits/chosen": -1.3819918632507324, + "logits/rejected": -1.3915354013442993, + "logps/chosen": -150.98977661132812, + "logps/rejected": -260.66412353515625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.844151496887207, + "rewards/margins": 9.471665382385254, + "rewards/rejected": -17.31581687927246, + "step": 4674 + }, + { + "epoch": 7.5, + "learning_rate": 9.235037653587e-08, + "logits/chosen": -1.3783522844314575, + "logits/rejected": -1.3277126550674438, + "logps/chosen": -163.96575927734375, + "logps/rejected": -275.9067687988281, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.786041259765625, + "rewards/margins": 12.45266056060791, + "rewards/rejected": -19.23870277404785, + "step": 4675 + }, + { + "epoch": 7.51, + "learning_rate": 9.225128814902892e-08, + "logits/chosen": -1.570798397064209, + "logits/rejected": -1.5771746635437012, + "logps/chosen": -103.86129760742188, + "logps/rejected": -248.21456909179688, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9839491844177246, + "rewards/margins": 14.52195930480957, + "rewards/rejected": -17.50590705871582, + "step": 4676 + }, + { + "epoch": 7.51, + "learning_rate": 9.215219976218786e-08, + "logits/chosen": -1.26365065574646, + "logits/rejected": -1.3567373752593994, + "logps/chosen": -122.02236938476562, + "logps/rejected": -275.4773864746094, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.452640056610107, + "rewards/margins": 14.306793212890625, + "rewards/rejected": -19.759431838989258, + "step": 4677 + }, + { + "epoch": 7.51, + "learning_rate": 9.205311137534681e-08, + "logits/chosen": -1.3751871585845947, + "logits/rejected": -1.2816452980041504, + "logps/chosen": -136.34095764160156, + "logps/rejected": -293.5909423828125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.985490798950195, + "rewards/margins": 15.11579704284668, + "rewards/rejected": -20.101287841796875, + "step": 4678 + }, + { + "epoch": 7.51, + "learning_rate": 9.195402298850574e-08, + "logits/chosen": -1.4282722473144531, + "logits/rejected": -1.44568932056427, + "logps/chosen": -231.341796875, + "logps/rejected": -346.1517028808594, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.781965255737305, + "rewards/margins": 14.174338340759277, + "rewards/rejected": -25.9563045501709, + "step": 4679 + }, + { + "epoch": 7.51, + "learning_rate": 9.185493460166469e-08, + "logits/chosen": -1.276572585105896, + "logits/rejected": -1.3583242893218994, + "logps/chosen": -151.66256713867188, + "logps/rejected": -283.3266296386719, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.48869514465332, + "rewards/margins": 11.953573226928711, + "rewards/rejected": -19.44226837158203, + "step": 4680 + }, + { + "epoch": 7.51, + "learning_rate": 9.175584621482361e-08, + "logits/chosen": -1.5001600980758667, + "logits/rejected": -1.4685509204864502, + "logps/chosen": -139.1666259765625, + "logps/rejected": -291.9445495605469, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.011883735656738, + "rewards/margins": 14.305089950561523, + "rewards/rejected": -20.316974639892578, + "step": 4681 + }, + { + "epoch": 7.52, + "learning_rate": 9.165675782798256e-08, + "logits/chosen": -1.4797042608261108, + "logits/rejected": -1.608716607093811, + "logps/chosen": -159.43283081054688, + "logps/rejected": -303.61358642578125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.7165679931640625, + "rewards/margins": 11.393142700195312, + "rewards/rejected": -19.109710693359375, + "step": 4682 + }, + { + "epoch": 7.52, + "learning_rate": 9.155766944114149e-08, + "logits/chosen": -1.395006775856018, + "logits/rejected": -1.424055814743042, + "logps/chosen": -125.1851806640625, + "logps/rejected": -262.9864807128906, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.210836887359619, + "rewards/margins": 12.62380599975586, + "rewards/rejected": -18.834644317626953, + "step": 4683 + }, + { + "epoch": 7.52, + "learning_rate": 9.145858105430044e-08, + "logits/chosen": -1.4026089906692505, + "logits/rejected": -1.352933406829834, + "logps/chosen": -161.1826171875, + "logps/rejected": -299.03204345703125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.531794548034668, + "rewards/margins": 14.06709098815918, + "rewards/rejected": -21.598886489868164, + "step": 4684 + }, + { + "epoch": 7.52, + "learning_rate": 9.135949266745937e-08, + "logits/chosen": -1.3204432725906372, + "logits/rejected": -1.4012277126312256, + "logps/chosen": -186.34585571289062, + "logps/rejected": -319.91046142578125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.24697494506836, + "rewards/margins": 13.53145980834961, + "rewards/rejected": -22.77843475341797, + "step": 4685 + }, + { + "epoch": 7.52, + "learning_rate": 9.12604042806183e-08, + "logits/chosen": -1.383288025856018, + "logits/rejected": -1.3893635272979736, + "logps/chosen": -129.77774047851562, + "logps/rejected": -247.53672790527344, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.686770915985107, + "rewards/margins": 11.766590118408203, + "rewards/rejected": -17.45336151123047, + "step": 4686 + }, + { + "epoch": 7.52, + "learning_rate": 9.116131589377725e-08, + "logits/chosen": -1.580504059791565, + "logits/rejected": -1.5359766483306885, + "logps/chosen": -136.1266632080078, + "logps/rejected": -274.33251953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.8723464012146, + "rewards/margins": 13.256122589111328, + "rewards/rejected": -19.128467559814453, + "step": 4687 + }, + { + "epoch": 7.52, + "learning_rate": 9.106222750693618e-08, + "logits/chosen": -1.4008338451385498, + "logits/rejected": -1.4169789552688599, + "logps/chosen": -170.75767517089844, + "logps/rejected": -311.5745849609375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.931851387023926, + "rewards/margins": 14.704607009887695, + "rewards/rejected": -23.636459350585938, + "step": 4688 + }, + { + "epoch": 7.53, + "learning_rate": 9.096313912009512e-08, + "logits/chosen": -1.4016352891921997, + "logits/rejected": -1.494713306427002, + "logps/chosen": -200.95388793945312, + "logps/rejected": -350.3515625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.099800109863281, + "rewards/margins": 14.090203285217285, + "rewards/rejected": -24.19000244140625, + "step": 4689 + }, + { + "epoch": 7.53, + "learning_rate": 9.086405073325405e-08, + "logits/chosen": -1.5773314237594604, + "logits/rejected": -1.6315600872039795, + "logps/chosen": -188.18704223632812, + "logps/rejected": -315.11865234375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.754254341125488, + "rewards/margins": 12.152396202087402, + "rewards/rejected": -21.90665054321289, + "step": 4690 + }, + { + "epoch": 7.53, + "learning_rate": 9.0764962346413e-08, + "logits/chosen": -1.3998918533325195, + "logits/rejected": -1.39747953414917, + "logps/chosen": -135.30029296875, + "logps/rejected": -303.98468017578125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.903489112854004, + "rewards/margins": 16.018362045288086, + "rewards/rejected": -20.921850204467773, + "step": 4691 + }, + { + "epoch": 7.53, + "learning_rate": 9.066587395957194e-08, + "logits/chosen": -1.5262329578399658, + "logits/rejected": -1.509722113609314, + "logps/chosen": -158.48410034179688, + "logps/rejected": -299.859130859375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.531373977661133, + "rewards/margins": 13.460500717163086, + "rewards/rejected": -19.99187469482422, + "step": 4692 + }, + { + "epoch": 7.53, + "learning_rate": 9.056678557273086e-08, + "logits/chosen": -1.4369230270385742, + "logits/rejected": -1.4602947235107422, + "logps/chosen": -138.4202423095703, + "logps/rejected": -342.19970703125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.230024337768555, + "rewards/margins": 19.51519775390625, + "rewards/rejected": -25.745222091674805, + "step": 4693 + }, + { + "epoch": 7.53, + "learning_rate": 9.046769718588981e-08, + "logits/chosen": -1.2998539209365845, + "logits/rejected": -1.489733099937439, + "logps/chosen": -134.3418731689453, + "logps/rejected": -293.5400695800781, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.83195686340332, + "rewards/margins": 11.895517349243164, + "rewards/rejected": -18.727474212646484, + "step": 4694 + }, + { + "epoch": 7.54, + "learning_rate": 9.036860879904874e-08, + "logits/chosen": -1.3041269779205322, + "logits/rejected": -1.3039062023162842, + "logps/chosen": -145.80392456054688, + "logps/rejected": -270.0723876953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.189149856567383, + "rewards/margins": 12.363083839416504, + "rewards/rejected": -19.55223274230957, + "step": 4695 + }, + { + "epoch": 7.54, + "learning_rate": 9.026952041220769e-08, + "logits/chosen": -1.4478561878204346, + "logits/rejected": -1.4336051940917969, + "logps/chosen": -170.98776245117188, + "logps/rejected": -291.117431640625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.052258491516113, + "rewards/margins": 13.115023612976074, + "rewards/rejected": -22.167282104492188, + "step": 4696 + }, + { + "epoch": 7.54, + "learning_rate": 9.017043202536661e-08, + "logits/chosen": -1.6634595394134521, + "logits/rejected": -1.6278806924819946, + "logps/chosen": -150.7771759033203, + "logps/rejected": -274.83697509765625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.91173791885376, + "rewards/margins": 13.609630584716797, + "rewards/rejected": -20.52136993408203, + "step": 4697 + }, + { + "epoch": 7.54, + "learning_rate": 9.007134363852556e-08, + "logits/chosen": -1.491561770439148, + "logits/rejected": -1.5233705043792725, + "logps/chosen": -155.96990966796875, + "logps/rejected": -328.9588623046875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.528193950653076, + "rewards/margins": 16.92095947265625, + "rewards/rejected": -24.449153900146484, + "step": 4698 + }, + { + "epoch": 7.54, + "learning_rate": 8.99722552516845e-08, + "logits/chosen": -1.5453157424926758, + "logits/rejected": -1.6660012006759644, + "logps/chosen": -155.7532958984375, + "logps/rejected": -299.9350280761719, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.298360824584961, + "rewards/margins": 13.493926048278809, + "rewards/rejected": -20.792285919189453, + "step": 4699 + }, + { + "epoch": 7.54, + "learning_rate": 8.987316686484344e-08, + "logits/chosen": -1.5169858932495117, + "logits/rejected": -1.5188794136047363, + "logps/chosen": -188.78668212890625, + "logps/rejected": -327.8924560546875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.313609600067139, + "rewards/margins": 14.011138916015625, + "rewards/rejected": -21.324748992919922, + "step": 4700 + }, + { + "epoch": 7.55, + "learning_rate": 8.977407847800238e-08, + "logits/chosen": -1.4109373092651367, + "logits/rejected": -1.4384793043136597, + "logps/chosen": -183.653076171875, + "logps/rejected": -319.7188720703125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.954524040222168, + "rewards/margins": 12.710280418395996, + "rewards/rejected": -21.664804458618164, + "step": 4701 + }, + { + "epoch": 7.55, + "learning_rate": 8.96749900911613e-08, + "logits/chosen": -1.479729413986206, + "logits/rejected": -1.4415141344070435, + "logps/chosen": -141.6053466796875, + "logps/rejected": -243.20616149902344, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.309662818908691, + "rewards/margins": 12.552441596984863, + "rewards/rejected": -17.862104415893555, + "step": 4702 + }, + { + "epoch": 7.55, + "learning_rate": 8.957590170432025e-08, + "logits/chosen": -1.6321587562561035, + "logits/rejected": -1.6633098125457764, + "logps/chosen": -166.41702270507812, + "logps/rejected": -332.40069580078125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.312561988830566, + "rewards/margins": 17.46073341369629, + "rewards/rejected": -24.773296356201172, + "step": 4703 + }, + { + "epoch": 7.55, + "learning_rate": 8.94768133174792e-08, + "logits/chosen": -1.5055994987487793, + "logits/rejected": -1.4562609195709229, + "logps/chosen": -188.40005493164062, + "logps/rejected": -287.43548583984375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.7014665603637695, + "rewards/margins": 11.956165313720703, + "rewards/rejected": -19.65763282775879, + "step": 4704 + }, + { + "epoch": 7.55, + "learning_rate": 8.937772493063813e-08, + "logits/chosen": -1.5708049535751343, + "logits/rejected": -1.5106664896011353, + "logps/chosen": -205.771728515625, + "logps/rejected": -327.30084228515625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.633448600769043, + "rewards/margins": 12.655526161193848, + "rewards/rejected": -23.28897476196289, + "step": 4705 + }, + { + "epoch": 7.55, + "learning_rate": 8.927863654379706e-08, + "logits/chosen": -1.5547195672988892, + "logits/rejected": -1.6433590650558472, + "logps/chosen": -154.21633911132812, + "logps/rejected": -293.0074768066406, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.116382598876953, + "rewards/margins": 11.579911231994629, + "rewards/rejected": -18.6962947845459, + "step": 4706 + }, + { + "epoch": 7.56, + "learning_rate": 8.9179548156956e-08, + "logits/chosen": -1.5321779251098633, + "logits/rejected": -1.43524968624115, + "logps/chosen": -171.66183471679688, + "logps/rejected": -239.5718231201172, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.177508354187012, + "rewards/margins": 9.88084888458252, + "rewards/rejected": -16.05835723876953, + "step": 4707 + }, + { + "epoch": 7.56, + "learning_rate": 8.908045977011494e-08, + "logits/chosen": -1.3314945697784424, + "logits/rejected": -1.4202792644500732, + "logps/chosen": -147.01148986816406, + "logps/rejected": -271.3104553222656, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.869982719421387, + "rewards/margins": 11.690071105957031, + "rewards/rejected": -19.560054779052734, + "step": 4708 + }, + { + "epoch": 7.56, + "learning_rate": 8.898137138327388e-08, + "logits/chosen": -1.4876796007156372, + "logits/rejected": -1.5966458320617676, + "logps/chosen": -179.2229461669922, + "logps/rejected": -308.83050537109375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.636667251586914, + "rewards/margins": 11.332965850830078, + "rewards/rejected": -19.969633102416992, + "step": 4709 + }, + { + "epoch": 7.56, + "learning_rate": 8.888228299643281e-08, + "logits/chosen": -1.4067193269729614, + "logits/rejected": -1.449587345123291, + "logps/chosen": -121.21437072753906, + "logps/rejected": -289.77362060546875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.367801666259766, + "rewards/margins": 14.496866226196289, + "rewards/rejected": -20.864667892456055, + "step": 4710 + }, + { + "epoch": 7.56, + "learning_rate": 8.878319460959176e-08, + "logits/chosen": -1.3372551202774048, + "logits/rejected": -1.2549307346343994, + "logps/chosen": -166.3625030517578, + "logps/rejected": -286.3956298828125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.398911952972412, + "rewards/margins": 14.295525550842285, + "rewards/rejected": -20.694438934326172, + "step": 4711 + }, + { + "epoch": 7.56, + "learning_rate": 8.868410622275069e-08, + "logits/chosen": -1.4209767580032349, + "logits/rejected": -1.5228395462036133, + "logps/chosen": -168.14749145507812, + "logps/rejected": -314.01910400390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.663097381591797, + "rewards/margins": 14.242862701416016, + "rewards/rejected": -22.905960083007812, + "step": 4712 + }, + { + "epoch": 7.57, + "learning_rate": 8.858501783590964e-08, + "logits/chosen": -1.485479712486267, + "logits/rejected": -1.4641757011413574, + "logps/chosen": -224.65835571289062, + "logps/rejected": -342.92242431640625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.812578201293945, + "rewards/margins": 13.236865997314453, + "rewards/rejected": -25.04944610595703, + "step": 4713 + }, + { + "epoch": 7.57, + "learning_rate": 8.848592944906856e-08, + "logits/chosen": -1.5092283487319946, + "logits/rejected": -1.4219037294387817, + "logps/chosen": -175.57301330566406, + "logps/rejected": -338.89569091796875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.972754955291748, + "rewards/margins": 16.078598022460938, + "rewards/rejected": -23.051353454589844, + "step": 4714 + }, + { + "epoch": 7.57, + "learning_rate": 8.83868410622275e-08, + "logits/chosen": -1.6338449716567993, + "logits/rejected": -1.6691274642944336, + "logps/chosen": -156.5954132080078, + "logps/rejected": -283.3481750488281, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.896700859069824, + "rewards/margins": 12.337579727172852, + "rewards/rejected": -19.23427963256836, + "step": 4715 + }, + { + "epoch": 7.57, + "learning_rate": 8.828775267538644e-08, + "logits/chosen": -1.3378397226333618, + "logits/rejected": -1.3853110074996948, + "logps/chosen": -151.28402709960938, + "logps/rejected": -280.86175537109375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.552168846130371, + "rewards/margins": 12.079757690429688, + "rewards/rejected": -20.631927490234375, + "step": 4716 + }, + { + "epoch": 7.57, + "learning_rate": 8.818866428854538e-08, + "logits/chosen": -1.5039646625518799, + "logits/rejected": -1.4615591764450073, + "logps/chosen": -183.71316528320312, + "logps/rejected": -316.1279296875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.877625465393066, + "rewards/margins": 14.499176979064941, + "rewards/rejected": -23.376802444458008, + "step": 4717 + }, + { + "epoch": 7.57, + "learning_rate": 8.808957590170432e-08, + "logits/chosen": -1.5677212476730347, + "logits/rejected": -1.485735297203064, + "logps/chosen": -173.0446014404297, + "logps/rejected": -293.56494140625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.827578067779541, + "rewards/margins": 14.64590072631836, + "rewards/rejected": -22.47347640991211, + "step": 4718 + }, + { + "epoch": 7.57, + "learning_rate": 8.799048751486325e-08, + "logits/chosen": -1.4940017461776733, + "logits/rejected": -1.4886754751205444, + "logps/chosen": -113.06331634521484, + "logps/rejected": -278.56915283203125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.232576847076416, + "rewards/margins": 15.937223434448242, + "rewards/rejected": -21.1697998046875, + "step": 4719 + }, + { + "epoch": 7.58, + "learning_rate": 8.78913991280222e-08, + "logits/chosen": -1.4246290922164917, + "logits/rejected": -1.4246153831481934, + "logps/chosen": -120.55335998535156, + "logps/rejected": -265.0108642578125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.209877014160156, + "rewards/margins": 14.578950881958008, + "rewards/rejected": -18.788827896118164, + "step": 4720 + }, + { + "epoch": 7.58, + "learning_rate": 8.779231074118113e-08, + "logits/chosen": -1.439969539642334, + "logits/rejected": -1.4460550546646118, + "logps/chosen": -158.12982177734375, + "logps/rejected": -312.85870361328125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.583681106567383, + "rewards/margins": 14.658941268920898, + "rewards/rejected": -22.24262237548828, + "step": 4721 + }, + { + "epoch": 7.58, + "learning_rate": 8.769322235434007e-08, + "logits/chosen": -1.708014726638794, + "logits/rejected": -1.6944912672042847, + "logps/chosen": -107.54056549072266, + "logps/rejected": -271.5185852050781, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1777000427246094, + "rewards/margins": 16.055084228515625, + "rewards/rejected": -19.232784271240234, + "step": 4722 + }, + { + "epoch": 7.58, + "learning_rate": 8.759413396749901e-08, + "logits/chosen": -1.5618550777435303, + "logits/rejected": -1.5975916385650635, + "logps/chosen": -137.83242797851562, + "logps/rejected": -317.9883728027344, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.143451690673828, + "rewards/margins": 16.17877960205078, + "rewards/rejected": -22.32223129272461, + "step": 4723 + }, + { + "epoch": 7.58, + "learning_rate": 8.749504558065794e-08, + "logits/chosen": -1.375105857849121, + "logits/rejected": -1.391018271446228, + "logps/chosen": -191.09234619140625, + "logps/rejected": -322.84063720703125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.402933120727539, + "rewards/margins": 13.566898345947266, + "rewards/rejected": -23.969829559326172, + "step": 4724 + }, + { + "epoch": 7.58, + "learning_rate": 8.739595719381689e-08, + "logits/chosen": -1.4987280368804932, + "logits/rejected": -1.417028546333313, + "logps/chosen": -195.1342010498047, + "logps/rejected": -307.31396484375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.25172233581543, + "rewards/margins": 13.733865737915039, + "rewards/rejected": -22.98558807373047, + "step": 4725 + }, + { + "epoch": 7.59, + "learning_rate": 8.729686880697582e-08, + "logits/chosen": -1.5606415271759033, + "logits/rejected": -1.4880845546722412, + "logps/chosen": -115.09814453125, + "logps/rejected": -243.11570739746094, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6351218223571777, + "rewards/margins": 13.275308609008789, + "rewards/rejected": -15.910429954528809, + "step": 4726 + }, + { + "epoch": 7.59, + "learning_rate": 8.719778042013475e-08, + "logits/chosen": -1.5517923831939697, + "logits/rejected": -1.5226926803588867, + "logps/chosen": -186.58535766601562, + "logps/rejected": -279.57708740234375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.69389009475708, + "rewards/margins": 11.833724975585938, + "rewards/rejected": -18.527616500854492, + "step": 4727 + }, + { + "epoch": 7.59, + "learning_rate": 8.709869203329369e-08, + "logits/chosen": -1.322426199913025, + "logits/rejected": -1.3974436521530151, + "logps/chosen": -136.0950927734375, + "logps/rejected": -261.6964111328125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.680986404418945, + "rewards/margins": 12.36867904663086, + "rewards/rejected": -19.049665451049805, + "step": 4728 + }, + { + "epoch": 7.59, + "learning_rate": 8.699960364645263e-08, + "logits/chosen": -1.6627970933914185, + "logits/rejected": -1.7141175270080566, + "logps/chosen": -146.1900634765625, + "logps/rejected": -300.8796081542969, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.259432792663574, + "rewards/margins": 15.951683044433594, + "rewards/rejected": -21.211116790771484, + "step": 4729 + }, + { + "epoch": 7.59, + "learning_rate": 8.690051525961158e-08, + "logits/chosen": -1.3275063037872314, + "logits/rejected": -1.4167678356170654, + "logps/chosen": -155.60263061523438, + "logps/rejected": -323.9604187011719, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.2940568923950195, + "rewards/margins": 16.25516700744629, + "rewards/rejected": -23.549224853515625, + "step": 4730 + }, + { + "epoch": 7.59, + "learning_rate": 8.68014268727705e-08, + "logits/chosen": -1.3957899808883667, + "logits/rejected": -1.419694185256958, + "logps/chosen": -233.71780395507812, + "logps/rejected": -345.4344482421875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.7249755859375, + "rewards/margins": 12.776243209838867, + "rewards/rejected": -24.501218795776367, + "step": 4731 + }, + { + "epoch": 7.6, + "learning_rate": 8.670233848592945e-08, + "logits/chosen": -1.2962726354599, + "logits/rejected": -1.3864226341247559, + "logps/chosen": -171.94631958007812, + "logps/rejected": -332.2319641113281, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.557846069335938, + "rewards/margins": 14.34072494506836, + "rewards/rejected": -22.898571014404297, + "step": 4732 + }, + { + "epoch": 7.6, + "learning_rate": 8.660325009908838e-08, + "logits/chosen": -1.4189794063568115, + "logits/rejected": -1.4011517763137817, + "logps/chosen": -175.07763671875, + "logps/rejected": -301.352783203125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.694611549377441, + "rewards/margins": 10.955744743347168, + "rewards/rejected": -20.65035629272461, + "step": 4733 + }, + { + "epoch": 7.6, + "learning_rate": 8.650416171224733e-08, + "logits/chosen": -1.5141884088516235, + "logits/rejected": -1.647175908088684, + "logps/chosen": -144.28167724609375, + "logps/rejected": -303.4495849609375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.171380519866943, + "rewards/margins": 14.133787155151367, + "rewards/rejected": -20.30516815185547, + "step": 4734 + }, + { + "epoch": 7.6, + "learning_rate": 8.640507332540625e-08, + "logits/chosen": -1.6075409650802612, + "logits/rejected": -1.5492010116577148, + "logps/chosen": -171.6099090576172, + "logps/rejected": -266.79583740234375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.834927558898926, + "rewards/margins": 10.087003707885742, + "rewards/rejected": -16.92193031311035, + "step": 4735 + }, + { + "epoch": 7.6, + "learning_rate": 8.63059849385652e-08, + "logits/chosen": -1.3793721199035645, + "logits/rejected": -1.3449857234954834, + "logps/chosen": -116.20500183105469, + "logps/rejected": -266.173828125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.927165508270264, + "rewards/margins": 14.923125267028809, + "rewards/rejected": -19.850292205810547, + "step": 4736 + }, + { + "epoch": 7.6, + "learning_rate": 8.620689655172414e-08, + "logits/chosen": -1.4459545612335205, + "logits/rejected": -1.4735159873962402, + "logps/chosen": -215.08274841308594, + "logps/rejected": -340.91986083984375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.283963203430176, + "rewards/margins": 12.812395095825195, + "rewards/rejected": -24.096359252929688, + "step": 4737 + }, + { + "epoch": 7.61, + "learning_rate": 8.610780816488307e-08, + "logits/chosen": -1.535648226737976, + "logits/rejected": -1.4717156887054443, + "logps/chosen": -149.67103576660156, + "logps/rejected": -262.577392578125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.651946067810059, + "rewards/margins": 13.436820983886719, + "rewards/rejected": -19.088768005371094, + "step": 4738 + }, + { + "epoch": 7.61, + "learning_rate": 8.600871977804201e-08, + "logits/chosen": -1.4350230693817139, + "logits/rejected": -1.4514607191085815, + "logps/chosen": -134.27151489257812, + "logps/rejected": -283.39385986328125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.934743881225586, + "rewards/margins": 14.311979293823242, + "rewards/rejected": -21.246721267700195, + "step": 4739 + }, + { + "epoch": 7.61, + "learning_rate": 8.590963139120094e-08, + "logits/chosen": -1.6292638778686523, + "logits/rejected": -1.601122260093689, + "logps/chosen": -144.06423950195312, + "logps/rejected": -240.56333923339844, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.001392841339111, + "rewards/margins": 11.121892929077148, + "rewards/rejected": -16.1232852935791, + "step": 4740 + }, + { + "epoch": 7.61, + "learning_rate": 8.581054300435989e-08, + "logits/chosen": -1.3901903629302979, + "logits/rejected": -1.4786686897277832, + "logps/chosen": -134.11297607421875, + "logps/rejected": -298.23004150390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.989151477813721, + "rewards/margins": 14.084030151367188, + "rewards/rejected": -20.073183059692383, + "step": 4741 + }, + { + "epoch": 7.61, + "learning_rate": 8.571145461751882e-08, + "logits/chosen": -1.6070424318313599, + "logits/rejected": -1.6352970600128174, + "logps/chosen": -130.3424530029297, + "logps/rejected": -262.3775329589844, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.587765693664551, + "rewards/margins": 12.443835258483887, + "rewards/rejected": -18.031600952148438, + "step": 4742 + }, + { + "epoch": 7.61, + "learning_rate": 8.561236623067775e-08, + "logits/chosen": -1.4599140882492065, + "logits/rejected": -1.4015250205993652, + "logps/chosen": -143.6072998046875, + "logps/rejected": -275.05108642578125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.401677131652832, + "rewards/margins": 13.834518432617188, + "rewards/rejected": -21.236194610595703, + "step": 4743 + }, + { + "epoch": 7.61, + "learning_rate": 8.55132778438367e-08, + "logits/chosen": -1.571688175201416, + "logits/rejected": -1.5473450422286987, + "logps/chosen": -152.24240112304688, + "logps/rejected": -283.36529541015625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.187397480010986, + "rewards/margins": 13.581812858581543, + "rewards/rejected": -19.769210815429688, + "step": 4744 + }, + { + "epoch": 7.62, + "learning_rate": 8.541418945699563e-08, + "logits/chosen": -1.4360275268554688, + "logits/rejected": -1.5306233167648315, + "logps/chosen": -111.34658813476562, + "logps/rejected": -270.0908508300781, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.3043718338012695, + "rewards/margins": 14.378730773925781, + "rewards/rejected": -18.683101654052734, + "step": 4745 + }, + { + "epoch": 7.62, + "learning_rate": 8.531510107015458e-08, + "logits/chosen": -1.4586148262023926, + "logits/rejected": -1.5360833406448364, + "logps/chosen": -235.20675659179688, + "logps/rejected": -402.67742919921875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.469856262207031, + "rewards/margins": 14.584729194641113, + "rewards/rejected": -27.054584503173828, + "step": 4746 + }, + { + "epoch": 7.62, + "learning_rate": 8.521601268331351e-08, + "logits/chosen": -1.4325064420700073, + "logits/rejected": -1.3834388256072998, + "logps/chosen": -134.2305145263672, + "logps/rejected": -296.9809875488281, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.741318702697754, + "rewards/margins": 14.682330131530762, + "rewards/rejected": -19.423648834228516, + "step": 4747 + }, + { + "epoch": 7.62, + "learning_rate": 8.511692429647245e-08, + "logits/chosen": -1.4673691987991333, + "logits/rejected": -1.4311282634735107, + "logps/chosen": -196.43991088867188, + "logps/rejected": -363.74102783203125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.276126861572266, + "rewards/margins": 15.343452453613281, + "rewards/rejected": -25.619579315185547, + "step": 4748 + }, + { + "epoch": 7.62, + "learning_rate": 8.501783590963139e-08, + "logits/chosen": -1.4131765365600586, + "logits/rejected": -1.4250236749649048, + "logps/chosen": -175.3342742919922, + "logps/rejected": -295.8018798828125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.878741264343262, + "rewards/margins": 13.389217376708984, + "rewards/rejected": -23.26795768737793, + "step": 4749 + }, + { + "epoch": 7.62, + "learning_rate": 8.491874752279033e-08, + "logits/chosen": -1.5945651531219482, + "logits/rejected": -1.630975365638733, + "logps/chosen": -162.35372924804688, + "logps/rejected": -319.8852844238281, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.963551044464111, + "rewards/margins": 13.614198684692383, + "rewards/rejected": -21.577747344970703, + "step": 4750 + }, + { + "epoch": 7.63, + "learning_rate": 8.481965913594927e-08, + "logits/chosen": -1.6661417484283447, + "logits/rejected": -1.618222951889038, + "logps/chosen": -147.11959838867188, + "logps/rejected": -286.15325927734375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.629298210144043, + "rewards/margins": 13.028430938720703, + "rewards/rejected": -20.657730102539062, + "step": 4751 + }, + { + "epoch": 7.63, + "learning_rate": 8.472057074910819e-08, + "logits/chosen": -1.3984218835830688, + "logits/rejected": -1.4352836608886719, + "logps/chosen": -177.99420166015625, + "logps/rejected": -354.4437255859375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.615397930145264, + "rewards/margins": 16.558412551879883, + "rewards/rejected": -24.173809051513672, + "step": 4752 + }, + { + "epoch": 7.63, + "learning_rate": 8.462148236226714e-08, + "logits/chosen": -1.3692353963851929, + "logits/rejected": -1.4420534372329712, + "logps/chosen": -159.91470336914062, + "logps/rejected": -318.7105407714844, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.377086639404297, + "rewards/margins": 12.710928916931152, + "rewards/rejected": -22.088016510009766, + "step": 4753 + }, + { + "epoch": 7.63, + "learning_rate": 8.452239397542607e-08, + "logits/chosen": -1.5102165937423706, + "logits/rejected": -1.5122572183609009, + "logps/chosen": -168.33778381347656, + "logps/rejected": -363.8729248046875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.444846153259277, + "rewards/margins": 19.758359909057617, + "rewards/rejected": -27.203205108642578, + "step": 4754 + }, + { + "epoch": 7.63, + "learning_rate": 8.442330558858502e-08, + "logits/chosen": -1.5187714099884033, + "logits/rejected": -1.6741759777069092, + "logps/chosen": -108.2055435180664, + "logps/rejected": -274.7220458984375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1362781524658203, + "rewards/margins": 13.917840957641602, + "rewards/rejected": -17.05411720275879, + "step": 4755 + }, + { + "epoch": 7.63, + "learning_rate": 8.432421720174395e-08, + "logits/chosen": -1.6290470361709595, + "logits/rejected": -1.700708031654358, + "logps/chosen": -140.0032501220703, + "logps/rejected": -290.09576416015625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.377790451049805, + "rewards/margins": 14.643142700195312, + "rewards/rejected": -20.02093505859375, + "step": 4756 + }, + { + "epoch": 7.64, + "learning_rate": 8.422512881490289e-08, + "logits/chosen": -1.40584397315979, + "logits/rejected": -1.4241124391555786, + "logps/chosen": -171.97991943359375, + "logps/rejected": -311.65625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.38361930847168, + "rewards/margins": 13.001313209533691, + "rewards/rejected": -21.384931564331055, + "step": 4757 + }, + { + "epoch": 7.64, + "learning_rate": 8.412604042806183e-08, + "logits/chosen": -1.3932013511657715, + "logits/rejected": -1.3312305212020874, + "logps/chosen": -151.44078063964844, + "logps/rejected": -289.4534912109375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.007436275482178, + "rewards/margins": 14.932750701904297, + "rewards/rejected": -20.940185546875, + "step": 4758 + }, + { + "epoch": 7.64, + "learning_rate": 8.402695204122077e-08, + "logits/chosen": -1.4639860391616821, + "logits/rejected": -1.514063835144043, + "logps/chosen": -171.81576538085938, + "logps/rejected": -319.91656494140625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.182267189025879, + "rewards/margins": 14.356365203857422, + "rewards/rejected": -22.538633346557617, + "step": 4759 + }, + { + "epoch": 7.64, + "learning_rate": 8.39278636543797e-08, + "logits/chosen": -1.6304694414138794, + "logits/rejected": -1.592578411102295, + "logps/chosen": -140.41940307617188, + "logps/rejected": -252.97280883789062, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.484315872192383, + "rewards/margins": 12.51307487487793, + "rewards/rejected": -17.997390747070312, + "step": 4760 + }, + { + "epoch": 7.64, + "learning_rate": 8.382877526753863e-08, + "logits/chosen": -1.5953744649887085, + "logits/rejected": -1.5051815509796143, + "logps/chosen": -195.4459991455078, + "logps/rejected": -273.8307189941406, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.091302871704102, + "rewards/margins": 11.191961288452148, + "rewards/rejected": -18.28326416015625, + "step": 4761 + }, + { + "epoch": 7.64, + "learning_rate": 8.372968688069758e-08, + "logits/chosen": -1.394031047821045, + "logits/rejected": -1.4545239210128784, + "logps/chosen": -159.93218994140625, + "logps/rejected": -343.8652648925781, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.849750995635986, + "rewards/margins": 16.008827209472656, + "rewards/rejected": -23.858577728271484, + "step": 4762 + }, + { + "epoch": 7.65, + "learning_rate": 8.363059849385653e-08, + "logits/chosen": -1.56461763381958, + "logits/rejected": -1.5282788276672363, + "logps/chosen": -135.47633361816406, + "logps/rejected": -278.03289794921875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.6640472412109375, + "rewards/margins": 14.630223274230957, + "rewards/rejected": -19.294269561767578, + "step": 4763 + }, + { + "epoch": 7.65, + "learning_rate": 8.353151010701545e-08, + "logits/chosen": -1.463106632232666, + "logits/rejected": -1.4530748128890991, + "logps/chosen": -142.42347717285156, + "logps/rejected": -252.92355346679688, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.041672229766846, + "rewards/margins": 11.401652336120605, + "rewards/rejected": -17.44332504272461, + "step": 4764 + }, + { + "epoch": 7.65, + "learning_rate": 8.343242172017439e-08, + "logits/chosen": -1.520037293434143, + "logits/rejected": -1.5279098749160767, + "logps/chosen": -165.12753295898438, + "logps/rejected": -332.9457092285156, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.980277061462402, + "rewards/margins": 16.159137725830078, + "rewards/rejected": -25.139415740966797, + "step": 4765 + }, + { + "epoch": 7.65, + "learning_rate": 8.333333333333333e-08, + "logits/chosen": -1.5285849571228027, + "logits/rejected": -1.5620148181915283, + "logps/chosen": -172.17762756347656, + "logps/rejected": -281.8923645019531, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.156675338745117, + "rewards/margins": 11.833928108215332, + "rewards/rejected": -17.990604400634766, + "step": 4766 + }, + { + "epoch": 7.65, + "learning_rate": 8.323424494649227e-08, + "logits/chosen": -1.4588035345077515, + "logits/rejected": -1.4886770248413086, + "logps/chosen": -124.55907440185547, + "logps/rejected": -280.48724365234375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.51432466506958, + "rewards/margins": 13.895761489868164, + "rewards/rejected": -19.41008758544922, + "step": 4767 + }, + { + "epoch": 7.65, + "learning_rate": 8.313515655965119e-08, + "logits/chosen": -1.5776599645614624, + "logits/rejected": -1.519174575805664, + "logps/chosen": -125.89043426513672, + "logps/rejected": -311.7229919433594, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.802011013031006, + "rewards/margins": 17.6610050201416, + "rewards/rejected": -22.463016510009766, + "step": 4768 + }, + { + "epoch": 7.65, + "learning_rate": 8.303606817281014e-08, + "logits/chosen": -1.533827304840088, + "logits/rejected": -1.5753484964370728, + "logps/chosen": -143.79110717773438, + "logps/rejected": -325.6652526855469, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.658910751342773, + "rewards/margins": 17.871015548706055, + "rewards/rejected": -23.529926300048828, + "step": 4769 + }, + { + "epoch": 7.66, + "learning_rate": 8.293697978596908e-08, + "logits/chosen": -1.475324034690857, + "logits/rejected": -1.4758656024932861, + "logps/chosen": -145.5684814453125, + "logps/rejected": -268.9165344238281, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.806931495666504, + "rewards/margins": 11.83403205871582, + "rewards/rejected": -18.64096450805664, + "step": 4770 + }, + { + "epoch": 7.66, + "learning_rate": 8.283789139912802e-08, + "logits/chosen": -1.3695480823516846, + "logits/rejected": -1.3622817993164062, + "logps/chosen": -188.85122680664062, + "logps/rejected": -311.6961669921875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.767132759094238, + "rewards/margins": 12.720229148864746, + "rewards/rejected": -21.487361907958984, + "step": 4771 + }, + { + "epoch": 7.66, + "learning_rate": 8.273880301228696e-08, + "logits/chosen": -1.6632983684539795, + "logits/rejected": -1.4955830574035645, + "logps/chosen": -169.6571807861328, + "logps/rejected": -269.78765869140625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.539531230926514, + "rewards/margins": 12.84572982788086, + "rewards/rejected": -20.38526153564453, + "step": 4772 + }, + { + "epoch": 7.66, + "learning_rate": 8.263971462544588e-08, + "logits/chosen": -1.3316935300827026, + "logits/rejected": -1.389329195022583, + "logps/chosen": -145.34100341796875, + "logps/rejected": -278.16455078125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.207615852355957, + "rewards/margins": 12.400346755981445, + "rewards/rejected": -20.60796356201172, + "step": 4773 + }, + { + "epoch": 7.66, + "learning_rate": 8.254062623860483e-08, + "logits/chosen": -1.5951980352401733, + "logits/rejected": -1.6883621215820312, + "logps/chosen": -163.28517150878906, + "logps/rejected": -301.76092529296875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.457943916320801, + "rewards/margins": 13.682580947875977, + "rewards/rejected": -21.140525817871094, + "step": 4774 + }, + { + "epoch": 7.66, + "learning_rate": 8.244153785176378e-08, + "logits/chosen": -1.480334758758545, + "logits/rejected": -1.4564454555511475, + "logps/chosen": -156.31349182128906, + "logps/rejected": -274.0665283203125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.668519020080566, + "rewards/margins": 12.31429386138916, + "rewards/rejected": -18.982812881469727, + "step": 4775 + }, + { + "epoch": 7.67, + "learning_rate": 8.234244946492271e-08, + "logits/chosen": -1.5443370342254639, + "logits/rejected": -1.5495305061340332, + "logps/chosen": -217.34304809570312, + "logps/rejected": -319.9864196777344, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.795514106750488, + "rewards/margins": 11.725458145141602, + "rewards/rejected": -23.520973205566406, + "step": 4776 + }, + { + "epoch": 7.67, + "learning_rate": 8.224336107808164e-08, + "logits/chosen": -1.5073390007019043, + "logits/rejected": -1.5131723880767822, + "logps/chosen": -185.6825714111328, + "logps/rejected": -318.1754150390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.167139053344727, + "rewards/margins": 15.179145812988281, + "rewards/rejected": -23.346284866333008, + "step": 4777 + }, + { + "epoch": 7.67, + "learning_rate": 8.214427269124058e-08, + "logits/chosen": -1.4668350219726562, + "logits/rejected": -1.2783002853393555, + "logps/chosen": -155.72756958007812, + "logps/rejected": -244.45428466796875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.50297737121582, + "rewards/margins": 11.257472038269043, + "rewards/rejected": -17.76045036315918, + "step": 4778 + }, + { + "epoch": 7.67, + "learning_rate": 8.204518430439952e-08, + "logits/chosen": -1.520755648612976, + "logits/rejected": -1.517698049545288, + "logps/chosen": -139.54925537109375, + "logps/rejected": -262.2976989746094, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.920642375946045, + "rewards/margins": 12.177875518798828, + "rewards/rejected": -19.0985164642334, + "step": 4779 + }, + { + "epoch": 7.67, + "learning_rate": 8.194609591755846e-08, + "logits/chosen": -1.327993392944336, + "logits/rejected": -1.3283848762512207, + "logps/chosen": -148.12619018554688, + "logps/rejected": -281.6199645996094, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.719305515289307, + "rewards/margins": 13.205678939819336, + "rewards/rejected": -19.924985885620117, + "step": 4780 + }, + { + "epoch": 7.67, + "learning_rate": 8.184700753071739e-08, + "logits/chosen": -1.6350281238555908, + "logits/rejected": -1.600688099861145, + "logps/chosen": -141.07574462890625, + "logps/rejected": -268.331298828125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.153635025024414, + "rewards/margins": 14.226531028747559, + "rewards/rejected": -20.38016700744629, + "step": 4781 + }, + { + "epoch": 7.68, + "learning_rate": 8.174791914387634e-08, + "logits/chosen": -1.574456810951233, + "logits/rejected": -1.6286429166793823, + "logps/chosen": -174.01431274414062, + "logps/rejected": -318.9050598144531, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.283818244934082, + "rewards/margins": 14.253927230834961, + "rewards/rejected": -23.53774642944336, + "step": 4782 + }, + { + "epoch": 7.68, + "learning_rate": 8.164883075703527e-08, + "logits/chosen": -1.3720003366470337, + "logits/rejected": -1.4002422094345093, + "logps/chosen": -155.02651977539062, + "logps/rejected": -277.1254577636719, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.042394638061523, + "rewards/margins": 12.846209526062012, + "rewards/rejected": -19.88860321044922, + "step": 4783 + }, + { + "epoch": 7.68, + "learning_rate": 8.154974237019422e-08, + "logits/chosen": -1.4726266860961914, + "logits/rejected": -1.4261341094970703, + "logps/chosen": -143.4271240234375, + "logps/rejected": -323.5813293457031, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.413942337036133, + "rewards/margins": 15.532570838928223, + "rewards/rejected": -21.94651222229004, + "step": 4784 + }, + { + "epoch": 7.68, + "learning_rate": 8.145065398335314e-08, + "logits/chosen": -1.3812963962554932, + "logits/rejected": -1.348036766052246, + "logps/chosen": -219.85923767089844, + "logps/rejected": -304.71856689453125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.80459213256836, + "rewards/margins": 11.698616027832031, + "rewards/rejected": -21.50320816040039, + "step": 4785 + }, + { + "epoch": 7.68, + "learning_rate": 8.135156559651208e-08, + "logits/chosen": -1.5422595739364624, + "logits/rejected": -1.4884412288665771, + "logps/chosen": -167.46914672851562, + "logps/rejected": -318.02093505859375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.197440147399902, + "rewards/margins": 16.312192916870117, + "rewards/rejected": -23.509634017944336, + "step": 4786 + }, + { + "epoch": 7.68, + "learning_rate": 8.125247720967102e-08, + "logits/chosen": -1.309543490409851, + "logits/rejected": -1.4253430366516113, + "logps/chosen": -182.3259735107422, + "logps/rejected": -337.8734436035156, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.064485549926758, + "rewards/margins": 15.079157829284668, + "rewards/rejected": -23.14364242553711, + "step": 4787 + }, + { + "epoch": 7.69, + "learning_rate": 8.115338882282996e-08, + "logits/chosen": -1.2776225805282593, + "logits/rejected": -1.3874870538711548, + "logps/chosen": -186.14773559570312, + "logps/rejected": -305.6936340332031, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.064751625061035, + "rewards/margins": 11.589435577392578, + "rewards/rejected": -21.65418815612793, + "step": 4788 + }, + { + "epoch": 7.69, + "learning_rate": 8.105430043598891e-08, + "logits/chosen": -1.481885313987732, + "logits/rejected": -1.416290283203125, + "logps/chosen": -132.5230712890625, + "logps/rejected": -261.89434814453125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.168519020080566, + "rewards/margins": 14.421187400817871, + "rewards/rejected": -19.589706420898438, + "step": 4789 + }, + { + "epoch": 7.69, + "learning_rate": 8.095521204914783e-08, + "logits/chosen": -1.7414865493774414, + "logits/rejected": -1.7252850532531738, + "logps/chosen": -178.866455078125, + "logps/rejected": -336.4989013671875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.7740278244018555, + "rewards/margins": 14.39316177368164, + "rewards/rejected": -21.167190551757812, + "step": 4790 + }, + { + "epoch": 7.69, + "learning_rate": 8.085612366230678e-08, + "logits/chosen": -1.555110216140747, + "logits/rejected": -1.5013700723648071, + "logps/chosen": -172.4111328125, + "logps/rejected": -318.74853515625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.568130016326904, + "rewards/margins": 15.355386734008789, + "rewards/rejected": -22.92351722717285, + "step": 4791 + }, + { + "epoch": 7.69, + "learning_rate": 8.075703527546571e-08, + "logits/chosen": -1.629441499710083, + "logits/rejected": -1.6092867851257324, + "logps/chosen": -170.12200927734375, + "logps/rejected": -294.5859069824219, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.26343059539795, + "rewards/margins": 12.827059745788574, + "rewards/rejected": -21.090490341186523, + "step": 4792 + }, + { + "epoch": 7.69, + "learning_rate": 8.065794688862466e-08, + "logits/chosen": -1.6244920492172241, + "logits/rejected": -1.613559365272522, + "logps/chosen": -115.6810302734375, + "logps/rejected": -241.38870239257812, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3398756980895996, + "rewards/margins": 14.09566593170166, + "rewards/rejected": -17.4355411529541, + "step": 4793 + }, + { + "epoch": 7.7, + "learning_rate": 8.055885850178359e-08, + "logits/chosen": -1.6933492422103882, + "logits/rejected": -1.668099284172058, + "logps/chosen": -171.79647827148438, + "logps/rejected": -327.5921936035156, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.154861927032471, + "rewards/margins": 16.589130401611328, + "rewards/rejected": -23.74399185180664, + "step": 4794 + }, + { + "epoch": 7.7, + "learning_rate": 8.045977011494252e-08, + "logits/chosen": -1.4437012672424316, + "logits/rejected": -1.4881542921066284, + "logps/chosen": -173.90151977539062, + "logps/rejected": -346.69287109375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.816163063049316, + "rewards/margins": 15.70803451538086, + "rewards/rejected": -25.52419662475586, + "step": 4795 + }, + { + "epoch": 7.7, + "learning_rate": 8.036068172810147e-08, + "logits/chosen": -1.4825325012207031, + "logits/rejected": -1.5425583124160767, + "logps/chosen": -189.38572692871094, + "logps/rejected": -342.0391845703125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.530987739562988, + "rewards/margins": 14.627291679382324, + "rewards/rejected": -25.158281326293945, + "step": 4796 + }, + { + "epoch": 7.7, + "learning_rate": 8.02615933412604e-08, + "logits/chosen": -1.4591968059539795, + "logits/rejected": -1.4739456176757812, + "logps/chosen": -202.73387145996094, + "logps/rejected": -321.1228942871094, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.355606555938721, + "rewards/margins": 12.76933479309082, + "rewards/rejected": -20.124940872192383, + "step": 4797 + }, + { + "epoch": 7.7, + "learning_rate": 8.016250495441934e-08, + "logits/chosen": -1.4544806480407715, + "logits/rejected": -1.4027495384216309, + "logps/chosen": -176.07504272460938, + "logps/rejected": -281.5587463378906, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.147566795349121, + "rewards/margins": 12.557506561279297, + "rewards/rejected": -20.705074310302734, + "step": 4798 + }, + { + "epoch": 7.7, + "learning_rate": 8.006341656757827e-08, + "logits/chosen": -1.5249247550964355, + "logits/rejected": -1.5082813501358032, + "logps/chosen": -151.040771484375, + "logps/rejected": -310.1117248535156, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.534489631652832, + "rewards/margins": 15.384288787841797, + "rewards/rejected": -22.918779373168945, + "step": 4799 + }, + { + "epoch": 7.7, + "learning_rate": 7.996432818073722e-08, + "logits/chosen": -1.389906644821167, + "logits/rejected": -1.4226722717285156, + "logps/chosen": -144.805419921875, + "logps/rejected": -247.98707580566406, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.498673915863037, + "rewards/margins": 10.713197708129883, + "rewards/rejected": -18.211872100830078, + "step": 4800 + }, + { + "epoch": 7.71, + "learning_rate": 7.986523979389616e-08, + "logits/chosen": -1.7218888998031616, + "logits/rejected": -1.727052927017212, + "logps/chosen": -110.13778686523438, + "logps/rejected": -283.1137390136719, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.9581193923950195, + "rewards/margins": 15.769174575805664, + "rewards/rejected": -20.727293014526367, + "step": 4801 + }, + { + "epoch": 7.71, + "learning_rate": 7.976615140705508e-08, + "logits/chosen": -1.4880125522613525, + "logits/rejected": -1.534348487854004, + "logps/chosen": -158.4397430419922, + "logps/rejected": -285.0205078125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.003836631774902, + "rewards/margins": 11.220605850219727, + "rewards/rejected": -20.224441528320312, + "step": 4802 + }, + { + "epoch": 7.71, + "learning_rate": 7.966706302021403e-08, + "logits/chosen": -1.3613121509552002, + "logits/rejected": -1.3533039093017578, + "logps/chosen": -175.1567840576172, + "logps/rejected": -324.88153076171875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.729764938354492, + "rewards/margins": 14.644516944885254, + "rewards/rejected": -23.374282836914062, + "step": 4803 + }, + { + "epoch": 7.71, + "learning_rate": 7.956797463337296e-08, + "logits/chosen": -1.3851683139801025, + "logits/rejected": -1.4540512561798096, + "logps/chosen": -162.03482055664062, + "logps/rejected": -299.7041320800781, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.620265007019043, + "rewards/margins": 11.715475082397461, + "rewards/rejected": -19.335739135742188, + "step": 4804 + }, + { + "epoch": 7.71, + "learning_rate": 7.946888624653191e-08, + "logits/chosen": -1.542832374572754, + "logits/rejected": -1.5283925533294678, + "logps/chosen": -153.12042236328125, + "logps/rejected": -304.25457763671875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.089602470397949, + "rewards/margins": 14.94469165802002, + "rewards/rejected": -22.03429412841797, + "step": 4805 + }, + { + "epoch": 7.71, + "learning_rate": 7.936979785969083e-08, + "logits/chosen": -1.6238187551498413, + "logits/rejected": -1.542678952217102, + "logps/chosen": -131.7935333251953, + "logps/rejected": -264.686767578125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.31143045425415, + "rewards/margins": 14.365427017211914, + "rewards/rejected": -19.676856994628906, + "step": 4806 + }, + { + "epoch": 7.72, + "learning_rate": 7.927070947284978e-08, + "logits/chosen": -1.399949550628662, + "logits/rejected": -1.414551854133606, + "logps/chosen": -146.9158172607422, + "logps/rejected": -262.9752502441406, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.811978340148926, + "rewards/margins": 11.208044052124023, + "rewards/rejected": -18.020023345947266, + "step": 4807 + }, + { + "epoch": 7.72, + "learning_rate": 7.917162108600872e-08, + "logits/chosen": -1.5925066471099854, + "logits/rejected": -1.4723541736602783, + "logps/chosen": -149.67431640625, + "logps/rejected": -267.8202209472656, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.096548080444336, + "rewards/margins": 12.231141090393066, + "rewards/rejected": -18.32769012451172, + "step": 4808 + }, + { + "epoch": 7.72, + "learning_rate": 7.907253269916766e-08, + "logits/chosen": -1.3174694776535034, + "logits/rejected": -1.353927731513977, + "logps/chosen": -141.070068359375, + "logps/rejected": -264.1796875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.7944536209106445, + "rewards/margins": 12.498027801513672, + "rewards/rejected": -20.292482376098633, + "step": 4809 + }, + { + "epoch": 7.72, + "learning_rate": 7.897344431232659e-08, + "logits/chosen": -1.5490845441818237, + "logits/rejected": -1.5164450407028198, + "logps/chosen": -223.98516845703125, + "logps/rejected": -363.24908447265625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.388236999511719, + "rewards/margins": 15.413241386413574, + "rewards/rejected": -26.80147933959961, + "step": 4810 + }, + { + "epoch": 7.72, + "learning_rate": 7.887435592548552e-08, + "logits/chosen": -1.6239291429519653, + "logits/rejected": -1.6142754554748535, + "logps/chosen": -152.0208740234375, + "logps/rejected": -241.111083984375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.250342845916748, + "rewards/margins": 10.71432113647461, + "rewards/rejected": -15.9646635055542, + "step": 4811 + }, + { + "epoch": 7.72, + "learning_rate": 7.877526753864447e-08, + "logits/chosen": -1.401241660118103, + "logits/rejected": -1.369478464126587, + "logps/chosen": -159.365478515625, + "logps/rejected": -331.59173583984375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.8393025398254395, + "rewards/margins": 16.836162567138672, + "rewards/rejected": -23.675466537475586, + "step": 4812 + }, + { + "epoch": 7.73, + "learning_rate": 7.86761791518034e-08, + "logits/chosen": -1.5413236618041992, + "logits/rejected": -1.6070890426635742, + "logps/chosen": -146.27789306640625, + "logps/rejected": -260.5306701660156, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.914499759674072, + "rewards/margins": 11.080751419067383, + "rewards/rejected": -17.995250701904297, + "step": 4813 + }, + { + "epoch": 7.73, + "learning_rate": 7.857709076496235e-08, + "logits/chosen": -1.4498943090438843, + "logits/rejected": -1.5652554035186768, + "logps/chosen": -128.9695281982422, + "logps/rejected": -284.035400390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.536181926727295, + "rewards/margins": 14.283167839050293, + "rewards/rejected": -18.81934928894043, + "step": 4814 + }, + { + "epoch": 7.73, + "learning_rate": 7.847800237812128e-08, + "logits/chosen": -1.3689227104187012, + "logits/rejected": -1.3875824213027954, + "logps/chosen": -252.39236450195312, + "logps/rejected": -389.59039306640625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -15.530145645141602, + "rewards/margins": 12.16543960571289, + "rewards/rejected": -27.695585250854492, + "step": 4815 + }, + { + "epoch": 7.73, + "learning_rate": 7.837891399128022e-08, + "logits/chosen": -1.5852546691894531, + "logits/rejected": -1.5809447765350342, + "logps/chosen": -162.05728149414062, + "logps/rejected": -291.93597412109375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.380673408508301, + "rewards/margins": 13.242101669311523, + "rewards/rejected": -18.622774124145508, + "step": 4816 + }, + { + "epoch": 7.73, + "learning_rate": 7.827982560443916e-08, + "logits/chosen": -1.483335018157959, + "logits/rejected": -1.4014983177185059, + "logps/chosen": -138.8086700439453, + "logps/rejected": -258.2655334472656, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.766005516052246, + "rewards/margins": 13.203063011169434, + "rewards/rejected": -18.96906852722168, + "step": 4817 + }, + { + "epoch": 7.73, + "learning_rate": 7.81807372175981e-08, + "logits/chosen": -1.3843443393707275, + "logits/rejected": -1.474800944328308, + "logps/chosen": -167.9497833251953, + "logps/rejected": -325.1835632324219, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.114603042602539, + "rewards/margins": 14.83184814453125, + "rewards/rejected": -22.946453094482422, + "step": 4818 + }, + { + "epoch": 7.74, + "learning_rate": 7.808164883075703e-08, + "logits/chosen": -1.4735110998153687, + "logits/rejected": -1.4952462911605835, + "logps/chosen": -122.94868469238281, + "logps/rejected": -228.7157440185547, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.836978912353516, + "rewards/margins": 10.006217002868652, + "rewards/rejected": -14.843195915222168, + "step": 4819 + }, + { + "epoch": 7.74, + "learning_rate": 7.798256044391597e-08, + "logits/chosen": -1.4518342018127441, + "logits/rejected": -1.3866361379623413, + "logps/chosen": -164.6827850341797, + "logps/rejected": -288.794189453125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.8500494956970215, + "rewards/margins": 13.283697128295898, + "rewards/rejected": -21.133747100830078, + "step": 4820 + }, + { + "epoch": 7.74, + "learning_rate": 7.788347205707491e-08, + "logits/chosen": -1.7389247417449951, + "logits/rejected": -1.7141294479370117, + "logps/chosen": -96.34193420410156, + "logps/rejected": -280.93212890625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.603455066680908, + "rewards/margins": 17.736061096191406, + "rewards/rejected": -21.339515686035156, + "step": 4821 + }, + { + "epoch": 7.74, + "learning_rate": 7.778438367023385e-08, + "logits/chosen": -1.6316708326339722, + "logits/rejected": -1.5933218002319336, + "logps/chosen": -184.2234649658203, + "logps/rejected": -282.5387878417969, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.828436851501465, + "rewards/margins": 11.655150413513184, + "rewards/rejected": -19.48358917236328, + "step": 4822 + }, + { + "epoch": 7.74, + "learning_rate": 7.768529528339277e-08, + "logits/chosen": -1.5012054443359375, + "logits/rejected": -1.5627968311309814, + "logps/chosen": -196.04656982421875, + "logps/rejected": -337.346435546875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.974745750427246, + "rewards/margins": 12.990126609802246, + "rewards/rejected": -23.964872360229492, + "step": 4823 + }, + { + "epoch": 7.74, + "learning_rate": 7.758620689655172e-08, + "logits/chosen": -1.508507490158081, + "logits/rejected": -1.5404788255691528, + "logps/chosen": -151.99716186523438, + "logps/rejected": -280.752197265625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.967416286468506, + "rewards/margins": 11.575347900390625, + "rewards/rejected": -17.54276466369629, + "step": 4824 + }, + { + "epoch": 7.74, + "learning_rate": 7.748711850971065e-08, + "logits/chosen": -1.4905136823654175, + "logits/rejected": -1.5457518100738525, + "logps/chosen": -171.46951293945312, + "logps/rejected": -307.81219482421875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.7327165603637695, + "rewards/margins": 12.55955696105957, + "rewards/rejected": -20.292274475097656, + "step": 4825 + }, + { + "epoch": 7.75, + "learning_rate": 7.73880301228696e-08, + "logits/chosen": -1.2987693548202515, + "logits/rejected": -1.3742238283157349, + "logps/chosen": -168.484130859375, + "logps/rejected": -274.26788330078125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.711971759796143, + "rewards/margins": 10.055145263671875, + "rewards/rejected": -16.76711654663086, + "step": 4826 + }, + { + "epoch": 7.75, + "learning_rate": 7.728894173602853e-08, + "logits/chosen": -1.426423192024231, + "logits/rejected": -1.4436557292938232, + "logps/chosen": -171.75010681152344, + "logps/rejected": -312.420166015625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.5571489334106445, + "rewards/margins": 14.538042068481445, + "rewards/rejected": -21.095191955566406, + "step": 4827 + }, + { + "epoch": 7.75, + "learning_rate": 7.718985334918747e-08, + "logits/chosen": -1.700141429901123, + "logits/rejected": -1.652474045753479, + "logps/chosen": -143.11289978027344, + "logps/rejected": -253.99090576171875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.7678632736206055, + "rewards/margins": 11.413220405578613, + "rewards/rejected": -17.18108367919922, + "step": 4828 + }, + { + "epoch": 7.75, + "learning_rate": 7.709076496234641e-08, + "logits/chosen": -1.3797892332077026, + "logits/rejected": -1.407689094543457, + "logps/chosen": -165.33065795898438, + "logps/rejected": -277.37176513671875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.3642578125, + "rewards/margins": 11.54411506652832, + "rewards/rejected": -18.908374786376953, + "step": 4829 + }, + { + "epoch": 7.75, + "learning_rate": 7.699167657550535e-08, + "logits/chosen": -1.3661531209945679, + "logits/rejected": -1.5483677387237549, + "logps/chosen": -124.69178771972656, + "logps/rejected": -333.93145751953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.178241729736328, + "rewards/margins": 16.313688278198242, + "rewards/rejected": -21.49193000793457, + "step": 4830 + }, + { + "epoch": 7.75, + "learning_rate": 7.689258818866428e-08, + "logits/chosen": -1.6187138557434082, + "logits/rejected": -1.5985801219940186, + "logps/chosen": -173.1485137939453, + "logps/rejected": -309.1441345214844, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.871877193450928, + "rewards/margins": 13.852082252502441, + "rewards/rejected": -21.723960876464844, + "step": 4831 + }, + { + "epoch": 7.76, + "learning_rate": 7.679349980182321e-08, + "logits/chosen": -1.3809478282928467, + "logits/rejected": -1.3691251277923584, + "logps/chosen": -184.273681640625, + "logps/rejected": -285.9242858886719, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.966527938842773, + "rewards/margins": 13.170835494995117, + "rewards/rejected": -21.13736343383789, + "step": 4832 + }, + { + "epoch": 7.76, + "learning_rate": 7.669441141498216e-08, + "logits/chosen": -1.4514950513839722, + "logits/rejected": -1.4797409772872925, + "logps/chosen": -166.5737762451172, + "logps/rejected": -311.5140075683594, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.340671062469482, + "rewards/margins": 15.422786712646484, + "rewards/rejected": -22.763460159301758, + "step": 4833 + }, + { + "epoch": 7.76, + "learning_rate": 7.659532302814111e-08, + "logits/chosen": -1.5623724460601807, + "logits/rejected": -1.5775105953216553, + "logps/chosen": -168.35044860839844, + "logps/rejected": -338.19818115234375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.606208801269531, + "rewards/margins": 16.411121368408203, + "rewards/rejected": -25.017330169677734, + "step": 4834 + }, + { + "epoch": 7.76, + "learning_rate": 7.649623464130004e-08, + "logits/chosen": -1.4405159950256348, + "logits/rejected": -1.358030080795288, + "logps/chosen": -149.300537109375, + "logps/rejected": -232.06472778320312, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.600377082824707, + "rewards/margins": 9.08041000366211, + "rewards/rejected": -15.6807861328125, + "step": 4835 + }, + { + "epoch": 7.76, + "learning_rate": 7.639714625445897e-08, + "logits/chosen": -1.5769211053848267, + "logits/rejected": -1.571998119354248, + "logps/chosen": -184.2384796142578, + "logps/rejected": -313.318359375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.026741027832031, + "rewards/margins": 13.949853897094727, + "rewards/rejected": -21.976594924926758, + "step": 4836 + }, + { + "epoch": 7.76, + "learning_rate": 7.629805786761791e-08, + "logits/chosen": -1.409580111503601, + "logits/rejected": -1.4568231105804443, + "logps/chosen": -190.1983642578125, + "logps/rejected": -336.5899658203125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.264533042907715, + "rewards/margins": 13.897610664367676, + "rewards/rejected": -25.16214370727539, + "step": 4837 + }, + { + "epoch": 7.77, + "learning_rate": 7.619896948077685e-08, + "logits/chosen": -1.5970262289047241, + "logits/rejected": -1.586888313293457, + "logps/chosen": -194.20663452148438, + "logps/rejected": -350.14886474609375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.282764434814453, + "rewards/margins": 15.461089134216309, + "rewards/rejected": -24.74385643005371, + "step": 4838 + }, + { + "epoch": 7.77, + "learning_rate": 7.609988109393579e-08, + "logits/chosen": -1.6048277616500854, + "logits/rejected": -1.5288870334625244, + "logps/chosen": -171.3717041015625, + "logps/rejected": -293.3612060546875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.2941312789917, + "rewards/margins": 13.402706146240234, + "rewards/rejected": -21.696836471557617, + "step": 4839 + }, + { + "epoch": 7.77, + "learning_rate": 7.600079270709472e-08, + "logits/chosen": -1.4967422485351562, + "logits/rejected": -1.533512830734253, + "logps/chosen": -159.74923706054688, + "logps/rejected": -274.52056884765625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.91743803024292, + "rewards/margins": 10.864860534667969, + "rewards/rejected": -18.782299041748047, + "step": 4840 + }, + { + "epoch": 7.77, + "learning_rate": 7.590170432025367e-08, + "logits/chosen": -1.3638700246810913, + "logits/rejected": -1.2319835424423218, + "logps/chosen": -213.04342651367188, + "logps/rejected": -276.067138671875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.137819290161133, + "rewards/margins": 10.338951110839844, + "rewards/rejected": -19.476770401000977, + "step": 4841 + }, + { + "epoch": 7.77, + "learning_rate": 7.58026159334126e-08, + "logits/chosen": -1.4018454551696777, + "logits/rejected": -1.4261980056762695, + "logps/chosen": -204.5984344482422, + "logps/rejected": -351.8634033203125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.209754943847656, + "rewards/margins": 14.641207695007324, + "rewards/rejected": -23.850963592529297, + "step": 4842 + }, + { + "epoch": 7.77, + "learning_rate": 7.570352754657155e-08, + "logits/chosen": -1.3693119287490845, + "logits/rejected": -1.3902335166931152, + "logps/chosen": -168.68910217285156, + "logps/rejected": -318.850830078125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.00726318359375, + "rewards/margins": 13.050844192504883, + "rewards/rejected": -22.05810546875, + "step": 4843 + }, + { + "epoch": 7.78, + "learning_rate": 7.560443915973047e-08, + "logits/chosen": -1.5151896476745605, + "logits/rejected": -1.5358617305755615, + "logps/chosen": -105.58500671386719, + "logps/rejected": -224.08328247070312, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.6505188941955566, + "rewards/margins": 12.73598575592041, + "rewards/rejected": -16.386505126953125, + "step": 4844 + }, + { + "epoch": 7.78, + "learning_rate": 7.550535077288941e-08, + "logits/chosen": -1.355112075805664, + "logits/rejected": -1.3588402271270752, + "logps/chosen": -186.6870880126953, + "logps/rejected": -301.73077392578125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.965882301330566, + "rewards/margins": 11.15848445892334, + "rewards/rejected": -20.124364852905273, + "step": 4845 + }, + { + "epoch": 7.78, + "learning_rate": 7.540626238604836e-08, + "logits/chosen": -1.5371506214141846, + "logits/rejected": -1.5603001117706299, + "logps/chosen": -144.6571044921875, + "logps/rejected": -290.8252868652344, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.374922752380371, + "rewards/margins": 15.334087371826172, + "rewards/rejected": -21.709009170532227, + "step": 4846 + }, + { + "epoch": 7.78, + "learning_rate": 7.530717399920729e-08, + "logits/chosen": -1.4065238237380981, + "logits/rejected": -1.4403071403503418, + "logps/chosen": -153.0740203857422, + "logps/rejected": -283.8804931640625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.818629741668701, + "rewards/margins": 12.213321685791016, + "rewards/rejected": -20.031951904296875, + "step": 4847 + }, + { + "epoch": 7.78, + "learning_rate": 7.520808561236623e-08, + "logits/chosen": -1.4308338165283203, + "logits/rejected": -1.409609079360962, + "logps/chosen": -168.26583862304688, + "logps/rejected": -351.00445556640625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.920938491821289, + "rewards/margins": 16.92116355895996, + "rewards/rejected": -25.842103958129883, + "step": 4848 + }, + { + "epoch": 7.78, + "learning_rate": 7.510899722552516e-08, + "logits/chosen": -1.4181245565414429, + "logits/rejected": -1.4206080436706543, + "logps/chosen": -209.90167236328125, + "logps/rejected": -340.98748779296875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.044333457946777, + "rewards/margins": 12.460296630859375, + "rewards/rejected": -23.504629135131836, + "step": 4849 + }, + { + "epoch": 7.78, + "learning_rate": 7.50099088386841e-08, + "logits/chosen": -1.5049643516540527, + "logits/rejected": -1.5141916275024414, + "logps/chosen": -149.92727661132812, + "logps/rejected": -298.65692138671875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.006680011749268, + "rewards/margins": 13.425520896911621, + "rewards/rejected": -19.432201385498047, + "step": 4850 + }, + { + "epoch": 7.79, + "learning_rate": 7.491082045184304e-08, + "logits/chosen": -1.4364076852798462, + "logits/rejected": -1.4183800220489502, + "logps/chosen": -201.81112670898438, + "logps/rejected": -340.0651550292969, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.536672592163086, + "rewards/margins": 13.532243728637695, + "rewards/rejected": -24.068918228149414, + "step": 4851 + }, + { + "epoch": 7.79, + "learning_rate": 7.481173206500197e-08, + "logits/chosen": -1.773967981338501, + "logits/rejected": -1.7529749870300293, + "logps/chosen": -109.91448974609375, + "logps/rejected": -270.362060546875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.8982486724853516, + "rewards/margins": 15.395431518554688, + "rewards/rejected": -19.293682098388672, + "step": 4852 + }, + { + "epoch": 7.79, + "learning_rate": 7.471264367816092e-08, + "logits/chosen": -1.4627025127410889, + "logits/rejected": -1.4475373029708862, + "logps/chosen": -167.90420532226562, + "logps/rejected": -338.00042724609375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.224390983581543, + "rewards/margins": 16.055959701538086, + "rewards/rejected": -25.280349731445312, + "step": 4853 + }, + { + "epoch": 7.79, + "learning_rate": 7.461355529131985e-08, + "logits/chosen": -1.5076239109039307, + "logits/rejected": -1.5342090129852295, + "logps/chosen": -182.29006958007812, + "logps/rejected": -380.78125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.894654273986816, + "rewards/margins": 17.79853630065918, + "rewards/rejected": -27.69318962097168, + "step": 4854 + }, + { + "epoch": 7.79, + "learning_rate": 7.45144669044788e-08, + "logits/chosen": -1.358757734298706, + "logits/rejected": -1.428637981414795, + "logps/chosen": -196.1201629638672, + "logps/rejected": -352.75640869140625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.562475204467773, + "rewards/margins": 14.56417465209961, + "rewards/rejected": -24.12664794921875, + "step": 4855 + }, + { + "epoch": 7.79, + "learning_rate": 7.441537851763772e-08, + "logits/chosen": -1.4112544059753418, + "logits/rejected": -1.3740348815917969, + "logps/chosen": -174.21014404296875, + "logps/rejected": -323.27288818359375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.789878845214844, + "rewards/margins": 14.540266990661621, + "rewards/rejected": -23.33014678955078, + "step": 4856 + }, + { + "epoch": 7.8, + "learning_rate": 7.431629013079667e-08, + "logits/chosen": -1.4172449111938477, + "logits/rejected": -1.4712252616882324, + "logps/chosen": -125.50572204589844, + "logps/rejected": -294.3111267089844, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.263299465179443, + "rewards/margins": 16.024417877197266, + "rewards/rejected": -22.287717819213867, + "step": 4857 + }, + { + "epoch": 7.8, + "learning_rate": 7.42172017439556e-08, + "logits/chosen": -1.5537174940109253, + "logits/rejected": -1.558167576789856, + "logps/chosen": -145.53089904785156, + "logps/rejected": -326.4154357910156, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.713858604431152, + "rewards/margins": 18.679861068725586, + "rewards/rejected": -24.393720626831055, + "step": 4858 + }, + { + "epoch": 7.8, + "learning_rate": 7.411811335711455e-08, + "logits/chosen": -1.3402225971221924, + "logits/rejected": -1.4245835542678833, + "logps/chosen": -175.9832305908203, + "logps/rejected": -299.4299621582031, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.499971389770508, + "rewards/margins": 12.478273391723633, + "rewards/rejected": -21.97824478149414, + "step": 4859 + }, + { + "epoch": 7.8, + "learning_rate": 7.401902497027349e-08, + "logits/chosen": -1.337522029876709, + "logits/rejected": -1.498256802558899, + "logps/chosen": -98.90499114990234, + "logps/rejected": -296.823974609375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.360543966293335, + "rewards/margins": 16.74074935913086, + "rewards/rejected": -20.101293563842773, + "step": 4860 + }, + { + "epoch": 7.8, + "learning_rate": 7.391993658343241e-08, + "logits/chosen": -1.7188984155654907, + "logits/rejected": -1.6528668403625488, + "logps/chosen": -190.64923095703125, + "logps/rejected": -323.4014587402344, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.739084243774414, + "rewards/margins": 14.802085876464844, + "rewards/rejected": -23.54117202758789, + "step": 4861 + }, + { + "epoch": 7.8, + "learning_rate": 7.382084819659136e-08, + "logits/chosen": -1.563624382019043, + "logits/rejected": -1.56182861328125, + "logps/chosen": -134.64614868164062, + "logps/rejected": -307.25927734375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.19488525390625, + "rewards/margins": 17.205852508544922, + "rewards/rejected": -22.400739669799805, + "step": 4862 + }, + { + "epoch": 7.81, + "learning_rate": 7.372175980975029e-08, + "logits/chosen": -1.4475407600402832, + "logits/rejected": -1.4252361059188843, + "logps/chosen": -187.89434814453125, + "logps/rejected": -309.33197021484375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.48046588897705, + "rewards/margins": 13.087709426879883, + "rewards/rejected": -21.568174362182617, + "step": 4863 + }, + { + "epoch": 7.81, + "learning_rate": 7.362267142290924e-08, + "logits/chosen": -1.3884648084640503, + "logits/rejected": -1.3676702976226807, + "logps/chosen": -174.2835693359375, + "logps/rejected": -299.9855041503906, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.377692222595215, + "rewards/margins": 13.401056289672852, + "rewards/rejected": -21.778749465942383, + "step": 4864 + }, + { + "epoch": 7.81, + "learning_rate": 7.352358303606817e-08, + "logits/chosen": -1.4295568466186523, + "logits/rejected": -1.4169704914093018, + "logps/chosen": -149.14547729492188, + "logps/rejected": -290.7594299316406, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.705175399780273, + "rewards/margins": 13.122294425964355, + "rewards/rejected": -20.827468872070312, + "step": 4865 + }, + { + "epoch": 7.81, + "learning_rate": 7.34244946492271e-08, + "logits/chosen": -1.364658236503601, + "logits/rejected": -1.4732985496520996, + "logps/chosen": -149.0472869873047, + "logps/rejected": -304.9566650390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.611104965209961, + "rewards/margins": 13.67896556854248, + "rewards/rejected": -21.290069580078125, + "step": 4866 + }, + { + "epoch": 7.81, + "learning_rate": 7.332540626238605e-08, + "logits/chosen": -1.5636632442474365, + "logits/rejected": -1.52078115940094, + "logps/chosen": -157.994140625, + "logps/rejected": -341.10870361328125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.938293933868408, + "rewards/margins": 15.69265365600586, + "rewards/rejected": -21.630949020385742, + "step": 4867 + }, + { + "epoch": 7.81, + "learning_rate": 7.322631787554498e-08, + "logits/chosen": -1.4113765954971313, + "logits/rejected": -1.4154765605926514, + "logps/chosen": -132.22853088378906, + "logps/rejected": -282.8968505859375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.8423075675964355, + "rewards/margins": 13.118619918823242, + "rewards/rejected": -17.960926055908203, + "step": 4868 + }, + { + "epoch": 7.82, + "learning_rate": 7.312722948870392e-08, + "logits/chosen": -1.564414143562317, + "logits/rejected": -1.5569791793823242, + "logps/chosen": -162.7370147705078, + "logps/rejected": -304.28564453125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.689040660858154, + "rewards/margins": 14.738759994506836, + "rewards/rejected": -22.42780113220215, + "step": 4869 + }, + { + "epoch": 7.82, + "learning_rate": 7.302814110186285e-08, + "logits/chosen": -1.345212697982788, + "logits/rejected": -1.3827612400054932, + "logps/chosen": -172.87220764160156, + "logps/rejected": -304.1089782714844, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.37352180480957, + "rewards/margins": 12.04261589050293, + "rewards/rejected": -20.4161376953125, + "step": 4870 + }, + { + "epoch": 7.82, + "learning_rate": 7.29290527150218e-08, + "logits/chosen": -1.5814214944839478, + "logits/rejected": -1.576108694076538, + "logps/chosen": -125.95165252685547, + "logps/rejected": -278.81005859375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.387411117553711, + "rewards/margins": 16.385461807250977, + "rewards/rejected": -21.772872924804688, + "step": 4871 + }, + { + "epoch": 7.82, + "learning_rate": 7.282996432818074e-08, + "logits/chosen": -1.5763983726501465, + "logits/rejected": -1.5403839349746704, + "logps/chosen": -160.18899536132812, + "logps/rejected": -286.1727600097656, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.159602165222168, + "rewards/margins": 13.902036666870117, + "rewards/rejected": -21.0616397857666, + "step": 4872 + }, + { + "epoch": 7.82, + "learning_rate": 7.273087594133966e-08, + "logits/chosen": -1.6047157049179077, + "logits/rejected": -1.6165682077407837, + "logps/chosen": -81.00479125976562, + "logps/rejected": -230.81350708007812, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9800212383270264, + "rewards/margins": 13.989391326904297, + "rewards/rejected": -16.969411849975586, + "step": 4873 + }, + { + "epoch": 7.82, + "learning_rate": 7.263178755449861e-08, + "logits/chosen": -1.3650959730148315, + "logits/rejected": -1.4380059242248535, + "logps/chosen": -141.83006286621094, + "logps/rejected": -313.1301574707031, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.302243232727051, + "rewards/margins": 15.003865242004395, + "rewards/rejected": -22.306106567382812, + "step": 4874 + }, + { + "epoch": 7.83, + "learning_rate": 7.253269916765754e-08, + "logits/chosen": -1.477924108505249, + "logits/rejected": -1.4942588806152344, + "logps/chosen": -180.15675354003906, + "logps/rejected": -319.3282470703125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.3624906539917, + "rewards/margins": 13.531000137329102, + "rewards/rejected": -22.893489837646484, + "step": 4875 + }, + { + "epoch": 7.83, + "learning_rate": 7.243361078081649e-08, + "logits/chosen": -1.49049711227417, + "logits/rejected": -1.5217269659042358, + "logps/chosen": -128.2622528076172, + "logps/rejected": -294.0562744140625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.102451324462891, + "rewards/margins": 14.645753860473633, + "rewards/rejected": -20.748207092285156, + "step": 4876 + }, + { + "epoch": 7.83, + "learning_rate": 7.233452239397541e-08, + "logits/chosen": -1.4623537063598633, + "logits/rejected": -1.5114924907684326, + "logps/chosen": -161.34439086914062, + "logps/rejected": -301.0716857910156, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.833163738250732, + "rewards/margins": 12.026004791259766, + "rewards/rejected": -19.859169006347656, + "step": 4877 + }, + { + "epoch": 7.83, + "learning_rate": 7.223543400713436e-08, + "logits/chosen": -1.3347097635269165, + "logits/rejected": -1.346407413482666, + "logps/chosen": -166.18698120117188, + "logps/rejected": -332.1715393066406, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.586518287658691, + "rewards/margins": 16.712312698364258, + "rewards/rejected": -24.298831939697266, + "step": 4878 + }, + { + "epoch": 7.83, + "learning_rate": 7.21363456202933e-08, + "logits/chosen": -1.5563193559646606, + "logits/rejected": -1.5326755046844482, + "logps/chosen": -112.21443939208984, + "logps/rejected": -274.52447509765625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.8612873554229736, + "rewards/margins": 16.149744033813477, + "rewards/rejected": -20.011028289794922, + "step": 4879 + }, + { + "epoch": 7.83, + "learning_rate": 7.203725723345224e-08, + "logits/chosen": -1.3675966262817383, + "logits/rejected": -1.342687964439392, + "logps/chosen": -169.2969970703125, + "logps/rejected": -291.97515869140625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.345970153808594, + "rewards/margins": 13.875682830810547, + "rewards/rejected": -22.22165298461914, + "step": 4880 + }, + { + "epoch": 7.83, + "learning_rate": 7.193816884661118e-08, + "logits/chosen": -1.512346863746643, + "logits/rejected": -1.5079658031463623, + "logps/chosen": -200.2500762939453, + "logps/rejected": -308.8843688964844, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.375031471252441, + "rewards/margins": 11.974227905273438, + "rewards/rejected": -21.349260330200195, + "step": 4881 + }, + { + "epoch": 7.84, + "learning_rate": 7.18390804597701e-08, + "logits/chosen": -1.5738625526428223, + "logits/rejected": -1.544896125793457, + "logps/chosen": -159.74859619140625, + "logps/rejected": -324.83026123046875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.241399765014648, + "rewards/margins": 17.356098175048828, + "rewards/rejected": -24.597497940063477, + "step": 4882 + }, + { + "epoch": 7.84, + "learning_rate": 7.173999207292905e-08, + "logits/chosen": -1.6212846040725708, + "logits/rejected": -1.6652681827545166, + "logps/chosen": -93.72679901123047, + "logps/rejected": -243.66583251953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5787863731384277, + "rewards/margins": 14.963802337646484, + "rewards/rejected": -17.54258918762207, + "step": 4883 + }, + { + "epoch": 7.84, + "learning_rate": 7.164090368608798e-08, + "logits/chosen": -1.5900284051895142, + "logits/rejected": -1.5766512155532837, + "logps/chosen": -189.3577880859375, + "logps/rejected": -337.0574645996094, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.735782623291016, + "rewards/margins": 14.990422248840332, + "rewards/rejected": -23.726205825805664, + "step": 4884 + }, + { + "epoch": 7.84, + "learning_rate": 7.154181529924693e-08, + "logits/chosen": -1.3795907497406006, + "logits/rejected": -1.39642333984375, + "logps/chosen": -191.7391357421875, + "logps/rejected": -317.1395263671875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.795097351074219, + "rewards/margins": 12.655288696289062, + "rewards/rejected": -22.45038604736328, + "step": 4885 + }, + { + "epoch": 7.84, + "learning_rate": 7.144272691240586e-08, + "logits/chosen": -1.498264193534851, + "logits/rejected": -1.4970345497131348, + "logps/chosen": -163.73892211914062, + "logps/rejected": -304.87860107421875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.945647239685059, + "rewards/margins": 14.318655967712402, + "rewards/rejected": -21.26430320739746, + "step": 4886 + }, + { + "epoch": 7.84, + "learning_rate": 7.13436385255648e-08, + "logits/chosen": -1.3527100086212158, + "logits/rejected": -1.3110758066177368, + "logps/chosen": -156.9625244140625, + "logps/rejected": -372.102294921875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.291568756103516, + "rewards/margins": 18.70430564880371, + "rewards/rejected": -26.99587631225586, + "step": 4887 + }, + { + "epoch": 7.85, + "learning_rate": 7.124455013872374e-08, + "logits/chosen": -1.3910733461380005, + "logits/rejected": -1.416500449180603, + "logps/chosen": -186.55238342285156, + "logps/rejected": -330.9005126953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.427082061767578, + "rewards/margins": 15.034650802612305, + "rewards/rejected": -24.46173095703125, + "step": 4888 + }, + { + "epoch": 7.85, + "learning_rate": 7.114546175188268e-08, + "logits/chosen": -1.6703134775161743, + "logits/rejected": -1.6009621620178223, + "logps/chosen": -148.96749877929688, + "logps/rejected": -270.2138671875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.679254055023193, + "rewards/margins": 11.9889497756958, + "rewards/rejected": -18.66820526123047, + "step": 4889 + }, + { + "epoch": 7.85, + "learning_rate": 7.104637336504161e-08, + "logits/chosen": -1.5680301189422607, + "logits/rejected": -1.685950756072998, + "logps/chosen": -180.64540100097656, + "logps/rejected": -342.57568359375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.302730083465576, + "rewards/margins": 16.24761962890625, + "rewards/rejected": -23.55034828186035, + "step": 4890 + }, + { + "epoch": 7.85, + "learning_rate": 7.094728497820056e-08, + "logits/chosen": -1.5419106483459473, + "logits/rejected": -1.5170013904571533, + "logps/chosen": -176.76315307617188, + "logps/rejected": -334.3760986328125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.8622050285339355, + "rewards/margins": 16.078624725341797, + "rewards/rejected": -23.940828323364258, + "step": 4891 + }, + { + "epoch": 7.85, + "learning_rate": 7.084819659135949e-08, + "logits/chosen": -1.4276074171066284, + "logits/rejected": -1.4459203481674194, + "logps/chosen": -155.1407012939453, + "logps/rejected": -257.4079895019531, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.861623764038086, + "rewards/margins": 11.5476655960083, + "rewards/rejected": -18.409290313720703, + "step": 4892 + }, + { + "epoch": 7.85, + "learning_rate": 7.074910820451844e-08, + "logits/chosen": -1.3824695348739624, + "logits/rejected": -1.50946843624115, + "logps/chosen": -134.61505126953125, + "logps/rejected": -325.2096862792969, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.051602363586426, + "rewards/margins": 17.127782821655273, + "rewards/rejected": -24.17938804626465, + "step": 4893 + }, + { + "epoch": 7.86, + "learning_rate": 7.065001981767736e-08, + "logits/chosen": -1.3380247354507446, + "logits/rejected": -1.3723082542419434, + "logps/chosen": -190.09011840820312, + "logps/rejected": -320.698486328125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.417593955993652, + "rewards/margins": 13.10705280303955, + "rewards/rejected": -21.524646759033203, + "step": 4894 + }, + { + "epoch": 7.86, + "learning_rate": 7.05509314308363e-08, + "logits/chosen": -1.4565954208374023, + "logits/rejected": -1.5765899419784546, + "logps/chosen": -177.61026000976562, + "logps/rejected": -301.9615173339844, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.033174514770508, + "rewards/margins": 10.82702922821045, + "rewards/rejected": -19.86020278930664, + "step": 4895 + }, + { + "epoch": 7.86, + "learning_rate": 7.045184304399524e-08, + "logits/chosen": -1.2899677753448486, + "logits/rejected": -1.3608392477035522, + "logps/chosen": -156.04837036132812, + "logps/rejected": -261.5401916503906, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.698054313659668, + "rewards/margins": 9.375447273254395, + "rewards/rejected": -17.073501586914062, + "step": 4896 + }, + { + "epoch": 7.86, + "learning_rate": 7.035275465715418e-08, + "logits/chosen": -1.398398995399475, + "logits/rejected": -1.4138731956481934, + "logps/chosen": -104.43282318115234, + "logps/rejected": -297.4237976074219, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.847512722015381, + "rewards/margins": 15.656156539916992, + "rewards/rejected": -20.50366973876953, + "step": 4897 + }, + { + "epoch": 7.86, + "learning_rate": 7.025366627031312e-08, + "logits/chosen": -1.4477061033248901, + "logits/rejected": -1.4416265487670898, + "logps/chosen": -170.9147186279297, + "logps/rejected": -339.1037902832031, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.541336059570312, + "rewards/margins": 15.939701080322266, + "rewards/rejected": -24.481037139892578, + "step": 4898 + }, + { + "epoch": 7.86, + "learning_rate": 7.015457788347205e-08, + "logits/chosen": -1.3790475130081177, + "logits/rejected": -1.3672919273376465, + "logps/chosen": -154.5796661376953, + "logps/rejected": -333.11883544921875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.381752014160156, + "rewards/margins": 17.15927505493164, + "rewards/rejected": -24.541027069091797, + "step": 4899 + }, + { + "epoch": 7.87, + "learning_rate": 7.0055489496631e-08, + "logits/chosen": -1.526892066001892, + "logits/rejected": -1.5011378526687622, + "logps/chosen": -168.27645874023438, + "logps/rejected": -273.31927490234375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.904084205627441, + "rewards/margins": 12.019091606140137, + "rewards/rejected": -18.923175811767578, + "step": 4900 + }, + { + "epoch": 7.87, + "learning_rate": 6.995640110978993e-08, + "logits/chosen": -1.5221351385116577, + "logits/rejected": -1.5968172550201416, + "logps/chosen": -186.08580017089844, + "logps/rejected": -318.0239562988281, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.667919158935547, + "rewards/margins": 12.113765716552734, + "rewards/rejected": -21.78168487548828, + "step": 4901 + }, + { + "epoch": 7.87, + "learning_rate": 6.985731272294888e-08, + "logits/chosen": -1.2596631050109863, + "logits/rejected": -1.374326467514038, + "logps/chosen": -121.94046783447266, + "logps/rejected": -317.29559326171875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.3215532302856445, + "rewards/margins": 16.598562240600586, + "rewards/rejected": -21.920116424560547, + "step": 4902 + }, + { + "epoch": 7.87, + "learning_rate": 6.97582243361078e-08, + "logits/chosen": -1.704198956489563, + "logits/rejected": -1.694022536277771, + "logps/chosen": -117.92302703857422, + "logps/rejected": -264.2262878417969, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.0380120277404785, + "rewards/margins": 14.897703170776367, + "rewards/rejected": -18.935714721679688, + "step": 4903 + }, + { + "epoch": 7.87, + "learning_rate": 6.965913594926674e-08, + "logits/chosen": -1.3551716804504395, + "logits/rejected": -1.3493599891662598, + "logps/chosen": -176.69558715820312, + "logps/rejected": -334.8759765625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.961166381835938, + "rewards/margins": 15.426485061645508, + "rewards/rejected": -24.387653350830078, + "step": 4904 + }, + { + "epoch": 7.87, + "learning_rate": 6.956004756242569e-08, + "logits/chosen": -1.4815881252288818, + "logits/rejected": -1.5030934810638428, + "logps/chosen": -171.90492248535156, + "logps/rejected": -348.76031494140625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.492511749267578, + "rewards/margins": 16.735692977905273, + "rewards/rejected": -26.228206634521484, + "step": 4905 + }, + { + "epoch": 7.87, + "learning_rate": 6.946095917558462e-08, + "logits/chosen": -1.452528715133667, + "logits/rejected": -1.5219497680664062, + "logps/chosen": -138.089111328125, + "logps/rejected": -292.38775634765625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.458005905151367, + "rewards/margins": 14.487727165222168, + "rewards/rejected": -20.94573402404785, + "step": 4906 + }, + { + "epoch": 7.88, + "learning_rate": 6.936187078874356e-08, + "logits/chosen": -1.6059489250183105, + "logits/rejected": -1.5751614570617676, + "logps/chosen": -217.15975952148438, + "logps/rejected": -358.1731262207031, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.674202919006348, + "rewards/margins": 14.268885612487793, + "rewards/rejected": -24.94308853149414, + "step": 4907 + }, + { + "epoch": 7.88, + "learning_rate": 6.926278240190249e-08, + "logits/chosen": -1.3200490474700928, + "logits/rejected": -1.4186997413635254, + "logps/chosen": -151.211669921875, + "logps/rejected": -319.1556091308594, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.489862442016602, + "rewards/margins": 14.365056991577148, + "rewards/rejected": -22.854917526245117, + "step": 4908 + }, + { + "epoch": 7.88, + "learning_rate": 6.916369401506144e-08, + "logits/chosen": -1.6514923572540283, + "logits/rejected": -1.5708317756652832, + "logps/chosen": -174.29356384277344, + "logps/rejected": -292.65740966796875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.79392147064209, + "rewards/margins": 11.790153503417969, + "rewards/rejected": -20.584075927734375, + "step": 4909 + }, + { + "epoch": 7.88, + "learning_rate": 6.906460562822037e-08, + "logits/chosen": -1.308393955230713, + "logits/rejected": -1.2948050498962402, + "logps/chosen": -135.64602661132812, + "logps/rejected": -325.2373046875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.573548316955566, + "rewards/margins": 17.381759643554688, + "rewards/rejected": -23.95530891418457, + "step": 4910 + }, + { + "epoch": 7.88, + "learning_rate": 6.89655172413793e-08, + "logits/chosen": -1.3928608894348145, + "logits/rejected": -1.3743236064910889, + "logps/chosen": -157.57521057128906, + "logps/rejected": -310.216552734375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.3480024337768555, + "rewards/margins": 14.799158096313477, + "rewards/rejected": -21.14716148376465, + "step": 4911 + }, + { + "epoch": 7.88, + "learning_rate": 6.886642885453825e-08, + "logits/chosen": -1.4388487339019775, + "logits/rejected": -1.4170498847961426, + "logps/chosen": -171.99237060546875, + "logps/rejected": -310.688720703125, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.627808570861816, + "rewards/margins": 14.217021942138672, + "rewards/rejected": -21.844829559326172, + "step": 4912 + }, + { + "epoch": 7.89, + "learning_rate": 6.876734046769718e-08, + "logits/chosen": -1.570708990097046, + "logits/rejected": -1.557126522064209, + "logps/chosen": -117.17453002929688, + "logps/rejected": -261.977783203125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.515327453613281, + "rewards/margins": 13.855026245117188, + "rewards/rejected": -18.37035369873047, + "step": 4913 + }, + { + "epoch": 7.89, + "learning_rate": 6.866825208085613e-08, + "logits/chosen": -1.445966362953186, + "logits/rejected": -1.4163416624069214, + "logps/chosen": -185.894287109375, + "logps/rejected": -341.7824401855469, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.783063888549805, + "rewards/margins": 17.014976501464844, + "rewards/rejected": -26.798038482666016, + "step": 4914 + }, + { + "epoch": 7.89, + "learning_rate": 6.856916369401505e-08, + "logits/chosen": -1.4198733568191528, + "logits/rejected": -1.5225330591201782, + "logps/chosen": -137.33880615234375, + "logps/rejected": -267.93701171875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.230825901031494, + "rewards/margins": 11.987138748168945, + "rewards/rejected": -18.217966079711914, + "step": 4915 + }, + { + "epoch": 7.89, + "learning_rate": 6.8470075307174e-08, + "logits/chosen": -1.572982907295227, + "logits/rejected": -1.5584124326705933, + "logps/chosen": -150.33554077148438, + "logps/rejected": -269.40008544921875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.301774978637695, + "rewards/margins": 12.810094833374023, + "rewards/rejected": -19.11186981201172, + "step": 4916 + }, + { + "epoch": 7.89, + "learning_rate": 6.837098692033294e-08, + "logits/chosen": -1.45878005027771, + "logits/rejected": -1.4536166191101074, + "logps/chosen": -143.82791137695312, + "logps/rejected": -271.90301513671875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.52540397644043, + "rewards/margins": 13.98352336883545, + "rewards/rejected": -19.508926391601562, + "step": 4917 + }, + { + "epoch": 7.89, + "learning_rate": 6.827189853349187e-08, + "logits/chosen": -1.3929550647735596, + "logits/rejected": -1.3690567016601562, + "logps/chosen": -164.65451049804688, + "logps/rejected": -297.92333984375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.360965728759766, + "rewards/margins": 12.315488815307617, + "rewards/rejected": -19.676454544067383, + "step": 4918 + }, + { + "epoch": 7.9, + "learning_rate": 6.817281014665081e-08, + "logits/chosen": -1.465235948562622, + "logits/rejected": -1.474284291267395, + "logps/chosen": -117.70922088623047, + "logps/rejected": -274.656494140625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.360558032989502, + "rewards/margins": 15.549505233764648, + "rewards/rejected": -20.910064697265625, + "step": 4919 + }, + { + "epoch": 7.9, + "learning_rate": 6.807372175980974e-08, + "logits/chosen": -1.3068559169769287, + "logits/rejected": -1.367431640625, + "logps/chosen": -149.6103515625, + "logps/rejected": -304.3005065917969, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.6090850830078125, + "rewards/margins": 14.959970474243164, + "rewards/rejected": -21.569055557250977, + "step": 4920 + }, + { + "epoch": 7.9, + "learning_rate": 6.797463337296869e-08, + "logits/chosen": -1.7524347305297852, + "logits/rejected": -1.7123677730560303, + "logps/chosen": -100.44873046875, + "logps/rejected": -278.62139892578125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.124645948410034, + "rewards/margins": 17.43526840209961, + "rewards/rejected": -20.55991554260254, + "step": 4921 + }, + { + "epoch": 7.9, + "learning_rate": 6.787554498612762e-08, + "logits/chosen": -1.2780205011367798, + "logits/rejected": -1.2836096286773682, + "logps/chosen": -182.94839477539062, + "logps/rejected": -312.30133056640625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.063398361206055, + "rewards/margins": 11.68855094909668, + "rewards/rejected": -22.751949310302734, + "step": 4922 + }, + { + "epoch": 7.9, + "learning_rate": 6.777645659928655e-08, + "logits/chosen": -1.6318585872650146, + "logits/rejected": -1.618431806564331, + "logps/chosen": -163.79794311523438, + "logps/rejected": -307.68353271484375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.305566787719727, + "rewards/margins": 13.616195678710938, + "rewards/rejected": -21.921762466430664, + "step": 4923 + }, + { + "epoch": 7.9, + "learning_rate": 6.76773682124455e-08, + "logits/chosen": -1.4422380924224854, + "logits/rejected": -1.549297571182251, + "logps/chosen": -185.56094360351562, + "logps/rejected": -311.69195556640625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.67754077911377, + "rewards/margins": 12.493606567382812, + "rewards/rejected": -22.1711483001709, + "step": 4924 + }, + { + "epoch": 7.91, + "learning_rate": 6.757827982560443e-08, + "logits/chosen": -1.5294691324234009, + "logits/rejected": -1.4318064451217651, + "logps/chosen": -161.6126708984375, + "logps/rejected": -322.1305236816406, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.884021759033203, + "rewards/margins": 16.34178924560547, + "rewards/rejected": -22.225811004638672, + "step": 4925 + }, + { + "epoch": 7.91, + "learning_rate": 6.747919143876338e-08, + "logits/chosen": -1.6290870904922485, + "logits/rejected": -1.5757665634155273, + "logps/chosen": -172.023193359375, + "logps/rejected": -281.43048095703125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.551963806152344, + "rewards/margins": 13.084405899047852, + "rewards/rejected": -20.636369705200195, + "step": 4926 + }, + { + "epoch": 7.91, + "learning_rate": 6.738010305192231e-08, + "logits/chosen": -1.736436128616333, + "logits/rejected": -1.5924285650253296, + "logps/chosen": -134.4371795654297, + "logps/rejected": -230.40011596679688, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.368408679962158, + "rewards/margins": 12.347579002380371, + "rewards/rejected": -16.715988159179688, + "step": 4927 + }, + { + "epoch": 7.91, + "learning_rate": 6.728101466508125e-08, + "logits/chosen": -1.6860144138336182, + "logits/rejected": -1.629030704498291, + "logps/chosen": -152.30418395996094, + "logps/rejected": -262.88812255859375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.742213726043701, + "rewards/margins": 10.183951377868652, + "rewards/rejected": -17.926166534423828, + "step": 4928 + }, + { + "epoch": 7.91, + "learning_rate": 6.718192627824018e-08, + "logits/chosen": -1.386312484741211, + "logits/rejected": -1.4824061393737793, + "logps/chosen": -175.2683868408203, + "logps/rejected": -339.5726318359375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.86451530456543, + "rewards/margins": 13.59428596496582, + "rewards/rejected": -22.458803176879883, + "step": 4929 + }, + { + "epoch": 7.91, + "learning_rate": 6.708283789139913e-08, + "logits/chosen": -1.4574838876724243, + "logits/rejected": -1.4437687397003174, + "logps/chosen": -175.93087768554688, + "logps/rejected": -319.0188903808594, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.888320446014404, + "rewards/margins": 14.431939125061035, + "rewards/rejected": -21.320261001586914, + "step": 4930 + }, + { + "epoch": 7.91, + "learning_rate": 6.698374950455807e-08, + "logits/chosen": -1.5819079875946045, + "logits/rejected": -1.6094446182250977, + "logps/chosen": -181.61605834960938, + "logps/rejected": -360.66424560546875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.39846420288086, + "rewards/margins": 15.379329681396484, + "rewards/rejected": -25.777793884277344, + "step": 4931 + }, + { + "epoch": 7.92, + "learning_rate": 6.6884661117717e-08, + "logits/chosen": -1.4996777772903442, + "logits/rejected": -1.4895281791687012, + "logps/chosen": -172.89317321777344, + "logps/rejected": -331.88507080078125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.344186782836914, + "rewards/margins": 15.572493553161621, + "rewards/rejected": -23.91668128967285, + "step": 4932 + }, + { + "epoch": 7.92, + "learning_rate": 6.678557273087594e-08, + "logits/chosen": -1.3394207954406738, + "logits/rejected": -1.2772724628448486, + "logps/chosen": -199.49844360351562, + "logps/rejected": -331.49835205078125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.969564437866211, + "rewards/margins": 14.164509773254395, + "rewards/rejected": -25.13407325744629, + "step": 4933 + }, + { + "epoch": 7.92, + "learning_rate": 6.668648434403487e-08, + "logits/chosen": -1.3203749656677246, + "logits/rejected": -1.3941376209259033, + "logps/chosen": -160.28851318359375, + "logps/rejected": -311.13397216796875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.571490287780762, + "rewards/margins": 14.694190979003906, + "rewards/rejected": -22.265682220458984, + "step": 4934 + }, + { + "epoch": 7.92, + "learning_rate": 6.658739595719382e-08, + "logits/chosen": -1.5060335397720337, + "logits/rejected": -1.550856113433838, + "logps/chosen": -180.84442138671875, + "logps/rejected": -322.32696533203125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.170495986938477, + "rewards/margins": 13.701642990112305, + "rewards/rejected": -22.87213897705078, + "step": 4935 + }, + { + "epoch": 7.92, + "learning_rate": 6.648830757035275e-08, + "logits/chosen": -1.4521610736846924, + "logits/rejected": -1.502049446105957, + "logps/chosen": -171.68682861328125, + "logps/rejected": -348.28533935546875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.277660369873047, + "rewards/margins": 13.672930717468262, + "rewards/rejected": -22.950590133666992, + "step": 4936 + }, + { + "epoch": 7.92, + "learning_rate": 6.638921918351169e-08, + "logits/chosen": -1.3623425960540771, + "logits/rejected": -1.4009084701538086, + "logps/chosen": -181.5767059326172, + "logps/rejected": -348.095947265625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.904523849487305, + "rewards/margins": 15.734140396118164, + "rewards/rejected": -26.63866424560547, + "step": 4937 + }, + { + "epoch": 7.93, + "learning_rate": 6.629013079667063e-08, + "logits/chosen": -1.4777913093566895, + "logits/rejected": -1.4538905620574951, + "logps/chosen": -160.15347290039062, + "logps/rejected": -320.8857727050781, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.095374584197998, + "rewards/margins": 16.72042465209961, + "rewards/rejected": -23.815799713134766, + "step": 4938 + }, + { + "epoch": 7.93, + "learning_rate": 6.619104240982957e-08, + "logits/chosen": -1.6272486448287964, + "logits/rejected": -1.672382116317749, + "logps/chosen": -144.9543914794922, + "logps/rejected": -276.45721435546875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.407447814941406, + "rewards/margins": 12.101672172546387, + "rewards/rejected": -19.50912094116211, + "step": 4939 + }, + { + "epoch": 7.93, + "learning_rate": 6.60919540229885e-08, + "logits/chosen": -1.4902807474136353, + "logits/rejected": -1.4490572214126587, + "logps/chosen": -180.0673065185547, + "logps/rejected": -291.7605895996094, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.824848175048828, + "rewards/margins": 12.952550888061523, + "rewards/rejected": -20.77739906311035, + "step": 4940 + }, + { + "epoch": 7.93, + "learning_rate": 6.599286563614743e-08, + "logits/chosen": -1.489199161529541, + "logits/rejected": -1.5155024528503418, + "logps/chosen": -155.2980499267578, + "logps/rejected": -304.07159423828125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.514796257019043, + "rewards/margins": 15.524482727050781, + "rewards/rejected": -22.039278030395508, + "step": 4941 + }, + { + "epoch": 7.93, + "learning_rate": 6.589377724930638e-08, + "logits/chosen": -1.517151117324829, + "logits/rejected": -1.4745386838912964, + "logps/chosen": -136.1370391845703, + "logps/rejected": -288.864990234375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.968955039978027, + "rewards/margins": 15.656716346740723, + "rewards/rejected": -20.62567138671875, + "step": 4942 + }, + { + "epoch": 7.93, + "learning_rate": 6.579468886246533e-08, + "logits/chosen": -1.60651433467865, + "logits/rejected": -1.5080313682556152, + "logps/chosen": -139.59405517578125, + "logps/rejected": -308.830810546875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.950434684753418, + "rewards/margins": 15.956064224243164, + "rewards/rejected": -21.9064998626709, + "step": 4943 + }, + { + "epoch": 7.94, + "learning_rate": 6.569560047562425e-08, + "logits/chosen": -1.5636556148529053, + "logits/rejected": -1.5785789489746094, + "logps/chosen": -164.36764526367188, + "logps/rejected": -345.20263671875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.680541038513184, + "rewards/margins": 16.847183227539062, + "rewards/rejected": -23.527725219726562, + "step": 4944 + }, + { + "epoch": 7.94, + "learning_rate": 6.559651208878319e-08, + "logits/chosen": -1.4103288650512695, + "logits/rejected": -1.3671824932098389, + "logps/chosen": -154.36090087890625, + "logps/rejected": -307.64276123046875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.8948469161987305, + "rewards/margins": 14.568997383117676, + "rewards/rejected": -22.463844299316406, + "step": 4945 + }, + { + "epoch": 7.94, + "learning_rate": 6.549742370194213e-08, + "logits/chosen": -1.4077417850494385, + "logits/rejected": -1.4079010486602783, + "logps/chosen": -186.15493774414062, + "logps/rejected": -333.4803466796875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.2022123336792, + "rewards/margins": 15.131638526916504, + "rewards/rejected": -23.333852767944336, + "step": 4946 + }, + { + "epoch": 7.94, + "learning_rate": 6.539833531510107e-08, + "logits/chosen": -1.2601714134216309, + "logits/rejected": -1.3539129495620728, + "logps/chosen": -135.70098876953125, + "logps/rejected": -254.24169921875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.175784587860107, + "rewards/margins": 11.772151947021484, + "rewards/rejected": -17.94793701171875, + "step": 4947 + }, + { + "epoch": 7.94, + "learning_rate": 6.529924692825999e-08, + "logits/chosen": -1.3664238452911377, + "logits/rejected": -1.5036628246307373, + "logps/chosen": -156.41690063476562, + "logps/rejected": -362.9529113769531, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.37971830368042, + "rewards/margins": 18.131986618041992, + "rewards/rejected": -25.511703491210938, + "step": 4948 + }, + { + "epoch": 7.94, + "learning_rate": 6.520015854141894e-08, + "logits/chosen": -1.426912546157837, + "logits/rejected": -1.5171469449996948, + "logps/chosen": -137.11573791503906, + "logps/rejected": -274.56158447265625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.1341753005981445, + "rewards/margins": 13.728302001953125, + "rewards/rejected": -18.862478256225586, + "step": 4949 + }, + { + "epoch": 7.95, + "learning_rate": 6.510107015457789e-08, + "logits/chosen": -1.555643081665039, + "logits/rejected": -1.5726553201675415, + "logps/chosen": -142.3778076171875, + "logps/rejected": -265.91009521484375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.54888916015625, + "rewards/margins": 11.924670219421387, + "rewards/rejected": -18.473560333251953, + "step": 4950 + }, + { + "epoch": 7.95, + "learning_rate": 6.500198176773682e-08, + "logits/chosen": -1.430159568786621, + "logits/rejected": -1.4085278511047363, + "logps/chosen": -187.0888671875, + "logps/rejected": -313.93096923828125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.806796073913574, + "rewards/margins": 14.09773063659668, + "rewards/rejected": -22.90452766418457, + "step": 4951 + }, + { + "epoch": 7.95, + "learning_rate": 6.490289338089577e-08, + "logits/chosen": -1.4882748126983643, + "logits/rejected": -1.4203832149505615, + "logps/chosen": -183.2484893798828, + "logps/rejected": -329.5894775390625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.050426483154297, + "rewards/margins": 15.854513168334961, + "rewards/rejected": -23.904939651489258, + "step": 4952 + }, + { + "epoch": 7.95, + "learning_rate": 6.480380499405469e-08, + "logits/chosen": -1.3607265949249268, + "logits/rejected": -1.4896742105484009, + "logps/chosen": -116.90552520751953, + "logps/rejected": -325.79290771484375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.151294708251953, + "rewards/margins": 16.765911102294922, + "rewards/rejected": -21.917205810546875, + "step": 4953 + }, + { + "epoch": 7.95, + "learning_rate": 6.470471660721363e-08, + "logits/chosen": -1.4554328918457031, + "logits/rejected": -1.5189085006713867, + "logps/chosen": -195.41641235351562, + "logps/rejected": -334.3274230957031, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.564616203308105, + "rewards/margins": 13.46866226196289, + "rewards/rejected": -23.033279418945312, + "step": 4954 + }, + { + "epoch": 7.95, + "learning_rate": 6.460562822037257e-08, + "logits/chosen": -1.2886685132980347, + "logits/rejected": -1.3147509098052979, + "logps/chosen": -144.1387939453125, + "logps/rejected": -280.32208251953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.4489850997924805, + "rewards/margins": 15.213923454284668, + "rewards/rejected": -18.66290855407715, + "step": 4955 + }, + { + "epoch": 7.96, + "learning_rate": 6.450653983353151e-08, + "logits/chosen": -1.4308241605758667, + "logits/rejected": -1.3839426040649414, + "logps/chosen": -123.96199798583984, + "logps/rejected": -235.07562255859375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.0044426918029785, + "rewards/margins": 11.59727668762207, + "rewards/rejected": -16.60171890258789, + "step": 4956 + }, + { + "epoch": 7.96, + "learning_rate": 6.440745144669045e-08, + "logits/chosen": -1.5133806467056274, + "logits/rejected": -1.4385645389556885, + "logps/chosen": -124.51720428466797, + "logps/rejected": -243.2098388671875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.224242210388184, + "rewards/margins": 12.646797180175781, + "rewards/rejected": -17.87104034423828, + "step": 4957 + }, + { + "epoch": 7.96, + "learning_rate": 6.430836305984938e-08, + "logits/chosen": -1.5922462940216064, + "logits/rejected": -1.5461504459381104, + "logps/chosen": -129.0787353515625, + "logps/rejected": -269.197998046875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.945950031280518, + "rewards/margins": 14.44717025756836, + "rewards/rejected": -20.39312171936035, + "step": 4958 + }, + { + "epoch": 7.96, + "learning_rate": 6.420927467300833e-08, + "logits/chosen": -1.3654509782791138, + "logits/rejected": -1.4389649629592896, + "logps/chosen": -146.37771606445312, + "logps/rejected": -268.5397644042969, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.4635701179504395, + "rewards/margins": 13.40743637084961, + "rewards/rejected": -18.871004104614258, + "step": 4959 + }, + { + "epoch": 7.96, + "learning_rate": 6.411018628616726e-08, + "logits/chosen": -1.6731510162353516, + "logits/rejected": -1.6004998683929443, + "logps/chosen": -166.66026306152344, + "logps/rejected": -320.0062561035156, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.881400108337402, + "rewards/margins": 15.552593231201172, + "rewards/rejected": -23.43399429321289, + "step": 4960 + }, + { + "epoch": 7.96, + "learning_rate": 6.401109789932619e-08, + "logits/chosen": -1.4367592334747314, + "logits/rejected": -1.4243688583374023, + "logps/chosen": -157.8418731689453, + "logps/rejected": -297.84539794921875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.967150688171387, + "rewards/margins": 14.239580154418945, + "rewards/rejected": -21.20673179626465, + "step": 4961 + }, + { + "epoch": 7.96, + "learning_rate": 6.391200951248514e-08, + "logits/chosen": -1.6338472366333008, + "logits/rejected": -1.6681883335113525, + "logps/chosen": -113.20653533935547, + "logps/rejected": -281.8238830566406, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.4965133666992188, + "rewards/margins": 15.922796249389648, + "rewards/rejected": -19.419309616088867, + "step": 4962 + }, + { + "epoch": 7.97, + "learning_rate": 6.381292112564407e-08, + "logits/chosen": -1.7049845457077026, + "logits/rejected": -1.6671713590621948, + "logps/chosen": -109.30989837646484, + "logps/rejected": -213.17506408691406, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.086953163146973, + "rewards/margins": 10.633581161499023, + "rewards/rejected": -14.72053337097168, + "step": 4963 + }, + { + "epoch": 7.97, + "learning_rate": 6.371383273880302e-08, + "logits/chosen": -1.6931822299957275, + "logits/rejected": -1.6428049802780151, + "logps/chosen": -112.86708068847656, + "logps/rejected": -235.69680786132812, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9381697177886963, + "rewards/margins": 11.91849136352539, + "rewards/rejected": -15.856659889221191, + "step": 4964 + }, + { + "epoch": 7.97, + "learning_rate": 6.361474435196194e-08, + "logits/chosen": -1.3625528812408447, + "logits/rejected": -1.468430995941162, + "logps/chosen": -141.3026885986328, + "logps/rejected": -324.77508544921875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.184837341308594, + "rewards/margins": 14.380599975585938, + "rewards/rejected": -19.56543731689453, + "step": 4965 + }, + { + "epoch": 7.97, + "learning_rate": 6.351565596512088e-08, + "logits/chosen": -1.6710295677185059, + "logits/rejected": -1.4180018901824951, + "logps/chosen": -213.15365600585938, + "logps/rejected": -264.39947509765625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.823093414306641, + "rewards/margins": 12.569363594055176, + "rewards/rejected": -20.392457962036133, + "step": 4966 + }, + { + "epoch": 7.97, + "learning_rate": 6.341656757827982e-08, + "logits/chosen": -1.2967956066131592, + "logits/rejected": -1.3344626426696777, + "logps/chosen": -146.22398376464844, + "logps/rejected": -300.6174621582031, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.068172454833984, + "rewards/margins": 14.251825332641602, + "rewards/rejected": -21.319997787475586, + "step": 4967 + }, + { + "epoch": 7.97, + "learning_rate": 6.331747919143876e-08, + "logits/chosen": -1.384425163269043, + "logits/rejected": -1.426023244857788, + "logps/chosen": -140.7677001953125, + "logps/rejected": -315.2598876953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.485176086425781, + "rewards/margins": 14.645751953125, + "rewards/rejected": -21.13092803955078, + "step": 4968 + }, + { + "epoch": 7.98, + "learning_rate": 6.321839080459771e-08, + "logits/chosen": -1.4688713550567627, + "logits/rejected": -1.3148144483566284, + "logps/chosen": -204.29237365722656, + "logps/rejected": -277.81982421875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.133615493774414, + "rewards/margins": 12.297032356262207, + "rewards/rejected": -20.430646896362305, + "step": 4969 + }, + { + "epoch": 7.98, + "learning_rate": 6.311930241775663e-08, + "logits/chosen": -1.5003223419189453, + "logits/rejected": -1.4820430278778076, + "logps/chosen": -201.69631958007812, + "logps/rejected": -304.2740478515625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.915660858154297, + "rewards/margins": 10.943458557128906, + "rewards/rejected": -19.859119415283203, + "step": 4970 + }, + { + "epoch": 7.98, + "learning_rate": 6.302021403091558e-08, + "logits/chosen": -1.5471842288970947, + "logits/rejected": -1.5335127115249634, + "logps/chosen": -198.89312744140625, + "logps/rejected": -271.3827819824219, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.205077171325684, + "rewards/margins": 9.3900728225708, + "rewards/rejected": -18.595149993896484, + "step": 4971 + }, + { + "epoch": 7.98, + "learning_rate": 6.292112564407451e-08, + "logits/chosen": -1.3199806213378906, + "logits/rejected": -1.2877893447875977, + "logps/chosen": -141.52639770507812, + "logps/rejected": -253.82131958007812, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.901370048522949, + "rewards/margins": 12.347278594970703, + "rewards/rejected": -18.24864959716797, + "step": 4972 + }, + { + "epoch": 7.98, + "learning_rate": 6.282203725723346e-08, + "logits/chosen": -1.6656053066253662, + "logits/rejected": -1.5141671895980835, + "logps/chosen": -161.15919494628906, + "logps/rejected": -288.236328125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.856696605682373, + "rewards/margins": 13.049251556396484, + "rewards/rejected": -19.905948638916016, + "step": 4973 + }, + { + "epoch": 7.98, + "learning_rate": 6.272294887039238e-08, + "logits/chosen": -1.6660563945770264, + "logits/rejected": -1.6018450260162354, + "logps/chosen": -148.5950469970703, + "logps/rejected": -271.9561462402344, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.2045698165893555, + "rewards/margins": 13.365599632263184, + "rewards/rejected": -19.570167541503906, + "step": 4974 + }, + { + "epoch": 7.99, + "learning_rate": 6.262386048355132e-08, + "logits/chosen": -1.5474579334259033, + "logits/rejected": -1.5740324258804321, + "logps/chosen": -184.74191284179688, + "logps/rejected": -305.7305908203125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.559179306030273, + "rewards/margins": 12.886354446411133, + "rewards/rejected": -22.445531845092773, + "step": 4975 + }, + { + "epoch": 7.99, + "learning_rate": 6.252477209671027e-08, + "logits/chosen": -1.4101706743240356, + "logits/rejected": -1.4202852249145508, + "logps/chosen": -105.90115356445312, + "logps/rejected": -262.1482238769531, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.630397796630859, + "rewards/margins": 13.885977745056152, + "rewards/rejected": -19.516376495361328, + "step": 4976 + }, + { + "epoch": 7.99, + "learning_rate": 6.24256837098692e-08, + "logits/chosen": -1.5503305196762085, + "logits/rejected": -1.575042724609375, + "logps/chosen": -167.067626953125, + "logps/rejected": -299.2735290527344, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.530710220336914, + "rewards/margins": 12.551409721374512, + "rewards/rejected": -21.082120895385742, + "step": 4977 + }, + { + "epoch": 7.99, + "learning_rate": 6.232659532302814e-08, + "logits/chosen": -1.6049646139144897, + "logits/rejected": -1.683699369430542, + "logps/chosen": -151.47415161132812, + "logps/rejected": -326.3045654296875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.738122940063477, + "rewards/margins": 16.468547821044922, + "rewards/rejected": -23.206668853759766, + "step": 4978 + }, + { + "epoch": 7.99, + "learning_rate": 6.222750693618708e-08, + "logits/chosen": -1.3567569255828857, + "logits/rejected": -1.4126079082489014, + "logps/chosen": -180.74627685546875, + "logps/rejected": -307.66693115234375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.347441673278809, + "rewards/margins": 12.836677551269531, + "rewards/rejected": -22.184120178222656, + "step": 4979 + }, + { + "epoch": 7.99, + "learning_rate": 6.212841854934602e-08, + "logits/chosen": -1.575981616973877, + "logits/rejected": -1.59954035282135, + "logps/chosen": -144.99526977539062, + "logps/rejected": -273.07342529296875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.455984115600586, + "rewards/margins": 12.769399642944336, + "rewards/rejected": -18.225383758544922, + "step": 4980 + }, + { + "epoch": 8.0, + "learning_rate": 6.202933016250495e-08, + "logits/chosen": -1.5606162548065186, + "logits/rejected": -1.5900264978408813, + "logps/chosen": -169.50506591796875, + "logps/rejected": -302.9367370605469, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.332746505737305, + "rewards/margins": 13.671635627746582, + "rewards/rejected": -23.004383087158203, + "step": 4981 + }, + { + "epoch": 8.0, + "learning_rate": 6.193024177566388e-08, + "logits/chosen": -1.5249961614608765, + "logits/rejected": -1.5267322063446045, + "logps/chosen": -169.36351013183594, + "logps/rejected": -318.2764892578125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.830323219299316, + "rewards/margins": 15.454629898071289, + "rewards/rejected": -23.28495216369629, + "step": 4982 + }, + { + "epoch": 8.0, + "learning_rate": 6.183115338882283e-08, + "logits/chosen": -1.5648270845413208, + "logits/rejected": -1.610274076461792, + "logps/chosen": -205.89422607421875, + "logps/rejected": -363.3619384765625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.861175537109375, + "rewards/margins": 15.284310340881348, + "rewards/rejected": -27.145484924316406, + "step": 4983 + }, + { + "epoch": 8.0, + "learning_rate": 6.173206500198176e-08, + "logits/chosen": -1.54854154586792, + "logits/rejected": -1.5049291849136353, + "logps/chosen": -168.19802856445312, + "logps/rejected": -331.0521240234375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.510188102722168, + "rewards/margins": 17.429115295410156, + "rewards/rejected": -23.93930435180664, + "step": 4984 + }, + { + "epoch": 8.0, + "learning_rate": 6.163297661514071e-08, + "logits/chosen": -1.5311787128448486, + "logits/rejected": -1.5140671730041504, + "logps/chosen": -180.87197875976562, + "logps/rejected": -316.3639831542969, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.322641372680664, + "rewards/margins": 13.630735397338867, + "rewards/rejected": -22.95337677001953, + "step": 4985 + }, + { + "epoch": 8.0, + "learning_rate": 6.153388822829964e-08, + "logits/chosen": -1.4589574337005615, + "logits/rejected": -1.4391427040100098, + "logps/chosen": -173.80447387695312, + "logps/rejected": -320.8588562011719, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.797813415527344, + "rewards/margins": 14.803401947021484, + "rewards/rejected": -23.601215362548828, + "step": 4986 + }, + { + "epoch": 8.0, + "learning_rate": 6.143479984145858e-08, + "logits/chosen": -1.5569584369659424, + "logits/rejected": -1.4803327322006226, + "logps/chosen": -166.62579345703125, + "logps/rejected": -265.63787841796875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.664750099182129, + "rewards/margins": 11.281028747558594, + "rewards/rejected": -17.945777893066406, + "step": 4987 + }, + { + "epoch": 8.01, + "learning_rate": 6.133571145461751e-08, + "logits/chosen": -1.2887532711029053, + "logits/rejected": -1.3380173444747925, + "logps/chosen": -118.62287902832031, + "logps/rejected": -267.23309326171875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.078327178955078, + "rewards/margins": 13.959972381591797, + "rewards/rejected": -19.038301467895508, + "step": 4988 + }, + { + "epoch": 8.01, + "learning_rate": 6.123662306777646e-08, + "logits/chosen": -1.4412992000579834, + "logits/rejected": -1.4180266857147217, + "logps/chosen": -168.19607543945312, + "logps/rejected": -313.91058349609375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.206597328186035, + "rewards/margins": 13.737187385559082, + "rewards/rejected": -21.943784713745117, + "step": 4989 + }, + { + "epoch": 8.01, + "learning_rate": 6.113753468093539e-08, + "logits/chosen": -1.300450325012207, + "logits/rejected": -1.3777892589569092, + "logps/chosen": -175.92239379882812, + "logps/rejected": -366.51141357421875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.123052597045898, + "rewards/margins": 17.542163848876953, + "rewards/rejected": -25.665218353271484, + "step": 4990 + }, + { + "epoch": 8.01, + "learning_rate": 6.103844629409434e-08, + "logits/chosen": -1.442967414855957, + "logits/rejected": -1.3799229860305786, + "logps/chosen": -81.39944458007812, + "logps/rejected": -200.34912109375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6719273328781128, + "rewards/margins": 13.236421585083008, + "rewards/rejected": -14.90834903717041, + "step": 4991 + }, + { + "epoch": 8.01, + "learning_rate": 6.093935790725327e-08, + "logits/chosen": -1.4674681425094604, + "logits/rejected": -1.4519776105880737, + "logps/chosen": -215.80662536621094, + "logps/rejected": -351.411376953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.270995140075684, + "rewards/margins": 17.03670883178711, + "rewards/rejected": -26.307703018188477, + "step": 4992 + }, + { + "epoch": 8.01, + "learning_rate": 6.08402695204122e-08, + "logits/chosen": -1.44843590259552, + "logits/rejected": -1.463646650314331, + "logps/chosen": -128.83209228515625, + "logps/rejected": -233.4813232421875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.037652969360352, + "rewards/margins": 10.914144515991211, + "rewards/rejected": -15.951797485351562, + "step": 4993 + }, + { + "epoch": 8.02, + "learning_rate": 6.074118113357115e-08, + "logits/chosen": -1.4362345933914185, + "logits/rejected": -1.461738109588623, + "logps/chosen": -126.75273132324219, + "logps/rejected": -288.6739807128906, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.79638671875, + "rewards/margins": 15.205348014831543, + "rewards/rejected": -21.001733779907227, + "step": 4994 + }, + { + "epoch": 8.02, + "learning_rate": 6.064209274673008e-08, + "logits/chosen": -1.455554485321045, + "logits/rejected": -1.4738891124725342, + "logps/chosen": -179.00338745117188, + "logps/rejected": -311.30364990234375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.892496109008789, + "rewards/margins": 13.97335433959961, + "rewards/rejected": -22.865848541259766, + "step": 4995 + }, + { + "epoch": 8.02, + "learning_rate": 6.054300435988902e-08, + "logits/chosen": -1.6714742183685303, + "logits/rejected": -1.6012918949127197, + "logps/chosen": -157.69412231445312, + "logps/rejected": -289.41217041015625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.400862693786621, + "rewards/margins": 13.939992904663086, + "rewards/rejected": -20.34085464477539, + "step": 4996 + }, + { + "epoch": 8.02, + "learning_rate": 6.044391597304795e-08, + "logits/chosen": -1.47527015209198, + "logits/rejected": -1.5775623321533203, + "logps/chosen": -126.01803588867188, + "logps/rejected": -257.599853515625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.485997200012207, + "rewards/margins": 11.969842910766602, + "rewards/rejected": -18.455841064453125, + "step": 4997 + }, + { + "epoch": 8.02, + "learning_rate": 6.03448275862069e-08, + "logits/chosen": -1.6175439357757568, + "logits/rejected": -1.5628142356872559, + "logps/chosen": -95.38114166259766, + "logps/rejected": -195.00906372070312, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.452406406402588, + "rewards/margins": 11.002397537231445, + "rewards/rejected": -14.454805374145508, + "step": 4998 + }, + { + "epoch": 8.02, + "learning_rate": 6.024573919936583e-08, + "logits/chosen": -1.3363347053527832, + "logits/rejected": -1.3700473308563232, + "logps/chosen": -167.11839294433594, + "logps/rejected": -290.07281494140625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.656893730163574, + "rewards/margins": 13.182232856750488, + "rewards/rejected": -20.839126586914062, + "step": 4999 + }, + { + "epoch": 8.03, + "learning_rate": 6.014665081252478e-08, + "logits/chosen": -1.5741082429885864, + "logits/rejected": -1.6684659719467163, + "logps/chosen": -120.42105102539062, + "logps/rejected": -261.6542663574219, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.292778491973877, + "rewards/margins": 12.81362247467041, + "rewards/rejected": -18.106399536132812, + "step": 5000 + }, + { + "epoch": 8.03, + "learning_rate": 6.004756242568371e-08, + "logits/chosen": -1.4564976692199707, + "logits/rejected": -1.5475804805755615, + "logps/chosen": -152.11944580078125, + "logps/rejected": -331.1118469238281, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.008017063140869, + "rewards/margins": 16.91470718383789, + "rewards/rejected": -22.92272186279297, + "step": 5001 + }, + { + "epoch": 8.03, + "learning_rate": 5.994847403884264e-08, + "logits/chosen": -1.2886805534362793, + "logits/rejected": -1.3354151248931885, + "logps/chosen": -154.04531860351562, + "logps/rejected": -312.11102294921875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.464323997497559, + "rewards/margins": 14.941532135009766, + "rewards/rejected": -22.405855178833008, + "step": 5002 + }, + { + "epoch": 8.03, + "learning_rate": 5.984938565200158e-08, + "logits/chosen": -1.4713118076324463, + "logits/rejected": -1.5071650743484497, + "logps/chosen": -166.52761840820312, + "logps/rejected": -311.44915771484375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.020369529724121, + "rewards/margins": 12.726841926574707, + "rewards/rejected": -21.747211456298828, + "step": 5003 + }, + { + "epoch": 8.03, + "learning_rate": 5.975029726516052e-08, + "logits/chosen": -1.6593457460403442, + "logits/rejected": -1.597397804260254, + "logps/chosen": -145.5205078125, + "logps/rejected": -290.1800231933594, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.3631391525268555, + "rewards/margins": 15.62936782836914, + "rewards/rejected": -21.992507934570312, + "step": 5004 + }, + { + "epoch": 8.03, + "learning_rate": 5.965120887831946e-08, + "logits/chosen": -1.5022236108779907, + "logits/rejected": -1.4690546989440918, + "logps/chosen": -177.4291534423828, + "logps/rejected": -290.5950012207031, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.010746955871582, + "rewards/margins": 12.048233032226562, + "rewards/rejected": -21.058979034423828, + "step": 5005 + }, + { + "epoch": 8.04, + "learning_rate": 5.9552120491478395e-08, + "logits/chosen": -1.5052225589752197, + "logits/rejected": -1.553720474243164, + "logps/chosen": -199.36993408203125, + "logps/rejected": -359.786376953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.366771697998047, + "rewards/margins": 15.638596534729004, + "rewards/rejected": -26.005369186401367, + "step": 5006 + }, + { + "epoch": 8.04, + "learning_rate": 5.9453032104637335e-08, + "logits/chosen": -1.437647819519043, + "logits/rejected": -1.3140289783477783, + "logps/chosen": -176.91995239257812, + "logps/rejected": -270.0015563964844, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.503847122192383, + "rewards/margins": 12.328627586364746, + "rewards/rejected": -20.832475662231445, + "step": 5007 + }, + { + "epoch": 8.04, + "learning_rate": 5.935394371779627e-08, + "logits/chosen": -1.516730546951294, + "logits/rejected": -1.546327829360962, + "logps/chosen": -134.669677734375, + "logps/rejected": -344.0626220703125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.657210350036621, + "rewards/margins": 18.82769775390625, + "rewards/rejected": -25.484909057617188, + "step": 5008 + }, + { + "epoch": 8.04, + "learning_rate": 5.925485533095521e-08, + "logits/chosen": -1.4851889610290527, + "logits/rejected": -1.5952776670455933, + "logps/chosen": -165.61199951171875, + "logps/rejected": -306.33160400390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.671361923217773, + "rewards/margins": 12.877870559692383, + "rewards/rejected": -21.549232482910156, + "step": 5009 + }, + { + "epoch": 8.04, + "learning_rate": 5.915576694411414e-08, + "logits/chosen": -1.5311331748962402, + "logits/rejected": -1.4370718002319336, + "logps/chosen": -190.90576171875, + "logps/rejected": -280.8857116699219, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.194364547729492, + "rewards/margins": 10.498478889465332, + "rewards/rejected": -19.69284439086914, + "step": 5010 + }, + { + "epoch": 8.04, + "learning_rate": 5.905667855727309e-08, + "logits/chosen": -1.484938144683838, + "logits/rejected": -1.5076799392700195, + "logps/chosen": -137.46905517578125, + "logps/rejected": -316.4364318847656, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.962255001068115, + "rewards/margins": 16.017789840698242, + "rewards/rejected": -21.980045318603516, + "step": 5011 + }, + { + "epoch": 8.04, + "learning_rate": 5.895759017043202e-08, + "logits/chosen": -1.5497853755950928, + "logits/rejected": -1.5418919324874878, + "logps/chosen": -197.76304626464844, + "logps/rejected": -306.16680908203125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.680582046508789, + "rewards/margins": 11.870372772216797, + "rewards/rejected": -22.55095672607422, + "step": 5012 + }, + { + "epoch": 8.05, + "learning_rate": 5.885850178359096e-08, + "logits/chosen": -1.3082879781723022, + "logits/rejected": -1.306286096572876, + "logps/chosen": -143.9158935546875, + "logps/rejected": -283.1452941894531, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.0566606521606445, + "rewards/margins": 14.042704582214355, + "rewards/rejected": -21.099363327026367, + "step": 5013 + }, + { + "epoch": 8.05, + "learning_rate": 5.8759413396749895e-08, + "logits/chosen": -1.2717808485031128, + "logits/rejected": -1.2968636751174927, + "logps/chosen": -157.106201171875, + "logps/rejected": -279.19952392578125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.7638397216796875, + "rewards/margins": 12.463899612426758, + "rewards/rejected": -19.227739334106445, + "step": 5014 + }, + { + "epoch": 8.05, + "learning_rate": 5.8660325009908834e-08, + "logits/chosen": -1.6310466527938843, + "logits/rejected": -1.6260137557983398, + "logps/chosen": -140.15975952148438, + "logps/rejected": -283.4669494628906, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.531830787658691, + "rewards/margins": 13.725627899169922, + "rewards/rejected": -20.257457733154297, + "step": 5015 + }, + { + "epoch": 8.05, + "learning_rate": 5.856123662306777e-08, + "logits/chosen": -1.6143629550933838, + "logits/rejected": -1.6170204877853394, + "logps/chosen": -187.99343872070312, + "logps/rejected": -307.4961853027344, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.199689865112305, + "rewards/margins": 13.070430755615234, + "rewards/rejected": -22.27012062072754, + "step": 5016 + }, + { + "epoch": 8.05, + "learning_rate": 5.8462148236226714e-08, + "logits/chosen": -1.482712984085083, + "logits/rejected": -1.4227051734924316, + "logps/chosen": -167.87205505371094, + "logps/rejected": -277.3843078613281, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.175382614135742, + "rewards/margins": 12.382181167602539, + "rewards/rejected": -19.55756378173828, + "step": 5017 + }, + { + "epoch": 8.05, + "learning_rate": 5.8363059849385654e-08, + "logits/chosen": -1.4423280954360962, + "logits/rejected": -1.4525494575500488, + "logps/chosen": -170.44740295410156, + "logps/rejected": -271.85394287109375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.565171241760254, + "rewards/margins": 9.839584350585938, + "rewards/rejected": -19.404756546020508, + "step": 5018 + }, + { + "epoch": 8.06, + "learning_rate": 5.826397146254459e-08, + "logits/chosen": -1.5584708452224731, + "logits/rejected": -1.5922813415527344, + "logps/chosen": -90.78547668457031, + "logps/rejected": -237.15399169921875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5323948860168457, + "rewards/margins": 13.885372161865234, + "rewards/rejected": -16.417766571044922, + "step": 5019 + }, + { + "epoch": 8.06, + "learning_rate": 5.816488307570353e-08, + "logits/chosen": -1.301735281944275, + "logits/rejected": -1.3792381286621094, + "logps/chosen": -168.683837890625, + "logps/rejected": -282.667724609375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.480048179626465, + "rewards/margins": 10.640990257263184, + "rewards/rejected": -19.12103843688965, + "step": 5020 + }, + { + "epoch": 8.06, + "learning_rate": 5.806579468886246e-08, + "logits/chosen": -1.7400531768798828, + "logits/rejected": -1.7655744552612305, + "logps/chosen": -113.97752380371094, + "logps/rejected": -265.209228515625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.333478927612305, + "rewards/margins": 15.259838104248047, + "rewards/rejected": -20.59331703186035, + "step": 5021 + }, + { + "epoch": 8.06, + "learning_rate": 5.79667063020214e-08, + "logits/chosen": -1.4923796653747559, + "logits/rejected": -1.546484112739563, + "logps/chosen": -126.71984100341797, + "logps/rejected": -295.7197570800781, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.491053581237793, + "rewards/margins": 15.13868236541748, + "rewards/rejected": -21.629737854003906, + "step": 5022 + }, + { + "epoch": 8.06, + "learning_rate": 5.7867617915180334e-08, + "logits/chosen": -1.362676739692688, + "logits/rejected": -1.3625218868255615, + "logps/chosen": -175.61293029785156, + "logps/rejected": -303.1582336425781, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.784147262573242, + "rewards/margins": 13.387413024902344, + "rewards/rejected": -23.17156219482422, + "step": 5023 + }, + { + "epoch": 8.06, + "learning_rate": 5.776852952833928e-08, + "logits/chosen": -1.4822663068771362, + "logits/rejected": -1.4683314561843872, + "logps/chosen": -170.95077514648438, + "logps/rejected": -288.95501708984375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.182988166809082, + "rewards/margins": 12.430891990661621, + "rewards/rejected": -20.613880157470703, + "step": 5024 + }, + { + "epoch": 8.07, + "learning_rate": 5.7669441141498214e-08, + "logits/chosen": -1.4152772426605225, + "logits/rejected": -1.4138387441635132, + "logps/chosen": -142.9827423095703, + "logps/rejected": -280.953369140625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.155002593994141, + "rewards/margins": 13.590129852294922, + "rewards/rejected": -20.745132446289062, + "step": 5025 + }, + { + "epoch": 8.07, + "learning_rate": 5.7570352754657154e-08, + "logits/chosen": -1.456222414970398, + "logits/rejected": -1.4256058931350708, + "logps/chosen": -156.11849975585938, + "logps/rejected": -307.74884033203125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.136623859405518, + "rewards/margins": 16.76593780517578, + "rewards/rejected": -22.90256118774414, + "step": 5026 + }, + { + "epoch": 8.07, + "learning_rate": 5.747126436781609e-08, + "logits/chosen": -1.422315001487732, + "logits/rejected": -1.4800069332122803, + "logps/chosen": -187.79588317871094, + "logps/rejected": -334.79937744140625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.436535835266113, + "rewards/margins": 15.997604370117188, + "rewards/rejected": -24.434139251708984, + "step": 5027 + }, + { + "epoch": 8.07, + "learning_rate": 5.737217598097503e-08, + "logits/chosen": -1.4873183965682983, + "logits/rejected": -1.365874171257019, + "logps/chosen": -172.91888427734375, + "logps/rejected": -314.40185546875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.924319267272949, + "rewards/margins": 14.928220748901367, + "rewards/rejected": -22.852540969848633, + "step": 5028 + }, + { + "epoch": 8.07, + "learning_rate": 5.727308759413396e-08, + "logits/chosen": -1.3354215621948242, + "logits/rejected": -1.4064228534698486, + "logps/chosen": -162.83189392089844, + "logps/rejected": -303.8578796386719, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.677907943725586, + "rewards/margins": 12.015957832336426, + "rewards/rejected": -19.693864822387695, + "step": 5029 + }, + { + "epoch": 8.07, + "learning_rate": 5.717399920729291e-08, + "logits/chosen": -1.539892554283142, + "logits/rejected": -1.6223673820495605, + "logps/chosen": -145.84188842773438, + "logps/rejected": -337.563720703125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.503178119659424, + "rewards/margins": 17.84561538696289, + "rewards/rejected": -24.348793029785156, + "step": 5030 + }, + { + "epoch": 8.08, + "learning_rate": 5.707491082045184e-08, + "logits/chosen": -1.511381983757019, + "logits/rejected": -1.4424058198928833, + "logps/chosen": -150.05303955078125, + "logps/rejected": -291.4830017089844, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.549310207366943, + "rewards/margins": 14.710238456726074, + "rewards/rejected": -21.25954818725586, + "step": 5031 + }, + { + "epoch": 8.08, + "learning_rate": 5.697582243361078e-08, + "logits/chosen": -1.4249032735824585, + "logits/rejected": -1.512303113937378, + "logps/chosen": -127.4808120727539, + "logps/rejected": -265.1947937011719, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.535235404968262, + "rewards/margins": 12.452552795410156, + "rewards/rejected": -17.987789154052734, + "step": 5032 + }, + { + "epoch": 8.08, + "learning_rate": 5.687673404676971e-08, + "logits/chosen": -1.2953213453292847, + "logits/rejected": -1.3237937688827515, + "logps/chosen": -162.0536346435547, + "logps/rejected": -318.8172607421875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.727736473083496, + "rewards/margins": 15.681451797485352, + "rewards/rejected": -23.40918731689453, + "step": 5033 + }, + { + "epoch": 8.08, + "learning_rate": 5.677764565992865e-08, + "logits/chosen": -1.5516060590744019, + "logits/rejected": -1.520662784576416, + "logps/chosen": -198.44358825683594, + "logps/rejected": -348.1696472167969, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.100923538208008, + "rewards/margins": 15.151809692382812, + "rewards/rejected": -25.25273323059082, + "step": 5034 + }, + { + "epoch": 8.08, + "learning_rate": 5.6678557273087586e-08, + "logits/chosen": -1.579177975654602, + "logits/rejected": -1.559841513633728, + "logps/chosen": -140.9101104736328, + "logps/rejected": -286.6640625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.182375431060791, + "rewards/margins": 14.258125305175781, + "rewards/rejected": -20.440502166748047, + "step": 5035 + }, + { + "epoch": 8.08, + "learning_rate": 5.657946888624653e-08, + "logits/chosen": -1.5426607131958008, + "logits/rejected": -1.6032276153564453, + "logps/chosen": -167.694091796875, + "logps/rejected": -284.43414306640625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.116045951843262, + "rewards/margins": 10.494705200195312, + "rewards/rejected": -18.61075210571289, + "step": 5036 + }, + { + "epoch": 8.09, + "learning_rate": 5.6480380499405466e-08, + "logits/chosen": -1.36954927444458, + "logits/rejected": -1.3541929721832275, + "logps/chosen": -145.0393524169922, + "logps/rejected": -315.21649169921875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.954878330230713, + "rewards/margins": 14.591072082519531, + "rewards/rejected": -21.54595184326172, + "step": 5037 + }, + { + "epoch": 8.09, + "learning_rate": 5.6381292112564406e-08, + "logits/chosen": -1.643929123878479, + "logits/rejected": -1.6987746953964233, + "logps/chosen": -111.92271423339844, + "logps/rejected": -275.7811584472656, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.963015079498291, + "rewards/margins": 15.289705276489258, + "rewards/rejected": -19.25271987915039, + "step": 5038 + }, + { + "epoch": 8.09, + "learning_rate": 5.6282203725723346e-08, + "logits/chosen": -1.5256891250610352, + "logits/rejected": -1.4382858276367188, + "logps/chosen": -126.73175048828125, + "logps/rejected": -257.52459716796875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.362931251525879, + "rewards/margins": 14.006728172302246, + "rewards/rejected": -18.369659423828125, + "step": 5039 + }, + { + "epoch": 8.09, + "learning_rate": 5.618311533888228e-08, + "logits/chosen": -1.4876856803894043, + "logits/rejected": -1.5014699697494507, + "logps/chosen": -165.00323486328125, + "logps/rejected": -282.1630554199219, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.614208221435547, + "rewards/margins": 11.139680862426758, + "rewards/rejected": -20.753889083862305, + "step": 5040 + }, + { + "epoch": 8.09, + "learning_rate": 5.608402695204122e-08, + "logits/chosen": -1.561333417892456, + "logits/rejected": -1.5315662622451782, + "logps/chosen": -158.8871307373047, + "logps/rejected": -331.2682800292969, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.816561222076416, + "rewards/margins": 17.442014694213867, + "rewards/rejected": -23.258575439453125, + "step": 5041 + }, + { + "epoch": 8.09, + "learning_rate": 5.598493856520015e-08, + "logits/chosen": -1.5518509149551392, + "logits/rejected": -1.420501470565796, + "logps/chosen": -204.3192596435547, + "logps/rejected": -263.35943603515625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.69873332977295, + "rewards/margins": 10.078238487243652, + "rewards/rejected": -18.776973724365234, + "step": 5042 + }, + { + "epoch": 8.09, + "learning_rate": 5.58858501783591e-08, + "logits/chosen": -1.4123389720916748, + "logits/rejected": -1.4243537187576294, + "logps/chosen": -164.47825622558594, + "logps/rejected": -311.9554443359375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.81943416595459, + "rewards/margins": 14.671142578125, + "rewards/rejected": -22.490577697753906, + "step": 5043 + }, + { + "epoch": 8.1, + "learning_rate": 5.578676179151803e-08, + "logits/chosen": -1.5765647888183594, + "logits/rejected": -1.6028544902801514, + "logps/chosen": -166.08102416992188, + "logps/rejected": -295.8233947753906, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.654403686523438, + "rewards/margins": 13.038127899169922, + "rewards/rejected": -21.692533493041992, + "step": 5044 + }, + { + "epoch": 8.1, + "learning_rate": 5.568767340467697e-08, + "logits/chosen": -1.489105463027954, + "logits/rejected": -1.5064350366592407, + "logps/chosen": -160.70042419433594, + "logps/rejected": -285.38397216796875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.599658966064453, + "rewards/margins": 13.865150451660156, + "rewards/rejected": -22.46480941772461, + "step": 5045 + }, + { + "epoch": 8.1, + "learning_rate": 5.5588585017835906e-08, + "logits/chosen": -1.5630371570587158, + "logits/rejected": -1.4994441270828247, + "logps/chosen": -193.54808044433594, + "logps/rejected": -293.2505798339844, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.045967102050781, + "rewards/margins": 11.9987154006958, + "rewards/rejected": -22.0446834564209, + "step": 5046 + }, + { + "epoch": 8.1, + "learning_rate": 5.5489496630994846e-08, + "logits/chosen": -1.538164734840393, + "logits/rejected": -1.4827446937561035, + "logps/chosen": -192.9226837158203, + "logps/rejected": -320.33245849609375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.058818817138672, + "rewards/margins": 13.763336181640625, + "rewards/rejected": -22.822154998779297, + "step": 5047 + }, + { + "epoch": 8.1, + "learning_rate": 5.539040824415378e-08, + "logits/chosen": -1.3537883758544922, + "logits/rejected": -1.3938060998916626, + "logps/chosen": -164.91629028320312, + "logps/rejected": -321.3673095703125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.238584518432617, + "rewards/margins": 13.330362319946289, + "rewards/rejected": -22.568946838378906, + "step": 5048 + }, + { + "epoch": 8.1, + "learning_rate": 5.5291319857312725e-08, + "logits/chosen": -1.5404731035232544, + "logits/rejected": -1.497546672821045, + "logps/chosen": -148.090576171875, + "logps/rejected": -273.2918395996094, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.117520809173584, + "rewards/margins": 12.487574577331543, + "rewards/rejected": -19.60509490966797, + "step": 5049 + }, + { + "epoch": 8.11, + "learning_rate": 5.519223147047166e-08, + "logits/chosen": -1.5431071519851685, + "logits/rejected": -1.5490161180496216, + "logps/chosen": -144.009765625, + "logps/rejected": -302.0162658691406, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.666836261749268, + "rewards/margins": 15.751458168029785, + "rewards/rejected": -22.41829490661621, + "step": 5050 + }, + { + "epoch": 8.11, + "learning_rate": 5.50931430836306e-08, + "logits/chosen": -1.3245344161987305, + "logits/rejected": -1.433255672454834, + "logps/chosen": -151.63150024414062, + "logps/rejected": -315.32684326171875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.11798095703125, + "rewards/margins": 16.10100746154785, + "rewards/rejected": -24.2189884185791, + "step": 5051 + }, + { + "epoch": 8.11, + "learning_rate": 5.499405469678953e-08, + "logits/chosen": -1.3919018507003784, + "logits/rejected": -1.3819198608398438, + "logps/chosen": -169.50888061523438, + "logps/rejected": -335.4990234375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.446106910705566, + "rewards/margins": 14.945223808288574, + "rewards/rejected": -24.39133071899414, + "step": 5052 + }, + { + "epoch": 8.11, + "learning_rate": 5.489496630994847e-08, + "logits/chosen": -1.4256622791290283, + "logits/rejected": -1.3341045379638672, + "logps/chosen": -148.5101318359375, + "logps/rejected": -232.42657470703125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.070347785949707, + "rewards/margins": 12.743900299072266, + "rewards/rejected": -17.814247131347656, + "step": 5053 + }, + { + "epoch": 8.11, + "learning_rate": 5.4795877923107405e-08, + "logits/chosen": -1.5300273895263672, + "logits/rejected": -1.550735592842102, + "logps/chosen": -147.5670166015625, + "logps/rejected": -301.1936950683594, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.415641784667969, + "rewards/margins": 13.905941009521484, + "rewards/rejected": -21.321582794189453, + "step": 5054 + }, + { + "epoch": 8.11, + "learning_rate": 5.4696789536266345e-08, + "logits/chosen": -1.5175191164016724, + "logits/rejected": -1.4347615242004395, + "logps/chosen": -187.27963256835938, + "logps/rejected": -277.860107421875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.056796073913574, + "rewards/margins": 10.569830894470215, + "rewards/rejected": -19.626625061035156, + "step": 5055 + }, + { + "epoch": 8.12, + "learning_rate": 5.4597701149425285e-08, + "logits/chosen": -1.4434763193130493, + "logits/rejected": -1.4689055681228638, + "logps/chosen": -115.36345672607422, + "logps/rejected": -257.468017578125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.2415337562561035, + "rewards/margins": 14.268461227416992, + "rewards/rejected": -18.509994506835938, + "step": 5056 + }, + { + "epoch": 8.12, + "learning_rate": 5.4498612762584225e-08, + "logits/chosen": -1.348152756690979, + "logits/rejected": -1.3407429456710815, + "logps/chosen": -141.50672912597656, + "logps/rejected": -371.22857666015625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.665949821472168, + "rewards/margins": 19.32552719116211, + "rewards/rejected": -26.991477966308594, + "step": 5057 + }, + { + "epoch": 8.12, + "learning_rate": 5.439952437574316e-08, + "logits/chosen": -1.351288080215454, + "logits/rejected": -1.3172062635421753, + "logps/chosen": -124.28700256347656, + "logps/rejected": -238.76849365234375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.020299434661865, + "rewards/margins": 12.471121788024902, + "rewards/rejected": -17.49142074584961, + "step": 5058 + }, + { + "epoch": 8.12, + "learning_rate": 5.43004359889021e-08, + "logits/chosen": -1.736776351928711, + "logits/rejected": -1.6537883281707764, + "logps/chosen": -157.4159698486328, + "logps/rejected": -281.6681823730469, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.627684116363525, + "rewards/margins": 12.948099136352539, + "rewards/rejected": -20.575782775878906, + "step": 5059 + }, + { + "epoch": 8.12, + "learning_rate": 5.420134760206103e-08, + "logits/chosen": -1.2693170309066772, + "logits/rejected": -1.4157339334487915, + "logps/chosen": -172.37344360351562, + "logps/rejected": -341.18060302734375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.837562561035156, + "rewards/margins": 15.221444129943848, + "rewards/rejected": -23.059005737304688, + "step": 5060 + }, + { + "epoch": 8.12, + "learning_rate": 5.410225921521997e-08, + "logits/chosen": -1.5979796648025513, + "logits/rejected": -1.5742353200912476, + "logps/chosen": -116.18873596191406, + "logps/rejected": -240.2010498046875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.072524547576904, + "rewards/margins": 12.628009796142578, + "rewards/rejected": -16.70053482055664, + "step": 5061 + }, + { + "epoch": 8.13, + "learning_rate": 5.400317082837892e-08, + "logits/chosen": -1.5865850448608398, + "logits/rejected": -1.5695440769195557, + "logps/chosen": -146.6780242919922, + "logps/rejected": -264.7309875488281, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.6397576332092285, + "rewards/margins": 11.654539108276367, + "rewards/rejected": -18.29429817199707, + "step": 5062 + }, + { + "epoch": 8.13, + "learning_rate": 5.390408244153785e-08, + "logits/chosen": -1.4456971883773804, + "logits/rejected": -1.555199146270752, + "logps/chosen": -171.9786834716797, + "logps/rejected": -320.063232421875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.749044418334961, + "rewards/margins": 14.354634284973145, + "rewards/rejected": -23.103679656982422, + "step": 5063 + }, + { + "epoch": 8.13, + "learning_rate": 5.380499405469679e-08, + "logits/chosen": -1.4507858753204346, + "logits/rejected": -1.482527732849121, + "logps/chosen": -134.48248291015625, + "logps/rejected": -284.8904724121094, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.931842803955078, + "rewards/margins": 14.497659683227539, + "rewards/rejected": -21.429502487182617, + "step": 5064 + }, + { + "epoch": 8.13, + "learning_rate": 5.3705905667855724e-08, + "logits/chosen": -1.496683120727539, + "logits/rejected": -1.524594783782959, + "logps/chosen": -163.3860626220703, + "logps/rejected": -339.73797607421875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.885995864868164, + "rewards/margins": 16.588932037353516, + "rewards/rejected": -25.474929809570312, + "step": 5065 + }, + { + "epoch": 8.13, + "learning_rate": 5.3606817281014664e-08, + "logits/chosen": -1.6507940292358398, + "logits/rejected": -1.6027113199234009, + "logps/chosen": -153.49307250976562, + "logps/rejected": -323.869873046875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.315009593963623, + "rewards/margins": 16.979751586914062, + "rewards/rejected": -24.294761657714844, + "step": 5066 + }, + { + "epoch": 8.13, + "learning_rate": 5.35077288941736e-08, + "logits/chosen": -1.7583986520767212, + "logits/rejected": -1.657734751701355, + "logps/chosen": -129.37164306640625, + "logps/rejected": -250.50729370117188, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.866494655609131, + "rewards/margins": 14.284355163574219, + "rewards/rejected": -19.150850296020508, + "step": 5067 + }, + { + "epoch": 8.13, + "learning_rate": 5.340864050733254e-08, + "logits/chosen": -1.5237237215042114, + "logits/rejected": -1.6158084869384766, + "logps/chosen": -153.3120880126953, + "logps/rejected": -309.6342468261719, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.401657104492188, + "rewards/margins": 14.24422550201416, + "rewards/rejected": -22.645883560180664, + "step": 5068 + }, + { + "epoch": 8.14, + "learning_rate": 5.330955212049148e-08, + "logits/chosen": -1.333419680595398, + "logits/rejected": -1.3436493873596191, + "logps/chosen": -148.09750366210938, + "logps/rejected": -308.4159240722656, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.034862995147705, + "rewards/margins": 15.137421607971191, + "rewards/rejected": -21.172285079956055, + "step": 5069 + }, + { + "epoch": 8.14, + "learning_rate": 5.321046373365042e-08, + "logits/chosen": -1.4813337326049805, + "logits/rejected": -1.4650534391403198, + "logps/chosen": -134.33457946777344, + "logps/rejected": -289.8671875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.656108856201172, + "rewards/margins": 15.921088218688965, + "rewards/rejected": -22.577198028564453, + "step": 5070 + }, + { + "epoch": 8.14, + "learning_rate": 5.311137534680935e-08, + "logits/chosen": -1.481584072113037, + "logits/rejected": -1.4548063278198242, + "logps/chosen": -242.00047302246094, + "logps/rejected": -347.12603759765625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -13.28624439239502, + "rewards/margins": 12.193798065185547, + "rewards/rejected": -25.48004150390625, + "step": 5071 + }, + { + "epoch": 8.14, + "learning_rate": 5.301228695996829e-08, + "logits/chosen": -1.2863926887512207, + "logits/rejected": -1.3679172992706299, + "logps/chosen": -152.23240661621094, + "logps/rejected": -306.7030029296875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.8059773445129395, + "rewards/margins": 14.300333023071289, + "rewards/rejected": -22.10630989074707, + "step": 5072 + }, + { + "epoch": 8.14, + "learning_rate": 5.2913198573127224e-08, + "logits/chosen": -1.4340667724609375, + "logits/rejected": -1.390170693397522, + "logps/chosen": -217.89894104003906, + "logps/rejected": -337.4667663574219, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.58055305480957, + "rewards/margins": 12.346675872802734, + "rewards/rejected": -23.927228927612305, + "step": 5073 + }, + { + "epoch": 8.14, + "learning_rate": 5.2814110186286164e-08, + "logits/chosen": -1.483917236328125, + "logits/rejected": -1.4543235301971436, + "logps/chosen": -116.32361602783203, + "logps/rejected": -300.0391845703125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.299561023712158, + "rewards/margins": 17.454004287719727, + "rewards/rejected": -20.753562927246094, + "step": 5074 + }, + { + "epoch": 8.15, + "learning_rate": 5.2715021799445104e-08, + "logits/chosen": -1.4699612855911255, + "logits/rejected": -1.4675521850585938, + "logps/chosen": -113.22071075439453, + "logps/rejected": -260.8274841308594, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.277024745941162, + "rewards/margins": 14.758099555969238, + "rewards/rejected": -19.035123825073242, + "step": 5075 + }, + { + "epoch": 8.15, + "learning_rate": 5.2615933412604044e-08, + "logits/chosen": -1.428161859512329, + "logits/rejected": -1.5020971298217773, + "logps/chosen": -168.10629272460938, + "logps/rejected": -312.34979248046875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.963953018188477, + "rewards/margins": 12.337993621826172, + "rewards/rejected": -21.30194854736328, + "step": 5076 + }, + { + "epoch": 8.15, + "learning_rate": 5.251684502576298e-08, + "logits/chosen": -1.3886592388153076, + "logits/rejected": -1.4737379550933838, + "logps/chosen": -135.15199279785156, + "logps/rejected": -320.681884765625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.636810302734375, + "rewards/margins": 15.491537094116211, + "rewards/rejected": -21.12834930419922, + "step": 5077 + }, + { + "epoch": 8.15, + "learning_rate": 5.241775663892192e-08, + "logits/chosen": -1.4938642978668213, + "logits/rejected": -1.555249810218811, + "logps/chosen": -126.94692993164062, + "logps/rejected": -302.3984069824219, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.227471351623535, + "rewards/margins": 12.944963455200195, + "rewards/rejected": -18.172435760498047, + "step": 5078 + }, + { + "epoch": 8.15, + "learning_rate": 5.231866825208085e-08, + "logits/chosen": -1.601623773574829, + "logits/rejected": -1.454893946647644, + "logps/chosen": -205.5631103515625, + "logps/rejected": -310.2101135253906, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.927489280700684, + "rewards/margins": 13.477229118347168, + "rewards/rejected": -21.40471839904785, + "step": 5079 + }, + { + "epoch": 8.15, + "learning_rate": 5.221957986523979e-08, + "logits/chosen": -1.3790454864501953, + "logits/rejected": -1.362449049949646, + "logps/chosen": -141.59085083007812, + "logps/rejected": -305.05804443359375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.994536876678467, + "rewards/margins": 15.143936157226562, + "rewards/rejected": -21.13847541809082, + "step": 5080 + }, + { + "epoch": 8.16, + "learning_rate": 5.212049147839872e-08, + "logits/chosen": -1.6148502826690674, + "logits/rejected": -1.613560438156128, + "logps/chosen": -121.201416015625, + "logps/rejected": -306.168701171875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.573497772216797, + "rewards/margins": 17.48575210571289, + "rewards/rejected": -22.059249877929688, + "step": 5081 + }, + { + "epoch": 8.16, + "learning_rate": 5.202140309155767e-08, + "logits/chosen": -1.4012417793273926, + "logits/rejected": -1.4275753498077393, + "logps/chosen": -123.32818603515625, + "logps/rejected": -295.35870361328125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.903429985046387, + "rewards/margins": 17.19191551208496, + "rewards/rejected": -22.095346450805664, + "step": 5082 + }, + { + "epoch": 8.16, + "learning_rate": 5.19223147047166e-08, + "logits/chosen": -1.4553865194320679, + "logits/rejected": -1.477596640586853, + "logps/chosen": -161.32533264160156, + "logps/rejected": -308.0281982421875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.62190055847168, + "rewards/margins": 14.484363555908203, + "rewards/rejected": -22.106266021728516, + "step": 5083 + }, + { + "epoch": 8.16, + "learning_rate": 5.182322631787554e-08, + "logits/chosen": -1.4680508375167847, + "logits/rejected": -1.537972092628479, + "logps/chosen": -128.23106384277344, + "logps/rejected": -325.38482666015625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.54862117767334, + "rewards/margins": 17.914318084716797, + "rewards/rejected": -23.462940216064453, + "step": 5084 + }, + { + "epoch": 8.16, + "learning_rate": 5.172413793103448e-08, + "logits/chosen": -1.4114954471588135, + "logits/rejected": -1.4394900798797607, + "logps/chosen": -193.1170654296875, + "logps/rejected": -348.067138671875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.064460754394531, + "rewards/margins": 14.09317398071289, + "rewards/rejected": -24.157634735107422, + "step": 5085 + }, + { + "epoch": 8.16, + "learning_rate": 5.1625049544193416e-08, + "logits/chosen": -1.3996275663375854, + "logits/rejected": -1.4009391069412231, + "logps/chosen": -193.96548461914062, + "logps/rejected": -316.7656555175781, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.882570266723633, + "rewards/margins": 11.83572769165039, + "rewards/rejected": -21.718297958374023, + "step": 5086 + }, + { + "epoch": 8.17, + "learning_rate": 5.1525961157352356e-08, + "logits/chosen": -1.3705863952636719, + "logits/rejected": -1.3913012742996216, + "logps/chosen": -167.556640625, + "logps/rejected": -310.730712890625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.111763000488281, + "rewards/margins": 14.128606796264648, + "rewards/rejected": -22.24036979675293, + "step": 5087 + }, + { + "epoch": 8.17, + "learning_rate": 5.1426872770511296e-08, + "logits/chosen": -1.7474887371063232, + "logits/rejected": -1.748692512512207, + "logps/chosen": -79.67659759521484, + "logps/rejected": -243.1836395263672, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.365725040435791, + "rewards/margins": 15.220466613769531, + "rewards/rejected": -17.586193084716797, + "step": 5088 + }, + { + "epoch": 8.17, + "learning_rate": 5.1327784383670236e-08, + "logits/chosen": -1.5200344324111938, + "logits/rejected": -1.5627028942108154, + "logps/chosen": -89.03565979003906, + "logps/rejected": -236.24752807617188, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.160780191421509, + "rewards/margins": 13.283350944519043, + "rewards/rejected": -16.444129943847656, + "step": 5089 + }, + { + "epoch": 8.17, + "learning_rate": 5.122869599682917e-08, + "logits/chosen": -1.508188247680664, + "logits/rejected": -1.395262598991394, + "logps/chosen": -161.50033569335938, + "logps/rejected": -251.58206176757812, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.188433647155762, + "rewards/margins": 10.691720962524414, + "rewards/rejected": -18.88015365600586, + "step": 5090 + }, + { + "epoch": 8.17, + "learning_rate": 5.112960760998811e-08, + "logits/chosen": -1.4029464721679688, + "logits/rejected": -1.4177160263061523, + "logps/chosen": -166.69992065429688, + "logps/rejected": -322.7857360839844, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.454630851745605, + "rewards/margins": 15.319622993469238, + "rewards/rejected": -23.774253845214844, + "step": 5091 + }, + { + "epoch": 8.17, + "learning_rate": 5.103051922314704e-08, + "logits/chosen": -1.6754188537597656, + "logits/rejected": -1.5991618633270264, + "logps/chosen": -188.69818115234375, + "logps/rejected": -347.3271789550781, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.433531761169434, + "rewards/margins": 16.410064697265625, + "rewards/rejected": -23.843597412109375, + "step": 5092 + }, + { + "epoch": 8.17, + "learning_rate": 5.093143083630598e-08, + "logits/chosen": -1.4758355617523193, + "logits/rejected": -1.4905071258544922, + "logps/chosen": -139.42510986328125, + "logps/rejected": -295.32208251953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.321619510650635, + "rewards/margins": 15.53695297241211, + "rewards/rejected": -20.85857391357422, + "step": 5093 + }, + { + "epoch": 8.18, + "learning_rate": 5.0832342449464916e-08, + "logits/chosen": -1.3711936473846436, + "logits/rejected": -1.3864518404006958, + "logps/chosen": -181.0755615234375, + "logps/rejected": -293.3828125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.107027053833008, + "rewards/margins": 10.652737617492676, + "rewards/rejected": -19.759763717651367, + "step": 5094 + }, + { + "epoch": 8.18, + "learning_rate": 5.073325406262386e-08, + "logits/chosen": -1.4505525827407837, + "logits/rejected": -1.4753735065460205, + "logps/chosen": -125.75383758544922, + "logps/rejected": -298.8839111328125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.598180770874023, + "rewards/margins": 15.174306869506836, + "rewards/rejected": -20.77248764038086, + "step": 5095 + }, + { + "epoch": 8.18, + "learning_rate": 5.0634165675782796e-08, + "logits/chosen": -1.4835758209228516, + "logits/rejected": -1.4890576601028442, + "logps/chosen": -171.454833984375, + "logps/rejected": -298.5585021972656, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.203041076660156, + "rewards/margins": 13.888840675354004, + "rewards/rejected": -22.091880798339844, + "step": 5096 + }, + { + "epoch": 8.18, + "learning_rate": 5.0535077288941735e-08, + "logits/chosen": -1.634300947189331, + "logits/rejected": -1.7208561897277832, + "logps/chosen": -110.46183776855469, + "logps/rejected": -292.8895263671875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.009483814239502, + "rewards/margins": 17.37281608581543, + "rewards/rejected": -21.382301330566406, + "step": 5097 + }, + { + "epoch": 8.18, + "learning_rate": 5.043598890210067e-08, + "logits/chosen": -1.5398123264312744, + "logits/rejected": -1.4083995819091797, + "logps/chosen": -188.59837341308594, + "logps/rejected": -295.8351745605469, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.349726676940918, + "rewards/margins": 13.911592483520508, + "rewards/rejected": -21.261316299438477, + "step": 5098 + }, + { + "epoch": 8.18, + "learning_rate": 5.033690051525961e-08, + "logits/chosen": -1.4137226343154907, + "logits/rejected": -1.4587455987930298, + "logps/chosen": -148.58836364746094, + "logps/rejected": -271.8800048828125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.362157821655273, + "rewards/margins": 12.289802551269531, + "rewards/rejected": -18.651960372924805, + "step": 5099 + }, + { + "epoch": 8.19, + "learning_rate": 5.023781212841854e-08, + "logits/chosen": -1.5358881950378418, + "logits/rejected": -1.4015371799468994, + "logps/chosen": -157.66680908203125, + "logps/rejected": -260.55096435546875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.502677917480469, + "rewards/margins": 12.420709609985352, + "rewards/rejected": -19.923385620117188, + "step": 5100 + }, + { + "epoch": 8.19, + "learning_rate": 5.013872374157749e-08, + "logits/chosen": -1.4223418235778809, + "logits/rejected": -1.4735400676727295, + "logps/chosen": -194.16067504882812, + "logps/rejected": -359.5694885253906, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.50988483428955, + "rewards/margins": 15.929627418518066, + "rewards/rejected": -26.439512252807617, + "step": 5101 + }, + { + "epoch": 8.19, + "learning_rate": 5.003963535473642e-08, + "logits/chosen": -1.4215372800827026, + "logits/rejected": -1.3772810697555542, + "logps/chosen": -146.60418701171875, + "logps/rejected": -287.7825622558594, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.455900192260742, + "rewards/margins": 14.481273651123047, + "rewards/rejected": -19.937171936035156, + "step": 5102 + }, + { + "epoch": 8.19, + "learning_rate": 4.994054696789536e-08, + "logits/chosen": -1.414801836013794, + "logits/rejected": -1.3963478803634644, + "logps/chosen": -200.4111328125, + "logps/rejected": -349.3544616699219, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.312349319458008, + "rewards/margins": 14.568628311157227, + "rewards/rejected": -24.880977630615234, + "step": 5103 + }, + { + "epoch": 8.19, + "learning_rate": 4.9841458581054295e-08, + "logits/chosen": -1.4842941761016846, + "logits/rejected": -1.4571878910064697, + "logps/chosen": -145.43260192871094, + "logps/rejected": -281.86614990234375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.811075210571289, + "rewards/margins": 15.679715156555176, + "rewards/rejected": -20.49078941345215, + "step": 5104 + }, + { + "epoch": 8.19, + "learning_rate": 4.9742370194213235e-08, + "logits/chosen": -1.4301748275756836, + "logits/rejected": -1.460841178894043, + "logps/chosen": -140.57542419433594, + "logps/rejected": -258.31640625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.294828414916992, + "rewards/margins": 12.014095306396484, + "rewards/rejected": -18.30892562866211, + "step": 5105 + }, + { + "epoch": 8.2, + "learning_rate": 4.964328180737217e-08, + "logits/chosen": -1.2384023666381836, + "logits/rejected": -1.3091481924057007, + "logps/chosen": -129.36288452148438, + "logps/rejected": -249.0443115234375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.156897068023682, + "rewards/margins": 10.944259643554688, + "rewards/rejected": -18.101158142089844, + "step": 5106 + }, + { + "epoch": 8.2, + "learning_rate": 4.9544193420531115e-08, + "logits/chosen": -1.4444642066955566, + "logits/rejected": -1.402392864227295, + "logps/chosen": -119.92792510986328, + "logps/rejected": -264.2916564941406, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.0727434158325195, + "rewards/margins": 14.476799011230469, + "rewards/rejected": -20.549541473388672, + "step": 5107 + }, + { + "epoch": 8.2, + "learning_rate": 4.9445105033690055e-08, + "logits/chosen": -1.4735033512115479, + "logits/rejected": -1.4431796073913574, + "logps/chosen": -188.45013427734375, + "logps/rejected": -314.29364013671875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.397211074829102, + "rewards/margins": 13.13325023651123, + "rewards/rejected": -20.530460357666016, + "step": 5108 + }, + { + "epoch": 8.2, + "learning_rate": 4.934601664684899e-08, + "logits/chosen": -1.4985976219177246, + "logits/rejected": -1.521950602531433, + "logps/chosen": -151.35899353027344, + "logps/rejected": -324.0318298339844, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.544938564300537, + "rewards/margins": 17.623151779174805, + "rewards/rejected": -24.1680908203125, + "step": 5109 + }, + { + "epoch": 8.2, + "learning_rate": 4.924692826000793e-08, + "logits/chosen": -1.3496787548065186, + "logits/rejected": -1.4562433958053589, + "logps/chosen": -131.52581787109375, + "logps/rejected": -259.6567077636719, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.331926345825195, + "rewards/margins": 12.337860107421875, + "rewards/rejected": -17.66978645324707, + "step": 5110 + }, + { + "epoch": 8.2, + "learning_rate": 4.914783987316686e-08, + "logits/chosen": -1.3963894844055176, + "logits/rejected": -1.4670207500457764, + "logps/chosen": -153.27484130859375, + "logps/rejected": -316.08056640625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.25260591506958, + "rewards/margins": 14.616416931152344, + "rewards/rejected": -21.869022369384766, + "step": 5111 + }, + { + "epoch": 8.21, + "learning_rate": 4.90487514863258e-08, + "logits/chosen": -1.51712167263031, + "logits/rejected": -1.569057822227478, + "logps/chosen": -71.88379669189453, + "logps/rejected": -193.1371307373047, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1308507919311523, + "rewards/margins": 10.70413875579834, + "rewards/rejected": -12.834989547729492, + "step": 5112 + }, + { + "epoch": 8.21, + "learning_rate": 4.8949663099484734e-08, + "logits/chosen": -1.4194964170455933, + "logits/rejected": -1.3743622303009033, + "logps/chosen": -128.7135772705078, + "logps/rejected": -243.84156799316406, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.7843804359436035, + "rewards/margins": 12.251638412475586, + "rewards/rejected": -18.03601837158203, + "step": 5113 + }, + { + "epoch": 8.21, + "learning_rate": 4.885057471264368e-08, + "logits/chosen": -1.5524508953094482, + "logits/rejected": -1.5147383213043213, + "logps/chosen": -125.58204650878906, + "logps/rejected": -280.8977966308594, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.116694450378418, + "rewards/margins": 15.524200439453125, + "rewards/rejected": -20.64089584350586, + "step": 5114 + }, + { + "epoch": 8.21, + "learning_rate": 4.8751486325802614e-08, + "logits/chosen": -1.3182706832885742, + "logits/rejected": -1.3741024732589722, + "logps/chosen": -127.41967010498047, + "logps/rejected": -288.11529541015625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.105171203613281, + "rewards/margins": 15.086261749267578, + "rewards/rejected": -21.191434860229492, + "step": 5115 + }, + { + "epoch": 8.21, + "learning_rate": 4.8652397938961554e-08, + "logits/chosen": -1.5511788129806519, + "logits/rejected": -1.5150864124298096, + "logps/chosen": -197.15579223632812, + "logps/rejected": -328.03466796875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.27137565612793, + "rewards/margins": 13.625238418579102, + "rewards/rejected": -23.896615982055664, + "step": 5116 + }, + { + "epoch": 8.21, + "learning_rate": 4.855330955212049e-08, + "logits/chosen": -1.2001014947891235, + "logits/rejected": -1.2234985828399658, + "logps/chosen": -161.50521850585938, + "logps/rejected": -297.93402099609375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.792704582214355, + "rewards/margins": 14.234567642211914, + "rewards/rejected": -23.027273178100586, + "step": 5117 + }, + { + "epoch": 8.22, + "learning_rate": 4.845422116527943e-08, + "logits/chosen": -1.5437005758285522, + "logits/rejected": -1.5481176376342773, + "logps/chosen": -186.07591247558594, + "logps/rejected": -274.8922119140625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.598838806152344, + "rewards/margins": 10.377668380737305, + "rewards/rejected": -19.97650718688965, + "step": 5118 + }, + { + "epoch": 8.22, + "learning_rate": 4.835513277843836e-08, + "logits/chosen": -1.2566767930984497, + "logits/rejected": -1.3876781463623047, + "logps/chosen": -125.52757263183594, + "logps/rejected": -289.8056640625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.215272903442383, + "rewards/margins": 13.30276107788086, + "rewards/rejected": -19.51803207397461, + "step": 5119 + }, + { + "epoch": 8.22, + "learning_rate": 4.825604439159731e-08, + "logits/chosen": -1.5804853439331055, + "logits/rejected": -1.5793533325195312, + "logps/chosen": -142.08921813964844, + "logps/rejected": -339.85626220703125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.612100601196289, + "rewards/margins": 20.0799503326416, + "rewards/rejected": -25.69205093383789, + "step": 5120 + }, + { + "epoch": 8.22, + "learning_rate": 4.815695600475624e-08, + "logits/chosen": -1.5343151092529297, + "logits/rejected": -1.589090347290039, + "logps/chosen": -115.3098373413086, + "logps/rejected": -316.15118408203125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.301639556884766, + "rewards/margins": 18.414833068847656, + "rewards/rejected": -22.716474533081055, + "step": 5121 + }, + { + "epoch": 8.22, + "learning_rate": 4.805786761791518e-08, + "logits/chosen": -1.5532604455947876, + "logits/rejected": -1.490398645401001, + "logps/chosen": -172.0666046142578, + "logps/rejected": -302.9784240722656, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.875302791595459, + "rewards/margins": 15.574329376220703, + "rewards/rejected": -23.449630737304688, + "step": 5122 + }, + { + "epoch": 8.22, + "learning_rate": 4.7958779231074114e-08, + "logits/chosen": -1.5807676315307617, + "logits/rejected": -1.6356165409088135, + "logps/chosen": -115.237060546875, + "logps/rejected": -273.85540771484375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.211627960205078, + "rewards/margins": 14.481056213378906, + "rewards/rejected": -17.692684173583984, + "step": 5123 + }, + { + "epoch": 8.22, + "learning_rate": 4.7859690844233054e-08, + "logits/chosen": -1.586439609527588, + "logits/rejected": -1.6394612789154053, + "logps/chosen": -152.42369079589844, + "logps/rejected": -266.5174865722656, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.795353889465332, + "rewards/margins": 11.712722778320312, + "rewards/rejected": -18.508075714111328, + "step": 5124 + }, + { + "epoch": 8.23, + "learning_rate": 4.776060245739199e-08, + "logits/chosen": -1.6260563135147095, + "logits/rejected": -1.5742969512939453, + "logps/chosen": -167.72543334960938, + "logps/rejected": -286.3341369628906, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.882300853729248, + "rewards/margins": 12.845094680786133, + "rewards/rejected": -20.72739601135254, + "step": 5125 + }, + { + "epoch": 8.23, + "learning_rate": 4.766151407055093e-08, + "logits/chosen": -1.60086190700531, + "logits/rejected": -1.5827391147613525, + "logps/chosen": -177.86361694335938, + "logps/rejected": -323.115478515625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.7728962898254395, + "rewards/margins": 13.331098556518555, + "rewards/rejected": -21.103994369506836, + "step": 5126 + }, + { + "epoch": 8.23, + "learning_rate": 4.756242568370987e-08, + "logits/chosen": -1.311976671218872, + "logits/rejected": -1.3685868978500366, + "logps/chosen": -151.03897094726562, + "logps/rejected": -294.99102783203125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.2030487060546875, + "rewards/margins": 13.47610855102539, + "rewards/rejected": -20.67915916442871, + "step": 5127 + }, + { + "epoch": 8.23, + "learning_rate": 4.7463337296868807e-08, + "logits/chosen": -1.458266019821167, + "logits/rejected": -1.515960693359375, + "logps/chosen": -170.29443359375, + "logps/rejected": -318.7456359863281, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.084096431732178, + "rewards/margins": 13.62713623046875, + "rewards/rejected": -20.711231231689453, + "step": 5128 + }, + { + "epoch": 8.23, + "learning_rate": 4.736424891002774e-08, + "logits/chosen": -1.4271690845489502, + "logits/rejected": -1.4491811990737915, + "logps/chosen": -178.47567749023438, + "logps/rejected": -324.24005126953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.71942138671875, + "rewards/margins": 13.913209915161133, + "rewards/rejected": -22.632631301879883, + "step": 5129 + }, + { + "epoch": 8.23, + "learning_rate": 4.726516052318668e-08, + "logits/chosen": -1.3975355625152588, + "logits/rejected": -1.4172781705856323, + "logps/chosen": -153.28440856933594, + "logps/rejected": -317.8710021972656, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.059256553649902, + "rewards/margins": 15.881263732910156, + "rewards/rejected": -22.940519332885742, + "step": 5130 + }, + { + "epoch": 8.24, + "learning_rate": 4.716607213634562e-08, + "logits/chosen": -1.5428593158721924, + "logits/rejected": -1.4763214588165283, + "logps/chosen": -133.68008422851562, + "logps/rejected": -257.60614013671875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.394123077392578, + "rewards/margins": 13.68747329711914, + "rewards/rejected": -18.08159637451172, + "step": 5131 + }, + { + "epoch": 8.24, + "learning_rate": 4.706698374950455e-08, + "logits/chosen": -1.484191656112671, + "logits/rejected": -1.4783127307891846, + "logps/chosen": -120.51153564453125, + "logps/rejected": -228.36972045898438, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.537084102630615, + "rewards/margins": 11.270488739013672, + "rewards/rejected": -15.807573318481445, + "step": 5132 + }, + { + "epoch": 8.24, + "learning_rate": 4.69678953626635e-08, + "logits/chosen": -1.4122920036315918, + "logits/rejected": -1.4997228384017944, + "logps/chosen": -153.8106689453125, + "logps/rejected": -318.89801025390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.372363567352295, + "rewards/margins": 14.612371444702148, + "rewards/rejected": -20.9847354888916, + "step": 5133 + }, + { + "epoch": 8.24, + "learning_rate": 4.686880697582243e-08, + "logits/chosen": -1.3468425273895264, + "logits/rejected": -1.3429430723190308, + "logps/chosen": -110.84014129638672, + "logps/rejected": -230.60585021972656, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.1200337409973145, + "rewards/margins": 12.461665153503418, + "rewards/rejected": -16.581697463989258, + "step": 5134 + }, + { + "epoch": 8.24, + "learning_rate": 4.676971858898137e-08, + "logits/chosen": -1.3859641551971436, + "logits/rejected": -1.3686461448669434, + "logps/chosen": -215.64468383789062, + "logps/rejected": -359.96905517578125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.716794967651367, + "rewards/margins": 15.555336952209473, + "rewards/rejected": -28.272132873535156, + "step": 5135 + }, + { + "epoch": 8.24, + "learning_rate": 4.6670630202140306e-08, + "logits/chosen": -1.4891364574432373, + "logits/rejected": -1.4850022792816162, + "logps/chosen": -166.08688354492188, + "logps/rejected": -307.7164611816406, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.702917098999023, + "rewards/margins": 13.721424102783203, + "rewards/rejected": -22.424339294433594, + "step": 5136 + }, + { + "epoch": 8.25, + "learning_rate": 4.6571541815299246e-08, + "logits/chosen": -1.6219451427459717, + "logits/rejected": -1.5299346446990967, + "logps/chosen": -101.31915283203125, + "logps/rejected": -266.40838623046875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2050039768218994, + "rewards/margins": 16.019376754760742, + "rewards/rejected": -18.224380493164062, + "step": 5137 + }, + { + "epoch": 8.25, + "learning_rate": 4.647245342845818e-08, + "logits/chosen": -1.38709557056427, + "logits/rejected": -1.410101056098938, + "logps/chosen": -138.79559326171875, + "logps/rejected": -289.8111572265625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.619272708892822, + "rewards/margins": 14.890365600585938, + "rewards/rejected": -21.509639739990234, + "step": 5138 + }, + { + "epoch": 8.25, + "learning_rate": 4.637336504161712e-08, + "logits/chosen": -1.6790928840637207, + "logits/rejected": -1.7206487655639648, + "logps/chosen": -123.65803527832031, + "logps/rejected": -323.7371826171875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.414533615112305, + "rewards/margins": 19.56269073486328, + "rewards/rejected": -23.977222442626953, + "step": 5139 + }, + { + "epoch": 8.25, + "learning_rate": 4.627427665477606e-08, + "logits/chosen": -1.4670294523239136, + "logits/rejected": -1.3743274211883545, + "logps/chosen": -216.0963897705078, + "logps/rejected": -311.1836242675781, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.221027374267578, + "rewards/margins": 12.269194602966309, + "rewards/rejected": -22.490222930908203, + "step": 5140 + }, + { + "epoch": 8.25, + "learning_rate": 4.6175188267935e-08, + "logits/chosen": -1.3313297033309937, + "logits/rejected": -1.3589342832565308, + "logps/chosen": -165.27383422851562, + "logps/rejected": -304.97900390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.033641815185547, + "rewards/margins": 14.711692810058594, + "rewards/rejected": -23.74533462524414, + "step": 5141 + }, + { + "epoch": 8.25, + "learning_rate": 4.607609988109393e-08, + "logits/chosen": -1.6586132049560547, + "logits/rejected": -1.6788184642791748, + "logps/chosen": -115.73136901855469, + "logps/rejected": -273.6580810546875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.866942882537842, + "rewards/margins": 15.593997955322266, + "rewards/rejected": -20.460941314697266, + "step": 5142 + }, + { + "epoch": 8.26, + "learning_rate": 4.597701149425287e-08, + "logits/chosen": -1.5454264879226685, + "logits/rejected": -1.4608381986618042, + "logps/chosen": -156.799560546875, + "logps/rejected": -253.8870849609375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.654820442199707, + "rewards/margins": 9.879606246948242, + "rewards/rejected": -17.534425735473633, + "step": 5143 + }, + { + "epoch": 8.26, + "learning_rate": 4.5877923107411806e-08, + "logits/chosen": -1.4987781047821045, + "logits/rejected": -1.5228865146636963, + "logps/chosen": -139.7482452392578, + "logps/rejected": -312.787841796875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.337204456329346, + "rewards/margins": 16.64303207397461, + "rewards/rejected": -21.980236053466797, + "step": 5144 + }, + { + "epoch": 8.26, + "learning_rate": 4.5778834720570745e-08, + "logits/chosen": -1.4717087745666504, + "logits/rejected": -1.5544406175613403, + "logps/chosen": -179.5944366455078, + "logps/rejected": -304.38116455078125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.945579528808594, + "rewards/margins": 13.935062408447266, + "rewards/rejected": -20.88064193725586, + "step": 5145 + }, + { + "epoch": 8.26, + "learning_rate": 4.5679746333729685e-08, + "logits/chosen": -1.5290045738220215, + "logits/rejected": -1.5296361446380615, + "logps/chosen": -127.41400146484375, + "logps/rejected": -272.05950927734375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.897920608520508, + "rewards/margins": 13.842733383178711, + "rewards/rejected": -19.74065399169922, + "step": 5146 + }, + { + "epoch": 8.26, + "learning_rate": 4.5580657946888625e-08, + "logits/chosen": -1.6737252473831177, + "logits/rejected": -1.6110094785690308, + "logps/chosen": -179.14944458007812, + "logps/rejected": -319.37322998046875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.549347877502441, + "rewards/margins": 16.061315536499023, + "rewards/rejected": -24.61066246032715, + "step": 5147 + }, + { + "epoch": 8.26, + "learning_rate": 4.548156956004756e-08, + "logits/chosen": -1.7044216394424438, + "logits/rejected": -1.6126453876495361, + "logps/chosen": -191.3749542236328, + "logps/rejected": -304.8066101074219, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.563261985778809, + "rewards/margins": 12.610811233520508, + "rewards/rejected": -21.174074172973633, + "step": 5148 + }, + { + "epoch": 8.26, + "learning_rate": 4.53824811732065e-08, + "logits/chosen": -1.3605120182037354, + "logits/rejected": -1.391648769378662, + "logps/chosen": -171.09124755859375, + "logps/rejected": -382.6812744140625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.649832725524902, + "rewards/margins": 19.418827056884766, + "rewards/rejected": -28.06865882873535, + "step": 5149 + }, + { + "epoch": 8.27, + "learning_rate": 4.528339278636543e-08, + "logits/chosen": -1.3749734163284302, + "logits/rejected": -1.3194057941436768, + "logps/chosen": -147.36846923828125, + "logps/rejected": -326.4046325683594, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.211730003356934, + "rewards/margins": 18.41791343688965, + "rewards/rejected": -24.62964630126953, + "step": 5150 + }, + { + "epoch": 8.27, + "learning_rate": 4.518430439952437e-08, + "logits/chosen": -1.594281792640686, + "logits/rejected": -1.5282139778137207, + "logps/chosen": -199.19683837890625, + "logps/rejected": -338.7375183105469, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.40318775177002, + "rewards/margins": 15.229990005493164, + "rewards/rejected": -24.6331787109375, + "step": 5151 + }, + { + "epoch": 8.27, + "learning_rate": 4.5085216012683305e-08, + "logits/chosen": -1.4249906539916992, + "logits/rejected": -1.468375325202942, + "logps/chosen": -166.62429809570312, + "logps/rejected": -360.3803405761719, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.918121814727783, + "rewards/margins": 15.921875953674316, + "rewards/rejected": -23.839996337890625, + "step": 5152 + }, + { + "epoch": 8.27, + "learning_rate": 4.498612762584225e-08, + "logits/chosen": -1.4988665580749512, + "logits/rejected": -1.519554615020752, + "logps/chosen": -232.14016723632812, + "logps/rejected": -353.3851623535156, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.406328201293945, + "rewards/margins": 13.696742057800293, + "rewards/rejected": -26.103071212768555, + "step": 5153 + }, + { + "epoch": 8.27, + "learning_rate": 4.488703923900119e-08, + "logits/chosen": -1.4366344213485718, + "logits/rejected": -1.4614864587783813, + "logps/chosen": -237.99087524414062, + "logps/rejected": -310.2522277832031, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.63541030883789, + "rewards/margins": 11.056614875793457, + "rewards/rejected": -22.692026138305664, + "step": 5154 + }, + { + "epoch": 8.27, + "learning_rate": 4.4787950852160125e-08, + "logits/chosen": -1.44674551486969, + "logits/rejected": -1.5373154878616333, + "logps/chosen": -177.5706329345703, + "logps/rejected": -318.5850524902344, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.852296829223633, + "rewards/margins": 12.784524917602539, + "rewards/rejected": -22.636821746826172, + "step": 5155 + }, + { + "epoch": 8.28, + "learning_rate": 4.4688862465319065e-08, + "logits/chosen": -1.3579710721969604, + "logits/rejected": -1.389004111289978, + "logps/chosen": -196.2001953125, + "logps/rejected": -346.69207763671875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.4711275100708, + "rewards/margins": 13.181644439697266, + "rewards/rejected": -23.65277099609375, + "step": 5156 + }, + { + "epoch": 8.28, + "learning_rate": 4.4589774078478e-08, + "logits/chosen": -1.484028935432434, + "logits/rejected": -1.5528507232666016, + "logps/chosen": -89.6433334350586, + "logps/rejected": -254.35528564453125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.154792547225952, + "rewards/margins": 14.168242454528809, + "rewards/rejected": -17.323034286499023, + "step": 5157 + }, + { + "epoch": 8.28, + "learning_rate": 4.449068569163694e-08, + "logits/chosen": -1.375214695930481, + "logits/rejected": -1.3577628135681152, + "logps/chosen": -169.30833435058594, + "logps/rejected": -265.0123291015625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.69028091430664, + "rewards/margins": 10.162666320800781, + "rewards/rejected": -18.852947235107422, + "step": 5158 + }, + { + "epoch": 8.28, + "learning_rate": 4.439159730479588e-08, + "logits/chosen": -1.3717260360717773, + "logits/rejected": -1.4075770378112793, + "logps/chosen": -180.40802001953125, + "logps/rejected": -303.2076721191406, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.104228019714355, + "rewards/margins": 11.733637809753418, + "rewards/rejected": -21.837867736816406, + "step": 5159 + }, + { + "epoch": 8.28, + "learning_rate": 4.429250891795482e-08, + "logits/chosen": -1.3849165439605713, + "logits/rejected": -1.452247142791748, + "logps/chosen": -144.41082763671875, + "logps/rejected": -270.09185791015625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.715343475341797, + "rewards/margins": 11.57532024383545, + "rewards/rejected": -19.290664672851562, + "step": 5160 + }, + { + "epoch": 8.28, + "learning_rate": 4.419342053111375e-08, + "logits/chosen": -1.6619629859924316, + "logits/rejected": -1.5076358318328857, + "logps/chosen": -183.44464111328125, + "logps/rejected": -304.575927734375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.939453125, + "rewards/margins": 14.84921646118164, + "rewards/rejected": -22.78866958618164, + "step": 5161 + }, + { + "epoch": 8.29, + "learning_rate": 4.409433214427269e-08, + "logits/chosen": -1.4069055318832397, + "logits/rejected": -1.4857240915298462, + "logps/chosen": -193.20741271972656, + "logps/rejected": -330.2534484863281, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.965778350830078, + "rewards/margins": 11.52481746673584, + "rewards/rejected": -22.490596771240234, + "step": 5162 + }, + { + "epoch": 8.29, + "learning_rate": 4.3995243757431624e-08, + "logits/chosen": -1.4729615449905396, + "logits/rejected": -1.4703017473220825, + "logps/chosen": -157.56854248046875, + "logps/rejected": -276.4106140136719, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.401023864746094, + "rewards/margins": 10.746482849121094, + "rewards/rejected": -19.147504806518555, + "step": 5163 + }, + { + "epoch": 8.29, + "learning_rate": 4.3896155370590564e-08, + "logits/chosen": -1.4879143238067627, + "logits/rejected": -1.453555703163147, + "logps/chosen": -163.07437133789062, + "logps/rejected": -293.39215087890625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.800394058227539, + "rewards/margins": 13.85392951965332, + "rewards/rejected": -21.65432357788086, + "step": 5164 + }, + { + "epoch": 8.29, + "learning_rate": 4.3797066983749504e-08, + "logits/chosen": -1.4726214408874512, + "logits/rejected": -1.5315223932266235, + "logps/chosen": -139.57550048828125, + "logps/rejected": -301.63751220703125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.223726749420166, + "rewards/margins": 14.908395767211914, + "rewards/rejected": -21.132122039794922, + "step": 5165 + }, + { + "epoch": 8.29, + "learning_rate": 4.3697978596908444e-08, + "logits/chosen": -1.5038235187530518, + "logits/rejected": -1.6269805431365967, + "logps/chosen": -121.80186462402344, + "logps/rejected": -281.73876953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.848247051239014, + "rewards/margins": 13.319499015808105, + "rewards/rejected": -18.16774559020996, + "step": 5166 + }, + { + "epoch": 8.29, + "learning_rate": 4.359889021006738e-08, + "logits/chosen": -1.3230708837509155, + "logits/rejected": -1.4108960628509521, + "logps/chosen": -123.50393676757812, + "logps/rejected": -269.09716796875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.834733247756958, + "rewards/margins": 14.46695327758789, + "rewards/rejected": -18.301685333251953, + "step": 5167 + }, + { + "epoch": 8.3, + "learning_rate": 4.349980182322632e-08, + "logits/chosen": -1.4811941385269165, + "logits/rejected": -1.5559921264648438, + "logps/chosen": -210.59011840820312, + "logps/rejected": -341.37274169921875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.070332527160645, + "rewards/margins": 13.439475059509277, + "rewards/rejected": -24.509807586669922, + "step": 5168 + }, + { + "epoch": 8.3, + "learning_rate": 4.340071343638525e-08, + "logits/chosen": -1.5536938905715942, + "logits/rejected": -1.597390055656433, + "logps/chosen": -110.17076873779297, + "logps/rejected": -261.3765869140625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.4465441703796387, + "rewards/margins": 14.587087631225586, + "rewards/rejected": -18.033632278442383, + "step": 5169 + }, + { + "epoch": 8.3, + "learning_rate": 4.330162504954419e-08, + "logits/chosen": -1.4611495733261108, + "logits/rejected": -1.4258003234863281, + "logps/chosen": -187.3755645751953, + "logps/rejected": -301.1697998046875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.046670913696289, + "rewards/margins": 13.879430770874023, + "rewards/rejected": -21.926101684570312, + "step": 5170 + }, + { + "epoch": 8.3, + "learning_rate": 4.3202536662703124e-08, + "logits/chosen": -1.4131512641906738, + "logits/rejected": -1.4416091442108154, + "logps/chosen": -155.99752807617188, + "logps/rejected": -280.8774108886719, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.4151716232299805, + "rewards/margins": 13.072311401367188, + "rewards/rejected": -20.487483978271484, + "step": 5171 + }, + { + "epoch": 8.3, + "learning_rate": 4.310344827586207e-08, + "logits/chosen": -1.6658716201782227, + "logits/rejected": -1.5841137170791626, + "logps/chosen": -188.8218994140625, + "logps/rejected": -302.11627197265625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.584357261657715, + "rewards/margins": 14.219542503356934, + "rewards/rejected": -20.80390167236328, + "step": 5172 + }, + { + "epoch": 8.3, + "learning_rate": 4.3004359889021004e-08, + "logits/chosen": -1.5297801494598389, + "logits/rejected": -1.5144885778427124, + "logps/chosen": -182.51376342773438, + "logps/rejected": -345.5895080566406, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.79861831665039, + "rewards/margins": 16.811128616333008, + "rewards/rejected": -25.6097469329834, + "step": 5173 + }, + { + "epoch": 8.3, + "learning_rate": 4.2905271502179943e-08, + "logits/chosen": -1.2920570373535156, + "logits/rejected": -1.3044688701629639, + "logps/chosen": -215.5894775390625, + "logps/rejected": -363.36810302734375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -13.452165603637695, + "rewards/margins": 13.231330871582031, + "rewards/rejected": -26.683496475219727, + "step": 5174 + }, + { + "epoch": 8.31, + "learning_rate": 4.280618311533888e-08, + "logits/chosen": -1.352297306060791, + "logits/rejected": -1.3863108158111572, + "logps/chosen": -180.0340576171875, + "logps/rejected": -340.9566650390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.30068588256836, + "rewards/margins": 14.409372329711914, + "rewards/rejected": -24.710058212280273, + "step": 5175 + }, + { + "epoch": 8.31, + "learning_rate": 4.2707094728497817e-08, + "logits/chosen": -1.5788507461547852, + "logits/rejected": -1.49993896484375, + "logps/chosen": -106.58621978759766, + "logps/rejected": -261.23052978515625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.640275239944458, + "rewards/margins": 16.37055015563965, + "rewards/rejected": -20.010826110839844, + "step": 5176 + }, + { + "epoch": 8.31, + "learning_rate": 4.2608006341656757e-08, + "logits/chosen": -1.487769365310669, + "logits/rejected": -1.4698734283447266, + "logps/chosen": -181.6748809814453, + "logps/rejected": -298.8360595703125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.310256958007812, + "rewards/margins": 11.734797477722168, + "rewards/rejected": -20.045055389404297, + "step": 5177 + }, + { + "epoch": 8.31, + "learning_rate": 4.2508917954815696e-08, + "logits/chosen": -1.4002796411514282, + "logits/rejected": -1.488922119140625, + "logps/chosen": -154.93101501464844, + "logps/rejected": -323.5036315917969, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.533254623413086, + "rewards/margins": 16.19569206237793, + "rewards/rejected": -23.728946685791016, + "step": 5178 + }, + { + "epoch": 8.31, + "learning_rate": 4.2409829567974636e-08, + "logits/chosen": -1.5944528579711914, + "logits/rejected": -1.5497052669525146, + "logps/chosen": -182.45437622070312, + "logps/rejected": -300.54595947265625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.90088176727295, + "rewards/margins": 12.162219047546387, + "rewards/rejected": -22.063100814819336, + "step": 5179 + }, + { + "epoch": 8.31, + "learning_rate": 4.231074118113357e-08, + "logits/chosen": -1.4936680793762207, + "logits/rejected": -1.4964358806610107, + "logps/chosen": -163.2779083251953, + "logps/rejected": -322.61187744140625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.648446083068848, + "rewards/margins": 18.054241180419922, + "rewards/rejected": -24.702686309814453, + "step": 5180 + }, + { + "epoch": 8.32, + "learning_rate": 4.221165279429251e-08, + "logits/chosen": -1.5535444021224976, + "logits/rejected": -1.6234707832336426, + "logps/chosen": -137.41046142578125, + "logps/rejected": -299.76593017578125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.560539722442627, + "rewards/margins": 13.806644439697266, + "rewards/rejected": -19.367185592651367, + "step": 5181 + }, + { + "epoch": 8.32, + "learning_rate": 4.211256440745144e-08, + "logits/chosen": -1.6586534976959229, + "logits/rejected": -1.5753275156021118, + "logps/chosen": -153.56471252441406, + "logps/rejected": -281.11285400390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.163983345031738, + "rewards/margins": 13.705907821655273, + "rewards/rejected": -19.869892120361328, + "step": 5182 + }, + { + "epoch": 8.32, + "learning_rate": 4.201347602061038e-08, + "logits/chosen": -1.3893486261367798, + "logits/rejected": -1.464874029159546, + "logps/chosen": -159.2215118408203, + "logps/rejected": -374.84893798828125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.057983875274658, + "rewards/margins": 19.59354591369629, + "rewards/rejected": -26.651531219482422, + "step": 5183 + }, + { + "epoch": 8.32, + "learning_rate": 4.1914387633769316e-08, + "logits/chosen": -1.585132122039795, + "logits/rejected": -1.6148730516433716, + "logps/chosen": -130.2586212158203, + "logps/rejected": -289.8560791015625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.059906959533691, + "rewards/margins": 14.284687995910645, + "rewards/rejected": -20.344594955444336, + "step": 5184 + }, + { + "epoch": 8.32, + "learning_rate": 4.181529924692826e-08, + "logits/chosen": -1.4038074016571045, + "logits/rejected": -1.4598780870437622, + "logps/chosen": -158.65277099609375, + "logps/rejected": -272.58880615234375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.650260925292969, + "rewards/margins": 11.788235664367676, + "rewards/rejected": -18.438495635986328, + "step": 5185 + }, + { + "epoch": 8.32, + "learning_rate": 4.1716210860087196e-08, + "logits/chosen": -1.4436628818511963, + "logits/rejected": -1.439208984375, + "logps/chosen": -214.38075256347656, + "logps/rejected": -352.32550048828125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.86561107635498, + "rewards/margins": 11.648797988891602, + "rewards/rejected": -22.5144100189209, + "step": 5186 + }, + { + "epoch": 8.33, + "learning_rate": 4.1617122473246136e-08, + "logits/chosen": -1.4136898517608643, + "logits/rejected": -1.4545224905014038, + "logps/chosen": -164.27230834960938, + "logps/rejected": -272.8527526855469, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.27429723739624, + "rewards/margins": 11.074076652526855, + "rewards/rejected": -18.348373413085938, + "step": 5187 + }, + { + "epoch": 8.33, + "learning_rate": 4.151803408640507e-08, + "logits/chosen": -1.3120441436767578, + "logits/rejected": -1.3818018436431885, + "logps/chosen": -177.75006103515625, + "logps/rejected": -314.7161865234375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.241519927978516, + "rewards/margins": 12.669726371765137, + "rewards/rejected": -21.91124725341797, + "step": 5188 + }, + { + "epoch": 8.33, + "learning_rate": 4.141894569956401e-08, + "logits/chosen": -1.6070737838745117, + "logits/rejected": -1.6179251670837402, + "logps/chosen": -136.48150634765625, + "logps/rejected": -287.1611633300781, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.623805522918701, + "rewards/margins": 14.824835777282715, + "rewards/rejected": -20.44864273071289, + "step": 5189 + }, + { + "epoch": 8.33, + "learning_rate": 4.131985731272294e-08, + "logits/chosen": -1.5084950923919678, + "logits/rejected": -1.569200038909912, + "logps/chosen": -185.41343688964844, + "logps/rejected": -314.920166015625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.299433708190918, + "rewards/margins": 11.884430885314941, + "rewards/rejected": -22.18386459350586, + "step": 5190 + }, + { + "epoch": 8.33, + "learning_rate": 4.122076892588189e-08, + "logits/chosen": -1.5399200916290283, + "logits/rejected": -1.4711393117904663, + "logps/chosen": -148.44656372070312, + "logps/rejected": -298.0243835449219, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.551476955413818, + "rewards/margins": 15.160928726196289, + "rewards/rejected": -22.712406158447266, + "step": 5191 + }, + { + "epoch": 8.33, + "learning_rate": 4.112168053904082e-08, + "logits/chosen": -1.6096912622451782, + "logits/rejected": -1.609188199043274, + "logps/chosen": -113.57937622070312, + "logps/rejected": -238.70111083984375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9879555702209473, + "rewards/margins": 12.71081256866455, + "rewards/rejected": -15.698768615722656, + "step": 5192 + }, + { + "epoch": 8.34, + "learning_rate": 4.102259215219976e-08, + "logits/chosen": -1.4353996515274048, + "logits/rejected": -1.361790657043457, + "logps/chosen": -160.63870239257812, + "logps/rejected": -293.07550048828125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.527153015136719, + "rewards/margins": 14.260472297668457, + "rewards/rejected": -21.78762435913086, + "step": 5193 + }, + { + "epoch": 8.34, + "learning_rate": 4.0923503765358695e-08, + "logits/chosen": -1.490644931793213, + "logits/rejected": -1.520757794380188, + "logps/chosen": -193.22459411621094, + "logps/rejected": -359.93585205078125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.841415405273438, + "rewards/margins": 17.33649444580078, + "rewards/rejected": -26.17790985107422, + "step": 5194 + }, + { + "epoch": 8.34, + "learning_rate": 4.0824415378517635e-08, + "logits/chosen": -1.372765064239502, + "logits/rejected": -1.4092589616775513, + "logps/chosen": -130.4632568359375, + "logps/rejected": -269.2786560058594, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.908160209655762, + "rewards/margins": 13.3020601272583, + "rewards/rejected": -19.210220336914062, + "step": 5195 + }, + { + "epoch": 8.34, + "learning_rate": 4.072532699167657e-08, + "logits/chosen": -1.5817519426345825, + "logits/rejected": -1.6100218296051025, + "logps/chosen": -156.6029052734375, + "logps/rejected": -288.60137939453125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.43353796005249, + "rewards/margins": 12.578824043273926, + "rewards/rejected": -19.012361526489258, + "step": 5196 + }, + { + "epoch": 8.34, + "learning_rate": 4.062623860483551e-08, + "logits/chosen": -1.5307211875915527, + "logits/rejected": -1.5063010454177856, + "logps/chosen": -127.19876098632812, + "logps/rejected": -265.962890625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.5858564376831055, + "rewards/margins": 12.719900131225586, + "rewards/rejected": -18.305757522583008, + "step": 5197 + }, + { + "epoch": 8.34, + "learning_rate": 4.0527150217994455e-08, + "logits/chosen": -1.5902678966522217, + "logits/rejected": -1.6491656303405762, + "logps/chosen": -112.53759765625, + "logps/rejected": -257.5395202636719, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.9026384353637695, + "rewards/margins": 13.203434944152832, + "rewards/rejected": -18.1060733795166, + "step": 5198 + }, + { + "epoch": 8.35, + "learning_rate": 4.042806183115339e-08, + "logits/chosen": -1.4810292720794678, + "logits/rejected": -1.4441901445388794, + "logps/chosen": -132.93214416503906, + "logps/rejected": -309.232421875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.081235408782959, + "rewards/margins": 17.548236846923828, + "rewards/rejected": -22.629474639892578, + "step": 5199 + }, + { + "epoch": 8.35, + "learning_rate": 4.032897344431233e-08, + "logits/chosen": -1.5036870241165161, + "logits/rejected": -1.5365803241729736, + "logps/chosen": -87.29761505126953, + "logps/rejected": -251.39393615722656, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.52522349357605, + "rewards/margins": 15.190144538879395, + "rewards/rejected": -17.715368270874023, + "step": 5200 + }, + { + "epoch": 8.35, + "learning_rate": 4.022988505747126e-08, + "logits/chosen": -1.683577060699463, + "logits/rejected": -1.671195387840271, + "logps/chosen": -157.57785034179688, + "logps/rejected": -243.27049255371094, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.680592060089111, + "rewards/margins": 11.694573402404785, + "rewards/rejected": -18.375165939331055, + "step": 5201 + }, + { + "epoch": 8.35, + "learning_rate": 4.01307966706302e-08, + "logits/chosen": -1.4120746850967407, + "logits/rejected": -1.3659647703170776, + "logps/chosen": -213.24258422851562, + "logps/rejected": -306.2867431640625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.79353141784668, + "rewards/margins": 11.055852890014648, + "rewards/rejected": -21.849384307861328, + "step": 5202 + }, + { + "epoch": 8.35, + "learning_rate": 4.0031708283789135e-08, + "logits/chosen": -1.5959912538528442, + "logits/rejected": -1.588282585144043, + "logps/chosen": -122.85997009277344, + "logps/rejected": -242.13272094726562, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.986435413360596, + "rewards/margins": 13.014169692993164, + "rewards/rejected": -18.0006046295166, + "step": 5203 + }, + { + "epoch": 8.35, + "learning_rate": 3.993261989694808e-08, + "logits/chosen": -1.5501956939697266, + "logits/rejected": -1.5993369817733765, + "logps/chosen": -177.4984130859375, + "logps/rejected": -285.6552734375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.829047679901123, + "rewards/margins": 11.893478393554688, + "rewards/rejected": -18.72252655029297, + "step": 5204 + }, + { + "epoch": 8.35, + "learning_rate": 3.9833531510107015e-08, + "logits/chosen": -1.4408024549484253, + "logits/rejected": -1.4746854305267334, + "logps/chosen": -157.671142578125, + "logps/rejected": -312.572265625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.414280414581299, + "rewards/margins": 15.366766929626465, + "rewards/rejected": -22.781047821044922, + "step": 5205 + }, + { + "epoch": 8.36, + "learning_rate": 3.9734443123265955e-08, + "logits/chosen": -1.3644756078720093, + "logits/rejected": -1.3172588348388672, + "logps/chosen": -189.35586547851562, + "logps/rejected": -288.980712890625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.214544296264648, + "rewards/margins": 11.953958511352539, + "rewards/rejected": -22.168502807617188, + "step": 5206 + }, + { + "epoch": 8.36, + "learning_rate": 3.963535473642489e-08, + "logits/chosen": -1.5451061725616455, + "logits/rejected": -1.696789264678955, + "logps/chosen": -109.16316986083984, + "logps/rejected": -319.6854248046875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.684279918670654, + "rewards/margins": 17.410396575927734, + "rewards/rejected": -23.094676971435547, + "step": 5207 + }, + { + "epoch": 8.36, + "learning_rate": 3.953626634958383e-08, + "logits/chosen": -1.2956593036651611, + "logits/rejected": -1.356959581375122, + "logps/chosen": -148.85525512695312, + "logps/rejected": -286.4496154785156, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.627510070800781, + "rewards/margins": 14.392566680908203, + "rewards/rejected": -21.020076751708984, + "step": 5208 + }, + { + "epoch": 8.36, + "learning_rate": 3.943717796274276e-08, + "logits/chosen": -1.4594923257827759, + "logits/rejected": -1.4739028215408325, + "logps/chosen": -137.37973022460938, + "logps/rejected": -271.63177490234375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.977591037750244, + "rewards/margins": 13.095155715942383, + "rewards/rejected": -20.07274627685547, + "step": 5209 + }, + { + "epoch": 8.36, + "learning_rate": 3.93380895759017e-08, + "logits/chosen": -1.4284814596176147, + "logits/rejected": -1.4777988195419312, + "logps/chosen": -114.83776092529297, + "logps/rejected": -253.7913360595703, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.382091999053955, + "rewards/margins": 13.83060073852539, + "rewards/rejected": -19.21269416809082, + "step": 5210 + }, + { + "epoch": 8.36, + "learning_rate": 3.923900118906064e-08, + "logits/chosen": -1.4801216125488281, + "logits/rejected": -1.5413568019866943, + "logps/chosen": -183.3799285888672, + "logps/rejected": -318.33795166015625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.508111000061035, + "rewards/margins": 11.898266792297363, + "rewards/rejected": -21.406375885009766, + "step": 5211 + }, + { + "epoch": 8.37, + "learning_rate": 3.913991280221958e-08, + "logits/chosen": -1.5402439832687378, + "logits/rejected": -1.636942982673645, + "logps/chosen": -96.20692443847656, + "logps/rejected": -295.10992431640625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.4697232246398926, + "rewards/margins": 17.992088317871094, + "rewards/rejected": -21.461811065673828, + "step": 5212 + }, + { + "epoch": 8.37, + "learning_rate": 3.9040824415378514e-08, + "logits/chosen": -1.30869460105896, + "logits/rejected": -1.3125211000442505, + "logps/chosen": -147.60020446777344, + "logps/rejected": -290.9935607910156, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.307347297668457, + "rewards/margins": 14.866090774536133, + "rewards/rejected": -21.173437118530273, + "step": 5213 + }, + { + "epoch": 8.37, + "learning_rate": 3.8941736028537454e-08, + "logits/chosen": -1.4778603315353394, + "logits/rejected": -1.4492323398590088, + "logps/chosen": -144.427001953125, + "logps/rejected": -292.8571472167969, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.690684795379639, + "rewards/margins": 14.892791748046875, + "rewards/rejected": -21.58347511291504, + "step": 5214 + }, + { + "epoch": 8.37, + "learning_rate": 3.884264764169639e-08, + "logits/chosen": -1.4486738443374634, + "logits/rejected": -1.498188853263855, + "logps/chosen": -135.227294921875, + "logps/rejected": -255.45899963378906, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.159071445465088, + "rewards/margins": 11.745023727416992, + "rewards/rejected": -17.904094696044922, + "step": 5215 + }, + { + "epoch": 8.37, + "learning_rate": 3.874355925485533e-08, + "logits/chosen": -1.436732530593872, + "logits/rejected": -1.416080355644226, + "logps/chosen": -199.09585571289062, + "logps/rejected": -333.44146728515625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.779031753540039, + "rewards/margins": 12.885775566101074, + "rewards/rejected": -23.66480827331543, + "step": 5216 + }, + { + "epoch": 8.37, + "learning_rate": 3.864447086801427e-08, + "logits/chosen": -1.3233386278152466, + "logits/rejected": -1.4388632774353027, + "logps/chosen": -156.7172088623047, + "logps/rejected": -257.047607421875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.9866943359375, + "rewards/margins": 10.689627647399902, + "rewards/rejected": -18.67632293701172, + "step": 5217 + }, + { + "epoch": 8.38, + "learning_rate": 3.854538248117321e-08, + "logits/chosen": -1.6032655239105225, + "logits/rejected": -1.530480980873108, + "logps/chosen": -169.4097900390625, + "logps/rejected": -278.61834716796875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.414036750793457, + "rewards/margins": 12.499564170837402, + "rewards/rejected": -19.91360092163086, + "step": 5218 + }, + { + "epoch": 8.38, + "learning_rate": 3.844629409433214e-08, + "logits/chosen": -1.5775647163391113, + "logits/rejected": -1.504279613494873, + "logps/chosen": -204.453369140625, + "logps/rejected": -318.447021484375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.104912757873535, + "rewards/margins": 13.439745903015137, + "rewards/rejected": -22.544658660888672, + "step": 5219 + }, + { + "epoch": 8.38, + "learning_rate": 3.834720570749108e-08, + "logits/chosen": -1.4296634197235107, + "logits/rejected": -1.5080779790878296, + "logps/chosen": -141.64187622070312, + "logps/rejected": -315.785888671875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.224814414978027, + "rewards/margins": 14.674476623535156, + "rewards/rejected": -19.8992919921875, + "step": 5220 + }, + { + "epoch": 8.38, + "learning_rate": 3.824811732065002e-08, + "logits/chosen": -1.2795367240905762, + "logits/rejected": -1.3242545127868652, + "logps/chosen": -160.01315307617188, + "logps/rejected": -321.10931396484375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.5537896156311035, + "rewards/margins": 15.468358993530273, + "rewards/rejected": -22.02215003967285, + "step": 5221 + }, + { + "epoch": 8.38, + "learning_rate": 3.8149028933808953e-08, + "logits/chosen": -1.5650579929351807, + "logits/rejected": -1.6196603775024414, + "logps/chosen": -147.55059814453125, + "logps/rejected": -303.05364990234375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.800615310668945, + "rewards/margins": 15.06912899017334, + "rewards/rejected": -20.86974334716797, + "step": 5222 + }, + { + "epoch": 8.38, + "learning_rate": 3.8049940546967893e-08, + "logits/chosen": -1.5150845050811768, + "logits/rejected": -1.5031527280807495, + "logps/chosen": -232.778076171875, + "logps/rejected": -347.3345642089844, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.210617065429688, + "rewards/margins": 11.124911308288574, + "rewards/rejected": -22.335529327392578, + "step": 5223 + }, + { + "epoch": 8.39, + "learning_rate": 3.795085216012683e-08, + "logits/chosen": -1.5516533851623535, + "logits/rejected": -1.446041226387024, + "logps/chosen": -223.13400268554688, + "logps/rejected": -328.33770751953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.57532024383545, + "rewards/margins": 12.697330474853516, + "rewards/rejected": -23.27265167236328, + "step": 5224 + }, + { + "epoch": 8.39, + "learning_rate": 3.785176377328577e-08, + "logits/chosen": -1.507722020149231, + "logits/rejected": -1.5008347034454346, + "logps/chosen": -156.37905883789062, + "logps/rejected": -323.9336242675781, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.938154220581055, + "rewards/margins": 15.982820510864258, + "rewards/rejected": -23.920974731445312, + "step": 5225 + }, + { + "epoch": 8.39, + "learning_rate": 3.7752675386444706e-08, + "logits/chosen": -1.4427845478057861, + "logits/rejected": -1.4726704359054565, + "logps/chosen": -140.3387908935547, + "logps/rejected": -258.430419921875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.452826023101807, + "rewards/margins": 11.602678298950195, + "rewards/rejected": -18.055503845214844, + "step": 5226 + }, + { + "epoch": 8.39, + "learning_rate": 3.7653586999603646e-08, + "logits/chosen": -1.403045415878296, + "logits/rejected": -1.4540843963623047, + "logps/chosen": -174.86976623535156, + "logps/rejected": -274.11212158203125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.453418731689453, + "rewards/margins": 11.237517356872559, + "rewards/rejected": -19.690937042236328, + "step": 5227 + }, + { + "epoch": 8.39, + "learning_rate": 3.755449861276258e-08, + "logits/chosen": -1.4833954572677612, + "logits/rejected": -1.4912006855010986, + "logps/chosen": -171.1744384765625, + "logps/rejected": -312.7405700683594, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.513127326965332, + "rewards/margins": 14.481616020202637, + "rewards/rejected": -20.99474334716797, + "step": 5228 + }, + { + "epoch": 8.39, + "learning_rate": 3.745541022592152e-08, + "logits/chosen": -1.4925827980041504, + "logits/rejected": -1.5075308084487915, + "logps/chosen": -131.9743194580078, + "logps/rejected": -295.12353515625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.229432106018066, + "rewards/margins": 15.406041145324707, + "rewards/rejected": -21.635473251342773, + "step": 5229 + }, + { + "epoch": 8.39, + "learning_rate": 3.735632183908046e-08, + "logits/chosen": -1.4781956672668457, + "logits/rejected": -1.4534273147583008, + "logps/chosen": -169.12677001953125, + "logps/rejected": -317.49493408203125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.609350681304932, + "rewards/margins": 16.122787475585938, + "rewards/rejected": -23.73213768005371, + "step": 5230 + }, + { + "epoch": 8.4, + "learning_rate": 3.72572334522394e-08, + "logits/chosen": -1.388777256011963, + "logits/rejected": -1.482797622680664, + "logps/chosen": -160.55197143554688, + "logps/rejected": -289.3282775878906, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.502782821655273, + "rewards/margins": 10.882402420043945, + "rewards/rejected": -19.38518524169922, + "step": 5231 + }, + { + "epoch": 8.4, + "learning_rate": 3.715814506539833e-08, + "logits/chosen": -1.4367626905441284, + "logits/rejected": -1.4412535429000854, + "logps/chosen": -158.38580322265625, + "logps/rejected": -295.0966796875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.823751449584961, + "rewards/margins": 14.007349014282227, + "rewards/rejected": -20.831098556518555, + "step": 5232 + }, + { + "epoch": 8.4, + "learning_rate": 3.705905667855727e-08, + "logits/chosen": -1.4811846017837524, + "logits/rejected": -1.4702043533325195, + "logps/chosen": -173.4826202392578, + "logps/rejected": -348.9783020019531, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.265663146972656, + "rewards/margins": 17.80527687072754, + "rewards/rejected": -27.070938110351562, + "step": 5233 + }, + { + "epoch": 8.4, + "learning_rate": 3.6959968291716206e-08, + "logits/chosen": -1.5183436870574951, + "logits/rejected": -1.5205351114273071, + "logps/chosen": -151.37306213378906, + "logps/rejected": -301.3514099121094, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.086937427520752, + "rewards/margins": 14.309895515441895, + "rewards/rejected": -20.396833419799805, + "step": 5234 + }, + { + "epoch": 8.4, + "learning_rate": 3.6860879904875146e-08, + "logits/chosen": -1.325316071510315, + "logits/rejected": -1.3548123836517334, + "logps/chosen": -159.5791015625, + "logps/rejected": -320.996337890625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.832362174987793, + "rewards/margins": 14.226916313171387, + "rewards/rejected": -22.05927848815918, + "step": 5235 + }, + { + "epoch": 8.4, + "learning_rate": 3.6761791518034086e-08, + "logits/chosen": -1.717349648475647, + "logits/rejected": -1.6162256002426147, + "logps/chosen": -133.58148193359375, + "logps/rejected": -262.2670593261719, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.764529228210449, + "rewards/margins": 13.215309143066406, + "rewards/rejected": -18.979839324951172, + "step": 5236 + }, + { + "epoch": 8.41, + "learning_rate": 3.6662703131193026e-08, + "logits/chosen": -1.328948736190796, + "logits/rejected": -1.4306353330612183, + "logps/chosen": -158.95480346679688, + "logps/rejected": -338.68853759765625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.287768840789795, + "rewards/margins": 15.488578796386719, + "rewards/rejected": -22.776350021362305, + "step": 5237 + }, + { + "epoch": 8.41, + "learning_rate": 3.656361474435196e-08, + "logits/chosen": -1.5069340467453003, + "logits/rejected": -1.530819058418274, + "logps/chosen": -179.6285400390625, + "logps/rejected": -341.1346740722656, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.034302711486816, + "rewards/margins": 15.756423950195312, + "rewards/rejected": -24.790727615356445, + "step": 5238 + }, + { + "epoch": 8.41, + "learning_rate": 3.64645263575109e-08, + "logits/chosen": -1.5345032215118408, + "logits/rejected": -1.497902512550354, + "logps/chosen": -154.32662963867188, + "logps/rejected": -258.1642761230469, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.487472057342529, + "rewards/margins": 11.348966598510742, + "rewards/rejected": -18.83643913269043, + "step": 5239 + }, + { + "epoch": 8.41, + "learning_rate": 3.636543797066983e-08, + "logits/chosen": -1.4818075895309448, + "logits/rejected": -1.4600462913513184, + "logps/chosen": -124.35047912597656, + "logps/rejected": -310.7143249511719, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.721432209014893, + "rewards/margins": 18.107807159423828, + "rewards/rejected": -22.829238891601562, + "step": 5240 + }, + { + "epoch": 8.41, + "learning_rate": 3.626634958382877e-08, + "logits/chosen": -1.588261604309082, + "logits/rejected": -1.6189210414886475, + "logps/chosen": -164.98509216308594, + "logps/rejected": -327.1116027832031, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.240730285644531, + "rewards/margins": 13.817481994628906, + "rewards/rejected": -22.058212280273438, + "step": 5241 + }, + { + "epoch": 8.41, + "learning_rate": 3.6167261196987705e-08, + "logits/chosen": -1.480536937713623, + "logits/rejected": -1.5416618585586548, + "logps/chosen": -165.72998046875, + "logps/rejected": -317.8761901855469, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.31076717376709, + "rewards/margins": 12.711857795715332, + "rewards/rejected": -21.022624969482422, + "step": 5242 + }, + { + "epoch": 8.42, + "learning_rate": 3.606817281014665e-08, + "logits/chosen": -1.3673644065856934, + "logits/rejected": -1.415358304977417, + "logps/chosen": -181.00172424316406, + "logps/rejected": -332.39764404296875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.423822402954102, + "rewards/margins": 13.488641738891602, + "rewards/rejected": -22.912464141845703, + "step": 5243 + }, + { + "epoch": 8.42, + "learning_rate": 3.596908442330559e-08, + "logits/chosen": -1.4683160781860352, + "logits/rejected": -1.4639520645141602, + "logps/chosen": -166.43063354492188, + "logps/rejected": -284.51336669921875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.344657897949219, + "rewards/margins": 11.52599811553955, + "rewards/rejected": -20.870655059814453, + "step": 5244 + }, + { + "epoch": 8.42, + "learning_rate": 3.5869996036464525e-08, + "logits/chosen": -1.4296300411224365, + "logits/rejected": -1.4758840799331665, + "logps/chosen": -174.25518798828125, + "logps/rejected": -337.86871337890625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.002779006958008, + "rewards/margins": 15.148078918457031, + "rewards/rejected": -24.15085792541504, + "step": 5245 + }, + { + "epoch": 8.42, + "learning_rate": 3.5770907649623465e-08, + "logits/chosen": -1.2787925004959106, + "logits/rejected": -1.3471494913101196, + "logps/chosen": -159.3515625, + "logps/rejected": -325.78497314453125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.208935737609863, + "rewards/margins": 14.292337417602539, + "rewards/rejected": -22.50127410888672, + "step": 5246 + }, + { + "epoch": 8.42, + "learning_rate": 3.56718192627824e-08, + "logits/chosen": -1.6844663619995117, + "logits/rejected": -1.6411373615264893, + "logps/chosen": -178.90060424804688, + "logps/rejected": -287.5382080078125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.559731483459473, + "rewards/margins": 11.933184623718262, + "rewards/rejected": -19.4929141998291, + "step": 5247 + }, + { + "epoch": 8.42, + "learning_rate": 3.557273087594134e-08, + "logits/chosen": -1.5073035955429077, + "logits/rejected": -1.533237338066101, + "logps/chosen": -120.0220718383789, + "logps/rejected": -252.7034454345703, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.639236927032471, + "rewards/margins": 13.25374698638916, + "rewards/rejected": -17.892982482910156, + "step": 5248 + }, + { + "epoch": 8.43, + "learning_rate": 3.547364248910028e-08, + "logits/chosen": -1.5172070264816284, + "logits/rejected": -1.6212756633758545, + "logps/chosen": -183.94058227539062, + "logps/rejected": -301.8458251953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.061911582946777, + "rewards/margins": 12.137192726135254, + "rewards/rejected": -21.19910430908203, + "step": 5249 + }, + { + "epoch": 8.43, + "learning_rate": 3.537455410225922e-08, + "logits/chosen": -1.6743720769882202, + "logits/rejected": -1.678865671157837, + "logps/chosen": -144.3065643310547, + "logps/rejected": -300.917724609375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.954261779785156, + "rewards/margins": 16.9066219329834, + "rewards/rejected": -22.860883712768555, + "step": 5250 + }, + { + "epoch": 8.43, + "learning_rate": 3.527546571541815e-08, + "logits/chosen": -1.504973292350769, + "logits/rejected": -1.5569875240325928, + "logps/chosen": -188.63986206054688, + "logps/rejected": -363.56304931640625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.361087799072266, + "rewards/margins": 16.084028244018555, + "rewards/rejected": -25.44511604309082, + "step": 5251 + }, + { + "epoch": 8.43, + "learning_rate": 3.517637732857709e-08, + "logits/chosen": -1.4265438318252563, + "logits/rejected": -1.4330487251281738, + "logps/chosen": -183.1820831298828, + "logps/rejected": -317.7728271484375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.840529441833496, + "rewards/margins": 12.590977668762207, + "rewards/rejected": -21.431507110595703, + "step": 5252 + }, + { + "epoch": 8.43, + "learning_rate": 3.5077288941736025e-08, + "logits/chosen": -1.5267068147659302, + "logits/rejected": -1.5230908393859863, + "logps/chosen": -144.85147094726562, + "logps/rejected": -308.7808837890625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.223055839538574, + "rewards/margins": 17.005367279052734, + "rewards/rejected": -23.228424072265625, + "step": 5253 + }, + { + "epoch": 8.43, + "learning_rate": 3.4978200554894965e-08, + "logits/chosen": -1.3757261037826538, + "logits/rejected": -1.4029710292816162, + "logps/chosen": -180.6495819091797, + "logps/rejected": -289.6479797363281, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.855740547180176, + "rewards/margins": 12.342764854431152, + "rewards/rejected": -20.198505401611328, + "step": 5254 + }, + { + "epoch": 8.43, + "learning_rate": 3.48791121680539e-08, + "logits/chosen": -1.4965698719024658, + "logits/rejected": -1.4330377578735352, + "logps/chosen": -142.36761474609375, + "logps/rejected": -278.189453125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.528367519378662, + "rewards/margins": 13.754584312438965, + "rewards/rejected": -20.28295135498047, + "step": 5255 + }, + { + "epoch": 8.44, + "learning_rate": 3.4780023781212844e-08, + "logits/chosen": -1.4552291631698608, + "logits/rejected": -1.459513783454895, + "logps/chosen": -155.85476684570312, + "logps/rejected": -309.05364990234375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.420252799987793, + "rewards/margins": 15.126474380493164, + "rewards/rejected": -23.546728134155273, + "step": 5256 + }, + { + "epoch": 8.44, + "learning_rate": 3.468093539437178e-08, + "logits/chosen": -1.7350515127182007, + "logits/rejected": -1.7191799879074097, + "logps/chosen": -145.90423583984375, + "logps/rejected": -255.39877319335938, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.994962692260742, + "rewards/margins": 11.831372261047363, + "rewards/rejected": -17.826335906982422, + "step": 5257 + }, + { + "epoch": 8.44, + "learning_rate": 3.458184700753072e-08, + "logits/chosen": -1.405827522277832, + "logits/rejected": -1.4365687370300293, + "logps/chosen": -158.16921997070312, + "logps/rejected": -334.7548828125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.476980209350586, + "rewards/margins": 15.617199897766113, + "rewards/rejected": -24.094181060791016, + "step": 5258 + }, + { + "epoch": 8.44, + "learning_rate": 3.448275862068965e-08, + "logits/chosen": -1.6482388973236084, + "logits/rejected": -1.679917335510254, + "logps/chosen": -123.4623031616211, + "logps/rejected": -263.8641357421875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.875180244445801, + "rewards/margins": 13.310525894165039, + "rewards/rejected": -18.185705184936523, + "step": 5259 + }, + { + "epoch": 8.44, + "learning_rate": 3.438367023384859e-08, + "logits/chosen": -1.7168251276016235, + "logits/rejected": -1.6762628555297852, + "logps/chosen": -130.94448852539062, + "logps/rejected": -288.4057922363281, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.0485944747924805, + "rewards/margins": 16.81787872314453, + "rewards/rejected": -21.866472244262695, + "step": 5260 + }, + { + "epoch": 8.44, + "learning_rate": 3.4284581847007524e-08, + "logits/chosen": -1.555330753326416, + "logits/rejected": -1.4921875, + "logps/chosen": -203.64268493652344, + "logps/rejected": -363.66339111328125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.730734825134277, + "rewards/margins": 16.88429832458496, + "rewards/rejected": -24.615032196044922, + "step": 5261 + }, + { + "epoch": 8.45, + "learning_rate": 3.418549346016647e-08, + "logits/chosen": -1.3942115306854248, + "logits/rejected": -1.43662691116333, + "logps/chosen": -170.955078125, + "logps/rejected": -331.31463623046875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.326119422912598, + "rewards/margins": 14.603921890258789, + "rewards/rejected": -22.930042266845703, + "step": 5262 + }, + { + "epoch": 8.45, + "learning_rate": 3.4086405073325404e-08, + "logits/chosen": -1.3293402194976807, + "logits/rejected": -1.2608534097671509, + "logps/chosen": -183.66546630859375, + "logps/rejected": -289.68023681640625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.536821365356445, + "rewards/margins": 11.918441772460938, + "rewards/rejected": -20.455263137817383, + "step": 5263 + }, + { + "epoch": 8.45, + "learning_rate": 3.3987316686484344e-08, + "logits/chosen": -1.2768402099609375, + "logits/rejected": -1.2991749048233032, + "logps/chosen": -174.94752502441406, + "logps/rejected": -271.28680419921875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.8092427253723145, + "rewards/margins": 11.630203247070312, + "rewards/rejected": -19.4394474029541, + "step": 5264 + }, + { + "epoch": 8.45, + "learning_rate": 3.388822829964328e-08, + "logits/chosen": -1.3512332439422607, + "logits/rejected": -1.4244446754455566, + "logps/chosen": -159.18727111816406, + "logps/rejected": -303.037841796875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.419425964355469, + "rewards/margins": 13.911615371704102, + "rewards/rejected": -23.331039428710938, + "step": 5265 + }, + { + "epoch": 8.45, + "learning_rate": 3.378913991280222e-08, + "logits/chosen": -1.4186686277389526, + "logits/rejected": -1.464728832244873, + "logps/chosen": -144.76004028320312, + "logps/rejected": -292.1171569824219, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.278541564941406, + "rewards/margins": 13.1346435546875, + "rewards/rejected": -20.413185119628906, + "step": 5266 + }, + { + "epoch": 8.45, + "learning_rate": 3.369005152596116e-08, + "logits/chosen": -1.587506651878357, + "logits/rejected": -1.6604866981506348, + "logps/chosen": -137.87789916992188, + "logps/rejected": -276.397216796875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.571342945098877, + "rewards/margins": 15.248785972595215, + "rewards/rejected": -19.82012939453125, + "step": 5267 + }, + { + "epoch": 8.46, + "learning_rate": 3.359096313912009e-08, + "logits/chosen": -1.2956652641296387, + "logits/rejected": -1.3072736263275146, + "logps/chosen": -118.21259307861328, + "logps/rejected": -226.0149688720703, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.594743728637695, + "rewards/margins": 9.97244930267334, + "rewards/rejected": -16.56719398498535, + "step": 5268 + }, + { + "epoch": 8.46, + "learning_rate": 3.349187475227904e-08, + "logits/chosen": -1.4776756763458252, + "logits/rejected": -1.4153850078582764, + "logps/chosen": -142.50091552734375, + "logps/rejected": -241.03480529785156, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.651760101318359, + "rewards/margins": 11.095256805419922, + "rewards/rejected": -17.74701499938965, + "step": 5269 + }, + { + "epoch": 8.46, + "learning_rate": 3.339278636543797e-08, + "logits/chosen": -1.37542724609375, + "logits/rejected": -1.3787496089935303, + "logps/chosen": -137.85227966308594, + "logps/rejected": -252.88967895507812, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.452975749969482, + "rewards/margins": 11.205835342407227, + "rewards/rejected": -17.658811569213867, + "step": 5270 + }, + { + "epoch": 8.46, + "learning_rate": 3.329369797859691e-08, + "logits/chosen": -1.4245548248291016, + "logits/rejected": -1.5212044715881348, + "logps/chosen": -155.41943359375, + "logps/rejected": -264.7571105957031, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.742001533508301, + "rewards/margins": 10.198533058166504, + "rewards/rejected": -17.940536499023438, + "step": 5271 + }, + { + "epoch": 8.46, + "learning_rate": 3.319460959175584e-08, + "logits/chosen": -1.5715467929840088, + "logits/rejected": -1.5951907634735107, + "logps/chosen": -164.9494171142578, + "logps/rejected": -285.21099853515625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.394432544708252, + "rewards/margins": 12.578388214111328, + "rewards/rejected": -18.972820281982422, + "step": 5272 + }, + { + "epoch": 8.46, + "learning_rate": 3.309552120491478e-08, + "logits/chosen": -1.4774184226989746, + "logits/rejected": -1.4651622772216797, + "logps/chosen": -194.15777587890625, + "logps/rejected": -325.71368408203125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.182168006896973, + "rewards/margins": 14.27343463897705, + "rewards/rejected": -24.455602645874023, + "step": 5273 + }, + { + "epoch": 8.47, + "learning_rate": 3.2996432818073717e-08, + "logits/chosen": -1.4617109298706055, + "logits/rejected": -1.5035383701324463, + "logps/chosen": -187.14645385742188, + "logps/rejected": -329.8739318847656, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.873849868774414, + "rewards/margins": 12.725637435913086, + "rewards/rejected": -22.5994873046875, + "step": 5274 + }, + { + "epoch": 8.47, + "learning_rate": 3.289734443123266e-08, + "logits/chosen": -1.5149080753326416, + "logits/rejected": -1.4508497714996338, + "logps/chosen": -177.79336547851562, + "logps/rejected": -340.61419677734375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.437387466430664, + "rewards/margins": 17.84250259399414, + "rewards/rejected": -26.279890060424805, + "step": 5275 + }, + { + "epoch": 8.47, + "learning_rate": 3.2798256044391596e-08, + "logits/chosen": -1.4497653245925903, + "logits/rejected": -1.4628615379333496, + "logps/chosen": -231.46063232421875, + "logps/rejected": -344.92242431640625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -13.06665325164795, + "rewards/margins": 12.750734329223633, + "rewards/rejected": -25.8173885345459, + "step": 5276 + }, + { + "epoch": 8.47, + "learning_rate": 3.2699167657550536e-08, + "logits/chosen": -1.531959056854248, + "logits/rejected": -1.504407525062561, + "logps/chosen": -97.27474212646484, + "logps/rejected": -215.31446838378906, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.338731527328491, + "rewards/margins": 13.836234092712402, + "rewards/rejected": -16.174964904785156, + "step": 5277 + }, + { + "epoch": 8.47, + "learning_rate": 3.260007927070947e-08, + "logits/chosen": -1.7689625024795532, + "logits/rejected": -1.8111995458602905, + "logps/chosen": -153.70382690429688, + "logps/rejected": -320.62530517578125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.893176555633545, + "rewards/margins": 14.239767074584961, + "rewards/rejected": -21.132944107055664, + "step": 5278 + }, + { + "epoch": 8.47, + "learning_rate": 3.250099088386841e-08, + "logits/chosen": -1.5055395364761353, + "logits/rejected": -1.4872915744781494, + "logps/chosen": -170.01864624023438, + "logps/rejected": -283.79266357421875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.440652847290039, + "rewards/margins": 12.669122695922852, + "rewards/rejected": -21.10977554321289, + "step": 5279 + }, + { + "epoch": 8.48, + "learning_rate": 3.240190249702734e-08, + "logits/chosen": -1.5183337926864624, + "logits/rejected": -1.5557817220687866, + "logps/chosen": -146.268310546875, + "logps/rejected": -281.91796875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.824717998504639, + "rewards/margins": 13.495906829833984, + "rewards/rejected": -20.32062530517578, + "step": 5280 + }, + { + "epoch": 8.48, + "learning_rate": 3.230281411018628e-08, + "logits/chosen": -1.5488252639770508, + "logits/rejected": -1.5374385118484497, + "logps/chosen": -162.37452697753906, + "logps/rejected": -323.90362548828125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.974320888519287, + "rewards/margins": 16.99317741394043, + "rewards/rejected": -22.967498779296875, + "step": 5281 + }, + { + "epoch": 8.48, + "learning_rate": 3.220372572334522e-08, + "logits/chosen": -1.3706458806991577, + "logits/rejected": -1.3929518461227417, + "logps/chosen": -133.4015350341797, + "logps/rejected": -311.0599365234375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.546018600463867, + "rewards/margins": 16.59040069580078, + "rewards/rejected": -22.13641929626465, + "step": 5282 + }, + { + "epoch": 8.48, + "learning_rate": 3.210463733650416e-08, + "logits/chosen": -1.4392982721328735, + "logits/rejected": -1.6263833045959473, + "logps/chosen": -125.22857666015625, + "logps/rejected": -303.8836669921875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.116809844970703, + "rewards/margins": 13.589061737060547, + "rewards/rejected": -18.70587158203125, + "step": 5283 + }, + { + "epoch": 8.48, + "learning_rate": 3.2005548949663096e-08, + "logits/chosen": -1.613411784172058, + "logits/rejected": -1.6499463319778442, + "logps/chosen": -151.08807373046875, + "logps/rejected": -328.4481506347656, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.604355335235596, + "rewards/margins": 15.575393676757812, + "rewards/rejected": -23.179750442504883, + "step": 5284 + }, + { + "epoch": 8.48, + "learning_rate": 3.1906460562822036e-08, + "logits/chosen": -1.5024439096450806, + "logits/rejected": -1.5183054208755493, + "logps/chosen": -153.01881408691406, + "logps/rejected": -286.07354736328125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.479766845703125, + "rewards/margins": 12.720052719116211, + "rewards/rejected": -19.199819564819336, + "step": 5285 + }, + { + "epoch": 8.48, + "learning_rate": 3.180737217598097e-08, + "logits/chosen": -1.2655105590820312, + "logits/rejected": -1.3786059617996216, + "logps/chosen": -126.8326187133789, + "logps/rejected": -318.4454650878906, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.387579441070557, + "rewards/margins": 15.695318222045898, + "rewards/rejected": -21.082897186279297, + "step": 5286 + }, + { + "epoch": 8.49, + "learning_rate": 3.170828378913991e-08, + "logits/chosen": -1.4749244451522827, + "logits/rejected": -1.4553438425064087, + "logps/chosen": -139.03369140625, + "logps/rejected": -301.682373046875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.121720790863037, + "rewards/margins": 17.284679412841797, + "rewards/rejected": -22.406400680541992, + "step": 5287 + }, + { + "epoch": 8.49, + "learning_rate": 3.1609195402298855e-08, + "logits/chosen": -1.3963615894317627, + "logits/rejected": -1.3579996824264526, + "logps/chosen": -228.40176391601562, + "logps/rejected": -327.9131164550781, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.890525817871094, + "rewards/margins": 11.006903648376465, + "rewards/rejected": -22.897428512573242, + "step": 5288 + }, + { + "epoch": 8.49, + "learning_rate": 3.151010701545779e-08, + "logits/chosen": -1.4673823118209839, + "logits/rejected": -1.4762057065963745, + "logps/chosen": -182.8981170654297, + "logps/rejected": -333.2408447265625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.263812065124512, + "rewards/margins": 15.405882835388184, + "rewards/rejected": -25.669696807861328, + "step": 5289 + }, + { + "epoch": 8.49, + "learning_rate": 3.141101862861673e-08, + "logits/chosen": -1.57244873046875, + "logits/rejected": -1.4905040264129639, + "logps/chosen": -174.80377197265625, + "logps/rejected": -312.9595642089844, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.74770975112915, + "rewards/margins": 14.778083801269531, + "rewards/rejected": -22.525793075561523, + "step": 5290 + }, + { + "epoch": 8.49, + "learning_rate": 3.131193024177566e-08, + "logits/chosen": -1.436783790588379, + "logits/rejected": -1.454984188079834, + "logps/chosen": -160.1622314453125, + "logps/rejected": -280.0646057128906, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.099636077880859, + "rewards/margins": 11.83138656616211, + "rewards/rejected": -18.93102264404297, + "step": 5291 + }, + { + "epoch": 8.49, + "learning_rate": 3.12128418549346e-08, + "logits/chosen": -1.4559252262115479, + "logits/rejected": -1.4469587802886963, + "logps/chosen": -155.05364990234375, + "logps/rejected": -275.8875732421875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.305416107177734, + "rewards/margins": 11.933229446411133, + "rewards/rejected": -18.238643646240234, + "step": 5292 + }, + { + "epoch": 8.5, + "learning_rate": 3.111375346809354e-08, + "logits/chosen": -1.2877352237701416, + "logits/rejected": -1.3325095176696777, + "logps/chosen": -176.763671875, + "logps/rejected": -346.1600036621094, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.831888198852539, + "rewards/margins": 13.47097396850586, + "rewards/rejected": -23.3028621673584, + "step": 5293 + }, + { + "epoch": 8.5, + "learning_rate": 3.1014665081252475e-08, + "logits/chosen": -1.5829613208770752, + "logits/rejected": -1.5854076147079468, + "logps/chosen": -156.02044677734375, + "logps/rejected": -295.3919372558594, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.817516803741455, + "rewards/margins": 14.865289688110352, + "rewards/rejected": -22.68280792236328, + "step": 5294 + }, + { + "epoch": 8.5, + "learning_rate": 3.0915576694411415e-08, + "logits/chosen": -1.5560623407363892, + "logits/rejected": -1.5792150497436523, + "logps/chosen": -147.4964599609375, + "logps/rejected": -321.537353515625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.483662128448486, + "rewards/margins": 15.627803802490234, + "rewards/rejected": -23.111465454101562, + "step": 5295 + }, + { + "epoch": 8.5, + "learning_rate": 3.0816488307570355e-08, + "logits/chosen": -1.3383984565734863, + "logits/rejected": -1.34153413772583, + "logps/chosen": -159.6108856201172, + "logps/rejected": -310.8880310058594, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.9567036628723145, + "rewards/margins": 14.363517761230469, + "rewards/rejected": -22.320220947265625, + "step": 5296 + }, + { + "epoch": 8.5, + "learning_rate": 3.071739992072929e-08, + "logits/chosen": -1.5884387493133545, + "logits/rejected": -1.530075192451477, + "logps/chosen": -133.04588317871094, + "logps/rejected": -246.40078735351562, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.2665300369262695, + "rewards/margins": 12.690102577209473, + "rewards/rejected": -15.956633567810059, + "step": 5297 + }, + { + "epoch": 8.5, + "learning_rate": 3.061831153388823e-08, + "logits/chosen": -1.4160763025283813, + "logits/rejected": -1.3645884990692139, + "logps/chosen": -183.54364013671875, + "logps/rejected": -317.9031066894531, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.60521411895752, + "rewards/margins": 13.707200050354004, + "rewards/rejected": -22.312414169311523, + "step": 5298 + }, + { + "epoch": 8.51, + "learning_rate": 3.051922314704717e-08, + "logits/chosen": -1.4247241020202637, + "logits/rejected": -1.4499180316925049, + "logps/chosen": -185.49659729003906, + "logps/rejected": -323.62738037109375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.65927791595459, + "rewards/margins": 13.500141143798828, + "rewards/rejected": -24.1594181060791, + "step": 5299 + }, + { + "epoch": 8.51, + "learning_rate": 3.04201347602061e-08, + "logits/chosen": -1.3241069316864014, + "logits/rejected": -1.3838847875595093, + "logps/chosen": -137.77255249023438, + "logps/rejected": -239.1744842529297, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.953025817871094, + "rewards/margins": 10.195388793945312, + "rewards/rejected": -16.148414611816406, + "step": 5300 + }, + { + "epoch": 8.51, + "learning_rate": 3.032104637336504e-08, + "logits/chosen": -1.2676341533660889, + "logits/rejected": -1.3638372421264648, + "logps/chosen": -201.07879638671875, + "logps/rejected": -357.8717041015625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.245229721069336, + "rewards/margins": 13.346054077148438, + "rewards/rejected": -25.59128189086914, + "step": 5301 + }, + { + "epoch": 8.51, + "learning_rate": 3.0221957986523975e-08, + "logits/chosen": -1.5287601947784424, + "logits/rejected": -1.3903735876083374, + "logps/chosen": -148.32960510253906, + "logps/rejected": -283.9595947265625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.2362060546875, + "rewards/margins": 15.048372268676758, + "rewards/rejected": -22.284578323364258, + "step": 5302 + }, + { + "epoch": 8.51, + "learning_rate": 3.0122869599682914e-08, + "logits/chosen": -1.5863018035888672, + "logits/rejected": -1.4737548828125, + "logps/chosen": -142.0721435546875, + "logps/rejected": -280.8223571777344, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.60758113861084, + "rewards/margins": 14.054903030395508, + "rewards/rejected": -18.662485122680664, + "step": 5303 + }, + { + "epoch": 8.51, + "learning_rate": 3.0023781212841854e-08, + "logits/chosen": -1.569753646850586, + "logits/rejected": -1.6052526235580444, + "logps/chosen": -142.28759765625, + "logps/rejected": -282.66461181640625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.251734733581543, + "rewards/margins": 12.034758567810059, + "rewards/rejected": -18.2864933013916, + "step": 5304 + }, + { + "epoch": 8.52, + "learning_rate": 2.992469282600079e-08, + "logits/chosen": -1.4601396322250366, + "logits/rejected": -1.498223900794983, + "logps/chosen": -209.66970825195312, + "logps/rejected": -355.26446533203125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.724623680114746, + "rewards/margins": 15.446722030639648, + "rewards/rejected": -25.171348571777344, + "step": 5305 + }, + { + "epoch": 8.52, + "learning_rate": 2.982560443915973e-08, + "logits/chosen": -1.4334731101989746, + "logits/rejected": -1.340925931930542, + "logps/chosen": -187.26287841796875, + "logps/rejected": -301.74932861328125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.62242317199707, + "rewards/margins": 13.84769058227539, + "rewards/rejected": -21.470111846923828, + "step": 5306 + }, + { + "epoch": 8.52, + "learning_rate": 2.9726516052318668e-08, + "logits/chosen": -1.4139223098754883, + "logits/rejected": -1.3967254161834717, + "logps/chosen": -172.67572021484375, + "logps/rejected": -301.9642333984375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.542247772216797, + "rewards/margins": 14.454609870910645, + "rewards/rejected": -21.996858596801758, + "step": 5307 + }, + { + "epoch": 8.52, + "learning_rate": 2.9627427665477604e-08, + "logits/chosen": -1.257135033607483, + "logits/rejected": -1.3382675647735596, + "logps/chosen": -200.52813720703125, + "logps/rejected": -341.8114013671875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.910919189453125, + "rewards/margins": 14.416033744812012, + "rewards/rejected": -25.326953887939453, + "step": 5308 + }, + { + "epoch": 8.52, + "learning_rate": 2.9528339278636544e-08, + "logits/chosen": -1.5355281829833984, + "logits/rejected": -1.5197689533233643, + "logps/chosen": -184.58172607421875, + "logps/rejected": -320.15869140625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.406135559082031, + "rewards/margins": 13.743247985839844, + "rewards/rejected": -22.149381637573242, + "step": 5309 + }, + { + "epoch": 8.52, + "learning_rate": 2.942925089179548e-08, + "logits/chosen": -1.5456422567367554, + "logits/rejected": -1.4861034154891968, + "logps/chosen": -192.066650390625, + "logps/rejected": -303.46527099609375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.090340614318848, + "rewards/margins": 11.113665580749512, + "rewards/rejected": -20.20400619506836, + "step": 5310 + }, + { + "epoch": 8.52, + "learning_rate": 2.9330162504954417e-08, + "logits/chosen": -1.2441223859786987, + "logits/rejected": -1.3233139514923096, + "logps/chosen": -124.32218170166016, + "logps/rejected": -288.00579833984375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.10972785949707, + "rewards/margins": 14.956622123718262, + "rewards/rejected": -22.066349029541016, + "step": 5311 + }, + { + "epoch": 8.53, + "learning_rate": 2.9231074118113357e-08, + "logits/chosen": -1.609637975692749, + "logits/rejected": -1.5811042785644531, + "logps/chosen": -161.30661010742188, + "logps/rejected": -304.3509521484375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.190261840820312, + "rewards/margins": 14.63977336883545, + "rewards/rejected": -22.830036163330078, + "step": 5312 + }, + { + "epoch": 8.53, + "learning_rate": 2.9131985731272294e-08, + "logits/chosen": -1.5359563827514648, + "logits/rejected": -1.4793047904968262, + "logps/chosen": -143.94017028808594, + "logps/rejected": -246.0155029296875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.811089992523193, + "rewards/margins": 12.697280883789062, + "rewards/rejected": -17.508371353149414, + "step": 5313 + }, + { + "epoch": 8.53, + "learning_rate": 2.903289734443123e-08, + "logits/chosen": -1.4869608879089355, + "logits/rejected": -1.5062944889068604, + "logps/chosen": -205.3507080078125, + "logps/rejected": -378.263671875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.85128116607666, + "rewards/margins": 15.265522956848145, + "rewards/rejected": -26.116804122924805, + "step": 5314 + }, + { + "epoch": 8.53, + "learning_rate": 2.8933808957590167e-08, + "logits/chosen": -1.474402904510498, + "logits/rejected": -1.4510655403137207, + "logps/chosen": -156.89437866210938, + "logps/rejected": -314.3916320800781, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.406707286834717, + "rewards/margins": 14.190284729003906, + "rewards/rejected": -21.59699249267578, + "step": 5315 + }, + { + "epoch": 8.53, + "learning_rate": 2.8834720570749107e-08, + "logits/chosen": -1.300208330154419, + "logits/rejected": -1.3384495973587036, + "logps/chosen": -153.16539001464844, + "logps/rejected": -331.3764343261719, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.708973407745361, + "rewards/margins": 14.941812515258789, + "rewards/rejected": -22.650787353515625, + "step": 5316 + }, + { + "epoch": 8.53, + "learning_rate": 2.8735632183908043e-08, + "logits/chosen": -1.5049374103546143, + "logits/rejected": -1.4860382080078125, + "logps/chosen": -143.37864685058594, + "logps/rejected": -333.35089111328125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.017789363861084, + "rewards/margins": 16.98884391784668, + "rewards/rejected": -24.006633758544922, + "step": 5317 + }, + { + "epoch": 8.54, + "learning_rate": 2.863654379706698e-08, + "logits/chosen": -1.5117892026901245, + "logits/rejected": -1.4568637609481812, + "logps/chosen": -149.87579345703125, + "logps/rejected": -230.55477905273438, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.554865837097168, + "rewards/margins": 9.966217994689941, + "rewards/rejected": -16.52108383178711, + "step": 5318 + }, + { + "epoch": 8.54, + "learning_rate": 2.853745541022592e-08, + "logits/chosen": -1.444486141204834, + "logits/rejected": -1.4373953342437744, + "logps/chosen": -176.1895751953125, + "logps/rejected": -298.9335632324219, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.927757263183594, + "rewards/margins": 12.629419326782227, + "rewards/rejected": -21.55717658996582, + "step": 5319 + }, + { + "epoch": 8.54, + "learning_rate": 2.8438367023384857e-08, + "logits/chosen": -1.457320213317871, + "logits/rejected": -1.501973032951355, + "logps/chosen": -182.3843994140625, + "logps/rejected": -350.89111328125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.802604675292969, + "rewards/margins": 15.620682716369629, + "rewards/rejected": -25.42328643798828, + "step": 5320 + }, + { + "epoch": 8.54, + "learning_rate": 2.8339278636543793e-08, + "logits/chosen": -1.5452220439910889, + "logits/rejected": -1.549513339996338, + "logps/chosen": -103.06615447998047, + "logps/rejected": -269.7901611328125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.5740480422973633, + "rewards/margins": 16.437833786010742, + "rewards/rejected": -20.01188087463379, + "step": 5321 + }, + { + "epoch": 8.54, + "learning_rate": 2.8240190249702733e-08, + "logits/chosen": -1.497855305671692, + "logits/rejected": -1.4981880187988281, + "logps/chosen": -192.87225341796875, + "logps/rejected": -340.9180908203125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.363377571105957, + "rewards/margins": 16.250537872314453, + "rewards/rejected": -25.613914489746094, + "step": 5322 + }, + { + "epoch": 8.54, + "learning_rate": 2.8141101862861673e-08, + "logits/chosen": -1.6020822525024414, + "logits/rejected": -1.6253806352615356, + "logps/chosen": -151.6761474609375, + "logps/rejected": -356.7997131347656, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.516665458679199, + "rewards/margins": 18.157249450683594, + "rewards/rejected": -24.67391586303711, + "step": 5323 + }, + { + "epoch": 8.55, + "learning_rate": 2.804201347602061e-08, + "logits/chosen": -1.455428957939148, + "logits/rejected": -1.4456976652145386, + "logps/chosen": -112.84719848632812, + "logps/rejected": -234.60345458984375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.896343231201172, + "rewards/margins": 11.796037673950195, + "rewards/rejected": -16.692380905151367, + "step": 5324 + }, + { + "epoch": 8.55, + "learning_rate": 2.794292508917955e-08, + "logits/chosen": -1.3105742931365967, + "logits/rejected": -1.3650133609771729, + "logps/chosen": -136.65003967285156, + "logps/rejected": -254.03179931640625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.534463405609131, + "rewards/margins": 10.589797019958496, + "rewards/rejected": -17.12425994873047, + "step": 5325 + }, + { + "epoch": 8.55, + "learning_rate": 2.7843836702338486e-08, + "logits/chosen": -1.4856257438659668, + "logits/rejected": -1.3440372943878174, + "logps/chosen": -173.63877868652344, + "logps/rejected": -299.33905029296875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.506446361541748, + "rewards/margins": 15.080118179321289, + "rewards/rejected": -22.586565017700195, + "step": 5326 + }, + { + "epoch": 8.55, + "learning_rate": 2.7744748315497423e-08, + "logits/chosen": -1.6603140830993652, + "logits/rejected": -1.621292233467102, + "logps/chosen": -158.8105010986328, + "logps/rejected": -260.1262512207031, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.53774356842041, + "rewards/margins": 11.541341781616211, + "rewards/rejected": -19.079086303710938, + "step": 5327 + }, + { + "epoch": 8.55, + "learning_rate": 2.7645659928656363e-08, + "logits/chosen": -1.5368571281433105, + "logits/rejected": -1.4936940670013428, + "logps/chosen": -198.28060913085938, + "logps/rejected": -350.18701171875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.879422187805176, + "rewards/margins": 15.236845016479492, + "rewards/rejected": -27.116268157958984, + "step": 5328 + }, + { + "epoch": 8.55, + "learning_rate": 2.75465715418153e-08, + "logits/chosen": -1.4456475973129272, + "logits/rejected": -1.4464054107666016, + "logps/chosen": -175.69000244140625, + "logps/rejected": -308.7908935546875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.185356140136719, + "rewards/margins": 15.754175186157227, + "rewards/rejected": -22.939531326293945, + "step": 5329 + }, + { + "epoch": 8.56, + "learning_rate": 2.7447483154974236e-08, + "logits/chosen": -1.6373707056045532, + "logits/rejected": -1.6571340560913086, + "logps/chosen": -148.97531127929688, + "logps/rejected": -263.9482421875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.391737461090088, + "rewards/margins": 12.460760116577148, + "rewards/rejected": -18.852497100830078, + "step": 5330 + }, + { + "epoch": 8.56, + "learning_rate": 2.7348394768133173e-08, + "logits/chosen": -1.3494153022766113, + "logits/rejected": -1.3698318004608154, + "logps/chosen": -151.35052490234375, + "logps/rejected": -293.3700256347656, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.013472557067871, + "rewards/margins": 14.061891555786133, + "rewards/rejected": -22.075363159179688, + "step": 5331 + }, + { + "epoch": 8.56, + "learning_rate": 2.7249306381292112e-08, + "logits/chosen": -1.4712655544281006, + "logits/rejected": -1.4885873794555664, + "logps/chosen": -184.94256591796875, + "logps/rejected": -326.4329833984375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.715487480163574, + "rewards/margins": 14.819951057434082, + "rewards/rejected": -24.535438537597656, + "step": 5332 + }, + { + "epoch": 8.56, + "learning_rate": 2.715021799445105e-08, + "logits/chosen": -1.4161916971206665, + "logits/rejected": -1.4481312036514282, + "logps/chosen": -148.6622314453125, + "logps/rejected": -285.2359924316406, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.4013090133667, + "rewards/margins": 12.715184211730957, + "rewards/rejected": -21.116493225097656, + "step": 5333 + }, + { + "epoch": 8.56, + "learning_rate": 2.7051129607609986e-08, + "logits/chosen": -1.449228048324585, + "logits/rejected": -1.5207549333572388, + "logps/chosen": -185.97470092773438, + "logps/rejected": -307.57464599609375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.617990970611572, + "rewards/margins": 14.06053638458252, + "rewards/rejected": -20.67852783203125, + "step": 5334 + }, + { + "epoch": 8.56, + "learning_rate": 2.6952041220768926e-08, + "logits/chosen": -1.525787115097046, + "logits/rejected": -1.5952388048171997, + "logps/chosen": -129.30865478515625, + "logps/rejected": -289.1247253417969, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.446216583251953, + "rewards/margins": 14.339208602905273, + "rewards/rejected": -20.785425186157227, + "step": 5335 + }, + { + "epoch": 8.57, + "learning_rate": 2.6852952833927862e-08, + "logits/chosen": -1.4558258056640625, + "logits/rejected": -1.491516351699829, + "logps/chosen": -174.3102569580078, + "logps/rejected": -328.10162353515625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.144646644592285, + "rewards/margins": 13.971656799316406, + "rewards/rejected": -24.116304397583008, + "step": 5336 + }, + { + "epoch": 8.57, + "learning_rate": 2.67538644470868e-08, + "logits/chosen": -1.4059357643127441, + "logits/rejected": -1.4465179443359375, + "logps/chosen": -133.94570922851562, + "logps/rejected": -291.1237487792969, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.8507466316223145, + "rewards/margins": 13.945453643798828, + "rewards/rejected": -20.796199798583984, + "step": 5337 + }, + { + "epoch": 8.57, + "learning_rate": 2.665477606024574e-08, + "logits/chosen": -1.4161136150360107, + "logits/rejected": -1.4050191640853882, + "logps/chosen": -177.13253784179688, + "logps/rejected": -291.8352355957031, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.949589729309082, + "rewards/margins": 13.923676490783691, + "rewards/rejected": -21.873266220092773, + "step": 5338 + }, + { + "epoch": 8.57, + "learning_rate": 2.6555687673404675e-08, + "logits/chosen": -1.5811034440994263, + "logits/rejected": -1.6446846723556519, + "logps/chosen": -149.21510314941406, + "logps/rejected": -254.60293579101562, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.169478893280029, + "rewards/margins": 10.767034530639648, + "rewards/rejected": -16.936513900756836, + "step": 5339 + }, + { + "epoch": 8.57, + "learning_rate": 2.6456599286563612e-08, + "logits/chosen": -1.6126396656036377, + "logits/rejected": -1.639028787612915, + "logps/chosen": -122.22483825683594, + "logps/rejected": -343.1899719238281, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.837463617324829, + "rewards/margins": 22.26229476928711, + "rewards/rejected": -26.099760055541992, + "step": 5340 + }, + { + "epoch": 8.57, + "learning_rate": 2.6357510899722552e-08, + "logits/chosen": -1.4560182094573975, + "logits/rejected": -1.4611486196517944, + "logps/chosen": -189.0106201171875, + "logps/rejected": -345.90045166015625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.175492286682129, + "rewards/margins": 14.048584938049316, + "rewards/rejected": -23.224079132080078, + "step": 5341 + }, + { + "epoch": 8.57, + "learning_rate": 2.625842251288149e-08, + "logits/chosen": -1.4171768426895142, + "logits/rejected": -1.4787187576293945, + "logps/chosen": -173.33970642089844, + "logps/rejected": -314.5396423339844, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.783740997314453, + "rewards/margins": 14.352156639099121, + "rewards/rejected": -22.13589859008789, + "step": 5342 + }, + { + "epoch": 8.58, + "learning_rate": 2.6159334126040425e-08, + "logits/chosen": -1.6631457805633545, + "logits/rejected": -1.6889064311981201, + "logps/chosen": -189.94525146484375, + "logps/rejected": -305.4121398925781, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.514226913452148, + "rewards/margins": 14.296371459960938, + "rewards/rejected": -20.810598373413086, + "step": 5343 + }, + { + "epoch": 8.58, + "learning_rate": 2.606024573919936e-08, + "logits/chosen": -1.4028581380844116, + "logits/rejected": -1.5513808727264404, + "logps/chosen": -154.04302978515625, + "logps/rejected": -363.2364196777344, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.899682998657227, + "rewards/margins": 15.675442695617676, + "rewards/rejected": -23.57512664794922, + "step": 5344 + }, + { + "epoch": 8.58, + "learning_rate": 2.59611573523583e-08, + "logits/chosen": -1.5182373523712158, + "logits/rejected": -1.5643322467803955, + "logps/chosen": -139.8280792236328, + "logps/rejected": -308.3028564453125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.419684886932373, + "rewards/margins": 15.564657211303711, + "rewards/rejected": -22.984342575073242, + "step": 5345 + }, + { + "epoch": 8.58, + "learning_rate": 2.586206896551724e-08, + "logits/chosen": -1.3556487560272217, + "logits/rejected": -1.4041593074798584, + "logps/chosen": -179.55807495117188, + "logps/rejected": -256.1380920410156, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.479679107666016, + "rewards/margins": 9.009721755981445, + "rewards/rejected": -18.48940086364746, + "step": 5346 + }, + { + "epoch": 8.58, + "learning_rate": 2.5762980578676178e-08, + "logits/chosen": -1.438423752784729, + "logits/rejected": -1.5021463632583618, + "logps/chosen": -124.65530395507812, + "logps/rejected": -329.4876708984375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.844111442565918, + "rewards/margins": 17.670217514038086, + "rewards/rejected": -23.51432991027832, + "step": 5347 + }, + { + "epoch": 8.58, + "learning_rate": 2.5663892191835118e-08, + "logits/chosen": -1.4445135593414307, + "logits/rejected": -1.4502590894699097, + "logps/chosen": -127.3590316772461, + "logps/rejected": -309.8711853027344, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.838980674743652, + "rewards/margins": 17.33839988708496, + "rewards/rejected": -22.17738151550293, + "step": 5348 + }, + { + "epoch": 8.59, + "learning_rate": 2.5564803804994055e-08, + "logits/chosen": -1.5463895797729492, + "logits/rejected": -1.446671724319458, + "logps/chosen": -151.12588500976562, + "logps/rejected": -264.6460266113281, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.093775749206543, + "rewards/margins": 12.63621711730957, + "rewards/rejected": -18.729991912841797, + "step": 5349 + }, + { + "epoch": 8.59, + "learning_rate": 2.546571541815299e-08, + "logits/chosen": -1.5445969104766846, + "logits/rejected": -1.6242945194244385, + "logps/chosen": -145.60572814941406, + "logps/rejected": -325.014404296875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.6104607582092285, + "rewards/margins": 15.057937622070312, + "rewards/rejected": -21.668397903442383, + "step": 5350 + }, + { + "epoch": 8.59, + "learning_rate": 2.536662703131193e-08, + "logits/chosen": -1.4595754146575928, + "logits/rejected": -1.464603066444397, + "logps/chosen": -169.99652099609375, + "logps/rejected": -303.6057434082031, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.481922149658203, + "rewards/margins": 14.053696632385254, + "rewards/rejected": -21.53561782836914, + "step": 5351 + }, + { + "epoch": 8.59, + "learning_rate": 2.5267538644470868e-08, + "logits/chosen": -1.4705016613006592, + "logits/rejected": -1.5546985864639282, + "logps/chosen": -150.09193420410156, + "logps/rejected": -311.1763916015625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.911481857299805, + "rewards/margins": 15.489324569702148, + "rewards/rejected": -22.400806427001953, + "step": 5352 + }, + { + "epoch": 8.59, + "learning_rate": 2.5168450257629804e-08, + "logits/chosen": -1.327255129814148, + "logits/rejected": -1.3192100524902344, + "logps/chosen": -186.3140869140625, + "logps/rejected": -326.7289123535156, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.561616897583008, + "rewards/margins": 14.925683975219727, + "rewards/rejected": -24.487300872802734, + "step": 5353 + }, + { + "epoch": 8.59, + "learning_rate": 2.5069361870788744e-08, + "logits/chosen": -1.3725125789642334, + "logits/rejected": -1.4012531042099, + "logps/chosen": -147.37391662597656, + "logps/rejected": -292.7457275390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.230949878692627, + "rewards/margins": 13.747657775878906, + "rewards/rejected": -20.978607177734375, + "step": 5354 + }, + { + "epoch": 8.6, + "learning_rate": 2.497027348394768e-08, + "logits/chosen": -1.5507032871246338, + "logits/rejected": -1.5826445817947388, + "logps/chosen": -191.09640502929688, + "logps/rejected": -314.06878662109375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.122461318969727, + "rewards/margins": 13.332747459411621, + "rewards/rejected": -23.455209732055664, + "step": 5355 + }, + { + "epoch": 8.6, + "learning_rate": 2.4871185097106617e-08, + "logits/chosen": -1.4001367092132568, + "logits/rejected": -1.385588526725769, + "logps/chosen": -189.4335479736328, + "logps/rejected": -343.1103820800781, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.374775886535645, + "rewards/margins": 14.609548568725586, + "rewards/rejected": -24.984323501586914, + "step": 5356 + }, + { + "epoch": 8.6, + "learning_rate": 2.4772096710265557e-08, + "logits/chosen": -1.5781500339508057, + "logits/rejected": -1.6264827251434326, + "logps/chosen": -107.07189178466797, + "logps/rejected": -297.6793212890625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.524516582489014, + "rewards/margins": 15.93104362487793, + "rewards/rejected": -20.45555877685547, + "step": 5357 + }, + { + "epoch": 8.6, + "learning_rate": 2.4673008323424494e-08, + "logits/chosen": -1.5420665740966797, + "logits/rejected": -1.5506874322891235, + "logps/chosen": -127.18698120117188, + "logps/rejected": -325.80718994140625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.931512355804443, + "rewards/margins": 18.012338638305664, + "rewards/rejected": -22.943851470947266, + "step": 5358 + }, + { + "epoch": 8.6, + "learning_rate": 2.457391993658343e-08, + "logits/chosen": -1.5057554244995117, + "logits/rejected": -1.5669517517089844, + "logps/chosen": -151.4854278564453, + "logps/rejected": -312.54058837890625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.736536979675293, + "rewards/margins": 14.653890609741211, + "rewards/rejected": -21.390426635742188, + "step": 5359 + }, + { + "epoch": 8.6, + "learning_rate": 2.4474831549742367e-08, + "logits/chosen": -1.6885393857955933, + "logits/rejected": -1.6867952346801758, + "logps/chosen": -110.49950408935547, + "logps/rejected": -238.9984130859375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.738239049911499, + "rewards/margins": 13.492115020751953, + "rewards/rejected": -17.23035430908203, + "step": 5360 + }, + { + "epoch": 8.61, + "learning_rate": 2.4375743162901307e-08, + "logits/chosen": -1.5343379974365234, + "logits/rejected": -1.5779157876968384, + "logps/chosen": -144.8140869140625, + "logps/rejected": -320.1214294433594, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.915621757507324, + "rewards/margins": 16.665185928344727, + "rewards/rejected": -23.580806732177734, + "step": 5361 + }, + { + "epoch": 8.61, + "learning_rate": 2.4276654776060244e-08, + "logits/chosen": -1.350724220275879, + "logits/rejected": -1.3347891569137573, + "logps/chosen": -154.35902404785156, + "logps/rejected": -295.4147644042969, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.9923095703125, + "rewards/margins": 14.798417091369629, + "rewards/rejected": -22.790725708007812, + "step": 5362 + }, + { + "epoch": 8.61, + "learning_rate": 2.417756638921918e-08, + "logits/chosen": -1.5498409271240234, + "logits/rejected": -1.5183720588684082, + "logps/chosen": -151.90362548828125, + "logps/rejected": -320.0537109375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.150221824645996, + "rewards/margins": 15.93820571899414, + "rewards/rejected": -23.088428497314453, + "step": 5363 + }, + { + "epoch": 8.61, + "learning_rate": 2.407847800237812e-08, + "logits/chosen": -1.4356343746185303, + "logits/rejected": -1.422674536705017, + "logps/chosen": -140.69793701171875, + "logps/rejected": -318.1617736816406, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.633386611938477, + "rewards/margins": 17.060449600219727, + "rewards/rejected": -22.693836212158203, + "step": 5364 + }, + { + "epoch": 8.61, + "learning_rate": 2.3979389615537057e-08, + "logits/chosen": -1.7460296154022217, + "logits/rejected": -1.6841731071472168, + "logps/chosen": -183.9495849609375, + "logps/rejected": -288.3733825683594, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.479047775268555, + "rewards/margins": 12.009764671325684, + "rewards/rejected": -20.488813400268555, + "step": 5365 + }, + { + "epoch": 8.61, + "learning_rate": 2.3880301228695993e-08, + "logits/chosen": -1.4182606935501099, + "logits/rejected": -1.3940268754959106, + "logps/chosen": -192.3302459716797, + "logps/rejected": -330.31011962890625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.414491653442383, + "rewards/margins": 13.323473930358887, + "rewards/rejected": -23.737964630126953, + "step": 5366 + }, + { + "epoch": 8.61, + "learning_rate": 2.3781212841854933e-08, + "logits/chosen": -1.4237298965454102, + "logits/rejected": -1.4595540761947632, + "logps/chosen": -192.94329833984375, + "logps/rejected": -342.6048278808594, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.63095235824585, + "rewards/margins": 16.023216247558594, + "rewards/rejected": -23.65416717529297, + "step": 5367 + }, + { + "epoch": 8.62, + "learning_rate": 2.368212445501387e-08, + "logits/chosen": -1.4925380945205688, + "logits/rejected": -1.4754083156585693, + "logps/chosen": -138.5514373779297, + "logps/rejected": -247.54693603515625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.4846954345703125, + "rewards/margins": 11.605632781982422, + "rewards/rejected": -18.090328216552734, + "step": 5368 + }, + { + "epoch": 8.62, + "learning_rate": 2.358303606817281e-08, + "logits/chosen": -1.2720502614974976, + "logits/rejected": -1.2640637159347534, + "logps/chosen": -176.77548217773438, + "logps/rejected": -333.67779541015625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.369275093078613, + "rewards/margins": 14.81212043762207, + "rewards/rejected": -24.181394577026367, + "step": 5369 + }, + { + "epoch": 8.62, + "learning_rate": 2.348394768133175e-08, + "logits/chosen": -1.398460865020752, + "logits/rejected": -1.4351037740707397, + "logps/chosen": -192.73634338378906, + "logps/rejected": -358.3936462402344, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.989845275878906, + "rewards/margins": 15.684565544128418, + "rewards/rejected": -25.674409866333008, + "step": 5370 + }, + { + "epoch": 8.62, + "learning_rate": 2.3384859294490686e-08, + "logits/chosen": -1.4406020641326904, + "logits/rejected": -1.4983959197998047, + "logps/chosen": -142.67947387695312, + "logps/rejected": -307.992431640625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.7846198081970215, + "rewards/margins": 13.613470077514648, + "rewards/rejected": -19.398090362548828, + "step": 5371 + }, + { + "epoch": 8.62, + "learning_rate": 2.3285770907649623e-08, + "logits/chosen": -1.5201971530914307, + "logits/rejected": -1.5204408168792725, + "logps/chosen": -92.43046569824219, + "logps/rejected": -260.2508239746094, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.104915618896484, + "rewards/margins": 14.969611167907715, + "rewards/rejected": -19.074527740478516, + "step": 5372 + }, + { + "epoch": 8.62, + "learning_rate": 2.318668252080856e-08, + "logits/chosen": -1.570996880531311, + "logits/rejected": -1.5043530464172363, + "logps/chosen": -105.13887023925781, + "logps/rejected": -265.59002685546875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.5006332397460938, + "rewards/margins": 16.251750946044922, + "rewards/rejected": -19.752384185791016, + "step": 5373 + }, + { + "epoch": 8.63, + "learning_rate": 2.30875941339675e-08, + "logits/chosen": -1.3687586784362793, + "logits/rejected": -1.387278437614441, + "logps/chosen": -154.98455810546875, + "logps/rejected": -299.95672607421875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.355508804321289, + "rewards/margins": 14.531704902648926, + "rewards/rejected": -21.88721466064453, + "step": 5374 + }, + { + "epoch": 8.63, + "learning_rate": 2.2988505747126436e-08, + "logits/chosen": -1.581653356552124, + "logits/rejected": -1.6940927505493164, + "logps/chosen": -102.49609375, + "logps/rejected": -295.60711669921875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.051746845245361, + "rewards/margins": 16.530868530273438, + "rewards/rejected": -20.58261489868164, + "step": 5375 + }, + { + "epoch": 8.63, + "learning_rate": 2.2889417360285373e-08, + "logits/chosen": -1.3821258544921875, + "logits/rejected": -1.449644684791565, + "logps/chosen": -153.39483642578125, + "logps/rejected": -314.54534912109375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.683879375457764, + "rewards/margins": 14.320371627807617, + "rewards/rejected": -21.00425148010254, + "step": 5376 + }, + { + "epoch": 8.63, + "learning_rate": 2.2790328973444313e-08, + "logits/chosen": -1.4055495262145996, + "logits/rejected": -1.3504836559295654, + "logps/chosen": -198.59628295898438, + "logps/rejected": -315.4260559082031, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.824270248413086, + "rewards/margins": 12.801178932189941, + "rewards/rejected": -23.625450134277344, + "step": 5377 + }, + { + "epoch": 8.63, + "learning_rate": 2.269124058660325e-08, + "logits/chosen": -1.4928538799285889, + "logits/rejected": -1.4539557695388794, + "logps/chosen": -178.90550231933594, + "logps/rejected": -304.42657470703125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.62002182006836, + "rewards/margins": 13.678658485412598, + "rewards/rejected": -22.29867935180664, + "step": 5378 + }, + { + "epoch": 8.63, + "learning_rate": 2.2592152199762186e-08, + "logits/chosen": -1.4649628400802612, + "logits/rejected": -1.4860939979553223, + "logps/chosen": -216.16201782226562, + "logps/rejected": -326.2729797363281, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.439777374267578, + "rewards/margins": 12.445284843444824, + "rewards/rejected": -23.885061264038086, + "step": 5379 + }, + { + "epoch": 8.64, + "learning_rate": 2.2493063812921126e-08, + "logits/chosen": -1.4282879829406738, + "logits/rejected": -1.486496925354004, + "logps/chosen": -177.47610473632812, + "logps/rejected": -325.82806396484375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.224937438964844, + "rewards/margins": 15.76201057434082, + "rewards/rejected": -23.986948013305664, + "step": 5380 + }, + { + "epoch": 8.64, + "learning_rate": 2.2393975426080062e-08, + "logits/chosen": -1.5253872871398926, + "logits/rejected": -1.4889881610870361, + "logps/chosen": -146.39993286132812, + "logps/rejected": -312.7686462402344, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.283271312713623, + "rewards/margins": 17.400283813476562, + "rewards/rejected": -23.683555603027344, + "step": 5381 + }, + { + "epoch": 8.64, + "learning_rate": 2.2294887039239e-08, + "logits/chosen": -1.3739409446716309, + "logits/rejected": -1.3800280094146729, + "logps/chosen": -144.0288848876953, + "logps/rejected": -323.41851806640625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.348338603973389, + "rewards/margins": 17.14202308654785, + "rewards/rejected": -23.4903621673584, + "step": 5382 + }, + { + "epoch": 8.64, + "learning_rate": 2.219579865239794e-08, + "logits/chosen": -1.4577126502990723, + "logits/rejected": -1.4336931705474854, + "logps/chosen": -166.07357788085938, + "logps/rejected": -302.9776611328125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.884833335876465, + "rewards/margins": 14.11009407043457, + "rewards/rejected": -22.99492645263672, + "step": 5383 + }, + { + "epoch": 8.64, + "learning_rate": 2.2096710265556876e-08, + "logits/chosen": -1.3703802824020386, + "logits/rejected": -1.5221235752105713, + "logps/chosen": -141.48550415039062, + "logps/rejected": -291.4110107421875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.26084566116333, + "rewards/margins": 13.141111373901367, + "rewards/rejected": -18.40195655822754, + "step": 5384 + }, + { + "epoch": 8.64, + "learning_rate": 2.1997621878715812e-08, + "logits/chosen": -1.337878942489624, + "logits/rejected": -1.3832314014434814, + "logps/chosen": -167.31829833984375, + "logps/rejected": -324.15179443359375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.515613555908203, + "rewards/margins": 14.695659637451172, + "rewards/rejected": -23.211273193359375, + "step": 5385 + }, + { + "epoch": 8.65, + "learning_rate": 2.1898533491874752e-08, + "logits/chosen": -1.3303240537643433, + "logits/rejected": -1.3096201419830322, + "logps/chosen": -151.6603546142578, + "logps/rejected": -336.8458251953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.609926223754883, + "rewards/margins": 18.21916389465332, + "rewards/rejected": -25.829092025756836, + "step": 5386 + }, + { + "epoch": 8.65, + "learning_rate": 2.179944510503369e-08, + "logits/chosen": -1.4761427640914917, + "logits/rejected": -1.4257992506027222, + "logps/chosen": -162.93414306640625, + "logps/rejected": -282.4807434082031, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.215331077575684, + "rewards/margins": 12.161579132080078, + "rewards/rejected": -20.376911163330078, + "step": 5387 + }, + { + "epoch": 8.65, + "learning_rate": 2.1700356718192625e-08, + "logits/chosen": -1.6836434602737427, + "logits/rejected": -1.6146459579467773, + "logps/chosen": -199.28614807128906, + "logps/rejected": -335.4100341796875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.920113563537598, + "rewards/margins": 14.349318504333496, + "rewards/rejected": -23.269432067871094, + "step": 5388 + }, + { + "epoch": 8.65, + "learning_rate": 2.1601268331351562e-08, + "logits/chosen": -1.3653361797332764, + "logits/rejected": -1.3894007205963135, + "logps/chosen": -135.3428955078125, + "logps/rejected": -269.2462463378906, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.989072322845459, + "rewards/margins": 13.346622467041016, + "rewards/rejected": -18.335695266723633, + "step": 5389 + }, + { + "epoch": 8.65, + "learning_rate": 2.1502179944510502e-08, + "logits/chosen": -1.395554542541504, + "logits/rejected": -1.3156098127365112, + "logps/chosen": -180.25840759277344, + "logps/rejected": -330.35980224609375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.841594696044922, + "rewards/margins": 14.786198616027832, + "rewards/rejected": -22.627792358398438, + "step": 5390 + }, + { + "epoch": 8.65, + "learning_rate": 2.140309155766944e-08, + "logits/chosen": -1.3485603332519531, + "logits/rejected": -1.249395728111267, + "logps/chosen": -200.5875701904297, + "logps/rejected": -317.04608154296875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.751371383666992, + "rewards/margins": 13.044334411621094, + "rewards/rejected": -22.795703887939453, + "step": 5391 + }, + { + "epoch": 8.65, + "learning_rate": 2.1304003170828378e-08, + "logits/chosen": -1.2516285181045532, + "logits/rejected": -1.2582335472106934, + "logps/chosen": -150.2400360107422, + "logps/rejected": -290.5525817871094, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.992382526397705, + "rewards/margins": 13.625835418701172, + "rewards/rejected": -20.61821746826172, + "step": 5392 + }, + { + "epoch": 8.66, + "learning_rate": 2.1204914783987318e-08, + "logits/chosen": -1.4317381381988525, + "logits/rejected": -1.4259569644927979, + "logps/chosen": -186.71127319335938, + "logps/rejected": -302.021484375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.708990097045898, + "rewards/margins": 11.027623176574707, + "rewards/rejected": -20.736614227294922, + "step": 5393 + }, + { + "epoch": 8.66, + "learning_rate": 2.1105826397146255e-08, + "logits/chosen": -1.478076457977295, + "logits/rejected": -1.4546988010406494, + "logps/chosen": -141.7439422607422, + "logps/rejected": -245.03428649902344, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.180087089538574, + "rewards/margins": 12.007574081420898, + "rewards/rejected": -17.187660217285156, + "step": 5394 + }, + { + "epoch": 8.66, + "learning_rate": 2.100673801030519e-08, + "logits/chosen": -1.4127628803253174, + "logits/rejected": -1.450626254081726, + "logps/chosen": -140.04383850097656, + "logps/rejected": -272.35711669921875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.133778095245361, + "rewards/margins": 13.330741882324219, + "rewards/rejected": -20.464519500732422, + "step": 5395 + }, + { + "epoch": 8.66, + "learning_rate": 2.090764962346413e-08, + "logits/chosen": -1.4417724609375, + "logits/rejected": -1.4480160474777222, + "logps/chosen": -111.80876159667969, + "logps/rejected": -282.4365234375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.2673702239990234, + "rewards/margins": 16.908626556396484, + "rewards/rejected": -20.175996780395508, + "step": 5396 + }, + { + "epoch": 8.66, + "learning_rate": 2.0808561236623068e-08, + "logits/chosen": -1.534409523010254, + "logits/rejected": -1.52290940284729, + "logps/chosen": -132.01840209960938, + "logps/rejected": -261.24591064453125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.408312797546387, + "rewards/margins": 14.081130981445312, + "rewards/rejected": -19.489442825317383, + "step": 5397 + }, + { + "epoch": 8.66, + "learning_rate": 2.0709472849782005e-08, + "logits/chosen": -1.261778712272644, + "logits/rejected": -1.2889846563339233, + "logps/chosen": -134.3740234375, + "logps/rejected": -278.6933288574219, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.665897369384766, + "rewards/margins": 13.359652519226074, + "rewards/rejected": -20.025550842285156, + "step": 5398 + }, + { + "epoch": 8.67, + "learning_rate": 2.0610384462940944e-08, + "logits/chosen": -1.406590461730957, + "logits/rejected": -1.3658708333969116, + "logps/chosen": -217.77938842773438, + "logps/rejected": -356.9627685546875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.433937072753906, + "rewards/margins": 14.385049819946289, + "rewards/rejected": -26.818984985351562, + "step": 5399 + }, + { + "epoch": 8.67, + "learning_rate": 2.051129607609988e-08, + "logits/chosen": -1.527463674545288, + "logits/rejected": -1.5510847568511963, + "logps/chosen": -133.41136169433594, + "logps/rejected": -286.19879150390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.954122543334961, + "rewards/margins": 15.279735565185547, + "rewards/rejected": -21.233858108520508, + "step": 5400 + }, + { + "epoch": 8.67, + "learning_rate": 2.0412207689258818e-08, + "logits/chosen": -1.5254827737808228, + "logits/rejected": -1.6011466979980469, + "logps/chosen": -183.349609375, + "logps/rejected": -319.93072509765625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.533164978027344, + "rewards/margins": 14.015229225158691, + "rewards/rejected": -22.54839515686035, + "step": 5401 + }, + { + "epoch": 8.67, + "learning_rate": 2.0313119302417754e-08, + "logits/chosen": -1.6539002656936646, + "logits/rejected": -1.6926480531692505, + "logps/chosen": -155.47262573242188, + "logps/rejected": -335.24609375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.804910659790039, + "rewards/margins": 17.76555061340332, + "rewards/rejected": -24.57046127319336, + "step": 5402 + }, + { + "epoch": 8.67, + "learning_rate": 2.0214030915576694e-08, + "logits/chosen": -1.3920235633850098, + "logits/rejected": -1.382941722869873, + "logps/chosen": -145.57815551757812, + "logps/rejected": -333.572509765625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.759631633758545, + "rewards/margins": 16.444793701171875, + "rewards/rejected": -23.204423904418945, + "step": 5403 + }, + { + "epoch": 8.67, + "learning_rate": 2.011494252873563e-08, + "logits/chosen": -1.4340091943740845, + "logits/rejected": -1.4690555334091187, + "logps/chosen": -161.47503662109375, + "logps/rejected": -309.48602294921875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.348018646240234, + "rewards/margins": 13.977944374084473, + "rewards/rejected": -22.325963973999023, + "step": 5404 + }, + { + "epoch": 8.68, + "learning_rate": 2.0015854141894567e-08, + "logits/chosen": -1.6192803382873535, + "logits/rejected": -1.4791932106018066, + "logps/chosen": -180.9537353515625, + "logps/rejected": -282.48809814453125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.903130531311035, + "rewards/margins": 12.08569049835205, + "rewards/rejected": -19.988821029663086, + "step": 5405 + }, + { + "epoch": 8.68, + "learning_rate": 1.9916765755053507e-08, + "logits/chosen": -1.3572323322296143, + "logits/rejected": -1.4935731887817383, + "logps/chosen": -135.30239868164062, + "logps/rejected": -305.8617858886719, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.775205612182617, + "rewards/margins": 16.02194595336914, + "rewards/rejected": -22.797151565551758, + "step": 5406 + }, + { + "epoch": 8.68, + "learning_rate": 1.9817677368212444e-08, + "logits/chosen": -1.3814445734024048, + "logits/rejected": -1.3636243343353271, + "logps/chosen": -170.69541931152344, + "logps/rejected": -276.16925048828125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.715913772583008, + "rewards/margins": 11.35075569152832, + "rewards/rejected": -20.066669464111328, + "step": 5407 + }, + { + "epoch": 8.68, + "learning_rate": 1.971858898137138e-08, + "logits/chosen": -1.4001219272613525, + "logits/rejected": -1.4201138019561768, + "logps/chosen": -199.0594024658203, + "logps/rejected": -420.7574157714844, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.527790069580078, + "rewards/margins": 20.509042739868164, + "rewards/rejected": -31.036832809448242, + "step": 5408 + }, + { + "epoch": 8.68, + "learning_rate": 1.961950059453032e-08, + "logits/chosen": -1.3986068964004517, + "logits/rejected": -1.4479210376739502, + "logps/chosen": -133.54501342773438, + "logps/rejected": -307.1135559082031, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.434083938598633, + "rewards/margins": 16.85643768310547, + "rewards/rejected": -23.290523529052734, + "step": 5409 + }, + { + "epoch": 8.68, + "learning_rate": 1.9520412207689257e-08, + "logits/chosen": -1.3960983753204346, + "logits/rejected": -1.3753490447998047, + "logps/chosen": -191.25808715820312, + "logps/rejected": -289.48345947265625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.997090339660645, + "rewards/margins": 12.245109558105469, + "rewards/rejected": -21.242198944091797, + "step": 5410 + }, + { + "epoch": 8.69, + "learning_rate": 1.9421323820848194e-08, + "logits/chosen": -1.436080813407898, + "logits/rejected": -1.5596458911895752, + "logps/chosen": -120.0604476928711, + "logps/rejected": -291.50396728515625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.287513256072998, + "rewards/margins": 15.447583198547363, + "rewards/rejected": -19.735095977783203, + "step": 5411 + }, + { + "epoch": 8.69, + "learning_rate": 1.9322235434007134e-08, + "logits/chosen": -1.3437585830688477, + "logits/rejected": -1.3850083351135254, + "logps/chosen": -137.52354431152344, + "logps/rejected": -275.0857849121094, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.597171783447266, + "rewards/margins": 14.516707420349121, + "rewards/rejected": -21.113880157470703, + "step": 5412 + }, + { + "epoch": 8.69, + "learning_rate": 1.922314704716607e-08, + "logits/chosen": -1.513985276222229, + "logits/rejected": -1.5127615928649902, + "logps/chosen": -165.81092834472656, + "logps/rejected": -274.78375244140625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.767459392547607, + "rewards/margins": 11.585197448730469, + "rewards/rejected": -18.352657318115234, + "step": 5413 + }, + { + "epoch": 8.69, + "learning_rate": 1.912405866032501e-08, + "logits/chosen": -1.4278709888458252, + "logits/rejected": -1.539350986480713, + "logps/chosen": -140.39010620117188, + "logps/rejected": -296.0400085449219, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.729770660400391, + "rewards/margins": 11.984197616577148, + "rewards/rejected": -17.71396827697754, + "step": 5414 + }, + { + "epoch": 8.69, + "learning_rate": 1.9024970273483947e-08, + "logits/chosen": -1.3946603536605835, + "logits/rejected": -1.388910174369812, + "logps/chosen": -188.58749389648438, + "logps/rejected": -324.81683349609375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.928207397460938, + "rewards/margins": 14.119169235229492, + "rewards/rejected": -23.047374725341797, + "step": 5415 + }, + { + "epoch": 8.69, + "learning_rate": 1.8925881886642887e-08, + "logits/chosen": -1.4341158866882324, + "logits/rejected": -1.408185362815857, + "logps/chosen": -164.11605834960938, + "logps/rejected": -311.2739562988281, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.85394287109375, + "rewards/margins": 12.989866256713867, + "rewards/rejected": -20.843809127807617, + "step": 5416 + }, + { + "epoch": 8.7, + "learning_rate": 1.8826793499801823e-08, + "logits/chosen": -1.5777959823608398, + "logits/rejected": -1.6440120935440063, + "logps/chosen": -142.80491638183594, + "logps/rejected": -301.3917541503906, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.2647552490234375, + "rewards/margins": 15.107778549194336, + "rewards/rejected": -22.372533798217773, + "step": 5417 + }, + { + "epoch": 8.7, + "learning_rate": 1.872770511296076e-08, + "logits/chosen": -1.4760940074920654, + "logits/rejected": -1.5089678764343262, + "logps/chosen": -152.63711547851562, + "logps/rejected": -333.5794982910156, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.641969203948975, + "rewards/margins": 16.97591781616211, + "rewards/rejected": -23.617889404296875, + "step": 5418 + }, + { + "epoch": 8.7, + "learning_rate": 1.86286167261197e-08, + "logits/chosen": -1.4798284769058228, + "logits/rejected": -1.4175519943237305, + "logps/chosen": -180.53802490234375, + "logps/rejected": -290.6514892578125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.436674118041992, + "rewards/margins": 11.1602144241333, + "rewards/rejected": -20.59688949584961, + "step": 5419 + }, + { + "epoch": 8.7, + "learning_rate": 1.8529528339278636e-08, + "logits/chosen": -1.56388521194458, + "logits/rejected": -1.4788331985473633, + "logps/chosen": -160.33489990234375, + "logps/rejected": -317.0961608886719, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.187560081481934, + "rewards/margins": 16.138172149658203, + "rewards/rejected": -23.32573127746582, + "step": 5420 + }, + { + "epoch": 8.7, + "learning_rate": 1.8430439952437573e-08, + "logits/chosen": -1.3289744853973389, + "logits/rejected": -1.31874418258667, + "logps/chosen": -131.88381958007812, + "logps/rejected": -238.69798278808594, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.557104110717773, + "rewards/margins": 11.977872848510742, + "rewards/rejected": -16.534975051879883, + "step": 5421 + }, + { + "epoch": 8.7, + "learning_rate": 1.8331351565596513e-08, + "logits/chosen": -1.6553711891174316, + "logits/rejected": -1.6399145126342773, + "logps/chosen": -173.7509765625, + "logps/rejected": -272.24554443359375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.07326889038086, + "rewards/margins": 11.623566627502441, + "rewards/rejected": -19.696834564208984, + "step": 5422 + }, + { + "epoch": 8.7, + "learning_rate": 1.823226317875545e-08, + "logits/chosen": -1.5901862382888794, + "logits/rejected": -1.5946159362792969, + "logps/chosen": -128.5502166748047, + "logps/rejected": -300.73773193359375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.353278160095215, + "rewards/margins": 16.295543670654297, + "rewards/rejected": -20.648822784423828, + "step": 5423 + }, + { + "epoch": 8.71, + "learning_rate": 1.8133174791914386e-08, + "logits/chosen": -1.313542127609253, + "logits/rejected": -1.33736252784729, + "logps/chosen": -186.00204467773438, + "logps/rejected": -309.9263000488281, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.433143615722656, + "rewards/margins": 12.175969123840332, + "rewards/rejected": -22.609113693237305, + "step": 5424 + }, + { + "epoch": 8.71, + "learning_rate": 1.8034086405073326e-08, + "logits/chosen": -1.5076936483383179, + "logits/rejected": -1.3755697011947632, + "logps/chosen": -156.71273803710938, + "logps/rejected": -278.5020751953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.852901458740234, + "rewards/margins": 12.804498672485352, + "rewards/rejected": -20.657400131225586, + "step": 5425 + }, + { + "epoch": 8.71, + "learning_rate": 1.7934998018232263e-08, + "logits/chosen": -1.5447584390640259, + "logits/rejected": -1.542043685913086, + "logps/chosen": -178.9134979248047, + "logps/rejected": -293.19342041015625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.491615295410156, + "rewards/margins": 11.710564613342285, + "rewards/rejected": -20.202178955078125, + "step": 5426 + }, + { + "epoch": 8.71, + "learning_rate": 1.78359096313912e-08, + "logits/chosen": -1.5317679643630981, + "logits/rejected": -1.5133222341537476, + "logps/chosen": -141.9131317138672, + "logps/rejected": -274.0616760253906, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.702897071838379, + "rewards/margins": 13.289934158325195, + "rewards/rejected": -18.99283218383789, + "step": 5427 + }, + { + "epoch": 8.71, + "learning_rate": 1.773682124455014e-08, + "logits/chosen": -1.4113404750823975, + "logits/rejected": -1.4432847499847412, + "logps/chosen": -147.66091918945312, + "logps/rejected": -271.05718994140625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.670530319213867, + "rewards/margins": 13.641035079956055, + "rewards/rejected": -20.311565399169922, + "step": 5428 + }, + { + "epoch": 8.71, + "learning_rate": 1.7637732857709076e-08, + "logits/chosen": -1.2269777059555054, + "logits/rejected": -1.304579257965088, + "logps/chosen": -202.24560546875, + "logps/rejected": -332.7342834472656, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.360701560974121, + "rewards/margins": 11.67175006866455, + "rewards/rejected": -24.032451629638672, + "step": 5429 + }, + { + "epoch": 8.72, + "learning_rate": 1.7538644470868012e-08, + "logits/chosen": -1.3934226036071777, + "logits/rejected": -1.3837147951126099, + "logps/chosen": -193.45388793945312, + "logps/rejected": -295.3465270996094, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.002714157104492, + "rewards/margins": 11.014525413513184, + "rewards/rejected": -22.01723861694336, + "step": 5430 + }, + { + "epoch": 8.72, + "learning_rate": 1.743955608402695e-08, + "logits/chosen": -1.3453655242919922, + "logits/rejected": -1.3514504432678223, + "logps/chosen": -150.5539093017578, + "logps/rejected": -285.1641845703125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.312990188598633, + "rewards/margins": 12.527724266052246, + "rewards/rejected": -18.840713500976562, + "step": 5431 + }, + { + "epoch": 8.72, + "learning_rate": 1.734046769718589e-08, + "logits/chosen": -1.5243191719055176, + "logits/rejected": -1.5087764263153076, + "logps/chosen": -125.31169128417969, + "logps/rejected": -268.10748291015625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.824173927307129, + "rewards/margins": 14.451338768005371, + "rewards/rejected": -20.2755126953125, + "step": 5432 + }, + { + "epoch": 8.72, + "learning_rate": 1.7241379310344825e-08, + "logits/chosen": -1.5840343236923218, + "logits/rejected": -1.6018993854522705, + "logps/chosen": -171.3840789794922, + "logps/rejected": -316.93011474609375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.325183868408203, + "rewards/margins": 16.179174423217773, + "rewards/rejected": -22.504358291625977, + "step": 5433 + }, + { + "epoch": 8.72, + "learning_rate": 1.7142290923503762e-08, + "logits/chosen": -1.406834363937378, + "logits/rejected": -1.3962639570236206, + "logps/chosen": -181.0482177734375, + "logps/rejected": -285.4689025878906, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.471445083618164, + "rewards/margins": 10.917606353759766, + "rewards/rejected": -20.38905143737793, + "step": 5434 + }, + { + "epoch": 8.72, + "learning_rate": 1.7043202536662702e-08, + "logits/chosen": -1.4812595844268799, + "logits/rejected": -1.4489531517028809, + "logps/chosen": -132.31918334960938, + "logps/rejected": -285.6187744140625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.942589282989502, + "rewards/margins": 15.669097900390625, + "rewards/rejected": -21.6116886138916, + "step": 5435 + }, + { + "epoch": 8.73, + "learning_rate": 1.694411414982164e-08, + "logits/chosen": -1.4812015295028687, + "logits/rejected": -1.4928863048553467, + "logps/chosen": -261.6613464355469, + "logps/rejected": -365.59429931640625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -14.705549240112305, + "rewards/margins": 9.916019439697266, + "rewards/rejected": -24.62156867980957, + "step": 5436 + }, + { + "epoch": 8.73, + "learning_rate": 1.684502576298058e-08, + "logits/chosen": -1.4849706888198853, + "logits/rejected": -1.5397485494613647, + "logps/chosen": -138.46188354492188, + "logps/rejected": -284.5140380859375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.828947067260742, + "rewards/margins": 14.241378784179688, + "rewards/rejected": -21.07032585144043, + "step": 5437 + }, + { + "epoch": 8.73, + "learning_rate": 1.674593737613952e-08, + "logits/chosen": -1.6498498916625977, + "logits/rejected": -1.5815578699111938, + "logps/chosen": -180.39195251464844, + "logps/rejected": -319.35791015625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.804305076599121, + "rewards/margins": 14.553720474243164, + "rewards/rejected": -23.35802459716797, + "step": 5438 + }, + { + "epoch": 8.73, + "learning_rate": 1.6646848989298455e-08, + "logits/chosen": -1.4684767723083496, + "logits/rejected": -1.5074323415756226, + "logps/chosen": -188.34475708007812, + "logps/rejected": -340.31842041015625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.619147300720215, + "rewards/margins": 14.9807767868042, + "rewards/rejected": -24.599924087524414, + "step": 5439 + }, + { + "epoch": 8.73, + "learning_rate": 1.654776060245739e-08, + "logits/chosen": -1.5392978191375732, + "logits/rejected": -1.5886207818984985, + "logps/chosen": -124.53779602050781, + "logps/rejected": -252.47906494140625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.796805381774902, + "rewards/margins": 11.98562240600586, + "rewards/rejected": -17.782428741455078, + "step": 5440 + }, + { + "epoch": 8.73, + "learning_rate": 1.644867221561633e-08, + "logits/chosen": -1.248887062072754, + "logits/rejected": -1.2908222675323486, + "logps/chosen": -115.50232696533203, + "logps/rejected": -286.98486328125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.075676441192627, + "rewards/margins": 17.060115814208984, + "rewards/rejected": -22.135791778564453, + "step": 5441 + }, + { + "epoch": 8.74, + "learning_rate": 1.6349583828775268e-08, + "logits/chosen": -1.6042277812957764, + "logits/rejected": -1.6753432750701904, + "logps/chosen": -132.127685546875, + "logps/rejected": -304.082275390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.450425148010254, + "rewards/margins": 15.736482620239258, + "rewards/rejected": -21.186908721923828, + "step": 5442 + }, + { + "epoch": 8.74, + "learning_rate": 1.6250495441934205e-08, + "logits/chosen": -1.4198064804077148, + "logits/rejected": -1.383195161819458, + "logps/chosen": -150.97293090820312, + "logps/rejected": -321.0423889160156, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.083958625793457, + "rewards/margins": 16.40924835205078, + "rewards/rejected": -24.493206024169922, + "step": 5443 + }, + { + "epoch": 8.74, + "learning_rate": 1.615140705509314e-08, + "logits/chosen": -1.5442233085632324, + "logits/rejected": -1.524083137512207, + "logps/chosen": -162.9457550048828, + "logps/rejected": -274.1092529296875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.804529190063477, + "rewards/margins": 11.530630111694336, + "rewards/rejected": -18.335159301757812, + "step": 5444 + }, + { + "epoch": 8.74, + "learning_rate": 1.605231866825208e-08, + "logits/chosen": -1.5310945510864258, + "logits/rejected": -1.6240178346633911, + "logps/chosen": -135.0478515625, + "logps/rejected": -322.2422790527344, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.128134727478027, + "rewards/margins": 16.688838958740234, + "rewards/rejected": -22.816972732543945, + "step": 5445 + }, + { + "epoch": 8.74, + "learning_rate": 1.5953230281411018e-08, + "logits/chosen": -1.469875693321228, + "logits/rejected": -1.4334754943847656, + "logps/chosen": -119.8460693359375, + "logps/rejected": -277.662841796875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.21774435043335, + "rewards/margins": 15.953648567199707, + "rewards/rejected": -20.1713924407959, + "step": 5446 + }, + { + "epoch": 8.74, + "learning_rate": 1.5854141894569954e-08, + "logits/chosen": -1.60468590259552, + "logits/rejected": -1.675989031791687, + "logps/chosen": -138.124755859375, + "logps/rejected": -287.9798583984375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.6595354080200195, + "rewards/margins": 13.307097434997559, + "rewards/rejected": -17.966632843017578, + "step": 5447 + }, + { + "epoch": 8.74, + "learning_rate": 1.5755053507728894e-08, + "logits/chosen": -1.5474847555160522, + "logits/rejected": -1.524472951889038, + "logps/chosen": -127.24958038330078, + "logps/rejected": -266.0911865234375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.745676517486572, + "rewards/margins": 14.86463737487793, + "rewards/rejected": -19.610313415527344, + "step": 5448 + }, + { + "epoch": 8.75, + "learning_rate": 1.565596512088783e-08, + "logits/chosen": -1.3649873733520508, + "logits/rejected": -1.3556294441223145, + "logps/chosen": -189.20260620117188, + "logps/rejected": -333.01422119140625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.14459228515625, + "rewards/margins": 13.50729751586914, + "rewards/rejected": -23.65188980102539, + "step": 5449 + }, + { + "epoch": 8.75, + "learning_rate": 1.555687673404677e-08, + "logits/chosen": -1.4408241510391235, + "logits/rejected": -1.5028953552246094, + "logps/chosen": -142.3921661376953, + "logps/rejected": -272.3212585449219, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.730820655822754, + "rewards/margins": 12.205185890197754, + "rewards/rejected": -18.93600845336914, + "step": 5450 + }, + { + "epoch": 8.75, + "learning_rate": 1.5457788347205708e-08, + "logits/chosen": -1.488051414489746, + "logits/rejected": -1.4649136066436768, + "logps/chosen": -202.55831909179688, + "logps/rejected": -309.7406921386719, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.431904792785645, + "rewards/margins": 10.457812309265137, + "rewards/rejected": -20.88971710205078, + "step": 5451 + }, + { + "epoch": 8.75, + "learning_rate": 1.5358699960364644e-08, + "logits/chosen": -1.5831212997436523, + "logits/rejected": -1.5285779237747192, + "logps/chosen": -113.00989532470703, + "logps/rejected": -232.22071838378906, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.422863960266113, + "rewards/margins": 13.608236312866211, + "rewards/rejected": -18.031099319458008, + "step": 5452 + }, + { + "epoch": 8.75, + "learning_rate": 1.5259611573523584e-08, + "logits/chosen": -1.5451223850250244, + "logits/rejected": -1.568666934967041, + "logps/chosen": -139.60467529296875, + "logps/rejected": -311.4291076660156, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.633073806762695, + "rewards/margins": 16.752666473388672, + "rewards/rejected": -22.385740280151367, + "step": 5453 + }, + { + "epoch": 8.75, + "learning_rate": 1.516052318668252e-08, + "logits/chosen": -1.4125510454177856, + "logits/rejected": -1.4446179866790771, + "logps/chosen": -155.23416137695312, + "logps/rejected": -305.07086181640625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.894509792327881, + "rewards/margins": 15.13357925415039, + "rewards/rejected": -22.028087615966797, + "step": 5454 + }, + { + "epoch": 8.76, + "learning_rate": 1.5061434799841457e-08, + "logits/chosen": -1.4478164911270142, + "logits/rejected": -1.4046560525894165, + "logps/chosen": -177.50704956054688, + "logps/rejected": -331.8160400390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.673406600952148, + "rewards/margins": 14.341483116149902, + "rewards/rejected": -24.014888763427734, + "step": 5455 + }, + { + "epoch": 8.76, + "learning_rate": 1.4962346413000394e-08, + "logits/chosen": -1.3503386974334717, + "logits/rejected": -1.3980176448822021, + "logps/chosen": -119.40214538574219, + "logps/rejected": -292.3028564453125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.000727653503418, + "rewards/margins": 17.554149627685547, + "rewards/rejected": -21.55487823486328, + "step": 5456 + }, + { + "epoch": 8.76, + "learning_rate": 1.4863258026159334e-08, + "logits/chosen": -1.302933692932129, + "logits/rejected": -1.3859922885894775, + "logps/chosen": -138.23703002929688, + "logps/rejected": -280.4346923828125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.196687698364258, + "rewards/margins": 12.23564338684082, + "rewards/rejected": -18.432331085205078, + "step": 5457 + }, + { + "epoch": 8.76, + "learning_rate": 1.4764169639318272e-08, + "logits/chosen": -1.4657410383224487, + "logits/rejected": -1.49823796749115, + "logps/chosen": -226.15228271484375, + "logps/rejected": -353.049072265625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.273387908935547, + "rewards/margins": 12.98002815246582, + "rewards/rejected": -24.253416061401367, + "step": 5458 + }, + { + "epoch": 8.76, + "learning_rate": 1.4665081252477209e-08, + "logits/chosen": -1.7210065126419067, + "logits/rejected": -1.6744036674499512, + "logps/chosen": -104.07920837402344, + "logps/rejected": -273.23077392578125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.03729772567749, + "rewards/margins": 15.186262130737305, + "rewards/rejected": -19.223560333251953, + "step": 5459 + }, + { + "epoch": 8.76, + "learning_rate": 1.4565992865636147e-08, + "logits/chosen": -1.5666556358337402, + "logits/rejected": -1.5169615745544434, + "logps/chosen": -171.88156127929688, + "logps/rejected": -268.0328063964844, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.795080184936523, + "rewards/margins": 11.920706748962402, + "rewards/rejected": -18.715787887573242, + "step": 5460 + }, + { + "epoch": 8.77, + "learning_rate": 1.4466904478795083e-08, + "logits/chosen": -1.329107642173767, + "logits/rejected": -1.4294683933258057, + "logps/chosen": -206.47232055664062, + "logps/rejected": -315.1406555175781, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.299001693725586, + "rewards/margins": 10.380867958068848, + "rewards/rejected": -22.67987060546875, + "step": 5461 + }, + { + "epoch": 8.77, + "learning_rate": 1.4367816091954022e-08, + "logits/chosen": -1.2842626571655273, + "logits/rejected": -1.3369603157043457, + "logps/chosen": -130.5120086669922, + "logps/rejected": -289.1618347167969, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.903784275054932, + "rewards/margins": 14.653454780578613, + "rewards/rejected": -21.557239532470703, + "step": 5462 + }, + { + "epoch": 8.77, + "learning_rate": 1.426872770511296e-08, + "logits/chosen": -1.4478962421417236, + "logits/rejected": -1.4000808000564575, + "logps/chosen": -168.21865844726562, + "logps/rejected": -277.811767578125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.19401741027832, + "rewards/margins": 11.881623268127441, + "rewards/rejected": -20.075641632080078, + "step": 5463 + }, + { + "epoch": 8.77, + "learning_rate": 1.4169639318271897e-08, + "logits/chosen": -1.3364923000335693, + "logits/rejected": -1.383387804031372, + "logps/chosen": -131.06129455566406, + "logps/rejected": -287.3696594238281, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.811740398406982, + "rewards/margins": 15.013370513916016, + "rewards/rejected": -20.825111389160156, + "step": 5464 + }, + { + "epoch": 8.77, + "learning_rate": 1.4070550931430837e-08, + "logits/chosen": -1.6418187618255615, + "logits/rejected": -1.5758846998214722, + "logps/chosen": -192.51864624023438, + "logps/rejected": -330.5244140625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.80848503112793, + "rewards/margins": 15.82055377960205, + "rewards/rejected": -24.629037857055664, + "step": 5465 + }, + { + "epoch": 8.77, + "learning_rate": 1.3971462544589775e-08, + "logits/chosen": -1.7147173881530762, + "logits/rejected": -1.6863746643066406, + "logps/chosen": -153.59031677246094, + "logps/rejected": -298.2607421875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.4989166259765625, + "rewards/margins": 13.980252265930176, + "rewards/rejected": -19.479167938232422, + "step": 5466 + }, + { + "epoch": 8.78, + "learning_rate": 1.3872374157748711e-08, + "logits/chosen": -1.6767550706863403, + "logits/rejected": -1.5682001113891602, + "logps/chosen": -143.5502166748047, + "logps/rejected": -276.6429748535156, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.507812023162842, + "rewards/margins": 14.43783950805664, + "rewards/rejected": -19.945650100708008, + "step": 5467 + }, + { + "epoch": 8.78, + "learning_rate": 1.377328577090765e-08, + "logits/chosen": -1.3700613975524902, + "logits/rejected": -1.3547468185424805, + "logps/chosen": -170.75778198242188, + "logps/rejected": -296.966064453125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.036312103271484, + "rewards/margins": 11.84096622467041, + "rewards/rejected": -20.87727928161621, + "step": 5468 + }, + { + "epoch": 8.78, + "learning_rate": 1.3674197384066586e-08, + "logits/chosen": -1.6304874420166016, + "logits/rejected": -1.5334157943725586, + "logps/chosen": -110.52366638183594, + "logps/rejected": -227.3395538330078, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.2489142417907715, + "rewards/margins": 14.229912757873535, + "rewards/rejected": -16.47882652282715, + "step": 5469 + }, + { + "epoch": 8.78, + "learning_rate": 1.3575108997225525e-08, + "logits/chosen": -1.3230500221252441, + "logits/rejected": -1.266008734703064, + "logps/chosen": -158.57864379882812, + "logps/rejected": -270.51165771484375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.989897727966309, + "rewards/margins": 13.523969650268555, + "rewards/rejected": -19.513866424560547, + "step": 5470 + }, + { + "epoch": 8.78, + "learning_rate": 1.3476020610384463e-08, + "logits/chosen": -1.639865517616272, + "logits/rejected": -1.5158402919769287, + "logps/chosen": -212.3350830078125, + "logps/rejected": -318.9066162109375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.404208183288574, + "rewards/margins": 13.162343978881836, + "rewards/rejected": -22.566551208496094, + "step": 5471 + }, + { + "epoch": 8.78, + "learning_rate": 1.33769322235434e-08, + "logits/chosen": -1.3388512134552002, + "logits/rejected": -1.2424776554107666, + "logps/chosen": -161.70059204101562, + "logps/rejected": -296.9508056640625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.19691276550293, + "rewards/margins": 14.218036651611328, + "rewards/rejected": -22.414949417114258, + "step": 5472 + }, + { + "epoch": 8.78, + "learning_rate": 1.3277843836702338e-08, + "logits/chosen": -1.5766406059265137, + "logits/rejected": -1.686234474182129, + "logps/chosen": -141.12493896484375, + "logps/rejected": -296.121826171875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.619752407073975, + "rewards/margins": 14.204973220825195, + "rewards/rejected": -19.824726104736328, + "step": 5473 + }, + { + "epoch": 8.79, + "learning_rate": 1.3178755449861276e-08, + "logits/chosen": -1.6397099494934082, + "logits/rejected": -1.675049066543579, + "logps/chosen": -149.4064483642578, + "logps/rejected": -299.13592529296875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.2261128425598145, + "rewards/margins": 16.540115356445312, + "rewards/rejected": -22.76622772216797, + "step": 5474 + }, + { + "epoch": 8.79, + "learning_rate": 1.3079667063020213e-08, + "logits/chosen": -1.4659183025360107, + "logits/rejected": -1.4307548999786377, + "logps/chosen": -174.8655242919922, + "logps/rejected": -324.6886901855469, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.44254207611084, + "rewards/margins": 16.410329818725586, + "rewards/rejected": -24.852872848510742, + "step": 5475 + }, + { + "epoch": 8.79, + "learning_rate": 1.298057867617915e-08, + "logits/chosen": -1.5121554136276245, + "logits/rejected": -1.5198297500610352, + "logps/chosen": -184.95260620117188, + "logps/rejected": -321.17523193359375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.547597885131836, + "rewards/margins": 14.086271286010742, + "rewards/rejected": -23.63387107849121, + "step": 5476 + }, + { + "epoch": 8.79, + "learning_rate": 1.2881490289338089e-08, + "logits/chosen": -1.389732837677002, + "logits/rejected": -1.4177206754684448, + "logps/chosen": -196.6880340576172, + "logps/rejected": -305.2585144042969, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.282025337219238, + "rewards/margins": 10.888951301574707, + "rewards/rejected": -21.170976638793945, + "step": 5477 + }, + { + "epoch": 8.79, + "learning_rate": 1.2782401902497027e-08, + "logits/chosen": -1.5755412578582764, + "logits/rejected": -1.5377511978149414, + "logps/chosen": -110.13674926757812, + "logps/rejected": -269.607666015625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.820122718811035, + "rewards/margins": 17.253894805908203, + "rewards/rejected": -21.074016571044922, + "step": 5478 + }, + { + "epoch": 8.79, + "learning_rate": 1.2683313515655966e-08, + "logits/chosen": -1.4063007831573486, + "logits/rejected": -1.3440320491790771, + "logps/chosen": -163.69207763671875, + "logps/rejected": -298.35009765625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.1937255859375, + "rewards/margins": 14.703405380249023, + "rewards/rejected": -20.897132873535156, + "step": 5479 + }, + { + "epoch": 8.8, + "learning_rate": 1.2584225128814902e-08, + "logits/chosen": -1.445094347000122, + "logits/rejected": -1.3985655307769775, + "logps/chosen": -206.08135986328125, + "logps/rejected": -335.59326171875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.888328552246094, + "rewards/margins": 11.823844909667969, + "rewards/rejected": -21.712173461914062, + "step": 5480 + }, + { + "epoch": 8.8, + "learning_rate": 1.248513674197384e-08, + "logits/chosen": -1.5109736919403076, + "logits/rejected": -1.4794654846191406, + "logps/chosen": -114.2931900024414, + "logps/rejected": -308.140380859375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.898513317108154, + "rewards/margins": 17.211626052856445, + "rewards/rejected": -23.110137939453125, + "step": 5481 + }, + { + "epoch": 8.8, + "learning_rate": 1.2386048355132779e-08, + "logits/chosen": -1.4989091157913208, + "logits/rejected": -1.4662905931472778, + "logps/chosen": -146.74285888671875, + "logps/rejected": -267.79351806640625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.0835280418396, + "rewards/margins": 12.715306282043457, + "rewards/rejected": -19.79883575439453, + "step": 5482 + }, + { + "epoch": 8.8, + "learning_rate": 1.2286959968291715e-08, + "logits/chosen": -1.3322941064834595, + "logits/rejected": -1.395934820175171, + "logps/chosen": -167.77523803710938, + "logps/rejected": -282.19921875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.862197399139404, + "rewards/margins": 11.602316856384277, + "rewards/rejected": -19.464515686035156, + "step": 5483 + }, + { + "epoch": 8.8, + "learning_rate": 1.2187871581450654e-08, + "logits/chosen": -1.4081549644470215, + "logits/rejected": -1.4481157064437866, + "logps/chosen": -136.6598663330078, + "logps/rejected": -273.2109680175781, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.03472375869751, + "rewards/margins": 13.806780815124512, + "rewards/rejected": -18.84150505065918, + "step": 5484 + }, + { + "epoch": 8.8, + "learning_rate": 1.208878319460959e-08, + "logits/chosen": -1.4753353595733643, + "logits/rejected": -1.4552594423294067, + "logps/chosen": -186.96337890625, + "logps/rejected": -320.4122619628906, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.2470121383667, + "rewards/margins": 14.187028884887695, + "rewards/rejected": -23.43404197692871, + "step": 5485 + }, + { + "epoch": 8.81, + "learning_rate": 1.1989694807768528e-08, + "logits/chosen": -1.5732872486114502, + "logits/rejected": -1.5312550067901611, + "logps/chosen": -147.61984252929688, + "logps/rejected": -234.83689880371094, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.150317668914795, + "rewards/margins": 10.661298751831055, + "rewards/rejected": -16.811616897583008, + "step": 5486 + }, + { + "epoch": 8.81, + "learning_rate": 1.1890606420927467e-08, + "logits/chosen": -1.4916954040527344, + "logits/rejected": -1.5641014575958252, + "logps/chosen": -155.28509521484375, + "logps/rejected": -306.8409423828125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.8343505859375, + "rewards/margins": 13.516738891601562, + "rewards/rejected": -21.351089477539062, + "step": 5487 + }, + { + "epoch": 8.81, + "learning_rate": 1.1791518034086405e-08, + "logits/chosen": -1.4803061485290527, + "logits/rejected": -1.3641369342803955, + "logps/chosen": -139.37228393554688, + "logps/rejected": -238.83946228027344, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.590522050857544, + "rewards/margins": 13.139620780944824, + "rewards/rejected": -16.73014259338379, + "step": 5488 + }, + { + "epoch": 8.81, + "learning_rate": 1.1692429647245343e-08, + "logits/chosen": -1.5225646495819092, + "logits/rejected": -1.4935842752456665, + "logps/chosen": -144.2186279296875, + "logps/rejected": -286.1135559082031, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.663084030151367, + "rewards/margins": 15.020281791687012, + "rewards/rejected": -20.683364868164062, + "step": 5489 + }, + { + "epoch": 8.81, + "learning_rate": 1.159334126040428e-08, + "logits/chosen": -1.3419159650802612, + "logits/rejected": -1.3243415355682373, + "logps/chosen": -188.1663360595703, + "logps/rejected": -316.7386779785156, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.293554306030273, + "rewards/margins": 12.919158935546875, + "rewards/rejected": -22.21271324157715, + "step": 5490 + }, + { + "epoch": 8.81, + "learning_rate": 1.1494252873563218e-08, + "logits/chosen": -1.6183624267578125, + "logits/rejected": -1.5771924257278442, + "logps/chosen": -152.2018280029297, + "logps/rejected": -295.3165283203125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.817328929901123, + "rewards/margins": 14.649947166442871, + "rewards/rejected": -20.467275619506836, + "step": 5491 + }, + { + "epoch": 8.82, + "learning_rate": 1.1395164486722156e-08, + "logits/chosen": -1.3251830339431763, + "logits/rejected": -1.3176028728485107, + "logps/chosen": -147.64845275878906, + "logps/rejected": -267.0843505859375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.172133445739746, + "rewards/margins": 13.301788330078125, + "rewards/rejected": -20.473922729492188, + "step": 5492 + }, + { + "epoch": 8.82, + "learning_rate": 1.1296076099881093e-08, + "logits/chosen": -1.5732475519180298, + "logits/rejected": -1.5450807809829712, + "logps/chosen": -168.5752410888672, + "logps/rejected": -306.53179931640625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.051141738891602, + "rewards/margins": 12.41102409362793, + "rewards/rejected": -21.462167739868164, + "step": 5493 + }, + { + "epoch": 8.82, + "learning_rate": 1.1196987713040031e-08, + "logits/chosen": -1.4722305536270142, + "logits/rejected": -1.480183720588684, + "logps/chosen": -164.18246459960938, + "logps/rejected": -270.2085876464844, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.897164821624756, + "rewards/margins": 11.725311279296875, + "rewards/rejected": -19.62247657775879, + "step": 5494 + }, + { + "epoch": 8.82, + "learning_rate": 1.109789932619897e-08, + "logits/chosen": -1.5381789207458496, + "logits/rejected": -1.5828497409820557, + "logps/chosen": -122.709716796875, + "logps/rejected": -300.283935546875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.456516742706299, + "rewards/margins": 16.429759979248047, + "rewards/rejected": -21.88627815246582, + "step": 5495 + }, + { + "epoch": 8.82, + "learning_rate": 1.0998810939357906e-08, + "logits/chosen": -1.4375251531600952, + "logits/rejected": -1.4297800064086914, + "logps/chosen": -171.7742919921875, + "logps/rejected": -294.5749206542969, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.053570747375488, + "rewards/margins": 13.733942031860352, + "rewards/rejected": -21.787513732910156, + "step": 5496 + }, + { + "epoch": 8.82, + "learning_rate": 1.0899722552516844e-08, + "logits/chosen": -1.4139801263809204, + "logits/rejected": -1.4273046255111694, + "logps/chosen": -145.86782836914062, + "logps/rejected": -271.0716857910156, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.100580215454102, + "rewards/margins": 12.698921203613281, + "rewards/rejected": -20.799501419067383, + "step": 5497 + }, + { + "epoch": 8.83, + "learning_rate": 1.0800634165675781e-08, + "logits/chosen": -1.266104817390442, + "logits/rejected": -1.2670576572418213, + "logps/chosen": -162.82872009277344, + "logps/rejected": -306.3579406738281, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.750912666320801, + "rewards/margins": 13.844608306884766, + "rewards/rejected": -21.595521926879883, + "step": 5498 + }, + { + "epoch": 8.83, + "learning_rate": 1.070154577883472e-08, + "logits/chosen": -1.3478909730911255, + "logits/rejected": -1.305830478668213, + "logps/chosen": -177.2524871826172, + "logps/rejected": -299.0887756347656, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.421171188354492, + "rewards/margins": 14.002289772033691, + "rewards/rejected": -21.4234619140625, + "step": 5499 + }, + { + "epoch": 8.83, + "learning_rate": 1.0602457391993659e-08, + "logits/chosen": -1.384503722190857, + "logits/rejected": -1.4365882873535156, + "logps/chosen": -119.20057678222656, + "logps/rejected": -320.4990234375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.318152904510498, + "rewards/margins": 15.804553985595703, + "rewards/rejected": -22.12270736694336, + "step": 5500 + }, + { + "epoch": 8.83, + "learning_rate": 1.0503369005152596e-08, + "logits/chosen": -1.4457693099975586, + "logits/rejected": -1.478636384010315, + "logps/chosen": -157.18975830078125, + "logps/rejected": -311.40972900390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.174690246582031, + "rewards/margins": 14.979238510131836, + "rewards/rejected": -22.153928756713867, + "step": 5501 + }, + { + "epoch": 8.83, + "learning_rate": 1.0404280618311534e-08, + "logits/chosen": -1.3819780349731445, + "logits/rejected": -1.3861660957336426, + "logps/chosen": -133.33018493652344, + "logps/rejected": -279.2955017089844, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.094486236572266, + "rewards/margins": 14.40609359741211, + "rewards/rejected": -20.500579833984375, + "step": 5502 + }, + { + "epoch": 8.83, + "learning_rate": 1.0305192231470472e-08, + "logits/chosen": -1.384071707725525, + "logits/rejected": -1.5587239265441895, + "logps/chosen": -164.39601135253906, + "logps/rejected": -296.20855712890625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.887092590332031, + "rewards/margins": 11.937942504882812, + "rewards/rejected": -20.825035095214844, + "step": 5503 + }, + { + "epoch": 8.83, + "learning_rate": 1.0206103844629409e-08, + "logits/chosen": -1.5197057723999023, + "logits/rejected": -1.5847768783569336, + "logps/chosen": -201.81466674804688, + "logps/rejected": -368.3904113769531, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.558990478515625, + "rewards/margins": 15.77241325378418, + "rewards/rejected": -26.331405639648438, + "step": 5504 + }, + { + "epoch": 8.84, + "learning_rate": 1.0107015457788347e-08, + "logits/chosen": -1.302932858467102, + "logits/rejected": -1.3689221143722534, + "logps/chosen": -179.6098175048828, + "logps/rejected": -309.509765625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.091455459594727, + "rewards/margins": 13.42479133605957, + "rewards/rejected": -21.516246795654297, + "step": 5505 + }, + { + "epoch": 8.84, + "learning_rate": 1.0007927070947284e-08, + "logits/chosen": -1.4446508884429932, + "logits/rejected": -1.2898387908935547, + "logps/chosen": -175.23019409179688, + "logps/rejected": -261.03900146484375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.882005214691162, + "rewards/margins": 11.478946685791016, + "rewards/rejected": -19.360952377319336, + "step": 5506 + }, + { + "epoch": 8.84, + "learning_rate": 9.908838684106222e-09, + "logits/chosen": -1.4923994541168213, + "logits/rejected": -1.4724560976028442, + "logps/chosen": -193.4761199951172, + "logps/rejected": -295.7413330078125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.44789981842041, + "rewards/margins": 13.000259399414062, + "rewards/rejected": -22.44816017150879, + "step": 5507 + }, + { + "epoch": 8.84, + "learning_rate": 9.80975029726516e-09, + "logits/chosen": -1.7107822895050049, + "logits/rejected": -1.7238879203796387, + "logps/chosen": -119.33875274658203, + "logps/rejected": -314.3605651855469, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.6182198524475098, + "rewards/margins": 18.876056671142578, + "rewards/rejected": -22.49427604675293, + "step": 5508 + }, + { + "epoch": 8.84, + "learning_rate": 9.710661910424097e-09, + "logits/chosen": -1.4171314239501953, + "logits/rejected": -1.3932805061340332, + "logps/chosen": -193.07138061523438, + "logps/rejected": -319.2784118652344, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.281110763549805, + "rewards/margins": 14.629986763000488, + "rewards/rejected": -24.911096572875977, + "step": 5509 + }, + { + "epoch": 8.84, + "learning_rate": 9.611573523583035e-09, + "logits/chosen": -1.3705233335494995, + "logits/rejected": -1.3821635246276855, + "logps/chosen": -177.92129516601562, + "logps/rejected": -375.7994384765625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.335589408874512, + "rewards/margins": 16.886127471923828, + "rewards/rejected": -26.221717834472656, + "step": 5510 + }, + { + "epoch": 8.85, + "learning_rate": 9.512485136741973e-09, + "logits/chosen": -1.3588300943374634, + "logits/rejected": -1.3951325416564941, + "logps/chosen": -135.93341064453125, + "logps/rejected": -275.56109619140625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.945231914520264, + "rewards/margins": 13.470478057861328, + "rewards/rejected": -20.41571044921875, + "step": 5511 + }, + { + "epoch": 8.85, + "learning_rate": 9.413396749900912e-09, + "logits/chosen": -1.5015413761138916, + "logits/rejected": -1.4614593982696533, + "logps/chosen": -151.64794921875, + "logps/rejected": -279.6134948730469, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.345219612121582, + "rewards/margins": 12.83059310913086, + "rewards/rejected": -20.175811767578125, + "step": 5512 + }, + { + "epoch": 8.85, + "learning_rate": 9.31430836305985e-09, + "logits/chosen": -1.444108247756958, + "logits/rejected": -1.4823031425476074, + "logps/chosen": -145.8060302734375, + "logps/rejected": -288.7027282714844, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.133156776428223, + "rewards/margins": 13.690107345581055, + "rewards/rejected": -20.823265075683594, + "step": 5513 + }, + { + "epoch": 8.85, + "learning_rate": 9.215219976218786e-09, + "logits/chosen": -1.645614743232727, + "logits/rejected": -1.5895781517028809, + "logps/chosen": -145.66864013671875, + "logps/rejected": -301.36688232421875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.907896995544434, + "rewards/margins": 16.92184829711914, + "rewards/rejected": -21.829744338989258, + "step": 5514 + }, + { + "epoch": 8.85, + "learning_rate": 9.116131589377725e-09, + "logits/chosen": -1.461030125617981, + "logits/rejected": -1.415977954864502, + "logps/chosen": -154.76885986328125, + "logps/rejected": -295.974853515625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.504090309143066, + "rewards/margins": 15.350045204162598, + "rewards/rejected": -21.854135513305664, + "step": 5515 + }, + { + "epoch": 8.85, + "learning_rate": 9.017043202536663e-09, + "logits/chosen": -1.3060734272003174, + "logits/rejected": -1.422250747680664, + "logps/chosen": -135.39761352539062, + "logps/rejected": -279.13958740234375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.569437503814697, + "rewards/margins": 11.639434814453125, + "rewards/rejected": -19.208871841430664, + "step": 5516 + }, + { + "epoch": 8.86, + "learning_rate": 8.9179548156956e-09, + "logits/chosen": -1.742134928703308, + "logits/rejected": -1.6507973670959473, + "logps/chosen": -136.32376098632812, + "logps/rejected": -297.00006103515625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.076655864715576, + "rewards/margins": 17.22898292541504, + "rewards/rejected": -22.30563735961914, + "step": 5517 + }, + { + "epoch": 8.86, + "learning_rate": 8.818866428854538e-09, + "logits/chosen": -1.3851580619812012, + "logits/rejected": -1.3640110492706299, + "logps/chosen": -163.94798278808594, + "logps/rejected": -299.5345153808594, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.7249250411987305, + "rewards/margins": 15.897686958312988, + "rewards/rejected": -21.62261199951172, + "step": 5518 + }, + { + "epoch": 8.86, + "learning_rate": 8.719778042013474e-09, + "logits/chosen": -1.440573811531067, + "logits/rejected": -1.534522294998169, + "logps/chosen": -132.98004150390625, + "logps/rejected": -304.2972106933594, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.619424819946289, + "rewards/margins": 14.860980033874512, + "rewards/rejected": -20.480405807495117, + "step": 5519 + }, + { + "epoch": 8.86, + "learning_rate": 8.620689655172413e-09, + "logits/chosen": -1.5582575798034668, + "logits/rejected": -1.6070797443389893, + "logps/chosen": -138.66014099121094, + "logps/rejected": -295.8648986816406, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.639702796936035, + "rewards/margins": 14.555841445922852, + "rewards/rejected": -20.195545196533203, + "step": 5520 + }, + { + "epoch": 8.86, + "learning_rate": 8.521601268331351e-09, + "logits/chosen": -1.4509820938110352, + "logits/rejected": -1.466586947441101, + "logps/chosen": -139.82342529296875, + "logps/rejected": -313.79364013671875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.7833662033081055, + "rewards/margins": 16.5905818939209, + "rewards/rejected": -22.373947143554688, + "step": 5521 + }, + { + "epoch": 8.86, + "learning_rate": 8.42251288149029e-09, + "logits/chosen": -1.3790929317474365, + "logits/rejected": -1.4052289724349976, + "logps/chosen": -163.9189453125, + "logps/rejected": -343.37255859375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.157495498657227, + "rewards/margins": 17.31974220275879, + "rewards/rejected": -24.477235794067383, + "step": 5522 + }, + { + "epoch": 8.87, + "learning_rate": 8.323424494649227e-09, + "logits/chosen": -1.5175716876983643, + "logits/rejected": -1.4883137941360474, + "logps/chosen": -133.22207641601562, + "logps/rejected": -302.68731689453125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.047994613647461, + "rewards/margins": 15.730720520019531, + "rewards/rejected": -21.778715133666992, + "step": 5523 + }, + { + "epoch": 8.87, + "learning_rate": 8.224336107808166e-09, + "logits/chosen": -1.4338479042053223, + "logits/rejected": -1.4013028144836426, + "logps/chosen": -149.87025451660156, + "logps/rejected": -252.82778930664062, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.232932090759277, + "rewards/margins": 10.907346725463867, + "rewards/rejected": -19.140277862548828, + "step": 5524 + }, + { + "epoch": 8.87, + "learning_rate": 8.125247720967102e-09, + "logits/chosen": -1.5191059112548828, + "logits/rejected": -1.4490551948547363, + "logps/chosen": -162.77291870117188, + "logps/rejected": -307.58416748046875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.386801719665527, + "rewards/margins": 14.635686874389648, + "rewards/rejected": -22.022489547729492, + "step": 5525 + }, + { + "epoch": 8.87, + "learning_rate": 8.02615933412604e-09, + "logits/chosen": -1.3489198684692383, + "logits/rejected": -1.3723788261413574, + "logps/chosen": -179.38687133789062, + "logps/rejected": -296.58013916015625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.085679054260254, + "rewards/margins": 13.62321662902832, + "rewards/rejected": -22.708894729614258, + "step": 5526 + }, + { + "epoch": 8.87, + "learning_rate": 7.927070947284977e-09, + "logits/chosen": -1.5028525590896606, + "logits/rejected": -1.5426260232925415, + "logps/chosen": -159.83554077148438, + "logps/rejected": -311.2879638671875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.359861373901367, + "rewards/margins": 15.2382230758667, + "rewards/rejected": -21.598085403442383, + "step": 5527 + }, + { + "epoch": 8.87, + "learning_rate": 7.827982560443915e-09, + "logits/chosen": -1.3957993984222412, + "logits/rejected": -1.4196135997772217, + "logps/chosen": -131.39993286132812, + "logps/rejected": -269.0301513671875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.169286727905273, + "rewards/margins": 13.595429420471191, + "rewards/rejected": -18.76471710205078, + "step": 5528 + }, + { + "epoch": 8.87, + "learning_rate": 7.728894173602854e-09, + "logits/chosen": -1.3473258018493652, + "logits/rejected": -1.3635883331298828, + "logps/chosen": -144.09149169921875, + "logps/rejected": -309.547607421875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.28753662109375, + "rewards/margins": 14.876259803771973, + "rewards/rejected": -22.163795471191406, + "step": 5529 + }, + { + "epoch": 8.88, + "learning_rate": 7.629805786761792e-09, + "logits/chosen": -1.570701241493225, + "logits/rejected": -1.5551426410675049, + "logps/chosen": -193.306396484375, + "logps/rejected": -317.7586364746094, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.674365043640137, + "rewards/margins": 13.35536003112793, + "rewards/rejected": -23.02972412109375, + "step": 5530 + }, + { + "epoch": 8.88, + "learning_rate": 7.530717399920729e-09, + "logits/chosen": -1.3250340223312378, + "logits/rejected": -1.3717126846313477, + "logps/chosen": -164.00286865234375, + "logps/rejected": -339.76922607421875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.149408340454102, + "rewards/margins": 15.273136138916016, + "rewards/rejected": -23.422544479370117, + "step": 5531 + }, + { + "epoch": 8.88, + "learning_rate": 7.431629013079667e-09, + "logits/chosen": -1.5440555810928345, + "logits/rejected": -1.5173321962356567, + "logps/chosen": -149.22607421875, + "logps/rejected": -296.1837158203125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.6196393966674805, + "rewards/margins": 15.882364273071289, + "rewards/rejected": -21.502004623413086, + "step": 5532 + }, + { + "epoch": 8.88, + "learning_rate": 7.332540626238604e-09, + "logits/chosen": -1.4197148084640503, + "logits/rejected": -1.522092342376709, + "logps/chosen": -151.8905029296875, + "logps/rejected": -286.54022216796875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.587240219116211, + "rewards/margins": 12.029026985168457, + "rewards/rejected": -19.61626625061035, + "step": 5533 + }, + { + "epoch": 8.88, + "learning_rate": 7.233452239397542e-09, + "logits/chosen": -1.453104853630066, + "logits/rejected": -1.4792566299438477, + "logps/chosen": -125.20794677734375, + "logps/rejected": -306.026611328125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.250020980834961, + "rewards/margins": 18.0235538482666, + "rewards/rejected": -23.273574829101562, + "step": 5534 + }, + { + "epoch": 8.88, + "learning_rate": 7.13436385255648e-09, + "logits/chosen": -1.2641032934188843, + "logits/rejected": -1.3249813318252563, + "logps/chosen": -199.99769592285156, + "logps/rejected": -331.6988525390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.577983856201172, + "rewards/margins": 11.192280769348145, + "rewards/rejected": -22.770265579223633, + "step": 5535 + }, + { + "epoch": 8.89, + "learning_rate": 7.035275465715418e-09, + "logits/chosen": -1.3295549154281616, + "logits/rejected": -1.4133901596069336, + "logps/chosen": -221.13027954101562, + "logps/rejected": -353.1766357421875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.57441234588623, + "rewards/margins": 14.887897491455078, + "rewards/rejected": -26.462308883666992, + "step": 5536 + }, + { + "epoch": 8.89, + "learning_rate": 6.936187078874356e-09, + "logits/chosen": -1.276961088180542, + "logits/rejected": -1.2500523328781128, + "logps/chosen": -205.25877380371094, + "logps/rejected": -337.3580017089844, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.073221206665039, + "rewards/margins": 16.732133865356445, + "rewards/rejected": -24.805355072021484, + "step": 5537 + }, + { + "epoch": 8.89, + "learning_rate": 6.837098692033293e-09, + "logits/chosen": -1.5873289108276367, + "logits/rejected": -1.5893609523773193, + "logps/chosen": -122.88469696044922, + "logps/rejected": -286.1046447753906, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.05858039855957, + "rewards/margins": 16.562393188476562, + "rewards/rejected": -20.6209716796875, + "step": 5538 + }, + { + "epoch": 8.89, + "learning_rate": 6.738010305192231e-09, + "logits/chosen": -1.4773001670837402, + "logits/rejected": -1.4367588758468628, + "logps/chosen": -227.33412170410156, + "logps/rejected": -342.91943359375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.257869720458984, + "rewards/margins": 12.835556030273438, + "rewards/rejected": -25.093427658081055, + "step": 5539 + }, + { + "epoch": 8.89, + "learning_rate": 6.638921918351169e-09, + "logits/chosen": -1.4713149070739746, + "logits/rejected": -1.4697266817092896, + "logps/chosen": -145.75970458984375, + "logps/rejected": -278.59014892578125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.746730804443359, + "rewards/margins": 13.78388786315918, + "rewards/rejected": -20.53061866760254, + "step": 5540 + }, + { + "epoch": 8.89, + "learning_rate": 6.539833531510106e-09, + "logits/chosen": -1.3435224294662476, + "logits/rejected": -1.326120138168335, + "logps/chosen": -203.05392456054688, + "logps/rejected": -347.365478515625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.239815711975098, + "rewards/margins": 13.646158218383789, + "rewards/rejected": -25.885974884033203, + "step": 5541 + }, + { + "epoch": 8.9, + "learning_rate": 6.4407451446690445e-09, + "logits/chosen": -1.5499751567840576, + "logits/rejected": -1.5833988189697266, + "logps/chosen": -170.71823120117188, + "logps/rejected": -291.5944519042969, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.566473007202148, + "rewards/margins": 10.870803833007812, + "rewards/rejected": -21.437278747558594, + "step": 5542 + }, + { + "epoch": 8.9, + "learning_rate": 6.341656757827983e-09, + "logits/chosen": -1.4533368349075317, + "logits/rejected": -1.4359675645828247, + "logps/chosen": -161.0867156982422, + "logps/rejected": -267.2300109863281, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.448526382446289, + "rewards/margins": 10.051337242126465, + "rewards/rejected": -18.49986457824707, + "step": 5543 + }, + { + "epoch": 8.9, + "learning_rate": 6.24256837098692e-09, + "logits/chosen": -1.348979115486145, + "logits/rejected": -1.380164623260498, + "logps/chosen": -162.74703979492188, + "logps/rejected": -315.3556213378906, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.217746734619141, + "rewards/margins": 16.287269592285156, + "rewards/rejected": -22.505016326904297, + "step": 5544 + }, + { + "epoch": 8.9, + "learning_rate": 6.143479984145858e-09, + "logits/chosen": -1.4021108150482178, + "logits/rejected": -1.4533365964889526, + "logps/chosen": -190.911376953125, + "logps/rejected": -346.6524353027344, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.786212921142578, + "rewards/margins": 14.31003189086914, + "rewards/rejected": -24.09624481201172, + "step": 5545 + }, + { + "epoch": 8.9, + "learning_rate": 6.044391597304795e-09, + "logits/chosen": -1.3962293863296509, + "logits/rejected": -1.4132952690124512, + "logps/chosen": -189.43765258789062, + "logps/rejected": -362.11334228515625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.910516738891602, + "rewards/margins": 16.944488525390625, + "rewards/rejected": -26.855003356933594, + "step": 5546 + }, + { + "epoch": 8.9, + "learning_rate": 5.945303210463733e-09, + "logits/chosen": -1.629744291305542, + "logits/rejected": -1.6379599571228027, + "logps/chosen": -223.2711181640625, + "logps/rejected": -349.8612976074219, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.427894592285156, + "rewards/margins": 13.59115982055664, + "rewards/rejected": -24.019054412841797, + "step": 5547 + }, + { + "epoch": 8.91, + "learning_rate": 5.846214823622672e-09, + "logits/chosen": -1.2757009267807007, + "logits/rejected": -1.3995387554168701, + "logps/chosen": -141.5907440185547, + "logps/rejected": -258.3895263671875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.266718864440918, + "rewards/margins": 11.240586280822754, + "rewards/rejected": -17.507305145263672, + "step": 5548 + }, + { + "epoch": 8.91, + "learning_rate": 5.747126436781609e-09, + "logits/chosen": -1.5586762428283691, + "logits/rejected": -1.5566110610961914, + "logps/chosen": -107.01903533935547, + "logps/rejected": -233.42617797851562, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3329362869262695, + "rewards/margins": 13.111557960510254, + "rewards/rejected": -16.444494247436523, + "step": 5549 + }, + { + "epoch": 8.91, + "learning_rate": 5.6480380499405465e-09, + "logits/chosen": -1.4988871812820435, + "logits/rejected": -1.4981805086135864, + "logps/chosen": -190.57154846191406, + "logps/rejected": -351.7620544433594, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.43215274810791, + "rewards/margins": 16.082725524902344, + "rewards/rejected": -24.514881134033203, + "step": 5550 + }, + { + "epoch": 8.91, + "learning_rate": 5.548949663099485e-09, + "logits/chosen": -1.649268388748169, + "logits/rejected": -1.5936956405639648, + "logps/chosen": -152.95741271972656, + "logps/rejected": -314.008056640625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.201050758361816, + "rewards/margins": 17.049192428588867, + "rewards/rejected": -22.250242233276367, + "step": 5551 + }, + { + "epoch": 8.91, + "learning_rate": 5.449861276258422e-09, + "logits/chosen": -1.470146656036377, + "logits/rejected": -1.45078706741333, + "logps/chosen": -158.98272705078125, + "logps/rejected": -273.12066650390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.780348300933838, + "rewards/margins": 12.038002014160156, + "rewards/rejected": -17.818349838256836, + "step": 5552 + }, + { + "epoch": 8.91, + "learning_rate": 5.35077288941736e-09, + "logits/chosen": -1.3815776109695435, + "logits/rejected": -1.3912830352783203, + "logps/chosen": -117.28530883789062, + "logps/rejected": -244.11898803710938, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.505147933959961, + "rewards/margins": 12.078999519348145, + "rewards/rejected": -16.58414649963379, + "step": 5553 + }, + { + "epoch": 8.91, + "learning_rate": 5.251684502576298e-09, + "logits/chosen": -1.4675558805465698, + "logits/rejected": -1.4227486848831177, + "logps/chosen": -173.59228515625, + "logps/rejected": -293.31854248046875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.284395217895508, + "rewards/margins": 11.588385581970215, + "rewards/rejected": -20.872783660888672, + "step": 5554 + }, + { + "epoch": 8.92, + "learning_rate": 5.152596115735236e-09, + "logits/chosen": -1.365803837776184, + "logits/rejected": -1.37013578414917, + "logps/chosen": -169.20761108398438, + "logps/rejected": -282.65252685546875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.261795043945312, + "rewards/margins": 11.028141021728516, + "rewards/rejected": -19.289934158325195, + "step": 5555 + }, + { + "epoch": 8.92, + "learning_rate": 5.0535077288941735e-09, + "logits/chosen": -1.8048027753829956, + "logits/rejected": -1.7473621368408203, + "logps/chosen": -171.9785614013672, + "logps/rejected": -308.8185119628906, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.648723602294922, + "rewards/margins": 15.320053100585938, + "rewards/rejected": -21.96877670288086, + "step": 5556 + }, + { + "epoch": 8.92, + "learning_rate": 4.954419342053111e-09, + "logits/chosen": -1.6720865964889526, + "logits/rejected": -1.6294585466384888, + "logps/chosen": -150.77810668945312, + "logps/rejected": -261.4216613769531, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.018840789794922, + "rewards/margins": 12.596641540527344, + "rewards/rejected": -18.615482330322266, + "step": 5557 + }, + { + "epoch": 8.92, + "learning_rate": 4.855330955212048e-09, + "logits/chosen": -1.3651245832443237, + "logits/rejected": -1.4107718467712402, + "logps/chosen": -171.5366973876953, + "logps/rejected": -318.0671081542969, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.04908561706543, + "rewards/margins": 14.390945434570312, + "rewards/rejected": -23.440031051635742, + "step": 5558 + }, + { + "epoch": 8.92, + "learning_rate": 4.756242568370987e-09, + "logits/chosen": -1.3267184495925903, + "logits/rejected": -1.4430619478225708, + "logps/chosen": -148.0964813232422, + "logps/rejected": -279.9290771484375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.811586380004883, + "rewards/margins": 10.503451347351074, + "rewards/rejected": -19.315038681030273, + "step": 5559 + }, + { + "epoch": 8.92, + "learning_rate": 4.657154181529925e-09, + "logits/chosen": -1.3373745679855347, + "logits/rejected": -1.3786394596099854, + "logps/chosen": -181.12130737304688, + "logps/rejected": -318.3215637207031, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.593098640441895, + "rewards/margins": 11.806376457214355, + "rewards/rejected": -21.39947509765625, + "step": 5560 + }, + { + "epoch": 8.93, + "learning_rate": 4.558065794688862e-09, + "logits/chosen": -1.580721378326416, + "logits/rejected": -1.5192993879318237, + "logps/chosen": -129.4248046875, + "logps/rejected": -270.49114990234375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.460580348968506, + "rewards/margins": 15.458990097045898, + "rewards/rejected": -19.919570922851562, + "step": 5561 + }, + { + "epoch": 8.93, + "learning_rate": 4.4589774078478e-09, + "logits/chosen": -1.49764084815979, + "logits/rejected": -1.5842139720916748, + "logps/chosen": -152.20143127441406, + "logps/rejected": -288.29473876953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.002392768859863, + "rewards/margins": 13.14461898803711, + "rewards/rejected": -20.147010803222656, + "step": 5562 + }, + { + "epoch": 8.93, + "learning_rate": 4.359889021006737e-09, + "logits/chosen": -1.626151442527771, + "logits/rejected": -1.6294300556182861, + "logps/chosen": -92.47596740722656, + "logps/rejected": -248.18862915039062, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.906686544418335, + "rewards/margins": 14.758903503417969, + "rewards/rejected": -17.665590286254883, + "step": 5563 + }, + { + "epoch": 8.93, + "learning_rate": 4.2608006341656755e-09, + "logits/chosen": -1.373234748840332, + "logits/rejected": -1.368178367614746, + "logps/chosen": -126.8079605102539, + "logps/rejected": -323.01446533203125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.826100826263428, + "rewards/margins": 17.397804260253906, + "rewards/rejected": -23.223907470703125, + "step": 5564 + }, + { + "epoch": 8.93, + "learning_rate": 4.161712247324614e-09, + "logits/chosen": -1.472311019897461, + "logits/rejected": -1.4220939874649048, + "logps/chosen": -139.84767150878906, + "logps/rejected": -291.8489990234375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.948934555053711, + "rewards/margins": 14.550586700439453, + "rewards/rejected": -21.499521255493164, + "step": 5565 + }, + { + "epoch": 8.93, + "learning_rate": 4.062623860483551e-09, + "logits/chosen": -1.4732526540756226, + "logits/rejected": -1.5243475437164307, + "logps/chosen": -191.86984252929688, + "logps/rejected": -287.5022277832031, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.804506301879883, + "rewards/margins": 10.611030578613281, + "rewards/rejected": -17.415536880493164, + "step": 5566 + }, + { + "epoch": 8.94, + "learning_rate": 3.963535473642489e-09, + "logits/chosen": -1.4829237461090088, + "logits/rejected": -1.5580952167510986, + "logps/chosen": -146.04263305664062, + "logps/rejected": -280.3955993652344, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.363846778869629, + "rewards/margins": 11.082490921020508, + "rewards/rejected": -18.44633674621582, + "step": 5567 + }, + { + "epoch": 8.94, + "learning_rate": 3.864447086801427e-09, + "logits/chosen": -1.5107572078704834, + "logits/rejected": -1.4684278964996338, + "logps/chosen": -135.573974609375, + "logps/rejected": -282.4186706542969, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.035415172576904, + "rewards/margins": 16.25564193725586, + "rewards/rejected": -22.291057586669922, + "step": 5568 + }, + { + "epoch": 8.94, + "learning_rate": 3.765358699960364e-09, + "logits/chosen": -1.3906028270721436, + "logits/rejected": -1.3945419788360596, + "logps/chosen": -173.60520935058594, + "logps/rejected": -303.6537170410156, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.97482442855835, + "rewards/margins": 14.139511108398438, + "rewards/rejected": -21.114336013793945, + "step": 5569 + }, + { + "epoch": 8.94, + "learning_rate": 3.666270313119302e-09, + "logits/chosen": -1.4650046825408936, + "logits/rejected": -1.4454541206359863, + "logps/chosen": -156.11907958984375, + "logps/rejected": -286.27685546875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.076201438903809, + "rewards/margins": 13.653706550598145, + "rewards/rejected": -19.729907989501953, + "step": 5570 + }, + { + "epoch": 8.94, + "learning_rate": 3.56718192627824e-09, + "logits/chosen": -1.477257490158081, + "logits/rejected": -1.4355814456939697, + "logps/chosen": -170.29989624023438, + "logps/rejected": -291.88916015625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.768489837646484, + "rewards/margins": 12.574531555175781, + "rewards/rejected": -21.343021392822266, + "step": 5571 + }, + { + "epoch": 8.94, + "learning_rate": 3.468093539437178e-09, + "logits/chosen": -1.3901575803756714, + "logits/rejected": -1.4330360889434814, + "logps/chosen": -146.53173828125, + "logps/rejected": -340.9048767089844, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.530895233154297, + "rewards/margins": 17.121395111083984, + "rewards/rejected": -25.65228843688965, + "step": 5572 + }, + { + "epoch": 8.95, + "learning_rate": 3.3690051525961157e-09, + "logits/chosen": -1.2886419296264648, + "logits/rejected": -1.3536070585250854, + "logps/chosen": -180.0691680908203, + "logps/rejected": -350.9300537109375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.001100540161133, + "rewards/margins": 15.590032577514648, + "rewards/rejected": -24.59113311767578, + "step": 5573 + }, + { + "epoch": 8.95, + "learning_rate": 3.269916765755053e-09, + "logits/chosen": -1.4912701845169067, + "logits/rejected": -1.483092188835144, + "logps/chosen": -169.70684814453125, + "logps/rejected": -311.35986328125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.20219612121582, + "rewards/margins": 15.46371841430664, + "rewards/rejected": -23.66591453552246, + "step": 5574 + }, + { + "epoch": 8.95, + "learning_rate": 3.1708283789139914e-09, + "logits/chosen": -1.4749550819396973, + "logits/rejected": -1.4195890426635742, + "logps/chosen": -198.25767517089844, + "logps/rejected": -323.0523376464844, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.985148429870605, + "rewards/margins": 13.699481010437012, + "rewards/rejected": -23.68463134765625, + "step": 5575 + }, + { + "epoch": 8.95, + "learning_rate": 3.071739992072929e-09, + "logits/chosen": -1.437975525856018, + "logits/rejected": -1.3715416193008423, + "logps/chosen": -159.55397033691406, + "logps/rejected": -296.2626647949219, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.059662818908691, + "rewards/margins": 14.328288078308105, + "rewards/rejected": -22.387950897216797, + "step": 5576 + }, + { + "epoch": 8.95, + "learning_rate": 2.9726516052318667e-09, + "logits/chosen": -1.4831945896148682, + "logits/rejected": -1.4334884881973267, + "logps/chosen": -144.580810546875, + "logps/rejected": -300.0434265136719, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.061241149902344, + "rewards/margins": 15.85600471496582, + "rewards/rejected": -21.91724395751953, + "step": 5577 + }, + { + "epoch": 8.95, + "learning_rate": 2.8735632183908045e-09, + "logits/chosen": -1.5568534135818481, + "logits/rejected": -1.546190857887268, + "logps/chosen": -174.35440063476562, + "logps/rejected": -305.08575439453125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.438225746154785, + "rewards/margins": 12.976125717163086, + "rewards/rejected": -21.414352416992188, + "step": 5578 + }, + { + "epoch": 8.96, + "learning_rate": 2.7744748315497424e-09, + "logits/chosen": -1.5426157712936401, + "logits/rejected": -1.527779459953308, + "logps/chosen": -205.07852172851562, + "logps/rejected": -314.773193359375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.837130546569824, + "rewards/margins": 12.570311546325684, + "rewards/rejected": -22.407442092895508, + "step": 5579 + }, + { + "epoch": 8.96, + "learning_rate": 2.67538644470868e-09, + "logits/chosen": -1.5151317119598389, + "logits/rejected": -1.4831182956695557, + "logps/chosen": -133.38833618164062, + "logps/rejected": -304.69268798828125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.247411251068115, + "rewards/margins": 16.467342376708984, + "rewards/rejected": -22.714752197265625, + "step": 5580 + }, + { + "epoch": 8.96, + "learning_rate": 2.576298057867618e-09, + "logits/chosen": -1.605851173400879, + "logits/rejected": -1.5621271133422852, + "logps/chosen": -145.75132751464844, + "logps/rejected": -308.599853515625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.437315464019775, + "rewards/margins": 15.989755630493164, + "rewards/rejected": -22.42707061767578, + "step": 5581 + }, + { + "epoch": 8.96, + "learning_rate": 2.4772096710265555e-09, + "logits/chosen": -1.4194228649139404, + "logits/rejected": -1.440765142440796, + "logps/chosen": -193.83885192871094, + "logps/rejected": -325.5533447265625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.937275886535645, + "rewards/margins": 12.562469482421875, + "rewards/rejected": -22.499744415283203, + "step": 5582 + }, + { + "epoch": 8.96, + "learning_rate": 2.3781212841854933e-09, + "logits/chosen": -1.446515679359436, + "logits/rejected": -1.5454959869384766, + "logps/chosen": -155.822021484375, + "logps/rejected": -311.8072204589844, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.569726943969727, + "rewards/margins": 14.861775398254395, + "rewards/rejected": -22.431501388549805, + "step": 5583 + }, + { + "epoch": 8.96, + "learning_rate": 2.279032897344431e-09, + "logits/chosen": -1.3862296342849731, + "logits/rejected": -1.4577624797821045, + "logps/chosen": -211.242919921875, + "logps/rejected": -335.4261169433594, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.653841018676758, + "rewards/margins": 11.934491157531738, + "rewards/rejected": -22.588333129882812, + "step": 5584 + }, + { + "epoch": 8.96, + "learning_rate": 2.1799445105033686e-09, + "logits/chosen": -1.6489660739898682, + "logits/rejected": -1.6513805389404297, + "logps/chosen": -150.65390014648438, + "logps/rejected": -311.5079040527344, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.2020673751831055, + "rewards/margins": 15.719917297363281, + "rewards/rejected": -22.921985626220703, + "step": 5585 + }, + { + "epoch": 8.97, + "learning_rate": 2.080856123662307e-09, + "logits/chosen": -1.2698103189468384, + "logits/rejected": -1.2529903650283813, + "logps/chosen": -197.02630615234375, + "logps/rejected": -307.866455078125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.570103645324707, + "rewards/margins": 11.92452621459961, + "rewards/rejected": -23.494630813598633, + "step": 5586 + }, + { + "epoch": 8.97, + "learning_rate": 1.9817677368212443e-09, + "logits/chosen": -1.4508063793182373, + "logits/rejected": -1.4695771932601929, + "logps/chosen": -127.34912109375, + "logps/rejected": -284.9428405761719, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.792441368103027, + "rewards/margins": 16.06023597717285, + "rewards/rejected": -20.852678298950195, + "step": 5587 + }, + { + "epoch": 8.97, + "learning_rate": 1.882679349980182e-09, + "logits/chosen": -1.3122761249542236, + "logits/rejected": -1.2863131761550903, + "logps/chosen": -175.91993713378906, + "logps/rejected": -264.55914306640625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.034013748168945, + "rewards/margins": 12.792272567749023, + "rewards/rejected": -20.82628631591797, + "step": 5588 + }, + { + "epoch": 8.97, + "learning_rate": 1.78359096313912e-09, + "logits/chosen": -1.6139024496078491, + "logits/rejected": -1.6038577556610107, + "logps/chosen": -157.0148162841797, + "logps/rejected": -273.06488037109375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.329857349395752, + "rewards/margins": 13.040611267089844, + "rewards/rejected": -19.37047004699707, + "step": 5589 + }, + { + "epoch": 8.97, + "learning_rate": 1.6845025762980578e-09, + "logits/chosen": -1.5084803104400635, + "logits/rejected": -1.502764105796814, + "logps/chosen": -182.354248046875, + "logps/rejected": -313.660400390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.31116008758545, + "rewards/margins": 14.6881103515625, + "rewards/rejected": -22.999269485473633, + "step": 5590 + }, + { + "epoch": 8.97, + "learning_rate": 1.5854141894569957e-09, + "logits/chosen": -1.684743881225586, + "logits/rejected": -1.6472859382629395, + "logps/chosen": -140.39602661132812, + "logps/rejected": -255.92982482910156, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.5803985595703125, + "rewards/margins": 13.090886116027832, + "rewards/rejected": -18.671283721923828, + "step": 5591 + }, + { + "epoch": 8.98, + "learning_rate": 1.4863258026159333e-09, + "logits/chosen": -1.3962390422821045, + "logits/rejected": -1.4182277917861938, + "logps/chosen": -163.5077667236328, + "logps/rejected": -267.0746154785156, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.4592742919921875, + "rewards/margins": 11.378170013427734, + "rewards/rejected": -18.837444305419922, + "step": 5592 + }, + { + "epoch": 8.98, + "learning_rate": 1.3872374157748712e-09, + "logits/chosen": -1.4572205543518066, + "logits/rejected": -1.5391861200332642, + "logps/chosen": -95.77008056640625, + "logps/rejected": -247.80026245117188, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.4822535514831543, + "rewards/margins": 15.075722694396973, + "rewards/rejected": -18.55797576904297, + "step": 5593 + }, + { + "epoch": 8.98, + "learning_rate": 1.288149028933809e-09, + "logits/chosen": -1.4385879039764404, + "logits/rejected": -1.5150814056396484, + "logps/chosen": -173.08328247070312, + "logps/rejected": -323.7647399902344, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.58642578125, + "rewards/margins": 15.04301643371582, + "rewards/rejected": -24.629444122314453, + "step": 5594 + }, + { + "epoch": 8.98, + "learning_rate": 1.1890606420927467e-09, + "logits/chosen": -1.4460622072219849, + "logits/rejected": -1.4188734292984009, + "logps/chosen": -128.3372344970703, + "logps/rejected": -273.07989501953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.00007963180542, + "rewards/margins": 13.88518238067627, + "rewards/rejected": -19.88526153564453, + "step": 5595 + }, + { + "epoch": 8.98, + "learning_rate": 1.0899722552516843e-09, + "logits/chosen": -1.4325439929962158, + "logits/rejected": -1.466303825378418, + "logps/chosen": -172.8763885498047, + "logps/rejected": -296.3722839355469, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.317228317260742, + "rewards/margins": 11.966876029968262, + "rewards/rejected": -22.284103393554688, + "step": 5596 + }, + { + "epoch": 8.98, + "learning_rate": 9.908838684106222e-10, + "logits/chosen": -1.4646642208099365, + "logits/rejected": -1.49484121799469, + "logps/chosen": -133.38018798828125, + "logps/rejected": -297.247314453125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.331569671630859, + "rewards/margins": 14.043962478637695, + "rewards/rejected": -20.375532150268555, + "step": 5597 + }, + { + "epoch": 8.99, + "learning_rate": 8.9179548156956e-10, + "logits/chosen": -1.6434353590011597, + "logits/rejected": -1.576925277709961, + "logps/chosen": -180.9561767578125, + "logps/rejected": -310.79766845703125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.983611106872559, + "rewards/margins": 14.025321960449219, + "rewards/rejected": -22.008934020996094, + "step": 5598 + }, + { + "epoch": 8.99, + "learning_rate": 7.927070947284978e-10, + "logits/chosen": -1.6101654767990112, + "logits/rejected": -1.574453353881836, + "logps/chosen": -144.18386840820312, + "logps/rejected": -272.75811767578125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.9166460037231445, + "rewards/margins": 13.731451034545898, + "rewards/rejected": -18.648096084594727, + "step": 5599 + }, + { + "epoch": 8.99, + "learning_rate": 6.936187078874356e-10, + "logits/chosen": -1.3635761737823486, + "logits/rejected": -1.3398789167404175, + "logps/chosen": -156.15357971191406, + "logps/rejected": -298.1269226074219, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.194400787353516, + "rewards/margins": 14.788064002990723, + "rewards/rejected": -19.982465744018555, + "step": 5600 + }, + { + "epoch": 8.99, + "learning_rate": 5.945303210463733e-10, + "logits/chosen": -1.3833613395690918, + "logits/rejected": -1.353509545326233, + "logps/chosen": -148.7933349609375, + "logps/rejected": -310.07171630859375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.010641098022461, + "rewards/margins": 15.126819610595703, + "rewards/rejected": -22.137460708618164, + "step": 5601 + }, + { + "epoch": 8.99, + "learning_rate": 4.954419342053111e-10, + "logits/chosen": -1.438132643699646, + "logits/rejected": -1.4972431659698486, + "logps/chosen": -138.6864776611328, + "logps/rejected": -316.56048583984375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.831529140472412, + "rewards/margins": 17.135868072509766, + "rewards/rejected": -22.967397689819336, + "step": 5602 + }, + { + "epoch": 8.99, + "learning_rate": 3.963535473642489e-10, + "logits/chosen": -1.5281355381011963, + "logits/rejected": -1.458997130393982, + "logps/chosen": -155.76519775390625, + "logps/rejected": -291.20855712890625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.240975379943848, + "rewards/margins": 13.647465705871582, + "rewards/rejected": -19.88844108581543, + "step": 5603 + }, + { + "epoch": 9.0, + "learning_rate": 2.9726516052318667e-10, + "logits/chosen": -1.405860185623169, + "logits/rejected": -1.4780182838439941, + "logps/chosen": -152.6331024169922, + "logps/rejected": -338.17169189453125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.931190490722656, + "rewards/margins": 16.70195960998535, + "rewards/rejected": -24.633150100708008, + "step": 5604 + }, + { + "epoch": 9.0, + "learning_rate": 1.9817677368212446e-10, + "logits/chosen": -1.6140244007110596, + "logits/rejected": -1.5737364292144775, + "logps/chosen": -160.79434204101562, + "logps/rejected": -299.6746826171875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.942219257354736, + "rewards/margins": 16.059476852416992, + "rewards/rejected": -21.00169563293457, + "step": 5605 + }, + { + "epoch": 9.0, + "learning_rate": 9.908838684106223e-11, + "logits/chosen": -1.5047757625579834, + "logits/rejected": -1.5366731882095337, + "logps/chosen": -186.2240447998047, + "logps/rejected": -350.0401916503906, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.321247100830078, + "rewards/margins": 15.944011688232422, + "rewards/rejected": -26.265260696411133, + "step": 5606 + }, + { + "epoch": 9.0, + "learning_rate": 0.0, + "logits/chosen": -1.2633585929870605, + "logits/rejected": -1.3158758878707886, + "logps/chosen": -154.55526733398438, + "logps/rejected": -312.0606994628906, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.210287094116211, + "rewards/margins": 14.50362777709961, + "rewards/rejected": -21.71391487121582, + "step": 5607 + }, + { + "epoch": 9.0, + "step": 5607, + "total_flos": 0.0, + "train_loss": 0.12788055668324702, + "train_runtime": 21079.0027, + "train_samples_per_second": 8.509, + "train_steps_per_second": 0.266 + } + ], + "logging_steps": 1.0, + "max_steps": 5607, + "num_input_tokens_seen": 0, + "num_train_epochs": 9, + "save_steps": 10000, + "total_flos": 0.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +}