{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "eval_steps": 100, "global_step": 2907, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 1.7182130584192438e-09, "logits/chosen": -2.809938669204712, "logits/rejected": -2.8543002605438232, "logps/chosen": -108.84485626220703, "logps/rejected": -104.8216552734375, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1 }, { "epoch": 0.01, "learning_rate": 1.718213058419244e-08, "logits/chosen": -3.0993778705596924, "logits/rejected": -3.087177276611328, "logps/chosen": -240.08148193359375, "logps/rejected": -212.52203369140625, "loss": 0.6929, "rewards/accuracies": 0.5555555820465088, "rewards/chosen": 0.004180057905614376, "rewards/margins": 0.010151715017855167, "rewards/rejected": -0.005971657112240791, "step": 10 }, { "epoch": 0.02, "learning_rate": 3.436426116838488e-08, "logits/chosen": -2.9975037574768066, "logits/rejected": -3.020071268081665, "logps/chosen": -277.745361328125, "logps/rejected": -251.31045532226562, "loss": 0.6891, "rewards/accuracies": 0.5625, "rewards/chosen": -0.0006220510113053024, "rewards/margins": 0.009054403752088547, "rewards/rejected": -0.009676454588770866, "step": 20 }, { "epoch": 0.03, "learning_rate": 5.154639175257731e-08, "logits/chosen": -3.0415549278259277, "logits/rejected": -3.058453321456909, "logps/chosen": -269.6055603027344, "logps/rejected": -234.25210571289062, "loss": 0.6757, "rewards/accuracies": 0.6875, "rewards/chosen": 0.023628996685147285, "rewards/margins": 0.044883329421281815, "rewards/rejected": -0.02125433087348938, "step": 30 }, { "epoch": 0.04, "learning_rate": 6.872852233676976e-08, "logits/chosen": -3.0699856281280518, "logits/rejected": -3.061784505844116, "logps/chosen": -313.98760986328125, "logps/rejected": -281.23638916015625, "loss": 0.6528, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": 0.06499715149402618, "rewards/margins": 0.10613612085580826, "rewards/rejected": -0.041138969361782074, "step": 40 }, { "epoch": 0.05, "learning_rate": 8.59106529209622e-08, "logits/chosen": -2.979653835296631, "logits/rejected": -2.9869930744171143, "logps/chosen": -340.1573486328125, "logps/rejected": -217.7376708984375, "loss": 0.6327, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 0.06802070885896683, "rewards/margins": 0.1756124347448349, "rewards/rejected": -0.10759172588586807, "step": 50 }, { "epoch": 0.06, "learning_rate": 1.0309278350515462e-07, "logits/chosen": -2.9975168704986572, "logits/rejected": -2.9845681190490723, "logps/chosen": -255.650146484375, "logps/rejected": -238.10183715820312, "loss": 0.6021, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.003270727349445224, "rewards/margins": 0.21992146968841553, "rewards/rejected": -0.2231922149658203, "step": 60 }, { "epoch": 0.07, "learning_rate": 1.202749140893471e-07, "logits/chosen": -3.0794243812561035, "logits/rejected": -3.0562033653259277, "logps/chosen": -333.88580322265625, "logps/rejected": -250.5960693359375, "loss": 0.5817, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.1207752674818039, "rewards/margins": 0.4231022000312805, "rewards/rejected": -0.30232688784599304, "step": 70 }, { "epoch": 0.08, "learning_rate": 1.3745704467353952e-07, "logits/chosen": -3.0469672679901123, "logits/rejected": -3.0105535984039307, "logps/chosen": -262.3305969238281, "logps/rejected": -227.277587890625, "loss": 0.5512, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.13038447499275208, "rewards/margins": 0.5708122253417969, "rewards/rejected": -0.4404277801513672, "step": 80 }, { "epoch": 0.09, "learning_rate": 1.5463917525773197e-07, "logits/chosen": -3.0962467193603516, "logits/rejected": -3.0591776371002197, "logps/chosen": -260.2414855957031, "logps/rejected": -228.8573760986328, "loss": 0.5069, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 0.16786682605743408, "rewards/margins": 0.885595440864563, "rewards/rejected": -0.7177285552024841, "step": 90 }, { "epoch": 0.1, "learning_rate": 1.718213058419244e-07, "logits/chosen": -2.9466209411621094, "logits/rejected": -2.9162044525146484, "logps/chosen": -279.3992614746094, "logps/rejected": -209.82199096679688, "loss": 0.5397, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 0.1838654726743698, "rewards/margins": 0.7952971458435059, "rewards/rejected": -0.6114317774772644, "step": 100 }, { "epoch": 0.1, "eval_logits/chosen": -3.00329327583313, "eval_logits/rejected": -2.9742367267608643, "eval_logps/chosen": -295.24578857421875, "eval_logps/rejected": -251.75856018066406, "eval_loss": 0.5210586190223694, "eval_rewards/accuracies": 0.7579365372657776, "eval_rewards/chosen": 0.12868967652320862, "eval_rewards/margins": 0.813798725605011, "eval_rewards/rejected": -0.6851091384887695, "eval_runtime": 83.4523, "eval_samples_per_second": 23.966, "eval_steps_per_second": 0.755, "step": 100 }, { "epoch": 0.11, "learning_rate": 1.8900343642611682e-07, "logits/chosen": -2.9794344902038574, "logits/rejected": -2.931662082672119, "logps/chosen": -259.1096496582031, "logps/rejected": -254.18624877929688, "loss": 0.5465, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.052236903458833694, "rewards/margins": 0.7087007761001587, "rewards/rejected": -0.7609376311302185, "step": 110 }, { "epoch": 0.12, "learning_rate": 2.0618556701030925e-07, "logits/chosen": -3.0205585956573486, "logits/rejected": -2.995251417160034, "logps/chosen": -328.3795166015625, "logps/rejected": -251.7208709716797, "loss": 0.529, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.04647420346736908, "rewards/margins": 0.65358567237854, "rewards/rejected": -0.7000598311424255, "step": 120 }, { "epoch": 0.13, "learning_rate": 2.2336769759450173e-07, "logits/chosen": -3.0943045616149902, "logits/rejected": -3.045710563659668, "logps/chosen": -290.30181884765625, "logps/rejected": -255.75521850585938, "loss": 0.5239, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.15326093137264252, "rewards/margins": 0.905988335609436, "rewards/rejected": -0.7527275085449219, "step": 130 }, { "epoch": 0.14, "learning_rate": 2.405498281786942e-07, "logits/chosen": -3.0118863582611084, "logits/rejected": -2.9938008785247803, "logps/chosen": -272.8241271972656, "logps/rejected": -227.0712890625, "loss": 0.4998, "rewards/accuracies": 0.762499988079071, "rewards/chosen": 0.17975559830665588, "rewards/margins": 0.9572590589523315, "rewards/rejected": -0.7775036096572876, "step": 140 }, { "epoch": 0.15, "learning_rate": 2.5773195876288655e-07, "logits/chosen": -3.014378786087036, "logits/rejected": -3.011509895324707, "logps/chosen": -276.01239013671875, "logps/rejected": -243.67691040039062, "loss": 0.5232, "rewards/accuracies": 0.737500011920929, "rewards/chosen": 0.018592316657304764, "rewards/margins": 0.9336894750595093, "rewards/rejected": -0.9150971174240112, "step": 150 }, { "epoch": 0.17, "learning_rate": 2.7491408934707903e-07, "logits/chosen": -3.080382823944092, "logits/rejected": -3.083763837814331, "logps/chosen": -319.6217346191406, "logps/rejected": -261.18292236328125, "loss": 0.4862, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.07249774783849716, "rewards/margins": 1.1103062629699707, "rewards/rejected": -1.1828041076660156, "step": 160 }, { "epoch": 0.18, "learning_rate": 2.9209621993127146e-07, "logits/chosen": -2.9972214698791504, "logits/rejected": -2.9871556758880615, "logps/chosen": -280.0781555175781, "logps/rejected": -237.6610107421875, "loss": 0.5081, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.24302425980567932, "rewards/margins": 0.6678057312965393, "rewards/rejected": -0.9108299016952515, "step": 170 }, { "epoch": 0.19, "learning_rate": 3.0927835051546394e-07, "logits/chosen": -3.060957193374634, "logits/rejected": -3.0258703231811523, "logps/chosen": -223.1851348876953, "logps/rejected": -215.2241668701172, "loss": 0.4735, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.10333947092294693, "rewards/margins": 0.957025408744812, "rewards/rejected": -1.0603649616241455, "step": 180 }, { "epoch": 0.2, "learning_rate": 3.2646048109965636e-07, "logits/chosen": -3.0781607627868652, "logits/rejected": -3.0256905555725098, "logps/chosen": -273.40521240234375, "logps/rejected": -211.7317657470703, "loss": 0.513, "rewards/accuracies": 0.762499988079071, "rewards/chosen": 0.06486662477254868, "rewards/margins": 1.1147286891937256, "rewards/rejected": -1.049862027168274, "step": 190 }, { "epoch": 0.21, "learning_rate": 3.436426116838488e-07, "logits/chosen": -3.0979599952697754, "logits/rejected": -3.034374713897705, "logps/chosen": -228.7999725341797, "logps/rejected": -172.1134796142578, "loss": 0.4919, "rewards/accuracies": 0.8125, "rewards/chosen": -0.19804462790489197, "rewards/margins": 0.8455147743225098, "rewards/rejected": -1.0435593128204346, "step": 200 }, { "epoch": 0.21, "eval_logits/chosen": -3.089780569076538, "eval_logits/rejected": -3.068847179412842, "eval_logps/chosen": -296.25518798828125, "eval_logps/rejected": -256.5061340332031, "eval_loss": 0.48726680874824524, "eval_rewards/accuracies": 0.7896825671195984, "eval_rewards/chosen": 0.027751244604587555, "eval_rewards/margins": 1.1876167058944702, "eval_rewards/rejected": -1.159865379333496, "eval_runtime": 85.3677, "eval_samples_per_second": 23.428, "eval_steps_per_second": 0.738, "step": 200 }, { "epoch": 0.22, "learning_rate": 3.608247422680412e-07, "logits/chosen": -3.0770697593688965, "logits/rejected": -3.0069308280944824, "logps/chosen": -305.48211669921875, "logps/rejected": -228.2117919921875, "loss": 0.4486, "rewards/accuracies": 0.8125, "rewards/chosen": -0.06476946175098419, "rewards/margins": 1.3383097648620605, "rewards/rejected": -1.4030791521072388, "step": 210 }, { "epoch": 0.23, "learning_rate": 3.7800687285223364e-07, "logits/chosen": -2.9890925884246826, "logits/rejected": -2.9934885501861572, "logps/chosen": -230.14501953125, "logps/rejected": -242.2181396484375, "loss": 0.5072, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.388418048620224, "rewards/margins": 1.1304852962493896, "rewards/rejected": -1.5189034938812256, "step": 220 }, { "epoch": 0.24, "learning_rate": 3.9518900343642607e-07, "logits/chosen": -3.037731647491455, "logits/rejected": -3.0365240573883057, "logps/chosen": -246.9208984375, "logps/rejected": -229.67221069335938, "loss": 0.5144, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.21415448188781738, "rewards/margins": 1.0256799459457397, "rewards/rejected": -1.2398344278335571, "step": 230 }, { "epoch": 0.25, "learning_rate": 4.123711340206185e-07, "logits/chosen": -3.164783000946045, "logits/rejected": -3.105828285217285, "logps/chosen": -323.3183288574219, "logps/rejected": -223.78634643554688, "loss": 0.4627, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.21443960070610046, "rewards/margins": 1.0258036851882935, "rewards/rejected": -1.2402431964874268, "step": 240 }, { "epoch": 0.26, "learning_rate": 4.2955326460481097e-07, "logits/chosen": -3.1041345596313477, "logits/rejected": -3.085723876953125, "logps/chosen": -266.01995849609375, "logps/rejected": -250.32656860351562, "loss": 0.4745, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.4707525372505188, "rewards/margins": 1.0131726264953613, "rewards/rejected": -1.4839251041412354, "step": 250 }, { "epoch": 0.27, "learning_rate": 4.4673539518900345e-07, "logits/chosen": -3.0145013332366943, "logits/rejected": -3.0256457328796387, "logps/chosen": -280.07232666015625, "logps/rejected": -225.8538818359375, "loss": 0.5126, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.49882835149765015, "rewards/margins": 1.2246119976043701, "rewards/rejected": -1.723440408706665, "step": 260 }, { "epoch": 0.28, "learning_rate": 4.639175257731959e-07, "logits/chosen": -3.1204419136047363, "logits/rejected": -3.060319423675537, "logps/chosen": -303.67218017578125, "logps/rejected": -246.41006469726562, "loss": 0.5243, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.3401849865913391, "rewards/margins": 0.9501386880874634, "rewards/rejected": -1.2903234958648682, "step": 270 }, { "epoch": 0.29, "learning_rate": 4.810996563573884e-07, "logits/chosen": -3.066493272781372, "logits/rejected": -3.0354180335998535, "logps/chosen": -282.0846862792969, "logps/rejected": -242.1811981201172, "loss": 0.5183, "rewards/accuracies": 0.75, "rewards/chosen": -0.42353829741477966, "rewards/margins": 1.0371509790420532, "rewards/rejected": -1.4606893062591553, "step": 280 }, { "epoch": 0.3, "learning_rate": 4.982817869415807e-07, "logits/chosen": -3.1275315284729004, "logits/rejected": -3.0769925117492676, "logps/chosen": -279.0789794921875, "logps/rejected": -230.20285034179688, "loss": 0.5104, "rewards/accuracies": 0.6875, "rewards/chosen": -0.23357586562633514, "rewards/margins": 0.9790124893188477, "rewards/rejected": -1.2125883102416992, "step": 290 }, { "epoch": 0.31, "learning_rate": 4.982798165137615e-07, "logits/chosen": -3.0245261192321777, "logits/rejected": -2.995250701904297, "logps/chosen": -237.4323272705078, "logps/rejected": -246.7605743408203, "loss": 0.4802, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.5877944231033325, "rewards/margins": 0.8804357647895813, "rewards/rejected": -1.4682300090789795, "step": 300 }, { "epoch": 0.31, "eval_logits/chosen": -3.0827579498291016, "eval_logits/rejected": -3.0494439601898193, "eval_logps/chosen": -298.7669372558594, "eval_logps/rejected": -258.1646423339844, "eval_loss": 0.5026515126228333, "eval_rewards/accuracies": 0.7539682388305664, "eval_rewards/chosen": -0.22342802584171295, "eval_rewards/margins": 1.102290391921997, "eval_rewards/rejected": -1.3257185220718384, "eval_runtime": 84.4222, "eval_samples_per_second": 23.69, "eval_steps_per_second": 0.746, "step": 300 }, { "epoch": 0.32, "learning_rate": 4.963685015290519e-07, "logits/chosen": -3.1253762245178223, "logits/rejected": -3.056403636932373, "logps/chosen": -326.9117431640625, "logps/rejected": -284.8948059082031, "loss": 0.5358, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.14515358209609985, "rewards/margins": 1.0375834703445435, "rewards/rejected": -1.182737112045288, "step": 310 }, { "epoch": 0.33, "learning_rate": 4.944571865443424e-07, "logits/chosen": -3.059190273284912, "logits/rejected": -3.0378201007843018, "logps/chosen": -255.5692596435547, "logps/rejected": -201.07577514648438, "loss": 0.4657, "rewards/accuracies": 0.8125, "rewards/chosen": -0.1327451914548874, "rewards/margins": 1.4812240600585938, "rewards/rejected": -1.6139692068099976, "step": 320 }, { "epoch": 0.34, "learning_rate": 4.92545871559633e-07, "logits/chosen": -3.066437005996704, "logits/rejected": -3.0722594261169434, "logps/chosen": -332.57073974609375, "logps/rejected": -253.54833984375, "loss": 0.506, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": 0.022808248177170753, "rewards/margins": 1.1694247722625732, "rewards/rejected": -1.1466165781021118, "step": 330 }, { "epoch": 0.35, "learning_rate": 4.906345565749235e-07, "logits/chosen": -2.9793455600738525, "logits/rejected": -2.957947254180908, "logps/chosen": -246.28836059570312, "logps/rejected": -238.5087890625, "loss": 0.5348, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.2783312201499939, "rewards/margins": 0.9609475135803223, "rewards/rejected": -1.2392786741256714, "step": 340 }, { "epoch": 0.36, "learning_rate": 4.88723241590214e-07, "logits/chosen": -2.998023271560669, "logits/rejected": -2.9771180152893066, "logps/chosen": -307.2586975097656, "logps/rejected": -251.5006866455078, "loss": 0.4786, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.11792813241481781, "rewards/margins": 1.2310340404510498, "rewards/rejected": -1.3489621877670288, "step": 350 }, { "epoch": 0.37, "learning_rate": 4.868119266055046e-07, "logits/chosen": -3.030704975128174, "logits/rejected": -3.0520386695861816, "logps/chosen": -295.72540283203125, "logps/rejected": -283.2640075683594, "loss": 0.4889, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.5819287896156311, "rewards/margins": 1.3330873250961304, "rewards/rejected": -1.9150161743164062, "step": 360 }, { "epoch": 0.38, "learning_rate": 4.849006116207951e-07, "logits/chosen": -3.0758090019226074, "logits/rejected": -3.0432868003845215, "logps/chosen": -293.5560607910156, "logps/rejected": -271.2206115722656, "loss": 0.5067, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.4366377294063568, "rewards/margins": 1.4355862140655518, "rewards/rejected": -1.8722240924835205, "step": 370 }, { "epoch": 0.39, "learning_rate": 4.829892966360856e-07, "logits/chosen": -3.0619266033172607, "logits/rejected": -3.067351818084717, "logps/chosen": -315.32940673828125, "logps/rejected": -284.5444030761719, "loss": 0.533, "rewards/accuracies": 0.8125, "rewards/chosen": -0.23806679248809814, "rewards/margins": 1.37300705909729, "rewards/rejected": -1.6110738515853882, "step": 380 }, { "epoch": 0.4, "learning_rate": 4.810779816513762e-07, "logits/chosen": -2.9711368083953857, "logits/rejected": -2.97477388381958, "logps/chosen": -250.7084197998047, "logps/rejected": -238.7926025390625, "loss": 0.521, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.6350131034851074, "rewards/margins": 0.9497348666191101, "rewards/rejected": -1.5847480297088623, "step": 390 }, { "epoch": 0.41, "learning_rate": 4.791666666666667e-07, "logits/chosen": -2.890068531036377, "logits/rejected": -2.8569467067718506, "logps/chosen": -278.987060546875, "logps/rejected": -212.63363647460938, "loss": 0.5134, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.20403578877449036, "rewards/margins": 1.445931315422058, "rewards/rejected": -1.6499669551849365, "step": 400 }, { "epoch": 0.41, "eval_logits/chosen": -2.9178526401519775, "eval_logits/rejected": -2.884305238723755, "eval_logps/chosen": -299.4102478027344, "eval_logps/rejected": -261.616943359375, "eval_loss": 0.5097789764404297, "eval_rewards/accuracies": 0.7698412537574768, "eval_rewards/chosen": -0.28775671124458313, "eval_rewards/margins": 1.3831894397735596, "eval_rewards/rejected": -1.6709461212158203, "eval_runtime": 85.2881, "eval_samples_per_second": 23.45, "eval_steps_per_second": 0.739, "step": 400 }, { "epoch": 0.42, "learning_rate": 4.772553516819572e-07, "logits/chosen": -2.901624917984009, "logits/rejected": -2.869872808456421, "logps/chosen": -292.89959716796875, "logps/rejected": -284.45684814453125, "loss": 0.5223, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.2602219879627228, "rewards/margins": 1.1454756259918213, "rewards/rejected": -1.4056975841522217, "step": 410 }, { "epoch": 0.43, "learning_rate": 4.753440366972477e-07, "logits/chosen": -3.0204930305480957, "logits/rejected": -2.975163221359253, "logps/chosen": -234.13818359375, "logps/rejected": -242.6905059814453, "loss": 0.5389, "rewards/accuracies": 0.75, "rewards/chosen": -0.4058782458305359, "rewards/margins": 0.8633907437324524, "rewards/rejected": -1.2692689895629883, "step": 420 }, { "epoch": 0.44, "learning_rate": 4.7343272171253825e-07, "logits/chosen": -3.014406681060791, "logits/rejected": -2.977393627166748, "logps/chosen": -251.76904296875, "logps/rejected": -243.6968231201172, "loss": 0.5389, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.34599775075912476, "rewards/margins": 1.2039783000946045, "rewards/rejected": -1.549976110458374, "step": 430 }, { "epoch": 0.45, "learning_rate": 4.715214067278288e-07, "logits/chosen": -2.9510791301727295, "logits/rejected": -2.8885598182678223, "logps/chosen": -286.24798583984375, "logps/rejected": -241.7139892578125, "loss": 0.4669, "rewards/accuracies": 0.862500011920929, "rewards/chosen": -0.2214752435684204, "rewards/margins": 1.800271987915039, "rewards/rejected": -2.021747350692749, "step": 440 }, { "epoch": 0.46, "learning_rate": 4.696100917431192e-07, "logits/chosen": -3.0806918144226074, "logits/rejected": -3.0243117809295654, "logps/chosen": -324.5965881347656, "logps/rejected": -282.03594970703125, "loss": 0.5064, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.27235084772109985, "rewards/margins": 0.9739311337471008, "rewards/rejected": -1.2462819814682007, "step": 450 }, { "epoch": 0.47, "learning_rate": 4.6769877675840974e-07, "logits/chosen": -2.9995086193084717, "logits/rejected": -2.9964065551757812, "logps/chosen": -262.6032409667969, "logps/rejected": -252.2246551513672, "loss": 0.4866, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.4137532711029053, "rewards/margins": 1.2080386877059937, "rewards/rejected": -1.621792197227478, "step": 460 }, { "epoch": 0.49, "learning_rate": 4.6578746177370027e-07, "logits/chosen": -2.9163661003112793, "logits/rejected": -2.9061973094940186, "logps/chosen": -201.60374450683594, "logps/rejected": -205.3343048095703, "loss": 0.4674, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": -0.28440338373184204, "rewards/margins": 1.4812262058258057, "rewards/rejected": -1.765629768371582, "step": 470 }, { "epoch": 0.5, "learning_rate": 4.638761467889908e-07, "logits/chosen": -2.9122395515441895, "logits/rejected": -2.8503425121307373, "logps/chosen": -287.53436279296875, "logps/rejected": -242.94888305664062, "loss": 0.5033, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": -0.3852013349533081, "rewards/margins": 1.852294921875, "rewards/rejected": -2.2374961376190186, "step": 480 }, { "epoch": 0.51, "learning_rate": 4.6196483180428133e-07, "logits/chosen": -2.9731476306915283, "logits/rejected": -2.965475559234619, "logps/chosen": -285.62945556640625, "logps/rejected": -235.3878631591797, "loss": 0.5059, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.01807239092886448, "rewards/margins": 1.6012237071990967, "rewards/rejected": -1.6192958354949951, "step": 490 }, { "epoch": 0.52, "learning_rate": 4.600535168195718e-07, "logits/chosen": -2.9572091102600098, "logits/rejected": -2.9516422748565674, "logps/chosen": -218.7193603515625, "logps/rejected": -225.1012420654297, "loss": 0.4534, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.3155478835105896, "rewards/margins": 0.9508973956108093, "rewards/rejected": -1.2664451599121094, "step": 500 }, { "epoch": 0.52, "eval_logits/chosen": -3.0181987285614014, "eval_logits/rejected": -2.980436325073242, "eval_logps/chosen": -298.340576171875, "eval_logps/rejected": -261.2433166503906, "eval_loss": 0.49045199155807495, "eval_rewards/accuracies": 0.7698412537574768, "eval_rewards/chosen": -0.18078860640525818, "eval_rewards/margins": 1.452793002128601, "eval_rewards/rejected": -1.6335817575454712, "eval_runtime": 86.1119, "eval_samples_per_second": 23.226, "eval_steps_per_second": 0.732, "step": 500 }, { "epoch": 0.53, "learning_rate": 4.5814220183486234e-07, "logits/chosen": -2.905066967010498, "logits/rejected": -2.841881275177002, "logps/chosen": -303.79681396484375, "logps/rejected": -282.34051513671875, "loss": 0.5138, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.47309979796409607, "rewards/margins": 1.0634024143218994, "rewards/rejected": -1.5365021228790283, "step": 510 }, { "epoch": 0.54, "learning_rate": 4.562308868501529e-07, "logits/chosen": -2.857588291168213, "logits/rejected": -2.8893682956695557, "logps/chosen": -264.73577880859375, "logps/rejected": -275.3564758300781, "loss": 0.5008, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.2937226891517639, "rewards/margins": 1.5019558668136597, "rewards/rejected": -1.7956784963607788, "step": 520 }, { "epoch": 0.55, "learning_rate": 4.543195718654434e-07, "logits/chosen": -2.914440393447876, "logits/rejected": -2.902010440826416, "logps/chosen": -230.23721313476562, "logps/rejected": -215.05258178710938, "loss": 0.5034, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.4968380928039551, "rewards/margins": 0.6932927966117859, "rewards/rejected": -1.1901309490203857, "step": 530 }, { "epoch": 0.56, "learning_rate": 4.5240825688073394e-07, "logits/chosen": -3.00209379196167, "logits/rejected": -2.9657745361328125, "logps/chosen": -291.4919128417969, "logps/rejected": -249.2689208984375, "loss": 0.5174, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.5610731244087219, "rewards/margins": 1.5831964015960693, "rewards/rejected": -2.1442694664001465, "step": 540 }, { "epoch": 0.57, "learning_rate": 4.504969418960244e-07, "logits/chosen": -2.939059257507324, "logits/rejected": -2.934699535369873, "logps/chosen": -267.82537841796875, "logps/rejected": -286.34893798828125, "loss": 0.5132, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.7965409159660339, "rewards/margins": 1.2346630096435547, "rewards/rejected": -2.0312037467956543, "step": 550 }, { "epoch": 0.58, "learning_rate": 4.4858562691131495e-07, "logits/chosen": -2.9643239974975586, "logits/rejected": -2.970371723175049, "logps/chosen": -303.12457275390625, "logps/rejected": -302.5494689941406, "loss": 0.5128, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.6967397928237915, "rewards/margins": 1.166548490524292, "rewards/rejected": -1.863288164138794, "step": 560 }, { "epoch": 0.59, "learning_rate": 4.466743119266055e-07, "logits/chosen": -2.896851062774658, "logits/rejected": -2.875133752822876, "logps/chosen": -287.5796813964844, "logps/rejected": -261.62530517578125, "loss": 0.4649, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.7515207529067993, "rewards/margins": 1.2609531879425049, "rewards/rejected": -2.0124735832214355, "step": 570 }, { "epoch": 0.6, "learning_rate": 4.44762996941896e-07, "logits/chosen": -2.923142910003662, "logits/rejected": -2.85493540763855, "logps/chosen": -303.4167175292969, "logps/rejected": -275.49822998046875, "loss": 0.4969, "rewards/accuracies": 0.8125, "rewards/chosen": -0.43573087453842163, "rewards/margins": 1.485872507095337, "rewards/rejected": -1.9216034412384033, "step": 580 }, { "epoch": 0.61, "learning_rate": 4.4285168195718655e-07, "logits/chosen": -2.8740463256835938, "logits/rejected": -2.8674261569976807, "logps/chosen": -239.81216430664062, "logps/rejected": -256.44781494140625, "loss": 0.4749, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.5639557838439941, "rewards/margins": 1.198778748512268, "rewards/rejected": -1.7627344131469727, "step": 590 }, { "epoch": 0.62, "learning_rate": 4.40940366972477e-07, "logits/chosen": -2.9853415489196777, "logits/rejected": -2.9935359954833984, "logps/chosen": -242.20938110351562, "logps/rejected": -233.65548706054688, "loss": 0.4976, "rewards/accuracies": 0.6875, "rewards/chosen": -0.4875938892364502, "rewards/margins": 1.0177834033966064, "rewards/rejected": -1.5053772926330566, "step": 600 }, { "epoch": 0.62, "eval_logits/chosen": -2.972991466522217, "eval_logits/rejected": -2.9266481399536133, "eval_logps/chosen": -298.8059387207031, "eval_logps/rejected": -260.2930603027344, "eval_loss": 0.4871741831302643, "eval_rewards/accuracies": 0.7658730149269104, "eval_rewards/chosen": -0.22732341289520264, "eval_rewards/margins": 1.3112375736236572, "eval_rewards/rejected": -1.5385609865188599, "eval_runtime": 85.4126, "eval_samples_per_second": 23.416, "eval_steps_per_second": 0.738, "step": 600 }, { "epoch": 0.63, "learning_rate": 4.3902905198776756e-07, "logits/chosen": -2.962296962738037, "logits/rejected": -2.944753408432007, "logps/chosen": -282.0075378417969, "logps/rejected": -245.8345184326172, "loss": 0.4831, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.3591349720954895, "rewards/margins": 1.316572904586792, "rewards/rejected": -1.6757080554962158, "step": 610 }, { "epoch": 0.64, "learning_rate": 4.371177370030581e-07, "logits/chosen": -2.9345905780792236, "logits/rejected": -2.911076545715332, "logps/chosen": -283.3570251464844, "logps/rejected": -251.5830841064453, "loss": 0.4764, "rewards/accuracies": 0.8125, "rewards/chosen": -0.44033902883529663, "rewards/margins": 1.620408296585083, "rewards/rejected": -2.0607473850250244, "step": 620 }, { "epoch": 0.65, "learning_rate": 4.352064220183486e-07, "logits/chosen": -2.9641098976135254, "logits/rejected": -2.9336962699890137, "logps/chosen": -224.51687622070312, "logps/rejected": -220.58651733398438, "loss": 0.5132, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.6185753345489502, "rewards/margins": 1.2681429386138916, "rewards/rejected": -1.8867183923721313, "step": 630 }, { "epoch": 0.66, "learning_rate": 4.3329510703363915e-07, "logits/chosen": -3.050631046295166, "logits/rejected": -2.9990761280059814, "logps/chosen": -263.21337890625, "logps/rejected": -234.34375, "loss": 0.5008, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.5043941140174866, "rewards/margins": 1.566725254058838, "rewards/rejected": -2.071119546890259, "step": 640 }, { "epoch": 0.67, "learning_rate": 4.313837920489297e-07, "logits/chosen": -2.907442808151245, "logits/rejected": -2.897782802581787, "logps/chosen": -240.8859405517578, "logps/rejected": -245.74880981445312, "loss": 0.455, "rewards/accuracies": 0.75, "rewards/chosen": -0.5294120907783508, "rewards/margins": 1.4188861846923828, "rewards/rejected": -1.948298454284668, "step": 650 }, { "epoch": 0.68, "learning_rate": 4.2947247706422016e-07, "logits/chosen": -2.8966031074523926, "logits/rejected": -2.8596677780151367, "logps/chosen": -265.93841552734375, "logps/rejected": -256.66900634765625, "loss": 0.479, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.824979305267334, "rewards/margins": 1.305490255355835, "rewards/rejected": -2.130469560623169, "step": 660 }, { "epoch": 0.69, "learning_rate": 4.275611620795107e-07, "logits/chosen": -2.9146366119384766, "logits/rejected": -2.8707921504974365, "logps/chosen": -321.3832702636719, "logps/rejected": -260.4658508300781, "loss": 0.4993, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.8080131411552429, "rewards/margins": 0.9972589612007141, "rewards/rejected": -1.8052723407745361, "step": 670 }, { "epoch": 0.7, "learning_rate": 4.2564984709480123e-07, "logits/chosen": -2.9579291343688965, "logits/rejected": -2.92578125, "logps/chosen": -300.4450988769531, "logps/rejected": -243.27645874023438, "loss": 0.576, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.7343733906745911, "rewards/margins": 1.0802533626556396, "rewards/rejected": -1.814626693725586, "step": 680 }, { "epoch": 0.71, "learning_rate": 4.2373853211009176e-07, "logits/chosen": -2.968165159225464, "logits/rejected": -2.956108331680298, "logps/chosen": -269.56500244140625, "logps/rejected": -258.65631103515625, "loss": 0.521, "rewards/accuracies": 0.8125, "rewards/chosen": -0.3433496952056885, "rewards/margins": 1.492470145225525, "rewards/rejected": -1.8358198404312134, "step": 690 }, { "epoch": 0.72, "learning_rate": 4.2182721712538224e-07, "logits/chosen": -2.9497225284576416, "logits/rejected": -2.924623966217041, "logps/chosen": -293.396484375, "logps/rejected": -236.59671020507812, "loss": 0.5452, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.5475293397903442, "rewards/margins": 1.052236795425415, "rewards/rejected": -1.5997662544250488, "step": 700 }, { "epoch": 0.72, "eval_logits/chosen": -2.968550443649292, "eval_logits/rejected": -2.937694787979126, "eval_logps/chosen": -301.3451843261719, "eval_logps/rejected": -261.7586364746094, "eval_loss": 0.4888325333595276, "eval_rewards/accuracies": 0.7341269850730896, "eval_rewards/chosen": -0.48125144839286804, "eval_rewards/margins": 1.2038646936416626, "eval_rewards/rejected": -1.6851160526275635, "eval_runtime": 85.7614, "eval_samples_per_second": 23.321, "eval_steps_per_second": 0.735, "step": 700 }, { "epoch": 0.73, "learning_rate": 4.199159021406727e-07, "logits/chosen": -2.8664116859436035, "logits/rejected": -2.904461622238159, "logps/chosen": -250.817138671875, "logps/rejected": -236.36422729492188, "loss": 0.5166, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.6456762552261353, "rewards/margins": 1.1793797016143799, "rewards/rejected": -1.8250560760498047, "step": 710 }, { "epoch": 0.74, "learning_rate": 4.1800458715596325e-07, "logits/chosen": -2.9268696308135986, "logits/rejected": -2.853827953338623, "logps/chosen": -317.4768981933594, "logps/rejected": -273.88787841796875, "loss": 0.5413, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.4396774172782898, "rewards/margins": 1.1779446601867676, "rewards/rejected": -1.6176220178604126, "step": 720 }, { "epoch": 0.75, "learning_rate": 4.160932721712538e-07, "logits/chosen": -2.8954315185546875, "logits/rejected": -2.8464741706848145, "logps/chosen": -293.01678466796875, "logps/rejected": -276.9455261230469, "loss": 0.5042, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.5548242330551147, "rewards/margins": 1.2215098142623901, "rewards/rejected": -1.7763340473175049, "step": 730 }, { "epoch": 0.76, "learning_rate": 4.141819571865443e-07, "logits/chosen": -2.851848602294922, "logits/rejected": -2.798680067062378, "logps/chosen": -262.6050109863281, "logps/rejected": -218.8506317138672, "loss": 0.4847, "rewards/accuracies": 0.75, "rewards/chosen": -0.7612582445144653, "rewards/margins": 1.4158737659454346, "rewards/rejected": -2.1771321296691895, "step": 740 }, { "epoch": 0.77, "learning_rate": 4.1227064220183485e-07, "logits/chosen": -2.8720107078552246, "logits/rejected": -2.8650383949279785, "logps/chosen": -267.54156494140625, "logps/rejected": -254.5974578857422, "loss": 0.4604, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.6114904284477234, "rewards/margins": 1.7151912450790405, "rewards/rejected": -2.326681613922119, "step": 750 }, { "epoch": 0.78, "learning_rate": 4.103593272171253e-07, "logits/chosen": -2.842667818069458, "logits/rejected": -2.8490633964538574, "logps/chosen": -280.39593505859375, "logps/rejected": -285.4444274902344, "loss": 0.5362, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.7370551824569702, "rewards/margins": 1.257871389389038, "rewards/rejected": -1.9949266910552979, "step": 760 }, { "epoch": 0.79, "learning_rate": 4.0844801223241586e-07, "logits/chosen": -2.9199094772338867, "logits/rejected": -2.8772215843200684, "logps/chosen": -287.48944091796875, "logps/rejected": -255.93338012695312, "loss": 0.4611, "rewards/accuracies": 0.75, "rewards/chosen": -0.39787834882736206, "rewards/margins": 1.0551578998565674, "rewards/rejected": -1.4530361890792847, "step": 770 }, { "epoch": 0.8, "learning_rate": 4.065366972477064e-07, "logits/chosen": -2.8562026023864746, "logits/rejected": -2.8700435161590576, "logps/chosen": -293.22271728515625, "logps/rejected": -266.9279479980469, "loss": 0.4804, "rewards/accuracies": 0.75, "rewards/chosen": -0.5370479822158813, "rewards/margins": 1.5146362781524658, "rewards/rejected": -2.0516841411590576, "step": 780 }, { "epoch": 0.82, "learning_rate": 4.046253822629969e-07, "logits/chosen": -2.8632709980010986, "logits/rejected": -2.8351528644561768, "logps/chosen": -268.63116455078125, "logps/rejected": -248.3597412109375, "loss": 0.4616, "rewards/accuracies": 0.8125, "rewards/chosen": -0.23348459601402283, "rewards/margins": 1.344719409942627, "rewards/rejected": -1.5782040357589722, "step": 790 }, { "epoch": 0.83, "learning_rate": 4.0271406727828745e-07, "logits/chosen": -2.8927769660949707, "logits/rejected": -2.8393540382385254, "logps/chosen": -264.2203063964844, "logps/rejected": -240.07052612304688, "loss": 0.5342, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.6227496862411499, "rewards/margins": 1.1725116968154907, "rewards/rejected": -1.7952613830566406, "step": 800 }, { "epoch": 0.83, "eval_logits/chosen": -2.8821091651916504, "eval_logits/rejected": -2.8434085845947266, "eval_logps/chosen": -300.23773193359375, "eval_logps/rejected": -264.129150390625, "eval_loss": 0.47742241621017456, "eval_rewards/accuracies": 0.7857142686843872, "eval_rewards/chosen": -0.3705042004585266, "eval_rewards/margins": 1.5516674518585205, "eval_rewards/rejected": -1.9221714735031128, "eval_runtime": 84.847, "eval_samples_per_second": 23.572, "eval_steps_per_second": 0.743, "step": 800 }, { "epoch": 0.84, "learning_rate": 4.00802752293578e-07, "logits/chosen": -2.839601993560791, "logits/rejected": -2.832960605621338, "logps/chosen": -276.8193359375, "logps/rejected": -253.14517211914062, "loss": 0.4653, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.4897233545780182, "rewards/margins": 1.3689448833465576, "rewards/rejected": -1.858668327331543, "step": 810 }, { "epoch": 0.85, "learning_rate": 3.9889143730886847e-07, "logits/chosen": -2.838459014892578, "logits/rejected": -2.8137266635894775, "logps/chosen": -314.2509460449219, "logps/rejected": -267.2218017578125, "loss": 0.4597, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.45112401247024536, "rewards/margins": 1.6043586730957031, "rewards/rejected": -2.0554826259613037, "step": 820 }, { "epoch": 0.86, "learning_rate": 3.96980122324159e-07, "logits/chosen": -2.8855397701263428, "logits/rejected": -2.8772969245910645, "logps/chosen": -271.2967224121094, "logps/rejected": -224.9015655517578, "loss": 0.4821, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.6153538227081299, "rewards/margins": 1.476227879524231, "rewards/rejected": -2.0915818214416504, "step": 830 }, { "epoch": 0.87, "learning_rate": 3.9506880733944953e-07, "logits/chosen": -2.904127597808838, "logits/rejected": -2.8686683177948, "logps/chosen": -261.08575439453125, "logps/rejected": -243.058837890625, "loss": 0.5536, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.7326072454452515, "rewards/margins": 1.5098663568496704, "rewards/rejected": -2.242473602294922, "step": 840 }, { "epoch": 0.88, "learning_rate": 3.9315749235474006e-07, "logits/chosen": -2.910623788833618, "logits/rejected": -2.9078075885772705, "logps/chosen": -283.50946044921875, "logps/rejected": -288.6430358886719, "loss": 0.4894, "rewards/accuracies": 0.75, "rewards/chosen": -0.6883010864257812, "rewards/margins": 1.5665154457092285, "rewards/rejected": -2.2548165321350098, "step": 850 }, { "epoch": 0.89, "learning_rate": 3.912461773700306e-07, "logits/chosen": -2.881904125213623, "logits/rejected": -2.8731162548065186, "logps/chosen": -304.5970458984375, "logps/rejected": -308.7020263671875, "loss": 0.5145, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.7746303677558899, "rewards/margins": 1.2819398641586304, "rewards/rejected": -2.056570291519165, "step": 860 }, { "epoch": 0.9, "learning_rate": 3.8933486238532107e-07, "logits/chosen": -2.7776718139648438, "logits/rejected": -2.780966281890869, "logps/chosen": -321.63397216796875, "logps/rejected": -256.59344482421875, "loss": 0.4833, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.6265315413475037, "rewards/margins": 1.2452455759048462, "rewards/rejected": -1.8717771768569946, "step": 870 }, { "epoch": 0.91, "learning_rate": 3.874235474006116e-07, "logits/chosen": -2.8302550315856934, "logits/rejected": -2.7907934188842773, "logps/chosen": -295.404541015625, "logps/rejected": -253.10470581054688, "loss": 0.498, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.7017183303833008, "rewards/margins": 1.4433740377426147, "rewards/rejected": -2.145092248916626, "step": 880 }, { "epoch": 0.92, "learning_rate": 3.8551223241590214e-07, "logits/chosen": -2.821937084197998, "logits/rejected": -2.7967801094055176, "logps/chosen": -262.8708801269531, "logps/rejected": -247.0980987548828, "loss": 0.5049, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.6857427358627319, "rewards/margins": 1.306744933128357, "rewards/rejected": -1.9924876689910889, "step": 890 }, { "epoch": 0.93, "learning_rate": 3.8360091743119267e-07, "logits/chosen": -2.826765298843384, "logits/rejected": -2.796332597732544, "logps/chosen": -267.5680847167969, "logps/rejected": -244.30905151367188, "loss": 0.5014, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.3354864716529846, "rewards/margins": 1.4798164367675781, "rewards/rejected": -1.815303087234497, "step": 900 }, { "epoch": 0.93, "eval_logits/chosen": -2.878131628036499, "eval_logits/rejected": -2.8338723182678223, "eval_logps/chosen": -298.9295959472656, "eval_logps/rejected": -261.7012634277344, "eval_loss": 0.4813868999481201, "eval_rewards/accuracies": 0.761904776096344, "eval_rewards/chosen": -0.23969438672065735, "eval_rewards/margins": 1.4396847486495972, "eval_rewards/rejected": -1.6793793439865112, "eval_runtime": 85.2741, "eval_samples_per_second": 23.454, "eval_steps_per_second": 0.739, "step": 900 }, { "epoch": 0.94, "learning_rate": 3.816896024464832e-07, "logits/chosen": -2.856722831726074, "logits/rejected": -2.767521381378174, "logps/chosen": -239.7253875732422, "logps/rejected": -228.12472534179688, "loss": 0.5082, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.3585191071033478, "rewards/margins": 1.4834765195846558, "rewards/rejected": -1.8419954776763916, "step": 910 }, { "epoch": 0.95, "learning_rate": 3.797782874617737e-07, "logits/chosen": -2.827901601791382, "logits/rejected": -2.7887134552001953, "logps/chosen": -283.5338439941406, "logps/rejected": -246.73648071289062, "loss": 0.4744, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.3626176714897156, "rewards/margins": 1.1291471719741821, "rewards/rejected": -1.491764783859253, "step": 920 }, { "epoch": 0.96, "learning_rate": 3.778669724770642e-07, "logits/chosen": -2.7931838035583496, "logits/rejected": -2.784943103790283, "logps/chosen": -264.00958251953125, "logps/rejected": -217.96646118164062, "loss": 0.4845, "rewards/accuracies": 0.8125, "rewards/chosen": -0.3267980217933655, "rewards/margins": 1.5182520151138306, "rewards/rejected": -1.8450498580932617, "step": 930 }, { "epoch": 0.97, "learning_rate": 3.7595565749235474e-07, "logits/chosen": -2.897451400756836, "logits/rejected": -2.854799747467041, "logps/chosen": -289.0576171875, "logps/rejected": -229.93984985351562, "loss": 0.4933, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.2656581401824951, "rewards/margins": 1.4493383169174194, "rewards/rejected": -1.714996576309204, "step": 940 }, { "epoch": 0.98, "learning_rate": 3.740443425076452e-07, "logits/chosen": -2.855194568634033, "logits/rejected": -2.843090295791626, "logps/chosen": -285.1914978027344, "logps/rejected": -263.72344970703125, "loss": 0.4782, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.46451419591903687, "rewards/margins": 1.2706360816955566, "rewards/rejected": -1.7351500988006592, "step": 950 }, { "epoch": 0.99, "learning_rate": 3.7213302752293575e-07, "logits/chosen": -2.8054327964782715, "logits/rejected": -2.819227695465088, "logps/chosen": -276.37347412109375, "logps/rejected": -247.4856719970703, "loss": 0.474, "rewards/accuracies": 0.6875, "rewards/chosen": -0.40697455406188965, "rewards/margins": 1.0086820125579834, "rewards/rejected": -1.4156566858291626, "step": 960 }, { "epoch": 1.0, "learning_rate": 3.702217125382263e-07, "logits/chosen": -2.800783634185791, "logits/rejected": -2.7564892768859863, "logps/chosen": -290.7643127441406, "logps/rejected": -270.8573303222656, "loss": 0.4028, "rewards/accuracies": 0.8125, "rewards/chosen": -0.32486557960510254, "rewards/margins": 1.6888189315795898, "rewards/rejected": -2.0136847496032715, "step": 970 }, { "epoch": 1.01, "learning_rate": 3.6831039755351677e-07, "logits/chosen": -2.8075499534606934, "logits/rejected": -2.798651933670044, "logps/chosen": -256.40228271484375, "logps/rejected": -276.94976806640625, "loss": 0.0877, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.9983769655227661, "rewards/margins": 4.673001289367676, "rewards/rejected": -3.6746246814727783, "step": 980 }, { "epoch": 1.02, "learning_rate": 3.663990825688073e-07, "logits/chosen": -2.7263360023498535, "logits/rejected": -2.6973845958709717, "logps/chosen": -244.35220336914062, "logps/rejected": -270.0553283691406, "loss": 0.1026, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.787170946598053, "rewards/margins": 5.061522006988525, "rewards/rejected": -4.274351119995117, "step": 990 }, { "epoch": 1.03, "learning_rate": 3.6448776758409783e-07, "logits/chosen": -2.7410221099853516, "logits/rejected": -2.718764543533325, "logps/chosen": -276.5079650878906, "logps/rejected": -291.8220520019531, "loss": 0.0785, "rewards/accuracies": 1.0, "rewards/chosen": 0.8010635375976562, "rewards/margins": 4.786960124969482, "rewards/rejected": -3.985896348953247, "step": 1000 }, { "epoch": 1.03, "eval_logits/chosen": -2.8067638874053955, "eval_logits/rejected": -2.7560861110687256, "eval_logps/chosen": -303.0184326171875, "eval_logps/rejected": -270.12823486328125, "eval_loss": 0.4820875823497772, "eval_rewards/accuracies": 0.7658730149269104, "eval_rewards/chosen": -0.648577868938446, "eval_rewards/margins": 1.8734972476959229, "eval_rewards/rejected": -2.5220751762390137, "eval_runtime": 86.0373, "eval_samples_per_second": 23.246, "eval_steps_per_second": 0.732, "step": 1000 }, { "epoch": 1.04, "learning_rate": 3.6257645259938836e-07, "logits/chosen": -2.725844621658325, "logits/rejected": -2.7488369941711426, "logps/chosen": -244.24826049804688, "logps/rejected": -285.93255615234375, "loss": 0.0766, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.5290582776069641, "rewards/margins": 5.142174243927002, "rewards/rejected": -4.6131157875061035, "step": 1010 }, { "epoch": 1.05, "learning_rate": 3.606651376146789e-07, "logits/chosen": -2.6446681022644043, "logits/rejected": -2.60050106048584, "logps/chosen": -243.2852325439453, "logps/rejected": -234.6096649169922, "loss": 0.0837, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.5728534460067749, "rewards/margins": 4.691143989562988, "rewards/rejected": -4.118290424346924, "step": 1020 }, { "epoch": 1.06, "learning_rate": 3.5875382262996937e-07, "logits/chosen": -2.730254650115967, "logits/rejected": -2.707766056060791, "logps/chosen": -269.9300231933594, "logps/rejected": -323.731689453125, "loss": 0.0731, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.7072321176528931, "rewards/margins": 4.812115669250488, "rewards/rejected": -4.104883193969727, "step": 1030 }, { "epoch": 1.07, "learning_rate": 3.568425076452599e-07, "logits/chosen": -2.798232316970825, "logits/rejected": -2.7622432708740234, "logps/chosen": -283.02587890625, "logps/rejected": -288.06927490234375, "loss": 0.0847, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.6590858697891235, "rewards/margins": 5.309805870056152, "rewards/rejected": -4.650720119476318, "step": 1040 }, { "epoch": 1.08, "learning_rate": 3.5493119266055044e-07, "logits/chosen": -2.6808838844299316, "logits/rejected": -2.67130708694458, "logps/chosen": -262.48748779296875, "logps/rejected": -240.3964080810547, "loss": 0.0914, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.24962154030799866, "rewards/margins": 4.924658298492432, "rewards/rejected": -4.675037384033203, "step": 1050 }, { "epoch": 1.09, "learning_rate": 3.5301987767584097e-07, "logits/chosen": -2.70487380027771, "logits/rejected": -2.6635613441467285, "logps/chosen": -256.9989318847656, "logps/rejected": -302.34051513671875, "loss": 0.0807, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.3633723855018616, "rewards/margins": 5.548757553100586, "rewards/rejected": -5.185385704040527, "step": 1060 }, { "epoch": 1.1, "learning_rate": 3.511085626911315e-07, "logits/chosen": -2.808043956756592, "logits/rejected": -2.8087692260742188, "logps/chosen": -323.17535400390625, "logps/rejected": -289.61834716796875, "loss": 0.1467, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.2529454827308655, "rewards/margins": 5.11926794052124, "rewards/rejected": -4.8663225173950195, "step": 1070 }, { "epoch": 1.11, "learning_rate": 3.49197247706422e-07, "logits/chosen": -2.775644302368164, "logits/rejected": -2.7150864601135254, "logps/chosen": -226.1105194091797, "logps/rejected": -252.4014434814453, "loss": 0.1123, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.5964186787605286, "rewards/margins": 4.527159690856934, "rewards/rejected": -5.123579025268555, "step": 1080 }, { "epoch": 1.12, "learning_rate": 3.472859327217125e-07, "logits/chosen": -2.668936014175415, "logits/rejected": -2.6859848499298096, "logps/chosen": -291.4830627441406, "logps/rejected": -304.6649475097656, "loss": 0.0945, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -0.12123675644397736, "rewards/margins": 5.1819868087768555, "rewards/rejected": -5.303224086761475, "step": 1090 }, { "epoch": 1.14, "learning_rate": 3.4537461773700304e-07, "logits/chosen": -2.767573595046997, "logits/rejected": -2.67921781539917, "logps/chosen": -213.55044555664062, "logps/rejected": -232.8217315673828, "loss": 0.0883, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.168074369430542, "rewards/margins": 4.9372406005859375, "rewards/rejected": -5.105315208435059, "step": 1100 }, { "epoch": 1.14, "eval_logits/chosen": -2.8337366580963135, "eval_logits/rejected": -2.7830586433410645, "eval_logps/chosen": -309.7097473144531, "eval_logps/rejected": -278.2621154785156, "eval_loss": 0.5074188709259033, "eval_rewards/accuracies": 0.7539682388305664, "eval_rewards/chosen": -1.317708969116211, "eval_rewards/margins": 2.017754077911377, "eval_rewards/rejected": -3.335462808609009, "eval_runtime": 84.7751, "eval_samples_per_second": 23.592, "eval_steps_per_second": 0.743, "step": 1100 }, { "epoch": 1.15, "learning_rate": 3.434633027522936e-07, "logits/chosen": -2.7691617012023926, "logits/rejected": -2.7546744346618652, "logps/chosen": -278.5716857910156, "logps/rejected": -283.21502685546875, "loss": 0.0887, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.48537954688072205, "rewards/margins": 5.524861812591553, "rewards/rejected": -5.039482593536377, "step": 1110 }, { "epoch": 1.16, "learning_rate": 3.415519877675841e-07, "logits/chosen": -2.7980878353118896, "logits/rejected": -2.808043956756592, "logps/chosen": -240.988525390625, "logps/rejected": -296.6959228515625, "loss": 0.0991, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -0.005442300345748663, "rewards/margins": 4.918468952178955, "rewards/rejected": -4.923911094665527, "step": 1120 }, { "epoch": 1.17, "learning_rate": 3.3964067278287464e-07, "logits/chosen": -2.8718693256378174, "logits/rejected": -2.8258144855499268, "logps/chosen": -284.6709289550781, "logps/rejected": -264.33795166015625, "loss": 0.0988, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2022373229265213, "rewards/margins": 5.098201274871826, "rewards/rejected": -4.895963668823242, "step": 1130 }, { "epoch": 1.18, "learning_rate": 3.377293577981651e-07, "logits/chosen": -2.737651824951172, "logits/rejected": -2.75420880317688, "logps/chosen": -230.8458251953125, "logps/rejected": -276.8215637207031, "loss": 0.0859, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.16446110606193542, "rewards/margins": 5.473117828369141, "rewards/rejected": -5.308656215667725, "step": 1140 }, { "epoch": 1.19, "learning_rate": 3.3581804281345565e-07, "logits/chosen": -2.8187365531921387, "logits/rejected": -2.750450611114502, "logps/chosen": -287.13604736328125, "logps/rejected": -268.45318603515625, "loss": 0.078, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.4848671853542328, "rewards/margins": 5.562216758728027, "rewards/rejected": -5.0773491859436035, "step": 1150 }, { "epoch": 1.2, "learning_rate": 3.339067278287462e-07, "logits/chosen": -2.625655174255371, "logits/rejected": -2.6164321899414062, "logps/chosen": -253.10617065429688, "logps/rejected": -279.876220703125, "loss": 0.0656, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.3776094317436218, "rewards/margins": 5.922922134399414, "rewards/rejected": -5.545313358306885, "step": 1160 }, { "epoch": 1.21, "learning_rate": 3.319954128440367e-07, "logits/chosen": -2.8032162189483643, "logits/rejected": -2.757336139678955, "logps/chosen": -283.8640441894531, "logps/rejected": -268.12109375, "loss": 0.0696, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.19224987924098969, "rewards/margins": 5.265924453735352, "rewards/rejected": -5.07367467880249, "step": 1170 }, { "epoch": 1.22, "learning_rate": 3.3008409785932725e-07, "logits/chosen": -2.699878215789795, "logits/rejected": -2.6233882904052734, "logps/chosen": -251.9101104736328, "logps/rejected": -270.0425720214844, "loss": 0.0769, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.0026382445357739925, "rewards/margins": 5.298295497894287, "rewards/rejected": -5.295657157897949, "step": 1180 }, { "epoch": 1.23, "learning_rate": 3.2817278287461773e-07, "logits/chosen": -2.767627716064453, "logits/rejected": -2.7735066413879395, "logps/chosen": -235.3647003173828, "logps/rejected": -294.1983337402344, "loss": 0.083, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.19832327961921692, "rewards/margins": 5.44091272354126, "rewards/rejected": -5.242589473724365, "step": 1190 }, { "epoch": 1.24, "learning_rate": 3.262614678899082e-07, "logits/chosen": -2.7307627201080322, "logits/rejected": -2.7215757369995117, "logps/chosen": -251.027099609375, "logps/rejected": -315.58026123046875, "loss": 0.086, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.10485126823186874, "rewards/margins": 5.883278846740723, "rewards/rejected": -5.988129615783691, "step": 1200 }, { "epoch": 1.24, "eval_logits/chosen": -2.834700345993042, "eval_logits/rejected": -2.787626266479492, "eval_logps/chosen": -307.7827453613281, "eval_logps/rejected": -277.5297546386719, "eval_loss": 0.5000655651092529, "eval_rewards/accuracies": 0.7539682388305664, "eval_rewards/chosen": -1.1250085830688477, "eval_rewards/margins": 2.137223720550537, "eval_rewards/rejected": -3.262232542037964, "eval_runtime": 85.7069, "eval_samples_per_second": 23.335, "eval_steps_per_second": 0.735, "step": 1200 }, { "epoch": 1.25, "learning_rate": 3.2435015290519874e-07, "logits/chosen": -2.7468724250793457, "logits/rejected": -2.7094569206237793, "logps/chosen": -232.368896484375, "logps/rejected": -263.92901611328125, "loss": 0.0689, "rewards/accuracies": 0.9375, "rewards/chosen": -0.06489093601703644, "rewards/margins": 5.3409833908081055, "rewards/rejected": -5.405873775482178, "step": 1210 }, { "epoch": 1.26, "learning_rate": 3.2243883792048927e-07, "logits/chosen": -2.7789623737335205, "logits/rejected": -2.772648811340332, "logps/chosen": -290.0833435058594, "logps/rejected": -341.7529602050781, "loss": 0.0932, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.19088497757911682, "rewards/margins": 5.593591213226318, "rewards/rejected": -5.402706146240234, "step": 1220 }, { "epoch": 1.27, "learning_rate": 3.205275229357798e-07, "logits/chosen": -2.7190937995910645, "logits/rejected": -2.684502124786377, "logps/chosen": -299.87225341796875, "logps/rejected": -335.17327880859375, "loss": 0.095, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.03153310716152191, "rewards/margins": 5.8492231369018555, "rewards/rejected": -5.880755424499512, "step": 1230 }, { "epoch": 1.28, "learning_rate": 3.186162079510703e-07, "logits/chosen": -2.7524490356445312, "logits/rejected": -2.7430176734924316, "logps/chosen": -285.627197265625, "logps/rejected": -323.4786071777344, "loss": 0.0928, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.02745187282562256, "rewards/margins": 5.191933631896973, "rewards/rejected": -5.1644816398620605, "step": 1240 }, { "epoch": 1.29, "learning_rate": 3.167048929663608e-07, "logits/chosen": -2.788243293762207, "logits/rejected": -2.7167086601257324, "logps/chosen": -236.2114715576172, "logps/rejected": -248.34506225585938, "loss": 0.0924, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.3661441206932068, "rewards/margins": 4.600642204284668, "rewards/rejected": -4.9667863845825195, "step": 1250 }, { "epoch": 1.3, "learning_rate": 3.1479357798165134e-07, "logits/chosen": -2.8230297565460205, "logits/rejected": -2.785630464553833, "logps/chosen": -293.46527099609375, "logps/rejected": -290.85736083984375, "loss": 0.0949, "rewards/accuracies": 0.925000011920929, "rewards/chosen": 0.026639381423592567, "rewards/margins": 5.191295623779297, "rewards/rejected": -5.164656639099121, "step": 1260 }, { "epoch": 1.31, "learning_rate": 3.128822629969419e-07, "logits/chosen": -2.7404916286468506, "logits/rejected": -2.7222659587860107, "logps/chosen": -289.47161865234375, "logps/rejected": -309.0167541503906, "loss": 0.0909, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.054047178477048874, "rewards/margins": 6.036001682281494, "rewards/rejected": -6.090048789978027, "step": 1270 }, { "epoch": 1.32, "learning_rate": 3.109709480122324e-07, "logits/chosen": -2.74894380569458, "logits/rejected": -2.7412331104278564, "logps/chosen": -240.25439453125, "logps/rejected": -276.5160827636719, "loss": 0.0935, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.3607255518436432, "rewards/margins": 5.7335524559021, "rewards/rejected": -5.372827053070068, "step": 1280 }, { "epoch": 1.33, "learning_rate": 3.0905963302752294e-07, "logits/chosen": -2.625156879425049, "logits/rejected": -2.6567492485046387, "logps/chosen": -246.95443725585938, "logps/rejected": -286.4045104980469, "loss": 0.0966, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.23297014832496643, "rewards/margins": 5.24931001663208, "rewards/rejected": -5.482280254364014, "step": 1290 }, { "epoch": 1.34, "learning_rate": 3.071483180428134e-07, "logits/chosen": -2.8050503730773926, "logits/rejected": -2.7595503330230713, "logps/chosen": -328.2964782714844, "logps/rejected": -270.40875244140625, "loss": 0.0919, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.3086862564086914, "rewards/margins": 5.241825103759766, "rewards/rejected": -4.933138847351074, "step": 1300 }, { "epoch": 1.34, "eval_logits/chosen": -2.8075647354125977, "eval_logits/rejected": -2.766228675842285, "eval_logps/chosen": -310.404541015625, "eval_logps/rejected": -280.4382629394531, "eval_loss": 0.5053635835647583, "eval_rewards/accuracies": 0.8015872836112976, "eval_rewards/chosen": -1.3871887922286987, "eval_rewards/margins": 2.1658928394317627, "eval_rewards/rejected": -3.55308198928833, "eval_runtime": 84.9554, "eval_samples_per_second": 23.542, "eval_steps_per_second": 0.742, "step": 1300 }, { "epoch": 1.35, "learning_rate": 3.0523700305810395e-07, "logits/chosen": -2.7835230827331543, "logits/rejected": -2.7681832313537598, "logps/chosen": -272.99322509765625, "logps/rejected": -310.72442626953125, "loss": 0.1015, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.010780900716781616, "rewards/margins": 5.337492942810059, "rewards/rejected": -5.348274230957031, "step": 1310 }, { "epoch": 1.36, "learning_rate": 3.033256880733945e-07, "logits/chosen": -2.7876617908477783, "logits/rejected": -2.765178918838501, "logps/chosen": -248.750244140625, "logps/rejected": -260.992919921875, "loss": 0.0854, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.5619943737983704, "rewards/margins": 4.9182329177856445, "rewards/rejected": -5.480227470397949, "step": 1320 }, { "epoch": 1.37, "learning_rate": 3.01414373088685e-07, "logits/chosen": -2.791926622390747, "logits/rejected": -2.6914966106414795, "logps/chosen": -287.69439697265625, "logps/rejected": -290.97320556640625, "loss": 0.0953, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -0.2892554998397827, "rewards/margins": 5.625554084777832, "rewards/rejected": -5.914809226989746, "step": 1330 }, { "epoch": 1.38, "learning_rate": 2.9950305810397555e-07, "logits/chosen": -2.842695951461792, "logits/rejected": -2.8599178791046143, "logps/chosen": -283.12359619140625, "logps/rejected": -292.62762451171875, "loss": 0.1007, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -0.5628482103347778, "rewards/margins": 6.214916229248047, "rewards/rejected": -6.777764320373535, "step": 1340 }, { "epoch": 1.39, "learning_rate": 2.9759174311926603e-07, "logits/chosen": -2.841775417327881, "logits/rejected": -2.7714896202087402, "logps/chosen": -273.721435546875, "logps/rejected": -287.3270263671875, "loss": 0.0833, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.3608776926994324, "rewards/margins": 5.559767246246338, "rewards/rejected": -5.920644283294678, "step": 1350 }, { "epoch": 1.4, "learning_rate": 2.9568042813455656e-07, "logits/chosen": -2.7543766498565674, "logits/rejected": -2.696719169616699, "logps/chosen": -286.90130615234375, "logps/rejected": -254.12673950195312, "loss": 0.1065, "rewards/accuracies": 0.9375, "rewards/chosen": -0.6098439693450928, "rewards/margins": 4.60134744644165, "rewards/rejected": -5.2111921310424805, "step": 1360 }, { "epoch": 1.41, "learning_rate": 2.937691131498471e-07, "logits/chosen": -2.839272975921631, "logits/rejected": -2.737689971923828, "logps/chosen": -292.3916931152344, "logps/rejected": -307.9238586425781, "loss": 0.091, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.15980760753154755, "rewards/margins": 6.0921101570129395, "rewards/rejected": -6.251918315887451, "step": 1370 }, { "epoch": 1.42, "learning_rate": 2.918577981651376e-07, "logits/chosen": -2.7997069358825684, "logits/rejected": -2.779470205307007, "logps/chosen": -241.4885711669922, "logps/rejected": -284.999267578125, "loss": 0.0908, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.28731173276901245, "rewards/margins": 5.128489017486572, "rewards/rejected": -5.415801048278809, "step": 1380 }, { "epoch": 1.43, "learning_rate": 2.8994648318042816e-07, "logits/chosen": -2.825129985809326, "logits/rejected": -2.843670129776001, "logps/chosen": -274.7068176269531, "logps/rejected": -274.2774353027344, "loss": 0.0852, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -0.1482582539319992, "rewards/margins": 5.11671257019043, "rewards/rejected": -5.264970779418945, "step": 1390 }, { "epoch": 1.44, "learning_rate": 2.8803516819571863e-07, "logits/chosen": -2.8828132152557373, "logits/rejected": -2.855530261993408, "logps/chosen": -315.52923583984375, "logps/rejected": -307.06707763671875, "loss": 0.105, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.19713237881660461, "rewards/margins": 5.789515495300293, "rewards/rejected": -5.59238338470459, "step": 1400 }, { "epoch": 1.44, "eval_logits/chosen": -2.829054117202759, "eval_logits/rejected": -2.787668228149414, "eval_logps/chosen": -311.6723327636719, "eval_logps/rejected": -281.1881103515625, "eval_loss": 0.5085237622261047, "eval_rewards/accuracies": 0.7817460298538208, "eval_rewards/chosen": -1.5139707326889038, "eval_rewards/margins": 2.1140964031219482, "eval_rewards/rejected": -3.6280672550201416, "eval_runtime": 84.6223, "eval_samples_per_second": 23.634, "eval_steps_per_second": 0.744, "step": 1400 }, { "epoch": 1.46, "learning_rate": 2.8612385321100917e-07, "logits/chosen": -2.6863460540771484, "logits/rejected": -2.7248947620391846, "logps/chosen": -237.7042694091797, "logps/rejected": -293.4941101074219, "loss": 0.096, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.03336421772837639, "rewards/margins": 5.846810340881348, "rewards/rejected": -5.813446044921875, "step": 1410 }, { "epoch": 1.47, "learning_rate": 2.842125382262997e-07, "logits/chosen": -2.7969777584075928, "logits/rejected": -2.7567477226257324, "logps/chosen": -258.79718017578125, "logps/rejected": -280.1507263183594, "loss": 0.0844, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -0.021525543183088303, "rewards/margins": 5.631020545959473, "rewards/rejected": -5.652545928955078, "step": 1420 }, { "epoch": 1.48, "learning_rate": 2.8230122324159023e-07, "logits/chosen": -2.7350478172302246, "logits/rejected": -2.7465858459472656, "logps/chosen": -296.23486328125, "logps/rejected": -338.95587158203125, "loss": 0.0885, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.08497120440006256, "rewards/margins": 5.93708610534668, "rewards/rejected": -6.022058486938477, "step": 1430 }, { "epoch": 1.49, "learning_rate": 2.8038990825688076e-07, "logits/chosen": -2.8355910778045654, "logits/rejected": -2.7613484859466553, "logps/chosen": -230.56863403320312, "logps/rejected": -241.2537078857422, "loss": 0.0959, "rewards/accuracies": 0.9375, "rewards/chosen": -0.43812817335128784, "rewards/margins": 4.630409240722656, "rewards/rejected": -5.068537712097168, "step": 1440 }, { "epoch": 1.5, "learning_rate": 2.784785932721712e-07, "logits/chosen": -2.7863059043884277, "logits/rejected": -2.7627620697021484, "logps/chosen": -269.6027526855469, "logps/rejected": -288.1315002441406, "loss": 0.1224, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.12829892337322235, "rewards/margins": 6.043244361877441, "rewards/rejected": -5.914945125579834, "step": 1450 }, { "epoch": 1.51, "learning_rate": 2.765672782874617e-07, "logits/chosen": -2.7619357109069824, "logits/rejected": -2.7393484115600586, "logps/chosen": -282.9959716796875, "logps/rejected": -261.0349426269531, "loss": 0.0842, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.06687188148498535, "rewards/margins": 5.253376007080078, "rewards/rejected": -5.186504364013672, "step": 1460 }, { "epoch": 1.52, "learning_rate": 2.7465596330275225e-07, "logits/chosen": -2.807286500930786, "logits/rejected": -2.7439327239990234, "logps/chosen": -281.26104736328125, "logps/rejected": -242.0012664794922, "loss": 0.0905, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.22755661606788635, "rewards/margins": 5.756918430328369, "rewards/rejected": -5.9844746589660645, "step": 1470 }, { "epoch": 1.53, "learning_rate": 2.727446483180428e-07, "logits/chosen": -2.7947564125061035, "logits/rejected": -2.7915396690368652, "logps/chosen": -266.6559143066406, "logps/rejected": -293.7073669433594, "loss": 0.0741, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.4066391587257385, "rewards/margins": 5.554805278778076, "rewards/rejected": -5.96144437789917, "step": 1480 }, { "epoch": 1.54, "learning_rate": 2.708333333333333e-07, "logits/chosen": -2.6909992694854736, "logits/rejected": -2.635457992553711, "logps/chosen": -259.1954345703125, "logps/rejected": -297.5155944824219, "loss": 0.0782, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.6428125500679016, "rewards/margins": 5.484756946563721, "rewards/rejected": -6.127569198608398, "step": 1490 }, { "epoch": 1.55, "learning_rate": 2.6892201834862385e-07, "logits/chosen": -2.8311927318573, "logits/rejected": -2.7327089309692383, "logps/chosen": -266.77490234375, "logps/rejected": -270.7399597167969, "loss": 0.0714, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.03750241920351982, "rewards/margins": 5.840561866760254, "rewards/rejected": -5.878064155578613, "step": 1500 }, { "epoch": 1.55, "eval_logits/chosen": -2.833139181137085, "eval_logits/rejected": -2.7888331413269043, "eval_logps/chosen": -315.17449951171875, "eval_logps/rejected": -285.445068359375, "eval_loss": 0.5215712785720825, "eval_rewards/accuracies": 0.7460317611694336, "eval_rewards/chosen": -1.8641860485076904, "eval_rewards/margins": 2.1895742416381836, "eval_rewards/rejected": -4.053760528564453, "eval_runtime": 85.2599, "eval_samples_per_second": 23.458, "eval_steps_per_second": 0.739, "step": 1500 }, { "epoch": 1.56, "learning_rate": 2.6701070336391433e-07, "logits/chosen": -2.792778253555298, "logits/rejected": -2.752930164337158, "logps/chosen": -321.5592956542969, "logps/rejected": -298.44171142578125, "loss": 0.0751, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.13722172379493713, "rewards/margins": 5.923539161682129, "rewards/rejected": -5.786317825317383, "step": 1510 }, { "epoch": 1.57, "learning_rate": 2.6509938837920486e-07, "logits/chosen": -2.699197769165039, "logits/rejected": -2.7083370685577393, "logps/chosen": -236.6324005126953, "logps/rejected": -276.34002685546875, "loss": 0.0918, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.2362903654575348, "rewards/margins": 5.470263481140137, "rewards/rejected": -5.233973026275635, "step": 1520 }, { "epoch": 1.58, "learning_rate": 2.631880733944954e-07, "logits/chosen": -2.8059258460998535, "logits/rejected": -2.814614772796631, "logps/chosen": -260.8307800292969, "logps/rejected": -273.9322204589844, "loss": 0.0795, "rewards/accuracies": 0.9375, "rewards/chosen": -0.10708916187286377, "rewards/margins": 5.207016468048096, "rewards/rejected": -5.314105033874512, "step": 1530 }, { "epoch": 1.59, "learning_rate": 2.612767584097859e-07, "logits/chosen": -2.8370559215545654, "logits/rejected": -2.8082430362701416, "logps/chosen": -265.97998046875, "logps/rejected": -253.841064453125, "loss": 0.0958, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.06110060214996338, "rewards/margins": 4.913632392883301, "rewards/rejected": -4.852531909942627, "step": 1540 }, { "epoch": 1.6, "learning_rate": 2.5936544342507646e-07, "logits/chosen": -2.792498826980591, "logits/rejected": -2.776141405105591, "logps/chosen": -269.06744384765625, "logps/rejected": -290.98565673828125, "loss": 0.1037, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.20791736245155334, "rewards/margins": 5.7748517990112305, "rewards/rejected": -5.5669355392456055, "step": 1550 }, { "epoch": 1.61, "learning_rate": 2.5745412844036693e-07, "logits/chosen": -2.7609810829162598, "logits/rejected": -2.7718801498413086, "logps/chosen": -291.72613525390625, "logps/rejected": -273.72003173828125, "loss": 0.0868, "rewards/accuracies": 0.9375, "rewards/chosen": -0.20280487835407257, "rewards/margins": 5.068221092224121, "rewards/rejected": -5.271025657653809, "step": 1560 }, { "epoch": 1.62, "learning_rate": 2.5554281345565747e-07, "logits/chosen": -2.8320767879486084, "logits/rejected": -2.772150993347168, "logps/chosen": -267.63543701171875, "logps/rejected": -300.33319091796875, "loss": 0.0819, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.3804650604724884, "rewards/margins": 5.666358470916748, "rewards/rejected": -6.046823024749756, "step": 1570 }, { "epoch": 1.63, "learning_rate": 2.53631498470948e-07, "logits/chosen": -2.8861734867095947, "logits/rejected": -2.8094377517700195, "logps/chosen": -290.4794006347656, "logps/rejected": -290.64398193359375, "loss": 0.0779, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.03389785438776016, "rewards/margins": 5.959391117095947, "rewards/rejected": -5.925492763519287, "step": 1580 }, { "epoch": 1.64, "learning_rate": 2.5172018348623853e-07, "logits/chosen": -2.791463613510132, "logits/rejected": -2.7679646015167236, "logps/chosen": -296.26373291015625, "logps/rejected": -297.009521484375, "loss": 0.0821, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -0.11777625232934952, "rewards/margins": 5.88539457321167, "rewards/rejected": -6.003170967102051, "step": 1590 }, { "epoch": 1.65, "learning_rate": 2.4980886850152906e-07, "logits/chosen": -2.8138768672943115, "logits/rejected": -2.6688551902770996, "logps/chosen": -286.35675048828125, "logps/rejected": -278.78131103515625, "loss": 0.0874, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.002650848124176264, "rewards/margins": 5.6529927253723145, "rewards/rejected": -5.6503424644470215, "step": 1600 }, { "epoch": 1.65, "eval_logits/chosen": -2.8315298557281494, "eval_logits/rejected": -2.775137424468994, "eval_logps/chosen": -311.60955810546875, "eval_logps/rejected": -282.18365478515625, "eval_loss": 0.5050424337387085, "eval_rewards/accuracies": 0.7420634627342224, "eval_rewards/chosen": -1.5076885223388672, "eval_rewards/margins": 2.2199289798736572, "eval_rewards/rejected": -3.7276177406311035, "eval_runtime": 84.8937, "eval_samples_per_second": 23.559, "eval_steps_per_second": 0.742, "step": 1600 }, { "epoch": 1.66, "learning_rate": 2.478975535168196e-07, "logits/chosen": -2.8036742210388184, "logits/rejected": -2.7059030532836914, "logps/chosen": -268.060791015625, "logps/rejected": -258.54937744140625, "loss": 0.086, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.1585809290409088, "rewards/margins": 4.931711673736572, "rewards/rejected": -5.090291976928711, "step": 1610 }, { "epoch": 1.67, "learning_rate": 2.459862385321101e-07, "logits/chosen": -2.8540966510772705, "logits/rejected": -2.816868782043457, "logps/chosen": -315.4597473144531, "logps/rejected": -307.907958984375, "loss": 0.0887, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.18126711249351501, "rewards/margins": 6.035513877868652, "rewards/rejected": -5.854246616363525, "step": 1620 }, { "epoch": 1.68, "learning_rate": 2.440749235474006e-07, "logits/chosen": -2.788999080657959, "logits/rejected": -2.7234809398651123, "logps/chosen": -292.61163330078125, "logps/rejected": -296.74871826171875, "loss": 0.0821, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.10972774028778076, "rewards/margins": 6.098582744598389, "rewards/rejected": -5.988855361938477, "step": 1630 }, { "epoch": 1.69, "learning_rate": 2.421636085626911e-07, "logits/chosen": -2.6809146404266357, "logits/rejected": -2.6293673515319824, "logps/chosen": -264.6792297363281, "logps/rejected": -270.60107421875, "loss": 0.0705, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.5329539179801941, "rewards/margins": 6.360047340393066, "rewards/rejected": -5.827093601226807, "step": 1640 }, { "epoch": 1.7, "learning_rate": 2.402522935779816e-07, "logits/chosen": -2.7440340518951416, "logits/rejected": -2.7100114822387695, "logps/chosen": -293.00799560546875, "logps/rejected": -320.46685791015625, "loss": 0.0739, "rewards/accuracies": 1.0, "rewards/chosen": 0.029893267899751663, "rewards/margins": 6.438266754150391, "rewards/rejected": -6.4083733558654785, "step": 1650 }, { "epoch": 1.71, "learning_rate": 2.3834097859327215e-07, "logits/chosen": -2.7580788135528564, "logits/rejected": -2.7617766857147217, "logps/chosen": -245.4547119140625, "logps/rejected": -284.69964599609375, "loss": 0.0942, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -0.2982054352760315, "rewards/margins": 5.770577430725098, "rewards/rejected": -6.068782329559326, "step": 1660 }, { "epoch": 1.72, "learning_rate": 2.3642966360856268e-07, "logits/chosen": -2.8160476684570312, "logits/rejected": -2.745145797729492, "logps/chosen": -277.74945068359375, "logps/rejected": -277.59478759765625, "loss": 0.0911, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.02770283818244934, "rewards/margins": 5.381051063537598, "rewards/rejected": -5.3533477783203125, "step": 1670 }, { "epoch": 1.73, "learning_rate": 2.345183486238532e-07, "logits/chosen": -2.6783447265625, "logits/rejected": -2.657252788543701, "logps/chosen": -262.15625, "logps/rejected": -323.332763671875, "loss": 0.077, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.23324215412139893, "rewards/margins": 5.4570722579956055, "rewards/rejected": -5.690314769744873, "step": 1680 }, { "epoch": 1.74, "learning_rate": 2.3260703363914372e-07, "logits/chosen": -2.7587218284606934, "logits/rejected": -2.762028455734253, "logps/chosen": -313.23101806640625, "logps/rejected": -298.82940673828125, "loss": 0.0921, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.3798483610153198, "rewards/margins": 5.513558864593506, "rewards/rejected": -5.1337103843688965, "step": 1690 }, { "epoch": 1.75, "learning_rate": 2.3069571865443425e-07, "logits/chosen": -2.797136068344116, "logits/rejected": -2.7841556072235107, "logps/chosen": -239.9384765625, "logps/rejected": -299.2468566894531, "loss": 0.063, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -0.4451209008693695, "rewards/margins": 5.984606742858887, "rewards/rejected": -6.429728031158447, "step": 1700 }, { "epoch": 1.75, "eval_logits/chosen": -2.805445432662964, "eval_logits/rejected": -2.7470343112945557, "eval_logps/chosen": -315.9737854003906, "eval_logps/rejected": -289.32904052734375, "eval_loss": 0.5349620580673218, "eval_rewards/accuracies": 0.7857142686843872, "eval_rewards/chosen": -1.944112777709961, "eval_rewards/margins": 2.4980452060699463, "eval_rewards/rejected": -4.442158222198486, "eval_runtime": 85.6472, "eval_samples_per_second": 23.352, "eval_steps_per_second": 0.736, "step": 1700 }, { "epoch": 1.76, "learning_rate": 2.2878440366972476e-07, "logits/chosen": -2.8598368167877197, "logits/rejected": -2.8258631229400635, "logps/chosen": -285.8379821777344, "logps/rejected": -356.3085021972656, "loss": 0.0798, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.6007496118545532, "rewards/margins": 6.229714393615723, "rewards/rejected": -6.8304643630981445, "step": 1710 }, { "epoch": 1.78, "learning_rate": 2.268730886850153e-07, "logits/chosen": -2.6834561824798584, "logits/rejected": -2.7135403156280518, "logps/chosen": -272.9288024902344, "logps/rejected": -275.1758117675781, "loss": 0.0757, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.0667431578040123, "rewards/margins": 5.4009833335876465, "rewards/rejected": -5.467726230621338, "step": 1720 }, { "epoch": 1.79, "learning_rate": 2.249617737003058e-07, "logits/chosen": -2.7773818969726562, "logits/rejected": -2.7113940715789795, "logps/chosen": -297.2420349121094, "logps/rejected": -332.0787048339844, "loss": 0.0993, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.06141236424446106, "rewards/margins": 6.285152912139893, "rewards/rejected": -6.223740577697754, "step": 1730 }, { "epoch": 1.8, "learning_rate": 2.2305045871559633e-07, "logits/chosen": -2.776603937149048, "logits/rejected": -2.6816258430480957, "logps/chosen": -284.12457275390625, "logps/rejected": -295.02203369140625, "loss": 0.0807, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.05037654563784599, "rewards/margins": 5.701298713684082, "rewards/rejected": -5.751675128936768, "step": 1740 }, { "epoch": 1.81, "learning_rate": 2.2113914373088686e-07, "logits/chosen": -2.8667399883270264, "logits/rejected": -2.844064235687256, "logps/chosen": -312.84820556640625, "logps/rejected": -323.13372802734375, "loss": 0.0877, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.20965537428855896, "rewards/margins": 5.274796485900879, "rewards/rejected": -5.484452247619629, "step": 1750 }, { "epoch": 1.82, "learning_rate": 2.1922782874617736e-07, "logits/chosen": -2.7705488204956055, "logits/rejected": -2.794954776763916, "logps/chosen": -237.9602813720703, "logps/rejected": -337.15008544921875, "loss": 0.0808, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.3575199246406555, "rewards/margins": 6.5947370529174805, "rewards/rejected": -6.9522576332092285, "step": 1760 }, { "epoch": 1.83, "learning_rate": 2.1731651376146787e-07, "logits/chosen": -2.721431255340576, "logits/rejected": -2.7328743934631348, "logps/chosen": -238.32852172851562, "logps/rejected": -295.3879089355469, "loss": 0.0828, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.5729081630706787, "rewards/margins": 5.2664055824279785, "rewards/rejected": -5.83931303024292, "step": 1770 }, { "epoch": 1.84, "learning_rate": 2.154051987767584e-07, "logits/chosen": -2.788494825363159, "logits/rejected": -2.7784392833709717, "logps/chosen": -275.90283203125, "logps/rejected": -276.9033508300781, "loss": 0.0768, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.5950279235839844, "rewards/margins": 5.067400932312012, "rewards/rejected": -5.662428855895996, "step": 1780 }, { "epoch": 1.85, "learning_rate": 2.134938837920489e-07, "logits/chosen": -2.7264552116394043, "logits/rejected": -2.6988372802734375, "logps/chosen": -206.7015380859375, "logps/rejected": -257.45721435546875, "loss": 0.0542, "rewards/accuracies": 1.0, "rewards/chosen": -0.9315367937088013, "rewards/margins": 5.3646650314331055, "rewards/rejected": -6.296202659606934, "step": 1790 }, { "epoch": 1.86, "learning_rate": 2.1158256880733944e-07, "logits/chosen": -2.823498487472534, "logits/rejected": -2.7179205417633057, "logps/chosen": -311.6935729980469, "logps/rejected": -289.9866943359375, "loss": 0.0786, "rewards/accuracies": 0.9375, "rewards/chosen": -0.7751389741897583, "rewards/margins": 5.937049388885498, "rewards/rejected": -6.712187767028809, "step": 1800 }, { "epoch": 1.86, "eval_logits/chosen": -2.812049627304077, "eval_logits/rejected": -2.7543678283691406, "eval_logps/chosen": -316.8768615722656, "eval_logps/rejected": -289.1434020996094, "eval_loss": 0.5376091599464417, "eval_rewards/accuracies": 0.7698412537574768, "eval_rewards/chosen": -2.0344181060791016, "eval_rewards/margins": 2.389177083969116, "eval_rewards/rejected": -4.4235944747924805, "eval_runtime": 85.2124, "eval_samples_per_second": 23.471, "eval_steps_per_second": 0.739, "step": 1800 }, { "epoch": 1.87, "learning_rate": 2.0967125382262994e-07, "logits/chosen": -2.795349597930908, "logits/rejected": -2.711717128753662, "logps/chosen": -277.3824157714844, "logps/rejected": -291.24517822265625, "loss": 0.0868, "rewards/accuracies": 1.0, "rewards/chosen": -0.18659140169620514, "rewards/margins": 6.222006320953369, "rewards/rejected": -6.408597469329834, "step": 1810 }, { "epoch": 1.88, "learning_rate": 2.0775993883792048e-07, "logits/chosen": -2.792492628097534, "logits/rejected": -2.6954171657562256, "logps/chosen": -289.41339111328125, "logps/rejected": -272.28094482421875, "loss": 0.0782, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.2770845293998718, "rewards/margins": 5.3945207595825195, "rewards/rejected": -5.671605110168457, "step": 1820 }, { "epoch": 1.89, "learning_rate": 2.05848623853211e-07, "logits/chosen": -2.863590717315674, "logits/rejected": -2.73067045211792, "logps/chosen": -299.4620666503906, "logps/rejected": -290.4524841308594, "loss": 0.0845, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.15114930272102356, "rewards/margins": 6.369837284088135, "rewards/rejected": -6.218688011169434, "step": 1830 }, { "epoch": 1.9, "learning_rate": 2.0393730886850151e-07, "logits/chosen": -2.8121609687805176, "logits/rejected": -2.760335922241211, "logps/chosen": -302.7115478515625, "logps/rejected": -290.25579833984375, "loss": 0.0758, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.6565725803375244, "rewards/margins": 5.57368803024292, "rewards/rejected": -6.230259895324707, "step": 1840 }, { "epoch": 1.91, "learning_rate": 2.0202599388379205e-07, "logits/chosen": -2.8598790168762207, "logits/rejected": -2.8427786827087402, "logps/chosen": -284.7428283691406, "logps/rejected": -347.52874755859375, "loss": 0.0846, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.6115126609802246, "rewards/margins": 5.790914058685303, "rewards/rejected": -6.402426242828369, "step": 1850 }, { "epoch": 1.92, "learning_rate": 2.0011467889908258e-07, "logits/chosen": -2.843400716781616, "logits/rejected": -2.7829298973083496, "logps/chosen": -307.7172546386719, "logps/rejected": -240.32095336914062, "loss": 0.0842, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.43064993619918823, "rewards/margins": 5.67532205581665, "rewards/rejected": -6.1059722900390625, "step": 1860 }, { "epoch": 1.93, "learning_rate": 1.9820336391437308e-07, "logits/chosen": -2.8097338676452637, "logits/rejected": -2.7943801879882812, "logps/chosen": -259.96142578125, "logps/rejected": -280.31768798828125, "loss": 0.0766, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.47454309463500977, "rewards/margins": 6.007216453552246, "rewards/rejected": -6.481759548187256, "step": 1870 }, { "epoch": 1.94, "learning_rate": 1.9629204892966362e-07, "logits/chosen": -2.9081008434295654, "logits/rejected": -2.850368022918701, "logps/chosen": -285.292236328125, "logps/rejected": -275.22113037109375, "loss": 0.0707, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.27541738748550415, "rewards/margins": 5.751465797424316, "rewards/rejected": -6.026882648468018, "step": 1880 }, { "epoch": 1.95, "learning_rate": 1.943807339449541e-07, "logits/chosen": -2.783125400543213, "logits/rejected": -2.7831761837005615, "logps/chosen": -270.49285888671875, "logps/rejected": -308.9680480957031, "loss": 0.0781, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.5280567407608032, "rewards/margins": 5.9228010177612305, "rewards/rejected": -6.450858116149902, "step": 1890 }, { "epoch": 1.96, "learning_rate": 1.9246941896024463e-07, "logits/chosen": -2.9015605449676514, "logits/rejected": -2.908259630203247, "logps/chosen": -266.2379455566406, "logps/rejected": -324.3561706542969, "loss": 0.1117, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.6570742726325989, "rewards/margins": 6.570813179016113, "rewards/rejected": -7.227886199951172, "step": 1900 }, { "epoch": 1.96, "eval_logits/chosen": -2.8857781887054443, "eval_logits/rejected": -2.836524486541748, "eval_logps/chosen": -315.7684020996094, "eval_logps/rejected": -285.27667236328125, "eval_loss": 0.5334831476211548, "eval_rewards/accuracies": 0.7817460298538208, "eval_rewards/chosen": -1.9235715866088867, "eval_rewards/margins": 2.1133482456207275, "eval_rewards/rejected": -4.036919593811035, "eval_runtime": 84.5094, "eval_samples_per_second": 23.666, "eval_steps_per_second": 0.745, "step": 1900 }, { "epoch": 1.97, "learning_rate": 1.9055810397553516e-07, "logits/chosen": -2.8626060485839844, "logits/rejected": -2.8419241905212402, "logps/chosen": -268.6602783203125, "logps/rejected": -280.9430236816406, "loss": 0.0908, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -0.532307505607605, "rewards/margins": 4.841288089752197, "rewards/rejected": -5.373595237731934, "step": 1910 }, { "epoch": 1.98, "learning_rate": 1.8864678899082566e-07, "logits/chosen": -2.781276226043701, "logits/rejected": -2.77004337310791, "logps/chosen": -282.97320556640625, "logps/rejected": -262.6290283203125, "loss": 0.08, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.720298171043396, "rewards/margins": 4.898207187652588, "rewards/rejected": -5.618504524230957, "step": 1920 }, { "epoch": 1.99, "learning_rate": 1.867354740061162e-07, "logits/chosen": -2.870204210281372, "logits/rejected": -2.822707176208496, "logps/chosen": -266.18487548828125, "logps/rejected": -297.3182067871094, "loss": 0.102, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.9683756828308105, "rewards/margins": 5.399904727935791, "rewards/rejected": -6.368279933929443, "step": 1930 }, { "epoch": 2.0, "learning_rate": 1.8482415902140673e-07, "logits/chosen": -2.838547945022583, "logits/rejected": -2.8035788536071777, "logps/chosen": -265.2318420410156, "logps/rejected": -289.78155517578125, "loss": 0.0632, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.2611059546470642, "rewards/margins": 6.193750858306885, "rewards/rejected": -6.454856872558594, "step": 1940 }, { "epoch": 2.01, "learning_rate": 1.8291284403669723e-07, "logits/chosen": -2.805657386779785, "logits/rejected": -2.8378701210021973, "logps/chosen": -252.8870391845703, "logps/rejected": -289.4842224121094, "loss": 0.0223, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.05115882679820061, "rewards/margins": 6.3867082595825195, "rewards/rejected": -6.4378662109375, "step": 1950 }, { "epoch": 2.02, "learning_rate": 1.8100152905198777e-07, "logits/chosen": -2.664461851119995, "logits/rejected": -2.614348888397217, "logps/chosen": -273.43170166015625, "logps/rejected": -358.24163818359375, "loss": 0.015, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.45168575644493103, "rewards/margins": 7.5087785720825195, "rewards/rejected": -7.960465431213379, "step": 1960 }, { "epoch": 2.03, "learning_rate": 1.7909021406727827e-07, "logits/chosen": -2.8354854583740234, "logits/rejected": -2.696690082550049, "logps/chosen": -269.34149169921875, "logps/rejected": -288.9095153808594, "loss": 0.0221, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.5660437941551208, "rewards/margins": 6.704123497009277, "rewards/rejected": -7.270166873931885, "step": 1970 }, { "epoch": 2.04, "learning_rate": 1.771788990825688e-07, "logits/chosen": -2.8703505992889404, "logits/rejected": -2.855757236480713, "logps/chosen": -288.74615478515625, "logps/rejected": -321.55267333984375, "loss": 0.0143, "rewards/accuracies": 1.0, "rewards/chosen": -0.2832430303096771, "rewards/margins": 7.4998979568481445, "rewards/rejected": -7.783141136169434, "step": 1980 }, { "epoch": 2.05, "learning_rate": 1.7526758409785934e-07, "logits/chosen": -2.862212657928467, "logits/rejected": -2.7549288272857666, "logps/chosen": -317.54901123046875, "logps/rejected": -333.6669921875, "loss": 0.0121, "rewards/accuracies": 1.0, "rewards/chosen": -0.910609245300293, "rewards/margins": 7.41512393951416, "rewards/rejected": -8.325732231140137, "step": 1990 }, { "epoch": 2.06, "learning_rate": 1.7335626911314984e-07, "logits/chosen": -2.747159719467163, "logits/rejected": -2.723252296447754, "logps/chosen": -241.98416137695312, "logps/rejected": -286.20263671875, "loss": 0.0175, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -1.5073554515838623, "rewards/margins": 7.292289733886719, "rewards/rejected": -8.799646377563477, "step": 2000 }, { "epoch": 2.06, "eval_logits/chosen": -2.8335580825805664, "eval_logits/rejected": -2.7736215591430664, "eval_logps/chosen": -324.7882385253906, "eval_logps/rejected": -302.55865478515625, "eval_loss": 0.5881706476211548, "eval_rewards/accuracies": 0.761904776096344, "eval_rewards/chosen": -2.825556755065918, "eval_rewards/margins": 2.939563512802124, "eval_rewards/rejected": -5.765120506286621, "eval_runtime": 85.7099, "eval_samples_per_second": 23.335, "eval_steps_per_second": 0.735, "step": 2000 }, { "epoch": 2.07, "learning_rate": 1.7144495412844037e-07, "logits/chosen": -2.900343418121338, "logits/rejected": -2.7985424995422363, "logps/chosen": -309.027587890625, "logps/rejected": -329.75128173828125, "loss": 0.0402, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.8946213722229004, "rewards/margins": 7.797415256500244, "rewards/rejected": -8.692037582397461, "step": 2010 }, { "epoch": 2.08, "learning_rate": 1.6953363914373088e-07, "logits/chosen": -2.8973569869995117, "logits/rejected": -2.883995532989502, "logps/chosen": -304.8821716308594, "logps/rejected": -307.1224060058594, "loss": 0.0107, "rewards/accuracies": 1.0, "rewards/chosen": -0.414725124835968, "rewards/margins": 8.143148422241211, "rewards/rejected": -8.55787467956543, "step": 2020 }, { "epoch": 2.09, "learning_rate": 1.6762232415902138e-07, "logits/chosen": -2.8249168395996094, "logits/rejected": -2.755976438522339, "logps/chosen": -271.30511474609375, "logps/rejected": -302.35174560546875, "loss": 0.0183, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.110582947731018, "rewards/margins": 7.069375038146973, "rewards/rejected": -8.179957389831543, "step": 2030 }, { "epoch": 2.11, "learning_rate": 1.6571100917431192e-07, "logits/chosen": -2.9022743701934814, "logits/rejected": -2.8274800777435303, "logps/chosen": -272.07720947265625, "logps/rejected": -290.59954833984375, "loss": 0.0135, "rewards/accuracies": 1.0, "rewards/chosen": -0.42111706733703613, "rewards/margins": 7.790069580078125, "rewards/rejected": -8.211187362670898, "step": 2040 }, { "epoch": 2.12, "learning_rate": 1.6379969418960242e-07, "logits/chosen": -2.8297278881073, "logits/rejected": -2.8095757961273193, "logps/chosen": -325.51202392578125, "logps/rejected": -352.79632568359375, "loss": 0.01, "rewards/accuracies": 1.0, "rewards/chosen": -0.47327151894569397, "rewards/margins": 8.03354549407959, "rewards/rejected": -8.506816864013672, "step": 2050 }, { "epoch": 2.13, "learning_rate": 1.6188837920489295e-07, "logits/chosen": -2.8418140411376953, "logits/rejected": -2.7544631958007812, "logps/chosen": -307.0643005371094, "logps/rejected": -305.1767578125, "loss": 0.0126, "rewards/accuracies": 1.0, "rewards/chosen": -0.7374988794326782, "rewards/margins": 7.880400657653809, "rewards/rejected": -8.617899894714355, "step": 2060 }, { "epoch": 2.14, "learning_rate": 1.5997706422018349e-07, "logits/chosen": -2.834254741668701, "logits/rejected": -2.8546204566955566, "logps/chosen": -274.09295654296875, "logps/rejected": -375.5848388671875, "loss": 0.0133, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.05463988706469536, "rewards/margins": 8.266778945922852, "rewards/rejected": -8.321417808532715, "step": 2070 }, { "epoch": 2.15, "learning_rate": 1.58065749235474e-07, "logits/chosen": -2.8460822105407715, "logits/rejected": -2.7883737087249756, "logps/chosen": -345.500732421875, "logps/rejected": -345.20587158203125, "loss": 0.0128, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.2549740970134735, "rewards/margins": 8.054521560668945, "rewards/rejected": -8.30949592590332, "step": 2080 }, { "epoch": 2.16, "learning_rate": 1.5615443425076452e-07, "logits/chosen": -2.6797385215759277, "logits/rejected": -2.59342622756958, "logps/chosen": -272.5843200683594, "logps/rejected": -320.541015625, "loss": 0.013, "rewards/accuracies": 1.0, "rewards/chosen": -0.7525440454483032, "rewards/margins": 8.50687026977539, "rewards/rejected": -9.25941276550293, "step": 2090 }, { "epoch": 2.17, "learning_rate": 1.5424311926605506e-07, "logits/chosen": -2.7577497959136963, "logits/rejected": -2.6880905628204346, "logps/chosen": -252.0814208984375, "logps/rejected": -281.2613220214844, "loss": 0.0145, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.5481234788894653, "rewards/margins": 7.643919944763184, "rewards/rejected": -9.19204330444336, "step": 2100 }, { "epoch": 2.17, "eval_logits/chosen": -2.8019275665283203, "eval_logits/rejected": -2.7453346252441406, "eval_logps/chosen": -328.32196044921875, "eval_logps/rejected": -307.42218017578125, "eval_loss": 0.615982174873352, "eval_rewards/accuracies": 0.7658730149269104, "eval_rewards/chosen": -3.178926944732666, "eval_rewards/margins": 3.072545289993286, "eval_rewards/rejected": -6.251472473144531, "eval_runtime": 84.2778, "eval_samples_per_second": 23.731, "eval_steps_per_second": 0.748, "step": 2100 }, { "epoch": 2.18, "learning_rate": 1.5233180428134556e-07, "logits/chosen": -2.852083683013916, "logits/rejected": -2.787797451019287, "logps/chosen": -282.0943908691406, "logps/rejected": -307.4315490722656, "loss": 0.0118, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.6628628969192505, "rewards/margins": 7.550381660461426, "rewards/rejected": -8.213244438171387, "step": 2110 }, { "epoch": 2.19, "learning_rate": 1.504204892966361e-07, "logits/chosen": -2.7512869834899902, "logits/rejected": -2.71586012840271, "logps/chosen": -313.5958557128906, "logps/rejected": -345.6764831542969, "loss": 0.0155, "rewards/accuracies": 1.0, "rewards/chosen": -1.2950427532196045, "rewards/margins": 8.80932331085205, "rewards/rejected": -10.104366302490234, "step": 2120 }, { "epoch": 2.2, "learning_rate": 1.485091743119266e-07, "logits/chosen": -2.8325581550598145, "logits/rejected": -2.8173718452453613, "logps/chosen": -269.61077880859375, "logps/rejected": -339.17193603515625, "loss": 0.0138, "rewards/accuracies": 1.0, "rewards/chosen": -1.3449245691299438, "rewards/margins": 7.745511054992676, "rewards/rejected": -9.090435028076172, "step": 2130 }, { "epoch": 2.21, "learning_rate": 1.465978593272171e-07, "logits/chosen": -2.779770851135254, "logits/rejected": -2.6894686222076416, "logps/chosen": -214.4927520751953, "logps/rejected": -242.42904663085938, "loss": 0.0128, "rewards/accuracies": 1.0, "rewards/chosen": -1.4138853549957275, "rewards/margins": 7.569909572601318, "rewards/rejected": -8.983795166015625, "step": 2140 }, { "epoch": 2.22, "learning_rate": 1.4468654434250764e-07, "logits/chosen": -2.7460098266601562, "logits/rejected": -2.768864154815674, "logps/chosen": -330.4348449707031, "logps/rejected": -376.062744140625, "loss": 0.0136, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.25889283418655396, "rewards/margins": 9.242613792419434, "rewards/rejected": -9.501506805419922, "step": 2150 }, { "epoch": 2.23, "learning_rate": 1.4277522935779814e-07, "logits/chosen": -2.822489023208618, "logits/rejected": -2.694692373275757, "logps/chosen": -284.2818298339844, "logps/rejected": -331.88665771484375, "loss": 0.0133, "rewards/accuracies": 1.0, "rewards/chosen": -0.7468432188034058, "rewards/margins": 7.9905853271484375, "rewards/rejected": -8.737428665161133, "step": 2160 }, { "epoch": 2.24, "learning_rate": 1.4086391437308867e-07, "logits/chosen": -2.805720090866089, "logits/rejected": -2.83075213432312, "logps/chosen": -313.9166564941406, "logps/rejected": -366.8902282714844, "loss": 0.0114, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.2891316413879395, "rewards/margins": 8.313146591186523, "rewards/rejected": -9.602277755737305, "step": 2170 }, { "epoch": 2.25, "learning_rate": 1.389525993883792e-07, "logits/chosen": -2.74735689163208, "logits/rejected": -2.7133688926696777, "logps/chosen": -248.0203399658203, "logps/rejected": -302.1765441894531, "loss": 0.0158, "rewards/accuracies": 1.0, "rewards/chosen": -1.5222628116607666, "rewards/margins": 8.545629501342773, "rewards/rejected": -10.067892074584961, "step": 2180 }, { "epoch": 2.26, "learning_rate": 1.370412844036697e-07, "logits/chosen": -2.8076796531677246, "logits/rejected": -2.7455930709838867, "logps/chosen": -269.29156494140625, "logps/rejected": -332.31109619140625, "loss": 0.0113, "rewards/accuracies": 1.0, "rewards/chosen": -1.5531013011932373, "rewards/margins": 8.036242485046387, "rewards/rejected": -9.589344024658203, "step": 2190 }, { "epoch": 2.27, "learning_rate": 1.3512996941896024e-07, "logits/chosen": -2.7460780143737793, "logits/rejected": -2.71882963180542, "logps/chosen": -244.74636840820312, "logps/rejected": -301.77496337890625, "loss": 0.0109, "rewards/accuracies": 1.0, "rewards/chosen": -1.5790854692459106, "rewards/margins": 8.768037796020508, "rewards/rejected": -10.347124099731445, "step": 2200 }, { "epoch": 2.27, "eval_logits/chosen": -2.775843381881714, "eval_logits/rejected": -2.71360445022583, "eval_logps/chosen": -335.1671142578125, "eval_logps/rejected": -318.3191223144531, "eval_loss": 0.6675200462341309, "eval_rewards/accuracies": 0.7658730149269104, "eval_rewards/chosen": -3.8634424209594727, "eval_rewards/margins": 3.4777252674102783, "eval_rewards/rejected": -7.34116792678833, "eval_runtime": 85.3675, "eval_samples_per_second": 23.428, "eval_steps_per_second": 0.738, "step": 2200 }, { "epoch": 2.28, "learning_rate": 1.3321865443425075e-07, "logits/chosen": -2.7431907653808594, "logits/rejected": -2.666405200958252, "logps/chosen": -288.3621826171875, "logps/rejected": -333.54486083984375, "loss": 0.0142, "rewards/accuracies": 1.0, "rewards/chosen": -1.3674020767211914, "rewards/margins": 8.582910537719727, "rewards/rejected": -9.950313568115234, "step": 2210 }, { "epoch": 2.29, "learning_rate": 1.3130733944954128e-07, "logits/chosen": -2.7455010414123535, "logits/rejected": -2.6907782554626465, "logps/chosen": -300.69732666015625, "logps/rejected": -345.77606201171875, "loss": 0.0113, "rewards/accuracies": 1.0, "rewards/chosen": -0.5738954544067383, "rewards/margins": 8.692358016967773, "rewards/rejected": -9.266255378723145, "step": 2220 }, { "epoch": 2.3, "learning_rate": 1.293960244648318e-07, "logits/chosen": -2.8134102821350098, "logits/rejected": -2.746748447418213, "logps/chosen": -320.0693664550781, "logps/rejected": -329.53509521484375, "loss": 0.0119, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.5276217460632324, "rewards/margins": 8.22559928894043, "rewards/rejected": -9.75322151184082, "step": 2230 }, { "epoch": 2.31, "learning_rate": 1.2748470948012232e-07, "logits/chosen": -2.7713024616241455, "logits/rejected": -2.7079224586486816, "logps/chosen": -283.7737121582031, "logps/rejected": -363.48126220703125, "loss": 0.007, "rewards/accuracies": 1.0, "rewards/chosen": -1.2624105215072632, "rewards/margins": 8.956193923950195, "rewards/rejected": -10.218603134155273, "step": 2240 }, { "epoch": 2.32, "learning_rate": 1.2557339449541285e-07, "logits/chosen": -2.824502944946289, "logits/rejected": -2.775770425796509, "logps/chosen": -302.90985107421875, "logps/rejected": -334.00262451171875, "loss": 0.0185, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -1.50070059299469, "rewards/margins": 7.946015357971191, "rewards/rejected": -9.446715354919434, "step": 2250 }, { "epoch": 2.33, "learning_rate": 1.2366207951070336e-07, "logits/chosen": -2.7960762977600098, "logits/rejected": -2.7740378379821777, "logps/chosen": -290.36663818359375, "logps/rejected": -346.4615478515625, "loss": 0.0114, "rewards/accuracies": 1.0, "rewards/chosen": -1.7257287502288818, "rewards/margins": 8.58584213256836, "rewards/rejected": -10.31157112121582, "step": 2260 }, { "epoch": 2.34, "learning_rate": 1.217507645259939e-07, "logits/chosen": -2.7129945755004883, "logits/rejected": -2.7220640182495117, "logps/chosen": -288.4734802246094, "logps/rejected": -330.5152893066406, "loss": 0.0062, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.3350961208343506, "rewards/margins": 8.772871017456055, "rewards/rejected": -10.107967376708984, "step": 2270 }, { "epoch": 2.35, "learning_rate": 1.198394495412844e-07, "logits/chosen": -2.786776542663574, "logits/rejected": -2.7793478965759277, "logps/chosen": -297.0083923339844, "logps/rejected": -333.83319091796875, "loss": 0.0174, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.304938554763794, "rewards/margins": 8.177055358886719, "rewards/rejected": -9.48199462890625, "step": 2280 }, { "epoch": 2.36, "learning_rate": 1.1792813455657493e-07, "logits/chosen": -2.669079303741455, "logits/rejected": -2.658374786376953, "logps/chosen": -286.50946044921875, "logps/rejected": -317.75653076171875, "loss": 0.0151, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.6052253246307373, "rewards/margins": 8.337682723999023, "rewards/rejected": -9.942907333374023, "step": 2290 }, { "epoch": 2.37, "learning_rate": 1.1601681957186543e-07, "logits/chosen": -2.7891390323638916, "logits/rejected": -2.755884885787964, "logps/chosen": -295.21063232421875, "logps/rejected": -317.76275634765625, "loss": 0.0144, "rewards/accuracies": 1.0, "rewards/chosen": -1.0556640625, "rewards/margins": 8.071990966796875, "rewards/rejected": -9.127655029296875, "step": 2300 }, { "epoch": 2.37, "eval_logits/chosen": -2.7460498809814453, "eval_logits/rejected": -2.684108018875122, "eval_logps/chosen": -333.36492919921875, "eval_logps/rejected": -315.51007080078125, "eval_loss": 0.655481219291687, "eval_rewards/accuracies": 0.773809552192688, "eval_rewards/chosen": -3.6832268238067627, "eval_rewards/margins": 3.3770339488983154, "eval_rewards/rejected": -7.060261249542236, "eval_runtime": 85.5955, "eval_samples_per_second": 23.366, "eval_steps_per_second": 0.736, "step": 2300 }, { "epoch": 2.38, "learning_rate": 1.1410550458715595e-07, "logits/chosen": -2.7361907958984375, "logits/rejected": -2.736358642578125, "logps/chosen": -276.85589599609375, "logps/rejected": -338.77020263671875, "loss": 0.0172, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -1.2412726879119873, "rewards/margins": 7.8215155601501465, "rewards/rejected": -9.062788009643555, "step": 2310 }, { "epoch": 2.39, "learning_rate": 1.1219418960244648e-07, "logits/chosen": -2.7203598022460938, "logits/rejected": -2.7212142944335938, "logps/chosen": -240.63095092773438, "logps/rejected": -359.0272216796875, "loss": 0.0112, "rewards/accuracies": 1.0, "rewards/chosen": -1.4586249589920044, "rewards/margins": 8.883589744567871, "rewards/rejected": -10.342214584350586, "step": 2320 }, { "epoch": 2.4, "learning_rate": 1.10282874617737e-07, "logits/chosen": -2.7475290298461914, "logits/rejected": -2.7314276695251465, "logps/chosen": -257.7377014160156, "logps/rejected": -323.4758605957031, "loss": 0.0123, "rewards/accuracies": 1.0, "rewards/chosen": -2.1093502044677734, "rewards/margins": 8.267637252807617, "rewards/rejected": -10.37698745727539, "step": 2330 }, { "epoch": 2.41, "learning_rate": 1.0837155963302752e-07, "logits/chosen": -2.5238680839538574, "logits/rejected": -2.5990004539489746, "logps/chosen": -231.6787567138672, "logps/rejected": -353.30950927734375, "loss": 0.0118, "rewards/accuracies": 1.0, "rewards/chosen": -1.3277311325073242, "rewards/margins": 9.420309066772461, "rewards/rejected": -10.748041152954102, "step": 2340 }, { "epoch": 2.43, "learning_rate": 1.0646024464831804e-07, "logits/chosen": -2.6679482460021973, "logits/rejected": -2.676323652267456, "logps/chosen": -298.315185546875, "logps/rejected": -342.7939147949219, "loss": 0.0189, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.5310230255126953, "rewards/margins": 9.822240829467773, "rewards/rejected": -11.353262901306152, "step": 2350 }, { "epoch": 2.44, "learning_rate": 1.0454892966360856e-07, "logits/chosen": -2.601186990737915, "logits/rejected": -2.6746459007263184, "logps/chosen": -277.2555847167969, "logps/rejected": -338.47589111328125, "loss": 0.0087, "rewards/accuracies": 1.0, "rewards/chosen": -1.6174863576889038, "rewards/margins": 9.97658634185791, "rewards/rejected": -11.594073295593262, "step": 2360 }, { "epoch": 2.45, "learning_rate": 1.0263761467889908e-07, "logits/chosen": -2.6298816204071045, "logits/rejected": -2.5947744846343994, "logps/chosen": -280.8351135253906, "logps/rejected": -345.2566223144531, "loss": 0.0128, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.2241837978363037, "rewards/margins": 9.190905570983887, "rewards/rejected": -10.415090560913086, "step": 2370 }, { "epoch": 2.46, "learning_rate": 1.007262996941896e-07, "logits/chosen": -2.5886502265930176, "logits/rejected": -2.556889057159424, "logps/chosen": -287.4285583496094, "logps/rejected": -309.5324401855469, "loss": 0.0139, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -2.0298352241516113, "rewards/margins": 8.339336395263672, "rewards/rejected": -10.369171142578125, "step": 2380 }, { "epoch": 2.47, "learning_rate": 9.881498470948011e-08, "logits/chosen": -2.694936752319336, "logits/rejected": -2.571977376937866, "logps/chosen": -301.35894775390625, "logps/rejected": -316.75201416015625, "loss": 0.0104, "rewards/accuracies": 1.0, "rewards/chosen": -1.882354497909546, "rewards/margins": 8.532295227050781, "rewards/rejected": -10.414649963378906, "step": 2390 }, { "epoch": 2.48, "learning_rate": 9.690366972477065e-08, "logits/chosen": -2.6898765563964844, "logits/rejected": -2.5868537425994873, "logps/chosen": -295.41241455078125, "logps/rejected": -324.84417724609375, "loss": 0.0103, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.7099082469940186, "rewards/margins": 8.85558795928955, "rewards/rejected": -10.565495491027832, "step": 2400 }, { "epoch": 2.48, "eval_logits/chosen": -2.6922011375427246, "eval_logits/rejected": -2.6255180835723877, "eval_logps/chosen": -334.0754699707031, "eval_logps/rejected": -316.68048095703125, "eval_loss": 0.6598305702209473, "eval_rewards/accuracies": 0.7579365372657776, "eval_rewards/chosen": -3.754283905029297, "eval_rewards/margins": 3.4230215549468994, "eval_rewards/rejected": -7.177304744720459, "eval_runtime": 86.0431, "eval_samples_per_second": 23.244, "eval_steps_per_second": 0.732, "step": 2400 }, { "epoch": 2.49, "learning_rate": 9.499235474006116e-08, "logits/chosen": -2.6745896339416504, "logits/rejected": -2.6082608699798584, "logps/chosen": -333.4400329589844, "logps/rejected": -326.5425720214844, "loss": 0.0169, "rewards/accuracies": 1.0, "rewards/chosen": -1.5270661115646362, "rewards/margins": 8.994717597961426, "rewards/rejected": -10.521783828735352, "step": 2410 }, { "epoch": 2.5, "learning_rate": 9.308103975535168e-08, "logits/chosen": -2.687208414077759, "logits/rejected": -2.6537442207336426, "logps/chosen": -306.5165100097656, "logps/rejected": -356.3597717285156, "loss": 0.0149, "rewards/accuracies": 1.0, "rewards/chosen": -0.8680790066719055, "rewards/margins": 8.81937026977539, "rewards/rejected": -9.68744945526123, "step": 2420 }, { "epoch": 2.51, "learning_rate": 9.116972477064219e-08, "logits/chosen": -2.739415168762207, "logits/rejected": -2.630178213119507, "logps/chosen": -307.1014709472656, "logps/rejected": -374.5372009277344, "loss": 0.0106, "rewards/accuracies": 1.0, "rewards/chosen": -1.7804549932479858, "rewards/margins": 8.495887756347656, "rewards/rejected": -10.276342391967773, "step": 2430 }, { "epoch": 2.52, "learning_rate": 8.925840978593272e-08, "logits/chosen": -2.6330296993255615, "logits/rejected": -2.686793804168701, "logps/chosen": -229.73330688476562, "logps/rejected": -348.29071044921875, "loss": 0.0163, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.2960717678070068, "rewards/margins": 8.63543701171875, "rewards/rejected": -9.931509017944336, "step": 2440 }, { "epoch": 2.53, "learning_rate": 8.734709480122324e-08, "logits/chosen": -2.619152545928955, "logits/rejected": -2.581078052520752, "logps/chosen": -273.90545654296875, "logps/rejected": -345.945556640625, "loss": 0.0139, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -1.8120664358139038, "rewards/margins": 9.089773178100586, "rewards/rejected": -10.901840209960938, "step": 2450 }, { "epoch": 2.54, "learning_rate": 8.543577981651376e-08, "logits/chosen": -2.6768343448638916, "logits/rejected": -2.583284378051758, "logps/chosen": -376.90606689453125, "logps/rejected": -364.72235107421875, "loss": 0.0138, "rewards/accuracies": 1.0, "rewards/chosen": -1.1708290576934814, "rewards/margins": 8.805766105651855, "rewards/rejected": -9.976595878601074, "step": 2460 }, { "epoch": 2.55, "learning_rate": 8.352446483180428e-08, "logits/chosen": -2.7543387413024902, "logits/rejected": -2.6443049907684326, "logps/chosen": -283.18182373046875, "logps/rejected": -305.6678771972656, "loss": 0.0234, "rewards/accuracies": 1.0, "rewards/chosen": -1.0859144926071167, "rewards/margins": 8.413625717163086, "rewards/rejected": -9.499540328979492, "step": 2470 }, { "epoch": 2.56, "learning_rate": 8.161314984709481e-08, "logits/chosen": -2.748798131942749, "logits/rejected": -2.701770782470703, "logps/chosen": -297.24285888671875, "logps/rejected": -334.27093505859375, "loss": 0.0051, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.1504005193710327, "rewards/margins": 9.096171379089355, "rewards/rejected": -10.24657154083252, "step": 2480 }, { "epoch": 2.57, "learning_rate": 7.970183486238531e-08, "logits/chosen": -2.660088300704956, "logits/rejected": -2.6433253288269043, "logps/chosen": -277.38043212890625, "logps/rejected": -327.1907653808594, "loss": 0.0136, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.4239822626113892, "rewards/margins": 9.63415813446045, "rewards/rejected": -11.058141708374023, "step": 2490 }, { "epoch": 2.58, "learning_rate": 7.779051987767583e-08, "logits/chosen": -2.743652820587158, "logits/rejected": -2.7099053859710693, "logps/chosen": -326.1851806640625, "logps/rejected": -345.4455261230469, "loss": 0.0085, "rewards/accuracies": 1.0, "rewards/chosen": -1.8251110315322876, "rewards/margins": 9.0416898727417, "rewards/rejected": -10.866801261901855, "step": 2500 }, { "epoch": 2.58, "eval_logits/chosen": -2.6937167644500732, "eval_logits/rejected": -2.624516725540161, "eval_logps/chosen": -342.0002746582031, "eval_logps/rejected": -328.22021484375, "eval_loss": 0.7043530941009521, "eval_rewards/accuracies": 0.7658730149269104, "eval_rewards/chosen": -4.546760082244873, "eval_rewards/margins": 3.784513235092163, "eval_rewards/rejected": -8.33127498626709, "eval_runtime": 84.8082, "eval_samples_per_second": 23.583, "eval_steps_per_second": 0.743, "step": 2500 }, { "epoch": 2.59, "learning_rate": 7.587920489296635e-08, "logits/chosen": -2.6863226890563965, "logits/rejected": -2.669240951538086, "logps/chosen": -314.3838195800781, "logps/rejected": -351.1210632324219, "loss": 0.0123, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.9106096029281616, "rewards/margins": 9.383336067199707, "rewards/rejected": -11.2939453125, "step": 2510 }, { "epoch": 2.6, "learning_rate": 7.396788990825688e-08, "logits/chosen": -2.724083423614502, "logits/rejected": -2.6847984790802, "logps/chosen": -288.16204833984375, "logps/rejected": -320.74652099609375, "loss": 0.0137, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -2.156667947769165, "rewards/margins": 8.251482009887695, "rewards/rejected": -10.408149719238281, "step": 2520 }, { "epoch": 2.61, "learning_rate": 7.20565749235474e-08, "logits/chosen": -2.6141371726989746, "logits/rejected": -2.545928478240967, "logps/chosen": -272.58782958984375, "logps/rejected": -309.7486267089844, "loss": 0.0112, "rewards/accuracies": 1.0, "rewards/chosen": -1.7828295230865479, "rewards/margins": 9.371472358703613, "rewards/rejected": -11.15429973602295, "step": 2530 }, { "epoch": 2.62, "learning_rate": 7.014525993883792e-08, "logits/chosen": -2.6228842735290527, "logits/rejected": -2.546093463897705, "logps/chosen": -297.15032958984375, "logps/rejected": -337.7318420410156, "loss": 0.0163, "rewards/accuracies": 1.0, "rewards/chosen": -1.7396106719970703, "rewards/margins": 8.880767822265625, "rewards/rejected": -10.620378494262695, "step": 2540 }, { "epoch": 2.63, "learning_rate": 6.823394495412843e-08, "logits/chosen": -2.5964651107788086, "logits/rejected": -2.535104513168335, "logps/chosen": -291.49932861328125, "logps/rejected": -332.07098388671875, "loss": 0.01, "rewards/accuracies": 1.0, "rewards/chosen": -1.4745142459869385, "rewards/margins": 10.255570411682129, "rewards/rejected": -11.730084419250488, "step": 2550 }, { "epoch": 2.64, "learning_rate": 6.632262996941895e-08, "logits/chosen": -2.6409924030303955, "logits/rejected": -2.5037286281585693, "logps/chosen": -274.09661865234375, "logps/rejected": -345.69451904296875, "loss": 0.0124, "rewards/accuracies": 1.0, "rewards/chosen": -1.433541178703308, "rewards/margins": 9.810760498046875, "rewards/rejected": -11.244300842285156, "step": 2560 }, { "epoch": 2.65, "learning_rate": 6.441131498470948e-08, "logits/chosen": -2.6809632778167725, "logits/rejected": -2.661353588104248, "logps/chosen": -267.7644958496094, "logps/rejected": -315.9635009765625, "loss": 0.0234, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.3080449104309082, "rewards/margins": 8.25956916809082, "rewards/rejected": -9.56761360168457, "step": 2570 }, { "epoch": 2.66, "learning_rate": 6.25e-08, "logits/chosen": -2.7494421005249023, "logits/rejected": -2.681250810623169, "logps/chosen": -329.81817626953125, "logps/rejected": -351.62689208984375, "loss": 0.013, "rewards/accuracies": 1.0, "rewards/chosen": -1.1257236003875732, "rewards/margins": 8.99251937866211, "rewards/rejected": -10.118242263793945, "step": 2580 }, { "epoch": 2.67, "learning_rate": 6.058868501529052e-08, "logits/chosen": -2.802464008331299, "logits/rejected": -2.6624038219451904, "logps/chosen": -305.5362243652344, "logps/rejected": -341.27130126953125, "loss": 0.014, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.3455936908721924, "rewards/margins": 9.046780586242676, "rewards/rejected": -10.392374038696289, "step": 2590 }, { "epoch": 2.68, "learning_rate": 5.8677370030581035e-08, "logits/chosen": -2.6268625259399414, "logits/rejected": -2.6364901065826416, "logps/chosen": -281.02655029296875, "logps/rejected": -321.53936767578125, "loss": 0.0077, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.2788012027740479, "rewards/margins": 9.457226753234863, "rewards/rejected": -10.736028671264648, "step": 2600 }, { "epoch": 2.68, "eval_logits/chosen": -2.7350168228149414, "eval_logits/rejected": -2.671557664871216, "eval_logps/chosen": -336.4403381347656, "eval_logps/rejected": -321.674072265625, "eval_loss": 0.6755052208900452, "eval_rewards/accuracies": 0.7857142686843872, "eval_rewards/chosen": -3.9907662868499756, "eval_rewards/margins": 3.6858959197998047, "eval_rewards/rejected": -7.676662921905518, "eval_runtime": 85.0069, "eval_samples_per_second": 23.528, "eval_steps_per_second": 0.741, "step": 2600 }, { "epoch": 2.69, "learning_rate": 5.6766055045871554e-08, "logits/chosen": -2.7218501567840576, "logits/rejected": -2.701134443283081, "logps/chosen": -280.87249755859375, "logps/rejected": -342.28692626953125, "loss": 0.0099, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.8790122866630554, "rewards/margins": 8.924139022827148, "rewards/rejected": -9.803152084350586, "step": 2610 }, { "epoch": 2.7, "learning_rate": 5.485474006116208e-08, "logits/chosen": -2.749300718307495, "logits/rejected": -2.7427945137023926, "logps/chosen": -281.42535400390625, "logps/rejected": -320.20855712890625, "loss": 0.0122, "rewards/accuracies": 1.0, "rewards/chosen": -0.7675425410270691, "rewards/margins": 9.114829063415527, "rewards/rejected": -9.88237190246582, "step": 2620 }, { "epoch": 2.71, "learning_rate": 5.294342507645259e-08, "logits/chosen": -2.718909978866577, "logits/rejected": -2.6332194805145264, "logps/chosen": -330.5658264160156, "logps/rejected": -326.0030517578125, "loss": 0.0072, "rewards/accuracies": 1.0, "rewards/chosen": -1.26304030418396, "rewards/margins": 9.22779369354248, "rewards/rejected": -10.49083423614502, "step": 2630 }, { "epoch": 2.72, "learning_rate": 5.1032110091743117e-08, "logits/chosen": -2.6946017742156982, "logits/rejected": -2.578606128692627, "logps/chosen": -244.9853515625, "logps/rejected": -326.56829833984375, "loss": 0.008, "rewards/accuracies": 1.0, "rewards/chosen": -2.163881778717041, "rewards/margins": 8.259695053100586, "rewards/rejected": -10.423576354980469, "step": 2640 }, { "epoch": 2.73, "learning_rate": 4.9120795107033635e-08, "logits/chosen": -2.692305564880371, "logits/rejected": -2.621889591217041, "logps/chosen": -299.7417907714844, "logps/rejected": -354.70037841796875, "loss": 0.0072, "rewards/accuracies": 1.0, "rewards/chosen": -1.746453881263733, "rewards/margins": 9.412019729614258, "rewards/rejected": -11.158472061157227, "step": 2650 }, { "epoch": 2.75, "learning_rate": 4.7209480122324154e-08, "logits/chosen": -2.742924928665161, "logits/rejected": -2.680655002593994, "logps/chosen": -288.6100158691406, "logps/rejected": -372.38848876953125, "loss": 0.0115, "rewards/accuracies": 1.0, "rewards/chosen": -2.150846004486084, "rewards/margins": 9.178352355957031, "rewards/rejected": -11.329198837280273, "step": 2660 }, { "epoch": 2.76, "learning_rate": 4.529816513761467e-08, "logits/chosen": -2.695166826248169, "logits/rejected": -2.6738569736480713, "logps/chosen": -326.481689453125, "logps/rejected": -396.2838134765625, "loss": 0.0125, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.571830153465271, "rewards/margins": 9.341531753540039, "rewards/rejected": -10.913363456726074, "step": 2670 }, { "epoch": 2.77, "learning_rate": 4.33868501529052e-08, "logits/chosen": -2.7478764057159424, "logits/rejected": -2.7026572227478027, "logps/chosen": -257.5367126464844, "logps/rejected": -314.3909606933594, "loss": 0.0145, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -2.009021282196045, "rewards/margins": 8.854787826538086, "rewards/rejected": -10.863809585571289, "step": 2680 }, { "epoch": 2.78, "learning_rate": 4.147553516819572e-08, "logits/chosen": -2.660367012023926, "logits/rejected": -2.6164801120758057, "logps/chosen": -253.05874633789062, "logps/rejected": -334.41650390625, "loss": 0.0115, "rewards/accuracies": 1.0, "rewards/chosen": -1.8409353494644165, "rewards/margins": 9.41588020324707, "rewards/rejected": -11.256814956665039, "step": 2690 }, { "epoch": 2.79, "learning_rate": 3.9564220183486236e-08, "logits/chosen": -2.6420648097991943, "logits/rejected": -2.6223690509796143, "logps/chosen": -286.759521484375, "logps/rejected": -299.20928955078125, "loss": 0.0098, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.680275559425354, "rewards/margins": 8.588552474975586, "rewards/rejected": -10.268828392028809, "step": 2700 }, { "epoch": 2.79, "eval_logits/chosen": -2.751840829849243, "eval_logits/rejected": -2.689521551132202, "eval_logps/chosen": -338.3857727050781, "eval_logps/rejected": -323.7826232910156, "eval_loss": 0.6889848709106445, "eval_rewards/accuracies": 0.7777777910232544, "eval_rewards/chosen": -4.185306549072266, "eval_rewards/margins": 3.702207565307617, "eval_rewards/rejected": -7.887514114379883, "eval_runtime": 84.5931, "eval_samples_per_second": 23.643, "eval_steps_per_second": 0.745, "step": 2700 }, { "epoch": 2.8, "learning_rate": 3.7652905198776755e-08, "logits/chosen": -2.7268526554107666, "logits/rejected": -2.670037031173706, "logps/chosen": -318.42047119140625, "logps/rejected": -328.3361511230469, "loss": 0.0089, "rewards/accuracies": 1.0, "rewards/chosen": -1.8269201517105103, "rewards/margins": 8.519743919372559, "rewards/rejected": -10.346662521362305, "step": 2710 }, { "epoch": 2.81, "learning_rate": 3.574159021406728e-08, "logits/chosen": -2.7178609371185303, "logits/rejected": -2.655142307281494, "logps/chosen": -294.76824951171875, "logps/rejected": -407.04388427734375, "loss": 0.0098, "rewards/accuracies": 1.0, "rewards/chosen": -1.5894997119903564, "rewards/margins": 9.621997833251953, "rewards/rejected": -11.21149730682373, "step": 2720 }, { "epoch": 2.82, "learning_rate": 3.383027522935779e-08, "logits/chosen": -2.7463743686676025, "logits/rejected": -2.7335140705108643, "logps/chosen": -292.24530029296875, "logps/rejected": -352.7088928222656, "loss": 0.0105, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.4958622455596924, "rewards/margins": 9.347841262817383, "rewards/rejected": -10.843703269958496, "step": 2730 }, { "epoch": 2.83, "learning_rate": 3.191896024464832e-08, "logits/chosen": -2.74155592918396, "logits/rejected": -2.7045648097991943, "logps/chosen": -302.1263732910156, "logps/rejected": -347.8245544433594, "loss": 0.0074, "rewards/accuracies": 1.0, "rewards/chosen": -1.5156089067459106, "rewards/margins": 9.854185104370117, "rewards/rejected": -11.369794845581055, "step": 2740 }, { "epoch": 2.84, "learning_rate": 3.0007645259938836e-08, "logits/chosen": -2.686781406402588, "logits/rejected": -2.6489763259887695, "logps/chosen": -251.529296875, "logps/rejected": -336.72222900390625, "loss": 0.0106, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.2218445539474487, "rewards/margins": 10.793516159057617, "rewards/rejected": -12.015360832214355, "step": 2750 }, { "epoch": 2.85, "learning_rate": 2.809633027522936e-08, "logits/chosen": -2.693544864654541, "logits/rejected": -2.607612133026123, "logps/chosen": -274.08245849609375, "logps/rejected": -329.0926513671875, "loss": 0.0158, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.8746064901351929, "rewards/margins": 9.00143814086914, "rewards/rejected": -10.876044273376465, "step": 2760 }, { "epoch": 2.86, "learning_rate": 2.6185015290519877e-08, "logits/chosen": -2.6773712635040283, "logits/rejected": -2.657872200012207, "logps/chosen": -292.4349365234375, "logps/rejected": -358.54864501953125, "loss": 0.009, "rewards/accuracies": 1.0, "rewards/chosen": -1.64081609249115, "rewards/margins": 8.695267677307129, "rewards/rejected": -10.336084365844727, "step": 2770 }, { "epoch": 2.87, "learning_rate": 2.4273700305810396e-08, "logits/chosen": -2.694716691970825, "logits/rejected": -2.671049118041992, "logps/chosen": -332.70367431640625, "logps/rejected": -374.2784729003906, "loss": 0.0102, "rewards/accuracies": 1.0, "rewards/chosen": -1.9321603775024414, "rewards/margins": 9.577836990356445, "rewards/rejected": -11.509998321533203, "step": 2780 }, { "epoch": 2.88, "learning_rate": 2.2362385321100918e-08, "logits/chosen": -2.7195866107940674, "logits/rejected": -2.6434059143066406, "logps/chosen": -305.3413391113281, "logps/rejected": -334.30450439453125, "loss": 0.0141, "rewards/accuracies": 1.0, "rewards/chosen": -1.7689650058746338, "rewards/margins": 9.231801986694336, "rewards/rejected": -11.00076675415039, "step": 2790 }, { "epoch": 2.89, "learning_rate": 2.0451070336391437e-08, "logits/chosen": -2.658266544342041, "logits/rejected": -2.5843617916107178, "logps/chosen": -287.137451171875, "logps/rejected": -307.26544189453125, "loss": 0.0126, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -1.1759697198867798, "rewards/margins": 9.180164337158203, "rewards/rejected": -10.356133460998535, "step": 2800 }, { "epoch": 2.89, "eval_logits/chosen": -2.7376251220703125, "eval_logits/rejected": -2.6751596927642822, "eval_logps/chosen": -339.3250427246094, "eval_logps/rejected": -325.0658874511719, "eval_loss": 0.688874363899231, "eval_rewards/accuracies": 0.7777777910232544, "eval_rewards/chosen": -4.279238224029541, "eval_rewards/margins": 3.736605644226074, "eval_rewards/rejected": -8.015843391418457, "eval_runtime": 85.0292, "eval_samples_per_second": 23.521, "eval_steps_per_second": 0.741, "step": 2800 }, { "epoch": 2.9, "learning_rate": 1.8539755351681956e-08, "logits/chosen": -2.6495871543884277, "logits/rejected": -2.643451690673828, "logps/chosen": -288.79229736328125, "logps/rejected": -351.61859130859375, "loss": 0.0141, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -2.1320414543151855, "rewards/margins": 8.717375755310059, "rewards/rejected": -10.849416732788086, "step": 2810 }, { "epoch": 2.91, "learning_rate": 1.6628440366972478e-08, "logits/chosen": -2.7097463607788086, "logits/rejected": -2.6647915840148926, "logps/chosen": -312.65277099609375, "logps/rejected": -338.9766540527344, "loss": 0.0097, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -2.018944025039673, "rewards/margins": 8.279979705810547, "rewards/rejected": -10.29892349243164, "step": 2820 }, { "epoch": 2.92, "learning_rate": 1.4717125382262997e-08, "logits/chosen": -2.728929042816162, "logits/rejected": -2.7036802768707275, "logps/chosen": -297.9712219238281, "logps/rejected": -358.2961120605469, "loss": 0.0105, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -2.438098907470703, "rewards/margins": 8.373188018798828, "rewards/rejected": -10.811286926269531, "step": 2830 }, { "epoch": 2.93, "learning_rate": 1.2805810397553517e-08, "logits/chosen": -2.7534146308898926, "logits/rejected": -2.661689519882202, "logps/chosen": -296.6317443847656, "logps/rejected": -343.7471008300781, "loss": 0.0172, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.4898061752319336, "rewards/margins": 9.379873275756836, "rewards/rejected": -10.869680404663086, "step": 2840 }, { "epoch": 2.94, "learning_rate": 1.0894495412844038e-08, "logits/chosen": -2.649125099182129, "logits/rejected": -2.626152515411377, "logps/chosen": -279.52276611328125, "logps/rejected": -341.8113708496094, "loss": 0.0116, "rewards/accuracies": 1.0, "rewards/chosen": -1.7075916528701782, "rewards/margins": 8.933737754821777, "rewards/rejected": -10.64133071899414, "step": 2850 }, { "epoch": 2.95, "learning_rate": 8.983180428134555e-09, "logits/chosen": -2.7289493083953857, "logits/rejected": -2.6224875450134277, "logps/chosen": -290.38720703125, "logps/rejected": -301.3743896484375, "loss": 0.0113, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -2.2014358043670654, "rewards/margins": 8.64903736114502, "rewards/rejected": -10.850473403930664, "step": 2860 }, { "epoch": 2.96, "learning_rate": 7.071865443425076e-09, "logits/chosen": -2.655238389968872, "logits/rejected": -2.6466026306152344, "logps/chosen": -304.779296875, "logps/rejected": -347.95611572265625, "loss": 0.0102, "rewards/accuracies": 1.0, "rewards/chosen": -2.1112747192382812, "rewards/margins": 9.25969123840332, "rewards/rejected": -11.370965957641602, "step": 2870 }, { "epoch": 2.97, "learning_rate": 5.1605504587155965e-09, "logits/chosen": -2.741295099258423, "logits/rejected": -2.6439971923828125, "logps/chosen": -299.2157897949219, "logps/rejected": -346.70355224609375, "loss": 0.008, "rewards/accuracies": 1.0, "rewards/chosen": -1.3350167274475098, "rewards/margins": 9.48717212677002, "rewards/rejected": -10.822189331054688, "step": 2880 }, { "epoch": 2.98, "learning_rate": 3.249235474006116e-09, "logits/chosen": -2.727517604827881, "logits/rejected": -2.720759868621826, "logps/chosen": -297.76129150390625, "logps/rejected": -341.95086669921875, "loss": 0.0179, "rewards/accuracies": 1.0, "rewards/chosen": -2.0693154335021973, "rewards/margins": 8.433469772338867, "rewards/rejected": -10.502785682678223, "step": 2890 }, { "epoch": 2.99, "learning_rate": 1.3379204892966359e-09, "logits/chosen": -2.724641799926758, "logits/rejected": -2.6374106407165527, "logps/chosen": -285.85882568359375, "logps/rejected": -324.15582275390625, "loss": 0.0078, "rewards/accuracies": 1.0, "rewards/chosen": -2.20442533493042, "rewards/margins": 8.951725959777832, "rewards/rejected": -11.156152725219727, "step": 2900 }, { "epoch": 2.99, "eval_logits/chosen": -2.7404043674468994, "eval_logits/rejected": -2.6788272857666016, "eval_logps/chosen": -339.67138671875, "eval_logps/rejected": -325.6390380859375, "eval_loss": 0.6886436939239502, "eval_rewards/accuracies": 0.773809552192688, "eval_rewards/chosen": -4.313872814178467, "eval_rewards/margins": 3.759286642074585, "eval_rewards/rejected": -8.073159217834473, "eval_runtime": 84.4823, "eval_samples_per_second": 23.674, "eval_steps_per_second": 0.746, "step": 2900 }, { "epoch": 3.0, "step": 2907, "total_flos": 0.0, "train_loss": 0.20374761147621287, "train_runtime": 18387.3875, "train_samples_per_second": 10.11, "train_steps_per_second": 0.158 } ], "logging_steps": 10, "max_steps": 2907, "num_train_epochs": 3, "save_steps": 500, "total_flos": 0.0, "trial_name": null, "trial_params": null }