{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9993414204074695, "eval_steps": 100, "global_step": 1470, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 3.4013605442176867e-09, "logits/chosen": -2.8035497665405273, "logits/rejected": -2.7962629795074463, "logps/chosen": -211.36532592773438, "logps/rejected": -294.74530029296875, "loss": 0.693, "rewards/accuracies": 0.2265625, "rewards/chosen": 0.0010320872534066439, "rewards/margins": 0.0005493065109476447, "rewards/rejected": 0.0004827805096283555, "step": 1 }, { "epoch": 0.01, "learning_rate": 3.4013605442176873e-08, "logits/chosen": -2.7791833877563477, "logits/rejected": -2.804030418395996, "logps/chosen": -240.9124298095703, "logps/rejected": -369.5000305175781, "loss": 0.6926, "rewards/accuracies": 0.4696180522441864, "rewards/chosen": 0.00038262151065282524, "rewards/margins": 0.0016919042682275176, "rewards/rejected": -0.00130928261205554, "step": 10 }, { "epoch": 0.01, "learning_rate": 6.802721088435375e-08, "logits/chosen": -2.7648768424987793, "logits/rejected": -2.78273606300354, "logps/chosen": -245.15121459960938, "logps/rejected": -350.14898681640625, "loss": 0.6932, "rewards/accuracies": 0.5023437738418579, "rewards/chosen": 0.0015446910401806235, "rewards/margins": 0.0005673653213307261, "rewards/rejected": 0.000977325951680541, "step": 20 }, { "epoch": 0.02, "learning_rate": 1.0204081632653061e-07, "logits/chosen": -2.8178772926330566, "logits/rejected": -2.786083221435547, "logps/chosen": -240.51516723632812, "logps/rejected": -352.41339111328125, "loss": 0.6929, "rewards/accuracies": 0.508593738079071, "rewards/chosen": 0.0015831931959837675, "rewards/margins": 0.001221821061335504, "rewards/rejected": 0.00036137248389422894, "step": 30 }, { "epoch": 0.03, "learning_rate": 1.360544217687075e-07, "logits/chosen": -2.803492307662964, "logits/rejected": -2.7716286182403564, "logps/chosen": -235.7887725830078, "logps/rejected": -359.8059997558594, "loss": 0.6912, "rewards/accuracies": 0.5234375, "rewards/chosen": 0.0033938586711883545, "rewards/margins": 0.004743899218738079, "rewards/rejected": -0.001350040198303759, "step": 40 }, { "epoch": 0.03, "learning_rate": 1.7006802721088434e-07, "logits/chosen": -2.8103866577148438, "logits/rejected": -2.803828716278076, "logps/chosen": -245.4801483154297, "logps/rejected": -306.09783935546875, "loss": 0.6899, "rewards/accuracies": 0.5640624761581421, "rewards/chosen": 0.00749587407335639, "rewards/margins": 0.007244518492370844, "rewards/rejected": 0.00025135590112768114, "step": 50 }, { "epoch": 0.04, "learning_rate": 2.0408163265306121e-07, "logits/chosen": -2.7881524562835693, "logits/rejected": -2.808814525604248, "logps/chosen": -269.1226501464844, "logps/rejected": -339.7763977050781, "loss": 0.6874, "rewards/accuracies": 0.6015625, "rewards/chosen": 0.011595133692026138, "rewards/margins": 0.012225830927491188, "rewards/rejected": -0.000630697060842067, "step": 60 }, { "epoch": 0.05, "learning_rate": 2.3809523809523806e-07, "logits/chosen": -2.7982544898986816, "logits/rejected": -2.765774726867676, "logps/chosen": -258.89117431640625, "logps/rejected": -372.06451416015625, "loss": 0.6864, "rewards/accuracies": 0.6148437261581421, "rewards/chosen": 0.013396549038589, "rewards/margins": 0.014446373097598553, "rewards/rejected": -0.0010498259216547012, "step": 70 }, { "epoch": 0.05, "learning_rate": 2.72108843537415e-07, "logits/chosen": -2.8109402656555176, "logits/rejected": -2.7843804359436035, "logps/chosen": -244.15817260742188, "logps/rejected": -369.6734313964844, "loss": 0.6821, "rewards/accuracies": 0.6742187738418579, "rewards/chosen": 0.02298940345644951, "rewards/margins": 0.023194540292024612, "rewards/rejected": -0.0002051351184491068, "step": 80 }, { "epoch": 0.06, "learning_rate": 3.0612244897959183e-07, "logits/chosen": -2.8090157508850098, "logits/rejected": -2.7707672119140625, "logps/chosen": -222.1091766357422, "logps/rejected": -365.6192321777344, "loss": 0.6766, "rewards/accuracies": 0.7242187261581421, "rewards/chosen": 0.030939970165491104, "rewards/margins": 0.03436826914548874, "rewards/rejected": -0.0034283031709492207, "step": 90 }, { "epoch": 0.07, "learning_rate": 3.401360544217687e-07, "logits/chosen": -2.7735049724578857, "logits/rejected": -2.7935452461242676, "logps/chosen": -251.73049926757812, "logps/rejected": -388.00115966796875, "loss": 0.6728, "rewards/accuracies": 0.735156238079071, "rewards/chosen": 0.03729977086186409, "rewards/margins": 0.04232599213719368, "rewards/rejected": -0.0050262222066521645, "step": 100 }, { "epoch": 0.07, "learning_rate": 3.741496598639456e-07, "logits/chosen": -2.797628164291382, "logits/rejected": -2.784834384918213, "logps/chosen": -255.72265625, "logps/rejected": -349.15985107421875, "loss": 0.6651, "rewards/accuracies": 0.788281261920929, "rewards/chosen": 0.05175922438502312, "rewards/margins": 0.05847715586423874, "rewards/rejected": -0.00671793520450592, "step": 110 }, { "epoch": 0.08, "learning_rate": 4.0816326530612243e-07, "logits/chosen": -2.7973737716674805, "logits/rejected": -2.7825686931610107, "logps/chosen": -252.3303985595703, "logps/rejected": -348.4207458496094, "loss": 0.6604, "rewards/accuracies": 0.813281238079071, "rewards/chosen": 0.06004839017987251, "rewards/margins": 0.06873828917741776, "rewards/rejected": -0.008689895272254944, "step": 120 }, { "epoch": 0.09, "learning_rate": 4.421768707482993e-07, "logits/chosen": -2.7856903076171875, "logits/rejected": -2.8103625774383545, "logps/chosen": -248.4453125, "logps/rejected": -316.520263671875, "loss": 0.6528, "rewards/accuracies": 0.8179687261581421, "rewards/chosen": 0.07609430700540543, "rewards/margins": 0.08578468859195709, "rewards/rejected": -0.00969038438051939, "step": 130 }, { "epoch": 0.1, "learning_rate": 4.761904761904761e-07, "logits/chosen": -2.7964794635772705, "logits/rejected": -2.8038413524627686, "logps/chosen": -251.0780029296875, "logps/rejected": -380.4024353027344, "loss": 0.6409, "rewards/accuracies": 0.842968761920929, "rewards/chosen": 0.10089793056249619, "rewards/margins": 0.11140058934688568, "rewards/rejected": -0.010502668097615242, "step": 140 }, { "epoch": 0.1, "learning_rate": 4.988662131519274e-07, "logits/chosen": -2.7733452320098877, "logits/rejected": -2.799926280975342, "logps/chosen": -259.34686279296875, "logps/rejected": -335.1527404785156, "loss": 0.6297, "rewards/accuracies": 0.8539062738418579, "rewards/chosen": 0.12008102238178253, "rewards/margins": 0.13700444996356964, "rewards/rejected": -0.016923416405916214, "step": 150 }, { "epoch": 0.11, "learning_rate": 4.950869236583522e-07, "logits/chosen": -2.774165153503418, "logits/rejected": -2.7881526947021484, "logps/chosen": -245.5338134765625, "logps/rejected": -338.31597900390625, "loss": 0.6201, "rewards/accuracies": 0.8687499761581421, "rewards/chosen": 0.14051470160484314, "rewards/margins": 0.1599283218383789, "rewards/rejected": -0.019413620233535767, "step": 160 }, { "epoch": 0.12, "learning_rate": 4.91307634164777e-07, "logits/chosen": -2.811603546142578, "logits/rejected": -2.8174936771392822, "logps/chosen": -260.7558898925781, "logps/rejected": -356.88153076171875, "loss": 0.6041, "rewards/accuracies": 0.875, "rewards/chosen": 0.1749168038368225, "rewards/margins": 0.19711166620254517, "rewards/rejected": -0.022194867953658104, "step": 170 }, { "epoch": 0.12, "learning_rate": 4.875283446712018e-07, "logits/chosen": -2.7915146350860596, "logits/rejected": -2.7889480590820312, "logps/chosen": -264.36138916015625, "logps/rejected": -353.7435607910156, "loss": 0.5926, "rewards/accuracies": 0.883593738079071, "rewards/chosen": 0.19911792874336243, "rewards/margins": 0.22633683681488037, "rewards/rejected": -0.02721891924738884, "step": 180 }, { "epoch": 0.13, "learning_rate": 4.837490551776266e-07, "logits/chosen": -2.7990036010742188, "logits/rejected": -2.7916808128356934, "logps/chosen": -257.4069519042969, "logps/rejected": -372.6297302246094, "loss": 0.5799, "rewards/accuracies": 0.887499988079071, "rewards/chosen": 0.22631244361400604, "rewards/margins": 0.2581940293312073, "rewards/rejected": -0.031881578266620636, "step": 190 }, { "epoch": 0.14, "learning_rate": 4.799697656840514e-07, "logits/chosen": -2.7753312587738037, "logits/rejected": -2.7730696201324463, "logps/chosen": -259.2568054199219, "logps/rejected": -390.26995849609375, "loss": 0.564, "rewards/accuracies": 0.889843761920929, "rewards/chosen": 0.25861743092536926, "rewards/margins": 0.30055442452430725, "rewards/rejected": -0.0419369637966156, "step": 200 }, { "epoch": 0.14, "learning_rate": 4.761904761904761e-07, "logits/chosen": -2.7830989360809326, "logits/rejected": -2.7885472774505615, "logps/chosen": -229.49685668945312, "logps/rejected": -346.35784912109375, "loss": 0.5551, "rewards/accuracies": 0.91015625, "rewards/chosen": 0.28561651706695557, "rewards/margins": 0.32180091738700867, "rewards/rejected": -0.03618443384766579, "step": 210 }, { "epoch": 0.15, "learning_rate": 4.7241118669690096e-07, "logits/chosen": -2.7914628982543945, "logits/rejected": -2.7812819480895996, "logps/chosen": -277.1968078613281, "logps/rejected": -334.34124755859375, "loss": 0.5473, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 0.30997538566589355, "rewards/margins": 0.3486320972442627, "rewards/rejected": -0.038656704127788544, "step": 220 }, { "epoch": 0.16, "learning_rate": 4.6863189720332574e-07, "logits/chosen": -2.7915186882019043, "logits/rejected": -2.7635109424591064, "logps/chosen": -230.6345672607422, "logps/rejected": -366.45855712890625, "loss": 0.5283, "rewards/accuracies": 0.901562511920929, "rewards/chosen": 0.3480406403541565, "rewards/margins": 0.3980127274990082, "rewards/rejected": -0.04997207969427109, "step": 230 }, { "epoch": 0.16, "learning_rate": 4.648526077097505e-07, "logits/chosen": -2.8176677227020264, "logits/rejected": -2.8094589710235596, "logps/chosen": -255.73318481445312, "logps/rejected": -356.473876953125, "loss": 0.5141, "rewards/accuracies": 0.905468761920929, "rewards/chosen": 0.38035809993743896, "rewards/margins": 0.4426742494106293, "rewards/rejected": -0.062316179275512695, "step": 240 }, { "epoch": 0.17, "learning_rate": 4.6107331821617536e-07, "logits/chosen": -2.778831958770752, "logits/rejected": -2.7532734870910645, "logps/chosen": -260.0787658691406, "logps/rejected": -382.69403076171875, "loss": 0.5037, "rewards/accuracies": 0.9078124761581421, "rewards/chosen": 0.4094300866127014, "rewards/margins": 0.4735100269317627, "rewards/rejected": -0.06407993286848068, "step": 250 }, { "epoch": 0.18, "learning_rate": 4.5729402872260014e-07, "logits/chosen": -2.7875959873199463, "logits/rejected": -2.789522647857666, "logps/chosen": -245.36215209960938, "logps/rejected": -398.8630676269531, "loss": 0.4946, "rewards/accuracies": 0.897656261920929, "rewards/chosen": 0.43164581060409546, "rewards/margins": 0.506696879863739, "rewards/rejected": -0.07505108416080475, "step": 260 }, { "epoch": 0.18, "learning_rate": 4.535147392290249e-07, "logits/chosen": -2.7784600257873535, "logits/rejected": -2.743320941925049, "logps/chosen": -240.0518035888672, "logps/rejected": -373.5130920410156, "loss": 0.4891, "rewards/accuracies": 0.89453125, "rewards/chosen": 0.45601949095726013, "rewards/margins": 0.5297552347183228, "rewards/rejected": -0.07373576611280441, "step": 270 }, { "epoch": 0.19, "learning_rate": 4.497354497354497e-07, "logits/chosen": -2.777036190032959, "logits/rejected": -2.7678191661834717, "logps/chosen": -264.9656677246094, "logps/rejected": -373.12042236328125, "loss": 0.4766, "rewards/accuracies": 0.9156249761581421, "rewards/chosen": 0.47401291131973267, "rewards/margins": 0.5673891305923462, "rewards/rejected": -0.09337621927261353, "step": 280 }, { "epoch": 0.2, "learning_rate": 4.459561602418745e-07, "logits/chosen": -2.7813751697540283, "logits/rejected": -2.7827224731445312, "logps/chosen": -239.7397918701172, "logps/rejected": -392.6272888183594, "loss": 0.4603, "rewards/accuracies": 0.9117187261581421, "rewards/chosen": 0.5112585425376892, "rewards/margins": 0.6238077878952026, "rewards/rejected": -0.11254926025867462, "step": 290 }, { "epoch": 0.2, "learning_rate": 4.421768707482993e-07, "logits/chosen": -2.784381628036499, "logits/rejected": -2.7823455333709717, "logps/chosen": -247.23696899414062, "logps/rejected": -340.01971435546875, "loss": 0.4569, "rewards/accuracies": 0.909375011920929, "rewards/chosen": 0.5431731939315796, "rewards/margins": 0.6343038082122803, "rewards/rejected": -0.09113059937953949, "step": 300 }, { "epoch": 0.21, "learning_rate": 4.383975812547241e-07, "logits/chosen": -2.7919013500213623, "logits/rejected": -2.7927372455596924, "logps/chosen": -244.9982147216797, "logps/rejected": -345.5526428222656, "loss": 0.4422, "rewards/accuracies": 0.922656238079071, "rewards/chosen": 0.5760600566864014, "rewards/margins": 0.6899352669715881, "rewards/rejected": -0.11387525498867035, "step": 310 }, { "epoch": 0.22, "learning_rate": 4.346182917611489e-07, "logits/chosen": -2.786698341369629, "logits/rejected": -2.7934978008270264, "logps/chosen": -255.37142944335938, "logps/rejected": -399.12957763671875, "loss": 0.4344, "rewards/accuracies": 0.9125000238418579, "rewards/chosen": 0.5843140482902527, "rewards/margins": 0.7244275808334351, "rewards/rejected": -0.1401134431362152, "step": 320 }, { "epoch": 0.22, "learning_rate": 4.308390022675737e-07, "logits/chosen": -2.7745113372802734, "logits/rejected": -2.7805206775665283, "logps/chosen": -252.92514038085938, "logps/rejected": -392.51165771484375, "loss": 0.4332, "rewards/accuracies": 0.9046875238418579, "rewards/chosen": 0.5970828533172607, "rewards/margins": 0.7323796153068542, "rewards/rejected": -0.1352967619895935, "step": 330 }, { "epoch": 0.23, "learning_rate": 4.270597127739985e-07, "logits/chosen": -2.783926486968994, "logits/rejected": -2.7875866889953613, "logps/chosen": -250.8353729248047, "logps/rejected": -335.03265380859375, "loss": 0.4175, "rewards/accuracies": 0.9164062738418579, "rewards/chosen": 0.6492675542831421, "rewards/margins": 0.7875067591667175, "rewards/rejected": -0.13823917508125305, "step": 340 }, { "epoch": 0.24, "learning_rate": 4.2328042328042324e-07, "logits/chosen": -2.7828190326690674, "logits/rejected": -2.772052764892578, "logps/chosen": -236.33706665039062, "logps/rejected": -370.28399658203125, "loss": 0.4152, "rewards/accuracies": 0.9242187738418579, "rewards/chosen": 0.662378191947937, "rewards/margins": 0.7986767888069153, "rewards/rejected": -0.13629861176013947, "step": 350 }, { "epoch": 0.24, "learning_rate": 4.19501133786848e-07, "logits/chosen": -2.780648946762085, "logits/rejected": -2.771820545196533, "logps/chosen": -228.22445678710938, "logps/rejected": -390.63751220703125, "loss": 0.4051, "rewards/accuracies": 0.9140625, "rewards/chosen": 0.6962443590164185, "rewards/margins": 0.8446155786514282, "rewards/rejected": -0.1483711302280426, "step": 360 }, { "epoch": 0.25, "learning_rate": 4.1572184429327286e-07, "logits/chosen": -2.8088645935058594, "logits/rejected": -2.7826154232025146, "logps/chosen": -255.2318572998047, "logps/rejected": -344.69183349609375, "loss": 0.3908, "rewards/accuracies": 0.9203125238418579, "rewards/chosen": 0.7306076288223267, "rewards/margins": 0.900040328502655, "rewards/rejected": -0.16943258047103882, "step": 370 }, { "epoch": 0.26, "learning_rate": 4.1194255479969764e-07, "logits/chosen": -2.7837393283843994, "logits/rejected": -2.754739284515381, "logps/chosen": -252.39779663085938, "logps/rejected": -347.7734069824219, "loss": 0.4019, "rewards/accuracies": 0.907031238079071, "rewards/chosen": 0.7146260738372803, "rewards/margins": 0.8642898797988892, "rewards/rejected": -0.14966385066509247, "step": 380 }, { "epoch": 0.27, "learning_rate": 4.0816326530612243e-07, "logits/chosen": -2.793994426727295, "logits/rejected": -2.789456605911255, "logps/chosen": -250.083984375, "logps/rejected": -345.2536315917969, "loss": 0.3843, "rewards/accuracies": 0.9203125238418579, "rewards/chosen": 0.760775089263916, "rewards/margins": 0.9284068942070007, "rewards/rejected": -0.1676318198442459, "step": 390 }, { "epoch": 0.27, "learning_rate": 4.0438397581254726e-07, "logits/chosen": -2.7863235473632812, "logits/rejected": -2.7660741806030273, "logps/chosen": -243.2860565185547, "logps/rejected": -375.15283203125, "loss": 0.3736, "rewards/accuracies": 0.9195312261581421, "rewards/chosen": 0.7728086113929749, "rewards/margins": 0.9798704385757446, "rewards/rejected": -0.20706184208393097, "step": 400 }, { "epoch": 0.28, "learning_rate": 4.0060468631897205e-07, "logits/chosen": -2.7740797996520996, "logits/rejected": -2.787078857421875, "logps/chosen": -231.3814239501953, "logps/rejected": -373.4275817871094, "loss": 0.3779, "rewards/accuracies": 0.9140625, "rewards/chosen": 0.786165177822113, "rewards/margins": 0.9645744562149048, "rewards/rejected": -0.1784091293811798, "step": 410 }, { "epoch": 0.29, "learning_rate": 3.968253968253968e-07, "logits/chosen": -2.7854466438293457, "logits/rejected": -2.782599449157715, "logps/chosen": -234.27853393554688, "logps/rejected": -341.40106201171875, "loss": 0.3758, "rewards/accuracies": 0.9125000238418579, "rewards/chosen": 0.8017100095748901, "rewards/margins": 0.9820283651351929, "rewards/rejected": -0.1803184449672699, "step": 420 }, { "epoch": 0.29, "learning_rate": 3.930461073318216e-07, "logits/chosen": -2.7634427547454834, "logits/rejected": -2.7768495082855225, "logps/chosen": -230.73318481445312, "logps/rejected": -427.71917724609375, "loss": 0.3665, "rewards/accuracies": 0.91796875, "rewards/chosen": 0.8091424703598022, "rewards/margins": 1.025179386138916, "rewards/rejected": -0.2160368263721466, "step": 430 }, { "epoch": 0.3, "learning_rate": 3.892668178382464e-07, "logits/chosen": -2.774629592895508, "logits/rejected": -2.7814247608184814, "logps/chosen": -253.4683074951172, "logps/rejected": -386.40216064453125, "loss": 0.3495, "rewards/accuracies": 0.925000011920929, "rewards/chosen": 0.8851088285446167, "rewards/margins": 1.123652696609497, "rewards/rejected": -0.23854386806488037, "step": 440 }, { "epoch": 0.31, "learning_rate": 3.854875283446712e-07, "logits/chosen": -2.766551971435547, "logits/rejected": -2.7709641456604004, "logps/chosen": -271.8524475097656, "logps/rejected": -379.4809265136719, "loss": 0.3575, "rewards/accuracies": 0.9125000238418579, "rewards/chosen": 0.8432048559188843, "rewards/margins": 1.0976295471191406, "rewards/rejected": -0.254424512386322, "step": 450 }, { "epoch": 0.31, "learning_rate": 3.8170823885109596e-07, "logits/chosen": -2.8009865283966064, "logits/rejected": -2.7705283164978027, "logps/chosen": -241.07632446289062, "logps/rejected": -366.87127685546875, "loss": 0.3459, "rewards/accuracies": 0.9281250238418579, "rewards/chosen": 0.9000816345214844, "rewards/margins": 1.1292930841445923, "rewards/rejected": -0.22921133041381836, "step": 460 }, { "epoch": 0.32, "learning_rate": 3.779289493575208e-07, "logits/chosen": -2.7855477333068848, "logits/rejected": -2.771469831466675, "logps/chosen": -248.2216033935547, "logps/rejected": -379.58709716796875, "loss": 0.3488, "rewards/accuracies": 0.913281261920929, "rewards/chosen": 0.8979974985122681, "rewards/margins": 1.1383633613586426, "rewards/rejected": -0.2403658926486969, "step": 470 }, { "epoch": 0.33, "learning_rate": 3.741496598639456e-07, "logits/chosen": -2.783979892730713, "logits/rejected": -2.787400722503662, "logps/chosen": -234.78939819335938, "logps/rejected": -391.0784912109375, "loss": 0.3396, "rewards/accuracies": 0.9312499761581421, "rewards/chosen": 0.8895782232284546, "rewards/margins": 1.1713939905166626, "rewards/rejected": -0.281815767288208, "step": 480 }, { "epoch": 0.33, "learning_rate": 3.703703703703703e-07, "logits/chosen": -2.7796401977539062, "logits/rejected": -2.78939151763916, "logps/chosen": -255.79556274414062, "logps/rejected": -376.7617492675781, "loss": 0.3496, "rewards/accuracies": 0.91796875, "rewards/chosen": 0.8880151510238647, "rewards/margins": 1.1511998176574707, "rewards/rejected": -0.26318463683128357, "step": 490 }, { "epoch": 0.34, "learning_rate": 3.6659108087679515e-07, "logits/chosen": -2.784447193145752, "logits/rejected": -2.7811279296875, "logps/chosen": -240.26943969726562, "logps/rejected": -373.43585205078125, "loss": 0.3317, "rewards/accuracies": 0.9242187738418579, "rewards/chosen": 0.9207477569580078, "rewards/margins": 1.2141565084457397, "rewards/rejected": -0.2934088110923767, "step": 500 }, { "epoch": 0.35, "learning_rate": 3.6281179138321993e-07, "logits/chosen": -2.7936480045318604, "logits/rejected": -2.7741034030914307, "logps/chosen": -253.25625610351562, "logps/rejected": -388.1740417480469, "loss": 0.3307, "rewards/accuracies": 0.925000011920929, "rewards/chosen": 0.9261225461959839, "rewards/margins": 1.2367761135101318, "rewards/rejected": -0.310653418302536, "step": 510 }, { "epoch": 0.35, "learning_rate": 3.590325018896447e-07, "logits/chosen": -2.764971971511841, "logits/rejected": -2.779900074005127, "logps/chosen": -277.50433349609375, "logps/rejected": -390.9405822753906, "loss": 0.3301, "rewards/accuracies": 0.9203125238418579, "rewards/chosen": 0.9403823614120483, "rewards/margins": 1.250135898590088, "rewards/rejected": -0.3097533881664276, "step": 520 }, { "epoch": 0.36, "learning_rate": 3.5525321239606955e-07, "logits/chosen": -2.7859063148498535, "logits/rejected": -2.7852673530578613, "logps/chosen": -240.83847045898438, "logps/rejected": -329.5592346191406, "loss": 0.3185, "rewards/accuracies": 0.9296875, "rewards/chosen": 0.998257040977478, "rewards/margins": 1.3062750101089478, "rewards/rejected": -0.30801790952682495, "step": 530 }, { "epoch": 0.37, "learning_rate": 3.5147392290249433e-07, "logits/chosen": -2.7856059074401855, "logits/rejected": -2.7904558181762695, "logps/chosen": -256.13116455078125, "logps/rejected": -359.0440673828125, "loss": 0.3201, "rewards/accuracies": 0.917187511920929, "rewards/chosen": 0.9812418222427368, "rewards/margins": 1.2980186939239502, "rewards/rejected": -0.316776841878891, "step": 540 }, { "epoch": 0.37, "learning_rate": 3.4769463340891906e-07, "logits/chosen": -2.7746291160583496, "logits/rejected": -2.8083655834198, "logps/chosen": -243.3596649169922, "logps/rejected": -381.6620788574219, "loss": 0.321, "rewards/accuracies": 0.921093761920929, "rewards/chosen": 0.9785689115524292, "rewards/margins": 1.3102028369903564, "rewards/rejected": -0.33163395524024963, "step": 550 }, { "epoch": 0.38, "learning_rate": 3.439153439153439e-07, "logits/chosen": -2.788200616836548, "logits/rejected": -2.806088924407959, "logps/chosen": -243.46371459960938, "logps/rejected": -353.0728454589844, "loss": 0.3037, "rewards/accuracies": 0.9281250238418579, "rewards/chosen": 1.0423057079315186, "rewards/margins": 1.40134596824646, "rewards/rejected": -0.35903996229171753, "step": 560 }, { "epoch": 0.39, "learning_rate": 3.401360544217687e-07, "logits/chosen": -2.8205642700195312, "logits/rejected": -2.75651216506958, "logps/chosen": -225.49380493164062, "logps/rejected": -383.3102111816406, "loss": 0.2961, "rewards/accuracies": 0.934374988079071, "rewards/chosen": 1.0518951416015625, "rewards/margins": 1.416092872619629, "rewards/rejected": -0.3641977310180664, "step": 570 }, { "epoch": 0.39, "learning_rate": 3.3635676492819346e-07, "logits/chosen": -2.778111696243286, "logits/rejected": -2.8062729835510254, "logps/chosen": -241.8183135986328, "logps/rejected": -360.12677001953125, "loss": 0.3026, "rewards/accuracies": 0.928906261920929, "rewards/chosen": 1.0336360931396484, "rewards/margins": 1.3975627422332764, "rewards/rejected": -0.3639264702796936, "step": 580 }, { "epoch": 0.4, "learning_rate": 3.325774754346183e-07, "logits/chosen": -2.7760305404663086, "logits/rejected": -2.7639145851135254, "logps/chosen": -263.2132568359375, "logps/rejected": -326.3753356933594, "loss": 0.3079, "rewards/accuracies": 0.917187511920929, "rewards/chosen": 1.0236365795135498, "rewards/margins": 1.3882102966308594, "rewards/rejected": -0.36457380652427673, "step": 590 }, { "epoch": 0.41, "learning_rate": 3.287981859410431e-07, "logits/chosen": -2.8040480613708496, "logits/rejected": -2.781839370727539, "logps/chosen": -232.688720703125, "logps/rejected": -341.75372314453125, "loss": 0.294, "rewards/accuracies": 0.92578125, "rewards/chosen": 1.0763448476791382, "rewards/margins": 1.459729790687561, "rewards/rejected": -0.38338491320610046, "step": 600 }, { "epoch": 0.41, "learning_rate": 3.2501889644746787e-07, "logits/chosen": -2.797874927520752, "logits/rejected": -2.748481512069702, "logps/chosen": -232.8326873779297, "logps/rejected": -369.7907409667969, "loss": 0.2837, "rewards/accuracies": 0.938281238079071, "rewards/chosen": 1.101806640625, "rewards/margins": 1.498957633972168, "rewards/rejected": -0.39715105295181274, "step": 610 }, { "epoch": 0.42, "learning_rate": 3.2123960695389265e-07, "logits/chosen": -2.780925989151001, "logits/rejected": -2.735792636871338, "logps/chosen": -222.20596313476562, "logps/rejected": -380.5815124511719, "loss": 0.2935, "rewards/accuracies": 0.921093761920929, "rewards/chosen": 1.0859084129333496, "rewards/margins": 1.4819860458374023, "rewards/rejected": -0.39607763290405273, "step": 620 }, { "epoch": 0.43, "learning_rate": 3.1746031746031743e-07, "logits/chosen": -2.7768056392669678, "logits/rejected": -2.764166831970215, "logps/chosen": -236.9914093017578, "logps/rejected": -345.6325378417969, "loss": 0.2895, "rewards/accuracies": 0.932812511920929, "rewards/chosen": 1.1014459133148193, "rewards/margins": 1.5069670677185059, "rewards/rejected": -0.40552106499671936, "step": 630 }, { "epoch": 0.44, "learning_rate": 3.136810279667422e-07, "logits/chosen": -2.7987258434295654, "logits/rejected": -2.8054118156433105, "logps/chosen": -235.97109985351562, "logps/rejected": -330.56439208984375, "loss": 0.2775, "rewards/accuracies": 0.93359375, "rewards/chosen": 1.1580729484558105, "rewards/margins": 1.5699806213378906, "rewards/rejected": -0.4119076728820801, "step": 640 }, { "epoch": 0.44, "learning_rate": 3.0990173847316705e-07, "logits/chosen": -2.7858521938323975, "logits/rejected": -2.779346466064453, "logps/chosen": -257.5158386230469, "logps/rejected": -322.25103759765625, "loss": 0.287, "rewards/accuracies": 0.9195312261581421, "rewards/chosen": 1.1325995922088623, "rewards/margins": 1.5360453128814697, "rewards/rejected": -0.40344563126564026, "step": 650 }, { "epoch": 0.45, "learning_rate": 3.0612244897959183e-07, "logits/chosen": -2.7976508140563965, "logits/rejected": -2.8010151386260986, "logps/chosen": -219.1446533203125, "logps/rejected": -315.2838439941406, "loss": 0.2703, "rewards/accuracies": 0.9453125, "rewards/chosen": 1.1511547565460205, "rewards/margins": 1.5933144092559814, "rewards/rejected": -0.44215965270996094, "step": 660 }, { "epoch": 0.46, "learning_rate": 3.023431594860166e-07, "logits/chosen": -2.767582416534424, "logits/rejected": -2.8024327754974365, "logps/chosen": -237.21578979492188, "logps/rejected": -314.68377685546875, "loss": 0.2637, "rewards/accuracies": 0.9359375238418579, "rewards/chosen": 1.1508355140686035, "rewards/margins": 1.6350256204605103, "rewards/rejected": -0.48419007658958435, "step": 670 }, { "epoch": 0.46, "learning_rate": 2.9856386999244145e-07, "logits/chosen": -2.7926082611083984, "logits/rejected": -2.780251979827881, "logps/chosen": -244.810302734375, "logps/rejected": -347.9936828613281, "loss": 0.2784, "rewards/accuracies": 0.930468738079071, "rewards/chosen": 1.1081712245941162, "rewards/margins": 1.5819367170333862, "rewards/rejected": -0.47376567125320435, "step": 680 }, { "epoch": 0.47, "learning_rate": 2.947845804988662e-07, "logits/chosen": -2.771953821182251, "logits/rejected": -2.768907070159912, "logps/chosen": -248.50332641601562, "logps/rejected": -360.6126403808594, "loss": 0.2897, "rewards/accuracies": 0.921875, "rewards/chosen": 1.0936378240585327, "rewards/margins": 1.5781736373901367, "rewards/rejected": -0.4845358729362488, "step": 690 }, { "epoch": 0.48, "learning_rate": 2.9100529100529097e-07, "logits/chosen": -2.7748546600341797, "logits/rejected": -2.7857470512390137, "logps/chosen": -227.1557159423828, "logps/rejected": -390.3030700683594, "loss": 0.2597, "rewards/accuracies": 0.9320312738418579, "rewards/chosen": 1.1781264543533325, "rewards/margins": 1.7109047174453735, "rewards/rejected": -0.5327781438827515, "step": 700 }, { "epoch": 0.48, "learning_rate": 2.872260015117158e-07, "logits/chosen": -2.77628231048584, "logits/rejected": -2.7869679927825928, "logps/chosen": -245.57839965820312, "logps/rejected": -326.86212158203125, "loss": 0.2613, "rewards/accuracies": 0.934374988079071, "rewards/chosen": 1.1419804096221924, "rewards/margins": 1.6727325916290283, "rewards/rejected": -0.5307522416114807, "step": 710 }, { "epoch": 0.49, "learning_rate": 2.834467120181406e-07, "logits/chosen": -2.7608537673950195, "logits/rejected": -2.7646660804748535, "logps/chosen": -241.5836944580078, "logps/rejected": -379.62860107421875, "loss": 0.2738, "rewards/accuracies": 0.925000011920929, "rewards/chosen": 1.1373337507247925, "rewards/margins": 1.6593284606933594, "rewards/rejected": -0.5219947099685669, "step": 720 }, { "epoch": 0.5, "learning_rate": 2.7966742252456537e-07, "logits/chosen": -2.777465343475342, "logits/rejected": -2.801975965499878, "logps/chosen": -227.2059326171875, "logps/rejected": -369.7891540527344, "loss": 0.2554, "rewards/accuracies": 0.934374988079071, "rewards/chosen": 1.209241271018982, "rewards/margins": 1.7282158136367798, "rewards/rejected": -0.5189744234085083, "step": 730 }, { "epoch": 0.5, "learning_rate": 2.758881330309902e-07, "logits/chosen": -2.7639384269714355, "logits/rejected": -2.7558932304382324, "logps/chosen": -255.972412109375, "logps/rejected": -410.17431640625, "loss": 0.2766, "rewards/accuracies": 0.9156249761581421, "rewards/chosen": 1.1485779285430908, "rewards/margins": 1.672486662864685, "rewards/rejected": -0.5239086151123047, "step": 740 }, { "epoch": 0.51, "learning_rate": 2.72108843537415e-07, "logits/chosen": -2.7429962158203125, "logits/rejected": -2.7603325843811035, "logps/chosen": -248.05697631835938, "logps/rejected": -382.65863037109375, "loss": 0.2692, "rewards/accuracies": 0.934374988079071, "rewards/chosen": 1.1628259420394897, "rewards/margins": 1.7002170085906982, "rewards/rejected": -0.5373910665512085, "step": 750 }, { "epoch": 0.52, "learning_rate": 2.683295540438397e-07, "logits/chosen": -2.7732365131378174, "logits/rejected": -2.7899222373962402, "logps/chosen": -230.82577514648438, "logps/rejected": -356.39349365234375, "loss": 0.262, "rewards/accuracies": 0.93359375, "rewards/chosen": 1.1516262292861938, "rewards/margins": 1.7132419347763062, "rewards/rejected": -0.5616158843040466, "step": 760 }, { "epoch": 0.52, "learning_rate": 2.645502645502645e-07, "logits/chosen": -2.764669895172119, "logits/rejected": -2.7641212940216064, "logps/chosen": -246.3456573486328, "logps/rejected": -370.99896240234375, "loss": 0.2701, "rewards/accuracies": 0.9164062738418579, "rewards/chosen": 1.191197395324707, "rewards/margins": 1.7232650518417358, "rewards/rejected": -0.5320678949356079, "step": 770 }, { "epoch": 0.53, "learning_rate": 2.6077097505668934e-07, "logits/chosen": -2.7817633152008057, "logits/rejected": -2.7922616004943848, "logps/chosen": -256.2757873535156, "logps/rejected": -356.1881408691406, "loss": 0.2571, "rewards/accuracies": 0.9359375238418579, "rewards/chosen": 1.2059863805770874, "rewards/margins": 1.7701711654663086, "rewards/rejected": -0.5641847848892212, "step": 780 }, { "epoch": 0.54, "learning_rate": 2.569916855631141e-07, "logits/chosen": -2.7949161529541016, "logits/rejected": -2.800379514694214, "logps/chosen": -219.2698516845703, "logps/rejected": -384.794189453125, "loss": 0.2512, "rewards/accuracies": 0.9296875, "rewards/chosen": 1.2469325065612793, "rewards/margins": 1.8279892206192017, "rewards/rejected": -0.5810565948486328, "step": 790 }, { "epoch": 0.54, "learning_rate": 2.532123960695389e-07, "logits/chosen": -2.7864224910736084, "logits/rejected": -2.8051304817199707, "logps/chosen": -243.42105102539062, "logps/rejected": -376.7647399902344, "loss": 0.2455, "rewards/accuracies": 0.9398437738418579, "rewards/chosen": 1.25649094581604, "rewards/margins": 1.8448721170425415, "rewards/rejected": -0.5883811712265015, "step": 800 }, { "epoch": 0.55, "learning_rate": 2.494331065759637e-07, "logits/chosen": -2.7794528007507324, "logits/rejected": -2.787205457687378, "logps/chosen": -239.23776245117188, "logps/rejected": -348.8122863769531, "loss": 0.2407, "rewards/accuracies": 0.940625011920929, "rewards/chosen": 1.279539942741394, "rewards/margins": 1.9069591760635376, "rewards/rejected": -0.627419114112854, "step": 810 }, { "epoch": 0.56, "learning_rate": 2.456538170823885e-07, "logits/chosen": -2.7908012866973877, "logits/rejected": -2.775237798690796, "logps/chosen": -237.18807983398438, "logps/rejected": -347.73028564453125, "loss": 0.2346, "rewards/accuracies": 0.938281238079071, "rewards/chosen": 1.2818529605865479, "rewards/margins": 1.891405701637268, "rewards/rejected": -0.6095527410507202, "step": 820 }, { "epoch": 0.56, "learning_rate": 2.418745275888133e-07, "logits/chosen": -2.788677453994751, "logits/rejected": -2.759464740753174, "logps/chosen": -244.3543243408203, "logps/rejected": -384.2773742675781, "loss": 0.249, "rewards/accuracies": 0.9273437261581421, "rewards/chosen": 1.2608978748321533, "rewards/margins": 1.8487341403961182, "rewards/rejected": -0.5878363251686096, "step": 830 }, { "epoch": 0.57, "learning_rate": 2.3809523809523806e-07, "logits/chosen": -2.7865688800811768, "logits/rejected": -2.744267463684082, "logps/chosen": -225.56716918945312, "logps/rejected": -373.64788818359375, "loss": 0.2401, "rewards/accuracies": 0.940625011920929, "rewards/chosen": 1.2590898275375366, "rewards/margins": 1.872513771057129, "rewards/rejected": -0.6134239435195923, "step": 840 }, { "epoch": 0.58, "learning_rate": 2.3431594860166287e-07, "logits/chosen": -2.763679027557373, "logits/rejected": -2.7585010528564453, "logps/chosen": -234.14706420898438, "logps/rejected": -332.43975830078125, "loss": 0.2506, "rewards/accuracies": 0.934374988079071, "rewards/chosen": 1.2429834604263306, "rewards/margins": 1.8476206064224243, "rewards/rejected": -0.6046372056007385, "step": 850 }, { "epoch": 0.58, "learning_rate": 2.3053665910808768e-07, "logits/chosen": -2.7579002380371094, "logits/rejected": -2.7620043754577637, "logps/chosen": -236.3244171142578, "logps/rejected": -339.3128356933594, "loss": 0.2543, "rewards/accuracies": 0.936718761920929, "rewards/chosen": 1.218972086906433, "rewards/margins": 1.8291162252426147, "rewards/rejected": -0.6101440191268921, "step": 860 }, { "epoch": 0.59, "learning_rate": 2.2675736961451246e-07, "logits/chosen": -2.7839019298553467, "logits/rejected": -2.7369167804718018, "logps/chosen": -219.27053833007812, "logps/rejected": -405.5704650878906, "loss": 0.2458, "rewards/accuracies": 0.9312499761581421, "rewards/chosen": 1.2801100015640259, "rewards/margins": 1.8934139013290405, "rewards/rejected": -0.6133038997650146, "step": 870 }, { "epoch": 0.6, "learning_rate": 2.2297808012093725e-07, "logits/chosen": -2.782578945159912, "logits/rejected": -2.7683374881744385, "logps/chosen": -245.6527099609375, "logps/rejected": -378.6884765625, "loss": 0.2384, "rewards/accuracies": 0.934374988079071, "rewards/chosen": 1.321287751197815, "rewards/margins": 1.9386436939239502, "rewards/rejected": -0.6173557043075562, "step": 880 }, { "epoch": 0.61, "learning_rate": 2.1919879062736206e-07, "logits/chosen": -2.7775015830993652, "logits/rejected": -2.752042293548584, "logps/chosen": -229.3787078857422, "logps/rejected": -356.0593566894531, "loss": 0.2423, "rewards/accuracies": 0.934374988079071, "rewards/chosen": 1.2723052501678467, "rewards/margins": 1.9301214218139648, "rewards/rejected": -0.6578160524368286, "step": 890 }, { "epoch": 0.61, "learning_rate": 2.1541950113378684e-07, "logits/chosen": -2.768510580062866, "logits/rejected": -2.7404208183288574, "logps/chosen": -265.3998107910156, "logps/rejected": -373.4928283691406, "loss": 0.2467, "rewards/accuracies": 0.9320312738418579, "rewards/chosen": 1.264615774154663, "rewards/margins": 1.920330286026001, "rewards/rejected": -0.6557145714759827, "step": 900 }, { "epoch": 0.62, "learning_rate": 2.1164021164021162e-07, "logits/chosen": -2.7891170978546143, "logits/rejected": -2.7741641998291016, "logps/chosen": -220.24307250976562, "logps/rejected": -358.5487976074219, "loss": 0.2284, "rewards/accuracies": 0.94140625, "rewards/chosen": 1.304023027420044, "rewards/margins": 1.9829524755477905, "rewards/rejected": -0.6789294481277466, "step": 910 }, { "epoch": 0.63, "learning_rate": 2.0786092214663643e-07, "logits/chosen": -2.7575926780700684, "logits/rejected": -2.7642369270324707, "logps/chosen": -234.12026977539062, "logps/rejected": -384.3020935058594, "loss": 0.2373, "rewards/accuracies": 0.94140625, "rewards/chosen": 1.2832618951797485, "rewards/margins": 1.9688091278076172, "rewards/rejected": -0.6855469942092896, "step": 920 }, { "epoch": 0.63, "learning_rate": 2.0408163265306121e-07, "logits/chosen": -2.766233444213867, "logits/rejected": -2.7951343059539795, "logps/chosen": -244.18026733398438, "logps/rejected": -320.21771240234375, "loss": 0.2259, "rewards/accuracies": 0.9398437738418579, "rewards/chosen": 1.312534213066101, "rewards/margins": 2.0482983589172363, "rewards/rejected": -0.7357643246650696, "step": 930 }, { "epoch": 0.64, "learning_rate": 2.0030234315948602e-07, "logits/chosen": -2.768256664276123, "logits/rejected": -2.7545723915100098, "logps/chosen": -248.816650390625, "logps/rejected": -401.00958251953125, "loss": 0.234, "rewards/accuracies": 0.9320312738418579, "rewards/chosen": 1.3067686557769775, "rewards/margins": 2.003986358642578, "rewards/rejected": -0.6972178816795349, "step": 940 }, { "epoch": 0.65, "learning_rate": 1.965230536659108e-07, "logits/chosen": -2.7718937397003174, "logits/rejected": -2.7864131927490234, "logps/chosen": -245.76220703125, "logps/rejected": -350.4901428222656, "loss": 0.2342, "rewards/accuracies": 0.938281238079071, "rewards/chosen": 1.3374592065811157, "rewards/margins": 2.001889228820801, "rewards/rejected": -0.6644300222396851, "step": 950 }, { "epoch": 0.65, "learning_rate": 1.927437641723356e-07, "logits/chosen": -2.7670133113861084, "logits/rejected": -2.76993465423584, "logps/chosen": -227.41748046875, "logps/rejected": -354.6375427246094, "loss": 0.2386, "rewards/accuracies": 0.92578125, "rewards/chosen": 1.3317902088165283, "rewards/margins": 1.9966375827789307, "rewards/rejected": -0.6648473739624023, "step": 960 }, { "epoch": 0.66, "learning_rate": 1.889644746787604e-07, "logits/chosen": -2.7860965728759766, "logits/rejected": -2.776639699935913, "logps/chosen": -257.2185363769531, "logps/rejected": -302.48846435546875, "loss": 0.2278, "rewards/accuracies": 0.9390624761581421, "rewards/chosen": 1.352912187576294, "rewards/margins": 2.036379814147949, "rewards/rejected": -0.6834677457809448, "step": 970 }, { "epoch": 0.67, "learning_rate": 1.8518518518518516e-07, "logits/chosen": -2.7641091346740723, "logits/rejected": -2.7789313793182373, "logps/chosen": -256.19476318359375, "logps/rejected": -390.69549560546875, "loss": 0.2325, "rewards/accuracies": 0.942187488079071, "rewards/chosen": 1.3011709451675415, "rewards/margins": 2.039425849914551, "rewards/rejected": -0.738254964351654, "step": 980 }, { "epoch": 0.67, "learning_rate": 1.8140589569160996e-07, "logits/chosen": -2.795642614364624, "logits/rejected": -2.7746355533599854, "logps/chosen": -234.4689483642578, "logps/rejected": -395.40618896484375, "loss": 0.223, "rewards/accuracies": 0.9390624761581421, "rewards/chosen": 1.3416879177093506, "rewards/margins": 2.1014368534088135, "rewards/rejected": -0.7597488164901733, "step": 990 }, { "epoch": 0.68, "learning_rate": 1.7762660619803477e-07, "logits/chosen": -2.7756259441375732, "logits/rejected": -2.741664409637451, "logps/chosen": -242.3101348876953, "logps/rejected": -363.46160888671875, "loss": 0.2285, "rewards/accuracies": 0.925000011920929, "rewards/chosen": 1.3446866273880005, "rewards/margins": 2.070406436920166, "rewards/rejected": -0.7257199287414551, "step": 1000 }, { "epoch": 0.69, "learning_rate": 1.7384731670445953e-07, "logits/chosen": -2.7595419883728027, "logits/rejected": -2.7858798503875732, "logps/chosen": -259.9520568847656, "logps/rejected": -358.3509216308594, "loss": 0.2273, "rewards/accuracies": 0.9359375238418579, "rewards/chosen": 1.3033568859100342, "rewards/margins": 2.0887067317962646, "rewards/rejected": -0.78534996509552, "step": 1010 }, { "epoch": 0.69, "learning_rate": 1.7006802721088434e-07, "logits/chosen": -2.768449068069458, "logits/rejected": -2.7718656063079834, "logps/chosen": -238.11740112304688, "logps/rejected": -354.0820007324219, "loss": 0.236, "rewards/accuracies": 0.9351562261581421, "rewards/chosen": 1.3048899173736572, "rewards/margins": 2.033565044403076, "rewards/rejected": -0.7286752462387085, "step": 1020 }, { "epoch": 0.7, "learning_rate": 1.6628873771730915e-07, "logits/chosen": -2.7650275230407715, "logits/rejected": -2.7476916313171387, "logps/chosen": -245.41885375976562, "logps/rejected": -343.54437255859375, "loss": 0.2357, "rewards/accuracies": 0.928906261920929, "rewards/chosen": 1.316489815711975, "rewards/margins": 2.062798023223877, "rewards/rejected": -0.7463082671165466, "step": 1030 }, { "epoch": 0.71, "learning_rate": 1.6250944822373393e-07, "logits/chosen": -2.7734358310699463, "logits/rejected": -2.7748751640319824, "logps/chosen": -237.48538208007812, "logps/rejected": -389.1809997558594, "loss": 0.2308, "rewards/accuracies": 0.930468738079071, "rewards/chosen": 1.2910696268081665, "rewards/margins": 2.079051971435547, "rewards/rejected": -0.7879821062088013, "step": 1040 }, { "epoch": 0.71, "learning_rate": 1.5873015873015872e-07, "logits/chosen": -2.7670979499816895, "logits/rejected": -2.769535779953003, "logps/chosen": -214.1968536376953, "logps/rejected": -331.4734802246094, "loss": 0.224, "rewards/accuracies": 0.9390624761581421, "rewards/chosen": 1.377071738243103, "rewards/margins": 2.1104674339294434, "rewards/rejected": -0.7333956956863403, "step": 1050 }, { "epoch": 0.72, "learning_rate": 1.5495086923658353e-07, "logits/chosen": -2.789698839187622, "logits/rejected": -2.7418367862701416, "logps/chosen": -233.3469696044922, "logps/rejected": -372.7334289550781, "loss": 0.2046, "rewards/accuracies": 0.948437511920929, "rewards/chosen": 1.4205210208892822, "rewards/margins": 2.2147347927093506, "rewards/rejected": -0.7942138910293579, "step": 1060 }, { "epoch": 0.73, "learning_rate": 1.511715797430083e-07, "logits/chosen": -2.7732410430908203, "logits/rejected": -2.7837493419647217, "logps/chosen": -240.2108917236328, "logps/rejected": -340.86712646484375, "loss": 0.2229, "rewards/accuracies": 0.932812511920929, "rewards/chosen": 1.365039348602295, "rewards/margins": 2.149728298187256, "rewards/rejected": -0.7846890091896057, "step": 1070 }, { "epoch": 0.73, "learning_rate": 1.473922902494331e-07, "logits/chosen": -2.762357711791992, "logits/rejected": -2.7503538131713867, "logps/chosen": -245.73129272460938, "logps/rejected": -367.2342529296875, "loss": 0.2247, "rewards/accuracies": 0.934374988079071, "rewards/chosen": 1.3420137166976929, "rewards/margins": 2.1435036659240723, "rewards/rejected": -0.8014899492263794, "step": 1080 }, { "epoch": 0.74, "learning_rate": 1.436130007558579e-07, "logits/chosen": -2.786447048187256, "logits/rejected": -2.7433903217315674, "logps/chosen": -259.77923583984375, "logps/rejected": -384.2717590332031, "loss": 0.2176, "rewards/accuracies": 0.938281238079071, "rewards/chosen": 1.4291341304779053, "rewards/margins": 2.1485352516174316, "rewards/rejected": -0.7194010019302368, "step": 1090 }, { "epoch": 0.75, "learning_rate": 1.3983371126228268e-07, "logits/chosen": -2.766045570373535, "logits/rejected": -2.783592700958252, "logps/chosen": -258.433349609375, "logps/rejected": -356.44293212890625, "loss": 0.2166, "rewards/accuracies": 0.9359375238418579, "rewards/chosen": 1.3983967304229736, "rewards/margins": 2.2020390033721924, "rewards/rejected": -0.8036419153213501, "step": 1100 }, { "epoch": 0.75, "learning_rate": 1.360544217687075e-07, "logits/chosen": -2.784245491027832, "logits/rejected": -2.7566187381744385, "logps/chosen": -251.7339324951172, "logps/rejected": -356.1120300292969, "loss": 0.2042, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 1.394803762435913, "rewards/margins": 2.219846487045288, "rewards/rejected": -0.8250430822372437, "step": 1110 }, { "epoch": 0.76, "learning_rate": 1.3227513227513225e-07, "logits/chosen": -2.768209218978882, "logits/rejected": -2.7927510738372803, "logps/chosen": -250.1661376953125, "logps/rejected": -341.25396728515625, "loss": 0.2216, "rewards/accuracies": 0.936718761920929, "rewards/chosen": 1.3784762620925903, "rewards/margins": 2.1875884532928467, "rewards/rejected": -0.8091121912002563, "step": 1120 }, { "epoch": 0.77, "learning_rate": 1.2849584278155706e-07, "logits/chosen": -2.755992889404297, "logits/rejected": -2.7883083820343018, "logps/chosen": -258.3106384277344, "logps/rejected": -338.23822021484375, "loss": 0.2233, "rewards/accuracies": 0.9359375238418579, "rewards/chosen": 1.371977686882019, "rewards/margins": 2.1515755653381348, "rewards/rejected": -0.7795979380607605, "step": 1130 }, { "epoch": 0.77, "learning_rate": 1.2471655328798184e-07, "logits/chosen": -2.765443801879883, "logits/rejected": -2.773919105529785, "logps/chosen": -228.51766967773438, "logps/rejected": -353.0353698730469, "loss": 0.2184, "rewards/accuracies": 0.9359375238418579, "rewards/chosen": 1.3785618543624878, "rewards/margins": 2.173300266265869, "rewards/rejected": -0.7947384119033813, "step": 1140 }, { "epoch": 0.78, "learning_rate": 1.2093726379440665e-07, "logits/chosen": -2.7806408405303955, "logits/rejected": -2.756528854370117, "logps/chosen": -227.71621704101562, "logps/rejected": -391.3194580078125, "loss": 0.2113, "rewards/accuracies": 0.9476562738418579, "rewards/chosen": 1.3853504657745361, "rewards/margins": 2.21071195602417, "rewards/rejected": -0.8253618478775024, "step": 1150 }, { "epoch": 0.79, "learning_rate": 1.1715797430083144e-07, "logits/chosen": -2.7610268592834473, "logits/rejected": -2.7615675926208496, "logps/chosen": -269.79010009765625, "logps/rejected": -378.21209716796875, "loss": 0.2102, "rewards/accuracies": 0.9398437738418579, "rewards/chosen": 1.3920191526412964, "rewards/margins": 2.2798304557800293, "rewards/rejected": -0.887811541557312, "step": 1160 }, { "epoch": 0.8, "learning_rate": 1.1337868480725623e-07, "logits/chosen": -2.776198625564575, "logits/rejected": -2.768550395965576, "logps/chosen": -246.81887817382812, "logps/rejected": -365.49249267578125, "loss": 0.2134, "rewards/accuracies": 0.93359375, "rewards/chosen": 1.4072265625, "rewards/margins": 2.2190985679626465, "rewards/rejected": -0.8118720054626465, "step": 1170 }, { "epoch": 0.8, "learning_rate": 1.0959939531368103e-07, "logits/chosen": -2.782680034637451, "logits/rejected": -2.7389519214630127, "logps/chosen": -239.7198944091797, "logps/rejected": -363.89215087890625, "loss": 0.2147, "rewards/accuracies": 0.942187488079071, "rewards/chosen": 1.3731368780136108, "rewards/margins": 2.227461814880371, "rewards/rejected": -0.8543251156806946, "step": 1180 }, { "epoch": 0.81, "learning_rate": 1.0582010582010581e-07, "logits/chosen": -2.7652835845947266, "logits/rejected": -2.74135160446167, "logps/chosen": -223.03579711914062, "logps/rejected": -407.8848571777344, "loss": 0.2255, "rewards/accuracies": 0.930468738079071, "rewards/chosen": 1.3337465524673462, "rewards/margins": 2.1734132766723633, "rewards/rejected": -0.8396667242050171, "step": 1190 }, { "epoch": 0.82, "learning_rate": 1.0204081632653061e-07, "logits/chosen": -2.785404920578003, "logits/rejected": -2.768556594848633, "logps/chosen": -217.6273651123047, "logps/rejected": -390.0624694824219, "loss": 0.2004, "rewards/accuracies": 0.9515625238418579, "rewards/chosen": 1.452343225479126, "rewards/margins": 2.3132705688476562, "rewards/rejected": -0.8609271049499512, "step": 1200 }, { "epoch": 0.82, "learning_rate": 9.82615268329554e-08, "logits/chosen": -2.7641220092773438, "logits/rejected": -2.7403550148010254, "logps/chosen": -254.1678466796875, "logps/rejected": -381.2986145019531, "loss": 0.2165, "rewards/accuracies": 0.938281238079071, "rewards/chosen": 1.3716920614242554, "rewards/margins": 2.208040952682495, "rewards/rejected": -0.8363490104675293, "step": 1210 }, { "epoch": 0.83, "learning_rate": 9.44822373393802e-08, "logits/chosen": -2.7834503650665283, "logits/rejected": -2.7499313354492188, "logps/chosen": -220.555908203125, "logps/rejected": -353.34454345703125, "loss": 0.2073, "rewards/accuracies": 0.936718761920929, "rewards/chosen": 1.4545724391937256, "rewards/margins": 2.2807674407958984, "rewards/rejected": -0.826195240020752, "step": 1220 }, { "epoch": 0.84, "learning_rate": 9.070294784580498e-08, "logits/chosen": -2.7742843627929688, "logits/rejected": -2.7704269886016846, "logps/chosen": -240.16586303710938, "logps/rejected": -341.08270263671875, "loss": 0.2097, "rewards/accuracies": 0.9398437738418579, "rewards/chosen": 1.3970229625701904, "rewards/margins": 2.205933094024658, "rewards/rejected": -0.8089098930358887, "step": 1230 }, { "epoch": 0.84, "learning_rate": 8.692365835222977e-08, "logits/chosen": -2.7731618881225586, "logits/rejected": -2.7807064056396484, "logps/chosen": -246.8760223388672, "logps/rejected": -394.09661865234375, "loss": 0.1942, "rewards/accuracies": 0.94921875, "rewards/chosen": 1.4174280166625977, "rewards/margins": 2.335336446762085, "rewards/rejected": -0.9179089665412903, "step": 1240 }, { "epoch": 0.85, "learning_rate": 8.314436885865457e-08, "logits/chosen": -2.7794883251190186, "logits/rejected": -2.7599997520446777, "logps/chosen": -234.8397979736328, "logps/rejected": -354.03411865234375, "loss": 0.2101, "rewards/accuracies": 0.9359375238418579, "rewards/chosen": 1.3885688781738281, "rewards/margins": 2.2632603645324707, "rewards/rejected": -0.8746916651725769, "step": 1250 }, { "epoch": 0.86, "learning_rate": 7.936507936507936e-08, "logits/chosen": -2.7606375217437744, "logits/rejected": -2.7535159587860107, "logps/chosen": -225.62606811523438, "logps/rejected": -382.0788269042969, "loss": 0.2247, "rewards/accuracies": 0.9375, "rewards/chosen": 1.3528351783752441, "rewards/margins": 2.18499755859375, "rewards/rejected": -0.8321624994277954, "step": 1260 }, { "epoch": 0.86, "learning_rate": 7.558578987150415e-08, "logits/chosen": -2.7874550819396973, "logits/rejected": -2.7440848350524902, "logps/chosen": -216.8153533935547, "logps/rejected": -372.12982177734375, "loss": 0.2204, "rewards/accuracies": 0.946093738079071, "rewards/chosen": 1.3856043815612793, "rewards/margins": 2.1913902759552, "rewards/rejected": -0.8057858347892761, "step": 1270 }, { "epoch": 0.87, "learning_rate": 7.180650037792895e-08, "logits/chosen": -2.7706284523010254, "logits/rejected": -2.7321717739105225, "logps/chosen": -249.1674041748047, "logps/rejected": -390.70855712890625, "loss": 0.2218, "rewards/accuracies": 0.932812511920929, "rewards/chosen": 1.3266818523406982, "rewards/margins": 2.204909086227417, "rewards/rejected": -0.8782272338867188, "step": 1280 }, { "epoch": 0.88, "learning_rate": 6.802721088435375e-08, "logits/chosen": -2.771331310272217, "logits/rejected": -2.7345921993255615, "logps/chosen": -244.32217407226562, "logps/rejected": -395.6925964355469, "loss": 0.2148, "rewards/accuracies": 0.93359375, "rewards/chosen": 1.379319190979004, "rewards/margins": 2.2045130729675293, "rewards/rejected": -0.8251941800117493, "step": 1290 }, { "epoch": 0.88, "learning_rate": 6.424792139077853e-08, "logits/chosen": -2.781578779220581, "logits/rejected": -2.7588868141174316, "logps/chosen": -234.79800415039062, "logps/rejected": -359.72332763671875, "loss": 0.2047, "rewards/accuracies": 0.9476562738418579, "rewards/chosen": 1.4176688194274902, "rewards/margins": 2.3017234802246094, "rewards/rejected": -0.8840547800064087, "step": 1300 }, { "epoch": 0.89, "learning_rate": 6.046863189720333e-08, "logits/chosen": -2.770113706588745, "logits/rejected": -2.73785662651062, "logps/chosen": -244.0814971923828, "logps/rejected": -370.0007019042969, "loss": 0.2086, "rewards/accuracies": 0.9351562261581421, "rewards/chosen": 1.4492876529693604, "rewards/margins": 2.2907984256744385, "rewards/rejected": -0.8415109515190125, "step": 1310 }, { "epoch": 0.9, "learning_rate": 5.6689342403628116e-08, "logits/chosen": -2.753613233566284, "logits/rejected": -2.7601518630981445, "logps/chosen": -250.3900604248047, "logps/rejected": -360.0616455078125, "loss": 0.2099, "rewards/accuracies": 0.9359375238418579, "rewards/chosen": 1.4489208459854126, "rewards/margins": 2.3246617317199707, "rewards/rejected": -0.8757408261299133, "step": 1320 }, { "epoch": 0.9, "learning_rate": 5.2910052910052905e-08, "logits/chosen": -2.7654261589050293, "logits/rejected": -2.7347397804260254, "logps/chosen": -232.9058837890625, "logps/rejected": -352.3494567871094, "loss": 0.2101, "rewards/accuracies": 0.9398437738418579, "rewards/chosen": 1.4914627075195312, "rewards/margins": 2.3455305099487305, "rewards/rejected": -0.8540679216384888, "step": 1330 }, { "epoch": 0.91, "learning_rate": 4.91307634164777e-08, "logits/chosen": -2.7729830741882324, "logits/rejected": -2.7506096363067627, "logps/chosen": -237.5419158935547, "logps/rejected": -361.7286071777344, "loss": 0.2271, "rewards/accuracies": 0.936718761920929, "rewards/chosen": 1.3387925624847412, "rewards/margins": 2.1680846214294434, "rewards/rejected": -0.8292919397354126, "step": 1340 }, { "epoch": 0.92, "learning_rate": 4.535147392290249e-08, "logits/chosen": -2.758366584777832, "logits/rejected": -2.747448444366455, "logps/chosen": -262.02313232421875, "logps/rejected": -371.6409912109375, "loss": 0.2117, "rewards/accuracies": 0.93359375, "rewards/chosen": 1.4255142211914062, "rewards/margins": 2.2661709785461426, "rewards/rejected": -0.8406568765640259, "step": 1350 }, { "epoch": 0.92, "learning_rate": 4.157218442932729e-08, "logits/chosen": -2.7460246086120605, "logits/rejected": -2.7499794960021973, "logps/chosen": -242.7806396484375, "logps/rejected": -374.79736328125, "loss": 0.2305, "rewards/accuracies": 0.9242187738418579, "rewards/chosen": 1.3290668725967407, "rewards/margins": 2.187917470932007, "rewards/rejected": -0.8588504791259766, "step": 1360 }, { "epoch": 0.93, "learning_rate": 3.779289493575208e-08, "logits/chosen": -2.7681326866149902, "logits/rejected": -2.7562155723571777, "logps/chosen": -220.0043487548828, "logps/rejected": -369.31268310546875, "loss": 0.2015, "rewards/accuracies": 0.944531261920929, "rewards/chosen": 1.4147917032241821, "rewards/margins": 2.347784996032715, "rewards/rejected": -0.9329932332038879, "step": 1370 }, { "epoch": 0.94, "learning_rate": 3.4013605442176873e-08, "logits/chosen": -2.7685980796813965, "logits/rejected": -2.761018753051758, "logps/chosen": -244.3848114013672, "logps/rejected": -352.2154235839844, "loss": 0.2147, "rewards/accuracies": 0.9398437738418579, "rewards/chosen": 1.3917274475097656, "rewards/margins": 2.2305819988250732, "rewards/rejected": -0.8388546109199524, "step": 1380 }, { "epoch": 0.94, "learning_rate": 3.023431594860166e-08, "logits/chosen": -2.7724173069000244, "logits/rejected": -2.773851156234741, "logps/chosen": -251.663330078125, "logps/rejected": -341.803466796875, "loss": 0.1992, "rewards/accuracies": 0.94921875, "rewards/chosen": 1.4376652240753174, "rewards/margins": 2.324432134628296, "rewards/rejected": -0.886766791343689, "step": 1390 }, { "epoch": 0.95, "learning_rate": 2.6455026455026453e-08, "logits/chosen": -2.758798122406006, "logits/rejected": -2.763350009918213, "logps/chosen": -238.17745971679688, "logps/rejected": -398.58135986328125, "loss": 0.2004, "rewards/accuracies": 0.9476562738418579, "rewards/chosen": 1.4174001216888428, "rewards/margins": 2.3445682525634766, "rewards/rejected": -0.9271681904792786, "step": 1400 }, { "epoch": 0.96, "learning_rate": 2.2675736961451246e-08, "logits/chosen": -2.7801098823547363, "logits/rejected": -2.7490382194519043, "logps/chosen": -242.81613159179688, "logps/rejected": -361.264892578125, "loss": 0.2077, "rewards/accuracies": 0.94140625, "rewards/chosen": 1.4166629314422607, "rewards/margins": 2.316483736038208, "rewards/rejected": -0.8998208045959473, "step": 1410 }, { "epoch": 0.97, "learning_rate": 1.889644746787604e-08, "logits/chosen": -2.75722336769104, "logits/rejected": -2.7228329181671143, "logps/chosen": -251.5331268310547, "logps/rejected": -375.8110046386719, "loss": 0.2226, "rewards/accuracies": 0.940625011920929, "rewards/chosen": 1.4032243490219116, "rewards/margins": 2.2185873985290527, "rewards/rejected": -0.8153629302978516, "step": 1420 }, { "epoch": 0.97, "learning_rate": 1.511715797430083e-08, "logits/chosen": -2.7710134983062744, "logits/rejected": -2.787081241607666, "logps/chosen": -241.9620361328125, "logps/rejected": -356.4383544921875, "loss": 0.2074, "rewards/accuracies": 0.938281238079071, "rewards/chosen": 1.4058793783187866, "rewards/margins": 2.3387274742126465, "rewards/rejected": -0.9328481554985046, "step": 1430 }, { "epoch": 0.98, "learning_rate": 1.1337868480725623e-08, "logits/chosen": -2.788255214691162, "logits/rejected": -2.790001392364502, "logps/chosen": -249.0662078857422, "logps/rejected": -375.603759765625, "loss": 0.1976, "rewards/accuracies": 0.9429687261581421, "rewards/chosen": 1.4689807891845703, "rewards/margins": 2.375899076461792, "rewards/rejected": -0.9069182276725769, "step": 1440 }, { "epoch": 0.99, "learning_rate": 7.558578987150416e-09, "logits/chosen": -2.762585401535034, "logits/rejected": -2.7085330486297607, "logps/chosen": -238.41751098632812, "logps/rejected": -380.84942626953125, "loss": 0.2228, "rewards/accuracies": 0.936718761920929, "rewards/chosen": 1.4105838537216187, "rewards/margins": 2.214503288269043, "rewards/rejected": -0.8039194345474243, "step": 1450 }, { "epoch": 0.99, "learning_rate": 3.779289493575208e-09, "logits/chosen": -2.7654013633728027, "logits/rejected": -2.7555670738220215, "logps/chosen": -237.80899047851562, "logps/rejected": -345.7412109375, "loss": 0.2026, "rewards/accuracies": 0.9515625238418579, "rewards/chosen": 1.418304443359375, "rewards/margins": 2.327260971069336, "rewards/rejected": -0.9089563488960266, "step": 1460 }, { "epoch": 1.0, "learning_rate": 0.0, "logits/chosen": -2.75099515914917, "logits/rejected": -2.7724432945251465, "logps/chosen": -255.767578125, "logps/rejected": -360.5829772949219, "loss": 0.2019, "rewards/accuracies": 0.946093738079071, "rewards/chosen": 1.4355896711349487, "rewards/margins": 2.3439955711364746, "rewards/rejected": -0.9084057807922363, "step": 1470 }, { "epoch": 1.0, "eval_logits/chosen": -2.617767333984375, "eval_logits/rejected": -2.721874952316284, "eval_logps/chosen": -238.54788208007812, "eval_logps/rejected": -388.59033203125, "eval_loss": 0.20815864205360413, "eval_rewards/accuracies": 0.9413930773735046, "eval_rewards/chosen": 1.3856867551803589, "eval_rewards/margins": 2.292266845703125, "eval_rewards/rejected": -0.9065799117088318, "eval_runtime": 2798.4996, "eval_samples_per_second": 3.395, "eval_steps_per_second": 0.425, "step": 1470 }, { "epoch": 1.0, "step": 1470, "total_flos": 0.0, "train_loss": 0.33413780781687524, "train_runtime": 91396.7242, "train_samples_per_second": 2.06, "train_steps_per_second": 0.016 } ], "logging_steps": 10, "max_steps": 1470, "num_train_epochs": 1, "save_steps": 500, "total_flos": 0.0, "trial_name": null, "trial_params": null }