{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.9992254066615027, "eval_steps": 100, "global_step": 726, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 6.84931506849315e-09, "logits/chosen": -2.3491616249084473, "logits/rejected": -2.418564796447754, "logps/chosen": -271.3881530761719, "logps/rejected": -208.9749298095703, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1 }, { "epoch": 0.04, "learning_rate": 6.84931506849315e-08, "logits/chosen": -2.4231245517730713, "logits/rejected": -2.3566601276397705, "logps/chosen": -293.38800048828125, "logps/rejected": -226.29283142089844, "loss": 0.6933, "rewards/accuracies": 0.4548611044883728, "rewards/chosen": 0.003188559552654624, "rewards/margins": 0.0021638227626681328, "rewards/rejected": 0.0010247372556477785, "step": 10 }, { "epoch": 0.08, "learning_rate": 1.36986301369863e-07, "logits/chosen": -2.431933879852295, "logits/rejected": -2.405198574066162, "logps/chosen": -278.5166931152344, "logps/rejected": -216.7791290283203, "loss": 0.6942, "rewards/accuracies": 0.504687488079071, "rewards/chosen": -0.000816329091321677, "rewards/margins": 0.0019420869648456573, "rewards/rejected": -0.0027584161143749952, "step": 20 }, { "epoch": 0.12, "learning_rate": 2.054794520547945e-07, "logits/chosen": -2.389657497406006, "logits/rejected": -2.348972797393799, "logps/chosen": -252.9993438720703, "logps/rejected": -207.1633758544922, "loss": 0.6935, "rewards/accuracies": 0.53125, "rewards/chosen": -0.0014556010719388723, "rewards/margins": 0.0006705918349325657, "rewards/rejected": -0.0021261931397020817, "step": 30 }, { "epoch": 0.17, "learning_rate": 2.73972602739726e-07, "logits/chosen": -2.460561752319336, "logits/rejected": -2.414844036102295, "logps/chosen": -283.7592468261719, "logps/rejected": -216.4773712158203, "loss": 0.6909, "rewards/accuracies": 0.5484374761581421, "rewards/chosen": 0.004375931341201067, "rewards/margins": 0.00752140861004591, "rewards/rejected": -0.0031454775016754866, "step": 40 }, { "epoch": 0.21, "learning_rate": 3.424657534246575e-07, "logits/chosen": -2.4366953372955322, "logits/rejected": -2.3972277641296387, "logps/chosen": -267.2607727050781, "logps/rejected": -223.6705780029297, "loss": 0.6897, "rewards/accuracies": 0.5406249761581421, "rewards/chosen": 0.004865794442594051, "rewards/margins": 0.007948420941829681, "rewards/rejected": -0.0030826255679130554, "step": 50 }, { "epoch": 0.25, "learning_rate": 4.10958904109589e-07, "logits/chosen": -2.412304639816284, "logits/rejected": -2.3892178535461426, "logps/chosen": -266.85028076171875, "logps/rejected": -214.7494659423828, "loss": 0.6873, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.00616841483861208, "rewards/margins": 0.012422902509570122, "rewards/rejected": -0.006254489067941904, "step": 60 }, { "epoch": 0.29, "learning_rate": 4.794520547945205e-07, "logits/chosen": -2.390881299972534, "logits/rejected": -2.3977627754211426, "logps/chosen": -254.04043579101562, "logps/rejected": -214.6400909423828, "loss": 0.6847, "rewards/accuracies": 0.581250011920929, "rewards/chosen": 0.009625923819839954, "rewards/margins": 0.01777799427509308, "rewards/rejected": -0.008152070455253124, "step": 70 }, { "epoch": 0.33, "learning_rate": 4.946401225114854e-07, "logits/chosen": -2.4295055866241455, "logits/rejected": -2.37807035446167, "logps/chosen": -265.05718994140625, "logps/rejected": -218.77059936523438, "loss": 0.6795, "rewards/accuracies": 0.6015625, "rewards/chosen": 0.013735203072428703, "rewards/margins": 0.028081998229026794, "rewards/rejected": -0.014346795156598091, "step": 80 }, { "epoch": 0.37, "learning_rate": 4.869831546707504e-07, "logits/chosen": -2.4787497520446777, "logits/rejected": -2.422356128692627, "logps/chosen": -271.730224609375, "logps/rejected": -224.4839324951172, "loss": 0.6734, "rewards/accuracies": 0.640625, "rewards/chosen": 0.022287212312221527, "rewards/margins": 0.04716240242123604, "rewards/rejected": -0.02487519010901451, "step": 90 }, { "epoch": 0.41, "learning_rate": 4.793261868300153e-07, "logits/chosen": -2.417426586151123, "logits/rejected": -2.4039013385772705, "logps/chosen": -273.8762512207031, "logps/rejected": -227.797607421875, "loss": 0.6692, "rewards/accuracies": 0.659375011920929, "rewards/chosen": 0.025275733321905136, "rewards/margins": 0.057271964848041534, "rewards/rejected": -0.0319962315261364, "step": 100 }, { "epoch": 0.45, "learning_rate": 4.7166921898928023e-07, "logits/chosen": -2.4509871006011963, "logits/rejected": -2.3908462524414062, "logps/chosen": -252.6064453125, "logps/rejected": -222.64639282226562, "loss": 0.6628, "rewards/accuracies": 0.667187511920929, "rewards/chosen": 0.024279529228806496, "rewards/margins": 0.06397499889135361, "rewards/rejected": -0.03969546779990196, "step": 110 }, { "epoch": 0.5, "learning_rate": 4.640122511485451e-07, "logits/chosen": -2.412073850631714, "logits/rejected": -2.4017536640167236, "logps/chosen": -256.89813232421875, "logps/rejected": -219.8057098388672, "loss": 0.6581, "rewards/accuracies": 0.625, "rewards/chosen": 0.029207896441221237, "rewards/margins": 0.07588861137628555, "rewards/rejected": -0.04668071120977402, "step": 120 }, { "epoch": 0.54, "learning_rate": 4.563552833078101e-07, "logits/chosen": -2.4562458992004395, "logits/rejected": -2.3951973915100098, "logps/chosen": -265.9952697753906, "logps/rejected": -225.94125366210938, "loss": 0.6504, "rewards/accuracies": 0.65625, "rewards/chosen": 0.025836100801825523, "rewards/margins": 0.09206128865480423, "rewards/rejected": -0.06622518599033356, "step": 130 }, { "epoch": 0.58, "learning_rate": 4.4869831546707505e-07, "logits/chosen": -2.460266590118408, "logits/rejected": -2.401520013809204, "logps/chosen": -270.29888916015625, "logps/rejected": -230.37539672851562, "loss": 0.6495, "rewards/accuracies": 0.671875, "rewards/chosen": 0.03395112603902817, "rewards/margins": 0.11508414894342422, "rewards/rejected": -0.08113302290439606, "step": 140 }, { "epoch": 0.62, "learning_rate": 4.4104134762633994e-07, "logits/chosen": -2.4852752685546875, "logits/rejected": -2.4318604469299316, "logps/chosen": -265.973388671875, "logps/rejected": -226.55484008789062, "loss": 0.6433, "rewards/accuracies": 0.692187488079071, "rewards/chosen": 0.03317371755838394, "rewards/margins": 0.12171275913715363, "rewards/rejected": -0.08853904157876968, "step": 150 }, { "epoch": 0.66, "learning_rate": 4.333843797856049e-07, "logits/chosen": -2.4467227458953857, "logits/rejected": -2.4029695987701416, "logps/chosen": -279.33648681640625, "logps/rejected": -239.00009155273438, "loss": 0.6343, "rewards/accuracies": 0.6796875, "rewards/chosen": 0.04138711839914322, "rewards/margins": 0.1465190351009369, "rewards/rejected": -0.10513193905353546, "step": 160 }, { "epoch": 0.7, "learning_rate": 4.257274119448698e-07, "logits/chosen": -2.4798355102539062, "logits/rejected": -2.452397108078003, "logps/chosen": -270.727783203125, "logps/rejected": -239.09780883789062, "loss": 0.6335, "rewards/accuracies": 0.653124988079071, "rewards/chosen": 0.024355659261345863, "rewards/margins": 0.1352422684431076, "rewards/rejected": -0.11088661849498749, "step": 170 }, { "epoch": 0.74, "learning_rate": 4.180704441041347e-07, "logits/chosen": -2.4359121322631836, "logits/rejected": -2.388683795928955, "logps/chosen": -256.79022216796875, "logps/rejected": -226.1436767578125, "loss": 0.6367, "rewards/accuracies": 0.6640625, "rewards/chosen": 0.016556020826101303, "rewards/margins": 0.147763192653656, "rewards/rejected": -0.1312071532011032, "step": 180 }, { "epoch": 0.78, "learning_rate": 4.1041347626339966e-07, "logits/chosen": -2.4478251934051514, "logits/rejected": -2.4065427780151367, "logps/chosen": -261.63702392578125, "logps/rejected": -213.1779327392578, "loss": 0.6269, "rewards/accuracies": 0.668749988079071, "rewards/chosen": 0.02061801217496395, "rewards/margins": 0.17272573709487915, "rewards/rejected": -0.15210774540901184, "step": 190 }, { "epoch": 0.83, "learning_rate": 4.027565084226646e-07, "logits/chosen": -2.4714255332946777, "logits/rejected": -2.414602279663086, "logps/chosen": -262.29486083984375, "logps/rejected": -218.0116424560547, "loss": 0.6175, "rewards/accuracies": 0.6890624761581421, "rewards/chosen": 0.027450546622276306, "rewards/margins": 0.19447624683380127, "rewards/rejected": -0.16702571511268616, "step": 200 }, { "epoch": 0.87, "learning_rate": 3.9509954058192954e-07, "logits/chosen": -2.4752840995788574, "logits/rejected": -2.4354655742645264, "logps/chosen": -283.89959716796875, "logps/rejected": -231.7078399658203, "loss": 0.6161, "rewards/accuracies": 0.653124988079071, "rewards/chosen": 0.024292152374982834, "rewards/margins": 0.21237091720104218, "rewards/rejected": -0.18807876110076904, "step": 210 }, { "epoch": 0.91, "learning_rate": 3.874425727411945e-07, "logits/chosen": -2.422091007232666, "logits/rejected": -2.40881609916687, "logps/chosen": -276.7785339355469, "logps/rejected": -229.2734832763672, "loss": 0.6144, "rewards/accuracies": 0.660937488079071, "rewards/chosen": 0.017709506675601006, "rewards/margins": 0.21948948502540588, "rewards/rejected": -0.20178000628948212, "step": 220 }, { "epoch": 0.95, "learning_rate": 3.797856049004594e-07, "logits/chosen": -2.4441866874694824, "logits/rejected": -2.38869571685791, "logps/chosen": -264.10430908203125, "logps/rejected": -228.3271484375, "loss": 0.6117, "rewards/accuracies": 0.6875, "rewards/chosen": 0.029474353417754173, "rewards/margins": 0.22035422921180725, "rewards/rejected": -0.19087985157966614, "step": 230 }, { "epoch": 0.99, "learning_rate": 3.7212863705972436e-07, "logits/chosen": -2.4633097648620605, "logits/rejected": -2.4186224937438965, "logps/chosen": -271.4654235839844, "logps/rejected": -222.46841430664062, "loss": 0.6125, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": 0.024074096232652664, "rewards/margins": 0.2165375053882599, "rewards/rejected": -0.19246339797973633, "step": 240 }, { "epoch": 1.0, "eval_logits/chosen": -2.144517421722412, "eval_logits/rejected": -2.0242087841033936, "eval_logps/chosen": -264.5997619628906, "eval_logps/rejected": -221.6983184814453, "eval_loss": 0.6057174205780029, "eval_rewards/accuracies": 0.6759999990463257, "eval_rewards/chosen": 0.007874858565628529, "eval_rewards/margins": 0.24786852300167084, "eval_rewards/rejected": -0.23999367654323578, "eval_runtime": 238.8705, "eval_samples_per_second": 8.373, "eval_steps_per_second": 0.523, "step": 242 }, { "epoch": 1.03, "learning_rate": 3.6447166921898925e-07, "logits/chosen": -2.400252103805542, "logits/rejected": -2.3472890853881836, "logps/chosen": -257.4571838378906, "logps/rejected": -210.4391326904297, "loss": 0.6164, "rewards/accuracies": 0.6734374761581421, "rewards/chosen": 0.004229591693729162, "rewards/margins": 0.22420725226402283, "rewards/rejected": -0.21997769176959991, "step": 250 }, { "epoch": 1.07, "learning_rate": 3.568147013782542e-07, "logits/chosen": -2.415523052215576, "logits/rejected": -2.3758111000061035, "logps/chosen": -261.9351501464844, "logps/rejected": -226.16259765625, "loss": 0.6006, "rewards/accuracies": 0.7265625, "rewards/chosen": 0.023142099380493164, "rewards/margins": 0.2655286490917206, "rewards/rejected": -0.24238653481006622, "step": 260 }, { "epoch": 1.12, "learning_rate": 3.4915773353751913e-07, "logits/chosen": -2.429934024810791, "logits/rejected": -2.365861415863037, "logps/chosen": -278.4029846191406, "logps/rejected": -236.08688354492188, "loss": 0.5925, "rewards/accuracies": 0.7406250238418579, "rewards/chosen": 0.02966948226094246, "rewards/margins": 0.33913469314575195, "rewards/rejected": -0.3094651699066162, "step": 270 }, { "epoch": 1.16, "learning_rate": 3.41500765696784e-07, "logits/chosen": -2.4358582496643066, "logits/rejected": -2.396267890930176, "logps/chosen": -251.093017578125, "logps/rejected": -225.80685424804688, "loss": 0.6036, "rewards/accuracies": 0.692187488079071, "rewards/chosen": 0.010072538629174232, "rewards/margins": 0.24589493870735168, "rewards/rejected": -0.2358224093914032, "step": 280 }, { "epoch": 1.2, "learning_rate": 3.33843797856049e-07, "logits/chosen": -2.408804416656494, "logits/rejected": -2.394888401031494, "logps/chosen": -283.15380859375, "logps/rejected": -228.33767700195312, "loss": 0.5915, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": 0.008710218593478203, "rewards/margins": 0.3084966242313385, "rewards/rejected": -0.29978638887405396, "step": 290 }, { "epoch": 1.24, "learning_rate": 3.2618683001531396e-07, "logits/chosen": -2.4084572792053223, "logits/rejected": -2.337435722351074, "logps/chosen": -261.3924865722656, "logps/rejected": -227.77651977539062, "loss": 0.588, "rewards/accuracies": 0.698437511920929, "rewards/chosen": 0.008477389812469482, "rewards/margins": 0.298746258020401, "rewards/rejected": -0.29026883840560913, "step": 300 }, { "epoch": 1.28, "learning_rate": 3.1852986217457885e-07, "logits/chosen": -2.4575297832489014, "logits/rejected": -2.373924493789673, "logps/chosen": -261.287109375, "logps/rejected": -228.5553741455078, "loss": 0.5972, "rewards/accuracies": 0.6953125, "rewards/chosen": 0.002108477521687746, "rewards/margins": 0.2948620915412903, "rewards/rejected": -0.2927536368370056, "step": 310 }, { "epoch": 1.32, "learning_rate": 3.108728943338438e-07, "logits/chosen": -2.4443328380584717, "logits/rejected": -2.4351658821105957, "logps/chosen": -252.80996704101562, "logps/rejected": -237.87631225585938, "loss": 0.5943, "rewards/accuracies": 0.684374988079071, "rewards/chosen": -0.015406561084091663, "rewards/margins": 0.2515925168991089, "rewards/rejected": -0.2669990658760071, "step": 320 }, { "epoch": 1.36, "learning_rate": 3.0321592649310873e-07, "logits/chosen": -2.424647092819214, "logits/rejected": -2.357273578643799, "logps/chosen": -253.7325897216797, "logps/rejected": -224.3144073486328, "loss": 0.587, "rewards/accuracies": 0.6859375238418579, "rewards/chosen": -0.0076437839306890965, "rewards/margins": 0.2989902198314667, "rewards/rejected": -0.30663400888442993, "step": 330 }, { "epoch": 1.4, "learning_rate": 2.955589586523736e-07, "logits/chosen": -2.4427545070648193, "logits/rejected": -2.3824856281280518, "logps/chosen": -265.68939208984375, "logps/rejected": -226.4335174560547, "loss": 0.592, "rewards/accuracies": 0.7015625238418579, "rewards/chosen": -0.015530401840806007, "rewards/margins": 0.3260301351547241, "rewards/rejected": -0.3415605425834656, "step": 340 }, { "epoch": 1.45, "learning_rate": 2.8790199081163856e-07, "logits/chosen": -2.439944267272949, "logits/rejected": -2.3695976734161377, "logps/chosen": -266.065673828125, "logps/rejected": -225.2880859375, "loss": 0.5939, "rewards/accuracies": 0.6656249761581421, "rewards/chosen": -0.026788845658302307, "rewards/margins": 0.28384846448898315, "rewards/rejected": -0.31063732504844666, "step": 350 }, { "epoch": 1.49, "learning_rate": 2.802450229709035e-07, "logits/chosen": -2.399728298187256, "logits/rejected": -2.3489761352539062, "logps/chosen": -254.9022216796875, "logps/rejected": -213.33193969726562, "loss": 0.5847, "rewards/accuracies": 0.667187511920929, "rewards/chosen": -0.04024948924779892, "rewards/margins": 0.29891303181648254, "rewards/rejected": -0.33916252851486206, "step": 360 }, { "epoch": 1.53, "learning_rate": 2.725880551301684e-07, "logits/chosen": -2.462254047393799, "logits/rejected": -2.406602621078491, "logps/chosen": -274.6975402832031, "logps/rejected": -232.84591674804688, "loss": 0.5929, "rewards/accuracies": 0.6875, "rewards/chosen": -0.02199350856244564, "rewards/margins": 0.31067317724227905, "rewards/rejected": -0.33266669511795044, "step": 370 }, { "epoch": 1.57, "learning_rate": 2.649310872894334e-07, "logits/chosen": -2.4482955932617188, "logits/rejected": -2.4154446125030518, "logps/chosen": -275.00775146484375, "logps/rejected": -223.1331787109375, "loss": 0.5816, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.020305102691054344, "rewards/margins": 0.37037259340286255, "rewards/rejected": -0.3906777501106262, "step": 380 }, { "epoch": 1.61, "learning_rate": 2.572741194486983e-07, "logits/chosen": -2.448878765106201, "logits/rejected": -2.393206834793091, "logps/chosen": -273.81109619140625, "logps/rejected": -208.37985229492188, "loss": 0.5799, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -0.029856573790311813, "rewards/margins": 0.3645634055137634, "rewards/rejected": -0.3944200277328491, "step": 390 }, { "epoch": 1.65, "learning_rate": 2.496171516079632e-07, "logits/chosen": -2.4658501148223877, "logits/rejected": -2.399857521057129, "logps/chosen": -293.2225341796875, "logps/rejected": -239.4982452392578, "loss": 0.5813, "rewards/accuracies": 0.734375, "rewards/chosen": 0.0061371102929115295, "rewards/margins": 0.399463027715683, "rewards/rejected": -0.39332595467567444, "step": 400 }, { "epoch": 1.69, "learning_rate": 2.4196018376722816e-07, "logits/chosen": -2.429685115814209, "logits/rejected": -2.4006247520446777, "logps/chosen": -278.5813903808594, "logps/rejected": -228.4702911376953, "loss": 0.5864, "rewards/accuracies": 0.6859375238418579, "rewards/chosen": -0.02374974638223648, "rewards/margins": 0.37017589807510376, "rewards/rejected": -0.39392566680908203, "step": 410 }, { "epoch": 1.74, "learning_rate": 2.343032159264931e-07, "logits/chosen": -2.403900146484375, "logits/rejected": -2.3333194255828857, "logps/chosen": -268.872802734375, "logps/rejected": -224.37728881835938, "loss": 0.579, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -0.020599449053406715, "rewards/margins": 0.431951105594635, "rewards/rejected": -0.4525505602359772, "step": 420 }, { "epoch": 1.78, "learning_rate": 2.26646248085758e-07, "logits/chosen": -2.383470058441162, "logits/rejected": -2.3353710174560547, "logps/chosen": -259.7237854003906, "logps/rejected": -217.79946899414062, "loss": 0.573, "rewards/accuracies": 0.723437488079071, "rewards/chosen": -0.0417955107986927, "rewards/margins": 0.39140504598617554, "rewards/rejected": -0.43320053815841675, "step": 430 }, { "epoch": 1.82, "learning_rate": 2.1898928024502298e-07, "logits/chosen": -2.4446728229522705, "logits/rejected": -2.3874154090881348, "logps/chosen": -263.4950256347656, "logps/rejected": -221.4724578857422, "loss": 0.5753, "rewards/accuracies": 0.7015625238418579, "rewards/chosen": -0.02173582836985588, "rewards/margins": 0.39518997073173523, "rewards/rejected": -0.4169258177280426, "step": 440 }, { "epoch": 1.86, "learning_rate": 2.113323124042879e-07, "logits/chosen": -2.4275262355804443, "logits/rejected": -2.3907971382141113, "logps/chosen": -271.2684326171875, "logps/rejected": -231.44381713867188, "loss": 0.5744, "rewards/accuracies": 0.721875011920929, "rewards/chosen": -0.020908143371343613, "rewards/margins": 0.41243448853492737, "rewards/rejected": -0.4333426058292389, "step": 450 }, { "epoch": 1.9, "learning_rate": 2.036753445635528e-07, "logits/chosen": -2.4363036155700684, "logits/rejected": -2.4147400856018066, "logps/chosen": -284.01824951171875, "logps/rejected": -238.273681640625, "loss": 0.569, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -0.018130071461200714, "rewards/margins": 0.4541456699371338, "rewards/rejected": -0.4722757339477539, "step": 460 }, { "epoch": 1.94, "learning_rate": 1.9601837672281775e-07, "logits/chosen": -2.4180634021759033, "logits/rejected": -2.3854622840881348, "logps/chosen": -270.1515808105469, "logps/rejected": -236.3723907470703, "loss": 0.576, "rewards/accuracies": 0.7046874761581421, "rewards/chosen": -0.05924994498491287, "rewards/margins": 0.37609511613845825, "rewards/rejected": -0.4353450834751129, "step": 470 }, { "epoch": 1.98, "learning_rate": 1.883614088820827e-07, "logits/chosen": -2.4381699562072754, "logits/rejected": -2.391515016555786, "logps/chosen": -268.735595703125, "logps/rejected": -224.8667755126953, "loss": 0.5849, "rewards/accuracies": 0.723437488079071, "rewards/chosen": -0.04678649455308914, "rewards/margins": 0.3817201852798462, "rewards/rejected": -0.42850667238235474, "step": 480 }, { "epoch": 2.0, "eval_logits/chosen": -2.127939224243164, "eval_logits/rejected": -2.007131576538086, "eval_logps/chosen": -265.25634765625, "eval_logps/rejected": -224.01229858398438, "eval_loss": 0.5730655789375305, "eval_rewards/accuracies": 0.6899999976158142, "eval_rewards/chosen": -0.05778134614229202, "eval_rewards/margins": 0.4136123061180115, "eval_rewards/rejected": -0.4713936746120453, "eval_runtime": 239.033, "eval_samples_per_second": 8.367, "eval_steps_per_second": 0.523, "step": 484 }, { "epoch": 2.02, "learning_rate": 1.807044410413476e-07, "logits/chosen": -2.4097964763641357, "logits/rejected": -2.3763108253479004, "logps/chosen": -257.9292297363281, "logps/rejected": -236.3641815185547, "loss": 0.5772, "rewards/accuracies": 0.703125, "rewards/chosen": -0.05177872255444527, "rewards/margins": 0.39789050817489624, "rewards/rejected": -0.4496693015098572, "step": 490 }, { "epoch": 2.07, "learning_rate": 1.7304747320061255e-07, "logits/chosen": -2.4072229862213135, "logits/rejected": -2.4033942222595215, "logps/chosen": -263.5710754394531, "logps/rejected": -230.6610107421875, "loss": 0.5772, "rewards/accuracies": 0.707812488079071, "rewards/chosen": -0.019381705671548843, "rewards/margins": 0.4000469744205475, "rewards/rejected": -0.41942867636680603, "step": 500 }, { "epoch": 2.11, "learning_rate": 1.6539050535987747e-07, "logits/chosen": -2.4798319339752197, "logits/rejected": -2.370913028717041, "logps/chosen": -270.12432861328125, "logps/rejected": -225.058349609375, "loss": 0.5712, "rewards/accuracies": 0.7093750238418579, "rewards/chosen": -0.039138875901699066, "rewards/margins": 0.4365014135837555, "rewards/rejected": -0.47564029693603516, "step": 510 }, { "epoch": 2.15, "learning_rate": 1.5773353751914243e-07, "logits/chosen": -2.4861385822296143, "logits/rejected": -2.425265312194824, "logps/chosen": -284.8677673339844, "logps/rejected": -229.98681640625, "loss": 0.5715, "rewards/accuracies": 0.7359374761581421, "rewards/chosen": -0.024127285927534103, "rewards/margins": 0.4679562449455261, "rewards/rejected": -0.4920835494995117, "step": 520 }, { "epoch": 2.19, "learning_rate": 1.5007656967840735e-07, "logits/chosen": -2.383533000946045, "logits/rejected": -2.3430206775665283, "logps/chosen": -254.0509796142578, "logps/rejected": -230.5810089111328, "loss": 0.5677, "rewards/accuracies": 0.721875011920929, "rewards/chosen": -0.039885733276605606, "rewards/margins": 0.4442899823188782, "rewards/rejected": -0.4841756820678711, "step": 530 }, { "epoch": 2.23, "learning_rate": 1.4241960183767226e-07, "logits/chosen": -2.4291586875915527, "logits/rejected": -2.372559070587158, "logps/chosen": -282.87982177734375, "logps/rejected": -235.8987274169922, "loss": 0.573, "rewards/accuracies": 0.692187488079071, "rewards/chosen": -0.05947133153676987, "rewards/margins": 0.41908422112464905, "rewards/rejected": -0.4785555303096771, "step": 540 }, { "epoch": 2.27, "learning_rate": 1.347626339969372e-07, "logits/chosen": -2.423152208328247, "logits/rejected": -2.3877062797546387, "logps/chosen": -270.82269287109375, "logps/rejected": -242.1062469482422, "loss": 0.5759, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -0.01524378638714552, "rewards/margins": 0.40917444229125977, "rewards/rejected": -0.42441821098327637, "step": 550 }, { "epoch": 2.31, "learning_rate": 1.2710566615620215e-07, "logits/chosen": -2.3735625743865967, "logits/rejected": -2.327951431274414, "logps/chosen": -274.332763671875, "logps/rejected": -225.1637420654297, "loss": 0.5594, "rewards/accuracies": 0.707812488079071, "rewards/chosen": -0.03534569963812828, "rewards/margins": 0.4445571005344391, "rewards/rejected": -0.47990283370018005, "step": 560 }, { "epoch": 2.35, "learning_rate": 1.1944869831546706e-07, "logits/chosen": -2.3997702598571777, "logits/rejected": -2.3793346881866455, "logps/chosen": -267.025390625, "logps/rejected": -238.75692749023438, "loss": 0.5724, "rewards/accuracies": 0.6968749761581421, "rewards/chosen": -0.07640588283538818, "rewards/margins": 0.4082149565219879, "rewards/rejected": -0.4846208095550537, "step": 570 }, { "epoch": 2.4, "learning_rate": 1.11791730474732e-07, "logits/chosen": -2.4132089614868164, "logits/rejected": -2.3745548725128174, "logps/chosen": -262.74658203125, "logps/rejected": -226.48898315429688, "loss": 0.5658, "rewards/accuracies": 0.7359374761581421, "rewards/chosen": -0.07082664221525192, "rewards/margins": 0.48243194818496704, "rewards/rejected": -0.5532585382461548, "step": 580 }, { "epoch": 2.44, "learning_rate": 1.0413476263399694e-07, "logits/chosen": -2.451371669769287, "logits/rejected": -2.407169818878174, "logps/chosen": -269.4725646972656, "logps/rejected": -219.11929321289062, "loss": 0.5664, "rewards/accuracies": 0.729687511920929, "rewards/chosen": -0.07148631662130356, "rewards/margins": 0.4374977946281433, "rewards/rejected": -0.5089840888977051, "step": 590 }, { "epoch": 2.48, "learning_rate": 9.647779479326186e-08, "logits/chosen": -2.3929615020751953, "logits/rejected": -2.3882527351379395, "logps/chosen": -251.06576538085938, "logps/rejected": -224.4808807373047, "loss": 0.5768, "rewards/accuracies": 0.667187511920929, "rewards/chosen": -0.06799022853374481, "rewards/margins": 0.3539872467517853, "rewards/rejected": -0.42197751998901367, "step": 600 }, { "epoch": 2.52, "learning_rate": 8.88208269525268e-08, "logits/chosen": -2.3815102577209473, "logits/rejected": -2.3912739753723145, "logps/chosen": -260.7129821777344, "logps/rejected": -223.4461212158203, "loss": 0.566, "rewards/accuracies": 0.71875, "rewards/chosen": -0.06933742761611938, "rewards/margins": 0.41847410798072815, "rewards/rejected": -0.48781150579452515, "step": 610 }, { "epoch": 2.56, "learning_rate": 8.116385911179173e-08, "logits/chosen": -2.3711135387420654, "logits/rejected": -2.3626675605773926, "logps/chosen": -279.54461669921875, "logps/rejected": -219.48974609375, "loss": 0.5701, "rewards/accuracies": 0.734375, "rewards/chosen": -0.023505648598074913, "rewards/margins": 0.4627605378627777, "rewards/rejected": -0.4862661361694336, "step": 620 }, { "epoch": 2.6, "learning_rate": 7.350689127105667e-08, "logits/chosen": -2.4528985023498535, "logits/rejected": -2.3787388801574707, "logps/chosen": -272.67572021484375, "logps/rejected": -232.7178192138672, "loss": 0.5603, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -0.0497988685965538, "rewards/margins": 0.46719294786453247, "rewards/rejected": -0.5169917941093445, "step": 630 }, { "epoch": 2.64, "learning_rate": 6.584992343032159e-08, "logits/chosen": -2.355626106262207, "logits/rejected": -2.342153549194336, "logps/chosen": -263.1979675292969, "logps/rejected": -229.1007843017578, "loss": 0.5752, "rewards/accuracies": 0.690625011920929, "rewards/chosen": -0.04885732755064964, "rewards/margins": 0.42579683661460876, "rewards/rejected": -0.4746541380882263, "step": 640 }, { "epoch": 2.69, "learning_rate": 5.819295558958652e-08, "logits/chosen": -2.3994874954223633, "logits/rejected": -2.34912109375, "logps/chosen": -288.324462890625, "logps/rejected": -222.3997344970703, "loss": 0.5646, "rewards/accuracies": 0.7484375238418579, "rewards/chosen": -0.021456807851791382, "rewards/margins": 0.5244570374488831, "rewards/rejected": -0.545913815498352, "step": 650 }, { "epoch": 2.73, "learning_rate": 5.0535987748851455e-08, "logits/chosen": -2.4647653102874756, "logits/rejected": -2.4115538597106934, "logps/chosen": -275.77947998046875, "logps/rejected": -231.9734649658203, "loss": 0.5621, "rewards/accuracies": 0.7015625238418579, "rewards/chosen": -0.07127931714057922, "rewards/margins": 0.41370710730552673, "rewards/rejected": -0.48498645424842834, "step": 660 }, { "epoch": 2.77, "learning_rate": 4.287901990811638e-08, "logits/chosen": -2.4428927898406982, "logits/rejected": -2.3569588661193848, "logps/chosen": -268.5105285644531, "logps/rejected": -244.9532928466797, "loss": 0.5636, "rewards/accuracies": 0.7109375, "rewards/chosen": -0.048072461038827896, "rewards/margins": 0.46722808480262756, "rewards/rejected": -0.5153006315231323, "step": 670 }, { "epoch": 2.81, "learning_rate": 3.522205206738132e-08, "logits/chosen": -2.4071362018585205, "logits/rejected": -2.3730602264404297, "logps/chosen": -275.3606872558594, "logps/rejected": -230.1616668701172, "loss": 0.5682, "rewards/accuracies": 0.71875, "rewards/chosen": -0.076107919216156, "rewards/margins": 0.44682103395462036, "rewards/rejected": -0.5229289531707764, "step": 680 }, { "epoch": 2.85, "learning_rate": 2.7565084226646246e-08, "logits/chosen": -2.3889846801757812, "logits/rejected": -2.376112937927246, "logps/chosen": -264.30804443359375, "logps/rejected": -236.71640014648438, "loss": 0.5747, "rewards/accuracies": 0.7109375, "rewards/chosen": -0.04645932838320732, "rewards/margins": 0.4625559449195862, "rewards/rejected": -0.5090152621269226, "step": 690 }, { "epoch": 2.89, "learning_rate": 1.9908116385911178e-08, "logits/chosen": -2.4064643383026123, "logits/rejected": -2.388768434524536, "logps/chosen": -260.59393310546875, "logps/rejected": -227.6177215576172, "loss": 0.5718, "rewards/accuracies": 0.671875, "rewards/chosen": -0.09053535759449005, "rewards/margins": 0.3823908865451813, "rewards/rejected": -0.47292619943618774, "step": 700 }, { "epoch": 2.93, "learning_rate": 1.225114854517611e-08, "logits/chosen": -2.436859130859375, "logits/rejected": -2.3719522953033447, "logps/chosen": -280.87774658203125, "logps/rejected": -221.83944702148438, "loss": 0.5696, "rewards/accuracies": 0.7093750238418579, "rewards/chosen": -0.06765580177307129, "rewards/margins": 0.4343256950378418, "rewards/rejected": -0.5019814968109131, "step": 710 }, { "epoch": 2.97, "learning_rate": 4.594180704441042e-09, "logits/chosen": -2.4007372856140137, "logits/rejected": -2.378627300262451, "logps/chosen": -257.9851989746094, "logps/rejected": -225.0294189453125, "loss": 0.5671, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.053695209324359894, "rewards/margins": 0.4172247052192688, "rewards/rejected": -0.4709199070930481, "step": 720 }, { "epoch": 3.0, "eval_logits/chosen": -2.1214964389801025, "eval_logits/rejected": -2.000164031982422, "eval_logps/chosen": -265.42364501953125, "eval_logps/rejected": -224.6269989013672, "eval_loss": 0.5655443072319031, "eval_rewards/accuracies": 0.699999988079071, "eval_rewards/chosen": -0.07451467216014862, "eval_rewards/margins": 0.4583480656147003, "eval_rewards/rejected": -0.5328627228736877, "eval_runtime": 239.2829, "eval_samples_per_second": 8.358, "eval_steps_per_second": 0.522, "step": 726 }, { "epoch": 3.0, "step": 726, "total_flos": 0.0, "train_loss": 0.6037390495627379, "train_runtime": 33041.7964, "train_samples_per_second": 5.626, "train_steps_per_second": 0.022 } ], "logging_steps": 10, "max_steps": 726, "num_train_epochs": 3, "save_steps": 500, "total_flos": 0.0, "trial_name": null, "trial_params": null }