diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,12859 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.9999296814570002, + "eval_steps": 500, + "global_step": 3555, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0014063708599957809, + "grad_norm": 23.875, + "learning_rate": 7.02247191011236e-08, + "log_odds_chosen": 0.8099881410598755, + "log_odds_ratio": -0.6670631170272827, + "logits/chosen": 0.35691261291503906, + "logits/rejected": 0.04136817157268524, + "logps/chosen": -2.435549259185791, + "logps/rejected": -3.192267656326294, + "loss": 3.9113, + "nll_loss": 3.307011127471924, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.24355490505695343, + "rewards/margins": 0.07567180693149567, + "rewards/rejected": -0.3192267417907715, + "step": 5 + }, + { + "epoch": 0.0028127417199915617, + "grad_norm": 16.5, + "learning_rate": 1.404494382022472e-07, + "log_odds_chosen": 0.09973736107349396, + "log_odds_ratio": -0.8319740295410156, + "logits/chosen": 0.4880150854587555, + "logits/rejected": 0.18690350651741028, + "logps/chosen": -2.3327722549438477, + "logps/rejected": -2.4167592525482178, + "loss": 3.9321, + "nll_loss": 3.0211567878723145, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.23327720165252686, + "rewards/margins": 0.008398734033107758, + "rewards/rejected": -0.2416759431362152, + "step": 10 + }, + { + "epoch": 0.0042191125799873426, + "grad_norm": 28.375, + "learning_rate": 2.106741573033708e-07, + "log_odds_chosen": 0.46809667348861694, + "log_odds_ratio": -1.068535566329956, + "logits/chosen": 0.11707712709903717, + "logits/rejected": -0.04653818532824516, + "logps/chosen": -3.06805157661438, + "logps/rejected": -3.482112407684326, + "loss": 4.1233, + "nll_loss": 4.485260486602783, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.30680516362190247, + "rewards/margins": 0.04140608757734299, + "rewards/rejected": -0.34821125864982605, + "step": 15 + }, + { + "epoch": 0.005625483439983123, + "grad_norm": 26.25, + "learning_rate": 2.808988764044944e-07, + "log_odds_chosen": 0.4251781404018402, + "log_odds_ratio": -0.8865317106246948, + "logits/chosen": 0.14278806746006012, + "logits/rejected": -0.1758263260126114, + "logps/chosen": -2.5326249599456787, + "logps/rejected": -2.925166606903076, + "loss": 3.9756, + "nll_loss": 3.745863437652588, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.2532625198364258, + "rewards/margins": 0.03925413638353348, + "rewards/rejected": -0.29251664876937866, + "step": 20 + }, + { + "epoch": 0.007031854299978904, + "grad_norm": 34.75, + "learning_rate": 3.5112359550561806e-07, + "log_odds_chosen": -0.24400389194488525, + "log_odds_ratio": -1.3111430406570435, + "logits/chosen": 0.28750285506248474, + "logits/rejected": 0.3935859799385071, + "logps/chosen": -2.9908688068389893, + "logps/rejected": -2.76876163482666, + "loss": 3.5984, + "nll_loss": 3.786576509475708, + "rewards/accuracies": 0.4000000059604645, + "rewards/chosen": -0.29908689856529236, + "rewards/margins": -0.022210732102394104, + "rewards/rejected": -0.27687615156173706, + "step": 25 + }, + { + "epoch": 0.008438225159974685, + "grad_norm": 30.75, + "learning_rate": 4.213483146067416e-07, + "log_odds_chosen": -0.18453823029994965, + "log_odds_ratio": -1.4048383235931396, + "logits/chosen": 0.19974537193775177, + "logits/rejected": 0.11300679296255112, + "logps/chosen": -2.9378466606140137, + "logps/rejected": -2.737946033477783, + "loss": 3.9317, + "nll_loss": 4.119207859039307, + "rewards/accuracies": 0.44999998807907104, + "rewards/chosen": -0.2937846779823303, + "rewards/margins": -0.01999005302786827, + "rewards/rejected": -0.27379459142684937, + "step": 30 + }, + { + "epoch": 0.009844596019970467, + "grad_norm": 14.8125, + "learning_rate": 4.915730337078652e-07, + "log_odds_chosen": 0.26785966753959656, + "log_odds_ratio": -0.9891396760940552, + "logits/chosen": 0.1649080067873001, + "logits/rejected": 0.061838340014219284, + "logps/chosen": -3.313523054122925, + "logps/rejected": -3.5287063121795654, + "loss": 3.7722, + "nll_loss": 4.149771690368652, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.3313523232936859, + "rewards/margins": 0.021518340334296227, + "rewards/rejected": -0.3528706431388855, + "step": 35 + }, + { + "epoch": 0.011250966879966247, + "grad_norm": 12.5, + "learning_rate": 5.617977528089888e-07, + "log_odds_chosen": 0.4358310103416443, + "log_odds_ratio": -1.1050533056259155, + "logits/chosen": 0.384712278842926, + "logits/rejected": 0.3466298282146454, + "logps/chosen": -2.482825517654419, + "logps/rejected": -2.926034927368164, + "loss": 3.6796, + "nll_loss": 3.56708025932312, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.2482825517654419, + "rewards/margins": 0.04432091861963272, + "rewards/rejected": -0.292603462934494, + "step": 40 + }, + { + "epoch": 0.012657337739962029, + "grad_norm": 11.125, + "learning_rate": 6.320224719101125e-07, + "log_odds_chosen": 0.1292145997285843, + "log_odds_ratio": -1.3003278970718384, + "logits/chosen": 0.37478917837142944, + "logits/rejected": 0.2303522825241089, + "logps/chosen": -3.015897750854492, + "logps/rejected": -3.129483699798584, + "loss": 3.6349, + "nll_loss": 3.1559722423553467, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.3015897870063782, + "rewards/margins": 0.011358583346009254, + "rewards/rejected": -0.31294840574264526, + "step": 45 + }, + { + "epoch": 0.014063708599957809, + "grad_norm": 13.125, + "learning_rate": 7.022471910112361e-07, + "log_odds_chosen": 0.9067404866218567, + "log_odds_ratio": -0.7627468109130859, + "logits/chosen": 0.26700717210769653, + "logits/rejected": 0.2883050739765167, + "logps/chosen": -2.3781630992889404, + "logps/rejected": -3.290259838104248, + "loss": 3.5748, + "nll_loss": 3.8330466747283936, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.23781633377075195, + "rewards/margins": 0.09120965003967285, + "rewards/rejected": -0.3290259838104248, + "step": 50 + }, + { + "epoch": 0.01547007945995359, + "grad_norm": 20.375, + "learning_rate": 7.724719101123595e-07, + "log_odds_chosen": 0.7076248526573181, + "log_odds_ratio": -1.5457459688186646, + "logits/chosen": 0.2677770256996155, + "logits/rejected": -0.002324029803276062, + "logps/chosen": -2.7843823432922363, + "logps/rejected": -3.423976182937622, + "loss": 3.7694, + "nll_loss": 3.2589526176452637, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.2784382402896881, + "rewards/margins": 0.06395940482616425, + "rewards/rejected": -0.34239763021469116, + "step": 55 + }, + { + "epoch": 0.01687645031994937, + "grad_norm": 11.5, + "learning_rate": 8.426966292134832e-07, + "log_odds_chosen": 0.19437791407108307, + "log_odds_ratio": -0.8209785223007202, + "logits/chosen": 0.42011842131614685, + "logits/rejected": 0.30973678827285767, + "logps/chosen": -1.9150886535644531, + "logps/rejected": -2.0626280307769775, + "loss": 3.3113, + "nll_loss": 2.7434136867523193, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.19150885939598083, + "rewards/margins": 0.014753949828445911, + "rewards/rejected": -0.20626279711723328, + "step": 60 + }, + { + "epoch": 0.01828282117994515, + "grad_norm": 8.4375, + "learning_rate": 9.129213483146068e-07, + "log_odds_chosen": -0.5769067406654358, + "log_odds_ratio": -1.397226095199585, + "logits/chosen": 0.33570200204849243, + "logits/rejected": 0.24612624943256378, + "logps/chosen": -2.5165371894836426, + "logps/rejected": -1.963801383972168, + "loss": 3.5497, + "nll_loss": 3.5812149047851562, + "rewards/accuracies": 0.44999998807907104, + "rewards/chosen": -0.2516537308692932, + "rewards/margins": -0.055273573845624924, + "rewards/rejected": -0.1963801383972168, + "step": 65 + }, + { + "epoch": 0.019689192039940934, + "grad_norm": 12.25, + "learning_rate": 9.831460674157304e-07, + "log_odds_chosen": 0.38934630155563354, + "log_odds_ratio": -0.7580283880233765, + "logits/chosen": 0.6683182716369629, + "logits/rejected": 0.12224721908569336, + "logps/chosen": -1.6888742446899414, + "logps/rejected": -1.999748945236206, + "loss": 2.8879, + "nll_loss": 2.2645983695983887, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.1688874214887619, + "rewards/margins": 0.031087476760149002, + "rewards/rejected": -0.1999748945236206, + "step": 70 + }, + { + "epoch": 0.021095562899936714, + "grad_norm": 16.125, + "learning_rate": 1.053370786516854e-06, + "log_odds_chosen": 0.6304991841316223, + "log_odds_ratio": -0.6832792162895203, + "logits/chosen": 0.39566153287887573, + "logits/rejected": 0.08898230642080307, + "logps/chosen": -2.1146538257598877, + "logps/rejected": -2.7325990200042725, + "loss": 2.8228, + "nll_loss": 2.631037473678589, + "rewards/accuracies": 0.44999998807907104, + "rewards/chosen": -0.21146538853645325, + "rewards/margins": 0.06179451197385788, + "rewards/rejected": -0.2732599079608917, + "step": 75 + }, + { + "epoch": 0.022501933759932494, + "grad_norm": 8.0, + "learning_rate": 1.1235955056179777e-06, + "log_odds_chosen": -0.3980503976345062, + "log_odds_ratio": -1.2211532592773438, + "logits/chosen": 0.26113444566726685, + "logits/rejected": 0.28973740339279175, + "logps/chosen": -2.7230300903320312, + "logps/rejected": -2.336246967315674, + "loss": 2.7579, + "nll_loss": 2.8366832733154297, + "rewards/accuracies": 0.4000000059604645, + "rewards/chosen": -0.2723030149936676, + "rewards/margins": -0.03867829591035843, + "rewards/rejected": -0.23362469673156738, + "step": 80 + }, + { + "epoch": 0.023908304619928274, + "grad_norm": 13.4375, + "learning_rate": 1.1938202247191013e-06, + "log_odds_chosen": 0.149004727602005, + "log_odds_ratio": -0.8590810894966125, + "logits/chosen": 0.5030871629714966, + "logits/rejected": 0.10671982914209366, + "logps/chosen": -1.9744809865951538, + "logps/rejected": -2.104811191558838, + "loss": 2.6968, + "nll_loss": 2.141251802444458, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.19744810461997986, + "rewards/margins": 0.0130330054089427, + "rewards/rejected": -0.21048113703727722, + "step": 85 + }, + { + "epoch": 0.025314675479924057, + "grad_norm": 11.875, + "learning_rate": 1.264044943820225e-06, + "log_odds_chosen": 0.8776494860649109, + "log_odds_ratio": -0.7410035133361816, + "logits/chosen": 0.47830313444137573, + "logits/rejected": -0.01859574392437935, + "logps/chosen": -2.096220016479492, + "logps/rejected": -2.9262399673461914, + "loss": 2.5261, + "nll_loss": 2.406276226043701, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.20962199568748474, + "rewards/margins": 0.083002008497715, + "rewards/rejected": -0.29262399673461914, + "step": 90 + }, + { + "epoch": 0.026721046339919837, + "grad_norm": 7.9375, + "learning_rate": 1.3342696629213484e-06, + "log_odds_chosen": 0.411318302154541, + "log_odds_ratio": -0.9822152256965637, + "logits/chosen": 0.041368693113327026, + "logits/rejected": 0.16315683722496033, + "logps/chosen": -2.8642966747283936, + "logps/rejected": -3.3535690307617188, + "loss": 2.4978, + "nll_loss": 2.6924140453338623, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.28642967343330383, + "rewards/margins": 0.04892724007368088, + "rewards/rejected": -0.3353568911552429, + "step": 95 + }, + { + "epoch": 0.028127417199915617, + "grad_norm": 3.90625, + "learning_rate": 1.4044943820224722e-06, + "log_odds_chosen": 0.7318227291107178, + "log_odds_ratio": -0.6240326762199402, + "logits/chosen": 0.21956415474414825, + "logits/rejected": -0.1278270184993744, + "logps/chosen": -2.337470531463623, + "logps/rejected": -3.0502095222473145, + "loss": 2.3359, + "nll_loss": 2.661849021911621, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.23374707996845245, + "rewards/margins": 0.07127388566732407, + "rewards/rejected": -0.3050209581851959, + "step": 100 + }, + { + "epoch": 0.029533788059911397, + "grad_norm": 6.71875, + "learning_rate": 1.4747191011235956e-06, + "log_odds_chosen": 1.1011137962341309, + "log_odds_ratio": -0.7636040449142456, + "logits/chosen": 0.2855343818664551, + "logits/rejected": 0.18099990487098694, + "logps/chosen": -2.057398557662964, + "logps/rejected": -3.087165355682373, + "loss": 2.2776, + "nll_loss": 2.0564818382263184, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.20573988556861877, + "rewards/margins": 0.10297667980194092, + "rewards/rejected": -0.3087165653705597, + "step": 105 + }, + { + "epoch": 0.03094015891990718, + "grad_norm": 2.640625, + "learning_rate": 1.544943820224719e-06, + "log_odds_chosen": -0.43961867690086365, + "log_odds_ratio": -1.1604411602020264, + "logits/chosen": 0.11637775599956512, + "logits/rejected": 0.05946706607937813, + "logps/chosen": -2.9125075340270996, + "logps/rejected": -2.49198579788208, + "loss": 2.4607, + "nll_loss": 2.7069311141967773, + "rewards/accuracies": 0.4000000059604645, + "rewards/chosen": -0.2912507653236389, + "rewards/margins": -0.04205216467380524, + "rewards/rejected": -0.24919860064983368, + "step": 110 + }, + { + "epoch": 0.03234652977990296, + "grad_norm": 2.328125, + "learning_rate": 1.615168539325843e-06, + "log_odds_chosen": -0.7555695176124573, + "log_odds_ratio": -1.3248316049575806, + "logits/chosen": 0.09455225616693497, + "logits/rejected": 0.3622409999370575, + "logps/chosen": -2.631551742553711, + "logps/rejected": -1.9142179489135742, + "loss": 2.3807, + "nll_loss": 2.1373257637023926, + "rewards/accuracies": 0.20000000298023224, + "rewards/chosen": -0.2631551921367645, + "rewards/margins": -0.07173338532447815, + "rewards/rejected": -0.19142180681228638, + "step": 115 + }, + { + "epoch": 0.03375290063989874, + "grad_norm": 2.296875, + "learning_rate": 1.6853932584269663e-06, + "log_odds_chosen": -0.5937722325325012, + "log_odds_ratio": -1.3305103778839111, + "logits/chosen": 0.17280586063861847, + "logits/rejected": 0.17354366183280945, + "logps/chosen": -2.77569317817688, + "logps/rejected": -2.1679203510284424, + "loss": 2.2381, + "nll_loss": 2.0504109859466553, + "rewards/accuracies": 0.4000000059604645, + "rewards/chosen": -0.2775692939758301, + "rewards/margins": -0.06077728420495987, + "rewards/rejected": -0.2167920172214508, + "step": 120 + }, + { + "epoch": 0.03515927149989452, + "grad_norm": 2.90625, + "learning_rate": 1.7556179775280902e-06, + "log_odds_chosen": -0.6599341034889221, + "log_odds_ratio": -1.344637155532837, + "logits/chosen": 0.26881319284439087, + "logits/rejected": 0.23464787006378174, + "logps/chosen": -2.4841132164001465, + "logps/rejected": -1.8618295192718506, + "loss": 2.2696, + "nll_loss": 2.0306456089019775, + "rewards/accuracies": 0.4000000059604645, + "rewards/chosen": -0.24841132760047913, + "rewards/margins": -0.06222837418317795, + "rewards/rejected": -0.18618297576904297, + "step": 125 + }, + { + "epoch": 0.0365656423598903, + "grad_norm": 2.25, + "learning_rate": 1.8258426966292136e-06, + "log_odds_chosen": 1.4656527042388916, + "log_odds_ratio": -0.699799656867981, + "logits/chosen": 0.46662625670433044, + "logits/rejected": -0.05031327158212662, + "logps/chosen": -1.8286659717559814, + "logps/rejected": -3.2496142387390137, + "loss": 2.2419, + "nll_loss": 2.064641237258911, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.18286658823490143, + "rewards/margins": 0.14209482073783875, + "rewards/rejected": -0.32496142387390137, + "step": 130 + }, + { + "epoch": 0.03797201321988609, + "grad_norm": 2.0, + "learning_rate": 1.8960674157303372e-06, + "log_odds_chosen": -0.281602144241333, + "log_odds_ratio": -1.1575530767440796, + "logits/chosen": 0.3646135926246643, + "logits/rejected": 0.10292468965053558, + "logps/chosen": -2.2076382637023926, + "logps/rejected": -1.889317512512207, + "loss": 2.3077, + "nll_loss": 2.152637481689453, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.22076383233070374, + "rewards/margins": -0.031832076609134674, + "rewards/rejected": -0.18893174827098846, + "step": 135 + }, + { + "epoch": 0.03937838407988187, + "grad_norm": 1.5390625, + "learning_rate": 1.966292134831461e-06, + "log_odds_chosen": 0.01979989930987358, + "log_odds_ratio": -1.2207305431365967, + "logits/chosen": 0.08597923815250397, + "logits/rejected": -0.05333589389920235, + "logps/chosen": -2.889000415802002, + "logps/rejected": -2.8965907096862793, + "loss": 2.0999, + "nll_loss": 2.3558218479156494, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.28890007734298706, + "rewards/margins": 0.0007590189343318343, + "rewards/rejected": -0.2896590828895569, + "step": 140 + }, + { + "epoch": 0.04078475493987765, + "grad_norm": 5.03125, + "learning_rate": 2.0365168539325845e-06, + "log_odds_chosen": 0.9782406091690063, + "log_odds_ratio": -0.7057538032531738, + "logits/chosen": 0.1825554370880127, + "logits/rejected": 0.11491219699382782, + "logps/chosen": -2.4004645347595215, + "logps/rejected": -3.380887508392334, + "loss": 2.3346, + "nll_loss": 2.3498218059539795, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.240046426653862, + "rewards/margins": 0.098042331635952, + "rewards/rejected": -0.3380887806415558, + "step": 145 + }, + { + "epoch": 0.04219112579987343, + "grad_norm": 18.25, + "learning_rate": 2.106741573033708e-06, + "log_odds_chosen": 0.6918350458145142, + "log_odds_ratio": -0.7857062816619873, + "logits/chosen": 0.3244979977607727, + "logits/rejected": 0.11548665910959244, + "logps/chosen": -1.9222161769866943, + "logps/rejected": -2.5832555294036865, + "loss": 2.3572, + "nll_loss": 2.0777595043182373, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.19222164154052734, + "rewards/margins": 0.06610391288995743, + "rewards/rejected": -0.25832557678222656, + "step": 150 + }, + { + "epoch": 0.04359749665986921, + "grad_norm": 2.390625, + "learning_rate": 2.1769662921348318e-06, + "log_odds_chosen": 0.09566085040569305, + "log_odds_ratio": -0.8652345538139343, + "logits/chosen": 0.2193879634141922, + "logits/rejected": 0.025885796174407005, + "logps/chosen": -2.0290634632110596, + "logps/rejected": -2.121151924133301, + "loss": 2.2886, + "nll_loss": 2.070652723312378, + "rewards/accuracies": 0.44999998807907104, + "rewards/chosen": -0.20290634036064148, + "rewards/margins": 0.009208852425217628, + "rewards/rejected": -0.21211519837379456, + "step": 155 + }, + { + "epoch": 0.04500386751986499, + "grad_norm": 1.609375, + "learning_rate": 2.2471910112359554e-06, + "log_odds_chosen": 0.3223455250263214, + "log_odds_ratio": -1.16544771194458, + "logits/chosen": 0.2342136800289154, + "logits/rejected": 0.018552130088210106, + "logps/chosen": -2.3197340965270996, + "logps/rejected": -2.6499991416931152, + "loss": 2.0612, + "nll_loss": 2.120389938354492, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.23197337985038757, + "rewards/margins": 0.03302653506398201, + "rewards/rejected": -0.2649999260902405, + "step": 160 + }, + { + "epoch": 0.04641023837986077, + "grad_norm": 1.9375, + "learning_rate": 2.317415730337079e-06, + "log_odds_chosen": -0.030302369967103004, + "log_odds_ratio": -0.9944000244140625, + "logits/chosen": 0.13670721650123596, + "logits/rejected": 0.0991348847746849, + "logps/chosen": -2.0291049480438232, + "logps/rejected": -2.0215368270874023, + "loss": 2.1136, + "nll_loss": 2.255488872528076, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.20291049778461456, + "rewards/margins": -0.000756812107283622, + "rewards/rejected": -0.20215372741222382, + "step": 165 + }, + { + "epoch": 0.04781660923985655, + "grad_norm": 1.28125, + "learning_rate": 2.3876404494382026e-06, + "log_odds_chosen": -0.2611065208911896, + "log_odds_ratio": -1.1305692195892334, + "logits/chosen": 0.16220179200172424, + "logits/rejected": 0.12496791034936905, + "logps/chosen": -2.1977381706237793, + "logps/rejected": -1.8998682498931885, + "loss": 1.9801, + "nll_loss": 2.043917417526245, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.2197737991809845, + "rewards/margins": -0.029786983504891396, + "rewards/rejected": -0.18998682498931885, + "step": 170 + }, + { + "epoch": 0.049222980099852334, + "grad_norm": 1.140625, + "learning_rate": 2.457865168539326e-06, + "log_odds_chosen": -0.07105789333581924, + "log_odds_ratio": -1.1078282594680786, + "logits/chosen": 0.12216529995203018, + "logits/rejected": 0.027472496032714844, + "logps/chosen": -2.0756044387817383, + "logps/rejected": -1.9600484371185303, + "loss": 1.9889, + "nll_loss": 1.8455654382705688, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.2075604498386383, + "rewards/margins": -0.011555584147572517, + "rewards/rejected": -0.19600485265254974, + "step": 175 + }, + { + "epoch": 0.050629350959848114, + "grad_norm": 1.78125, + "learning_rate": 2.52808988764045e-06, + "log_odds_chosen": 1.0096280574798584, + "log_odds_ratio": -0.5677310228347778, + "logits/chosen": 0.21584467589855194, + "logits/rejected": 0.09764888882637024, + "logps/chosen": -1.5301876068115234, + "logps/rejected": -2.4704413414001465, + "loss": 1.9958, + "nll_loss": 1.8449862003326416, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.1530187726020813, + "rewards/margins": 0.0940253958106041, + "rewards/rejected": -0.2470441311597824, + "step": 180 + }, + { + "epoch": 0.052035721819843894, + "grad_norm": 1.703125, + "learning_rate": 2.598314606741573e-06, + "log_odds_chosen": 0.018608326092362404, + "log_odds_ratio": -1.0833065509796143, + "logits/chosen": 0.20370593667030334, + "logits/rejected": -0.01377248764038086, + "logps/chosen": -2.2561848163604736, + "logps/rejected": -2.2673747539520264, + "loss": 2.0576, + "nll_loss": 2.2482457160949707, + "rewards/accuracies": 0.44999998807907104, + "rewards/chosen": -0.22561845183372498, + "rewards/margins": 0.0011190299410372972, + "rewards/rejected": -0.22673749923706055, + "step": 185 + }, + { + "epoch": 0.053442092679839674, + "grad_norm": 7.625, + "learning_rate": 2.6685393258426968e-06, + "log_odds_chosen": 0.6541301608085632, + "log_odds_ratio": -0.5813708305358887, + "logits/chosen": 0.2531304359436035, + "logits/rejected": 0.15365512669086456, + "logps/chosen": -1.798595666885376, + "logps/rejected": -2.4013969898223877, + "loss": 2.0605, + "nll_loss": 2.041943073272705, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.17985956370830536, + "rewards/margins": 0.06028013676404953, + "rewards/rejected": -0.2401396930217743, + "step": 190 + }, + { + "epoch": 0.054848463539835454, + "grad_norm": 0.953125, + "learning_rate": 2.7387640449438204e-06, + "log_odds_chosen": 0.7900503277778625, + "log_odds_ratio": -0.7179470658302307, + "logits/chosen": 0.3807224631309509, + "logits/rejected": 0.047808244824409485, + "logps/chosen": -1.579738974571228, + "logps/rejected": -2.2514843940734863, + "loss": 1.9199, + "nll_loss": 1.7178363800048828, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.15797391533851624, + "rewards/margins": 0.06717453896999359, + "rewards/rejected": -0.22514846920967102, + "step": 195 + }, + { + "epoch": 0.056254834399831234, + "grad_norm": 1.0234375, + "learning_rate": 2.8089887640449444e-06, + "log_odds_chosen": -0.145659938454628, + "log_odds_ratio": -0.9325029253959656, + "logits/chosen": 0.23699569702148438, + "logits/rejected": -0.00980368535965681, + "logps/chosen": -2.0339317321777344, + "logps/rejected": -1.8712536096572876, + "loss": 2.0171, + "nll_loss": 2.233694314956665, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.20339322090148926, + "rewards/margins": -0.016267839819192886, + "rewards/rejected": -0.18712535500526428, + "step": 200 + }, + { + "epoch": 0.057661205259827014, + "grad_norm": 0.8671875, + "learning_rate": 2.8792134831460676e-06, + "log_odds_chosen": 0.3622695505619049, + "log_odds_ratio": -0.7100062370300293, + "logits/chosen": 0.16171380877494812, + "logits/rejected": 0.16733792424201965, + "logps/chosen": -1.59294593334198, + "logps/rejected": -1.8916879892349243, + "loss": 1.9959, + "nll_loss": 1.71487557888031, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.15929457545280457, + "rewards/margins": 0.029874194413423538, + "rewards/rejected": -0.189168781042099, + "step": 205 + }, + { + "epoch": 0.059067576119822794, + "grad_norm": 1.6328125, + "learning_rate": 2.9494382022471913e-06, + "log_odds_chosen": 0.14909641444683075, + "log_odds_ratio": -0.8013676404953003, + "logits/chosen": 0.2264544665813446, + "logits/rejected": -0.017689814791083336, + "logps/chosen": -1.9082419872283936, + "logps/rejected": -2.013080596923828, + "loss": 1.9142, + "nll_loss": 2.3444690704345703, + "rewards/accuracies": 0.44999998807907104, + "rewards/chosen": -0.1908242255449295, + "rewards/margins": 0.010483830235898495, + "rewards/rejected": -0.20130808651447296, + "step": 210 + }, + { + "epoch": 0.06047394697981858, + "grad_norm": 1.15625, + "learning_rate": 3.019662921348315e-06, + "log_odds_chosen": 0.8857008218765259, + "log_odds_ratio": -0.521682620048523, + "logits/chosen": 0.23352375626564026, + "logits/rejected": 0.006157740950584412, + "logps/chosen": -1.452274203300476, + "logps/rejected": -2.248126983642578, + "loss": 2.0383, + "nll_loss": 2.0336852073669434, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.14522740244865417, + "rewards/margins": 0.07958526909351349, + "rewards/rejected": -0.22481270134449005, + "step": 215 + }, + { + "epoch": 0.06188031783981436, + "grad_norm": 1.5, + "learning_rate": 3.089887640449438e-06, + "log_odds_chosen": 0.2916131019592285, + "log_odds_ratio": -0.8203420639038086, + "logits/chosen": 0.14003995060920715, + "logits/rejected": 0.21731536090373993, + "logps/chosen": -1.518283724784851, + "logps/rejected": -1.7996505498886108, + "loss": 1.8932, + "nll_loss": 1.9349063634872437, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.15182837843894958, + "rewards/margins": 0.028136665001511574, + "rewards/rejected": -0.17996501922607422, + "step": 220 + }, + { + "epoch": 0.06328668869981013, + "grad_norm": 1.1796875, + "learning_rate": 3.160112359550562e-06, + "log_odds_chosen": 0.04508267715573311, + "log_odds_ratio": -0.8254510760307312, + "logits/chosen": 0.1539347618818283, + "logits/rejected": -0.02814999222755432, + "logps/chosen": -1.587212324142456, + "logps/rejected": -1.6075446605682373, + "loss": 1.8014, + "nll_loss": 1.7296669483184814, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.15872123837471008, + "rewards/margins": 0.002033218275755644, + "rewards/rejected": -0.16075445711612701, + "step": 225 + }, + { + "epoch": 0.06469305955980592, + "grad_norm": 4.90625, + "learning_rate": 3.230337078651686e-06, + "log_odds_chosen": -0.3043696880340576, + "log_odds_ratio": -0.881208062171936, + "logits/chosen": 0.3043157458305359, + "logits/rejected": 0.04385875537991524, + "logps/chosen": -1.3035953044891357, + "logps/rejected": -1.1122184991836548, + "loss": 1.7833, + "nll_loss": 1.782552719116211, + "rewards/accuracies": 0.20000000298023224, + "rewards/chosen": -0.13035951554775238, + "rewards/margins": -0.019137678667902946, + "rewards/rejected": -0.11122184991836548, + "step": 230 + }, + { + "epoch": 0.06609943041980171, + "grad_norm": 0.74609375, + "learning_rate": 3.3005617977528094e-06, + "log_odds_chosen": -0.13101080060005188, + "log_odds_ratio": -1.0145528316497803, + "logits/chosen": 0.09630151093006134, + "logits/rejected": 0.09045438468456268, + "logps/chosen": -1.6960340738296509, + "logps/rejected": -1.576324462890625, + "loss": 1.7729, + "nll_loss": 1.5516774654388428, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.1696033924818039, + "rewards/margins": -0.01197095401585102, + "rewards/rejected": -0.15763245522975922, + "step": 235 + }, + { + "epoch": 0.06750580127979748, + "grad_norm": 1.2578125, + "learning_rate": 3.3707865168539327e-06, + "log_odds_chosen": 0.19244810938835144, + "log_odds_ratio": -0.712375819683075, + "logits/chosen": 0.2630612254142761, + "logits/rejected": 0.15920257568359375, + "logps/chosen": -1.307663083076477, + "logps/rejected": -1.4294275045394897, + "loss": 1.7556, + "nll_loss": 1.5486177206039429, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.13076630234718323, + "rewards/margins": 0.012176448479294777, + "rewards/rejected": -0.14294275641441345, + "step": 240 + }, + { + "epoch": 0.06891217213979327, + "grad_norm": 1.6875, + "learning_rate": 3.4410112359550563e-06, + "log_odds_chosen": 0.2274598777294159, + "log_odds_ratio": -0.8218949437141418, + "logits/chosen": 0.1571234166622162, + "logits/rejected": 0.050012148916721344, + "logps/chosen": -1.4508711099624634, + "logps/rejected": -1.6815522909164429, + "loss": 1.6875, + "nll_loss": 1.500241756439209, + "rewards/accuracies": 0.4000000059604645, + "rewards/chosen": -0.1450871080160141, + "rewards/margins": 0.023068133741617203, + "rewards/rejected": -0.168155238032341, + "step": 245 + }, + { + "epoch": 0.07031854299978904, + "grad_norm": 0.84375, + "learning_rate": 3.5112359550561803e-06, + "log_odds_chosen": 0.29401296377182007, + "log_odds_ratio": -0.657070517539978, + "logits/chosen": 0.2837556004524231, + "logits/rejected": 0.19875159859657288, + "logps/chosen": -1.1506319046020508, + "logps/rejected": -1.3889071941375732, + "loss": 1.75, + "nll_loss": 1.6564500331878662, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.11506320536136627, + "rewards/margins": 0.023827504366636276, + "rewards/rejected": -0.13889071345329285, + "step": 250 + }, + { + "epoch": 0.07172491385978483, + "grad_norm": 1.0859375, + "learning_rate": 3.581460674157304e-06, + "log_odds_chosen": 0.6645214557647705, + "log_odds_ratio": -0.5389525890350342, + "logits/chosen": 0.3206818699836731, + "logits/rejected": 0.02894558571279049, + "logps/chosen": -1.1443369388580322, + "logps/rejected": -1.6359878778457642, + "loss": 1.5984, + "nll_loss": 1.7387971878051758, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.1144336685538292, + "rewards/margins": 0.04916510730981827, + "rewards/rejected": -0.16359877586364746, + "step": 255 + }, + { + "epoch": 0.0731312847197806, + "grad_norm": 0.98828125, + "learning_rate": 3.651685393258427e-06, + "log_odds_chosen": 0.17139050364494324, + "log_odds_ratio": -0.7790501713752747, + "logits/chosen": -0.0013204365968704224, + "logits/rejected": 0.07245022058486938, + "logps/chosen": -1.2540526390075684, + "logps/rejected": -1.3776941299438477, + "loss": 1.7383, + "nll_loss": 1.6374698877334595, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.12540525197982788, + "rewards/margins": 0.012364145368337631, + "rewards/rejected": -0.137769415974617, + "step": 260 + }, + { + "epoch": 0.07453765557977639, + "grad_norm": 1.15625, + "learning_rate": 3.721910112359551e-06, + "log_odds_chosen": -0.09897629916667938, + "log_odds_ratio": -0.8483640551567078, + "logits/chosen": 0.06739149242639542, + "logits/rejected": -0.06628112494945526, + "logps/chosen": -1.1900030374526978, + "logps/rejected": -1.1566414833068848, + "loss": 1.6208, + "nll_loss": 1.482954502105713, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.11900033056735992, + "rewards/margins": -0.003336158813908696, + "rewards/rejected": -0.11566416174173355, + "step": 265 + }, + { + "epoch": 0.07594402643977217, + "grad_norm": 0.71484375, + "learning_rate": 3.7921348314606744e-06, + "log_odds_chosen": 0.614561915397644, + "log_odds_ratio": -0.5390773415565491, + "logits/chosen": 0.2947372794151306, + "logits/rejected": 0.029941141605377197, + "logps/chosen": -1.279478669166565, + "logps/rejected": -1.805053472518921, + "loss": 1.6478, + "nll_loss": 1.631860375404358, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.1279478818178177, + "rewards/margins": 0.05255746841430664, + "rewards/rejected": -0.18050536513328552, + "step": 270 + }, + { + "epoch": 0.07735039729976795, + "grad_norm": 0.71875, + "learning_rate": 3.8623595505617985e-06, + "log_odds_chosen": 0.70516437292099, + "log_odds_ratio": -0.5123119354248047, + "logits/chosen": 0.14660146832466125, + "logits/rejected": -0.19481025636196136, + "logps/chosen": -1.091347098350525, + "logps/rejected": -1.606636643409729, + "loss": 1.6133, + "nll_loss": 1.5436406135559082, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.10913471132516861, + "rewards/margins": 0.051528967916965485, + "rewards/rejected": -0.1606636941432953, + "step": 275 + }, + { + "epoch": 0.07875676815976373, + "grad_norm": 0.82421875, + "learning_rate": 3.932584269662922e-06, + "log_odds_chosen": 0.42323070764541626, + "log_odds_ratio": -0.5872517824172974, + "logits/chosen": 0.2884772717952728, + "logits/rejected": 0.11415378749370575, + "logps/chosen": -0.9810200929641724, + "logps/rejected": -1.2774531841278076, + "loss": 1.6323, + "nll_loss": 1.450991153717041, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.09810201078653336, + "rewards/margins": 0.029643306508660316, + "rewards/rejected": -0.12774533033370972, + "step": 280 + }, + { + "epoch": 0.08016313901975951, + "grad_norm": 0.8828125, + "learning_rate": 4.002808988764045e-06, + "log_odds_chosen": 0.240543931722641, + "log_odds_ratio": -0.6353433132171631, + "logits/chosen": 0.33016669750213623, + "logits/rejected": 0.22914664447307587, + "logps/chosen": -1.0102094411849976, + "logps/rejected": -1.1385387182235718, + "loss": 1.529, + "nll_loss": 1.167189359664917, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.10102095454931259, + "rewards/margins": 0.012832917273044586, + "rewards/rejected": -0.11385388672351837, + "step": 285 + }, + { + "epoch": 0.0815695098797553, + "grad_norm": 0.79296875, + "learning_rate": 4.073033707865169e-06, + "log_odds_chosen": 0.24131183326244354, + "log_odds_ratio": -0.6839054822921753, + "logits/chosen": 0.02991688810288906, + "logits/rejected": 0.07139863073825836, + "logps/chosen": -1.1842001676559448, + "logps/rejected": -1.378699541091919, + "loss": 1.6598, + "nll_loss": 1.6421235799789429, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.11842000484466553, + "rewards/margins": 0.019449947401881218, + "rewards/rejected": -0.1378699541091919, + "step": 290 + }, + { + "epoch": 0.08297588073975107, + "grad_norm": 0.63671875, + "learning_rate": 4.143258426966292e-06, + "log_odds_chosen": -0.0037903576157987118, + "log_odds_ratio": -0.752034068107605, + "logits/chosen": 0.14442117512226105, + "logits/rejected": 0.04852147772908211, + "logps/chosen": -1.194391131401062, + "logps/rejected": -1.198143482208252, + "loss": 1.5798, + "nll_loss": 1.469167709350586, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.11943913996219635, + "rewards/margins": 0.00037522317143157125, + "rewards/rejected": -0.11981435120105743, + "step": 295 + }, + { + "epoch": 0.08438225159974685, + "grad_norm": 0.5703125, + "learning_rate": 4.213483146067416e-06, + "log_odds_chosen": 0.16156907379627228, + "log_odds_ratio": -0.6518368721008301, + "logits/chosen": 0.3249798119068146, + "logits/rejected": 0.059707604348659515, + "logps/chosen": -1.191025972366333, + "logps/rejected": -1.3182622194290161, + "loss": 1.6008, + "nll_loss": 1.5026135444641113, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.1191026121377945, + "rewards/margins": 0.012723615393042564, + "rewards/rejected": -0.1318262368440628, + "step": 300 + }, + { + "epoch": 0.08578862245974263, + "grad_norm": 1.5390625, + "learning_rate": 4.28370786516854e-06, + "log_odds_chosen": 0.46325716376304626, + "log_odds_ratio": -0.5719884634017944, + "logits/chosen": 0.04224681854248047, + "logits/rejected": -0.18858034908771515, + "logps/chosen": -1.2465041875839233, + "logps/rejected": -1.5959270000457764, + "loss": 1.647, + "nll_loss": 1.6545063257217407, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.12465040385723114, + "rewards/margins": 0.03494229167699814, + "rewards/rejected": -0.15959270298480988, + "step": 305 + }, + { + "epoch": 0.08719499331973841, + "grad_norm": 0.8515625, + "learning_rate": 4.3539325842696635e-06, + "log_odds_chosen": 0.03835631161928177, + "log_odds_ratio": -0.7204209566116333, + "logits/chosen": 0.21781399846076965, + "logits/rejected": 0.1392887532711029, + "logps/chosen": -1.0911667346954346, + "logps/rejected": -1.093153953552246, + "loss": 1.5681, + "nll_loss": 1.530747652053833, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.10911668837070465, + "rewards/margins": 0.0001987170398933813, + "rewards/rejected": -0.10931539535522461, + "step": 310 + }, + { + "epoch": 0.0886013641797342, + "grad_norm": 0.7578125, + "learning_rate": 4.424157303370787e-06, + "log_odds_chosen": 0.3542148470878601, + "log_odds_ratio": -0.6624723672866821, + "logits/chosen": 0.10045752674341202, + "logits/rejected": -0.00516448775306344, + "logps/chosen": -1.1036803722381592, + "logps/rejected": -1.3415453433990479, + "loss": 1.5786, + "nll_loss": 1.6732912063598633, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.11036805063486099, + "rewards/margins": 0.023786501958966255, + "rewards/rejected": -0.1341545283794403, + "step": 315 + }, + { + "epoch": 0.09000773503972997, + "grad_norm": 3.375, + "learning_rate": 4.494382022471911e-06, + "log_odds_chosen": 0.48890742659568787, + "log_odds_ratio": -0.5910844802856445, + "logits/chosen": 0.04471005126833916, + "logits/rejected": -0.138756662607193, + "logps/chosen": -1.1005867719650269, + "logps/rejected": -1.4969103336334229, + "loss": 1.6061, + "nll_loss": 1.6264108419418335, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.11005868017673492, + "rewards/margins": 0.03963235393166542, + "rewards/rejected": -0.14969103038311005, + "step": 320 + }, + { + "epoch": 0.09141410589972576, + "grad_norm": 0.89453125, + "learning_rate": 4.564606741573034e-06, + "log_odds_chosen": 0.21167974174022675, + "log_odds_ratio": -0.7039065361022949, + "logits/chosen": -0.022272679954767227, + "logits/rejected": -0.04252483695745468, + "logps/chosen": -1.2426739931106567, + "logps/rejected": -1.4039318561553955, + "loss": 1.6706, + "nll_loss": 1.8168179988861084, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.12426741421222687, + "rewards/margins": 0.016125772148370743, + "rewards/rejected": -0.1403931826353073, + "step": 325 + }, + { + "epoch": 0.09282047675972153, + "grad_norm": 1.4140625, + "learning_rate": 4.634831460674158e-06, + "log_odds_chosen": 0.8199083209037781, + "log_odds_ratio": -0.46476811170578003, + "logits/chosen": 0.24483537673950195, + "logits/rejected": 0.0019326538313180208, + "logps/chosen": -0.9420560598373413, + "logps/rejected": -1.5152714252471924, + "loss": 1.526, + "nll_loss": 1.414475440979004, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.09420560300350189, + "rewards/margins": 0.05732153728604317, + "rewards/rejected": -0.15152713656425476, + "step": 330 + }, + { + "epoch": 0.09422684761971732, + "grad_norm": 0.98046875, + "learning_rate": 4.705056179775281e-06, + "log_odds_chosen": 0.05631124973297119, + "log_odds_ratio": -0.7453809976577759, + "logits/chosen": 0.04857509210705757, + "logits/rejected": -0.03552461788058281, + "logps/chosen": -1.201915979385376, + "logps/rejected": -1.2734609842300415, + "loss": 1.6201, + "nll_loss": 1.506476640701294, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.12019158899784088, + "rewards/margins": 0.007154509425163269, + "rewards/rejected": -0.12734608352184296, + "step": 335 + }, + { + "epoch": 0.0956332184797131, + "grad_norm": 0.92578125, + "learning_rate": 4.775280898876405e-06, + "log_odds_chosen": 0.1800452619791031, + "log_odds_ratio": -0.6534844636917114, + "logits/chosen": 0.08206330239772797, + "logits/rejected": -0.0701373964548111, + "logps/chosen": -1.2106133699417114, + "logps/rejected": -1.341378927230835, + "loss": 1.4736, + "nll_loss": 1.4450690746307373, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.12106132507324219, + "rewards/margins": 0.013076579198241234, + "rewards/rejected": -0.13413789868354797, + "step": 340 + }, + { + "epoch": 0.09703958933970888, + "grad_norm": 1.09375, + "learning_rate": 4.8455056179775285e-06, + "log_odds_chosen": 0.14819678664207458, + "log_odds_ratio": -0.701557457447052, + "logits/chosen": -0.08788873255252838, + "logits/rejected": -0.1474021077156067, + "logps/chosen": -1.1518551111221313, + "logps/rejected": -1.267639398574829, + "loss": 1.5814, + "nll_loss": 1.5248239040374756, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.11518549919128418, + "rewards/margins": 0.011578412726521492, + "rewards/rejected": -0.1267639398574829, + "step": 345 + }, + { + "epoch": 0.09844596019970467, + "grad_norm": 1.2578125, + "learning_rate": 4.915730337078652e-06, + "log_odds_chosen": 0.19631382822990417, + "log_odds_ratio": -0.7674649953842163, + "logits/chosen": 0.1635451465845108, + "logits/rejected": -0.04669942334294319, + "logps/chosen": -1.2185736894607544, + "logps/rejected": -1.3684440851211548, + "loss": 1.5616, + "nll_loss": 1.4181640148162842, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.12185736000537872, + "rewards/margins": 0.014987033791840076, + "rewards/rejected": -0.13684441149234772, + "step": 350 + }, + { + "epoch": 0.09985233105970044, + "grad_norm": 0.76171875, + "learning_rate": 4.985955056179776e-06, + "log_odds_chosen": 0.5495853424072266, + "log_odds_ratio": -0.5390895009040833, + "logits/chosen": -0.0019469677936285734, + "logits/rejected": 0.007418841123580933, + "logps/chosen": -0.917323887348175, + "logps/rejected": -1.2796287536621094, + "loss": 1.6679, + "nll_loss": 1.480374813079834, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.09173239022493362, + "rewards/margins": 0.03623048588633537, + "rewards/rejected": -0.1279628723859787, + "step": 355 + }, + { + "epoch": 0.10125870191969623, + "grad_norm": 2.484375, + "learning_rate": 4.999980711400201e-06, + "log_odds_chosen": 0.15539391338825226, + "log_odds_ratio": -0.6699272394180298, + "logits/chosen": 0.05738813802599907, + "logits/rejected": -0.13643920421600342, + "logps/chosen": -0.9270496368408203, + "logps/rejected": -1.0245447158813477, + "loss": 1.6375, + "nll_loss": 1.5049155950546265, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.09270496666431427, + "rewards/margins": 0.009749513119459152, + "rewards/rejected": -0.10245448350906372, + "step": 360 + }, + { + "epoch": 0.102665072779692, + "grad_norm": 1.2421875, + "learning_rate": 4.999902351973632e-06, + "log_odds_chosen": 0.3612229526042938, + "log_odds_ratio": -0.6697491407394409, + "logits/chosen": 0.2046525478363037, + "logits/rejected": -0.2595598101615906, + "logps/chosen": -1.0786577463150024, + "logps/rejected": -1.2872159481048584, + "loss": 1.5486, + "nll_loss": 1.5875145196914673, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.10786578804254532, + "rewards/margins": 0.020855823531746864, + "rewards/rejected": -0.12872160971164703, + "step": 365 + }, + { + "epoch": 0.10407144363968779, + "grad_norm": 1.0859375, + "learning_rate": 4.999763718070656e-06, + "log_odds_chosen": 0.2860111594200134, + "log_odds_ratio": -0.6612197160720825, + "logits/chosen": 0.23584775626659393, + "logits/rejected": -0.477583646774292, + "logps/chosen": -1.2212624549865723, + "logps/rejected": -1.4150340557098389, + "loss": 1.5084, + "nll_loss": 1.620976209640503, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.1221262589097023, + "rewards/margins": 0.019377145916223526, + "rewards/rejected": -0.14150340855121613, + "step": 370 + }, + { + "epoch": 0.10547781449968356, + "grad_norm": 0.5703125, + "learning_rate": 4.999564813033837e-06, + "log_odds_chosen": 0.47208815813064575, + "log_odds_ratio": -0.6141301989555359, + "logits/chosen": 0.3282993733882904, + "logits/rejected": -0.17076356709003448, + "logps/chosen": -1.0655587911605835, + "logps/rejected": -1.4597357511520386, + "loss": 1.4887, + "nll_loss": 1.4764500856399536, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.10655587911605835, + "rewards/margins": 0.03941771388053894, + "rewards/rejected": -0.14597360789775848, + "step": 375 + }, + { + "epoch": 0.10688418535967935, + "grad_norm": 1.0078125, + "learning_rate": 4.9993056416589215e-06, + "log_odds_chosen": 0.4635187089443207, + "log_odds_ratio": -0.6125253438949585, + "logits/chosen": 0.09420565515756607, + "logits/rejected": -0.2581847310066223, + "logps/chosen": -1.0355606079101562, + "logps/rejected": -1.3603068590164185, + "loss": 1.6089, + "nll_loss": 1.6906983852386475, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.10355605185031891, + "rewards/margins": 0.032474637031555176, + "rewards/rejected": -0.13603070378303528, + "step": 380 + }, + { + "epoch": 0.10829055621967512, + "grad_norm": 0.6640625, + "learning_rate": 4.9989862101947215e-06, + "log_odds_chosen": 0.2843974530696869, + "log_odds_ratio": -0.636595606803894, + "logits/chosen": 0.014289943501353264, + "logits/rejected": -0.03619622439146042, + "logps/chosen": -1.036195158958435, + "logps/rejected": -1.2099014520645142, + "loss": 1.5497, + "nll_loss": 1.487868309020996, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.1036195158958435, + "rewards/margins": 0.01737063005566597, + "rewards/rejected": -0.12099014222621918, + "step": 385 + }, + { + "epoch": 0.10969692707967091, + "grad_norm": 0.55859375, + "learning_rate": 4.998606526342963e-06, + "log_odds_chosen": 0.6098340153694153, + "log_odds_ratio": -0.49714794754981995, + "logits/chosen": 0.10547232627868652, + "logits/rejected": -0.30191439390182495, + "logps/chosen": -0.9376096725463867, + "logps/rejected": -1.3693550825119019, + "loss": 1.5585, + "nll_loss": 1.4435722827911377, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.09376096725463867, + "rewards/margins": 0.04317455366253853, + "rewards/rejected": -0.1369355171918869, + "step": 390 + }, + { + "epoch": 0.1111032979396667, + "grad_norm": 0.70703125, + "learning_rate": 4.998166599258102e-06, + "log_odds_chosen": -0.2814989984035492, + "log_odds_ratio": -0.9766399264335632, + "logits/chosen": 0.017055341973900795, + "logits/rejected": 0.1271597445011139, + "logps/chosen": -1.3258157968521118, + "logps/rejected": -1.114498496055603, + "loss": 1.535, + "nll_loss": 1.4724485874176025, + "rewards/accuracies": 0.4000000059604645, + "rewards/chosen": -0.13258156180381775, + "rewards/margins": -0.021131718531250954, + "rewards/rejected": -0.11144986003637314, + "step": 395 + }, + { + "epoch": 0.11250966879966247, + "grad_norm": 1.1484375, + "learning_rate": 4.997666439547102e-06, + "log_odds_chosen": 0.12479138374328613, + "log_odds_ratio": -0.6825396418571472, + "logits/chosen": 0.1605800986289978, + "logits/rejected": -0.03981009125709534, + "logps/chosen": -1.0353831052780151, + "logps/rejected": -1.1214975118637085, + "loss": 1.5717, + "nll_loss": 1.446873664855957, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.10353831201791763, + "rewards/margins": 0.008611435070633888, + "rewards/rejected": -0.11214976012706757, + "step": 400 + }, + { + "epoch": 0.11391603965965826, + "grad_norm": 0.7734375, + "learning_rate": 4.997106059269182e-06, + "log_odds_chosen": 0.5131736993789673, + "log_odds_ratio": -0.5569295883178711, + "logits/chosen": 0.11948621273040771, + "logits/rejected": 0.006663101725280285, + "logps/chosen": -0.6998155117034912, + "logps/rejected": -0.9955805540084839, + "loss": 1.6032, + "nll_loss": 1.5056767463684082, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.06998156011104584, + "rewards/margins": 0.029576506465673447, + "rewards/rejected": -0.09955805540084839, + "step": 405 + }, + { + "epoch": 0.11532241051965403, + "grad_norm": 1.703125, + "learning_rate": 4.996485471935518e-06, + "log_odds_chosen": 0.12343521416187286, + "log_odds_ratio": -0.7595449686050415, + "logits/chosen": 0.14384707808494568, + "logits/rejected": 0.16315032541751862, + "logps/chosen": -1.0936092138290405, + "logps/rejected": -1.1240942478179932, + "loss": 1.5141, + "nll_loss": 1.4012603759765625, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.10936091840267181, + "rewards/margins": 0.00304849399253726, + "rewards/rejected": -0.11240942776203156, + "step": 410 + }, + { + "epoch": 0.11672878137964982, + "grad_norm": 0.9140625, + "learning_rate": 4.995804692508927e-06, + "log_odds_chosen": 0.061908699572086334, + "log_odds_ratio": -0.766377329826355, + "logits/chosen": 0.24375347793102264, + "logits/rejected": 0.2323276698589325, + "logps/chosen": -0.9721105694770813, + "logps/rejected": -1.0543268918991089, + "loss": 1.4151, + "nll_loss": 1.2445567846298218, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.09721106290817261, + "rewards/margins": 0.008221631869673729, + "rewards/rejected": -0.10543270409107208, + "step": 415 + }, + { + "epoch": 0.11813515223964559, + "grad_norm": 0.62890625, + "learning_rate": 4.9950637374035e-06, + "log_odds_chosen": 0.07634903490543365, + "log_odds_ratio": -0.7504058480262756, + "logits/chosen": 0.02690283954143524, + "logits/rejected": -0.2870177626609802, + "logps/chosen": -1.129374384880066, + "logps/rejected": -1.203255295753479, + "loss": 1.5333, + "nll_loss": 1.7005048990249634, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.11293745040893555, + "rewards/margins": 0.00738809397444129, + "rewards/rejected": -0.12032552808523178, + "step": 420 + }, + { + "epoch": 0.11954152309964138, + "grad_norm": 0.6640625, + "learning_rate": 4.994262624484205e-06, + "log_odds_chosen": 0.7215684652328491, + "log_odds_ratio": -0.52166748046875, + "logits/chosen": 0.1289384961128235, + "logits/rejected": -0.4422905445098877, + "logps/chosen": -0.9521444439888, + "logps/rejected": -1.393911600112915, + "loss": 1.5728, + "nll_loss": 1.7155725955963135, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.09521444886922836, + "rewards/margins": 0.044176697731018066, + "rewards/rejected": -0.13939115405082703, + "step": 425 + }, + { + "epoch": 0.12094789395963716, + "grad_norm": 0.875, + "learning_rate": 4.993401373066463e-06, + "log_odds_chosen": 0.20202788710594177, + "log_odds_ratio": -0.6701411008834839, + "logits/chosen": 0.09874279797077179, + "logits/rejected": -0.14084379374980927, + "logps/chosen": -0.9783962368965149, + "logps/rejected": -1.1814371347427368, + "loss": 1.5083, + "nll_loss": 1.4307975769042969, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.09783962368965149, + "rewards/margins": 0.020304083824157715, + "rewards/rejected": -0.1181437149643898, + "step": 430 + }, + { + "epoch": 0.12235426481963294, + "grad_norm": 1.1171875, + "learning_rate": 4.992480003915675e-06, + "log_odds_chosen": -0.13087473809719086, + "log_odds_ratio": -0.8643546104431152, + "logits/chosen": 0.27809780836105347, + "logits/rejected": 0.1468941867351532, + "logps/chosen": -1.152295470237732, + "logps/rejected": -1.062312364578247, + "loss": 1.5903, + "nll_loss": 1.2978155612945557, + "rewards/accuracies": 0.44999998807907104, + "rewards/chosen": -0.11522956192493439, + "rewards/margins": -0.008998315781354904, + "rewards/rejected": -0.10623123496770859, + "step": 435 + }, + { + "epoch": 0.12376063567962872, + "grad_norm": 1.328125, + "learning_rate": 4.991498539246728e-06, + "log_odds_chosen": 0.3621232807636261, + "log_odds_ratio": -0.6083245277404785, + "logits/chosen": 0.23023250699043274, + "logits/rejected": -0.058020271360874176, + "logps/chosen": -1.0368616580963135, + "logps/rejected": -1.2852314710617065, + "loss": 1.4358, + "nll_loss": 1.2882846593856812, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.10368617624044418, + "rewards/margins": 0.024836981669068336, + "rewards/rejected": -0.12852314114570618, + "step": 440 + }, + { + "epoch": 0.1251670065396245, + "grad_norm": 0.4765625, + "learning_rate": 4.990457002723452e-06, + "log_odds_chosen": 0.43172144889831543, + "log_odds_ratio": -0.6068316102027893, + "logits/chosen": 0.2114812582731247, + "logits/rejected": -0.2429969310760498, + "logps/chosen": -1.0138442516326904, + "logps/rejected": -1.36911141872406, + "loss": 1.4516, + "nll_loss": 1.4831212759017944, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.10138442367315292, + "rewards/margins": 0.035526715219020844, + "rewards/rejected": -0.13691113889217377, + "step": 445 + }, + { + "epoch": 0.12657337739962027, + "grad_norm": 0.71484375, + "learning_rate": 4.989355419458055e-06, + "log_odds_chosen": 0.5212758183479309, + "log_odds_ratio": -0.6297636032104492, + "logits/chosen": 0.08004938811063766, + "logits/rejected": -0.07892777025699615, + "logps/chosen": -1.1437304019927979, + "logps/rejected": -1.4948976039886475, + "loss": 1.5292, + "nll_loss": 1.4960545301437378, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.11437302827835083, + "rewards/margins": 0.03511672094464302, + "rewards/rejected": -0.14948976039886475, + "step": 450 + }, + { + "epoch": 0.12797974825961606, + "grad_norm": 1.046875, + "learning_rate": 4.988193816010518e-06, + "log_odds_chosen": 0.17997342348098755, + "log_odds_ratio": -0.7037402391433716, + "logits/chosen": -0.012340274639427662, + "logits/rejected": -0.11124049127101898, + "logps/chosen": -0.9398072361946106, + "logps/rejected": -1.043228268623352, + "loss": 1.5305, + "nll_loss": 1.656345009803772, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.09398071467876434, + "rewards/margins": 0.010342110879719257, + "rewards/rejected": -0.10432282835245132, + "step": 455 + }, + { + "epoch": 0.12938611911961184, + "grad_norm": 0.6484375, + "learning_rate": 4.98697222038795e-06, + "log_odds_chosen": 0.836463451385498, + "log_odds_ratio": -0.4590897560119629, + "logits/chosen": 0.2174886167049408, + "logits/rejected": -0.2220270186662674, + "logps/chosen": -0.8792584538459778, + "logps/rejected": -1.4695708751678467, + "loss": 1.4685, + "nll_loss": 1.4058226346969604, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.08792584389448166, + "rewards/margins": 0.05903124809265137, + "rewards/rejected": -0.14695709943771362, + "step": 460 + }, + { + "epoch": 0.13079248997960763, + "grad_norm": 0.63671875, + "learning_rate": 4.985690662043916e-06, + "log_odds_chosen": 0.169576495885849, + "log_odds_ratio": -0.7633191347122192, + "logits/chosen": 0.09253176301717758, + "logits/rejected": -0.011513747274875641, + "logps/chosen": -1.078896164894104, + "logps/rejected": -1.1068412065505981, + "loss": 1.5252, + "nll_loss": 1.5368982553482056, + "rewards/accuracies": 0.44999998807907104, + "rewards/chosen": -0.10788961499929428, + "rewards/margins": 0.0027945064939558506, + "rewards/rejected": -0.1106841191649437, + "step": 465 + }, + { + "epoch": 0.13219886083960342, + "grad_norm": 0.4296875, + "learning_rate": 4.984349171877726e-06, + "log_odds_chosen": 0.3642016053199768, + "log_odds_ratio": -0.5838258862495422, + "logits/chosen": 0.016974186524748802, + "logits/rejected": -0.009120392613112926, + "logps/chosen": -0.9194883108139038, + "logps/rejected": -1.1762017011642456, + "loss": 1.5658, + "nll_loss": 1.5235271453857422, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.09194884449243546, + "rewards/margins": 0.025671344250440598, + "rewards/rejected": -0.11762018501758575, + "step": 470 + }, + { + "epoch": 0.13360523169959918, + "grad_norm": 1.71875, + "learning_rate": 4.9829477822336905e-06, + "log_odds_chosen": 0.19526013731956482, + "log_odds_ratio": -0.7123385667800903, + "logits/chosen": 0.017217490822076797, + "logits/rejected": -0.0015163153875619173, + "logps/chosen": -1.1902854442596436, + "logps/rejected": -1.3572930097579956, + "loss": 1.4638, + "nll_loss": 1.6305532455444336, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.11902855336666107, + "rewards/margins": 0.016700739040970802, + "rewards/rejected": -0.13572928309440613, + "step": 475 + }, + { + "epoch": 0.13501160255959496, + "grad_norm": 0.58984375, + "learning_rate": 4.981486526900339e-06, + "log_odds_chosen": -0.0772426575422287, + "log_odds_ratio": -0.8408387899398804, + "logits/chosen": -0.21625415980815887, + "logits/rejected": -0.19397906959056854, + "logps/chosen": -1.1577032804489136, + "logps/rejected": -1.0639727115631104, + "loss": 1.516, + "nll_loss": 1.723187804222107, + "rewards/accuracies": 0.4000000059604645, + "rewards/chosen": -0.11577033996582031, + "rewards/margins": -0.009373062290251255, + "rewards/rejected": -0.10639727115631104, + "step": 480 + }, + { + "epoch": 0.13641797341959075, + "grad_norm": 0.85546875, + "learning_rate": 4.9799654411096095e-06, + "log_odds_chosen": 0.534473717212677, + "log_odds_ratio": -0.5868498086929321, + "logits/chosen": 0.2728256583213806, + "logits/rejected": -0.06836424767971039, + "logps/chosen": -0.9803134202957153, + "logps/rejected": -1.4238746166229248, + "loss": 1.4931, + "nll_loss": 1.3809219598770142, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.09803132712841034, + "rewards/margins": 0.04435613378882408, + "rewards/rejected": -0.1423874795436859, + "step": 485 + }, + { + "epoch": 0.13782434427958654, + "grad_norm": 1.0703125, + "learning_rate": 4.978384561535994e-06, + "log_odds_chosen": 0.2063991129398346, + "log_odds_ratio": -0.7101866006851196, + "logits/chosen": 0.0015419780975207686, + "logits/rejected": -0.1946432739496231, + "logps/chosen": -1.2708717584609985, + "logps/rejected": -1.3941092491149902, + "loss": 1.5895, + "nll_loss": 1.6133596897125244, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.12708717584609985, + "rewards/margins": 0.012323752045631409, + "rewards/rejected": -0.13941094279289246, + "step": 490 + }, + { + "epoch": 0.1392307151395823, + "grad_norm": 1.0546875, + "learning_rate": 4.976743926295655e-06, + "log_odds_chosen": 0.042014528065919876, + "log_odds_ratio": -0.7115843296051025, + "logits/chosen": 0.24858923256397247, + "logits/rejected": 0.1665419489145279, + "logps/chosen": -1.0768249034881592, + "logps/rejected": -1.092078447341919, + "loss": 1.4598, + "nll_loss": 1.6003639698028564, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.1076824888586998, + "rewards/margins": 0.0015253443270921707, + "rewards/rejected": -0.10920783132314682, + "step": 495 + }, + { + "epoch": 0.14063708599957808, + "grad_norm": 0.5703125, + "learning_rate": 4.975043574945512e-06, + "log_odds_chosen": 0.0838036760687828, + "log_odds_ratio": -0.7223269939422607, + "logits/chosen": 0.05284743383526802, + "logits/rejected": -0.08719642460346222, + "logps/chosen": -1.0573341846466064, + "logps/rejected": -1.1373835802078247, + "loss": 1.4152, + "nll_loss": 1.4871622323989868, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.10573341697454453, + "rewards/margins": 0.008004938252270222, + "rewards/rejected": -0.11373835802078247, + "step": 500 + }, + { + "epoch": 0.14204345685957387, + "grad_norm": 0.72265625, + "learning_rate": 4.97328354848228e-06, + "log_odds_chosen": 0.3124513030052185, + "log_odds_ratio": -0.6181926727294922, + "logits/chosen": 0.16835010051727295, + "logits/rejected": 0.041867442429065704, + "logps/chosen": -0.9111245274543762, + "logps/rejected": -1.138530969619751, + "loss": 1.3475, + "nll_loss": 1.3216100931167603, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.09111244976520538, + "rewards/margins": 0.02274065464735031, + "rewards/rejected": -0.1138530969619751, + "step": 505 + }, + { + "epoch": 0.14344982771956966, + "grad_norm": 0.96875, + "learning_rate": 4.971463889341484e-06, + "log_odds_chosen": 0.3187350630760193, + "log_odds_ratio": -0.5841793417930603, + "logits/chosen": 0.18942035734653473, + "logits/rejected": 0.06515751779079437, + "logps/chosen": -0.8589506149291992, + "logps/rejected": -1.076509714126587, + "loss": 1.4424, + "nll_loss": 1.2851884365081787, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.08589507639408112, + "rewards/margins": 0.021755896508693695, + "rewards/rejected": -0.10765095800161362, + "step": 510 + }, + { + "epoch": 0.14485619857956544, + "grad_norm": 0.73046875, + "learning_rate": 4.969584641396442e-06, + "log_odds_chosen": 0.4171040952205658, + "log_odds_ratio": -0.6752759218215942, + "logits/chosen": 0.28343451023101807, + "logits/rejected": -0.119059719145298, + "logps/chosen": -0.9824682474136353, + "logps/rejected": -1.2212116718292236, + "loss": 1.4246, + "nll_loss": 1.3154187202453613, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.09824682772159576, + "rewards/margins": 0.02387436106801033, + "rewards/rejected": -0.1221211776137352, + "step": 515 + }, + { + "epoch": 0.1462625694395612, + "grad_norm": 0.90234375, + "learning_rate": 4.967645849957197e-06, + "log_odds_chosen": 0.1666927933692932, + "log_odds_ratio": -0.7551862001419067, + "logits/chosen": -0.12206075340509415, + "logits/rejected": -0.02794760838150978, + "logps/chosen": -0.9220685958862305, + "logps/rejected": -1.0989015102386475, + "loss": 1.4267, + "nll_loss": 1.4034297466278076, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.09220688045024872, + "rewards/margins": 0.01768328621983528, + "rewards/rejected": -0.10989014804363251, + "step": 520 + }, + { + "epoch": 0.147668940299557, + "grad_norm": 0.6953125, + "learning_rate": 4.965647561769429e-06, + "log_odds_chosen": -0.01692991331219673, + "log_odds_ratio": -0.7631421685218811, + "logits/chosen": 0.2675221860408783, + "logits/rejected": -0.0004905223613604903, + "logps/chosen": -1.0240669250488281, + "logps/rejected": -0.9854789972305298, + "loss": 1.4508, + "nll_loss": 1.454949975013733, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.10240669548511505, + "rewards/margins": -0.0038588023744523525, + "rewards/rejected": -0.09854789823293686, + "step": 525 + }, + { + "epoch": 0.14907531115955278, + "grad_norm": 1.3125, + "learning_rate": 4.96358982501333e-06, + "log_odds_chosen": 0.5942908525466919, + "log_odds_ratio": -0.521664023399353, + "logits/chosen": 0.15052922070026398, + "logits/rejected": -0.12803643941879272, + "logps/chosen": -0.9777441024780273, + "logps/rejected": -1.4171960353851318, + "loss": 1.5198, + "nll_loss": 1.4873838424682617, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.09777440875768661, + "rewards/margins": 0.04394518584012985, + "rewards/rejected": -0.14171959459781647, + "step": 530 + }, + { + "epoch": 0.15048168201954856, + "grad_norm": 0.984375, + "learning_rate": 4.961472689302441e-06, + "log_odds_chosen": 0.5982332825660706, + "log_odds_ratio": -0.5181711912155151, + "logits/chosen": 0.05600785091519356, + "logits/rejected": -0.03996270149946213, + "logps/chosen": -0.924593448638916, + "logps/rejected": -1.2906975746154785, + "loss": 1.5662, + "nll_loss": 1.4862396717071533, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.09245933592319489, + "rewards/margins": 0.03661042079329491, + "rewards/rejected": -0.1290697604417801, + "step": 535 + }, + { + "epoch": 0.15188805287954435, + "grad_norm": 0.72265625, + "learning_rate": 4.959296205682454e-06, + "log_odds_chosen": 0.3825303912162781, + "log_odds_ratio": -0.6163605451583862, + "logits/chosen": 0.10863487422466278, + "logits/rejected": -0.2044816017150879, + "logps/chosen": -0.8504983186721802, + "logps/rejected": -1.0916732549667358, + "loss": 1.4963, + "nll_loss": 1.5003145933151245, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.08504984527826309, + "rewards/margins": 0.02411748841404915, + "rewards/rejected": -0.10916732251644135, + "step": 540 + }, + { + "epoch": 0.1532944237395401, + "grad_norm": 0.8046875, + "learning_rate": 4.957060426629984e-06, + "log_odds_chosen": 0.296190083026886, + "log_odds_ratio": -0.6173704862594604, + "logits/chosen": 0.22966690361499786, + "logits/rejected": 0.16270211338996887, + "logps/chosen": -0.8794999122619629, + "logps/rejected": -1.0589183568954468, + "loss": 1.4761, + "nll_loss": 1.3681291341781616, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.08794999867677689, + "rewards/margins": 0.017941845580935478, + "rewards/rejected": -0.10589183866977692, + "step": 545 + }, + { + "epoch": 0.1547007945995359, + "grad_norm": 0.89453125, + "learning_rate": 4.954765406051299e-06, + "log_odds_chosen": 0.06485619395971298, + "log_odds_ratio": -0.7391001582145691, + "logits/chosen": -0.06438665091991425, + "logits/rejected": 0.12197915464639664, + "logps/chosen": -1.1314219236373901, + "logps/rejected": -1.1761893033981323, + "loss": 1.4169, + "nll_loss": 1.3700284957885742, + "rewards/accuracies": 0.44999998807907104, + "rewards/chosen": -0.11314219236373901, + "rewards/margins": 0.004476743750274181, + "rewards/rejected": -0.11761893332004547, + "step": 550 + }, + { + "epoch": 0.15610716545953168, + "grad_norm": 0.70703125, + "learning_rate": 4.952411199281027e-06, + "log_odds_chosen": 0.4550943970680237, + "log_odds_ratio": -0.5818012952804565, + "logits/chosen": 0.048305265605449677, + "logits/rejected": -0.13713420927524567, + "logps/chosen": -1.0368098020553589, + "logps/rejected": -1.358357548713684, + "loss": 1.5256, + "nll_loss": 1.443561315536499, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.10368098318576813, + "rewards/margins": 0.03215476870536804, + "rewards/rejected": -0.13583573698997498, + "step": 555 + }, + { + "epoch": 0.15751353631952747, + "grad_norm": 0.64453125, + "learning_rate": 4.9499978630808175e-06, + "log_odds_chosen": 0.20077376067638397, + "log_odds_ratio": -0.6271744966506958, + "logits/chosen": 0.06538190692663193, + "logits/rejected": -0.004076042678207159, + "logps/chosen": -0.9929560422897339, + "logps/rejected": -1.1243157386779785, + "loss": 1.4502, + "nll_loss": 1.4301426410675049, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.09929562360048294, + "rewards/margins": 0.013135967776179314, + "rewards/rejected": -0.1124315857887268, + "step": 560 + }, + { + "epoch": 0.15891990717952323, + "grad_norm": 0.87109375, + "learning_rate": 4.9475254556379735e-06, + "log_odds_chosen": 0.15719819068908691, + "log_odds_ratio": -0.6915684938430786, + "logits/chosen": 0.20628222823143005, + "logits/rejected": -0.013370787724852562, + "logps/chosen": -0.9851440191268921, + "logps/rejected": -1.0367156267166138, + "loss": 1.4677, + "nll_loss": 1.3407604694366455, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.09851441532373428, + "rewards/margins": 0.005157156381756067, + "rewards/rejected": -0.10367156565189362, + "step": 565 + }, + { + "epoch": 0.16032627803951902, + "grad_norm": 0.765625, + "learning_rate": 4.944994036564048e-06, + "log_odds_chosen": 0.5135098695755005, + "log_odds_ratio": -0.597516655921936, + "logits/chosen": 0.05508983135223389, + "logits/rejected": 0.02078302577137947, + "logps/chosen": -0.8372930288314819, + "logps/rejected": -1.0782281160354614, + "loss": 1.4411, + "nll_loss": 1.2578853368759155, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.08372931182384491, + "rewards/margins": 0.024093495681881905, + "rewards/rejected": -0.10782281309366226, + "step": 570 + }, + { + "epoch": 0.1617326488995148, + "grad_norm": 0.6796875, + "learning_rate": 4.94240366689341e-06, + "log_odds_chosen": 0.1643310785293579, + "log_odds_ratio": -0.73926842212677, + "logits/chosen": -0.18839290738105774, + "logits/rejected": 0.1574762910604477, + "logps/chosen": -1.1360965967178345, + "logps/rejected": -1.206714391708374, + "loss": 1.4524, + "nll_loss": 1.433528184890747, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.1136096715927124, + "rewards/margins": 0.007061791606247425, + "rewards/rejected": -0.12067146599292755, + "step": 575 + }, + { + "epoch": 0.1631390197595106, + "grad_norm": 1.4140625, + "learning_rate": 4.939754409081768e-06, + "log_odds_chosen": 0.1948034018278122, + "log_odds_ratio": -0.6415718793869019, + "logits/chosen": 0.2775643467903137, + "logits/rejected": -0.08088856935501099, + "logps/chosen": -0.9604623913764954, + "logps/rejected": -1.1055552959442139, + "loss": 1.4866, + "nll_loss": 1.2623450756072998, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.09604625403881073, + "rewards/margins": 0.014509303495287895, + "rewards/rejected": -0.11055555194616318, + "step": 580 + }, + { + "epoch": 0.16454539061950638, + "grad_norm": 0.80078125, + "learning_rate": 4.93704632700467e-06, + "log_odds_chosen": 0.10009583085775375, + "log_odds_ratio": -0.7434337735176086, + "logits/chosen": 0.11388511955738068, + "logits/rejected": 0.20686273276805878, + "logps/chosen": -1.0210888385772705, + "logps/rejected": -1.1122162342071533, + "loss": 1.3165, + "nll_loss": 1.2572648525238037, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.102108895778656, + "rewards/margins": 0.009112725965678692, + "rewards/rejected": -0.11122162640094757, + "step": 585 + }, + { + "epoch": 0.16595176147950214, + "grad_norm": 0.796875, + "learning_rate": 4.934279485955955e-06, + "log_odds_chosen": -0.03473677486181259, + "log_odds_ratio": -0.7794255018234253, + "logits/chosen": -0.0030483484733849764, + "logits/rejected": -0.01943325623869896, + "logps/chosen": -1.1814887523651123, + "logps/rejected": -1.1899473667144775, + "loss": 1.4664, + "nll_loss": 1.5186388492584229, + "rewards/accuracies": 0.44999998807907104, + "rewards/chosen": -0.11814887821674347, + "rewards/margins": 0.0008458640659227967, + "rewards/rejected": -0.11899475008249283, + "step": 590 + }, + { + "epoch": 0.16735813233949792, + "grad_norm": 1.3359375, + "learning_rate": 4.9314539526461895e-06, + "log_odds_chosen": -0.05654100328683853, + "log_odds_ratio": -0.9299119114875793, + "logits/chosen": -0.08810718357563019, + "logits/rejected": -0.10039062798023224, + "logps/chosen": -1.084962010383606, + "logps/rejected": -1.0155553817749023, + "loss": 1.5451, + "nll_loss": 1.5907752513885498, + "rewards/accuracies": 0.44999998807907104, + "rewards/chosen": -0.10849620401859283, + "rewards/margins": -0.006940663792192936, + "rewards/rejected": -0.10155554115772247, + "step": 595 + }, + { + "epoch": 0.1687645031994937, + "grad_norm": 0.671875, + "learning_rate": 4.9285697952010496e-06, + "log_odds_chosen": 0.40899768471717834, + "log_odds_ratio": -0.6665030717849731, + "logits/chosen": 0.25060832500457764, + "logits/rejected": 0.05417170375585556, + "logps/chosen": -0.9990217089653015, + "logps/rejected": -1.3330628871917725, + "loss": 1.4087, + "nll_loss": 1.316851019859314, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.09990216791629791, + "rewards/margins": 0.033404115587472916, + "rewards/rejected": -0.13330629467964172, + "step": 600 + }, + { + "epoch": 0.1701708740594895, + "grad_norm": 0.7109375, + "learning_rate": 4.9256270831596835e-06, + "log_odds_chosen": 0.3590291142463684, + "log_odds_ratio": -0.5888271331787109, + "logits/chosen": 0.2401418685913086, + "logits/rejected": -0.08578919619321823, + "logps/chosen": -0.9272798299789429, + "logps/rejected": -1.1708381175994873, + "loss": 1.3763, + "nll_loss": 1.2497119903564453, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.09272798150777817, + "rewards/margins": 0.024355821311473846, + "rewards/rejected": -0.11708381026983261, + "step": 605 + }, + { + "epoch": 0.17157724491948526, + "grad_norm": 0.83984375, + "learning_rate": 4.922625887473034e-06, + "log_odds_chosen": 0.40531492233276367, + "log_odds_ratio": -0.5795220732688904, + "logits/chosen": -0.025524402037262917, + "logits/rejected": 0.0651385709643364, + "logps/chosen": -0.9012480974197388, + "logps/rejected": -1.1731479167938232, + "loss": 1.434, + "nll_loss": 1.1538034677505493, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.09012481570243835, + "rewards/margins": 0.02718997932970524, + "rewards/rejected": -0.11731479316949844, + "step": 610 + }, + { + "epoch": 0.17298361577948104, + "grad_norm": 0.7578125, + "learning_rate": 4.919566280502125e-06, + "log_odds_chosen": 0.03708020970225334, + "log_odds_ratio": -0.7586324214935303, + "logits/chosen": 0.250191867351532, + "logits/rejected": -0.005167156457901001, + "logps/chosen": -0.9644180536270142, + "logps/rejected": -1.0048058032989502, + "loss": 1.4514, + "nll_loss": 1.3735963106155396, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.09644180536270142, + "rewards/margins": 0.004038792569190264, + "rewards/rejected": -0.1004805937409401, + "step": 615 + }, + { + "epoch": 0.17438998663947683, + "grad_norm": 1.21875, + "learning_rate": 4.916448336016324e-06, + "log_odds_chosen": 0.3182252049446106, + "log_odds_ratio": -0.6763931512832642, + "logits/chosen": -0.07354754954576492, + "logits/rejected": -0.07905907928943634, + "logps/chosen": -1.1836451292037964, + "logps/rejected": -1.4507120847702026, + "loss": 1.5021, + "nll_loss": 1.4551026821136475, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.11836449801921844, + "rewards/margins": 0.026706721633672714, + "rewards/rejected": -0.14507122337818146, + "step": 620 + }, + { + "epoch": 0.17579635749947262, + "grad_norm": 0.68359375, + "learning_rate": 4.913272129191554e-06, + "log_odds_chosen": 0.5009862184524536, + "log_odds_ratio": -0.6594285368919373, + "logits/chosen": -0.007948207668960094, + "logits/rejected": -0.08926790207624435, + "logps/chosen": -1.0072381496429443, + "logps/rejected": -1.4435874223709106, + "loss": 1.2778, + "nll_loss": 1.463144063949585, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.10072381794452667, + "rewards/margins": 0.04363492876291275, + "rewards/rejected": -0.14435873925685883, + "step": 625 + }, + { + "epoch": 0.1772027283594684, + "grad_norm": 0.53515625, + "learning_rate": 4.910037736608487e-06, + "log_odds_chosen": 0.9124671816825867, + "log_odds_ratio": -0.4731478691101074, + "logits/chosen": 0.17726007103919983, + "logits/rejected": -0.03324912115931511, + "logps/chosen": -0.6981981992721558, + "logps/rejected": -1.2315181493759155, + "loss": 1.4051, + "nll_loss": 1.3857619762420654, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.06981982290744781, + "rewards/margins": 0.05333200842142105, + "rewards/rejected": -0.12315182387828827, + "step": 630 + }, + { + "epoch": 0.17860909921946416, + "grad_norm": 0.54296875, + "learning_rate": 4.906745236250699e-06, + "log_odds_chosen": 0.30307167768478394, + "log_odds_ratio": -0.639373779296875, + "logits/chosen": -0.1709907501935959, + "logits/rejected": 0.07192268967628479, + "logps/chosen": -1.0287564992904663, + "logps/rejected": -1.2007570266723633, + "loss": 1.4407, + "nll_loss": 1.3757801055908203, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.10287564992904663, + "rewards/margins": 0.01720007322728634, + "rewards/rejected": -0.12007571756839752, + "step": 635 + }, + { + "epoch": 0.18001547007945995, + "grad_norm": 0.56640625, + "learning_rate": 4.903394707502783e-06, + "log_odds_chosen": 0.6449100971221924, + "log_odds_ratio": -0.5406845808029175, + "logits/chosen": -0.1291421502828598, + "logits/rejected": -0.20102617144584656, + "logps/chosen": -0.8549752235412598, + "logps/rejected": -1.3028563261032104, + "loss": 1.4641, + "nll_loss": 1.5151296854019165, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.08549752086400986, + "rewards/margins": 0.04478812217712402, + "rewards/rejected": -0.13028565049171448, + "step": 640 + }, + { + "epoch": 0.18142184093945574, + "grad_norm": 0.5390625, + "learning_rate": 4.899986231148441e-06, + "log_odds_chosen": 0.05296991392970085, + "log_odds_ratio": -0.781247615814209, + "logits/chosen": 0.007375895977020264, + "logits/rejected": 0.02562333643436432, + "logps/chosen": -1.1410400867462158, + "logps/rejected": -1.1650757789611816, + "loss": 1.3886, + "nll_loss": 1.3418514728546143, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.1141040176153183, + "rewards/margins": 0.0024035435635596514, + "rewards/rejected": -0.11650756746530533, + "step": 645 + }, + { + "epoch": 0.18282821179945152, + "grad_norm": 0.8125, + "learning_rate": 4.896519889368535e-06, + "log_odds_chosen": 0.7971788048744202, + "log_odds_ratio": -0.4912826120853424, + "logits/chosen": 0.04553813487291336, + "logits/rejected": -0.10304268449544907, + "logps/chosen": -0.8640382885932922, + "logps/rejected": -1.428315281867981, + "loss": 1.4899, + "nll_loss": 1.4770934581756592, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.08640382438898087, + "rewards/margins": 0.05642770975828171, + "rewards/rejected": -0.14283153414726257, + "step": 650 + }, + { + "epoch": 0.18423458265944728, + "grad_norm": 0.765625, + "learning_rate": 4.892995765739102e-06, + "log_odds_chosen": 0.5198310017585754, + "log_odds_ratio": -0.6252527236938477, + "logits/chosen": 0.14106041193008423, + "logits/rejected": -0.08343149721622467, + "logps/chosen": -1.023341178894043, + "logps/rejected": -1.3469436168670654, + "loss": 1.4602, + "nll_loss": 1.1736913919448853, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.10233412683010101, + "rewards/margins": 0.03236023336648941, + "rewards/rejected": -0.13469436764717102, + "step": 655 + }, + { + "epoch": 0.18564095351944307, + "grad_norm": 0.5703125, + "learning_rate": 4.8894139452293446e-06, + "log_odds_chosen": 0.741043746471405, + "log_odds_ratio": -0.4976826608181, + "logits/chosen": 0.04871377348899841, + "logits/rejected": -0.0666879341006279, + "logps/chosen": -0.7123268246650696, + "logps/rejected": -1.1363470554351807, + "loss": 1.3701, + "nll_loss": 1.330862283706665, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.07123267650604248, + "rewards/margins": 0.04240203648805618, + "rewards/rejected": -0.11363470554351807, + "step": 660 + }, + { + "epoch": 0.18704732437943886, + "grad_norm": 1.453125, + "learning_rate": 4.885774514199578e-06, + "log_odds_chosen": 0.3378247618675232, + "log_odds_ratio": -0.6189150810241699, + "logits/chosen": 0.2529948353767395, + "logits/rejected": -0.014105233363807201, + "logps/chosen": -1.0382691621780396, + "logps/rejected": -1.2872415781021118, + "loss": 1.4639, + "nll_loss": 1.3439507484436035, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.10382692515850067, + "rewards/margins": 0.024897238239645958, + "rewards/rejected": -0.12872417271137238, + "step": 665 + }, + { + "epoch": 0.18845369523943464, + "grad_norm": 0.67578125, + "learning_rate": 4.88207756039915e-06, + "log_odds_chosen": 0.08838365226984024, + "log_odds_ratio": -0.7437705993652344, + "logits/chosen": 0.11691107600927353, + "logits/rejected": 0.023869309574365616, + "logps/chosen": -1.0130348205566406, + "logps/rejected": -1.0903738737106323, + "loss": 1.306, + "nll_loss": 1.2257177829742432, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.10130348056554794, + "rewards/margins": 0.0077339173294603825, + "rewards/rejected": -0.10903739929199219, + "step": 670 + }, + { + "epoch": 0.18986006609943043, + "grad_norm": 1.1875, + "learning_rate": 4.8783231729643234e-06, + "log_odds_chosen": 0.42795419692993164, + "log_odds_ratio": -0.5492271780967712, + "logits/chosen": 0.23825743794441223, + "logits/rejected": -0.2547515034675598, + "logps/chosen": -0.8971956372261047, + "logps/rejected": -1.2151196002960205, + "loss": 1.4355, + "nll_loss": 1.3206568956375122, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.08971955627202988, + "rewards/margins": 0.031792402267456055, + "rewards/rejected": -0.12151195853948593, + "step": 675 + }, + { + "epoch": 0.1912664369594262, + "grad_norm": 0.91015625, + "learning_rate": 4.874511442416128e-06, + "log_odds_chosen": 0.10579367727041245, + "log_odds_ratio": -0.7108926773071289, + "logits/chosen": -0.018588459119200706, + "logits/rejected": 0.0027497292030602694, + "logps/chosen": -1.1075600385665894, + "logps/rejected": -1.2394804954528809, + "loss": 1.4582, + "nll_loss": 1.3830382823944092, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.11075599491596222, + "rewards/margins": 0.013192057609558105, + "rewards/rejected": -0.12394805997610092, + "step": 680 + }, + { + "epoch": 0.19267280781942198, + "grad_norm": 0.671875, + "learning_rate": 4.87064246065818e-06, + "log_odds_chosen": 0.6352590322494507, + "log_odds_ratio": -0.517234206199646, + "logits/chosen": 0.07716906070709229, + "logits/rejected": -0.1858917623758316, + "logps/chosen": -0.9086629152297974, + "logps/rejected": -1.3411760330200195, + "loss": 1.3114, + "nll_loss": 1.2726542949676514, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.09086629003286362, + "rewards/margins": 0.043251316994428635, + "rewards/rejected": -0.13411761820316315, + "step": 685 + }, + { + "epoch": 0.19407917867941776, + "grad_norm": 0.56640625, + "learning_rate": 4.8667163209744625e-06, + "log_odds_chosen": 0.24586375057697296, + "log_odds_ratio": -0.6478797793388367, + "logits/chosen": 0.2538822591304779, + "logits/rejected": 0.0186677984893322, + "logps/chosen": -0.9751268625259399, + "logps/rejected": -1.117552638053894, + "loss": 1.3776, + "nll_loss": 1.2891550064086914, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.09751268476247787, + "rewards/margins": 0.014242582023143768, + "rewards/rejected": -0.11175527423620224, + "step": 690 + }, + { + "epoch": 0.19548554953941355, + "grad_norm": 0.640625, + "learning_rate": 4.862733118027079e-06, + "log_odds_chosen": 0.2813799977302551, + "log_odds_ratio": -0.631136953830719, + "logits/chosen": 0.14162734150886536, + "logits/rejected": -0.15908537805080414, + "logps/chosen": -0.9692428708076477, + "logps/rejected": -1.123443603515625, + "loss": 1.4101, + "nll_loss": 1.2585976123809814, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.09692429006099701, + "rewards/margins": 0.015420079231262207, + "rewards/rejected": -0.11234436929225922, + "step": 695 + }, + { + "epoch": 0.19689192039940934, + "grad_norm": 1.296875, + "learning_rate": 4.858692947853968e-06, + "log_odds_chosen": 0.10032544285058975, + "log_odds_ratio": -0.7577157616615295, + "logits/chosen": 0.11457610130310059, + "logits/rejected": 0.06611824035644531, + "logps/chosen": -1.1749951839447021, + "logps/rejected": -1.2147281169891357, + "loss": 1.3797, + "nll_loss": 1.3682631254196167, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.11749951541423798, + "rewards/margins": 0.003973294049501419, + "rewards/rejected": -0.12147282063961029, + "step": 700 + }, + { + "epoch": 0.1982982912594051, + "grad_norm": 0.6875, + "learning_rate": 4.8545959078665915e-06, + "log_odds_chosen": -0.11273153126239777, + "log_odds_ratio": -0.8723108172416687, + "logits/chosen": 0.12857083976268768, + "logits/rejected": 0.06154397130012512, + "logps/chosen": -0.9911792874336243, + "logps/rejected": -0.9738213419914246, + "loss": 1.4424, + "nll_loss": 1.3504576683044434, + "rewards/accuracies": 0.4000000059604645, + "rewards/chosen": -0.09911791980266571, + "rewards/margins": -0.0017357754986733198, + "rewards/rejected": -0.09738214313983917, + "step": 705 + }, + { + "epoch": 0.19970466211940088, + "grad_norm": 0.62890625, + "learning_rate": 4.850442096847585e-06, + "log_odds_chosen": 0.3669831156730652, + "log_odds_ratio": -0.7217445969581604, + "logits/chosen": 0.14621445536613464, + "logits/rejected": -0.021125638857483864, + "logps/chosen": -0.9672040939331055, + "logps/rejected": -1.272630214691162, + "loss": 1.4287, + "nll_loss": 1.391427993774414, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.09672039747238159, + "rewards/margins": 0.030542617663741112, + "rewards/rejected": -0.12726303935050964, + "step": 710 + }, + { + "epoch": 0.20111103297939667, + "grad_norm": 0.796875, + "learning_rate": 4.846231614948373e-06, + "log_odds_chosen": 0.2515362799167633, + "log_odds_ratio": -0.6542503237724304, + "logits/chosen": 0.24039408564567566, + "logits/rejected": 0.22471091151237488, + "logps/chosen": -0.8539594411849976, + "logps/rejected": -1.0077803134918213, + "loss": 1.3366, + "nll_loss": 1.3203866481781006, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.085395947098732, + "rewards/margins": 0.015382101759314537, + "rewards/rejected": -0.10077805817127228, + "step": 715 + }, + { + "epoch": 0.20251740383939246, + "grad_norm": 1.125, + "learning_rate": 4.841964563686757e-06, + "log_odds_chosen": 0.49359601736068726, + "log_odds_ratio": -0.5489069819450378, + "logits/chosen": 0.24976961314678192, + "logits/rejected": -0.014539213851094246, + "logps/chosen": -0.9639101028442383, + "logps/rejected": -1.3213074207305908, + "loss": 1.3393, + "nll_loss": 1.4156157970428467, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.09639101475477219, + "rewards/margins": 0.03573973849415779, + "rewards/rejected": -0.13213074207305908, + "step": 720 + }, + { + "epoch": 0.20392377469938822, + "grad_norm": 0.64453125, + "learning_rate": 4.83764104594447e-06, + "log_odds_chosen": 0.3946678936481476, + "log_odds_ratio": -0.6607686281204224, + "logits/chosen": -0.019294610247015953, + "logits/rejected": 0.14000949263572693, + "logps/chosen": -0.9210721254348755, + "logps/rejected": -1.1474473476409912, + "loss": 1.3942, + "nll_loss": 1.277699589729309, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.09210722148418427, + "rewards/margins": 0.0226375050842762, + "rewards/rejected": -0.11474472284317017, + "step": 725 + }, + { + "epoch": 0.205330145559384, + "grad_norm": 0.8984375, + "learning_rate": 4.833261165964688e-06, + "log_odds_chosen": 0.33924877643585205, + "log_odds_ratio": -0.6919046640396118, + "logits/chosen": -0.14085440337657928, + "logits/rejected": 0.25654488801956177, + "logps/chosen": -0.865805447101593, + "logps/rejected": -1.049051284790039, + "loss": 1.4256, + "nll_loss": 1.3545150756835938, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.0865805447101593, + "rewards/margins": 0.018324587494134903, + "rewards/rejected": -0.1049051284790039, + "step": 730 + }, + { + "epoch": 0.2067365164193798, + "grad_norm": 0.59375, + "learning_rate": 4.828825029349527e-06, + "log_odds_chosen": 0.3080621361732483, + "log_odds_ratio": -0.6225201487541199, + "logits/chosen": 0.09295627474784851, + "logits/rejected": 0.1192513257265091, + "logps/chosen": -0.9420360326766968, + "logps/rejected": -1.122133493423462, + "loss": 1.3749, + "nll_loss": 1.2215619087219238, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.09420361369848251, + "rewards/margins": 0.018009738996624947, + "rewards/rejected": -0.11221335083246231, + "step": 735 + }, + { + "epoch": 0.20814288727937558, + "grad_norm": 0.8828125, + "learning_rate": 4.8243327430574885e-06, + "log_odds_chosen": 0.5264579057693481, + "log_odds_ratio": -0.5474775433540344, + "logits/chosen": 0.19925551116466522, + "logits/rejected": -0.10264059156179428, + "logps/chosen": -0.9675121307373047, + "logps/rejected": -1.2763597965240479, + "loss": 1.2819, + "nll_loss": 1.2236970663070679, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.09675121307373047, + "rewards/margins": 0.030884766951203346, + "rewards/rejected": -0.12763598561286926, + "step": 740 + }, + { + "epoch": 0.20954925813937136, + "grad_norm": 0.81640625, + "learning_rate": 4.819784415400884e-06, + "log_odds_chosen": 0.3006175458431244, + "log_odds_ratio": -0.6740007400512695, + "logits/chosen": 0.11504560708999634, + "logits/rejected": -0.1865064650774002, + "logps/chosen": -0.8989768028259277, + "logps/rejected": -1.1000462770462036, + "loss": 1.4487, + "nll_loss": 1.4835877418518066, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.08989769965410233, + "rewards/margins": 0.020106937736272812, + "rewards/rejected": -0.11000462621450424, + "step": 745 + }, + { + "epoch": 0.21095562899936712, + "grad_norm": 0.63671875, + "learning_rate": 4.8151801560432255e-06, + "log_odds_chosen": 0.15093275904655457, + "log_odds_ratio": -0.6930734515190125, + "logits/chosen": 0.1037089005112648, + "logits/rejected": 0.005026382394134998, + "logps/chosen": -0.961024284362793, + "logps/rejected": -1.100200891494751, + "loss": 1.4144, + "nll_loss": 1.2849223613739014, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.09610243141651154, + "rewards/margins": 0.013917678967118263, + "rewards/rejected": -0.11002011597156525, + "step": 750 + }, + { + "epoch": 0.2123619998593629, + "grad_norm": 0.59765625, + "learning_rate": 4.810520075996577e-06, + "log_odds_chosen": 0.22566702961921692, + "log_odds_ratio": -0.6738010048866272, + "logits/chosen": 0.11294198036193848, + "logits/rejected": 0.05789243057370186, + "logps/chosen": -0.807028591632843, + "logps/rejected": -0.9467730522155762, + "loss": 1.3684, + "nll_loss": 1.3143486976623535, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.08070285618305206, + "rewards/margins": 0.013974443078041077, + "rewards/rejected": -0.09467729926109314, + "step": 755 + }, + { + "epoch": 0.2137683707193587, + "grad_norm": 0.65234375, + "learning_rate": 4.80580428761888e-06, + "log_odds_chosen": 0.33120518922805786, + "log_odds_ratio": -0.6059235334396362, + "logits/chosen": 0.12496509402990341, + "logits/rejected": -0.2966843247413635, + "logps/chosen": -1.018349289894104, + "logps/rejected": -1.1662967205047607, + "loss": 1.3628, + "nll_loss": 1.3103525638580322, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.10183493793010712, + "rewards/margins": 0.014794737100601196, + "rewards/rejected": -0.11662967503070831, + "step": 760 + }, + { + "epoch": 0.21517474157935448, + "grad_norm": 0.8203125, + "learning_rate": 4.801032904611249e-06, + "log_odds_chosen": 0.17109887301921844, + "log_odds_ratio": -0.7322261929512024, + "logits/chosen": 0.08799419552087784, + "logits/rejected": 0.047529660165309906, + "logps/chosen": -1.0703462362289429, + "logps/rejected": -1.121490716934204, + "loss": 1.4076, + "nll_loss": 1.515067458152771, + "rewards/accuracies": 0.44999998807907104, + "rewards/chosen": -0.10703463852405548, + "rewards/margins": 0.005114448722451925, + "rewards/rejected": -0.11214907467365265, + "step": 765 + }, + { + "epoch": 0.21658111243935024, + "grad_norm": 0.7578125, + "learning_rate": 4.79620604201522e-06, + "log_odds_chosen": 0.5879980325698853, + "log_odds_ratio": -0.5942040681838989, + "logits/chosen": 0.03295837342739105, + "logits/rejected": 0.08761349320411682, + "logps/chosen": -0.9856408834457397, + "logps/rejected": -1.3201617002487183, + "loss": 1.2938, + "nll_loss": 1.329833745956421, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.09856408834457397, + "rewards/margins": 0.033452074974775314, + "rewards/rejected": -0.1320161670446396, + "step": 770 + }, + { + "epoch": 0.21798748329934603, + "grad_norm": 0.7421875, + "learning_rate": 4.791323816209984e-06, + "log_odds_chosen": 0.06851278245449066, + "log_odds_ratio": -0.7348783016204834, + "logits/chosen": 0.24454455077648163, + "logits/rejected": -0.1613084226846695, + "logps/chosen": -1.074292778968811, + "logps/rejected": -1.1023520231246948, + "loss": 1.4124, + "nll_loss": 1.3813612461090088, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.10742926597595215, + "rewards/margins": 0.002805921947583556, + "rewards/rejected": -0.11023519188165665, + "step": 775 + }, + { + "epoch": 0.21939385415934182, + "grad_norm": 1.421875, + "learning_rate": 4.786386344909583e-06, + "log_odds_chosen": 0.5286887884140015, + "log_odds_ratio": -0.551024317741394, + "logits/chosen": 0.10972050577402115, + "logits/rejected": -0.1185971274971962, + "logps/chosen": -1.0658420324325562, + "logps/rejected": -1.467869520187378, + "loss": 1.4933, + "nll_loss": 1.596457839012146, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.10658420622348785, + "rewards/margins": 0.04020275920629501, + "rewards/rejected": -0.14678695797920227, + "step": 780 + }, + { + "epoch": 0.2208002250193376, + "grad_norm": 0.921875, + "learning_rate": 4.781393747160065e-06, + "log_odds_chosen": 0.39119476079940796, + "log_odds_ratio": -0.5956941843032837, + "logits/chosen": 0.2315189391374588, + "logits/rejected": 0.09489177167415619, + "logps/chosen": -0.9102287292480469, + "logps/rejected": -1.1937782764434814, + "loss": 1.3777, + "nll_loss": 1.2680063247680664, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.09102287143468857, + "rewards/margins": 0.02835494838654995, + "rewards/rejected": -0.11937782913446426, + "step": 785 + }, + { + "epoch": 0.2222065958793334, + "grad_norm": 0.6875, + "learning_rate": 4.776346143336616e-06, + "log_odds_chosen": 0.4964830279350281, + "log_odds_ratio": -0.5945664644241333, + "logits/chosen": 0.2937574088573456, + "logits/rejected": -0.03588557988405228, + "logps/chosen": -0.9127880930900574, + "logps/rejected": -1.2661429643630981, + "loss": 1.302, + "nll_loss": 1.1640938520431519, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.0912788063287735, + "rewards/margins": 0.0353354886174202, + "rewards/rejected": -0.1266143023967743, + "step": 790 + }, + { + "epoch": 0.22361296673932915, + "grad_norm": 0.86328125, + "learning_rate": 4.771243655140662e-06, + "log_odds_chosen": 0.20250296592712402, + "log_odds_ratio": -0.697245717048645, + "logits/chosen": 0.1340058445930481, + "logits/rejected": -0.004743742756545544, + "logps/chosen": -0.9509406089782715, + "logps/rejected": -1.1008963584899902, + "loss": 1.2904, + "nll_loss": 1.1836265325546265, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.09509406238794327, + "rewards/margins": 0.014995579607784748, + "rewards/rejected": -0.11008964478969574, + "step": 795 + }, + { + "epoch": 0.22501933759932494, + "grad_norm": 0.6484375, + "learning_rate": 4.766086405596932e-06, + "log_odds_chosen": 0.2294325828552246, + "log_odds_ratio": -0.7108098268508911, + "logits/chosen": -0.09615223109722137, + "logits/rejected": 0.049136556684970856, + "logps/chosen": -1.0668703317642212, + "logps/rejected": -1.175934076309204, + "loss": 1.407, + "nll_loss": 1.3889058828353882, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.1066870465874672, + "rewards/margins": 0.010906368494033813, + "rewards/rejected": -0.11759340763092041, + "step": 800 + }, + { + "epoch": 0.22642570845932072, + "grad_norm": 0.73828125, + "learning_rate": 4.760874519050486e-06, + "log_odds_chosen": 0.2722220718860626, + "log_odds_ratio": -0.6615415811538696, + "logits/chosen": 0.3164128363132477, + "logits/rejected": 0.02606889046728611, + "logps/chosen": -0.8725587129592896, + "logps/rejected": -1.0102307796478271, + "loss": 1.361, + "nll_loss": 1.2800710201263428, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.08725588023662567, + "rewards/margins": 0.013767195865511894, + "rewards/rejected": -0.10102306306362152, + "step": 805 + }, + { + "epoch": 0.2278320793193165, + "grad_norm": 0.4921875, + "learning_rate": 4.755608121163726e-06, + "log_odds_chosen": 0.05866674333810806, + "log_odds_ratio": -0.7410587072372437, + "logits/chosen": 0.20300361514091492, + "logits/rejected": 0.20391185581684113, + "logps/chosen": -0.8736883997917175, + "logps/rejected": -0.8953003883361816, + "loss": 1.3856, + "nll_loss": 1.2794065475463867, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.08736883848905563, + "rewards/margins": 0.0021611980628222227, + "rewards/rejected": -0.08953003585338593, + "step": 810 + }, + { + "epoch": 0.2292384501793123, + "grad_norm": 0.578125, + "learning_rate": 4.750287338913364e-06, + "log_odds_chosen": 0.18778538703918457, + "log_odds_ratio": -0.7045443654060364, + "logits/chosen": 0.2000313699245453, + "logits/rejected": 0.3296867907047272, + "logps/chosen": -0.9048763513565063, + "logps/rejected": -1.0671868324279785, + "loss": 1.276, + "nll_loss": 1.0233865976333618, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.09048764407634735, + "rewards/margins": 0.016231058165431023, + "rewards/rejected": -0.10671870410442352, + "step": 815 + }, + { + "epoch": 0.23064482103930806, + "grad_norm": 0.6171875, + "learning_rate": 4.744912300587354e-06, + "log_odds_chosen": 0.46959003806114197, + "log_odds_ratio": -0.6398900151252747, + "logits/chosen": 0.05968532711267471, + "logits/rejected": -0.033852558583021164, + "logps/chosen": -0.9375017285346985, + "logps/rejected": -1.2653578519821167, + "loss": 1.3935, + "nll_loss": 1.3479769229888916, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.09375017881393433, + "rewards/margins": 0.032785605639219284, + "rewards/rejected": -0.1265358030796051, + "step": 820 + }, + { + "epoch": 0.23205119189930384, + "grad_norm": 0.6796875, + "learning_rate": 4.739483135781807e-06, + "log_odds_chosen": 0.4090171754360199, + "log_odds_ratio": -0.6206759214401245, + "logits/chosen": 0.058540333062410355, + "logits/rejected": 0.16028887033462524, + "logps/chosen": -0.8152543306350708, + "logps/rejected": -1.080603003501892, + "loss": 1.4404, + "nll_loss": 1.306620478630066, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.08152543008327484, + "rewards/margins": 0.026534873992204666, + "rewards/rejected": -0.1080603152513504, + "step": 825 + }, + { + "epoch": 0.23345756275929963, + "grad_norm": 0.59765625, + "learning_rate": 4.733999975397862e-06, + "log_odds_chosen": -0.07625510543584824, + "log_odds_ratio": -0.784987211227417, + "logits/chosen": 0.191916361451149, + "logits/rejected": 0.08183418214321136, + "logps/chosen": -1.00628662109375, + "logps/rejected": -0.9959591031074524, + "loss": 1.3965, + "nll_loss": 1.2619175910949707, + "rewards/accuracies": 0.3499999940395355, + "rewards/chosen": -0.10062865167856216, + "rewards/margins": -0.001032742322422564, + "rewards/rejected": -0.09959591180086136, + "step": 830 + }, + { + "epoch": 0.23486393361929542, + "grad_norm": 0.66015625, + "learning_rate": 4.728462951638531e-06, + "log_odds_chosen": 0.22590819001197815, + "log_odds_ratio": -0.6560646295547485, + "logits/chosen": 0.020906496793031693, + "logits/rejected": 0.11052076518535614, + "logps/chosen": -0.8826691508293152, + "logps/rejected": -1.0015770196914673, + "loss": 1.4155, + "nll_loss": 1.2367753982543945, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.08826692402362823, + "rewards/margins": 0.011890767142176628, + "rewards/rejected": -0.10015769302845001, + "step": 835 + }, + { + "epoch": 0.23627030447929118, + "grad_norm": 0.640625, + "learning_rate": 4.722872198005514e-06, + "log_odds_chosen": 0.17907896637916565, + "log_odds_ratio": -0.7391910552978516, + "logits/chosen": -0.023514145985245705, + "logits/rejected": 0.08931633830070496, + "logps/chosen": -1.0978233814239502, + "logps/rejected": -1.1743838787078857, + "loss": 1.4892, + "nll_loss": 1.3842908143997192, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.10978235304355621, + "rewards/margins": 0.007656055502593517, + "rewards/rejected": -0.11743839085102081, + "step": 840 + }, + { + "epoch": 0.23767667533928696, + "grad_norm": 1.8203125, + "learning_rate": 4.717227849295972e-06, + "log_odds_chosen": 0.6538313627243042, + "log_odds_ratio": -0.5776088237762451, + "logits/chosen": 0.21445605158805847, + "logits/rejected": -0.05735556036233902, + "logps/chosen": -0.9003429412841797, + "logps/rejected": -1.4265720844268799, + "loss": 1.4124, + "nll_loss": 1.4089030027389526, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.09003429114818573, + "rewards/margins": 0.05262289568781853, + "rewards/rejected": -0.14265719056129456, + "step": 845 + }, + { + "epoch": 0.23908304619928275, + "grad_norm": 0.62890625, + "learning_rate": 4.711530041599287e-06, + "log_odds_chosen": 0.5957034826278687, + "log_odds_ratio": -0.5471062660217285, + "logits/chosen": 0.18590515851974487, + "logits/rejected": 0.05791671946644783, + "logps/chosen": -0.852972149848938, + "logps/rejected": -1.2458776235580444, + "loss": 1.2858, + "nll_loss": 1.1510000228881836, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.08529721200466156, + "rewards/margins": 0.03929056599736214, + "rewards/rejected": -0.1245877742767334, + "step": 850 + }, + { + "epoch": 0.24048941705927854, + "grad_norm": 1.0625, + "learning_rate": 4.705778912293777e-06, + "log_odds_chosen": 0.06852801144123077, + "log_odds_ratio": -0.7809044718742371, + "logits/chosen": -0.08191190659999847, + "logits/rejected": 0.04405444115400314, + "logps/chosen": -1.0600035190582275, + "logps/rejected": -1.1098763942718506, + "loss": 1.3292, + "nll_loss": 1.3718576431274414, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.10600034892559052, + "rewards/margins": 0.004987289663404226, + "rewards/rejected": -0.11098764091730118, + "step": 855 + }, + { + "epoch": 0.24189578791927432, + "grad_norm": 0.6796875, + "learning_rate": 4.699974600043378e-06, + "log_odds_chosen": 0.2891826629638672, + "log_odds_ratio": -0.6256308555603027, + "logits/chosen": 0.07481182366609573, + "logits/rejected": 0.04786193370819092, + "logps/chosen": -0.7966269254684448, + "logps/rejected": -0.9316326379776001, + "loss": 1.3299, + "nll_loss": 1.2451672554016113, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.07966269552707672, + "rewards/margins": 0.013500571250915527, + "rewards/rejected": -0.09316325932741165, + "step": 860 + }, + { + "epoch": 0.24330215877927008, + "grad_norm": 0.78125, + "learning_rate": 4.694117244794311e-06, + "log_odds_chosen": 0.19615532457828522, + "log_odds_ratio": -0.6909521818161011, + "logits/chosen": 0.3612063229084015, + "logits/rejected": 0.14174401760101318, + "logps/chosen": -0.881375789642334, + "logps/rejected": -1.0324804782867432, + "loss": 1.342, + "nll_loss": 1.1763957738876343, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.08813757449388504, + "rewards/margins": 0.01511046290397644, + "rewards/rejected": -0.10324803739786148, + "step": 865 + }, + { + "epoch": 0.24470852963926587, + "grad_norm": 0.4375, + "learning_rate": 4.6882069877717e-06, + "log_odds_chosen": 0.55852872133255, + "log_odds_ratio": -0.5228425860404968, + "logits/chosen": 0.2908174395561218, + "logits/rejected": 0.044635575264692307, + "logps/chosen": -0.7578222155570984, + "logps/rejected": -1.0437644720077515, + "loss": 1.3032, + "nll_loss": 1.1439415216445923, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.07578221708536148, + "rewards/margins": 0.028594231233000755, + "rewards/rejected": -0.10437645763158798, + "step": 870 + }, + { + "epoch": 0.24611490049926166, + "grad_norm": 0.48828125, + "learning_rate": 4.68224397147617e-06, + "log_odds_chosen": 0.20548930764198303, + "log_odds_ratio": -0.7238609790802002, + "logits/chosen": 0.028538722544908524, + "logits/rejected": -0.13830550014972687, + "logps/chosen": -0.8864482641220093, + "logps/rejected": -0.9824331402778625, + "loss": 1.3505, + "nll_loss": 1.33633291721344, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.0886448323726654, + "rewards/margins": 0.00959849078208208, + "rewards/rejected": -0.09824331849813461, + "step": 875 + }, + { + "epoch": 0.24752127135925744, + "grad_norm": 0.67578125, + "learning_rate": 4.67622833968041e-06, + "log_odds_chosen": 0.24598467350006104, + "log_odds_ratio": -0.6492888927459717, + "logits/chosen": 0.11724593490362167, + "logits/rejected": -0.10359089076519012, + "logps/chosen": -0.8895597457885742, + "logps/rejected": -1.0241923332214355, + "loss": 1.3641, + "nll_loss": 1.3283023834228516, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.08895598351955414, + "rewards/margins": 0.013463238254189491, + "rewards/rejected": -0.10241921991109848, + "step": 880 + }, + { + "epoch": 0.2489276422192532, + "grad_norm": 0.796875, + "learning_rate": 4.670160237425709e-06, + "log_odds_chosen": 0.3207041621208191, + "log_odds_ratio": -0.6082428693771362, + "logits/chosen": 0.1917845606803894, + "logits/rejected": 0.04012478515505791, + "logps/chosen": -0.9117262959480286, + "logps/rejected": -1.131466269493103, + "loss": 1.3221, + "nll_loss": 1.2137980461120605, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.09117262810468674, + "rewards/margins": 0.021974004805088043, + "rewards/rejected": -0.11314662545919418, + "step": 885 + }, + { + "epoch": 0.250334013079249, + "grad_norm": 0.5390625, + "learning_rate": 4.6640398110184546e-06, + "log_odds_chosen": 0.24420297145843506, + "log_odds_ratio": -0.6132012605667114, + "logits/chosen": 0.15292124450206757, + "logits/rejected": 0.04410483315587044, + "logps/chosen": -0.8954147100448608, + "logps/rejected": -1.0340155363082886, + "loss": 1.3204, + "nll_loss": 1.1968119144439697, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.0895414799451828, + "rewards/margins": 0.013860085979104042, + "rewards/rejected": -0.1034015566110611, + "step": 890 + }, + { + "epoch": 0.2517403839392448, + "grad_norm": 0.80859375, + "learning_rate": 4.657867208026612e-06, + "log_odds_chosen": 0.48092031478881836, + "log_odds_ratio": -0.5752378702163696, + "logits/chosen": 0.18317563831806183, + "logits/rejected": 0.06989286839962006, + "logps/chosen": -0.7608040571212769, + "logps/rejected": -1.0253283977508545, + "loss": 1.3631, + "nll_loss": 1.3409475088119507, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.07608039677143097, + "rewards/margins": 0.026452431455254555, + "rewards/rejected": -0.10253284126520157, + "step": 895 + }, + { + "epoch": 0.25314675479924054, + "grad_norm": 0.6484375, + "learning_rate": 4.651642577276157e-06, + "log_odds_chosen": 0.10233037173748016, + "log_odds_ratio": -0.8109905123710632, + "logits/chosen": 0.05576135590672493, + "logits/rejected": -0.31019073724746704, + "logps/chosen": -1.1359487771987915, + "logps/rejected": -1.205318808555603, + "loss": 1.4058, + "nll_loss": 1.4091219902038574, + "rewards/accuracies": 0.44999998807907104, + "rewards/chosen": -0.11359486728906631, + "rewards/margins": 0.006937010679394007, + "rewards/rejected": -0.12053187936544418, + "step": 900 + }, + { + "epoch": 0.2545531256592363, + "grad_norm": 1.1640625, + "learning_rate": 4.645366068847495e-06, + "log_odds_chosen": 0.49762052297592163, + "log_odds_ratio": -0.5452755689620972, + "logits/chosen": 0.3103446066379547, + "logits/rejected": -0.05523936077952385, + "logps/chosen": -0.8181917071342468, + "logps/rejected": -1.0620474815368652, + "loss": 1.3866, + "nll_loss": 1.1911251544952393, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.08181916922330856, + "rewards/margins": 0.024385575205087662, + "rewards/rejected": -0.10620476305484772, + "step": 905 + }, + { + "epoch": 0.2559594965192321, + "grad_norm": 1.1328125, + "learning_rate": 4.639037834071843e-06, + "log_odds_chosen": 0.11570564657449722, + "log_odds_ratio": -0.7543329000473022, + "logits/chosen": 0.18902553617954254, + "logits/rejected": 0.06252843141555786, + "logps/chosen": -1.1758387088775635, + "logps/rejected": -1.215280532836914, + "loss": 1.365, + "nll_loss": 1.3247127532958984, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.11758387088775635, + "rewards/margins": 0.003944178577512503, + "rewards/rejected": -0.12152805179357529, + "step": 910 + }, + { + "epoch": 0.2573658673792279, + "grad_norm": 0.74609375, + "learning_rate": 4.6326580255275755e-06, + "log_odds_chosen": 0.1841403841972351, + "log_odds_ratio": -0.7191182374954224, + "logits/chosen": 0.03459787741303444, + "logits/rejected": -0.11460791528224945, + "logps/chosen": -1.063010334968567, + "logps/rejected": -1.1565712690353394, + "loss": 1.3794, + "nll_loss": 1.4235786199569702, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.10630103200674057, + "rewards/margins": 0.00935608334839344, + "rewards/rejected": -0.11565710604190826, + "step": 915 + }, + { + "epoch": 0.2587722382392237, + "grad_norm": 0.875, + "learning_rate": 4.626226797036547e-06, + "log_odds_chosen": 0.3253302574157715, + "log_odds_ratio": -0.6299694180488586, + "logits/chosen": 0.1421690285205841, + "logits/rejected": -0.04063946381211281, + "logps/chosen": -0.8765993118286133, + "logps/rejected": -1.1254886388778687, + "loss": 1.3979, + "nll_loss": 1.1592350006103516, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.08765992522239685, + "rewards/margins": 0.024888943880796432, + "rewards/rejected": -0.11254886537790298, + "step": 920 + }, + { + "epoch": 0.26017860909921947, + "grad_norm": 0.8359375, + "learning_rate": 4.619744303660386e-06, + "log_odds_chosen": 0.8051989674568176, + "log_odds_ratio": -0.47940540313720703, + "logits/chosen": 0.19530947506427765, + "logits/rejected": -0.23512431979179382, + "logps/chosen": -0.8018245697021484, + "logps/rejected": -1.2894532680511475, + "loss": 1.4028, + "nll_loss": 1.2535035610198975, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.08018245548009872, + "rewards/margins": 0.04876288026571274, + "rewards/rejected": -0.12894532084465027, + "step": 925 + }, + { + "epoch": 0.26158497995921526, + "grad_norm": 0.63671875, + "learning_rate": 4.6132107016967565e-06, + "log_odds_chosen": 0.6038027405738831, + "log_odds_ratio": -0.5177197456359863, + "logits/chosen": 0.23678426444530487, + "logits/rejected": -0.09270961582660675, + "logps/chosen": -0.8819047212600708, + "logps/rejected": -1.276870608329773, + "loss": 1.3713, + "nll_loss": 1.2016524076461792, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.0881904736161232, + "rewards/margins": 0.03949659690260887, + "rewards/rejected": -0.12768706679344177, + "step": 930 + }, + { + "epoch": 0.26299135081921104, + "grad_norm": 0.9375, + "learning_rate": 4.606626148675585e-06, + "log_odds_chosen": 0.45221585035324097, + "log_odds_ratio": -0.5754260420799255, + "logits/chosen": 0.08490542322397232, + "logits/rejected": 0.010112226009368896, + "logps/chosen": -0.8532091379165649, + "logps/rejected": -1.138056993484497, + "loss": 1.3425, + "nll_loss": 1.079709768295288, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.08532091230154037, + "rewards/margins": 0.028484785929322243, + "rewards/rejected": -0.11380569636821747, + "step": 935 + }, + { + "epoch": 0.26439772167920683, + "grad_norm": 0.671875, + "learning_rate": 4.599990803355267e-06, + "log_odds_chosen": 0.2118256539106369, + "log_odds_ratio": -0.6624730229377747, + "logits/chosen": 0.3913845717906952, + "logits/rejected": 0.11448683589696884, + "logps/chosen": -0.965084433555603, + "logps/rejected": -1.144004464149475, + "loss": 1.3229, + "nll_loss": 1.1798003911972046, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.0965084433555603, + "rewards/margins": 0.017892012372612953, + "rewards/rejected": -0.11440044641494751, + "step": 940 + }, + { + "epoch": 0.26580409253920256, + "grad_norm": 0.86328125, + "learning_rate": 4.5933048257188385e-06, + "log_odds_chosen": 0.6222201585769653, + "log_odds_ratio": -0.5195111036300659, + "logits/chosen": 0.25307655334472656, + "logits/rejected": -0.07342733442783356, + "logps/chosen": -0.7892023921012878, + "logps/rejected": -1.1987894773483276, + "loss": 1.2383, + "nll_loss": 1.1316816806793213, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.07892025262117386, + "rewards/margins": 0.0409587137401104, + "rewards/rejected": -0.11987896263599396, + "step": 945 + }, + { + "epoch": 0.26721046339919835, + "grad_norm": 0.828125, + "learning_rate": 4.586568376970115e-06, + "log_odds_chosen": 0.5186957120895386, + "log_odds_ratio": -0.591475248336792, + "logits/chosen": 0.09654757380485535, + "logits/rejected": -0.09392206370830536, + "logps/chosen": -0.8592597246170044, + "logps/rejected": -1.1736754179000854, + "loss": 1.3742, + "nll_loss": 1.428426742553711, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.08592596650123596, + "rewards/margins": 0.031441580504179, + "rewards/rejected": -0.11736755073070526, + "step": 950 + }, + { + "epoch": 0.26861683425919414, + "grad_norm": 0.921875, + "learning_rate": 4.57978161952981e-06, + "log_odds_chosen": 0.1481349766254425, + "log_odds_ratio": -0.6983728408813477, + "logits/chosen": 0.15174202620983124, + "logits/rejected": 0.04254768043756485, + "logps/chosen": -0.9679096341133118, + "logps/rejected": -1.0594542026519775, + "loss": 1.3007, + "nll_loss": 1.2575373649597168, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.09679095447063446, + "rewards/margins": 0.009154459461569786, + "rewards/rejected": -0.10594542324542999, + "step": 955 + }, + { + "epoch": 0.2700232051191899, + "grad_norm": 1.015625, + "learning_rate": 4.572944717031615e-06, + "log_odds_chosen": 0.1742623746395111, + "log_odds_ratio": -0.6798437833786011, + "logits/chosen": -0.0789838507771492, + "logits/rejected": -0.35425859689712524, + "logps/chosen": -0.9721781611442566, + "logps/rejected": -1.0981299877166748, + "loss": 1.4649, + "nll_loss": 1.488358736038208, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.09721782058477402, + "rewards/margins": 0.012595164589583874, + "rewards/rejected": -0.10981299728155136, + "step": 960 + }, + { + "epoch": 0.2714295759791857, + "grad_norm": 0.6328125, + "learning_rate": 4.566057834318256e-06, + "log_odds_chosen": 0.2752152681350708, + "log_odds_ratio": -0.6796419620513916, + "logits/chosen": -0.16116170585155487, + "logits/rejected": 0.11222386360168457, + "logps/chosen": -0.8004133105278015, + "logps/rejected": -0.9160813093185425, + "loss": 1.3356, + "nll_loss": 1.340496301651001, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.08004133403301239, + "rewards/margins": 0.01156679354608059, + "rewards/rejected": -0.09160811454057693, + "step": 965 + }, + { + "epoch": 0.2728359468391815, + "grad_norm": 1.0703125, + "learning_rate": 4.559121137437518e-06, + "log_odds_chosen": 0.028671523556113243, + "log_odds_ratio": -0.7715519070625305, + "logits/chosen": -0.004032718483358622, + "logits/rejected": 0.037678755819797516, + "logps/chosen": -0.9671252369880676, + "logps/rejected": -0.9393098950386047, + "loss": 1.377, + "nll_loss": 1.2385261058807373, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.09671252220869064, + "rewards/margins": -0.0027815198991447687, + "rewards/rejected": -0.09393098950386047, + "step": 970 + }, + { + "epoch": 0.2742423176991773, + "grad_norm": 1.140625, + "learning_rate": 4.552134793638244e-06, + "log_odds_chosen": 0.4256436824798584, + "log_odds_ratio": -0.6906386017799377, + "logits/chosen": -0.00512584438547492, + "logits/rejected": -0.10282160341739655, + "logps/chosen": -0.7688851356506348, + "logps/rejected": -1.0349538326263428, + "loss": 1.4431, + "nll_loss": 1.375449299812317, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.07688851654529572, + "rewards/margins": 0.026606876403093338, + "rewards/rejected": -0.10349539667367935, + "step": 975 + }, + { + "epoch": 0.27564868855917307, + "grad_norm": 1.03125, + "learning_rate": 4.545098971366298e-06, + "log_odds_chosen": 0.23097069561481476, + "log_odds_ratio": -0.6853961944580078, + "logits/chosen": 0.07004253566265106, + "logits/rejected": -0.0871606096625328, + "logps/chosen": -1.0082285404205322, + "logps/rejected": -1.2149624824523926, + "loss": 1.2518, + "nll_loss": 1.2343276739120483, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.10082285106182098, + "rewards/margins": 0.020673388615250587, + "rewards/rejected": -0.12149624526500702, + "step": 980 + }, + { + "epoch": 0.27705505941916886, + "grad_norm": 0.85546875, + "learning_rate": 4.538013840260508e-06, + "log_odds_chosen": -0.06891898065805435, + "log_odds_ratio": -0.7656804323196411, + "logits/chosen": -0.02114402875304222, + "logits/rejected": 0.011167839169502258, + "logps/chosen": -1.045703411102295, + "logps/rejected": -0.9952338933944702, + "loss": 1.3659, + "nll_loss": 1.2949765920639038, + "rewards/accuracies": 0.4000000059604645, + "rewards/chosen": -0.10457032918930054, + "rewards/margins": -0.00504694040864706, + "rewards/rejected": -0.0995233878493309, + "step": 985 + }, + { + "epoch": 0.2784614302791646, + "grad_norm": 0.71875, + "learning_rate": 4.530879571148572e-06, + "log_odds_chosen": 0.3072592616081238, + "log_odds_ratio": -0.6513957977294922, + "logits/chosen": 0.17522796988487244, + "logits/rejected": -0.03323373943567276, + "logps/chosen": -1.0087103843688965, + "logps/rejected": -1.2768394947052002, + "loss": 1.2754, + "nll_loss": 1.3225579261779785, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.10087104141712189, + "rewards/margins": 0.02681291475892067, + "rewards/rejected": -0.12768395245075226, + "step": 990 + }, + { + "epoch": 0.2798678011391604, + "grad_norm": 0.9453125, + "learning_rate": 4.523696336042945e-06, + "log_odds_chosen": 0.38424450159072876, + "log_odds_ratio": -0.6066820621490479, + "logits/chosen": 0.11725147068500519, + "logits/rejected": -0.1419847458600998, + "logps/chosen": -0.9277389645576477, + "logps/rejected": -1.226851224899292, + "loss": 1.2525, + "nll_loss": 1.1180111169815063, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.09277389943599701, + "rewards/margins": 0.029911210760474205, + "rewards/rejected": -0.12268511205911636, + "step": 995 + }, + { + "epoch": 0.28127417199915616, + "grad_norm": 0.83203125, + "learning_rate": 4.5164643081366844e-06, + "log_odds_chosen": 0.21584255993366241, + "log_odds_ratio": -0.6696838140487671, + "logits/chosen": 0.0936116874217987, + "logits/rejected": 0.20582985877990723, + "logps/chosen": -0.8180558085441589, + "logps/rejected": -0.8953365087509155, + "loss": 1.3023, + "nll_loss": 1.1692216396331787, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.08180558681488037, + "rewards/margins": 0.007728065364062786, + "rewards/rejected": -0.08953364193439484, + "step": 1000 + }, + { + "epoch": 0.28268054285915195, + "grad_norm": 1.3828125, + "learning_rate": 4.509183661799279e-06, + "log_odds_chosen": 0.3438703417778015, + "log_odds_ratio": -0.6086449027061462, + "logits/chosen": -0.15333302319049835, + "logits/rejected": 0.09063899517059326, + "logps/chosen": -0.8000918626785278, + "logps/rejected": -1.026597023010254, + "loss": 1.3283, + "nll_loss": 1.3134121894836426, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.08000917732715607, + "rewards/margins": 0.022650521248579025, + "rewards/rejected": -0.10265970230102539, + "step": 1005 + }, + { + "epoch": 0.28408691371914774, + "grad_norm": 0.9453125, + "learning_rate": 4.501854572572445e-06, + "log_odds_chosen": 0.28882235288619995, + "log_odds_ratio": -0.6323953866958618, + "logits/chosen": 0.02059057354927063, + "logits/rejected": 0.10210821777582169, + "logps/chosen": -0.9650726318359375, + "logps/rejected": -1.1212360858917236, + "loss": 1.2868, + "nll_loss": 1.2516919374465942, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.09650726616382599, + "rewards/margins": 0.015616334974765778, + "rewards/rejected": -0.11212359368801117, + "step": 1010 + }, + { + "epoch": 0.2854932845791435, + "grad_norm": 0.62890625, + "learning_rate": 4.494477217165889e-06, + "log_odds_chosen": 0.5828167796134949, + "log_odds_ratio": -0.6320462822914124, + "logits/chosen": -0.007489413022994995, + "logits/rejected": -0.10742165893316269, + "logps/chosen": -0.7481693029403687, + "logps/rejected": -1.1290299892425537, + "loss": 1.4211, + "nll_loss": 1.2319283485412598, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.07481692731380463, + "rewards/margins": 0.038086071610450745, + "rewards/rejected": -0.11290299892425537, + "step": 1015 + }, + { + "epoch": 0.2868996554391393, + "grad_norm": 1.0546875, + "learning_rate": 4.487051773453054e-06, + "log_odds_chosen": 0.049561046063899994, + "log_odds_ratio": -0.7927902936935425, + "logits/chosen": 0.15820041298866272, + "logits/rejected": -0.08799884468317032, + "logps/chosen": -1.0042955875396729, + "logps/rejected": -1.0792685747146606, + "loss": 1.3464, + "nll_loss": 1.2037123441696167, + "rewards/accuracies": 0.44999998807907104, + "rewards/chosen": -0.10042955726385117, + "rewards/margins": 0.007497308310121298, + "rewards/rejected": -0.1079268679022789, + "step": 1020 + }, + { + "epoch": 0.2883060262991351, + "grad_norm": 0.75, + "learning_rate": 4.479578420466824e-06, + "log_odds_chosen": 0.2891156077384949, + "log_odds_ratio": -0.6404193043708801, + "logits/chosen": 0.15136688947677612, + "logits/rejected": -0.2594587206840515, + "logps/chosen": -0.8895992040634155, + "logps/rejected": -1.109318733215332, + "loss": 1.309, + "nll_loss": 1.2559032440185547, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.08895992487668991, + "rewards/margins": 0.021971937268972397, + "rewards/rejected": -0.11093185842037201, + "step": 1025 + }, + { + "epoch": 0.2897123971591309, + "grad_norm": 1.328125, + "learning_rate": 4.472057338395214e-06, + "log_odds_chosen": 0.2149926722049713, + "log_odds_ratio": -0.6861375570297241, + "logits/chosen": 0.03491468355059624, + "logits/rejected": -0.005147813353687525, + "logps/chosen": -0.8401373624801636, + "logps/rejected": -0.9184094667434692, + "loss": 1.3409, + "nll_loss": 1.316861629486084, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.08401374518871307, + "rewards/margins": 0.007827198132872581, + "rewards/rejected": -0.0918409451842308, + "step": 1030 + }, + { + "epoch": 0.2911187680191266, + "grad_norm": 1.0078125, + "learning_rate": 4.464488708577019e-06, + "log_odds_chosen": 0.3361745774745941, + "log_odds_ratio": -0.6563747525215149, + "logits/chosen": 0.0792590007185936, + "logits/rejected": -0.048874109983444214, + "logps/chosen": -1.068371057510376, + "logps/rejected": -1.257805585861206, + "loss": 1.3291, + "nll_loss": 1.420210838317871, + "rewards/accuracies": 0.44999998807907104, + "rewards/chosen": -0.10683709383010864, + "rewards/margins": 0.01894346997141838, + "rewards/rejected": -0.12578055262565613, + "step": 1035 + }, + { + "epoch": 0.2925251388791224, + "grad_norm": 0.9296875, + "learning_rate": 4.456872713497447e-06, + "log_odds_chosen": 0.015982721000909805, + "log_odds_ratio": -0.7759819030761719, + "logits/chosen": -0.03836756944656372, + "logits/rejected": -0.07915479689836502, + "logps/chosen": -0.9412604570388794, + "logps/rejected": -1.0049259662628174, + "loss": 1.3297, + "nll_loss": 1.4640296697616577, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.09412603825330734, + "rewards/margins": 0.0063665686175227165, + "rewards/rejected": -0.10049261152744293, + "step": 1040 + }, + { + "epoch": 0.2939315097391182, + "grad_norm": 0.7734375, + "learning_rate": 4.449209536783718e-06, + "log_odds_chosen": 0.45826300978660583, + "log_odds_ratio": -0.5908263921737671, + "logits/chosen": 0.1461760699748993, + "logits/rejected": 0.0322549007833004, + "logps/chosen": -0.8778446912765503, + "logps/rejected": -1.1638209819793701, + "loss": 1.4564, + "nll_loss": 1.1667400598526, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.08778446912765503, + "rewards/margins": 0.028597641736268997, + "rewards/rejected": -0.11638212203979492, + "step": 1045 + }, + { + "epoch": 0.295337880599114, + "grad_norm": 1.421875, + "learning_rate": 4.441499363200632e-06, + "log_odds_chosen": 0.10152752697467804, + "log_odds_ratio": -0.8112316131591797, + "logits/chosen": 0.00877746008336544, + "logits/rejected": -0.027704555541276932, + "logps/chosen": -0.8361288905143738, + "logps/rejected": -0.9080076217651367, + "loss": 1.3844, + "nll_loss": 1.109171986579895, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.08361288905143738, + "rewards/margins": 0.0071878740563988686, + "rewards/rejected": -0.09080077707767487, + "step": 1050 + }, + { + "epoch": 0.29674425145910976, + "grad_norm": 1.15625, + "learning_rate": 4.433742378646122e-06, + "log_odds_chosen": 0.6703130602836609, + "log_odds_ratio": -0.5025144815444946, + "logits/chosen": 0.05710332840681076, + "logits/rejected": 0.0904233530163765, + "logps/chosen": -0.9191819429397583, + "logps/rejected": -1.3129583597183228, + "loss": 1.2766, + "nll_loss": 1.1437983512878418, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.09191820025444031, + "rewards/margins": 0.039377644658088684, + "rewards/rejected": -0.1312958300113678, + "step": 1055 + }, + { + "epoch": 0.29815062231910555, + "grad_norm": 1.140625, + "learning_rate": 4.425938770146765e-06, + "log_odds_chosen": 0.5356465578079224, + "log_odds_ratio": -0.5849270224571228, + "logits/chosen": 0.2629484236240387, + "logits/rejected": -0.29369235038757324, + "logps/chosen": -0.9397749900817871, + "logps/rejected": -1.3595625162124634, + "loss": 1.3173, + "nll_loss": 1.2218631505966187, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.09397749602794647, + "rewards/margins": 0.041978754103183746, + "rewards/rejected": -0.13595624268054962, + "step": 1060 + }, + { + "epoch": 0.29955699317910134, + "grad_norm": 0.90234375, + "learning_rate": 4.418088725853278e-06, + "log_odds_chosen": 0.5873881578445435, + "log_odds_ratio": -0.553581178188324, + "logits/chosen": 0.1879061758518219, + "logits/rejected": 0.029256004840135574, + "logps/chosen": -0.7824908494949341, + "logps/rejected": -1.1668497323989868, + "loss": 1.3333, + "nll_loss": 1.2202861309051514, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.07824908196926117, + "rewards/margins": 0.038435906171798706, + "rewards/rejected": -0.11668499559164047, + "step": 1065 + }, + { + "epoch": 0.3009633640390971, + "grad_norm": 0.68359375, + "learning_rate": 4.4101924350359755e-06, + "log_odds_chosen": 0.22440704703330994, + "log_odds_ratio": -0.6922354102134705, + "logits/chosen": 0.2807529866695404, + "logits/rejected": -0.06608657538890839, + "logps/chosen": -0.8481131792068481, + "logps/rejected": -0.9911687970161438, + "loss": 1.3235, + "nll_loss": 1.2630895376205444, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.08481131494045258, + "rewards/margins": 0.014305558986961842, + "rewards/rejected": -0.09911688417196274, + "step": 1070 + }, + { + "epoch": 0.3023697348990929, + "grad_norm": 0.490234375, + "learning_rate": 4.402250088080214e-06, + "log_odds_chosen": -0.048357464373111725, + "log_odds_ratio": -0.7783852815628052, + "logits/chosen": -0.09076674282550812, + "logits/rejected": 0.06765065342187881, + "logps/chosen": -1.0395662784576416, + "logps/rejected": -1.0062190294265747, + "loss": 1.2533, + "nll_loss": 1.2233105897903442, + "rewards/accuracies": 0.44999998807907104, + "rewards/chosen": -0.10395662486553192, + "rewards/margins": -0.003334715496748686, + "rewards/rejected": -0.10062190145254135, + "step": 1075 + }, + { + "epoch": 0.3037761057590887, + "grad_norm": 0.5, + "learning_rate": 4.394261876481795e-06, + "log_odds_chosen": -0.12141172587871552, + "log_odds_ratio": -0.8287912607192993, + "logits/chosen": 0.006760761141777039, + "logits/rejected": -0.22260034084320068, + "logps/chosen": -1.0042288303375244, + "logps/rejected": -0.9768675565719604, + "loss": 1.3572, + "nll_loss": 1.3437844514846802, + "rewards/accuracies": 0.44999998807907104, + "rewards/chosen": -0.10042288154363632, + "rewards/margins": -0.0027361277025192976, + "rewards/rejected": -0.0976867526769638, + "step": 1080 + }, + { + "epoch": 0.30518247661908443, + "grad_norm": 0.484375, + "learning_rate": 4.386227992842347e-06, + "log_odds_chosen": 0.2469898760318756, + "log_odds_ratio": -0.6603808403015137, + "logits/chosen": 0.1306043416261673, + "logits/rejected": -0.05761692672967911, + "logps/chosen": -0.9411749839782715, + "logps/rejected": -1.1152271032333374, + "loss": 1.2368, + "nll_loss": 1.2312201261520386, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.09411749988794327, + "rewards/margins": 0.017405226826667786, + "rewards/rejected": -0.11152271926403046, + "step": 1085 + }, + { + "epoch": 0.3065888474790802, + "grad_norm": 1.765625, + "learning_rate": 4.378148630864689e-06, + "log_odds_chosen": 0.5335085391998291, + "log_odds_ratio": -0.5338913798332214, + "logits/chosen": 0.23110118508338928, + "logits/rejected": -0.10254959017038345, + "logps/chosen": -0.8923786878585815, + "logps/rejected": -1.204424262046814, + "loss": 1.3393, + "nll_loss": 1.3731284141540527, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.08923786878585815, + "rewards/margins": 0.031204570084810257, + "rewards/rejected": -0.12044243514537811, + "step": 1090 + }, + { + "epoch": 0.307995218339076, + "grad_norm": 0.734375, + "learning_rate": 4.3700239853481565e-06, + "log_odds_chosen": 0.282719224691391, + "log_odds_ratio": -0.6776636838912964, + "logits/chosen": -0.025338435545563698, + "logits/rejected": -0.006705662701278925, + "logps/chosen": -1.007938265800476, + "logps/rejected": -1.2182714939117432, + "loss": 1.2801, + "nll_loss": 1.3006494045257568, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.10079382359981537, + "rewards/margins": 0.021033337339758873, + "rewards/rejected": -0.1218271479010582, + "step": 1095 + }, + { + "epoch": 0.3094015891990718, + "grad_norm": 0.58984375, + "learning_rate": 4.361854252183902e-06, + "log_odds_chosen": -0.020149126648902893, + "log_odds_ratio": -0.8116379976272583, + "logits/chosen": 0.12741239368915558, + "logits/rejected": -0.08182956278324127, + "logps/chosen": -1.0148308277130127, + "logps/rejected": -0.9748057126998901, + "loss": 1.3741, + "nll_loss": 1.3178448677062988, + "rewards/accuracies": 0.4000000059604645, + "rewards/chosen": -0.10148308426141739, + "rewards/margins": -0.004002511501312256, + "rewards/rejected": -0.09748057276010513, + "step": 1100 + }, + { + "epoch": 0.3108079600590676, + "grad_norm": 0.94140625, + "learning_rate": 4.353639628350174e-06, + "log_odds_chosen": 0.2361874282360077, + "log_odds_ratio": -0.6560953855514526, + "logits/chosen": 0.33975356817245483, + "logits/rejected": -0.17692479491233826, + "logps/chosen": -0.957141101360321, + "logps/rejected": -1.1203813552856445, + "loss": 1.3241, + "nll_loss": 1.2143763303756714, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.09571412950754166, + "rewards/margins": 0.01632402278482914, + "rewards/rejected": -0.11203813552856445, + "step": 1105 + }, + { + "epoch": 0.31221433091906337, + "grad_norm": 0.400390625, + "learning_rate": 4.345380311907569e-06, + "log_odds_chosen": 0.127059668302536, + "log_odds_ratio": -0.7422958016395569, + "logits/chosen": 0.09059157967567444, + "logits/rejected": 0.06283347308635712, + "logps/chosen": -0.9153604507446289, + "logps/rejected": -0.8998891115188599, + "loss": 1.4761, + "nll_loss": 1.3094428777694702, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.0915360301733017, + "rewards/margins": -0.0015471221413463354, + "rewards/rejected": -0.08998890966176987, + "step": 1110 + }, + { + "epoch": 0.31362070177905915, + "grad_norm": 0.482421875, + "learning_rate": 4.3370765019942555e-06, + "log_odds_chosen": -0.006569194607436657, + "log_odds_ratio": -0.7842230796813965, + "logits/chosen": 0.2436453104019165, + "logits/rejected": 0.11122441291809082, + "logps/chosen": -1.0099600553512573, + "logps/rejected": -0.9945418238639832, + "loss": 1.2582, + "nll_loss": 1.2466042041778564, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.10099601745605469, + "rewards/margins": -0.0015418336261063814, + "rewards/rejected": -0.09945418685674667, + "step": 1115 + }, + { + "epoch": 0.31502707263905494, + "grad_norm": 0.462890625, + "learning_rate": 4.32872839882117e-06, + "log_odds_chosen": 0.4390491545200348, + "log_odds_ratio": -0.5690140128135681, + "logits/chosen": 0.30528688430786133, + "logits/rejected": 0.0776129812002182, + "logps/chosen": -0.8607474565505981, + "logps/rejected": -1.1015281677246094, + "loss": 1.3295, + "nll_loss": 1.2498326301574707, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.08607475459575653, + "rewards/margins": 0.02407807670533657, + "rewards/rejected": -0.11015282571315765, + "step": 1120 + }, + { + "epoch": 0.3164334434990507, + "grad_norm": 0.5078125, + "learning_rate": 4.320336203667195e-06, + "log_odds_chosen": -0.011590385809540749, + "log_odds_ratio": -0.7730804681777954, + "logits/chosen": 0.12901046872138977, + "logits/rejected": 0.2612306475639343, + "logps/chosen": -0.8959698677062988, + "logps/rejected": -0.8929961323738098, + "loss": 1.2513, + "nll_loss": 1.190316915512085, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.08959699422121048, + "rewards/margins": -0.00029737595468759537, + "rewards/rejected": -0.08929961174726486, + "step": 1125 + }, + { + "epoch": 0.31783981435904646, + "grad_norm": 0.39453125, + "learning_rate": 4.311900118874301e-06, + "log_odds_chosen": 0.7718832492828369, + "log_odds_ratio": -0.5875697731971741, + "logits/chosen": 0.20271439850330353, + "logits/rejected": -0.027479147538542747, + "logps/chosen": -0.9410564303398132, + "logps/rejected": -1.4128764867782593, + "loss": 1.2279, + "nll_loss": 1.2182931900024414, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.09410564601421356, + "rewards/margins": 0.04718201607465744, + "rewards/rejected": -0.1412876546382904, + "step": 1130 + }, + { + "epoch": 0.31924618521904224, + "grad_norm": 0.5234375, + "learning_rate": 4.303420347842669e-06, + "log_odds_chosen": 0.06899069249629974, + "log_odds_ratio": -0.8071345090866089, + "logits/chosen": -0.03334174305200577, + "logits/rejected": -0.13531741499900818, + "logps/chosen": -1.0268479585647583, + "logps/rejected": -1.0569186210632324, + "loss": 1.3351, + "nll_loss": 1.388672113418579, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.10268481075763702, + "rewards/margins": 0.0030070613138377666, + "rewards/rejected": -0.10569186508655548, + "step": 1135 + }, + { + "epoch": 0.32065255607903803, + "grad_norm": 0.392578125, + "learning_rate": 4.294897095025791e-06, + "log_odds_chosen": 0.4574614465236664, + "log_odds_ratio": -0.6911519765853882, + "logits/chosen": 0.26376354694366455, + "logits/rejected": -0.10399019718170166, + "logps/chosen": -1.0972447395324707, + "logps/rejected": -1.4647719860076904, + "loss": 1.3333, + "nll_loss": 1.2746883630752563, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.10972447693347931, + "rewards/margins": 0.03675273805856705, + "rewards/rejected": -0.14647720754146576, + "step": 1140 + }, + { + "epoch": 0.3220589269390338, + "grad_norm": 1.2265625, + "learning_rate": 4.2863305659255315e-06, + "log_odds_chosen": 0.4631293714046478, + "log_odds_ratio": -0.5717926025390625, + "logits/chosen": 0.3028055727481842, + "logits/rejected": 0.16470107436180115, + "logps/chosen": -0.875199019908905, + "logps/rejected": -1.1849122047424316, + "loss": 1.3673, + "nll_loss": 1.169250726699829, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.08751990646123886, + "rewards/margins": 0.03097131848335266, + "rewards/rejected": -0.11849121749401093, + "step": 1145 + }, + { + "epoch": 0.3234652977990296, + "grad_norm": 0.609375, + "learning_rate": 4.277720967087181e-06, + "log_odds_chosen": 0.2828753590583801, + "log_odds_ratio": -0.6513091921806335, + "logits/chosen": 0.23147746920585632, + "logits/rejected": 0.042463745921850204, + "logps/chosen": -0.9318034052848816, + "logps/rejected": -1.0875053405761719, + "loss": 1.2955, + "nll_loss": 1.1813573837280273, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.093180350959301, + "rewards/margins": 0.01557018794119358, + "rewards/rejected": -0.10875053703784943, + "step": 1150 + }, + { + "epoch": 0.3248716686590254, + "grad_norm": 0.4296875, + "learning_rate": 4.269068506094472e-06, + "log_odds_chosen": 0.3769608736038208, + "log_odds_ratio": -0.6138890981674194, + "logits/chosen": 0.09225358814001083, + "logits/rejected": 0.04900515824556351, + "logps/chosen": -0.7790621519088745, + "logps/rejected": -1.01277756690979, + "loss": 1.3202, + "nll_loss": 1.2686938047409058, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.07790622860193253, + "rewards/margins": 0.023371532559394836, + "rewards/rejected": -0.10127775371074677, + "step": 1155 + }, + { + "epoch": 0.3262780395190212, + "grad_norm": 0.40625, + "learning_rate": 4.2603733915645776e-06, + "log_odds_chosen": 0.3151751160621643, + "log_odds_ratio": -0.7018331289291382, + "logits/chosen": 0.030745208263397217, + "logits/rejected": -0.0075850216671824455, + "logps/chosen": -0.8284457325935364, + "logps/rejected": -1.0537253618240356, + "loss": 1.3178, + "nll_loss": 1.3805687427520752, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.0828445702791214, + "rewards/margins": 0.022527966648340225, + "rewards/rejected": -0.10537254810333252, + "step": 1160 + }, + { + "epoch": 0.32768441037901697, + "grad_norm": 1.2265625, + "learning_rate": 4.251635833143075e-06, + "log_odds_chosen": 0.6363018155097961, + "log_odds_ratio": -0.5621328949928284, + "logits/chosen": -0.004075920674949884, + "logits/rejected": 0.024677347391843796, + "logps/chosen": -0.843280017375946, + "logps/rejected": -1.1920336484909058, + "loss": 1.2733, + "nll_loss": 1.3524099588394165, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.08432799577713013, + "rewards/margins": 0.034875381737947464, + "rewards/rejected": -0.11920337378978729, + "step": 1165 + }, + { + "epoch": 0.32909078123901275, + "grad_norm": 0.96875, + "learning_rate": 4.242856041498895e-06, + "log_odds_chosen": 0.2801567614078522, + "log_odds_ratio": -0.6821891665458679, + "logits/chosen": -0.19107994437217712, + "logits/rejected": 0.10342366993427277, + "logps/chosen": -0.9781293869018555, + "logps/rejected": -1.211730718612671, + "loss": 1.2636, + "nll_loss": 1.2653511762619019, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.0978129506111145, + "rewards/margins": 0.023360123857855797, + "rewards/rejected": -0.12117306888103485, + "step": 1170 + }, + { + "epoch": 0.3304971520990085, + "grad_norm": 0.333984375, + "learning_rate": 4.2340342283192456e-06, + "log_odds_chosen": -0.11513074487447739, + "log_odds_ratio": -0.832987904548645, + "logits/chosen": 0.3309639096260071, + "logits/rejected": -0.025387341156601906, + "logps/chosen": -0.9378958940505981, + "logps/rejected": -0.9139649271965027, + "loss": 1.32, + "nll_loss": 1.1061030626296997, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.09378959238529205, + "rewards/margins": -0.002393099246546626, + "rewards/rejected": -0.09139649569988251, + "step": 1175 + }, + { + "epoch": 0.33190352295900427, + "grad_norm": 0.609375, + "learning_rate": 4.2251706063045025e-06, + "log_odds_chosen": 0.20498593151569366, + "log_odds_ratio": -0.6471208333969116, + "logits/chosen": 0.05163681507110596, + "logits/rejected": 0.05878226086497307, + "logps/chosen": -0.9705168604850769, + "logps/rejected": -1.1223108768463135, + "loss": 1.2788, + "nll_loss": 1.2215819358825684, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.09705167263746262, + "rewards/margins": 0.015179403126239777, + "rewards/rejected": -0.11223109066486359, + "step": 1180 + }, + { + "epoch": 0.33330989381900006, + "grad_norm": 1.15625, + "learning_rate": 4.216265389163083e-06, + "log_odds_chosen": 0.6214634776115417, + "log_odds_ratio": -0.5356272459030151, + "logits/chosen": -0.01187597680836916, + "logits/rejected": 0.1219060868024826, + "logps/chosen": -0.8745874166488647, + "logps/rejected": -1.199029564857483, + "loss": 1.3475, + "nll_loss": 1.2316919565200806, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.08745874464511871, + "rewards/margins": 0.03244420513510704, + "rewards/rejected": -0.11990294605493546, + "step": 1185 + }, + { + "epoch": 0.33471626467899585, + "grad_norm": 1.4921875, + "learning_rate": 4.207318791606296e-06, + "log_odds_chosen": 0.26392292976379395, + "log_odds_ratio": -0.6821704506874084, + "logits/chosen": 0.11818800866603851, + "logits/rejected": -0.13393503427505493, + "logps/chosen": -0.9134756922721863, + "logps/rejected": -1.0980814695358276, + "loss": 1.318, + "nll_loss": 1.3457120656967163, + "rewards/accuracies": 0.44999998807907104, + "rewards/chosen": -0.09134756773710251, + "rewards/margins": 0.01846056990325451, + "rewards/rejected": -0.10980813205242157, + "step": 1190 + }, + { + "epoch": 0.33612263553899163, + "grad_norm": 0.7578125, + "learning_rate": 4.198331029343156e-06, + "log_odds_chosen": 0.4352284371852875, + "log_odds_ratio": -0.6051638722419739, + "logits/chosen": 0.1391836702823639, + "logits/rejected": -0.012159859761595726, + "logps/chosen": -0.9116001129150391, + "logps/rejected": -1.1639275550842285, + "loss": 1.2891, + "nll_loss": 1.2817579507827759, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.09116000682115555, + "rewards/margins": 0.02523273602128029, + "rewards/rejected": -0.11639275401830673, + "step": 1195 + }, + { + "epoch": 0.3375290063989874, + "grad_norm": 0.77734375, + "learning_rate": 4.189302319075195e-06, + "log_odds_chosen": 0.15520837903022766, + "log_odds_ratio": -0.7217892408370972, + "logits/chosen": 0.19287073612213135, + "logits/rejected": 0.0029908656142652035, + "logps/chosen": -0.9474450945854187, + "logps/rejected": -0.98936527967453, + "loss": 1.2673, + "nll_loss": 1.23770010471344, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.09474451839923859, + "rewards/margins": 0.004192027263343334, + "rewards/rejected": -0.0989365354180336, + "step": 1200 + }, + { + "epoch": 0.3389353772589832, + "grad_norm": 1.1484375, + "learning_rate": 4.18023287849123e-06, + "log_odds_chosen": 0.15082181990146637, + "log_odds_ratio": -0.7107568979263306, + "logits/chosen": 0.04572378844022751, + "logits/rejected": -0.023113315925002098, + "logps/chosen": -0.9957239031791687, + "logps/rejected": -1.001149296760559, + "loss": 1.2459, + "nll_loss": 1.250544786453247, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.09957239776849747, + "rewards/margins": 0.000542531895916909, + "rewards/rejected": -0.10011491924524307, + "step": 1205 + }, + { + "epoch": 0.340341748118979, + "grad_norm": 0.58203125, + "learning_rate": 4.1711229262621145e-06, + "log_odds_chosen": 0.30810657143592834, + "log_odds_ratio": -0.6075814366340637, + "logits/chosen": 0.3693988621234894, + "logits/rejected": -0.019700681790709496, + "logps/chosen": -0.8349758386611938, + "logps/rejected": -1.0323964357376099, + "loss": 1.2642, + "nll_loss": 1.0299533605575562, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.08349757641553879, + "rewards/margins": 0.01974206045269966, + "rewards/rejected": -0.10323964059352875, + "step": 1210 + }, + { + "epoch": 0.3417481189789748, + "grad_norm": 1.1953125, + "learning_rate": 4.161972682035469e-06, + "log_odds_chosen": 0.0991004928946495, + "log_odds_ratio": -0.6919318437576294, + "logits/chosen": 0.1498071849346161, + "logits/rejected": -0.09738024324178696, + "logps/chosen": -0.8192486763000488, + "logps/rejected": -0.9177687764167786, + "loss": 1.3983, + "nll_loss": 1.2638217210769653, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.08192487061023712, + "rewards/margins": 0.009852008894085884, + "rewards/rejected": -0.09177687764167786, + "step": 1215 + }, + { + "epoch": 0.3431544898389705, + "grad_norm": 0.4609375, + "learning_rate": 4.152782366430381e-06, + "log_odds_chosen": 0.5309596061706543, + "log_odds_ratio": -0.560706377029419, + "logits/chosen": 0.08695764094591141, + "logits/rejected": -0.2072925567626953, + "logps/chosen": -0.8290009498596191, + "logps/rejected": -1.2268956899642944, + "loss": 1.2977, + "nll_loss": 1.3526235818862915, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.08290009945631027, + "rewards/margins": 0.03978949040174484, + "rewards/rejected": -0.12268956750631332, + "step": 1220 + }, + { + "epoch": 0.3445608606989663, + "grad_norm": 1.03125, + "learning_rate": 4.143552201032092e-06, + "log_odds_chosen": 0.3517477810382843, + "log_odds_ratio": -0.5913905501365662, + "logits/chosen": 0.05310209468007088, + "logits/rejected": 0.03654911741614342, + "logps/chosen": -0.8787948489189148, + "logps/rejected": -1.1121970415115356, + "loss": 1.2062, + "nll_loss": 0.9948512315750122, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.0878794938325882, + "rewards/margins": 0.023340212181210518, + "rewards/rejected": -0.11121970415115356, + "step": 1225 + }, + { + "epoch": 0.3459672315589621, + "grad_norm": 0.49609375, + "learning_rate": 4.134282408386646e-06, + "log_odds_chosen": 0.08063089102506638, + "log_odds_ratio": -0.7300704717636108, + "logits/chosen": 0.2532581090927124, + "logits/rejected": 0.12257635593414307, + "logps/chosen": -0.9871991872787476, + "logps/rejected": -1.0392825603485107, + "loss": 1.2664, + "nll_loss": 1.1173219680786133, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.09871991723775864, + "rewards/margins": 0.00520833395421505, + "rewards/rejected": -0.10392825305461884, + "step": 1230 + }, + { + "epoch": 0.34737360241895787, + "grad_norm": 0.455078125, + "learning_rate": 4.124973211995535e-06, + "log_odds_chosen": 0.13872423768043518, + "log_odds_ratio": -0.7976440191268921, + "logits/chosen": -0.0937075987458229, + "logits/rejected": 0.05718477815389633, + "logps/chosen": -0.9579688906669617, + "logps/rejected": -1.0377689599990845, + "loss": 1.3545, + "nll_loss": 1.3179527521133423, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.09579687565565109, + "rewards/margins": 0.00798002164810896, + "rewards/rejected": -0.10377690941095352, + "step": 1235 + }, + { + "epoch": 0.34877997327895366, + "grad_norm": 0.9453125, + "learning_rate": 4.1156248363103e-06, + "log_odds_chosen": 0.17224308848381042, + "log_odds_ratio": -0.7132772207260132, + "logits/chosen": 0.18819832801818848, + "logits/rejected": -0.1392843872308731, + "logps/chosen": -0.8691417574882507, + "logps/rejected": -1.0359532833099365, + "loss": 1.3706, + "nll_loss": 1.304761528968811, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.08691417425870895, + "rewards/margins": 0.016681160777807236, + "rewards/rejected": -0.10359533876180649, + "step": 1240 + }, + { + "epoch": 0.35018634413894945, + "grad_norm": 0.447265625, + "learning_rate": 4.1062375067271245e-06, + "log_odds_chosen": 0.30390891432762146, + "log_odds_ratio": -0.6522955894470215, + "logits/chosen": 0.051540445536375046, + "logits/rejected": 0.03162222355604172, + "logps/chosen": -1.0302698612213135, + "logps/rejected": -1.25276517868042, + "loss": 1.3463, + "nll_loss": 1.4013944864273071, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.10302698612213135, + "rewards/margins": 0.02224954031407833, + "rewards/rejected": -0.12527652084827423, + "step": 1245 + }, + { + "epoch": 0.35159271499894523, + "grad_norm": 0.341796875, + "learning_rate": 4.096811449581399e-06, + "log_odds_chosen": 0.3173461854457855, + "log_odds_ratio": -0.6898201107978821, + "logits/chosen": 0.24673572182655334, + "logits/rejected": 0.1782643347978592, + "logps/chosen": -0.9055770039558411, + "logps/rejected": -1.2110660076141357, + "loss": 1.2689, + "nll_loss": 1.0442687273025513, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.09055770188570023, + "rewards/margins": 0.030548905953764915, + "rewards/rejected": -0.1211066022515297, + "step": 1250 + }, + { + "epoch": 0.352999085858941, + "grad_norm": 0.61328125, + "learning_rate": 4.087346892142265e-06, + "log_odds_chosen": 0.05978800728917122, + "log_odds_ratio": -0.7154570817947388, + "logits/chosen": 0.13341276347637177, + "logits/rejected": -0.057683832943439484, + "logps/chosen": -1.0031936168670654, + "logps/rejected": -1.052685260772705, + "loss": 1.2844, + "nll_loss": 1.4338276386260986, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.10031934827566147, + "rewards/margins": 0.004949171096086502, + "rewards/rejected": -0.10526851564645767, + "step": 1255 + }, + { + "epoch": 0.3544054567189368, + "grad_norm": 0.80078125, + "learning_rate": 4.077844062607133e-06, + "log_odds_chosen": 0.08474165201187134, + "log_odds_ratio": -0.6954008340835571, + "logits/chosen": 0.15004608035087585, + "logits/rejected": 0.07776209712028503, + "logps/chosen": -1.1367336511611938, + "logps/rejected": -1.1752803325653076, + "loss": 1.2721, + "nll_loss": 1.165010690689087, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.11367335170507431, + "rewards/margins": 0.003854684066027403, + "rewards/rejected": -0.1175280213356018, + "step": 1260 + }, + { + "epoch": 0.35581182757893254, + "grad_norm": 0.69140625, + "learning_rate": 4.068303190096182e-06, + "log_odds_chosen": 0.5245984792709351, + "log_odds_ratio": -0.5845207571983337, + "logits/chosen": 0.1559588462114334, + "logits/rejected": -0.036134567111730576, + "logps/chosen": -0.7722035646438599, + "logps/rejected": -1.1271470785140991, + "loss": 1.3113, + "nll_loss": 1.1578443050384521, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.07722035050392151, + "rewards/margins": 0.03549434617161751, + "rewards/rejected": -0.11271469295024872, + "step": 1265 + }, + { + "epoch": 0.3572181984389283, + "grad_norm": 1.9453125, + "learning_rate": 4.058724504646834e-06, + "log_odds_chosen": 0.2877119183540344, + "log_odds_ratio": -0.6282137036323547, + "logits/chosen": 0.3439601957798004, + "logits/rejected": -0.022573407739400864, + "logps/chosen": -0.9233312606811523, + "logps/rejected": -1.1261590719223022, + "loss": 1.3093, + "nll_loss": 1.2163238525390625, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.09233313053846359, + "rewards/margins": 0.020282771438360214, + "rewards/rejected": -0.11261589825153351, + "step": 1270 + }, + { + "epoch": 0.3586245692989241, + "grad_norm": 1.3125, + "learning_rate": 4.049108237208212e-06, + "log_odds_chosen": 0.1699952781200409, + "log_odds_ratio": -0.6839112043380737, + "logits/chosen": 0.03113476000726223, + "logits/rejected": 0.36601823568344116, + "logps/chosen": -0.8622844815254211, + "logps/rejected": -0.9559763669967651, + "loss": 1.2663, + "nll_loss": 1.125409722328186, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.08622844517230988, + "rewards/margins": 0.00936918519437313, + "rewards/rejected": -0.09559763967990875, + "step": 1275 + }, + { + "epoch": 0.3600309401589199, + "grad_norm": 0.5546875, + "learning_rate": 4.039454619635563e-06, + "log_odds_chosen": 0.10882600396871567, + "log_odds_ratio": -0.7116864323616028, + "logits/chosen": -0.08300259709358215, + "logits/rejected": 0.009808266535401344, + "logps/chosen": -0.9910035133361816, + "logps/rejected": -1.0440236330032349, + "loss": 1.269, + "nll_loss": 1.1854488849639893, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.09910035878419876, + "rewards/margins": 0.0053020054474473, + "rewards/rejected": -0.10440234839916229, + "step": 1280 + }, + { + "epoch": 0.3614373110189157, + "grad_norm": 0.9296875, + "learning_rate": 4.02976388468468e-06, + "log_odds_chosen": 0.3029418885707855, + "log_odds_ratio": -0.6561595797538757, + "logits/chosen": 0.23958845436573029, + "logits/rejected": -0.4308515191078186, + "logps/chosen": -0.8872898817062378, + "logps/rejected": -1.0813348293304443, + "loss": 1.2944, + "nll_loss": 1.2912275791168213, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.08872898668050766, + "rewards/margins": 0.019404493272304535, + "rewards/rejected": -0.1081334799528122, + "step": 1285 + }, + { + "epoch": 0.3628436818789115, + "grad_norm": 0.72265625, + "learning_rate": 4.020036266006276e-06, + "log_odds_chosen": 0.7652324438095093, + "log_odds_ratio": -0.46309083700180054, + "logits/chosen": 0.2030514031648636, + "logits/rejected": 0.08312040567398071, + "logps/chosen": -0.8167656660079956, + "logps/rejected": -1.2904177904129028, + "loss": 1.321, + "nll_loss": 1.096789836883545, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.08167656511068344, + "rewards/margins": 0.047365207225084305, + "rewards/rejected": -0.12904179096221924, + "step": 1290 + }, + { + "epoch": 0.36425005273890726, + "grad_norm": 0.296875, + "learning_rate": 4.0102719981403625e-06, + "log_odds_chosen": 0.5203356146812439, + "log_odds_ratio": -0.6198484301567078, + "logits/chosen": 0.061830371618270874, + "logits/rejected": 0.011904525570571423, + "logps/chosen": -0.842174232006073, + "logps/rejected": -1.1951404809951782, + "loss": 1.2896, + "nll_loss": 1.170758605003357, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.08421741425991058, + "rewards/margins": 0.035296615213155746, + "rewards/rejected": -0.11951403319835663, + "step": 1295 + }, + { + "epoch": 0.36565642359890305, + "grad_norm": 0.54296875, + "learning_rate": 4.000471316510588e-06, + "log_odds_chosen": 0.21910062432289124, + "log_odds_ratio": -0.6447972059249878, + "logits/chosen": 0.1635589897632599, + "logits/rejected": -0.052931904792785645, + "logps/chosen": -0.8704059720039368, + "logps/rejected": -1.0420140027999878, + "loss": 1.4201, + "nll_loss": 1.3763192892074585, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.08704060316085815, + "rewards/margins": 0.017160795629024506, + "rewards/rejected": -0.10420139878988266, + "step": 1300 + }, + { + "epoch": 0.36706279445889883, + "grad_norm": 0.96875, + "learning_rate": 3.9906344574185625e-06, + "log_odds_chosen": -0.06714977324008942, + "log_odds_ratio": -0.8473297357559204, + "logits/chosen": 0.13796642422676086, + "logits/rejected": 0.08382640033960342, + "logps/chosen": -1.0411431789398193, + "logps/rejected": -1.025268793106079, + "loss": 1.3294, + "nll_loss": 1.2165088653564453, + "rewards/accuracies": 0.44999998807907104, + "rewards/chosen": -0.10411433130502701, + "rewards/margins": -0.001587429316714406, + "rewards/rejected": -0.10252688825130463, + "step": 1305 + }, + { + "epoch": 0.36846916531889456, + "grad_norm": 0.36328125, + "learning_rate": 3.9807616580381645e-06, + "log_odds_chosen": 0.11012011766433716, + "log_odds_ratio": -0.7090429067611694, + "logits/chosen": 0.2194376289844513, + "logits/rejected": 0.13040907680988312, + "logps/chosen": -0.9564974904060364, + "logps/rejected": -1.000109076499939, + "loss": 1.294, + "nll_loss": 1.029192328453064, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.09564974904060364, + "rewards/margins": 0.0043611666187644005, + "rewards/rejected": -0.10001091659069061, + "step": 1310 + }, + { + "epoch": 0.36987553617889035, + "grad_norm": 0.55859375, + "learning_rate": 3.970853156409816e-06, + "log_odds_chosen": 0.37843549251556396, + "log_odds_ratio": -0.693618655204773, + "logits/chosen": 0.22795596718788147, + "logits/rejected": -0.10526075214147568, + "logps/chosen": -0.8523596525192261, + "logps/rejected": -1.2103275060653687, + "loss": 1.3429, + "nll_loss": 1.0443923473358154, + "rewards/accuracies": 0.44999998807907104, + "rewards/chosen": -0.08523597568273544, + "rewards/margins": 0.035796768963336945, + "rewards/rejected": -0.12103275209665298, + "step": 1315 + }, + { + "epoch": 0.37128190703888614, + "grad_norm": 0.48046875, + "learning_rate": 3.960909191434746e-06, + "log_odds_chosen": 0.0975802093744278, + "log_odds_ratio": -0.7350735664367676, + "logits/chosen": 0.09811149537563324, + "logits/rejected": -0.0825861319899559, + "logps/chosen": -0.885798454284668, + "logps/rejected": -0.9871622920036316, + "loss": 1.3356, + "nll_loss": 1.3084779977798462, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.08857984840869904, + "rewards/margins": 0.010136393830180168, + "rewards/rejected": -0.09871623665094376, + "step": 1320 + }, + { + "epoch": 0.3726882778988819, + "grad_norm": 0.5078125, + "learning_rate": 3.9509300028692345e-06, + "log_odds_chosen": 0.07902495563030243, + "log_odds_ratio": -0.7018908262252808, + "logits/chosen": 0.3462804853916168, + "logits/rejected": -0.16924339532852173, + "logps/chosen": -1.059748888015747, + "logps/rejected": -1.1173927783966064, + "loss": 1.4139, + "nll_loss": 1.3403122425079346, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.10597489029169083, + "rewards/margins": 0.00576439592987299, + "rewards/rejected": -0.11173927783966064, + "step": 1325 + }, + { + "epoch": 0.3740946487588777, + "grad_norm": 0.96484375, + "learning_rate": 3.940915831318824e-06, + "log_odds_chosen": 0.6405239105224609, + "log_odds_ratio": -0.49955257773399353, + "logits/chosen": 0.21703024208545685, + "logits/rejected": -0.14214935898780823, + "logps/chosen": -0.8258606791496277, + "logps/rejected": -1.2524826526641846, + "loss": 1.3476, + "nll_loss": 1.2318499088287354, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.08258606493473053, + "rewards/margins": 0.042662184685468674, + "rewards/rejected": -0.1252482533454895, + "step": 1330 + }, + { + "epoch": 0.3755010196188735, + "grad_norm": 0.76171875, + "learning_rate": 3.930866918232525e-06, + "log_odds_chosen": 0.3761736750602722, + "log_odds_ratio": -0.6042557954788208, + "logits/chosen": -0.05178029090166092, + "logits/rejected": -0.06902565062046051, + "logps/chosen": -0.9542252421379089, + "logps/rejected": -1.2151601314544678, + "loss": 1.2994, + "nll_loss": 1.4641225337982178, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.09542252868413925, + "rewards/margins": 0.0260935015976429, + "rewards/rejected": -0.12151602655649185, + "step": 1335 + }, + { + "epoch": 0.3769073904788693, + "grad_norm": 0.4921875, + "learning_rate": 3.9207835058969905e-06, + "log_odds_chosen": -0.12533536553382874, + "log_odds_ratio": -0.7958296537399292, + "logits/chosen": 0.25583916902542114, + "logits/rejected": 0.17151054739952087, + "logps/chosen": -0.9483383893966675, + "logps/rejected": -0.8815383911132812, + "loss": 1.2106, + "nll_loss": 1.078672170639038, + "rewards/accuracies": 0.44999998807907104, + "rewards/chosen": -0.0948338508605957, + "rewards/margins": -0.006680003367364407, + "rewards/rejected": -0.08815383911132812, + "step": 1340 + }, + { + "epoch": 0.3783137613388651, + "grad_norm": 0.8828125, + "learning_rate": 3.910665837430676e-06, + "log_odds_chosen": -0.0325247123837471, + "log_odds_ratio": -0.7807704210281372, + "logits/chosen": 0.04034139961004257, + "logits/rejected": -0.07567404210567474, + "logps/chosen": -0.9568163752555847, + "logps/rejected": -0.9523313641548157, + "loss": 1.3861, + "nll_loss": 1.3908307552337646, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.09568165242671967, + "rewards/margins": -0.00044852012069895864, + "rewards/rejected": -0.09523313492536545, + "step": 1345 + }, + { + "epoch": 0.37972013219886086, + "grad_norm": 0.7265625, + "learning_rate": 3.900514156777977e-06, + "log_odds_chosen": 0.5176669359207153, + "log_odds_ratio": -0.7029728293418884, + "logits/chosen": -0.015370416454970837, + "logits/rejected": -0.1847151219844818, + "logps/chosen": -0.831338107585907, + "logps/rejected": -1.2131564617156982, + "loss": 1.3746, + "nll_loss": 1.280045747756958, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.08313381671905518, + "rewards/margins": 0.03818183392286301, + "rewards/rejected": -0.12131565809249878, + "step": 1350 + }, + { + "epoch": 0.38112650305885665, + "grad_norm": 0.56640625, + "learning_rate": 3.890328708703349e-06, + "log_odds_chosen": 0.5413855314254761, + "log_odds_ratio": -0.5090984106063843, + "logits/chosen": 0.14026743173599243, + "logits/rejected": 0.08661554008722305, + "logps/chosen": -0.7370747327804565, + "logps/rejected": -1.0743495225906372, + "loss": 1.376, + "nll_loss": 1.152942419052124, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.0737074688076973, + "rewards/margins": 0.0337274894118309, + "rewards/rejected": -0.1074349656701088, + "step": 1355 + }, + { + "epoch": 0.3825328739188524, + "grad_norm": 0.5859375, + "learning_rate": 3.880109738785404e-06, + "log_odds_chosen": 0.06151856109499931, + "log_odds_ratio": -0.7875824570655823, + "logits/chosen": 0.1456756442785263, + "logits/rejected": 0.09310576319694519, + "logps/chosen": -1.1162899732589722, + "logps/rejected": -1.1293188333511353, + "loss": 1.3632, + "nll_loss": 1.2380468845367432, + "rewards/accuracies": 0.44999998807907104, + "rewards/chosen": -0.11162900924682617, + "rewards/margins": 0.0013028827961534262, + "rewards/rejected": -0.11293188482522964, + "step": 1360 + }, + { + "epoch": 0.38393924477884817, + "grad_norm": 0.7734375, + "learning_rate": 3.86985749341099e-06, + "log_odds_chosen": 0.38305407762527466, + "log_odds_ratio": -0.6281946897506714, + "logits/chosen": 0.039482396095991135, + "logits/rejected": -0.06709714233875275, + "logps/chosen": -0.8778185844421387, + "logps/rejected": -1.067224144935608, + "loss": 1.3404, + "nll_loss": 1.395684838294983, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.0877818614244461, + "rewards/margins": 0.018940549343824387, + "rewards/rejected": -0.10672241449356079, + "step": 1365 + }, + { + "epoch": 0.38534561563884395, + "grad_norm": 0.94921875, + "learning_rate": 3.859572219769251e-06, + "log_odds_chosen": 0.3674304187297821, + "log_odds_ratio": -0.563770592212677, + "logits/chosen": 0.11389932781457901, + "logits/rejected": 0.07518327236175537, + "logps/chosen": -0.8340722322463989, + "logps/rejected": -1.0481908321380615, + "loss": 1.3566, + "nll_loss": 1.2202757596969604, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.08340722322463989, + "rewards/margins": 0.021411865949630737, + "rewards/rejected": -0.10481909662485123, + "step": 1370 + }, + { + "epoch": 0.38675198649883974, + "grad_norm": 0.5546875, + "learning_rate": 3.849254165845665e-06, + "log_odds_chosen": 0.5124444365501404, + "log_odds_ratio": -0.5738476514816284, + "logits/chosen": -0.12536796927452087, + "logits/rejected": -0.03598599508404732, + "logps/chosen": -0.7154397368431091, + "logps/rejected": -0.9961905479431152, + "loss": 1.3949, + "nll_loss": 1.3729028701782227, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.07154396921396255, + "rewards/margins": 0.028075072914361954, + "rewards/rejected": -0.09961903840303421, + "step": 1375 + }, + { + "epoch": 0.3881583573588355, + "grad_norm": 0.4375, + "learning_rate": 3.83890358041607e-06, + "log_odds_chosen": 0.19218651950359344, + "log_odds_ratio": -0.6770265102386475, + "logits/chosen": 0.1676643192768097, + "logits/rejected": 0.09929686784744263, + "logps/chosen": -0.8764567375183105, + "logps/rejected": -1.0005100965499878, + "loss": 1.3095, + "nll_loss": 1.1619200706481934, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.08764568716287613, + "rewards/margins": 0.012405339628458023, + "rewards/rejected": -0.10005102306604385, + "step": 1380 + }, + { + "epoch": 0.3895647282188313, + "grad_norm": 0.57421875, + "learning_rate": 3.82852071304066e-06, + "log_odds_chosen": 0.04810095950961113, + "log_odds_ratio": -0.804132342338562, + "logits/chosen": 0.25406962633132935, + "logits/rejected": 0.07150840759277344, + "logps/chosen": -0.9535115957260132, + "logps/rejected": -1.0202562808990479, + "loss": 1.3359, + "nll_loss": 1.2485761642456055, + "rewards/accuracies": 0.44999998807907104, + "rewards/chosen": -0.09535115957260132, + "rewards/margins": 0.006674474570900202, + "rewards/rejected": -0.10202564299106598, + "step": 1385 + }, + { + "epoch": 0.3909710990788271, + "grad_norm": 0.29296875, + "learning_rate": 3.818105814057971e-06, + "log_odds_chosen": 0.0038131296169012785, + "log_odds_ratio": -0.7812002301216125, + "logits/chosen": 0.16467972099781036, + "logits/rejected": 0.19734536111354828, + "logps/chosen": -1.058872103691101, + "logps/rejected": -1.0703850984573364, + "loss": 1.3523, + "nll_loss": 1.3367329835891724, + "rewards/accuracies": 0.4000000059604645, + "rewards/chosen": -0.10588721185922623, + "rewards/margins": 0.0011512942146509886, + "rewards/rejected": -0.10703851282596588, + "step": 1390 + }, + { + "epoch": 0.3923774699388229, + "grad_norm": 0.61328125, + "learning_rate": 3.8076591345788434e-06, + "log_odds_chosen": 0.42102327942848206, + "log_odds_ratio": -0.5969361066818237, + "logits/chosen": 0.015062945894896984, + "logits/rejected": -0.07421614974737167, + "logps/chosen": -0.8461735844612122, + "logps/rejected": -1.1441727876663208, + "loss": 1.3275, + "nll_loss": 1.239793062210083, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.08461736142635345, + "rewards/margins": 0.0297999270260334, + "rewards/rejected": -0.11441727727651596, + "step": 1395 + }, + { + "epoch": 0.3937838407988187, + "grad_norm": 0.69921875, + "learning_rate": 3.7971809264803705e-06, + "log_odds_chosen": 0.3402322828769684, + "log_odds_ratio": -0.7314016819000244, + "logits/chosen": 0.1510065346956253, + "logits/rejected": 0.2304486781358719, + "logps/chosen": -0.8683599233627319, + "logps/rejected": -1.0512754917144775, + "loss": 1.1824, + "nll_loss": 1.1514182090759277, + "rewards/accuracies": 0.44999998807907104, + "rewards/chosen": -0.08683599531650543, + "rewards/margins": 0.018291553482413292, + "rewards/rejected": -0.10512755066156387, + "step": 1400 + }, + { + "epoch": 0.3951902116588144, + "grad_norm": 0.55078125, + "learning_rate": 3.786671442399823e-06, + "log_odds_chosen": 0.2950545847415924, + "log_odds_ratio": -0.6508955955505371, + "logits/chosen": 0.07452909648418427, + "logits/rejected": 0.03641175478696823, + "logps/chosen": -0.9904881715774536, + "logps/rejected": -1.1894201040267944, + "loss": 1.3093, + "nll_loss": 1.2014591693878174, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.09904881566762924, + "rewards/margins": 0.01989319920539856, + "rewards/rejected": -0.1189420074224472, + "step": 1405 + }, + { + "epoch": 0.3965965825188102, + "grad_norm": 0.71875, + "learning_rate": 3.776130935728558e-06, + "log_odds_chosen": 0.957088828086853, + "log_odds_ratio": -0.46606189012527466, + "logits/chosen": 0.16692259907722473, + "logits/rejected": 0.09750144928693771, + "logps/chosen": -0.6723443865776062, + "logps/rejected": -1.2955124378204346, + "loss": 1.2867, + "nll_loss": 1.1986668109893799, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.06723444163799286, + "rewards/margins": 0.06231679767370224, + "rewards/rejected": -0.1295512467622757, + "step": 1410 + }, + { + "epoch": 0.398002953378806, + "grad_norm": 0.41796875, + "learning_rate": 3.7655596606059095e-06, + "log_odds_chosen": 0.018707597628235817, + "log_odds_ratio": -0.7543188333511353, + "logits/chosen": 0.2600110173225403, + "logits/rejected": -0.2204551249742508, + "logps/chosen": -0.9564399719238281, + "logps/rejected": -0.9729903340339661, + "loss": 1.3391, + "nll_loss": 1.373101830482483, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.09564399719238281, + "rewards/margins": 0.0016550387954339385, + "rewards/rejected": -0.09729902446269989, + "step": 1415 + }, + { + "epoch": 0.39940932423880177, + "grad_norm": 0.494140625, + "learning_rate": 3.754957871913064e-06, + "log_odds_chosen": 0.3720802366733551, + "log_odds_ratio": -0.6222091913223267, + "logits/chosen": 0.11969141662120819, + "logits/rejected": 0.029351050034165382, + "logps/chosen": -1.0278053283691406, + "logps/rejected": -1.3334497213363647, + "loss": 1.2639, + "nll_loss": 1.2064927816390991, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.1027805432677269, + "rewards/margins": 0.030564438551664352, + "rewards/rejected": -0.13334497809410095, + "step": 1420 + }, + { + "epoch": 0.40081569509879755, + "grad_norm": 0.8203125, + "learning_rate": 3.7443258252669084e-06, + "log_odds_chosen": 0.3200578987598419, + "log_odds_ratio": -0.6280742883682251, + "logits/chosen": 0.06024733930826187, + "logits/rejected": 0.02671833336353302, + "logps/chosen": -0.8416322469711304, + "logps/rejected": -1.0510714054107666, + "loss": 1.3019, + "nll_loss": 1.1888415813446045, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.08416323363780975, + "rewards/margins": 0.020943904295563698, + "rewards/rejected": -0.1051071435213089, + "step": 1425 + }, + { + "epoch": 0.40222206595879334, + "grad_norm": 0.84765625, + "learning_rate": 3.733663777013875e-06, + "log_odds_chosen": 0.4754953980445862, + "log_odds_ratio": -0.5904639363288879, + "logits/chosen": 0.08866497129201889, + "logits/rejected": 0.1178668737411499, + "logps/chosen": -0.8428691029548645, + "logps/rejected": -1.0717285871505737, + "loss": 1.3726, + "nll_loss": 1.2191121578216553, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.08428691327571869, + "rewards/margins": 0.022885948419570923, + "rewards/rejected": -0.10717286169528961, + "step": 1430 + }, + { + "epoch": 0.4036284368187891, + "grad_norm": 0.48828125, + "learning_rate": 3.7229719842237545e-06, + "log_odds_chosen": 0.5185042023658752, + "log_odds_ratio": -0.541755199432373, + "logits/chosen": 0.03424149751663208, + "logits/rejected": -0.25478774309158325, + "logps/chosen": -0.9219148755073547, + "logps/rejected": -1.2493301630020142, + "loss": 1.2789, + "nll_loss": 1.1576497554779053, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.09219150245189667, + "rewards/margins": 0.032741524279117584, + "rewards/rejected": -0.12493302673101425, + "step": 1435 + }, + { + "epoch": 0.4050348076787849, + "grad_norm": 0.71484375, + "learning_rate": 3.712250704683501e-06, + "log_odds_chosen": 0.6584421992301941, + "log_odds_ratio": -0.557796835899353, + "logits/chosen": 0.2356627881526947, + "logits/rejected": 0.009463606402277946, + "logps/chosen": -0.8283861875534058, + "logps/rejected": -1.178853154182434, + "loss": 1.1522, + "nll_loss": 1.0821020603179932, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.08283861726522446, + "rewards/margins": 0.035046692937612534, + "rewards/rejected": -0.11788531392812729, + "step": 1440 + }, + { + "epoch": 0.4064411785387807, + "grad_norm": 0.68359375, + "learning_rate": 3.701500196891015e-06, + "log_odds_chosen": 0.2571748197078705, + "log_odds_ratio": -0.6834226846694946, + "logits/chosen": -0.009924083948135376, + "logits/rejected": -0.04143872857093811, + "logps/chosen": -0.9206323623657227, + "logps/rejected": -1.069331407546997, + "loss": 1.331, + "nll_loss": 1.2972078323364258, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.09206323325634003, + "rewards/margins": 0.014869892969727516, + "rewards/rejected": -0.10693313926458359, + "step": 1445 + }, + { + "epoch": 0.40784754939877643, + "grad_norm": 0.671875, + "learning_rate": 3.690720720048913e-06, + "log_odds_chosen": 0.25981101393699646, + "log_odds_ratio": -0.6484376192092896, + "logits/chosen": 0.28591400384902954, + "logits/rejected": -0.010500210337340832, + "logps/chosen": -0.9713215827941895, + "logps/rejected": -1.1231167316436768, + "loss": 1.3804, + "nll_loss": 1.1480700969696045, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.09713216125965118, + "rewards/margins": 0.015179498121142387, + "rewards/rejected": -0.11231166124343872, + "step": 1450 + }, + { + "epoch": 0.4092539202587722, + "grad_norm": 1.078125, + "learning_rate": 3.6799125340582742e-06, + "log_odds_chosen": 0.1738591492176056, + "log_odds_ratio": -0.7462955713272095, + "logits/chosen": 0.19391432404518127, + "logits/rejected": -0.003649419639259577, + "logps/chosen": -0.9834516644477844, + "logps/rejected": -1.057770013809204, + "loss": 1.3333, + "nll_loss": 1.0925512313842773, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.09834517538547516, + "rewards/margins": 0.007431824691593647, + "rewards/rejected": -0.10577700287103653, + "step": 1455 + }, + { + "epoch": 0.410660291118768, + "grad_norm": 0.69921875, + "learning_rate": 3.6690758995123788e-06, + "log_odds_chosen": 0.5114420652389526, + "log_odds_ratio": -0.5689659714698792, + "logits/chosen": 0.2528618276119232, + "logits/rejected": 0.020400792360305786, + "logps/chosen": -0.7881637215614319, + "logps/rejected": -1.0973840951919556, + "loss": 1.3211, + "nll_loss": 1.1730833053588867, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.07881636917591095, + "rewards/margins": 0.030922044068574905, + "rewards/rejected": -0.10973842442035675, + "step": 1460 + }, + { + "epoch": 0.4120666619787638, + "grad_norm": 0.470703125, + "learning_rate": 3.658211077690421e-06, + "log_odds_chosen": 0.4712817072868347, + "log_odds_ratio": -0.6004756689071655, + "logits/chosen": 0.09132170677185059, + "logits/rejected": 0.2290278673171997, + "logps/chosen": -0.8380820155143738, + "logps/rejected": -1.1644837856292725, + "loss": 1.2326, + "nll_loss": 1.0265964269638062, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.08380821347236633, + "rewards/margins": 0.03264017030596733, + "rewards/rejected": -0.11644838005304337, + "step": 1465 + }, + { + "epoch": 0.4134730328387596, + "grad_norm": 1.2734375, + "learning_rate": 3.6473183305512118e-06, + "log_odds_chosen": 0.6088531613349915, + "log_odds_ratio": -0.5412156581878662, + "logits/chosen": 0.06665558367967606, + "logits/rejected": 0.12135007232427597, + "logps/chosen": -0.8178361654281616, + "logps/rejected": -1.1795545816421509, + "loss": 1.2037, + "nll_loss": 0.9547508955001831, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.08178362995386124, + "rewards/margins": 0.036171846091747284, + "rewards/rejected": -0.11795546859502792, + "step": 1470 + }, + { + "epoch": 0.41487940369875537, + "grad_norm": 0.474609375, + "learning_rate": 3.636397920726861e-06, + "log_odds_chosen": 0.03626465052366257, + "log_odds_ratio": -0.7632964849472046, + "logits/chosen": -0.06508911401033401, + "logits/rejected": 0.015456904657185078, + "logps/chosen": -0.8522012829780579, + "logps/rejected": -0.8960638046264648, + "loss": 1.3382, + "nll_loss": 1.295910120010376, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.08522014319896698, + "rewards/margins": 0.004386237356811762, + "rewards/rejected": -0.089606374502182, + "step": 1475 + }, + { + "epoch": 0.41628577455875115, + "grad_norm": 0.4375, + "learning_rate": 3.6254501115164477e-06, + "log_odds_chosen": 0.07423652708530426, + "log_odds_ratio": -0.7656761407852173, + "logits/chosen": -0.07237622886896133, + "logits/rejected": 0.10565494000911713, + "logps/chosen": -1.0656932592391968, + "logps/rejected": -1.0861456394195557, + "loss": 1.332, + "nll_loss": 1.252655267715454, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.1065693125128746, + "rewards/margins": 0.0020452491007745266, + "rewards/rejected": -0.10861456394195557, + "step": 1480 + }, + { + "epoch": 0.41769214541874694, + "grad_norm": 0.44140625, + "learning_rate": 3.6144751668796672e-06, + "log_odds_chosen": 0.17315946519374847, + "log_odds_ratio": -0.6867714524269104, + "logits/chosen": 0.13648071885108948, + "logits/rejected": 0.09958993643522263, + "logps/chosen": -0.8613283038139343, + "logps/rejected": -0.9445828199386597, + "loss": 1.2987, + "nll_loss": 1.1979812383651733, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.08613282442092896, + "rewards/margins": 0.008325454778969288, + "rewards/rejected": -0.09445827454328537, + "step": 1485 + }, + { + "epoch": 0.4190985162787427, + "grad_norm": 0.52734375, + "learning_rate": 3.603473351430474e-06, + "log_odds_chosen": 0.17239415645599365, + "log_odds_ratio": -0.7343893647193909, + "logits/chosen": 0.09099440276622772, + "logits/rejected": 0.07965691387653351, + "logps/chosen": -1.1169488430023193, + "logps/rejected": -1.2787196636199951, + "loss": 1.3347, + "nll_loss": 1.287948489189148, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.11169488728046417, + "rewards/margins": 0.016177091747522354, + "rewards/rejected": -0.12787196040153503, + "step": 1490 + }, + { + "epoch": 0.42050488713873846, + "grad_norm": 0.61328125, + "learning_rate": 3.592444930430693e-06, + "log_odds_chosen": 0.5300126075744629, + "log_odds_ratio": -0.5818522572517395, + "logits/chosen": -0.03600483015179634, + "logits/rejected": 0.10826855897903442, + "logps/chosen": -0.8172906041145325, + "logps/rejected": -1.1227980852127075, + "loss": 1.313, + "nll_loss": 1.1631724834442139, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.08172906935214996, + "rewards/margins": 0.030550751835107803, + "rewards/rejected": -0.11227981001138687, + "step": 1495 + }, + { + "epoch": 0.42191125799873425, + "grad_norm": 0.33984375, + "learning_rate": 3.581390169783633e-06, + "log_odds_chosen": 0.5500032305717468, + "log_odds_ratio": -0.531952977180481, + "logits/chosen": 0.2329874336719513, + "logits/rejected": -0.09679488837718964, + "logps/chosen": -0.7500187158584595, + "logps/rejected": -1.0893311500549316, + "loss": 1.3202, + "nll_loss": 1.3443794250488281, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.07500188052654266, + "rewards/margins": 0.03393123671412468, + "rewards/rejected": -0.10893311351537704, + "step": 1500 + }, + { + "epoch": 0.42331762885873003, + "grad_norm": 0.859375, + "learning_rate": 3.570309336027667e-06, + "log_odds_chosen": 0.20320725440979004, + "log_odds_ratio": -0.7183516621589661, + "logits/chosen": -0.14446063339710236, + "logits/rejected": 0.05480308085680008, + "logps/chosen": -0.967627227306366, + "logps/rejected": -1.0267921686172485, + "loss": 1.2588, + "nll_loss": 1.4239879846572876, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.09676271677017212, + "rewards/margins": 0.005916501395404339, + "rewards/rejected": -0.10267921537160873, + "step": 1505 + }, + { + "epoch": 0.4247239997187258, + "grad_norm": 0.5625, + "learning_rate": 3.559202696329812e-06, + "log_odds_chosen": 0.3492463529109955, + "log_odds_ratio": -0.6220179796218872, + "logits/chosen": 0.3201816976070404, + "logits/rejected": 0.06840449571609497, + "logps/chosen": -0.9060875177383423, + "logps/rejected": -1.1536937952041626, + "loss": 1.3733, + "nll_loss": 1.1420310735702515, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.09060876071453094, + "rewards/margins": 0.024760618805885315, + "rewards/rejected": -0.11536937952041626, + "step": 1510 + }, + { + "epoch": 0.4261303705787216, + "grad_norm": 0.494140625, + "learning_rate": 3.548070518479285e-06, + "log_odds_chosen": 0.14108441770076752, + "log_odds_ratio": -0.698050856590271, + "logits/chosen": 0.09484975039958954, + "logits/rejected": -0.1475522667169571, + "logps/chosen": -0.8964487910270691, + "logps/rejected": -0.9797111749649048, + "loss": 1.4352, + "nll_loss": 1.370657205581665, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.08964487910270691, + "rewards/margins": 0.008326229639351368, + "rewards/rejected": -0.097971111536026, + "step": 1515 + }, + { + "epoch": 0.4275367414387174, + "grad_norm": 1.2421875, + "learning_rate": 3.5369130708810457e-06, + "log_odds_chosen": 0.20812788605690002, + "log_odds_ratio": -0.7099069356918335, + "logits/chosen": 0.06524882465600967, + "logits/rejected": 0.09684231877326965, + "logps/chosen": -1.0159488916397095, + "logps/rejected": -1.0735489130020142, + "loss": 1.3004, + "nll_loss": 1.3176088333129883, + "rewards/accuracies": 0.44999998807907104, + "rewards/chosen": -0.10159488767385483, + "rewards/margins": 0.005759999621659517, + "rewards/rejected": -0.10735489428043365, + "step": 1520 + }, + { + "epoch": 0.4289431122987132, + "grad_norm": 1.15625, + "learning_rate": 3.525730622549327e-06, + "log_odds_chosen": 0.3095996081829071, + "log_odds_ratio": -0.656899631023407, + "logits/chosen": 0.13213399052619934, + "logits/rejected": 0.025996968150138855, + "logps/chosen": -0.9443826675415039, + "logps/rejected": -1.1343662738800049, + "loss": 1.3466, + "nll_loss": 1.3322721719741821, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.09443826973438263, + "rewards/margins": 0.01899835839867592, + "rewards/rejected": -0.11343662440776825, + "step": 1525 + }, + { + "epoch": 0.43034948315870897, + "grad_norm": 0.76171875, + "learning_rate": 3.5145234431011455e-06, + "log_odds_chosen": 0.1597270667552948, + "log_odds_ratio": -0.6989853978157043, + "logits/chosen": 0.27805274724960327, + "logits/rejected": 0.14623470604419708, + "logps/chosen": -0.9531451463699341, + "logps/rejected": -1.0427472591400146, + "loss": 1.3183, + "nll_loss": 1.1004104614257812, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.09531451761722565, + "rewards/margins": 0.00896020419895649, + "rewards/rejected": -0.10427472740411758, + "step": 1530 + }, + { + "epoch": 0.43175585401870475, + "grad_norm": 0.59375, + "learning_rate": 3.503291802749807e-06, + "log_odds_chosen": -0.03711827099323273, + "log_odds_ratio": -0.8125853538513184, + "logits/chosen": 0.1721441000699997, + "logits/rejected": -0.18884584307670593, + "logps/chosen": -1.061238169670105, + "logps/rejected": -1.0077035427093506, + "loss": 1.3304, + "nll_loss": 1.3840948343276978, + "rewards/accuracies": 0.4000000059604645, + "rewards/chosen": -0.10612382739782333, + "rewards/margins": -0.0053534661419689655, + "rewards/rejected": -0.10077036917209625, + "step": 1535 + }, + { + "epoch": 0.4331622248787005, + "grad_norm": 0.94921875, + "learning_rate": 3.492035972298384e-06, + "log_odds_chosen": 0.5267351269721985, + "log_odds_ratio": -0.5846163034439087, + "logits/chosen": 0.30322569608688354, + "logits/rejected": 0.09838562458753586, + "logps/chosen": -0.8233901858329773, + "logps/rejected": -1.1212366819381714, + "loss": 1.2286, + "nll_loss": 1.1432218551635742, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.08233902603387833, + "rewards/margins": 0.02978464961051941, + "rewards/rejected": -0.11212366819381714, + "step": 1540 + }, + { + "epoch": 0.4345685957386963, + "grad_norm": 0.56640625, + "learning_rate": 3.480756223133192e-06, + "log_odds_chosen": 0.5206764340400696, + "log_odds_ratio": -0.5795222520828247, + "logits/chosen": 0.22843270003795624, + "logits/rejected": -0.002892266260460019, + "logps/chosen": -0.8150238990783691, + "logps/rejected": -1.0866426229476929, + "loss": 1.3497, + "nll_loss": 1.199294924736023, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.08150239288806915, + "rewards/margins": 0.027161872014403343, + "rewards/rejected": -0.10866427421569824, + "step": 1545 + }, + { + "epoch": 0.43597496659869206, + "grad_norm": 0.4765625, + "learning_rate": 3.469452827217244e-06, + "log_odds_chosen": 0.31150805950164795, + "log_odds_ratio": -0.6585651636123657, + "logits/chosen": 0.022850574925541878, + "logits/rejected": -0.079190194606781, + "logps/chosen": -0.8741966485977173, + "logps/rejected": -1.0799192190170288, + "loss": 1.3772, + "nll_loss": 1.3223216533660889, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.08741967380046844, + "rewards/margins": 0.02057226002216339, + "rewards/rejected": -0.10799191892147064, + "step": 1550 + }, + { + "epoch": 0.43738133745868785, + "grad_norm": 0.51171875, + "learning_rate": 3.4581260570836923e-06, + "log_odds_chosen": 0.3546622693538666, + "log_odds_ratio": -0.5984280705451965, + "logits/chosen": 0.21791966259479523, + "logits/rejected": 0.12619267404079437, + "logps/chosen": -0.8225027322769165, + "logps/rejected": -1.0330629348754883, + "loss": 1.3622, + "nll_loss": 1.3160654306411743, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.08225028216838837, + "rewards/margins": 0.02105601504445076, + "rewards/rejected": -0.10330629348754883, + "step": 1555 + }, + { + "epoch": 0.43878770831868363, + "grad_norm": 0.91796875, + "learning_rate": 3.4467761858292597e-06, + "log_odds_chosen": 0.2265872061252594, + "log_odds_ratio": -0.7681325078010559, + "logits/chosen": 0.1044432520866394, + "logits/rejected": 0.03168236091732979, + "logps/chosen": -1.058218240737915, + "logps/rejected": -1.1245912313461304, + "loss": 1.3188, + "nll_loss": 1.2893956899642944, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.10582182556390762, + "rewards/margins": 0.006637311074882746, + "rewards/rejected": -0.11245913803577423, + "step": 1560 + }, + { + "epoch": 0.4401940791786794, + "grad_norm": 0.6875, + "learning_rate": 3.4354034871076535e-06, + "log_odds_chosen": 0.17266540229320526, + "log_odds_ratio": -0.6633031368255615, + "logits/chosen": 0.2871550917625427, + "logits/rejected": 0.03216805309057236, + "logps/chosen": -0.852996826171875, + "logps/rejected": -0.9725733995437622, + "loss": 1.2719, + "nll_loss": 1.1104490756988525, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.08529968559741974, + "rewards/margins": 0.011957659386098385, + "rewards/rejected": -0.0972573384642601, + "step": 1565 + }, + { + "epoch": 0.4416004500386752, + "grad_norm": 0.5625, + "learning_rate": 3.4240082351229698e-06, + "log_odds_chosen": 0.9087265729904175, + "log_odds_ratio": -0.4688630998134613, + "logits/chosen": 0.3752935528755188, + "logits/rejected": -0.13310836255550385, + "logps/chosen": -0.7691707015037537, + "logps/rejected": -1.356413722038269, + "loss": 1.3106, + "nll_loss": 1.174346923828125, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.07691706717014313, + "rewards/margins": 0.05872431397438049, + "rewards/rejected": -0.13564138114452362, + "step": 1570 + }, + { + "epoch": 0.443006820898671, + "grad_norm": 0.765625, + "learning_rate": 3.4125907046230765e-06, + "log_odds_chosen": 0.3072708249092102, + "log_odds_ratio": -0.6539907455444336, + "logits/chosen": 0.16183142364025116, + "logits/rejected": -0.0026418864727020264, + "logps/chosen": -1.0090245008468628, + "logps/rejected": -1.166789174079895, + "loss": 1.2752, + "nll_loss": 1.4169988632202148, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.10090246051549911, + "rewards/margins": 0.015776459127664566, + "rewards/rejected": -0.11667891591787338, + "step": 1575 + }, + { + "epoch": 0.4444131917586668, + "grad_norm": 1.0234375, + "learning_rate": 3.4011511708929966e-06, + "log_odds_chosen": 0.28264811635017395, + "log_odds_ratio": -0.6940265893936157, + "logits/chosen": 0.16960462927818298, + "logits/rejected": -0.06453105062246323, + "logps/chosen": -0.9375308752059937, + "logps/rejected": -1.095520257949829, + "loss": 1.2216, + "nll_loss": 1.189095377922058, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.09375307708978653, + "rewards/margins": 0.015798933804035187, + "rewards/rejected": -0.10955201089382172, + "step": 1580 + }, + { + "epoch": 0.44581956261866257, + "grad_norm": 0.93359375, + "learning_rate": 3.389689909748265e-06, + "log_odds_chosen": 0.2239263951778412, + "log_odds_ratio": -0.6509321331977844, + "logits/chosen": 0.04326072335243225, + "logits/rejected": 0.18090423941612244, + "logps/chosen": -0.9276517033576965, + "logps/rejected": -1.0240963697433472, + "loss": 1.2754, + "nll_loss": 1.1569693088531494, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.09276516735553741, + "rewards/margins": 0.009644483216106892, + "rewards/rejected": -0.10240964591503143, + "step": 1585 + }, + { + "epoch": 0.4472259334786583, + "grad_norm": 0.4453125, + "learning_rate": 3.378207197528282e-06, + "log_odds_chosen": 0.42619529366493225, + "log_odds_ratio": -0.6441267132759094, + "logits/chosen": 0.13767486810684204, + "logits/rejected": -0.05010326951742172, + "logps/chosen": -0.9565455317497253, + "logps/rejected": -1.2347350120544434, + "loss": 1.3051, + "nll_loss": 1.1900882720947266, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.09565454721450806, + "rewards/margins": 0.027818959206342697, + "rewards/rejected": -0.12347351014614105, + "step": 1590 + }, + { + "epoch": 0.4486323043386541, + "grad_norm": 1.8828125, + "learning_rate": 3.3667033110896476e-06, + "log_odds_chosen": 0.4609476923942566, + "log_odds_ratio": -0.642728328704834, + "logits/chosen": 0.09757024794816971, + "logits/rejected": 0.0014503851998597383, + "logps/chosen": -0.936115562915802, + "logps/rejected": -1.218464970588684, + "loss": 1.4283, + "nll_loss": 1.3003675937652588, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.09361156076192856, + "rewards/margins": 0.028234923258423805, + "rewards/rejected": -0.12184648215770721, + "step": 1595 + }, + { + "epoch": 0.4500386751986499, + "grad_norm": 0.3359375, + "learning_rate": 3.355178527799487e-06, + "log_odds_chosen": 0.04692380875349045, + "log_odds_ratio": -0.7183758616447449, + "logits/chosen": 0.22514934837818146, + "logits/rejected": -0.017583077773451805, + "logps/chosen": -0.9809813499450684, + "logps/rejected": -1.0266735553741455, + "loss": 1.2577, + "nll_loss": 1.283029556274414, + "rewards/accuracies": 0.44999998807907104, + "rewards/chosen": -0.09809814393520355, + "rewards/margins": 0.004569205921143293, + "rewards/rejected": -0.10266734659671783, + "step": 1600 + }, + { + "epoch": 0.45144504605864566, + "grad_norm": 1.046875, + "learning_rate": 3.343633125528766e-06, + "log_odds_chosen": 0.7795692682266235, + "log_odds_ratio": -0.4920505881309509, + "logits/chosen": -0.039743535220623016, + "logits/rejected": -0.005208232905715704, + "logps/chosen": -0.7143467664718628, + "logps/rejected": -1.1701852083206177, + "loss": 1.3277, + "nll_loss": 1.2369157075881958, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.07143466919660568, + "rewards/margins": 0.045583855360746384, + "rewards/rejected": -0.11701853573322296, + "step": 1605 + }, + { + "epoch": 0.45285141691864145, + "grad_norm": 1.3671875, + "learning_rate": 3.3320673826455898e-06, + "log_odds_chosen": 0.4412851929664612, + "log_odds_ratio": -0.587189793586731, + "logits/chosen": 0.08623610436916351, + "logits/rejected": -0.03705819323658943, + "logps/chosen": -0.8188657760620117, + "logps/rejected": -1.0730937719345093, + "loss": 1.3171, + "nll_loss": 1.1836116313934326, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.08188657462596893, + "rewards/margins": 0.025422796607017517, + "rewards/rejected": -0.10730937868356705, + "step": 1610 + }, + { + "epoch": 0.45425778777863723, + "grad_norm": 0.578125, + "learning_rate": 3.3204815780084853e-06, + "log_odds_chosen": 0.28102391958236694, + "log_odds_ratio": -0.642076849937439, + "logits/chosen": 0.17856454849243164, + "logits/rejected": 0.09094846248626709, + "logps/chosen": -0.8270488977432251, + "logps/rejected": -1.0271437168121338, + "loss": 1.3033, + "nll_loss": 1.156569242477417, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.08270487934350967, + "rewards/margins": 0.0200094822794199, + "rewards/rejected": -0.10271435976028442, + "step": 1615 + }, + { + "epoch": 0.455664158638633, + "grad_norm": 0.443359375, + "learning_rate": 3.3088759909596906e-06, + "log_odds_chosen": -0.021066760644316673, + "log_odds_ratio": -0.7931792736053467, + "logits/chosen": 0.09776515513658524, + "logits/rejected": 0.06629323214292526, + "logps/chosen": -1.0177059173583984, + "logps/rejected": -0.9952551126480103, + "loss": 1.3109, + "nll_loss": 1.226104497909546, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.10177057981491089, + "rewards/margins": -0.0022450797259807587, + "rewards/rejected": -0.09952551126480103, + "step": 1620 + }, + { + "epoch": 0.4570705294986288, + "grad_norm": 0.703125, + "learning_rate": 3.2972509013184063e-06, + "log_odds_chosen": 0.3991120457649231, + "log_odds_ratio": -0.6175200939178467, + "logits/chosen": 0.21924090385437012, + "logits/rejected": 0.07826922088861465, + "logps/chosen": -0.8282210230827332, + "logps/rejected": -1.032022476196289, + "loss": 1.2812, + "nll_loss": 1.088661789894104, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.08282209932804108, + "rewards/margins": 0.020380137488245964, + "rewards/rejected": -0.10320223867893219, + "step": 1625 + }, + { + "epoch": 0.4584769003586246, + "grad_norm": 0.55859375, + "learning_rate": 3.285606589374056e-06, + "log_odds_chosen": 0.10021786391735077, + "log_odds_ratio": -0.7633699178695679, + "logits/chosen": 0.1093636304140091, + "logits/rejected": -0.02713550068438053, + "logps/chosen": -0.9839357137680054, + "logps/rejected": -1.1006211042404175, + "loss": 1.4217, + "nll_loss": 1.5614440441131592, + "rewards/accuracies": 0.4000000059604645, + "rewards/chosen": -0.09839358180761337, + "rewards/margins": 0.011668531224131584, + "rewards/rejected": -0.11006210744380951, + "step": 1630 + }, + { + "epoch": 0.4598832712186203, + "grad_norm": 0.7578125, + "learning_rate": 3.27394333587953e-06, + "log_odds_chosen": 0.3297358751296997, + "log_odds_ratio": -0.6110000014305115, + "logits/chosen": 0.1353745013475418, + "logits/rejected": -0.1594073474407196, + "logps/chosen": -0.9906272888183594, + "logps/rejected": -1.278929352760315, + "loss": 1.3627, + "nll_loss": 1.3165563344955444, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.0990627333521843, + "rewards/margins": 0.028830209746956825, + "rewards/rejected": -0.12789292633533478, + "step": 1635 + }, + { + "epoch": 0.4612896420786161, + "grad_norm": 1.046875, + "learning_rate": 3.2622614220444105e-06, + "log_odds_chosen": 0.19407084584236145, + "log_odds_ratio": -0.7150810956954956, + "logits/chosen": -0.019503358751535416, + "logits/rejected": -0.02048128843307495, + "logps/chosen": -0.9893431663513184, + "logps/rejected": -1.1059573888778687, + "loss": 1.2613, + "nll_loss": 1.1831978559494019, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.09893431514501572, + "rewards/margins": 0.011661411263048649, + "rewards/rejected": -0.11059572547674179, + "step": 1640 + }, + { + "epoch": 0.4626960129386119, + "grad_norm": 0.7890625, + "learning_rate": 3.2505611295281934e-06, + "log_odds_chosen": 0.2686554193496704, + "log_odds_ratio": -0.6652511358261108, + "logits/chosen": -0.005538326688110828, + "logits/rejected": -0.2434784471988678, + "logps/chosen": -0.9460929036140442, + "logps/rejected": -1.1476470232009888, + "loss": 1.3183, + "nll_loss": 1.2766331434249878, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.09460929781198502, + "rewards/margins": 0.0201554112136364, + "rewards/rejected": -0.11476470530033112, + "step": 1645 + }, + { + "epoch": 0.4641023837986077, + "grad_norm": 1.609375, + "learning_rate": 3.2388427404335016e-06, + "log_odds_chosen": 0.13716399669647217, + "log_odds_ratio": -0.7483528256416321, + "logits/chosen": -0.04212506115436554, + "logits/rejected": 0.11257772147655487, + "logps/chosen": -0.9056754112243652, + "logps/rejected": -0.9881712198257446, + "loss": 1.4144, + "nll_loss": 1.4437960386276245, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.09056752175092697, + "rewards/margins": 0.008249588310718536, + "rewards/rejected": -0.0988171249628067, + "step": 1650 + }, + { + "epoch": 0.4655087546586035, + "grad_norm": 1.1875, + "learning_rate": 3.2271065372992765e-06, + "log_odds_chosen": 0.7073981165885925, + "log_odds_ratio": -0.5036030411720276, + "logits/chosen": 0.25857871770858765, + "logits/rejected": 0.1317531317472458, + "logps/chosen": -0.7869008779525757, + "logps/rejected": -1.2054097652435303, + "loss": 1.243, + "nll_loss": 1.020970106124878, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.07869009673595428, + "rewards/margins": 0.04185087978839874, + "rewards/rejected": -0.12054097652435303, + "step": 1655 + }, + { + "epoch": 0.46691512551859926, + "grad_norm": 0.341796875, + "learning_rate": 3.2153528030939696e-06, + "log_odds_chosen": 0.3224171996116638, + "log_odds_ratio": -0.6465792059898376, + "logits/chosen": 0.19630217552185059, + "logits/rejected": -0.009671496227383614, + "logps/chosen": -0.8761960864067078, + "logps/rejected": -1.0852859020233154, + "loss": 1.2579, + "nll_loss": 1.1838932037353516, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.08761961758136749, + "rewards/margins": 0.02090897597372532, + "rewards/rejected": -0.10852859169244766, + "step": 1660 + }, + { + "epoch": 0.46832149637859505, + "grad_norm": 0.6328125, + "learning_rate": 3.2035818212087218e-06, + "log_odds_chosen": 0.029603172093629837, + "log_odds_ratio": -0.7813437581062317, + "logits/chosen": 0.3448614478111267, + "logits/rejected": -0.03233183175325394, + "logps/chosen": -0.8988531827926636, + "logps/rejected": -0.8917310833930969, + "loss": 1.3023, + "nll_loss": 1.211477518081665, + "rewards/accuracies": 0.44999998807907104, + "rewards/chosen": -0.08988531678915024, + "rewards/margins": -0.0007122076931409538, + "rewards/rejected": -0.08917311578989029, + "step": 1665 + }, + { + "epoch": 0.46972786723859083, + "grad_norm": 0.81640625, + "learning_rate": 3.191793875450524e-06, + "log_odds_chosen": 0.14861974120140076, + "log_odds_ratio": -0.7764616012573242, + "logits/chosen": 0.09097392857074738, + "logits/rejected": 0.15764659643173218, + "logps/chosen": -1.060884714126587, + "logps/rejected": -1.1452229022979736, + "loss": 1.3467, + "nll_loss": 1.055648684501648, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.10608847439289093, + "rewards/margins": 0.008433830924332142, + "rewards/rejected": -0.1145222932100296, + "step": 1670 + }, + { + "epoch": 0.4711342380985866, + "grad_norm": 0.515625, + "learning_rate": 3.1799892500353825e-06, + "log_odds_chosen": -0.0818730816245079, + "log_odds_ratio": -0.8223946690559387, + "logits/chosen": 0.04592124745249748, + "logits/rejected": 0.04530264809727669, + "logps/chosen": -0.8977168202400208, + "logps/rejected": -0.8727294206619263, + "loss": 1.2926, + "nll_loss": 1.2204254865646362, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.08977167308330536, + "rewards/margins": -0.0024987380020320415, + "rewards/rejected": -0.08727294206619263, + "step": 1675 + }, + { + "epoch": 0.47254060895858235, + "grad_norm": 0.5, + "learning_rate": 3.168168229581461e-06, + "log_odds_chosen": 0.3223528265953064, + "log_odds_ratio": -0.6745055317878723, + "logits/chosen": 0.09610681235790253, + "logits/rejected": -0.14661376178264618, + "logps/chosen": -0.868693470954895, + "logps/rejected": -1.0475997924804688, + "loss": 1.2964, + "nll_loss": 1.3960098028182983, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.08686934411525726, + "rewards/margins": 0.017890626564621925, + "rewards/rejected": -0.10475997626781464, + "step": 1680 + }, + { + "epoch": 0.47394697981857814, + "grad_norm": 0.5546875, + "learning_rate": 3.1563310991022183e-06, + "log_odds_chosen": 0.42393559217453003, + "log_odds_ratio": -0.5872426629066467, + "logits/chosen": 0.14853203296661377, + "logits/rejected": -0.09115082025527954, + "logps/chosen": -0.8752552270889282, + "logps/rejected": -1.1131998300552368, + "loss": 1.2221, + "nll_loss": 1.189212679862976, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.08752551674842834, + "rewards/margins": 0.023794464766979218, + "rewards/rejected": -0.11131997406482697, + "step": 1685 + }, + { + "epoch": 0.4753533506785739, + "grad_norm": 0.4765625, + "learning_rate": 3.144478143999539e-06, + "log_odds_chosen": -0.04508579522371292, + "log_odds_ratio": -0.7579798102378845, + "logits/chosen": 0.27330082654953003, + "logits/rejected": -0.18080435693264008, + "logps/chosen": -1.079253911972046, + "logps/rejected": -1.0695364475250244, + "loss": 1.3606, + "nll_loss": 1.4030883312225342, + "rewards/accuracies": 0.44999998807907104, + "rewards/chosen": -0.1079254001379013, + "rewards/margins": -0.0009717432549223304, + "rewards/rejected": -0.10695364326238632, + "step": 1690 + }, + { + "epoch": 0.4767597215385697, + "grad_norm": 0.498046875, + "learning_rate": 3.1326096500568502e-06, + "log_odds_chosen": -0.2987438440322876, + "log_odds_ratio": -0.9051570892333984, + "logits/chosen": 0.13019177317619324, + "logits/rejected": -0.057271480560302734, + "logps/chosen": -1.1571803092956543, + "logps/rejected": -0.9558758735656738, + "loss": 1.331, + "nll_loss": 1.4521785974502563, + "rewards/accuracies": 0.30000001192092896, + "rewards/chosen": -0.1157180443406105, + "rewards/margins": -0.020130449905991554, + "rewards/rejected": -0.0955875962972641, + "step": 1695 + }, + { + "epoch": 0.4781660923985655, + "grad_norm": 0.71875, + "learning_rate": 3.1207259034322325e-06, + "log_odds_chosen": 0.10301417112350464, + "log_odds_ratio": -0.7123724222183228, + "logits/chosen": 0.12717892229557037, + "logits/rejected": 0.19477501511573792, + "logps/chosen": -0.9269098043441772, + "logps/rejected": -0.9850351214408875, + "loss": 1.2283, + "nll_loss": 1.0985163450241089, + "rewards/accuracies": 0.4000000059604645, + "rewards/chosen": -0.09269097447395325, + "rewards/margins": 0.005812531802803278, + "rewards/rejected": -0.09850350767374039, + "step": 1700 + }, + { + "epoch": 0.4795724632585613, + "grad_norm": 0.89453125, + "learning_rate": 3.1088271906515203e-06, + "log_odds_chosen": 0.022561922669410706, + "log_odds_ratio": -0.7353700399398804, + "logits/chosen": 0.40177297592163086, + "logits/rejected": 0.4013861119747162, + "logps/chosen": -0.8993846774101257, + "logps/rejected": -0.8993097543716431, + "loss": 1.3072, + "nll_loss": 1.1129878759384155, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.0899384617805481, + "rewards/margins": -7.492490112781525e-06, + "rewards/rejected": -0.08993097394704819, + "step": 1705 + }, + { + "epoch": 0.4809788341185571, + "grad_norm": 0.95703125, + "learning_rate": 3.096913798601392e-06, + "log_odds_chosen": -0.01520991325378418, + "log_odds_ratio": -0.7819222211837769, + "logits/chosen": 0.08972098678350449, + "logits/rejected": 0.14325182139873505, + "logps/chosen": -1.2126904726028442, + "logps/rejected": -1.2241413593292236, + "loss": 1.3372, + "nll_loss": 1.2252757549285889, + "rewards/accuracies": 0.4000000059604645, + "rewards/chosen": -0.12126903235912323, + "rewards/margins": 0.001145093934610486, + "rewards/rejected": -0.12241413444280624, + "step": 1710 + }, + { + "epoch": 0.48238520497855286, + "grad_norm": 0.4296875, + "learning_rate": 3.0849860145224537e-06, + "log_odds_chosen": -0.0876084640622139, + "log_odds_ratio": -0.8532189130783081, + "logits/chosen": 0.057985819876194, + "logits/rejected": 0.012759095057845116, + "logps/chosen": -0.9567869305610657, + "logps/rejected": -0.895396888256073, + "loss": 1.2678, + "nll_loss": 1.1957978010177612, + "rewards/accuracies": 0.44999998807907104, + "rewards/chosen": -0.09567869454622269, + "rewards/margins": -0.006138999946415424, + "rewards/rejected": -0.08953969180583954, + "step": 1715 + }, + { + "epoch": 0.48379157583854865, + "grad_norm": 0.546875, + "learning_rate": 3.0730441260023148e-06, + "log_odds_chosen": 0.5008511543273926, + "log_odds_ratio": -0.6241706609725952, + "logits/chosen": 0.2945128381252289, + "logits/rejected": -0.06201595067977905, + "logps/chosen": -0.7394557595252991, + "logps/rejected": -0.9900891184806824, + "loss": 1.3866, + "nll_loss": 1.2984784841537476, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.07394556701183319, + "rewards/margins": 0.02506333589553833, + "rewards/rejected": -0.09900891780853271, + "step": 1720 + }, + { + "epoch": 0.4851979466985444, + "grad_norm": 0.37890625, + "learning_rate": 3.061088420968652e-06, + "log_odds_chosen": 0.37631872296333313, + "log_odds_ratio": -0.6500317454338074, + "logits/chosen": 0.17834916710853577, + "logits/rejected": -0.08376047015190125, + "logps/chosen": -0.8821493983268738, + "logps/rejected": -1.156684160232544, + "loss": 1.2124, + "nll_loss": 1.1348316669464111, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.0882149413228035, + "rewards/margins": 0.027453461661934853, + "rewards/rejected": -0.1156684011220932, + "step": 1725 + }, + { + "epoch": 0.48660431755854017, + "grad_norm": 0.58203125, + "learning_rate": 3.04911918768227e-06, + "log_odds_chosen": 0.01393374614417553, + "log_odds_ratio": -0.7638979554176331, + "logits/chosen": 0.21452713012695312, + "logits/rejected": -0.010724795050919056, + "logps/chosen": -0.9365432858467102, + "logps/rejected": -1.0033760070800781, + "loss": 1.2342, + "nll_loss": 1.1810895204544067, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.0936543270945549, + "rewards/margins": 0.00668326998129487, + "rewards/rejected": -0.10033760219812393, + "step": 1730 + }, + { + "epoch": 0.48801068841853595, + "grad_norm": 0.6875, + "learning_rate": 3.037136714730148e-06, + "log_odds_chosen": 0.06743361800909042, + "log_odds_ratio": -0.718464195728302, + "logits/chosen": 0.08722179383039474, + "logits/rejected": 0.22729694843292236, + "logps/chosen": -0.9022086262702942, + "logps/rejected": -0.9189395904541016, + "loss": 1.2611, + "nll_loss": 1.176918387413025, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.0902208760380745, + "rewards/margins": 0.0016730849165469408, + "rewards/rejected": -0.09189395606517792, + "step": 1735 + }, + { + "epoch": 0.48941705927853174, + "grad_norm": 0.416015625, + "learning_rate": 3.025141291018484e-06, + "log_odds_chosen": 0.46886926889419556, + "log_odds_ratio": -0.6271840929985046, + "logits/chosen": 0.31561678647994995, + "logits/rejected": 0.21411773562431335, + "logps/chosen": -0.8544023633003235, + "logps/rejected": -1.1380481719970703, + "loss": 1.2311, + "nll_loss": 1.1394670009613037, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.08544023334980011, + "rewards/margins": 0.02836458943784237, + "rewards/rejected": -0.11380481719970703, + "step": 1740 + }, + { + "epoch": 0.49082343013852753, + "grad_norm": 0.5625, + "learning_rate": 3.0131332057657263e-06, + "log_odds_chosen": 0.1628924459218979, + "log_odds_ratio": -0.7746323943138123, + "logits/chosen": 0.04965885728597641, + "logits/rejected": 0.08840426057577133, + "logps/chosen": -0.8988859057426453, + "logps/rejected": -0.8661472201347351, + "loss": 1.3893, + "nll_loss": 1.2756502628326416, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.08988858014345169, + "rewards/margins": -0.0032738607842475176, + "rewards/rejected": -0.08661472052335739, + "step": 1745 + }, + { + "epoch": 0.4922298009985233, + "grad_norm": 0.6328125, + "learning_rate": 3.0011127484956066e-06, + "log_odds_chosen": 0.43131130933761597, + "log_odds_ratio": -0.5675671100616455, + "logits/chosen": 0.06633243709802628, + "logits/rejected": 0.10099319368600845, + "logps/chosen": -0.8430767059326172, + "logps/rejected": -1.0605162382125854, + "loss": 1.3615, + "nll_loss": 1.4225993156433105, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.08430766314268112, + "rewards/margins": 0.021743962541222572, + "rewards/rejected": -0.10605162382125854, + "step": 1750 + }, + { + "epoch": 0.4936361718585191, + "grad_norm": 1.046875, + "learning_rate": 2.989080209030152e-06, + "log_odds_chosen": 0.3503009080886841, + "log_odds_ratio": -0.6180037260055542, + "logits/chosen": 0.2614450752735138, + "logits/rejected": 0.14672674238681793, + "logps/chosen": -0.8308509588241577, + "logps/rejected": -1.072124719619751, + "loss": 1.1843, + "nll_loss": 1.1179145574569702, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.08308509737253189, + "rewards/margins": 0.02412736788392067, + "rewards/rejected": -0.10721246898174286, + "step": 1755 + }, + { + "epoch": 0.4950425427185149, + "grad_norm": 0.875, + "learning_rate": 2.977035877482698e-06, + "log_odds_chosen": 0.11333318799734116, + "log_odds_ratio": -0.7067681550979614, + "logits/chosen": 0.08780858665704727, + "logits/rejected": 0.16933095455169678, + "logps/chosen": -1.0028111934661865, + "logps/rejected": -1.0639703273773193, + "loss": 1.2054, + "nll_loss": 1.2186434268951416, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.10028110444545746, + "rewards/margins": 0.006115921773016453, + "rewards/rejected": -0.10639703273773193, + "step": 1760 + }, + { + "epoch": 0.4964489135785107, + "grad_norm": 0.45703125, + "learning_rate": 2.9649800442509013e-06, + "log_odds_chosen": 0.19351093471050262, + "log_odds_ratio": -0.6989894509315491, + "logits/chosen": -0.018921542912721634, + "logits/rejected": 0.05963977426290512, + "logps/chosen": -0.9325786828994751, + "logps/rejected": -1.1182286739349365, + "loss": 1.2193, + "nll_loss": 1.1260807514190674, + "rewards/accuracies": 0.44999998807907104, + "rewards/chosen": -0.0932578593492508, + "rewards/margins": 0.018564995378255844, + "rewards/rejected": -0.11182286590337753, + "step": 1765 + }, + { + "epoch": 0.4978552844385064, + "grad_norm": 0.83984375, + "learning_rate": 2.952913000009729e-06, + "log_odds_chosen": 0.38330164551734924, + "log_odds_ratio": -0.5583044290542603, + "logits/chosen": 0.20868602395057678, + "logits/rejected": -0.043395109474658966, + "logps/chosen": -0.8025444746017456, + "logps/rejected": -1.0090991258621216, + "loss": 1.3287, + "nll_loss": 1.2745373249053955, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.0802544504404068, + "rewards/margins": 0.020655466243624687, + "rewards/rejected": -0.10090991109609604, + "step": 1770 + }, + { + "epoch": 0.4992616552985022, + "grad_norm": 1.5625, + "learning_rate": 2.9408350357044527e-06, + "log_odds_chosen": 0.3873779773712158, + "log_odds_ratio": -0.6013652086257935, + "logits/chosen": -0.011114698834717274, + "logits/rejected": 0.006222672760486603, + "logps/chosen": -0.8589563369750977, + "logps/rejected": -1.0658544301986694, + "loss": 1.2814, + "nll_loss": 1.2002887725830078, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.08589563518762589, + "rewards/margins": 0.020689817145466805, + "rewards/rejected": -0.10658544301986694, + "step": 1775 + }, + { + "epoch": 0.500668026158498, + "grad_norm": 0.69140625, + "learning_rate": 2.9287464425436386e-06, + "log_odds_chosen": 0.22454440593719482, + "log_odds_ratio": -0.6838093996047974, + "logits/chosen": -0.025767764076590538, + "logits/rejected": 0.0712432935833931, + "logps/chosen": -1.0069491863250732, + "logps/rejected": -1.1783020496368408, + "loss": 1.2376, + "nll_loss": 1.1458652019500732, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.10069490969181061, + "rewards/margins": 0.01713528484106064, + "rewards/rejected": -0.11783019453287125, + "step": 1780 + }, + { + "epoch": 0.5020743970184938, + "grad_norm": 0.94140625, + "learning_rate": 2.9166475119921206e-06, + "log_odds_chosen": 0.09982554614543915, + "log_odds_ratio": -0.7170512676239014, + "logits/chosen": 0.27315324544906616, + "logits/rejected": 0.05909186601638794, + "logps/chosen": -1.0025029182434082, + "logps/rejected": -1.0902785062789917, + "loss": 1.2826, + "nll_loss": 1.3132749795913696, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.10025028884410858, + "rewards/margins": 0.008777563460171223, + "rewards/rejected": -0.10902786254882812, + "step": 1785 + }, + { + "epoch": 0.5034807678784896, + "grad_norm": 0.5546875, + "learning_rate": 2.904538535763973e-06, + "log_odds_chosen": 0.6338373422622681, + "log_odds_ratio": -0.48895391821861267, + "logits/chosen": 0.29887187480926514, + "logits/rejected": 0.14295390248298645, + "logps/chosen": -0.7918098568916321, + "logps/rejected": -1.1807332038879395, + "loss": 1.1814, + "nll_loss": 1.0374513864517212, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.07918097823858261, + "rewards/margins": 0.03889235109090805, + "rewards/rejected": -0.11807332932949066, + "step": 1790 + }, + { + "epoch": 0.5048871387384853, + "grad_norm": 0.5625, + "learning_rate": 2.8924198058154807e-06, + "log_odds_chosen": 0.3318602740764618, + "log_odds_ratio": -0.6486313343048096, + "logits/chosen": 0.18483847379684448, + "logits/rejected": 0.06718714535236359, + "logps/chosen": -0.9118415713310242, + "logps/rejected": -1.1547482013702393, + "loss": 1.2743, + "nll_loss": 1.1991218328475952, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.09118416905403137, + "rewards/margins": 0.024290654808282852, + "rewards/rejected": -0.11547482013702393, + "step": 1795 + }, + { + "epoch": 0.5062935095984811, + "grad_norm": 0.9375, + "learning_rate": 2.8802916143380983e-06, + "log_odds_chosen": 0.14648711681365967, + "log_odds_ratio": -0.667282223701477, + "logits/chosen": 0.21301324665546417, + "logits/rejected": 0.15577247738838196, + "logps/chosen": -0.8973161578178406, + "logps/rejected": -0.9705324172973633, + "loss": 1.2663, + "nll_loss": 1.1156691312789917, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.0897316187620163, + "rewards/margins": 0.007321618497371674, + "rewards/rejected": -0.09705324470996857, + "step": 1800 + }, + { + "epoch": 0.5076998804584769, + "grad_norm": 0.302734375, + "learning_rate": 2.8681542537514024e-06, + "log_odds_chosen": 0.34126919507980347, + "log_odds_ratio": -0.5945046544075012, + "logits/chosen": 0.30521661043167114, + "logits/rejected": -0.009005474857985973, + "logps/chosen": -0.899261474609375, + "logps/rejected": -1.116114616394043, + "loss": 1.3684, + "nll_loss": 1.3031537532806396, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.08992613852024078, + "rewards/margins": 0.021685311570763588, + "rewards/rejected": -0.11161146312952042, + "step": 1805 + }, + { + "epoch": 0.5091062513184726, + "grad_norm": 0.54296875, + "learning_rate": 2.8560080166960465e-06, + "log_odds_chosen": 0.05003465339541435, + "log_odds_ratio": -0.8127752542495728, + "logits/chosen": 0.15410101413726807, + "logits/rejected": -0.05961986631155014, + "logps/chosen": -1.2021180391311646, + "logps/rejected": -1.244368314743042, + "loss": 1.2782, + "nll_loss": 1.2843389511108398, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.12021180242300034, + "rewards/margins": 0.004225029144436121, + "rewards/rejected": -0.12443683296442032, + "step": 1810 + }, + { + "epoch": 0.5105126221784685, + "grad_norm": 0.5, + "learning_rate": 2.8438531960267e-06, + "log_odds_chosen": 0.14014047384262085, + "log_odds_ratio": -0.746768593788147, + "logits/chosen": 0.007824910804629326, + "logits/rejected": 0.04268079251050949, + "logps/chosen": -0.9318583607673645, + "logps/rejected": -1.0280802249908447, + "loss": 1.2935, + "nll_loss": 1.2723755836486816, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.09318584203720093, + "rewards/margins": 0.009622195735573769, + "rewards/rejected": -0.10280803591012955, + "step": 1815 + }, + { + "epoch": 0.5119189930384642, + "grad_norm": 0.6953125, + "learning_rate": 2.8316900848049896e-06, + "log_odds_chosen": 0.46041789650917053, + "log_odds_ratio": -0.545207142829895, + "logits/chosen": -0.025069892406463623, + "logits/rejected": 0.0031093836296349764, + "logps/chosen": -0.9471213221549988, + "logps/rejected": -1.2298479080200195, + "loss": 1.2804, + "nll_loss": 0.9852622747421265, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.09471213817596436, + "rewards/margins": 0.02827264927327633, + "rewards/rejected": -0.12298478931188583, + "step": 1820 + }, + { + "epoch": 0.5133253638984601, + "grad_norm": 0.462890625, + "learning_rate": 2.8195189762924357e-06, + "log_odds_chosen": 0.38171496987342834, + "log_odds_ratio": -0.5993026494979858, + "logits/chosen": 0.13395783305168152, + "logits/rejected": -0.014965623617172241, + "logps/chosen": -0.8572479486465454, + "logps/rejected": -1.1530402898788452, + "loss": 1.3398, + "nll_loss": 1.2653952836990356, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.08572478592395782, + "rewards/margins": 0.02957923151552677, + "rewards/rejected": -0.11530401557683945, + "step": 1825 + }, + { + "epoch": 0.5147317347584558, + "grad_norm": 0.5546875, + "learning_rate": 2.807340163943377e-06, + "log_odds_chosen": 0.9017633199691772, + "log_odds_ratio": -0.528324544429779, + "logits/chosen": 0.2883888781070709, + "logits/rejected": 0.047523729503154755, + "logps/chosen": -0.8029147386550903, + "logps/rejected": -1.508462905883789, + "loss": 1.3291, + "nll_loss": 1.1781851053237915, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.08029146492481232, + "rewards/margins": 0.07055483758449554, + "rewards/rejected": -0.15084628760814667, + "step": 1830 + }, + { + "epoch": 0.5161381056184516, + "grad_norm": 0.443359375, + "learning_rate": 2.7951539413978967e-06, + "log_odds_chosen": -0.15000049769878387, + "log_odds_ratio": -0.9076086282730103, + "logits/chosen": 0.026946574449539185, + "logits/rejected": 0.008309757336974144, + "logps/chosen": -1.0551016330718994, + "logps/rejected": -0.962007999420166, + "loss": 1.3199, + "nll_loss": 1.2718522548675537, + "rewards/accuracies": 0.44999998807907104, + "rewards/chosen": -0.1055101752281189, + "rewards/margins": -0.009309363551437855, + "rewards/rejected": -0.09620080888271332, + "step": 1835 + }, + { + "epoch": 0.5175444764784474, + "grad_norm": 0.6484375, + "learning_rate": 2.7829606024747458e-06, + "log_odds_chosen": 0.3771592974662781, + "log_odds_ratio": -0.6184414625167847, + "logits/chosen": 0.08473803848028183, + "logits/rejected": -0.06116770580410957, + "logps/chosen": -0.8525704145431519, + "logps/rejected": -1.120895504951477, + "loss": 1.2459, + "nll_loss": 1.1650941371917725, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.08525704592466354, + "rewards/margins": 0.026832515373826027, + "rewards/rejected": -0.11208955198526382, + "step": 1840 + }, + { + "epoch": 0.5189508473384431, + "grad_norm": 0.46484375, + "learning_rate": 2.7707604411642547e-06, + "log_odds_chosen": 0.26341018080711365, + "log_odds_ratio": -0.6081176996231079, + "logits/chosen": 0.264903724193573, + "logits/rejected": -0.026007074862718582, + "logps/chosen": -0.9291330575942993, + "logps/rejected": -1.0887218713760376, + "loss": 1.3753, + "nll_loss": 1.2891864776611328, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.09291330724954605, + "rewards/margins": 0.015958871692419052, + "rewards/rejected": -0.1088721975684166, + "step": 1845 + }, + { + "epoch": 0.5203572181984389, + "grad_norm": 0.408203125, + "learning_rate": 2.7585537516212468e-06, + "log_odds_chosen": 0.26861482858657837, + "log_odds_ratio": -0.7222877740859985, + "logits/chosen": -0.026812052354216576, + "logits/rejected": -0.068088099360466, + "logps/chosen": -1.0210070610046387, + "logps/rejected": -1.2642821073532104, + "loss": 1.3171, + "nll_loss": 1.4248225688934326, + "rewards/accuracies": 0.3499999940395355, + "rewards/chosen": -0.10210070759057999, + "rewards/margins": 0.02432749792933464, + "rewards/rejected": -0.12642820179462433, + "step": 1850 + }, + { + "epoch": 0.5217635890584347, + "grad_norm": 0.625, + "learning_rate": 2.7463408281579455e-06, + "log_odds_chosen": 0.18715333938598633, + "log_odds_ratio": -0.7116204500198364, + "logits/chosen": 0.18907713890075684, + "logits/rejected": -0.004186171106994152, + "logps/chosen": -0.8570513725280762, + "logps/rejected": -0.9989348649978638, + "loss": 1.3202, + "nll_loss": 1.1158322095870972, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.08570513874292374, + "rewards/margins": 0.014188344590365887, + "rewards/rejected": -0.0998934805393219, + "step": 1855 + }, + { + "epoch": 0.5231699599184305, + "grad_norm": 0.322265625, + "learning_rate": 2.73412196523688e-06, + "log_odds_chosen": 0.3805992603302002, + "log_odds_ratio": -0.5736201405525208, + "logits/chosen": 0.1680772304534912, + "logits/rejected": 0.03141466900706291, + "logps/chosen": -0.8502508997917175, + "logps/rejected": -1.1017472743988037, + "loss": 1.287, + "nll_loss": 1.1758310794830322, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.08502508699893951, + "rewards/margins": 0.025149637833237648, + "rewards/rejected": -0.11017473042011261, + "step": 1860 + }, + { + "epoch": 0.5245763307784262, + "grad_norm": 1.1953125, + "learning_rate": 2.7218974574637837e-06, + "log_odds_chosen": 0.6645687222480774, + "log_odds_ratio": -0.6347068548202515, + "logits/chosen": 0.04413944110274315, + "logits/rejected": 0.04097120463848114, + "logps/chosen": -0.911517322063446, + "logps/rejected": -1.2816895246505737, + "loss": 1.3929, + "nll_loss": 1.3630738258361816, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.09115172922611237, + "rewards/margins": 0.03701721876859665, + "rewards/rejected": -0.12816892564296722, + "step": 1865 + }, + { + "epoch": 0.5259827016384221, + "grad_norm": 0.38671875, + "learning_rate": 2.70966759958049e-06, + "log_odds_chosen": 0.28335127234458923, + "log_odds_ratio": -0.6399809122085571, + "logits/chosen": 0.17220312356948853, + "logits/rejected": 0.18077576160430908, + "logps/chosen": -0.9338391423225403, + "logps/rejected": -1.128045916557312, + "loss": 1.231, + "nll_loss": 1.2164467573165894, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.09338392317295074, + "rewards/margins": 0.0194206852465868, + "rewards/rejected": -0.11280461400747299, + "step": 1870 + }, + { + "epoch": 0.5273890724984178, + "grad_norm": 0.5390625, + "learning_rate": 2.697432686457828e-06, + "log_odds_chosen": 0.4342438280582428, + "log_odds_ratio": -0.6491691470146179, + "logits/chosen": 0.17093636095523834, + "logits/rejected": 0.05285681039094925, + "logps/chosen": -0.8747994303703308, + "logps/rejected": -1.1325397491455078, + "loss": 1.3414, + "nll_loss": 1.2444498538970947, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.08747994154691696, + "rewards/margins": 0.025774037465453148, + "rewards/rejected": -0.11325398832559586, + "step": 1875 + }, + { + "epoch": 0.5287954433584137, + "grad_norm": 0.62109375, + "learning_rate": 2.685193013088515e-06, + "log_odds_chosen": 0.42729368805885315, + "log_odds_ratio": -0.6372612714767456, + "logits/chosen": 0.1625080108642578, + "logits/rejected": 0.06369408220052719, + "logps/chosen": -0.9214962720870972, + "logps/rejected": -1.2057900428771973, + "loss": 1.3388, + "nll_loss": 1.317048192024231, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.09214963763952255, + "rewards/margins": 0.02842937409877777, + "rewards/rejected": -0.12057900428771973, + "step": 1880 + }, + { + "epoch": 0.5302018142184094, + "grad_norm": 1.6953125, + "learning_rate": 2.6729488745800375e-06, + "log_odds_chosen": 0.21069876849651337, + "log_odds_ratio": -0.6181350946426392, + "logits/chosen": 0.09169472008943558, + "logits/rejected": -0.15534797310829163, + "logps/chosen": -1.0708431005477905, + "logps/rejected": -1.2090436220169067, + "loss": 1.2751, + "nll_loss": 1.3398125171661377, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.10708429664373398, + "rewards/margins": 0.0138200419023633, + "rewards/rejected": -0.1209043487906456, + "step": 1885 + }, + { + "epoch": 0.5316081850784051, + "grad_norm": 0.703125, + "learning_rate": 2.6607005661475412e-06, + "log_odds_chosen": -0.015950357541441917, + "log_odds_ratio": -0.9686321020126343, + "logits/chosen": 0.17628921568393707, + "logits/rejected": 0.04031284898519516, + "logps/chosen": -1.1431938409805298, + "logps/rejected": -1.177972435951233, + "loss": 1.3656, + "nll_loss": 1.365552306175232, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.11431938409805298, + "rewards/margins": 0.0034778628032654524, + "rewards/rejected": -0.11779724061489105, + "step": 1890 + }, + { + "epoch": 0.533014555938401, + "grad_norm": 0.380859375, + "learning_rate": 2.6484483831067132e-06, + "log_odds_chosen": 0.6062152981758118, + "log_odds_ratio": -0.5288558602333069, + "logits/chosen": 0.04212791472673416, + "logits/rejected": -0.21651284396648407, + "logps/chosen": -0.9404972791671753, + "logps/rejected": -1.3659436702728271, + "loss": 1.3344, + "nll_loss": 1.4181785583496094, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.09404972940683365, + "rewards/margins": 0.0425446555018425, + "rewards/rejected": -0.13659438490867615, + "step": 1895 + }, + { + "epoch": 0.5344209267983967, + "grad_norm": 0.498046875, + "learning_rate": 2.6361926208666585e-06, + "log_odds_chosen": 0.2467803657054901, + "log_odds_ratio": -0.6717809438705444, + "logits/chosen": 0.306289941072464, + "logits/rejected": -0.03955193608999252, + "logps/chosen": -0.9068489074707031, + "logps/rejected": -1.087083101272583, + "loss": 1.2124, + "nll_loss": 1.149648904800415, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.09068489074707031, + "rewards/margins": 0.01802341639995575, + "rewards/rejected": -0.10870830714702606, + "step": 1900 + }, + { + "epoch": 0.5358272976583925, + "grad_norm": 0.357421875, + "learning_rate": 2.623933574922779e-06, + "log_odds_chosen": 0.3376588523387909, + "log_odds_ratio": -0.6184042096138, + "logits/chosen": 0.07933865487575531, + "logits/rejected": 0.1608869433403015, + "logps/chosen": -0.850027859210968, + "logps/rejected": -1.0102084875106812, + "loss": 1.2721, + "nll_loss": 1.1246836185455322, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.08500279486179352, + "rewards/margins": 0.01601807400584221, + "rewards/rejected": -0.10102085769176483, + "step": 1905 + }, + { + "epoch": 0.5372336685183883, + "grad_norm": 0.490234375, + "learning_rate": 2.611671540849651e-06, + "log_odds_chosen": 0.28319352865219116, + "log_odds_ratio": -0.6058458685874939, + "logits/chosen": 0.3010689616203308, + "logits/rejected": 0.015681147575378418, + "logps/chosen": -0.9729520082473755, + "logps/rejected": -1.1751806735992432, + "loss": 1.3129, + "nll_loss": 1.2195885181427002, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.09729520231485367, + "rewards/margins": 0.0202228631824255, + "rewards/rejected": -0.11751806735992432, + "step": 1910 + }, + { + "epoch": 0.5386400393783841, + "grad_norm": 0.4140625, + "learning_rate": 2.599406814293895e-06, + "log_odds_chosen": 0.31516438722610474, + "log_odds_ratio": -0.6291471719741821, + "logits/chosen": 0.12986072897911072, + "logits/rejected": 0.03120257332921028, + "logps/chosen": -0.9961276054382324, + "logps/rejected": -1.1284226179122925, + "loss": 1.311, + "nll_loss": 1.3952885866165161, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.099612757563591, + "rewards/margins": 0.01322950143367052, + "rewards/rejected": -0.11284227669239044, + "step": 1915 + }, + { + "epoch": 0.5400464102383798, + "grad_norm": 1.5390625, + "learning_rate": 2.5871396909670494e-06, + "log_odds_chosen": 0.723926842212677, + "log_odds_ratio": -0.5813840627670288, + "logits/chosen": 0.10541262477636337, + "logits/rejected": 0.2073705941438675, + "logps/chosen": -0.8834552764892578, + "logps/rejected": -1.36771559715271, + "loss": 1.345, + "nll_loss": 1.0873137712478638, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.08834554255008698, + "rewards/margins": 0.048426032066345215, + "rewards/rejected": -0.1367715746164322, + "step": 1920 + }, + { + "epoch": 0.5414527810983757, + "grad_norm": 0.67578125, + "learning_rate": 2.5748704666384417e-06, + "log_odds_chosen": 0.41179290413856506, + "log_odds_ratio": -0.6550354957580566, + "logits/chosen": 0.15152336657047272, + "logits/rejected": -0.05778312683105469, + "logps/chosen": -0.9851999282836914, + "logps/rejected": -1.2101080417633057, + "loss": 1.3186, + "nll_loss": 1.2828853130340576, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.09852000325918198, + "rewards/margins": 0.022490810602903366, + "rewards/rejected": -0.12101080268621445, + "step": 1925 + }, + { + "epoch": 0.5428591519583714, + "grad_norm": 0.3828125, + "learning_rate": 2.562599437128055e-06, + "log_odds_chosen": 0.377454936504364, + "log_odds_ratio": -0.6358648538589478, + "logits/chosen": 0.0949888676404953, + "logits/rejected": -0.10224437713623047, + "logps/chosen": -0.8374663591384888, + "logps/rejected": -1.0614335536956787, + "loss": 1.3362, + "nll_loss": 1.259254813194275, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.08374662697315216, + "rewards/margins": 0.02239672839641571, + "rewards/rejected": -0.10614337027072906, + "step": 1930 + }, + { + "epoch": 0.5442655228183672, + "grad_norm": 0.5625, + "learning_rate": 2.550326898299397e-06, + "log_odds_chosen": 0.3798829913139343, + "log_odds_ratio": -0.6570533514022827, + "logits/chosen": 0.20587129890918732, + "logits/rejected": -0.19827596843242645, + "logps/chosen": -0.8702229261398315, + "logps/rejected": -1.0879228115081787, + "loss": 1.2534, + "nll_loss": 1.336097240447998, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.08702228963375092, + "rewards/margins": 0.021769985556602478, + "rewards/rejected": -0.1087922677397728, + "step": 1935 + }, + { + "epoch": 0.545671893678363, + "grad_norm": 0.50390625, + "learning_rate": 2.538053146052366e-06, + "log_odds_chosen": 0.42701345682144165, + "log_odds_ratio": -0.597525417804718, + "logits/chosen": 0.3210323452949524, + "logits/rejected": 0.06907093524932861, + "logps/chosen": -0.782520592212677, + "logps/rejected": -1.081879734992981, + "loss": 1.2815, + "nll_loss": 1.3015620708465576, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.07825206220149994, + "rewards/margins": 0.029935915023088455, + "rewards/rejected": -0.1081879585981369, + "step": 1940 + }, + { + "epoch": 0.5470782645383587, + "grad_norm": 0.4453125, + "learning_rate": 2.5257784763161177e-06, + "log_odds_chosen": -0.26187849044799805, + "log_odds_ratio": -0.9886520504951477, + "logits/chosen": -0.0548955500125885, + "logits/rejected": -0.041044097393751144, + "logps/chosen": -1.1638720035552979, + "logps/rejected": -1.041886329650879, + "loss": 1.4207, + "nll_loss": 1.4691832065582275, + "rewards/accuracies": 0.4000000059604645, + "rewards/chosen": -0.11638720333576202, + "rewards/margins": -0.012198579497635365, + "rewards/rejected": -0.10418863594532013, + "step": 1945 + }, + { + "epoch": 0.5484846353983546, + "grad_norm": 1.21875, + "learning_rate": 2.5135031850419266e-06, + "log_odds_chosen": 0.38276079297065735, + "log_odds_ratio": -0.5948246717453003, + "logits/chosen": -0.045088671147823334, + "logits/rejected": -0.12301528453826904, + "logps/chosen": -0.9737148284912109, + "logps/rejected": -1.1692750453948975, + "loss": 1.307, + "nll_loss": 1.3369234800338745, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.09737147390842438, + "rewards/margins": 0.019556032493710518, + "rewards/rejected": -0.11692751944065094, + "step": 1950 + }, + { + "epoch": 0.5498910062583503, + "grad_norm": 0.921875, + "learning_rate": 2.5012275681960563e-06, + "log_odds_chosen": 0.1337728202342987, + "log_odds_ratio": -0.7448712587356567, + "logits/chosen": 0.07559056580066681, + "logits/rejected": -0.15480338037014008, + "logps/chosen": -0.8971524238586426, + "logps/rejected": -1.0162702798843384, + "loss": 1.259, + "nll_loss": 1.3991035223007202, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.08971523493528366, + "rewards/margins": 0.011911772191524506, + "rewards/rejected": -0.10162701457738876, + "step": 1955 + }, + { + "epoch": 0.5512973771183461, + "grad_norm": 0.65234375, + "learning_rate": 2.4889519217526178e-06, + "log_odds_chosen": 0.22945475578308105, + "log_odds_ratio": -0.680932343006134, + "logits/chosen": 0.22960948944091797, + "logits/rejected": 0.17516712844371796, + "logps/chosen": -0.827686607837677, + "logps/rejected": -0.9752469062805176, + "loss": 1.3716, + "nll_loss": 1.041610598564148, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.08276865631341934, + "rewards/margins": 0.014756026677787304, + "rewards/rejected": -0.09752468764781952, + "step": 1960 + }, + { + "epoch": 0.5527037479783419, + "grad_norm": 0.51953125, + "learning_rate": 2.4766765416864358e-06, + "log_odds_chosen": 0.4295072555541992, + "log_odds_ratio": -0.5632562637329102, + "logits/chosen": 0.3465437591075897, + "logits/rejected": 0.19048206508159637, + "logps/chosen": -0.7735083103179932, + "logps/rejected": -0.9938994646072388, + "loss": 1.2924, + "nll_loss": 1.0761052370071411, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.07735083997249603, + "rewards/margins": 0.02203909493982792, + "rewards/rejected": -0.0993899330496788, + "step": 1965 + }, + { + "epoch": 0.5541101188383377, + "grad_norm": 0.7421875, + "learning_rate": 2.4644017239659145e-06, + "log_odds_chosen": 0.20891109108924866, + "log_odds_ratio": -0.6594338417053223, + "logits/chosen": 0.2855902314186096, + "logits/rejected": -0.0511033833026886, + "logps/chosen": -0.9585043787956238, + "logps/rejected": -1.067773699760437, + "loss": 1.2839, + "nll_loss": 1.1800204515457153, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.09585044533014297, + "rewards/margins": 0.010926928371191025, + "rewards/rejected": -0.1067773699760437, + "step": 1970 + }, + { + "epoch": 0.5555164896983334, + "grad_norm": 0.361328125, + "learning_rate": 2.4521277645458968e-06, + "log_odds_chosen": 0.209771990776062, + "log_odds_ratio": -0.6410363912582397, + "logits/chosen": 0.16523006558418274, + "logits/rejected": 0.0723339170217514, + "logps/chosen": -0.9708080291748047, + "logps/rejected": -1.0834459066390991, + "loss": 1.2706, + "nll_loss": 1.1527389287948608, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.09708081185817719, + "rewards/margins": 0.011263787746429443, + "rewards/rejected": -0.10834459215402603, + "step": 1975 + }, + { + "epoch": 0.5569228605583292, + "grad_norm": 0.5234375, + "learning_rate": 2.4398549593605336e-06, + "log_odds_chosen": 0.30742546916007996, + "log_odds_ratio": -0.7039249539375305, + "logits/chosen": 0.04328065365552902, + "logits/rejected": -0.12980946898460388, + "logps/chosen": -0.9298011064529419, + "logps/rejected": -1.132953405380249, + "loss": 1.3455, + "nll_loss": 1.2900826930999756, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.09298010915517807, + "rewards/margins": 0.020315242931246758, + "rewards/rejected": -0.11329533904790878, + "step": 1980 + }, + { + "epoch": 0.558329231418325, + "grad_norm": 0.578125, + "learning_rate": 2.427583604316145e-06, + "log_odds_chosen": 0.32450270652770996, + "log_odds_ratio": -0.6246632933616638, + "logits/chosen": 0.2687751054763794, + "logits/rejected": 0.002015703823417425, + "logps/chosen": -0.8845337629318237, + "logps/rejected": -1.1134403944015503, + "loss": 1.2423, + "nll_loss": 1.1597281694412231, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.08845336735248566, + "rewards/margins": 0.022890685126185417, + "rewards/rejected": -0.11134406179189682, + "step": 1985 + }, + { + "epoch": 0.5597356022783208, + "grad_norm": 0.93359375, + "learning_rate": 2.4153139952840873e-06, + "log_odds_chosen": -0.2574736475944519, + "log_odds_ratio": -0.9290412068367004, + "logits/chosen": 0.17939214408397675, + "logits/rejected": -0.03628557547926903, + "logps/chosen": -1.1249353885650635, + "logps/rejected": -0.925279438495636, + "loss": 1.4328, + "nll_loss": 1.4516746997833252, + "rewards/accuracies": 0.44999998807907104, + "rewards/chosen": -0.11249355226755142, + "rewards/margins": -0.019965607672929764, + "rewards/rejected": -0.09252794086933136, + "step": 1990 + }, + { + "epoch": 0.5611419731383166, + "grad_norm": 0.98046875, + "learning_rate": 2.4030464280936196e-06, + "log_odds_chosen": -0.13905853033065796, + "log_odds_ratio": -0.9099873304367065, + "logits/chosen": 0.19021955132484436, + "logits/rejected": 0.1553717404603958, + "logps/chosen": -1.0612506866455078, + "logps/rejected": -1.012810230255127, + "loss": 1.3182, + "nll_loss": 1.1444755792617798, + "rewards/accuracies": 0.4000000059604645, + "rewards/chosen": -0.10612507164478302, + "rewards/margins": -0.004844049923121929, + "rewards/rejected": -0.10128102451562881, + "step": 1995 + }, + { + "epoch": 0.5625483439983123, + "grad_norm": 1.4453125, + "learning_rate": 2.390781198524771e-06, + "log_odds_chosen": 0.12198109924793243, + "log_odds_ratio": -0.8283858299255371, + "logits/chosen": 0.011166423559188843, + "logits/rejected": 0.008707046508789062, + "logps/chosen": -1.1139929294586182, + "logps/rejected": -1.1516040563583374, + "loss": 1.3065, + "nll_loss": 1.2652587890625, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.11139927804470062, + "rewards/margins": 0.003761128056794405, + "rewards/rejected": -0.11516042053699493, + "step": 2000 + }, + { + "epoch": 0.5639547148583082, + "grad_norm": 1.1484375, + "learning_rate": 2.378518602301207e-06, + "log_odds_chosen": 0.3775716722011566, + "log_odds_ratio": -0.660047709941864, + "logits/chosen": 0.018313337117433548, + "logits/rejected": -0.12297528982162476, + "logps/chosen": -0.871324360370636, + "logps/rejected": -1.077490210533142, + "loss": 1.3086, + "nll_loss": 1.2721842527389526, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.08713243901729584, + "rewards/margins": 0.02061658538877964, + "rewards/rejected": -0.10774902254343033, + "step": 2005 + }, + { + "epoch": 0.5653610857183039, + "grad_norm": 0.359375, + "learning_rate": 2.366258935083104e-06, + "log_odds_chosen": 0.35200756788253784, + "log_odds_ratio": -0.6445342898368835, + "logits/chosen": 0.18469831347465515, + "logits/rejected": 0.046655140817165375, + "logps/chosen": -0.8012509346008301, + "logps/rejected": -1.037642240524292, + "loss": 1.2667, + "nll_loss": 1.1248598098754883, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.08012509346008301, + "rewards/margins": 0.02363913133740425, + "rewards/rejected": -0.10376423597335815, + "step": 2010 + }, + { + "epoch": 0.5667674565782997, + "grad_norm": 0.53125, + "learning_rate": 2.354002492460015e-06, + "log_odds_chosen": 0.29196059703826904, + "log_odds_ratio": -0.6961022615432739, + "logits/chosen": 0.07946896553039551, + "logits/rejected": 0.20001347362995148, + "logps/chosen": -0.7962630987167358, + "logps/rejected": -0.9276267886161804, + "loss": 1.2736, + "nll_loss": 1.0803678035736084, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.07962630689144135, + "rewards/margins": 0.013136359862983227, + "rewards/rejected": -0.09276267141103745, + "step": 2015 + }, + { + "epoch": 0.5681738274382955, + "grad_norm": 0.65234375, + "learning_rate": 2.3417495699437494e-06, + "log_odds_chosen": 0.0876246765255928, + "log_odds_ratio": -0.8300532102584839, + "logits/chosen": -0.09331229329109192, + "logits/rejected": 0.013139751739799976, + "logps/chosen": -1.0107604265213013, + "logps/rejected": -1.016236662864685, + "loss": 1.3041, + "nll_loss": 1.3678447008132935, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.10107602179050446, + "rewards/margins": 0.0005476403748616576, + "rewards/rejected": -0.10162366926670074, + "step": 2020 + }, + { + "epoch": 0.5695801982982912, + "grad_norm": 0.64453125, + "learning_rate": 2.32950046296124e-06, + "log_odds_chosen": 0.22191683948040009, + "log_odds_ratio": -0.6643384695053101, + "logits/chosen": 0.31556597352027893, + "logits/rejected": -0.18868893384933472, + "logps/chosen": -0.8896587491035461, + "logps/rejected": -1.0823167562484741, + "loss": 1.2252, + "nll_loss": 1.3100733757019043, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.08896587044000626, + "rewards/margins": 0.019265811890363693, + "rewards/rejected": -0.10823168605566025, + "step": 2025 + }, + { + "epoch": 0.570986569158287, + "grad_norm": 0.40625, + "learning_rate": 2.317255466847428e-06, + "log_odds_chosen": 0.20406213402748108, + "log_odds_ratio": -0.6933099031448364, + "logits/chosen": -0.07382262498140335, + "logits/rejected": 0.03679852560162544, + "logps/chosen": -0.8334574699401855, + "logps/rejected": -0.9437686204910278, + "loss": 1.2915, + "nll_loss": 1.2893542051315308, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.08334574103355408, + "rewards/margins": 0.011031119152903557, + "rewards/rejected": -0.09437686204910278, + "step": 2030 + }, + { + "epoch": 0.5723929400182828, + "grad_norm": 0.765625, + "learning_rate": 2.3050148768381346e-06, + "log_odds_chosen": 0.1456795036792755, + "log_odds_ratio": -0.7081176042556763, + "logits/chosen": 0.11449067294597626, + "logits/rejected": -0.00850601214915514, + "logps/chosen": -1.0489826202392578, + "logps/rejected": -1.1977183818817139, + "loss": 1.3718, + "nll_loss": 1.2525540590286255, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.10489825904369354, + "rewards/margins": 0.014873594045639038, + "rewards/rejected": -0.11977185308933258, + "step": 2035 + }, + { + "epoch": 0.5737993108782786, + "grad_norm": 0.84375, + "learning_rate": 2.2927789880629505e-06, + "log_odds_chosen": -0.17272424697875977, + "log_odds_ratio": -0.8494987487792969, + "logits/chosen": 0.0225498266518116, + "logits/rejected": 0.16063106060028076, + "logps/chosen": -0.9505000114440918, + "logps/rejected": -0.9065941572189331, + "loss": 1.2537, + "nll_loss": 1.1457884311676025, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.09504999965429306, + "rewards/margins": -0.004390590824186802, + "rewards/rejected": -0.09065941721200943, + "step": 2040 + }, + { + "epoch": 0.5752056817382744, + "grad_norm": 0.640625, + "learning_rate": 2.2805480955381146e-06, + "log_odds_chosen": 0.10740852355957031, + "log_odds_ratio": -0.7454892992973328, + "logits/chosen": 0.041121773421764374, + "logits/rejected": -0.10364113003015518, + "logps/chosen": -1.0231186151504517, + "logps/rejected": -1.064650297164917, + "loss": 1.4006, + "nll_loss": 1.347855806350708, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.1023118644952774, + "rewards/margins": 0.004153153393417597, + "rewards/rejected": -0.10646501928567886, + "step": 2045 + }, + { + "epoch": 0.5766120525982702, + "grad_norm": 2.65625, + "learning_rate": 2.268322494159401e-06, + "log_odds_chosen": 0.4567478597164154, + "log_odds_ratio": -0.5950134992599487, + "logits/chosen": 0.1594325602054596, + "logits/rejected": -0.06351219117641449, + "logps/chosen": -0.9101318120956421, + "logps/rejected": -1.2298481464385986, + "loss": 1.3683, + "nll_loss": 1.3389989137649536, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.09101317077875137, + "rewards/margins": 0.03197162598371506, + "rewards/rejected": -0.12298478931188583, + "step": 2050 + }, + { + "epoch": 0.5780184234582659, + "grad_norm": 0.6484375, + "learning_rate": 2.256102478695013e-06, + "log_odds_chosen": 0.5688678622245789, + "log_odds_ratio": -0.5590689182281494, + "logits/chosen": 0.3047412037849426, + "logits/rejected": 0.02218955010175705, + "logps/chosen": -0.8990691304206848, + "logps/rejected": -1.2767914533615112, + "loss": 1.2824, + "nll_loss": 1.1349036693572998, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.08990690857172012, + "rewards/margins": 0.03777223452925682, + "rewards/rejected": -0.12767915427684784, + "step": 2055 + }, + { + "epoch": 0.5794247943182618, + "grad_norm": 0.8359375, + "learning_rate": 2.2438883437784724e-06, + "log_odds_chosen": 0.32400697469711304, + "log_odds_ratio": -0.6653164625167847, + "logits/chosen": 0.126313254237175, + "logits/rejected": -0.08219246566295624, + "logps/chosen": -0.8283101320266724, + "logps/rejected": -1.045828938484192, + "loss": 1.2714, + "nll_loss": 1.1874326467514038, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.082831010222435, + "rewards/margins": 0.021751878783106804, + "rewards/rejected": -0.10458288341760635, + "step": 2060 + }, + { + "epoch": 0.5808311651782575, + "grad_norm": 1.078125, + "learning_rate": 2.231680383901516e-06, + "log_odds_chosen": 0.2799515724182129, + "log_odds_ratio": -0.6260203719139099, + "logits/chosen": 0.2734755873680115, + "logits/rejected": -0.0657648891210556, + "logps/chosen": -0.997807502746582, + "logps/rejected": -1.1968848705291748, + "loss": 1.266, + "nll_loss": 1.1785128116607666, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.09978075325489044, + "rewards/margins": 0.019907724112272263, + "rewards/rejected": -0.1196884736418724, + "step": 2065 + }, + { + "epoch": 0.5822375360382532, + "grad_norm": 0.474609375, + "learning_rate": 2.219478893406997e-06, + "log_odds_chosen": 0.6603912115097046, + "log_odds_ratio": -0.49572378396987915, + "logits/chosen": 0.08124915510416031, + "logits/rejected": -0.0769030898809433, + "logps/chosen": -0.7781981229782104, + "logps/rejected": -1.1534805297851562, + "loss": 1.3176, + "nll_loss": 1.248460054397583, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.0778198167681694, + "rewards/margins": 0.037528228014707565, + "rewards/rejected": -0.11534804105758667, + "step": 2070 + }, + { + "epoch": 0.5836439068982491, + "grad_norm": 0.47265625, + "learning_rate": 2.2072841664817855e-06, + "log_odds_chosen": 0.19003677368164062, + "log_odds_ratio": -0.7124180793762207, + "logits/chosen": 0.1900695264339447, + "logits/rejected": 0.16181489825248718, + "logps/chosen": -0.9449484944343567, + "logps/rejected": -1.0786292552947998, + "loss": 1.375, + "nll_loss": 1.1930427551269531, + "rewards/accuracies": 0.4000000059604645, + "rewards/chosen": -0.09449484944343567, + "rewards/margins": 0.013368071988224983, + "rewards/rejected": -0.1078629270195961, + "step": 2075 + }, + { + "epoch": 0.5850502777582448, + "grad_norm": 0.359375, + "learning_rate": 2.195096497149679e-06, + "log_odds_chosen": 0.6070584058761597, + "log_odds_ratio": -0.6306462287902832, + "logits/chosen": 0.3088809847831726, + "logits/rejected": 0.16098138689994812, + "logps/chosen": -1.0000215768814087, + "logps/rejected": -1.5181127786636353, + "loss": 1.2809, + "nll_loss": 1.1490371227264404, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.10000214725732803, + "rewards/margins": 0.051809124648571014, + "rewards/rejected": -0.15181128680706024, + "step": 2080 + }, + { + "epoch": 0.5864566486182407, + "grad_norm": 0.7265625, + "learning_rate": 2.182916179264309e-06, + "log_odds_chosen": 0.22232560813426971, + "log_odds_ratio": -0.6624404191970825, + "logits/chosen": 0.025626610964536667, + "logits/rejected": 0.16651883721351624, + "logps/chosen": -0.8706231117248535, + "logps/rejected": -1.018607497215271, + "loss": 1.2903, + "nll_loss": 1.2660566568374634, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.08706231415271759, + "rewards/margins": 0.014798441901803017, + "rewards/rejected": -0.10186074674129486, + "step": 2085 + }, + { + "epoch": 0.5878630194782364, + "grad_norm": 0.5625, + "learning_rate": 2.170743506502061e-06, + "log_odds_chosen": 0.5465322732925415, + "log_odds_ratio": -0.5666033625602722, + "logits/chosen": 0.0625384971499443, + "logits/rejected": 0.12112072855234146, + "logps/chosen": -0.685492217540741, + "logps/rejected": -0.931922435760498, + "loss": 1.178, + "nll_loss": 1.129865288734436, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.06854921579360962, + "rewards/margins": 0.02464301511645317, + "rewards/rejected": -0.09319223463535309, + "step": 2090 + }, + { + "epoch": 0.5892693903382322, + "grad_norm": 0.431640625, + "learning_rate": 2.1585787723549886e-06, + "log_odds_chosen": 0.21627631783485413, + "log_odds_ratio": -0.6291038393974304, + "logits/chosen": 0.3572022616863251, + "logits/rejected": 0.17766115069389343, + "logps/chosen": -0.924892783164978, + "logps/rejected": -1.0687482357025146, + "loss": 1.3015, + "nll_loss": 1.1631147861480713, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.09248928725719452, + "rewards/margins": 0.014385545626282692, + "rewards/rejected": -0.10687483847141266, + "step": 2095 + }, + { + "epoch": 0.590675761198228, + "grad_norm": 0.55859375, + "learning_rate": 2.146422270123741e-06, + "log_odds_chosen": 0.5283633470535278, + "log_odds_ratio": -0.6173766255378723, + "logits/chosen": -0.03145185858011246, + "logits/rejected": 0.03247164934873581, + "logps/chosen": -0.8153474926948547, + "logps/rejected": -1.142411470413208, + "loss": 1.2569, + "nll_loss": 1.1404359340667725, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.08153475821018219, + "rewards/margins": 0.03270639851689339, + "rewards/rejected": -0.11424114555120468, + "step": 2100 + }, + { + "epoch": 0.5920821320582238, + "grad_norm": 0.431640625, + "learning_rate": 2.134274292910489e-06, + "log_odds_chosen": 0.44718852639198303, + "log_odds_ratio": -0.6034306287765503, + "logits/chosen": 0.19990482926368713, + "logits/rejected": 0.05019756406545639, + "logps/chosen": -0.8512241244316101, + "logps/rejected": -1.1330512762069702, + "loss": 1.1902, + "nll_loss": 1.1092191934585571, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.08512241393327713, + "rewards/margins": 0.028182705864310265, + "rewards/rejected": -0.11330513656139374, + "step": 2105 + }, + { + "epoch": 0.5934885029182195, + "grad_norm": 1.015625, + "learning_rate": 2.1221351336118587e-06, + "log_odds_chosen": -0.05416733771562576, + "log_odds_ratio": -0.8569121360778809, + "logits/chosen": 0.12580278515815735, + "logits/rejected": -0.2401326447725296, + "logps/chosen": -1.075391411781311, + "logps/rejected": -1.1087602376937866, + "loss": 1.302, + "nll_loss": 1.4039140939712524, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.10753913968801498, + "rewards/margins": 0.003336886642500758, + "rewards/rejected": -0.11087602376937866, + "step": 2110 + }, + { + "epoch": 0.5948948737782153, + "grad_norm": 0.74609375, + "learning_rate": 2.1100050849118716e-06, + "log_odds_chosen": 0.3060021996498108, + "log_odds_ratio": -0.6356474757194519, + "logits/chosen": 0.17181582748889923, + "logits/rejected": 0.06386371701955795, + "logps/chosen": -0.9602164030075073, + "logps/rejected": -1.1083872318267822, + "loss": 1.2469, + "nll_loss": 1.1497198343276978, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.09602165222167969, + "rewards/margins": 0.014817061834037304, + "rewards/rejected": -0.11083869636058807, + "step": 2115 + }, + { + "epoch": 0.5963012446382111, + "grad_norm": 0.58984375, + "learning_rate": 2.097884439274883e-06, + "log_odds_chosen": 0.6273894906044006, + "log_odds_ratio": -0.5269737839698792, + "logits/chosen": 0.21485364437103271, + "logits/rejected": -0.256959468126297, + "logps/chosen": -0.7991748452186584, + "logps/rejected": -1.1641663312911987, + "loss": 1.3441, + "nll_loss": 1.1461737155914307, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.07991747558116913, + "rewards/margins": 0.03649916127324104, + "rewards/rejected": -0.11641664803028107, + "step": 2120 + }, + { + "epoch": 0.5977076154982068, + "grad_norm": 0.78515625, + "learning_rate": 2.0857734889385375e-06, + "log_odds_chosen": 0.4728359282016754, + "log_odds_ratio": -0.5811837911605835, + "logits/chosen": 0.0014868139987811446, + "logits/rejected": 0.14086367189884186, + "logps/chosen": -0.8561526536941528, + "logps/rejected": -1.0794785022735596, + "loss": 1.248, + "nll_loss": 1.1451321840286255, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.08561527729034424, + "rewards/margins": 0.022332582622766495, + "rewards/rejected": -0.10794784873723984, + "step": 2125 + }, + { + "epoch": 0.5991139863582027, + "grad_norm": 0.57421875, + "learning_rate": 2.0736725259067152e-06, + "log_odds_chosen": -0.012895092368125916, + "log_odds_ratio": -0.7812901735305786, + "logits/chosen": -0.04183458536863327, + "logits/rejected": -0.05972647666931152, + "logps/chosen": -0.9985347986221313, + "logps/rejected": -1.0221898555755615, + "loss": 1.3272, + "nll_loss": 1.202670693397522, + "rewards/accuracies": 0.44999998807907104, + "rewards/chosen": -0.09985347837209702, + "rewards/margins": 0.0023655148688703775, + "rewards/rejected": -0.10221900045871735, + "step": 2130 + }, + { + "epoch": 0.6005203572181984, + "grad_norm": 0.6171875, + "learning_rate": 2.0615818419424965e-06, + "log_odds_chosen": 0.5170741677284241, + "log_odds_ratio": -0.5714846253395081, + "logits/chosen": -0.12221293151378632, + "logits/rejected": 0.11729947477579117, + "logps/chosen": -0.950890839099884, + "logps/rejected": -1.2947622537612915, + "loss": 1.2321, + "nll_loss": 1.2039806842803955, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.09508909285068512, + "rewards/margins": 0.034387145191431046, + "rewards/rejected": -0.12947621941566467, + "step": 2135 + }, + { + "epoch": 0.6019267280781943, + "grad_norm": 0.87109375, + "learning_rate": 2.0495017285611267e-06, + "log_odds_chosen": 0.5369144082069397, + "log_odds_ratio": -0.5662406086921692, + "logits/chosen": 0.021474510431289673, + "logits/rejected": -0.13889047503471375, + "logps/chosen": -0.9632217288017273, + "logps/rejected": -1.2753288745880127, + "loss": 1.3178, + "nll_loss": 1.243088722229004, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.09632216393947601, + "rewards/margins": 0.031210720539093018, + "rewards/rejected": -0.12753288447856903, + "step": 2140 + }, + { + "epoch": 0.60333309893819, + "grad_norm": 0.46484375, + "learning_rate": 2.0374324770229852e-06, + "log_odds_chosen": 0.44575804471969604, + "log_odds_ratio": -0.5990105271339417, + "logits/chosen": 0.16773398220539093, + "logits/rejected": -0.01770983263850212, + "logps/chosen": -0.8211402893066406, + "logps/rejected": -1.067427396774292, + "loss": 1.428, + "nll_loss": 1.1296110153198242, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.08211404085159302, + "rewards/margins": 0.024628710001707077, + "rewards/rejected": -0.1067427545785904, + "step": 2145 + }, + { + "epoch": 0.6047394697981858, + "grad_norm": 0.7421875, + "learning_rate": 2.0253743783265644e-06, + "log_odds_chosen": 0.043892212212085724, + "log_odds_ratio": -0.7687390446662903, + "logits/chosen": 0.11299238353967667, + "logits/rejected": 0.19744233787059784, + "logps/chosen": -0.8225703239440918, + "logps/rejected": -0.8660001754760742, + "loss": 1.2762, + "nll_loss": 1.1397249698638916, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.08225702494382858, + "rewards/margins": 0.00434298487380147, + "rewards/rejected": -0.08660002052783966, + "step": 2150 + }, + { + "epoch": 0.6061458406581816, + "grad_norm": 0.8515625, + "learning_rate": 2.013327723201456e-06, + "log_odds_chosen": 0.8239778280258179, + "log_odds_ratio": -0.5448077917098999, + "logits/chosen": 0.11527810990810394, + "logits/rejected": -0.11592914909124374, + "logps/chosen": -0.9209944009780884, + "logps/rejected": -1.4681470394134521, + "loss": 1.426, + "nll_loss": 1.3432397842407227, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.09209943562746048, + "rewards/margins": 0.05471527576446533, + "rewards/rejected": -0.14681470394134521, + "step": 2155 + }, + { + "epoch": 0.6075522115181774, + "grad_norm": 0.87890625, + "learning_rate": 2.001292802101334e-06, + "log_odds_chosen": 0.45466384291648865, + "log_odds_ratio": -0.6190831661224365, + "logits/chosen": -0.057558946311473846, + "logits/rejected": 0.02829205058515072, + "logps/chosen": -1.1019726991653442, + "logps/rejected": -1.4023187160491943, + "loss": 1.4079, + "nll_loss": 1.5371198654174805, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.11019726097583771, + "rewards/margins": 0.030034605413675308, + "rewards/rejected": -0.1402318775653839, + "step": 2160 + }, + { + "epoch": 0.6089585823781731, + "grad_norm": 0.494140625, + "learning_rate": 1.989269905196962e-06, + "log_odds_chosen": 0.5837582349777222, + "log_odds_ratio": -0.4719756245613098, + "logits/chosen": 0.23967739939689636, + "logits/rejected": -0.15985237061977386, + "logps/chosen": -0.7570546865463257, + "logps/rejected": -1.098534345626831, + "loss": 1.3862, + "nll_loss": 1.4102153778076172, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.07570547610521317, + "rewards/margins": 0.034147970378398895, + "rewards/rejected": -0.10985343158245087, + "step": 2165 + }, + { + "epoch": 0.6103649532381689, + "grad_norm": 0.70703125, + "learning_rate": 1.9772593223691884e-06, + "log_odds_chosen": 0.049016643315553665, + "log_odds_ratio": -0.8077160120010376, + "logits/chosen": 0.2120121270418167, + "logits/rejected": 0.15697090327739716, + "logps/chosen": -1.0200127363204956, + "logps/rejected": -0.9740360379219055, + "loss": 1.2599, + "nll_loss": 1.2477294206619263, + "rewards/accuracies": 0.3499999940395355, + "rewards/chosen": -0.10200126469135284, + "rewards/margins": -0.004597645718604326, + "rewards/rejected": -0.0974036157131195, + "step": 2170 + }, + { + "epoch": 0.6117713240981647, + "grad_norm": 0.396484375, + "learning_rate": 1.9652613432019603e-06, + "log_odds_chosen": 0.439436674118042, + "log_odds_ratio": -0.5775164365768433, + "logits/chosen": 0.33784905076026917, + "logits/rejected": -0.16036701202392578, + "logps/chosen": -0.7958052754402161, + "logps/rejected": -1.0919268131256104, + "loss": 1.3179, + "nll_loss": 1.0887128114700317, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.07958053052425385, + "rewards/margins": 0.029612144455313683, + "rewards/rejected": -0.10919268429279327, + "step": 2175 + }, + { + "epoch": 0.6131776949581604, + "grad_norm": 1.34375, + "learning_rate": 1.9532762569753413e-06, + "log_odds_chosen": 0.46795496344566345, + "log_odds_ratio": -0.571441650390625, + "logits/chosen": 0.1472143828868866, + "logits/rejected": -0.10492970049381256, + "logps/chosen": -0.9131848216056824, + "logps/rejected": -1.2060058116912842, + "loss": 1.3282, + "nll_loss": 1.2877761125564575, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.09131848812103271, + "rewards/margins": 0.02928210236132145, + "rewards/rejected": -0.12060059607028961, + "step": 2180 + }, + { + "epoch": 0.6145840658181563, + "grad_norm": 0.83203125, + "learning_rate": 1.9413043526585377e-06, + "log_odds_chosen": 0.18188393115997314, + "log_odds_ratio": -0.6771284937858582, + "logits/chosen": 0.16518446803092957, + "logits/rejected": -0.010451188310980797, + "logps/chosen": -0.8314681053161621, + "logps/rejected": -0.9089493751525879, + "loss": 1.1942, + "nll_loss": 1.155928611755371, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.08314681798219681, + "rewards/margins": 0.0077481335029006, + "rewards/rejected": -0.09089495241641998, + "step": 2185 + }, + { + "epoch": 0.615990436678152, + "grad_norm": 0.353515625, + "learning_rate": 1.9293459189029297e-06, + "log_odds_chosen": 0.17067180573940277, + "log_odds_ratio": -0.6768472194671631, + "logits/chosen": 0.3366175591945648, + "logits/rejected": -0.07003750652074814, + "logps/chosen": -0.8593961596488953, + "logps/rejected": -1.0146676301956177, + "loss": 1.2568, + "nll_loss": 1.1882942914962769, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.08593961596488953, + "rewards/margins": 0.015527153387665749, + "rewards/rejected": -0.10146676003932953, + "step": 2190 + }, + { + "epoch": 0.6173968075381479, + "grad_norm": 0.60546875, + "learning_rate": 1.9174012440351115e-06, + "log_odds_chosen": 0.05603064224123955, + "log_odds_ratio": -0.770088791847229, + "logits/chosen": 0.12057554721832275, + "logits/rejected": 0.2600278854370117, + "logps/chosen": -0.886670708656311, + "logps/rejected": -0.8796000480651855, + "loss": 1.3116, + "nll_loss": 1.1579667329788208, + "rewards/accuracies": 0.3499999940395355, + "rewards/chosen": -0.08866707235574722, + "rewards/margins": -0.0007070727879181504, + "rewards/rejected": -0.08796000480651855, + "step": 2195 + }, + { + "epoch": 0.6188031783981436, + "grad_norm": 0.5859375, + "learning_rate": 1.9054706160499425e-06, + "log_odds_chosen": 0.9510795474052429, + "log_odds_ratio": -0.497814804315567, + "logits/chosen": -0.029894907027482986, + "logits/rejected": -0.0075707389041781425, + "logps/chosen": -0.7396495938301086, + "logps/rejected": -1.2674949169158936, + "loss": 1.208, + "nll_loss": 1.152024745941162, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.07396496832370758, + "rewards/margins": 0.05278454348444939, + "rewards/rejected": -0.12674950063228607, + "step": 2200 + }, + { + "epoch": 0.6202095492581394, + "grad_norm": 0.38671875, + "learning_rate": 1.8935543226035991e-06, + "log_odds_chosen": 0.3846930265426636, + "log_odds_ratio": -0.6503673791885376, + "logits/chosen": 0.04987801983952522, + "logits/rejected": -0.14280924201011658, + "logps/chosen": -1.050396203994751, + "logps/rejected": -1.3851416110992432, + "loss": 1.2002, + "nll_loss": 1.2732374668121338, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.10503961890935898, + "rewards/margins": 0.03347453102469444, + "rewards/rejected": -0.13851414620876312, + "step": 2205 + }, + { + "epoch": 0.6216159201181352, + "grad_norm": 0.5234375, + "learning_rate": 1.8816526510066443e-06, + "log_odds_chosen": 0.016715114936232567, + "log_odds_ratio": -0.7636454701423645, + "logits/chosen": 0.19401657581329346, + "logits/rejected": 0.11138969659805298, + "logps/chosen": -0.9060971140861511, + "logps/rejected": -0.8897374868392944, + "loss": 1.3381, + "nll_loss": 1.1524732112884521, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.09060971438884735, + "rewards/margins": -0.0016359605360776186, + "rewards/rejected": -0.0889737457036972, + "step": 2210 + }, + { + "epoch": 0.6230222909781309, + "grad_norm": 0.451171875, + "learning_rate": 1.869765888217095e-06, + "log_odds_chosen": 0.35817837715148926, + "log_odds_ratio": -0.608710765838623, + "logits/chosen": 0.34807461500167847, + "logits/rejected": 0.004034848418086767, + "logps/chosen": -0.9350178837776184, + "logps/rejected": -1.145564079284668, + "loss": 1.2227, + "nll_loss": 1.1987204551696777, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.09350178390741348, + "rewards/margins": 0.02105463482439518, + "rewards/rejected": -0.11455640941858292, + "step": 2215 + }, + { + "epoch": 0.6244286618381267, + "grad_norm": 1.1328125, + "learning_rate": 1.8578943208335064e-06, + "log_odds_chosen": 0.5045295357704163, + "log_odds_ratio": -0.5768045783042908, + "logits/chosen": 0.19994623959064484, + "logits/rejected": 0.03926212340593338, + "logps/chosen": -0.9680612683296204, + "logps/rejected": -1.2390127182006836, + "loss": 1.2678, + "nll_loss": 1.165244460105896, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.0968061313033104, + "rewards/margins": 0.0270951297134161, + "rewards/rejected": -0.12390126287937164, + "step": 2220 + }, + { + "epoch": 0.6258350326981225, + "grad_norm": 0.66796875, + "learning_rate": 1.8460382350880631e-06, + "log_odds_chosen": 0.49432697892189026, + "log_odds_ratio": -0.5883679986000061, + "logits/chosen": 0.4599657952785492, + "logits/rejected": -0.22167901694774628, + "logps/chosen": -0.9144092798233032, + "logps/rejected": -1.2420196533203125, + "loss": 1.2947, + "nll_loss": 1.199195384979248, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.09144093096256256, + "rewards/margins": 0.03276105225086212, + "rewards/rejected": -0.12420199066400528, + "step": 2225 + }, + { + "epoch": 0.6272414035581183, + "grad_norm": 0.396484375, + "learning_rate": 1.8341979168396729e-06, + "log_odds_chosen": 0.2862274944782257, + "log_odds_ratio": -0.6693924069404602, + "logits/chosen": 0.08865300565958023, + "logits/rejected": -0.13743454217910767, + "logps/chosen": -1.0582154989242554, + "logps/rejected": -1.2691776752471924, + "loss": 1.349, + "nll_loss": 1.4249746799468994, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.10582154989242554, + "rewards/margins": 0.021096205338835716, + "rewards/rejected": -0.126917764544487, + "step": 2230 + }, + { + "epoch": 0.628647774418114, + "grad_norm": 0.890625, + "learning_rate": 1.8223736515670815e-06, + "log_odds_chosen": -0.037134379148483276, + "log_odds_ratio": -0.8214631080627441, + "logits/chosen": -0.0823502168059349, + "logits/rejected": 0.12497730553150177, + "logps/chosen": -0.932249903678894, + "logps/rejected": -0.8945218920707703, + "loss": 1.3146, + "nll_loss": 1.2705105543136597, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.09322498738765717, + "rewards/margins": -0.0037727851886302233, + "rewards/rejected": -0.08945219963788986, + "step": 2235 + }, + { + "epoch": 0.6300541452781099, + "grad_norm": 0.5234375, + "learning_rate": 1.8105657243619823e-06, + "log_odds_chosen": 0.34684863686561584, + "log_odds_ratio": -0.7218656539916992, + "logits/chosen": 0.14807887375354767, + "logits/rejected": 0.03153051808476448, + "logps/chosen": -0.8757486343383789, + "logps/rejected": -1.0104224681854248, + "loss": 1.3259, + "nll_loss": 1.1627438068389893, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.08757485449314117, + "rewards/margins": 0.013467395678162575, + "rewards/rejected": -0.1010422483086586, + "step": 2240 + }, + { + "epoch": 0.6314605161381056, + "grad_norm": 0.8671875, + "learning_rate": 1.7987744199221486e-06, + "log_odds_chosen": 0.6574544906616211, + "log_odds_ratio": -0.5802866220474243, + "logits/chosen": 0.1322064995765686, + "logits/rejected": -0.11798272281885147, + "logps/chosen": -0.8747881650924683, + "logps/rejected": -1.334978699684143, + "loss": 1.263, + "nll_loss": 1.0923330783843994, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.08747883141040802, + "rewards/margins": 0.04601903632283211, + "rewards/rejected": -0.13349786400794983, + "step": 2245 + }, + { + "epoch": 0.6328668869981015, + "grad_norm": 0.5234375, + "learning_rate": 1.787000022544564e-06, + "log_odds_chosen": 0.5828297138214111, + "log_odds_ratio": -0.5941449403762817, + "logits/chosen": 0.22428825497627258, + "logits/rejected": 0.07940506190061569, + "logps/chosen": -0.733045756816864, + "logps/rejected": -1.0111888647079468, + "loss": 1.2972, + "nll_loss": 1.1566951274871826, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.07330457866191864, + "rewards/margins": 0.027814310044050217, + "rewards/rejected": -0.10111889988183975, + "step": 2250 + }, + { + "epoch": 0.6342732578580972, + "grad_norm": 0.60546875, + "learning_rate": 1.7752428161185722e-06, + "log_odds_chosen": 0.31006038188934326, + "log_odds_ratio": -0.6792932748794556, + "logits/chosen": 0.16227427124977112, + "logits/rejected": -0.025093629956245422, + "logps/chosen": -0.9472533464431763, + "logps/rejected": -1.1567457914352417, + "loss": 1.2279, + "nll_loss": 1.1718425750732422, + "rewards/accuracies": 0.44999998807907104, + "rewards/chosen": -0.09472532570362091, + "rewards/margins": 0.020949259400367737, + "rewards/rejected": -0.11567459255456924, + "step": 2255 + }, + { + "epoch": 0.6356796287180929, + "grad_norm": 0.7109375, + "learning_rate": 1.7635030841190305e-06, + "log_odds_chosen": 0.37945660948753357, + "log_odds_ratio": -0.6706798076629639, + "logits/chosen": 0.12936149537563324, + "logits/rejected": 0.19631770253181458, + "logps/chosen": -0.9205350875854492, + "logps/rejected": -1.1315011978149414, + "loss": 1.1826, + "nll_loss": 1.1098390817642212, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.09205349534749985, + "rewards/margins": 0.021096620708703995, + "rewards/rejected": -0.11315013468265533, + "step": 2260 + }, + { + "epoch": 0.6370859995780888, + "grad_norm": 0.53125, + "learning_rate": 1.7517811095994735e-06, + "log_odds_chosen": 0.6533330678939819, + "log_odds_ratio": -0.49430108070373535, + "logits/chosen": 0.004287800285965204, + "logits/rejected": -0.12239503860473633, + "logps/chosen": -0.9396077990531921, + "logps/rejected": -1.3777822256088257, + "loss": 1.4278, + "nll_loss": 1.3003541231155396, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.09396077692508698, + "rewards/margins": 0.043817438185214996, + "rewards/rejected": -0.13777822256088257, + "step": 2265 + }, + { + "epoch": 0.6384923704380845, + "grad_norm": 0.765625, + "learning_rate": 1.7400771751852918e-06, + "log_odds_chosen": 0.3625580668449402, + "log_odds_ratio": -0.6069513559341431, + "logits/chosen": 0.1046602874994278, + "logits/rejected": -0.14799770712852478, + "logps/chosen": -0.9565946459770203, + "logps/rejected": -1.1671000719070435, + "loss": 1.2987, + "nll_loss": 1.3088912963867188, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.09565945714712143, + "rewards/margins": 0.021050548180937767, + "rewards/rejected": -0.11671002209186554, + "step": 2270 + }, + { + "epoch": 0.6398987412980803, + "grad_norm": 1.1484375, + "learning_rate": 1.7283915630669152e-06, + "log_odds_chosen": 0.6291302442550659, + "log_odds_ratio": -0.5847497582435608, + "logits/chosen": 0.27099600434303284, + "logits/rejected": -0.10813238471746445, + "logps/chosen": -0.8863940238952637, + "logps/rejected": -1.34452486038208, + "loss": 1.35, + "nll_loss": 1.2117236852645874, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.08863941580057144, + "rewards/margins": 0.04581306502223015, + "rewards/rejected": -0.1344524621963501, + "step": 2275 + }, + { + "epoch": 0.6413051121580761, + "grad_norm": 0.52734375, + "learning_rate": 1.7167245549930084e-06, + "log_odds_chosen": 0.4684177041053772, + "log_odds_ratio": -0.6205426454544067, + "logits/chosen": 0.19276770949363708, + "logits/rejected": -0.0946609154343605, + "logps/chosen": -1.003852367401123, + "logps/rejected": -1.2560100555419922, + "loss": 1.3595, + "nll_loss": 1.323639988899231, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.10038523375988007, + "rewards/margins": 0.025215759873390198, + "rewards/rejected": -0.12560100853443146, + "step": 2280 + }, + { + "epoch": 0.6427114830180719, + "grad_norm": 0.82421875, + "learning_rate": 1.705076432263681e-06, + "log_odds_chosen": 0.18357697129249573, + "log_odds_ratio": -0.685191810131073, + "logits/chosen": 0.3408302664756775, + "logits/rejected": -0.02346021868288517, + "logps/chosen": -0.8450328707695007, + "logps/rejected": -0.9442952871322632, + "loss": 1.3166, + "nll_loss": 1.247656226158142, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.08450329303741455, + "rewards/margins": 0.009926247410476208, + "rewards/rejected": -0.09442953020334244, + "step": 2285 + }, + { + "epoch": 0.6441178538780676, + "grad_norm": 0.70703125, + "learning_rate": 1.693447475723701e-06, + "log_odds_chosen": 0.4198075234889984, + "log_odds_ratio": -0.6046397686004639, + "logits/chosen": 0.02919054962694645, + "logits/rejected": -0.09719662368297577, + "logps/chosen": -0.8987553715705872, + "logps/rejected": -1.1064974069595337, + "loss": 1.3942, + "nll_loss": 1.2800304889678955, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.08987553417682648, + "rewards/margins": 0.020774196833372116, + "rewards/rejected": -0.1106497272849083, + "step": 2290 + }, + { + "epoch": 0.6455242247380635, + "grad_norm": 0.82421875, + "learning_rate": 1.6818379657557276e-06, + "log_odds_chosen": 0.2639097571372986, + "log_odds_ratio": -0.6733454465866089, + "logits/chosen": 0.13979147374629974, + "logits/rejected": -0.036435022950172424, + "logps/chosen": -0.8567923307418823, + "logps/rejected": -1.0064163208007812, + "loss": 1.2637, + "nll_loss": 1.1874176263809204, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.08567923307418823, + "rewards/margins": 0.014962397515773773, + "rewards/rejected": -0.100641630589962, + "step": 2295 + }, + { + "epoch": 0.6469305955980592, + "grad_norm": 0.69140625, + "learning_rate": 1.6702481822735463e-06, + "log_odds_chosen": 0.5099008679389954, + "log_odds_ratio": -0.6696144342422485, + "logits/chosen": 0.1339375525712967, + "logits/rejected": -0.07086416333913803, + "logps/chosen": -0.6614585518836975, + "logps/rejected": -1.0182112455368042, + "loss": 1.2939, + "nll_loss": 1.266301155090332, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.06614585220813751, + "rewards/margins": 0.0356752835214138, + "rewards/rejected": -0.10182113945484161, + "step": 2300 + }, + { + "epoch": 0.6483369664580549, + "grad_norm": 0.84765625, + "learning_rate": 1.6586784047153264e-06, + "log_odds_chosen": 0.1889144480228424, + "log_odds_ratio": -0.7223377227783203, + "logits/chosen": 0.18328048288822174, + "logits/rejected": 0.0038080899976193905, + "logps/chosen": -1.0151089429855347, + "logps/rejected": -1.1970748901367188, + "loss": 1.476, + "nll_loss": 1.4434733390808105, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.1015109196305275, + "rewards/margins": 0.018196579068899155, + "rewards/rejected": -0.11970750242471695, + "step": 2305 + }, + { + "epoch": 0.6497433373180508, + "grad_norm": 1.2265625, + "learning_rate": 1.6471289120368755e-06, + "log_odds_chosen": 0.061086464673280716, + "log_odds_ratio": -0.806367039680481, + "logits/chosen": 0.060416080057621, + "logits/rejected": 0.13000234961509705, + "logps/chosen": -1.0920124053955078, + "logps/rejected": -1.1408212184906006, + "loss": 1.3669, + "nll_loss": 1.3251636028289795, + "rewards/accuracies": 0.4000000059604645, + "rewards/chosen": -0.10920125246047974, + "rewards/margins": 0.004880874417722225, + "rewards/rejected": -0.11408212035894394, + "step": 2310 + }, + { + "epoch": 0.6511497081780465, + "grad_norm": 0.81640625, + "learning_rate": 1.6355999827049214e-06, + "log_odds_chosen": -0.14868974685668945, + "log_odds_ratio": -0.8572866320610046, + "logits/chosen": 0.33928146958351135, + "logits/rejected": 0.13666602969169617, + "logps/chosen": -0.9500266909599304, + "logps/rejected": -0.9167013168334961, + "loss": 1.3243, + "nll_loss": 1.296018362045288, + "rewards/accuracies": 0.44999998807907104, + "rewards/chosen": -0.0950026735663414, + "rewards/margins": -0.003332542721182108, + "rewards/rejected": -0.09167014062404633, + "step": 2315 + }, + { + "epoch": 0.6525560790380424, + "grad_norm": 0.8046875, + "learning_rate": 1.6240918946903923e-06, + "log_odds_chosen": 0.08933033049106598, + "log_odds_ratio": -0.8013785481452942, + "logits/chosen": 0.3539492189884186, + "logits/rejected": 0.07136426866054535, + "logps/chosen": -1.0159389972686768, + "logps/rejected": -1.0767545700073242, + "loss": 1.306, + "nll_loss": 1.2898885011672974, + "rewards/accuracies": 0.44999998807907104, + "rewards/chosen": -0.10159389674663544, + "rewards/margins": 0.00608155969530344, + "rewards/rejected": -0.1076754555106163, + "step": 2320 + }, + { + "epoch": 0.6539624498980381, + "grad_norm": 0.91015625, + "learning_rate": 1.612604925461717e-06, + "log_odds_chosen": 0.3929768204689026, + "log_odds_ratio": -0.5996061563491821, + "logits/chosen": 0.21692538261413574, + "logits/rejected": 0.08881232887506485, + "logps/chosen": -0.7952896356582642, + "logps/rejected": -1.0285656452178955, + "loss": 1.2243, + "nll_loss": 1.0526376962661743, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.07952895760536194, + "rewards/margins": 0.023327605798840523, + "rewards/rejected": -0.10285656154155731, + "step": 2325 + }, + { + "epoch": 0.6553688207580339, + "grad_norm": 0.49609375, + "learning_rate": 1.6011393519781373e-06, + "log_odds_chosen": 0.46675533056259155, + "log_odds_ratio": -0.5259859561920166, + "logits/chosen": 0.25100797414779663, + "logits/rejected": 0.05385111644864082, + "logps/chosen": -0.8101531267166138, + "logps/rejected": -1.090301275253296, + "loss": 1.2458, + "nll_loss": 1.068930983543396, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.08101530373096466, + "rewards/margins": 0.02801482379436493, + "rewards/rejected": -0.1090301126241684, + "step": 2330 + }, + { + "epoch": 0.6567751916180297, + "grad_norm": 0.68359375, + "learning_rate": 1.5896954506830251e-06, + "log_odds_chosen": 0.08317549526691437, + "log_odds_ratio": -0.7241812348365784, + "logits/chosen": 0.018165847286581993, + "logits/rejected": -0.045239925384521484, + "logps/chosen": -1.012702465057373, + "logps/rejected": -1.0038830041885376, + "loss": 1.3008, + "nll_loss": 1.1472804546356201, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.10127024352550507, + "rewards/margins": -0.0008819475769996643, + "rewards/rejected": -0.1003882884979248, + "step": 2335 + }, + { + "epoch": 0.6581815624780255, + "grad_norm": 0.86328125, + "learning_rate": 1.5782734974972207e-06, + "log_odds_chosen": 0.2770829200744629, + "log_odds_ratio": -0.6474322080612183, + "logits/chosen": 0.3198090195655823, + "logits/rejected": 0.0875503420829773, + "logps/chosen": -0.8086474537849426, + "logps/rejected": -0.9755656123161316, + "loss": 1.2639, + "nll_loss": 1.0375709533691406, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.08086474239826202, + "rewards/margins": 0.016691816970705986, + "rewards/rejected": -0.09755655378103256, + "step": 2340 + }, + { + "epoch": 0.6595879333380212, + "grad_norm": 0.62109375, + "learning_rate": 1.5668737678123808e-06, + "log_odds_chosen": 0.36177942156791687, + "log_odds_ratio": -0.6418853402137756, + "logits/chosen": 0.32350581884384155, + "logits/rejected": -0.033881280571222305, + "logps/chosen": -0.9356054067611694, + "logps/rejected": -1.2087011337280273, + "loss": 1.4322, + "nll_loss": 1.203917384147644, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.09356053918600082, + "rewards/margins": 0.027309581637382507, + "rewards/rejected": -0.12087012827396393, + "step": 2345 + }, + { + "epoch": 0.660994304198017, + "grad_norm": 0.59765625, + "learning_rate": 1.5554965364843355e-06, + "log_odds_chosen": 0.636290967464447, + "log_odds_ratio": -0.582929790019989, + "logits/chosen": 0.13164618611335754, + "logits/rejected": 0.20954278111457825, + "logps/chosen": -0.7973164916038513, + "logps/rejected": -1.2046148777008057, + "loss": 1.3067, + "nll_loss": 1.1008788347244263, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.07973165065050125, + "rewards/margins": 0.04072984308004379, + "rewards/rejected": -0.12046150118112564, + "step": 2350 + }, + { + "epoch": 0.6624006750580128, + "grad_norm": 0.9140625, + "learning_rate": 1.5441420778264647e-06, + "log_odds_chosen": 0.40660586953163147, + "log_odds_ratio": -0.5770841836929321, + "logits/chosen": -0.06428630650043488, + "logits/rejected": 0.038785386830568314, + "logps/chosen": -0.8069353103637695, + "logps/rejected": -1.0121805667877197, + "loss": 1.284, + "nll_loss": 1.1668837070465088, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.08069352805614471, + "rewards/margins": 0.0205245278775692, + "rewards/rejected": -0.10121805965900421, + "step": 2355 + }, + { + "epoch": 0.6638070459180085, + "grad_norm": 0.60546875, + "learning_rate": 1.5328106656030805e-06, + "log_odds_chosen": 0.2510248124599457, + "log_odds_ratio": -0.6650518178939819, + "logits/chosen": 0.2300342619419098, + "logits/rejected": 0.23015658557415009, + "logps/chosen": -0.9045982360839844, + "logps/rejected": -1.0975348949432373, + "loss": 1.2664, + "nll_loss": 1.1747139692306519, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.09045982360839844, + "rewards/margins": 0.01929367706179619, + "rewards/rejected": -0.10975348949432373, + "step": 2360 + }, + { + "epoch": 0.6652134167780044, + "grad_norm": 0.58984375, + "learning_rate": 1.52150257302283e-06, + "log_odds_chosen": -0.03923415392637253, + "log_odds_ratio": -0.8279203176498413, + "logits/chosen": 0.12241461127996445, + "logits/rejected": 0.2429099977016449, + "logps/chosen": -1.0360397100448608, + "logps/rejected": -1.0254569053649902, + "loss": 1.2996, + "nll_loss": 1.235386848449707, + "rewards/accuracies": 0.4000000059604645, + "rewards/chosen": -0.10360397398471832, + "rewards/margins": -0.0010582676623016596, + "rewards/rejected": -0.10254569351673126, + "step": 2365 + }, + { + "epoch": 0.6666197876380001, + "grad_norm": 1.046875, + "learning_rate": 1.510218072732107e-06, + "log_odds_chosen": 0.1947515308856964, + "log_odds_ratio": -0.7813352346420288, + "logits/chosen": 0.027283471077680588, + "logits/rejected": -0.05581042915582657, + "logps/chosen": -0.8156864047050476, + "logps/rejected": -0.9339845776557922, + "loss": 1.3315, + "nll_loss": 1.197085976600647, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.0815686509013176, + "rewards/margins": 0.01182980090379715, + "rewards/rejected": -0.09339844435453415, + "step": 2370 + }, + { + "epoch": 0.668026158497996, + "grad_norm": 1.2109375, + "learning_rate": 1.4989574368084757e-06, + "log_odds_chosen": 0.5522528886795044, + "log_odds_ratio": -0.6361822485923767, + "logits/chosen": -0.045281365513801575, + "logits/rejected": -0.08735646307468414, + "logps/chosen": -0.8376988172531128, + "logps/rejected": -1.1683425903320312, + "loss": 1.287, + "nll_loss": 1.156860589981079, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.08376988023519516, + "rewards/margins": 0.03306437283754349, + "rewards/rejected": -0.11683426052331924, + "step": 2375 + }, + { + "epoch": 0.6694325293579917, + "grad_norm": 0.4453125, + "learning_rate": 1.4877209367541167e-06, + "log_odds_chosen": 0.46589261293411255, + "log_odds_ratio": -0.6088491678237915, + "logits/chosen": 0.18501324951648712, + "logits/rejected": -0.07747994363307953, + "logps/chosen": -0.8727632761001587, + "logps/rejected": -1.181078553199768, + "loss": 1.2855, + "nll_loss": 1.294697880744934, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.08727632462978363, + "rewards/margins": 0.030831540003418922, + "rewards/rejected": -0.11810784041881561, + "step": 2380 + }, + { + "epoch": 0.6708389002179875, + "grad_norm": 1.4453125, + "learning_rate": 1.4765088434892735e-06, + "log_odds_chosen": 0.20517206192016602, + "log_odds_ratio": -0.7064529657363892, + "logits/chosen": 0.04695446044206619, + "logits/rejected": 0.20691752433776855, + "logps/chosen": -0.8503645062446594, + "logps/rejected": -1.0062562227249146, + "loss": 1.3207, + "nll_loss": 1.0638062953948975, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.08503645658493042, + "rewards/margins": 0.015589162707328796, + "rewards/rejected": -0.10062561929225922, + "step": 2385 + }, + { + "epoch": 0.6722452710779833, + "grad_norm": 0.98046875, + "learning_rate": 1.4653214273457261e-06, + "log_odds_chosen": 0.3600631654262543, + "log_odds_ratio": -0.6305667161941528, + "logits/chosen": 0.2736007571220398, + "logits/rejected": -0.09294568002223969, + "logps/chosen": -1.028140902519226, + "logps/rejected": -1.2551156282424927, + "loss": 1.2326, + "nll_loss": 1.334746241569519, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.10281410068273544, + "rewards/margins": 0.022697459906339645, + "rewards/rejected": -0.12551157176494598, + "step": 2390 + }, + { + "epoch": 0.673651641937979, + "grad_norm": 1.1640625, + "learning_rate": 1.4541589580602691e-06, + "log_odds_chosen": 0.21781405806541443, + "log_odds_ratio": -0.6903058886528015, + "logits/chosen": 0.32889285683631897, + "logits/rejected": 0.20784631371498108, + "logps/chosen": -0.9776535034179688, + "logps/rejected": -1.1374032497406006, + "loss": 1.2584, + "nll_loss": 1.2378979921340942, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.09776534140110016, + "rewards/margins": 0.0159749872982502, + "rewards/rejected": -0.11374033987522125, + "step": 2395 + }, + { + "epoch": 0.6750580127979748, + "grad_norm": 0.498046875, + "learning_rate": 1.4430217047682133e-06, + "log_odds_chosen": 0.3385079503059387, + "log_odds_ratio": -0.6288328766822815, + "logits/chosen": 0.01931552030146122, + "logits/rejected": -0.039456650614738464, + "logps/chosen": -0.926923394203186, + "logps/rejected": -1.1492844820022583, + "loss": 1.3098, + "nll_loss": 1.1470708847045898, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.09269233047962189, + "rewards/margins": 0.022236105054616928, + "rewards/rejected": -0.11492843925952911, + "step": 2400 + }, + { + "epoch": 0.6764643836579706, + "grad_norm": 0.48046875, + "learning_rate": 1.4319099359968897e-06, + "log_odds_chosen": 0.4113641381263733, + "log_odds_ratio": -0.6631403565406799, + "logits/chosen": 0.20192308723926544, + "logits/rejected": -0.16459718346595764, + "logps/chosen": -0.9309433698654175, + "logps/rejected": -1.1940377950668335, + "loss": 1.3175, + "nll_loss": 1.2504770755767822, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.0930943414568901, + "rewards/margins": 0.026309454813599586, + "rewards/rejected": -0.11940377950668335, + "step": 2405 + }, + { + "epoch": 0.6778707545179664, + "grad_norm": 0.84375, + "learning_rate": 1.4208239196591816e-06, + "log_odds_chosen": 0.6248758435249329, + "log_odds_ratio": -0.5380848050117493, + "logits/chosen": 0.2532563805580139, + "logits/rejected": 0.03574008494615555, + "logps/chosen": -0.7717488408088684, + "logps/rejected": -1.1881312131881714, + "loss": 1.2321, + "nll_loss": 1.0419811010360718, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.07717488706111908, + "rewards/margins": 0.04163823276758194, + "rewards/rejected": -0.11881311237812042, + "step": 2410 + }, + { + "epoch": 0.6792771253779621, + "grad_norm": 0.8125, + "learning_rate": 1.4097639230470602e-06, + "log_odds_chosen": 0.16117160022258759, + "log_odds_ratio": -0.6795519590377808, + "logits/chosen": -0.0329584926366806, + "logits/rejected": 0.09744864702224731, + "logps/chosen": -1.0823699235916138, + "logps/rejected": -1.2119646072387695, + "loss": 1.2633, + "nll_loss": 1.1455273628234863, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.10823698341846466, + "rewards/margins": 0.012959480285644531, + "rewards/rejected": -0.12119647115468979, + "step": 2415 + }, + { + "epoch": 0.680683496237958, + "grad_norm": 0.56640625, + "learning_rate": 1.3987302128251451e-06, + "log_odds_chosen": 0.5146900415420532, + "log_odds_ratio": -0.5096299052238464, + "logits/chosen": 0.21035465598106384, + "logits/rejected": 0.3026772737503052, + "logps/chosen": -0.7643210291862488, + "logps/rejected": -1.0609276294708252, + "loss": 1.3398, + "nll_loss": 0.982632040977478, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.07643209397792816, + "rewards/margins": 0.02966066636145115, + "rewards/rejected": -0.10609277337789536, + "step": 2420 + }, + { + "epoch": 0.6820898670979537, + "grad_norm": 0.94140625, + "learning_rate": 1.3877230550242685e-06, + "log_odds_chosen": 0.12729986011981964, + "log_odds_ratio": -0.7055837512016296, + "logits/chosen": 0.1564406454563141, + "logits/rejected": 0.018828097730875015, + "logps/chosen": -1.0019980669021606, + "logps/rejected": -1.11031973361969, + "loss": 1.3547, + "nll_loss": 1.2092901468276978, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.10019980370998383, + "rewards/margins": 0.010832170024514198, + "rewards/rejected": -0.11103197187185287, + "step": 2425 + }, + { + "epoch": 0.6834962379579496, + "grad_norm": 0.6875, + "learning_rate": 1.376742715035066e-06, + "log_odds_chosen": 0.3790433704853058, + "log_odds_ratio": -0.6022090911865234, + "logits/chosen": 0.07409689575433731, + "logits/rejected": 0.1763194501399994, + "logps/chosen": -0.931224524974823, + "logps/rejected": -1.217162847518921, + "loss": 1.2583, + "nll_loss": 1.1742773056030273, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.0931224375963211, + "rewards/margins": 0.02859382890164852, + "rewards/rejected": -0.12171627581119537, + "step": 2430 + }, + { + "epoch": 0.6849026088179453, + "grad_norm": 0.44140625, + "learning_rate": 1.3657894576015752e-06, + "log_odds_chosen": 0.3091423213481903, + "log_odds_ratio": -0.6466763615608215, + "logits/chosen": 0.1856229156255722, + "logits/rejected": -0.09812851250171661, + "logps/chosen": -0.8701708912849426, + "logps/rejected": -1.0862239599227905, + "loss": 1.2811, + "nll_loss": 1.1256446838378906, + "rewards/accuracies": 0.44999998807907104, + "rewards/chosen": -0.08701709657907486, + "rewards/margins": 0.021605292335152626, + "rewards/rejected": -0.10862239450216293, + "step": 2435 + }, + { + "epoch": 0.686308979677941, + "grad_norm": 0.5390625, + "learning_rate": 1.3548635468148546e-06, + "log_odds_chosen": -0.15600749850273132, + "log_odds_ratio": -0.8607271313667297, + "logits/chosen": 0.039382584393024445, + "logits/rejected": -0.024478310719132423, + "logps/chosen": -1.0003107786178589, + "logps/rejected": -0.936125636100769, + "loss": 1.3303, + "nll_loss": 1.2574559450149536, + "rewards/accuracies": 0.44999998807907104, + "rewards/chosen": -0.10003107786178589, + "rewards/margins": -0.006418503820896149, + "rewards/rejected": -0.09361256659030914, + "step": 2440 + }, + { + "epoch": 0.6877153505379369, + "grad_norm": 0.474609375, + "learning_rate": 1.3439652461066132e-06, + "log_odds_chosen": 0.9599248766899109, + "log_odds_ratio": -0.5301603078842163, + "logits/chosen": 0.11211545765399933, + "logits/rejected": -0.01433412916958332, + "logps/chosen": -0.9639075994491577, + "logps/rejected": -1.5113232135772705, + "loss": 1.2619, + "nll_loss": 1.2481439113616943, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.09639076888561249, + "rewards/margins": 0.05474156141281128, + "rewards/rejected": -0.15113233029842377, + "step": 2445 + }, + { + "epoch": 0.6891217213979326, + "grad_norm": 1.1640625, + "learning_rate": 1.3330948182428632e-06, + "log_odds_chosen": 0.3813667893409729, + "log_odds_ratio": -0.6987672448158264, + "logits/chosen": 0.2353462278842926, + "logits/rejected": -0.04703383892774582, + "logps/chosen": -0.9156500697135925, + "logps/rejected": -1.2335669994354248, + "loss": 1.3956, + "nll_loss": 1.2824862003326416, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.09156500548124313, + "rewards/margins": 0.03179169446229935, + "rewards/rejected": -0.12335671484470367, + "step": 2450 + }, + { + "epoch": 0.6905280922579284, + "grad_norm": 1.0703125, + "learning_rate": 1.3222525253175817e-06, + "log_odds_chosen": 0.38882437348365784, + "log_odds_ratio": -0.6351332068443298, + "logits/chosen": 0.06713583320379257, + "logits/rejected": -0.06536121666431427, + "logps/chosen": -0.7927005887031555, + "logps/rejected": -1.057539701461792, + "loss": 1.3135, + "nll_loss": 1.363416314125061, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.07927004992961884, + "rewards/margins": 0.026483912020921707, + "rewards/rejected": -0.10575397312641144, + "step": 2455 + }, + { + "epoch": 0.6919344631179242, + "grad_norm": 0.515625, + "learning_rate": 1.3114386287463895e-06, + "log_odds_chosen": 0.3855310380458832, + "log_odds_ratio": -0.6534914970397949, + "logits/chosen": 0.3311907947063446, + "logits/rejected": -0.03206902742385864, + "logps/chosen": -0.9295966029167175, + "logps/rejected": -1.1973559856414795, + "loss": 1.2819, + "nll_loss": 1.1859447956085205, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.09295966476202011, + "rewards/margins": 0.02677593193948269, + "rewards/rejected": -0.11973558366298676, + "step": 2460 + }, + { + "epoch": 0.69334083397792, + "grad_norm": 0.921875, + "learning_rate": 1.3006533892602529e-06, + "log_odds_chosen": 0.5109578967094421, + "log_odds_ratio": -0.5499760508537292, + "logits/chosen": 0.18025276064872742, + "logits/rejected": -0.0963296964764595, + "logps/chosen": -0.8864691853523254, + "logps/rejected": -1.241557002067566, + "loss": 1.2567, + "nll_loss": 1.2526640892028809, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.08864691853523254, + "rewards/margins": 0.035508789122104645, + "rewards/rejected": -0.12415570020675659, + "step": 2465 + }, + { + "epoch": 0.6947472048379157, + "grad_norm": 0.80078125, + "learning_rate": 1.289897066899194e-06, + "log_odds_chosen": 0.2278120517730713, + "log_odds_ratio": -0.6966596841812134, + "logits/chosen": 0.2671460211277008, + "logits/rejected": -0.14307644963264465, + "logps/chosen": -0.9270893335342407, + "logps/rejected": -1.1367871761322021, + "loss": 1.343, + "nll_loss": 1.2511600255966187, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.09270893782377243, + "rewards/margins": 0.020969776436686516, + "rewards/rejected": -0.1136787086725235, + "step": 2470 + }, + { + "epoch": 0.6961535756979116, + "grad_norm": 0.81640625, + "learning_rate": 1.27916992100602e-06, + "log_odds_chosen": 0.07840847969055176, + "log_odds_ratio": -0.7949457168579102, + "logits/chosen": -0.019716311246156693, + "logits/rejected": 0.07323477417230606, + "logps/chosen": -1.0711171627044678, + "logps/rejected": -1.1677472591400146, + "loss": 1.2643, + "nll_loss": 1.206630825996399, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.10711170732975006, + "rewards/margins": 0.009663019329309464, + "rewards/rejected": -0.11677472293376923, + "step": 2475 + }, + { + "epoch": 0.6975599465579073, + "grad_norm": 0.52734375, + "learning_rate": 1.268472210220077e-06, + "log_odds_chosen": -0.06773873418569565, + "log_odds_ratio": -0.8004165887832642, + "logits/chosen": -0.054945267736911774, + "logits/rejected": 0.06799677759408951, + "logps/chosen": -0.886520266532898, + "logps/rejected": -0.8365623354911804, + "loss": 1.3579, + "nll_loss": 1.3515230417251587, + "rewards/accuracies": 0.44999998807907104, + "rewards/chosen": -0.08865202963352203, + "rewards/margins": -0.004995786584913731, + "rewards/rejected": -0.08365623652935028, + "step": 2480 + }, + { + "epoch": 0.698966317417903, + "grad_norm": 1.421875, + "learning_rate": 1.2578041924710048e-06, + "log_odds_chosen": 0.026949768885970116, + "log_odds_ratio": -0.7602518796920776, + "logits/chosen": 0.08551601320505142, + "logits/rejected": -0.11867004632949829, + "logps/chosen": -0.9919036626815796, + "logps/rejected": -1.0153735876083374, + "loss": 1.3524, + "nll_loss": 1.4168071746826172, + "rewards/accuracies": 0.4000000059604645, + "rewards/chosen": -0.0991903692483902, + "rewards/margins": 0.002346993423998356, + "rewards/rejected": -0.10153736919164658, + "step": 2485 + }, + { + "epoch": 0.7003726882778989, + "grad_norm": 1.578125, + "learning_rate": 1.247166124972523e-06, + "log_odds_chosen": 0.27566736936569214, + "log_odds_ratio": -0.631436824798584, + "logits/chosen": 0.10302142798900604, + "logits/rejected": 0.0398896150290966, + "logps/chosen": -0.9392396211624146, + "logps/rejected": -1.1210951805114746, + "loss": 1.3061, + "nll_loss": 1.1322752237319946, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.09392396360635757, + "rewards/margins": 0.018185561522841454, + "rewards/rejected": -0.11210951954126358, + "step": 2490 + }, + { + "epoch": 0.7017790591378946, + "grad_norm": 0.57421875, + "learning_rate": 1.2365582642162303e-06, + "log_odds_chosen": -0.050146959722042084, + "log_odds_ratio": -0.7798740267753601, + "logits/chosen": 0.04944353178143501, + "logits/rejected": 0.006843870971351862, + "logps/chosen": -1.0934979915618896, + "logps/rejected": -1.067382574081421, + "loss": 1.3223, + "nll_loss": 1.3140615224838257, + "rewards/accuracies": 0.44999998807907104, + "rewards/chosen": -0.1093498095870018, + "rewards/margins": -0.0026115558575838804, + "rewards/rejected": -0.10673825442790985, + "step": 2495 + }, + { + "epoch": 0.7031854299978905, + "grad_norm": 0.322265625, + "learning_rate": 1.225980865965416e-06, + "log_odds_chosen": 0.16867448389530182, + "log_odds_ratio": -0.7877094149589539, + "logits/chosen": 0.15207277238368988, + "logits/rejected": 0.07655763626098633, + "logps/chosen": -1.1009851694107056, + "logps/rejected": -1.1948564052581787, + "loss": 1.2842, + "nll_loss": 1.3291409015655518, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.11009852588176727, + "rewards/margins": 0.009387115947902203, + "rewards/rejected": -0.11948563903570175, + "step": 2500 + }, + { + "epoch": 0.7045918008578862, + "grad_norm": 0.92578125, + "learning_rate": 1.2154341852489e-06, + "log_odds_chosen": 0.19464707374572754, + "log_odds_ratio": -0.6840943098068237, + "logits/chosen": -0.10291782766580582, + "logits/rejected": 0.017472196370363235, + "logps/chosen": -0.9715100526809692, + "logps/rejected": -1.0822222232818604, + "loss": 1.3588, + "nll_loss": 1.2510693073272705, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.0971510112285614, + "rewards/margins": 0.0110712219029665, + "rewards/rejected": -0.10822223126888275, + "step": 2505 + }, + { + "epoch": 0.705998171717882, + "grad_norm": 1.234375, + "learning_rate": 1.2049184763548772e-06, + "log_odds_chosen": 0.4920802712440491, + "log_odds_ratio": -0.5853989124298096, + "logits/chosen": -0.08663634210824966, + "logits/rejected": -0.034693799912929535, + "logps/chosen": -0.902317225933075, + "logps/rejected": -1.2154607772827148, + "loss": 1.2472, + "nll_loss": 1.23630690574646, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.09023171663284302, + "rewards/margins": 0.03131437301635742, + "rewards/rejected": -0.12154610455036163, + "step": 2510 + }, + { + "epoch": 0.7074045425778778, + "grad_norm": 0.6015625, + "learning_rate": 1.1944339928247886e-06, + "log_odds_chosen": 0.16076788306236267, + "log_odds_ratio": -0.7225486040115356, + "logits/chosen": 0.30270877480506897, + "logits/rejected": 0.1148032397031784, + "logps/chosen": -1.0007215738296509, + "logps/rejected": -1.1605135202407837, + "loss": 1.2335, + "nll_loss": 1.1324108839035034, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.10007216036319733, + "rewards/margins": 0.01597919873893261, + "rewards/rejected": -0.11605136096477509, + "step": 2515 + }, + { + "epoch": 0.7088109134378736, + "grad_norm": 1.0546875, + "learning_rate": 1.1839809874472116e-06, + "log_odds_chosen": 0.32421764731407166, + "log_odds_ratio": -0.6674357652664185, + "logits/chosen": 0.18027594685554504, + "logits/rejected": 0.10635361820459366, + "logps/chosen": -0.9377398490905762, + "logps/rejected": -1.1755977869033813, + "loss": 1.3138, + "nll_loss": 1.1462560892105103, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.09377399832010269, + "rewards/margins": 0.0237857848405838, + "rewards/rejected": -0.1175597757101059, + "step": 2520 + }, + { + "epoch": 0.7102172842978693, + "grad_norm": 0.5703125, + "learning_rate": 1.1735597122517603e-06, + "log_odds_chosen": 0.33940380811691284, + "log_odds_ratio": -0.5675379037857056, + "logits/chosen": 0.22716209292411804, + "logits/rejected": 0.09732247143983841, + "logps/chosen": -0.8749046325683594, + "logps/rejected": -1.0648473501205444, + "loss": 1.2041, + "nll_loss": 1.094160795211792, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.08749047666788101, + "rewards/margins": 0.01899426057934761, + "rewards/rejected": -0.10648472607135773, + "step": 2525 + }, + { + "epoch": 0.7116236551578651, + "grad_norm": 0.4765625, + "learning_rate": 1.1631704185030107e-06, + "log_odds_chosen": 0.004075491335242987, + "log_odds_ratio": -0.7541936635971069, + "logits/chosen": 0.27269211411476135, + "logits/rejected": 0.01029108464717865, + "logps/chosen": -0.8757359385490417, + "logps/rejected": -0.8719568252563477, + "loss": 1.2674, + "nll_loss": 1.2918345928192139, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.0875735953450203, + "rewards/margins": -0.00037791504291817546, + "rewards/rejected": -0.08719567954540253, + "step": 2530 + }, + { + "epoch": 0.7130300260178609, + "grad_norm": 0.88671875, + "learning_rate": 1.1528133566944427e-06, + "log_odds_chosen": -0.1133066862821579, + "log_odds_ratio": -0.8648877143859863, + "logits/chosen": -0.10340137779712677, + "logits/rejected": 0.04538872092962265, + "logps/chosen": -1.0196564197540283, + "logps/rejected": -0.8988596200942993, + "loss": 1.2808, + "nll_loss": 1.2842795848846436, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.10196565091609955, + "rewards/margins": -0.01207968033850193, + "rewards/rejected": -0.08988596498966217, + "step": 2535 + }, + { + "epoch": 0.7144363968778566, + "grad_norm": 0.75390625, + "learning_rate": 1.1424887765424029e-06, + "log_odds_chosen": 0.18615292012691498, + "log_odds_ratio": -0.7488057017326355, + "logits/chosen": 0.0731426477432251, + "logits/rejected": -0.023691270500421524, + "logps/chosen": -1.0201672315597534, + "logps/rejected": -1.0216002464294434, + "loss": 1.4175, + "nll_loss": 1.341402292251587, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.10201673209667206, + "rewards/margins": 0.00014328323595691472, + "rewards/rejected": -0.1021600142121315, + "step": 2540 + }, + { + "epoch": 0.7158427677378525, + "grad_norm": 0.74609375, + "learning_rate": 1.1321969269800783e-06, + "log_odds_chosen": -0.12177006155252457, + "log_odds_ratio": -0.8551080822944641, + "logits/chosen": 0.060602523386478424, + "logits/rejected": 0.08475537598133087, + "logps/chosen": -1.0529420375823975, + "logps/rejected": -0.9692171216011047, + "loss": 1.2211, + "nll_loss": 1.0461233854293823, + "rewards/accuracies": 0.30000001192092896, + "rewards/chosen": -0.10529420524835587, + "rewards/margins": -0.008372487500309944, + "rewards/rejected": -0.09692171216011047, + "step": 2545 + }, + { + "epoch": 0.7172491385978482, + "grad_norm": 0.7578125, + "learning_rate": 1.1219380561514992e-06, + "log_odds_chosen": 0.6954945921897888, + "log_odds_ratio": -0.580727756023407, + "logits/chosen": 0.25962942838668823, + "logits/rejected": -0.02940789982676506, + "logps/chosen": -0.884055495262146, + "logps/rejected": -1.4140456914901733, + "loss": 1.2898, + "nll_loss": 1.118775486946106, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.0884055569767952, + "rewards/margins": 0.05299902707338333, + "rewards/rejected": -0.14140458405017853, + "step": 2550 + }, + { + "epoch": 0.7186555094578441, + "grad_norm": 0.86328125, + "learning_rate": 1.1117124114055533e-06, + "log_odds_chosen": 0.3898099362850189, + "log_odds_ratio": -0.6564149260520935, + "logits/chosen": 0.13655290007591248, + "logits/rejected": -0.14647407829761505, + "logps/chosen": -0.924484133720398, + "logps/rejected": -1.103148102760315, + "loss": 1.3297, + "nll_loss": 1.439805507659912, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.09244842082262039, + "rewards/margins": 0.017866378650069237, + "rewards/rejected": -0.11031480133533478, + "step": 2555 + }, + { + "epoch": 0.7200618803178398, + "grad_norm": 0.73046875, + "learning_rate": 1.1015202392900234e-06, + "log_odds_chosen": 0.24838736653327942, + "log_odds_ratio": -0.7105676531791687, + "logits/chosen": 0.13131016492843628, + "logits/rejected": -0.09939908236265182, + "logps/chosen": -0.8603301048278809, + "logps/rejected": -1.0633742809295654, + "loss": 1.355, + "nll_loss": 1.2121269702911377, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.08603300899267197, + "rewards/margins": 0.02030441164970398, + "rewards/rejected": -0.10633742809295654, + "step": 2560 + }, + { + "epoch": 0.7214682511778356, + "grad_norm": 0.921875, + "learning_rate": 1.0913617855456449e-06, + "log_odds_chosen": 0.07735253125429153, + "log_odds_ratio": -0.7111676931381226, + "logits/chosen": 0.3953332304954529, + "logits/rejected": -0.05915503576397896, + "logps/chosen": -1.0024585723876953, + "logps/rejected": -1.083264708518982, + "loss": 1.1852, + "nll_loss": 1.2101662158966064, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.10024585574865341, + "rewards/margins": 0.008080625906586647, + "rewards/rejected": -0.10832647979259491, + "step": 2565 + }, + { + "epoch": 0.7228746220378314, + "grad_norm": 0.59765625, + "learning_rate": 1.0812372951001752e-06, + "log_odds_chosen": -0.04463967680931091, + "log_odds_ratio": -0.8388767242431641, + "logits/chosen": 0.02681068144738674, + "logits/rejected": -0.018642084673047066, + "logps/chosen": -1.0836817026138306, + "logps/rejected": -1.0096104145050049, + "loss": 1.353, + "nll_loss": 1.344254493713379, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.1083681732416153, + "rewards/margins": -0.007407122757285833, + "rewards/rejected": -0.10096104443073273, + "step": 2570 + }, + { + "epoch": 0.7242809928978271, + "grad_norm": 0.63671875, + "learning_rate": 1.0711470120624937e-06, + "log_odds_chosen": 0.3836483061313629, + "log_odds_ratio": -0.5960331559181213, + "logits/chosen": 0.2320297658443451, + "logits/rejected": -0.0060660927556455135, + "logps/chosen": -0.9361212849617004, + "logps/rejected": -1.170351266860962, + "loss": 1.3233, + "nll_loss": 1.1359012126922607, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.09361211955547333, + "rewards/margins": 0.023423010483384132, + "rewards/rejected": -0.11703513562679291, + "step": 2575 + }, + { + "epoch": 0.725687363757823, + "grad_norm": 0.61328125, + "learning_rate": 1.0610911797167133e-06, + "log_odds_chosen": 0.0792519822716713, + "log_odds_ratio": -0.7163349390029907, + "logits/chosen": 0.24216756224632263, + "logits/rejected": 0.06887772679328918, + "logps/chosen": -0.8495702743530273, + "logps/rejected": -0.881389319896698, + "loss": 1.2753, + "nll_loss": 1.0806999206542969, + "rewards/accuracies": 0.44999998807907104, + "rewards/chosen": -0.08495702594518661, + "rewards/margins": 0.003181902226060629, + "rewards/rejected": -0.08813893049955368, + "step": 2580 + }, + { + "epoch": 0.7270937346178187, + "grad_norm": 0.82421875, + "learning_rate": 1.0510700405163152e-06, + "log_odds_chosen": 0.3315682113170624, + "log_odds_ratio": -0.5999222993850708, + "logits/chosen": 0.313637912273407, + "logits/rejected": -0.13951337337493896, + "logps/chosen": -0.9159483909606934, + "logps/rejected": -1.1227281093597412, + "loss": 1.2945, + "nll_loss": 1.2091586589813232, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.09159483760595322, + "rewards/margins": 0.020677978172898293, + "rewards/rejected": -0.11227281391620636, + "step": 2585 + }, + { + "epoch": 0.7285001054778145, + "grad_norm": 0.7265625, + "learning_rate": 1.0410838360783058e-06, + "log_odds_chosen": 0.3959486782550812, + "log_odds_ratio": -0.617482602596283, + "logits/chosen": 0.40600308775901794, + "logits/rejected": -0.3985676169395447, + "logps/chosen": -1.0334031581878662, + "logps/rejected": -1.3553186655044556, + "loss": 1.355, + "nll_loss": 1.2000818252563477, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.10334031283855438, + "rewards/margins": 0.032191552221775055, + "rewards/rejected": -0.13553187251091003, + "step": 2590 + }, + { + "epoch": 0.7299064763378103, + "grad_norm": 0.4609375, + "learning_rate": 1.031132807177385e-06, + "log_odds_chosen": 0.4226677417755127, + "log_odds_ratio": -0.6021221876144409, + "logits/chosen": -0.010524725541472435, + "logits/rejected": -0.04317883029580116, + "logps/chosen": -0.860185444355011, + "logps/rejected": -1.1054656505584717, + "loss": 1.2373, + "nll_loss": 1.1730222702026367, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.08601854741573334, + "rewards/margins": 0.024528011679649353, + "rewards/rejected": -0.11054656654596329, + "step": 2595 + }, + { + "epoch": 0.7313128471978061, + "grad_norm": 0.71484375, + "learning_rate": 1.0212171937401493e-06, + "log_odds_chosen": 0.555956244468689, + "log_odds_ratio": -0.5494065284729004, + "logits/chosen": 0.1120821014046669, + "logits/rejected": -0.11111991107463837, + "logps/chosen": -0.9098693132400513, + "logps/rejected": -1.2724263668060303, + "loss": 1.3911, + "nll_loss": 1.4419333934783936, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.09098692983388901, + "rewards/margins": 0.03625571355223656, + "rewards/rejected": -0.12724265456199646, + "step": 2600 + }, + { + "epoch": 0.7327192180578018, + "grad_norm": 0.5546875, + "learning_rate": 1.0113372348392988e-06, + "log_odds_chosen": 0.33087357878685, + "log_odds_ratio": -0.6583686470985413, + "logits/chosen": 0.20928256213665009, + "logits/rejected": 0.009285476990044117, + "logps/chosen": -0.9686871767044067, + "logps/rejected": -1.1702792644500732, + "loss": 1.2857, + "nll_loss": 1.3147389888763428, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.09686873108148575, + "rewards/margins": 0.02015921100974083, + "rewards/rejected": -0.11702793836593628, + "step": 2605 + }, + { + "epoch": 0.7341255889177977, + "grad_norm": 0.9296875, + "learning_rate": 1.0014931686878776e-06, + "log_odds_chosen": 0.43109697103500366, + "log_odds_ratio": -0.5490614175796509, + "logits/chosen": -0.0007611617329530418, + "logits/rejected": 0.2633914649486542, + "logps/chosen": -0.8575743436813354, + "logps/rejected": -1.1433038711547852, + "loss": 1.2056, + "nll_loss": 1.0657745599746704, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.08575743436813354, + "rewards/margins": 0.02857295610010624, + "rewards/rejected": -0.11433041095733643, + "step": 2610 + }, + { + "epoch": 0.7355319597777934, + "grad_norm": 0.68359375, + "learning_rate": 9.916852326335294e-07, + "log_odds_chosen": 0.37479788064956665, + "log_odds_ratio": -0.5814806222915649, + "logits/chosen": 0.19862744212150574, + "logits/rejected": 0.21426455676555634, + "logps/chosen": -0.9342344999313354, + "logps/rejected": -1.1362876892089844, + "loss": 1.3389, + "nll_loss": 1.1891016960144043, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.09342344850301743, + "rewards/margins": 0.020205311477184296, + "rewards/rejected": -0.11362876743078232, + "step": 2615 + }, + { + "epoch": 0.7369383306377891, + "grad_norm": 0.9609375, + "learning_rate": 9.819136631527742e-07, + "log_odds_chosen": -0.01976107433438301, + "log_odds_ratio": -0.7440468072891235, + "logits/chosen": 0.2642653286457062, + "logits/rejected": -0.05234430357813835, + "logps/chosen": -0.9733647108078003, + "logps/rejected": -0.9724742770195007, + "loss": 1.2947, + "nll_loss": 1.2140226364135742, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.09733647853136063, + "rewards/margins": -8.90508308657445e-05, + "rewards/rejected": -0.09724743664264679, + "step": 2620 + }, + { + "epoch": 0.738344701497785, + "grad_norm": 1.6328125, + "learning_rate": 9.7217869584531e-07, + "log_odds_chosen": 0.542760968208313, + "log_odds_ratio": -0.6224207878112793, + "logits/chosen": 0.03186125308275223, + "logits/rejected": -0.09850295633077621, + "logps/chosen": -0.9357168078422546, + "logps/rejected": -1.185787320137024, + "loss": 1.3398, + "nll_loss": 1.4264706373214722, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.09357168525457382, + "rewards/margins": 0.025007059797644615, + "rewards/rejected": -0.11857875436544418, + "step": 2625 + }, + { + "epoch": 0.7397510723577807, + "grad_norm": 0.4765625, + "learning_rate": 9.624805654283264e-07, + "log_odds_chosen": 0.4654063284397125, + "log_odds_ratio": -0.5991908311843872, + "logits/chosen": 0.08382277935743332, + "logits/rejected": -0.10615154355764389, + "logps/chosen": -0.9338623285293579, + "logps/rejected": -1.217292308807373, + "loss": 1.2077, + "nll_loss": 1.33759605884552, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.0933862254023552, + "rewards/margins": 0.02834300324320793, + "rewards/rejected": -0.12172923237085342, + "step": 2630 + }, + { + "epoch": 0.7411574432177765, + "grad_norm": 0.75390625, + "learning_rate": 9.528195057308498e-07, + "log_odds_chosen": 0.13208118081092834, + "log_odds_ratio": -0.7440918684005737, + "logits/chosen": 0.12219689041376114, + "logits/rejected": 0.05499458312988281, + "logps/chosen": -0.9848679304122925, + "logps/rejected": -1.0994460582733154, + "loss": 1.2732, + "nll_loss": 1.143159031867981, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.09848680347204208, + "rewards/margins": 0.011457815766334534, + "rewards/rejected": -0.10994460433721542, + "step": 2635 + }, + { + "epoch": 0.7425638140777723, + "grad_norm": 1.09375, + "learning_rate": 9.431957496881044e-07, + "log_odds_chosen": -0.24765324592590332, + "log_odds_ratio": -0.91820228099823, + "logits/chosen": 0.13052822649478912, + "logits/rejected": -0.09291420876979828, + "logps/chosen": -1.1655653715133667, + "logps/rejected": -1.0791189670562744, + "loss": 1.4344, + "nll_loss": 1.45156991481781, + "rewards/accuracies": 0.4000000059604645, + "rewards/chosen": -0.11655654013156891, + "rewards/margins": -0.008644639514386654, + "rewards/rejected": -0.10791189968585968, + "step": 2640 + }, + { + "epoch": 0.7439701849377681, + "grad_norm": 0.490234375, + "learning_rate": 9.336095293358955e-07, + "log_odds_chosen": 0.18489600718021393, + "log_odds_ratio": -0.7089937329292297, + "logits/chosen": 0.22435322403907776, + "logits/rejected": -0.04987093061208725, + "logps/chosen": -0.8399380445480347, + "logps/rejected": -0.9723587036132812, + "loss": 1.2625, + "nll_loss": 1.1622884273529053, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.0839938074350357, + "rewards/margins": 0.013242078013718128, + "rewards/rejected": -0.09723588079214096, + "step": 2645 + }, + { + "epoch": 0.7453765557977639, + "grad_norm": 0.9296875, + "learning_rate": 9.240610758050167e-07, + "log_odds_chosen": 0.16607101261615753, + "log_odds_ratio": -0.6715102791786194, + "logits/chosen": 0.4128045439720154, + "logits/rejected": -0.09653332084417343, + "logps/chosen": -0.8086966276168823, + "logps/rejected": -0.9036060571670532, + "loss": 1.3191, + "nll_loss": 1.1381139755249023, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.080869659781456, + "rewards/margins": 0.009490938857197762, + "rewards/rejected": -0.0903606042265892, + "step": 2650 + }, + { + "epoch": 0.7467829266577597, + "grad_norm": 0.84375, + "learning_rate": 9.14550619315675e-07, + "log_odds_chosen": 0.41601577401161194, + "log_odds_ratio": -0.6104211211204529, + "logits/chosen": 0.2691023349761963, + "logits/rejected": -0.30555975437164307, + "logps/chosen": -0.9007304906845093, + "logps/rejected": -1.229494333267212, + "loss": 1.2178, + "nll_loss": 1.2668547630310059, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.09007303416728973, + "rewards/margins": 0.0328763946890831, + "rewards/rejected": -0.12294943630695343, + "step": 2655 + }, + { + "epoch": 0.7481892975177554, + "grad_norm": 0.6796875, + "learning_rate": 9.050783891719397e-07, + "log_odds_chosen": 0.16551217436790466, + "log_odds_ratio": -0.7445172071456909, + "logits/chosen": 0.025711068883538246, + "logits/rejected": -0.060128070414066315, + "logps/chosen": -1.0086417198181152, + "logps/rejected": -1.1160506010055542, + "loss": 1.2587, + "nll_loss": 1.2568762302398682, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.10086417198181152, + "rewards/margins": 0.010740896686911583, + "rewards/rejected": -0.11160507053136826, + "step": 2660 + }, + { + "epoch": 0.7495956683777513, + "grad_norm": 0.466796875, + "learning_rate": 8.956446137562183e-07, + "log_odds_chosen": 0.16542503237724304, + "log_odds_ratio": -0.7191334962844849, + "logits/chosen": 0.11821790039539337, + "logits/rejected": -0.20049512386322021, + "logps/chosen": -0.9817320704460144, + "logps/rejected": -1.1040692329406738, + "loss": 1.2025, + "nll_loss": 1.22897469997406, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.09817320853471756, + "rewards/margins": 0.012233722023665905, + "rewards/rejected": -0.11040693521499634, + "step": 2665 + }, + { + "epoch": 0.751002039237747, + "grad_norm": 1.15625, + "learning_rate": 8.862495205237432e-07, + "log_odds_chosen": 0.3671686351299286, + "log_odds_ratio": -0.5621435046195984, + "logits/chosen": 0.1925962269306183, + "logits/rejected": -0.02321244589984417, + "logps/chosen": -0.8481559753417969, + "logps/rejected": -1.0866224765777588, + "loss": 1.236, + "nll_loss": 1.1403025388717651, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.0848156064748764, + "rewards/margins": 0.02384665608406067, + "rewards/rejected": -0.10866224765777588, + "step": 2670 + }, + { + "epoch": 0.7524084100977427, + "grad_norm": 1.140625, + "learning_rate": 8.768933359970924e-07, + "log_odds_chosen": 0.514217734336853, + "log_odds_ratio": -0.5560160279273987, + "logits/chosen": 0.12720641493797302, + "logits/rejected": -0.23077580332756042, + "logps/chosen": -0.987766444683075, + "logps/rejected": -1.3353379964828491, + "loss": 1.3173, + "nll_loss": 1.2551425695419312, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.09877664595842361, + "rewards/margins": 0.03475714102387428, + "rewards/rejected": -0.1335338056087494, + "step": 2675 + }, + { + "epoch": 0.7538147809577386, + "grad_norm": 0.6875, + "learning_rate": 8.675762857607256e-07, + "log_odds_chosen": 0.19895534217357635, + "log_odds_ratio": -0.7668569684028625, + "logits/chosen": 0.3156585395336151, + "logits/rejected": -0.07820574939250946, + "logps/chosen": -0.902428150177002, + "logps/rejected": -1.12700355052948, + "loss": 1.2807, + "nll_loss": 1.1560865640640259, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.09024281799793243, + "rewards/margins": 0.02245754934847355, + "rewards/rejected": -0.11270036548376083, + "step": 2680 + }, + { + "epoch": 0.7552211518177343, + "grad_norm": 0.4765625, + "learning_rate": 8.582985944555488e-07, + "log_odds_chosen": 0.5820831656455994, + "log_odds_ratio": -0.5177468061447144, + "logits/chosen": 0.30657750368118286, + "logits/rejected": -0.03154323250055313, + "logps/chosen": -0.8904935121536255, + "logps/rejected": -1.2868787050247192, + "loss": 1.2957, + "nll_loss": 1.1450371742248535, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.08904936164617538, + "rewards/margins": 0.03963851183652878, + "rewards/rejected": -0.12868787348270416, + "step": 2685 + }, + { + "epoch": 0.7566275226777301, + "grad_norm": 0.68359375, + "learning_rate": 8.49060485773493e-07, + "log_odds_chosen": 0.780549943447113, + "log_odds_ratio": -0.49319782853126526, + "logits/chosen": 0.043089210987091064, + "logits/rejected": -0.19765236973762512, + "logps/chosen": -0.9323430061340332, + "logps/rejected": -1.4097062349319458, + "loss": 1.3579, + "nll_loss": 1.2788646221160889, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.09323429316282272, + "rewards/margins": 0.047736309468746185, + "rewards/rejected": -0.1409706175327301, + "step": 2690 + }, + { + "epoch": 0.7580338935377259, + "grad_norm": 0.462890625, + "learning_rate": 8.39862182452123e-07, + "log_odds_chosen": 0.36220189929008484, + "log_odds_ratio": -0.6064194440841675, + "logits/chosen": 0.006648133508861065, + "logits/rejected": 0.20794352889060974, + "logps/chosen": -0.9459856748580933, + "logps/rejected": -1.1608078479766846, + "loss": 1.3569, + "nll_loss": 1.14859938621521, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.09459856897592545, + "rewards/margins": 0.021482214331626892, + "rewards/rejected": -0.11608078330755234, + "step": 2695 + }, + { + "epoch": 0.7594402643977217, + "grad_norm": 0.3984375, + "learning_rate": 8.307039062692682e-07, + "log_odds_chosen": 0.06786508858203888, + "log_odds_ratio": -0.7138899564743042, + "logits/chosen": 0.28591951727867126, + "logits/rejected": 0.158804252743721, + "logps/chosen": -0.9865355491638184, + "logps/rejected": -1.0179736614227295, + "loss": 1.358, + "nll_loss": 1.275681972503662, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.09865355491638184, + "rewards/margins": 0.003143805544823408, + "rewards/rejected": -0.10179736465215683, + "step": 2700 + }, + { + "epoch": 0.7608466352577175, + "grad_norm": 0.77734375, + "learning_rate": 8.215858780376732e-07, + "log_odds_chosen": 0.16411906480789185, + "log_odds_ratio": -0.6866927146911621, + "logits/chosen": 0.16503454744815826, + "logits/rejected": 0.040900081396102905, + "logps/chosen": -1.0014139413833618, + "logps/rejected": -1.0946948528289795, + "loss": 1.3739, + "nll_loss": 1.2816741466522217, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.10014139115810394, + "rewards/margins": 0.009328092448413372, + "rewards/rejected": -0.10946948826313019, + "step": 2705 + }, + { + "epoch": 0.7622530061177133, + "grad_norm": 0.5859375, + "learning_rate": 8.125083175996776e-07, + "log_odds_chosen": 0.25532618165016174, + "log_odds_ratio": -0.7476253509521484, + "logits/chosen": -0.02141258493065834, + "logits/rejected": -0.10258176177740097, + "logps/chosen": -0.931313693523407, + "logps/rejected": -1.1693487167358398, + "loss": 1.2248, + "nll_loss": 1.2920305728912354, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.09313137829303741, + "rewards/margins": 0.023803498595952988, + "rewards/rejected": -0.1169348731637001, + "step": 2710 + }, + { + "epoch": 0.763659376977709, + "grad_norm": 1.015625, + "learning_rate": 8.034714438219101e-07, + "log_odds_chosen": 0.7236061692237854, + "log_odds_ratio": -0.5326360464096069, + "logits/chosen": 0.06193612143397331, + "logits/rejected": -0.04839233309030533, + "logps/chosen": -0.89410001039505, + "logps/rejected": -1.2422478199005127, + "loss": 1.3183, + "nll_loss": 1.3468520641326904, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.08941000699996948, + "rewards/margins": 0.034814778715372086, + "rewards/rejected": -0.12422479689121246, + "step": 2715 + }, + { + "epoch": 0.7650657478377048, + "grad_norm": 0.435546875, + "learning_rate": 7.944754745900158e-07, + "log_odds_chosen": 0.3403502404689789, + "log_odds_ratio": -0.7160285711288452, + "logits/chosen": 0.03555140644311905, + "logits/rejected": -0.06346184015274048, + "logps/chosen": -0.917829692363739, + "logps/rejected": -1.2131367921829224, + "loss": 1.3901, + "nll_loss": 1.3876527547836304, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.09178297221660614, + "rewards/margins": 0.029530709609389305, + "rewards/rejected": -0.12131367623806, + "step": 2720 + }, + { + "epoch": 0.7664721186977006, + "grad_norm": 0.5546875, + "learning_rate": 7.855206268034e-07, + "log_odds_chosen": 0.3166981041431427, + "log_odds_ratio": -0.6263189911842346, + "logits/chosen": 0.32282960414886475, + "logits/rejected": 0.1895475536584854, + "logps/chosen": -0.9686748385429382, + "logps/rejected": -1.2145028114318848, + "loss": 1.3271, + "nll_loss": 1.1234979629516602, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.09686748683452606, + "rewards/margins": 0.02458280883729458, + "rewards/rejected": -0.12145029008388519, + "step": 2725 + }, + { + "epoch": 0.7678784895576963, + "grad_norm": 0.9765625, + "learning_rate": 7.766071163700004e-07, + "log_odds_chosen": 0.24529030919075012, + "log_odds_ratio": -0.6629317402839661, + "logits/chosen": 0.00040346087189391255, + "logits/rejected": -0.030096957460045815, + "logps/chosen": -0.9108067750930786, + "logps/rejected": -1.0234143733978271, + "loss": 1.3121, + "nll_loss": 1.280273199081421, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.0910806730389595, + "rewards/margins": 0.011260779574513435, + "rewards/rejected": -0.10234145075082779, + "step": 2730 + }, + { + "epoch": 0.7692848604176922, + "grad_norm": 0.7890625, + "learning_rate": 7.677351582010811e-07, + "log_odds_chosen": 0.5884172320365906, + "log_odds_ratio": -0.5643038749694824, + "logits/chosen": -0.007936659269034863, + "logits/rejected": 0.024150729179382324, + "logps/chosen": -0.8941663503646851, + "logps/rejected": -1.3198270797729492, + "loss": 1.2896, + "nll_loss": 1.2319035530090332, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.08941663801670074, + "rewards/margins": 0.042566053569316864, + "rewards/rejected": -0.1319826990365982, + "step": 2735 + }, + { + "epoch": 0.7706912312776879, + "grad_norm": 0.40625, + "learning_rate": 7.589049662060518e-07, + "log_odds_chosen": 0.40013399720191956, + "log_odds_ratio": -0.5915107131004333, + "logits/chosen": 0.39384156465530396, + "logits/rejected": 0.017804330214858055, + "logps/chosen": -0.9199845194816589, + "logps/rejected": -1.1359189748764038, + "loss": 1.3512, + "nll_loss": 1.1475433111190796, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.09199845045804977, + "rewards/margins": 0.02159346267580986, + "rewards/rejected": -0.11359190940856934, + "step": 2740 + }, + { + "epoch": 0.7720976021376837, + "grad_norm": 0.73828125, + "learning_rate": 7.501167532873075e-07, + "log_odds_chosen": 0.07918643206357956, + "log_odds_ratio": -0.7816256284713745, + "logits/chosen": 0.016884874552488327, + "logits/rejected": 0.1366664171218872, + "logps/chosen": -1.0064961910247803, + "logps/rejected": -0.9999880790710449, + "loss": 1.2442, + "nll_loss": 1.2048060894012451, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.1006496325135231, + "rewards/margins": -0.0006508378428407013, + "rewards/rejected": -0.09999879449605942, + "step": 2745 + }, + { + "epoch": 0.7735039729976795, + "grad_norm": 0.58203125, + "learning_rate": 7.413707313350965e-07, + "log_odds_chosen": 0.47310739755630493, + "log_odds_ratio": -0.6083270907402039, + "logits/chosen": 0.1051364317536354, + "logits/rejected": -0.04713956639170647, + "logps/chosen": -0.9754480123519897, + "logps/rejected": -1.2068026065826416, + "loss": 1.3705, + "nll_loss": 1.2637088298797607, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.09754480421543121, + "rewards/margins": 0.02313544601202011, + "rewards/rejected": -0.12068025767803192, + "step": 2750 + }, + { + "epoch": 0.7749103438576753, + "grad_norm": 0.5625, + "learning_rate": 7.326671112224135e-07, + "log_odds_chosen": 0.4301450848579407, + "log_odds_ratio": -0.6069377660751343, + "logits/chosen": 0.003236403688788414, + "logits/rejected": 0.0592556893825531, + "logps/chosen": -0.8992059826850891, + "logps/rejected": -1.2177690267562866, + "loss": 1.263, + "nll_loss": 1.3265444040298462, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.08992060273885727, + "rewards/margins": 0.031856290996074677, + "rewards/rejected": -0.12177689373493195, + "step": 2755 + }, + { + "epoch": 0.776316714717671, + "grad_norm": 0.64453125, + "learning_rate": 7.240061027999129e-07, + "log_odds_chosen": -0.005230224225670099, + "log_odds_ratio": -0.7652040123939514, + "logits/chosen": 0.13336391746997833, + "logits/rejected": 0.040630996227264404, + "logps/chosen": -1.029039740562439, + "logps/rejected": -0.9913640022277832, + "loss": 1.3398, + "nll_loss": 1.3457856178283691, + "rewards/accuracies": 0.4000000059604645, + "rewards/chosen": -0.10290397703647614, + "rewards/margins": -0.0037675693165510893, + "rewards/rejected": -0.09913641214370728, + "step": 2760 + }, + { + "epoch": 0.7777230855776668, + "grad_norm": 0.6328125, + "learning_rate": 7.153879148908496e-07, + "log_odds_chosen": 0.44602876901626587, + "log_odds_ratio": -0.5923662185668945, + "logits/chosen": 0.3108251988887787, + "logits/rejected": -0.019554242491722107, + "logps/chosen": -0.8534099459648132, + "logps/rejected": -1.0774658918380737, + "loss": 1.2261, + "nll_loss": 1.1150697469711304, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.08534099906682968, + "rewards/margins": 0.022405600175261497, + "rewards/rejected": -0.10774660110473633, + "step": 2765 + }, + { + "epoch": 0.7791294564376626, + "grad_norm": 0.89453125, + "learning_rate": 7.068127552860468e-07, + "log_odds_chosen": 0.15509608387947083, + "log_odds_ratio": -0.6707223057746887, + "logits/chosen": 0.23539385199546814, + "logits/rejected": 0.3137028217315674, + "logps/chosen": -0.9319781064987183, + "logps/rejected": -1.0136935710906982, + "loss": 1.2378, + "nll_loss": 1.2062842845916748, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.09319781512022018, + "rewards/margins": 0.00817155558615923, + "rewards/rejected": -0.10136936604976654, + "step": 2770 + }, + { + "epoch": 0.7805358272976584, + "grad_norm": 0.361328125, + "learning_rate": 6.982808307388813e-07, + "log_odds_chosen": 0.3228221535682678, + "log_odds_ratio": -0.6157253384590149, + "logits/chosen": 0.14408710598945618, + "logits/rejected": -0.12160065025091171, + "logps/chosen": -0.8962388038635254, + "logps/rejected": -1.1015846729278564, + "loss": 1.3435, + "nll_loss": 1.1823790073394775, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.08962388336658478, + "rewards/margins": 0.020534580573439598, + "rewards/rejected": -0.11015846580266953, + "step": 2775 + }, + { + "epoch": 0.7819421981576542, + "grad_norm": 0.42578125, + "learning_rate": 6.897923469603023e-07, + "log_odds_chosen": 0.4124717116355896, + "log_odds_ratio": -0.6217266321182251, + "logits/chosen": 0.3927221894264221, + "logits/rejected": -0.1088181883096695, + "logps/chosen": -0.9175729751586914, + "logps/rejected": -1.187263011932373, + "loss": 1.2468, + "nll_loss": 1.0678209066390991, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.09175729751586914, + "rewards/margins": 0.026969006285071373, + "rewards/rejected": -0.11872629821300507, + "step": 2780 + }, + { + "epoch": 0.7833485690176499, + "grad_norm": 1.015625, + "learning_rate": 6.813475086138699e-07, + "log_odds_chosen": 0.4346896708011627, + "log_odds_ratio": -0.5942043662071228, + "logits/chosen": 0.33520328998565674, + "logits/rejected": 0.08043156564235687, + "logps/chosen": -0.7721613645553589, + "logps/rejected": -1.0392404794692993, + "loss": 1.2665, + "nll_loss": 1.1436734199523926, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.07721613347530365, + "rewards/margins": 0.026707911863923073, + "rewards/rejected": -0.10392403602600098, + "step": 2785 + }, + { + "epoch": 0.7847549398776458, + "grad_norm": 0.37890625, + "learning_rate": 6.729465193108195e-07, + "log_odds_chosen": 0.487898051738739, + "log_odds_ratio": -0.6025739908218384, + "logits/chosen": 0.24001376330852509, + "logits/rejected": -0.13463373482227325, + "logps/chosen": -0.946478545665741, + "logps/rejected": -1.3150343894958496, + "loss": 1.3489, + "nll_loss": 1.4121153354644775, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.09464786946773529, + "rewards/margins": 0.03685557097196579, + "rewards/rejected": -0.13150344789028168, + "step": 2790 + }, + { + "epoch": 0.7861613107376415, + "grad_norm": 1.2109375, + "learning_rate": 6.645895816051576e-07, + "log_odds_chosen": 0.06646132469177246, + "log_odds_ratio": -0.7336984276771545, + "logits/chosen": 0.13854220509529114, + "logits/rejected": 0.14925724267959595, + "logps/chosen": -0.866500973701477, + "logps/rejected": -0.8997198343276978, + "loss": 1.3252, + "nll_loss": 1.2038604021072388, + "rewards/accuracies": 0.44999998807907104, + "rewards/chosen": -0.08665008842945099, + "rewards/margins": 0.0033218897879123688, + "rewards/rejected": -0.08997198194265366, + "step": 2795 + }, + { + "epoch": 0.7875676815976373, + "grad_norm": 0.5390625, + "learning_rate": 6.562768969887706e-07, + "log_odds_chosen": -0.08361145108938217, + "log_odds_ratio": -0.8131535649299622, + "logits/chosen": 0.00587810855358839, + "logits/rejected": -0.00947192870080471, + "logps/chosen": -1.0220993757247925, + "logps/rejected": -0.9503192901611328, + "loss": 1.3987, + "nll_loss": 1.5176174640655518, + "rewards/accuracies": 0.44999998807907104, + "rewards/chosen": -0.10220994055271149, + "rewards/margins": -0.00717801321297884, + "rewards/rejected": -0.09503192454576492, + "step": 2800 + }, + { + "epoch": 0.7889740524576331, + "grad_norm": 0.48828125, + "learning_rate": 6.480086658865722e-07, + "log_odds_chosen": 0.20292186737060547, + "log_odds_ratio": -0.7245144248008728, + "logits/chosen": 0.12096218764781952, + "logits/rejected": -0.17297106981277466, + "logps/chosen": -1.0369324684143066, + "logps/rejected": -1.181063175201416, + "loss": 1.3157, + "nll_loss": 1.3815839290618896, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.10369324684143066, + "rewards/margins": 0.014413063414394855, + "rewards/rejected": -0.11810630559921265, + "step": 2805 + }, + { + "epoch": 0.7903804233176288, + "grad_norm": 0.65234375, + "learning_rate": 6.397850876516698e-07, + "log_odds_chosen": 0.13107821345329285, + "log_odds_ratio": -0.8021856546401978, + "logits/chosen": -0.062346749007701874, + "logits/rejected": 0.16534826159477234, + "logps/chosen": -1.063522219657898, + "logps/rejected": -1.1438487768173218, + "loss": 1.275, + "nll_loss": 1.1316450834274292, + "rewards/accuracies": 0.4000000059604645, + "rewards/chosen": -0.10635224729776382, + "rewards/margins": 0.008032634854316711, + "rewards/rejected": -0.11438487470149994, + "step": 2810 + }, + { + "epoch": 0.7917867941776247, + "grad_norm": 0.69140625, + "learning_rate": 6.316063605605569e-07, + "log_odds_chosen": 0.1849391758441925, + "log_odds_ratio": -0.7484263181686401, + "logits/chosen": -0.05717191845178604, + "logits/rejected": 0.0552591010928154, + "logps/chosen": -1.103704810142517, + "logps/rejected": -1.2362749576568604, + "loss": 1.3111, + "nll_loss": 1.2431524991989136, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.11037049442529678, + "rewards/margins": 0.013256999664008617, + "rewards/rejected": -0.12362749874591827, + "step": 2815 + }, + { + "epoch": 0.7931931650376204, + "grad_norm": 0.412109375, + "learning_rate": 6.234726818083323e-07, + "log_odds_chosen": 0.5808418989181519, + "log_odds_ratio": -0.539539635181427, + "logits/chosen": 0.15562665462493896, + "logits/rejected": 0.16735698282718658, + "logps/chosen": -0.7677489519119263, + "logps/rejected": -1.1099231243133545, + "loss": 1.355, + "nll_loss": 1.0738924741744995, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.07677489519119263, + "rewards/margins": 0.034217409789562225, + "rewards/rejected": -0.11099229753017426, + "step": 2820 + }, + { + "epoch": 0.7945995358976162, + "grad_norm": 1.109375, + "learning_rate": 6.153842475039468e-07, + "log_odds_chosen": 0.5155315399169922, + "log_odds_ratio": -0.5820074081420898, + "logits/chosen": 0.03340931981801987, + "logits/rejected": -0.009877646341919899, + "logps/chosen": -0.8543240427970886, + "logps/rejected": -1.2141969203948975, + "loss": 1.3416, + "nll_loss": 1.3335164785385132, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.08543241024017334, + "rewards/margins": 0.03598729148507118, + "rewards/rejected": -0.12141970545053482, + "step": 2825 + }, + { + "epoch": 0.796005906757612, + "grad_norm": 0.55859375, + "learning_rate": 6.073412526654762e-07, + "log_odds_chosen": 0.3356287181377411, + "log_odds_ratio": -0.6290286779403687, + "logits/chosen": 0.1556534469127655, + "logits/rejected": -0.012762689962983131, + "logps/chosen": -0.9366539716720581, + "logps/rejected": -1.118251085281372, + "loss": 1.3256, + "nll_loss": 1.3431589603424072, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.09366540610790253, + "rewards/margins": 0.018159715458750725, + "rewards/rejected": -0.11182510852813721, + "step": 2830 + }, + { + "epoch": 0.7974122776176078, + "grad_norm": 1.171875, + "learning_rate": 5.993438912154148e-07, + "log_odds_chosen": 0.603886067867279, + "log_odds_ratio": -0.5090949535369873, + "logits/chosen": 0.09760837256908417, + "logits/rejected": -0.09581668674945831, + "logps/chosen": -0.8826834559440613, + "logps/rejected": -1.224760890007019, + "loss": 1.2492, + "nll_loss": 1.2926743030548096, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.08826833963394165, + "rewards/margins": 0.034207746386528015, + "rewards/rejected": -0.12247607856988907, + "step": 2835 + }, + { + "epoch": 0.7988186484776035, + "grad_norm": 0.671875, + "learning_rate": 5.913923559760043e-07, + "log_odds_chosen": 0.42496395111083984, + "log_odds_ratio": -0.6716245412826538, + "logits/chosen": -0.25178101658821106, + "logits/rejected": 0.042255472391843796, + "logps/chosen": -0.9133031964302063, + "logps/rejected": -1.154044270515442, + "loss": 1.3642, + "nll_loss": 1.3009847402572632, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.09133031964302063, + "rewards/margins": 0.02407410368323326, + "rewards/rejected": -0.11540442705154419, + "step": 2840 + }, + { + "epoch": 0.8002250193375994, + "grad_norm": 0.640625, + "learning_rate": 5.83486838664582e-07, + "log_odds_chosen": 0.010806980542838573, + "log_odds_ratio": -0.7399574518203735, + "logits/chosen": 0.28659483790397644, + "logits/rejected": 0.009598970413208008, + "logps/chosen": -1.0342603921890259, + "logps/rejected": -1.0249049663543701, + "loss": 1.336, + "nll_loss": 1.3047925233840942, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.10342603921890259, + "rewards/margins": -0.0009355418151244521, + "rewards/rejected": -0.10249048471450806, + "step": 2845 + }, + { + "epoch": 0.8016313901975951, + "grad_norm": 0.6328125, + "learning_rate": 5.756275298889588e-07, + "log_odds_chosen": 0.41581591963768005, + "log_odds_ratio": -0.5534268021583557, + "logits/chosen": 0.18089225888252258, + "logits/rejected": 0.21019300818443298, + "logps/chosen": -0.8669838905334473, + "logps/rejected": -1.126821517944336, + "loss": 1.2032, + "nll_loss": 1.023749589920044, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.08669839799404144, + "rewards/margins": 0.025983760133385658, + "rewards/rejected": -0.11268214881420135, + "step": 2850 + }, + { + "epoch": 0.8030377610575908, + "grad_norm": 1.03125, + "learning_rate": 5.678146191428266e-07, + "log_odds_chosen": 0.3920010030269623, + "log_odds_ratio": -0.8133651614189148, + "logits/chosen": -0.07348822802305222, + "logits/rejected": -0.08784066885709763, + "logps/chosen": -1.0480382442474365, + "logps/rejected": -1.3849751949310303, + "loss": 1.2955, + "nll_loss": 1.2348756790161133, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.10480382293462753, + "rewards/margins": 0.03369368985295296, + "rewards/rejected": -0.1384975016117096, + "step": 2855 + }, + { + "epoch": 0.8044441319175867, + "grad_norm": 0.80859375, + "learning_rate": 5.600482948011835e-07, + "log_odds_chosen": 0.6736562848091125, + "log_odds_ratio": -0.5489833354949951, + "logits/chosen": 0.32034921646118164, + "logits/rejected": 0.19463083148002625, + "logps/chosen": -0.7580772638320923, + "logps/rejected": -1.1340504884719849, + "loss": 1.2874, + "nll_loss": 1.0859034061431885, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.07580773532390594, + "rewards/margins": 0.03759731352329254, + "rewards/rejected": -0.11340503394603729, + "step": 2860 + }, + { + "epoch": 0.8058505027775824, + "grad_norm": 0.8515625, + "learning_rate": 5.523287441157973e-07, + "log_odds_chosen": 0.33141160011291504, + "log_odds_ratio": -0.6283584237098694, + "logits/chosen": 0.26048731803894043, + "logits/rejected": 0.09480077028274536, + "logps/chosen": -0.8867843747138977, + "logps/rejected": -1.0908777713775635, + "loss": 1.2885, + "nll_loss": 1.1591079235076904, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.08867844939231873, + "rewards/margins": 0.020409341901540756, + "rewards/rejected": -0.10908778756856918, + "step": 2865 + }, + { + "epoch": 0.8072568736375783, + "grad_norm": 0.91796875, + "learning_rate": 5.446561532106878e-07, + "log_odds_chosen": 0.5373131632804871, + "log_odds_ratio": -0.5314943790435791, + "logits/chosen": 0.07900340110063553, + "logits/rejected": -0.17070356011390686, + "logps/chosen": -0.8341196775436401, + "logps/rejected": -1.1634811162948608, + "loss": 1.3394, + "nll_loss": 1.2959939241409302, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.08341196924448013, + "rewards/margins": 0.03293614834547043, + "rewards/rejected": -0.11634812504053116, + "step": 2870 + }, + { + "epoch": 0.808663244497574, + "grad_norm": 0.82421875, + "learning_rate": 5.370307070776398e-07, + "log_odds_chosen": 0.28469741344451904, + "log_odds_ratio": -0.6910279393196106, + "logits/chosen": 0.06598956882953644, + "logits/rejected": 0.07577238976955414, + "logps/chosen": -0.8683170080184937, + "logps/rejected": -1.0559046268463135, + "loss": 1.3146, + "nll_loss": 1.086538553237915, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.08683168888092041, + "rewards/margins": 0.01875876821577549, + "rewards/rejected": -0.10559046268463135, + "step": 2875 + }, + { + "epoch": 0.8100696153575698, + "grad_norm": 1.1171875, + "learning_rate": 5.294525895717447e-07, + "log_odds_chosen": 0.15487167239189148, + "log_odds_ratio": -0.7134450674057007, + "logits/chosen": 0.09006836265325546, + "logits/rejected": -0.11367156356573105, + "logps/chosen": -0.9337458610534668, + "logps/rejected": -1.0194793939590454, + "loss": 1.2439, + "nll_loss": 1.2626831531524658, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.0933745726943016, + "rewards/margins": 0.00857335515320301, + "rewards/rejected": -0.10194794088602066, + "step": 2880 + }, + { + "epoch": 0.8114759862175656, + "grad_norm": 0.59375, + "learning_rate": 5.219219834069655e-07, + "log_odds_chosen": -0.030949676409363747, + "log_odds_ratio": -0.8828393816947937, + "logits/chosen": 0.17199784517288208, + "logits/rejected": -0.15780949592590332, + "logps/chosen": -0.9767589569091797, + "logps/rejected": -1.0622228384017944, + "loss": 1.3058, + "nll_loss": 1.3363220691680908, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.09767589718103409, + "rewards/margins": 0.008546384051442146, + "rewards/rejected": -0.10622228682041168, + "step": 2885 + }, + { + "epoch": 0.8128823570775614, + "grad_norm": 0.7734375, + "learning_rate": 5.14439070151731e-07, + "log_odds_chosen": 0.008938372135162354, + "log_odds_ratio": -0.7494646310806274, + "logits/chosen": 0.08280684053897858, + "logits/rejected": 0.01193913072347641, + "logps/chosen": -0.9013395309448242, + "logps/rejected": -0.8923047780990601, + "loss": 1.3378, + "nll_loss": 1.3657349348068237, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.09013396501541138, + "rewards/margins": -0.0009034700924530625, + "rewards/rejected": -0.0892305001616478, + "step": 2890 + }, + { + "epoch": 0.8142887279375571, + "grad_norm": 0.80078125, + "learning_rate": 5.070040302245596e-07, + "log_odds_chosen": 0.14327339828014374, + "log_odds_ratio": -0.7465823292732239, + "logits/chosen": 0.07318969070911407, + "logits/rejected": -0.08628226816654205, + "logps/chosen": -1.0324245691299438, + "logps/rejected": -1.1777406930923462, + "loss": 1.2994, + "nll_loss": 1.3385908603668213, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.10324247181415558, + "rewards/margins": 0.014531604945659637, + "rewards/rejected": -0.11777406930923462, + "step": 2895 + }, + { + "epoch": 0.8156950987975529, + "grad_norm": 0.94140625, + "learning_rate": 4.996170428897087e-07, + "log_odds_chosen": 0.1323220580816269, + "log_odds_ratio": -0.6626992225646973, + "logits/chosen": 0.17111194133758545, + "logits/rejected": 0.08679263293743134, + "logps/chosen": -0.9184327125549316, + "logps/rejected": -0.9902726411819458, + "loss": 1.3859, + "nll_loss": 1.392698049545288, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.09184327721595764, + "rewards/margins": 0.0071840002201497555, + "rewards/rejected": -0.09902726113796234, + "step": 2900 + }, + { + "epoch": 0.8171014696575487, + "grad_norm": 0.4296875, + "learning_rate": 4.922782862528525e-07, + "log_odds_chosen": 0.7320371866226196, + "log_odds_ratio": -0.5086044669151306, + "logits/chosen": -0.006654873490333557, + "logits/rejected": -0.1328873336315155, + "logps/chosen": -0.8062270283699036, + "logps/rejected": -1.2401305437088013, + "loss": 1.3848, + "nll_loss": 1.377241611480713, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.08062271773815155, + "rewards/margins": 0.043390341103076935, + "rewards/rejected": -0.12401305139064789, + "step": 2905 + }, + { + "epoch": 0.8185078405175444, + "grad_norm": 0.435546875, + "learning_rate": 4.84987937256787e-07, + "log_odds_chosen": 0.31368353962898254, + "log_odds_ratio": -0.6530981659889221, + "logits/chosen": 0.16416539251804352, + "logits/rejected": -0.05000491812825203, + "logps/chosen": -0.8580842018127441, + "logps/rejected": -1.0716028213500977, + "loss": 1.2962, + "nll_loss": 1.2167668342590332, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.0858084186911583, + "rewards/margins": 0.021351851522922516, + "rewards/rejected": -0.10716027021408081, + "step": 2910 + }, + { + "epoch": 0.8199142113775403, + "grad_norm": 0.875, + "learning_rate": 4.777461716771664e-07, + "log_odds_chosen": -0.14726386964321136, + "log_odds_ratio": -0.8374557495117188, + "logits/chosen": 0.23301705718040466, + "logits/rejected": -0.11433436721563339, + "logps/chosen": -1.1605294942855835, + "logps/rejected": -1.0649001598358154, + "loss": 1.3267, + "nll_loss": 1.2344844341278076, + "rewards/accuracies": 0.4000000059604645, + "rewards/chosen": -0.11605296283960342, + "rewards/margins": -0.00956294871866703, + "rewards/rejected": -0.10649001598358154, + "step": 2915 + }, + { + "epoch": 0.821320582237536, + "grad_norm": 0.92578125, + "learning_rate": 4.7055316411826203e-07, + "log_odds_chosen": 0.27676817774772644, + "log_odds_ratio": -0.6456387042999268, + "logits/chosen": 0.2127613127231598, + "logits/rejected": 0.0729435533285141, + "logps/chosen": -0.904322624206543, + "logps/rejected": -1.0903629064559937, + "loss": 1.3024, + "nll_loss": 1.3120503425598145, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.09043227136135101, + "rewards/margins": 0.018604028970003128, + "rewards/rejected": -0.10903630405664444, + "step": 2920 + }, + { + "epoch": 0.8227269530975319, + "grad_norm": 0.8828125, + "learning_rate": 4.63409088008753e-07, + "log_odds_chosen": 0.023978684097528458, + "log_odds_ratio": -0.8245126605033875, + "logits/chosen": 0.14065992832183838, + "logits/rejected": 0.173757404088974, + "logps/chosen": -1.055584192276001, + "logps/rejected": -1.0905482769012451, + "loss": 1.3143, + "nll_loss": 1.3115730285644531, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.10555841028690338, + "rewards/margins": 0.0034964128863066435, + "rewards/rejected": -0.10905482620000839, + "step": 2925 + }, + { + "epoch": 0.8241333239575276, + "grad_norm": 0.44921875, + "learning_rate": 4.5631411559754615e-07, + "log_odds_chosen": 0.58967524766922, + "log_odds_ratio": -0.5444141626358032, + "logits/chosen": 0.05109834671020508, + "logits/rejected": -0.01770491525530815, + "logps/chosen": -0.8372839093208313, + "logps/rejected": -1.223162293434143, + "loss": 1.2745, + "nll_loss": 1.2411905527114868, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.08372838795185089, + "rewards/margins": 0.03858783096075058, + "rewards/rejected": -0.12231622636318207, + "step": 2930 + }, + { + "epoch": 0.8255396948175234, + "grad_norm": 0.388671875, + "learning_rate": 4.4926841794962153e-07, + "log_odds_chosen": 0.2589498460292816, + "log_odds_ratio": -0.6366890668869019, + "logits/chosen": 0.11284098774194717, + "logits/rejected": -0.06327764689922333, + "logps/chosen": -0.9735992550849915, + "logps/rejected": -1.0729389190673828, + "loss": 1.2247, + "nll_loss": 1.3420193195343018, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.09735993295907974, + "rewards/margins": 0.00993395783007145, + "rewards/rejected": -0.10729388892650604, + "step": 2935 + }, + { + "epoch": 0.8269460656775192, + "grad_norm": 0.3515625, + "learning_rate": 4.422721649419104e-07, + "log_odds_chosen": 0.5509729981422424, + "log_odds_ratio": -0.5443827509880066, + "logits/chosen": 0.19436611235141754, + "logits/rejected": -0.043565236032009125, + "logps/chosen": -0.7678650617599487, + "logps/rejected": -1.031546950340271, + "loss": 1.187, + "nll_loss": 1.1322847604751587, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.07678651064634323, + "rewards/margins": 0.026368189603090286, + "rewards/rejected": -0.10315469652414322, + "step": 2940 + }, + { + "epoch": 0.8283524365375149, + "grad_norm": 2.3125, + "learning_rate": 4.3532552525919535e-07, + "log_odds_chosen": 0.3315231204032898, + "log_odds_ratio": -0.6372717618942261, + "logits/chosen": 0.028471380472183228, + "logits/rejected": 0.030725056305527687, + "logps/chosen": -0.8331094980239868, + "logps/rejected": -1.1218417882919312, + "loss": 1.3841, + "nll_loss": 1.3554751873016357, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.08331094682216644, + "rewards/margins": 0.028873246163129807, + "rewards/rejected": -0.11218418926000595, + "step": 2945 + }, + { + "epoch": 0.8297588073975107, + "grad_norm": 0.87109375, + "learning_rate": 4.2842866639004555e-07, + "log_odds_chosen": 0.61634761095047, + "log_odds_ratio": -0.5611122250556946, + "logits/chosen": 0.13515019416809082, + "logits/rejected": 0.17823290824890137, + "logps/chosen": -0.7703655362129211, + "logps/rejected": -1.0910954475402832, + "loss": 1.326, + "nll_loss": 1.0025686025619507, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.07703655958175659, + "rewards/margins": 0.03207298368215561, + "rewards/rejected": -0.1091095358133316, + "step": 2950 + }, + { + "epoch": 0.8311651782575065, + "grad_norm": 0.443359375, + "learning_rate": 4.2158175462278045e-07, + "log_odds_chosen": 0.32084041833877563, + "log_odds_ratio": -0.6876135468482971, + "logits/chosen": 0.30791932344436646, + "logits/rejected": 0.02039993740618229, + "logps/chosen": -0.9735520482063293, + "logps/rejected": -1.1728037595748901, + "loss": 1.3285, + "nll_loss": 1.2136799097061157, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.09735521674156189, + "rewards/margins": 0.019925158470869064, + "rewards/rejected": -0.11728037893772125, + "step": 2955 + }, + { + "epoch": 0.8325715491175023, + "grad_norm": 0.455078125, + "learning_rate": 4.147849550414562e-07, + "log_odds_chosen": 0.41564807295799255, + "log_odds_ratio": -0.6257321834564209, + "logits/chosen": 0.05283625051379204, + "logits/rejected": -0.07132598012685776, + "logps/chosen": -0.9385086894035339, + "logps/rejected": -1.1858062744140625, + "loss": 1.2499, + "nll_loss": 1.150863528251648, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.09385088086128235, + "rewards/margins": 0.02472977340221405, + "rewards/rejected": -0.1185806393623352, + "step": 2960 + }, + { + "epoch": 0.833977919977498, + "grad_norm": 0.5234375, + "learning_rate": 4.0803843152188714e-07, + "log_odds_chosen": -0.022001957520842552, + "log_odds_ratio": -0.8182210922241211, + "logits/chosen": 0.0680101290345192, + "logits/rejected": 0.07088492810726166, + "logps/chosen": -0.9397522211074829, + "logps/rejected": -0.9277949333190918, + "loss": 1.3689, + "nll_loss": 1.3800146579742432, + "rewards/accuracies": 0.4000000059604645, + "rewards/chosen": -0.09397522360086441, + "rewards/margins": -0.0011957198148593307, + "rewards/rejected": -0.0927794948220253, + "step": 2965 + }, + { + "epoch": 0.8353842908374939, + "grad_norm": 0.6328125, + "learning_rate": 4.0134234672769723e-07, + "log_odds_chosen": 0.5197348594665527, + "log_odds_ratio": -0.5607367157936096, + "logits/chosen": 0.03629279136657715, + "logits/rejected": -0.05779438465833664, + "logps/chosen": -0.9035292863845825, + "logps/rejected": -1.2461154460906982, + "loss": 1.2671, + "nll_loss": 1.3226429224014282, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.09035293012857437, + "rewards/margins": 0.03425862640142441, + "rewards/rejected": -0.12461157143115997, + "step": 2970 + }, + { + "epoch": 0.8367906616974896, + "grad_norm": 0.392578125, + "learning_rate": 3.94696862106394e-07, + "log_odds_chosen": 0.6360191106796265, + "log_odds_ratio": -0.6056066751480103, + "logits/chosen": -0.09138171374797821, + "logits/rejected": -0.028163975104689598, + "logps/chosen": -0.8779658079147339, + "logps/rejected": -1.2462159395217896, + "loss": 1.2585, + "nll_loss": 1.3103601932525635, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.08779658377170563, + "rewards/margins": 0.03682499751448631, + "rewards/rejected": -0.12462159246206284, + "step": 2975 + }, + { + "epoch": 0.8381970325574855, + "grad_norm": 0.84765625, + "learning_rate": 3.8810213788547825e-07, + "log_odds_chosen": 0.008012396283447742, + "log_odds_ratio": -0.8510234951972961, + "logits/chosen": -0.051430534571409225, + "logits/rejected": 0.029810791835188866, + "logps/chosen": -0.9297181963920593, + "logps/rejected": -0.963575005531311, + "loss": 1.3514, + "nll_loss": 1.2485787868499756, + "rewards/accuracies": 0.44999998807907104, + "rewards/chosen": -0.0929718166589737, + "rewards/margins": 0.0033856802619993687, + "rewards/rejected": -0.09635750204324722, + "step": 2980 + }, + { + "epoch": 0.8396034034174812, + "grad_norm": 0.84375, + "learning_rate": 3.8155833306858015e-07, + "log_odds_chosen": 0.1383453756570816, + "log_odds_ratio": -0.6804744005203247, + "logits/chosen": 0.2983183264732361, + "logits/rejected": 0.166207954287529, + "logps/chosen": -0.9256361126899719, + "logps/rejected": -1.0058834552764893, + "loss": 1.3121, + "nll_loss": 1.1450464725494385, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.09256361424922943, + "rewards/margins": 0.008024740032851696, + "rewards/rejected": -0.1005883440375328, + "step": 2985 + }, + { + "epoch": 0.8410097742774769, + "grad_norm": 0.60546875, + "learning_rate": 3.7506560543162594e-07, + "log_odds_chosen": 0.39446815848350525, + "log_odds_ratio": -0.6375328302383423, + "logits/chosen": 0.15619999170303345, + "logits/rejected": 0.021760011091828346, + "logps/chosen": -0.9194302558898926, + "logps/rejected": -1.1816115379333496, + "loss": 1.2654, + "nll_loss": 1.205482006072998, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.09194303303956985, + "rewards/margins": 0.026218125596642494, + "rewards/rejected": -0.1181611642241478, + "step": 2990 + }, + { + "epoch": 0.8424161451374728, + "grad_norm": 0.48046875, + "learning_rate": 3.6862411151903284e-07, + "log_odds_chosen": 0.922314465045929, + "log_odds_ratio": -0.4417967200279236, + "logits/chosen": 0.20397333800792694, + "logits/rejected": -0.004587986972182989, + "logps/chosen": -0.7588263750076294, + "logps/rejected": -1.3238674402236938, + "loss": 1.2892, + "nll_loss": 1.3261582851409912, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.07588264346122742, + "rewards/margins": 0.05650409311056137, + "rewards/rejected": -0.1323867291212082, + "step": 2995 + }, + { + "epoch": 0.8438225159974685, + "grad_norm": 1.9921875, + "learning_rate": 3.6223400663993795e-07, + "log_odds_chosen": 0.14348141849040985, + "log_odds_ratio": -0.762873113155365, + "logits/chosen": 0.18984070420265198, + "logits/rejected": -0.2189832180738449, + "logps/chosen": -0.8771038055419922, + "logps/rejected": -0.961025059223175, + "loss": 1.381, + "nll_loss": 1.347921371459961, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.08771036565303802, + "rewards/margins": 0.008392124436795712, + "rewards/rejected": -0.0961025133728981, + "step": 3000 + }, + { + "epoch": 0.8452288868574643, + "grad_norm": 0.87109375, + "learning_rate": 3.558954448644483e-07, + "log_odds_chosen": -0.012045865878462791, + "log_odds_ratio": -0.9381014704704285, + "logits/chosen": 0.13290119171142578, + "logits/rejected": 0.23536305129528046, + "logps/chosen": -1.1245501041412354, + "logps/rejected": -1.0270380973815918, + "loss": 1.2123, + "nll_loss": 1.0796576738357544, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.11245502531528473, + "rewards/margins": -0.00975120346993208, + "rewards/rejected": -0.10270382463932037, + "step": 3005 + }, + { + "epoch": 0.8466352577174601, + "grad_norm": 0.85546875, + "learning_rate": 3.496085790199305e-07, + "log_odds_chosen": 0.24462373554706573, + "log_odds_ratio": -0.7010733485221863, + "logits/chosen": -0.04115645959973335, + "logits/rejected": -0.05985347554087639, + "logps/chosen": -0.9546895027160645, + "logps/rejected": -1.1048619747161865, + "loss": 1.3499, + "nll_loss": 1.2850078344345093, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.09546895325183868, + "rewards/margins": 0.01501724123954773, + "rewards/rejected": -0.11048620939254761, + "step": 3010 + }, + { + "epoch": 0.8480416285774559, + "grad_norm": 1.15625, + "learning_rate": 3.4337356068732397e-07, + "log_odds_chosen": 0.24743500351905823, + "log_odds_ratio": -0.712551474571228, + "logits/chosen": 0.10973727703094482, + "logits/rejected": 0.06765292584896088, + "logps/chosen": -0.9948040843009949, + "logps/rejected": -1.153732419013977, + "loss": 1.368, + "nll_loss": 1.1009012460708618, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.09948041290044785, + "rewards/margins": 0.015892835333943367, + "rewards/rejected": -0.11537323892116547, + "step": 3015 + }, + { + "epoch": 0.8494479994374516, + "grad_norm": 0.486328125, + "learning_rate": 3.3719054019748564e-07, + "log_odds_chosen": 0.004147529602050781, + "log_odds_ratio": -0.8406862020492554, + "logits/chosen": 0.1997382640838623, + "logits/rejected": -0.02337918058037758, + "logps/chosen": -1.0684696435928345, + "logps/rejected": -1.0946805477142334, + "loss": 1.3976, + "nll_loss": 1.321757435798645, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.10684695094823837, + "rewards/margins": 0.002621088642627001, + "rewards/rejected": -0.10946805775165558, + "step": 3020 + }, + { + "epoch": 0.8508543702974475, + "grad_norm": 0.66015625, + "learning_rate": 3.310596666275684e-07, + "log_odds_chosen": 0.18196114897727966, + "log_odds_ratio": -0.666560173034668, + "logits/chosen": 0.10361174494028091, + "logits/rejected": 0.15355415642261505, + "logps/chosen": -0.878470778465271, + "logps/rejected": -0.9664610028266907, + "loss": 1.3655, + "nll_loss": 1.1552141904830933, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.08784707635641098, + "rewards/margins": 0.008799021132290363, + "rewards/rejected": -0.09664610028266907, + "step": 3025 + }, + { + "epoch": 0.8522607411574432, + "grad_norm": 0.94921875, + "learning_rate": 3.2498108779742436e-07, + "log_odds_chosen": 0.3598330616950989, + "log_odds_ratio": -0.6202287673950195, + "logits/chosen": 0.20293493568897247, + "logits/rejected": 0.06396631896495819, + "logps/chosen": -0.9000099897384644, + "logps/rejected": -1.089213252067566, + "loss": 1.2846, + "nll_loss": 1.192943811416626, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.09000100940465927, + "rewards/margins": 0.01892031356692314, + "rewards/rejected": -0.10892131179571152, + "step": 3030 + }, + { + "epoch": 0.853667112017439, + "grad_norm": 0.5703125, + "learning_rate": 3.189549502660397e-07, + "log_odds_chosen": 0.3495637774467468, + "log_odds_ratio": -0.6821560859680176, + "logits/chosen": 0.2623140215873718, + "logits/rejected": -0.038874976336956024, + "logps/chosen": -0.9177125096321106, + "logps/rejected": -1.1296727657318115, + "loss": 1.3407, + "nll_loss": 1.101432204246521, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.09177124500274658, + "rewards/margins": 0.02119603380560875, + "rewards/rejected": -0.11296728998422623, + "step": 3035 + }, + { + "epoch": 0.8550734828774348, + "grad_norm": 0.5546875, + "learning_rate": 3.1298139932800317e-07, + "log_odds_chosen": 0.24225249886512756, + "log_odds_ratio": -0.6946839094161987, + "logits/chosen": -0.00331364874728024, + "logits/rejected": 0.1221175566315651, + "logps/chosen": -0.9066191911697388, + "logps/rejected": -1.0242269039154053, + "loss": 1.2658, + "nll_loss": 1.0006282329559326, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.0906619131565094, + "rewards/margins": 0.0117607731372118, + "rewards/rejected": -0.10242269188165665, + "step": 3040 + }, + { + "epoch": 0.8564798537374305, + "grad_norm": 0.921875, + "learning_rate": 3.070605790100026e-07, + "log_odds_chosen": 0.47688302397727966, + "log_odds_ratio": -0.5514553785324097, + "logits/chosen": 0.3406530022621155, + "logits/rejected": -0.20624065399169922, + "logps/chosen": -0.7682023644447327, + "logps/rejected": -1.119099497795105, + "loss": 1.2605, + "nll_loss": 1.0881941318511963, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.07682023197412491, + "rewards/margins": 0.03508970886468887, + "rewards/rejected": -0.11190994828939438, + "step": 3045 + }, + { + "epoch": 0.8578862245974264, + "grad_norm": 0.77734375, + "learning_rate": 3.011926320673511e-07, + "log_odds_chosen": 0.31140241026878357, + "log_odds_ratio": -0.7348920106887817, + "logits/chosen": -0.023136280477046967, + "logits/rejected": -0.05698322132229805, + "logps/chosen": -1.0393954515457153, + "logps/rejected": -1.2733558416366577, + "loss": 1.3406, + "nll_loss": 1.4408925771713257, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.10393954813480377, + "rewards/margins": 0.02339603379368782, + "rewards/rejected": -0.1273355782032013, + "step": 3050 + }, + { + "epoch": 0.8592925954574221, + "grad_norm": 0.875, + "learning_rate": 2.95377699980548e-07, + "log_odds_chosen": 0.15080220997333527, + "log_odds_ratio": -0.7218335270881653, + "logits/chosen": 0.16377411782741547, + "logits/rejected": -0.05593908950686455, + "logps/chosen": -0.962912380695343, + "logps/rejected": -1.0588743686676025, + "loss": 1.2954, + "nll_loss": 1.289074182510376, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.09629125893115997, + "rewards/margins": 0.009596194140613079, + "rewards/rejected": -0.10588743537664413, + "step": 3055 + }, + { + "epoch": 0.8606989663174179, + "grad_norm": 0.435546875, + "learning_rate": 2.89615922951863e-07, + "log_odds_chosen": 0.6246389746665955, + "log_odds_ratio": -0.5178121328353882, + "logits/chosen": 0.16498211026191711, + "logits/rejected": -0.22444438934326172, + "logps/chosen": -0.9330152273178101, + "logps/rejected": -1.3666049242019653, + "loss": 1.347, + "nll_loss": 1.2327336072921753, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.09330151975154877, + "rewards/margins": 0.0433589443564415, + "rewards/rejected": -0.13666047155857086, + "step": 3060 + }, + { + "epoch": 0.8621053371774137, + "grad_norm": 0.5625, + "learning_rate": 2.8390743990196063e-07, + "log_odds_chosen": 0.21638545393943787, + "log_odds_ratio": -0.7404422163963318, + "logits/chosen": 0.15448644757270813, + "logits/rejected": 0.014381295070052147, + "logps/chosen": -0.9812231063842773, + "logps/rejected": -1.1194281578063965, + "loss": 1.2685, + "nll_loss": 1.3578256368637085, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.09812231361865997, + "rewards/margins": 0.013820504769682884, + "rewards/rejected": -0.1119428277015686, + "step": 3065 + }, + { + "epoch": 0.8635117080374095, + "grad_norm": 0.703125, + "learning_rate": 2.782523884665467e-07, + "log_odds_chosen": 0.31919899582862854, + "log_odds_ratio": -0.6073621511459351, + "logits/chosen": 0.1252957582473755, + "logits/rejected": -0.25018587708473206, + "logps/chosen": -0.9057314991950989, + "logps/rejected": -1.1693683862686157, + "loss": 1.2821, + "nll_loss": 1.341191053390503, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.09057314693927765, + "rewards/margins": 0.026363695040345192, + "rewards/rejected": -0.11693684756755829, + "step": 3070 + }, + { + "epoch": 0.8649180788974052, + "grad_norm": 0.99609375, + "learning_rate": 2.7265090499305284e-07, + "log_odds_chosen": 0.16994507610797882, + "log_odds_ratio": -0.6835598349571228, + "logits/chosen": 0.02478812076151371, + "logits/rejected": -0.0021049350034445524, + "logps/chosen": -1.0673532485961914, + "logps/rejected": -1.141217589378357, + "loss": 1.3122, + "nll_loss": 1.3584511280059814, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.10673532634973526, + "rewards/margins": 0.007386439945548773, + "rewards/rejected": -0.11412177234888077, + "step": 3075 + }, + { + "epoch": 0.866324449757401, + "grad_norm": 0.6015625, + "learning_rate": 2.671031245373476e-07, + "log_odds_chosen": 0.5922014713287354, + "log_odds_ratio": -0.6016316413879395, + "logits/chosen": 0.3141447603702545, + "logits/rejected": -0.19234515726566315, + "logps/chosen": -0.805921733379364, + "logps/rejected": -1.2572062015533447, + "loss": 1.244, + "nll_loss": 1.252669095993042, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.08059217780828476, + "rewards/margins": 0.045128434896469116, + "rewards/rejected": -0.12572060525417328, + "step": 3080 + }, + { + "epoch": 0.8677308206173968, + "grad_norm": 0.421875, + "learning_rate": 2.6160918086048067e-07, + "log_odds_chosen": 0.23566380143165588, + "log_odds_ratio": -0.6851691603660583, + "logits/chosen": 0.13757197558879852, + "logits/rejected": 0.04290277883410454, + "logps/chosen": -0.8842877149581909, + "logps/rejected": -1.1051255464553833, + "loss": 1.2529, + "nll_loss": 1.2141902446746826, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.08842878043651581, + "rewards/margins": 0.022083774209022522, + "rewards/rejected": -0.11051255464553833, + "step": 3085 + }, + { + "epoch": 0.8691371914773925, + "grad_norm": 1.09375, + "learning_rate": 2.56169206425457e-07, + "log_odds_chosen": 0.6022747755050659, + "log_odds_ratio": -0.5932275056838989, + "logits/chosen": 0.2205590009689331, + "logits/rejected": -0.08603726327419281, + "logps/chosen": -0.8612334132194519, + "logps/rejected": -1.2691211700439453, + "loss": 1.2449, + "nll_loss": 1.21316659450531, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.08612334728240967, + "rewards/margins": 0.04078877717256546, + "rewards/rejected": -0.12691213190555573, + "step": 3090 + }, + { + "epoch": 0.8705435623373884, + "grad_norm": 0.8359375, + "learning_rate": 2.507833323940448e-07, + "log_odds_chosen": 0.38121479749679565, + "log_odds_ratio": -0.6170352697372437, + "logits/chosen": 0.20064540207386017, + "logits/rejected": -0.10161037743091583, + "logps/chosen": -0.8439592123031616, + "logps/rejected": -1.1321828365325928, + "loss": 1.4369, + "nll_loss": 1.206767201423645, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.08439593017101288, + "rewards/margins": 0.028822356835007668, + "rewards/rejected": -0.1132182702422142, + "step": 3095 + }, + { + "epoch": 0.8719499331973841, + "grad_norm": 0.38671875, + "learning_rate": 2.454516886236102e-07, + "log_odds_chosen": 0.39103588461875916, + "log_odds_ratio": -0.6175183653831482, + "logits/chosen": 0.1710575371980667, + "logits/rejected": 0.1462509036064148, + "logps/chosen": -0.964928150177002, + "logps/rejected": -1.2527413368225098, + "loss": 1.2979, + "nll_loss": 1.2733262777328491, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.09649281948804855, + "rewards/margins": 0.02878131903707981, + "rewards/rejected": -0.12527413666248322, + "step": 3100 + }, + { + "epoch": 0.87335630405738, + "grad_norm": 0.94140625, + "learning_rate": 2.4017440366398944e-07, + "log_odds_chosen": 0.07706121355295181, + "log_odds_ratio": -0.7358866930007935, + "logits/chosen": 0.419283926486969, + "logits/rejected": 0.1185811311006546, + "logps/chosen": -0.9240352511405945, + "logps/rejected": -0.995712161064148, + "loss": 1.2982, + "nll_loss": 1.1964889764785767, + "rewards/accuracies": 0.44999998807907104, + "rewards/chosen": -0.09240352362394333, + "rewards/margins": 0.00716769602149725, + "rewards/rejected": -0.09957122057676315, + "step": 3105 + }, + { + "epoch": 0.8747626749173757, + "grad_norm": 0.73046875, + "learning_rate": 2.3495160475438678e-07, + "log_odds_chosen": 0.012202749028801918, + "log_odds_ratio": -0.751497745513916, + "logits/chosen": 0.31101810932159424, + "logits/rejected": 0.0647566169500351, + "logps/chosen": -0.8119691014289856, + "logps/rejected": -0.8204206228256226, + "loss": 1.3133, + "nll_loss": 1.2127889394760132, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.08119690418243408, + "rewards/margins": 0.0008451612666249275, + "rewards/rejected": -0.08204207569360733, + "step": 3110 + }, + { + "epoch": 0.8761690457773715, + "grad_norm": 0.4765625, + "learning_rate": 2.2978341782030955e-07, + "log_odds_chosen": 0.2562883496284485, + "log_odds_ratio": -0.7287235260009766, + "logits/chosen": 0.16910497844219208, + "logits/rejected": 0.021375443786382675, + "logps/chosen": -0.9910010099411011, + "logps/rejected": -1.17304265499115, + "loss": 1.2457, + "nll_loss": 1.2075997591018677, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.09910011291503906, + "rewards/margins": 0.018204163759946823, + "rewards/rejected": -0.11730428040027618, + "step": 3115 + }, + { + "epoch": 0.8775754166373673, + "grad_norm": 0.4765625, + "learning_rate": 2.246699674705291e-07, + "log_odds_chosen": 0.4453654885292053, + "log_odds_ratio": -0.5896596908569336, + "logits/chosen": 0.07442586123943329, + "logits/rejected": -0.11232948303222656, + "logps/chosen": -0.8934370279312134, + "logps/rejected": -1.1213937997817993, + "loss": 1.342, + "nll_loss": 1.2364587783813477, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.08934369683265686, + "rewards/margins": 0.02279568277299404, + "rewards/rejected": -0.11213938146829605, + "step": 3120 + }, + { + "epoch": 0.8789817874973631, + "grad_norm": 0.39453125, + "learning_rate": 2.1961137699407774e-07, + "log_odds_chosen": 0.18608251214027405, + "log_odds_ratio": -0.6622592806816101, + "logits/chosen": 0.13960817456245422, + "logits/rejected": 0.06866041570901871, + "logps/chosen": -1.1074713468551636, + "logps/rejected": -1.2126071453094482, + "loss": 1.2905, + "nll_loss": 1.1634876728057861, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.11074714362621307, + "rewards/margins": 0.010513585060834885, + "rewards/rejected": -0.12126071751117706, + "step": 3125 + }, + { + "epoch": 0.8803881583573588, + "grad_norm": 0.412109375, + "learning_rate": 2.1460776835727671e-07, + "log_odds_chosen": 0.8946942090988159, + "log_odds_ratio": -0.5820009708404541, + "logits/chosen": 0.01775258220732212, + "logits/rejected": -0.2365727424621582, + "logps/chosen": -1.1353235244750977, + "logps/rejected": -1.7781460285186768, + "loss": 1.2756, + "nll_loss": 1.1982827186584473, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.11353236436843872, + "rewards/margins": 0.06428224593400955, + "rewards/rejected": -0.17781458795070648, + "step": 3130 + }, + { + "epoch": 0.8817945292173546, + "grad_norm": 1.984375, + "learning_rate": 2.0965926220079396e-07, + "log_odds_chosen": 0.3438710570335388, + "log_odds_ratio": -0.6113203763961792, + "logits/chosen": 0.013415333814918995, + "logits/rejected": 0.04428642615675926, + "logps/chosen": -0.9512618184089661, + "logps/rejected": -1.1797399520874023, + "loss": 1.2454, + "nll_loss": 1.2080955505371094, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.09512616693973541, + "rewards/margins": 0.02284781076014042, + "rewards/rejected": -0.11797398328781128, + "step": 3135 + }, + { + "epoch": 0.8832009000773504, + "grad_norm": 0.5625, + "learning_rate": 2.0476597783673696e-07, + "log_odds_chosen": -0.04732183367013931, + "log_odds_ratio": -0.871155858039856, + "logits/chosen": 0.39590129256248474, + "logits/rejected": 0.1537986397743225, + "logps/chosen": -0.9534363746643066, + "logps/rejected": -0.9588222503662109, + "loss": 1.288, + "nll_loss": 1.1494090557098389, + "rewards/accuracies": 0.4000000059604645, + "rewards/chosen": -0.09534362703561783, + "rewards/margins": 0.000538596126716584, + "rewards/rejected": -0.09588223695755005, + "step": 3140 + }, + { + "epoch": 0.8846072709373461, + "grad_norm": 0.419921875, + "learning_rate": 1.9992803324577537e-07, + "log_odds_chosen": 0.14176274836063385, + "log_odds_ratio": -0.7075371742248535, + "logits/chosen": 0.1753927320241928, + "logits/rejected": -0.047832123935222626, + "logps/chosen": -1.0028040409088135, + "logps/rejected": -1.0708872079849243, + "loss": 1.3469, + "nll_loss": 1.2555263042449951, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.10028040409088135, + "rewards/margins": 0.0068083046935498714, + "rewards/rejected": -0.10708870738744736, + "step": 3145 + }, + { + "epoch": 0.886013641797342, + "grad_norm": 0.45703125, + "learning_rate": 1.951455450742959e-07, + "log_odds_chosen": 0.05005607753992081, + "log_odds_ratio": -0.7353087663650513, + "logits/chosen": 0.0414562001824379, + "logits/rejected": 0.09611912071704865, + "logps/chosen": -1.035521388053894, + "logps/rejected": -1.057826280593872, + "loss": 1.3203, + "nll_loss": 1.315623164176941, + "rewards/accuracies": 0.44999998807907104, + "rewards/chosen": -0.10355212539434433, + "rewards/margins": 0.002230483340099454, + "rewards/rejected": -0.10578262805938721, + "step": 3150 + }, + { + "epoch": 0.8874200126573377, + "grad_norm": 0.48828125, + "learning_rate": 1.9041862863159065e-07, + "log_odds_chosen": 0.23923692107200623, + "log_odds_ratio": -0.6668912172317505, + "logits/chosen": 0.08915407210588455, + "logits/rejected": 0.14433476328849792, + "logps/chosen": -0.9437106847763062, + "logps/rejected": -1.0925174951553345, + "loss": 1.2482, + "nll_loss": 1.0153512954711914, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.09437108039855957, + "rewards/margins": 0.014880669303238392, + "rewards/rejected": -0.10925173759460449, + "step": 3155 + }, + { + "epoch": 0.8888263835173336, + "grad_norm": 0.458984375, + "learning_rate": 1.8574739788707692e-07, + "log_odds_chosen": 0.2607952952384949, + "log_odds_ratio": -0.6574887037277222, + "logits/chosen": 0.38875633478164673, + "logits/rejected": 0.11233469098806381, + "logps/chosen": -0.8368536829948425, + "logps/rejected": -1.0057650804519653, + "loss": 1.3697, + "nll_loss": 1.295095682144165, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.08368537575006485, + "rewards/margins": 0.016891125589609146, + "rewards/rejected": -0.1005764976143837, + "step": 3160 + }, + { + "epoch": 0.8902327543773293, + "grad_norm": 0.43359375, + "learning_rate": 1.811319654675478e-07, + "log_odds_chosen": 0.48963475227355957, + "log_odds_ratio": -0.558243989944458, + "logits/chosen": 0.020070917904376984, + "logits/rejected": 0.059196602553129196, + "logps/chosen": -0.8479886054992676, + "logps/rejected": -1.139087200164795, + "loss": 1.2397, + "nll_loss": 1.0761158466339111, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.08479885756969452, + "rewards/margins": 0.029109861701726913, + "rewards/rejected": -0.11390872299671173, + "step": 3165 + }, + { + "epoch": 0.8916391252373251, + "grad_norm": 1.1875, + "learning_rate": 1.765724426544596e-07, + "log_odds_chosen": -0.31798478960990906, + "log_odds_ratio": -0.9315303564071655, + "logits/chosen": 0.06348660588264465, + "logits/rejected": 0.04159253090620041, + "logps/chosen": -0.9252711534500122, + "logps/rejected": -0.7397373914718628, + "loss": 1.3367, + "nll_loss": 1.2851605415344238, + "rewards/accuracies": 0.25, + "rewards/chosen": -0.0925271064043045, + "rewards/margins": -0.018553372472524643, + "rewards/rejected": -0.07397373020648956, + "step": 3170 + }, + { + "epoch": 0.8930454960973209, + "grad_norm": 0.51953125, + "learning_rate": 1.7206893938124664e-07, + "log_odds_chosen": 0.289433091878891, + "log_odds_ratio": -0.6993290185928345, + "logits/chosen": 0.17868265509605408, + "logits/rejected": 0.11774604022502899, + "logps/chosen": -0.8691530227661133, + "logps/rejected": -0.9393417239189148, + "loss": 1.3159, + "nll_loss": 1.2740023136138916, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.08691529929637909, + "rewards/margins": 0.007018885109573603, + "rewards/rejected": -0.09393417835235596, + "step": 3175 + }, + { + "epoch": 0.8944518669573166, + "grad_norm": 1.0078125, + "learning_rate": 1.6762156423067055e-07, + "log_odds_chosen": 0.21066789329051971, + "log_odds_ratio": -0.7149744629859924, + "logits/chosen": 0.14750464260578156, + "logits/rejected": -0.06367696821689606, + "logps/chosen": -0.9500678777694702, + "logps/rejected": -1.096605658531189, + "loss": 1.2811, + "nll_loss": 1.2395678758621216, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.0950067862868309, + "rewards/margins": 0.014653787016868591, + "rewards/rejected": -0.1096605658531189, + "step": 3180 + }, + { + "epoch": 0.8958582378173124, + "grad_norm": 0.578125, + "learning_rate": 1.6323042443220276e-07, + "log_odds_chosen": 0.392107218503952, + "log_odds_ratio": -0.6191523671150208, + "logits/chosen": 0.16187028586864471, + "logits/rejected": 0.1439216136932373, + "logps/chosen": -0.9571911096572876, + "logps/rejected": -1.1570160388946533, + "loss": 1.2599, + "nll_loss": 1.3180067539215088, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.0957191064953804, + "rewards/margins": 0.019982485100626945, + "rewards/rejected": -0.1157015934586525, + "step": 3185 + }, + { + "epoch": 0.8972646086773082, + "grad_norm": 0.251953125, + "learning_rate": 1.5889562585943958e-07, + "log_odds_chosen": 0.6321003437042236, + "log_odds_ratio": -0.5375659465789795, + "logits/chosen": 0.22293278574943542, + "logits/rejected": -0.12136626243591309, + "logps/chosen": -0.9416702389717102, + "logps/rejected": -1.3543380498886108, + "loss": 1.2855, + "nll_loss": 1.086060881614685, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.09416702389717102, + "rewards/margins": 0.041266776621341705, + "rewards/rejected": -0.13543380796909332, + "step": 3190 + }, + { + "epoch": 0.898670979537304, + "grad_norm": 0.87109375, + "learning_rate": 1.5461727302754858e-07, + "log_odds_chosen": 0.36316487193107605, + "log_odds_ratio": -0.6222284436225891, + "logits/chosen": 0.2005232870578766, + "logits/rejected": -0.357538104057312, + "logps/chosen": -0.9915369749069214, + "logps/rejected": -1.2216651439666748, + "loss": 1.2588, + "nll_loss": 1.2823837995529175, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.09915368258953094, + "rewards/margins": 0.023012813180685043, + "rewards/rejected": -0.12216649204492569, + "step": 3195 + }, + { + "epoch": 0.9000773503972997, + "grad_norm": 0.58984375, + "learning_rate": 1.5039546909075082e-07, + "log_odds_chosen": -0.10971565544605255, + "log_odds_ratio": -0.8382658958435059, + "logits/chosen": 0.2254742681980133, + "logits/rejected": -0.114879310131073, + "logps/chosen": -0.9886395335197449, + "logps/rejected": -0.953644871711731, + "loss": 1.4266, + "nll_loss": 1.540959358215332, + "rewards/accuracies": 0.44999998807907104, + "rewards/chosen": -0.09886395186185837, + "rewards/margins": -0.0034994587767869234, + "rewards/rejected": -0.09536449611186981, + "step": 3200 + }, + { + "epoch": 0.9014837212572956, + "grad_norm": 1.0390625, + "learning_rate": 1.4623031583982982e-07, + "log_odds_chosen": -0.10182987153530121, + "log_odds_ratio": -0.849809467792511, + "logits/chosen": 0.29536086320877075, + "logits/rejected": 0.19249524176120758, + "logps/chosen": -0.9300820231437683, + "logps/rejected": -0.8594253659248352, + "loss": 1.2539, + "nll_loss": 1.049961805343628, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.09300820529460907, + "rewards/margins": -0.007065662182867527, + "rewards/rejected": -0.08594253659248352, + "step": 3205 + }, + { + "epoch": 0.9028900921172913, + "grad_norm": 0.71875, + "learning_rate": 1.4212191369968125e-07, + "log_odds_chosen": 0.6274880170822144, + "log_odds_ratio": -0.5335894823074341, + "logits/chosen": 0.2696693539619446, + "logits/rejected": -0.06525443494319916, + "logps/chosen": -0.9380648732185364, + "logps/rejected": -1.3793491125106812, + "loss": 1.3315, + "nll_loss": 1.16745924949646, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.09380649030208588, + "rewards/margins": 0.04412844032049179, + "rewards/rejected": -0.13793492317199707, + "step": 3210 + }, + { + "epoch": 0.9042964629772872, + "grad_norm": 0.38671875, + "learning_rate": 1.3807036172688887e-07, + "log_odds_chosen": 0.04567628353834152, + "log_odds_ratio": -0.7277408838272095, + "logits/chosen": 0.24385914206504822, + "logits/rejected": 0.10521028935909271, + "logps/chosen": -0.9530619382858276, + "logps/rejected": -0.9420475959777832, + "loss": 1.271, + "nll_loss": 1.2172807455062866, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.09530619531869888, + "rewards/margins": -0.001101435860618949, + "rewards/rejected": -0.09420476108789444, + "step": 3215 + }, + { + "epoch": 0.9057028338372829, + "grad_norm": 1.3671875, + "learning_rate": 1.3407575760733814e-07, + "log_odds_chosen": -0.014312508516013622, + "log_odds_ratio": -0.8823341131210327, + "logits/chosen": 0.11543910205364227, + "logits/rejected": -0.09864558279514313, + "logps/chosen": -1.0436660051345825, + "logps/rejected": -1.1146585941314697, + "loss": 1.2537, + "nll_loss": 1.1284033060073853, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.10436661541461945, + "rewards/margins": 0.007099261973053217, + "rewards/rejected": -0.11146585643291473, + "step": 3220 + }, + { + "epoch": 0.9071092046972786, + "grad_norm": 0.546875, + "learning_rate": 1.3013819765385903e-07, + "log_odds_chosen": 0.05668574571609497, + "log_odds_ratio": -0.7083471417427063, + "logits/chosen": 0.13047949969768524, + "logits/rejected": 0.05119504779577255, + "logps/chosen": -0.9959262013435364, + "logps/rejected": -1.0308897495269775, + "loss": 1.399, + "nll_loss": 1.4183380603790283, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.09959262609481812, + "rewards/margins": 0.0034963549114763737, + "rewards/rejected": -0.10308897495269775, + "step": 3225 + }, + { + "epoch": 0.9085155755572745, + "grad_norm": 0.62890625, + "learning_rate": 1.262577768039061e-07, + "log_odds_chosen": 0.2668651342391968, + "log_odds_ratio": -0.6243875026702881, + "logits/chosen": 0.1160123199224472, + "logits/rejected": 0.13229742646217346, + "logps/chosen": -0.9078758358955383, + "logps/rejected": -1.0736418962478638, + "loss": 1.3454, + "nll_loss": 1.1889612674713135, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.09078757464885712, + "rewards/margins": 0.01657661236822605, + "rewards/rejected": -0.10736417770385742, + "step": 3230 + }, + { + "epoch": 0.9099219464172702, + "grad_norm": 0.41015625, + "learning_rate": 1.224345886172673e-07, + "log_odds_chosen": 0.32301950454711914, + "log_odds_ratio": -0.6222423315048218, + "logits/chosen": 0.30127039551734924, + "logits/rejected": -0.06227840110659599, + "logps/chosen": -0.971104621887207, + "logps/rejected": -1.1543586254119873, + "loss": 1.3673, + "nll_loss": 1.2399194240570068, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.09711045026779175, + "rewards/margins": 0.01832542195916176, + "rewards/rejected": -0.1154358834028244, + "step": 3235 + }, + { + "epoch": 0.911328317277266, + "grad_norm": 0.5546875, + "learning_rate": 1.1866872527381029e-07, + "log_odds_chosen": 0.04946266487240791, + "log_odds_ratio": -0.7625073790550232, + "logits/chosen": 0.021832874044775963, + "logits/rejected": 0.01887836493551731, + "logps/chosen": -1.1444568634033203, + "logps/rejected": -1.1844040155410767, + "loss": 1.2423, + "nll_loss": 1.0771002769470215, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.11444568634033203, + "rewards/margins": 0.003994708880782127, + "rewards/rejected": -0.11844038963317871, + "step": 3240 + }, + { + "epoch": 0.9127346881372618, + "grad_norm": 0.51171875, + "learning_rate": 1.1496027757125743e-07, + "log_odds_chosen": 0.18768064677715302, + "log_odds_ratio": -0.8096588253974915, + "logits/chosen": 0.05349903181195259, + "logits/rejected": 0.061613865196704865, + "logps/chosen": -1.0463298559188843, + "logps/rejected": -1.1358802318572998, + "loss": 1.3161, + "nll_loss": 1.3629252910614014, + "rewards/accuracies": 0.44999998807907104, + "rewards/chosen": -0.10463298857212067, + "rewards/margins": 0.008955043740570545, + "rewards/rejected": -0.11358805000782013, + "step": 3245 + }, + { + "epoch": 0.9141410589972576, + "grad_norm": 1.4296875, + "learning_rate": 1.1130933492299906e-07, + "log_odds_chosen": 0.3283534646034241, + "log_odds_ratio": -0.6355275511741638, + "logits/chosen": 0.24122712016105652, + "logits/rejected": 0.14860299229621887, + "logps/chosen": -0.8834742307662964, + "logps/rejected": -1.0724339485168457, + "loss": 1.3049, + "nll_loss": 1.1096837520599365, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.08834744244813919, + "rewards/margins": 0.018895963206887245, + "rewards/rejected": -0.10724340379238129, + "step": 3250 + }, + { + "epoch": 0.9155474298572533, + "grad_norm": 0.40234375, + "learning_rate": 1.0771598535593508e-07, + "log_odds_chosen": 0.5767858028411865, + "log_odds_ratio": -0.49948233366012573, + "logits/chosen": 0.1712990701198578, + "logits/rejected": -0.1658778339624405, + "logps/chosen": -0.769035279750824, + "logps/rejected": -1.0615276098251343, + "loss": 1.3339, + "nll_loss": 1.265640377998352, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.07690352201461792, + "rewards/margins": 0.02924923039972782, + "rewards/rejected": -0.10615275800228119, + "step": 3255 + }, + { + "epoch": 0.9169538007172492, + "grad_norm": 0.62109375, + "learning_rate": 1.0418031550835594e-07, + "log_odds_chosen": 0.7740265727043152, + "log_odds_ratio": -0.5304247140884399, + "logits/chosen": 0.22207096219062805, + "logits/rejected": -0.11452925205230713, + "logps/chosen": -0.9042800664901733, + "logps/rejected": -1.4035189151763916, + "loss": 1.2448, + "nll_loss": 1.3662537336349487, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.09042801707983017, + "rewards/margins": 0.04992387443780899, + "rewards/rejected": -0.14035189151763916, + "step": 3260 + }, + { + "epoch": 0.9183601715772449, + "grad_norm": 0.478515625, + "learning_rate": 1.0070241062785063e-07, + "log_odds_chosen": 0.04200774058699608, + "log_odds_ratio": -0.8028166890144348, + "logits/chosen": 0.19796448945999146, + "logits/rejected": 0.07584884762763977, + "logps/chosen": -0.966947078704834, + "logps/rejected": -0.9276117086410522, + "loss": 1.2781, + "nll_loss": 1.2592346668243408, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.096694715321064, + "rewards/margins": -0.0039335330948233604, + "rewards/rejected": -0.09276118129491806, + "step": 3265 + }, + { + "epoch": 0.9197665424372407, + "grad_norm": 0.546875, + "learning_rate": 9.72823545692525e-08, + "log_odds_chosen": 0.28387412428855896, + "log_odds_ratio": -0.6854857802391052, + "logits/chosen": 0.2692652642726898, + "logits/rejected": -0.17600694298744202, + "logps/chosen": -0.9816180467605591, + "logps/rejected": -1.206395149230957, + "loss": 1.3407, + "nll_loss": 1.2754056453704834, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.09816179424524307, + "rewards/margins": 0.02247772179543972, + "rewards/rejected": -0.12063954025506973, + "step": 3270 + }, + { + "epoch": 0.9211729132972365, + "grad_norm": 0.96484375, + "learning_rate": 9.392022979261811e-08, + "log_odds_chosen": 0.2608630359172821, + "log_odds_ratio": -0.6507230997085571, + "logits/chosen": -0.025340568274259567, + "logits/rejected": 0.03941095247864723, + "logps/chosen": -0.9909608960151672, + "logps/rejected": -1.1462547779083252, + "loss": 1.377, + "nll_loss": 1.3717944622039795, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.09909608960151672, + "rewards/margins": 0.015529394149780273, + "rewards/rejected": -0.1146254763007164, + "step": 3275 + }, + { + "epoch": 0.9225792841572322, + "grad_norm": 0.64453125, + "learning_rate": 9.061611736123716e-08, + "log_odds_chosen": 0.2979525327682495, + "log_odds_ratio": -0.6411749124526978, + "logits/chosen": -0.06878896057605743, + "logits/rejected": 0.00978168100118637, + "logps/chosen": -0.9312704205513, + "logps/rejected": -1.1202948093414307, + "loss": 1.3165, + "nll_loss": 1.3011987209320068, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.09312702715396881, + "rewards/margins": 0.018902456387877464, + "rewards/rejected": -0.11202949285507202, + "step": 3280 + }, + { + "epoch": 0.9239856550172281, + "grad_norm": 0.310546875, + "learning_rate": 8.737009693968068e-08, + "log_odds_chosen": 0.3643631935119629, + "log_odds_ratio": -0.6027190089225769, + "logits/chosen": 0.33368968963623047, + "logits/rejected": 0.02814490720629692, + "logps/chosen": -0.9008346796035767, + "logps/rejected": -1.0817344188690186, + "loss": 1.234, + "nll_loss": 1.206937313079834, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.09008348733186722, + "rewards/margins": 0.018089953809976578, + "rewards/rejected": -0.1081734448671341, + "step": 3285 + }, + { + "epoch": 0.9253920258772238, + "grad_norm": 0.35546875, + "learning_rate": 8.418224679187792e-08, + "log_odds_chosen": 0.00694617023691535, + "log_odds_ratio": -0.8071894645690918, + "logits/chosen": -0.014169919304549694, + "logits/rejected": 0.1118505448102951, + "logps/chosen": -1.275794506072998, + "logps/rejected": -1.2462470531463623, + "loss": 1.2667, + "nll_loss": 1.1461488008499146, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.1275794506072998, + "rewards/margins": -0.0029547386802732944, + "rewards/rejected": -0.12462472915649414, + "step": 3290 + }, + { + "epoch": 0.9267983967372196, + "grad_norm": 0.55859375, + "learning_rate": 8.105264377923056e-08, + "log_odds_chosen": 0.5860501527786255, + "log_odds_ratio": -0.5213780999183655, + "logits/chosen": 0.24955987930297852, + "logits/rejected": -0.05474225804209709, + "logps/chosen": -0.9786784052848816, + "logps/rejected": -1.3315867185592651, + "loss": 1.353, + "nll_loss": 1.3573219776153564, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.09786783903837204, + "rewards/margins": 0.03529084101319313, + "rewards/rejected": -0.13315868377685547, + "step": 3295 + }, + { + "epoch": 0.9282047675972154, + "grad_norm": 0.671875, + "learning_rate": 7.798136335875895e-08, + "log_odds_chosen": 0.263238787651062, + "log_odds_ratio": -0.6176015138626099, + "logits/chosen": 0.39209964871406555, + "logits/rejected": 0.1130467876791954, + "logps/chosen": -0.7684231400489807, + "logps/rejected": -0.8703362345695496, + "loss": 1.2282, + "nll_loss": 1.0319740772247314, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.07684232294559479, + "rewards/margins": 0.010191314853727818, + "rewards/rejected": -0.08703363686800003, + "step": 3300 + }, + { + "epoch": 0.9296111384572112, + "grad_norm": 0.439453125, + "learning_rate": 7.496847958128273e-08, + "log_odds_chosen": 0.4197857975959778, + "log_odds_ratio": -0.5740344524383545, + "logits/chosen": 0.116268590092659, + "logits/rejected": -0.025552403181791306, + "logps/chosen": -0.8456694483757019, + "logps/rejected": -1.0732481479644775, + "loss": 1.301, + "nll_loss": 1.2720739841461182, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.08456695824861526, + "rewards/margins": 0.02275785803794861, + "rewards/rejected": -0.10732481628656387, + "step": 3305 + }, + { + "epoch": 0.931017509317207, + "grad_norm": 0.875, + "learning_rate": 7.201406508963698e-08, + "log_odds_chosen": 0.7171459794044495, + "log_odds_ratio": -0.44450870156288147, + "logits/chosen": 0.09888849407434464, + "logits/rejected": -0.3451232314109802, + "logps/chosen": -0.9191095232963562, + "logps/rejected": -1.3688544034957886, + "loss": 1.3185, + "nll_loss": 1.353026270866394, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -0.0919109433889389, + "rewards/margins": 0.04497448354959488, + "rewards/rejected": -0.13688543438911438, + "step": 3310 + }, + { + "epoch": 0.9324238801772027, + "grad_norm": 0.71875, + "learning_rate": 6.911819111691809e-08, + "log_odds_chosen": 0.13063013553619385, + "log_odds_ratio": -0.6960271596908569, + "logits/chosen": 0.0920720174908638, + "logits/rejected": -0.05112043768167496, + "logps/chosen": -1.0039879083633423, + "logps/rejected": -1.1332917213439941, + "loss": 1.256, + "nll_loss": 1.4166369438171387, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.10039877891540527, + "rewards/margins": 0.012930400669574738, + "rewards/rejected": -0.11332918703556061, + "step": 3315 + }, + { + "epoch": 0.9338302510371985, + "grad_norm": 1.390625, + "learning_rate": 6.628092748476839e-08, + "log_odds_chosen": 0.10143091529607773, + "log_odds_ratio": -0.7682610750198364, + "logits/chosen": -0.03156689926981926, + "logits/rejected": 0.10378922522068024, + "logps/chosen": -1.0922961235046387, + "logps/rejected": -1.1099097728729248, + "loss": 1.3721, + "nll_loss": 1.3745818138122559, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.10922960937023163, + "rewards/margins": 0.0017613600939512253, + "rewards/rejected": -0.1109909787774086, + "step": 3320 + }, + { + "epoch": 0.9352366218971943, + "grad_norm": 0.7734375, + "learning_rate": 6.350234260169175e-08, + "log_odds_chosen": 0.2494625747203827, + "log_odds_ratio": -0.727098822593689, + "logits/chosen": 0.11463620513677597, + "logits/rejected": -0.00450973492115736, + "logps/chosen": -0.9400336146354675, + "logps/rejected": -1.0810682773590088, + "loss": 1.2895, + "nll_loss": 1.2487919330596924, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.0940033569931984, + "rewards/margins": 0.01410345546901226, + "rewards/rejected": -0.1081068143248558, + "step": 3325 + }, + { + "epoch": 0.9366429927571901, + "grad_norm": 3.140625, + "learning_rate": 6.078250346140457e-08, + "log_odds_chosen": 0.5818791389465332, + "log_odds_ratio": -0.5487780570983887, + "logits/chosen": 0.1828577071428299, + "logits/rejected": -0.32167941331863403, + "logps/chosen": -0.975090503692627, + "logps/rejected": -1.378078818321228, + "loss": 1.2588, + "nll_loss": 1.4379678964614868, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.09750904142856598, + "rewards/margins": 0.04029882326722145, + "rewards/rejected": -0.13780787587165833, + "step": 3330 + }, + { + "epoch": 0.9380493636171858, + "grad_norm": 0.57421875, + "learning_rate": 5.812147564122017e-08, + "log_odds_chosen": 0.08344351500272751, + "log_odds_ratio": -0.7285597920417786, + "logits/chosen": 0.1936129629611969, + "logits/rejected": 0.06743821501731873, + "logps/chosen": -0.9562802314758301, + "logps/rejected": -1.0205862522125244, + "loss": 1.3033, + "nll_loss": 1.1195799112319946, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.09562802314758301, + "rewards/margins": 0.0064305951818823814, + "rewards/rejected": -0.10205862671136856, + "step": 3335 + }, + { + "epoch": 0.9394557344771817, + "grad_norm": 0.56640625, + "learning_rate": 5.5519323300467775e-08, + "log_odds_chosen": 0.7931851148605347, + "log_odds_ratio": -0.4802130162715912, + "logits/chosen": 0.3637743890285492, + "logits/rejected": -0.05420628935098648, + "logps/chosen": -0.7939968705177307, + "logps/rejected": -1.3140610456466675, + "loss": 1.3665, + "nll_loss": 1.1318628787994385, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.07939968258142471, + "rewards/margins": 0.052006423473358154, + "rewards/rejected": -0.13140609860420227, + "step": 3340 + }, + { + "epoch": 0.9408621053371774, + "grad_norm": 0.7421875, + "learning_rate": 5.29761091789463e-08, + "log_odds_chosen": 0.10022227466106415, + "log_odds_ratio": -0.7228878140449524, + "logits/chosen": 0.06098024919629097, + "logits/rejected": -0.02456871047616005, + "logps/chosen": -0.8794612884521484, + "logps/rejected": -0.9316323399543762, + "loss": 1.413, + "nll_loss": 1.4631612300872803, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.08794611692428589, + "rewards/margins": 0.00521711353212595, + "rewards/rejected": -0.09316324442625046, + "step": 3345 + }, + { + "epoch": 0.9422684761971732, + "grad_norm": 0.8671875, + "learning_rate": 5.049189459541054e-08, + "log_odds_chosen": 0.36486390233039856, + "log_odds_ratio": -0.6099367141723633, + "logits/chosen": 0.06692637503147125, + "logits/rejected": -0.11904720962047577, + "logps/chosen": -1.2377574443817139, + "logps/rejected": -1.5012363195419312, + "loss": 1.3822, + "nll_loss": 1.4673506021499634, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.12377575784921646, + "rewards/margins": 0.026347899809479713, + "rewards/rejected": -0.15012364089488983, + "step": 3350 + }, + { + "epoch": 0.943674847057169, + "grad_norm": 1.1328125, + "learning_rate": 4.806673944609347e-08, + "log_odds_chosen": 0.06244078278541565, + "log_odds_ratio": -0.78443843126297, + "logits/chosen": 0.17202523350715637, + "logits/rejected": 0.10731378942728043, + "logps/chosen": -0.9702354669570923, + "logps/rejected": -1.0049316883087158, + "loss": 1.2405, + "nll_loss": 1.2193858623504639, + "rewards/accuracies": 0.3499999940395355, + "rewards/chosen": -0.09702353924512863, + "rewards/margins": 0.0034696266520768404, + "rewards/rejected": -0.1004931703209877, + "step": 3355 + }, + { + "epoch": 0.9450812179171647, + "grad_norm": 0.498046875, + "learning_rate": 4.570070220326128e-08, + "log_odds_chosen": 0.11939896643161774, + "log_odds_ratio": -0.7277523279190063, + "logits/chosen": 0.12861505150794983, + "logits/rejected": 0.02386235073208809, + "logps/chosen": -1.0052900314331055, + "logps/rejected": -1.1172101497650146, + "loss": 1.2454, + "nll_loss": 1.1664661169052124, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.10052900016307831, + "rewards/margins": 0.011192025616765022, + "rewards/rejected": -0.11172102391719818, + "step": 3360 + }, + { + "epoch": 0.9464875887771605, + "grad_norm": 0.5390625, + "learning_rate": 4.33938399138048e-08, + "log_odds_chosen": 0.5814080238342285, + "log_odds_ratio": -0.5879030227661133, + "logits/chosen": 0.28143981099128723, + "logits/rejected": 0.061570387333631516, + "logps/chosen": -0.7967894077301025, + "logps/rejected": -1.0686571598052979, + "loss": 1.3116, + "nll_loss": 1.094059705734253, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.07967893779277802, + "rewards/margins": 0.02718677744269371, + "rewards/rejected": -0.10686571896076202, + "step": 3365 + }, + { + "epoch": 0.9478939596371563, + "grad_norm": 0.4765625, + "learning_rate": 4.114620819786308e-08, + "log_odds_chosen": 0.23346397280693054, + "log_odds_ratio": -0.6866267919540405, + "logits/chosen": 0.25685185194015503, + "logits/rejected": -0.01630682870745659, + "logps/chosen": -0.9060823321342468, + "logps/rejected": -1.0845293998718262, + "loss": 1.3498, + "nll_loss": 1.196526288986206, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.09060823917388916, + "rewards/margins": 0.01784469559788704, + "rewards/rejected": -0.1084529384970665, + "step": 3370 + }, + { + "epoch": 0.9493003304971521, + "grad_norm": 0.52734375, + "learning_rate": 3.89578612474828e-08, + "log_odds_chosen": -0.14990894496440887, + "log_odds_ratio": -0.9641848802566528, + "logits/chosen": 0.24826081097126007, + "logits/rejected": 0.09183430671691895, + "logps/chosen": -1.2104682922363281, + "logps/rejected": -1.0689446926116943, + "loss": 1.272, + "nll_loss": 1.293968915939331, + "rewards/accuracies": 0.4000000059604645, + "rewards/chosen": -0.12104681879281998, + "rewards/margins": -0.014152342453598976, + "rewards/rejected": -0.10689447820186615, + "step": 3375 + }, + { + "epoch": 0.9507067013571479, + "grad_norm": 1.40625, + "learning_rate": 3.682885182531154e-08, + "log_odds_chosen": 0.2269192934036255, + "log_odds_ratio": -0.673160970211029, + "logits/chosen": 0.2562906742095947, + "logits/rejected": 0.09089311212301254, + "logps/chosen": -0.9358326196670532, + "logps/rejected": -1.0342806577682495, + "loss": 1.3099, + "nll_loss": 1.2557461261749268, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.09358327090740204, + "rewards/margins": 0.009844806976616383, + "rewards/rejected": -0.10342808067798615, + "step": 3380 + }, + { + "epoch": 0.9521130722171437, + "grad_norm": 0.953125, + "learning_rate": 3.475923126332575e-08, + "log_odds_chosen": -0.13816693425178528, + "log_odds_ratio": -0.9078758955001831, + "logits/chosen": 0.20385582745075226, + "logits/rejected": 0.0857970267534256, + "logps/chosen": -0.9905937314033508, + "logps/rejected": -0.9162753820419312, + "loss": 1.3297, + "nll_loss": 1.271182656288147, + "rewards/accuracies": 0.44999998807907104, + "rewards/chosen": -0.09905937314033508, + "rewards/margins": -0.007431834936141968, + "rewards/rejected": -0.09162753820419312, + "step": 3385 + }, + { + "epoch": 0.9535194430771394, + "grad_norm": 0.54296875, + "learning_rate": 3.2749049461592295e-08, + "log_odds_chosen": 0.11129869520664215, + "log_odds_ratio": -0.7079453468322754, + "logits/chosen": 0.20724210143089294, + "logits/rejected": 0.046768300235271454, + "logps/chosen": -0.9434338808059692, + "logps/rejected": -1.0389223098754883, + "loss": 1.2332, + "nll_loss": 1.212593913078308, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.0943433940410614, + "rewards/margins": 0.009548845700919628, + "rewards/rejected": -0.10389222949743271, + "step": 3390 + }, + { + "epoch": 0.9549258139371353, + "grad_norm": 0.498046875, + "learning_rate": 3.0798354887066897e-08, + "log_odds_chosen": -0.019172105938196182, + "log_odds_ratio": -0.7809979915618896, + "logits/chosen": -0.1130768209695816, + "logits/rejected": 0.10138092190027237, + "logps/chosen": -1.010284185409546, + "logps/rejected": -0.9839268922805786, + "loss": 1.3462, + "nll_loss": 1.2669436931610107, + "rewards/accuracies": 0.44999998807907104, + "rewards/chosen": -0.10102842003107071, + "rewards/margins": -0.0026357315946370363, + "rewards/rejected": -0.09839268773794174, + "step": 3395 + }, + { + "epoch": 0.956332184797131, + "grad_norm": 0.357421875, + "learning_rate": 2.890719457242397e-08, + "log_odds_chosen": 0.4177800714969635, + "log_odds_ratio": -0.5798153281211853, + "logits/chosen": 0.15526822209358215, + "logits/rejected": 0.1292274296283722, + "logps/chosen": -0.8883660435676575, + "logps/rejected": -1.1707476377487183, + "loss": 1.381, + "nll_loss": 1.1893246173858643, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.08883660286664963, + "rewards/margins": 0.02823815681040287, + "rewards/rejected": -0.11707476526498795, + "step": 3400 + }, + { + "epoch": 0.9577385556571267, + "grad_norm": 0.71875, + "learning_rate": 2.707561411492393e-08, + "log_odds_chosen": 0.24508845806121826, + "log_odds_ratio": -0.6735808849334717, + "logits/chosen": 0.18047787249088287, + "logits/rejected": -0.13641862571239471, + "logps/chosen": -0.9271578788757324, + "logps/rejected": -1.1271312236785889, + "loss": 1.2555, + "nll_loss": 1.260265588760376, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.092715784907341, + "rewards/margins": 0.01999734900891781, + "rewards/rejected": -0.11271314322948456, + "step": 3405 + }, + { + "epoch": 0.9591449265171226, + "grad_norm": 0.47265625, + "learning_rate": 2.5303657675312677e-08, + "log_odds_chosen": 0.24291105568408966, + "log_odds_ratio": -0.6924997568130493, + "logits/chosen": 0.06729461997747421, + "logits/rejected": 0.11077950149774551, + "logps/chosen": -0.9200528264045715, + "logps/rejected": -1.05774986743927, + "loss": 1.2593, + "nll_loss": 1.0932310819625854, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.09200528264045715, + "rewards/margins": 0.013769703917205334, + "rewards/rejected": -0.10577498376369476, + "step": 3410 + }, + { + "epoch": 0.9605512973771183, + "grad_norm": 0.76171875, + "learning_rate": 2.359136797675743e-08, + "log_odds_chosen": 0.7608834505081177, + "log_odds_ratio": -0.5808351635932922, + "logits/chosen": 0.17602987587451935, + "logits/rejected": 0.07526994496583939, + "logps/chosen": -0.855022132396698, + "logps/rejected": -1.4509608745574951, + "loss": 1.2025, + "nll_loss": 1.0686630010604858, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.08550222218036652, + "rewards/margins": 0.05959387496113777, + "rewards/rejected": -0.145096093416214, + "step": 3415 + }, + { + "epoch": 0.9619576682371141, + "grad_norm": 0.419921875, + "learning_rate": 2.1938786303817295e-08, + "log_odds_chosen": 0.45528849959373474, + "log_odds_ratio": -0.6140977144241333, + "logits/chosen": 0.077461838722229, + "logits/rejected": -0.12476935237646103, + "logps/chosen": -0.8827389478683472, + "logps/rejected": -1.1483733654022217, + "loss": 1.3045, + "nll_loss": 1.3081581592559814, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.08827390521764755, + "rewards/margins": 0.026563435792922974, + "rewards/rejected": -0.11483733355998993, + "step": 3420 + }, + { + "epoch": 0.9633640390971099, + "grad_norm": 0.56640625, + "learning_rate": 2.0345952501445722e-08, + "log_odds_chosen": 0.6777931451797485, + "log_odds_ratio": -0.529708743095398, + "logits/chosen": 0.3582519292831421, + "logits/rejected": -0.17594197392463684, + "logps/chosen": -0.8322056531906128, + "logps/rejected": -1.263925313949585, + "loss": 1.3209, + "nll_loss": 1.2668817043304443, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.08322056382894516, + "rewards/margins": 0.0431719608604908, + "rewards/rejected": -0.12639251351356506, + "step": 3425 + }, + { + "epoch": 0.9647704099571057, + "grad_norm": 1.1171875, + "learning_rate": 1.881290497403321e-08, + "log_odds_chosen": 0.07137568295001984, + "log_odds_ratio": -0.7536412477493286, + "logits/chosen": 0.39291954040527344, + "logits/rejected": 0.10763299465179443, + "logps/chosen": -1.019919753074646, + "logps/rejected": -1.0896474123001099, + "loss": 1.4614, + "nll_loss": 1.3426240682601929, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.10199197381734848, + "rewards/margins": 0.006972762290388346, + "rewards/rejected": -0.10896474123001099, + "step": 3430 + }, + { + "epoch": 0.9661767808171015, + "grad_norm": 0.439453125, + "learning_rate": 1.73396806844775e-08, + "log_odds_chosen": 0.4001692831516266, + "log_odds_ratio": -0.6367109417915344, + "logits/chosen": 0.19484727084636688, + "logits/rejected": 0.09062852710485458, + "logps/chosen": -0.950181782245636, + "logps/rejected": -1.245031714439392, + "loss": 1.2299, + "nll_loss": 1.2423794269561768, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.095018170773983, + "rewards/margins": 0.029485007748007774, + "rewards/rejected": -0.12450318038463593, + "step": 3435 + }, + { + "epoch": 0.9675831516770973, + "grad_norm": 1.5703125, + "learning_rate": 1.5926315153295402e-08, + "log_odds_chosen": 0.21363556385040283, + "log_odds_ratio": -0.6855403780937195, + "logits/chosen": -0.012852529995143414, + "logits/rejected": 0.035267848521471024, + "logps/chosen": -1.0747730731964111, + "logps/rejected": -1.145056962966919, + "loss": 1.2873, + "nll_loss": 1.251246452331543, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.10747730731964111, + "rewards/margins": 0.007028372026979923, + "rewards/rejected": -0.11450569331645966, + "step": 3440 + }, + { + "epoch": 0.968989522537093, + "grad_norm": 0.69921875, + "learning_rate": 1.4572842457764858e-08, + "log_odds_chosen": 0.36453381180763245, + "log_odds_ratio": -0.6931215524673462, + "logits/chosen": 0.17390012741088867, + "logits/rejected": 0.009960390627384186, + "logps/chosen": -0.9899552464485168, + "logps/rejected": -1.2095123529434204, + "loss": 1.3039, + "nll_loss": 1.2584543228149414, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.09899552166461945, + "rewards/margins": 0.021955717355012894, + "rewards/rejected": -0.12095125019550323, + "step": 3445 + }, + { + "epoch": 0.9703958933970888, + "grad_norm": 0.59375, + "learning_rate": 1.3279295231103661e-08, + "log_odds_chosen": 0.22130723297595978, + "log_odds_ratio": -0.7411271333694458, + "logits/chosen": -0.0032444163225591183, + "logits/rejected": -0.20411105453968048, + "logps/chosen": -1.0461972951889038, + "logps/rejected": -1.230019211769104, + "loss": 1.3301, + "nll_loss": 1.3564178943634033, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.10461972653865814, + "rewards/margins": 0.018382195383310318, + "rewards/rejected": -0.12300191819667816, + "step": 3450 + }, + { + "epoch": 0.9718022642570846, + "grad_norm": 0.484375, + "learning_rate": 1.204570466168259e-08, + "log_odds_chosen": 0.6615599393844604, + "log_odds_ratio": -0.45958274602890015, + "logits/chosen": 0.2637723684310913, + "logits/rejected": -0.2658749222755432, + "logps/chosen": -0.797881543636322, + "logps/rejected": -1.2056782245635986, + "loss": 1.3484, + "nll_loss": 1.2117133140563965, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -0.07978816330432892, + "rewards/margins": 0.04077966883778572, + "rewards/rejected": -0.12056783586740494, + "step": 3455 + }, + { + "epoch": 0.9732086351170803, + "grad_norm": 2.484375, + "learning_rate": 1.087210049227405e-08, + "log_odds_chosen": 0.4059210419654846, + "log_odds_ratio": -0.6156436204910278, + "logits/chosen": 0.42425599694252014, + "logits/rejected": -0.07910940796136856, + "logps/chosen": -0.7596513032913208, + "logps/rejected": -1.0463638305664062, + "loss": 1.2396, + "nll_loss": 1.1112778186798096, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.07596512883901596, + "rewards/margins": 0.028671249747276306, + "rewards/rejected": -0.10463637113571167, + "step": 3460 + }, + { + "epoch": 0.9746150059770762, + "grad_norm": 0.5078125, + "learning_rate": 9.758511019333505e-09, + "log_odds_chosen": 0.18497975170612335, + "log_odds_ratio": -0.6618334650993347, + "logits/chosen": 0.21915988624095917, + "logits/rejected": -0.14438927173614502, + "logps/chosen": -0.990871787071228, + "logps/rejected": -1.1092342138290405, + "loss": 1.2737, + "nll_loss": 1.3173089027404785, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.0990871861577034, + "rewards/margins": 0.011836233548820019, + "rewards/rejected": -0.1109234094619751, + "step": 3465 + }, + { + "epoch": 0.9760213768370719, + "grad_norm": 0.52734375, + "learning_rate": 8.704963092319164e-09, + "log_odds_chosen": 0.14419862627983093, + "log_odds_ratio": -0.6739202737808228, + "logits/chosen": 0.13422077894210815, + "logits/rejected": 0.11353013664484024, + "logps/chosen": -0.8795303106307983, + "logps/rejected": -0.9439455270767212, + "loss": 1.3173, + "nll_loss": 1.1744730472564697, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.08795302361249924, + "rewards/margins": 0.006441520992666483, + "rewards/rejected": -0.09439454972743988, + "step": 3470 + }, + { + "epoch": 0.9774277476970678, + "grad_norm": 0.3515625, + "learning_rate": 7.71148211304279e-09, + "log_odds_chosen": 0.14641742408275604, + "log_odds_ratio": -0.752571702003479, + "logits/chosen": 0.34768936038017273, + "logits/rejected": 0.09763796627521515, + "logps/chosen": -0.8525916934013367, + "logps/rejected": -0.8650640249252319, + "loss": 1.291, + "nll_loss": 1.110510230064392, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.08525917679071426, + "rewards/margins": 0.0012472260277718306, + "rewards/rejected": -0.08650640398263931, + "step": 3475 + }, + { + "epoch": 0.9788341185570635, + "grad_norm": 0.5078125, + "learning_rate": 6.778092035058248e-09, + "log_odds_chosen": -0.07389330118894577, + "log_odds_ratio": -0.7915335893630981, + "logits/chosen": 0.0018207028042525053, + "logits/rejected": 0.19812503457069397, + "logps/chosen": -0.8738433718681335, + "logps/rejected": -0.8644292950630188, + "loss": 1.3015, + "nll_loss": 1.1114206314086914, + "rewards/accuracies": 0.4000000059604645, + "rewards/chosen": -0.08738434314727783, + "rewards/margins": -0.0009413976222276688, + "rewards/rejected": -0.08644293993711472, + "step": 3480 + }, + { + "epoch": 0.9802404894170593, + "grad_norm": 2.0625, + "learning_rate": 5.904815363083904e-09, + "log_odds_chosen": 0.41229891777038574, + "log_odds_ratio": -0.6279317140579224, + "logits/chosen": -0.048399388790130615, + "logits/rejected": -0.034595172852277756, + "logps/chosen": -0.9107322692871094, + "logps/rejected": -1.190619707107544, + "loss": 1.3319, + "nll_loss": 1.3180420398712158, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.09107322990894318, + "rewards/margins": 0.027988741174340248, + "rewards/rejected": -0.11906196922063828, + "step": 3485 + }, + { + "epoch": 0.9816468602770551, + "grad_norm": 0.65234375, + "learning_rate": 5.091673152459731e-09, + "log_odds_chosen": -0.03466014191508293, + "log_odds_ratio": -0.806627094745636, + "logits/chosen": 0.2747122049331665, + "logits/rejected": 0.07116059213876724, + "logps/chosen": -0.9941873550415039, + "logps/rejected": -1.009939432144165, + "loss": 1.4112, + "nll_loss": 1.218632459640503, + "rewards/accuracies": 0.4000000059604645, + "rewards/chosen": -0.09941873699426651, + "rewards/margins": 0.0015752071049064398, + "rewards/rejected": -0.10099394619464874, + "step": 3490 + }, + { + "epoch": 0.9830532311370508, + "grad_norm": 0.6953125, + "learning_rate": 4.338685008640209e-09, + "log_odds_chosen": 0.4317692220211029, + "log_odds_ratio": -0.5760183334350586, + "logits/chosen": -0.013361548073589802, + "logits/rejected": -0.12007039785385132, + "logps/chosen": -0.8380535840988159, + "logps/rejected": -1.0656559467315674, + "loss": 1.3201, + "nll_loss": 1.3138437271118164, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.08380536735057831, + "rewards/margins": 0.022760232910513878, + "rewards/rejected": -0.10656560957431793, + "step": 3495 + }, + { + "epoch": 0.9844596019970466, + "grad_norm": 0.74609375, + "learning_rate": 3.6458690867208214e-09, + "log_odds_chosen": 0.28699353337287903, + "log_odds_ratio": -0.6248040795326233, + "logits/chosen": 0.18887999653816223, + "logits/rejected": 0.14614346623420715, + "logps/chosen": -0.7974966168403625, + "logps/rejected": -0.9402490854263306, + "loss": 1.265, + "nll_loss": 1.12100350856781, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.07974965870380402, + "rewards/margins": 0.014275247231125832, + "rewards/rejected": -0.0940249115228653, + "step": 3500 + }, + { + "epoch": 0.9858659728570424, + "grad_norm": 0.5, + "learning_rate": 3.0132420910006233e-09, + "log_odds_chosen": 0.25048303604125977, + "log_odds_ratio": -0.6807008981704712, + "logits/chosen": 0.18030565977096558, + "logits/rejected": 0.07345929741859436, + "logps/chosen": -0.8522911071777344, + "logps/rejected": -0.9874947667121887, + "loss": 1.2869, + "nll_loss": 1.2538975477218628, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.08522911369800568, + "rewards/margins": 0.013520359992980957, + "rewards/rejected": -0.09874947369098663, + "step": 3505 + }, + { + "epoch": 0.9872723437170382, + "grad_norm": 0.52734375, + "learning_rate": 2.440819274579509e-09, + "log_odds_chosen": 0.19837257266044617, + "log_odds_ratio": -0.7717543840408325, + "logits/chosen": 0.22246333956718445, + "logits/rejected": -0.24095554649829865, + "logps/chosen": -1.009987235069275, + "logps/rejected": -1.1807398796081543, + "loss": 1.3517, + "nll_loss": 1.3689885139465332, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.10099872201681137, + "rewards/margins": 0.017075251787900925, + "rewards/rejected": -0.11807398498058319, + "step": 3510 + }, + { + "epoch": 0.9886787145770339, + "grad_norm": 0.435546875, + "learning_rate": 1.9286144389915605e-09, + "log_odds_chosen": 0.37249404191970825, + "log_odds_ratio": -0.6405671834945679, + "logits/chosen": 0.19467693567276, + "logits/rejected": 0.094179168343544, + "logps/chosen": -0.8736664056777954, + "logps/rejected": -1.1186236143112183, + "loss": 1.214, + "nll_loss": 1.075716257095337, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.08736663311719894, + "rewards/margins": 0.024495726451277733, + "rewards/rejected": -0.11186236143112183, + "step": 3515 + }, + { + "epoch": 0.9900850854370298, + "grad_norm": 0.890625, + "learning_rate": 1.4766399338697613e-09, + "log_odds_chosen": 0.3097439706325531, + "log_odds_ratio": -0.6437228918075562, + "logits/chosen": 0.12821228802204132, + "logits/rejected": -0.21044449508190155, + "logps/chosen": -0.9112231135368347, + "logps/rejected": -1.0949690341949463, + "loss": 1.3147, + "nll_loss": 1.3154476881027222, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.09112232178449631, + "rewards/margins": 0.01837458834052086, + "rewards/rejected": -0.10949690639972687, + "step": 3520 + }, + { + "epoch": 0.9914914562970255, + "grad_norm": 0.6015625, + "learning_rate": 1.084906656650675e-09, + "log_odds_chosen": 0.3887336850166321, + "log_odds_ratio": -0.6401981115341187, + "logits/chosen": 0.07849763333797455, + "logits/rejected": 0.04411407560110092, + "logps/chosen": -0.7694699764251709, + "logps/rejected": -0.924534797668457, + "loss": 1.3069, + "nll_loss": 1.2313811779022217, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.07694699615240097, + "rewards/margins": 0.015506486408412457, + "rewards/rejected": -0.0924534797668457, + "step": 3525 + }, + { + "epoch": 0.9928978271570214, + "grad_norm": 0.494140625, + "learning_rate": 7.534240523107694e-10, + "log_odds_chosen": -0.010244468227028847, + "log_odds_ratio": -0.8057150840759277, + "logits/chosen": -0.1170666366815567, + "logits/rejected": 0.07405931502580643, + "logps/chosen": -0.9780911207199097, + "logps/rejected": -0.910449206829071, + "loss": 1.29, + "nll_loss": 1.3354380130767822, + "rewards/accuracies": 0.44999998807907104, + "rewards/chosen": -0.09780912101268768, + "rewards/margins": -0.006764202378690243, + "rewards/rejected": -0.09104491770267487, + "step": 3530 + }, + { + "epoch": 0.9943041980170171, + "grad_norm": 0.37109375, + "learning_rate": 4.822001131377096e-10, + "log_odds_chosen": 0.20615753531455994, + "log_odds_ratio": -0.6634969115257263, + "logits/chosen": 0.2902953624725342, + "logits/rejected": 0.024714922532439232, + "logps/chosen": -0.9204000234603882, + "logps/rejected": -1.0575330257415771, + "loss": 1.2121, + "nll_loss": 1.058855652809143, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.09204001724720001, + "rewards/margins": 0.013713277876377106, + "rewards/rejected": -0.10575328767299652, + "step": 3535 + }, + { + "epoch": 0.9957105688770128, + "grad_norm": 0.71484375, + "learning_rate": 2.7124137853967723e-10, + "log_odds_chosen": -0.03763968497514725, + "log_odds_ratio": -0.8463073968887329, + "logits/chosen": 0.18455204367637634, + "logits/rejected": -0.0659802109003067, + "logps/chosen": -0.7890104651451111, + "logps/rejected": -0.8145157098770142, + "loss": 1.3798, + "nll_loss": 1.4178717136383057, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.07890104502439499, + "rewards/margins": 0.0025505167432129383, + "rewards/rejected": -0.08145156502723694, + "step": 3540 + }, + { + "epoch": 0.9971169397370087, + "grad_norm": 0.75, + "learning_rate": 1.2055293488633102e-10, + "log_odds_chosen": -0.07134605944156647, + "log_odds_ratio": -0.8291767835617065, + "logits/chosen": 0.11052653938531876, + "logits/rejected": 0.15074488520622253, + "logps/chosen": -0.9489976763725281, + "logps/rejected": -0.9296343922615051, + "loss": 1.2384, + "nll_loss": 1.1190108060836792, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.09489977359771729, + "rewards/margins": -0.0019363273167982697, + "rewards/rejected": -0.09296345710754395, + "step": 3545 + }, + { + "epoch": 0.9985233105970044, + "grad_norm": 0.462890625, + "learning_rate": 3.013841538640483e-11, + "log_odds_chosen": 0.37353289127349854, + "log_odds_ratio": -0.6932848691940308, + "logits/chosen": -0.08854226768016815, + "logits/rejected": -0.006648472044616938, + "logps/chosen": -0.9258167147636414, + "logps/rejected": -1.1444346904754639, + "loss": 1.24, + "nll_loss": 1.0886918306350708, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.09258167445659637, + "rewards/margins": 0.021861806511878967, + "rewards/rejected": -0.11444349586963654, + "step": 3550 + }, + { + "epoch": 0.9999296814570002, + "grad_norm": 1.21875, + "learning_rate": 0.0, + "log_odds_chosen": 0.21634745597839355, + "log_odds_ratio": -0.6914322972297668, + "logits/chosen": 0.18516218662261963, + "logits/rejected": 0.08085967600345612, + "logps/chosen": -0.9154261350631714, + "logps/rejected": -1.0730537176132202, + "loss": 1.2483, + "nll_loss": 1.1246975660324097, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.09154261648654938, + "rewards/margins": 0.01576276682317257, + "rewards/rejected": -0.1073053851723671, + "step": 3555 + }, + { + "epoch": 0.9999296814570002, + "eval_log_odds_chosen": 0.3767525255680084, + "eval_log_odds_ratio": -0.62775719165802, + "eval_logits/chosen": 0.20116207003593445, + "eval_logits/rejected": -0.0032512724865227938, + "eval_logps/chosen": -0.8883616924285889, + "eval_logps/rejected": -1.1382248401641846, + "eval_loss": 1.2883824110031128, + "eval_nll_loss": 1.2074565887451172, + "eval_rewards/accuracies": 0.6132478713989258, + "eval_rewards/chosen": -0.08883616328239441, + "eval_rewards/margins": 0.02498631551861763, + "eval_rewards/rejected": -0.11382248252630234, + "eval_runtime": 976.6863, + "eval_samples_per_second": 1.914, + "eval_steps_per_second": 0.479, + "step": 3555 + }, + { + "epoch": 0.9999296814570002, + "step": 3555, + "total_flos": 0.0, + "train_loss": 1.4252014341233652, + "train_runtime": 102484.8577, + "train_samples_per_second": 0.555, + "train_steps_per_second": 0.035 + } + ], + "logging_steps": 5, + "max_steps": 3555, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 100, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 0.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +}