diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,21516 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "eval_steps": 3000, + "global_step": 15284, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 3.270111183780249e-09, + "logits/chosen": -3.2917370796203613, + "logits/rejected": -3.2796809673309326, + "logps/chosen": -336.192626953125, + "logps/rejected": -310.9856872558594, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.0, + "rewards/margins": 0.0, + "rewards/rejected": 0.0, + "step": 1 + }, + { + "epoch": 0.0, + "learning_rate": 3.270111183780249e-08, + "logits/chosen": -3.1285088062286377, + "logits/rejected": -3.18198299407959, + "logps/chosen": -315.2669677734375, + "logps/rejected": -272.7064514160156, + "loss": 0.693, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.0010706963948905468, + "rewards/margins": 0.00015845504822209477, + "rewards/rejected": 0.0009122414048761129, + "step": 10 + }, + { + "epoch": 0.0, + "learning_rate": 6.540222367560497e-08, + "logits/chosen": -3.0872440338134766, + "logits/rejected": -3.0241026878356934, + "logps/chosen": -414.28765869140625, + "logps/rejected": -233.66372680664062, + "loss": 0.6929, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.00017881770327221602, + "rewards/margins": -5.1920022087870166e-05, + "rewards/rejected": 0.00023073769989423454, + "step": 20 + }, + { + "epoch": 0.0, + "learning_rate": 9.810333551340746e-08, + "logits/chosen": -2.8971962928771973, + "logits/rejected": -2.972332000732422, + "logps/chosen": -312.8296203613281, + "logps/rejected": -326.06396484375, + "loss": 0.6927, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.0005883770063519478, + "rewards/margins": 0.0005195619305595756, + "rewards/rejected": 6.88152140355669e-05, + "step": 30 + }, + { + "epoch": 0.0, + "learning_rate": 1.3080444735120995e-07, + "logits/chosen": -2.9546608924865723, + "logits/rejected": -2.884795665740967, + "logps/chosen": -282.99395751953125, + "logps/rejected": -250.0284423828125, + "loss": 0.6936, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.00084640400018543, + "rewards/margins": 8.266828808700666e-05, + "rewards/rejected": -0.0009290723246522248, + "step": 40 + }, + { + "epoch": 0.0, + "learning_rate": 1.6350555918901243e-07, + "logits/chosen": -3.0107474327087402, + "logits/rejected": -3.1580097675323486, + "logps/chosen": -437.7754821777344, + "logps/rejected": -369.67620849609375, + "loss": 0.6932, + "rewards/accuracies": 0.4000000059604645, + "rewards/chosen": 9.289549052482471e-05, + "rewards/margins": 0.0005522651481442153, + "rewards/rejected": -0.0004593696794472635, + "step": 50 + }, + { + "epoch": 0.0, + "learning_rate": 1.9620667102681492e-07, + "logits/chosen": -2.87144136428833, + "logits/rejected": -2.7554101943969727, + "logps/chosen": -183.0247802734375, + "logps/rejected": -211.3839569091797, + "loss": 0.6921, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": 0.0003684524563141167, + "rewards/margins": 0.001609438331797719, + "rewards/rejected": -0.0012409858172759414, + "step": 60 + }, + { + "epoch": 0.0, + "learning_rate": 2.289077828646174e-07, + "logits/chosen": -3.0719211101531982, + "logits/rejected": -3.0397117137908936, + "logps/chosen": -262.9385070800781, + "logps/rejected": -270.0909423828125, + "loss": 0.692, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": 0.00010034188017016277, + "rewards/margins": 0.003150531556457281, + "rewards/rejected": -0.003050189930945635, + "step": 70 + }, + { + "epoch": 0.01, + "learning_rate": 2.616088947024199e-07, + "logits/chosen": -2.995681047439575, + "logits/rejected": -2.925076961517334, + "logps/chosen": -215.14419555664062, + "logps/rejected": -248.43911743164062, + "loss": 0.6937, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.0004575929488055408, + "rewards/margins": -0.0010273593943566084, + "rewards/rejected": 0.0005697665037587285, + "step": 80 + }, + { + "epoch": 0.01, + "learning_rate": 2.943100065402224e-07, + "logits/chosen": -3.0288403034210205, + "logits/rejected": -3.10252046585083, + "logps/chosen": -317.17596435546875, + "logps/rejected": -326.1206359863281, + "loss": 0.6932, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.0005004165577702224, + "rewards/margins": -0.0007878703763708472, + "rewards/rejected": 0.00028745370218530297, + "step": 90 + }, + { + "epoch": 0.01, + "learning_rate": 3.2701111837802487e-07, + "logits/chosen": -3.028860330581665, + "logits/rejected": -3.1097171306610107, + "logps/chosen": -290.9003601074219, + "logps/rejected": -242.79989624023438, + "loss": 0.6933, + "rewards/accuracies": 0.44999998807907104, + "rewards/chosen": 4.07161314797122e-05, + "rewards/margins": 0.00024331473105121404, + "rewards/rejected": -0.00020259855955373496, + "step": 100 + }, + { + "epoch": 0.01, + "learning_rate": 3.5971223021582736e-07, + "logits/chosen": -3.10758900642395, + "logits/rejected": -3.1759090423583984, + "logps/chosen": -319.63482666015625, + "logps/rejected": -252.9918975830078, + "loss": 0.6937, + "rewards/accuracies": 0.3499999940395355, + "rewards/chosen": -0.00032907718559727073, + "rewards/margins": -0.0014111388009041548, + "rewards/rejected": 0.0010820617899298668, + "step": 110 + }, + { + "epoch": 0.01, + "learning_rate": 3.9241334205362984e-07, + "logits/chosen": -2.9805614948272705, + "logits/rejected": -3.066190242767334, + "logps/chosen": -233.74319458007812, + "logps/rejected": -261.84765625, + "loss": 0.6925, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.0013232993660494685, + "rewards/margins": 0.0016237791860476136, + "rewards/rejected": -0.0003004799073096365, + "step": 120 + }, + { + "epoch": 0.01, + "learning_rate": 4.251144538914324e-07, + "logits/chosen": -2.8722615242004395, + "logits/rejected": -2.882192373275757, + "logps/chosen": -296.33563232421875, + "logps/rejected": -232.06124877929688, + "loss": 0.6932, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": 0.001974537968635559, + "rewards/margins": 0.0014208784559741616, + "rewards/rejected": 0.0005536594544537365, + "step": 130 + }, + { + "epoch": 0.01, + "learning_rate": 4.578155657292348e-07, + "logits/chosen": -3.083569288253784, + "logits/rejected": -3.181466579437256, + "logps/chosen": -403.43634033203125, + "logps/rejected": -419.53668212890625, + "loss": 0.6941, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": 0.0006010475335642695, + "rewards/margins": -0.0011253413977101445, + "rewards/rejected": 0.001726388931274414, + "step": 140 + }, + { + "epoch": 0.01, + "learning_rate": 4.905166775670374e-07, + "logits/chosen": -3.0899462699890137, + "logits/rejected": -3.0028645992279053, + "logps/chosen": -336.4407653808594, + "logps/rejected": -242.031005859375, + "loss": 0.6935, + "rewards/accuracies": 0.5, + "rewards/chosen": -8.147531480062753e-05, + "rewards/margins": -0.0008282337221316993, + "rewards/rejected": 0.0007467584800906479, + "step": 150 + }, + { + "epoch": 0.01, + "learning_rate": 5.232177894048398e-07, + "logits/chosen": -2.966787338256836, + "logits/rejected": -2.9958865642547607, + "logps/chosen": -369.6310119628906, + "logps/rejected": -245.83041381835938, + "loss": 0.6936, + "rewards/accuracies": 0.44999998807907104, + "rewards/chosen": -0.0004910840652883053, + "rewards/margins": -0.0005090809427201748, + "rewards/rejected": 1.799676101654768e-05, + "step": 160 + }, + { + "epoch": 0.01, + "learning_rate": 5.559189012426422e-07, + "logits/chosen": -2.934314250946045, + "logits/rejected": -2.9343628883361816, + "logps/chosen": -267.1063232421875, + "logps/rejected": -188.83877563476562, + "loss": 0.6926, + "rewards/accuracies": 0.44999998807907104, + "rewards/chosen": 0.0004375514108687639, + "rewards/margins": 0.0008714391151443124, + "rewards/rejected": -0.00043388744234107435, + "step": 170 + }, + { + "epoch": 0.01, + "learning_rate": 5.886200130804448e-07, + "logits/chosen": -3.018904209136963, + "logits/rejected": -3.013699531555176, + "logps/chosen": -162.3592987060547, + "logps/rejected": -218.61184692382812, + "loss": 0.693, + "rewards/accuracies": 0.4000000059604645, + "rewards/chosen": 0.00043491655378602445, + "rewards/margins": -0.00012707136920653284, + "rewards/rejected": 0.0005619878647848964, + "step": 180 + }, + { + "epoch": 0.01, + "learning_rate": 6.213211249182473e-07, + "logits/chosen": -3.078159809112549, + "logits/rejected": -2.9432177543640137, + "logps/chosen": -289.8063049316406, + "logps/rejected": -177.1880340576172, + "loss": 0.6934, + "rewards/accuracies": 0.4000000059604645, + "rewards/chosen": 0.00021298688079696149, + "rewards/margins": 0.00023004722606856376, + "rewards/rejected": -1.7060223399312235e-05, + "step": 190 + }, + { + "epoch": 0.01, + "learning_rate": 6.540222367560497e-07, + "logits/chosen": -2.8321168422698975, + "logits/rejected": -2.8416965007781982, + "logps/chosen": -281.98187255859375, + "logps/rejected": -315.5621643066406, + "loss": 0.6929, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": 0.002115556737408042, + "rewards/margins": 0.0005559118581004441, + "rewards/rejected": 0.0015596445882692933, + "step": 200 + }, + { + "epoch": 0.01, + "learning_rate": 6.867233485938523e-07, + "logits/chosen": -3.202807664871216, + "logits/rejected": -3.137254238128662, + "logps/chosen": -376.1246643066406, + "logps/rejected": -399.75103759765625, + "loss": 0.6945, + "rewards/accuracies": 0.30000001192092896, + "rewards/chosen": -0.0005506344023160636, + "rewards/margins": -0.004687961656600237, + "rewards/rejected": 0.004137327428907156, + "step": 210 + }, + { + "epoch": 0.01, + "learning_rate": 7.194244604316547e-07, + "logits/chosen": -2.784348726272583, + "logits/rejected": -2.907197952270508, + "logps/chosen": -316.67193603515625, + "logps/rejected": -334.51568603515625, + "loss": 0.6933, + "rewards/accuracies": 0.44999998807907104, + "rewards/chosen": 0.0006333760684356093, + "rewards/margins": -0.0016217123484238982, + "rewards/rejected": 0.002255088882520795, + "step": 220 + }, + { + "epoch": 0.02, + "learning_rate": 7.521255722694571e-07, + "logits/chosen": -2.9172425270080566, + "logits/rejected": -2.982654094696045, + "logps/chosen": -246.4900665283203, + "logps/rejected": -196.82415771484375, + "loss": 0.693, + "rewards/accuracies": 0.4000000059604645, + "rewards/chosen": 0.002549818018451333, + "rewards/margins": 0.0003842850273940712, + "rewards/rejected": 0.0021655329037457705, + "step": 230 + }, + { + "epoch": 0.02, + "learning_rate": 7.848266841072597e-07, + "logits/chosen": -2.964061737060547, + "logits/rejected": -2.9448022842407227, + "logps/chosen": -244.23294067382812, + "logps/rejected": -195.120361328125, + "loss": 0.6932, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": 0.002195965964347124, + "rewards/margins": 0.0011466979049146175, + "rewards/rejected": 0.001049267710186541, + "step": 240 + }, + { + "epoch": 0.02, + "learning_rate": 8.175277959450622e-07, + "logits/chosen": -2.729448080062866, + "logits/rejected": -2.699052333831787, + "logps/chosen": -286.503662109375, + "logps/rejected": -290.0101623535156, + "loss": 0.6926, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": 0.002826615469530225, + "rewards/margins": 0.00432937266305089, + "rewards/rejected": -0.0015027571935206652, + "step": 250 + }, + { + "epoch": 0.02, + "learning_rate": 8.502289077828648e-07, + "logits/chosen": -2.7071239948272705, + "logits/rejected": -2.8652503490448, + "logps/chosen": -195.9088897705078, + "logps/rejected": -269.6458435058594, + "loss": 0.6928, + "rewards/accuracies": 0.44999998807907104, + "rewards/chosen": 0.0018324966076761484, + "rewards/margins": 0.0005571646615862846, + "rewards/rejected": 0.0012753319460898638, + "step": 260 + }, + { + "epoch": 0.02, + "learning_rate": 8.829300196206672e-07, + "logits/chosen": -2.9909119606018066, + "logits/rejected": -2.9201207160949707, + "logps/chosen": -193.13424682617188, + "logps/rejected": -225.7820281982422, + "loss": 0.6929, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": 0.0022729868069291115, + "rewards/margins": 0.0003797583922278136, + "rewards/rejected": 0.001893228618428111, + "step": 270 + }, + { + "epoch": 0.02, + "learning_rate": 9.156311314584696e-07, + "logits/chosen": -2.9982759952545166, + "logits/rejected": -2.945038318634033, + "logps/chosen": -219.11135864257812, + "logps/rejected": -207.0333709716797, + "loss": 0.6929, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.0009125813958235085, + "rewards/margins": -0.0010643262648954988, + "rewards/rejected": 0.001976907718926668, + "step": 280 + }, + { + "epoch": 0.02, + "learning_rate": 9.483322432962722e-07, + "logits/chosen": -2.9865849018096924, + "logits/rejected": -2.938310146331787, + "logps/chosen": -243.83993530273438, + "logps/rejected": -197.4394989013672, + "loss": 0.6931, + "rewards/accuracies": 0.4000000059604645, + "rewards/chosen": 0.001292440458200872, + "rewards/margins": -0.000284919748082757, + "rewards/rejected": 0.001577360206283629, + "step": 290 + }, + { + "epoch": 0.02, + "learning_rate": 9.810333551340747e-07, + "logits/chosen": -2.7662692070007324, + "logits/rejected": -2.9064176082611084, + "logps/chosen": -311.7242431640625, + "logps/rejected": -374.3269958496094, + "loss": 0.693, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": 0.004182077944278717, + "rewards/margins": 0.00121305079665035, + "rewards/rejected": 0.002969027729704976, + "step": 300 + }, + { + "epoch": 0.02, + "learning_rate": 1.0137344669718771e-06, + "logits/chosen": -2.9435641765594482, + "logits/rejected": -2.9995861053466797, + "logps/chosen": -256.0497741699219, + "logps/rejected": -298.29425048828125, + "loss": 0.6931, + "rewards/accuracies": 0.3499999940395355, + "rewards/chosen": 0.00030184269417077303, + "rewards/margins": -0.0029051245655864477, + "rewards/rejected": 0.0032069676090031862, + "step": 310 + }, + { + "epoch": 0.02, + "learning_rate": 1.0464355788096796e-06, + "logits/chosen": -2.8672854900360107, + "logits/rejected": -2.7651145458221436, + "logps/chosen": -325.3626708984375, + "logps/rejected": -317.30511474609375, + "loss": 0.6929, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": 0.0017262229230254889, + "rewards/margins": 0.0012272644089534879, + "rewards/rejected": 0.0004989585722796619, + "step": 320 + }, + { + "epoch": 0.02, + "learning_rate": 1.079136690647482e-06, + "logits/chosen": -3.002739429473877, + "logits/rejected": -2.9137415885925293, + "logps/chosen": -290.84130859375, + "logps/rejected": -267.84649658203125, + "loss": 0.6928, + "rewards/accuracies": 0.44999998807907104, + "rewards/chosen": 0.0031630732119083405, + "rewards/margins": 0.0001625925360713154, + "rewards/rejected": 0.00300048035569489, + "step": 330 + }, + { + "epoch": 0.02, + "learning_rate": 1.1118378024852844e-06, + "logits/chosen": -3.0424742698669434, + "logits/rejected": -3.0423471927642822, + "logps/chosen": -244.46572875976562, + "logps/rejected": -247.05697631835938, + "loss": 0.6925, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": 0.0033019818365573883, + "rewards/margins": 0.00133128825109452, + "rewards/rejected": 0.00197069370187819, + "step": 340 + }, + { + "epoch": 0.02, + "learning_rate": 1.144538914323087e-06, + "logits/chosen": -3.0653843879699707, + "logits/rejected": -2.9315295219421387, + "logps/chosen": -260.92156982421875, + "logps/rejected": -227.8271026611328, + "loss": 0.6914, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.004591038916260004, + "rewards/margins": 0.00426831841468811, + "rewards/rejected": 0.00032272053067572415, + "step": 350 + }, + { + "epoch": 0.02, + "learning_rate": 1.1772400261608895e-06, + "logits/chosen": -2.91213059425354, + "logits/rejected": -2.748988628387451, + "logps/chosen": -259.0718078613281, + "logps/rejected": -212.82266235351562, + "loss": 0.6923, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": 0.0024882450234144926, + "rewards/margins": 0.0006938829901628196, + "rewards/rejected": 0.001794362091459334, + "step": 360 + }, + { + "epoch": 0.02, + "learning_rate": 1.2099411379986922e-06, + "logits/chosen": -2.947958469390869, + "logits/rejected": -3.1207051277160645, + "logps/chosen": -224.5135955810547, + "logps/rejected": -237.42453002929688, + "loss": 0.6929, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": 0.003422154113650322, + "rewards/margins": -0.0007809843518771231, + "rewards/rejected": 0.004203137941658497, + "step": 370 + }, + { + "epoch": 0.02, + "learning_rate": 1.2426422498364946e-06, + "logits/chosen": -2.9310691356658936, + "logits/rejected": -3.126197338104248, + "logps/chosen": -310.5855712890625, + "logps/rejected": -207.7636260986328, + "loss": 0.6915, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": 0.006997501011937857, + "rewards/margins": 0.004127983469516039, + "rewards/rejected": 0.0028695182409137487, + "step": 380 + }, + { + "epoch": 0.03, + "learning_rate": 1.2753433616742968e-06, + "logits/chosen": -3.098378896713257, + "logits/rejected": -3.0954604148864746, + "logps/chosen": -311.447021484375, + "logps/rejected": -359.23431396484375, + "loss": 0.6923, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": 0.007996728643774986, + "rewards/margins": 0.002210235456004739, + "rewards/rejected": 0.005786492954939604, + "step": 390 + }, + { + "epoch": 0.03, + "learning_rate": 1.3080444735120995e-06, + "logits/chosen": -2.9606876373291016, + "logits/rejected": -2.8155083656311035, + "logps/chosen": -328.29986572265625, + "logps/rejected": -201.92807006835938, + "loss": 0.6929, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": 0.006145277991890907, + "rewards/margins": 0.0019246510928496718, + "rewards/rejected": 0.004220626782625914, + "step": 400 + }, + { + "epoch": 0.03, + "learning_rate": 1.3407455853499021e-06, + "logits/chosen": -3.044910430908203, + "logits/rejected": -2.8663291931152344, + "logps/chosen": -228.4242706298828, + "logps/rejected": -193.3241729736328, + "loss": 0.6918, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 0.00872000027447939, + "rewards/margins": 0.003681318135932088, + "rewards/rejected": 0.005038681905716658, + "step": 410 + }, + { + "epoch": 0.03, + "learning_rate": 1.3734466971877046e-06, + "logits/chosen": -3.1175425052642822, + "logits/rejected": -3.088022470474243, + "logps/chosen": -310.7445983886719, + "logps/rejected": -404.0841064453125, + "loss": 0.6933, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.00814331416040659, + "rewards/margins": -0.0008761845529079437, + "rewards/rejected": 0.009019499644637108, + "step": 420 + }, + { + "epoch": 0.03, + "learning_rate": 1.406147809025507e-06, + "logits/chosen": -2.90427565574646, + "logits/rejected": -2.9754254817962646, + "logps/chosen": -296.58502197265625, + "logps/rejected": -254.1155548095703, + "loss": 0.6912, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 0.009763370268046856, + "rewards/margins": 0.005822093226015568, + "rewards/rejected": 0.0039412761107087135, + "step": 430 + }, + { + "epoch": 0.03, + "learning_rate": 1.4388489208633094e-06, + "logits/chosen": -2.9745967388153076, + "logits/rejected": -3.056553363800049, + "logps/chosen": -247.5413360595703, + "logps/rejected": -257.79071044921875, + "loss": 0.6929, + "rewards/accuracies": 0.4000000059604645, + "rewards/chosen": 0.007196736987680197, + "rewards/margins": -0.0012092979159206152, + "rewards/rejected": 0.008406035602092743, + "step": 440 + }, + { + "epoch": 0.03, + "learning_rate": 1.471550032701112e-06, + "logits/chosen": -2.9960484504699707, + "logits/rejected": -3.1356592178344727, + "logps/chosen": -264.06732177734375, + "logps/rejected": -300.70721435546875, + "loss": 0.6928, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": 0.010084071196615696, + "rewards/margins": -0.0002762650838121772, + "rewards/rejected": 0.010360335931181908, + "step": 450 + }, + { + "epoch": 0.03, + "learning_rate": 1.5042511445389143e-06, + "logits/chosen": -2.8759608268737793, + "logits/rejected": -2.946329116821289, + "logps/chosen": -257.097412109375, + "logps/rejected": -230.97933959960938, + "loss": 0.691, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.012902451679110527, + "rewards/margins": 0.006056067533791065, + "rewards/rejected": 0.006846384145319462, + "step": 460 + }, + { + "epoch": 0.03, + "learning_rate": 1.536952256376717e-06, + "logits/chosen": -2.8737595081329346, + "logits/rejected": -2.858184337615967, + "logps/chosen": -294.66680908203125, + "logps/rejected": -299.099365234375, + "loss": 0.6911, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": 0.010799551382660866, + "rewards/margins": 0.002541209105402231, + "rewards/rejected": 0.008258342742919922, + "step": 470 + }, + { + "epoch": 0.03, + "learning_rate": 1.5696533682145194e-06, + "logits/chosen": -3.0661721229553223, + "logits/rejected": -3.044696092605591, + "logps/chosen": -355.2105407714844, + "logps/rejected": -342.7800598144531, + "loss": 0.6922, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": 0.011583315208554268, + "rewards/margins": 0.0039892070926725864, + "rewards/rejected": 0.007594108581542969, + "step": 480 + }, + { + "epoch": 0.03, + "learning_rate": 1.602354480052322e-06, + "logits/chosen": -2.8834774494171143, + "logits/rejected": -2.9143149852752686, + "logps/chosen": -408.67523193359375, + "logps/rejected": -356.30718994140625, + "loss": 0.6921, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.010796618647873402, + "rewards/margins": 0.002751033054664731, + "rewards/rejected": 0.008045585826039314, + "step": 490 + }, + { + "epoch": 0.03, + "learning_rate": 1.6350555918901245e-06, + "logits/chosen": -2.9682867527008057, + "logits/rejected": -2.963850498199463, + "logps/chosen": -264.46136474609375, + "logps/rejected": -175.54135131835938, + "loss": 0.6913, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": 0.011705334298312664, + "rewards/margins": 0.0035117301158607006, + "rewards/rejected": 0.008193603716790676, + "step": 500 + }, + { + "epoch": 0.03, + "learning_rate": 1.6677567037279269e-06, + "logits/chosen": -2.9862213134765625, + "logits/rejected": -2.9082834720611572, + "logps/chosen": -236.3311309814453, + "logps/rejected": -214.0072021484375, + "loss": 0.6924, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": 0.012016872875392437, + "rewards/margins": 0.003492117626592517, + "rewards/rejected": 0.008524755015969276, + "step": 510 + }, + { + "epoch": 0.03, + "learning_rate": 1.7004578155657295e-06, + "logits/chosen": -2.89336895942688, + "logits/rejected": -2.966418504714966, + "logps/chosen": -427.30865478515625, + "logps/rejected": -328.8701477050781, + "loss": 0.691, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": 0.011949967592954636, + "rewards/margins": 0.0047841123305261135, + "rewards/rejected": 0.007165855262428522, + "step": 520 + }, + { + "epoch": 0.03, + "learning_rate": 1.7331589274035318e-06, + "logits/chosen": -3.179882287979126, + "logits/rejected": -3.214411497116089, + "logps/chosen": -300.58074951171875, + "logps/rejected": -305.2362976074219, + "loss": 0.6913, + "rewards/accuracies": 0.44999998807907104, + "rewards/chosen": 0.011264842934906483, + "rewards/margins": 0.0016359112923964858, + "rewards/rejected": 0.009628929197788239, + "step": 530 + }, + { + "epoch": 0.04, + "learning_rate": 1.7658600392413344e-06, + "logits/chosen": -3.1450603008270264, + "logits/rejected": -3.1157948970794678, + "logps/chosen": -300.3776550292969, + "logps/rejected": -366.01019287109375, + "loss": 0.6917, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": 0.013677257113158703, + "rewards/margins": 0.0008588259806856513, + "rewards/rejected": 0.012818431481719017, + "step": 540 + }, + { + "epoch": 0.04, + "learning_rate": 1.7985611510791368e-06, + "logits/chosen": -2.9594764709472656, + "logits/rejected": -2.9382336139678955, + "logps/chosen": -255.61941528320312, + "logps/rejected": -195.72076416015625, + "loss": 0.6907, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": 0.012641459703445435, + "rewards/margins": 0.004340741783380508, + "rewards/rejected": 0.008300717920064926, + "step": 550 + }, + { + "epoch": 0.04, + "learning_rate": 1.8312622629169393e-06, + "logits/chosen": -2.830198287963867, + "logits/rejected": -2.8257110118865967, + "logps/chosen": -336.34051513671875, + "logps/rejected": -330.2576904296875, + "loss": 0.6891, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": 0.017110275104641914, + "rewards/margins": 0.007845441810786724, + "rewards/rejected": 0.009264834225177765, + "step": 560 + }, + { + "epoch": 0.04, + "learning_rate": 1.8639633747547417e-06, + "logits/chosen": -2.9448599815368652, + "logits/rejected": -3.0247905254364014, + "logps/chosen": -265.4458312988281, + "logps/rejected": -292.64239501953125, + "loss": 0.6907, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.014689937233924866, + "rewards/margins": 0.0031159906648099422, + "rewards/rejected": 0.011573946103453636, + "step": 570 + }, + { + "epoch": 0.04, + "learning_rate": 1.8966644865925443e-06, + "logits/chosen": -2.987705707550049, + "logits/rejected": -3.077376365661621, + "logps/chosen": -253.3921356201172, + "logps/rejected": -312.4097900390625, + "loss": 0.6917, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.016644762828946114, + "rewards/margins": 0.0007716016261838377, + "rewards/rejected": 0.01587316021323204, + "step": 580 + }, + { + "epoch": 0.04, + "learning_rate": 1.9293655984303466e-06, + "logits/chosen": -3.165412425994873, + "logits/rejected": -2.9510579109191895, + "logps/chosen": -256.69708251953125, + "logps/rejected": -215.0804901123047, + "loss": 0.6899, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": 0.017473822459578514, + "rewards/margins": 0.008105012588202953, + "rewards/rejected": 0.009368810802698135, + "step": 590 + }, + { + "epoch": 0.04, + "learning_rate": 1.9620667102681494e-06, + "logits/chosen": -3.047368288040161, + "logits/rejected": -3.020981788635254, + "logps/chosen": -341.5492248535156, + "logps/rejected": -232.7269744873047, + "loss": 0.692, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.02070160023868084, + "rewards/margins": 0.008108451962471008, + "rewards/rejected": 0.01259315200150013, + "step": 600 + }, + { + "epoch": 0.04, + "learning_rate": 1.994767822105952e-06, + "logits/chosen": -2.967050075531006, + "logits/rejected": -2.8431944847106934, + "logps/chosen": -286.22247314453125, + "logps/rejected": -338.3892517089844, + "loss": 0.6911, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.018522150814533234, + "rewards/margins": 0.0019176944624632597, + "rewards/rejected": 0.016604457050561905, + "step": 610 + }, + { + "epoch": 0.04, + "learning_rate": 2.0274689339437543e-06, + "logits/chosen": -2.938920736312866, + "logits/rejected": -2.9256796836853027, + "logps/chosen": -335.93853759765625, + "logps/rejected": -350.2706604003906, + "loss": 0.6896, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": 0.01635252870619297, + "rewards/margins": 0.002685768064111471, + "rewards/rejected": 0.013666761107742786, + "step": 620 + }, + { + "epoch": 0.04, + "learning_rate": 2.0601700457815567e-06, + "logits/chosen": -2.9339334964752197, + "logits/rejected": -3.1059889793395996, + "logps/chosen": -275.78179931640625, + "logps/rejected": -298.95159912109375, + "loss": 0.691, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": 0.02482684887945652, + "rewards/margins": 0.006859474815428257, + "rewards/rejected": 0.017967374995350838, + "step": 630 + }, + { + "epoch": 0.04, + "learning_rate": 2.092871157619359e-06, + "logits/chosen": -3.0149481296539307, + "logits/rejected": -2.833794116973877, + "logps/chosen": -318.1592712402344, + "logps/rejected": -182.3878631591797, + "loss": 0.6875, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": 0.023765167221426964, + "rewards/margins": 0.015172283165156841, + "rewards/rejected": 0.008592883124947548, + "step": 640 + }, + { + "epoch": 0.04, + "learning_rate": 2.1255722694571616e-06, + "logits/chosen": -3.138582944869995, + "logits/rejected": -3.1221721172332764, + "logps/chosen": -320.1607971191406, + "logps/rejected": -356.0599670410156, + "loss": 0.6914, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.024007441475987434, + "rewards/margins": 0.0019604742992669344, + "rewards/rejected": 0.02204696647822857, + "step": 650 + }, + { + "epoch": 0.04, + "learning_rate": 2.158273381294964e-06, + "logits/chosen": -3.2076401710510254, + "logits/rejected": -3.1157641410827637, + "logps/chosen": -295.0274963378906, + "logps/rejected": -313.6297912597656, + "loss": 0.6884, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": 0.02200487069785595, + "rewards/margins": 0.0068700313568115234, + "rewards/rejected": 0.015134838409721851, + "step": 660 + }, + { + "epoch": 0.04, + "learning_rate": 2.190974493132767e-06, + "logits/chosen": -3.0300211906433105, + "logits/rejected": -2.9287357330322266, + "logps/chosen": -321.67987060546875, + "logps/rejected": -299.279541015625, + "loss": 0.6884, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.02429506927728653, + "rewards/margins": 0.011312566697597504, + "rewards/rejected": 0.012982504442334175, + "step": 670 + }, + { + "epoch": 0.04, + "learning_rate": 2.223675604970569e-06, + "logits/chosen": -3.0303335189819336, + "logits/rejected": -3.076803684234619, + "logps/chosen": -443.90203857421875, + "logps/rejected": -369.6851501464844, + "loss": 0.6865, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.0327363982796669, + "rewards/margins": 0.011801688000559807, + "rewards/rejected": 0.020934712141752243, + "step": 680 + }, + { + "epoch": 0.05, + "learning_rate": 2.2563767168083718e-06, + "logits/chosen": -3.053703784942627, + "logits/rejected": -3.116055727005005, + "logps/chosen": -245.5877227783203, + "logps/rejected": -265.5740966796875, + "loss": 0.6874, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.02502378821372986, + "rewards/margins": 0.010461007244884968, + "rewards/rejected": 0.014562780037522316, + "step": 690 + }, + { + "epoch": 0.05, + "learning_rate": 2.289077828646174e-06, + "logits/chosen": -3.0341150760650635, + "logits/rejected": -2.9276299476623535, + "logps/chosen": -271.6872253417969, + "logps/rejected": -285.0089416503906, + "loss": 0.689, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.025852534919977188, + "rewards/margins": 0.0027749096043407917, + "rewards/rejected": 0.023077625781297684, + "step": 700 + }, + { + "epoch": 0.05, + "learning_rate": 2.3217789404839766e-06, + "logits/chosen": -2.939833879470825, + "logits/rejected": -2.881133556365967, + "logps/chosen": -283.1850280761719, + "logps/rejected": -267.50799560546875, + "loss": 0.6891, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": 0.03299666941165924, + "rewards/margins": 0.004511397797614336, + "rewards/rejected": 0.028485268354415894, + "step": 710 + }, + { + "epoch": 0.05, + "learning_rate": 2.354480052321779e-06, + "logits/chosen": -2.884310483932495, + "logits/rejected": -2.8266072273254395, + "logps/chosen": -220.0033721923828, + "logps/rejected": -203.66299438476562, + "loss": 0.6868, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.03351835533976555, + "rewards/margins": 0.012706073932349682, + "rewards/rejected": 0.020812280476093292, + "step": 720 + }, + { + "epoch": 0.05, + "learning_rate": 2.3871811641595815e-06, + "logits/chosen": -2.9537875652313232, + "logits/rejected": -3.1177542209625244, + "logps/chosen": -272.7933044433594, + "logps/rejected": -263.432861328125, + "loss": 0.6881, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": 0.03301476314663887, + "rewards/margins": 0.009064620360732079, + "rewards/rejected": 0.023950140923261642, + "step": 730 + }, + { + "epoch": 0.05, + "learning_rate": 2.4198822759973843e-06, + "logits/chosen": -2.9899003505706787, + "logits/rejected": -3.02457332611084, + "logps/chosen": -252.21902465820312, + "logps/rejected": -266.54986572265625, + "loss": 0.6882, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": 0.034833673387765884, + "rewards/margins": 0.009695864282548428, + "rewards/rejected": 0.025137806311249733, + "step": 740 + }, + { + "epoch": 0.05, + "learning_rate": 2.4525833878351864e-06, + "logits/chosen": -3.0314903259277344, + "logits/rejected": -3.094717502593994, + "logps/chosen": -344.877197265625, + "logps/rejected": -282.93853759765625, + "loss": 0.6886, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": 0.040870435535907745, + "rewards/margins": 0.010108423419296741, + "rewards/rejected": 0.03076201304793358, + "step": 750 + }, + { + "epoch": 0.05, + "learning_rate": 2.4852844996729892e-06, + "logits/chosen": -3.1002516746520996, + "logits/rejected": -3.1319639682769775, + "logps/chosen": -328.6524963378906, + "logps/rejected": -280.7578430175781, + "loss": 0.6884, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": 0.03915861248970032, + "rewards/margins": 0.011181964538991451, + "rewards/rejected": 0.02797664701938629, + "step": 760 + }, + { + "epoch": 0.05, + "learning_rate": 2.5179856115107916e-06, + "logits/chosen": -2.9437851905822754, + "logits/rejected": -2.991842746734619, + "logps/chosen": -392.2171936035156, + "logps/rejected": -289.81976318359375, + "loss": 0.6837, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 0.047296278178691864, + "rewards/margins": 0.01911090686917305, + "rewards/rejected": 0.028185371309518814, + "step": 770 + }, + { + "epoch": 0.05, + "learning_rate": 2.5506867233485937e-06, + "logits/chosen": -2.99171781539917, + "logits/rejected": -2.9441757202148438, + "logps/chosen": -385.4003601074219, + "logps/rejected": -305.50103759765625, + "loss": 0.6866, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": 0.035985272377729416, + "rewards/margins": 0.02009792998433113, + "rewards/rejected": 0.015887338668107986, + "step": 780 + }, + { + "epoch": 0.05, + "learning_rate": 2.5833878351863965e-06, + "logits/chosen": -3.073352813720703, + "logits/rejected": -3.058485507965088, + "logps/chosen": -228.98611450195312, + "logps/rejected": -188.19522094726562, + "loss": 0.6887, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": 0.03437228873372078, + "rewards/margins": 0.005837684962898493, + "rewards/rejected": 0.028534606099128723, + "step": 790 + }, + { + "epoch": 0.05, + "learning_rate": 2.616088947024199e-06, + "logits/chosen": -3.0605218410491943, + "logits/rejected": -3.0869102478027344, + "logps/chosen": -414.29150390625, + "logps/rejected": -299.36578369140625, + "loss": 0.6888, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 0.04800860956311226, + "rewards/margins": 0.02571099065244198, + "rewards/rejected": 0.02229761704802513, + "step": 800 + }, + { + "epoch": 0.05, + "learning_rate": 2.6487900588620014e-06, + "logits/chosen": -3.1698508262634277, + "logits/rejected": -3.162097454071045, + "logps/chosen": -293.2779846191406, + "logps/rejected": -304.637451171875, + "loss": 0.6872, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": 0.04854557663202286, + "rewards/margins": 0.015368210151791573, + "rewards/rejected": 0.033177368342876434, + "step": 810 + }, + { + "epoch": 0.05, + "learning_rate": 2.6814911706998042e-06, + "logits/chosen": -3.058969020843506, + "logits/rejected": -2.893124580383301, + "logps/chosen": -290.9662170410156, + "logps/rejected": -267.09930419921875, + "loss": 0.6911, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": 0.03650563582777977, + "rewards/margins": 0.011088610626757145, + "rewards/rejected": 0.02541702426970005, + "step": 820 + }, + { + "epoch": 0.05, + "learning_rate": 2.7141922825376067e-06, + "logits/chosen": -3.001389265060425, + "logits/rejected": -2.7879388332366943, + "logps/chosen": -376.96087646484375, + "logps/rejected": -301.4887390136719, + "loss": 0.6913, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": 0.042007606476545334, + "rewards/margins": 0.006915568374097347, + "rewards/rejected": 0.035092033445835114, + "step": 830 + }, + { + "epoch": 0.05, + "learning_rate": 2.746893394375409e-06, + "logits/chosen": -2.858647108078003, + "logits/rejected": -2.8301475048065186, + "logps/chosen": -421.82763671875, + "logps/rejected": -453.30963134765625, + "loss": 0.6887, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": 0.04595818370580673, + "rewards/margins": 0.007595541886985302, + "rewards/rejected": 0.038362644612789154, + "step": 840 + }, + { + "epoch": 0.06, + "learning_rate": 2.779594506213211e-06, + "logits/chosen": -3.091282844543457, + "logits/rejected": -2.925567865371704, + "logps/chosen": -290.15289306640625, + "logps/rejected": -367.5709533691406, + "loss": 0.6822, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": 0.04890549182891846, + "rewards/margins": 0.02194773219525814, + "rewards/rejected": 0.026957765221595764, + "step": 850 + }, + { + "epoch": 0.06, + "learning_rate": 2.812295618051014e-06, + "logits/chosen": -2.9579873085021973, + "logits/rejected": -3.16754412651062, + "logps/chosen": -252.59719848632812, + "logps/rejected": -273.73052978515625, + "loss": 0.6858, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": 0.041400037705898285, + "rewards/margins": 0.010079341940581799, + "rewards/rejected": 0.03132069483399391, + "step": 860 + }, + { + "epoch": 0.06, + "learning_rate": 2.8449967298888164e-06, + "logits/chosen": -3.0155527591705322, + "logits/rejected": -3.0040760040283203, + "logps/chosen": -336.65216064453125, + "logps/rejected": -299.76556396484375, + "loss": 0.6842, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": 0.051180075854063034, + "rewards/margins": 0.01540394313633442, + "rewards/rejected": 0.035776130855083466, + "step": 870 + }, + { + "epoch": 0.06, + "learning_rate": 2.877697841726619e-06, + "logits/chosen": -3.0651357173919678, + "logits/rejected": -3.054041624069214, + "logps/chosen": -378.2223205566406, + "logps/rejected": -358.79962158203125, + "loss": 0.6847, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": 0.06589755415916443, + "rewards/margins": 0.025782927870750427, + "rewards/rejected": 0.0401146337389946, + "step": 880 + }, + { + "epoch": 0.06, + "learning_rate": 2.9103989535644217e-06, + "logits/chosen": -2.899027109146118, + "logits/rejected": -2.873547315597534, + "logps/chosen": -273.908447265625, + "logps/rejected": -287.2727355957031, + "loss": 0.6822, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.05353100225329399, + "rewards/margins": 0.02129439450800419, + "rewards/rejected": 0.032236598432064056, + "step": 890 + }, + { + "epoch": 0.06, + "learning_rate": 2.943100065402224e-06, + "logits/chosen": -2.897641658782959, + "logits/rejected": -2.8532097339630127, + "logps/chosen": -222.2373504638672, + "logps/rejected": -259.68011474609375, + "loss": 0.6869, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": 0.036358319222927094, + "rewards/margins": 0.0070862360298633575, + "rewards/rejected": 0.029272085055708885, + "step": 900 + }, + { + "epoch": 0.06, + "learning_rate": 2.9758011772400266e-06, + "logits/chosen": -3.010063648223877, + "logits/rejected": -2.952120542526245, + "logps/chosen": -242.9051971435547, + "logps/rejected": -153.533447265625, + "loss": 0.6789, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 0.05027133971452713, + "rewards/margins": 0.031339433044195175, + "rewards/rejected": 0.018931902945041656, + "step": 910 + }, + { + "epoch": 0.06, + "learning_rate": 3.0085022890778286e-06, + "logits/chosen": -2.9196720123291016, + "logits/rejected": -2.8785929679870605, + "logps/chosen": -285.1444396972656, + "logps/rejected": -218.13412475585938, + "loss": 0.6793, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.05185414478182793, + "rewards/margins": 0.021058565005660057, + "rewards/rejected": 0.030795583501458168, + "step": 920 + }, + { + "epoch": 0.06, + "learning_rate": 3.0412034009156314e-06, + "logits/chosen": -2.8984265327453613, + "logits/rejected": -2.9644322395324707, + "logps/chosen": -315.30517578125, + "logps/rejected": -333.4654846191406, + "loss": 0.6813, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": 0.05165805667638779, + "rewards/margins": 0.022742148488759995, + "rewards/rejected": 0.028915906324982643, + "step": 930 + }, + { + "epoch": 0.06, + "learning_rate": 3.073904512753434e-06, + "logits/chosen": -3.0516133308410645, + "logits/rejected": -3.0722031593322754, + "logps/chosen": -376.10137939453125, + "logps/rejected": -313.91754150390625, + "loss": 0.6813, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": 0.055447887629270554, + "rewards/margins": 0.014918354339897633, + "rewards/rejected": 0.040529537945985794, + "step": 940 + }, + { + "epoch": 0.06, + "learning_rate": 3.1066056245912363e-06, + "logits/chosen": -2.8814139366149902, + "logits/rejected": -2.923527479171753, + "logps/chosen": -336.75439453125, + "logps/rejected": -280.9449768066406, + "loss": 0.6861, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": 0.04855465143918991, + "rewards/margins": 0.013142083771526814, + "rewards/rejected": 0.03541256859898567, + "step": 950 + }, + { + "epoch": 0.06, + "learning_rate": 3.1393067364290387e-06, + "logits/chosen": -2.878671169281006, + "logits/rejected": -2.9011781215667725, + "logps/chosen": -237.6183624267578, + "logps/rejected": -273.76824951171875, + "loss": 0.6927, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": 0.03925769031047821, + "rewards/margins": 0.007788626942783594, + "rewards/rejected": 0.031469058245420456, + "step": 960 + }, + { + "epoch": 0.06, + "learning_rate": 3.1720078482668416e-06, + "logits/chosen": -3.128509521484375, + "logits/rejected": -3.1664719581604004, + "logps/chosen": -334.951904296875, + "logps/rejected": -304.9898681640625, + "loss": 0.675, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 0.0688043162226677, + "rewards/margins": 0.044605594128370285, + "rewards/rejected": 0.02419872209429741, + "step": 970 + }, + { + "epoch": 0.06, + "learning_rate": 3.204708960104644e-06, + "logits/chosen": -3.01989483833313, + "logits/rejected": -2.9897732734680176, + "logps/chosen": -215.50637817382812, + "logps/rejected": -181.29798889160156, + "loss": 0.6831, + "rewards/accuracies": 0.44999998807907104, + "rewards/chosen": 0.04856652021408081, + "rewards/margins": 0.023396335542201996, + "rewards/rejected": 0.025170186534523964, + "step": 980 + }, + { + "epoch": 0.06, + "learning_rate": 3.237410071942446e-06, + "logits/chosen": -3.1784377098083496, + "logits/rejected": -3.093890905380249, + "logps/chosen": -325.9986877441406, + "logps/rejected": -321.07208251953125, + "loss": 0.678, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 0.0696471557021141, + "rewards/margins": 0.027126217260956764, + "rewards/rejected": 0.04252093285322189, + "step": 990 + }, + { + "epoch": 0.07, + "learning_rate": 3.270111183780249e-06, + "logits/chosen": -2.994716167449951, + "logits/rejected": -3.0044682025909424, + "logps/chosen": -211.45425415039062, + "logps/rejected": -274.2388000488281, + "loss": 0.6852, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.04428134113550186, + "rewards/margins": 0.02824358269572258, + "rewards/rejected": 0.016037756577134132, + "step": 1000 + }, + { + "epoch": 0.07, + "learning_rate": 3.3028122956180513e-06, + "logits/chosen": -3.0379650592803955, + "logits/rejected": -2.964712142944336, + "logps/chosen": -318.2586364746094, + "logps/rejected": -241.7306671142578, + "loss": 0.6803, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.04623536020517349, + "rewards/margins": 0.026243770495057106, + "rewards/rejected": 0.019991587847471237, + "step": 1010 + }, + { + "epoch": 0.07, + "learning_rate": 3.3355134074558538e-06, + "logits/chosen": -2.9493517875671387, + "logits/rejected": -2.9502549171447754, + "logps/chosen": -335.3736572265625, + "logps/rejected": -308.6483154296875, + "loss": 0.6685, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.07033336162567139, + "rewards/margins": 0.03989529609680176, + "rewards/rejected": 0.030438074842095375, + "step": 1020 + }, + { + "epoch": 0.07, + "learning_rate": 3.368214519293656e-06, + "logits/chosen": -3.053618907928467, + "logits/rejected": -3.0059900283813477, + "logps/chosen": -251.8245391845703, + "logps/rejected": -264.260009765625, + "loss": 0.6847, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": 0.03353007882833481, + "rewards/margins": 0.0037069707177579403, + "rewards/rejected": 0.02982310950756073, + "step": 1030 + }, + { + "epoch": 0.07, + "learning_rate": 3.400915631131459e-06, + "logits/chosen": -2.9938790798187256, + "logits/rejected": -2.943967342376709, + "logps/chosen": -252.6847381591797, + "logps/rejected": -252.75265502929688, + "loss": 0.6747, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": 0.05386337637901306, + "rewards/margins": 0.03166833892464638, + "rewards/rejected": 0.022195037454366684, + "step": 1040 + }, + { + "epoch": 0.07, + "learning_rate": 3.4336167429692615e-06, + "logits/chosen": -2.7841649055480957, + "logits/rejected": -2.8269340991973877, + "logps/chosen": -363.76641845703125, + "logps/rejected": -302.6523742675781, + "loss": 0.6722, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": 0.07321266829967499, + "rewards/margins": 0.047675829380750656, + "rewards/rejected": 0.025536835193634033, + "step": 1050 + }, + { + "epoch": 0.07, + "learning_rate": 3.4663178548070635e-06, + "logits/chosen": -3.1110241413116455, + "logits/rejected": -3.0979695320129395, + "logps/chosen": -289.6631774902344, + "logps/rejected": -406.4927673339844, + "loss": 0.6832, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": 0.06365511566400528, + "rewards/margins": 0.007427097763866186, + "rewards/rejected": 0.05622801184654236, + "step": 1060 + }, + { + "epoch": 0.07, + "learning_rate": 3.499018966644866e-06, + "logits/chosen": -3.257605791091919, + "logits/rejected": -3.072235345840454, + "logps/chosen": -301.8328552246094, + "logps/rejected": -260.7064208984375, + "loss": 0.6811, + "rewards/accuracies": 0.44999998807907104, + "rewards/chosen": 0.07253851741552353, + "rewards/margins": 0.016410846263170242, + "rewards/rejected": 0.056127674877643585, + "step": 1070 + }, + { + "epoch": 0.07, + "learning_rate": 3.531720078482669e-06, + "logits/chosen": -2.932795524597168, + "logits/rejected": -2.9286468029022217, + "logps/chosen": -388.4093017578125, + "logps/rejected": -298.8544921875, + "loss": 0.6792, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": 0.056833960115909576, + "rewards/margins": 0.02518545091152191, + "rewards/rejected": 0.031648509204387665, + "step": 1080 + }, + { + "epoch": 0.07, + "learning_rate": 3.5644211903204712e-06, + "logits/chosen": -3.0924220085144043, + "logits/rejected": -3.116910457611084, + "logps/chosen": -334.62994384765625, + "logps/rejected": -260.61614990234375, + "loss": 0.6777, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.063229039311409, + "rewards/margins": 0.02396319806575775, + "rewards/rejected": 0.03926585242152214, + "step": 1090 + }, + { + "epoch": 0.07, + "learning_rate": 3.5971223021582737e-06, + "logits/chosen": -2.729091167449951, + "logits/rejected": -2.701641082763672, + "logps/chosen": -222.6670684814453, + "logps/rejected": -199.57400512695312, + "loss": 0.6692, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 0.06342534720897675, + "rewards/margins": 0.03789401054382324, + "rewards/rejected": 0.02553134225308895, + "step": 1100 + }, + { + "epoch": 0.07, + "learning_rate": 3.6298234139960765e-06, + "logits/chosen": -3.0402450561523438, + "logits/rejected": -3.112766981124878, + "logps/chosen": -298.9159240722656, + "logps/rejected": -251.3931884765625, + "loss": 0.6778, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.07815130054950714, + "rewards/margins": 0.04820716381072998, + "rewards/rejected": 0.029944131150841713, + "step": 1110 + }, + { + "epoch": 0.07, + "learning_rate": 3.6625245258338785e-06, + "logits/chosen": -2.9670872688293457, + "logits/rejected": -3.015652656555176, + "logps/chosen": -225.95596313476562, + "logps/rejected": -209.48294067382812, + "loss": 0.6714, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": 0.06183785945177078, + "rewards/margins": 0.041525088250637054, + "rewards/rejected": 0.02031276375055313, + "step": 1120 + }, + { + "epoch": 0.07, + "learning_rate": 3.695225637671681e-06, + "logits/chosen": -2.734419584274292, + "logits/rejected": -2.9420053958892822, + "logps/chosen": -406.99700927734375, + "logps/rejected": -385.1966552734375, + "loss": 0.6652, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 0.06387855857610703, + "rewards/margins": 0.052874863147735596, + "rewards/rejected": 0.011003690771758556, + "step": 1130 + }, + { + "epoch": 0.07, + "learning_rate": 3.7279267495094834e-06, + "logits/chosen": -2.891770839691162, + "logits/rejected": -2.858868360519409, + "logps/chosen": -343.5257263183594, + "logps/rejected": -374.30633544921875, + "loss": 0.6808, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": 0.04993007704615593, + "rewards/margins": 0.007034347392618656, + "rewards/rejected": 0.04289572685956955, + "step": 1140 + }, + { + "epoch": 0.08, + "learning_rate": 3.7606278613472863e-06, + "logits/chosen": -2.916008472442627, + "logits/rejected": -2.9267024993896484, + "logps/chosen": -300.3416442871094, + "logps/rejected": -254.60079956054688, + "loss": 0.6699, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.06914080679416656, + "rewards/margins": 0.05962265655398369, + "rewards/rejected": 0.009518155828118324, + "step": 1150 + }, + { + "epoch": 0.08, + "learning_rate": 3.7933289731850887e-06, + "logits/chosen": -3.080824851989746, + "logits/rejected": -3.029308557510376, + "logps/chosen": -468.10479736328125, + "logps/rejected": -330.76226806640625, + "loss": 0.6648, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 0.09183225780725479, + "rewards/margins": 0.09566021710634232, + "rewards/rejected": -0.0038279606960713863, + "step": 1160 + }, + { + "epoch": 0.08, + "learning_rate": 3.826030085022891e-06, + "logits/chosen": -2.990856647491455, + "logits/rejected": -3.072279453277588, + "logps/chosen": -289.534423828125, + "logps/rejected": -258.0203857421875, + "loss": 0.6807, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": 0.0335356742143631, + "rewards/margins": 0.013129748404026031, + "rewards/rejected": 0.020405925810337067, + "step": 1170 + }, + { + "epoch": 0.08, + "learning_rate": 3.858731196860693e-06, + "logits/chosen": -3.14605975151062, + "logits/rejected": -3.1897926330566406, + "logps/chosen": -330.21356201171875, + "logps/rejected": -374.36376953125, + "loss": 0.6679, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": 0.057777903974056244, + "rewards/margins": 0.040721334517002106, + "rewards/rejected": 0.017056571319699287, + "step": 1180 + }, + { + "epoch": 0.08, + "learning_rate": 3.891432308698496e-06, + "logits/chosen": -3.0566301345825195, + "logits/rejected": -2.830443859100342, + "logps/chosen": -398.5063171386719, + "logps/rejected": -262.1009521484375, + "loss": 0.6651, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 0.07374347746372223, + "rewards/margins": 0.08343470841646194, + "rewards/rejected": -0.009691240265965462, + "step": 1190 + }, + { + "epoch": 0.08, + "learning_rate": 3.924133420536299e-06, + "logits/chosen": -2.9240617752075195, + "logits/rejected": -2.9756927490234375, + "logps/chosen": -292.318359375, + "logps/rejected": -272.2164001464844, + "loss": 0.6574, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 0.04478208348155022, + "rewards/margins": 0.09882897138595581, + "rewards/rejected": -0.054046887904405594, + "step": 1200 + }, + { + "epoch": 0.08, + "learning_rate": 3.956834532374101e-06, + "logits/chosen": -2.7908530235290527, + "logits/rejected": -2.7424261569976807, + "logps/chosen": -262.8086853027344, + "logps/rejected": -338.43011474609375, + "loss": 0.6779, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.008423469960689545, + "rewards/margins": 0.039038076996803284, + "rewards/rejected": -0.04746154695749283, + "step": 1210 + }, + { + "epoch": 0.08, + "learning_rate": 3.989535644211904e-06, + "logits/chosen": -3.07283878326416, + "logits/rejected": -3.0677459239959717, + "logps/chosen": -522.1658935546875, + "logps/rejected": -308.08135986328125, + "loss": 0.6565, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.06819725036621094, + "rewards/margins": 0.12000145018100739, + "rewards/rejected": -0.051804203540086746, + "step": 1220 + }, + { + "epoch": 0.08, + "learning_rate": 4.022236756049706e-06, + "logits/chosen": -3.0104000568389893, + "logits/rejected": -2.896672248840332, + "logps/chosen": -339.0327453613281, + "logps/rejected": -285.937744140625, + "loss": 0.6618, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": 0.02467339299619198, + "rewards/margins": 0.024880170822143555, + "rewards/rejected": -0.00020677558495663106, + "step": 1230 + }, + { + "epoch": 0.08, + "learning_rate": 4.054937867887509e-06, + "logits/chosen": -2.83773136138916, + "logits/rejected": -2.8805081844329834, + "logps/chosen": -299.5276794433594, + "logps/rejected": -322.9584655761719, + "loss": 0.6599, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": 0.012974822893738747, + "rewards/margins": 0.0379905179142952, + "rewards/rejected": -0.0250156931579113, + "step": 1240 + }, + { + "epoch": 0.08, + "learning_rate": 4.087638979725311e-06, + "logits/chosen": -2.9484379291534424, + "logits/rejected": -2.947218418121338, + "logps/chosen": -354.7485046386719, + "logps/rejected": -372.19720458984375, + "loss": 0.6664, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": 0.0066092670895159245, + "rewards/margins": 0.057695530354976654, + "rewards/rejected": -0.051086265593767166, + "step": 1250 + }, + { + "epoch": 0.08, + "learning_rate": 4.1203400915631135e-06, + "logits/chosen": -2.855781078338623, + "logits/rejected": -2.8165206909179688, + "logps/chosen": -251.22183227539062, + "logps/rejected": -235.73709106445312, + "loss": 0.6698, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": 0.011250384151935577, + "rewards/margins": 0.05445141717791557, + "rewards/rejected": -0.043201036751270294, + "step": 1260 + }, + { + "epoch": 0.08, + "learning_rate": 4.153041203400916e-06, + "logits/chosen": -3.0679378509521484, + "logits/rejected": -2.857640504837036, + "logps/chosen": -346.5669860839844, + "logps/rejected": -250.6300506591797, + "loss": 0.6505, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.043904975056648254, + "rewards/margins": 0.10966650396585464, + "rewards/rejected": -0.06576154381036758, + "step": 1270 + }, + { + "epoch": 0.08, + "learning_rate": 4.185742315238718e-06, + "logits/chosen": -2.9014155864715576, + "logits/rejected": -3.046358585357666, + "logps/chosen": -319.28717041015625, + "logps/rejected": -307.1815490722656, + "loss": 0.6549, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 0.1001446470618248, + "rewards/margins": 0.11425294727087021, + "rewards/rejected": -0.014108309522271156, + "step": 1280 + }, + { + "epoch": 0.08, + "learning_rate": 4.218443427076521e-06, + "logits/chosen": -2.9250168800354004, + "logits/rejected": -2.9453256130218506, + "logps/chosen": -322.75653076171875, + "logps/rejected": -365.05224609375, + "loss": 0.673, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": 0.030087579041719437, + "rewards/margins": 0.05422119423747063, + "rewards/rejected": -0.024133607745170593, + "step": 1290 + }, + { + "epoch": 0.09, + "learning_rate": 4.251144538914323e-06, + "logits/chosen": -3.013686418533325, + "logits/rejected": -3.0016348361968994, + "logps/chosen": -302.3190002441406, + "logps/rejected": -265.59869384765625, + "loss": 0.6756, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.006552050355821848, + "rewards/margins": 0.08697822690010071, + "rewards/rejected": -0.0804261788725853, + "step": 1300 + }, + { + "epoch": 0.09, + "learning_rate": 4.283845650752126e-06, + "logits/chosen": -3.0734734535217285, + "logits/rejected": -3.084721803665161, + "logps/chosen": -247.04159545898438, + "logps/rejected": -196.4541473388672, + "loss": 0.6785, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.0005421757814474404, + "rewards/margins": 0.04514998197555542, + "rewards/rejected": -0.04460780695080757, + "step": 1310 + }, + { + "epoch": 0.09, + "learning_rate": 4.316546762589928e-06, + "logits/chosen": -3.0271167755126953, + "logits/rejected": -3.0676238536834717, + "logps/chosen": -274.3728942871094, + "logps/rejected": -241.3634033203125, + "loss": 0.6439, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": 0.006143507547676563, + "rewards/margins": 0.06333483010530472, + "rewards/rejected": -0.057191312313079834, + "step": 1320 + }, + { + "epoch": 0.09, + "learning_rate": 4.349247874427731e-06, + "logits/chosen": -2.9338698387145996, + "logits/rejected": -2.834819793701172, + "logps/chosen": -179.49215698242188, + "logps/rejected": -217.63473510742188, + "loss": 0.6657, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.00903988629579544, + "rewards/margins": 0.05287107825279236, + "rewards/rejected": -0.04383119195699692, + "step": 1330 + }, + { + "epoch": 0.09, + "learning_rate": 4.381948986265534e-06, + "logits/chosen": -2.994556427001953, + "logits/rejected": -3.03247332572937, + "logps/chosen": -291.14678955078125, + "logps/rejected": -273.00640869140625, + "loss": 0.6645, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 0.05871891975402832, + "rewards/margins": 0.06984923779964447, + "rewards/rejected": -0.011130331084132195, + "step": 1340 + }, + { + "epoch": 0.09, + "learning_rate": 4.414650098103336e-06, + "logits/chosen": -3.00968074798584, + "logits/rejected": -3.035393238067627, + "logps/chosen": -214.24575805664062, + "logps/rejected": -244.91378784179688, + "loss": 0.6518, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 0.05464862659573555, + "rewards/margins": 0.08675499260425568, + "rewards/rejected": -0.03210636228322983, + "step": 1350 + }, + { + "epoch": 0.09, + "learning_rate": 4.447351209941138e-06, + "logits/chosen": -2.6517372131347656, + "logits/rejected": -2.600365161895752, + "logps/chosen": -292.5060119628906, + "logps/rejected": -240.5036163330078, + "loss": 0.6443, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 0.04661313816905022, + "rewards/margins": 0.09707652777433395, + "rewards/rejected": -0.05046338960528374, + "step": 1360 + }, + { + "epoch": 0.09, + "learning_rate": 4.480052321778941e-06, + "logits/chosen": -2.8098044395446777, + "logits/rejected": -2.916383981704712, + "logps/chosen": -235.16897583007812, + "logps/rejected": -230.39395141601562, + "loss": 0.6535, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.04006228223443031, + "rewards/margins": 0.0514717698097229, + "rewards/rejected": -0.09153404831886292, + "step": 1370 + }, + { + "epoch": 0.09, + "learning_rate": 4.5127534336167435e-06, + "logits/chosen": -2.9116311073303223, + "logits/rejected": -2.8839030265808105, + "logps/chosen": -372.9744873046875, + "logps/rejected": -271.71209716796875, + "loss": 0.6497, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.04008869081735611, + "rewards/margins": 0.08455850183963776, + "rewards/rejected": -0.12464718520641327, + "step": 1380 + }, + { + "epoch": 0.09, + "learning_rate": 4.5454545454545455e-06, + "logits/chosen": -2.9106247425079346, + "logits/rejected": -2.7901999950408936, + "logps/chosen": -322.4967346191406, + "logps/rejected": -275.24359130859375, + "loss": 0.6587, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.011935449205338955, + "rewards/margins": 0.08074381202459335, + "rewards/rejected": -0.09267926216125488, + "step": 1390 + }, + { + "epoch": 0.09, + "learning_rate": 4.578155657292348e-06, + "logits/chosen": -2.784376621246338, + "logits/rejected": -2.7655889987945557, + "logps/chosen": -238.74771118164062, + "logps/rejected": -277.7735290527344, + "loss": 0.6659, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.01576855033636093, + "rewards/margins": 0.09559451043605804, + "rewards/rejected": -0.11136305332183838, + "step": 1400 + }, + { + "epoch": 0.09, + "learning_rate": 4.610856769130151e-06, + "logits/chosen": -2.8557660579681396, + "logits/rejected": -2.841609477996826, + "logps/chosen": -293.3002624511719, + "logps/rejected": -261.0894470214844, + "loss": 0.6288, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": 0.0980224460363388, + "rewards/margins": 0.13837285339832306, + "rewards/rejected": -0.04035038873553276, + "step": 1410 + }, + { + "epoch": 0.09, + "learning_rate": 4.643557880967953e-06, + "logits/chosen": -2.902876138687134, + "logits/rejected": -2.8389651775360107, + "logps/chosen": -301.04254150390625, + "logps/rejected": -238.2169189453125, + "loss": 0.6573, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": 0.08447854965925217, + "rewards/margins": 0.1570580005645752, + "rewards/rejected": -0.07257945090532303, + "step": 1420 + }, + { + "epoch": 0.09, + "learning_rate": 4.676258992805755e-06, + "logits/chosen": -2.901109457015991, + "logits/rejected": -2.942526340484619, + "logps/chosen": -257.54705810546875, + "logps/rejected": -282.9903259277344, + "loss": 0.6491, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.004095321986824274, + "rewards/margins": 0.07804937660694122, + "rewards/rejected": -0.08214469999074936, + "step": 1430 + }, + { + "epoch": 0.09, + "learning_rate": 4.708960104643558e-06, + "logits/chosen": -2.820150852203369, + "logits/rejected": -2.955181837081909, + "logps/chosen": -300.74249267578125, + "logps/rejected": -280.85546875, + "loss": 0.636, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 0.032780297100543976, + "rewards/margins": 0.1143231987953186, + "rewards/rejected": -0.08154290169477463, + "step": 1440 + }, + { + "epoch": 0.09, + "learning_rate": 4.741661216481361e-06, + "logits/chosen": -2.9988656044006348, + "logits/rejected": -3.0337607860565186, + "logps/chosen": -321.4072570800781, + "logps/rejected": -303.68927001953125, + "loss": 0.6386, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.040198661386966705, + "rewards/margins": 0.09094201028347015, + "rewards/rejected": -0.13114067912101746, + "step": 1450 + }, + { + "epoch": 0.1, + "learning_rate": 4.774362328319163e-06, + "logits/chosen": -2.9808907508850098, + "logits/rejected": -2.916173219680786, + "logps/chosen": -385.5293884277344, + "logps/rejected": -329.20361328125, + "loss": 0.6366, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.050022225826978683, + "rewards/margins": 0.1688729077577591, + "rewards/rejected": -0.21889512240886688, + "step": 1460 + }, + { + "epoch": 0.1, + "learning_rate": 4.807063440156966e-06, + "logits/chosen": -2.7928836345672607, + "logits/rejected": -2.7777328491210938, + "logps/chosen": -277.9568786621094, + "logps/rejected": -321.45867919921875, + "loss": 0.6426, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.10680846124887466, + "rewards/margins": 0.09730565547943115, + "rewards/rejected": -0.20411410927772522, + "step": 1470 + }, + { + "epoch": 0.1, + "learning_rate": 4.839764551994769e-06, + "logits/chosen": -2.968703269958496, + "logits/rejected": -2.9341697692871094, + "logps/chosen": -387.0003356933594, + "logps/rejected": -307.2288513183594, + "loss": 0.6598, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.08723671734333038, + "rewards/margins": 0.09109548479318619, + "rewards/rejected": -0.17833219468593597, + "step": 1480 + }, + { + "epoch": 0.1, + "learning_rate": 4.872465663832571e-06, + "logits/chosen": -2.7156434059143066, + "logits/rejected": -2.7810118198394775, + "logps/chosen": -306.45513916015625, + "logps/rejected": -310.51678466796875, + "loss": 0.644, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.07703594118356705, + "rewards/margins": 0.19709235429763794, + "rewards/rejected": -0.2741282880306244, + "step": 1490 + }, + { + "epoch": 0.1, + "learning_rate": 4.905166775670373e-06, + "logits/chosen": -2.6941978931427, + "logits/rejected": -2.7313380241394043, + "logps/chosen": -267.1389465332031, + "logps/rejected": -262.7569580078125, + "loss": 0.6278, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.120394766330719, + "rewards/margins": 0.12228628247976303, + "rewards/rejected": -0.24268105626106262, + "step": 1500 + }, + { + "epoch": 0.1, + "learning_rate": 4.9378678875081756e-06, + "logits/chosen": -2.829664945602417, + "logits/rejected": -2.835359573364258, + "logps/chosen": -191.07113647460938, + "logps/rejected": -161.7421875, + "loss": 0.6533, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.02066131681203842, + "rewards/margins": 0.06928695738315582, + "rewards/rejected": -0.08994828164577484, + "step": 1510 + }, + { + "epoch": 0.1, + "learning_rate": 4.9705689993459784e-06, + "logits/chosen": -2.724827527999878, + "logits/rejected": -2.694981813430786, + "logps/chosen": -266.87750244140625, + "logps/rejected": -233.462890625, + "loss": 0.6835, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": 0.0038128546439111233, + "rewards/margins": 0.025530481711030006, + "rewards/rejected": -0.02171761728823185, + "step": 1520 + }, + { + "epoch": 0.1, + "learning_rate": 4.999999934793849e-06, + "logits/chosen": -2.8814034461975098, + "logits/rejected": -2.866196870803833, + "logps/chosen": -244.37026977539062, + "logps/rejected": -221.701416015625, + "loss": 0.6557, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": 0.006678455974906683, + "rewards/margins": 0.09985242784023285, + "rewards/rejected": -0.09317396581172943, + "step": 1530 + }, + { + "epoch": 0.1, + "learning_rate": 4.999992110059814e-06, + "logits/chosen": -2.891392469406128, + "logits/rejected": -2.858283519744873, + "logps/chosen": -229.2855682373047, + "logps/rejected": -294.8144226074219, + "loss": 0.6374, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.03305545449256897, + "rewards/margins": 0.15080586075782776, + "rewards/rejected": -0.18386133015155792, + "step": 1540 + }, + { + "epoch": 0.1, + "learning_rate": 4.999971244142299e-06, + "logits/chosen": -2.71058988571167, + "logits/rejected": -2.7885243892669678, + "logps/chosen": -405.15472412109375, + "logps/rejected": -338.3912658691406, + "loss": 0.6308, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.047473885118961334, + "rewards/margins": 0.2326829433441162, + "rewards/rejected": -0.28015682101249695, + "step": 1550 + }, + { + "epoch": 0.1, + "learning_rate": 4.999937337150149e-06, + "logits/chosen": -2.848661422729492, + "logits/rejected": -2.7998948097229004, + "logps/chosen": -347.9453125, + "logps/rejected": -438.14276123046875, + "loss": 0.6659, + "rewards/accuracies": 0.44999998807907104, + "rewards/chosen": -0.180062934756279, + "rewards/margins": 0.05969248339533806, + "rewards/rejected": -0.23975543677806854, + "step": 1560 + }, + { + "epoch": 0.1, + "learning_rate": 4.99989038926024e-06, + "logits/chosen": -2.8489651679992676, + "logits/rejected": -2.834139347076416, + "logps/chosen": -301.15411376953125, + "logps/rejected": -357.87396240234375, + "loss": 0.6506, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.01634044013917446, + "rewards/margins": 0.14033670723438263, + "rewards/rejected": -0.15667715668678284, + "step": 1570 + }, + { + "epoch": 0.1, + "learning_rate": 4.999830400717476e-06, + "logits/chosen": -2.781190872192383, + "logits/rejected": -2.6491172313690186, + "logps/chosen": -293.2514343261719, + "logps/rejected": -223.9866943359375, + "loss": 0.6801, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.11608616262674332, + "rewards/margins": 0.05661296099424362, + "rewards/rejected": -0.17269913852214813, + "step": 1580 + }, + { + "epoch": 0.1, + "learning_rate": 4.999757371834787e-06, + "logits/chosen": -2.872457981109619, + "logits/rejected": -2.804356098175049, + "logps/chosen": -435.396484375, + "logps/rejected": -372.4584045410156, + "loss": 0.6367, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.03728240728378296, + "rewards/margins": 0.13812865316867828, + "rewards/rejected": -0.17541104555130005, + "step": 1590 + }, + { + "epoch": 0.1, + "learning_rate": 4.999671302993125e-06, + "logits/chosen": -2.895263195037842, + "logits/rejected": -2.8939125537872314, + "logps/chosen": -387.8405456542969, + "logps/rejected": -375.2184753417969, + "loss": 0.6226, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.17230334877967834, + "rewards/margins": 0.1778700351715088, + "rewards/rejected": -0.35017335414886475, + "step": 1600 + }, + { + "epoch": 0.11, + "learning_rate": 4.999572194641471e-06, + "logits/chosen": -3.000087261199951, + "logits/rejected": -2.9178078174591064, + "logps/chosen": -296.35284423828125, + "logps/rejected": -271.0105285644531, + "loss": 0.6437, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.2482984960079193, + "rewards/margins": 0.037916336208581924, + "rewards/rejected": -0.28621482849121094, + "step": 1610 + }, + { + "epoch": 0.11, + "learning_rate": 4.999460047296819e-06, + "logits/chosen": -2.7535433769226074, + "logits/rejected": -2.7120440006256104, + "logps/chosen": -205.52490234375, + "logps/rejected": -217.80032348632812, + "loss": 0.663, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.2501955032348633, + "rewards/margins": -0.028146442025899887, + "rewards/rejected": -0.22204908728599548, + "step": 1620 + }, + { + "epoch": 0.11, + "learning_rate": 4.999334861544186e-06, + "logits/chosen": -3.0465235710144043, + "logits/rejected": -2.8982014656066895, + "logps/chosen": -386.46673583984375, + "logps/rejected": -325.98284912109375, + "loss": 0.6366, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.013998913578689098, + "rewards/margins": 0.1589469611644745, + "rewards/rejected": -0.17294588685035706, + "step": 1630 + }, + { + "epoch": 0.11, + "learning_rate": 4.999196638036604e-06, + "logits/chosen": -2.8301024436950684, + "logits/rejected": -2.880171537399292, + "logps/chosen": -243.76687622070312, + "logps/rejected": -196.94740295410156, + "loss": 0.648, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": 0.04583977907896042, + "rewards/margins": 0.11452523618936539, + "rewards/rejected": -0.06868544965982437, + "step": 1640 + }, + { + "epoch": 0.11, + "learning_rate": 4.999045377495111e-06, + "logits/chosen": -3.003966808319092, + "logits/rejected": -2.8065781593322754, + "logps/chosen": -378.6763610839844, + "logps/rejected": -311.1116027832031, + "loss": 0.6677, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": 0.014937293715775013, + "rewards/margins": 0.05808718875050545, + "rewards/rejected": -0.04314989596605301, + "step": 1650 + }, + { + "epoch": 0.11, + "learning_rate": 4.998881080708759e-06, + "logits/chosen": -2.5581583976745605, + "logits/rejected": -2.461325168609619, + "logps/chosen": -231.7208251953125, + "logps/rejected": -194.53955078125, + "loss": 0.648, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": 0.10793449729681015, + "rewards/margins": 0.05950494855642319, + "rewards/rejected": 0.04842953756451607, + "step": 1660 + }, + { + "epoch": 0.11, + "learning_rate": 4.998703748534599e-06, + "logits/chosen": -2.7714176177978516, + "logits/rejected": -2.815610885620117, + "logps/chosen": -244.06005859375, + "logps/rejected": -254.1628875732422, + "loss": 0.6597, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.10158028453588486, + "rewards/margins": 0.06951704621315002, + "rewards/rejected": 0.03206322342157364, + "step": 1670 + }, + { + "epoch": 0.11, + "learning_rate": 4.998513381897683e-06, + "logits/chosen": -2.9440979957580566, + "logits/rejected": -2.7861945629119873, + "logps/chosen": -403.1971130371094, + "logps/rejected": -281.72564697265625, + "loss": 0.6469, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 0.12582075595855713, + "rewards/margins": 0.0909295454621315, + "rewards/rejected": 0.03489120304584503, + "step": 1680 + }, + { + "epoch": 0.11, + "learning_rate": 4.9983099817910565e-06, + "logits/chosen": -2.793125867843628, + "logits/rejected": -2.858919143676758, + "logps/chosen": -347.7610778808594, + "logps/rejected": -266.6481018066406, + "loss": 0.6366, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": 0.04975166171789169, + "rewards/margins": 0.13270191848278046, + "rewards/rejected": -0.08295025676488876, + "step": 1690 + }, + { + "epoch": 0.11, + "learning_rate": 4.998093549275754e-06, + "logits/chosen": -2.871772527694702, + "logits/rejected": -2.8467118740081787, + "logps/chosen": -377.32354736328125, + "logps/rejected": -303.02301025390625, + "loss": 0.6364, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.06866715848445892, + "rewards/margins": 0.15883667767047882, + "rewards/rejected": -0.0901695117354393, + "step": 1700 + }, + { + "epoch": 0.11, + "learning_rate": 4.997864085480794e-06, + "logits/chosen": -2.9470508098602295, + "logits/rejected": -2.959531307220459, + "logps/chosen": -290.06195068359375, + "logps/rejected": -299.32269287109375, + "loss": 0.625, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 0.04539639502763748, + "rewards/margins": 0.19093555212020874, + "rewards/rejected": -0.14553913474082947, + "step": 1710 + }, + { + "epoch": 0.11, + "learning_rate": 4.997621591603171e-06, + "logits/chosen": -2.82224702835083, + "logits/rejected": -2.720681667327881, + "logps/chosen": -348.3783264160156, + "logps/rejected": -381.4719543457031, + "loss": 0.6764, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.027834400534629822, + "rewards/margins": 0.12895603477954865, + "rewards/rejected": -0.15679042041301727, + "step": 1720 + }, + { + "epoch": 0.11, + "learning_rate": 4.997366068907853e-06, + "logits/chosen": -2.795383930206299, + "logits/rejected": -2.7165658473968506, + "logps/chosen": -359.398193359375, + "logps/rejected": -324.2231140136719, + "loss": 0.6247, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.011148473247885704, + "rewards/margins": 0.14630728960037231, + "rewards/rejected": -0.15745574235916138, + "step": 1730 + }, + { + "epoch": 0.11, + "learning_rate": 4.997097518727771e-06, + "logits/chosen": -2.8241539001464844, + "logits/rejected": -2.837796211242676, + "logps/chosen": -300.73577880859375, + "logps/rejected": -249.60818481445312, + "loss": 0.6365, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.07024487107992172, + "rewards/margins": 0.11239133030176163, + "rewards/rejected": -0.18263621628284454, + "step": 1740 + }, + { + "epoch": 0.11, + "learning_rate": 4.9968159424638155e-06, + "logits/chosen": -2.8376364707946777, + "logits/rejected": -2.9506516456604004, + "logps/chosen": -358.78302001953125, + "logps/rejected": -407.3536071777344, + "loss": 0.693, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.07075332850217819, + "rewards/margins": 0.07305476069450378, + "rewards/rejected": -0.14380808174610138, + "step": 1750 + }, + { + "epoch": 0.12, + "learning_rate": 4.9965213415848235e-06, + "logits/chosen": -2.9004623889923096, + "logits/rejected": -2.792384624481201, + "logps/chosen": -309.20452880859375, + "logps/rejected": -258.9640808105469, + "loss": 0.6135, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 0.08543910086154938, + "rewards/margins": 0.14936544001102448, + "rewards/rejected": -0.0639263391494751, + "step": 1760 + }, + { + "epoch": 0.12, + "learning_rate": 4.9962137176275805e-06, + "logits/chosen": -2.7845258712768555, + "logits/rejected": -2.6718716621398926, + "logps/chosen": -197.41293334960938, + "logps/rejected": -231.66928100585938, + "loss": 0.6562, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.14296093583106995, + "rewards/margins": 0.001989279640838504, + "rewards/rejected": -0.14495019614696503, + "step": 1770 + }, + { + "epoch": 0.12, + "learning_rate": 4.9958930721968015e-06, + "logits/chosen": -2.9959325790405273, + "logits/rejected": -2.9684629440307617, + "logps/chosen": -443.68572998046875, + "logps/rejected": -327.4826965332031, + "loss": 0.6618, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": 0.020464541390538216, + "rewards/margins": 0.18904823064804077, + "rewards/rejected": -0.1685837060213089, + "step": 1780 + }, + { + "epoch": 0.12, + "learning_rate": 4.995559406965132e-06, + "logits/chosen": -2.945493221282959, + "logits/rejected": -2.8600668907165527, + "logps/chosen": -390.383056640625, + "logps/rejected": -363.43438720703125, + "loss": 0.6388, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.10935555398464203, + "rewards/margins": 0.17368203401565552, + "rewards/rejected": -0.28303760290145874, + "step": 1790 + }, + { + "epoch": 0.12, + "learning_rate": 4.995212723673131e-06, + "logits/chosen": -2.7073473930358887, + "logits/rejected": -2.727189779281616, + "logps/chosen": -297.14459228515625, + "logps/rejected": -357.22979736328125, + "loss": 0.6592, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.19449344277381897, + "rewards/margins": 0.01761249266564846, + "rewards/rejected": -0.21210594475269318, + "step": 1800 + }, + { + "epoch": 0.12, + "learning_rate": 4.99485302412927e-06, + "logits/chosen": -2.6889750957489014, + "logits/rejected": -2.811795711517334, + "logps/chosen": -260.56134033203125, + "logps/rejected": -371.4437561035156, + "loss": 0.6356, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.0422750785946846, + "rewards/margins": 0.16190145909786224, + "rewards/rejected": -0.20417654514312744, + "step": 1810 + }, + { + "epoch": 0.12, + "learning_rate": 4.994480310209918e-06, + "logits/chosen": -2.8943614959716797, + "logits/rejected": -2.7965211868286133, + "logps/chosen": -372.2635192871094, + "logps/rejected": -317.09405517578125, + "loss": 0.6225, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.16946710646152496, + "rewards/margins": 0.18853087723255157, + "rewards/rejected": -0.3579980134963989, + "step": 1820 + }, + { + "epoch": 0.12, + "learning_rate": 4.994094583859332e-06, + "logits/chosen": -2.867899179458618, + "logits/rejected": -2.7022008895874023, + "logps/chosen": -289.0708923339844, + "logps/rejected": -269.9219665527344, + "loss": 0.6542, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.2944765090942383, + "rewards/margins": 0.11893720924854279, + "rewards/rejected": -0.4134137034416199, + "step": 1830 + }, + { + "epoch": 0.12, + "learning_rate": 4.9936958470896525e-06, + "logits/chosen": -2.9204633235931396, + "logits/rejected": -2.831799268722534, + "logps/chosen": -472.00341796875, + "logps/rejected": -395.9357604980469, + "loss": 0.6416, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.12566198408603668, + "rewards/margins": 0.23625274002552032, + "rewards/rejected": -0.3619146943092346, + "step": 1840 + }, + { + "epoch": 0.12, + "learning_rate": 4.993284101980883e-06, + "logits/chosen": -2.9902560710906982, + "logits/rejected": -2.863290309906006, + "logps/chosen": -338.646484375, + "logps/rejected": -336.25482177734375, + "loss": 0.6014, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.1826198399066925, + "rewards/margins": 0.1706969141960144, + "rewards/rejected": -0.3533167243003845, + "step": 1850 + }, + { + "epoch": 0.12, + "learning_rate": 4.9928593506808885e-06, + "logits/chosen": -2.854135751724243, + "logits/rejected": -2.769709587097168, + "logps/chosen": -302.7193298339844, + "logps/rejected": -304.617919921875, + "loss": 0.6377, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": 0.024336855858564377, + "rewards/margins": 0.23395895957946777, + "rewards/rejected": -0.2096220999956131, + "step": 1860 + }, + { + "epoch": 0.12, + "learning_rate": 4.992421595405381e-06, + "logits/chosen": -2.931744337081909, + "logits/rejected": -2.862643241882324, + "logps/chosen": -318.87103271484375, + "logps/rejected": -319.90106201171875, + "loss": 0.602, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": 0.0002502381685189903, + "rewards/margins": 0.25090909004211426, + "rewards/rejected": -0.2506588399410248, + "step": 1870 + }, + { + "epoch": 0.12, + "learning_rate": 4.991970838437905e-06, + "logits/chosen": -2.8745996952056885, + "logits/rejected": -2.702986717224121, + "logps/chosen": -230.1893310546875, + "logps/rejected": -264.86920166015625, + "loss": 0.6542, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.12010681629180908, + "rewards/margins": 0.02200702764093876, + "rewards/rejected": -0.1421138495206833, + "step": 1880 + }, + { + "epoch": 0.12, + "learning_rate": 4.9915070821298294e-06, + "logits/chosen": -2.8845481872558594, + "logits/rejected": -2.8821284770965576, + "logps/chosen": -284.79052734375, + "logps/rejected": -272.1836853027344, + "loss": 0.6228, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.11445657163858414, + "rewards/margins": 0.268448144197464, + "rewards/rejected": -0.15399155020713806, + "step": 1890 + }, + { + "epoch": 0.12, + "learning_rate": 4.991030328900336e-06, + "logits/chosen": -2.974465847015381, + "logits/rejected": -2.8674521446228027, + "logps/chosen": -328.77899169921875, + "logps/rejected": -249.0155792236328, + "loss": 0.62, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.1050812155008316, + "rewards/margins": 0.21933884918689728, + "rewards/rejected": -0.11425761878490448, + "step": 1900 + }, + { + "epoch": 0.12, + "learning_rate": 4.9905405812364014e-06, + "logits/chosen": -2.613386631011963, + "logits/rejected": -2.7967910766601562, + "logps/chosen": -275.4242858886719, + "logps/rejected": -352.1298828125, + "loss": 0.6447, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.11817902326583862, + "rewards/margins": 0.15935374796390533, + "rewards/rejected": -0.27753275632858276, + "step": 1910 + }, + { + "epoch": 0.13, + "learning_rate": 4.990037841692791e-06, + "logits/chosen": -2.7451908588409424, + "logits/rejected": -2.581724166870117, + "logps/chosen": -424.11279296875, + "logps/rejected": -315.0185546875, + "loss": 0.6417, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.0610206238925457, + "rewards/margins": 0.1065296158194542, + "rewards/rejected": -0.1675502508878708, + "step": 1920 + }, + { + "epoch": 0.13, + "learning_rate": 4.989522112892039e-06, + "logits/chosen": -2.661689281463623, + "logits/rejected": -2.701464891433716, + "logps/chosen": -270.7320861816406, + "logps/rejected": -265.72564697265625, + "loss": 0.5975, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 0.021038547158241272, + "rewards/margins": 0.22835056483745575, + "rewards/rejected": -0.20731201767921448, + "step": 1930 + }, + { + "epoch": 0.13, + "learning_rate": 4.98899339752444e-06, + "logits/chosen": -2.7815101146698, + "logits/rejected": -2.878944158554077, + "logps/chosen": -288.7119445800781, + "logps/rejected": -282.08367919921875, + "loss": 0.6303, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.1159503310918808, + "rewards/margins": 0.09114055335521698, + "rewards/rejected": -0.20709089934825897, + "step": 1940 + }, + { + "epoch": 0.13, + "learning_rate": 4.988451698348033e-06, + "logits/chosen": -2.9348340034484863, + "logits/rejected": -3.0299041271209717, + "logps/chosen": -339.9063415527344, + "logps/rejected": -295.8576354980469, + "loss": 0.6519, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.05489187315106392, + "rewards/margins": 0.06054762750864029, + "rewards/rejected": -0.1154395118355751, + "step": 1950 + }, + { + "epoch": 0.13, + "learning_rate": 4.987897018188585e-06, + "logits/chosen": -2.618508815765381, + "logits/rejected": -2.4888291358947754, + "logps/chosen": -310.83551025390625, + "logps/rejected": -265.2162170410156, + "loss": 0.612, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.14666445553302765, + "rewards/margins": 0.09787283092737198, + "rewards/rejected": -0.24453727900981903, + "step": 1960 + }, + { + "epoch": 0.13, + "learning_rate": 4.9873293599395814e-06, + "logits/chosen": -2.6340200901031494, + "logits/rejected": -2.6515004634857178, + "logps/chosen": -306.9834899902344, + "logps/rejected": -294.7528076171875, + "loss": 0.6432, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.10474874824285507, + "rewards/margins": 0.20273160934448242, + "rewards/rejected": -0.3074803352355957, + "step": 1970 + }, + { + "epoch": 0.13, + "learning_rate": 4.986748726562203e-06, + "logits/chosen": -2.706296682357788, + "logits/rejected": -2.8979294300079346, + "logps/chosen": -290.6112365722656, + "logps/rejected": -338.37713623046875, + "loss": 0.5807, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.03407129645347595, + "rewards/margins": 0.20913465321063995, + "rewards/rejected": -0.2432059496641159, + "step": 1980 + }, + { + "epoch": 0.13, + "learning_rate": 4.98615512108532e-06, + "logits/chosen": -2.7610926628112793, + "logits/rejected": -2.870851993560791, + "logps/chosen": -272.8517150878906, + "logps/rejected": -366.29388427734375, + "loss": 0.6786, + "rewards/accuracies": 0.44999998807907104, + "rewards/chosen": -0.01706174574792385, + "rewards/margins": 0.029359757900238037, + "rewards/rejected": -0.046421509236097336, + "step": 1990 + }, + { + "epoch": 0.13, + "learning_rate": 4.985548546605469e-06, + "logits/chosen": -2.651104688644409, + "logits/rejected": -2.755127191543579, + "logps/chosen": -310.54302978515625, + "logps/rejected": -329.9986877441406, + "loss": 0.6424, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": 0.09717954695224762, + "rewards/margins": 0.07522216439247131, + "rewards/rejected": 0.02195737510919571, + "step": 2000 + }, + { + "epoch": 0.13, + "learning_rate": 4.984929006286838e-06, + "logits/chosen": -2.8356099128723145, + "logits/rejected": -2.765662670135498, + "logps/chosen": -339.5566101074219, + "logps/rejected": -300.8033142089844, + "loss": 0.6369, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": 0.07670809328556061, + "rewards/margins": 0.14976195991039276, + "rewards/rejected": -0.07305385172367096, + "step": 2010 + }, + { + "epoch": 0.13, + "learning_rate": 4.984296503361256e-06, + "logits/chosen": -2.683217763900757, + "logits/rejected": -2.7624807357788086, + "logps/chosen": -248.154541015625, + "logps/rejected": -254.16848754882812, + "loss": 0.5923, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": 0.08094163239002228, + "rewards/margins": 0.11836649477481842, + "rewards/rejected": -0.037424858659505844, + "step": 2020 + }, + { + "epoch": 0.13, + "learning_rate": 4.9836510411281645e-06, + "logits/chosen": -2.602567195892334, + "logits/rejected": -2.7688286304473877, + "logps/chosen": -282.9025573730469, + "logps/rejected": -257.9793395996094, + "loss": 0.5823, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 0.016858983784914017, + "rewards/margins": 0.3600960373878479, + "rewards/rejected": -0.3432370126247406, + "step": 2030 + }, + { + "epoch": 0.13, + "learning_rate": 4.982992622954613e-06, + "logits/chosen": -2.650596857070923, + "logits/rejected": -2.770486354827881, + "logps/chosen": -414.5718688964844, + "logps/rejected": -341.5304260253906, + "loss": 0.6332, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.22947485744953156, + "rewards/margins": 0.11173935234546661, + "rewards/rejected": -0.34121423959732056, + "step": 2040 + }, + { + "epoch": 0.13, + "learning_rate": 4.9823212522752325e-06, + "logits/chosen": -2.7033188343048096, + "logits/rejected": -2.6886463165283203, + "logps/chosen": -316.0194396972656, + "logps/rejected": -309.9945373535156, + "loss": 0.6379, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.08541987091302872, + "rewards/margins": 0.1773550808429718, + "rewards/rejected": -0.26277491450309753, + "step": 2050 + }, + { + "epoch": 0.13, + "learning_rate": 4.981636932592222e-06, + "logits/chosen": -2.928769826889038, + "logits/rejected": -2.890620470046997, + "logps/chosen": -458.3501892089844, + "logps/rejected": -394.8435363769531, + "loss": 0.607, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": 0.03149319440126419, + "rewards/margins": 0.18963074684143066, + "rewards/rejected": -0.15813757479190826, + "step": 2060 + }, + { + "epoch": 0.14, + "learning_rate": 4.980939667475328e-06, + "logits/chosen": -2.7920684814453125, + "logits/rejected": -2.76005220413208, + "logps/chosen": -267.3896484375, + "logps/rejected": -304.2720947265625, + "loss": 0.632, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.17286111414432526, + "rewards/margins": 0.17540349066257477, + "rewards/rejected": -0.3482646048069, + "step": 2070 + }, + { + "epoch": 0.14, + "learning_rate": 4.980229460561826e-06, + "logits/chosen": -2.9371695518493652, + "logits/rejected": -2.8353123664855957, + "logps/chosen": -366.78912353515625, + "logps/rejected": -339.2974548339844, + "loss": 0.6588, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.09282257407903671, + "rewards/margins": 0.1689838320016861, + "rewards/rejected": -0.2618064284324646, + "step": 2080 + }, + { + "epoch": 0.14, + "learning_rate": 4.979506315556503e-06, + "logits/chosen": -2.775759220123291, + "logits/rejected": -2.7336509227752686, + "logps/chosen": -212.32644653320312, + "logps/rejected": -204.06906127929688, + "loss": 0.5562, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.02957214042544365, + "rewards/margins": 0.33536648750305176, + "rewards/rejected": -0.3649386167526245, + "step": 2090 + }, + { + "epoch": 0.14, + "learning_rate": 4.9787702362316395e-06, + "logits/chosen": -2.7911858558654785, + "logits/rejected": -2.674790859222412, + "logps/chosen": -354.57379150390625, + "logps/rejected": -257.1429138183594, + "loss": 0.6317, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.06640686839818954, + "rewards/margins": 0.14485633373260498, + "rewards/rejected": -0.21126320958137512, + "step": 2100 + }, + { + "epoch": 0.14, + "learning_rate": 4.9780212264269835e-06, + "logits/chosen": -2.5965216159820557, + "logits/rejected": -2.6920247077941895, + "logps/chosen": -236.93283081054688, + "logps/rejected": -212.55197143554688, + "loss": 0.6517, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.06051822379231453, + "rewards/margins": 0.05328022688627243, + "rewards/rejected": -0.11379845440387726, + "step": 2110 + }, + { + "epoch": 0.14, + "learning_rate": 4.977259290049739e-06, + "logits/chosen": -2.8331427574157715, + "logits/rejected": -2.8146824836730957, + "logps/chosen": -298.81573486328125, + "logps/rejected": -366.21905517578125, + "loss": 0.6427, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.03964535892009735, + "rewards/margins": 0.1442907154560089, + "rewards/rejected": -0.10464537143707275, + "step": 2120 + }, + { + "epoch": 0.14, + "learning_rate": 4.976484431074538e-06, + "logits/chosen": -2.7439489364624023, + "logits/rejected": -2.6999521255493164, + "logps/chosen": -335.0853576660156, + "logps/rejected": -311.0948486328125, + "loss": 0.6055, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.10866321623325348, + "rewards/margins": 0.17203572392463684, + "rewards/rejected": -0.2806989252567291, + "step": 2130 + }, + { + "epoch": 0.14, + "learning_rate": 4.975696653543425e-06, + "logits/chosen": -2.496903419494629, + "logits/rejected": -2.6326394081115723, + "logps/chosen": -227.9203643798828, + "logps/rejected": -305.10430908203125, + "loss": 0.627, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.220001220703125, + "rewards/margins": 0.21087773144245148, + "rewards/rejected": -0.4308788776397705, + "step": 2140 + }, + { + "epoch": 0.14, + "learning_rate": 4.974895961565835e-06, + "logits/chosen": -3.0033822059631348, + "logits/rejected": -2.862435817718506, + "logps/chosen": -364.16748046875, + "logps/rejected": -307.7515869140625, + "loss": 0.6024, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.032728563994169235, + "rewards/margins": 0.21506018936634064, + "rewards/rejected": -0.24778875708580017, + "step": 2150 + }, + { + "epoch": 0.14, + "learning_rate": 4.974082359318566e-06, + "logits/chosen": -2.738600969314575, + "logits/rejected": -2.770749092102051, + "logps/chosen": -259.91302490234375, + "logps/rejected": -257.9204406738281, + "loss": 0.6264, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 0.08208741247653961, + "rewards/margins": 0.2051773965358734, + "rewards/rejected": -0.12308996915817261, + "step": 2160 + }, + { + "epoch": 0.14, + "learning_rate": 4.973255851045769e-06, + "logits/chosen": -2.687934398651123, + "logits/rejected": -2.558964252471924, + "logps/chosen": -245.05209350585938, + "logps/rejected": -200.83056640625, + "loss": 0.6101, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": 0.13080234825611115, + "rewards/margins": 0.2099367082118988, + "rewards/rejected": -0.07913436740636826, + "step": 2170 + }, + { + "epoch": 0.14, + "learning_rate": 4.972416441058915e-06, + "logits/chosen": -2.744450330734253, + "logits/rejected": -2.5733814239501953, + "logps/chosen": -321.6153869628906, + "logps/rejected": -291.11920166015625, + "loss": 0.6304, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": 0.025879329070448875, + "rewards/margins": 0.2006806582212448, + "rewards/rejected": -0.1748013198375702, + "step": 2180 + }, + { + "epoch": 0.14, + "learning_rate": 4.971564133736777e-06, + "logits/chosen": -2.665870189666748, + "logits/rejected": -2.497673511505127, + "logps/chosen": -248.264404296875, + "logps/rejected": -253.1470184326172, + "loss": 0.6669, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.0837312713265419, + "rewards/margins": 0.012899696826934814, + "rewards/rejected": -0.09663096815347672, + "step": 2190 + }, + { + "epoch": 0.14, + "learning_rate": 4.970698933525409e-06, + "logits/chosen": -2.729921340942383, + "logits/rejected": -2.6478610038757324, + "logps/chosen": -236.1103057861328, + "logps/rejected": -298.22674560546875, + "loss": 0.6371, + "rewards/accuracies": 0.44999998807907104, + "rewards/chosen": 0.07094663381576538, + "rewards/margins": 0.053258299827575684, + "rewards/rejected": 0.017688335850834846, + "step": 2200 + }, + { + "epoch": 0.14, + "learning_rate": 4.969820844938118e-06, + "logits/chosen": -2.7145307064056396, + "logits/rejected": -2.6865108013153076, + "logps/chosen": -235.56124877929688, + "logps/rejected": -294.7950439453125, + "loss": 0.6113, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": 0.0399596281349659, + "rewards/margins": 0.08694467693567276, + "rewards/rejected": -0.04698503762483597, + "step": 2210 + }, + { + "epoch": 0.15, + "learning_rate": 4.968929872555444e-06, + "logits/chosen": -2.7173850536346436, + "logits/rejected": -2.7645998001098633, + "logps/chosen": -331.38848876953125, + "logps/rejected": -282.1485595703125, + "loss": 0.607, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 0.04040694981813431, + "rewards/margins": 0.31585150957107544, + "rewards/rejected": -0.27544456720352173, + "step": 2220 + }, + { + "epoch": 0.15, + "learning_rate": 4.968026021025137e-06, + "logits/chosen": -2.807720899581909, + "logits/rejected": -2.8572170734405518, + "logps/chosen": -297.9932556152344, + "logps/rejected": -284.1007385253906, + "loss": 0.6067, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.08410535752773285, + "rewards/margins": 0.12690380215644836, + "rewards/rejected": -0.2110091745853424, + "step": 2230 + }, + { + "epoch": 0.15, + "learning_rate": 4.967109295062128e-06, + "logits/chosen": -2.499629497528076, + "logits/rejected": -2.497025728225708, + "logps/chosen": -266.9873046875, + "logps/rejected": -256.3060607910156, + "loss": 0.6606, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.037730611860752106, + "rewards/margins": 0.12185688316822052, + "rewards/rejected": -0.15958748757839203, + "step": 2240 + }, + { + "epoch": 0.15, + "learning_rate": 4.966179699448509e-06, + "logits/chosen": -2.9855830669403076, + "logits/rejected": -2.822563648223877, + "logps/chosen": -273.6982727050781, + "logps/rejected": -236.4923858642578, + "loss": 0.6039, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.031817689538002014, + "rewards/margins": 0.2563541531562805, + "rewards/rejected": -0.22453641891479492, + "step": 2250 + }, + { + "epoch": 0.15, + "learning_rate": 4.965237239033506e-06, + "logits/chosen": -2.5667567253112793, + "logits/rejected": -2.6416573524475098, + "logps/chosen": -274.92852783203125, + "logps/rejected": -335.0534362792969, + "loss": 0.5869, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": 0.012694315984845161, + "rewards/margins": 0.16257108747959137, + "rewards/rejected": -0.14987674355506897, + "step": 2260 + }, + { + "epoch": 0.15, + "learning_rate": 4.964281918733453e-06, + "logits/chosen": -2.5804924964904785, + "logits/rejected": -2.684441566467285, + "logps/chosen": -294.51617431640625, + "logps/rejected": -315.2884521484375, + "loss": 0.5968, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": 0.06486682593822479, + "rewards/margins": 0.2623649835586548, + "rewards/rejected": -0.1974981129169464, + "step": 2270 + }, + { + "epoch": 0.15, + "learning_rate": 4.9633137435317715e-06, + "logits/chosen": -2.8421781063079834, + "logits/rejected": -2.8614776134490967, + "logps/chosen": -352.1127014160156, + "logps/rejected": -288.5516357421875, + "loss": 0.5829, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": 0.009625107049942017, + "rewards/margins": 0.184491366147995, + "rewards/rejected": -0.17486627399921417, + "step": 2280 + }, + { + "epoch": 0.15, + "learning_rate": 4.9623327184789355e-06, + "logits/chosen": -2.869119644165039, + "logits/rejected": -2.770078182220459, + "logps/chosen": -267.7673645019531, + "logps/rejected": -217.97592163085938, + "loss": 0.5909, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.00918522197753191, + "rewards/margins": 0.219018816947937, + "rewards/rejected": -0.22820401191711426, + "step": 2290 + }, + { + "epoch": 0.15, + "learning_rate": 4.9613388486924525e-06, + "logits/chosen": -2.757904291152954, + "logits/rejected": -2.7098991870880127, + "logps/chosen": -266.7463684082031, + "logps/rejected": -258.8957824707031, + "loss": 0.6369, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.17312125861644745, + "rewards/margins": 0.23959565162658691, + "rewards/rejected": -0.4127168655395508, + "step": 2300 + }, + { + "epoch": 0.15, + "learning_rate": 4.960332139356834e-06, + "logits/chosen": -2.615774631500244, + "logits/rejected": -2.499512195587158, + "logps/chosen": -266.8538818359375, + "logps/rejected": -282.8984680175781, + "loss": 0.6625, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.14361433684825897, + "rewards/margins": 0.18731355667114258, + "rewards/rejected": -0.33092790842056274, + "step": 2310 + }, + { + "epoch": 0.15, + "learning_rate": 4.95931259572357e-06, + "logits/chosen": -2.638256549835205, + "logits/rejected": -2.6577975749969482, + "logps/chosen": -284.3966979980469, + "logps/rejected": -324.7958984375, + "loss": 0.5841, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": 0.0298948734998703, + "rewards/margins": 0.21776501834392548, + "rewards/rejected": -0.18787017464637756, + "step": 2320 + }, + { + "epoch": 0.15, + "learning_rate": 4.9582802231111e-06, + "logits/chosen": -2.8069567680358887, + "logits/rejected": -2.7043187618255615, + "logps/chosen": -285.8518371582031, + "logps/rejected": -340.26171875, + "loss": 0.6535, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.023144256323575974, + "rewards/margins": 0.12189881503582001, + "rewards/rejected": -0.14504310488700867, + "step": 2330 + }, + { + "epoch": 0.15, + "learning_rate": 4.957235026904782e-06, + "logits/chosen": -2.7947421073913574, + "logits/rejected": -2.693174362182617, + "logps/chosen": -343.4107971191406, + "logps/rejected": -279.5213623046875, + "loss": 0.5414, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.07865247875452042, + "rewards/margins": 0.2984381318092346, + "rewards/rejected": -0.37709060311317444, + "step": 2340 + }, + { + "epoch": 0.15, + "learning_rate": 4.956177012556875e-06, + "logits/chosen": -2.6740355491638184, + "logits/rejected": -2.555637836456299, + "logps/chosen": -375.19744873046875, + "logps/rejected": -320.8430480957031, + "loss": 0.6106, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.24865643680095673, + "rewards/margins": 0.20838837325572968, + "rewards/rejected": -0.45704489946365356, + "step": 2350 + }, + { + "epoch": 0.15, + "learning_rate": 4.9551061855864976e-06, + "logits/chosen": -2.813265800476074, + "logits/rejected": -2.802703619003296, + "logps/chosen": -361.70770263671875, + "logps/rejected": -349.2850341796875, + "loss": 0.6683, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.2633809745311737, + "rewards/margins": 0.19430817663669586, + "rewards/rejected": -0.4576891362667084, + "step": 2360 + }, + { + "epoch": 0.16, + "learning_rate": 4.95402255157961e-06, + "logits/chosen": -2.565751552581787, + "logits/rejected": -2.5812134742736816, + "logps/chosen": -277.73876953125, + "logps/rejected": -388.343505859375, + "loss": 0.6804, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.29647955298423767, + "rewards/margins": 0.07615470886230469, + "rewards/rejected": -0.37263426184654236, + "step": 2370 + }, + { + "epoch": 0.16, + "learning_rate": 4.952926116188977e-06, + "logits/chosen": -2.7091922760009766, + "logits/rejected": -2.7243802547454834, + "logps/chosen": -314.4618835449219, + "logps/rejected": -292.33929443359375, + "loss": 0.6367, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.07366470992565155, + "rewards/margins": 0.19427324831485748, + "rewards/rejected": -0.26793795824050903, + "step": 2380 + }, + { + "epoch": 0.16, + "learning_rate": 4.951816885134143e-06, + "logits/chosen": -2.9690959453582764, + "logits/rejected": -2.8864049911499023, + "logps/chosen": -389.62689208984375, + "logps/rejected": -310.96044921875, + "loss": 0.6694, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.13698528707027435, + "rewards/margins": 0.010475650429725647, + "rewards/rejected": -0.1474609375, + "step": 2390 + }, + { + "epoch": 0.16, + "learning_rate": 4.950694864201399e-06, + "logits/chosen": -2.6521568298339844, + "logits/rejected": -2.540308952331543, + "logps/chosen": -199.8266143798828, + "logps/rejected": -339.84564208984375, + "loss": 0.686, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.01984802633523941, + "rewards/margins": 0.10776877403259277, + "rewards/rejected": -0.1276167929172516, + "step": 2400 + }, + { + "epoch": 0.16, + "learning_rate": 4.9495600592437575e-06, + "logits/chosen": -2.8433144092559814, + "logits/rejected": -2.628474235534668, + "logps/chosen": -336.7990417480469, + "logps/rejected": -364.16583251953125, + "loss": 0.6337, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": 0.10713674873113632, + "rewards/margins": 0.17330363392829895, + "rewards/rejected": -0.06616689264774323, + "step": 2410 + }, + { + "epoch": 0.16, + "learning_rate": 4.948412476180917e-06, + "logits/chosen": -2.846092462539673, + "logits/rejected": -2.807753324508667, + "logps/chosen": -470.0384826660156, + "logps/rejected": -318.007568359375, + "loss": 0.563, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 0.1342833936214447, + "rewards/margins": 0.33717745542526245, + "rewards/rejected": -0.20289401710033417, + "step": 2420 + }, + { + "epoch": 0.16, + "learning_rate": 4.947252120999232e-06, + "logits/chosen": -2.7561464309692383, + "logits/rejected": -2.737532138824463, + "logps/chosen": -331.05694580078125, + "logps/rejected": -276.02783203125, + "loss": 0.5775, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.10048478841781616, + "rewards/margins": 0.3389813303947449, + "rewards/rejected": -0.43946608901023865, + "step": 2430 + }, + { + "epoch": 0.16, + "learning_rate": 4.946078999751683e-06, + "logits/chosen": -2.927323341369629, + "logits/rejected": -2.8564233779907227, + "logps/chosen": -424.54608154296875, + "logps/rejected": -322.99810791015625, + "loss": 0.5726, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.0023977644741535187, + "rewards/margins": 0.43597611784935, + "rewards/rejected": -0.4383738934993744, + "step": 2440 + }, + { + "epoch": 0.16, + "learning_rate": 4.944893118557847e-06, + "logits/chosen": -2.6823534965515137, + "logits/rejected": -2.632371425628662, + "logps/chosen": -273.3263854980469, + "logps/rejected": -285.999755859375, + "loss": 0.6475, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.23725327849388123, + "rewards/margins": 0.17456074059009552, + "rewards/rejected": -0.41181403398513794, + "step": 2450 + }, + { + "epoch": 0.16, + "learning_rate": 4.943694483603861e-06, + "logits/chosen": -2.4171245098114014, + "logits/rejected": -2.4290964603424072, + "logps/chosen": -256.521240234375, + "logps/rejected": -283.49176025390625, + "loss": 0.5532, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.11423642933368683, + "rewards/margins": 0.40349873900413513, + "rewards/rejected": -0.5177351236343384, + "step": 2460 + }, + { + "epoch": 0.16, + "learning_rate": 4.9424831011423914e-06, + "logits/chosen": -2.8101084232330322, + "logits/rejected": -2.8960862159729004, + "logps/chosen": -365.55596923828125, + "logps/rejected": -305.8233642578125, + "loss": 0.61, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.1972278505563736, + "rewards/margins": 0.22212381660938263, + "rewards/rejected": -0.419351726770401, + "step": 2470 + }, + { + "epoch": 0.16, + "learning_rate": 4.9412589774926015e-06, + "logits/chosen": -2.8390262126922607, + "logits/rejected": -2.7306559085845947, + "logps/chosen": -395.5912170410156, + "logps/rejected": -368.03997802734375, + "loss": 0.6008, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.22051730751991272, + "rewards/margins": 0.2272726595401764, + "rewards/rejected": -0.44778990745544434, + "step": 2480 + }, + { + "epoch": 0.16, + "learning_rate": 4.940022119040121e-06, + "logits/chosen": -2.8345274925231934, + "logits/rejected": -2.4840381145477295, + "logps/chosen": -249.48184204101562, + "logps/rejected": -277.16351318359375, + "loss": 0.611, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.29384705424308777, + "rewards/margins": 0.2915216088294983, + "rewards/rejected": -0.5853686332702637, + "step": 2490 + }, + { + "epoch": 0.16, + "learning_rate": 4.93877253223701e-06, + "logits/chosen": -2.30358624458313, + "logits/rejected": -2.2991833686828613, + "logps/chosen": -239.4513702392578, + "logps/rejected": -340.1556396484375, + "loss": 0.6116, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.2424122840166092, + "rewards/margins": 0.2587777376174927, + "rewards/rejected": -0.5011900067329407, + "step": 2500 + }, + { + "epoch": 0.16, + "learning_rate": 4.937510223601725e-06, + "logits/chosen": -2.683197498321533, + "logits/rejected": -2.6224794387817383, + "logps/chosen": -282.93170166015625, + "logps/rejected": -275.8567810058594, + "loss": 0.5768, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.19845330715179443, + "rewards/margins": 0.25473955273628235, + "rewards/rejected": -0.453192800283432, + "step": 2510 + }, + { + "epoch": 0.16, + "learning_rate": 4.936235199719085e-06, + "logits/chosen": -2.7041985988616943, + "logits/rejected": -2.5971970558166504, + "logps/chosen": -331.7781677246094, + "logps/rejected": -321.8423767089844, + "loss": 0.5466, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.11719875037670135, + "rewards/margins": 0.36858198046684265, + "rewards/rejected": -0.4857807159423828, + "step": 2520 + }, + { + "epoch": 0.17, + "learning_rate": 4.93494746724024e-06, + "logits/chosen": -2.744253635406494, + "logits/rejected": -2.6150527000427246, + "logps/chosen": -371.90411376953125, + "logps/rejected": -352.849365234375, + "loss": 0.5948, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.16774104535579681, + "rewards/margins": 0.2554989755153656, + "rewards/rejected": -0.4232400357723236, + "step": 2530 + }, + { + "epoch": 0.17, + "learning_rate": 4.933647032882635e-06, + "logits/chosen": -2.717115640640259, + "logits/rejected": -2.6257479190826416, + "logps/chosen": -252.213134765625, + "logps/rejected": -235.54183959960938, + "loss": 0.617, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.20600995421409607, + "rewards/margins": 0.17118848860263824, + "rewards/rejected": -0.3771984875202179, + "step": 2540 + }, + { + "epoch": 0.17, + "learning_rate": 4.932333903429969e-06, + "logits/chosen": -2.7186102867126465, + "logits/rejected": -2.658327579498291, + "logps/chosen": -256.15576171875, + "logps/rejected": -290.25372314453125, + "loss": 0.6289, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.1984180510044098, + "rewards/margins": 0.1289817988872528, + "rewards/rejected": -0.327399879693985, + "step": 2550 + }, + { + "epoch": 0.17, + "learning_rate": 4.931008085732172e-06, + "logits/chosen": -2.6936402320861816, + "logits/rejected": -2.7055277824401855, + "logps/chosen": -248.66830444335938, + "logps/rejected": -329.27239990234375, + "loss": 0.6649, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.35135889053344727, + "rewards/margins": 0.08967886865139008, + "rewards/rejected": -0.44103774428367615, + "step": 2560 + }, + { + "epoch": 0.17, + "learning_rate": 4.9296695867053565e-06, + "logits/chosen": -2.7393367290496826, + "logits/rejected": -2.673619508743286, + "logps/chosen": -330.6111145019531, + "logps/rejected": -382.0716857910156, + "loss": 0.5591, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.056804366409778595, + "rewards/margins": 0.4075342118740082, + "rewards/rejected": -0.46433860063552856, + "step": 2570 + }, + { + "epoch": 0.17, + "learning_rate": 4.928318413331791e-06, + "logits/chosen": -2.8475029468536377, + "logits/rejected": -2.6908457279205322, + "logps/chosen": -213.46224975585938, + "logps/rejected": -211.10586547851562, + "loss": 0.6087, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.0277628805488348, + "rewards/margins": 0.3201954960823059, + "rewards/rejected": -0.34795838594436646, + "step": 2580 + }, + { + "epoch": 0.17, + "learning_rate": 4.926954572659855e-06, + "logits/chosen": -2.6036033630371094, + "logits/rejected": -2.5377697944641113, + "logps/chosen": -282.39874267578125, + "logps/rejected": -233.1647186279297, + "loss": 0.6071, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.04948263615369797, + "rewards/margins": 0.3064866364002228, + "rewards/rejected": -0.35596925020217896, + "step": 2590 + }, + { + "epoch": 0.17, + "learning_rate": 4.925578071804013e-06, + "logits/chosen": -2.7369303703308105, + "logits/rejected": -2.7130982875823975, + "logps/chosen": -270.61651611328125, + "logps/rejected": -413.401611328125, + "loss": 0.5794, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.21495899558067322, + "rewards/margins": 0.24355199933052063, + "rewards/rejected": -0.45851102471351624, + "step": 2600 + }, + { + "epoch": 0.17, + "learning_rate": 4.924188917944763e-06, + "logits/chosen": -2.52673077583313, + "logits/rejected": -2.575197219848633, + "logps/chosen": -222.6197509765625, + "logps/rejected": -315.6336975097656, + "loss": 0.6478, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.23872074484825134, + "rewards/margins": 0.1525263786315918, + "rewards/rejected": -0.39124712347984314, + "step": 2610 + }, + { + "epoch": 0.17, + "learning_rate": 4.922787118328617e-06, + "logits/chosen": -2.5672783851623535, + "logits/rejected": -2.7630858421325684, + "logps/chosen": -235.62557983398438, + "logps/rejected": -324.2035827636719, + "loss": 0.5879, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.14392860233783722, + "rewards/margins": 0.2915584444999695, + "rewards/rejected": -0.4354870915412903, + "step": 2620 + }, + { + "epoch": 0.17, + "learning_rate": 4.921372680268045e-06, + "logits/chosen": -2.8274261951446533, + "logits/rejected": -2.6772689819335938, + "logps/chosen": -379.2863464355469, + "logps/rejected": -330.87335205078125, + "loss": 0.5386, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.23995617032051086, + "rewards/margins": 0.45680126547813416, + "rewards/rejected": -0.696757435798645, + "step": 2630 + }, + { + "epoch": 0.17, + "learning_rate": 4.919945611141451e-06, + "logits/chosen": -2.4838151931762695, + "logits/rejected": -2.41625714302063, + "logps/chosen": -326.9600830078125, + "logps/rejected": -331.88836669921875, + "loss": 0.6019, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.4891494810581207, + "rewards/margins": 0.11712410300970078, + "rewards/rejected": -0.6062735319137573, + "step": 2640 + }, + { + "epoch": 0.17, + "learning_rate": 4.918505918393125e-06, + "logits/chosen": -2.6945321559906006, + "logits/rejected": -2.4596617221832275, + "logps/chosen": -285.453857421875, + "logps/rejected": -346.5177307128906, + "loss": 0.5209, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.18411032855510712, + "rewards/margins": 0.47116097807884216, + "rewards/rejected": -0.6552713513374329, + "step": 2650 + }, + { + "epoch": 0.17, + "learning_rate": 4.91705360953321e-06, + "logits/chosen": -2.7415881156921387, + "logits/rejected": -2.708122491836548, + "logps/chosen": -288.8190612792969, + "logps/rejected": -325.80731201171875, + "loss": 0.6134, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.2988317310810089, + "rewards/margins": 0.29410165548324585, + "rewards/rejected": -0.5929334163665771, + "step": 2660 + }, + { + "epoch": 0.17, + "learning_rate": 4.9155886921376615e-06, + "logits/chosen": -2.342031955718994, + "logits/rejected": -2.5404393672943115, + "logps/chosen": -275.90142822265625, + "logps/rejected": -270.68988037109375, + "loss": 0.565, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.34874653816223145, + "rewards/margins": 0.1326829195022583, + "rewards/rejected": -0.48142942786216736, + "step": 2670 + }, + { + "epoch": 0.18, + "learning_rate": 4.914111173848205e-06, + "logits/chosen": -2.7839322090148926, + "logits/rejected": -2.637239694595337, + "logps/chosen": -494.39410400390625, + "logps/rejected": -440.8687438964844, + "loss": 0.5636, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.36821606755256653, + "rewards/margins": 0.5051993131637573, + "rewards/rejected": -0.873415470123291, + "step": 2680 + }, + { + "epoch": 0.18, + "learning_rate": 4.9126210623723e-06, + "logits/chosen": -2.399345874786377, + "logits/rejected": -2.5851237773895264, + "logps/chosen": -318.40155029296875, + "logps/rejected": -333.19683837890625, + "loss": 0.597, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.3792705535888672, + "rewards/margins": 0.15724647045135498, + "rewards/rejected": -0.5365170240402222, + "step": 2690 + }, + { + "epoch": 0.18, + "learning_rate": 4.911118365483098e-06, + "logits/chosen": -2.8516323566436768, + "logits/rejected": -2.586472749710083, + "logps/chosen": -379.7595520019531, + "logps/rejected": -350.9483947753906, + "loss": 0.5593, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.3597323000431061, + "rewards/margins": 0.36504340171813965, + "rewards/rejected": -0.7247756719589233, + "step": 2700 + }, + { + "epoch": 0.18, + "learning_rate": 4.909603091019403e-06, + "logits/chosen": -2.684126853942871, + "logits/rejected": -2.6924331188201904, + "logps/chosen": -364.5967102050781, + "logps/rejected": -351.02935791015625, + "loss": 0.7345, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.8244889974594116, + "rewards/margins": -0.039155781269073486, + "rewards/rejected": -0.7853331565856934, + "step": 2710 + }, + { + "epoch": 0.18, + "learning_rate": 4.908075246885626e-06, + "logits/chosen": -2.609663486480713, + "logits/rejected": -2.403352737426758, + "logps/chosen": -455.54742431640625, + "logps/rejected": -342.30303955078125, + "loss": 0.5849, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.3363655209541321, + "rewards/margins": 0.449228435754776, + "rewards/rejected": -0.7855939865112305, + "step": 2720 + }, + { + "epoch": 0.18, + "learning_rate": 4.906534841051755e-06, + "logits/chosen": -2.559765577316284, + "logits/rejected": -2.52396559715271, + "logps/chosen": -336.14056396484375, + "logps/rejected": -396.00921630859375, + "loss": 0.5847, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.4036168158054352, + "rewards/margins": 0.4683077335357666, + "rewards/rejected": -0.8719245195388794, + "step": 2730 + }, + { + "epoch": 0.18, + "learning_rate": 4.904981881553297e-06, + "logits/chosen": -2.4790966510772705, + "logits/rejected": -2.6218953132629395, + "logps/chosen": -280.62835693359375, + "logps/rejected": -341.94757080078125, + "loss": 0.6085, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.5170210599899292, + "rewards/margins": 0.39688968658447266, + "rewards/rejected": -0.9139108657836914, + "step": 2740 + }, + { + "epoch": 0.18, + "learning_rate": 4.903416376491252e-06, + "logits/chosen": -2.4483070373535156, + "logits/rejected": -2.463671922683716, + "logps/chosen": -291.2413330078125, + "logps/rejected": -357.9991760253906, + "loss": 0.5845, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.6890040636062622, + "rewards/margins": 0.3651917278766632, + "rewards/rejected": -1.054195761680603, + "step": 2750 + }, + { + "epoch": 0.18, + "learning_rate": 4.90183833403206e-06, + "logits/chosen": -2.2340424060821533, + "logits/rejected": -2.195526599884033, + "logps/chosen": -341.2486877441406, + "logps/rejected": -319.59564208984375, + "loss": 0.5644, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.7643866539001465, + "rewards/margins": 0.27745020389556885, + "rewards/rejected": -1.0418369770050049, + "step": 2760 + }, + { + "epoch": 0.18, + "learning_rate": 4.900247762407564e-06, + "logits/chosen": -2.1294519901275635, + "logits/rejected": -2.2961814403533936, + "logps/chosen": -290.6664733886719, + "logps/rejected": -384.6078796386719, + "loss": 0.5743, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.493140310049057, + "rewards/margins": 0.5134254693984985, + "rewards/rejected": -1.0065659284591675, + "step": 2770 + }, + { + "epoch": 0.18, + "learning_rate": 4.898644669914965e-06, + "logits/chosen": -2.4749608039855957, + "logits/rejected": -2.49005126953125, + "logps/chosen": -377.3028259277344, + "logps/rejected": -360.3443603515625, + "loss": 0.5785, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.22494623064994812, + "rewards/margins": 0.39548856019973755, + "rewards/rejected": -0.6204348206520081, + "step": 2780 + }, + { + "epoch": 0.18, + "learning_rate": 4.897029064916778e-06, + "logits/chosen": -2.4899182319641113, + "logits/rejected": -2.4811320304870605, + "logps/chosen": -344.0760498046875, + "logps/rejected": -380.71453857421875, + "loss": 0.5613, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.36472102999687195, + "rewards/margins": 0.4021889269351959, + "rewards/rejected": -0.7669100165367126, + "step": 2790 + }, + { + "epoch": 0.18, + "learning_rate": 4.895400955840791e-06, + "logits/chosen": -2.688682794570923, + "logits/rejected": -2.5415966510772705, + "logps/chosen": -395.62225341796875, + "logps/rejected": -340.3146057128906, + "loss": 0.606, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.3848732113838196, + "rewards/margins": 0.2556595504283905, + "rewards/rejected": -0.6405327916145325, + "step": 2800 + }, + { + "epoch": 0.18, + "learning_rate": 4.893760351180018e-06, + "logits/chosen": -2.3920750617980957, + "logits/rejected": -2.2974207401275635, + "logps/chosen": -307.5142822265625, + "logps/rejected": -327.1595458984375, + "loss": 0.6083, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.8331905603408813, + "rewards/margins": 0.14832532405853271, + "rewards/rejected": -0.9815157651901245, + "step": 2810 + }, + { + "epoch": 0.18, + "learning_rate": 4.892107259492657e-06, + "logits/chosen": -2.618544101715088, + "logits/rejected": -2.540144443511963, + "logps/chosen": -382.5555114746094, + "logps/rejected": -414.6402282714844, + "loss": 0.6895, + "rewards/accuracies": 0.44999998807907104, + "rewards/chosen": -0.7530332207679749, + "rewards/margins": 0.08182497322559357, + "rewards/rejected": -0.834858238697052, + "step": 2820 + }, + { + "epoch": 0.19, + "learning_rate": 4.890441689402042e-06, + "logits/chosen": -2.2747488021850586, + "logits/rejected": -2.4530391693115234, + "logps/chosen": -303.0140686035156, + "logps/rejected": -342.5342712402344, + "loss": 0.567, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.6708128452301025, + "rewards/margins": 0.3363160490989685, + "rewards/rejected": -1.0071289539337158, + "step": 2830 + }, + { + "epoch": 0.19, + "learning_rate": 4.888763649596606e-06, + "logits/chosen": -2.5664830207824707, + "logits/rejected": -2.5413601398468018, + "logps/chosen": -317.67974853515625, + "logps/rejected": -373.93231201171875, + "loss": 0.5778, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.509292483329773, + "rewards/margins": 0.43065786361694336, + "rewards/rejected": -0.9399503469467163, + "step": 2840 + }, + { + "epoch": 0.19, + "learning_rate": 4.887073148829824e-06, + "logits/chosen": -2.659855365753174, + "logits/rejected": -2.4314472675323486, + "logps/chosen": -371.5045471191406, + "logps/rejected": -371.8569030761719, + "loss": 0.4925, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.6626033782958984, + "rewards/margins": 0.57807856798172, + "rewards/rejected": -1.2406818866729736, + "step": 2850 + }, + { + "epoch": 0.19, + "learning_rate": 4.885370195920177e-06, + "logits/chosen": -2.265190839767456, + "logits/rejected": -2.2952256202697754, + "logps/chosen": -221.4605255126953, + "logps/rejected": -271.4645690917969, + "loss": 0.5525, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.4074770510196686, + "rewards/margins": 0.4455071985721588, + "rewards/rejected": -0.8529843091964722, + "step": 2860 + }, + { + "epoch": 0.19, + "learning_rate": 4.883654799751101e-06, + "logits/chosen": -2.5295357704162598, + "logits/rejected": -2.604036808013916, + "logps/chosen": -251.52352905273438, + "logps/rejected": -317.5025939941406, + "loss": 0.6774, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.5625568628311157, + "rewards/margins": 0.09554485976696014, + "rewards/rejected": -0.6581017374992371, + "step": 2870 + }, + { + "epoch": 0.19, + "learning_rate": 4.8819269692709435e-06, + "logits/chosen": -2.506788969039917, + "logits/rejected": -2.595655679702759, + "logps/chosen": -302.5235290527344, + "logps/rejected": -373.41839599609375, + "loss": 0.5923, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.6250518560409546, + "rewards/margins": 0.585913360118866, + "rewards/rejected": -1.2109653949737549, + "step": 2880 + }, + { + "epoch": 0.19, + "learning_rate": 4.880186713492915e-06, + "logits/chosen": -2.668774366378784, + "logits/rejected": -2.2550742626190186, + "logps/chosen": -292.2339172363281, + "logps/rejected": -402.7520446777344, + "loss": 0.5938, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.5656948685646057, + "rewards/margins": 0.446084201335907, + "rewards/rejected": -1.0117790699005127, + "step": 2890 + }, + { + "epoch": 0.19, + "learning_rate": 4.878434041495041e-06, + "logits/chosen": -2.6363844871520996, + "logits/rejected": -2.402409076690674, + "logps/chosen": -326.28057861328125, + "logps/rejected": -340.3365783691406, + "loss": 0.4879, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.20777079463005066, + "rewards/margins": 0.8683158159255981, + "rewards/rejected": -1.0760865211486816, + "step": 2900 + }, + { + "epoch": 0.19, + "learning_rate": 4.876668962420117e-06, + "logits/chosen": -2.0191640853881836, + "logits/rejected": -2.336470365524292, + "logps/chosen": -216.2163543701172, + "logps/rejected": -426.68682861328125, + "loss": 0.6081, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.577592134475708, + "rewards/margins": 0.34442293643951416, + "rewards/rejected": -0.9220150113105774, + "step": 2910 + }, + { + "epoch": 0.19, + "learning_rate": 4.87489148547566e-06, + "logits/chosen": -2.3271188735961914, + "logits/rejected": -2.41279673576355, + "logps/chosen": -427.1767578125, + "logps/rejected": -423.5763244628906, + "loss": 0.5848, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.738415539264679, + "rewards/margins": 0.5254755020141602, + "rewards/rejected": -1.2638909816741943, + "step": 2920 + }, + { + "epoch": 0.19, + "learning_rate": 4.873101619933862e-06, + "logits/chosen": -2.491079330444336, + "logits/rejected": -2.249969244003296, + "logps/chosen": -496.1459045410156, + "logps/rejected": -414.140869140625, + "loss": 0.5926, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.9692713022232056, + "rewards/margins": 0.413583368062973, + "rewards/rejected": -1.382854700088501, + "step": 2930 + }, + { + "epoch": 0.19, + "learning_rate": 4.8712993751315385e-06, + "logits/chosen": -2.3056821823120117, + "logits/rejected": -2.304752826690674, + "logps/chosen": -371.10260009765625, + "logps/rejected": -419.9462890625, + "loss": 0.5782, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -1.1785022020339966, + "rewards/margins": 0.45382365584373474, + "rewards/rejected": -1.6323257684707642, + "step": 2940 + }, + { + "epoch": 0.19, + "learning_rate": 4.869484760470079e-06, + "logits/chosen": -2.5141701698303223, + "logits/rejected": -2.5063791275024414, + "logps/chosen": -392.4580993652344, + "logps/rejected": -448.1770935058594, + "loss": 0.6687, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -1.1139910221099854, + "rewards/margins": 0.1485811471939087, + "rewards/rejected": -1.2625722885131836, + "step": 2950 + }, + { + "epoch": 0.19, + "learning_rate": 4.867657785415404e-06, + "logits/chosen": -2.4104926586151123, + "logits/rejected": -2.4443955421447754, + "logps/chosen": -403.44586181640625, + "logps/rejected": -444.3780212402344, + "loss": 0.5307, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.8458258509635925, + "rewards/margins": 0.4578852653503418, + "rewards/rejected": -1.303711175918579, + "step": 2960 + }, + { + "epoch": 0.19, + "learning_rate": 4.865818459497911e-06, + "logits/chosen": -2.6232340335845947, + "logits/rejected": -2.461763858795166, + "logps/chosen": -485.65423583984375, + "logps/rejected": -356.5897521972656, + "loss": 0.6082, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.6778312921524048, + "rewards/margins": 0.25565898418426514, + "rewards/rejected": -0.9334903955459595, + "step": 2970 + }, + { + "epoch": 0.19, + "learning_rate": 4.863966792312423e-06, + "logits/chosen": -2.198486804962158, + "logits/rejected": -2.3604507446289062, + "logps/chosen": -354.3186340332031, + "logps/rejected": -375.06231689453125, + "loss": 0.6062, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.8335941433906555, + "rewards/margins": 0.12950456142425537, + "rewards/rejected": -0.9630987048149109, + "step": 2980 + }, + { + "epoch": 0.2, + "learning_rate": 4.862102793518145e-06, + "logits/chosen": -2.465055227279663, + "logits/rejected": -2.45820951461792, + "logps/chosen": -282.09307861328125, + "logps/rejected": -328.89666748046875, + "loss": 0.5157, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.3737873435020447, + "rewards/margins": 0.7718119621276855, + "rewards/rejected": -1.1455992460250854, + "step": 2990 + }, + { + "epoch": 0.2, + "learning_rate": 4.8602264728386075e-06, + "logits/chosen": -2.371791124343872, + "logits/rejected": -2.4465041160583496, + "logps/chosen": -405.9463806152344, + "logps/rejected": -443.180908203125, + "loss": 0.5795, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.43187254667282104, + "rewards/margins": 0.49086993932724, + "rewards/rejected": -0.922742486000061, + "step": 3000 + }, + { + "epoch": 0.2, + "eval_logits/chosen": -2.4471633434295654, + "eval_logits/rejected": -2.368870735168457, + "eval_logps/chosen": -373.0481872558594, + "eval_logps/rejected": -392.3198547363281, + "eval_loss": 0.5888271331787109, + "eval_rewards/accuracies": 0.6830000281333923, + "eval_rewards/chosen": -0.7760262489318848, + "eval_rewards/margins": 0.3931134045124054, + "eval_rewards/rejected": -1.1691396236419678, + "eval_runtime": 465.2562, + "eval_samples_per_second": 4.299, + "eval_steps_per_second": 2.149, + "step": 3000 + }, + { + "epoch": 0.2, + "learning_rate": 4.858337840061616e-06, + "logits/chosen": -2.5099802017211914, + "logits/rejected": -2.4437527656555176, + "logps/chosen": -273.11676025390625, + "logps/rejected": -373.9442443847656, + "loss": 0.5426, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.9518076181411743, + "rewards/margins": 0.46014493703842163, + "rewards/rejected": -1.4119526147842407, + "step": 3010 + }, + { + "epoch": 0.2, + "learning_rate": 4.856436905039208e-06, + "logits/chosen": -2.579927921295166, + "logits/rejected": -2.2081363201141357, + "logps/chosen": -420.5865173339844, + "logps/rejected": -390.60870361328125, + "loss": 0.5135, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.6775982975959778, + "rewards/margins": 0.4973832070827484, + "rewards/rejected": -1.1749814748764038, + "step": 3020 + }, + { + "epoch": 0.2, + "learning_rate": 4.854523677687588e-06, + "logits/chosen": -2.3134303092956543, + "logits/rejected": -2.064049243927002, + "logps/chosen": -280.912353515625, + "logps/rejected": -321.2112731933594, + "loss": 0.5114, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.49111509323120117, + "rewards/margins": 0.5566691160202026, + "rewards/rejected": -1.0477842092514038, + "step": 3030 + }, + { + "epoch": 0.2, + "learning_rate": 4.85259816798709e-06, + "logits/chosen": -2.618589401245117, + "logits/rejected": -2.6263256072998047, + "logps/chosen": -331.26641845703125, + "logps/rejected": -321.235595703125, + "loss": 0.6071, + "rewards/accuracies": 0.44999998807907104, + "rewards/chosen": -0.570954442024231, + "rewards/margins": 0.10999935865402222, + "rewards/rejected": -0.6809538006782532, + "step": 3040 + }, + { + "epoch": 0.2, + "learning_rate": 4.850660385982114e-06, + "logits/chosen": -2.4453115463256836, + "logits/rejected": -2.613149881362915, + "logps/chosen": -371.54034423828125, + "logps/rejected": -444.22314453125, + "loss": 0.6007, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.5611933469772339, + "rewards/margins": 0.351379930973053, + "rewards/rejected": -0.9125733375549316, + "step": 3050 + }, + { + "epoch": 0.2, + "learning_rate": 4.848710341781081e-06, + "logits/chosen": -2.22904109954834, + "logits/rejected": -2.3576712608337402, + "logps/chosen": -308.20159912109375, + "logps/rejected": -356.36212158203125, + "loss": 0.6788, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.49177709221839905, + "rewards/margins": 0.14831292629241943, + "rewards/rejected": -0.6400899887084961, + "step": 3060 + }, + { + "epoch": 0.2, + "learning_rate": 4.846748045556377e-06, + "logits/chosen": -2.3152804374694824, + "logits/rejected": -2.2141315937042236, + "logps/chosen": -386.8840026855469, + "logps/rejected": -448.82989501953125, + "loss": 0.5557, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.44822603464126587, + "rewards/margins": 0.6427403092384338, + "rewards/rejected": -1.0909662246704102, + "step": 3070 + }, + { + "epoch": 0.2, + "learning_rate": 4.8447735075442995e-06, + "logits/chosen": -2.2687716484069824, + "logits/rejected": -2.4406607151031494, + "logps/chosen": -229.04373168945312, + "logps/rejected": -280.9591979980469, + "loss": 0.6253, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.41208410263061523, + "rewards/margins": 0.21082070469856262, + "rewards/rejected": -0.6229047775268555, + "step": 3080 + }, + { + "epoch": 0.2, + "learning_rate": 4.8427867380450075e-06, + "logits/chosen": -2.3034188747406006, + "logits/rejected": -2.235747814178467, + "logps/chosen": -299.91656494140625, + "logps/rejected": -340.36090087890625, + "loss": 0.5385, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.5435574054718018, + "rewards/margins": 0.48364514112472534, + "rewards/rejected": -1.0272024869918823, + "step": 3090 + }, + { + "epoch": 0.2, + "learning_rate": 4.840787747422462e-06, + "logits/chosen": -2.0936059951782227, + "logits/rejected": -2.0867063999176025, + "logps/chosen": -317.1687316894531, + "logps/rejected": -335.8619079589844, + "loss": 0.5503, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.3254963755607605, + "rewards/margins": 0.6754858493804932, + "rewards/rejected": -1.0009822845458984, + "step": 3100 + }, + { + "epoch": 0.2, + "learning_rate": 4.838776546104378e-06, + "logits/chosen": -2.322481155395508, + "logits/rejected": -2.3373770713806152, + "logps/chosen": -390.653076171875, + "logps/rejected": -397.94122314453125, + "loss": 0.5695, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.6159372925758362, + "rewards/margins": 0.4556799530982971, + "rewards/rejected": -1.0716171264648438, + "step": 3110 + }, + { + "epoch": 0.2, + "learning_rate": 4.836753144582168e-06, + "logits/chosen": -2.4194235801696777, + "logits/rejected": -2.2814254760742188, + "logps/chosen": -395.2923278808594, + "logps/rejected": -362.1666259765625, + "loss": 0.477, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.32465457916259766, + "rewards/margins": 0.8579540252685547, + "rewards/rejected": -1.1826084852218628, + "step": 3120 + }, + { + "epoch": 0.2, + "learning_rate": 4.834717553410884e-06, + "logits/chosen": -2.35410475730896, + "logits/rejected": -2.2035439014434814, + "logps/chosen": -339.8991394042969, + "logps/rejected": -425.37158203125, + "loss": 0.678, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.9469770193099976, + "rewards/margins": 0.16363593935966492, + "rewards/rejected": -1.1106128692626953, + "step": 3130 + }, + { + "epoch": 0.21, + "learning_rate": 4.832669783209167e-06, + "logits/chosen": -2.2493810653686523, + "logits/rejected": -2.4438600540161133, + "logps/chosen": -356.7527770996094, + "logps/rejected": -357.9443054199219, + "loss": 0.5837, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.5280917882919312, + "rewards/margins": 0.3985002934932709, + "rewards/rejected": -0.9265921711921692, + "step": 3140 + }, + { + "epoch": 0.21, + "learning_rate": 4.8306098446591895e-06, + "logits/chosen": -2.6090426445007324, + "logits/rejected": -2.6477925777435303, + "logps/chosen": -463.68072509765625, + "logps/rejected": -425.4881896972656, + "loss": 0.6984, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.5927342176437378, + "rewards/margins": 0.3940940499305725, + "rewards/rejected": -0.9868283271789551, + "step": 3150 + }, + { + "epoch": 0.21, + "learning_rate": 4.828537748506601e-06, + "logits/chosen": -2.613419771194458, + "logits/rejected": -2.545729875564575, + "logps/chosen": -398.8688049316406, + "logps/rejected": -460.64581298828125, + "loss": 0.6951, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.44156503677368164, + "rewards/margins": 0.43986767530441284, + "rewards/rejected": -0.8814327120780945, + "step": 3160 + }, + { + "epoch": 0.21, + "learning_rate": 4.826453505560469e-06, + "logits/chosen": -2.530033588409424, + "logits/rejected": -2.5991828441619873, + "logps/chosen": -325.0137023925781, + "logps/rejected": -420.013671875, + "loss": 0.5368, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.40284332633018494, + "rewards/margins": 0.6239348649978638, + "rewards/rejected": -1.026778221130371, + "step": 3170 + }, + { + "epoch": 0.21, + "learning_rate": 4.824357126693226e-06, + "logits/chosen": -2.3249502182006836, + "logits/rejected": -2.104342460632324, + "logps/chosen": -466.13714599609375, + "logps/rejected": -325.49383544921875, + "loss": 0.4687, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -0.5315268635749817, + "rewards/margins": 0.9683923721313477, + "rewards/rejected": -1.4999191761016846, + "step": 3180 + }, + { + "epoch": 0.21, + "learning_rate": 4.8222486228406105e-06, + "logits/chosen": -2.0594241619110107, + "logits/rejected": -2.126784324645996, + "logps/chosen": -307.40728759765625, + "logps/rejected": -328.8841247558594, + "loss": 0.4993, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.6959290504455566, + "rewards/margins": 0.5490103363990784, + "rewards/rejected": -1.2449394464492798, + "step": 3190 + }, + { + "epoch": 0.21, + "learning_rate": 4.820128005001612e-06, + "logits/chosen": -2.2309165000915527, + "logits/rejected": -2.4116392135620117, + "logps/chosen": -356.24578857421875, + "logps/rejected": -441.033935546875, + "loss": 0.6128, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.739788830280304, + "rewards/margins": 0.4947684407234192, + "rewards/rejected": -1.2345572710037231, + "step": 3200 + }, + { + "epoch": 0.21, + "learning_rate": 4.817995284238412e-06, + "logits/chosen": -2.317817211151123, + "logits/rejected": -2.565655469894409, + "logps/chosen": -386.046142578125, + "logps/rejected": -382.74322509765625, + "loss": 0.6374, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.47409671545028687, + "rewards/margins": 0.45875558257102966, + "rewards/rejected": -0.9328522682189941, + "step": 3210 + }, + { + "epoch": 0.21, + "learning_rate": 4.815850471676327e-06, + "logits/chosen": -2.107598066329956, + "logits/rejected": -2.22800874710083, + "logps/chosen": -308.1783142089844, + "logps/rejected": -380.44512939453125, + "loss": 0.6251, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -1.0362741947174072, + "rewards/margins": 0.3872363269329071, + "rewards/rejected": -1.4235105514526367, + "step": 3220 + }, + { + "epoch": 0.21, + "learning_rate": 4.813693578503751e-06, + "logits/chosen": -2.0429303646087646, + "logits/rejected": -2.1980860233306885, + "logps/chosen": -394.8866882324219, + "logps/rejected": -415.1434020996094, + "loss": 0.6315, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.9625520706176758, + "rewards/margins": 0.28988438844680786, + "rewards/rejected": -1.2524363994598389, + "step": 3230 + }, + { + "epoch": 0.21, + "learning_rate": 4.811524615972093e-06, + "logits/chosen": -2.6508214473724365, + "logits/rejected": -2.228062391281128, + "logps/chosen": -421.9400329589844, + "logps/rejected": -352.90460205078125, + "loss": 0.5991, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.9435746073722839, + "rewards/margins": 0.35201746225357056, + "rewards/rejected": -1.2955920696258545, + "step": 3240 + }, + { + "epoch": 0.21, + "learning_rate": 4.809343595395724e-06, + "logits/chosen": -2.3315563201904297, + "logits/rejected": -2.0697438716888428, + "logps/chosen": -385.09320068359375, + "logps/rejected": -379.77740478515625, + "loss": 0.5256, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.8119958639144897, + "rewards/margins": 0.5800799131393433, + "rewards/rejected": -1.3920756578445435, + "step": 3250 + }, + { + "epoch": 0.21, + "learning_rate": 4.807150528151918e-06, + "logits/chosen": -2.42061185836792, + "logits/rejected": -2.132824659347534, + "logps/chosen": -340.1400146484375, + "logps/rejected": -332.33837890625, + "loss": 0.5084, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.44665035605430603, + "rewards/margins": 0.6681479811668396, + "rewards/rejected": -1.1147983074188232, + "step": 3260 + }, + { + "epoch": 0.21, + "learning_rate": 4.804945425680787e-06, + "logits/chosen": -2.6440634727478027, + "logits/rejected": -2.209912061691284, + "logps/chosen": -351.9530029296875, + "logps/rejected": -332.0872802734375, + "loss": 0.4822, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.35653167963027954, + "rewards/margins": 0.6620830297470093, + "rewards/rejected": -1.0186147689819336, + "step": 3270 + }, + { + "epoch": 0.21, + "learning_rate": 4.802728299485225e-06, + "logits/chosen": -2.2627944946289062, + "logits/rejected": -2.4057581424713135, + "logps/chosen": -400.33074951171875, + "logps/rejected": -461.861572265625, + "loss": 0.6489, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.7264330983161926, + "rewards/margins": 0.17443110048770905, + "rewards/rejected": -0.9008641242980957, + "step": 3280 + }, + { + "epoch": 0.22, + "learning_rate": 4.8004991611308495e-06, + "logits/chosen": -2.164400100708008, + "logits/rejected": -2.3146960735321045, + "logps/chosen": -367.74267578125, + "logps/rejected": -397.4012145996094, + "loss": 0.6908, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.5942524671554565, + "rewards/margins": 0.31151682138442993, + "rewards/rejected": -0.9057693481445312, + "step": 3290 + }, + { + "epoch": 0.22, + "learning_rate": 4.798258022245937e-06, + "logits/chosen": -2.2937357425689697, + "logits/rejected": -2.0109310150146484, + "logps/chosen": -253.11441040039062, + "logps/rejected": -310.01715087890625, + "loss": 0.4082, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.210373193025589, + "rewards/margins": 0.7842254638671875, + "rewards/rejected": -0.9945986866950989, + "step": 3300 + }, + { + "epoch": 0.22, + "learning_rate": 4.796004894521365e-06, + "logits/chosen": -2.49446964263916, + "logits/rejected": -2.3719279766082764, + "logps/chosen": -446.8296813964844, + "logps/rejected": -426.35284423828125, + "loss": 0.5862, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.5281698107719421, + "rewards/margins": 0.3869725167751312, + "rewards/rejected": -0.9151424169540405, + "step": 3310 + }, + { + "epoch": 0.22, + "learning_rate": 4.7937397897105545e-06, + "logits/chosen": -2.5837979316711426, + "logits/rejected": -2.4648940563201904, + "logps/chosen": -295.7978515625, + "logps/rejected": -295.44000244140625, + "loss": 0.5636, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.3310592472553253, + "rewards/margins": 0.5538357496261597, + "rewards/rejected": -0.8848949670791626, + "step": 3320 + }, + { + "epoch": 0.22, + "learning_rate": 4.791462719629399e-06, + "logits/chosen": -2.5264270305633545, + "logits/rejected": -2.589407444000244, + "logps/chosen": -407.0428771972656, + "logps/rejected": -364.3183898925781, + "loss": 0.6217, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.4815065264701843, + "rewards/margins": 0.15432050824165344, + "rewards/rejected": -0.6358270645141602, + "step": 3330 + }, + { + "epoch": 0.22, + "learning_rate": 4.789173696156212e-06, + "logits/chosen": -2.4700942039489746, + "logits/rejected": -2.3618836402893066, + "logps/chosen": -350.07818603515625, + "logps/rejected": -345.74725341796875, + "loss": 0.6073, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.7455496788024902, + "rewards/margins": 0.35116735100746155, + "rewards/rejected": -1.096717119216919, + "step": 3340 + }, + { + "epoch": 0.22, + "learning_rate": 4.786872731231662e-06, + "logits/chosen": -2.548314094543457, + "logits/rejected": -2.4918599128723145, + "logps/chosen": -446.01544189453125, + "logps/rejected": -495.8675842285156, + "loss": 0.5932, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.17381136119365692, + "rewards/margins": 0.887151837348938, + "rewards/rejected": -1.0609632730484009, + "step": 3350 + }, + { + "epoch": 0.22, + "learning_rate": 4.784559836858709e-06, + "logits/chosen": -2.56068754196167, + "logits/rejected": -2.5343821048736572, + "logps/chosen": -420.57012939453125, + "logps/rejected": -417.384765625, + "loss": 0.5484, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.4359206557273865, + "rewards/margins": 0.4395936131477356, + "rewards/rejected": -0.8755143284797668, + "step": 3360 + }, + { + "epoch": 0.22, + "learning_rate": 4.782235025102542e-06, + "logits/chosen": -2.3371522426605225, + "logits/rejected": -2.6841964721679688, + "logps/chosen": -430.0653381347656, + "logps/rejected": -399.28131103515625, + "loss": 0.5836, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.3325575590133667, + "rewards/margins": 0.5200690031051636, + "rewards/rejected": -0.8526265025138855, + "step": 3370 + }, + { + "epoch": 0.22, + "learning_rate": 4.779898308090519e-06, + "logits/chosen": -2.445465564727783, + "logits/rejected": -2.5201573371887207, + "logps/chosen": -459.0187072753906, + "logps/rejected": -477.300537109375, + "loss": 0.5541, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.5303617715835571, + "rewards/margins": 0.4567131996154785, + "rewards/rejected": -0.9870750308036804, + "step": 3380 + }, + { + "epoch": 0.22, + "learning_rate": 4.777549698012101e-06, + "logits/chosen": -2.197383403778076, + "logits/rejected": -2.2855629920959473, + "logps/chosen": -308.02545166015625, + "logps/rejected": -471.09454345703125, + "loss": 0.4765, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.5677157044410706, + "rewards/margins": 1.0973682403564453, + "rewards/rejected": -1.6650841236114502, + "step": 3390 + }, + { + "epoch": 0.22, + "learning_rate": 4.775189207118787e-06, + "logits/chosen": -2.546276330947876, + "logits/rejected": -2.33321475982666, + "logps/chosen": -483.5270080566406, + "logps/rejected": -526.8450927734375, + "loss": 0.5009, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.328021377325058, + "rewards/margins": 0.9916882514953613, + "rewards/rejected": -1.3197096586227417, + "step": 3400 + }, + { + "epoch": 0.22, + "learning_rate": 4.772816847724054e-06, + "logits/chosen": -2.4170174598693848, + "logits/rejected": -2.4794392585754395, + "logps/chosen": -567.5896606445312, + "logps/rejected": -457.197021484375, + "loss": 0.6967, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.722898006439209, + "rewards/margins": 0.5282581448554993, + "rewards/rejected": -1.251156210899353, + "step": 3410 + }, + { + "epoch": 0.22, + "learning_rate": 4.770432632203294e-06, + "logits/chosen": -2.2665069103240967, + "logits/rejected": -2.404815673828125, + "logps/chosen": -353.43902587890625, + "logps/rejected": -422.36627197265625, + "loss": 0.4706, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.7308656573295593, + "rewards/margins": 0.8495320081710815, + "rewards/rejected": -1.5803978443145752, + "step": 3420 + }, + { + "epoch": 0.22, + "learning_rate": 4.768036572993738e-06, + "logits/chosen": -2.409658670425415, + "logits/rejected": -2.3566925525665283, + "logps/chosen": -419.1898498535156, + "logps/rejected": -444.04180908203125, + "loss": 0.5349, + "rewards/accuracies": 0.75, + "rewards/chosen": -1.0371206998825073, + "rewards/margins": 0.6133378744125366, + "rewards/rejected": -1.6504586935043335, + "step": 3430 + }, + { + "epoch": 0.23, + "learning_rate": 4.765628682594409e-06, + "logits/chosen": -2.053723096847534, + "logits/rejected": -1.9276959896087646, + "logps/chosen": -300.51214599609375, + "logps/rejected": -445.822021484375, + "loss": 0.4754, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.40936318039894104, + "rewards/margins": 0.7937641143798828, + "rewards/rejected": -1.2031272649765015, + "step": 3440 + }, + { + "epoch": 0.23, + "learning_rate": 4.763208973566041e-06, + "logits/chosen": -2.242048740386963, + "logits/rejected": -2.1858267784118652, + "logps/chosen": -421.215087890625, + "logps/rejected": -487.296142578125, + "loss": 0.5922, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.6550520658493042, + "rewards/margins": 0.769637942314148, + "rewards/rejected": -1.4246900081634521, + "step": 3450 + }, + { + "epoch": 0.23, + "learning_rate": 4.76077745853102e-06, + "logits/chosen": -2.4033076763153076, + "logits/rejected": -2.1319174766540527, + "logps/chosen": -442.29925537109375, + "logps/rejected": -400.6534423828125, + "loss": 0.5929, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -1.0011038780212402, + "rewards/margins": 0.5951480269432068, + "rewards/rejected": -1.5962518453598022, + "step": 3460 + }, + { + "epoch": 0.23, + "learning_rate": 4.758334150173322e-06, + "logits/chosen": -1.8501081466674805, + "logits/rejected": -1.8074462413787842, + "logps/chosen": -341.0419006347656, + "logps/rejected": -442.912353515625, + "loss": 0.4994, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.8858741521835327, + "rewards/margins": 0.6651721000671387, + "rewards/rejected": -1.5510464906692505, + "step": 3470 + }, + { + "epoch": 0.23, + "learning_rate": 4.755879061238439e-06, + "logits/chosen": -2.0849146842956543, + "logits/rejected": -1.8035848140716553, + "logps/chosen": -452.26275634765625, + "logps/rejected": -470.74822998046875, + "loss": 0.5805, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -1.1448858976364136, + "rewards/margins": 0.30576300621032715, + "rewards/rejected": -1.4506490230560303, + "step": 3480 + }, + { + "epoch": 0.23, + "learning_rate": 4.753412204533317e-06, + "logits/chosen": -1.9051125049591064, + "logits/rejected": -1.9653847217559814, + "logps/chosen": -321.68829345703125, + "logps/rejected": -493.9693908691406, + "loss": 0.6153, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -1.5572867393493652, + "rewards/margins": 0.18797598779201508, + "rewards/rejected": -1.745262861251831, + "step": 3490 + }, + { + "epoch": 0.23, + "learning_rate": 4.750933592926292e-06, + "logits/chosen": -2.239838123321533, + "logits/rejected": -2.150252342224121, + "logps/chosen": -430.62799072265625, + "logps/rejected": -448.97747802734375, + "loss": 0.6151, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -1.0551931858062744, + "rewards/margins": 0.3509371280670166, + "rewards/rejected": -1.4061301946640015, + "step": 3500 + }, + { + "epoch": 0.23, + "learning_rate": 4.7484432393470124e-06, + "logits/chosen": -2.284513235092163, + "logits/rejected": -2.1681177616119385, + "logps/chosen": -414.47314453125, + "logps/rejected": -401.70819091796875, + "loss": 0.5902, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -1.0691416263580322, + "rewards/margins": 0.7813543081283569, + "rewards/rejected": -1.8504959344863892, + "step": 3510 + }, + { + "epoch": 0.23, + "learning_rate": 4.745941156786385e-06, + "logits/chosen": -2.242554187774658, + "logits/rejected": -1.9078760147094727, + "logps/chosen": -298.891845703125, + "logps/rejected": -330.5885009765625, + "loss": 0.6425, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.9184421300888062, + "rewards/margins": 0.2459874153137207, + "rewards/rejected": -1.164429783821106, + "step": 3520 + }, + { + "epoch": 0.23, + "learning_rate": 4.743427358296497e-06, + "logits/chosen": -2.1518125534057617, + "logits/rejected": -1.8860008716583252, + "logps/chosen": -389.6554870605469, + "logps/rejected": -460.02960205078125, + "loss": 0.4746, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.7955155372619629, + "rewards/margins": 0.6738079190254211, + "rewards/rejected": -1.4693235158920288, + "step": 3530 + }, + { + "epoch": 0.23, + "learning_rate": 4.740901856990553e-06, + "logits/chosen": -2.2190632820129395, + "logits/rejected": -2.1639111042022705, + "logps/chosen": -367.6224060058594, + "logps/rejected": -394.49090576171875, + "loss": 0.7616, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -1.097391128540039, + "rewards/margins": 0.1312669813632965, + "rewards/rejected": -1.2286580801010132, + "step": 3540 + }, + { + "epoch": 0.23, + "learning_rate": 4.738364666042804e-06, + "logits/chosen": -2.3005948066711426, + "logits/rejected": -1.7008392810821533, + "logps/chosen": -389.313232421875, + "logps/rejected": -368.5771484375, + "loss": 0.6126, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -1.3324612379074097, + "rewards/margins": 0.3842897117137909, + "rewards/rejected": -1.7167507410049438, + "step": 3550 + }, + { + "epoch": 0.23, + "learning_rate": 4.735815798688483e-06, + "logits/chosen": -1.9981590509414673, + "logits/rejected": -2.083627462387085, + "logps/chosen": -366.39874267578125, + "logps/rejected": -425.00469970703125, + "loss": 0.4667, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -1.2873318195343018, + "rewards/margins": 0.6296716332435608, + "rewards/rejected": -1.9170036315917969, + "step": 3560 + }, + { + "epoch": 0.23, + "learning_rate": 4.7332552682237285e-06, + "logits/chosen": -2.1684536933898926, + "logits/rejected": -2.210479497909546, + "logps/chosen": -428.04638671875, + "logps/rejected": -458.6451110839844, + "loss": 0.5565, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -1.248167872428894, + "rewards/margins": 0.5324001312255859, + "rewards/rejected": -1.7805678844451904, + "step": 3570 + }, + { + "epoch": 0.23, + "learning_rate": 4.7306830880055234e-06, + "logits/chosen": -2.2435035705566406, + "logits/rejected": -2.0245437622070312, + "logps/chosen": -425.3526306152344, + "logps/rejected": -438.34307861328125, + "loss": 0.4326, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.29618892073631287, + "rewards/margins": 1.0744205713272095, + "rewards/rejected": -1.3706094026565552, + "step": 3580 + }, + { + "epoch": 0.23, + "learning_rate": 4.728099271451619e-06, + "logits/chosen": -2.253227710723877, + "logits/rejected": -2.200775623321533, + "logps/chosen": -394.0977478027344, + "logps/rejected": -383.7187805175781, + "loss": 0.6185, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.43818607926368713, + "rewards/margins": 0.6707245707511902, + "rewards/rejected": -1.1089107990264893, + "step": 3590 + }, + { + "epoch": 0.24, + "learning_rate": 4.725503832040466e-06, + "logits/chosen": -2.3103957176208496, + "logits/rejected": -2.211575984954834, + "logps/chosen": -375.616943359375, + "logps/rejected": -480.480224609375, + "loss": 0.6166, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.8503471612930298, + "rewards/margins": 0.2555815577507019, + "rewards/rejected": -1.1059287786483765, + "step": 3600 + }, + { + "epoch": 0.24, + "learning_rate": 4.722896783311152e-06, + "logits/chosen": -2.03578519821167, + "logits/rejected": -2.1414883136749268, + "logps/chosen": -342.0790100097656, + "logps/rejected": -416.62762451171875, + "loss": 0.6199, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.6153795123100281, + "rewards/margins": 1.017171025276184, + "rewards/rejected": -1.632550597190857, + "step": 3610 + }, + { + "epoch": 0.24, + "learning_rate": 4.720278138863318e-06, + "logits/chosen": -2.362046718597412, + "logits/rejected": -1.7298154830932617, + "logps/chosen": -337.70806884765625, + "logps/rejected": -404.6884765625, + "loss": 0.461, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.8324246406555176, + "rewards/margins": 1.3992846012115479, + "rewards/rejected": -2.2317090034484863, + "step": 3620 + }, + { + "epoch": 0.24, + "learning_rate": 4.717647912357095e-06, + "logits/chosen": -2.1297411918640137, + "logits/rejected": -2.0725905895233154, + "logps/chosen": -417.9518127441406, + "logps/rejected": -386.58245849609375, + "loss": 0.5315, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.5126937627792358, + "rewards/margins": 0.6966745853424072, + "rewards/rejected": -1.209368348121643, + "step": 3630 + }, + { + "epoch": 0.24, + "learning_rate": 4.715006117513035e-06, + "logits/chosen": -2.3161420822143555, + "logits/rejected": -2.2902309894561768, + "logps/chosen": -437.8321838378906, + "logps/rejected": -506.87744140625, + "loss": 0.5374, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.5351322293281555, + "rewards/margins": 0.9479573965072632, + "rewards/rejected": -1.4830896854400635, + "step": 3640 + }, + { + "epoch": 0.24, + "learning_rate": 4.7123527681120326e-06, + "logits/chosen": -2.1307809352874756, + "logits/rejected": -2.225717782974243, + "logps/chosen": -424.44268798828125, + "logps/rejected": -457.6105041503906, + "loss": 0.5055, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.948150634765625, + "rewards/margins": 0.9346704483032227, + "rewards/rejected": -1.8828208446502686, + "step": 3650 + }, + { + "epoch": 0.24, + "learning_rate": 4.7096878779952594e-06, + "logits/chosen": -2.139453887939453, + "logits/rejected": -2.0068986415863037, + "logps/chosen": -498.299072265625, + "logps/rejected": -551.0009155273438, + "loss": 0.687, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -1.0130908489227295, + "rewards/margins": 0.6879249811172485, + "rewards/rejected": -1.701015830039978, + "step": 3660 + }, + { + "epoch": 0.24, + "learning_rate": 4.707011461064086e-06, + "logits/chosen": -2.2592787742614746, + "logits/rejected": -2.0789124965667725, + "logps/chosen": -453.88763427734375, + "logps/rejected": -462.21533203125, + "loss": 0.6584, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -1.4273847341537476, + "rewards/margins": 0.19941839575767517, + "rewards/rejected": -1.6268031597137451, + "step": 3670 + }, + { + "epoch": 0.24, + "learning_rate": 4.704323531280016e-06, + "logits/chosen": -2.475243330001831, + "logits/rejected": -2.24717378616333, + "logps/chosen": -546.4675903320312, + "logps/rejected": -525.025390625, + "loss": 0.5654, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.911401093006134, + "rewards/margins": 0.4049322009086609, + "rewards/rejected": -1.3163334131240845, + "step": 3680 + }, + { + "epoch": 0.24, + "learning_rate": 4.701624102664606e-06, + "logits/chosen": -2.4433531761169434, + "logits/rejected": -2.279224395751953, + "logps/chosen": -578.2789916992188, + "logps/rejected": -490.89068603515625, + "loss": 0.5225, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -1.0811493396759033, + "rewards/margins": 0.6341270804405212, + "rewards/rejected": -1.7152763605117798, + "step": 3690 + }, + { + "epoch": 0.24, + "learning_rate": 4.698913189299399e-06, + "logits/chosen": -1.9927383661270142, + "logits/rejected": -1.8205230236053467, + "logps/chosen": -437.2293395996094, + "logps/rejected": -482.68701171875, + "loss": 0.4716, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -1.2083475589752197, + "rewards/margins": 1.060872197151184, + "rewards/rejected": -2.2692196369171143, + "step": 3700 + }, + { + "epoch": 0.24, + "learning_rate": 4.696190805325847e-06, + "logits/chosen": -2.0010743141174316, + "logits/rejected": -2.033720016479492, + "logps/chosen": -365.48394775390625, + "logps/rejected": -443.25347900390625, + "loss": 0.5951, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -1.3819520473480225, + "rewards/margins": 0.6150561571121216, + "rewards/rejected": -1.9970080852508545, + "step": 3710 + }, + { + "epoch": 0.24, + "learning_rate": 4.693456964945239e-06, + "logits/chosen": -2.0450572967529297, + "logits/rejected": -2.0645382404327393, + "logps/chosen": -420.50335693359375, + "logps/rejected": -503.34454345703125, + "loss": 0.534, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.9044291377067566, + "rewards/margins": 0.6475400924682617, + "rewards/rejected": -1.551969289779663, + "step": 3720 + }, + { + "epoch": 0.24, + "learning_rate": 4.6907116824186245e-06, + "logits/chosen": -2.178130626678467, + "logits/rejected": -2.186275005340576, + "logps/chosen": -364.72845458984375, + "logps/rejected": -462.02618408203125, + "loss": 0.5147, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.6570572257041931, + "rewards/margins": 0.6763592958450317, + "rewards/rejected": -1.3334165811538696, + "step": 3730 + }, + { + "epoch": 0.24, + "learning_rate": 4.687954972066742e-06, + "logits/chosen": -2.0935654640197754, + "logits/rejected": -1.9112679958343506, + "logps/chosen": -394.4638977050781, + "logps/rejected": -397.63812255859375, + "loss": 0.6238, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.7246359586715698, + "rewards/margins": 0.2681662440299988, + "rewards/rejected": -0.9928021430969238, + "step": 3740 + }, + { + "epoch": 0.25, + "learning_rate": 4.685186848269944e-06, + "logits/chosen": -1.9995677471160889, + "logits/rejected": -1.9223219156265259, + "logps/chosen": -397.94915771484375, + "logps/rejected": -484.4215393066406, + "loss": 0.4696, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.6363323330879211, + "rewards/margins": 1.0407795906066895, + "rewards/rejected": -1.6771119832992554, + "step": 3750 + }, + { + "epoch": 0.25, + "learning_rate": 4.682407325468119e-06, + "logits/chosen": -2.4718260765075684, + "logits/rejected": -2.0210213661193848, + "logps/chosen": -443.21990966796875, + "logps/rejected": -384.62713623046875, + "loss": 0.7026, + "rewards/accuracies": 0.44999998807907104, + "rewards/chosen": -1.257415771484375, + "rewards/margins": 0.09741837531328201, + "rewards/rejected": -1.3548343181610107, + "step": 3760 + }, + { + "epoch": 0.25, + "learning_rate": 4.67961641816062e-06, + "logits/chosen": -2.411287784576416, + "logits/rejected": -2.1336147785186768, + "logps/chosen": -371.8212585449219, + "logps/rejected": -365.04302978515625, + "loss": 0.6053, + "rewards/accuracies": 0.4000000059604645, + "rewards/chosen": -0.4813748896121979, + "rewards/margins": 0.3684390187263489, + "rewards/rejected": -0.8498139381408691, + "step": 3770 + }, + { + "epoch": 0.25, + "learning_rate": 4.676814140906188e-06, + "logits/chosen": -2.2237911224365234, + "logits/rejected": -1.9375216960906982, + "logps/chosen": -399.607666015625, + "logps/rejected": -406.94879150390625, + "loss": 0.4214, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.43446213006973267, + "rewards/margins": 1.0578770637512207, + "rewards/rejected": -1.4923391342163086, + "step": 3780 + }, + { + "epoch": 0.25, + "learning_rate": 4.674000508322872e-06, + "logits/chosen": -2.434276580810547, + "logits/rejected": -2.393608570098877, + "logps/chosen": -457.020263671875, + "logps/rejected": -479.9658203125, + "loss": 0.54, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.4180832505226135, + "rewards/margins": 0.8254992365837097, + "rewards/rejected": -1.2435824871063232, + "step": 3790 + }, + { + "epoch": 0.25, + "learning_rate": 4.671175535087959e-06, + "logits/chosen": -2.320284605026245, + "logits/rejected": -1.8357927799224854, + "logps/chosen": -394.5904846191406, + "logps/rejected": -348.63140869140625, + "loss": 0.6604, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.7477728724479675, + "rewards/margins": 0.5096405744552612, + "rewards/rejected": -1.2574135065078735, + "step": 3800 + }, + { + "epoch": 0.25, + "learning_rate": 4.6683392359378924e-06, + "logits/chosen": -2.4195501804351807, + "logits/rejected": -2.113314151763916, + "logps/chosen": -315.3486633300781, + "logps/rejected": -385.1060485839844, + "loss": 0.6387, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.5793839693069458, + "rewards/margins": 0.2212570160627365, + "rewards/rejected": -0.8006409406661987, + "step": 3810 + }, + { + "epoch": 0.25, + "learning_rate": 4.665491625668198e-06, + "logits/chosen": -2.0303101539611816, + "logits/rejected": -1.8734495639801025, + "logps/chosen": -309.8327941894531, + "logps/rejected": -332.2201232910156, + "loss": 0.5457, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.7786539793014526, + "rewards/margins": 0.4457494616508484, + "rewards/rejected": -1.2244032621383667, + "step": 3820 + }, + { + "epoch": 0.25, + "learning_rate": 4.662632719133407e-06, + "logits/chosen": -2.1171882152557373, + "logits/rejected": -1.7437130212783813, + "logps/chosen": -300.73651123046875, + "logps/rejected": -407.58721923828125, + "loss": 0.6046, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -1.04213285446167, + "rewards/margins": 0.2609824240207672, + "rewards/rejected": -1.3031153678894043, + "step": 3830 + }, + { + "epoch": 0.25, + "learning_rate": 4.659762531246974e-06, + "logits/chosen": -2.126164197921753, + "logits/rejected": -2.0261282920837402, + "logps/chosen": -279.12664794921875, + "logps/rejected": -399.71173095703125, + "loss": 0.5652, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.49107131361961365, + "rewards/margins": 0.6926654577255249, + "rewards/rejected": -1.1837369203567505, + "step": 3840 + }, + { + "epoch": 0.25, + "learning_rate": 4.656881076981207e-06, + "logits/chosen": -2.259749174118042, + "logits/rejected": -2.4762749671936035, + "logps/chosen": -387.292236328125, + "logps/rejected": -439.84100341796875, + "loss": 0.5908, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.5123671293258667, + "rewards/margins": 0.43006739020347595, + "rewards/rejected": -0.9424344897270203, + "step": 3850 + }, + { + "epoch": 0.25, + "learning_rate": 4.653988371367183e-06, + "logits/chosen": -2.232792854309082, + "logits/rejected": -1.897709608078003, + "logps/chosen": -378.61553955078125, + "logps/rejected": -530.4464721679688, + "loss": 0.5279, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.6617268323898315, + "rewards/margins": 0.752768874168396, + "rewards/rejected": -1.4144957065582275, + "step": 3860 + }, + { + "epoch": 0.25, + "learning_rate": 4.651084429494671e-06, + "logits/chosen": -2.2239532470703125, + "logits/rejected": -1.796750783920288, + "logps/chosen": -314.5279235839844, + "logps/rejected": -361.30572509765625, + "loss": 0.5363, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.9054681658744812, + "rewards/margins": 0.5881915092468262, + "rewards/rejected": -1.4936596155166626, + "step": 3870 + }, + { + "epoch": 0.25, + "learning_rate": 4.648169266512053e-06, + "logits/chosen": -2.2404286861419678, + "logits/rejected": -2.038695812225342, + "logps/chosen": -469.52740478515625, + "logps/rejected": -552.247802734375, + "loss": 0.6447, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -1.0587666034698486, + "rewards/margins": 0.5267407298088074, + "rewards/rejected": -1.5855072736740112, + "step": 3880 + }, + { + "epoch": 0.25, + "learning_rate": 4.6452428976262505e-06, + "logits/chosen": -2.192521572113037, + "logits/rejected": -2.2139482498168945, + "logps/chosen": -397.2904968261719, + "logps/rejected": -423.5071716308594, + "loss": 0.5414, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.8510065078735352, + "rewards/margins": 0.9575133323669434, + "rewards/rejected": -1.808519721031189, + "step": 3890 + }, + { + "epoch": 0.26, + "learning_rate": 4.642305338102633e-06, + "logits/chosen": -2.1184463500976562, + "logits/rejected": -2.1783528327941895, + "logps/chosen": -439.29248046875, + "logps/rejected": -539.232666015625, + "loss": 0.5006, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.8429673910140991, + "rewards/margins": 1.2577102184295654, + "rewards/rejected": -2.100677490234375, + "step": 3900 + }, + { + "epoch": 0.26, + "learning_rate": 4.639356603264953e-06, + "logits/chosen": -2.2874608039855957, + "logits/rejected": -2.227522373199463, + "logps/chosen": -382.64434814453125, + "logps/rejected": -534.5256958007812, + "loss": 0.5064, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.9097906351089478, + "rewards/margins": 1.0155916213989258, + "rewards/rejected": -1.9253822565078735, + "step": 3910 + }, + { + "epoch": 0.26, + "learning_rate": 4.636396708495255e-06, + "logits/chosen": -1.881720781326294, + "logits/rejected": -2.0848631858825684, + "logps/chosen": -413.80279541015625, + "logps/rejected": -475.28692626953125, + "loss": 0.6035, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -1.131103754043579, + "rewards/margins": 0.6518651247024536, + "rewards/rejected": -1.7829687595367432, + "step": 3920 + }, + { + "epoch": 0.26, + "learning_rate": 4.633425669233799e-06, + "logits/chosen": -2.0936412811279297, + "logits/rejected": -2.0829267501831055, + "logps/chosen": -506.6336975097656, + "logps/rejected": -541.3096923828125, + "loss": 0.5988, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -1.5224252939224243, + "rewards/margins": 0.4728141725063324, + "rewards/rejected": -1.995239496231079, + "step": 3930 + }, + { + "epoch": 0.26, + "learning_rate": 4.6304435009789825e-06, + "logits/chosen": -2.333017110824585, + "logits/rejected": -2.3485379219055176, + "logps/chosen": -378.2872619628906, + "logps/rejected": -490.08038330078125, + "loss": 0.5252, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.9699506759643555, + "rewards/margins": 0.65302973985672, + "rewards/rejected": -1.6229804754257202, + "step": 3940 + }, + { + "epoch": 0.26, + "learning_rate": 4.627450219287256e-06, + "logits/chosen": -2.0185725688934326, + "logits/rejected": -1.6690444946289062, + "logps/chosen": -388.4188232421875, + "logps/rejected": -435.01483154296875, + "loss": 0.6317, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -1.3685195446014404, + "rewards/margins": 0.7547028064727783, + "rewards/rejected": -2.123222589492798, + "step": 3950 + }, + { + "epoch": 0.26, + "learning_rate": 4.624445839773042e-06, + "logits/chosen": -1.7887862920761108, + "logits/rejected": -1.7998504638671875, + "logps/chosen": -403.81365966796875, + "logps/rejected": -547.1272583007812, + "loss": 0.482, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -1.2239279747009277, + "rewards/margins": 1.0462610721588135, + "rewards/rejected": -2.270189046859741, + "step": 3960 + }, + { + "epoch": 0.26, + "learning_rate": 4.621430378108656e-06, + "logits/chosen": -2.050891637802124, + "logits/rejected": -1.8938401937484741, + "logps/chosen": -389.00299072265625, + "logps/rejected": -550.5980224609375, + "loss": 0.5963, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -1.2409727573394775, + "rewards/margins": 0.8711128234863281, + "rewards/rejected": -2.1120858192443848, + "step": 3970 + }, + { + "epoch": 0.26, + "learning_rate": 4.618403850024223e-06, + "logits/chosen": -2.2062628269195557, + "logits/rejected": -2.1440682411193848, + "logps/chosen": -441.54205322265625, + "logps/rejected": -556.6419067382812, + "loss": 0.5051, + "rewards/accuracies": 0.75, + "rewards/chosen": -1.2554422616958618, + "rewards/margins": 0.930446982383728, + "rewards/rejected": -2.18588924407959, + "step": 3980 + }, + { + "epoch": 0.26, + "learning_rate": 4.615366271307598e-06, + "logits/chosen": -2.1400365829467773, + "logits/rejected": -1.9636625051498413, + "logps/chosen": -427.9474182128906, + "logps/rejected": -428.34844970703125, + "loss": 0.5877, + "rewards/accuracies": 0.5, + "rewards/chosen": -1.8203452825546265, + "rewards/margins": 0.3573678433895111, + "rewards/rejected": -2.177712917327881, + "step": 3990 + }, + { + "epoch": 0.26, + "learning_rate": 4.612317657804277e-06, + "logits/chosen": -1.7337572574615479, + "logits/rejected": -1.5541400909423828, + "logps/chosen": -336.13958740234375, + "logps/rejected": -405.3411560058594, + "loss": 0.5326, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -1.7155799865722656, + "rewards/margins": 0.8053289651870728, + "rewards/rejected": -2.520909070968628, + "step": 4000 + }, + { + "epoch": 0.26, + "learning_rate": 4.6092580254173236e-06, + "logits/chosen": -2.1133389472961426, + "logits/rejected": -2.1598751544952393, + "logps/chosen": -502.5541076660156, + "logps/rejected": -561.4671630859375, + "loss": 0.622, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -1.6341140270233154, + "rewards/margins": 0.7851090431213379, + "rewards/rejected": -2.4192230701446533, + "step": 4010 + }, + { + "epoch": 0.26, + "learning_rate": 4.606187390107277e-06, + "logits/chosen": -1.842321753501892, + "logits/rejected": -1.8627601861953735, + "logps/chosen": -441.71307373046875, + "logps/rejected": -457.519775390625, + "loss": 0.6325, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -1.5453346967697144, + "rewards/margins": 0.3694360852241516, + "rewards/rejected": -1.9147708415985107, + "step": 4020 + }, + { + "epoch": 0.26, + "learning_rate": 4.603105767892077e-06, + "logits/chosen": -2.073103427886963, + "logits/rejected": -2.0986146926879883, + "logps/chosen": -459.52081298828125, + "logps/rejected": -571.8239135742188, + "loss": 0.5492, + "rewards/accuracies": 0.75, + "rewards/chosen": -1.6996116638183594, + "rewards/margins": 0.6912637948989868, + "rewards/rejected": -2.3908753395080566, + "step": 4030 + }, + { + "epoch": 0.26, + "learning_rate": 4.6000131748469725e-06, + "logits/chosen": -2.240130662918091, + "logits/rejected": -2.3786003589630127, + "logps/chosen": -482.5987854003906, + "logps/rejected": -516.04931640625, + "loss": 0.434, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.6731864213943481, + "rewards/margins": 0.9360666275024414, + "rewards/rejected": -1.6092529296875, + "step": 4040 + }, + { + "epoch": 0.26, + "learning_rate": 4.596909627104445e-06, + "logits/chosen": -2.0946714878082275, + "logits/rejected": -1.9713408946990967, + "logps/chosen": -421.6636657714844, + "logps/rejected": -420.95709228515625, + "loss": 0.5574, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -1.4437962770462036, + "rewards/margins": 0.46572622656822205, + "rewards/rejected": -1.9095224142074585, + "step": 4050 + }, + { + "epoch": 0.27, + "learning_rate": 4.5937951408541215e-06, + "logits/chosen": -2.180326461791992, + "logits/rejected": -1.5821549892425537, + "logps/chosen": -513.3946533203125, + "logps/rejected": -469.1312561035156, + "loss": 0.5073, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -1.408052682876587, + "rewards/margins": 0.7202036380767822, + "rewards/rejected": -2.128256320953369, + "step": 4060 + }, + { + "epoch": 0.27, + "learning_rate": 4.590669732342685e-06, + "logits/chosen": -1.8135654926300049, + "logits/rejected": -2.0680251121520996, + "logps/chosen": -355.2364807128906, + "logps/rejected": -480.3870544433594, + "loss": 0.5478, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -1.2912461757659912, + "rewards/margins": 0.7113165855407715, + "rewards/rejected": -2.002562999725342, + "step": 4070 + }, + { + "epoch": 0.27, + "learning_rate": 4.587533417873799e-06, + "logits/chosen": -1.9205124378204346, + "logits/rejected": -1.9332654476165771, + "logps/chosen": -368.07330322265625, + "logps/rejected": -443.73223876953125, + "loss": 0.5676, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -1.0717012882232666, + "rewards/margins": 0.7870392203330994, + "rewards/rejected": -1.8587405681610107, + "step": 4080 + }, + { + "epoch": 0.27, + "learning_rate": 4.584386213808016e-06, + "logits/chosen": -1.9750468730926514, + "logits/rejected": -1.7371772527694702, + "logps/chosen": -380.44927978515625, + "logps/rejected": -362.88751220703125, + "loss": 0.6002, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -1.1538901329040527, + "rewards/margins": 0.6567569971084595, + "rewards/rejected": -1.8106470108032227, + "step": 4090 + }, + { + "epoch": 0.27, + "learning_rate": 4.581228136562693e-06, + "logits/chosen": -2.6001052856445312, + "logits/rejected": -2.1924033164978027, + "logps/chosen": -488.16082763671875, + "logps/rejected": -533.7776489257812, + "loss": 0.5543, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -1.1459815502166748, + "rewards/margins": 0.7997251749038696, + "rewards/rejected": -1.9457066059112549, + "step": 4100 + }, + { + "epoch": 0.27, + "learning_rate": 4.578059202611909e-06, + "logits/chosen": -2.046915054321289, + "logits/rejected": -2.2624149322509766, + "logps/chosen": -352.61962890625, + "logps/rejected": -454.8382263183594, + "loss": 0.4396, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -1.0706390142440796, + "rewards/margins": 0.870526134967804, + "rewards/rejected": -1.9411652088165283, + "step": 4110 + }, + { + "epoch": 0.27, + "learning_rate": 4.574879428486376e-06, + "logits/chosen": -1.712337851524353, + "logits/rejected": -1.7488536834716797, + "logps/chosen": -363.9149169921875, + "logps/rejected": -437.3489685058594, + "loss": 0.5609, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -1.2686556577682495, + "rewards/margins": 0.8758034706115723, + "rewards/rejected": -2.1444592475891113, + "step": 4120 + }, + { + "epoch": 0.27, + "learning_rate": 4.571688830773352e-06, + "logits/chosen": -2.3221523761749268, + "logits/rejected": -2.22611927986145, + "logps/chosen": -467.30377197265625, + "logps/rejected": -583.3150634765625, + "loss": 0.4784, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -1.326136589050293, + "rewards/margins": 0.8334660530090332, + "rewards/rejected": -2.159602642059326, + "step": 4130 + }, + { + "epoch": 0.27, + "learning_rate": 4.568487426116559e-06, + "logits/chosen": -2.140794277191162, + "logits/rejected": -2.1583950519561768, + "logps/chosen": -437.3667907714844, + "logps/rejected": -428.0228576660156, + "loss": 0.5807, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -1.687368392944336, + "rewards/margins": 0.315930038690567, + "rewards/rejected": -2.00329852104187, + "step": 4140 + }, + { + "epoch": 0.27, + "learning_rate": 4.565275231216092e-06, + "logits/chosen": -1.6297184228897095, + "logits/rejected": -1.4589264392852783, + "logps/chosen": -418.3526306152344, + "logps/rejected": -456.468017578125, + "loss": 0.5298, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -1.8031368255615234, + "rewards/margins": 0.7424917817115784, + "rewards/rejected": -2.545628309249878, + "step": 4150 + }, + { + "epoch": 0.27, + "learning_rate": 4.562052262828331e-06, + "logits/chosen": -1.6205329895019531, + "logits/rejected": -1.862931489944458, + "logps/chosen": -362.370361328125, + "logps/rejected": -370.2030334472656, + "loss": 0.6984, + "rewards/accuracies": 0.44999998807907104, + "rewards/chosen": -1.7531535625457764, + "rewards/margins": -0.05112253874540329, + "rewards/rejected": -1.702031135559082, + "step": 4160 + }, + { + "epoch": 0.27, + "learning_rate": 4.558818537765861e-06, + "logits/chosen": -2.1181905269622803, + "logits/rejected": -2.2233452796936035, + "logps/chosen": -423.51629638671875, + "logps/rejected": -519.7943115234375, + "loss": 0.4127, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -1.3734153509140015, + "rewards/margins": 0.9970091581344604, + "rewards/rejected": -2.370424509048462, + "step": 4170 + }, + { + "epoch": 0.27, + "learning_rate": 4.555574072897374e-06, + "logits/chosen": -2.010232448577881, + "logits/rejected": -1.2434194087982178, + "logps/chosen": -479.2718811035156, + "logps/rejected": -380.8519592285156, + "loss": 0.5867, + "rewards/accuracies": 0.44999998807907104, + "rewards/chosen": -1.7946832180023193, + "rewards/margins": 0.40616196393966675, + "rewards/rejected": -2.200845241546631, + "step": 4180 + }, + { + "epoch": 0.27, + "learning_rate": 4.552318885147589e-06, + "logits/chosen": -2.271369457244873, + "logits/rejected": -1.6737544536590576, + "logps/chosen": -410.2076721191406, + "logps/rejected": -420.95220947265625, + "loss": 0.6536, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -1.0419671535491943, + "rewards/margins": 0.33758771419525146, + "rewards/rejected": -1.3795549869537354, + "step": 4190 + }, + { + "epoch": 0.27, + "learning_rate": 4.549052991497159e-06, + "logits/chosen": -1.976317048072815, + "logits/rejected": -2.192378044128418, + "logps/chosen": -286.4415283203125, + "logps/rejected": -399.6639404296875, + "loss": 0.4888, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.8588165044784546, + "rewards/margins": 0.7741298675537109, + "rewards/rejected": -1.6329463720321655, + "step": 4200 + }, + { + "epoch": 0.28, + "learning_rate": 4.545776408982585e-06, + "logits/chosen": -1.9261176586151123, + "logits/rejected": -1.5991665124893188, + "logps/chosen": -292.0099792480469, + "logps/rejected": -431.8436584472656, + "loss": 0.5102, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -1.317204475402832, + "rewards/margins": 1.204416036605835, + "rewards/rejected": -2.521620750427246, + "step": 4210 + }, + { + "epoch": 0.28, + "learning_rate": 4.542489154696128e-06, + "logits/chosen": -2.0769829750061035, + "logits/rejected": -1.9520477056503296, + "logps/chosen": -391.1345520019531, + "logps/rejected": -382.9508972167969, + "loss": 0.6132, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.9532631635665894, + "rewards/margins": 0.8000235557556152, + "rewards/rejected": -1.753286600112915, + "step": 4220 + }, + { + "epoch": 0.28, + "learning_rate": 4.5391912457857145e-06, + "logits/chosen": -1.8980144262313843, + "logits/rejected": -1.5454305410385132, + "logps/chosen": -319.369873046875, + "logps/rejected": -440.15936279296875, + "loss": 0.5566, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.8968728184700012, + "rewards/margins": 1.1255744695663452, + "rewards/rejected": -2.022447347640991, + "step": 4230 + }, + { + "epoch": 0.28, + "learning_rate": 4.535882699454854e-06, + "logits/chosen": -2.2944552898406982, + "logits/rejected": -1.9272899627685547, + "logps/chosen": -397.7351379394531, + "logps/rejected": -526.8030395507812, + "loss": 0.4981, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -1.0970957279205322, + "rewards/margins": 1.2821524143218994, + "rewards/rejected": -2.3792481422424316, + "step": 4240 + }, + { + "epoch": 0.28, + "learning_rate": 4.532563532962546e-06, + "logits/chosen": -2.3536887168884277, + "logits/rejected": -1.7265129089355469, + "logps/chosen": -443.1983947753906, + "logps/rejected": -416.35797119140625, + "loss": 0.6158, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.9765421152114868, + "rewards/margins": 1.1191794872283936, + "rewards/rejected": -2.09572172164917, + "step": 4250 + }, + { + "epoch": 0.28, + "learning_rate": 4.529233763623187e-06, + "logits/chosen": -2.1754398345947266, + "logits/rejected": -2.0757946968078613, + "logps/chosen": -416.9156188964844, + "logps/rejected": -435.5135192871094, + "loss": 0.5907, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.8619926571846008, + "rewards/margins": 0.7766445875167847, + "rewards/rejected": -1.6386373043060303, + "step": 4260 + }, + { + "epoch": 0.28, + "learning_rate": 4.5258934088064854e-06, + "logits/chosen": -2.361896514892578, + "logits/rejected": -2.3284542560577393, + "logps/chosen": -416.2744140625, + "logps/rejected": -625.7243041992188, + "loss": 0.4612, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -1.0023585557937622, + "rewards/margins": 1.0771377086639404, + "rewards/rejected": -2.079496383666992, + "step": 4270 + }, + { + "epoch": 0.28, + "learning_rate": 4.522542485937369e-06, + "logits/chosen": -1.9742462635040283, + "logits/rejected": -1.6750361919403076, + "logps/chosen": -322.1027526855469, + "logps/rejected": -342.66912841796875, + "loss": 0.7162, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -1.497710943222046, + "rewards/margins": 0.40949931740760803, + "rewards/rejected": -1.907210111618042, + "step": 4280 + }, + { + "epoch": 0.28, + "learning_rate": 4.519181012495892e-06, + "logits/chosen": -2.279792308807373, + "logits/rejected": -1.960753083229065, + "logps/chosen": -444.9163513183594, + "logps/rejected": -554.8828125, + "loss": 0.5329, + "rewards/accuracies": 0.75, + "rewards/chosen": -1.734828233718872, + "rewards/margins": 0.7434547543525696, + "rewards/rejected": -2.4782826900482178, + "step": 4290 + }, + { + "epoch": 0.28, + "learning_rate": 4.515809006017147e-06, + "logits/chosen": -1.8855798244476318, + "logits/rejected": -1.7125604152679443, + "logps/chosen": -436.1876525878906, + "logps/rejected": -519.3978271484375, + "loss": 0.6427, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -2.2301485538482666, + "rewards/margins": 0.7057285308837891, + "rewards/rejected": -2.9358768463134766, + "step": 4300 + }, + { + "epoch": 0.28, + "learning_rate": 4.512426484091171e-06, + "logits/chosen": -1.9551712274551392, + "logits/rejected": -2.109748363494873, + "logps/chosen": -495.01007080078125, + "logps/rejected": -530.8716430664062, + "loss": 0.4651, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -2.095219373703003, + "rewards/margins": 0.6353042125701904, + "rewards/rejected": -2.7305235862731934, + "step": 4310 + }, + { + "epoch": 0.28, + "learning_rate": 4.509033464362858e-06, + "logits/chosen": -2.250704765319824, + "logits/rejected": -2.059197187423706, + "logps/chosen": -586.2399291992188, + "logps/rejected": -715.1597900390625, + "loss": 0.4659, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -2.1677844524383545, + "rewards/margins": 1.3767186403274536, + "rewards/rejected": -3.5445029735565186, + "step": 4320 + }, + { + "epoch": 0.28, + "learning_rate": 4.505629964531857e-06, + "logits/chosen": -2.2147953510284424, + "logits/rejected": -1.9781885147094727, + "logps/chosen": -441.72674560546875, + "logps/rejected": -460.2294006347656, + "loss": 0.4821, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -1.527851939201355, + "rewards/margins": 0.9044572710990906, + "rewards/rejected": -2.432309150695801, + "step": 4330 + }, + { + "epoch": 0.28, + "learning_rate": 4.502216002352492e-06, + "logits/chosen": -2.224872350692749, + "logits/rejected": -2.1673412322998047, + "logps/chosen": -551.7815551757812, + "logps/rejected": -583.5092163085938, + "loss": 0.6369, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -1.7062419652938843, + "rewards/margins": 0.8081586956977844, + "rewards/rejected": -2.5144007205963135, + "step": 4340 + }, + { + "epoch": 0.28, + "learning_rate": 4.498791595633663e-06, + "logits/chosen": -1.6142504215240479, + "logits/rejected": -1.7114217281341553, + "logps/chosen": -358.05645751953125, + "logps/rejected": -460.80279541015625, + "loss": 0.5532, + "rewards/accuracies": 0.75, + "rewards/chosen": -1.6065235137939453, + "rewards/margins": 0.5826237201690674, + "rewards/rejected": -2.189147472381592, + "step": 4350 + }, + { + "epoch": 0.29, + "learning_rate": 4.495356762238751e-06, + "logits/chosen": -1.9433860778808594, + "logits/rejected": -1.9632737636566162, + "logps/chosen": -393.555908203125, + "logps/rejected": -529.794677734375, + "loss": 0.5459, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -1.6980907917022705, + "rewards/margins": 0.9175578355789185, + "rewards/rejected": -2.6156485080718994, + "step": 4360 + }, + { + "epoch": 0.29, + "learning_rate": 4.491911520085532e-06, + "logits/chosen": -2.3992745876312256, + "logits/rejected": -2.0180933475494385, + "logps/chosen": -497.4437561035156, + "logps/rejected": -598.4129638671875, + "loss": 0.3831, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -1.9618456363677979, + "rewards/margins": 1.466132402420044, + "rewards/rejected": -3.4279778003692627, + "step": 4370 + }, + { + "epoch": 0.29, + "learning_rate": 4.488455887146075e-06, + "logits/chosen": -1.813454270362854, + "logits/rejected": -2.147561550140381, + "logps/chosen": -557.65478515625, + "logps/rejected": -547.4513549804688, + "loss": 0.7619, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -2.099945306777954, + "rewards/margins": 0.7333266139030457, + "rewards/rejected": -2.8332715034484863, + "step": 4380 + }, + { + "epoch": 0.29, + "learning_rate": 4.484989881446654e-06, + "logits/chosen": -2.0431084632873535, + "logits/rejected": -2.0684776306152344, + "logps/chosen": -519.9021606445312, + "logps/rejected": -536.9307861328125, + "loss": 0.4899, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -1.86326003074646, + "rewards/margins": 0.8370512127876282, + "rewards/rejected": -2.7003111839294434, + "step": 4390 + }, + { + "epoch": 0.29, + "learning_rate": 4.481513521067654e-06, + "logits/chosen": -2.206216335296631, + "logits/rejected": -1.772658109664917, + "logps/chosen": -440.90008544921875, + "logps/rejected": -565.4508056640625, + "loss": 0.5633, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -1.6224441528320312, + "rewards/margins": 1.1807271242141724, + "rewards/rejected": -2.8031716346740723, + "step": 4400 + }, + { + "epoch": 0.29, + "learning_rate": 4.478026824143473e-06, + "logits/chosen": -2.2154037952423096, + "logits/rejected": -1.8642908334732056, + "logps/chosen": -493.7405700683594, + "logps/rejected": -593.7854614257812, + "loss": 0.4516, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -2.1170547008514404, + "rewards/margins": 0.7973050475120544, + "rewards/rejected": -2.9143595695495605, + "step": 4410 + }, + { + "epoch": 0.29, + "learning_rate": 4.474529808862429e-06, + "logits/chosen": -2.2030978202819824, + "logits/rejected": -2.3663721084594727, + "logps/chosen": -544.3342895507812, + "logps/rejected": -605.0272216796875, + "loss": 0.6167, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -1.7068147659301758, + "rewards/margins": 0.36660951375961304, + "rewards/rejected": -2.0734241008758545, + "step": 4420 + }, + { + "epoch": 0.29, + "learning_rate": 4.471022493466669e-06, + "logits/chosen": -2.409914493560791, + "logits/rejected": -1.8466516733169556, + "logps/chosen": -686.7117919921875, + "logps/rejected": -613.4614868164062, + "loss": 0.5096, + "rewards/accuracies": 0.75, + "rewards/chosen": -1.2821083068847656, + "rewards/margins": 1.3262908458709717, + "rewards/rejected": -2.608398914337158, + "step": 4430 + }, + { + "epoch": 0.29, + "learning_rate": 4.467504896252066e-06, + "logits/chosen": -2.1752190589904785, + "logits/rejected": -2.1447174549102783, + "logps/chosen": -549.7022705078125, + "logps/rejected": -543.849609375, + "loss": 0.8446, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -1.7671550512313843, + "rewards/margins": 0.3856579661369324, + "rewards/rejected": -2.152813196182251, + "step": 4440 + }, + { + "epoch": 0.29, + "learning_rate": 4.463977035568132e-06, + "logits/chosen": -2.402294158935547, + "logits/rejected": -1.758493185043335, + "logps/chosen": -483.510986328125, + "logps/rejected": -524.33056640625, + "loss": 0.5975, + "rewards/accuracies": 0.75, + "rewards/chosen": -1.6731014251708984, + "rewards/margins": 0.6348594427108765, + "rewards/rejected": -2.3079609870910645, + "step": 4450 + }, + { + "epoch": 0.29, + "learning_rate": 4.460438929817914e-06, + "logits/chosen": -1.9748367071151733, + "logits/rejected": -1.612786054611206, + "logps/chosen": -381.75091552734375, + "logps/rejected": -470.46441650390625, + "loss": 0.55, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -1.6240602731704712, + "rewards/margins": 0.9709591865539551, + "rewards/rejected": -2.5950193405151367, + "step": 4460 + }, + { + "epoch": 0.29, + "learning_rate": 4.456890597457907e-06, + "logits/chosen": -2.1747958660125732, + "logits/rejected": -1.5399408340454102, + "logps/chosen": -488.53912353515625, + "logps/rejected": -555.6651000976562, + "loss": 0.5172, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -1.7258663177490234, + "rewards/margins": 1.2122005224227905, + "rewards/rejected": -2.9380667209625244, + "step": 4470 + }, + { + "epoch": 0.29, + "learning_rate": 4.453332056997951e-06, + "logits/chosen": -1.8284416198730469, + "logits/rejected": -2.1309056282043457, + "logps/chosen": -527.9506225585938, + "logps/rejected": -619.3512573242188, + "loss": 0.5483, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -2.246567487716675, + "rewards/margins": 0.6451946496963501, + "rewards/rejected": -2.8917624950408936, + "step": 4480 + }, + { + "epoch": 0.29, + "learning_rate": 4.449763327001134e-06, + "logits/chosen": -1.84951651096344, + "logits/rejected": -2.066161870956421, + "logps/chosen": -416.08087158203125, + "logps/rejected": -450.5859375, + "loss": 0.6554, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -1.9449373483657837, + "rewards/margins": 0.45978718996047974, + "rewards/rejected": -2.404724597930908, + "step": 4490 + }, + { + "epoch": 0.29, + "learning_rate": 4.446184426083702e-06, + "logits/chosen": -2.311021327972412, + "logits/rejected": -2.4845499992370605, + "logps/chosen": -478.23431396484375, + "logps/rejected": -570.850341796875, + "loss": 0.6725, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -2.1061079502105713, + "rewards/margins": 0.28127962350845337, + "rewards/rejected": -2.38738751411438, + "step": 4500 + }, + { + "epoch": 0.3, + "learning_rate": 4.442595372914954e-06, + "logits/chosen": -1.8901832103729248, + "logits/rejected": -1.8583076000213623, + "logps/chosen": -542.8806762695312, + "logps/rejected": -535.1598510742188, + "loss": 0.4545, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -2.166436195373535, + "rewards/margins": 0.6861886382102966, + "rewards/rejected": -2.8526248931884766, + "step": 4510 + }, + { + "epoch": 0.3, + "learning_rate": 4.43899618621715e-06, + "logits/chosen": -2.3254332542419434, + "logits/rejected": -2.0657501220703125, + "logps/chosen": -656.4212646484375, + "logps/rejected": -512.197021484375, + "loss": 0.6208, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -2.3151257038116455, + "rewards/margins": 0.511042594909668, + "rewards/rejected": -2.8261685371398926, + "step": 4520 + }, + { + "epoch": 0.3, + "learning_rate": 4.4353868847654105e-06, + "logits/chosen": -1.948752760887146, + "logits/rejected": -1.5994007587432861, + "logps/chosen": -457.9671325683594, + "logps/rejected": -459.787353515625, + "loss": 0.4473, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -1.8508408069610596, + "rewards/margins": 0.8039600253105164, + "rewards/rejected": -2.6548008918762207, + "step": 4530 + }, + { + "epoch": 0.3, + "learning_rate": 4.43176748738762e-06, + "logits/chosen": -2.456021785736084, + "logits/rejected": -2.0521798133850098, + "logps/chosen": -520.0567626953125, + "logps/rejected": -550.7706909179688, + "loss": 0.6049, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -2.211097478866577, + "rewards/margins": 0.2645843029022217, + "rewards/rejected": -2.475681781768799, + "step": 4540 + }, + { + "epoch": 0.3, + "learning_rate": 4.4281380129643295e-06, + "logits/chosen": -1.8761346340179443, + "logits/rejected": -1.4433989524841309, + "logps/chosen": -607.0074462890625, + "logps/rejected": -571.9659423828125, + "loss": 0.5778, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.474813461303711, + "rewards/margins": 0.7461470365524292, + "rewards/rejected": -3.2209606170654297, + "step": 4550 + }, + { + "epoch": 0.3, + "learning_rate": 4.424498480428654e-06, + "logits/chosen": -1.8305082321166992, + "logits/rejected": -1.5906250476837158, + "logps/chosen": -509.3614196777344, + "logps/rejected": -521.6648559570312, + "loss": 0.5047, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.1890828609466553, + "rewards/margins": 0.8899819254875183, + "rewards/rejected": -3.0790648460388184, + "step": 4560 + }, + { + "epoch": 0.3, + "learning_rate": 4.420848908766178e-06, + "logits/chosen": -1.8799827098846436, + "logits/rejected": -1.8004913330078125, + "logps/chosen": -416.0606994628906, + "logps/rejected": -512.1343383789062, + "loss": 0.4316, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -1.8887369632720947, + "rewards/margins": 1.1407749652862549, + "rewards/rejected": -3.0295119285583496, + "step": 4570 + }, + { + "epoch": 0.3, + "learning_rate": 4.417189317014855e-06, + "logits/chosen": -2.281931161880493, + "logits/rejected": -1.9250017404556274, + "logps/chosen": -531.4035034179688, + "logps/rejected": -626.2095947265625, + "loss": 0.5609, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -2.3789849281311035, + "rewards/margins": 0.8340399861335754, + "rewards/rejected": -3.213024616241455, + "step": 4580 + }, + { + "epoch": 0.3, + "learning_rate": 4.41351972426491e-06, + "logits/chosen": -2.017589569091797, + "logits/rejected": -2.132452964782715, + "logps/chosen": -540.7257690429688, + "logps/rejected": -575.760498046875, + "loss": 0.5085, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -2.080129623413086, + "rewards/margins": 1.0058696269989014, + "rewards/rejected": -3.085999011993408, + "step": 4590 + }, + { + "epoch": 0.3, + "learning_rate": 4.409840149658735e-06, + "logits/chosen": -1.8754075765609741, + "logits/rejected": -1.7641195058822632, + "logps/chosen": -501.75738525390625, + "logps/rejected": -543.0303955078125, + "loss": 0.6767, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -2.5409250259399414, + "rewards/margins": 0.22758810222148895, + "rewards/rejected": -2.7685132026672363, + "step": 4600 + }, + { + "epoch": 0.3, + "learning_rate": 4.4061506123907925e-06, + "logits/chosen": -1.8758211135864258, + "logits/rejected": -1.8451995849609375, + "logps/chosen": -545.2836303710938, + "logps/rejected": -475.25299072265625, + "loss": 0.574, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -1.855198860168457, + "rewards/margins": 0.3814522325992584, + "rewards/rejected": -2.2366509437561035, + "step": 4610 + }, + { + "epoch": 0.3, + "learning_rate": 4.402451131707519e-06, + "logits/chosen": -2.3047826290130615, + "logits/rejected": -1.9144645929336548, + "logps/chosen": -508.73443603515625, + "logps/rejected": -539.4705200195312, + "loss": 0.553, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -1.7654201984405518, + "rewards/margins": 0.7647563219070435, + "rewards/rejected": -2.5301764011383057, + "step": 4620 + }, + { + "epoch": 0.3, + "learning_rate": 4.398741726907215e-06, + "logits/chosen": -1.8007876873016357, + "logits/rejected": -1.868740439414978, + "logps/chosen": -496.21282958984375, + "logps/rejected": -553.9698486328125, + "loss": 0.8282, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -2.53663969039917, + "rewards/margins": 0.03457468003034592, + "rewards/rejected": -2.571214437484741, + "step": 4630 + }, + { + "epoch": 0.3, + "learning_rate": 4.395022417339955e-06, + "logits/chosen": -2.113487720489502, + "logits/rejected": -1.8683029413223267, + "logps/chosen": -530.5501708984375, + "logps/rejected": -591.97021484375, + "loss": 0.3195, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -1.364399790763855, + "rewards/margins": 1.4552600383758545, + "rewards/rejected": -2.819659948348999, + "step": 4640 + }, + { + "epoch": 0.3, + "learning_rate": 4.391293222407479e-06, + "logits/chosen": -2.048769474029541, + "logits/rejected": -1.9766504764556885, + "logps/chosen": -475.11651611328125, + "logps/rejected": -549.103271484375, + "loss": 0.5875, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -1.867302656173706, + "rewards/margins": 0.6196926832199097, + "rewards/rejected": -2.486995220184326, + "step": 4650 + }, + { + "epoch": 0.3, + "learning_rate": 4.387554161563094e-06, + "logits/chosen": -1.9550203084945679, + "logits/rejected": -2.0374646186828613, + "logps/chosen": -420.591796875, + "logps/rejected": -440.51904296875, + "loss": 0.4986, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -1.6441049575805664, + "rewards/margins": 0.697167158126831, + "rewards/rejected": -2.3412718772888184, + "step": 4660 + }, + { + "epoch": 0.31, + "learning_rate": 4.383805254311575e-06, + "logits/chosen": -2.0355887413024902, + "logits/rejected": -2.292731761932373, + "logps/chosen": -399.4480285644531, + "logps/rejected": -524.9561767578125, + "loss": 0.4126, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -1.9074411392211914, + "rewards/margins": 0.7646468877792358, + "rewards/rejected": -2.672088146209717, + "step": 4670 + }, + { + "epoch": 0.31, + "learning_rate": 4.380046520209056e-06, + "logits/chosen": -2.2156951427459717, + "logits/rejected": -2.034742593765259, + "logps/chosen": -579.1575317382812, + "logps/rejected": -681.671142578125, + "loss": 0.4618, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -2.0864853858947754, + "rewards/margins": 1.258885145187378, + "rewards/rejected": -3.3453705310821533, + "step": 4680 + }, + { + "epoch": 0.31, + "learning_rate": 4.376277978862936e-06, + "logits/chosen": -1.830632209777832, + "logits/rejected": -1.9834390878677368, + "logps/chosen": -404.22003173828125, + "logps/rejected": -464.7509765625, + "loss": 0.4448, + "rewards/accuracies": 0.75, + "rewards/chosen": -1.7368991374969482, + "rewards/margins": 0.8379810452461243, + "rewards/rejected": -2.5748801231384277, + "step": 4690 + }, + { + "epoch": 0.31, + "learning_rate": 4.372499649931774e-06, + "logits/chosen": -2.358445644378662, + "logits/rejected": -1.5742676258087158, + "logps/chosen": -552.6972045898438, + "logps/rejected": -541.5938720703125, + "loss": 0.5748, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -1.6361271142959595, + "rewards/margins": 0.6868298649787903, + "rewards/rejected": -2.3229570388793945, + "step": 4700 + }, + { + "epoch": 0.31, + "learning_rate": 4.368711553125185e-06, + "logits/chosen": -2.168393850326538, + "logits/rejected": -1.5660316944122314, + "logps/chosen": -542.9609985351562, + "logps/rejected": -575.06494140625, + "loss": 0.5706, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -1.852471113204956, + "rewards/margins": 0.8874910473823547, + "rewards/rejected": -2.739962100982666, + "step": 4710 + }, + { + "epoch": 0.31, + "learning_rate": 4.364913708203734e-06, + "logits/chosen": -2.1243157386779785, + "logits/rejected": -1.9141753911972046, + "logps/chosen": -492.9778747558594, + "logps/rejected": -508.63812255859375, + "loss": 0.4768, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.0230612754821777, + "rewards/margins": 0.7433712482452393, + "rewards/rejected": -2.766432523727417, + "step": 4720 + }, + { + "epoch": 0.31, + "learning_rate": 4.361106134978844e-06, + "logits/chosen": -2.172084331512451, + "logits/rejected": -1.5947898626327515, + "logps/chosen": -466.58770751953125, + "logps/rejected": -405.91754150390625, + "loss": 0.5439, + "rewards/accuracies": 0.75, + "rewards/chosen": -1.236283540725708, + "rewards/margins": 0.8609572649002075, + "rewards/rejected": -2.097240686416626, + "step": 4730 + }, + { + "epoch": 0.31, + "learning_rate": 4.357288853312681e-06, + "logits/chosen": -2.3615665435791016, + "logits/rejected": -2.126474380493164, + "logps/chosen": -442.7232360839844, + "logps/rejected": -458.16937255859375, + "loss": 0.453, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -1.3965190649032593, + "rewards/margins": 0.6557938456535339, + "rewards/rejected": -2.0523128509521484, + "step": 4740 + }, + { + "epoch": 0.31, + "learning_rate": 4.353461883118056e-06, + "logits/chosen": -1.8026084899902344, + "logits/rejected": -1.6555544137954712, + "logps/chosen": -484.675537109375, + "logps/rejected": -532.8782958984375, + "loss": 0.5328, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -1.5672917366027832, + "rewards/margins": 1.2434580326080322, + "rewards/rejected": -2.8107497692108154, + "step": 4750 + }, + { + "epoch": 0.31, + "learning_rate": 4.34962524435832e-06, + "logits/chosen": -2.1514358520507812, + "logits/rejected": -2.190011501312256, + "logps/chosen": -411.32818603515625, + "logps/rejected": -457.2787170410156, + "loss": 0.5686, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -1.2103312015533447, + "rewards/margins": 0.9705151319503784, + "rewards/rejected": -2.1808464527130127, + "step": 4760 + }, + { + "epoch": 0.31, + "learning_rate": 4.34577895704726e-06, + "logits/chosen": -2.0095913410186768, + "logits/rejected": -2.151557683944702, + "logps/chosen": -450.92962646484375, + "logps/rejected": -574.5406494140625, + "loss": 0.4753, + "rewards/accuracies": 0.75, + "rewards/chosen": -1.8003053665161133, + "rewards/margins": 1.186186671257019, + "rewards/rejected": -2.9864919185638428, + "step": 4770 + }, + { + "epoch": 0.31, + "learning_rate": 4.3419230412489954e-06, + "logits/chosen": -2.141251802444458, + "logits/rejected": -1.974914312362671, + "logps/chosen": -521.9320678710938, + "logps/rejected": -572.5324096679688, + "loss": 0.4808, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -1.7105445861816406, + "rewards/margins": 1.2596935033798218, + "rewards/rejected": -2.970238208770752, + "step": 4780 + }, + { + "epoch": 0.31, + "learning_rate": 4.338057517077872e-06, + "logits/chosen": -2.1706271171569824, + "logits/rejected": -2.2307467460632324, + "logps/chosen": -473.8224182128906, + "logps/rejected": -512.6556396484375, + "loss": 0.6164, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -2.0227160453796387, + "rewards/margins": 0.5150956511497498, + "rewards/rejected": -2.537811517715454, + "step": 4790 + }, + { + "epoch": 0.31, + "learning_rate": 4.334182404698356e-06, + "logits/chosen": -1.4364566802978516, + "logits/rejected": -1.9702926874160767, + "logps/chosen": -525.397216796875, + "logps/rejected": -492.07470703125, + "loss": 0.5419, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -2.342111587524414, + "rewards/margins": 0.5041699409484863, + "rewards/rejected": -2.8462817668914795, + "step": 4800 + }, + { + "epoch": 0.31, + "learning_rate": 4.330297724324933e-06, + "logits/chosen": -1.4881843328475952, + "logits/rejected": -1.5391967296600342, + "logps/chosen": -454.2434997558594, + "logps/rejected": -580.0150146484375, + "loss": 0.4813, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -2.307107448577881, + "rewards/margins": 1.1245578527450562, + "rewards/rejected": -3.4316658973693848, + "step": 4810 + }, + { + "epoch": 0.32, + "learning_rate": 4.326403496221999e-06, + "logits/chosen": -1.6759843826293945, + "logits/rejected": -1.577532410621643, + "logps/chosen": -455.1409606933594, + "logps/rejected": -482.29095458984375, + "loss": 0.5314, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -1.9880273342132568, + "rewards/margins": 0.8221282958984375, + "rewards/rejected": -2.8101556301116943, + "step": 4820 + }, + { + "epoch": 0.32, + "learning_rate": 4.322499740703755e-06, + "logits/chosen": -2.2555432319641113, + "logits/rejected": -1.9342491626739502, + "logps/chosen": -507.57135009765625, + "logps/rejected": -517.7844848632812, + "loss": 0.5564, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.0044960975646973, + "rewards/margins": 1.0811337232589722, + "rewards/rejected": -3.085629940032959, + "step": 4830 + }, + { + "epoch": 0.32, + "learning_rate": 4.318586478134101e-06, + "logits/chosen": -1.9722309112548828, + "logits/rejected": -2.1587672233581543, + "logps/chosen": -454.78125, + "logps/rejected": -635.3939819335938, + "loss": 0.4745, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.132972240447998, + "rewards/margins": 1.0663903951644897, + "rewards/rejected": -3.1993625164031982, + "step": 4840 + }, + { + "epoch": 0.32, + "learning_rate": 4.314663728926534e-06, + "logits/chosen": -1.7362626791000366, + "logits/rejected": -1.6253337860107422, + "logps/chosen": -512.2847290039062, + "logps/rejected": -569.8792724609375, + "loss": 0.7054, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.7742371559143066, + "rewards/margins": 0.21049389243125916, + "rewards/rejected": -2.9847309589385986, + "step": 4850 + }, + { + "epoch": 0.32, + "learning_rate": 4.310731513544033e-06, + "logits/chosen": -2.0022008419036865, + "logits/rejected": -1.6744985580444336, + "logps/chosen": -484.0415954589844, + "logps/rejected": -606.7132568359375, + "loss": 0.3881, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -2.31618070602417, + "rewards/margins": 1.1469529867172241, + "rewards/rejected": -3.4631335735321045, + "step": 4860 + }, + { + "epoch": 0.32, + "learning_rate": 4.30678985249896e-06, + "logits/chosen": -1.9810054302215576, + "logits/rejected": -1.7759273052215576, + "logps/chosen": -469.736083984375, + "logps/rejected": -595.0543823242188, + "loss": 0.4512, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -2.0385842323303223, + "rewards/margins": 1.3698408603668213, + "rewards/rejected": -3.4084250926971436, + "step": 4870 + }, + { + "epoch": 0.32, + "learning_rate": 4.302838766352952e-06, + "logits/chosen": -1.886491060256958, + "logits/rejected": -2.1183762550354004, + "logps/chosen": -503.8521423339844, + "logps/rejected": -629.655029296875, + "loss": 0.5701, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -1.8886480331420898, + "rewards/margins": 0.9551739692687988, + "rewards/rejected": -2.8438220024108887, + "step": 4880 + }, + { + "epoch": 0.32, + "learning_rate": 4.298878275716806e-06, + "logits/chosen": -1.7694628238677979, + "logits/rejected": -1.6724827289581299, + "logps/chosen": -444.172607421875, + "logps/rejected": -549.7099609375, + "loss": 0.4663, + "rewards/accuracies": 0.75, + "rewards/chosen": -1.8042892217636108, + "rewards/margins": 1.326806902885437, + "rewards/rejected": -3.1310958862304688, + "step": 4890 + }, + { + "epoch": 0.32, + "learning_rate": 4.294908401250386e-06, + "logits/chosen": -2.007857084274292, + "logits/rejected": -1.7297179698944092, + "logps/chosen": -404.09442138671875, + "logps/rejected": -523.7421264648438, + "loss": 0.5109, + "rewards/accuracies": 0.75, + "rewards/chosen": -1.8268076181411743, + "rewards/margins": 1.179343819618225, + "rewards/rejected": -3.0061516761779785, + "step": 4900 + }, + { + "epoch": 0.32, + "learning_rate": 4.290929163662498e-06, + "logits/chosen": -1.631799340248108, + "logits/rejected": -1.5702697038650513, + "logps/chosen": -556.2808837890625, + "logps/rejected": -681.7789916992188, + "loss": 0.2917, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -1.999092698097229, + "rewards/margins": 1.6200498342514038, + "rewards/rejected": -3.619143009185791, + "step": 4910 + }, + { + "epoch": 0.32, + "learning_rate": 4.286940583710796e-06, + "logits/chosen": -1.9041754007339478, + "logits/rejected": -1.4066526889801025, + "logps/chosen": -464.74822998046875, + "logps/rejected": -575.4861450195312, + "loss": 0.362, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -1.8307641744613647, + "rewards/margins": 1.4628773927688599, + "rewards/rejected": -3.2936415672302246, + "step": 4920 + }, + { + "epoch": 0.32, + "learning_rate": 4.282942682201667e-06, + "logits/chosen": -1.7861030101776123, + "logits/rejected": -1.9110653400421143, + "logps/chosen": -495.28131103515625, + "logps/rejected": -656.2120361328125, + "loss": 0.4626, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.301225423812866, + "rewards/margins": 0.6522427797317505, + "rewards/rejected": -2.9534683227539062, + "step": 4930 + }, + { + "epoch": 0.32, + "learning_rate": 4.278935479990123e-06, + "logits/chosen": -1.8299472332000732, + "logits/rejected": -1.1779818534851074, + "logps/chosen": -420.9767150878906, + "logps/rejected": -632.309326171875, + "loss": 0.4589, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -1.9877490997314453, + "rewards/margins": 1.4198617935180664, + "rewards/rejected": -3.4076104164123535, + "step": 4940 + }, + { + "epoch": 0.32, + "learning_rate": 4.274918997979695e-06, + "logits/chosen": -2.1755924224853516, + "logits/rejected": -2.0056111812591553, + "logps/chosen": -547.8631591796875, + "logps/rejected": -511.017578125, + "loss": 0.6709, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -1.987717628479004, + "rewards/margins": 0.31354743242263794, + "rewards/rejected": -2.301265239715576, + "step": 4950 + }, + { + "epoch": 0.32, + "learning_rate": 4.270893257122319e-06, + "logits/chosen": -2.0175094604492188, + "logits/rejected": -1.6015933752059937, + "logps/chosen": -505.9883728027344, + "logps/rejected": -537.2809448242188, + "loss": 0.4714, + "rewards/accuracies": 0.75, + "rewards/chosen": -1.958593726158142, + "rewards/margins": 0.8624840974807739, + "rewards/rejected": -2.821077823638916, + "step": 4960 + }, + { + "epoch": 0.33, + "learning_rate": 4.266858278418232e-06, + "logits/chosen": -2.076406240463257, + "logits/rejected": -1.926634430885315, + "logps/chosen": -438.182373046875, + "logps/rejected": -538.9945678710938, + "loss": 0.6311, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -1.9475584030151367, + "rewards/margins": 0.55462646484375, + "rewards/rejected": -2.502185344696045, + "step": 4970 + }, + { + "epoch": 0.33, + "learning_rate": 4.26281408291586e-06, + "logits/chosen": -2.040910243988037, + "logits/rejected": -1.9170629978179932, + "logps/chosen": -571.2846069335938, + "logps/rejected": -575.861083984375, + "loss": 0.4263, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -1.9143867492675781, + "rewards/margins": 0.9282675981521606, + "rewards/rejected": -2.8426547050476074, + "step": 4980 + }, + { + "epoch": 0.33, + "learning_rate": 4.258760691711706e-06, + "logits/chosen": -2.102288246154785, + "logits/rejected": -2.1348159313201904, + "logps/chosen": -505.85321044921875, + "logps/rejected": -508.9205017089844, + "loss": 0.5157, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -1.7438154220581055, + "rewards/margins": 0.6320822834968567, + "rewards/rejected": -2.3758978843688965, + "step": 4990 + }, + { + "epoch": 0.33, + "learning_rate": 4.254698125950247e-06, + "logits/chosen": -2.086390733718872, + "logits/rejected": -1.717895269393921, + "logps/chosen": -456.79022216796875, + "logps/rejected": -578.4363403320312, + "loss": 0.4682, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -2.532288074493408, + "rewards/margins": 1.1843751668930054, + "rewards/rejected": -3.716663360595703, + "step": 5000 + }, + { + "epoch": 0.33, + "learning_rate": 4.250626406823815e-06, + "logits/chosen": -2.286832094192505, + "logits/rejected": -1.8051612377166748, + "logps/chosen": -522.6347045898438, + "logps/rejected": -707.0551147460938, + "loss": 0.5183, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.0341882705688477, + "rewards/margins": 1.119905710220337, + "rewards/rejected": -3.1540939807891846, + "step": 5010 + }, + { + "epoch": 0.33, + "learning_rate": 4.246545555572489e-06, + "logits/chosen": -2.2195258140563965, + "logits/rejected": -1.7351186275482178, + "logps/chosen": -532.5775146484375, + "logps/rejected": -485.69708251953125, + "loss": 0.5602, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -1.5605659484863281, + "rewards/margins": 1.0322446823120117, + "rewards/rejected": -2.592810869216919, + "step": 5020 + }, + { + "epoch": 0.33, + "learning_rate": 4.242455593483992e-06, + "logits/chosen": -2.168919563293457, + "logits/rejected": -2.084602117538452, + "logps/chosen": -479.1246032714844, + "logps/rejected": -557.0482788085938, + "loss": 0.5369, + "rewards/accuracies": 0.75, + "rewards/chosen": -1.8440663814544678, + "rewards/margins": 1.1115206480026245, + "rewards/rejected": -2.955587148666382, + "step": 5030 + }, + { + "epoch": 0.33, + "learning_rate": 4.238356541893567e-06, + "logits/chosen": -2.0111374855041504, + "logits/rejected": -2.025683879852295, + "logps/chosen": -524.3695068359375, + "logps/rejected": -623.1271362304688, + "loss": 0.7591, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -2.2869372367858887, + "rewards/margins": 0.8538182973861694, + "rewards/rejected": -3.1407554149627686, + "step": 5040 + }, + { + "epoch": 0.33, + "learning_rate": 4.234248422183876e-06, + "logits/chosen": -2.258674144744873, + "logits/rejected": -2.089789390563965, + "logps/chosen": -533.87890625, + "logps/rejected": -596.0156860351562, + "loss": 0.7065, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -1.7382818460464478, + "rewards/margins": 0.8634111285209656, + "rewards/rejected": -2.6016926765441895, + "step": 5050 + }, + { + "epoch": 0.33, + "learning_rate": 4.230131255784884e-06, + "logits/chosen": -1.9967342615127563, + "logits/rejected": -2.0427513122558594, + "logps/chosen": -422.4784240722656, + "logps/rejected": -509.82550048828125, + "loss": 0.6431, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -1.4072448015213013, + "rewards/margins": 0.980424702167511, + "rewards/rejected": -2.387669801712036, + "step": 5060 + }, + { + "epoch": 0.33, + "learning_rate": 4.226005064173748e-06, + "logits/chosen": -2.129643678665161, + "logits/rejected": -1.6824462413787842, + "logps/chosen": -478.75177001953125, + "logps/rejected": -463.4546813964844, + "loss": 0.5305, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -1.1462364196777344, + "rewards/margins": 1.4325624704360962, + "rewards/rejected": -2.578798770904541, + "step": 5070 + }, + { + "epoch": 0.33, + "learning_rate": 4.2218698688747035e-06, + "logits/chosen": -2.551898241043091, + "logits/rejected": -1.856146216392517, + "logps/chosen": -423.08001708984375, + "logps/rejected": -483.04766845703125, + "loss": 0.4279, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -1.3132469654083252, + "rewards/margins": 1.371935248374939, + "rewards/rejected": -2.6851823329925537, + "step": 5080 + }, + { + "epoch": 0.33, + "learning_rate": 4.217725691458957e-06, + "logits/chosen": -2.3234143257141113, + "logits/rejected": -1.9465463161468506, + "logps/chosen": -548.46630859375, + "logps/rejected": -516.9241943359375, + "loss": 0.5183, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -1.7047113180160522, + "rewards/margins": 0.6300544142723083, + "rewards/rejected": -2.334765911102295, + "step": 5090 + }, + { + "epoch": 0.33, + "learning_rate": 4.213572553544565e-06, + "logits/chosen": -2.1165995597839355, + "logits/rejected": -2.101731061935425, + "logps/chosen": -521.7017822265625, + "logps/rejected": -487.0848083496094, + "loss": 0.6403, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -1.7880723476409912, + "rewards/margins": 0.4881587028503418, + "rewards/rejected": -2.276231288909912, + "step": 5100 + }, + { + "epoch": 0.33, + "learning_rate": 4.209410476796331e-06, + "logits/chosen": -2.2760448455810547, + "logits/rejected": -2.162075996398926, + "logps/chosen": -505.9744567871094, + "logps/rejected": -572.6317138671875, + "loss": 0.4641, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -1.6629308462142944, + "rewards/margins": 1.0742077827453613, + "rewards/rejected": -2.7371392250061035, + "step": 5110 + }, + { + "epoch": 0.33, + "learning_rate": 4.205239482925686e-06, + "logits/chosen": -2.5016868114471436, + "logits/rejected": -2.208329439163208, + "logps/chosen": -486.69451904296875, + "logps/rejected": -514.7063598632812, + "loss": 0.5182, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.9836909174919128, + "rewards/margins": 0.8252573013305664, + "rewards/rejected": -1.8089481592178345, + "step": 5120 + }, + { + "epoch": 0.34, + "learning_rate": 4.201059593690577e-06, + "logits/chosen": -2.203293800354004, + "logits/rejected": -1.8268276453018188, + "logps/chosen": -417.32916259765625, + "logps/rejected": -432.7237854003906, + "loss": 0.7042, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -1.301255464553833, + "rewards/margins": 0.9073659181594849, + "rewards/rejected": -2.2086215019226074, + "step": 5130 + }, + { + "epoch": 0.34, + "learning_rate": 4.196870830895354e-06, + "logits/chosen": -2.3410606384277344, + "logits/rejected": -2.001013994216919, + "logps/chosen": -346.8150329589844, + "logps/rejected": -517.2017822265625, + "loss": 0.4128, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.8742402195930481, + "rewards/margins": 1.6113131046295166, + "rewards/rejected": -2.48555326461792, + "step": 5140 + }, + { + "epoch": 0.34, + "learning_rate": 4.192673216390657e-06, + "logits/chosen": -2.111858606338501, + "logits/rejected": -2.1761603355407715, + "logps/chosen": -501.1302185058594, + "logps/rejected": -512.5011596679688, + "loss": 0.7603, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -1.516308307647705, + "rewards/margins": 0.6539145708084106, + "rewards/rejected": -2.170222759246826, + "step": 5150 + }, + { + "epoch": 0.34, + "learning_rate": 4.188466772073296e-06, + "logits/chosen": -2.0586180686950684, + "logits/rejected": -1.8965294361114502, + "logps/chosen": -447.8399353027344, + "logps/rejected": -547.6044921875, + "loss": 0.5181, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -1.681647539138794, + "rewards/margins": 1.0179592370986938, + "rewards/rejected": -2.6996068954467773, + "step": 5160 + }, + { + "epoch": 0.34, + "learning_rate": 4.184251519886148e-06, + "logits/chosen": -1.794429063796997, + "logits/rejected": -1.6684194803237915, + "logps/chosen": -421.6888122558594, + "logps/rejected": -462.4202575683594, + "loss": 0.736, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -2.355869770050049, + "rewards/margins": 0.24080920219421387, + "rewards/rejected": -2.5966787338256836, + "step": 5170 + }, + { + "epoch": 0.34, + "learning_rate": 4.180027481818033e-06, + "logits/chosen": -2.1500463485717773, + "logits/rejected": -1.862236738204956, + "logps/chosen": -497.8409118652344, + "logps/rejected": -479.428955078125, + "loss": 0.5666, + "rewards/accuracies": 0.75, + "rewards/chosen": -1.6651195287704468, + "rewards/margins": 1.0246086120605469, + "rewards/rejected": -2.689728260040283, + "step": 5180 + }, + { + "epoch": 0.34, + "learning_rate": 4.175794679903602e-06, + "logits/chosen": -2.3446333408355713, + "logits/rejected": -1.8376610279083252, + "logps/chosen": -455.2227478027344, + "logps/rejected": -458.30029296875, + "loss": 0.6515, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -1.7315324544906616, + "rewards/margins": 0.38231196999549866, + "rewards/rejected": -2.113844394683838, + "step": 5190 + }, + { + "epoch": 0.34, + "learning_rate": 4.171553136223222e-06, + "logits/chosen": -2.3308043479919434, + "logits/rejected": -2.1053833961486816, + "logps/chosen": -521.3209838867188, + "logps/rejected": -570.7605590820312, + "loss": 0.5516, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -1.2747447490692139, + "rewards/margins": 0.7890059351921082, + "rewards/rejected": -2.0637505054473877, + "step": 5200 + }, + { + "epoch": 0.34, + "learning_rate": 4.167302872902865e-06, + "logits/chosen": -2.3960559368133545, + "logits/rejected": -1.8900690078735352, + "logps/chosen": -487.9169006347656, + "logps/rejected": -510.0519104003906, + "loss": 0.7002, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -1.5574736595153809, + "rewards/margins": 1.0342365503311157, + "rewards/rejected": -2.591710329055786, + "step": 5210 + }, + { + "epoch": 0.34, + "learning_rate": 4.163043912113985e-06, + "logits/chosen": -2.1093697547912598, + "logits/rejected": -1.9578189849853516, + "logps/chosen": -491.0966796875, + "logps/rejected": -520.5272216796875, + "loss": 0.5832, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -1.5820024013519287, + "rewards/margins": 0.46106070280075073, + "rewards/rejected": -2.043062925338745, + "step": 5220 + }, + { + "epoch": 0.34, + "learning_rate": 4.15877627607341e-06, + "logits/chosen": -1.9862468242645264, + "logits/rejected": -1.8697761297225952, + "logps/chosen": -365.73193359375, + "logps/rejected": -427.60260009765625, + "loss": 0.5577, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -1.5219247341156006, + "rewards/margins": 0.6337286233901978, + "rewards/rejected": -2.155653238296509, + "step": 5230 + }, + { + "epoch": 0.34, + "learning_rate": 4.154499987043217e-06, + "logits/chosen": -1.718347191810608, + "logits/rejected": -1.9001245498657227, + "logps/chosen": -416.593505859375, + "logps/rejected": -536.9202270507812, + "loss": 0.4793, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -1.6195068359375, + "rewards/margins": 1.0918627977371216, + "rewards/rejected": -2.7113699913024902, + "step": 5240 + }, + { + "epoch": 0.34, + "learning_rate": 4.150215067330625e-06, + "logits/chosen": -2.2182650566101074, + "logits/rejected": -2.0222058296203613, + "logps/chosen": -499.1563415527344, + "logps/rejected": -585.13232421875, + "loss": 0.5189, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -1.7680704593658447, + "rewards/margins": 1.1057997941970825, + "rewards/rejected": -2.873870372772217, + "step": 5250 + }, + { + "epoch": 0.34, + "learning_rate": 4.145921539287876e-06, + "logits/chosen": -2.067573070526123, + "logits/rejected": -1.926025390625, + "logps/chosen": -557.6201171875, + "logps/rejected": -531.8350830078125, + "loss": 0.5725, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -2.5217723846435547, + "rewards/margins": 0.673224925994873, + "rewards/rejected": -3.1949973106384277, + "step": 5260 + }, + { + "epoch": 0.34, + "learning_rate": 4.141619425312115e-06, + "logits/chosen": -2.2126173973083496, + "logits/rejected": -2.1073038578033447, + "logps/chosen": -496.4862365722656, + "logps/rejected": -522.2747192382812, + "loss": 0.5975, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -2.240407943725586, + "rewards/margins": 0.9295075535774231, + "rewards/rejected": -3.1699154376983643, + "step": 5270 + }, + { + "epoch": 0.35, + "learning_rate": 4.1373087478452735e-06, + "logits/chosen": -1.852992057800293, + "logits/rejected": -1.8106857538223267, + "logps/chosen": -436.77740478515625, + "logps/rejected": -474.5143127441406, + "loss": 0.7164, + "rewards/accuracies": 0.4000000059604645, + "rewards/chosen": -2.3405117988586426, + "rewards/margins": 0.06529967486858368, + "rewards/rejected": -2.405811309814453, + "step": 5280 + }, + { + "epoch": 0.35, + "learning_rate": 4.132989529373959e-06, + "logits/chosen": -1.9827455282211304, + "logits/rejected": -2.061856746673584, + "logps/chosen": -505.684326171875, + "logps/rejected": -570.9915161132812, + "loss": 0.4378, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -1.6012649536132812, + "rewards/margins": 1.2496049404144287, + "rewards/rejected": -2.850870370864868, + "step": 5290 + }, + { + "epoch": 0.35, + "learning_rate": 4.128661792429331e-06, + "logits/chosen": -1.7086913585662842, + "logits/rejected": -2.030909776687622, + "logps/chosen": -413.0350646972656, + "logps/rejected": -516.2833251953125, + "loss": 0.6779, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -2.1970055103302, + "rewards/margins": 0.5528850555419922, + "rewards/rejected": -2.7498905658721924, + "step": 5300 + }, + { + "epoch": 0.35, + "learning_rate": 4.124325559586985e-06, + "logits/chosen": -2.0355865955352783, + "logits/rejected": -1.7397657632827759, + "logps/chosen": -442.1444396972656, + "logps/rejected": -477.7100524902344, + "loss": 0.6172, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -1.8046258687973022, + "rewards/margins": 0.736786425113678, + "rewards/rejected": -2.541412353515625, + "step": 5310 + }, + { + "epoch": 0.35, + "learning_rate": 4.119980853466835e-06, + "logits/chosen": -1.9371654987335205, + "logits/rejected": -1.4551010131835938, + "logps/chosen": -443.48883056640625, + "logps/rejected": -509.99652099609375, + "loss": 0.5016, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -2.0050268173217773, + "rewards/margins": 0.9008941650390625, + "rewards/rejected": -2.905921220779419, + "step": 5320 + }, + { + "epoch": 0.35, + "learning_rate": 4.115627696732997e-06, + "logits/chosen": -2.2487733364105225, + "logits/rejected": -1.9912960529327393, + "logps/chosen": -587.8577270507812, + "logps/rejected": -580.4589233398438, + "loss": 0.5762, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -1.7368290424346924, + "rewards/margins": 0.7809491753578186, + "rewards/rejected": -2.517777919769287, + "step": 5330 + }, + { + "epoch": 0.35, + "learning_rate": 4.111266112093668e-06, + "logits/chosen": -2.338475465774536, + "logits/rejected": -1.7393487691879272, + "logps/chosen": -565.7013549804688, + "logps/rejected": -561.8267822265625, + "loss": 0.5838, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -1.7987060546875, + "rewards/margins": 0.9617929458618164, + "rewards/rejected": -2.7604992389678955, + "step": 5340 + }, + { + "epoch": 0.35, + "learning_rate": 4.1068961223010115e-06, + "logits/chosen": -2.0487475395202637, + "logits/rejected": -1.9890035390853882, + "logps/chosen": -494.0810546875, + "logps/rejected": -526.8984985351562, + "loss": 0.7369, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -2.0883586406707764, + "rewards/margins": 0.25026410818099976, + "rewards/rejected": -2.338622808456421, + "step": 5350 + }, + { + "epoch": 0.35, + "learning_rate": 4.102517750151034e-06, + "logits/chosen": -2.4758353233337402, + "logits/rejected": -2.0741126537323, + "logps/chosen": -516.5465087890625, + "logps/rejected": -488.43035888671875, + "loss": 0.5367, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -1.434444785118103, + "rewards/margins": 0.4245724678039551, + "rewards/rejected": -1.8590171337127686, + "step": 5360 + }, + { + "epoch": 0.35, + "learning_rate": 4.09813101848347e-06, + "logits/chosen": -2.068437337875366, + "logits/rejected": -1.921613097190857, + "logps/chosen": -448.24346923828125, + "logps/rejected": -530.2891845703125, + "loss": 0.6322, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -1.6722848415374756, + "rewards/margins": 0.7226563096046448, + "rewards/rejected": -2.3949408531188965, + "step": 5370 + }, + { + "epoch": 0.35, + "learning_rate": 4.093735950181659e-06, + "logits/chosen": -2.3279154300689697, + "logits/rejected": -2.0696029663085938, + "logps/chosen": -443.210205078125, + "logps/rejected": -503.69805908203125, + "loss": 0.5236, + "rewards/accuracies": 0.75, + "rewards/chosen": -1.8402855396270752, + "rewards/margins": 0.7039087414741516, + "rewards/rejected": -2.544194459915161, + "step": 5380 + }, + { + "epoch": 0.35, + "learning_rate": 4.0893325681724326e-06, + "logits/chosen": -2.0327677726745605, + "logits/rejected": -2.0879409313201904, + "logps/chosen": -473.17022705078125, + "logps/rejected": -611.0216064453125, + "loss": 0.5203, + "rewards/accuracies": 0.75, + "rewards/chosen": -1.875012993812561, + "rewards/margins": 1.0310866832733154, + "rewards/rejected": -2.906099557876587, + "step": 5390 + }, + { + "epoch": 0.35, + "learning_rate": 4.084920895425988e-06, + "logits/chosen": -1.8736546039581299, + "logits/rejected": -1.8165258169174194, + "logps/chosen": -429.8600158691406, + "logps/rejected": -548.5433349609375, + "loss": 0.5732, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -1.9263439178466797, + "rewards/margins": 0.8851688504219055, + "rewards/rejected": -2.8115124702453613, + "step": 5400 + }, + { + "epoch": 0.35, + "learning_rate": 4.080500954955769e-06, + "logits/chosen": -1.9591480493545532, + "logits/rejected": -1.8119045495986938, + "logps/chosen": -464.69207763671875, + "logps/rejected": -554.7318115234375, + "loss": 0.6704, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.0796895027160645, + "rewards/margins": 0.8748849034309387, + "rewards/rejected": -2.9545741081237793, + "step": 5410 + }, + { + "epoch": 0.35, + "learning_rate": 4.076072769818354e-06, + "logits/chosen": -1.8003151416778564, + "logits/rejected": -1.8232829570770264, + "logps/chosen": -430.34686279296875, + "logps/rejected": -528.0757446289062, + "loss": 0.5268, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -2.004014730453491, + "rewards/margins": 1.071571946144104, + "rewards/rejected": -3.0755867958068848, + "step": 5420 + }, + { + "epoch": 0.36, + "learning_rate": 4.071636363113323e-06, + "logits/chosen": -2.4541430473327637, + "logits/rejected": -2.071518659591675, + "logps/chosen": -541.6749877929688, + "logps/rejected": -580.9114990234375, + "loss": 0.6572, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -2.4755618572235107, + "rewards/margins": 0.49576932191848755, + "rewards/rejected": -2.9713308811187744, + "step": 5430 + }, + { + "epoch": 0.36, + "learning_rate": 4.067191757983146e-06, + "logits/chosen": -1.6669687032699585, + "logits/rejected": -1.8759205341339111, + "logps/chosen": -481.29742431640625, + "logps/rejected": -646.45751953125, + "loss": 0.585, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -2.7207741737365723, + "rewards/margins": 0.6411170959472656, + "rewards/rejected": -3.361891269683838, + "step": 5440 + }, + { + "epoch": 0.36, + "learning_rate": 4.062738977613063e-06, + "logits/chosen": -2.1314239501953125, + "logits/rejected": -1.5466810464859009, + "logps/chosen": -536.1124877929688, + "logps/rejected": -521.1047973632812, + "loss": 0.5347, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.06589674949646, + "rewards/margins": 0.4216742515563965, + "rewards/rejected": -2.4875710010528564, + "step": 5450 + }, + { + "epoch": 0.36, + "learning_rate": 4.058278045230957e-06, + "logits/chosen": -2.354736328125, + "logits/rejected": -1.8038610219955444, + "logps/chosen": -589.7005615234375, + "logps/rejected": -649.4627685546875, + "loss": 0.4977, + "rewards/accuracies": 0.75, + "rewards/chosen": -1.8075393438339233, + "rewards/margins": 0.8336325883865356, + "rewards/rejected": -2.641171932220459, + "step": 5460 + }, + { + "epoch": 0.36, + "learning_rate": 4.053808984107235e-06, + "logits/chosen": -2.18099308013916, + "logits/rejected": -2.067991018295288, + "logps/chosen": -644.2923583984375, + "logps/rejected": -661.2281494140625, + "loss": 0.5483, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -2.1242473125457764, + "rewards/margins": 0.7089834809303284, + "rewards/rejected": -2.833230495452881, + "step": 5470 + }, + { + "epoch": 0.36, + "learning_rate": 4.04933181755471e-06, + "logits/chosen": -2.0857701301574707, + "logits/rejected": -1.6287143230438232, + "logps/chosen": -522.9374389648438, + "logps/rejected": -495.65899658203125, + "loss": 0.6808, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -2.090423107147217, + "rewards/margins": 0.5246143937110901, + "rewards/rejected": -2.615037441253662, + "step": 5480 + }, + { + "epoch": 0.36, + "learning_rate": 4.044846568928477e-06, + "logits/chosen": -1.865134596824646, + "logits/rejected": -2.0705184936523438, + "logps/chosen": -457.414794921875, + "logps/rejected": -597.051025390625, + "loss": 0.5042, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -1.685142159461975, + "rewards/margins": 0.8473315238952637, + "rewards/rejected": -2.5324740409851074, + "step": 5490 + }, + { + "epoch": 0.36, + "learning_rate": 4.040353261625788e-06, + "logits/chosen": -2.257359266281128, + "logits/rejected": -2.08892822265625, + "logps/chosen": -455.34820556640625, + "logps/rejected": -545.6905517578125, + "loss": 0.5441, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -1.7408716678619385, + "rewards/margins": 1.2403234243392944, + "rewards/rejected": -2.9811949729919434, + "step": 5500 + }, + { + "epoch": 0.36, + "learning_rate": 4.035851919085936e-06, + "logits/chosen": -2.324406862258911, + "logits/rejected": -2.0984809398651123, + "logps/chosen": -519.6268310546875, + "logps/rejected": -553.7537841796875, + "loss": 0.4771, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -1.2758551836013794, + "rewards/margins": 1.4612915515899658, + "rewards/rejected": -2.7371463775634766, + "step": 5510 + }, + { + "epoch": 0.36, + "learning_rate": 4.031342564790128e-06, + "logits/chosen": -2.2692208290100098, + "logits/rejected": -1.6718521118164062, + "logps/chosen": -525.452880859375, + "logps/rejected": -548.0057983398438, + "loss": 0.4416, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -1.848789930343628, + "rewards/margins": 0.8104416728019714, + "rewards/rejected": -2.659231662750244, + "step": 5520 + }, + { + "epoch": 0.36, + "learning_rate": 4.026825222261367e-06, + "logits/chosen": -2.027358055114746, + "logits/rejected": -2.0448803901672363, + "logps/chosen": -634.1797485351562, + "logps/rejected": -619.1446533203125, + "loss": 0.3874, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -1.5155118703842163, + "rewards/margins": 1.2813142538070679, + "rewards/rejected": -2.796826124191284, + "step": 5530 + }, + { + "epoch": 0.36, + "learning_rate": 4.022299915064321e-06, + "logits/chosen": -2.2447431087493896, + "logits/rejected": -1.7948484420776367, + "logps/chosen": -429.32586669921875, + "logps/rejected": -488.72100830078125, + "loss": 0.4734, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -1.7056586742401123, + "rewards/margins": 0.9686349630355835, + "rewards/rejected": -2.6742939949035645, + "step": 5540 + }, + { + "epoch": 0.36, + "learning_rate": 4.017766666805213e-06, + "logits/chosen": -1.7969173192977905, + "logits/rejected": -2.1284358501434326, + "logps/chosen": -505.5121154785156, + "logps/rejected": -535.5445556640625, + "loss": 0.5333, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -1.8811172246932983, + "rewards/margins": 0.8386653065681458, + "rewards/rejected": -2.719782590866089, + "step": 5550 + }, + { + "epoch": 0.36, + "learning_rate": 4.013225501131684e-06, + "logits/chosen": -2.1914796829223633, + "logits/rejected": -2.0097999572753906, + "logps/chosen": -542.9992065429688, + "logps/rejected": -625.959716796875, + "loss": 0.5042, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -1.498547077178955, + "rewards/margins": 1.0935722589492798, + "rewards/rejected": -2.5921194553375244, + "step": 5560 + }, + { + "epoch": 0.36, + "learning_rate": 4.008676441732679e-06, + "logits/chosen": -1.9770981073379517, + "logits/rejected": -1.7834548950195312, + "logps/chosen": -538.4899291992188, + "logps/rejected": -591.5714721679688, + "loss": 0.5164, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.0004758834838867, + "rewards/margins": 1.0672811269760132, + "rewards/rejected": -3.0677573680877686, + "step": 5570 + }, + { + "epoch": 0.37, + "learning_rate": 4.00411951233832e-06, + "logits/chosen": -2.2867188453674316, + "logits/rejected": -1.8971859216690063, + "logps/chosen": -578.2861938476562, + "logps/rejected": -632.0267944335938, + "loss": 0.6347, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -1.9446357488632202, + "rewards/margins": 0.6547573208808899, + "rewards/rejected": -2.599392890930176, + "step": 5580 + }, + { + "epoch": 0.37, + "learning_rate": 3.999554736719785e-06, + "logits/chosen": -2.297450304031372, + "logits/rejected": -2.1157913208007812, + "logps/chosen": -591.5215454101562, + "logps/rejected": -636.9697875976562, + "loss": 0.5849, + "rewards/accuracies": 0.5, + "rewards/chosen": -1.753023386001587, + "rewards/margins": 0.7581643462181091, + "rewards/rejected": -2.5111875534057617, + "step": 5590 + }, + { + "epoch": 0.37, + "learning_rate": 3.994982138689177e-06, + "logits/chosen": -1.5000120401382446, + "logits/rejected": -1.7612262964248657, + "logps/chosen": -399.55999755859375, + "logps/rejected": -479.05859375, + "loss": 0.607, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -1.7114953994750977, + "rewards/margins": 0.7481001615524292, + "rewards/rejected": -2.4595954418182373, + "step": 5600 + }, + { + "epoch": 0.37, + "learning_rate": 3.990401742099408e-06, + "logits/chosen": -1.8648064136505127, + "logits/rejected": -2.075862407684326, + "logps/chosen": -418.94281005859375, + "logps/rejected": -506.51904296875, + "loss": 0.4838, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -1.2457597255706787, + "rewards/margins": 1.0919276475906372, + "rewards/rejected": -2.3376874923706055, + "step": 5610 + }, + { + "epoch": 0.37, + "learning_rate": 3.985813570844072e-06, + "logits/chosen": -2.1678080558776855, + "logits/rejected": -2.161743402481079, + "logps/chosen": -423.78521728515625, + "logps/rejected": -466.4239196777344, + "loss": 0.6135, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -1.386838674545288, + "rewards/margins": 0.6362934112548828, + "rewards/rejected": -2.023132085800171, + "step": 5620 + }, + { + "epoch": 0.37, + "learning_rate": 3.981217648857316e-06, + "logits/chosen": -2.2606282234191895, + "logits/rejected": -2.1236536502838135, + "logps/chosen": -523.9324951171875, + "logps/rejected": -568.092041015625, + "loss": 0.6394, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -1.5362694263458252, + "rewards/margins": 0.4496995508670807, + "rewards/rejected": -1.9859689474105835, + "step": 5630 + }, + { + "epoch": 0.37, + "learning_rate": 3.97661400011372e-06, + "logits/chosen": -1.7658350467681885, + "logits/rejected": -1.849532127380371, + "logps/chosen": -428.254638671875, + "logps/rejected": -622.2107543945312, + "loss": 0.5494, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -1.562973141670227, + "rewards/margins": 1.3392118215560913, + "rewards/rejected": -2.9021852016448975, + "step": 5640 + }, + { + "epoch": 0.37, + "learning_rate": 3.972002648628174e-06, + "logits/chosen": -2.3656766414642334, + "logits/rejected": -2.090914487838745, + "logps/chosen": -527.8897094726562, + "logps/rejected": -521.8011474609375, + "loss": 0.4767, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.9467498064041138, + "rewards/margins": 1.3752272129058838, + "rewards/rejected": -2.321976661682129, + "step": 5650 + }, + { + "epoch": 0.37, + "learning_rate": 3.967383618455743e-06, + "logits/chosen": -2.2997450828552246, + "logits/rejected": -2.1734695434570312, + "logps/chosen": -412.80889892578125, + "logps/rejected": -531.1956176757812, + "loss": 0.4566, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -1.4110808372497559, + "rewards/margins": 0.9126925468444824, + "rewards/rejected": -2.323773145675659, + "step": 5660 + }, + { + "epoch": 0.37, + "learning_rate": 3.9627569336915515e-06, + "logits/chosen": -2.169245719909668, + "logits/rejected": -2.1522982120513916, + "logps/chosen": -438.11492919921875, + "logps/rejected": -493.45867919921875, + "loss": 0.6598, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -1.1814568042755127, + "rewards/margins": 0.8335569500923157, + "rewards/rejected": -2.0150136947631836, + "step": 5670 + }, + { + "epoch": 0.37, + "learning_rate": 3.9581226184706555e-06, + "logits/chosen": -2.0356647968292236, + "logits/rejected": -1.761395812034607, + "logps/chosen": -548.0200805664062, + "logps/rejected": -517.4801635742188, + "loss": 0.5303, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -1.3839877843856812, + "rewards/margins": 1.20305597782135, + "rewards/rejected": -2.5870437622070312, + "step": 5680 + }, + { + "epoch": 0.37, + "learning_rate": 3.953480696967912e-06, + "logits/chosen": -1.7281534671783447, + "logits/rejected": -1.9886353015899658, + "logps/chosen": -360.5095520019531, + "logps/rejected": -469.1702575683594, + "loss": 0.554, + "rewards/accuracies": 0.75, + "rewards/chosen": -1.5830284357070923, + "rewards/margins": 0.7689424753189087, + "rewards/rejected": -2.35197114944458, + "step": 5690 + }, + { + "epoch": 0.37, + "learning_rate": 3.948831193397857e-06, + "logits/chosen": -2.055854320526123, + "logits/rejected": -1.6672054529190063, + "logps/chosen": -474.595458984375, + "logps/rejected": -489.5585021972656, + "loss": 0.638, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -1.260961890220642, + "rewards/margins": 0.9778332710266113, + "rewards/rejected": -2.238795042037964, + "step": 5700 + }, + { + "epoch": 0.37, + "learning_rate": 3.94417413201458e-06, + "logits/chosen": -2.0651679039001465, + "logits/rejected": -1.9424585103988647, + "logps/chosen": -449.7088317871094, + "logps/rejected": -470.5743713378906, + "loss": 0.5406, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -1.487428903579712, + "rewards/margins": 0.7987778782844543, + "rewards/rejected": -2.2862064838409424, + "step": 5710 + }, + { + "epoch": 0.37, + "learning_rate": 3.9395095371115935e-06, + "logits/chosen": -2.0351507663726807, + "logits/rejected": -1.9006555080413818, + "logps/chosen": -459.5558166503906, + "logps/rejected": -525.0375366210938, + "loss": 0.5175, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -1.2365690469741821, + "rewards/margins": 1.2108285427093506, + "rewards/rejected": -2.447397470474243, + "step": 5720 + }, + { + "epoch": 0.37, + "learning_rate": 3.93483743302171e-06, + "logits/chosen": -2.2190499305725098, + "logits/rejected": -1.791672945022583, + "logps/chosen": -516.0533447265625, + "logps/rejected": -502.7911071777344, + "loss": 0.5993, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -2.2167534828186035, + "rewards/margins": 0.7097581624984741, + "rewards/rejected": -2.926511526107788, + "step": 5730 + }, + { + "epoch": 0.38, + "learning_rate": 3.930157844116913e-06, + "logits/chosen": -2.240591526031494, + "logits/rejected": -2.143420457839966, + "logps/chosen": -458.56195068359375, + "logps/rejected": -549.3421630859375, + "loss": 0.5023, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -1.8407554626464844, + "rewards/margins": 0.6896196603775024, + "rewards/rejected": -2.5303750038146973, + "step": 5740 + }, + { + "epoch": 0.38, + "learning_rate": 3.925470794808229e-06, + "logits/chosen": -2.3181164264678955, + "logits/rejected": -1.8494704961776733, + "logps/chosen": -503.0951232910156, + "logps/rejected": -676.624267578125, + "loss": 0.6081, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -2.0876903533935547, + "rewards/margins": 0.9999464750289917, + "rewards/rejected": -3.087637186050415, + "step": 5750 + }, + { + "epoch": 0.38, + "learning_rate": 3.920776309545606e-06, + "logits/chosen": -1.8568137884140015, + "logits/rejected": -2.0069284439086914, + "logps/chosen": -518.7303466796875, + "logps/rejected": -570.5198974609375, + "loss": 0.5407, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.189582347869873, + "rewards/margins": 0.9730221033096313, + "rewards/rejected": -3.162604331970215, + "step": 5760 + }, + { + "epoch": 0.38, + "learning_rate": 3.916074412817778e-06, + "logits/chosen": -2.047844409942627, + "logits/rejected": -1.6396989822387695, + "logps/chosen": -427.2057189941406, + "logps/rejected": -556.9207763671875, + "loss": 0.6314, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -1.956089735031128, + "rewards/margins": 1.3886876106262207, + "rewards/rejected": -3.3447773456573486, + "step": 5770 + }, + { + "epoch": 0.38, + "learning_rate": 3.911365129152139e-06, + "logits/chosen": -1.8778527975082397, + "logits/rejected": -1.5888967514038086, + "logps/chosen": -491.4697265625, + "logps/rejected": -579.642822265625, + "loss": 0.6426, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -2.2698824405670166, + "rewards/margins": 1.137494683265686, + "rewards/rejected": -3.407376766204834, + "step": 5780 + }, + { + "epoch": 0.38, + "learning_rate": 3.906648483114623e-06, + "logits/chosen": -1.5776411294937134, + "logits/rejected": -2.142578601837158, + "logps/chosen": -383.7577819824219, + "logps/rejected": -552.5573120117188, + "loss": 0.427, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -2.3256168365478516, + "rewards/margins": 1.7148630619049072, + "rewards/rejected": -4.04047966003418, + "step": 5790 + }, + { + "epoch": 0.38, + "learning_rate": 3.901924499309564e-06, + "logits/chosen": -2.079991340637207, + "logits/rejected": -1.6393836736679077, + "logps/chosen": -446.80401611328125, + "logps/rejected": -541.393798828125, + "loss": 0.5418, + "rewards/accuracies": 0.75, + "rewards/chosen": -1.5220731496810913, + "rewards/margins": 1.0446290969848633, + "rewards/rejected": -2.5667026042938232, + "step": 5800 + }, + { + "epoch": 0.38, + "learning_rate": 3.897193202379575e-06, + "logits/chosen": -2.1350531578063965, + "logits/rejected": -1.84587824344635, + "logps/chosen": -537.3280029296875, + "logps/rejected": -529.4195556640625, + "loss": 0.5709, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -1.909463882446289, + "rewards/margins": 0.6809160113334656, + "rewards/rejected": -2.5903801918029785, + "step": 5810 + }, + { + "epoch": 0.38, + "learning_rate": 3.8924546170054215e-06, + "logits/chosen": -2.3045449256896973, + "logits/rejected": -2.2156808376312256, + "logps/chosen": -501.5043029785156, + "logps/rejected": -581.8260498046875, + "loss": 0.4218, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -1.793035864830017, + "rewards/margins": 1.2349274158477783, + "rewards/rejected": -3.0279629230499268, + "step": 5820 + }, + { + "epoch": 0.38, + "learning_rate": 3.887708767905883e-06, + "logits/chosen": -2.1339306831359863, + "logits/rejected": -1.840649962425232, + "logps/chosen": -524.33935546875, + "logps/rejected": -588.361328125, + "loss": 0.3532, + "rewards/accuracies": 0.75, + "rewards/chosen": -1.9939978122711182, + "rewards/margins": 1.0440418720245361, + "rewards/rejected": -3.0380396842956543, + "step": 5830 + }, + { + "epoch": 0.38, + "learning_rate": 3.882955679837636e-06, + "logits/chosen": -1.8100063800811768, + "logits/rejected": -2.144019842147827, + "logps/chosen": -473.1988830566406, + "logps/rejected": -469.0291442871094, + "loss": 0.7634, + "rewards/accuracies": 0.5, + "rewards/chosen": -1.7491573095321655, + "rewards/margins": 0.5698834657669067, + "rewards/rejected": -2.3190410137176514, + "step": 5840 + }, + { + "epoch": 0.38, + "learning_rate": 3.878195377595113e-06, + "logits/chosen": -1.8882341384887695, + "logits/rejected": -1.9309072494506836, + "logps/chosen": -544.5458374023438, + "logps/rejected": -674.8884887695312, + "loss": 0.6168, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -2.0545291900634766, + "rewards/margins": 0.7788420915603638, + "rewards/rejected": -2.833371162414551, + "step": 5850 + }, + { + "epoch": 0.38, + "learning_rate": 3.873427886010384e-06, + "logits/chosen": -2.1351327896118164, + "logits/rejected": -2.0625691413879395, + "logps/chosen": -417.35888671875, + "logps/rejected": -596.0733032226562, + "loss": 0.3956, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -1.5938721895217896, + "rewards/margins": 1.5050804615020752, + "rewards/rejected": -3.0989527702331543, + "step": 5860 + }, + { + "epoch": 0.38, + "learning_rate": 3.868653229953021e-06, + "logits/chosen": -1.8998457193374634, + "logits/rejected": -2.005687713623047, + "logps/chosen": -397.5736389160156, + "logps/rejected": -438.1299743652344, + "loss": 0.5318, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -1.550104022026062, + "rewards/margins": 0.7522678971290588, + "rewards/rejected": -2.3023719787597656, + "step": 5870 + }, + { + "epoch": 0.38, + "learning_rate": 3.8638714343299675e-06, + "logits/chosen": -2.50346040725708, + "logits/rejected": -1.835614800453186, + "logps/chosen": -494.24493408203125, + "logps/rejected": -441.7505798339844, + "loss": 0.4546, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -1.589115858078003, + "rewards/margins": 1.0198700428009033, + "rewards/rejected": -2.608985424041748, + "step": 5880 + }, + { + "epoch": 0.39, + "learning_rate": 3.859082524085414e-06, + "logits/chosen": -2.236497402191162, + "logits/rejected": -2.1372292041778564, + "logps/chosen": -491.81549072265625, + "logps/rejected": -572.0244750976562, + "loss": 0.4813, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -1.4131276607513428, + "rewards/margins": 1.2808161973953247, + "rewards/rejected": -2.693943738937378, + "step": 5890 + }, + { + "epoch": 0.39, + "learning_rate": 3.854286524200659e-06, + "logits/chosen": -2.0243048667907715, + "logits/rejected": -1.983472466468811, + "logps/chosen": -519.2561645507812, + "logps/rejected": -610.3675537109375, + "loss": 0.3632, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -1.828203797340393, + "rewards/margins": 1.4534698724746704, + "rewards/rejected": -3.2816739082336426, + "step": 5900 + }, + { + "epoch": 0.39, + "learning_rate": 3.849483459693991e-06, + "logits/chosen": -2.2442984580993652, + "logits/rejected": -1.936551809310913, + "logps/chosen": -518.1563720703125, + "logps/rejected": -664.1353759765625, + "loss": 0.4925, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -1.9774038791656494, + "rewards/margins": 1.205444097518921, + "rewards/rejected": -3.182847499847412, + "step": 5910 + }, + { + "epoch": 0.39, + "learning_rate": 3.844673355620544e-06, + "logits/chosen": -1.9714431762695312, + "logits/rejected": -1.6506202220916748, + "logps/chosen": -553.7269287109375, + "logps/rejected": -513.1190185546875, + "loss": 0.593, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.1814029216766357, + "rewards/margins": 0.47208651900291443, + "rewards/rejected": -2.653489828109741, + "step": 5920 + }, + { + "epoch": 0.39, + "learning_rate": 3.839856237072178e-06, + "logits/chosen": -2.135085105895996, + "logits/rejected": -1.9646351337432861, + "logps/chosen": -429.2771911621094, + "logps/rejected": -498.9122619628906, + "loss": 0.5099, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.285665988922119, + "rewards/margins": 0.9124363660812378, + "rewards/rejected": -3.1981022357940674, + "step": 5930 + }, + { + "epoch": 0.39, + "learning_rate": 3.8350321291773455e-06, + "logits/chosen": -2.0477957725524902, + "logits/rejected": -1.9287916421890259, + "logps/chosen": -520.1895751953125, + "logps/rejected": -633.580322265625, + "loss": 0.6912, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -2.5589051246643066, + "rewards/margins": 0.9964600801467896, + "rewards/rejected": -3.5553650856018066, + "step": 5940 + }, + { + "epoch": 0.39, + "learning_rate": 3.830201057100953e-06, + "logits/chosen": -1.998542070388794, + "logits/rejected": -1.565536618232727, + "logps/chosen": -591.3970947265625, + "logps/rejected": -650.9102783203125, + "loss": 0.5073, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -2.550248622894287, + "rewards/margins": 1.3746483325958252, + "rewards/rejected": -3.924896717071533, + "step": 5950 + }, + { + "epoch": 0.39, + "learning_rate": 3.82536304604424e-06, + "logits/chosen": -1.5837963819503784, + "logits/rejected": -1.9000377655029297, + "logps/chosen": -591.5118408203125, + "logps/rejected": -706.6221923828125, + "loss": 0.5133, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -2.512164831161499, + "rewards/margins": 0.8565373420715332, + "rewards/rejected": -3.368701934814453, + "step": 5960 + }, + { + "epoch": 0.39, + "learning_rate": 3.8205181212446435e-06, + "logits/chosen": -1.9696906805038452, + "logits/rejected": -1.6013498306274414, + "logps/chosen": -593.3405151367188, + "logps/rejected": -571.490234375, + "loss": 0.7762, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -3.121450901031494, + "rewards/margins": 0.010794973000884056, + "rewards/rejected": -3.1322457790374756, + "step": 5970 + }, + { + "epoch": 0.39, + "learning_rate": 3.815666307975664e-06, + "logits/chosen": -1.9981441497802734, + "logits/rejected": -1.674731969833374, + "logps/chosen": -493.15435791015625, + "logps/rejected": -575.2023315429688, + "loss": 0.4685, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -2.250596523284912, + "rewards/margins": 1.1089481115341187, + "rewards/rejected": -3.3595452308654785, + "step": 5980 + }, + { + "epoch": 0.39, + "learning_rate": 3.8108076315467346e-06, + "logits/chosen": -2.3160128593444824, + "logits/rejected": -1.9766929149627686, + "logps/chosen": -489.34112548828125, + "logps/rejected": -657.270263671875, + "loss": 0.4317, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -2.1392629146575928, + "rewards/margins": 1.506763219833374, + "rewards/rejected": -3.646026134490967, + "step": 5990 + }, + { + "epoch": 0.39, + "learning_rate": 3.805942117303093e-06, + "logits/chosen": -2.19549560546875, + "logits/rejected": -1.8671777248382568, + "logps/chosen": -482.0986328125, + "logps/rejected": -604.8792114257812, + "loss": 0.4501, + "rewards/accuracies": 0.75, + "rewards/chosen": -1.711912751197815, + "rewards/margins": 1.493793249130249, + "rewards/rejected": -3.2057061195373535, + "step": 6000 + }, + { + "epoch": 0.39, + "eval_logits/chosen": -2.0209784507751465, + "eval_logits/rejected": -1.8483818769454956, + "eval_logps/chosen": -507.349853515625, + "eval_logps/rejected": -587.6927490234375, + "eval_loss": 0.5436508655548096, + "eval_rewards/accuracies": 0.7419999837875366, + "eval_rewards/chosen": -2.1190431118011475, + "eval_rewards/margins": 1.0038256645202637, + "eval_rewards/rejected": -3.1228690147399902, + "eval_runtime": 464.8352, + "eval_samples_per_second": 4.303, + "eval_steps_per_second": 2.151, + "step": 6000 + }, + { + "epoch": 0.39, + "learning_rate": 3.8010697906256446e-06, + "logits/chosen": -1.5868901014328003, + "logits/rejected": -2.0350403785705566, + "logps/chosen": -521.3289184570312, + "logps/rejected": -640.9805908203125, + "loss": 0.5745, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.1810312271118164, + "rewards/margins": 0.8306337594985962, + "rewards/rejected": -3.011664628982544, + "step": 6010 + }, + { + "epoch": 0.39, + "learning_rate": 3.7961906769308323e-06, + "logits/chosen": -2.1721839904785156, + "logits/rejected": -2.1664059162139893, + "logps/chosen": -428.31365966796875, + "logps/rejected": -519.9769897460938, + "loss": 0.4193, + "rewards/accuracies": 0.75, + "rewards/chosen": -1.6097137928009033, + "rewards/margins": 1.039236068725586, + "rewards/rejected": -2.6489500999450684, + "step": 6020 + }, + { + "epoch": 0.39, + "learning_rate": 3.7913048016705028e-06, + "logits/chosen": -1.8027420043945312, + "logits/rejected": -1.119469404220581, + "logps/chosen": -405.3560791015625, + "logps/rejected": -543.0703125, + "loss": 0.428, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -2.032978057861328, + "rewards/margins": 1.3919148445129395, + "rewards/rejected": -3.4248929023742676, + "step": 6030 + }, + { + "epoch": 0.4, + "learning_rate": 3.786412190331775e-06, + "logits/chosen": -2.008702039718628, + "logits/rejected": -1.9136741161346436, + "logps/chosen": -495.7113342285156, + "logps/rejected": -723.3904418945312, + "loss": 0.3888, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -1.875732183456421, + "rewards/margins": 1.702206015586853, + "rewards/rejected": -3.577937602996826, + "step": 6040 + }, + { + "epoch": 0.4, + "learning_rate": 3.781512868436906e-06, + "logits/chosen": -2.2078769207000732, + "logits/rejected": -2.072467565536499, + "logps/chosen": -521.7391357421875, + "logps/rejected": -561.3995971679688, + "loss": 0.6048, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -1.6842775344848633, + "rewards/margins": 1.0569112300872803, + "rewards/rejected": -2.7411885261535645, + "step": 6050 + }, + { + "epoch": 0.4, + "learning_rate": 3.7766068615431605e-06, + "logits/chosen": -2.3585803508758545, + "logits/rejected": -2.168217658996582, + "logps/chosen": -642.48681640625, + "logps/rejected": -691.4666137695312, + "loss": 0.5816, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -1.5773718357086182, + "rewards/margins": 1.2659053802490234, + "rewards/rejected": -2.8432772159576416, + "step": 6060 + }, + { + "epoch": 0.4, + "learning_rate": 3.771694195242671e-06, + "logits/chosen": -2.1833529472351074, + "logits/rejected": -1.8416792154312134, + "logps/chosen": -552.9678344726562, + "logps/rejected": -465.968994140625, + "loss": 0.5965, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -2.2463607788085938, + "rewards/margins": 0.32134681940078735, + "rewards/rejected": -2.5677075386047363, + "step": 6070 + }, + { + "epoch": 0.4, + "learning_rate": 3.766774895162314e-06, + "logits/chosen": -1.4838708639144897, + "logits/rejected": -1.8937629461288452, + "logps/chosen": -481.39178466796875, + "logps/rejected": -649.5086059570312, + "loss": 0.5998, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -2.128632068634033, + "rewards/margins": 0.9498635530471802, + "rewards/rejected": -3.078495740890503, + "step": 6080 + }, + { + "epoch": 0.4, + "learning_rate": 3.7618489869635666e-06, + "logits/chosen": -2.0555758476257324, + "logits/rejected": -1.9236648082733154, + "logps/chosen": -573.3447265625, + "logps/rejected": -530.6716918945312, + "loss": 0.725, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.4555697441101074, + "rewards/margins": 0.5236064791679382, + "rewards/rejected": -2.9791760444641113, + "step": 6090 + }, + { + "epoch": 0.4, + "learning_rate": 3.756916496342379e-06, + "logits/chosen": -1.6241865158081055, + "logits/rejected": -1.6583588123321533, + "logps/chosen": -433.23333740234375, + "logps/rejected": -461.29180908203125, + "loss": 0.6667, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -2.3971171379089355, + "rewards/margins": 0.457655668258667, + "rewards/rejected": -2.8547730445861816, + "step": 6100 + }, + { + "epoch": 0.4, + "learning_rate": 3.751977449029039e-06, + "logits/chosen": -1.9116780757904053, + "logits/rejected": -1.8591690063476562, + "logps/chosen": -459.74652099609375, + "logps/rejected": -484.01953125, + "loss": 0.5498, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -1.8744319677352905, + "rewards/margins": 0.821243166923523, + "rewards/rejected": -2.6956753730773926, + "step": 6110 + }, + { + "epoch": 0.4, + "learning_rate": 3.747031870788037e-06, + "logits/chosen": -2.1998138427734375, + "logits/rejected": -1.8871634006500244, + "logps/chosen": -454.06365966796875, + "logps/rejected": -557.6979370117188, + "loss": 0.472, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -1.895861029624939, + "rewards/margins": 1.0318374633789062, + "rewards/rejected": -2.9276986122131348, + "step": 6120 + }, + { + "epoch": 0.4, + "learning_rate": 3.7420797874179326e-06, + "logits/chosen": -1.7379772663116455, + "logits/rejected": -1.4775676727294922, + "logps/chosen": -511.3853454589844, + "logps/rejected": -584.1634521484375, + "loss": 0.4523, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.1693828105926514, + "rewards/margins": 1.03212571144104, + "rewards/rejected": -3.2015087604522705, + "step": 6130 + }, + { + "epoch": 0.4, + "learning_rate": 3.7371212247512167e-06, + "logits/chosen": -1.8649704456329346, + "logits/rejected": -2.244213581085205, + "logps/chosen": -548.1846923828125, + "logps/rejected": -826.314453125, + "loss": 0.5413, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -2.369753122329712, + "rewards/margins": 1.118550181388855, + "rewards/rejected": -3.4883029460906982, + "step": 6140 + }, + { + "epoch": 0.4, + "learning_rate": 3.7321562086541817e-06, + "logits/chosen": -2.4249210357666016, + "logits/rejected": -1.8241479396820068, + "logps/chosen": -610.2730102539062, + "logps/rejected": -584.8367309570312, + "loss": 0.4082, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.1096255779266357, + "rewards/margins": 0.8866428136825562, + "rewards/rejected": -2.9962685108184814, + "step": 6150 + }, + { + "epoch": 0.4, + "learning_rate": 3.7271847650267834e-06, + "logits/chosen": -1.5324722528457642, + "logits/rejected": -1.7584350109100342, + "logps/chosen": -479.8152770996094, + "logps/rejected": -582.040771484375, + "loss": 0.5012, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -2.697323799133301, + "rewards/margins": 1.2286405563354492, + "rewards/rejected": -3.92596435546875, + "step": 6160 + }, + { + "epoch": 0.4, + "learning_rate": 3.7222069198025086e-06, + "logits/chosen": -1.997467279434204, + "logits/rejected": -1.7623908519744873, + "logps/chosen": -577.2261352539062, + "logps/rejected": -661.6519775390625, + "loss": 0.5088, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -2.898735523223877, + "rewards/margins": 1.1691944599151611, + "rewards/rejected": -4.067929744720459, + "step": 6170 + }, + { + "epoch": 0.4, + "learning_rate": 3.7172226989482353e-06, + "logits/chosen": -2.120117664337158, + "logits/rejected": -1.8180913925170898, + "logps/chosen": -630.7166748046875, + "logps/rejected": -645.8465576171875, + "loss": 0.5931, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -2.3682315349578857, + "rewards/margins": 1.0373667478561401, + "rewards/rejected": -3.4055984020233154, + "step": 6180 + }, + { + "epoch": 0.4, + "learning_rate": 3.7122321284641007e-06, + "logits/chosen": -1.8285562992095947, + "logits/rejected": -1.9086427688598633, + "logps/chosen": -559.5812377929688, + "logps/rejected": -611.1234130859375, + "loss": 0.5888, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -2.270254135131836, + "rewards/margins": 0.9550930261611938, + "rewards/rejected": -3.2253470420837402, + "step": 6190 + }, + { + "epoch": 0.41, + "learning_rate": 3.707235234383365e-06, + "logits/chosen": -1.5866405963897705, + "logits/rejected": -1.1846307516098022, + "logps/chosen": -497.96295166015625, + "logps/rejected": -517.0799560546875, + "loss": 0.5348, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -2.7328848838806152, + "rewards/margins": 0.9069123268127441, + "rewards/rejected": -3.639796733856201, + "step": 6200 + }, + { + "epoch": 0.41, + "learning_rate": 3.702232042772277e-06, + "logits/chosen": -1.9289581775665283, + "logits/rejected": -2.16865873336792, + "logps/chosen": -585.1903076171875, + "logps/rejected": -650.5701293945312, + "loss": 0.5983, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -2.3935234546661377, + "rewards/margins": 1.1612800359725952, + "rewards/rejected": -3.5548033714294434, + "step": 6210 + }, + { + "epoch": 0.41, + "learning_rate": 3.6972225797299325e-06, + "logits/chosen": -1.6929620504379272, + "logits/rejected": -1.8669235706329346, + "logps/chosen": -590.72412109375, + "logps/rejected": -693.8157958984375, + "loss": 0.7619, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -3.2169342041015625, + "rewards/margins": 0.41517114639282227, + "rewards/rejected": -3.6321048736572266, + "step": 6220 + }, + { + "epoch": 0.41, + "learning_rate": 3.692206871388147e-06, + "logits/chosen": -2.4398531913757324, + "logits/rejected": -1.8036763668060303, + "logps/chosen": -618.9306640625, + "logps/rejected": -543.3020629882812, + "loss": 0.4643, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -2.38877010345459, + "rewards/margins": 1.128936529159546, + "rewards/rejected": -3.517707109451294, + "step": 6230 + }, + { + "epoch": 0.41, + "learning_rate": 3.6871849439113115e-06, + "logits/chosen": -1.8826773166656494, + "logits/rejected": -1.5871411561965942, + "logps/chosen": -531.6732788085938, + "logps/rejected": -634.6497192382812, + "loss": 0.3874, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -2.203376531600952, + "rewards/margins": 1.1963980197906494, + "rewards/rejected": -3.3997745513916016, + "step": 6240 + }, + { + "epoch": 0.41, + "learning_rate": 3.682156823496259e-06, + "logits/chosen": -1.8589789867401123, + "logits/rejected": -1.4900842905044556, + "logps/chosen": -540.5941772460938, + "logps/rejected": -580.9816284179688, + "loss": 0.5012, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -2.5480470657348633, + "rewards/margins": 1.0418312549591064, + "rewards/rejected": -3.589878559112549, + "step": 6250 + }, + { + "epoch": 0.41, + "learning_rate": 3.67712253637213e-06, + "logits/chosen": -1.6938155889511108, + "logits/rejected": -2.005298137664795, + "logps/chosen": -497.44744873046875, + "logps/rejected": -566.0697021484375, + "loss": 0.6546, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -2.0947582721710205, + "rewards/margins": 0.5265017747879028, + "rewards/rejected": -2.621260166168213, + "step": 6260 + }, + { + "epoch": 0.41, + "learning_rate": 3.672082108800231e-06, + "logits/chosen": -1.952540636062622, + "logits/rejected": -1.4625334739685059, + "logps/chosen": -527.0181884765625, + "logps/rejected": -715.9548950195312, + "loss": 0.4024, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -2.1873886585235596, + "rewards/margins": 1.4106473922729492, + "rewards/rejected": -3.598036289215088, + "step": 6270 + }, + { + "epoch": 0.41, + "learning_rate": 3.6670355670739012e-06, + "logits/chosen": -1.6990658044815063, + "logits/rejected": -1.9302780628204346, + "logps/chosen": -547.1773681640625, + "logps/rejected": -577.26708984375, + "loss": 0.5239, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.47194242477417, + "rewards/margins": 0.6156947612762451, + "rewards/rejected": -3.087637186050415, + "step": 6280 + }, + { + "epoch": 0.41, + "learning_rate": 3.6619829375183745e-06, + "logits/chosen": -2.0098588466644287, + "logits/rejected": -1.8811092376708984, + "logps/chosen": -555.290283203125, + "logps/rejected": -684.6082763671875, + "loss": 0.4423, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -2.1225078105926514, + "rewards/margins": 1.1310758590698242, + "rewards/rejected": -3.2535834312438965, + "step": 6290 + }, + { + "epoch": 0.41, + "learning_rate": 3.6569242464906427e-06, + "logits/chosen": -1.8348238468170166, + "logits/rejected": -1.5659939050674438, + "logps/chosen": -561.0123291015625, + "logps/rejected": -603.9790649414062, + "loss": 0.6064, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -2.416660785675049, + "rewards/margins": 0.7037839889526367, + "rewards/rejected": -3.1204445362091064, + "step": 6300 + }, + { + "epoch": 0.41, + "learning_rate": 3.6518595203793156e-06, + "logits/chosen": -2.1522655487060547, + "logits/rejected": -1.8314883708953857, + "logps/chosen": -597.142578125, + "logps/rejected": -642.1500854492188, + "loss": 0.4829, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.6272170543670654, + "rewards/margins": 1.0413819551467896, + "rewards/rejected": -3.6685993671417236, + "step": 6310 + }, + { + "epoch": 0.41, + "learning_rate": 3.646788785604485e-06, + "logits/chosen": -2.0718045234680176, + "logits/rejected": -2.0843162536621094, + "logps/chosen": -567.9349365234375, + "logps/rejected": -625.3218994140625, + "loss": 0.3936, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -2.2843122482299805, + "rewards/margins": 1.1147112846374512, + "rewards/rejected": -3.3990235328674316, + "step": 6320 + }, + { + "epoch": 0.41, + "learning_rate": 3.641712068617588e-06, + "logits/chosen": -2.1838338375091553, + "logits/rejected": -1.826072335243225, + "logps/chosen": -531.1757202148438, + "logps/rejected": -648.35546875, + "loss": 0.5066, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -2.4559993743896484, + "rewards/margins": 1.0470889806747437, + "rewards/rejected": -3.5030884742736816, + "step": 6330 + }, + { + "epoch": 0.41, + "learning_rate": 3.6366293959012673e-06, + "logits/chosen": -2.360943555831909, + "logits/rejected": -1.382158875465393, + "logps/chosen": -503.5301818847656, + "logps/rejected": -572.0340576171875, + "loss": 0.4061, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -1.6652551889419556, + "rewards/margins": 1.7123034000396729, + "rewards/rejected": -3.3775582313537598, + "step": 6340 + }, + { + "epoch": 0.42, + "learning_rate": 3.631540793969233e-06, + "logits/chosen": -1.8118938207626343, + "logits/rejected": -2.3862550258636475, + "logps/chosen": -548.354736328125, + "logps/rejected": -568.3483276367188, + "loss": 0.5976, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -2.2418887615203857, + "rewards/margins": 0.4210108816623688, + "rewards/rejected": -2.6628997325897217, + "step": 6350 + }, + { + "epoch": 0.42, + "learning_rate": 3.626446289366127e-06, + "logits/chosen": -2.170741558074951, + "logits/rejected": -1.74264395236969, + "logps/chosen": -483.4600524902344, + "logps/rejected": -540.6439208984375, + "loss": 0.7371, + "rewards/accuracies": 0.75, + "rewards/chosen": -1.9748328924179077, + "rewards/margins": 1.2782257795333862, + "rewards/rejected": -3.253058671951294, + "step": 6360 + }, + { + "epoch": 0.42, + "learning_rate": 3.6213459086673786e-06, + "logits/chosen": -2.1250672340393066, + "logits/rejected": -1.9678720235824585, + "logps/chosen": -490.01263427734375, + "logps/rejected": -500.30718994140625, + "loss": 0.5438, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -1.7553844451904297, + "rewards/margins": 0.5862716436386108, + "rewards/rejected": -2.341655969619751, + "step": 6370 + }, + { + "epoch": 0.42, + "learning_rate": 3.6162396784790737e-06, + "logits/chosen": -2.0813148021698, + "logits/rejected": -1.9161512851715088, + "logps/chosen": -536.7302856445312, + "logps/rejected": -584.066650390625, + "loss": 0.4712, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -1.7031223773956299, + "rewards/margins": 1.6962379217147827, + "rewards/rejected": -3.399359941482544, + "step": 6380 + }, + { + "epoch": 0.42, + "learning_rate": 3.6111276254378095e-06, + "logits/chosen": -1.94598388671875, + "logits/rejected": -1.7917381525039673, + "logps/chosen": -453.65728759765625, + "logps/rejected": -560.082275390625, + "loss": 0.5562, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -2.231935977935791, + "rewards/margins": 0.9571143388748169, + "rewards/rejected": -3.1890501976013184, + "step": 6390 + }, + { + "epoch": 0.42, + "learning_rate": 3.606009776210559e-06, + "logits/chosen": -2.254138469696045, + "logits/rejected": -2.0140843391418457, + "logps/chosen": -528.5057373046875, + "logps/rejected": -668.043212890625, + "loss": 0.5675, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -2.0886292457580566, + "rewards/margins": 0.7729565501213074, + "rewards/rejected": -2.861585855484009, + "step": 6400 + }, + { + "epoch": 0.42, + "learning_rate": 3.600886157494531e-06, + "logits/chosen": -1.9126689434051514, + "logits/rejected": -1.7674872875213623, + "logps/chosen": -491.20098876953125, + "logps/rejected": -499.23876953125, + "loss": 0.7688, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.448800563812256, + "rewards/margins": 0.6232201457023621, + "rewards/rejected": -3.072021007537842, + "step": 6410 + }, + { + "epoch": 0.42, + "learning_rate": 3.5957567960170304e-06, + "logits/chosen": -1.9752228260040283, + "logits/rejected": -1.8452541828155518, + "logps/chosen": -527.0646362304688, + "logps/rejected": -687.8794555664062, + "loss": 0.6649, + "rewards/accuracies": 0.4000000059604645, + "rewards/chosen": -2.8853278160095215, + "rewards/margins": 0.25008624792099, + "rewards/rejected": -3.1354143619537354, + "step": 6420 + }, + { + "epoch": 0.42, + "learning_rate": 3.590621718535319e-06, + "logits/chosen": -2.364518404006958, + "logits/rejected": -1.6936737298965454, + "logps/chosen": -595.7276611328125, + "logps/rejected": -576.9322509765625, + "loss": 0.5857, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -2.4467835426330566, + "rewards/margins": 1.041294813156128, + "rewards/rejected": -3.4880783557891846, + "step": 6430 + }, + { + "epoch": 0.42, + "learning_rate": 3.5854809518364775e-06, + "logits/chosen": -1.9299328327178955, + "logits/rejected": -1.869680643081665, + "logps/chosen": -533.153564453125, + "logps/rejected": -628.454345703125, + "loss": 0.4269, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -2.0279541015625, + "rewards/margins": 1.7927402257919312, + "rewards/rejected": -3.8206939697265625, + "step": 6440 + }, + { + "epoch": 0.42, + "learning_rate": 3.580334522737262e-06, + "logits/chosen": -1.9989423751831055, + "logits/rejected": -2.1659204959869385, + "logps/chosen": -509.93365478515625, + "logps/rejected": -597.7767333984375, + "loss": 0.5775, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.1766810417175293, + "rewards/margins": 0.9996244311332703, + "rewards/rejected": -3.1763057708740234, + "step": 6450 + }, + { + "epoch": 0.42, + "learning_rate": 3.575182458083968e-06, + "logits/chosen": -1.8486299514770508, + "logits/rejected": -1.3089923858642578, + "logps/chosen": -543.155517578125, + "logps/rejected": -649.8765258789062, + "loss": 0.5096, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -2.2399661540985107, + "rewards/margins": 1.5620733499526978, + "rewards/rejected": -3.802039384841919, + "step": 6460 + }, + { + "epoch": 0.42, + "learning_rate": 3.5700247847522883e-06, + "logits/chosen": -1.926372766494751, + "logits/rejected": -2.0225892066955566, + "logps/chosen": -548.9107666015625, + "logps/rejected": -559.2340087890625, + "loss": 0.5155, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -2.181037664413452, + "rewards/margins": 0.8243209719657898, + "rewards/rejected": -3.0053586959838867, + "step": 6470 + }, + { + "epoch": 0.42, + "learning_rate": 3.5648615296471743e-06, + "logits/chosen": -2.098559856414795, + "logits/rejected": -1.8494961261749268, + "logps/chosen": -483.23919677734375, + "logps/rejected": -678.0482177734375, + "loss": 0.5595, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.0355286598205566, + "rewards/margins": 1.228212594985962, + "rewards/rejected": -3.2637412548065186, + "step": 6480 + }, + { + "epoch": 0.42, + "learning_rate": 3.559692719702693e-06, + "logits/chosen": -1.9919559955596924, + "logits/rejected": -2.085280656814575, + "logps/chosen": -498.7488708496094, + "logps/rejected": -582.7216796875, + "loss": 0.6164, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -2.046030044555664, + "rewards/margins": 0.35755324363708496, + "rewards/rejected": -2.40358304977417, + "step": 6490 + }, + { + "epoch": 0.43, + "learning_rate": 3.55451838188189e-06, + "logits/chosen": -2.0272135734558105, + "logits/rejected": -1.7676805257797241, + "logps/chosen": -533.1506958007812, + "logps/rejected": -594.387451171875, + "loss": 0.5087, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.218221426010132, + "rewards/margins": 0.922869861125946, + "rewards/rejected": -3.1410908699035645, + "step": 6500 + }, + { + "epoch": 0.43, + "learning_rate": 3.549338543176645e-06, + "logits/chosen": -1.6263704299926758, + "logits/rejected": -1.7644325494766235, + "logps/chosen": -408.6803283691406, + "logps/rejected": -664.818359375, + "loss": 0.567, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.146083354949951, + "rewards/margins": 1.5915114879608154, + "rewards/rejected": -3.737595319747925, + "step": 6510 + }, + { + "epoch": 0.43, + "learning_rate": 3.5441532306075342e-06, + "logits/chosen": -2.2625370025634766, + "logits/rejected": -2.0242671966552734, + "logps/chosen": -556.9017333984375, + "logps/rejected": -609.79931640625, + "loss": 0.5176, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -2.1746726036071777, + "rewards/margins": 1.2311522960662842, + "rewards/rejected": -3.405825138092041, + "step": 6520 + }, + { + "epoch": 0.43, + "learning_rate": 3.5389624712236894e-06, + "logits/chosen": -2.2226243019104004, + "logits/rejected": -2.257262706756592, + "logps/chosen": -509.9283142089844, + "logps/rejected": -627.7484130859375, + "loss": 0.5179, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.0390431880950928, + "rewards/margins": 1.254080891609192, + "rewards/rejected": -3.293123960494995, + "step": 6530 + }, + { + "epoch": 0.43, + "learning_rate": 3.533766292102653e-06, + "logits/chosen": -1.7512744665145874, + "logits/rejected": -2.1317524909973145, + "logps/chosen": -437.34356689453125, + "logps/rejected": -606.0906982421875, + "loss": 0.5225, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -2.106980562210083, + "rewards/margins": 0.8636156320571899, + "rewards/rejected": -2.9705960750579834, + "step": 6540 + }, + { + "epoch": 0.43, + "learning_rate": 3.5285647203502404e-06, + "logits/chosen": -2.1404430866241455, + "logits/rejected": -1.6980934143066406, + "logps/chosen": -517.9746704101562, + "logps/rejected": -558.1184692382812, + "loss": 0.4704, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.068937063217163, + "rewards/margins": 1.0138415098190308, + "rewards/rejected": -3.0827784538269043, + "step": 6550 + }, + { + "epoch": 0.43, + "learning_rate": 3.5233577831003983e-06, + "logits/chosen": -1.9494342803955078, + "logits/rejected": -1.800309419631958, + "logps/chosen": -551.1097412109375, + "logps/rejected": -552.0692138671875, + "loss": 0.5604, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -2.122185707092285, + "rewards/margins": 0.7436209917068481, + "rewards/rejected": -2.865807056427002, + "step": 6560 + }, + { + "epoch": 0.43, + "learning_rate": 3.5181455075150628e-06, + "logits/chosen": -2.3633298873901367, + "logits/rejected": -1.604595422744751, + "logps/chosen": -500.5062561035156, + "logps/rejected": -592.0992431640625, + "loss": 0.5148, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -1.738813042640686, + "rewards/margins": 1.5005505084991455, + "rewards/rejected": -3.239363431930542, + "step": 6570 + }, + { + "epoch": 0.43, + "learning_rate": 3.512927920784016e-06, + "logits/chosen": -1.9952714443206787, + "logits/rejected": -1.5963308811187744, + "logps/chosen": -401.2374572753906, + "logps/rejected": -499.2310485839844, + "loss": 0.4971, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -2.0079140663146973, + "rewards/margins": 1.016329050064087, + "rewards/rejected": -3.0242433547973633, + "step": 6580 + }, + { + "epoch": 0.43, + "learning_rate": 3.5077050501247457e-06, + "logits/chosen": -2.3940505981445312, + "logits/rejected": -2.106627941131592, + "logps/chosen": -472.62774658203125, + "logps/rejected": -550.3732299804688, + "loss": 0.4836, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -1.9436047077178955, + "rewards/margins": 0.7553819417953491, + "rewards/rejected": -2.698986530303955, + "step": 6590 + }, + { + "epoch": 0.43, + "learning_rate": 3.5024769227823042e-06, + "logits/chosen": -1.9121567010879517, + "logits/rejected": -1.7712024450302124, + "logps/chosen": -485.36895751953125, + "logps/rejected": -611.3514404296875, + "loss": 0.5061, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.3973145484924316, + "rewards/margins": 1.1803823709487915, + "rewards/rejected": -3.5776965618133545, + "step": 6600 + }, + { + "epoch": 0.43, + "learning_rate": 3.4972435660291646e-06, + "logits/chosen": -1.877173662185669, + "logits/rejected": -1.741201639175415, + "logps/chosen": -593.0254516601562, + "logps/rejected": -672.1753540039062, + "loss": 0.592, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -1.6820404529571533, + "rewards/margins": 1.1979200839996338, + "rewards/rejected": -2.879960536956787, + "step": 6610 + }, + { + "epoch": 0.43, + "learning_rate": 3.492005007165079e-06, + "logits/chosen": -1.6904380321502686, + "logits/rejected": -2.0509963035583496, + "logps/chosen": -576.3147583007812, + "logps/rejected": -585.3676147460938, + "loss": 0.666, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -2.614394426345825, + "rewards/margins": 0.4233713746070862, + "rewards/rejected": -3.0377657413482666, + "step": 6620 + }, + { + "epoch": 0.43, + "learning_rate": 3.4867612735169377e-06, + "logits/chosen": -1.859418511390686, + "logits/rejected": -1.797196626663208, + "logps/chosen": -466.6185607910156, + "logps/rejected": -501.27105712890625, + "loss": 0.5138, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -2.128791570663452, + "rewards/margins": 0.7405956983566284, + "rewards/rejected": -2.869387149810791, + "step": 6630 + }, + { + "epoch": 0.43, + "learning_rate": 3.4815123924386226e-06, + "logits/chosen": -2.1479945182800293, + "logits/rejected": -1.7917789220809937, + "logps/chosen": -501.1796875, + "logps/rejected": -537.141845703125, + "loss": 0.6099, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -1.8953443765640259, + "rewards/margins": 1.2032743692398071, + "rewards/rejected": -3.098618745803833, + "step": 6640 + }, + { + "epoch": 0.44, + "learning_rate": 3.4762583913108696e-06, + "logits/chosen": -1.9697473049163818, + "logits/rejected": -1.8926702737808228, + "logps/chosen": -629.2015380859375, + "logps/rejected": -737.5687255859375, + "loss": 0.512, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -1.999524712562561, + "rewards/margins": 0.8353649377822876, + "rewards/rejected": -2.8348896503448486, + "step": 6650 + }, + { + "epoch": 0.44, + "learning_rate": 3.4709992975411217e-06, + "logits/chosen": -2.1073319911956787, + "logits/rejected": -1.863032341003418, + "logps/chosen": -356.60394287109375, + "logps/rejected": -415.05078125, + "loss": 0.6121, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -1.9784562587738037, + "rewards/margins": 0.3868860602378845, + "rewards/rejected": -2.365342617034912, + "step": 6660 + }, + { + "epoch": 0.44, + "learning_rate": 3.4657351385633886e-06, + "logits/chosen": -1.802716612815857, + "logits/rejected": -2.103769063949585, + "logps/chosen": -497.4820251464844, + "logps/rejected": -602.2470703125, + "loss": 0.4817, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -1.9606220722198486, + "rewards/margins": 0.9812425374984741, + "rewards/rejected": -2.941864490509033, + "step": 6670 + }, + { + "epoch": 0.44, + "learning_rate": 3.4604659418381024e-06, + "logits/chosen": -1.8399873971939087, + "logits/rejected": -1.899836540222168, + "logps/chosen": -421.61163330078125, + "logps/rejected": -521.968994140625, + "loss": 0.6611, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.403350830078125, + "rewards/margins": 0.4829896092414856, + "rewards/rejected": -2.886340618133545, + "step": 6680 + }, + { + "epoch": 0.44, + "learning_rate": 3.4551917348519744e-06, + "logits/chosen": -2.3457393646240234, + "logits/rejected": -2.0672945976257324, + "logps/chosen": -618.3902587890625, + "logps/rejected": -615.4583740234375, + "loss": 0.6227, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -2.1271519660949707, + "rewards/margins": 1.0083070993423462, + "rewards/rejected": -3.1354591846466064, + "step": 6690 + }, + { + "epoch": 0.44, + "learning_rate": 3.4499125451178505e-06, + "logits/chosen": -2.169529438018799, + "logits/rejected": -2.244375467300415, + "logps/chosen": -566.7544555664062, + "logps/rejected": -646.224365234375, + "loss": 0.604, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -2.3752753734588623, + "rewards/margins": 0.8324453234672546, + "rewards/rejected": -3.207720994949341, + "step": 6700 + }, + { + "epoch": 0.44, + "learning_rate": 3.4446284001745723e-06, + "logits/chosen": -2.130103349685669, + "logits/rejected": -1.7955682277679443, + "logps/chosen": -468.88348388671875, + "logps/rejected": -552.3078002929688, + "loss": 0.4814, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -2.294940233230591, + "rewards/margins": 1.5448663234710693, + "rewards/rejected": -3.8398067951202393, + "step": 6710 + }, + { + "epoch": 0.44, + "learning_rate": 3.439339327586827e-06, + "logits/chosen": -2.0202527046203613, + "logits/rejected": -1.760382056236267, + "logps/chosen": -525.0974731445312, + "logps/rejected": -689.3110961914062, + "loss": 0.5979, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -2.733820915222168, + "rewards/margins": 0.8580193519592285, + "rewards/rejected": -3.591839551925659, + "step": 6720 + }, + { + "epoch": 0.44, + "learning_rate": 3.434045354945008e-06, + "logits/chosen": -2.259902238845825, + "logits/rejected": -2.089804172515869, + "logps/chosen": -519.9620361328125, + "logps/rejected": -715.9612426757812, + "loss": 0.48, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -2.358236312866211, + "rewards/margins": 1.4368010759353638, + "rewards/rejected": -3.7950377464294434, + "step": 6730 + }, + { + "epoch": 0.44, + "learning_rate": 3.4287465098650713e-06, + "logits/chosen": -1.8449312448501587, + "logits/rejected": -1.768151044845581, + "logps/chosen": -508.92120361328125, + "logps/rejected": -558.8959350585938, + "loss": 0.495, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -1.8265440464019775, + "rewards/margins": 1.2664082050323486, + "rewards/rejected": -3.092952251434326, + "step": 6740 + }, + { + "epoch": 0.44, + "learning_rate": 3.423442819988387e-06, + "logits/chosen": -2.011176347732544, + "logits/rejected": -1.8518304824829102, + "logps/chosen": -456.6439514160156, + "logps/rejected": -559.4019165039062, + "loss": 0.6557, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -2.4466426372528076, + "rewards/margins": 0.8426218032836914, + "rewards/rejected": -3.28926420211792, + "step": 6750 + }, + { + "epoch": 0.44, + "learning_rate": 3.4181343129816e-06, + "logits/chosen": -2.3097541332244873, + "logits/rejected": -2.193639039993286, + "logps/chosen": -653.538330078125, + "logps/rejected": -680.1073608398438, + "loss": 0.5421, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.3540773391723633, + "rewards/margins": 0.6987882852554321, + "rewards/rejected": -3.052865505218506, + "step": 6760 + }, + { + "epoch": 0.44, + "learning_rate": 3.4128210165364837e-06, + "logits/chosen": -2.04072904586792, + "logits/rejected": -1.845476746559143, + "logps/chosen": -520.7491455078125, + "logps/rejected": -505.431640625, + "loss": 0.5424, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -1.9616371393203735, + "rewards/margins": 0.9794682264328003, + "rewards/rejected": -2.941105365753174, + "step": 6770 + }, + { + "epoch": 0.44, + "learning_rate": 3.407502958369795e-06, + "logits/chosen": -1.7737308740615845, + "logits/rejected": -1.9110854864120483, + "logps/chosen": -474.15850830078125, + "logps/rejected": -509.0408630371094, + "loss": 0.557, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -2.0218558311462402, + "rewards/margins": 0.6208639144897461, + "rewards/rejected": -2.6427202224731445, + "step": 6780 + }, + { + "epoch": 0.44, + "learning_rate": 3.4021801662231297e-06, + "logits/chosen": -2.1233630180358887, + "logits/rejected": -2.216717481613159, + "logps/chosen": -588.3614501953125, + "logps/rejected": -673.5755004882812, + "loss": 0.4333, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -1.7243669033050537, + "rewards/margins": 1.1570664644241333, + "rewards/rejected": -2.8814332485198975, + "step": 6790 + }, + { + "epoch": 0.44, + "learning_rate": 3.3968526678627793e-06, + "logits/chosen": -2.006848096847534, + "logits/rejected": -1.90310800075531, + "logps/chosen": -535.9368286132812, + "logps/rejected": -562.5875244140625, + "loss": 0.7555, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -2.227323532104492, + "rewards/margins": 0.5417407751083374, + "rewards/rejected": -2.769064426422119, + "step": 6800 + }, + { + "epoch": 0.45, + "learning_rate": 3.391520491079586e-06, + "logits/chosen": -2.16282320022583, + "logits/rejected": -2.040290594100952, + "logps/chosen": -467.48394775390625, + "logps/rejected": -588.5347900390625, + "loss": 0.4816, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -1.9816973209381104, + "rewards/margins": 1.336430311203003, + "rewards/rejected": -3.318127393722534, + "step": 6810 + }, + { + "epoch": 0.45, + "learning_rate": 3.3861836636887936e-06, + "logits/chosen": -1.7972116470336914, + "logits/rejected": -1.7362552881240845, + "logps/chosen": -619.1534423828125, + "logps/rejected": -605.8143310546875, + "loss": 0.5679, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -2.6134886741638184, + "rewards/margins": 0.9449461102485657, + "rewards/rejected": -3.5584347248077393, + "step": 6820 + }, + { + "epoch": 0.45, + "learning_rate": 3.3808422135299106e-06, + "logits/chosen": -2.1757609844207764, + "logits/rejected": -1.9568872451782227, + "logps/chosen": -496.1884765625, + "logps/rejected": -594.45654296875, + "loss": 0.6461, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.3930745124816895, + "rewards/margins": 0.6214153170585632, + "rewards/rejected": -3.0144896507263184, + "step": 6830 + }, + { + "epoch": 0.45, + "learning_rate": 3.375496168466556e-06, + "logits/chosen": -1.769409418106079, + "logits/rejected": -1.8746048212051392, + "logps/chosen": -503.7557678222656, + "logps/rejected": -551.3370361328125, + "loss": 0.6333, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.382068157196045, + "rewards/margins": 0.6661649346351624, + "rewards/rejected": -3.0482332706451416, + "step": 6840 + }, + { + "epoch": 0.45, + "learning_rate": 3.3701455563863205e-06, + "logits/chosen": -2.1566174030303955, + "logits/rejected": -2.090261936187744, + "logps/chosen": -505.6607971191406, + "logps/rejected": -505.560546875, + "loss": 0.6342, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.3432400226593018, + "rewards/margins": 0.5142241716384888, + "rewards/rejected": -2.857464075088501, + "step": 6850 + }, + { + "epoch": 0.45, + "learning_rate": 3.3647904052006174e-06, + "logits/chosen": -2.1792402267456055, + "logits/rejected": -1.721308708190918, + "logps/chosen": -511.5228576660156, + "logps/rejected": -615.1655883789062, + "loss": 0.3936, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -1.4667131900787354, + "rewards/margins": 1.5716593265533447, + "rewards/rejected": -3.03837251663208, + "step": 6860 + }, + { + "epoch": 0.45, + "learning_rate": 3.3594307428445383e-06, + "logits/chosen": -2.051579236984253, + "logits/rejected": -2.012957811355591, + "logps/chosen": -560.08984375, + "logps/rejected": -628.5303344726562, + "loss": 0.5154, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.2268595695495605, + "rewards/margins": 0.9397394061088562, + "rewards/rejected": -3.1665987968444824, + "step": 6870 + }, + { + "epoch": 0.45, + "learning_rate": 3.354066597276707e-06, + "logits/chosen": -1.7750266790390015, + "logits/rejected": -1.9297701120376587, + "logps/chosen": -437.557861328125, + "logps/rejected": -588.8997192382812, + "loss": 0.588, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -2.2441320419311523, + "rewards/margins": 1.2166695594787598, + "rewards/rejected": -3.460801601409912, + "step": 6880 + }, + { + "epoch": 0.45, + "learning_rate": 3.348697996479136e-06, + "logits/chosen": -2.2891221046447754, + "logits/rejected": -2.0664844512939453, + "logps/chosen": -551.2601318359375, + "logps/rejected": -479.00439453125, + "loss": 0.465, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -1.6627784967422485, + "rewards/margins": 0.9804137945175171, + "rewards/rejected": -2.643192768096924, + "step": 6890 + }, + { + "epoch": 0.45, + "learning_rate": 3.3433249684570757e-06, + "logits/chosen": -1.4506876468658447, + "logits/rejected": -1.8762842416763306, + "logps/chosen": -418.89984130859375, + "logps/rejected": -550.0848388671875, + "loss": 0.5604, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -2.209401845932007, + "rewards/margins": 0.9306854009628296, + "rewards/rejected": -3.140087366104126, + "step": 6900 + }, + { + "epoch": 0.45, + "learning_rate": 3.3379475412388724e-06, + "logits/chosen": -2.157930850982666, + "logits/rejected": -1.8227431774139404, + "logps/chosen": -508.7159118652344, + "logps/rejected": -634.552978515625, + "loss": 0.529, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -2.124192953109741, + "rewards/margins": 1.8052854537963867, + "rewards/rejected": -3.929478168487549, + "step": 6910 + }, + { + "epoch": 0.45, + "learning_rate": 3.3325657428758207e-06, + "logits/chosen": -1.7919973134994507, + "logits/rejected": -1.8608999252319336, + "logps/chosen": -436.8871154785156, + "logps/rejected": -589.9657592773438, + "loss": 0.4744, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -2.220111131668091, + "rewards/margins": 1.0796172618865967, + "rewards/rejected": -3.2997279167175293, + "step": 6920 + }, + { + "epoch": 0.45, + "learning_rate": 3.3271796014420175e-06, + "logits/chosen": -2.2344298362731934, + "logits/rejected": -1.459727168083191, + "logps/chosen": -545.1464233398438, + "logps/rejected": -551.0026245117188, + "loss": 0.6576, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -2.1628594398498535, + "rewards/margins": 0.9946087598800659, + "rewards/rejected": -3.15746808052063, + "step": 6930 + }, + { + "epoch": 0.45, + "learning_rate": 3.3217891450342142e-06, + "logits/chosen": -2.228728771209717, + "logits/rejected": -1.9757254123687744, + "logps/chosen": -463.68450927734375, + "logps/rejected": -558.191650390625, + "loss": 0.5128, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -1.7646548748016357, + "rewards/margins": 1.299391508102417, + "rewards/rejected": -3.0640463829040527, + "step": 6940 + }, + { + "epoch": 0.45, + "learning_rate": 3.3163944017716733e-06, + "logits/chosen": -2.4919800758361816, + "logits/rejected": -2.2972683906555176, + "logps/chosen": -539.271240234375, + "logps/rejected": -655.6963500976562, + "loss": 0.4676, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -1.7573680877685547, + "rewards/margins": 1.1947455406188965, + "rewards/rejected": -2.952113389968872, + "step": 6950 + }, + { + "epoch": 0.46, + "learning_rate": 3.310995399796017e-06, + "logits/chosen": -2.123279094696045, + "logits/rejected": -1.9764589071273804, + "logps/chosen": -563.791259765625, + "logps/rejected": -586.9517822265625, + "loss": 0.6602, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.5124528408050537, + "rewards/margins": 0.9271472692489624, + "rewards/rejected": -3.4396004676818848, + "step": 6960 + }, + { + "epoch": 0.46, + "learning_rate": 3.305592167271085e-06, + "logits/chosen": -2.0485548973083496, + "logits/rejected": -2.015782356262207, + "logps/chosen": -599.0858154296875, + "logps/rejected": -659.1200561523438, + "loss": 0.4515, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -2.135643243789673, + "rewards/margins": 1.304933786392212, + "rewards/rejected": -3.4405770301818848, + "step": 6970 + }, + { + "epoch": 0.46, + "learning_rate": 3.3001847323827846e-06, + "logits/chosen": -2.3702428340911865, + "logits/rejected": -2.0240466594696045, + "logps/chosen": -573.703857421875, + "logps/rejected": -617.8404541015625, + "loss": 0.505, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -2.744354724884033, + "rewards/margins": 1.0058718919754028, + "rewards/rejected": -3.7502264976501465, + "step": 6980 + }, + { + "epoch": 0.46, + "learning_rate": 3.2947731233389447e-06, + "logits/chosen": -2.1859326362609863, + "logits/rejected": -1.7747504711151123, + "logps/chosen": -592.033203125, + "logps/rejected": -566.8076171875, + "loss": 0.51, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.7474365234375, + "rewards/margins": 0.7229793667793274, + "rewards/rejected": -3.470416307449341, + "step": 6990 + }, + { + "epoch": 0.46, + "learning_rate": 3.2893573683691706e-06, + "logits/chosen": -1.8576014041900635, + "logits/rejected": -1.935616135597229, + "logps/chosen": -571.7442626953125, + "logps/rejected": -666.6461181640625, + "loss": 0.4546, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -1.9634231328964233, + "rewards/margins": 1.1846826076507568, + "rewards/rejected": -3.1481058597564697, + "step": 7000 + }, + { + "epoch": 0.46, + "learning_rate": 3.2839374957246915e-06, + "logits/chosen": -1.8236987590789795, + "logits/rejected": -2.0967044830322266, + "logps/chosen": -503.366455078125, + "logps/rejected": -510.83563232421875, + "loss": 0.5869, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -2.859544277191162, + "rewards/margins": 0.6328840255737305, + "rewards/rejected": -3.4924283027648926, + "step": 7010 + }, + { + "epoch": 0.46, + "learning_rate": 3.2785135336782187e-06, + "logits/chosen": -1.9590002298355103, + "logits/rejected": -1.7429759502410889, + "logps/chosen": -504.9451599121094, + "logps/rejected": -582.9620361328125, + "loss": 0.6693, + "rewards/accuracies": 0.75, + "rewards/chosen": -1.8443717956542969, + "rewards/margins": 1.3743021488189697, + "rewards/rejected": -3.2186741828918457, + "step": 7020 + }, + { + "epoch": 0.46, + "learning_rate": 3.2730855105237952e-06, + "logits/chosen": -1.7093353271484375, + "logits/rejected": -1.8194334506988525, + "logps/chosen": -498.09857177734375, + "logps/rejected": -631.9508666992188, + "loss": 0.4945, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.2490179538726807, + "rewards/margins": 0.7945035696029663, + "rewards/rejected": -3.0435216426849365, + "step": 7030 + }, + { + "epoch": 0.46, + "learning_rate": 3.2676534545766486e-06, + "logits/chosen": -1.8025137186050415, + "logits/rejected": -1.8641912937164307, + "logps/chosen": -565.27490234375, + "logps/rejected": -656.0820922851562, + "loss": 0.3891, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -2.3689165115356445, + "rewards/margins": 1.647971510887146, + "rewards/rejected": -4.016888618469238, + "step": 7040 + }, + { + "epoch": 0.46, + "learning_rate": 3.262217394173043e-06, + "logits/chosen": -2.179482936859131, + "logits/rejected": -1.9934285879135132, + "logps/chosen": -565.6411743164062, + "logps/rejected": -619.4987182617188, + "loss": 0.4887, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.432353973388672, + "rewards/margins": 0.817031741142273, + "rewards/rejected": -3.2493858337402344, + "step": 7050 + }, + { + "epoch": 0.46, + "learning_rate": 3.2567773576701333e-06, + "logits/chosen": -1.961511254310608, + "logits/rejected": -2.085686206817627, + "logps/chosen": -555.8421630859375, + "logps/rejected": -600.7129516601562, + "loss": 0.6391, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -2.5808510780334473, + "rewards/margins": 0.753670871257782, + "rewards/rejected": -3.334522247314453, + "step": 7060 + }, + { + "epoch": 0.46, + "learning_rate": 3.2513333734458154e-06, + "logits/chosen": -1.9186073541641235, + "logits/rejected": -1.8230812549591064, + "logps/chosen": -550.2675170898438, + "logps/rejected": -745.5703125, + "loss": 0.5088, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -2.3937816619873047, + "rewards/margins": 1.4927335977554321, + "rewards/rejected": -3.8865153789520264, + "step": 7070 + }, + { + "epoch": 0.46, + "learning_rate": 3.245885469898576e-06, + "logits/chosen": -2.157291889190674, + "logits/rejected": -1.8229951858520508, + "logps/chosen": -584.3994140625, + "logps/rejected": -601.2501831054688, + "loss": 0.6809, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -2.986717700958252, + "rewards/margins": 0.6209832429885864, + "rewards/rejected": -3.607701063156128, + "step": 7080 + }, + { + "epoch": 0.46, + "learning_rate": 3.2404336754473497e-06, + "logits/chosen": -1.654690146446228, + "logits/rejected": -1.6594860553741455, + "logps/chosen": -572.0872802734375, + "logps/rejected": -654.669189453125, + "loss": 0.5341, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -2.753058433532715, + "rewards/margins": 0.994116485118866, + "rewards/rejected": -3.7471747398376465, + "step": 7090 + }, + { + "epoch": 0.46, + "learning_rate": 3.234978018531367e-06, + "logits/chosen": -1.7258952856063843, + "logits/rejected": -1.9482316970825195, + "logps/chosen": -637.0924682617188, + "logps/rejected": -730.4885864257812, + "loss": 0.5761, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -3.0775818824768066, + "rewards/margins": 0.5728412866592407, + "rewards/rejected": -3.6504225730895996, + "step": 7100 + }, + { + "epoch": 0.47, + "learning_rate": 3.229518527610006e-06, + "logits/chosen": -1.4942814111709595, + "logits/rejected": -1.8945693969726562, + "logps/chosen": -445.71954345703125, + "logps/rejected": -536.7567138671875, + "loss": 0.6735, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -2.618427038192749, + "rewards/margins": 0.8297905921936035, + "rewards/rejected": -3.4482178688049316, + "step": 7110 + }, + { + "epoch": 0.47, + "learning_rate": 3.2240552311626465e-06, + "logits/chosen": -1.9111835956573486, + "logits/rejected": -1.954232931137085, + "logps/chosen": -617.4752197265625, + "logps/rejected": -665.8094482421875, + "loss": 0.6307, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -2.846435070037842, + "rewards/margins": 0.4398753046989441, + "rewards/rejected": -3.2863106727600098, + "step": 7120 + }, + { + "epoch": 0.47, + "learning_rate": 3.2185881576885193e-06, + "logits/chosen": -2.338397979736328, + "logits/rejected": -1.7893339395523071, + "logps/chosen": -525.5226440429688, + "logps/rejected": -591.5637817382812, + "loss": 0.4398, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -2.201669454574585, + "rewards/margins": 1.5123226642608643, + "rewards/rejected": -3.71399188041687, + "step": 7130 + }, + { + "epoch": 0.47, + "learning_rate": 3.213117335706557e-06, + "logits/chosen": -1.9989426136016846, + "logits/rejected": -1.545607089996338, + "logps/chosen": -485.354736328125, + "logps/rejected": -522.4522094726562, + "loss": 0.4836, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -2.328815221786499, + "rewards/margins": 0.7823472023010254, + "rewards/rejected": -3.1111626625061035, + "step": 7140 + }, + { + "epoch": 0.47, + "learning_rate": 3.2076427937552473e-06, + "logits/chosen": -1.7991571426391602, + "logits/rejected": -2.210568904876709, + "logps/chosen": -554.9445190429688, + "logps/rejected": -665.0664672851562, + "loss": 0.6054, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -2.2572712898254395, + "rewards/margins": 1.0221856832504272, + "rewards/rejected": -3.2794570922851562, + "step": 7150 + }, + { + "epoch": 0.47, + "learning_rate": 3.2021645603924827e-06, + "logits/chosen": -2.112433910369873, + "logits/rejected": -2.141270160675049, + "logps/chosen": -458.9959411621094, + "logps/rejected": -521.6207275390625, + "loss": 0.5506, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.4281132221221924, + "rewards/margins": 0.4478052258491516, + "rewards/rejected": -2.875918388366699, + "step": 7160 + }, + { + "epoch": 0.47, + "learning_rate": 3.196682664195412e-06, + "logits/chosen": -2.0608747005462646, + "logits/rejected": -2.0447030067443848, + "logps/chosen": -528.4253540039062, + "logps/rejected": -657.8396606445312, + "loss": 0.4791, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -2.070521116256714, + "rewards/margins": 1.1912587881088257, + "rewards/rejected": -3.261780261993408, + "step": 7170 + }, + { + "epoch": 0.47, + "learning_rate": 3.191197133760291e-06, + "logits/chosen": -1.872523546218872, + "logits/rejected": -1.9694303274154663, + "logps/chosen": -565.5282592773438, + "logps/rejected": -575.5656127929688, + "loss": 0.5671, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -2.6287894248962402, + "rewards/margins": 0.6487969160079956, + "rewards/rejected": -3.2775864601135254, + "step": 7180 + }, + { + "epoch": 0.47, + "learning_rate": 3.185707997702334e-06, + "logits/chosen": -2.3510260581970215, + "logits/rejected": -1.7899143695831299, + "logps/chosen": -515.1236572265625, + "logps/rejected": -526.2489624023438, + "loss": 0.6378, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -2.5627970695495605, + "rewards/margins": 0.5820015072822571, + "rewards/rejected": -3.144798755645752, + "step": 7190 + }, + { + "epoch": 0.47, + "learning_rate": 3.1802152846555624e-06, + "logits/chosen": -2.259308338165283, + "logits/rejected": -1.6995160579681396, + "logps/chosen": -555.3382568359375, + "logps/rejected": -688.6828002929688, + "loss": 0.4379, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -2.4361205101013184, + "rewards/margins": 1.8397639989852905, + "rewards/rejected": -4.27588415145874, + "step": 7200 + }, + { + "epoch": 0.47, + "learning_rate": 3.174719023272659e-06, + "logits/chosen": -2.1534180641174316, + "logits/rejected": -2.1583364009857178, + "logps/chosen": -621.2399291992188, + "logps/rejected": -691.5772705078125, + "loss": 0.5478, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -2.7099742889404297, + "rewards/margins": 0.8596396446228027, + "rewards/rejected": -3.5696136951446533, + "step": 7210 + }, + { + "epoch": 0.47, + "learning_rate": 3.169219242224816e-06, + "logits/chosen": -2.2311336994171143, + "logits/rejected": -1.8312718868255615, + "logps/chosen": -637.9315185546875, + "logps/rejected": -664.3123779296875, + "loss": 0.5815, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -2.2736587524414062, + "rewards/margins": 1.4779915809631348, + "rewards/rejected": -3.75165057182312, + "step": 7220 + }, + { + "epoch": 0.47, + "learning_rate": 3.1637159702015837e-06, + "logits/chosen": -2.2375268936157227, + "logits/rejected": -2.0711469650268555, + "logps/chosen": -615.912109375, + "logps/rejected": -613.326416015625, + "loss": 0.5244, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -1.7703040838241577, + "rewards/margins": 0.9314772486686707, + "rewards/rejected": -2.7017810344696045, + "step": 7230 + }, + { + "epoch": 0.47, + "learning_rate": 3.1582092359107263e-06, + "logits/chosen": -2.2372851371765137, + "logits/rejected": -1.9184343814849854, + "logps/chosen": -551.5802001953125, + "logps/rejected": -714.7056884765625, + "loss": 0.3888, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -2.209808349609375, + "rewards/margins": 1.9521551132202148, + "rewards/rejected": -4.16196346282959, + "step": 7240 + }, + { + "epoch": 0.47, + "learning_rate": 3.152699068078067e-06, + "logits/chosen": -1.6327937841415405, + "logits/rejected": -1.676995873451233, + "logps/chosen": -417.7352600097656, + "logps/rejected": -610.2640380859375, + "loss": 0.4914, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -2.6484999656677246, + "rewards/margins": 0.7113903760910034, + "rewards/rejected": -3.3598904609680176, + "step": 7250 + }, + { + "epoch": 0.48, + "learning_rate": 3.1471854954473415e-06, + "logits/chosen": -2.270559787750244, + "logits/rejected": -2.0879476070404053, + "logps/chosen": -601.9791870117188, + "logps/rejected": -638.6385498046875, + "loss": 0.5273, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -2.566265821456909, + "rewards/margins": 0.6942251920700073, + "rewards/rejected": -3.260490894317627, + "step": 7260 + }, + { + "epoch": 0.48, + "learning_rate": 3.1416685467800436e-06, + "logits/chosen": -2.1702325344085693, + "logits/rejected": -2.05281138420105, + "logps/chosen": -677.0984497070312, + "logps/rejected": -686.2244262695312, + "loss": 0.4073, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -2.608895778656006, + "rewards/margins": 1.11374831199646, + "rewards/rejected": -3.7226436138153076, + "step": 7270 + }, + { + "epoch": 0.48, + "learning_rate": 3.1361482508552803e-06, + "logits/chosen": -2.2912166118621826, + "logits/rejected": -2.0227737426757812, + "logps/chosen": -630.8660888671875, + "logps/rejected": -597.8320922851562, + "loss": 0.6156, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -2.68625545501709, + "rewards/margins": 0.6883805394172668, + "rewards/rejected": -3.374636173248291, + "step": 7280 + }, + { + "epoch": 0.48, + "learning_rate": 3.1306246364696198e-06, + "logits/chosen": -2.2401461601257324, + "logits/rejected": -2.0388596057891846, + "logps/chosen": -536.4967041015625, + "logps/rejected": -630.9244995117188, + "loss": 0.5952, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -2.8389899730682373, + "rewards/margins": 0.7212721109390259, + "rewards/rejected": -3.5602622032165527, + "step": 7290 + }, + { + "epoch": 0.48, + "learning_rate": 3.1250977324369413e-06, + "logits/chosen": -1.5504920482635498, + "logits/rejected": -1.8194414377212524, + "logps/chosen": -581.7468872070312, + "logps/rejected": -643.1931762695312, + "loss": 0.5003, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -3.213700532913208, + "rewards/margins": 0.786910891532898, + "rewards/rejected": -4.000611305236816, + "step": 7300 + }, + { + "epoch": 0.48, + "learning_rate": 3.1195675675882825e-06, + "logits/chosen": -1.7138620615005493, + "logits/rejected": -1.9886878728866577, + "logps/chosen": -532.3826904296875, + "logps/rejected": -676.5599975585938, + "loss": 0.5468, + "rewards/accuracies": 0.75, + "rewards/chosen": -3.1613645553588867, + "rewards/margins": 1.1969727277755737, + "rewards/rejected": -4.358336448669434, + "step": 7310 + }, + { + "epoch": 0.48, + "learning_rate": 3.1140341707716926e-06, + "logits/chosen": -2.1033859252929688, + "logits/rejected": -1.4388631582260132, + "logps/chosen": -497.56658935546875, + "logps/rejected": -699.9725341796875, + "loss": 0.6106, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.9511947631835938, + "rewards/margins": 1.3797348737716675, + "rewards/rejected": -4.330929279327393, + "step": 7320 + }, + { + "epoch": 0.48, + "learning_rate": 3.1084975708520803e-06, + "logits/chosen": -2.092902421951294, + "logits/rejected": -1.963867425918579, + "logps/chosen": -516.0930786132812, + "logps/rejected": -649.5531616210938, + "loss": 0.5842, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -2.7223544120788574, + "rewards/margins": 1.2275700569152832, + "rewards/rejected": -3.9499244689941406, + "step": 7330 + }, + { + "epoch": 0.48, + "learning_rate": 3.1029577967110625e-06, + "logits/chosen": -2.3549771308898926, + "logits/rejected": -1.8174254894256592, + "logps/chosen": -562.1286010742188, + "logps/rejected": -582.4174194335938, + "loss": 0.6719, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -2.910454511642456, + "rewards/margins": 0.6111830472946167, + "rewards/rejected": -3.521637439727783, + "step": 7340 + }, + { + "epoch": 0.48, + "learning_rate": 3.097414877246814e-06, + "logits/chosen": -2.3740878105163574, + "logits/rejected": -1.9598724842071533, + "logps/chosen": -605.2304077148438, + "logps/rejected": -613.1309814453125, + "loss": 0.5582, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -2.4840683937072754, + "rewards/margins": 0.8388042449951172, + "rewards/rejected": -3.3228728771209717, + "step": 7350 + }, + { + "epoch": 0.48, + "learning_rate": 3.0918688413739197e-06, + "logits/chosen": -2.122352123260498, + "logits/rejected": -1.6851507425308228, + "logps/chosen": -598.9288330078125, + "logps/rejected": -622.5264892578125, + "loss": 0.4821, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.8729500770568848, + "rewards/margins": 0.9844695925712585, + "rewards/rejected": -3.85741925239563, + "step": 7360 + }, + { + "epoch": 0.48, + "learning_rate": 3.0863197180232178e-06, + "logits/chosen": -2.2433207035064697, + "logits/rejected": -2.0860939025878906, + "logps/chosen": -630.0677490234375, + "logps/rejected": -593.6553955078125, + "loss": 0.5065, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.967116117477417, + "rewards/margins": 0.7031041979789734, + "rewards/rejected": -3.670220136642456, + "step": 7370 + }, + { + "epoch": 0.48, + "learning_rate": 3.0807675361416554e-06, + "logits/chosen": -2.351539134979248, + "logits/rejected": -1.8410431146621704, + "logps/chosen": -582.3271484375, + "logps/rejected": -596.9386596679688, + "loss": 0.5604, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -2.467467784881592, + "rewards/margins": 0.7326360940933228, + "rewards/rejected": -3.200104236602783, + "step": 7380 + }, + { + "epoch": 0.48, + "learning_rate": 3.0752123246921327e-06, + "logits/chosen": -1.9665231704711914, + "logits/rejected": -2.0008537769317627, + "logps/chosen": -475.9529724121094, + "logps/rejected": -584.5914916992188, + "loss": 0.5119, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.1385912895202637, + "rewards/margins": 1.027195692062378, + "rewards/rejected": -3.1657867431640625, + "step": 7390 + }, + { + "epoch": 0.48, + "learning_rate": 3.069654112653353e-06, + "logits/chosen": -1.7564090490341187, + "logits/rejected": -1.7700964212417603, + "logps/chosen": -577.4471435546875, + "logps/rejected": -689.7198486328125, + "loss": 0.459, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.926849126815796, + "rewards/margins": 1.3047624826431274, + "rewards/rejected": -4.231611728668213, + "step": 7400 + }, + { + "epoch": 0.48, + "learning_rate": 3.064092929019673e-06, + "logits/chosen": -1.8226516246795654, + "logits/rejected": -1.5395643711090088, + "logps/chosen": -557.78955078125, + "logps/rejected": -618.7718505859375, + "loss": 0.5415, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -2.797609329223633, + "rewards/margins": 0.6625191569328308, + "rewards/rejected": -3.4601283073425293, + "step": 7410 + }, + { + "epoch": 0.49, + "learning_rate": 3.058528802800952e-06, + "logits/chosen": -2.0301146507263184, + "logits/rejected": -2.247178316116333, + "logps/chosen": -646.4931640625, + "logps/rejected": -829.6624755859375, + "loss": 0.5092, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -2.2833645343780518, + "rewards/margins": 1.4274927377700806, + "rewards/rejected": -3.710857391357422, + "step": 7420 + }, + { + "epoch": 0.49, + "learning_rate": 3.052961763022397e-06, + "logits/chosen": -1.8858773708343506, + "logits/rejected": -2.2624406814575195, + "logps/chosen": -570.9578857421875, + "logps/rejected": -716.4319458007812, + "loss": 0.4434, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -2.4454357624053955, + "rewards/margins": 1.0548083782196045, + "rewards/rejected": -3.500244140625, + "step": 7430 + }, + { + "epoch": 0.49, + "learning_rate": 3.047391838724415e-06, + "logits/chosen": -2.4916012287139893, + "logits/rejected": -1.750939965248108, + "logps/chosen": -706.2770385742188, + "logps/rejected": -721.5274047851562, + "loss": 0.4539, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -2.9347124099731445, + "rewards/margins": 1.29623281955719, + "rewards/rejected": -4.230945587158203, + "step": 7440 + }, + { + "epoch": 0.49, + "learning_rate": 3.0418190589624587e-06, + "logits/chosen": -2.0243852138519287, + "logits/rejected": -2.1274046897888184, + "logps/chosen": -535.9006958007812, + "logps/rejected": -807.1841430664062, + "loss": 0.5301, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -2.7507853507995605, + "rewards/margins": 0.9818651080131531, + "rewards/rejected": -3.7326502799987793, + "step": 7450 + }, + { + "epoch": 0.49, + "learning_rate": 3.0362434528068784e-06, + "logits/chosen": -1.406724214553833, + "logits/rejected": -2.0237207412719727, + "logps/chosen": -536.3141479492188, + "logps/rejected": -646.9801025390625, + "loss": 0.5888, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -2.90315842628479, + "rewards/margins": 0.8274177312850952, + "rewards/rejected": -3.7305760383605957, + "step": 7460 + }, + { + "epoch": 0.49, + "learning_rate": 3.0306650493427657e-06, + "logits/chosen": -2.1693460941314697, + "logits/rejected": -2.2521302700042725, + "logps/chosen": -550.40234375, + "logps/rejected": -742.0943603515625, + "loss": 0.3351, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -3.001873731613159, + "rewards/margins": 1.350730299949646, + "rewards/rejected": -4.352604389190674, + "step": 7470 + }, + { + "epoch": 0.49, + "learning_rate": 3.0250838776698077e-06, + "logits/chosen": -2.1576735973358154, + "logits/rejected": -2.0279083251953125, + "logps/chosen": -535.0850830078125, + "logps/rejected": -600.74072265625, + "loss": 0.4392, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -2.38101863861084, + "rewards/margins": 0.6856867074966431, + "rewards/rejected": -3.0667052268981934, + "step": 7480 + }, + { + "epoch": 0.49, + "learning_rate": 3.0194999669021275e-06, + "logits/chosen": -2.003931760787964, + "logits/rejected": -1.7968238592147827, + "logps/chosen": -559.0775756835938, + "logps/rejected": -549.0262451171875, + "loss": 0.5302, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -2.790287733078003, + "rewards/margins": 0.9173834919929504, + "rewards/rejected": -3.7076709270477295, + "step": 7490 + }, + { + "epoch": 0.49, + "learning_rate": 3.0139133461681403e-06, + "logits/chosen": -2.3907244205474854, + "logits/rejected": -2.0973424911499023, + "logps/chosen": -606.6914672851562, + "logps/rejected": -637.0055541992188, + "loss": 0.4609, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -2.213467597961426, + "rewards/margins": 1.0296380519866943, + "rewards/rejected": -3.243105411529541, + "step": 7500 + }, + { + "epoch": 0.49, + "learning_rate": 3.0083240446103965e-06, + "logits/chosen": -1.9050096273422241, + "logits/rejected": -1.8977136611938477, + "logps/chosen": -590.552734375, + "logps/rejected": -677.3780517578125, + "loss": 0.5305, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -3.0147809982299805, + "rewards/margins": 1.719355583190918, + "rewards/rejected": -4.734137058258057, + "step": 7510 + }, + { + "epoch": 0.49, + "learning_rate": 3.0027320913854306e-06, + "logits/chosen": -2.0884788036346436, + "logits/rejected": -1.149019718170166, + "logps/chosen": -538.6810913085938, + "logps/rejected": -519.0482788085938, + "loss": 0.5387, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -2.799140453338623, + "rewards/margins": 0.6474732160568237, + "rewards/rejected": -3.4466137886047363, + "step": 7520 + }, + { + "epoch": 0.49, + "learning_rate": 2.997137515663609e-06, + "logits/chosen": -1.813439130783081, + "logits/rejected": -2.1003925800323486, + "logps/chosen": -560.1712036132812, + "logps/rejected": -700.1915283203125, + "loss": 0.5146, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -2.738173484802246, + "rewards/margins": 1.099260926246643, + "rewards/rejected": -3.8374342918395996, + "step": 7530 + }, + { + "epoch": 0.49, + "learning_rate": 2.991540346628981e-06, + "logits/chosen": -1.7849540710449219, + "logits/rejected": -1.9049389362335205, + "logps/chosen": -625.4427490234375, + "logps/rejected": -718.796875, + "loss": 0.679, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -3.618159532546997, + "rewards/margins": 0.27094143629074097, + "rewards/rejected": -3.8891005516052246, + "step": 7540 + }, + { + "epoch": 0.49, + "learning_rate": 2.985940613479121e-06, + "logits/chosen": -1.992310881614685, + "logits/rejected": -1.9023126363754272, + "logps/chosen": -556.0280151367188, + "logps/rejected": -602.31787109375, + "loss": 0.8377, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -2.62559175491333, + "rewards/margins": 0.700847327709198, + "rewards/rejected": -3.3264389038085938, + "step": 7550 + }, + { + "epoch": 0.49, + "learning_rate": 2.980338345424981e-06, + "logits/chosen": -2.198732852935791, + "logits/rejected": -2.181652069091797, + "logps/chosen": -590.9139404296875, + "logps/rejected": -680.5958251953125, + "loss": 0.4671, + "rewards/accuracies": 0.75, + "rewards/chosen": -3.351149082183838, + "rewards/margins": 1.126341462135315, + "rewards/rejected": -4.477490425109863, + "step": 7560 + }, + { + "epoch": 0.5, + "learning_rate": 2.974733571690735e-06, + "logits/chosen": -2.1074137687683105, + "logits/rejected": -1.6007953882217407, + "logps/chosen": -622.3907470703125, + "logps/rejected": -614.3546142578125, + "loss": 0.6541, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -3.3112881183624268, + "rewards/margins": 0.5598162412643433, + "rewards/rejected": -3.8711044788360596, + "step": 7570 + }, + { + "epoch": 0.5, + "learning_rate": 2.9691263215136274e-06, + "logits/chosen": -1.917372465133667, + "logits/rejected": -2.2247812747955322, + "logps/chosen": -559.89599609375, + "logps/rejected": -755.5455322265625, + "loss": 0.5172, + "rewards/accuracies": 0.75, + "rewards/chosen": -3.157663106918335, + "rewards/margins": 1.2236320972442627, + "rewards/rejected": -4.381295204162598, + "step": 7580 + }, + { + "epoch": 0.5, + "learning_rate": 2.963516624143823e-06, + "logits/chosen": -1.9110147953033447, + "logits/rejected": -1.9160484075546265, + "logps/chosen": -566.1820678710938, + "logps/rejected": -635.0407104492188, + "loss": 0.5732, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -3.1472010612487793, + "rewards/margins": 0.6961196660995483, + "rewards/rejected": -3.843320846557617, + "step": 7590 + }, + { + "epoch": 0.5, + "learning_rate": 2.9579045088442504e-06, + "logits/chosen": -1.8146030902862549, + "logits/rejected": -2.293555736541748, + "logps/chosen": -662.2504272460938, + "logps/rejected": -803.9173583984375, + "loss": 0.6416, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -3.340151309967041, + "rewards/margins": 0.6970237493515015, + "rewards/rejected": -4.037175178527832, + "step": 7600 + }, + { + "epoch": 0.5, + "learning_rate": 2.9522900048904534e-06, + "logits/chosen": -2.024477481842041, + "logits/rejected": -2.045595645904541, + "logps/chosen": -546.9307250976562, + "logps/rejected": -599.1251220703125, + "loss": 0.616, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -2.589736223220825, + "rewards/margins": 0.8832021951675415, + "rewards/rejected": -3.472938060760498, + "step": 7610 + }, + { + "epoch": 0.5, + "learning_rate": 2.9466731415704343e-06, + "logits/chosen": -1.8404598236083984, + "logits/rejected": -1.3947179317474365, + "logps/chosen": -658.2296142578125, + "logps/rejected": -744.2030029296875, + "loss": 0.5928, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -3.3021206855773926, + "rewards/margins": 0.7320025563240051, + "rewards/rejected": -4.034123420715332, + "step": 7620 + }, + { + "epoch": 0.5, + "learning_rate": 2.941053948184503e-06, + "logits/chosen": -2.174973487854004, + "logits/rejected": -2.1286215782165527, + "logps/chosen": -636.4111328125, + "logps/rejected": -579.2327270507812, + "loss": 0.5121, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -2.6131653785705566, + "rewards/margins": 0.3719381093978882, + "rewards/rejected": -2.985103130340576, + "step": 7630 + }, + { + "epoch": 0.5, + "learning_rate": 2.935432454045125e-06, + "logits/chosen": -1.993748664855957, + "logits/rejected": -1.7756856679916382, + "logps/chosen": -507.26373291015625, + "logps/rejected": -602.416015625, + "loss": 0.5149, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -2.748135566711426, + "rewards/margins": 0.6938328742980957, + "rewards/rejected": -3.4419684410095215, + "step": 7640 + }, + { + "epoch": 0.5, + "learning_rate": 2.929808688476768e-06, + "logits/chosen": -1.8288981914520264, + "logits/rejected": -1.465491533279419, + "logps/chosen": -577.890380859375, + "logps/rejected": -576.1401977539062, + "loss": 0.5258, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -2.685096502304077, + "rewards/margins": 0.8233284950256348, + "rewards/rejected": -3.508425235748291, + "step": 7650 + }, + { + "epoch": 0.5, + "learning_rate": 2.924182680815748e-06, + "logits/chosen": -1.9489812850952148, + "logits/rejected": -1.8596827983856201, + "logps/chosen": -533.8180541992188, + "logps/rejected": -677.320556640625, + "loss": 0.5126, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -2.3371520042419434, + "rewards/margins": 1.4479613304138184, + "rewards/rejected": -3.7851130962371826, + "step": 7660 + }, + { + "epoch": 0.5, + "learning_rate": 2.9185544604100765e-06, + "logits/chosen": -2.1623177528381348, + "logits/rejected": -1.8493192195892334, + "logps/chosen": -606.8828125, + "logps/rejected": -761.3399658203125, + "loss": 0.6452, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -2.8764357566833496, + "rewards/margins": 0.8948076367378235, + "rewards/rejected": -3.7712433338165283, + "step": 7670 + }, + { + "epoch": 0.5, + "learning_rate": 2.9129240566193083e-06, + "logits/chosen": -2.0953116416931152, + "logits/rejected": -1.8859145641326904, + "logps/chosen": -606.9465942382812, + "logps/rejected": -633.6063842773438, + "loss": 0.5791, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -2.3007349967956543, + "rewards/margins": 1.156020164489746, + "rewards/rejected": -3.4567553997039795, + "step": 7680 + }, + { + "epoch": 0.5, + "learning_rate": 2.9072914988143874e-06, + "logits/chosen": -1.8157374858856201, + "logits/rejected": -1.3965591192245483, + "logps/chosen": -524.5069580078125, + "logps/rejected": -614.9376220703125, + "loss": 0.6726, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -2.7255921363830566, + "rewards/margins": 0.8382970094680786, + "rewards/rejected": -3.5638890266418457, + "step": 7690 + }, + { + "epoch": 0.5, + "learning_rate": 2.9016568163774956e-06, + "logits/chosen": -1.7731380462646484, + "logits/rejected": -1.441528081893921, + "logps/chosen": -490.97412109375, + "logps/rejected": -556.3486938476562, + "loss": 0.6411, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -3.0641536712646484, + "rewards/margins": 0.7212511301040649, + "rewards/rejected": -3.785404920578003, + "step": 7700 + }, + { + "epoch": 0.5, + "learning_rate": 2.8960200387018942e-06, + "logits/chosen": -1.8296773433685303, + "logits/rejected": -1.2881231307983398, + "logps/chosen": -549.3494873046875, + "logps/rejected": -603.0115966796875, + "loss": 0.6182, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -2.878037214279175, + "rewards/margins": 0.7086192965507507, + "rewards/rejected": -3.5866565704345703, + "step": 7710 + }, + { + "epoch": 0.51, + "learning_rate": 2.8903811951917792e-06, + "logits/chosen": -2.2229435443878174, + "logits/rejected": -1.958284616470337, + "logps/chosen": -506.4098205566406, + "logps/rejected": -623.1008911132812, + "loss": 0.3227, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -2.339682102203369, + "rewards/margins": 1.6844733953475952, + "rewards/rejected": -4.024155616760254, + "step": 7720 + }, + { + "epoch": 0.51, + "learning_rate": 2.88474031526212e-06, + "logits/chosen": -1.440571904182434, + "logits/rejected": -1.1719087362289429, + "logps/chosen": -575.4572143554688, + "logps/rejected": -750.7650146484375, + "loss": 0.5327, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -3.3039677143096924, + "rewards/margins": 1.0600093603134155, + "rewards/rejected": -4.363976955413818, + "step": 7730 + }, + { + "epoch": 0.51, + "learning_rate": 2.879097428338509e-06, + "logits/chosen": -1.9522345066070557, + "logits/rejected": -1.9992725849151611, + "logps/chosen": -740.0489501953125, + "logps/rejected": -771.926513671875, + "loss": 0.6117, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.375081777572632, + "rewards/margins": 0.974968433380127, + "rewards/rejected": -3.350050449371338, + "step": 7740 + }, + { + "epoch": 0.51, + "learning_rate": 2.8734525638570094e-06, + "logits/chosen": -1.8566176891326904, + "logits/rejected": -1.8508907556533813, + "logps/chosen": -519.3516235351562, + "logps/rejected": -715.54736328125, + "loss": 0.4383, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -2.836459159851074, + "rewards/margins": 1.5149306058883667, + "rewards/rejected": -4.3513898849487305, + "step": 7750 + }, + { + "epoch": 0.51, + "learning_rate": 2.8678057512639982e-06, + "logits/chosen": -2.220822334289551, + "logits/rejected": -2.013430118560791, + "logps/chosen": -572.8555908203125, + "logps/rejected": -565.0475463867188, + "loss": 0.5743, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -2.433563709259033, + "rewards/margins": 0.9110967516899109, + "rewards/rejected": -3.3446602821350098, + "step": 7760 + }, + { + "epoch": 0.51, + "learning_rate": 2.8621570200160172e-06, + "logits/chosen": -2.3124799728393555, + "logits/rejected": -1.682875394821167, + "logps/chosen": -638.2242431640625, + "logps/rejected": -681.7181396484375, + "loss": 0.4721, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.2771756649017334, + "rewards/margins": 1.1830321550369263, + "rewards/rejected": -3.460207462310791, + "step": 7770 + }, + { + "epoch": 0.51, + "learning_rate": 2.856506399579615e-06, + "logits/chosen": -1.984126329421997, + "logits/rejected": -1.962230920791626, + "logps/chosen": -581.7562866210938, + "logps/rejected": -631.6907348632812, + "loss": 0.4095, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -2.4491162300109863, + "rewards/margins": 1.3098396062850952, + "rewards/rejected": -3.758955717086792, + "step": 7780 + }, + { + "epoch": 0.51, + "learning_rate": 2.8508539194311964e-06, + "logits/chosen": -1.8445428609848022, + "logits/rejected": -1.3157740831375122, + "logps/chosen": -543.8956298828125, + "logps/rejected": -637.4561157226562, + "loss": 0.3878, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -3.0309014320373535, + "rewards/margins": 0.7112507820129395, + "rewards/rejected": -3.7421517372131348, + "step": 7790 + }, + { + "epoch": 0.51, + "learning_rate": 2.8451996090568656e-06, + "logits/chosen": -2.2228527069091797, + "logits/rejected": -2.2127482891082764, + "logps/chosen": -584.2711181640625, + "logps/rejected": -585.3831787109375, + "loss": 0.6862, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -2.6453375816345215, + "rewards/margins": 0.6250047087669373, + "rewards/rejected": -3.2703425884246826, + "step": 7800 + }, + { + "epoch": 0.51, + "learning_rate": 2.839543497952276e-06, + "logits/chosen": -1.9297635555267334, + "logits/rejected": -1.6234534978866577, + "logps/chosen": -576.9271240234375, + "logps/rejected": -643.7190551757812, + "loss": 0.7176, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.2241415977478027, + "rewards/margins": 1.2629339694976807, + "rewards/rejected": -3.4870758056640625, + "step": 7810 + }, + { + "epoch": 0.51, + "learning_rate": 2.833885615622474e-06, + "logits/chosen": -2.187995195388794, + "logits/rejected": -2.138394594192505, + "logps/chosen": -568.1746215820312, + "logps/rejected": -569.004150390625, + "loss": 0.5921, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -2.4697365760803223, + "rewards/margins": 0.4508172869682312, + "rewards/rejected": -2.9205539226531982, + "step": 7820 + }, + { + "epoch": 0.51, + "learning_rate": 2.8282259915817454e-06, + "logits/chosen": -2.0024540424346924, + "logits/rejected": -2.0259571075439453, + "logps/chosen": -491.30303955078125, + "logps/rejected": -553.1123657226562, + "loss": 0.5645, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -2.398832082748413, + "rewards/margins": 0.8270804286003113, + "rewards/rejected": -3.225912570953369, + "step": 7830 + }, + { + "epoch": 0.51, + "learning_rate": 2.8225646553534614e-06, + "logits/chosen": -2.020742416381836, + "logits/rejected": -1.9773505926132202, + "logps/chosen": -591.7495727539062, + "logps/rejected": -621.8164672851562, + "loss": 0.5181, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.5809714794158936, + "rewards/margins": 1.1599743366241455, + "rewards/rejected": -3.740945816040039, + "step": 7840 + }, + { + "epoch": 0.51, + "learning_rate": 2.8169016364699255e-06, + "logits/chosen": -2.116873264312744, + "logits/rejected": -1.9739990234375, + "logps/chosen": -645.8020629882812, + "logps/rejected": -601.5529174804688, + "loss": 0.4981, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.3944008350372314, + "rewards/margins": 1.0491383075714111, + "rewards/rejected": -3.4435393810272217, + "step": 7850 + }, + { + "epoch": 0.51, + "learning_rate": 2.811236964472217e-06, + "logits/chosen": -1.8916501998901367, + "logits/rejected": -2.0408172607421875, + "logps/chosen": -615.2294311523438, + "logps/rejected": -675.1611328125, + "loss": 0.5551, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -2.5767409801483154, + "rewards/margins": 0.8729038238525391, + "rewards/rejected": -3.4496452808380127, + "step": 7860 + }, + { + "epoch": 0.51, + "learning_rate": 2.805570668910041e-06, + "logits/chosen": -2.265167474746704, + "logits/rejected": -1.8190959692001343, + "logps/chosen": -616.2484130859375, + "logps/rejected": -596.4984130859375, + "loss": 0.482, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.5450892448425293, + "rewards/margins": 1.3092725276947021, + "rewards/rejected": -3.8543620109558105, + "step": 7870 + }, + { + "epoch": 0.52, + "learning_rate": 2.7999027793415695e-06, + "logits/chosen": -1.995221495628357, + "logits/rejected": -1.8924598693847656, + "logps/chosen": -523.5404052734375, + "logps/rejected": -646.7638549804688, + "loss": 0.5813, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -2.0813703536987305, + "rewards/margins": 0.8624595403671265, + "rewards/rejected": -2.9438297748565674, + "step": 7880 + }, + { + "epoch": 0.52, + "learning_rate": 2.794233325333293e-06, + "logits/chosen": -2.136390209197998, + "logits/rejected": -1.8115367889404297, + "logps/chosen": -530.805908203125, + "logps/rejected": -636.94970703125, + "loss": 0.5549, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.795574188232422, + "rewards/margins": 1.2153223752975464, + "rewards/rejected": -4.010896682739258, + "step": 7890 + }, + { + "epoch": 0.52, + "learning_rate": 2.7885623364598597e-06, + "logits/chosen": -1.9392417669296265, + "logits/rejected": -1.6217542886734009, + "logps/chosen": -475.58160400390625, + "logps/rejected": -630.3709716796875, + "loss": 0.3976, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -1.8851187229156494, + "rewards/margins": 1.734307050704956, + "rewards/rejected": -3.6194255352020264, + "step": 7900 + }, + { + "epoch": 0.52, + "learning_rate": 2.782889842303926e-06, + "logits/chosen": -2.280442714691162, + "logits/rejected": -1.943890929222107, + "logps/chosen": -546.3623657226562, + "logps/rejected": -614.0206298828125, + "loss": 0.5306, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -2.287599563598633, + "rewards/margins": 1.1138108968734741, + "rewards/rejected": -3.4014103412628174, + "step": 7910 + }, + { + "epoch": 0.52, + "learning_rate": 2.7772158724559987e-06, + "logits/chosen": -2.0244617462158203, + "logits/rejected": -1.449589490890503, + "logps/chosen": -476.2408752441406, + "logps/rejected": -687.8169555664062, + "loss": 0.4509, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.570810317993164, + "rewards/margins": 1.615671157836914, + "rewards/rejected": -4.186481475830078, + "step": 7920 + }, + { + "epoch": 0.52, + "learning_rate": 2.7715404565142856e-06, + "logits/chosen": -2.1096596717834473, + "logits/rejected": -1.701581597328186, + "logps/chosen": -510.6554260253906, + "logps/rejected": -584.50732421875, + "loss": 0.6696, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -2.2401087284088135, + "rewards/margins": 0.8200968503952026, + "rewards/rejected": -3.0602056980133057, + "step": 7930 + }, + { + "epoch": 0.52, + "learning_rate": 2.7658636240845354e-06, + "logits/chosen": -1.7610938549041748, + "logits/rejected": -1.8943217992782593, + "logps/chosen": -433.93353271484375, + "logps/rejected": -493.35736083984375, + "loss": 0.5261, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -2.544769287109375, + "rewards/margins": 0.5446535348892212, + "rewards/rejected": -3.0894227027893066, + "step": 7940 + }, + { + "epoch": 0.52, + "learning_rate": 2.7601854047798872e-06, + "logits/chosen": -2.537883758544922, + "logits/rejected": -2.118722915649414, + "logps/chosen": -635.4991455078125, + "logps/rejected": -613.1102905273438, + "loss": 0.5925, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -1.841888427734375, + "rewards/margins": 0.7391589879989624, + "rewards/rejected": -2.581047296524048, + "step": 7950 + }, + { + "epoch": 0.52, + "learning_rate": 2.7545058282207148e-06, + "logits/chosen": -2.077910900115967, + "logits/rejected": -1.944738745689392, + "logps/chosen": -593.0620727539062, + "logps/rejected": -686.479736328125, + "loss": 0.4804, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -2.03245210647583, + "rewards/margins": 1.290585994720459, + "rewards/rejected": -3.323037624359131, + "step": 7960 + }, + { + "epoch": 0.52, + "learning_rate": 2.748824924034471e-06, + "logits/chosen": -2.2640910148620605, + "logits/rejected": -2.149371862411499, + "logps/chosen": -556.6024169921875, + "logps/rejected": -612.380859375, + "loss": 0.4687, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -2.009018659591675, + "rewards/margins": 1.4819543361663818, + "rewards/rejected": -3.4909729957580566, + "step": 7970 + }, + { + "epoch": 0.52, + "learning_rate": 2.743142721855536e-06, + "logits/chosen": -2.1707024574279785, + "logits/rejected": -1.8723373413085938, + "logps/chosen": -507.60791015625, + "logps/rejected": -539.167724609375, + "loss": 0.7497, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -2.035862445831299, + "rewards/margins": 0.5101307034492493, + "rewards/rejected": -2.5459933280944824, + "step": 7980 + }, + { + "epoch": 0.52, + "learning_rate": 2.737459251325058e-06, + "logits/chosen": -2.2062249183654785, + "logits/rejected": -2.106947422027588, + "logps/chosen": -587.2820434570312, + "logps/rejected": -550.6422119140625, + "loss": 0.5874, + "rewards/accuracies": 0.75, + "rewards/chosen": -1.542938470840454, + "rewards/margins": 0.6787039041519165, + "rewards/rejected": -2.221642255783081, + "step": 7990 + }, + { + "epoch": 0.52, + "learning_rate": 2.731774542090804e-06, + "logits/chosen": -2.335017681121826, + "logits/rejected": -1.8789472579956055, + "logps/chosen": -478.0525817871094, + "logps/rejected": -576.288330078125, + "loss": 0.5073, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -1.8263978958129883, + "rewards/margins": 1.1661754846572876, + "rewards/rejected": -2.9925730228424072, + "step": 8000 + }, + { + "epoch": 0.52, + "learning_rate": 2.7260886238070034e-06, + "logits/chosen": -1.8527981042861938, + "logits/rejected": -1.994049072265625, + "logps/chosen": -495.73193359375, + "logps/rejected": -610.97900390625, + "loss": 0.5424, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -2.4737749099731445, + "rewards/margins": 0.9065383672714233, + "rewards/rejected": -3.3803133964538574, + "step": 8010 + }, + { + "epoch": 0.52, + "learning_rate": 2.72040152613419e-06, + "logits/chosen": -1.8517286777496338, + "logits/rejected": -2.2476627826690674, + "logps/chosen": -539.6078491210938, + "logps/rejected": -745.31982421875, + "loss": 0.4956, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.0612945556640625, + "rewards/margins": 1.223171591758728, + "rewards/rejected": -3.28446626663208, + "step": 8020 + }, + { + "epoch": 0.53, + "learning_rate": 2.7147132787390516e-06, + "logits/chosen": -1.8884389400482178, + "logits/rejected": -1.7699356079101562, + "logps/chosen": -534.407470703125, + "logps/rejected": -597.0699462890625, + "loss": 0.4777, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -2.3583502769470215, + "rewards/margins": 0.9624267816543579, + "rewards/rejected": -3.3207767009735107, + "step": 8030 + }, + { + "epoch": 0.53, + "learning_rate": 2.709023911294273e-06, + "logits/chosen": -2.1596829891204834, + "logits/rejected": -1.7965008020401, + "logps/chosen": -496.8246154785156, + "logps/rejected": -651.1993408203125, + "loss": 0.5823, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -1.7730891704559326, + "rewards/margins": 1.466064691543579, + "rewards/rejected": -3.239154100418091, + "step": 8040 + }, + { + "epoch": 0.53, + "learning_rate": 2.7033334534783806e-06, + "logits/chosen": -2.329775333404541, + "logits/rejected": -1.6206992864608765, + "logps/chosen": -534.77392578125, + "logps/rejected": -617.8519897460938, + "loss": 0.5103, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.2892603874206543, + "rewards/margins": 1.2485960721969604, + "rewards/rejected": -3.5378565788269043, + "step": 8050 + }, + { + "epoch": 0.53, + "learning_rate": 2.697641934975592e-06, + "logits/chosen": -1.8848540782928467, + "logits/rejected": -1.6786342859268188, + "logps/chosen": -503.2972106933594, + "logps/rejected": -617.09521484375, + "loss": 0.3394, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -2.5366880893707275, + "rewards/margins": 1.0685951709747314, + "rewards/rejected": -3.60528302192688, + "step": 8060 + }, + { + "epoch": 0.53, + "learning_rate": 2.691949385475654e-06, + "logits/chosen": -2.042297601699829, + "logits/rejected": -1.7582738399505615, + "logps/chosen": -531.7859497070312, + "logps/rejected": -550.2825927734375, + "loss": 0.5953, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -2.7436671257019043, + "rewards/margins": 1.0303003787994385, + "rewards/rejected": -3.7739672660827637, + "step": 8070 + }, + { + "epoch": 0.53, + "learning_rate": 2.6862558346736937e-06, + "logits/chosen": -2.389453411102295, + "logits/rejected": -2.338557481765747, + "logps/chosen": -642.6590576171875, + "logps/rejected": -727.0181274414062, + "loss": 0.4292, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -1.8770205974578857, + "rewards/margins": 1.48763906955719, + "rewards/rejected": -3.3646597862243652, + "step": 8080 + }, + { + "epoch": 0.53, + "learning_rate": 2.6805613122700617e-06, + "logits/chosen": -1.9517875909805298, + "logits/rejected": -2.075026035308838, + "logps/chosen": -415.518798828125, + "logps/rejected": -472.64398193359375, + "loss": 0.4211, + "rewards/accuracies": 0.75, + "rewards/chosen": -1.528329849243164, + "rewards/margins": 0.8481928706169128, + "rewards/rejected": -2.3765225410461426, + "step": 8090 + }, + { + "epoch": 0.53, + "learning_rate": 2.674865847970176e-06, + "logits/chosen": -1.7727165222167969, + "logits/rejected": -2.060763120651245, + "logps/chosen": -469.1021423339844, + "logps/rejected": -458.26763916015625, + "loss": 0.4274, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -2.1519832611083984, + "rewards/margins": 0.7703964114189148, + "rewards/rejected": -2.922379732131958, + "step": 8100 + }, + { + "epoch": 0.53, + "learning_rate": 2.669169471484368e-06, + "logits/chosen": -1.7492620944976807, + "logits/rejected": -1.66244637966156, + "logps/chosen": -560.0657958984375, + "logps/rejected": -617.1555786132812, + "loss": 0.5842, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -2.8839614391326904, + "rewards/margins": 0.47338324785232544, + "rewards/rejected": -3.35734486579895, + "step": 8110 + }, + { + "epoch": 0.53, + "learning_rate": 2.6634722125277278e-06, + "logits/chosen": -2.0980145931243896, + "logits/rejected": -1.6550731658935547, + "logps/chosen": -477.326416015625, + "logps/rejected": -490.2355041503906, + "loss": 0.4816, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -1.6155064105987549, + "rewards/margins": 0.9734585881233215, + "rewards/rejected": -2.5889651775360107, + "step": 8120 + }, + { + "epoch": 0.53, + "learning_rate": 2.6577741008199498e-06, + "logits/chosen": -1.685837984085083, + "logits/rejected": -1.5082666873931885, + "logps/chosen": -562.9486083984375, + "logps/rejected": -691.534912109375, + "loss": 0.5931, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -3.028662919998169, + "rewards/margins": 0.8841172456741333, + "rewards/rejected": -3.912780284881592, + "step": 8130 + }, + { + "epoch": 0.53, + "learning_rate": 2.652075166085175e-06, + "logits/chosen": -1.9452396631240845, + "logits/rejected": -2.100419521331787, + "logps/chosen": -493.06060791015625, + "logps/rejected": -645.3451538085938, + "loss": 0.4167, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -1.8556363582611084, + "rewards/margins": 1.4208673238754272, + "rewards/rejected": -3.276503801345825, + "step": 8140 + }, + { + "epoch": 0.53, + "learning_rate": 2.6463754380518395e-06, + "logits/chosen": -2.252941131591797, + "logits/rejected": -1.7829294204711914, + "logps/chosen": -621.8170776367188, + "logps/rejected": -660.5582275390625, + "loss": 0.6381, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -2.8197734355926514, + "rewards/margins": 0.8733735084533691, + "rewards/rejected": -3.6931469440460205, + "step": 8150 + }, + { + "epoch": 0.53, + "learning_rate": 2.6406749464525167e-06, + "logits/chosen": -2.0800015926361084, + "logits/rejected": -2.3741583824157715, + "logps/chosen": -509.7748107910156, + "logps/rejected": -587.732421875, + "loss": 0.6648, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -2.1928679943084717, + "rewards/margins": 0.18233470618724823, + "rewards/rejected": -2.3752026557922363, + "step": 8160 + }, + { + "epoch": 0.53, + "learning_rate": 2.634973721023762e-06, + "logits/chosen": -1.9787933826446533, + "logits/rejected": -2.109104871749878, + "logps/chosen": -439.8349609375, + "logps/rejected": -533.9478759765625, + "loss": 0.547, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -2.5185022354125977, + "rewards/margins": 0.8148691058158875, + "rewards/rejected": -3.33337140083313, + "step": 8170 + }, + { + "epoch": 0.54, + "learning_rate": 2.6292717915059605e-06, + "logits/chosen": -2.510175943374634, + "logits/rejected": -2.1551032066345215, + "logps/chosen": -553.9089965820312, + "logps/rejected": -695.7274169921875, + "loss": 0.4459, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -1.9888378381729126, + "rewards/margins": 1.4000943899154663, + "rewards/rejected": -3.388932466506958, + "step": 8180 + }, + { + "epoch": 0.54, + "learning_rate": 2.6235691876431706e-06, + "logits/chosen": -2.161252498626709, + "logits/rejected": -1.870642900466919, + "logps/chosen": -588.6565551757812, + "logps/rejected": -673.9668579101562, + "loss": 0.5259, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.203993082046509, + "rewards/margins": 1.1176879405975342, + "rewards/rejected": -3.321680784225464, + "step": 8190 + }, + { + "epoch": 0.54, + "learning_rate": 2.6178659391829673e-06, + "logits/chosen": -1.940239667892456, + "logits/rejected": -1.860875129699707, + "logps/chosen": -469.7591247558594, + "logps/rejected": -469.07171630859375, + "loss": 0.5019, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -2.2931342124938965, + "rewards/margins": 0.6397870182991028, + "rewards/rejected": -2.9329214096069336, + "step": 8200 + }, + { + "epoch": 0.54, + "learning_rate": 2.6121620758762877e-06, + "logits/chosen": -1.9328632354736328, + "logits/rejected": -2.13527774810791, + "logps/chosen": -391.53863525390625, + "logps/rejected": -537.9548950195312, + "loss": 0.3995, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -2.0132150650024414, + "rewards/margins": 0.770500898361206, + "rewards/rejected": -2.7837162017822266, + "step": 8210 + }, + { + "epoch": 0.54, + "learning_rate": 2.606457627477277e-06, + "logits/chosen": -2.294654369354248, + "logits/rejected": -1.6887871026992798, + "logps/chosen": -506.1493225097656, + "logps/rejected": -584.48388671875, + "loss": 0.4721, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -1.9555402994155884, + "rewards/margins": 1.706686019897461, + "rewards/rejected": -3.6622262001037598, + "step": 8220 + }, + { + "epoch": 0.54, + "learning_rate": 2.6007526237431324e-06, + "logits/chosen": -2.1083502769470215, + "logits/rejected": -1.6280349493026733, + "logps/chosen": -593.1681518554688, + "logps/rejected": -582.7951049804688, + "loss": 0.7511, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -2.666224956512451, + "rewards/margins": 0.6657021641731262, + "rewards/rejected": -3.3319270610809326, + "step": 8230 + }, + { + "epoch": 0.54, + "learning_rate": 2.5950470944339478e-06, + "logits/chosen": -2.0212769508361816, + "logits/rejected": -2.3718209266662598, + "logps/chosen": -608.8660278320312, + "logps/rejected": -666.3863525390625, + "loss": 0.4838, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -1.9420130252838135, + "rewards/margins": 1.0956814289093018, + "rewards/rejected": -3.0376946926116943, + "step": 8240 + }, + { + "epoch": 0.54, + "learning_rate": 2.58934106931256e-06, + "logits/chosen": -1.927168607711792, + "logits/rejected": -1.600671410560608, + "logps/chosen": -461.33953857421875, + "logps/rejected": -605.0335083007812, + "loss": 0.3508, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -2.1052207946777344, + "rewards/margins": 1.9401206970214844, + "rewards/rejected": -4.045341491699219, + "step": 8250 + }, + { + "epoch": 0.54, + "learning_rate": 2.58363457814439e-06, + "logits/chosen": -2.3823981285095215, + "logits/rejected": -1.7535566091537476, + "logps/chosen": -635.8217163085938, + "logps/rejected": -611.2908325195312, + "loss": 0.6405, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -3.046607494354248, + "rewards/margins": 0.7194627523422241, + "rewards/rejected": -3.7660701274871826, + "step": 8260 + }, + { + "epoch": 0.54, + "learning_rate": 2.5779276506972924e-06, + "logits/chosen": -2.0197932720184326, + "logits/rejected": -1.9778947830200195, + "logps/chosen": -543.31884765625, + "logps/rejected": -660.4400634765625, + "loss": 0.6526, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -2.641650438308716, + "rewards/margins": 0.852077305316925, + "rewards/rejected": -3.493727445602417, + "step": 8270 + }, + { + "epoch": 0.54, + "learning_rate": 2.5722203167413945e-06, + "logits/chosen": -1.8766615390777588, + "logits/rejected": -1.3877742290496826, + "logps/chosen": -449.7989196777344, + "logps/rejected": -610.9358520507812, + "loss": 0.4744, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -2.8375191688537598, + "rewards/margins": 1.3484939336776733, + "rewards/rejected": -4.186013221740723, + "step": 8280 + }, + { + "epoch": 0.54, + "learning_rate": 2.5665126060489476e-06, + "logits/chosen": -1.892877221107483, + "logits/rejected": -1.736212134361267, + "logps/chosen": -544.884521484375, + "logps/rejected": -616.0360717773438, + "loss": 0.523, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -2.8708701133728027, + "rewards/margins": 0.8997025489807129, + "rewards/rejected": -3.7705726623535156, + "step": 8290 + }, + { + "epoch": 0.54, + "learning_rate": 2.560804548394165e-06, + "logits/chosen": -1.9360411167144775, + "logits/rejected": -2.0777390003204346, + "logps/chosen": -484.18707275390625, + "logps/rejected": -608.6365966796875, + "loss": 0.6754, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -2.4959094524383545, + "rewards/margins": 0.8614379167556763, + "rewards/rejected": -3.3573474884033203, + "step": 8300 + }, + { + "epoch": 0.54, + "learning_rate": 2.5550961735530734e-06, + "logits/chosen": -2.0347352027893066, + "logits/rejected": -1.6178643703460693, + "logps/chosen": -506.50091552734375, + "logps/rejected": -632.1541748046875, + "loss": 0.4265, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -2.2434918880462646, + "rewards/margins": 1.5187815427780151, + "rewards/rejected": -3.7622733116149902, + "step": 8310 + }, + { + "epoch": 0.54, + "learning_rate": 2.549387511303351e-06, + "logits/chosen": -2.024339199066162, + "logits/rejected": -1.7003933191299438, + "logps/chosen": -438.71697998046875, + "logps/rejected": -716.5164794921875, + "loss": 0.2969, + "rewards/accuracies": 0.75, + "rewards/chosen": -1.8050426244735718, + "rewards/margins": 1.772749900817871, + "rewards/rejected": -3.5777924060821533, + "step": 8320 + }, + { + "epoch": 0.55, + "learning_rate": 2.5436785914241774e-06, + "logits/chosen": -2.4603960514068604, + "logits/rejected": -1.6958461999893188, + "logps/chosen": -467.468017578125, + "logps/rejected": -594.4733276367188, + "loss": 0.4276, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -1.301406979560852, + "rewards/margins": 1.6142040491104126, + "rewards/rejected": -2.9156107902526855, + "step": 8330 + }, + { + "epoch": 0.55, + "learning_rate": 2.5379694436960746e-06, + "logits/chosen": -2.165832281112671, + "logits/rejected": -1.8188012838363647, + "logps/chosen": -667.1691284179688, + "logps/rejected": -676.0819702148438, + "loss": 0.489, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -2.6188740730285645, + "rewards/margins": 0.7326040267944336, + "rewards/rejected": -3.351478099822998, + "step": 8340 + }, + { + "epoch": 0.55, + "learning_rate": 2.5322600979007533e-06, + "logits/chosen": -1.8008044958114624, + "logits/rejected": -2.0625736713409424, + "logps/chosen": -565.9276123046875, + "logps/rejected": -608.2940673828125, + "loss": 0.4996, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -1.9943790435791016, + "rewards/margins": 0.9581402540206909, + "rewards/rejected": -2.952519416809082, + "step": 8350 + }, + { + "epoch": 0.55, + "learning_rate": 2.5265505838209592e-06, + "logits/chosen": -1.8943090438842773, + "logits/rejected": -1.882758378982544, + "logps/chosen": -507.8409729003906, + "logps/rejected": -650.7178955078125, + "loss": 0.5263, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -2.271254301071167, + "rewards/margins": 1.2633321285247803, + "rewards/rejected": -3.5345866680145264, + "step": 8360 + }, + { + "epoch": 0.55, + "learning_rate": 2.520840931240314e-06, + "logits/chosen": -2.142739772796631, + "logits/rejected": -1.6363484859466553, + "logps/chosen": -534.8933715820312, + "logps/rejected": -612.6922607421875, + "loss": 0.5113, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -2.1984381675720215, + "rewards/margins": 0.9206544756889343, + "rewards/rejected": -3.1190924644470215, + "step": 8370 + }, + { + "epoch": 0.55, + "learning_rate": 2.515131169943162e-06, + "logits/chosen": -1.9574629068374634, + "logits/rejected": -1.7319316864013672, + "logps/chosen": -486.0719299316406, + "logps/rejected": -586.4020385742188, + "loss": 0.5304, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -2.3126673698425293, + "rewards/margins": 1.0584437847137451, + "rewards/rejected": -3.3711113929748535, + "step": 8380 + }, + { + "epoch": 0.55, + "learning_rate": 2.509421329714416e-06, + "logits/chosen": -2.012256145477295, + "logits/rejected": -1.9683516025543213, + "logps/chosen": -503.61798095703125, + "logps/rejected": -625.0061645507812, + "loss": 0.53, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.458313465118408, + "rewards/margins": 1.1787118911743164, + "rewards/rejected": -3.637025833129883, + "step": 8390 + }, + { + "epoch": 0.55, + "learning_rate": 2.5037114403393987e-06, + "logits/chosen": -2.151923656463623, + "logits/rejected": -1.5198156833648682, + "logps/chosen": -541.1068115234375, + "logps/rejected": -530.3890991210938, + "loss": 0.6136, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -2.6220719814300537, + "rewards/margins": 1.383866548538208, + "rewards/rejected": -4.005938529968262, + "step": 8400 + }, + { + "epoch": 0.55, + "learning_rate": 2.4980015316036908e-06, + "logits/chosen": -2.0013201236724854, + "logits/rejected": -1.9275219440460205, + "logps/chosen": -482.7667541503906, + "logps/rejected": -672.0367431640625, + "loss": 0.5668, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -2.384315252304077, + "rewards/margins": 0.8206236958503723, + "rewards/rejected": -3.2049388885498047, + "step": 8410 + }, + { + "epoch": 0.55, + "learning_rate": 2.4922916332929725e-06, + "logits/chosen": -1.9645591974258423, + "logits/rejected": -1.5715625286102295, + "logps/chosen": -548.0907592773438, + "logps/rejected": -720.3143310546875, + "loss": 0.5238, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -2.744553804397583, + "rewards/margins": 1.1086987257003784, + "rewards/rejected": -3.853252410888672, + "step": 8420 + }, + { + "epoch": 0.55, + "learning_rate": 2.4865817751928716e-06, + "logits/chosen": -2.183432102203369, + "logits/rejected": -1.9140920639038086, + "logps/chosen": -598.3126220703125, + "logps/rejected": -632.6630859375, + "loss": 0.6305, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -3.114100456237793, + "rewards/margins": 0.8765825033187866, + "rewards/rejected": -3.9906837940216064, + "step": 8430 + }, + { + "epoch": 0.55, + "learning_rate": 2.4808719870888037e-06, + "logits/chosen": -2.294104814529419, + "logits/rejected": -1.6438252925872803, + "logps/chosen": -486.91192626953125, + "logps/rejected": -639.6134033203125, + "loss": 0.478, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -2.013448476791382, + "rewards/margins": 1.6693031787872314, + "rewards/rejected": -3.682751417160034, + "step": 8440 + }, + { + "epoch": 0.55, + "learning_rate": 2.4751622987658206e-06, + "logits/chosen": -1.4400765895843506, + "logits/rejected": -1.8438045978546143, + "logps/chosen": -475.89605712890625, + "logps/rejected": -678.0491943359375, + "loss": 0.6017, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.999626636505127, + "rewards/margins": 1.1372374296188354, + "rewards/rejected": -4.136864185333252, + "step": 8450 + }, + { + "epoch": 0.55, + "learning_rate": 2.4694527400084546e-06, + "logits/chosen": -1.9232698678970337, + "logits/rejected": -1.4870688915252686, + "logps/chosen": -568.2553100585938, + "logps/rejected": -500.90887451171875, + "loss": 0.5996, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -2.601705312728882, + "rewards/margins": 0.5879272222518921, + "rewards/rejected": -3.1896328926086426, + "step": 8460 + }, + { + "epoch": 0.55, + "learning_rate": 2.4637433406005607e-06, + "logits/chosen": -1.7582013607025146, + "logits/rejected": -1.7778558731079102, + "logps/chosen": -554.71484375, + "logps/rejected": -532.88720703125, + "loss": 0.4793, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -2.553034782409668, + "rewards/margins": 0.6445740461349487, + "rewards/rejected": -3.1976089477539062, + "step": 8470 + }, + { + "epoch": 0.55, + "learning_rate": 2.4580341303251628e-06, + "logits/chosen": -2.312466621398926, + "logits/rejected": -1.7932695150375366, + "logps/chosen": -528.412353515625, + "logps/rejected": -575.6771240234375, + "loss": 0.4946, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.548846960067749, + "rewards/margins": 0.6522601246833801, + "rewards/rejected": -3.2011075019836426, + "step": 8480 + }, + { + "epoch": 0.56, + "learning_rate": 2.4523251389642984e-06, + "logits/chosen": -2.352581024169922, + "logits/rejected": -1.9734636545181274, + "logps/chosen": -500.60797119140625, + "logps/rejected": -564.6924438476562, + "loss": 0.5307, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.1165223121643066, + "rewards/margins": 0.9649814367294312, + "rewards/rejected": -3.0815041065216064, + "step": 8490 + }, + { + "epoch": 0.56, + "learning_rate": 2.4466163962988626e-06, + "logits/chosen": -2.381840705871582, + "logits/rejected": -2.000971555709839, + "logps/chosen": -584.6832885742188, + "logps/rejected": -670.2131958007812, + "loss": 0.6757, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -2.4059548377990723, + "rewards/margins": 0.6139851212501526, + "rewards/rejected": -3.01993989944458, + "step": 8500 + }, + { + "epoch": 0.56, + "learning_rate": 2.4409079321084543e-06, + "logits/chosen": -2.1239302158355713, + "logits/rejected": -2.165752649307251, + "logps/chosen": -523.6356811523438, + "logps/rejected": -714.048828125, + "loss": 0.77, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -2.8900184631347656, + "rewards/margins": 0.8145949244499207, + "rewards/rejected": -3.704613208770752, + "step": 8510 + }, + { + "epoch": 0.56, + "learning_rate": 2.4351997761712184e-06, + "logits/chosen": -1.9498107433319092, + "logits/rejected": -1.6921589374542236, + "logps/chosen": -650.4454956054688, + "logps/rejected": -674.9125366210938, + "loss": 0.4921, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -2.603874683380127, + "rewards/margins": 1.356870174407959, + "rewards/rejected": -3.9607443809509277, + "step": 8520 + }, + { + "epoch": 0.56, + "learning_rate": 2.4294919582636933e-06, + "logits/chosen": -1.516585350036621, + "logits/rejected": -1.6425130367279053, + "logps/chosen": -594.2330322265625, + "logps/rejected": -834.4051513671875, + "loss": 0.3158, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -2.875373363494873, + "rewards/margins": 1.6923511028289795, + "rewards/rejected": -4.567724227905273, + "step": 8530 + }, + { + "epoch": 0.56, + "learning_rate": 2.423784508160652e-06, + "logits/chosen": -1.718045949935913, + "logits/rejected": -1.8462913036346436, + "logps/chosen": -637.2362670898438, + "logps/rejected": -767.963134765625, + "loss": 0.6127, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -3.217985153198242, + "rewards/margins": 0.6419950127601624, + "rewards/rejected": -3.8599801063537598, + "step": 8540 + }, + { + "epoch": 0.56, + "learning_rate": 2.418077455634951e-06, + "logits/chosen": -1.7958965301513672, + "logits/rejected": -2.0573506355285645, + "logps/chosen": -538.6891479492188, + "logps/rejected": -604.0670776367188, + "loss": 0.6724, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -2.5720856189727783, + "rewards/margins": 0.8947946429252625, + "rewards/rejected": -3.4668803215026855, + "step": 8550 + }, + { + "epoch": 0.56, + "learning_rate": 2.4123708304573714e-06, + "logits/chosen": -1.7513185739517212, + "logits/rejected": -1.7228978872299194, + "logps/chosen": -569.3314208984375, + "logps/rejected": -668.5128173828125, + "loss": 0.3983, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -3.264718532562256, + "rewards/margins": 1.522810697555542, + "rewards/rejected": -4.787528991699219, + "step": 8560 + }, + { + "epoch": 0.56, + "learning_rate": 2.406664662396465e-06, + "logits/chosen": -1.6329927444458008, + "logits/rejected": -1.8342996835708618, + "logps/chosen": -630.3096923828125, + "logps/rejected": -726.5323486328125, + "loss": 0.5556, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -2.3960113525390625, + "rewards/margins": 0.6501597166061401, + "rewards/rejected": -3.046171188354492, + "step": 8570 + }, + { + "epoch": 0.56, + "learning_rate": 2.4009589812184012e-06, + "logits/chosen": -1.9765291213989258, + "logits/rejected": -1.6217161417007446, + "logps/chosen": -490.7498474121094, + "logps/rejected": -631.5427856445312, + "loss": 0.4344, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -2.6923301219940186, + "rewards/margins": 1.2100067138671875, + "rewards/rejected": -3.902336835861206, + "step": 8580 + }, + { + "epoch": 0.56, + "learning_rate": 2.3952538166868073e-06, + "logits/chosen": -2.1259334087371826, + "logits/rejected": -1.6824983358383179, + "logps/chosen": -632.2239990234375, + "logps/rejected": -669.2704467773438, + "loss": 0.491, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.824948310852051, + "rewards/margins": 1.0109959840774536, + "rewards/rejected": -3.835944414138794, + "step": 8590 + }, + { + "epoch": 0.56, + "learning_rate": 2.389549198562616e-06, + "logits/chosen": -1.9586637020111084, + "logits/rejected": -1.9438788890838623, + "logps/chosen": -549.78662109375, + "logps/rejected": -682.9403076171875, + "loss": 0.4568, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -2.497349262237549, + "rewards/margins": 1.207899808883667, + "rewards/rejected": -3.705249071121216, + "step": 8600 + }, + { + "epoch": 0.56, + "learning_rate": 2.3838451566039098e-06, + "logits/chosen": -1.9294124841690063, + "logits/rejected": -2.0406670570373535, + "logps/chosen": -567.9529418945312, + "logps/rejected": -750.5849609375, + "loss": 0.5347, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.2828116416931152, + "rewards/margins": 1.2478787899017334, + "rewards/rejected": -3.5306904315948486, + "step": 8610 + }, + { + "epoch": 0.56, + "learning_rate": 2.3781417205657662e-06, + "logits/chosen": -2.027916669845581, + "logits/rejected": -1.8153997659683228, + "logps/chosen": -517.7577514648438, + "logps/rejected": -642.97705078125, + "loss": 0.5145, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -2.032965660095215, + "rewards/margins": 1.262226939201355, + "rewards/rejected": -3.295192241668701, + "step": 8620 + }, + { + "epoch": 0.56, + "learning_rate": 2.3724389202001006e-06, + "logits/chosen": -1.9544061422348022, + "logits/rejected": -1.5058300495147705, + "logps/chosen": -559.7622680664062, + "logps/rejected": -668.08251953125, + "loss": 0.432, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.5795223712921143, + "rewards/margins": 1.103283166885376, + "rewards/rejected": -3.682805299758911, + "step": 8630 + }, + { + "epoch": 0.57, + "learning_rate": 2.366736785255514e-06, + "logits/chosen": -2.1814701557159424, + "logits/rejected": -1.883183240890503, + "logps/chosen": -529.947265625, + "logps/rejected": -600.7251586914062, + "loss": 0.5234, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -2.2991366386413574, + "rewards/margins": 1.3504598140716553, + "rewards/rejected": -3.6495964527130127, + "step": 8640 + }, + { + "epoch": 0.57, + "learning_rate": 2.3610353454771355e-06, + "logits/chosen": -2.1669187545776367, + "logits/rejected": -2.116823196411133, + "logps/chosen": -559.6239013671875, + "logps/rejected": -643.1777954101562, + "loss": 0.4503, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.8764381408691406, + "rewards/margins": 0.7580646276473999, + "rewards/rejected": -3.63450288772583, + "step": 8650 + }, + { + "epoch": 0.57, + "learning_rate": 2.355334630606467e-06, + "logits/chosen": -2.248654842376709, + "logits/rejected": -1.7833881378173828, + "logps/chosen": -544.1170654296875, + "logps/rejected": -719.9376220703125, + "loss": 0.5325, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.4501070976257324, + "rewards/margins": 1.2582969665527344, + "rewards/rejected": -3.708404064178467, + "step": 8660 + }, + { + "epoch": 0.57, + "learning_rate": 2.349634670381231e-06, + "logits/chosen": -1.9623810052871704, + "logits/rejected": -1.966509222984314, + "logps/chosen": -521.1162719726562, + "logps/rejected": -625.997314453125, + "loss": 0.6938, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -2.571371078491211, + "rewards/margins": 1.166871428489685, + "rewards/rejected": -3.7382426261901855, + "step": 8670 + }, + { + "epoch": 0.57, + "learning_rate": 2.3439354945352104e-06, + "logits/chosen": -1.8034248352050781, + "logits/rejected": -1.8189328908920288, + "logps/chosen": -581.9903564453125, + "logps/rejected": -672.5491943359375, + "loss": 0.4387, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -2.5983049869537354, + "rewards/margins": 1.2358529567718506, + "rewards/rejected": -3.834158420562744, + "step": 8680 + }, + { + "epoch": 0.57, + "learning_rate": 2.3382371327981e-06, + "logits/chosen": -1.6109882593154907, + "logits/rejected": -1.9092413187026978, + "logps/chosen": -531.2320556640625, + "logps/rejected": -657.5418701171875, + "loss": 0.7228, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -2.8226723670959473, + "rewards/margins": 0.6856005191802979, + "rewards/rejected": -3.5082733631134033, + "step": 8690 + }, + { + "epoch": 0.57, + "learning_rate": 2.3325396148953456e-06, + "logits/chosen": -2.23337721824646, + "logits/rejected": -1.8812425136566162, + "logps/chosen": -537.9326782226562, + "logps/rejected": -645.2062377929688, + "loss": 0.4033, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -2.6055963039398193, + "rewards/margins": 1.3089559078216553, + "rewards/rejected": -3.9145522117614746, + "step": 8700 + }, + { + "epoch": 0.57, + "learning_rate": 2.3268429705479915e-06, + "logits/chosen": -1.838443398475647, + "logits/rejected": -1.89572012424469, + "logps/chosen": -487.03253173828125, + "logps/rejected": -597.0982666015625, + "loss": 0.5256, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -2.876188278198242, + "rewards/margins": 1.1435706615447998, + "rewards/rejected": -4.019758701324463, + "step": 8710 + }, + { + "epoch": 0.57, + "learning_rate": 2.3211472294725248e-06, + "logits/chosen": -2.0660183429718018, + "logits/rejected": -1.859236478805542, + "logps/chosen": -588.6038208007812, + "logps/rejected": -677.5938720703125, + "loss": 0.4918, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -2.4605069160461426, + "rewards/margins": 1.6218595504760742, + "rewards/rejected": -4.082365989685059, + "step": 8720 + }, + { + "epoch": 0.57, + "learning_rate": 2.315452421380721e-06, + "logits/chosen": -2.0983052253723145, + "logits/rejected": -1.5631288290023804, + "logps/chosen": -577.9644775390625, + "logps/rejected": -642.124755859375, + "loss": 0.3914, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.718008518218994, + "rewards/margins": 0.8872051239013672, + "rewards/rejected": -3.6052136421203613, + "step": 8730 + }, + { + "epoch": 0.57, + "learning_rate": 2.3097585759794886e-06, + "logits/chosen": -2.3067686557769775, + "logits/rejected": -2.0007030963897705, + "logps/chosen": -577.6544799804688, + "logps/rejected": -582.8269653320312, + "loss": 0.4402, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.1569647789001465, + "rewards/margins": 1.6096376180648804, + "rewards/rejected": -3.7666027545928955, + "step": 8740 + }, + { + "epoch": 0.57, + "learning_rate": 2.3040657229707155e-06, + "logits/chosen": -1.7420654296875, + "logits/rejected": -1.9039827585220337, + "logps/chosen": -656.5730590820312, + "logps/rejected": -680.1136474609375, + "loss": 0.6174, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -3.26708984375, + "rewards/margins": 0.5174268484115601, + "rewards/rejected": -3.7845168113708496, + "step": 8750 + }, + { + "epoch": 0.57, + "learning_rate": 2.2983738920511104e-06, + "logits/chosen": -2.079326868057251, + "logits/rejected": -1.9476079940795898, + "logps/chosen": -595.2271118164062, + "logps/rejected": -624.8593139648438, + "loss": 0.4396, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -2.79783296585083, + "rewards/margins": 1.1170275211334229, + "rewards/rejected": -3.914860486984253, + "step": 8760 + }, + { + "epoch": 0.57, + "learning_rate": 2.2926831129120523e-06, + "logits/chosen": -2.1405370235443115, + "logits/rejected": -1.6271158456802368, + "logps/chosen": -491.77783203125, + "logps/rejected": -587.8319702148438, + "loss": 0.5622, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -2.3106017112731934, + "rewards/margins": 0.8187249898910522, + "rewards/rejected": -3.129326581954956, + "step": 8770 + }, + { + "epoch": 0.57, + "learning_rate": 2.2869934152394323e-06, + "logits/chosen": -2.487213611602783, + "logits/rejected": -2.0195982456207275, + "logps/chosen": -612.7380981445312, + "logps/rejected": -603.2962036132812, + "loss": 0.5513, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -2.550365924835205, + "rewards/margins": 0.7269454598426819, + "rewards/rejected": -3.2773118019104004, + "step": 8780 + }, + { + "epoch": 0.58, + "learning_rate": 2.281304828713501e-06, + "logits/chosen": -2.2334394454956055, + "logits/rejected": -1.6651496887207031, + "logps/chosen": -595.5431518554688, + "logps/rejected": -643.3426513671875, + "loss": 0.5202, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.3620035648345947, + "rewards/margins": 1.4718233346939087, + "rewards/rejected": -3.833826780319214, + "step": 8790 + }, + { + "epoch": 0.58, + "learning_rate": 2.275617383008711e-06, + "logits/chosen": -2.304520606994629, + "logits/rejected": -1.7181825637817383, + "logps/chosen": -570.1748046875, + "logps/rejected": -560.0604858398438, + "loss": 0.6478, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -2.6325135231018066, + "rewards/margins": 0.6104687452316284, + "rewards/rejected": -3.2429816722869873, + "step": 8800 + }, + { + "epoch": 0.58, + "learning_rate": 2.269931107793567e-06, + "logits/chosen": -1.7754180431365967, + "logits/rejected": -1.9540001153945923, + "logps/chosen": -511.11834716796875, + "logps/rejected": -521.2728271484375, + "loss": 0.5108, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -2.5714993476867676, + "rewards/margins": 0.6436102986335754, + "rewards/rejected": -3.2151095867156982, + "step": 8810 + }, + { + "epoch": 0.58, + "learning_rate": 2.2642460327304655e-06, + "logits/chosen": -2.2850050926208496, + "logits/rejected": -2.1239497661590576, + "logps/chosen": -663.78466796875, + "logps/rejected": -669.0499877929688, + "loss": 0.5477, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -2.511131763458252, + "rewards/margins": 0.9971479177474976, + "rewards/rejected": -3.508279323577881, + "step": 8820 + }, + { + "epoch": 0.58, + "learning_rate": 2.258562187475543e-06, + "logits/chosen": -2.2841639518737793, + "logits/rejected": -2.083108425140381, + "logps/chosen": -518.9134521484375, + "logps/rejected": -691.3600463867188, + "loss": 0.3406, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -1.835007667541504, + "rewards/margins": 1.801963448524475, + "rewards/rejected": -3.6369712352752686, + "step": 8830 + }, + { + "epoch": 0.58, + "learning_rate": 2.2528796016785196e-06, + "logits/chosen": -2.287487030029297, + "logits/rejected": -2.0592336654663086, + "logps/chosen": -574.352783203125, + "logps/rejected": -623.5216064453125, + "loss": 0.4739, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -2.657583713531494, + "rewards/margins": 1.017406702041626, + "rewards/rejected": -3.674990177154541, + "step": 8840 + }, + { + "epoch": 0.58, + "learning_rate": 2.247198304982548e-06, + "logits/chosen": -1.7157217264175415, + "logits/rejected": -1.6318271160125732, + "logps/chosen": -508.4742126464844, + "logps/rejected": -640.5726318359375, + "loss": 0.5662, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -2.0853447914123535, + "rewards/margins": 1.3192092180252075, + "rewards/rejected": -3.4045543670654297, + "step": 8850 + }, + { + "epoch": 0.58, + "learning_rate": 2.2415183270240533e-06, + "logits/chosen": -2.2637226581573486, + "logits/rejected": -1.8087360858917236, + "logps/chosen": -564.3175048828125, + "logps/rejected": -577.1688842773438, + "loss": 0.4104, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.231301784515381, + "rewards/margins": 1.0309956073760986, + "rewards/rejected": -3.2622971534729004, + "step": 8860 + }, + { + "epoch": 0.58, + "learning_rate": 2.2358396974325837e-06, + "logits/chosen": -1.8857953548431396, + "logits/rejected": -2.095496416091919, + "logps/chosen": -644.1954345703125, + "logps/rejected": -772.6640625, + "loss": 0.5618, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -2.6326816082000732, + "rewards/margins": 0.6671141982078552, + "rewards/rejected": -3.2997958660125732, + "step": 8870 + }, + { + "epoch": 0.58, + "learning_rate": 2.2301624458306525e-06, + "logits/chosen": -2.007840633392334, + "logits/rejected": -1.8935134410858154, + "logps/chosen": -596.2510375976562, + "logps/rejected": -559.5049438476562, + "loss": 0.7535, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -3.280665159225464, + "rewards/margins": 0.4504348337650299, + "rewards/rejected": -3.731100559234619, + "step": 8880 + }, + { + "epoch": 0.58, + "learning_rate": 2.2244866018335855e-06, + "logits/chosen": -1.8352820873260498, + "logits/rejected": -1.730767011642456, + "logps/chosen": -522.18212890625, + "logps/rejected": -626.891845703125, + "loss": 0.573, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.457399845123291, + "rewards/margins": 1.2562861442565918, + "rewards/rejected": -3.713685989379883, + "step": 8890 + }, + { + "epoch": 0.58, + "learning_rate": 2.2188121950493648e-06, + "logits/chosen": -2.008808135986328, + "logits/rejected": -1.604680061340332, + "logps/chosen": -621.1892700195312, + "logps/rejected": -616.54052734375, + "loss": 0.5277, + "rewards/accuracies": 0.75, + "rewards/chosen": -3.031632900238037, + "rewards/margins": 0.9094408750534058, + "rewards/rejected": -3.9410743713378906, + "step": 8900 + }, + { + "epoch": 0.58, + "learning_rate": 2.2131392550784766e-06, + "logits/chosen": -2.350640058517456, + "logits/rejected": -2.0902938842773438, + "logps/chosen": -584.5798950195312, + "logps/rejected": -655.9541015625, + "loss": 0.5626, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -2.2919318675994873, + "rewards/margins": 1.0417548418045044, + "rewards/rejected": -3.3336868286132812, + "step": 8910 + }, + { + "epoch": 0.58, + "learning_rate": 2.2074678115137533e-06, + "logits/chosen": -2.2224221229553223, + "logits/rejected": -1.7978475093841553, + "logps/chosen": -533.0066528320312, + "logps/rejected": -592.8658447265625, + "loss": 0.4884, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.7765023708343506, + "rewards/margins": 0.9631174206733704, + "rewards/rejected": -3.739619493484497, + "step": 8920 + }, + { + "epoch": 0.58, + "learning_rate": 2.201797893940224e-06, + "logits/chosen": -2.1494221687316895, + "logits/rejected": -1.9534950256347656, + "logps/chosen": -535.6815185546875, + "logps/rejected": -551.4397583007812, + "loss": 0.7491, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.32279896736145, + "rewards/margins": 0.542815089225769, + "rewards/rejected": -2.8656139373779297, + "step": 8930 + }, + { + "epoch": 0.58, + "learning_rate": 2.196129531934956e-06, + "logits/chosen": -1.8187973499298096, + "logits/rejected": -1.8088195323944092, + "logps/chosen": -587.8333129882812, + "logps/rejected": -656.7921142578125, + "loss": 0.3842, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -2.524751663208008, + "rewards/margins": 1.7834068536758423, + "rewards/rejected": -4.3081583976745605, + "step": 8940 + }, + { + "epoch": 0.59, + "learning_rate": 2.190462755066902e-06, + "logits/chosen": -2.367126941680908, + "logits/rejected": -1.478229284286499, + "logps/chosen": -559.9871215820312, + "logps/rejected": -643.1048583984375, + "loss": 0.4527, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -2.1893820762634277, + "rewards/margins": 1.1641318798065186, + "rewards/rejected": -3.353513717651367, + "step": 8950 + }, + { + "epoch": 0.59, + "learning_rate": 2.184797592896746e-06, + "logits/chosen": -2.319800615310669, + "logits/rejected": -1.4292646646499634, + "logps/chosen": -589.6014404296875, + "logps/rejected": -580.06640625, + "loss": 0.4834, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -2.5645852088928223, + "rewards/margins": 1.3727290630340576, + "rewards/rejected": -3.93731427192688, + "step": 8960 + }, + { + "epoch": 0.59, + "learning_rate": 2.17913407497675e-06, + "logits/chosen": -1.7828247547149658, + "logits/rejected": -1.5657999515533447, + "logps/chosen": -552.4095458984375, + "logps/rejected": -668.5966186523438, + "loss": 0.5277, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -2.7630295753479004, + "rewards/margins": 1.062851071357727, + "rewards/rejected": -3.825880765914917, + "step": 8970 + }, + { + "epoch": 0.59, + "learning_rate": 2.173472230850596e-06, + "logits/chosen": -1.9459810256958008, + "logits/rejected": -1.230505347251892, + "logps/chosen": -559.1304321289062, + "logps/rejected": -572.7885131835938, + "loss": 0.479, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -2.3328442573547363, + "rewards/margins": 1.3568191528320312, + "rewards/rejected": -3.6896634101867676, + "step": 8980 + }, + { + "epoch": 0.59, + "learning_rate": 2.1678120900532375e-06, + "logits/chosen": -1.905078649520874, + "logits/rejected": -1.9692771434783936, + "logps/chosen": -591.5587768554688, + "logps/rejected": -695.5579833984375, + "loss": 0.4987, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -2.7419822216033936, + "rewards/margins": 1.1332275867462158, + "rewards/rejected": -3.8752098083496094, + "step": 8990 + }, + { + "epoch": 0.59, + "learning_rate": 2.1621536821107412e-06, + "logits/chosen": -1.8924217224121094, + "logits/rejected": -1.3386434316635132, + "logps/chosen": -503.5406799316406, + "logps/rejected": -694.8475341796875, + "loss": 0.3399, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -2.4634346961975098, + "rewards/margins": 1.3909974098205566, + "rewards/rejected": -3.8544325828552246, + "step": 9000 + }, + { + "epoch": 0.59, + "eval_logits/chosen": -2.0022943019866943, + "eval_logits/rejected": -1.820178747177124, + "eval_logps/chosen": -542.1044921875, + "eval_logps/rejected": -637.0339965820312, + "eval_loss": 0.5424858331680298, + "eval_rewards/accuracies": 0.7409999966621399, + "eval_rewards/chosen": -2.46658992767334, + "eval_rewards/margins": 1.1496917009353638, + "eval_rewards/rejected": -3.616281509399414, + "eval_runtime": 464.9712, + "eval_samples_per_second": 4.301, + "eval_steps_per_second": 2.151, + "step": 9000 + }, + { + "epoch": 0.59, + "learning_rate": 2.1564970365401346e-06, + "logits/chosen": -2.1641273498535156, + "logits/rejected": -1.9229265451431274, + "logps/chosen": -520.37255859375, + "logps/rejected": -689.4486083984375, + "loss": 0.484, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -2.614044189453125, + "rewards/margins": 1.3849103450775146, + "rewards/rejected": -3.9989547729492188, + "step": 9010 + }, + { + "epoch": 0.59, + "learning_rate": 2.1508421828492527e-06, + "logits/chosen": -1.9114001989364624, + "logits/rejected": -2.0205748081207275, + "logps/chosen": -548.467529296875, + "logps/rejected": -674.73779296875, + "loss": 0.5263, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -2.2811312675476074, + "rewards/margins": 1.1583563089370728, + "rewards/rejected": -3.4394874572753906, + "step": 9020 + }, + { + "epoch": 0.59, + "learning_rate": 2.145189150536582e-06, + "logits/chosen": -1.9899876117706299, + "logits/rejected": -2.2059924602508545, + "logps/chosen": -585.5611572265625, + "logps/rejected": -647.5160522460938, + "loss": 0.5881, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -2.4526724815368652, + "rewards/margins": 0.7929941415786743, + "rewards/rejected": -3.245666980743408, + "step": 9030 + }, + { + "epoch": 0.59, + "learning_rate": 2.139537969091107e-06, + "logits/chosen": -1.997889757156372, + "logits/rejected": -1.4408007860183716, + "logps/chosen": -589.4309692382812, + "logps/rejected": -603.2365112304688, + "loss": 0.661, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -3.2231433391571045, + "rewards/margins": 0.8774444460868835, + "rewards/rejected": -4.100587844848633, + "step": 9040 + }, + { + "epoch": 0.59, + "learning_rate": 2.1338886679921603e-06, + "logits/chosen": -2.291400194168091, + "logits/rejected": -1.6803371906280518, + "logps/chosen": -533.8601684570312, + "logps/rejected": -636.6980590820312, + "loss": 0.5748, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -2.5815556049346924, + "rewards/margins": 0.851997971534729, + "rewards/rejected": -3.4335532188415527, + "step": 9050 + }, + { + "epoch": 0.59, + "learning_rate": 2.128241276709263e-06, + "logits/chosen": -2.2793195247650146, + "logits/rejected": -2.075969934463501, + "logps/chosen": -532.822265625, + "logps/rejected": -600.0733032226562, + "loss": 0.4538, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -1.9714977741241455, + "rewards/margins": 1.158085823059082, + "rewards/rejected": -3.1295838356018066, + "step": 9060 + }, + { + "epoch": 0.59, + "learning_rate": 2.1225958247019746e-06, + "logits/chosen": -2.2013304233551025, + "logits/rejected": -1.8609364032745361, + "logps/chosen": -447.49896240234375, + "logps/rejected": -562.5231323242188, + "loss": 0.4513, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -1.9393889904022217, + "rewards/margins": 1.7084639072418213, + "rewards/rejected": -3.647852659225464, + "step": 9070 + }, + { + "epoch": 0.59, + "learning_rate": 2.1169523414197383e-06, + "logits/chosen": -2.081878662109375, + "logits/rejected": -1.931777000427246, + "logps/chosen": -563.480712890625, + "logps/rejected": -615.7371215820312, + "loss": 0.478, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -1.9521706104278564, + "rewards/margins": 1.0098683834075928, + "rewards/rejected": -2.96203875541687, + "step": 9080 + }, + { + "epoch": 0.59, + "learning_rate": 2.1113108563017267e-06, + "logits/chosen": -2.106600046157837, + "logits/rejected": -1.7770551443099976, + "logps/chosen": -617.9705810546875, + "logps/rejected": -740.1712646484375, + "loss": 0.6053, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -2.3663182258605957, + "rewards/margins": 0.9891064763069153, + "rewards/rejected": -3.3554248809814453, + "step": 9090 + }, + { + "epoch": 0.6, + "learning_rate": 2.1056713987766905e-06, + "logits/chosen": -1.7630764245986938, + "logits/rejected": -1.5825989246368408, + "logps/chosen": -464.11767578125, + "logps/rejected": -558.0419921875, + "loss": 0.5286, + "rewards/accuracies": 0.75, + "rewards/chosen": -3.2644290924072266, + "rewards/margins": 1.071807622909546, + "rewards/rejected": -4.336236476898193, + "step": 9100 + }, + { + "epoch": 0.6, + "learning_rate": 2.1000339982628022e-06, + "logits/chosen": -1.8941562175750732, + "logits/rejected": -1.6353633403778076, + "logps/chosen": -608.3696899414062, + "logps/rejected": -621.0068359375, + "loss": 0.6304, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.9169669151306152, + "rewards/margins": 0.7194188237190247, + "rewards/rejected": -3.636385679244995, + "step": 9110 + }, + { + "epoch": 0.6, + "learning_rate": 2.0943986841675043e-06, + "logits/chosen": -2.0865185260772705, + "logits/rejected": -2.3169753551483154, + "logps/chosen": -589.1450805664062, + "logps/rejected": -657.9476318359375, + "loss": 0.6209, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -2.759594440460205, + "rewards/margins": 0.7825464010238647, + "rewards/rejected": -3.5421414375305176, + "step": 9120 + }, + { + "epoch": 0.6, + "learning_rate": 2.088765485887356e-06, + "logits/chosen": -2.305544376373291, + "logits/rejected": -1.736236333847046, + "logps/chosen": -502.2201232910156, + "logps/rejected": -603.9275512695312, + "loss": 0.3673, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -2.0719895362854004, + "rewards/margins": 1.5007104873657227, + "rewards/rejected": -3.572699785232544, + "step": 9130 + }, + { + "epoch": 0.6, + "learning_rate": 2.083134432807879e-06, + "logits/chosen": -2.0251638889312744, + "logits/rejected": -2.3476920127868652, + "logps/chosen": -565.7601318359375, + "logps/rejected": -609.1180419921875, + "loss": 0.6335, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -2.030787706375122, + "rewards/margins": 1.0215563774108887, + "rewards/rejected": -3.0523440837860107, + "step": 9140 + }, + { + "epoch": 0.6, + "learning_rate": 2.077505554303404e-06, + "logits/chosen": -2.367225408554077, + "logits/rejected": -1.573440432548523, + "logps/chosen": -550.5197143554688, + "logps/rejected": -619.6304931640625, + "loss": 0.483, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -2.22603702545166, + "rewards/margins": 1.6922342777252197, + "rewards/rejected": -3.9182708263397217, + "step": 9150 + }, + { + "epoch": 0.6, + "learning_rate": 2.071878879736918e-06, + "logits/chosen": -2.223241090774536, + "logits/rejected": -1.6001701354980469, + "logps/chosen": -615.3475952148438, + "logps/rejected": -580.7982177734375, + "loss": 0.7404, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -2.9252007007598877, + "rewards/margins": 0.4547927975654602, + "rewards/rejected": -3.379993438720703, + "step": 9160 + }, + { + "epoch": 0.6, + "learning_rate": 2.0662544384599136e-06, + "logits/chosen": -1.8714954853057861, + "logits/rejected": -1.802222490310669, + "logps/chosen": -539.7855224609375, + "logps/rejected": -655.821533203125, + "loss": 0.4154, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -2.3042709827423096, + "rewards/margins": 1.7545344829559326, + "rewards/rejected": -4.058804988861084, + "step": 9170 + }, + { + "epoch": 0.6, + "learning_rate": 2.0606322598122314e-06, + "logits/chosen": -2.063321828842163, + "logits/rejected": -2.150560140609741, + "logps/chosen": -515.9948120117188, + "logps/rejected": -631.8154907226562, + "loss": 0.5244, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -2.2739298343658447, + "rewards/margins": 1.2714461088180542, + "rewards/rejected": -3.5453758239746094, + "step": 9180 + }, + { + "epoch": 0.6, + "learning_rate": 2.0550123731219085e-06, + "logits/chosen": -2.152060031890869, + "logits/rejected": -1.9297168254852295, + "logps/chosen": -564.4265747070312, + "logps/rejected": -679.7371826171875, + "loss": 0.645, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.527137279510498, + "rewards/margins": 1.0509201288223267, + "rewards/rejected": -3.5780577659606934, + "step": 9190 + }, + { + "epoch": 0.6, + "learning_rate": 2.0493948077050267e-06, + "logits/chosen": -2.3841280937194824, + "logits/rejected": -1.8136011362075806, + "logps/chosen": -605.9197387695312, + "logps/rejected": -682.6570434570312, + "loss": 0.638, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -2.816260576248169, + "rewards/margins": 1.0166782140731812, + "rewards/rejected": -3.8329384326934814, + "step": 9200 + }, + { + "epoch": 0.6, + "learning_rate": 2.0437795928655596e-06, + "logits/chosen": -1.8025420904159546, + "logits/rejected": -1.828176736831665, + "logps/chosen": -525.0386352539062, + "logps/rejected": -610.8030395507812, + "loss": 0.577, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -2.5464606285095215, + "rewards/margins": 0.6083902716636658, + "rewards/rejected": -3.154850959777832, + "step": 9210 + }, + { + "epoch": 0.6, + "learning_rate": 2.0381667578952184e-06, + "logits/chosen": -1.8286422491073608, + "logits/rejected": -1.7795965671539307, + "logps/chosen": -505.29742431640625, + "logps/rejected": -559.3614501953125, + "loss": 0.6331, + "rewards/accuracies": 0.4000000059604645, + "rewards/chosen": -3.038729667663574, + "rewards/margins": 0.616546094417572, + "rewards/rejected": -3.65527606010437, + "step": 9220 + }, + { + "epoch": 0.6, + "learning_rate": 2.0325563320732995e-06, + "logits/chosen": -2.0447893142700195, + "logits/rejected": -1.742436408996582, + "logps/chosen": -467.5716247558594, + "logps/rejected": -578.86181640625, + "loss": 0.5913, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -2.6618003845214844, + "rewards/margins": 0.9829275012016296, + "rewards/rejected": -3.6447277069091797, + "step": 9230 + }, + { + "epoch": 0.6, + "learning_rate": 2.026948344666532e-06, + "logits/chosen": -2.147611379623413, + "logits/rejected": -1.6617320775985718, + "logps/chosen": -566.64453125, + "logps/rejected": -610.9962768554688, + "loss": 0.4258, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -2.365844488143921, + "rewards/margins": 1.2829093933105469, + "rewards/rejected": -3.6487536430358887, + "step": 9240 + }, + { + "epoch": 0.61, + "learning_rate": 2.0213428249289257e-06, + "logits/chosen": -2.0575060844421387, + "logits/rejected": -1.766715407371521, + "logps/chosen": -520.9926147460938, + "logps/rejected": -624.228515625, + "loss": 0.5106, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -2.6971800327301025, + "rewards/margins": 0.8146166801452637, + "rewards/rejected": -3.5117969512939453, + "step": 9250 + }, + { + "epoch": 0.61, + "learning_rate": 2.0157398021016175e-06, + "logits/chosen": -2.0321450233459473, + "logits/rejected": -1.6661357879638672, + "logps/chosen": -520.1958618164062, + "logps/rejected": -647.04052734375, + "loss": 0.5692, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -2.661426067352295, + "rewards/margins": 0.8946744799613953, + "rewards/rejected": -3.556100368499756, + "step": 9260 + }, + { + "epoch": 0.61, + "learning_rate": 2.010139305412719e-06, + "logits/chosen": -1.6539812088012695, + "logits/rejected": -1.6137405633926392, + "logps/chosen": -604.7357177734375, + "logps/rejected": -658.6666870117188, + "loss": 0.5299, + "rewards/accuracies": 0.75, + "rewards/chosen": -3.1034462451934814, + "rewards/margins": 1.02749502658844, + "rewards/rejected": -4.130940914154053, + "step": 9270 + }, + { + "epoch": 0.61, + "learning_rate": 2.0045413640771644e-06, + "logits/chosen": -1.9617812633514404, + "logits/rejected": -1.632447600364685, + "logps/chosen": -478.29766845703125, + "logps/rejected": -611.6834716796875, + "loss": 0.56, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.4144785404205322, + "rewards/margins": 1.2469799518585205, + "rewards/rejected": -3.6614582538604736, + "step": 9280 + }, + { + "epoch": 0.61, + "learning_rate": 1.998946007296558e-06, + "logits/chosen": -2.048708915710449, + "logits/rejected": -1.7788597345352173, + "logps/chosen": -515.7967529296875, + "logps/rejected": -620.9700317382812, + "loss": 0.5435, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -2.980874538421631, + "rewards/margins": 0.6445003151893616, + "rewards/rejected": -3.6253750324249268, + "step": 9290 + }, + { + "epoch": 0.61, + "learning_rate": 1.9933532642590215e-06, + "logits/chosen": -1.670326590538025, + "logits/rejected": -0.9932015538215637, + "logps/chosen": -559.5738525390625, + "logps/rejected": -637.4277954101562, + "loss": 0.5583, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -3.1597964763641357, + "rewards/margins": 0.7928120493888855, + "rewards/rejected": -3.952608585357666, + "step": 9300 + }, + { + "epoch": 0.61, + "learning_rate": 1.987763164139042e-06, + "logits/chosen": -1.5591435432434082, + "logits/rejected": -1.7700544595718384, + "logps/chosen": -441.77294921875, + "logps/rejected": -577.7332153320312, + "loss": 0.4779, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -2.2711105346679688, + "rewards/margins": 1.0710574388504028, + "rewards/rejected": -3.342167615890503, + "step": 9310 + }, + { + "epoch": 0.61, + "learning_rate": 1.982175736097321e-06, + "logits/chosen": -1.8563480377197266, + "logits/rejected": -1.8127315044403076, + "logps/chosen": -562.796875, + "logps/rejected": -550.6398315429688, + "loss": 0.6156, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.2904465198516846, + "rewards/margins": 0.5200685262680054, + "rewards/rejected": -2.8105149269104004, + "step": 9320 + }, + { + "epoch": 0.61, + "learning_rate": 1.9765910092806196e-06, + "logits/chosen": -1.920570731163025, + "logits/rejected": -1.916369080543518, + "logps/chosen": -608.796875, + "logps/rejected": -609.5429077148438, + "loss": 0.5016, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -2.900730848312378, + "rewards/margins": 0.667815625667572, + "rewards/rejected": -3.568546772003174, + "step": 9330 + }, + { + "epoch": 0.61, + "learning_rate": 1.9710090128216083e-06, + "logits/chosen": -2.0497214794158936, + "logits/rejected": -1.7055015563964844, + "logps/chosen": -491.4339904785156, + "logps/rejected": -572.2652587890625, + "loss": 0.6743, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -2.984422206878662, + "rewards/margins": 0.7262072563171387, + "rewards/rejected": -3.710629940032959, + "step": 9340 + }, + { + "epoch": 0.61, + "learning_rate": 1.9654297758387155e-06, + "logits/chosen": -2.1629855632781982, + "logits/rejected": -2.0053813457489014, + "logps/chosen": -576.7559814453125, + "logps/rejected": -723.0887451171875, + "loss": 0.469, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -2.744321346282959, + "rewards/margins": 1.1882708072662354, + "rewards/rejected": -3.9325923919677734, + "step": 9350 + }, + { + "epoch": 0.61, + "learning_rate": 1.9598533274359736e-06, + "logits/chosen": -2.2159430980682373, + "logits/rejected": -1.965995192527771, + "logps/chosen": -522.80419921875, + "logps/rejected": -573.6424560546875, + "loss": 0.553, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -2.6606602668762207, + "rewards/margins": 0.6701160073280334, + "rewards/rejected": -3.330775737762451, + "step": 9360 + }, + { + "epoch": 0.61, + "learning_rate": 1.9542796967028697e-06, + "logits/chosen": -2.061631441116333, + "logits/rejected": -1.8505007028579712, + "logps/chosen": -628.1642456054688, + "logps/rejected": -639.5188598632812, + "loss": 0.6624, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -2.1050140857696533, + "rewards/margins": 1.1158678531646729, + "rewards/rejected": -3.220881700515747, + "step": 9370 + }, + { + "epoch": 0.61, + "learning_rate": 1.948708912714192e-06, + "logits/chosen": -1.7678762674331665, + "logits/rejected": -1.9105870723724365, + "logps/chosen": -540.94091796875, + "logps/rejected": -547.921142578125, + "loss": 0.6858, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -2.675358295440674, + "rewards/margins": 0.9142533540725708, + "rewards/rejected": -3.589611768722534, + "step": 9380 + }, + { + "epoch": 0.61, + "learning_rate": 1.9431410045298786e-06, + "logits/chosen": -2.333967685699463, + "logits/rejected": -2.0058631896972656, + "logps/chosen": -558.5092163085938, + "logps/rejected": -702.185791015625, + "loss": 0.4988, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -2.4004080295562744, + "rewards/margins": 1.0809009075164795, + "rewards/rejected": -3.481309175491333, + "step": 9390 + }, + { + "epoch": 0.62, + "learning_rate": 1.9375760011948654e-06, + "logits/chosen": -2.1702075004577637, + "logits/rejected": -1.756667137145996, + "logps/chosen": -627.5929565429688, + "logps/rejected": -633.3089599609375, + "loss": 0.5063, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.946218729019165, + "rewards/margins": 0.907780647277832, + "rewards/rejected": -3.853998899459839, + "step": 9400 + }, + { + "epoch": 0.62, + "learning_rate": 1.932013931738937e-06, + "logits/chosen": -2.0589101314544678, + "logits/rejected": -1.8918129205703735, + "logps/chosen": -520.4500732421875, + "logps/rejected": -549.6915283203125, + "loss": 0.3908, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -2.3042593002319336, + "rewards/margins": 0.8746353387832642, + "rewards/rejected": -3.178894519805908, + "step": 9410 + }, + { + "epoch": 0.62, + "learning_rate": 1.9264548251765717e-06, + "logits/chosen": -2.0337188243865967, + "logits/rejected": -1.747370958328247, + "logps/chosen": -567.445068359375, + "logps/rejected": -643.7906494140625, + "loss": 0.5603, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -2.6033923625946045, + "rewards/margins": 1.404759168624878, + "rewards/rejected": -4.008151531219482, + "step": 9420 + }, + { + "epoch": 0.62, + "learning_rate": 1.9208987105067924e-06, + "logits/chosen": -1.8125396966934204, + "logits/rejected": -1.8225065469741821, + "logps/chosen": -601.48876953125, + "logps/rejected": -971.3978271484375, + "loss": 0.4689, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -2.6848011016845703, + "rewards/margins": 2.0545504093170166, + "rewards/rejected": -4.739351749420166, + "step": 9430 + }, + { + "epoch": 0.62, + "learning_rate": 1.9153456167130154e-06, + "logits/chosen": -2.2428884506225586, + "logits/rejected": -2.1689774990081787, + "logps/chosen": -542.6967163085938, + "logps/rejected": -516.2056884765625, + "loss": 0.5726, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -2.138211965560913, + "rewards/margins": 1.0557899475097656, + "rewards/rejected": -3.1940014362335205, + "step": 9440 + }, + { + "epoch": 0.62, + "learning_rate": 1.9097955727628975e-06, + "logits/chosen": -2.2795727252960205, + "logits/rejected": -2.303039073944092, + "logps/chosen": -602.3601684570312, + "logps/rejected": -527.6361083984375, + "loss": 0.6025, + "rewards/accuracies": 0.44999998807907104, + "rewards/chosen": -2.211545705795288, + "rewards/margins": 0.08048735558986664, + "rewards/rejected": -2.2920329570770264, + "step": 9450 + }, + { + "epoch": 0.62, + "learning_rate": 1.904248607608187e-06, + "logits/chosen": -2.111039400100708, + "logits/rejected": -1.5743972063064575, + "logps/chosen": -533.7442626953125, + "logps/rejected": -579.7639770507812, + "loss": 0.5136, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -2.898390531539917, + "rewards/margins": 0.8783761262893677, + "rewards/rejected": -3.776766300201416, + "step": 9460 + }, + { + "epoch": 0.62, + "learning_rate": 1.8987047501845714e-06, + "logits/chosen": -2.220080852508545, + "logits/rejected": -2.1884331703186035, + "logps/chosen": -537.1690673828125, + "logps/rejected": -652.2893676757812, + "loss": 0.5149, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -1.9553630352020264, + "rewards/margins": 0.9699887037277222, + "rewards/rejected": -2.925351619720459, + "step": 9470 + }, + { + "epoch": 0.62, + "learning_rate": 1.8931640294115267e-06, + "logits/chosen": -2.340770959854126, + "logits/rejected": -1.3254892826080322, + "logps/chosen": -550.0404052734375, + "logps/rejected": -653.9843139648438, + "loss": 0.4111, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -2.198329448699951, + "rewards/margins": 1.6379525661468506, + "rewards/rejected": -3.8362820148468018, + "step": 9480 + }, + { + "epoch": 0.62, + "learning_rate": 1.8876264741921662e-06, + "logits/chosen": -2.047311305999756, + "logits/rejected": -1.9576759338378906, + "logps/chosen": -566.8899536132812, + "logps/rejected": -596.0674438476562, + "loss": 0.7663, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -2.7725155353546143, + "rewards/margins": 0.497785747051239, + "rewards/rejected": -3.270301103591919, + "step": 9490 + }, + { + "epoch": 0.62, + "learning_rate": 1.8820921134130912e-06, + "logits/chosen": -2.081411600112915, + "logits/rejected": -1.9306904077529907, + "logps/chosen": -478.1241760253906, + "logps/rejected": -616.6199951171875, + "loss": 0.5073, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -1.968158483505249, + "rewards/margins": 1.5478565692901611, + "rewards/rejected": -3.516014814376831, + "step": 9500 + }, + { + "epoch": 0.62, + "learning_rate": 1.8765609759442378e-06, + "logits/chosen": -1.7763296365737915, + "logits/rejected": -1.8750178813934326, + "logps/chosen": -628.099609375, + "logps/rejected": -672.4976806640625, + "loss": 0.5003, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.796745777130127, + "rewards/margins": 0.9968738555908203, + "rewards/rejected": -3.7936196327209473, + "step": 9510 + }, + { + "epoch": 0.62, + "learning_rate": 1.8710330906387288e-06, + "logits/chosen": -1.9623371362686157, + "logits/rejected": -1.1978862285614014, + "logps/chosen": -513.8328857421875, + "logps/rejected": -598.662109375, + "loss": 0.5912, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -1.5714448690414429, + "rewards/margins": 1.6823393106460571, + "rewards/rejected": -3.2537841796875, + "step": 9520 + }, + { + "epoch": 0.62, + "learning_rate": 1.8655084863327222e-06, + "logits/chosen": -1.8784821033477783, + "logits/rejected": -1.9442542791366577, + "logps/chosen": -518.8257446289062, + "logps/rejected": -678.2208251953125, + "loss": 0.4694, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -2.6659276485443115, + "rewards/margins": 0.6673368811607361, + "rewards/rejected": -3.3332645893096924, + "step": 9530 + }, + { + "epoch": 0.62, + "learning_rate": 1.8599871918452603e-06, + "logits/chosen": -1.886810302734375, + "logits/rejected": -2.0606863498687744, + "logps/chosen": -558.5216064453125, + "logps/rejected": -647.117919921875, + "loss": 0.4529, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.2705154418945312, + "rewards/margins": 1.0462725162506104, + "rewards/rejected": -3.3167881965637207, + "step": 9540 + }, + { + "epoch": 0.62, + "learning_rate": 1.8544692359781192e-06, + "logits/chosen": -2.2128937244415283, + "logits/rejected": -2.2743053436279297, + "logps/chosen": -486.0628967285156, + "logps/rejected": -597.64599609375, + "loss": 0.5026, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -1.9931297302246094, + "rewards/margins": 1.2624160051345825, + "rewards/rejected": -3.2555458545684814, + "step": 9550 + }, + { + "epoch": 0.63, + "learning_rate": 1.8489546475156602e-06, + "logits/chosen": -1.8360512256622314, + "logits/rejected": -1.9772145748138428, + "logps/chosen": -470.145263671875, + "logps/rejected": -592.4556884765625, + "loss": 0.5319, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -2.4911038875579834, + "rewards/margins": 0.7783809900283813, + "rewards/rejected": -3.2694849967956543, + "step": 9560 + }, + { + "epoch": 0.63, + "learning_rate": 1.8434434552246778e-06, + "logits/chosen": -1.7942237854003906, + "logits/rejected": -2.0650551319122314, + "logps/chosen": -463.28192138671875, + "logps/rejected": -603.6610717773438, + "loss": 0.4579, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.378836154937744, + "rewards/margins": 0.7164266705513, + "rewards/rejected": -3.0952630043029785, + "step": 9570 + }, + { + "epoch": 0.63, + "learning_rate": 1.837935687854251e-06, + "logits/chosen": -2.18629789352417, + "logits/rejected": -2.0240304470062256, + "logps/chosen": -573.1627197265625, + "logps/rejected": -728.3543701171875, + "loss": 0.4139, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -1.989349126815796, + "rewards/margins": 1.6226472854614258, + "rewards/rejected": -3.6119964122772217, + "step": 9580 + }, + { + "epoch": 0.63, + "learning_rate": 1.832431374135592e-06, + "logits/chosen": -1.8767648935317993, + "logits/rejected": -1.5892021656036377, + "logps/chosen": -476.7618103027344, + "logps/rejected": -605.6596069335938, + "loss": 0.474, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -2.086083173751831, + "rewards/margins": 1.416338562965393, + "rewards/rejected": -3.5024218559265137, + "step": 9590 + }, + { + "epoch": 0.63, + "learning_rate": 1.8269305427818977e-06, + "logits/chosen": -2.020125150680542, + "logits/rejected": -2.155333995819092, + "logps/chosen": -516.5859985351562, + "logps/rejected": -632.4111938476562, + "loss": 0.4812, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.2657718658447266, + "rewards/margins": 1.0013291835784912, + "rewards/rejected": -3.267101287841797, + "step": 9600 + }, + { + "epoch": 0.63, + "learning_rate": 1.821433222488199e-06, + "logits/chosen": -2.1384525299072266, + "logits/rejected": -1.779611587524414, + "logps/chosen": -508.69366455078125, + "logps/rejected": -654.6775512695312, + "loss": 0.5838, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -2.3500173091888428, + "rewards/margins": 1.4160211086273193, + "rewards/rejected": -3.766038417816162, + "step": 9610 + }, + { + "epoch": 0.63, + "learning_rate": 1.8159394419312112e-06, + "logits/chosen": -2.125851631164551, + "logits/rejected": -1.3714392185211182, + "logps/chosen": -589.6465454101562, + "logps/rejected": -636.9295043945312, + "loss": 0.3293, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -2.6726527214050293, + "rewards/margins": 1.416653037071228, + "rewards/rejected": -4.089305877685547, + "step": 9620 + }, + { + "epoch": 0.63, + "learning_rate": 1.8104492297691845e-06, + "logits/chosen": -2.600778102874756, + "logits/rejected": -2.02959942817688, + "logps/chosen": -728.420166015625, + "logps/rejected": -682.1693725585938, + "loss": 0.5269, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -2.0615074634552, + "rewards/margins": 1.0232912302017212, + "rewards/rejected": -3.084798574447632, + "step": 9630 + }, + { + "epoch": 0.63, + "learning_rate": 1.8049626146417562e-06, + "logits/chosen": -1.8293802738189697, + "logits/rejected": -1.7707229852676392, + "logps/chosen": -459.9698791503906, + "logps/rejected": -655.4939575195312, + "loss": 0.5349, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -2.5275912284851074, + "rewards/margins": 2.15377140045166, + "rewards/rejected": -4.681363105773926, + "step": 9640 + }, + { + "epoch": 0.63, + "learning_rate": 1.7994796251697983e-06, + "logits/chosen": -2.037261486053467, + "logits/rejected": -2.1613070964813232, + "logps/chosen": -496.0302734375, + "logps/rejected": -599.44189453125, + "loss": 0.5668, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -2.432239055633545, + "rewards/margins": 0.7840698957443237, + "rewards/rejected": -3.21630859375, + "step": 9650 + }, + { + "epoch": 0.63, + "learning_rate": 1.794000289955269e-06, + "logits/chosen": -1.9210374355316162, + "logits/rejected": -2.0788938999176025, + "logps/chosen": -522.751708984375, + "logps/rejected": -671.2202758789062, + "loss": 0.6424, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.7306416034698486, + "rewards/margins": 0.8474380373954773, + "rewards/rejected": -3.57807993888855, + "step": 9660 + }, + { + "epoch": 0.63, + "learning_rate": 1.7885246375810646e-06, + "logits/chosen": -2.1408586502075195, + "logits/rejected": -2.3029913902282715, + "logps/chosen": -589.8209838867188, + "logps/rejected": -596.8001708984375, + "loss": 0.5855, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -2.9752256870269775, + "rewards/margins": 0.6550930738449097, + "rewards/rejected": -3.6303184032440186, + "step": 9670 + }, + { + "epoch": 0.63, + "learning_rate": 1.7830526966108713e-06, + "logits/chosen": -1.8824348449707031, + "logits/rejected": -1.7505286931991577, + "logps/chosen": -562.9407958984375, + "logps/rejected": -667.536865234375, + "loss": 0.3848, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -2.7581934928894043, + "rewards/margins": 1.1899961233139038, + "rewards/rejected": -3.9481894969940186, + "step": 9680 + }, + { + "epoch": 0.63, + "learning_rate": 1.7775844955890129e-06, + "logits/chosen": -2.3579797744750977, + "logits/rejected": -1.6902892589569092, + "logps/chosen": -620.387451171875, + "logps/rejected": -675.849365234375, + "loss": 0.5617, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.790555715560913, + "rewards/margins": 1.1491975784301758, + "rewards/rejected": -3.939753770828247, + "step": 9690 + }, + { + "epoch": 0.63, + "learning_rate": 1.7721200630403046e-06, + "logits/chosen": -2.2002413272857666, + "logits/rejected": -2.145082950592041, + "logps/chosen": -572.17919921875, + "logps/rejected": -632.3953247070312, + "loss": 0.5239, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -2.124516725540161, + "rewards/margins": 1.1394758224487305, + "rewards/rejected": -3.2639923095703125, + "step": 9700 + }, + { + "epoch": 0.64, + "learning_rate": 1.7666594274699037e-06, + "logits/chosen": -1.9743484258651733, + "logits/rejected": -1.4204148054122925, + "logps/chosen": -544.8038330078125, + "logps/rejected": -578.5160522460938, + "loss": 0.4681, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -3.116889715194702, + "rewards/margins": 0.9321670532226562, + "rewards/rejected": -4.049056529998779, + "step": 9710 + }, + { + "epoch": 0.64, + "learning_rate": 1.76120261736316e-06, + "logits/chosen": -1.8082326650619507, + "logits/rejected": -1.49305260181427, + "logps/chosen": -551.1629638671875, + "logps/rejected": -637.9384155273438, + "loss": 0.7645, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -3.505378007888794, + "rewards/margins": 0.28692299127578735, + "rewards/rejected": -3.7923007011413574, + "step": 9720 + }, + { + "epoch": 0.64, + "learning_rate": 1.755749661185468e-06, + "logits/chosen": -2.0321710109710693, + "logits/rejected": -1.9102131128311157, + "logps/chosen": -550.4937133789062, + "logps/rejected": -652.8010864257812, + "loss": 0.4133, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -2.035109281539917, + "rewards/margins": 1.307720422744751, + "rewards/rejected": -3.342829942703247, + "step": 9730 + }, + { + "epoch": 0.64, + "learning_rate": 1.7503005873821183e-06, + "logits/chosen": -1.872251272201538, + "logits/rejected": -1.6513566970825195, + "logps/chosen": -566.5404663085938, + "logps/rejected": -628.6839599609375, + "loss": 0.5487, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -2.969191312789917, + "rewards/margins": 0.6317653656005859, + "rewards/rejected": -3.600956678390503, + "step": 9740 + }, + { + "epoch": 0.64, + "learning_rate": 1.744855424378148e-06, + "logits/chosen": -2.311882495880127, + "logits/rejected": -1.8144493103027344, + "logps/chosen": -566.9302368164062, + "logps/rejected": -618.9942626953125, + "loss": 0.5155, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -2.352794885635376, + "rewards/margins": 1.1586731672286987, + "rewards/rejected": -3.5114681720733643, + "step": 9750 + }, + { + "epoch": 0.64, + "learning_rate": 1.7394142005781973e-06, + "logits/chosen": -2.0415024757385254, + "logits/rejected": -1.596631646156311, + "logps/chosen": -515.461669921875, + "logps/rejected": -670.7274169921875, + "loss": 0.5212, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -2.803544521331787, + "rewards/margins": 1.0397040843963623, + "rewards/rejected": -3.8432490825653076, + "step": 9760 + }, + { + "epoch": 0.64, + "learning_rate": 1.7339769443663528e-06, + "logits/chosen": -2.1776394844055176, + "logits/rejected": -2.0415406227111816, + "logps/chosen": -502.97320556640625, + "logps/rejected": -607.7816772460938, + "loss": 0.3718, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -2.1641182899475098, + "rewards/margins": 1.462161660194397, + "rewards/rejected": -3.626279830932617, + "step": 9770 + }, + { + "epoch": 0.64, + "learning_rate": 1.7285436841060078e-06, + "logits/chosen": -2.3086421489715576, + "logits/rejected": -2.1996123790740967, + "logps/chosen": -667.8643798828125, + "logps/rejected": -674.9031982421875, + "loss": 0.5214, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -2.371945858001709, + "rewards/margins": 0.7595013380050659, + "rewards/rejected": -3.1314473152160645, + "step": 9780 + }, + { + "epoch": 0.64, + "learning_rate": 1.7231144481397083e-06, + "logits/chosen": -2.240018129348755, + "logits/rejected": -1.5356919765472412, + "logps/chosen": -632.2506103515625, + "logps/rejected": -654.18994140625, + "loss": 0.5387, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -2.7113661766052246, + "rewards/margins": 0.6184327006340027, + "rewards/rejected": -3.329799175262451, + "step": 9790 + }, + { + "epoch": 0.64, + "learning_rate": 1.7176892647890092e-06, + "logits/chosen": -2.3555355072021484, + "logits/rejected": -1.7661718130111694, + "logps/chosen": -566.7734985351562, + "logps/rejected": -609.5919189453125, + "loss": 0.5801, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -1.8732264041900635, + "rewards/margins": 1.266191840171814, + "rewards/rejected": -3.139418125152588, + "step": 9800 + }, + { + "epoch": 0.64, + "learning_rate": 1.7122681623543239e-06, + "logits/chosen": -2.204519748687744, + "logits/rejected": -1.31349778175354, + "logps/chosen": -583.7943115234375, + "logps/rejected": -637.7953491210938, + "loss": 0.6432, + "rewards/accuracies": 0.75, + "rewards/chosen": -3.27911376953125, + "rewards/margins": 1.301857590675354, + "rewards/rejected": -4.5809712409973145, + "step": 9810 + }, + { + "epoch": 0.64, + "learning_rate": 1.7068511691147788e-06, + "logits/chosen": -2.087271213531494, + "logits/rejected": -1.6732345819473267, + "logps/chosen": -505.6923828125, + "logps/rejected": -655.382568359375, + "loss": 0.4241, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -2.321934461593628, + "rewards/margins": 1.5012768507003784, + "rewards/rejected": -3.823211669921875, + "step": 9820 + }, + { + "epoch": 0.64, + "learning_rate": 1.7014383133280636e-06, + "logits/chosen": -1.8397657871246338, + "logits/rejected": -1.8332821130752563, + "logps/chosen": -583.0677490234375, + "logps/rejected": -586.8565673828125, + "loss": 0.5852, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -2.8137660026550293, + "rewards/margins": 0.7917875051498413, + "rewards/rejected": -3.605553388595581, + "step": 9830 + }, + { + "epoch": 0.64, + "learning_rate": 1.696029623230286e-06, + "logits/chosen": -1.8087882995605469, + "logits/rejected": -2.0043318271636963, + "logps/chosen": -473.6741638183594, + "logps/rejected": -636.8434448242188, + "loss": 0.4475, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -2.895564556121826, + "rewards/margins": 1.1002885103225708, + "rewards/rejected": -3.995853900909424, + "step": 9840 + }, + { + "epoch": 0.64, + "learning_rate": 1.6906251270358229e-06, + "logits/chosen": -2.3192977905273438, + "logits/rejected": -1.9405720233917236, + "logps/chosen": -591.8472900390625, + "logps/rejected": -636.525146484375, + "loss": 0.5559, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -2.4123001098632812, + "rewards/margins": 1.067739486694336, + "rewards/rejected": -3.480039119720459, + "step": 9850 + }, + { + "epoch": 0.65, + "learning_rate": 1.685224852937174e-06, + "logits/chosen": -1.7449123859405518, + "logits/rejected": -1.7458549737930298, + "logps/chosen": -437.24456787109375, + "logps/rejected": -725.3290405273438, + "loss": 0.4406, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.4435203075408936, + "rewards/margins": 1.7831284999847412, + "rewards/rejected": -4.226648807525635, + "step": 9860 + }, + { + "epoch": 0.65, + "learning_rate": 1.6798288291048136e-06, + "logits/chosen": -2.501311779022217, + "logits/rejected": -2.0939345359802246, + "logps/chosen": -609.2479858398438, + "logps/rejected": -655.9998779296875, + "loss": 0.5679, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -2.2404470443725586, + "rewards/margins": 1.110357642173767, + "rewards/rejected": -3.350804567337036, + "step": 9870 + }, + { + "epoch": 0.65, + "learning_rate": 1.6744370836870466e-06, + "logits/chosen": -2.0165457725524902, + "logits/rejected": -2.0733141899108887, + "logps/chosen": -537.8299560546875, + "logps/rejected": -608.09765625, + "loss": 0.4918, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -2.3343470096588135, + "rewards/margins": 1.3215583562850952, + "rewards/rejected": -3.6559054851531982, + "step": 9880 + }, + { + "epoch": 0.65, + "learning_rate": 1.6690496448098576e-06, + "logits/chosen": -1.6395372152328491, + "logits/rejected": -1.5579493045806885, + "logps/chosen": -574.619140625, + "logps/rejected": -686.0782470703125, + "loss": 0.5562, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -3.0321133136749268, + "rewards/margins": 1.00796377658844, + "rewards/rejected": -4.040077209472656, + "step": 9890 + }, + { + "epoch": 0.65, + "learning_rate": 1.6636665405767666e-06, + "logits/chosen": -2.1289894580841064, + "logits/rejected": -1.7371848821640015, + "logps/chosen": -671.19921875, + "logps/rejected": -714.276611328125, + "loss": 0.6902, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.9874866008758545, + "rewards/margins": 0.4317949712276459, + "rewards/rejected": -3.4192817211151123, + "step": 9900 + }, + { + "epoch": 0.65, + "learning_rate": 1.6582877990686827e-06, + "logits/chosen": -2.2885613441467285, + "logits/rejected": -1.443866491317749, + "logps/chosen": -533.60400390625, + "logps/rejected": -611.7394409179688, + "loss": 0.438, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.6467955112457275, + "rewards/margins": 1.2188901901245117, + "rewards/rejected": -3.8656857013702393, + "step": 9910 + }, + { + "epoch": 0.65, + "learning_rate": 1.6529134483437562e-06, + "logits/chosen": -1.6534805297851562, + "logits/rejected": -1.2551298141479492, + "logps/chosen": -536.8887939453125, + "logps/rejected": -668.57275390625, + "loss": 0.5066, + "rewards/accuracies": 0.75, + "rewards/chosen": -3.161116123199463, + "rewards/margins": 1.3622961044311523, + "rewards/rejected": -4.523411750793457, + "step": 9920 + }, + { + "epoch": 0.65, + "learning_rate": 1.647543516437233e-06, + "logits/chosen": -1.8923059701919556, + "logits/rejected": -1.8489351272583008, + "logps/chosen": -535.9762573242188, + "logps/rejected": -653.535400390625, + "loss": 0.5691, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -2.6559455394744873, + "rewards/margins": 1.115609049797058, + "rewards/rejected": -3.771554470062256, + "step": 9930 + }, + { + "epoch": 0.65, + "learning_rate": 1.6421780313613088e-06, + "logits/chosen": -1.6177890300750732, + "logits/rejected": -1.644838571548462, + "logps/chosen": -591.5128784179688, + "logps/rejected": -762.8223876953125, + "loss": 0.4213, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -2.9691004753112793, + "rewards/margins": 1.322816014289856, + "rewards/rejected": -4.291916370391846, + "step": 9940 + }, + { + "epoch": 0.65, + "learning_rate": 1.6368170211049816e-06, + "logits/chosen": -2.2172207832336426, + "logits/rejected": -1.9700168371200562, + "logps/chosen": -635.7975463867188, + "logps/rejected": -708.9931030273438, + "loss": 0.4309, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -3.1618759632110596, + "rewards/margins": 1.135585069656372, + "rewards/rejected": -4.297461032867432, + "step": 9950 + }, + { + "epoch": 0.65, + "learning_rate": 1.6314605136339074e-06, + "logits/chosen": -1.5088202953338623, + "logits/rejected": -1.7007286548614502, + "logps/chosen": -557.1663818359375, + "logps/rejected": -690.3626708984375, + "loss": 0.5372, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -3.117827892303467, + "rewards/margins": 1.187142014503479, + "rewards/rejected": -4.304969787597656, + "step": 9960 + }, + { + "epoch": 0.65, + "learning_rate": 1.6261085368902526e-06, + "logits/chosen": -1.6839511394500732, + "logits/rejected": -1.834540605545044, + "logps/chosen": -605.2606811523438, + "logps/rejected": -610.1304931640625, + "loss": 0.4778, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -2.9750895500183105, + "rewards/margins": 1.0680017471313477, + "rewards/rejected": -4.043091297149658, + "step": 9970 + }, + { + "epoch": 0.65, + "learning_rate": 1.6207611187925503e-06, + "logits/chosen": -1.7885392904281616, + "logits/rejected": -1.3947422504425049, + "logps/chosen": -523.8977661132812, + "logps/rejected": -595.2725830078125, + "loss": 0.4977, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -2.456044912338257, + "rewards/margins": 1.3109487295150757, + "rewards/rejected": -3.766993761062622, + "step": 9980 + }, + { + "epoch": 0.65, + "learning_rate": 1.6154182872355512e-06, + "logits/chosen": -2.070439100265503, + "logits/rejected": -1.7619030475616455, + "logps/chosen": -579.6802978515625, + "logps/rejected": -636.4152221679688, + "loss": 0.5686, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -3.139418601989746, + "rewards/margins": 1.029687523841858, + "rewards/rejected": -4.169106483459473, + "step": 9990 + }, + { + "epoch": 0.65, + "learning_rate": 1.610080070090084e-06, + "logits/chosen": -2.3029110431671143, + "logits/rejected": -2.0725982189178467, + "logps/chosen": -669.8273315429688, + "logps/rejected": -659.200439453125, + "loss": 0.6379, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.107666492462158, + "rewards/margins": 1.127187728881836, + "rewards/rejected": -3.234853744506836, + "step": 10000 + }, + { + "epoch": 0.65, + "learning_rate": 1.6047464952029034e-06, + "logits/chosen": -2.423081636428833, + "logits/rejected": -1.1933789253234863, + "logps/chosen": -524.700439453125, + "logps/rejected": -526.11376953125, + "loss": 0.4875, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.9380455017089844, + "rewards/margins": 0.9216144680976868, + "rewards/rejected": -3.8596603870391846, + "step": 10010 + }, + { + "epoch": 0.66, + "learning_rate": 1.5994175903965486e-06, + "logits/chosen": -2.223822832107544, + "logits/rejected": -1.7636470794677734, + "logps/chosen": -618.0186767578125, + "logps/rejected": -664.9786376953125, + "loss": 0.5231, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -3.1082825660705566, + "rewards/margins": 0.6293025016784668, + "rewards/rejected": -3.7375850677490234, + "step": 10020 + }, + { + "epoch": 0.66, + "learning_rate": 1.5940933834691977e-06, + "logits/chosen": -2.235905408859253, + "logits/rejected": -1.820980429649353, + "logps/chosen": -634.0506591796875, + "logps/rejected": -723.7987060546875, + "loss": 0.3591, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -2.5085694789886475, + "rewards/margins": 1.4282045364379883, + "rewards/rejected": -3.9367740154266357, + "step": 10030 + }, + { + "epoch": 0.66, + "learning_rate": 1.588773902194522e-06, + "logits/chosen": -2.003666400909424, + "logits/rejected": -1.8740298748016357, + "logps/chosen": -648.436767578125, + "logps/rejected": -629.9469604492188, + "loss": 0.6544, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -3.544236421585083, + "rewards/margins": 0.5117180943489075, + "rewards/rejected": -4.055954933166504, + "step": 10040 + }, + { + "epoch": 0.66, + "learning_rate": 1.583459174321541e-06, + "logits/chosen": -2.2216591835021973, + "logits/rejected": -1.599656343460083, + "logps/chosen": -520.3749389648438, + "logps/rejected": -669.2892456054688, + "loss": 0.6699, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -2.5063908100128174, + "rewards/margins": 1.1887396574020386, + "rewards/rejected": -3.6951301097869873, + "step": 10050 + }, + { + "epoch": 0.66, + "learning_rate": 1.5781492275744797e-06, + "logits/chosen": -1.8967546224594116, + "logits/rejected": -2.1360228061676025, + "logps/chosen": -500.9303283691406, + "logps/rejected": -546.4171142578125, + "loss": 0.7569, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -2.699618101119995, + "rewards/margins": 0.2735558748245239, + "rewards/rejected": -2.9731738567352295, + "step": 10060 + }, + { + "epoch": 0.66, + "learning_rate": 1.5728440896526215e-06, + "logits/chosen": -2.0227789878845215, + "logits/rejected": -2.035097599029541, + "logps/chosen": -556.853271484375, + "logps/rejected": -697.7138671875, + "loss": 0.5068, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -2.4388175010681152, + "rewards/margins": 1.3052314519882202, + "rewards/rejected": -3.744049072265625, + "step": 10070 + }, + { + "epoch": 0.66, + "learning_rate": 1.5675437882301633e-06, + "logits/chosen": -1.372617483139038, + "logits/rejected": -1.4249074459075928, + "logps/chosen": -545.9317016601562, + "logps/rejected": -718.7574462890625, + "loss": 0.7194, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -3.4789633750915527, + "rewards/margins": 1.4255876541137695, + "rewards/rejected": -4.9045515060424805, + "step": 10080 + }, + { + "epoch": 0.66, + "learning_rate": 1.5622483509560748e-06, + "logits/chosen": -2.2033467292785645, + "logits/rejected": -2.130833148956299, + "logps/chosen": -560.2772216796875, + "logps/rejected": -674.1600952148438, + "loss": 0.3568, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -2.332453489303589, + "rewards/margins": 1.278029441833496, + "rewards/rejected": -3.610483169555664, + "step": 10090 + }, + { + "epoch": 0.66, + "learning_rate": 1.5569578054539506e-06, + "logits/chosen": -1.7471822500228882, + "logits/rejected": -1.7679237127304077, + "logps/chosen": -570.5139770507812, + "logps/rejected": -669.4573364257812, + "loss": 0.614, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -3.061995506286621, + "rewards/margins": 0.6868606805801392, + "rewards/rejected": -3.7488560676574707, + "step": 10100 + }, + { + "epoch": 0.66, + "learning_rate": 1.551672179321867e-06, + "logits/chosen": -2.377981662750244, + "logits/rejected": -2.1207613945007324, + "logps/chosen": -584.4964599609375, + "logps/rejected": -615.555419921875, + "loss": 0.6713, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -2.39784574508667, + "rewards/margins": 0.9840338826179504, + "rewards/rejected": -3.3818798065185547, + "step": 10110 + }, + { + "epoch": 0.66, + "learning_rate": 1.5463915001322398e-06, + "logits/chosen": -2.314440965652466, + "logits/rejected": -1.6883175373077393, + "logps/chosen": -631.7022094726562, + "logps/rejected": -669.0033569335938, + "loss": 0.6365, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.81516695022583, + "rewards/margins": 0.9370850324630737, + "rewards/rejected": -3.7522521018981934, + "step": 10120 + }, + { + "epoch": 0.66, + "learning_rate": 1.5411157954316784e-06, + "logits/chosen": -2.0021495819091797, + "logits/rejected": -1.4604111909866333, + "logps/chosen": -496.84881591796875, + "logps/rejected": -670.7314453125, + "loss": 0.4258, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -2.8273229598999023, + "rewards/margins": 1.669306993484497, + "rewards/rejected": -4.4966301918029785, + "step": 10130 + }, + { + "epoch": 0.66, + "learning_rate": 1.535845092740843e-06, + "logits/chosen": -2.4046027660369873, + "logits/rejected": -2.195103406906128, + "logps/chosen": -662.22705078125, + "logps/rejected": -719.7885131835938, + "loss": 0.481, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -2.198734760284424, + "rewards/margins": 1.1839425563812256, + "rewards/rejected": -3.3826770782470703, + "step": 10140 + }, + { + "epoch": 0.66, + "learning_rate": 1.5305794195543005e-06, + "logits/chosen": -1.5913124084472656, + "logits/rejected": -1.6547712087631226, + "logps/chosen": -525.8160400390625, + "logps/rejected": -589.795166015625, + "loss": 0.602, + "rewards/accuracies": 0.75, + "rewards/chosen": -3.008615732192993, + "rewards/margins": 0.8577588200569153, + "rewards/rejected": -3.8663744926452637, + "step": 10150 + }, + { + "epoch": 0.66, + "learning_rate": 1.5253188033403816e-06, + "logits/chosen": -2.0906519889831543, + "logits/rejected": -1.8118690252304077, + "logps/chosen": -604.877197265625, + "logps/rejected": -674.4083251953125, + "loss": 0.6464, + "rewards/accuracies": 0.75, + "rewards/chosen": -3.0363845825195312, + "rewards/margins": 0.5893286466598511, + "rewards/rejected": -3.6257128715515137, + "step": 10160 + }, + { + "epoch": 0.67, + "learning_rate": 1.520063271541037e-06, + "logits/chosen": -1.967552900314331, + "logits/rejected": -1.936489462852478, + "logps/chosen": -548.0531005859375, + "logps/rejected": -648.8834838867188, + "loss": 0.5697, + "rewards/accuracies": 0.75, + "rewards/chosen": -3.128791332244873, + "rewards/margins": 1.1066060066223145, + "rewards/rejected": -4.235397815704346, + "step": 10170 + }, + { + "epoch": 0.67, + "learning_rate": 1.5148128515716954e-06, + "logits/chosen": -2.1086018085479736, + "logits/rejected": -1.317168116569519, + "logps/chosen": -588.2862548828125, + "logps/rejected": -667.6849975585938, + "loss": 0.5786, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -2.9626882076263428, + "rewards/margins": 1.2267658710479736, + "rewards/rejected": -4.189453601837158, + "step": 10180 + }, + { + "epoch": 0.67, + "learning_rate": 1.5095675708211197e-06, + "logits/chosen": -1.7038860321044922, + "logits/rejected": -1.7373549938201904, + "logps/chosen": -510.05029296875, + "logps/rejected": -598.3043212890625, + "loss": 0.7414, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -2.886688232421875, + "rewards/margins": 0.5040321350097656, + "rewards/rejected": -3.3907203674316406, + "step": 10190 + }, + { + "epoch": 0.67, + "learning_rate": 1.504327456651263e-06, + "logits/chosen": -2.347780704498291, + "logits/rejected": -2.138662815093994, + "logps/chosen": -569.8806762695312, + "logps/rejected": -697.031494140625, + "loss": 0.3543, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -2.3554959297180176, + "rewards/margins": 1.7921050786972046, + "rewards/rejected": -4.147601127624512, + "step": 10200 + }, + { + "epoch": 0.67, + "learning_rate": 1.4990925363971284e-06, + "logits/chosen": -1.9000709056854248, + "logits/rejected": -1.8390512466430664, + "logps/chosen": -650.6390991210938, + "logps/rejected": -682.9108276367188, + "loss": 0.5153, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -3.575188159942627, + "rewards/margins": 0.5928482413291931, + "rewards/rejected": -4.168036460876465, + "step": 10210 + }, + { + "epoch": 0.67, + "learning_rate": 1.4938628373666236e-06, + "logits/chosen": -2.047325849533081, + "logits/rejected": -2.0585241317749023, + "logps/chosen": -555.3873291015625, + "logps/rejected": -583.078369140625, + "loss": 0.5613, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -3.083057403564453, + "rewards/margins": 0.7428995966911316, + "rewards/rejected": -3.8259568214416504, + "step": 10220 + }, + { + "epoch": 0.67, + "learning_rate": 1.4886383868404203e-06, + "logits/chosen": -1.6983534097671509, + "logits/rejected": -1.745357871055603, + "logps/chosen": -581.9317016601562, + "logps/rejected": -669.6945190429688, + "loss": 0.6381, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -3.7163448333740234, + "rewards/margins": 0.955405056476593, + "rewards/rejected": -4.671749591827393, + "step": 10230 + }, + { + "epoch": 0.67, + "learning_rate": 1.483419212071813e-06, + "logits/chosen": -2.0277183055877686, + "logits/rejected": -2.0014185905456543, + "logps/chosen": -546.6410522460938, + "logps/rejected": -748.728515625, + "loss": 0.4608, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.4395718574523926, + "rewards/margins": 1.5673308372497559, + "rewards/rejected": -4.006903171539307, + "step": 10240 + }, + { + "epoch": 0.67, + "learning_rate": 1.478205340286573e-06, + "logits/chosen": -2.0982167720794678, + "logits/rejected": -2.0167489051818848, + "logps/chosen": -541.8618774414062, + "logps/rejected": -570.8551025390625, + "loss": 0.5548, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -2.229513645172119, + "rewards/margins": 0.7931705713272095, + "rewards/rejected": -3.02268385887146, + "step": 10250 + }, + { + "epoch": 0.67, + "learning_rate": 1.4729967986828104e-06, + "logits/chosen": -2.021984815597534, + "logits/rejected": -2.096220016479492, + "logps/chosen": -601.5169677734375, + "logps/rejected": -682.0748291015625, + "loss": 0.597, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.8182146549224854, + "rewards/margins": 1.1306148767471313, + "rewards/rejected": -3.948829174041748, + "step": 10260 + }, + { + "epoch": 0.67, + "learning_rate": 1.4677936144308286e-06, + "logits/chosen": -2.0748703479766846, + "logits/rejected": -2.051962375640869, + "logps/chosen": -708.6862182617188, + "logps/rejected": -712.1474609375, + "loss": 0.6064, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -2.689055919647217, + "rewards/margins": 0.6155595779418945, + "rewards/rejected": -3.3046157360076904, + "step": 10270 + }, + { + "epoch": 0.67, + "learning_rate": 1.4625958146729864e-06, + "logits/chosen": -2.340296983718872, + "logits/rejected": -1.7309805154800415, + "logps/chosen": -576.7568969726562, + "logps/rejected": -639.8253173828125, + "loss": 0.4493, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -2.4062986373901367, + "rewards/margins": 1.3249199390411377, + "rewards/rejected": -3.7312183380126953, + "step": 10280 + }, + { + "epoch": 0.67, + "learning_rate": 1.4574034265235523e-06, + "logits/chosen": -2.414374828338623, + "logits/rejected": -2.1356239318847656, + "logps/chosen": -620.1607666015625, + "logps/rejected": -600.0708618164062, + "loss": 0.6447, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.010559320449829, + "rewards/margins": 0.8874303698539734, + "rewards/rejected": -2.897989511489868, + "step": 10290 + }, + { + "epoch": 0.67, + "learning_rate": 1.452216477068568e-06, + "logits/chosen": -1.9850013256072998, + "logits/rejected": -1.7580883502960205, + "logps/chosen": -517.3007202148438, + "logps/rejected": -688.1248779296875, + "loss": 0.5061, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -2.390328884124756, + "rewards/margins": 1.4543589353561401, + "rewards/rejected": -3.8446884155273438, + "step": 10300 + }, + { + "epoch": 0.67, + "learning_rate": 1.4470349933657004e-06, + "logits/chosen": -2.2277064323425293, + "logits/rejected": -2.022275447845459, + "logps/chosen": -511.62371826171875, + "logps/rejected": -570.8912353515625, + "loss": 0.5835, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -2.646738052368164, + "rewards/margins": 0.7845074534416199, + "rewards/rejected": -3.4312453269958496, + "step": 10310 + }, + { + "epoch": 0.68, + "learning_rate": 1.4418590024441096e-06, + "logits/chosen": -1.8264057636260986, + "logits/rejected": -1.7782049179077148, + "logps/chosen": -566.3980712890625, + "logps/rejected": -676.2247314453125, + "loss": 0.7352, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -2.8814053535461426, + "rewards/margins": 0.19260264933109283, + "rewards/rejected": -3.074007987976074, + "step": 10320 + }, + { + "epoch": 0.68, + "learning_rate": 1.436688531304297e-06, + "logits/chosen": -1.926770567893982, + "logits/rejected": -1.6952238082885742, + "logps/chosen": -555.2659912109375, + "logps/rejected": -568.3193969726562, + "loss": 0.4095, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -2.293551445007324, + "rewards/margins": 1.1236270666122437, + "rewards/rejected": -3.4171783924102783, + "step": 10330 + }, + { + "epoch": 0.68, + "learning_rate": 1.431523606917974e-06, + "logits/chosen": -2.0901577472686768, + "logits/rejected": -1.7170679569244385, + "logps/chosen": -594.2264404296875, + "logps/rejected": -695.5934448242188, + "loss": 0.4763, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -3.049959659576416, + "rewards/margins": 1.3577804565429688, + "rewards/rejected": -4.407739639282227, + "step": 10340 + }, + { + "epoch": 0.68, + "learning_rate": 1.4263642562279162e-06, + "logits/chosen": -1.9279849529266357, + "logits/rejected": -1.704097032546997, + "logps/chosen": -515.13037109375, + "logps/rejected": -701.1751098632812, + "loss": 0.4295, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.3443188667297363, + "rewards/margins": 1.0688626766204834, + "rewards/rejected": -3.413181781768799, + "step": 10350 + }, + { + "epoch": 0.68, + "learning_rate": 1.4212105061478257e-06, + "logits/chosen": -2.2097935676574707, + "logits/rejected": -1.745566725730896, + "logps/chosen": -628.4295654296875, + "logps/rejected": -493.8260803222656, + "loss": 0.5575, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -2.012010097503662, + "rewards/margins": 0.5730947256088257, + "rewards/rejected": -2.5851049423217773, + "step": 10360 + }, + { + "epoch": 0.68, + "learning_rate": 1.4160623835621848e-06, + "logits/chosen": -2.002143144607544, + "logits/rejected": -1.9734750986099243, + "logps/chosen": -690.5743408203125, + "logps/rejected": -660.885498046875, + "loss": 0.6055, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -2.6461167335510254, + "rewards/margins": 1.0223562717437744, + "rewards/rejected": -3.668473482131958, + "step": 10370 + }, + { + "epoch": 0.68, + "learning_rate": 1.4109199153261249e-06, + "logits/chosen": -1.7158596515655518, + "logits/rejected": -2.035625696182251, + "logps/chosen": -531.4519653320312, + "logps/rejected": -691.5840454101562, + "loss": 0.3877, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -2.5169262886047363, + "rewards/margins": 1.0818930864334106, + "rewards/rejected": -3.5988192558288574, + "step": 10380 + }, + { + "epoch": 0.68, + "learning_rate": 1.405783128265278e-06, + "logits/chosen": -1.9189598560333252, + "logits/rejected": -1.8550268411636353, + "logps/chosen": -555.7239990234375, + "logps/rejected": -575.2282104492188, + "loss": 0.4999, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.295558452606201, + "rewards/margins": 0.9672013521194458, + "rewards/rejected": -3.2627596855163574, + "step": 10390 + }, + { + "epoch": 0.68, + "learning_rate": 1.4006520491756427e-06, + "logits/chosen": -1.7126344442367554, + "logits/rejected": -2.134204626083374, + "logps/chosen": -494.3365783691406, + "logps/rejected": -612.637939453125, + "loss": 0.6658, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.496967315673828, + "rewards/margins": 0.8580994606018066, + "rewards/rejected": -3.355067014694214, + "step": 10400 + }, + { + "epoch": 0.68, + "learning_rate": 1.39552670482344e-06, + "logits/chosen": -2.0910916328430176, + "logits/rejected": -1.7963777780532837, + "logps/chosen": -440.82135009765625, + "logps/rejected": -563.18310546875, + "loss": 0.4642, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -2.44130802154541, + "rewards/margins": 0.9846822023391724, + "rewards/rejected": -3.425990581512451, + "step": 10410 + }, + { + "epoch": 0.68, + "learning_rate": 1.3904071219449776e-06, + "logits/chosen": -1.9734894037246704, + "logits/rejected": -1.7503770589828491, + "logps/chosen": -541.8358154296875, + "logps/rejected": -622.4808349609375, + "loss": 0.4553, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.94407320022583, + "rewards/margins": 0.8452234268188477, + "rewards/rejected": -3.7892966270446777, + "step": 10420 + }, + { + "epoch": 0.68, + "learning_rate": 1.3852933272465068e-06, + "logits/chosen": -2.388002395629883, + "logits/rejected": -1.7295029163360596, + "logps/chosen": -581.5325927734375, + "logps/rejected": -617.9428100585938, + "loss": 0.6602, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.7455406188964844, + "rewards/margins": 1.230202317237854, + "rewards/rejected": -3.975742816925049, + "step": 10430 + }, + { + "epoch": 0.68, + "learning_rate": 1.3801853474040873e-06, + "logits/chosen": -1.818842887878418, + "logits/rejected": -1.7371982336044312, + "logps/chosen": -609.0447998046875, + "logps/rejected": -701.6209106445312, + "loss": 0.4102, + "rewards/accuracies": 0.75, + "rewards/chosen": -3.2907192707061768, + "rewards/margins": 0.7941431999206543, + "rewards/rejected": -4.084862232208252, + "step": 10440 + }, + { + "epoch": 0.68, + "learning_rate": 1.3750832090634417e-06, + "logits/chosen": -2.0449259281158447, + "logits/rejected": -1.609819769859314, + "logps/chosen": -622.4576416015625, + "logps/rejected": -702.9534301757812, + "loss": 0.5342, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -3.222707748413086, + "rewards/margins": 1.2670750617980957, + "rewards/rejected": -4.489782333374023, + "step": 10450 + }, + { + "epoch": 0.68, + "learning_rate": 1.3699869388398245e-06, + "logits/chosen": -1.9015638828277588, + "logits/rejected": -1.8529236316680908, + "logps/chosen": -634.927001953125, + "logps/rejected": -543.111083984375, + "loss": 0.5947, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -3.228215456008911, + "rewards/margins": 0.367938369512558, + "rewards/rejected": -3.596153974533081, + "step": 10460 + }, + { + "epoch": 0.69, + "learning_rate": 1.3648965633178772e-06, + "logits/chosen": -2.254178285598755, + "logits/rejected": -1.8249447345733643, + "logps/chosen": -581.3519897460938, + "logps/rejected": -621.2752685546875, + "loss": 0.5659, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -2.8849399089813232, + "rewards/margins": 0.8968615531921387, + "rewards/rejected": -3.781801223754883, + "step": 10470 + }, + { + "epoch": 0.69, + "learning_rate": 1.3598121090514938e-06, + "logits/chosen": -1.8592582941055298, + "logits/rejected": -2.095013380050659, + "logps/chosen": -501.6394958496094, + "logps/rejected": -641.8675537109375, + "loss": 0.5356, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.5355939865112305, + "rewards/margins": 0.7130419611930847, + "rewards/rejected": -3.248635768890381, + "step": 10480 + }, + { + "epoch": 0.69, + "learning_rate": 1.3547336025636753e-06, + "logits/chosen": -1.9418447017669678, + "logits/rejected": -1.9443689584732056, + "logps/chosen": -624.2605590820312, + "logps/rejected": -689.8292846679688, + "loss": 0.592, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.4760632514953613, + "rewards/margins": 1.5628429651260376, + "rewards/rejected": -4.038906097412109, + "step": 10490 + }, + { + "epoch": 0.69, + "learning_rate": 1.3496610703464022e-06, + "logits/chosen": -2.1925435066223145, + "logits/rejected": -2.0867669582366943, + "logps/chosen": -584.0159301757812, + "logps/rejected": -752.5704345703125, + "loss": 0.5222, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -2.7059502601623535, + "rewards/margins": 1.3427493572235107, + "rewards/rejected": -4.048699378967285, + "step": 10500 + }, + { + "epoch": 0.69, + "learning_rate": 1.3445945388604848e-06, + "logits/chosen": -1.9044320583343506, + "logits/rejected": -1.698246717453003, + "logps/chosen": -550.87060546875, + "logps/rejected": -650.580078125, + "loss": 0.5889, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -3.0925121307373047, + "rewards/margins": 1.2077891826629639, + "rewards/rejected": -4.3003010749816895, + "step": 10510 + }, + { + "epoch": 0.69, + "learning_rate": 1.3395340345354358e-06, + "logits/chosen": -1.8592084646224976, + "logits/rejected": -1.685819387435913, + "logps/chosen": -542.7408447265625, + "logps/rejected": -685.5568237304688, + "loss": 0.4553, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -2.5574772357940674, + "rewards/margins": 1.348659634590149, + "rewards/rejected": -3.9061362743377686, + "step": 10520 + }, + { + "epoch": 0.69, + "learning_rate": 1.334479583769322e-06, + "logits/chosen": -1.6842561960220337, + "logits/rejected": -1.757942795753479, + "logps/chosen": -540.4334106445312, + "logps/rejected": -558.3435668945312, + "loss": 0.8053, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -2.7499144077301025, + "rewards/margins": 0.581194281578064, + "rewards/rejected": -3.331108808517456, + "step": 10530 + }, + { + "epoch": 0.69, + "learning_rate": 1.3294312129286366e-06, + "logits/chosen": -2.046003818511963, + "logits/rejected": -1.7724971771240234, + "logps/chosen": -688.4324951171875, + "logps/rejected": -720.7156982421875, + "loss": 0.4639, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -2.893906354904175, + "rewards/margins": 1.4649419784545898, + "rewards/rejected": -4.3588480949401855, + "step": 10540 + }, + { + "epoch": 0.69, + "learning_rate": 1.324388948348153e-06, + "logits/chosen": -2.0161421298980713, + "logits/rejected": -2.1307661533355713, + "logps/chosen": -531.250732421875, + "logps/rejected": -607.2823486328125, + "loss": 0.5373, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.34794545173645, + "rewards/margins": 1.0986201763153076, + "rewards/rejected": -3.446566104888916, + "step": 10550 + }, + { + "epoch": 0.69, + "learning_rate": 1.319352816330796e-06, + "logits/chosen": -1.8210279941558838, + "logits/rejected": -2.1046142578125, + "logps/chosen": -501.18353271484375, + "logps/rejected": -660.062744140625, + "loss": 0.4282, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -2.3282196521759033, + "rewards/margins": 1.038062334060669, + "rewards/rejected": -3.3662819862365723, + "step": 10560 + }, + { + "epoch": 0.69, + "learning_rate": 1.314322843147494e-06, + "logits/chosen": -1.6058871746063232, + "logits/rejected": -1.4052083492279053, + "logps/chosen": -472.9828186035156, + "logps/rejected": -602.2042846679688, + "loss": 0.5213, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -2.9463601112365723, + "rewards/margins": 1.2267777919769287, + "rewards/rejected": -4.173138618469238, + "step": 10570 + }, + { + "epoch": 0.69, + "learning_rate": 1.3092990550370526e-06, + "logits/chosen": -1.8036601543426514, + "logits/rejected": -1.378004789352417, + "logps/chosen": -599.864013671875, + "logps/rejected": -751.0603637695312, + "loss": 0.6359, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -2.5068113803863525, + "rewards/margins": 1.4007375240325928, + "rewards/rejected": -3.907548427581787, + "step": 10580 + }, + { + "epoch": 0.69, + "learning_rate": 1.3042814782060131e-06, + "logits/chosen": -1.947623610496521, + "logits/rejected": -1.6988731622695923, + "logps/chosen": -516.1401977539062, + "logps/rejected": -707.0333251953125, + "loss": 0.4308, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -2.8791418075561523, + "rewards/margins": 1.8367408514022827, + "rewards/rejected": -4.715882301330566, + "step": 10590 + }, + { + "epoch": 0.69, + "learning_rate": 1.2992701388285112e-06, + "logits/chosen": -2.125699520111084, + "logits/rejected": -1.5845218896865845, + "logps/chosen": -580.4114990234375, + "logps/rejected": -618.8101806640625, + "loss": 0.6051, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -2.8320207595825195, + "rewards/margins": 1.1038181781768799, + "rewards/rejected": -3.9358391761779785, + "step": 10600 + }, + { + "epoch": 0.69, + "learning_rate": 1.29426506304615e-06, + "logits/chosen": -2.3026821613311768, + "logits/rejected": -1.843064308166504, + "logps/chosen": -664.8126831054688, + "logps/rejected": -697.6212768554688, + "loss": 0.446, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -2.479389190673828, + "rewards/margins": 1.1389353275299072, + "rewards/rejected": -3.6183242797851562, + "step": 10610 + }, + { + "epoch": 0.69, + "learning_rate": 1.289266276967855e-06, + "logits/chosen": -1.8575429916381836, + "logits/rejected": -1.7204229831695557, + "logps/chosen": -513.6630859375, + "logps/rejected": -561.46044921875, + "loss": 0.456, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -2.7641661167144775, + "rewards/margins": 1.3243200778961182, + "rewards/rejected": -4.0884857177734375, + "step": 10620 + }, + { + "epoch": 0.7, + "learning_rate": 1.284273806669745e-06, + "logits/chosen": -1.9443877935409546, + "logits/rejected": -1.904849648475647, + "logps/chosen": -579.8768310546875, + "logps/rejected": -575.4515380859375, + "loss": 0.656, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -3.2298901081085205, + "rewards/margins": 0.28284725546836853, + "rewards/rejected": -3.51273775100708, + "step": 10630 + }, + { + "epoch": 0.7, + "learning_rate": 1.2792876781949884e-06, + "logits/chosen": -1.764835000038147, + "logits/rejected": -1.8784644603729248, + "logps/chosen": -462.2169494628906, + "logps/rejected": -620.6724853515625, + "loss": 0.4777, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -2.609870433807373, + "rewards/margins": 0.887588620185852, + "rewards/rejected": -3.4974589347839355, + "step": 10640 + }, + { + "epoch": 0.7, + "learning_rate": 1.274307917553676e-06, + "logits/chosen": -2.0640013217926025, + "logits/rejected": -1.8911521434783936, + "logps/chosen": -616.5274658203125, + "logps/rejected": -635.0863037109375, + "loss": 0.6024, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -2.588681936264038, + "rewards/margins": 1.017657995223999, + "rewards/rejected": -3.606339931488037, + "step": 10650 + }, + { + "epoch": 0.7, + "learning_rate": 1.2693345507226767e-06, + "logits/chosen": -2.1492960453033447, + "logits/rejected": -1.4352697134017944, + "logps/chosen": -565.2218017578125, + "logps/rejected": -634.83447265625, + "loss": 0.6638, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -2.7949912548065186, + "rewards/margins": 1.2197343111038208, + "rewards/rejected": -4.014725685119629, + "step": 10660 + }, + { + "epoch": 0.7, + "learning_rate": 1.2643676036455099e-06, + "logits/chosen": -2.426874876022339, + "logits/rejected": -1.861326813697815, + "logps/chosen": -649.0333251953125, + "logps/rejected": -665.9698486328125, + "loss": 0.634, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -3.055633068084717, + "rewards/margins": 0.6452008485794067, + "rewards/rejected": -3.700833797454834, + "step": 10670 + }, + { + "epoch": 0.7, + "learning_rate": 1.259407102232203e-06, + "logits/chosen": -1.9577795267105103, + "logits/rejected": -1.520578145980835, + "logps/chosen": -484.3440856933594, + "logps/rejected": -582.2489013671875, + "loss": 0.5464, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -2.621001720428467, + "rewards/margins": 1.1693580150604248, + "rewards/rejected": -3.79036021232605, + "step": 10680 + }, + { + "epoch": 0.7, + "learning_rate": 1.254453072359163e-06, + "logits/chosen": -2.0032951831817627, + "logits/rejected": -2.275364637374878, + "logps/chosen": -531.3727416992188, + "logps/rejected": -713.6097412109375, + "loss": 0.4103, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -2.579955577850342, + "rewards/margins": 1.1371161937713623, + "rewards/rejected": -3.7170722484588623, + "step": 10690 + }, + { + "epoch": 0.7, + "learning_rate": 1.2495055398690337e-06, + "logits/chosen": -1.7794713973999023, + "logits/rejected": -1.7121944427490234, + "logps/chosen": -581.0518798828125, + "logps/rejected": -602.139404296875, + "loss": 0.606, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.219092845916748, + "rewards/margins": 0.5926779508590698, + "rewards/rejected": -3.8117709159851074, + "step": 10700 + }, + { + "epoch": 0.7, + "learning_rate": 1.2445645305705718e-06, + "logits/chosen": -2.093291759490967, + "logits/rejected": -1.5274455547332764, + "logps/chosen": -557.7086181640625, + "logps/rejected": -600.8196411132812, + "loss": 0.4573, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -2.3752002716064453, + "rewards/margins": 1.2375011444091797, + "rewards/rejected": -3.612701416015625, + "step": 10710 + }, + { + "epoch": 0.7, + "learning_rate": 1.2396300702384995e-06, + "logits/chosen": -1.8599927425384521, + "logits/rejected": -1.9693912267684937, + "logps/chosen": -517.023681640625, + "logps/rejected": -709.513671875, + "loss": 0.5622, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -2.9134440422058105, + "rewards/margins": 1.4009777307510376, + "rewards/rejected": -4.314421653747559, + "step": 10720 + }, + { + "epoch": 0.7, + "learning_rate": 1.234702184613381e-06, + "logits/chosen": -2.248994827270508, + "logits/rejected": -2.2692863941192627, + "logps/chosen": -476.87890625, + "logps/rejected": -669.5069580078125, + "loss": 0.4611, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.1680808067321777, + "rewards/margins": 1.2293837070465088, + "rewards/rejected": -3.3974647521972656, + "step": 10730 + }, + { + "epoch": 0.7, + "learning_rate": 1.2297808994014793e-06, + "logits/chosen": -1.8323322534561157, + "logits/rejected": -1.673384428024292, + "logps/chosen": -607.1715087890625, + "logps/rejected": -587.4119262695312, + "loss": 0.5794, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -2.567920207977295, + "rewards/margins": 0.7959483861923218, + "rewards/rejected": -3.363868236541748, + "step": 10740 + }, + { + "epoch": 0.7, + "learning_rate": 1.2248662402746314e-06, + "logits/chosen": -1.3988916873931885, + "logits/rejected": -1.6921294927597046, + "logps/chosen": -612.28857421875, + "logps/rejected": -778.8629760742188, + "loss": 0.4916, + "rewards/accuracies": 0.75, + "rewards/chosen": -3.053574800491333, + "rewards/margins": 1.2433674335479736, + "rewards/rejected": -4.296942234039307, + "step": 10750 + }, + { + "epoch": 0.7, + "learning_rate": 1.2199582328701045e-06, + "logits/chosen": -1.9762433767318726, + "logits/rejected": -2.0665011405944824, + "logps/chosen": -458.28076171875, + "logps/rejected": -568.51806640625, + "loss": 0.4898, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -1.9166343212127686, + "rewards/margins": 1.0887027978897095, + "rewards/rejected": -3.0053372383117676, + "step": 10760 + }, + { + "epoch": 0.7, + "learning_rate": 1.2150569027904712e-06, + "logits/chosen": -2.140639305114746, + "logits/rejected": -2.0418004989624023, + "logps/chosen": -533.3043212890625, + "logps/rejected": -749.2007446289062, + "loss": 0.7594, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -2.868107318878174, + "rewards/margins": 1.3112220764160156, + "rewards/rejected": -4.179329872131348, + "step": 10770 + }, + { + "epoch": 0.71, + "learning_rate": 1.2101622756034688e-06, + "logits/chosen": -2.2054402828216553, + "logits/rejected": -2.0917115211486816, + "logps/chosen": -637.744873046875, + "logps/rejected": -625.0416870117188, + "loss": 0.627, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -2.1930999755859375, + "rewards/margins": 0.7180913686752319, + "rewards/rejected": -2.911191463470459, + "step": 10780 + }, + { + "epoch": 0.71, + "learning_rate": 1.2052743768418715e-06, + "logits/chosen": -2.3397207260131836, + "logits/rejected": -1.8472929000854492, + "logps/chosen": -684.1767578125, + "logps/rejected": -793.119873046875, + "loss": 0.7048, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -2.507220983505249, + "rewards/margins": 1.1414546966552734, + "rewards/rejected": -3.6486754417419434, + "step": 10790 + }, + { + "epoch": 0.71, + "learning_rate": 1.2003932320033523e-06, + "logits/chosen": -1.8293190002441406, + "logits/rejected": -1.995443344116211, + "logps/chosen": -535.847412109375, + "logps/rejected": -565.1732788085938, + "loss": 0.6591, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.123100996017456, + "rewards/margins": 0.29064124822616577, + "rewards/rejected": -3.4137425422668457, + "step": 10800 + }, + { + "epoch": 0.71, + "learning_rate": 1.1955188665503553e-06, + "logits/chosen": -2.08339262008667, + "logits/rejected": -2.1265950202941895, + "logps/chosen": -640.2640380859375, + "logps/rejected": -620.4893188476562, + "loss": 0.5797, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -3.060319423675537, + "rewards/margins": 0.6381840705871582, + "rewards/rejected": -3.6985034942626953, + "step": 10810 + }, + { + "epoch": 0.71, + "learning_rate": 1.1906513059099566e-06, + "logits/chosen": -1.7086389064788818, + "logits/rejected": -1.8432223796844482, + "logps/chosen": -568.55810546875, + "logps/rejected": -727.3695068359375, + "loss": 0.427, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -3.1456050872802734, + "rewards/margins": 1.374998688697815, + "rewards/rejected": -4.520603656768799, + "step": 10820 + }, + { + "epoch": 0.71, + "learning_rate": 1.185790575473738e-06, + "logits/chosen": -2.117264747619629, + "logits/rejected": -1.9786115884780884, + "logps/chosen": -623.8470458984375, + "logps/rejected": -653.810546875, + "loss": 0.4546, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -2.6704392433166504, + "rewards/margins": 1.197432279586792, + "rewards/rejected": -3.8678722381591797, + "step": 10830 + }, + { + "epoch": 0.71, + "learning_rate": 1.1809367005976516e-06, + "logits/chosen": -2.156859874725342, + "logits/rejected": -1.6391792297363281, + "logps/chosen": -539.9671630859375, + "logps/rejected": -634.4904174804688, + "loss": 0.58, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.6767139434814453, + "rewards/margins": 1.1068816184997559, + "rewards/rejected": -3.7835960388183594, + "step": 10840 + }, + { + "epoch": 0.71, + "learning_rate": 1.1760897066018842e-06, + "logits/chosen": -2.143618106842041, + "logits/rejected": -1.9632008075714111, + "logps/chosen": -496.422119140625, + "logps/rejected": -600.4456787109375, + "loss": 0.6026, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -2.6161766052246094, + "rewards/margins": 1.2766454219818115, + "rewards/rejected": -3.892821788787842, + "step": 10850 + }, + { + "epoch": 0.71, + "learning_rate": 1.1712496187707327e-06, + "logits/chosen": -2.378615140914917, + "logits/rejected": -2.21158504486084, + "logps/chosen": -521.894287109375, + "logps/rejected": -573.9391479492188, + "loss": 0.5006, + "rewards/accuracies": 0.75, + "rewards/chosen": -1.9917871952056885, + "rewards/margins": 1.0569854974746704, + "rewards/rejected": -3.0487725734710693, + "step": 10860 + }, + { + "epoch": 0.71, + "learning_rate": 1.1664164623524646e-06, + "logits/chosen": -2.248140811920166, + "logits/rejected": -1.6976674795150757, + "logps/chosen": -523.8060913085938, + "logps/rejected": -641.4791259765625, + "loss": 0.5039, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -2.4532601833343506, + "rewards/margins": 1.2342567443847656, + "rewards/rejected": -3.687516689300537, + "step": 10870 + }, + { + "epoch": 0.71, + "learning_rate": 1.1615902625591926e-06, + "logits/chosen": -2.197209358215332, + "logits/rejected": -1.9862515926361084, + "logps/chosen": -601.8943481445312, + "logps/rejected": -616.5602416992188, + "loss": 0.7245, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -3.1172738075256348, + "rewards/margins": 0.2903970181941986, + "rewards/rejected": -3.4076714515686035, + "step": 10880 + }, + { + "epoch": 0.71, + "learning_rate": 1.156771044566738e-06, + "logits/chosen": -2.0141053199768066, + "logits/rejected": -1.8743298053741455, + "logps/chosen": -627.9108276367188, + "logps/rejected": -661.3457641601562, + "loss": 0.6366, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.6688072681427, + "rewards/margins": 1.156123399734497, + "rewards/rejected": -3.8249306678771973, + "step": 10890 + }, + { + "epoch": 0.71, + "learning_rate": 1.1519588335145037e-06, + "logits/chosen": -1.9702669382095337, + "logits/rejected": -1.8649107217788696, + "logps/chosen": -525.55517578125, + "logps/rejected": -649.5360717773438, + "loss": 0.5939, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -3.1571083068847656, + "rewards/margins": 0.785507321357727, + "rewards/rejected": -3.942615509033203, + "step": 10900 + }, + { + "epoch": 0.71, + "learning_rate": 1.1471536545053382e-06, + "logits/chosen": -2.1818172931671143, + "logits/rejected": -2.2146239280700684, + "logps/chosen": -674.1969604492188, + "logps/rejected": -687.8861083984375, + "loss": 0.5476, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -2.312225580215454, + "rewards/margins": 1.3034298419952393, + "rewards/rejected": -3.6156551837921143, + "step": 10910 + }, + { + "epoch": 0.71, + "learning_rate": 1.1423555326054112e-06, + "logits/chosen": -2.0928287506103516, + "logits/rejected": -1.7444454431533813, + "logps/chosen": -598.6189575195312, + "logps/rejected": -651.1210327148438, + "loss": 0.2837, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -2.289663553237915, + "rewards/margins": 1.7426624298095703, + "rewards/rejected": -4.032325744628906, + "step": 10920 + }, + { + "epoch": 0.72, + "learning_rate": 1.1375644928440743e-06, + "logits/chosen": -1.9880787134170532, + "logits/rejected": -1.7319841384887695, + "logps/chosen": -569.8656005859375, + "logps/rejected": -608.8934326171875, + "loss": 0.5198, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -2.71450138092041, + "rewards/margins": 0.8693853616714478, + "rewards/rejected": -3.5838871002197266, + "step": 10930 + }, + { + "epoch": 0.72, + "learning_rate": 1.1327805602137396e-06, + "logits/chosen": -1.8245779275894165, + "logits/rejected": -1.6106252670288086, + "logps/chosen": -597.9203491210938, + "logps/rejected": -727.8937377929688, + "loss": 0.3286, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -2.8491742610931396, + "rewards/margins": 1.6420882940292358, + "rewards/rejected": -4.491262912750244, + "step": 10940 + }, + { + "epoch": 0.72, + "learning_rate": 1.1280037596697426e-06, + "logits/chosen": -2.0003836154937744, + "logits/rejected": -1.841463327407837, + "logps/chosen": -608.0892333984375, + "logps/rejected": -667.3717041015625, + "loss": 0.4502, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -2.653883218765259, + "rewards/margins": 1.31430184841156, + "rewards/rejected": -3.9681849479675293, + "step": 10950 + }, + { + "epoch": 0.72, + "learning_rate": 1.123234116130216e-06, + "logits/chosen": -2.1562633514404297, + "logits/rejected": -1.8884432315826416, + "logps/chosen": -569.6181640625, + "logps/rejected": -645.463134765625, + "loss": 0.4352, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -2.557612657546997, + "rewards/margins": 0.9821327328681946, + "rewards/rejected": -3.539745330810547, + "step": 10960 + }, + { + "epoch": 0.72, + "learning_rate": 1.1184716544759553e-06, + "logits/chosen": -2.110089063644409, + "logits/rejected": -1.994105339050293, + "logps/chosen": -532.6145629882812, + "logps/rejected": -578.947998046875, + "loss": 0.5566, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -2.7645394802093506, + "rewards/margins": 0.3476647436618805, + "rewards/rejected": -3.1122043132781982, + "step": 10970 + }, + { + "epoch": 0.72, + "learning_rate": 1.1137163995502948e-06, + "logits/chosen": -2.067387104034424, + "logits/rejected": -1.7719135284423828, + "logps/chosen": -572.7210693359375, + "logps/rejected": -560.327880859375, + "loss": 0.7995, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.173888683319092, + "rewards/margins": 0.44579392671585083, + "rewards/rejected": -3.6196823120117188, + "step": 10980 + }, + { + "epoch": 0.72, + "learning_rate": 1.1089683761589717e-06, + "logits/chosen": -2.198796272277832, + "logits/rejected": -1.8594017028808594, + "logps/chosen": -494.27099609375, + "logps/rejected": -558.8394775390625, + "loss": 0.6524, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -2.8282618522644043, + "rewards/margins": 0.6688450574874878, + "rewards/rejected": -3.4971070289611816, + "step": 10990 + }, + { + "epoch": 0.72, + "learning_rate": 1.1042276090700044e-06, + "logits/chosen": -1.8905689716339111, + "logits/rejected": -1.600900411605835, + "logps/chosen": -514.0736083984375, + "logps/rejected": -618.9229125976562, + "loss": 0.3493, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -2.639007091522217, + "rewards/margins": 1.3429841995239258, + "rewards/rejected": -3.9819908142089844, + "step": 11000 + }, + { + "epoch": 0.72, + "learning_rate": 1.0994941230135536e-06, + "logits/chosen": -2.286539077758789, + "logits/rejected": -1.9388920068740845, + "logps/chosen": -613.9688720703125, + "logps/rejected": -747.5011596679688, + "loss": 0.4968, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -2.7653839588165283, + "rewards/margins": 0.8753796815872192, + "rewards/rejected": -3.640763521194458, + "step": 11010 + }, + { + "epoch": 0.72, + "learning_rate": 1.094767942681804e-06, + "logits/chosen": -2.1379446983337402, + "logits/rejected": -2.147871255874634, + "logps/chosen": -636.4110717773438, + "logps/rejected": -709.1380615234375, + "loss": 0.657, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -2.930417060852051, + "rewards/margins": 0.8874378204345703, + "rewards/rejected": -3.8178551197052, + "step": 11020 + }, + { + "epoch": 0.72, + "learning_rate": 1.0900490927288248e-06, + "logits/chosen": -1.964093804359436, + "logits/rejected": -1.9250081777572632, + "logps/chosen": -601.6725463867188, + "logps/rejected": -590.1107177734375, + "loss": 0.6325, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -2.7636687755584717, + "rewards/margins": 0.4920852780342102, + "rewards/rejected": -3.255753755569458, + "step": 11030 + }, + { + "epoch": 0.72, + "learning_rate": 1.0853375977704511e-06, + "logits/chosen": -1.8642339706420898, + "logits/rejected": -1.5740877389907837, + "logps/chosen": -594.4902954101562, + "logps/rejected": -586.4608154296875, + "loss": 0.4055, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -2.335334539413452, + "rewards/margins": 1.5911147594451904, + "rewards/rejected": -3.9264495372772217, + "step": 11040 + }, + { + "epoch": 0.72, + "learning_rate": 1.0806334823841466e-06, + "logits/chosen": -2.4295177459716797, + "logits/rejected": -1.918278694152832, + "logps/chosen": -545.12646484375, + "logps/rejected": -628.2261962890625, + "loss": 0.6423, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -2.925262928009033, + "rewards/margins": 0.8189485669136047, + "rewards/rejected": -3.744211196899414, + "step": 11050 + }, + { + "epoch": 0.72, + "learning_rate": 1.0759367711088825e-06, + "logits/chosen": -1.845646619796753, + "logits/rejected": -2.076916217803955, + "logps/chosen": -594.2481689453125, + "logps/rejected": -688.998779296875, + "loss": 0.5593, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -3.010962963104248, + "rewards/margins": 0.9051377177238464, + "rewards/rejected": -3.9161009788513184, + "step": 11060 + }, + { + "epoch": 0.72, + "learning_rate": 1.0712474884450056e-06, + "logits/chosen": -2.149695634841919, + "logits/rejected": -1.1780688762664795, + "logps/chosen": -514.140625, + "logps/rejected": -586.2655029296875, + "loss": 0.4989, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -2.6523220539093018, + "rewards/margins": 1.3810240030288696, + "rewards/rejected": -4.033346176147461, + "step": 11070 + }, + { + "epoch": 0.72, + "learning_rate": 1.066565658854112e-06, + "logits/chosen": -1.539414644241333, + "logits/rejected": -1.7554525136947632, + "logps/chosen": -495.03948974609375, + "logps/rejected": -681.1571044921875, + "loss": 0.552, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -3.119170665740967, + "rewards/margins": 0.3835334777832031, + "rewards/rejected": -3.502704620361328, + "step": 11080 + }, + { + "epoch": 0.73, + "learning_rate": 1.0618913067589165e-06, + "logits/chosen": -1.949589729309082, + "logits/rejected": -1.1513017416000366, + "logps/chosen": -607.67529296875, + "logps/rejected": -571.7874145507812, + "loss": 0.4539, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -3.3207106590270996, + "rewards/margins": 0.8452437520027161, + "rewards/rejected": -4.165954113006592, + "step": 11090 + }, + { + "epoch": 0.73, + "learning_rate": 1.0572244565431313e-06, + "logits/chosen": -1.9018256664276123, + "logits/rejected": -1.9944654703140259, + "logps/chosen": -698.68896484375, + "logps/rejected": -694.4862060546875, + "loss": 0.4973, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -3.091747283935547, + "rewards/margins": 0.8501051068305969, + "rewards/rejected": -3.941852569580078, + "step": 11100 + }, + { + "epoch": 0.73, + "learning_rate": 1.0525651325513317e-06, + "logits/chosen": -1.8144733905792236, + "logits/rejected": -1.6635713577270508, + "logps/chosen": -587.306884765625, + "logps/rejected": -645.5009765625, + "loss": 0.4695, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -2.213918685913086, + "rewards/margins": 1.390151023864746, + "rewards/rejected": -3.604069471359253, + "step": 11110 + }, + { + "epoch": 0.73, + "learning_rate": 1.0479133590888351e-06, + "logits/chosen": -2.0447092056274414, + "logits/rejected": -1.724495530128479, + "logps/chosen": -640.1514892578125, + "logps/rejected": -618.6660766601562, + "loss": 0.4973, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -3.23714017868042, + "rewards/margins": 0.5908339023590088, + "rewards/rejected": -3.8279738426208496, + "step": 11120 + }, + { + "epoch": 0.73, + "learning_rate": 1.0432691604215695e-06, + "logits/chosen": -1.9522594213485718, + "logits/rejected": -1.5733158588409424, + "logps/chosen": -741.5416259765625, + "logps/rejected": -649.3689575195312, + "loss": 0.5937, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -3.0032002925872803, + "rewards/margins": 0.7238863110542297, + "rewards/rejected": -3.7270865440368652, + "step": 11130 + }, + { + "epoch": 0.73, + "learning_rate": 1.0386325607759515e-06, + "logits/chosen": -1.877004861831665, + "logits/rejected": -1.1804144382476807, + "logps/chosen": -531.3442993164062, + "logps/rejected": -643.6566162109375, + "loss": 0.6587, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -2.545118808746338, + "rewards/margins": 1.568022608757019, + "rewards/rejected": -4.1131415367126465, + "step": 11140 + }, + { + "epoch": 0.73, + "learning_rate": 1.0340035843387544e-06, + "logits/chosen": -2.1784722805023193, + "logits/rejected": -1.8719772100448608, + "logps/chosen": -608.2196044921875, + "logps/rejected": -712.3246459960938, + "loss": 0.5015, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -2.738144874572754, + "rewards/margins": 1.0193064212799072, + "rewards/rejected": -3.757450819015503, + "step": 11150 + }, + { + "epoch": 0.73, + "learning_rate": 1.0293822552569887e-06, + "logits/chosen": -1.7314817905426025, + "logits/rejected": -1.7551358938217163, + "logps/chosen": -548.2224731445312, + "logps/rejected": -731.991455078125, + "loss": 0.2875, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -2.9847452640533447, + "rewards/margins": 1.7589595317840576, + "rewards/rejected": -4.743704319000244, + "step": 11160 + }, + { + "epoch": 0.73, + "learning_rate": 1.0247685976377688e-06, + "logits/chosen": -1.7158609628677368, + "logits/rejected": -2.0380682945251465, + "logps/chosen": -553.1986694335938, + "logps/rejected": -748.2996826171875, + "loss": 0.4106, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -2.849733829498291, + "rewards/margins": 1.3996142148971558, + "rewards/rejected": -4.2493486404418945, + "step": 11170 + }, + { + "epoch": 0.73, + "learning_rate": 1.0201626355481939e-06, + "logits/chosen": -1.9223911762237549, + "logits/rejected": -1.8944050073623657, + "logps/chosen": -625.564208984375, + "logps/rejected": -731.0145874023438, + "loss": 0.4134, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -2.318629741668701, + "rewards/margins": 1.764533281326294, + "rewards/rejected": -4.083163261413574, + "step": 11180 + }, + { + "epoch": 0.73, + "learning_rate": 1.0155643930152192e-06, + "logits/chosen": -1.7937358617782593, + "logits/rejected": -1.4579087495803833, + "logps/chosen": -472.65350341796875, + "logps/rejected": -642.529052734375, + "loss": 0.4607, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -2.678659439086914, + "rewards/margins": 1.5914627313613892, + "rewards/rejected": -4.270122528076172, + "step": 11190 + }, + { + "epoch": 0.73, + "learning_rate": 1.0109738940255286e-06, + "logits/chosen": -2.0085513591766357, + "logits/rejected": -1.5377354621887207, + "logps/chosen": -480.1001892089844, + "logps/rejected": -557.2733764648438, + "loss": 0.3617, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -2.5536446571350098, + "rewards/margins": 1.317373514175415, + "rewards/rejected": -3.871018648147583, + "step": 11200 + }, + { + "epoch": 0.73, + "learning_rate": 1.0063911625254155e-06, + "logits/chosen": -2.3564112186431885, + "logits/rejected": -1.542410969734192, + "logps/chosen": -650.8883666992188, + "logps/rejected": -715.9833984375, + "loss": 0.543, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.8708133697509766, + "rewards/margins": 0.7783092260360718, + "rewards/rejected": -3.649122714996338, + "step": 11210 + }, + { + "epoch": 0.73, + "learning_rate": 1.0018162224206502e-06, + "logits/chosen": -2.160595417022705, + "logits/rejected": -1.7032890319824219, + "logps/chosen": -614.4699096679688, + "logps/rejected": -601.10498046875, + "loss": 0.7165, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -3.0829195976257324, + "rewards/margins": 0.7624799013137817, + "rewards/rejected": -3.8453993797302246, + "step": 11220 + }, + { + "epoch": 0.73, + "learning_rate": 9.97249097576363e-07, + "logits/chosen": -2.035681962966919, + "logits/rejected": -1.9352619647979736, + "logps/chosen": -523.2393798828125, + "logps/rejected": -653.7843017578125, + "loss": 0.6436, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -3.1325199604034424, + "rewards/margins": 1.1268411874771118, + "rewards/rejected": -4.259360313415527, + "step": 11230 + }, + { + "epoch": 0.74, + "learning_rate": 9.92689811816913e-07, + "logits/chosen": -2.1685519218444824, + "logits/rejected": -2.1770365238189697, + "logps/chosen": -498.0049743652344, + "logps/rejected": -625.49462890625, + "loss": 0.4397, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -2.4198482036590576, + "rewards/margins": 1.5032671689987183, + "rewards/rejected": -3.9231152534484863, + "step": 11240 + }, + { + "epoch": 0.74, + "learning_rate": 9.881383889257691e-07, + "logits/chosen": -2.2852888107299805, + "logits/rejected": -2.0769572257995605, + "logps/chosen": -557.4730224609375, + "logps/rejected": -576.2727661132812, + "loss": 0.5914, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -2.2877988815307617, + "rewards/margins": 0.9065145254135132, + "rewards/rejected": -3.1943135261535645, + "step": 11250 + }, + { + "epoch": 0.74, + "learning_rate": 9.835948526453817e-07, + "logits/chosen": -2.180548906326294, + "logits/rejected": -1.568034291267395, + "logps/chosen": -596.4942626953125, + "logps/rejected": -653.1251831054688, + "loss": 0.5723, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -2.893049478530884, + "rewards/margins": 0.8980989456176758, + "rewards/rejected": -3.7911484241485596, + "step": 11260 + }, + { + "epoch": 0.74, + "learning_rate": 9.790592266770633e-07, + "logits/chosen": -2.2299437522888184, + "logits/rejected": -2.1429529190063477, + "logps/chosen": -600.1373901367188, + "logps/rejected": -701.9710693359375, + "loss": 0.4668, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -3.1360514163970947, + "rewards/margins": 1.4669578075408936, + "rewards/rejected": -4.60300874710083, + "step": 11270 + }, + { + "epoch": 0.74, + "learning_rate": 9.745315346808584e-07, + "logits/chosen": -1.9295885562896729, + "logits/rejected": -1.9131072759628296, + "logps/chosen": -481.2538146972656, + "logps/rejected": -589.9473876953125, + "loss": 0.5351, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -2.528791904449463, + "rewards/margins": 0.9400409460067749, + "rewards/rejected": -3.4688332080841064, + "step": 11280 + }, + { + "epoch": 0.74, + "learning_rate": 9.70011800275428e-07, + "logits/chosen": -2.007050037384033, + "logits/rejected": -1.2384674549102783, + "logps/chosen": -601.4393920898438, + "logps/rejected": -608.2933349609375, + "loss": 0.684, + "rewards/accuracies": 0.75, + "rewards/chosen": -3.0498464107513428, + "rewards/margins": 0.7697028517723083, + "rewards/rejected": -3.819549083709717, + "step": 11290 + }, + { + "epoch": 0.74, + "learning_rate": 9.655000470379206e-07, + "logits/chosen": -2.1945061683654785, + "logits/rejected": -1.9669269323349, + "logps/chosen": -567.2088623046875, + "logps/rejected": -729.3013305664062, + "loss": 0.45, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.5365357398986816, + "rewards/margins": 1.151732087135315, + "rewards/rejected": -3.688267946243286, + "step": 11300 + }, + { + "epoch": 0.74, + "learning_rate": 9.609962985038517e-07, + "logits/chosen": -2.264110803604126, + "logits/rejected": -1.7192445993423462, + "logps/chosen": -620.5179443359375, + "logps/rejected": -585.7546997070312, + "loss": 0.4001, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -2.4289679527282715, + "rewards/margins": 1.1963040828704834, + "rewards/rejected": -3.6252715587615967, + "step": 11310 + }, + { + "epoch": 0.74, + "learning_rate": 9.565005781669786e-07, + "logits/chosen": -2.2255051136016846, + "logits/rejected": -1.4862034320831299, + "logps/chosen": -561.6036376953125, + "logps/rejected": -579.4625244140625, + "loss": 0.6386, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -3.013392925262451, + "rewards/margins": 0.7098814249038696, + "rewards/rejected": -3.7232747077941895, + "step": 11320 + }, + { + "epoch": 0.74, + "learning_rate": 9.520129094791822e-07, + "logits/chosen": -2.0273349285125732, + "logits/rejected": -1.5884578227996826, + "logps/chosen": -562.5972290039062, + "logps/rejected": -664.5333251953125, + "loss": 0.4295, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -2.576624631881714, + "rewards/margins": 1.6048930883407593, + "rewards/rejected": -4.181517601013184, + "step": 11330 + }, + { + "epoch": 0.74, + "learning_rate": 9.475333158503389e-07, + "logits/chosen": -2.228905200958252, + "logits/rejected": -2.0265233516693115, + "logps/chosen": -661.26611328125, + "logps/rejected": -618.9077758789062, + "loss": 0.5175, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -2.4387011528015137, + "rewards/margins": 1.1200625896453857, + "rewards/rejected": -3.5587639808654785, + "step": 11340 + }, + { + "epoch": 0.74, + "learning_rate": 9.430618206482053e-07, + "logits/chosen": -2.025967836380005, + "logits/rejected": -1.644971251487732, + "logps/chosen": -586.7125854492188, + "logps/rejected": -613.2728881835938, + "loss": 0.4476, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -2.5874221324920654, + "rewards/margins": 0.8730489015579224, + "rewards/rejected": -3.4604709148406982, + "step": 11350 + }, + { + "epoch": 0.74, + "learning_rate": 9.385984471982892e-07, + "logits/chosen": -2.110199451446533, + "logits/rejected": -1.60942804813385, + "logps/chosen": -486.7565002441406, + "logps/rejected": -678.8507690429688, + "loss": 0.3855, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -2.0172455310821533, + "rewards/margins": 1.782753348350525, + "rewards/rejected": -3.7999987602233887, + "step": 11360 + }, + { + "epoch": 0.74, + "learning_rate": 9.341432187837343e-07, + "logits/chosen": -2.0925145149230957, + "logits/rejected": -1.9435867071151733, + "logps/chosen": -555.3340454101562, + "logps/rejected": -724.8929443359375, + "loss": 0.6627, + "rewards/accuracies": 0.75, + "rewards/chosen": -3.3047962188720703, + "rewards/margins": 1.0990153551101685, + "rewards/rejected": -4.403811454772949, + "step": 11370 + }, + { + "epoch": 0.74, + "learning_rate": 9.29696158645193e-07, + "logits/chosen": -2.439696788787842, + "logits/rejected": -1.9372104406356812, + "logps/chosen": -535.96435546875, + "logps/rejected": -607.0570068359375, + "loss": 0.5157, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -2.652575969696045, + "rewards/margins": 1.0027225017547607, + "rewards/rejected": -3.655298948287964, + "step": 11380 + }, + { + "epoch": 0.75, + "learning_rate": 9.252572899807111e-07, + "logits/chosen": -1.8352473974227905, + "logits/rejected": -1.7971274852752686, + "logps/chosen": -525.2449951171875, + "logps/rejected": -590.7938232421875, + "loss": 0.4892, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -2.7689268589019775, + "rewards/margins": 0.7963454723358154, + "rewards/rejected": -3.565272092819214, + "step": 11390 + }, + { + "epoch": 0.75, + "learning_rate": 9.208266359456003e-07, + "logits/chosen": -2.1703238487243652, + "logits/rejected": -1.7259900569915771, + "logps/chosen": -519.4930419921875, + "logps/rejected": -710.6129760742188, + "loss": 0.4442, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -1.9516105651855469, + "rewards/margins": 1.9602371454238892, + "rewards/rejected": -3.9118475914001465, + "step": 11400 + }, + { + "epoch": 0.75, + "learning_rate": 9.164042196523229e-07, + "logits/chosen": -2.060485363006592, + "logits/rejected": -2.2558093070983887, + "logps/chosen": -714.7739868164062, + "logps/rejected": -715.2745361328125, + "loss": 0.553, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -2.3948960304260254, + "rewards/margins": 0.8847671747207642, + "rewards/rejected": -3.279662609100342, + "step": 11410 + }, + { + "epoch": 0.75, + "learning_rate": 9.119900641703696e-07, + "logits/chosen": -2.2287392616271973, + "logits/rejected": -2.3328731060028076, + "logps/chosen": -593.9951782226562, + "logps/rejected": -810.0656127929688, + "loss": 0.7244, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -2.9003055095672607, + "rewards/margins": 0.7129500508308411, + "rewards/rejected": -3.613255739212036, + "step": 11420 + }, + { + "epoch": 0.75, + "learning_rate": 9.075841925261364e-07, + "logits/chosen": -1.7975727319717407, + "logits/rejected": -1.8429571390151978, + "logps/chosen": -617.5447387695312, + "logps/rejected": -673.0473022460938, + "loss": 0.5878, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -2.9794981479644775, + "rewards/margins": 0.9177531003952026, + "rewards/rejected": -3.8972511291503906, + "step": 11430 + }, + { + "epoch": 0.75, + "learning_rate": 9.031866277028093e-07, + "logits/chosen": -1.7158721685409546, + "logits/rejected": -1.5207316875457764, + "logps/chosen": -535.4993286132812, + "logps/rejected": -593.2886352539062, + "loss": 0.5281, + "rewards/accuracies": 0.75, + "rewards/chosen": -3.504063367843628, + "rewards/margins": 0.8655757904052734, + "rewards/rejected": -4.369638919830322, + "step": 11440 + }, + { + "epoch": 0.75, + "learning_rate": 8.987973926402391e-07, + "logits/chosen": -2.2284095287323, + "logits/rejected": -1.8909313678741455, + "logps/chosen": -581.1748657226562, + "logps/rejected": -668.5172119140625, + "loss": 0.4486, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -2.8736472129821777, + "rewards/margins": 1.1625237464904785, + "rewards/rejected": -4.036170959472656, + "step": 11450 + }, + { + "epoch": 0.75, + "learning_rate": 8.944165102348273e-07, + "logits/chosen": -2.121553659439087, + "logits/rejected": -1.6587817668914795, + "logps/chosen": -615.8829956054688, + "logps/rejected": -656.5050048828125, + "loss": 0.67, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -3.2396693229675293, + "rewards/margins": 0.6015607714653015, + "rewards/rejected": -3.8412303924560547, + "step": 11460 + }, + { + "epoch": 0.75, + "learning_rate": 8.900440033394018e-07, + "logits/chosen": -2.4720492362976074, + "logits/rejected": -1.9725520610809326, + "logps/chosen": -650.8018188476562, + "logps/rejected": -680.92431640625, + "loss": 0.3071, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -2.6864123344421387, + "rewards/margins": 1.4779071807861328, + "rewards/rejected": -4.1643195152282715, + "step": 11470 + }, + { + "epoch": 0.75, + "learning_rate": 8.856798947631009e-07, + "logits/chosen": -1.2511957883834839, + "logits/rejected": -1.570084571838379, + "logps/chosen": -431.64697265625, + "logps/rejected": -622.7315673828125, + "loss": 0.3729, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -2.7261099815368652, + "rewards/margins": 1.8274139165878296, + "rewards/rejected": -4.553524017333984, + "step": 11480 + }, + { + "epoch": 0.75, + "learning_rate": 8.813242072712519e-07, + "logits/chosen": -2.0236334800720215, + "logits/rejected": -1.9342308044433594, + "logps/chosen": -517.2122192382812, + "logps/rejected": -609.6121826171875, + "loss": 0.506, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -2.7198448181152344, + "rewards/margins": 0.9386121034622192, + "rewards/rejected": -3.658456802368164, + "step": 11490 + }, + { + "epoch": 0.75, + "learning_rate": 8.769769635852557e-07, + "logits/chosen": -2.1818830966949463, + "logits/rejected": -1.8092458248138428, + "logps/chosen": -607.4002075195312, + "logps/rejected": -655.6839599609375, + "loss": 0.6314, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -2.9455173015594482, + "rewards/margins": 1.0688055753707886, + "rewards/rejected": -4.0143232345581055, + "step": 11500 + }, + { + "epoch": 0.75, + "learning_rate": 8.726381863824635e-07, + "logits/chosen": -2.1254098415374756, + "logits/rejected": -1.7734934091567993, + "logps/chosen": -707.5592041015625, + "logps/rejected": -625.3045654296875, + "loss": 0.6464, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -2.804962635040283, + "rewards/margins": 0.7859970331192017, + "rewards/rejected": -3.5909600257873535, + "step": 11510 + }, + { + "epoch": 0.75, + "learning_rate": 8.683078982960638e-07, + "logits/chosen": -2.003600597381592, + "logits/rejected": -1.9953582286834717, + "logps/chosen": -544.07861328125, + "logps/rejected": -613.3527221679688, + "loss": 0.5258, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -2.647714138031006, + "rewards/margins": 1.037721037864685, + "rewards/rejected": -3.6854355335235596, + "step": 11520 + }, + { + "epoch": 0.75, + "learning_rate": 8.639861219149584e-07, + "logits/chosen": -1.9662593603134155, + "logits/rejected": -2.1062169075012207, + "logps/chosen": -553.8065185546875, + "logps/rejected": -558.3656005859375, + "loss": 0.6001, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -3.1915595531463623, + "rewards/margins": 0.26489943265914917, + "rewards/rejected": -3.4564595222473145, + "step": 11530 + }, + { + "epoch": 0.76, + "learning_rate": 8.596728797836532e-07, + "logits/chosen": -2.110957145690918, + "logits/rejected": -1.926509141921997, + "logps/chosen": -597.6747436523438, + "logps/rejected": -646.9539184570312, + "loss": 0.453, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -2.424696445465088, + "rewards/margins": 1.0680582523345947, + "rewards/rejected": -3.4927546977996826, + "step": 11540 + }, + { + "epoch": 0.76, + "learning_rate": 8.553681944021294e-07, + "logits/chosen": -1.9674144983291626, + "logits/rejected": -1.697922706604004, + "logps/chosen": -564.5590209960938, + "logps/rejected": -630.7199096679688, + "loss": 0.5602, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -2.4485175609588623, + "rewards/margins": 1.2627383470535278, + "rewards/rejected": -3.7112560272216797, + "step": 11550 + }, + { + "epoch": 0.76, + "learning_rate": 8.510720882257365e-07, + "logits/chosen": -1.8311374187469482, + "logits/rejected": -1.7656824588775635, + "logps/chosen": -520.2479248046875, + "logps/rejected": -593.451904296875, + "loss": 0.4734, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.4908149242401123, + "rewards/margins": 0.8919802904129028, + "rewards/rejected": -3.382795810699463, + "step": 11560 + }, + { + "epoch": 0.76, + "learning_rate": 8.467845836650667e-07, + "logits/chosen": -1.9729102849960327, + "logits/rejected": -1.8888013362884521, + "logps/chosen": -554.5764770507812, + "logps/rejected": -618.1435546875, + "loss": 0.3996, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.3505473136901855, + "rewards/margins": 1.1294513940811157, + "rewards/rejected": -3.4799983501434326, + "step": 11570 + }, + { + "epoch": 0.76, + "learning_rate": 8.425057030858461e-07, + "logits/chosen": -1.491781234741211, + "logits/rejected": -1.6472505331039429, + "logps/chosen": -536.1841430664062, + "logps/rejected": -794.4387817382812, + "loss": 0.5567, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -3.124786376953125, + "rewards/margins": 1.557202696800232, + "rewards/rejected": -4.6819891929626465, + "step": 11580 + }, + { + "epoch": 0.76, + "learning_rate": 8.382354688088098e-07, + "logits/chosen": -2.0456268787384033, + "logits/rejected": -1.848305106163025, + "logps/chosen": -602.2803955078125, + "logps/rejected": -660.6411743164062, + "loss": 0.5364, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -2.787703275680542, + "rewards/margins": 0.9418042898178101, + "rewards/rejected": -3.7295074462890625, + "step": 11590 + }, + { + "epoch": 0.76, + "learning_rate": 8.33973903109594e-07, + "logits/chosen": -1.9836442470550537, + "logits/rejected": -1.7316744327545166, + "logps/chosen": -539.95654296875, + "logps/rejected": -645.2711181640625, + "loss": 0.5209, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -2.5273334980010986, + "rewards/margins": 1.3982807397842407, + "rewards/rejected": -3.925614595413208, + "step": 11600 + }, + { + "epoch": 0.76, + "learning_rate": 8.297210282186102e-07, + "logits/chosen": -1.7507593631744385, + "logits/rejected": -1.5172091722488403, + "logps/chosen": -560.347412109375, + "logps/rejected": -643.906982421875, + "loss": 0.4369, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -2.6636176109313965, + "rewards/margins": 1.5707944631576538, + "rewards/rejected": -4.23441219329834, + "step": 11610 + }, + { + "epoch": 0.76, + "learning_rate": 8.254768663209397e-07, + "logits/chosen": -2.0388007164001465, + "logits/rejected": -2.022284984588623, + "logps/chosen": -560.9110107421875, + "logps/rejected": -649.5015869140625, + "loss": 0.4579, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.4646971225738525, + "rewards/margins": 0.8786503672599792, + "rewards/rejected": -3.3433470726013184, + "step": 11620 + }, + { + "epoch": 0.76, + "learning_rate": 8.212414395562079e-07, + "logits/chosen": -1.997552514076233, + "logits/rejected": -1.9959675073623657, + "logps/chosen": -573.3985595703125, + "logps/rejected": -581.78759765625, + "loss": 0.5521, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -2.674731492996216, + "rewards/margins": 1.2291405200958252, + "rewards/rejected": -3.90387225151062, + "step": 11630 + }, + { + "epoch": 0.76, + "learning_rate": 8.170147700184775e-07, + "logits/chosen": -2.024963855743408, + "logits/rejected": -1.524887204170227, + "logps/chosen": -527.4414672851562, + "logps/rejected": -653.4882202148438, + "loss": 0.4484, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -2.3810606002807617, + "rewards/margins": 1.7915737628936768, + "rewards/rejected": -4.172634124755859, + "step": 11640 + }, + { + "epoch": 0.76, + "learning_rate": 8.127968797561242e-07, + "logits/chosen": -1.9064247608184814, + "logits/rejected": -1.88960862159729, + "logps/chosen": -543.8555908203125, + "logps/rejected": -640.4848022460938, + "loss": 0.4843, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.717148542404175, + "rewards/margins": 1.105126142501831, + "rewards/rejected": -3.8222744464874268, + "step": 11650 + }, + { + "epoch": 0.76, + "learning_rate": 8.085877907717338e-07, + "logits/chosen": -1.9744573831558228, + "logits/rejected": -1.6091855764389038, + "logps/chosen": -551.3575439453125, + "logps/rejected": -611.7328491210938, + "loss": 0.5567, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -3.031090259552002, + "rewards/margins": 0.9378800392150879, + "rewards/rejected": -3.9689698219299316, + "step": 11660 + }, + { + "epoch": 0.76, + "learning_rate": 8.043875250219732e-07, + "logits/chosen": -2.105393886566162, + "logits/rejected": -2.0549044609069824, + "logps/chosen": -649.9873046875, + "logps/rejected": -710.1104125976562, + "loss": 0.6793, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -2.7803592681884766, + "rewards/margins": 0.6869893074035645, + "rewards/rejected": -3.46734881401062, + "step": 11670 + }, + { + "epoch": 0.76, + "learning_rate": 8.001961044174881e-07, + "logits/chosen": -2.1214375495910645, + "logits/rejected": -1.462120771408081, + "logps/chosen": -572.0277709960938, + "logps/rejected": -650.1475830078125, + "loss": 0.4588, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -2.2363853454589844, + "rewards/margins": 2.0137810707092285, + "rewards/rejected": -4.250166416168213, + "step": 11680 + }, + { + "epoch": 0.76, + "learning_rate": 7.960135508227795e-07, + "logits/chosen": -1.8443381786346436, + "logits/rejected": -1.9977306127548218, + "logps/chosen": -605.5724487304688, + "logps/rejected": -708.4366455078125, + "loss": 0.5162, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -2.791918992996216, + "rewards/margins": 1.0935614109039307, + "rewards/rejected": -3.8854804039001465, + "step": 11690 + }, + { + "epoch": 0.77, + "learning_rate": 7.91839886056098e-07, + "logits/chosen": -2.0267434120178223, + "logits/rejected": -1.9769598245620728, + "logps/chosen": -524.788330078125, + "logps/rejected": -591.4601440429688, + "loss": 0.511, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -2.600790500640869, + "rewards/margins": 0.8144540786743164, + "rewards/rejected": -3.4152445793151855, + "step": 11700 + }, + { + "epoch": 0.77, + "learning_rate": 7.876751318893217e-07, + "logits/chosen": -1.7773189544677734, + "logits/rejected": -1.4888404607772827, + "logps/chosen": -524.0817260742188, + "logps/rejected": -642.443359375, + "loss": 0.5246, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -2.8395049571990967, + "rewards/margins": 1.466451644897461, + "rewards/rejected": -4.305956840515137, + "step": 11710 + }, + { + "epoch": 0.77, + "learning_rate": 7.8351931004785e-07, + "logits/chosen": -1.8321616649627686, + "logits/rejected": -2.047708511352539, + "logps/chosen": -599.1519775390625, + "logps/rejected": -620.6925659179688, + "loss": 0.5871, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.9461781978607178, + "rewards/margins": 0.5298594236373901, + "rewards/rejected": -3.4760379791259766, + "step": 11720 + }, + { + "epoch": 0.77, + "learning_rate": 7.793724422104834e-07, + "logits/chosen": -1.9366436004638672, + "logits/rejected": -1.9429668188095093, + "logps/chosen": -535.4391479492188, + "logps/rejected": -641.5661010742188, + "loss": 0.4596, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -3.000441074371338, + "rewards/margins": 1.0180342197418213, + "rewards/rejected": -4.018475532531738, + "step": 11730 + }, + { + "epoch": 0.77, + "learning_rate": 7.752345500093184e-07, + "logits/chosen": -2.282815456390381, + "logits/rejected": -1.8461005687713623, + "logps/chosen": -655.4285888671875, + "logps/rejected": -604.0984497070312, + "loss": 0.584, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -3.1589279174804688, + "rewards/margins": 0.7532690763473511, + "rewards/rejected": -3.912196636199951, + "step": 11740 + }, + { + "epoch": 0.77, + "learning_rate": 7.711056550296253e-07, + "logits/chosen": -2.036350727081299, + "logits/rejected": -1.7405805587768555, + "logps/chosen": -718.2552490234375, + "logps/rejected": -759.4415283203125, + "loss": 0.4641, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.1789045333862305, + "rewards/margins": 1.2369298934936523, + "rewards/rejected": -3.415834903717041, + "step": 11750 + }, + { + "epoch": 0.77, + "learning_rate": 7.669857788097445e-07, + "logits/chosen": -2.080662965774536, + "logits/rejected": -2.1915130615234375, + "logps/chosen": -586.1531982421875, + "logps/rejected": -603.7664794921875, + "loss": 0.4661, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -2.6133437156677246, + "rewards/margins": 0.7611432671546936, + "rewards/rejected": -3.3744869232177734, + "step": 11760 + }, + { + "epoch": 0.77, + "learning_rate": 7.628749428409676e-07, + "logits/chosen": -2.1952221393585205, + "logits/rejected": -1.871019959449768, + "logps/chosen": -557.4973754882812, + "logps/rejected": -677.4476318359375, + "loss": 0.498, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -2.662010669708252, + "rewards/margins": 1.359654188156128, + "rewards/rejected": -4.021664619445801, + "step": 11770 + }, + { + "epoch": 0.77, + "learning_rate": 7.587731685674288e-07, + "logits/chosen": -1.7733513116836548, + "logits/rejected": -1.8451553583145142, + "logps/chosen": -589.5215454101562, + "logps/rejected": -735.2191772460938, + "loss": 0.3707, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -3.250026226043701, + "rewards/margins": 1.2411115169525146, + "rewards/rejected": -4.491137504577637, + "step": 11780 + }, + { + "epoch": 0.77, + "learning_rate": 7.546804773859931e-07, + "logits/chosen": -2.1263413429260254, + "logits/rejected": -2.1229681968688965, + "logps/chosen": -594.2049560546875, + "logps/rejected": -629.3919067382812, + "loss": 0.6908, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -2.963937759399414, + "rewards/margins": 0.38868817687034607, + "rewards/rejected": -3.352626085281372, + "step": 11790 + }, + { + "epoch": 0.77, + "learning_rate": 7.505968906461409e-07, + "logits/chosen": -2.091522455215454, + "logits/rejected": -2.1456050872802734, + "logps/chosen": -592.6693115234375, + "logps/rejected": -766.59765625, + "loss": 0.4894, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -2.7104580402374268, + "rewards/margins": 1.1917150020599365, + "rewards/rejected": -3.9021732807159424, + "step": 11800 + }, + { + "epoch": 0.77, + "learning_rate": 7.465224296498627e-07, + "logits/chosen": -2.224442958831787, + "logits/rejected": -1.8706302642822266, + "logps/chosen": -630.4420776367188, + "logps/rejected": -593.1500854492188, + "loss": 0.532, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -2.5721988677978516, + "rewards/margins": 0.990858256816864, + "rewards/rejected": -3.5630574226379395, + "step": 11810 + }, + { + "epoch": 0.77, + "learning_rate": 7.424571156515412e-07, + "logits/chosen": -1.3914611339569092, + "logits/rejected": -1.3528521060943604, + "logps/chosen": -620.4548950195312, + "logps/rejected": -719.0347290039062, + "loss": 0.4782, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -2.7914605140686035, + "rewards/margins": 1.3912131786346436, + "rewards/rejected": -4.182673454284668, + "step": 11820 + }, + { + "epoch": 0.77, + "learning_rate": 7.38400969857847e-07, + "logits/chosen": -1.641289472579956, + "logits/rejected": -1.6610963344573975, + "logps/chosen": -525.3446655273438, + "logps/rejected": -587.3547973632812, + "loss": 0.5223, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -2.275611639022827, + "rewards/margins": 1.4554826021194458, + "rewards/rejected": -3.7310938835144043, + "step": 11830 + }, + { + "epoch": 0.77, + "learning_rate": 7.343540134276225e-07, + "logits/chosen": -2.371286630630493, + "logits/rejected": -1.728348970413208, + "logps/chosen": -612.7855834960938, + "logps/rejected": -588.4951171875, + "loss": 0.5313, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -2.828519821166992, + "rewards/margins": 0.8085176348686218, + "rewards/rejected": -3.6370372772216797, + "step": 11840 + }, + { + "epoch": 0.78, + "learning_rate": 7.303162674717762e-07, + "logits/chosen": -1.7673263549804688, + "logits/rejected": -1.8349840641021729, + "logps/chosen": -436.9541015625, + "logps/rejected": -561.4946899414062, + "loss": 0.4587, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.671754837036133, + "rewards/margins": 1.310455560684204, + "rewards/rejected": -3.982210636138916, + "step": 11850 + }, + { + "epoch": 0.78, + "learning_rate": 7.26287753053167e-07, + "logits/chosen": -1.8987102508544922, + "logits/rejected": -1.6133962869644165, + "logps/chosen": -660.6593017578125, + "logps/rejected": -785.08837890625, + "loss": 0.4893, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -3.577979564666748, + "rewards/margins": 0.7468835711479187, + "rewards/rejected": -4.324862957000732, + "step": 11860 + }, + { + "epoch": 0.78, + "learning_rate": 7.222684911865013e-07, + "logits/chosen": -2.0908069610595703, + "logits/rejected": -2.007396697998047, + "logps/chosen": -521.2072143554688, + "logps/rejected": -706.6531982421875, + "loss": 0.5221, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -2.3962066173553467, + "rewards/margins": 1.5858217477798462, + "rewards/rejected": -3.9820282459259033, + "step": 11870 + }, + { + "epoch": 0.78, + "learning_rate": 7.182585028382166e-07, + "logits/chosen": -2.124596357345581, + "logits/rejected": -1.9639291763305664, + "logps/chosen": -477.7953186035156, + "logps/rejected": -597.7669677734375, + "loss": 0.4659, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -2.0256547927856445, + "rewards/margins": 1.4007855653762817, + "rewards/rejected": -3.426440477371216, + "step": 11880 + }, + { + "epoch": 0.78, + "learning_rate": 7.142578089263769e-07, + "logits/chosen": -1.9041650295257568, + "logits/rejected": -1.9842296838760376, + "logps/chosen": -532.4962158203125, + "logps/rejected": -559.984375, + "loss": 0.5181, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -2.582383632659912, + "rewards/margins": 1.0075069665908813, + "rewards/rejected": -3.589890718460083, + "step": 11890 + }, + { + "epoch": 0.78, + "learning_rate": 7.102664303205611e-07, + "logits/chosen": -1.924194574356079, + "logits/rejected": -2.085660457611084, + "logps/chosen": -715.51025390625, + "logps/rejected": -694.806884765625, + "loss": 0.4882, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -2.911158323287964, + "rewards/margins": 0.8436439633369446, + "rewards/rejected": -3.754802703857422, + "step": 11900 + }, + { + "epoch": 0.78, + "learning_rate": 7.062843878417566e-07, + "logits/chosen": -1.6597715616226196, + "logits/rejected": -1.526272177696228, + "logps/chosen": -548.4642333984375, + "logps/rejected": -670.6319580078125, + "loss": 0.5269, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.9917690753936768, + "rewards/margins": 1.377164363861084, + "rewards/rejected": -4.36893367767334, + "step": 11910 + }, + { + "epoch": 0.78, + "learning_rate": 7.023117022622458e-07, + "logits/chosen": -2.1705479621887207, + "logits/rejected": -1.7469104528427124, + "logps/chosen": -606.1336059570312, + "logps/rejected": -674.3630981445312, + "loss": 0.6392, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -2.6440083980560303, + "rewards/margins": 0.7526111006736755, + "rewards/rejected": -3.3966193199157715, + "step": 11920 + }, + { + "epoch": 0.78, + "learning_rate": 6.983483943055042e-07, + "logits/chosen": -2.0007176399230957, + "logits/rejected": -2.0033366680145264, + "logps/chosen": -632.1116943359375, + "logps/rejected": -632.0157470703125, + "loss": 0.399, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.597093105316162, + "rewards/margins": 0.9765065312385559, + "rewards/rejected": -3.5736000537872314, + "step": 11930 + }, + { + "epoch": 0.78, + "learning_rate": 6.943944846460859e-07, + "logits/chosen": -2.1100950241088867, + "logits/rejected": -2.160198211669922, + "logps/chosen": -613.911865234375, + "logps/rejected": -709.337890625, + "loss": 0.5362, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.8297903537750244, + "rewards/margins": 1.0146056413650513, + "rewards/rejected": -3.8443961143493652, + "step": 11940 + }, + { + "epoch": 0.78, + "learning_rate": 6.904499939095225e-07, + "logits/chosen": -2.1457552909851074, + "logits/rejected": -1.9467493295669556, + "logps/chosen": -667.1052856445312, + "logps/rejected": -671.9146728515625, + "loss": 0.4131, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -3.0337657928466797, + "rewards/margins": 1.3421456813812256, + "rewards/rejected": -4.375911712646484, + "step": 11950 + }, + { + "epoch": 0.78, + "learning_rate": 6.865149426722079e-07, + "logits/chosen": -1.936374306678772, + "logits/rejected": -1.9775865077972412, + "logps/chosen": -548.2598876953125, + "logps/rejected": -707.3658447265625, + "loss": 0.6487, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.3087007999420166, + "rewards/margins": 1.3016471862792969, + "rewards/rejected": -3.6103482246398926, + "step": 11960 + }, + { + "epoch": 0.78, + "learning_rate": 6.825893514612985e-07, + "logits/chosen": -2.113210439682007, + "logits/rejected": -1.7311427593231201, + "logps/chosen": -580.7791748046875, + "logps/rejected": -764.8864135742188, + "loss": 0.634, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -2.5090651512145996, + "rewards/margins": 0.9025558233261108, + "rewards/rejected": -3.41162109375, + "step": 11970 + }, + { + "epoch": 0.78, + "learning_rate": 6.786732407546001e-07, + "logits/chosen": -1.7111217975616455, + "logits/rejected": -1.5820856094360352, + "logps/chosen": -530.7203369140625, + "logps/rejected": -689.3048706054688, + "loss": 0.5514, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.502312421798706, + "rewards/margins": 1.546095848083496, + "rewards/rejected": -4.048408508300781, + "step": 11980 + }, + { + "epoch": 0.78, + "learning_rate": 6.747666309804654e-07, + "logits/chosen": -2.161839246749878, + "logits/rejected": -2.282132625579834, + "logps/chosen": -562.6868286132812, + "logps/rejected": -631.7789916992188, + "loss": 0.4637, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.6618592739105225, + "rewards/margins": 0.9789729118347168, + "rewards/rejected": -3.6408324241638184, + "step": 11990 + }, + { + "epoch": 0.79, + "learning_rate": 6.708695425176831e-07, + "logits/chosen": -2.36330509185791, + "logits/rejected": -1.784592866897583, + "logps/chosen": -543.343017578125, + "logps/rejected": -702.59619140625, + "loss": 0.4636, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.4326610565185547, + "rewards/margins": 1.4380266666412354, + "rewards/rejected": -3.870687484741211, + "step": 12000 + }, + { + "epoch": 0.79, + "eval_logits/chosen": -2.010216236114502, + "eval_logits/rejected": -1.832584023475647, + "eval_logps/chosen": -559.8972778320312, + "eval_logps/rejected": -653.1429443359375, + "eval_loss": 0.5347319841384888, + "eval_rewards/accuracies": 0.7450000047683716, + "eval_rewards/chosen": -2.644517183303833, + "eval_rewards/margins": 1.1328535079956055, + "eval_rewards/rejected": -3.7773704528808594, + "eval_runtime": 464.7985, + "eval_samples_per_second": 4.303, + "eval_steps_per_second": 2.151, + "step": 12000 + }, + { + "epoch": 0.79, + "learning_rate": 6.669819956953768e-07, + "logits/chosen": -2.558680295944214, + "logits/rejected": -2.0116984844207764, + "logps/chosen": -648.0056762695312, + "logps/rejected": -632.8907470703125, + "loss": 0.4691, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -2.4394781589508057, + "rewards/margins": 1.159771203994751, + "rewards/rejected": -3.5992493629455566, + "step": 12010 + }, + { + "epoch": 0.79, + "learning_rate": 6.631040107928957e-07, + "logits/chosen": -2.110262632369995, + "logits/rejected": -1.9674198627471924, + "logps/chosen": -613.672119140625, + "logps/rejected": -663.4327392578125, + "loss": 0.4748, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.3904502391815186, + "rewards/margins": 1.1840265989303589, + "rewards/rejected": -3.574476718902588, + "step": 12020 + }, + { + "epoch": 0.79, + "learning_rate": 6.592356080397072e-07, + "logits/chosen": -1.972866415977478, + "logits/rejected": -1.4877679347991943, + "logps/chosen": -497.69036865234375, + "logps/rejected": -569.5394287109375, + "loss": 0.4793, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -3.0033771991729736, + "rewards/margins": 1.077380895614624, + "rewards/rejected": -4.080758094787598, + "step": 12030 + }, + { + "epoch": 0.79, + "learning_rate": 6.553768076152963e-07, + "logits/chosen": -2.2271311283111572, + "logits/rejected": -1.976681113243103, + "logps/chosen": -623.8030395507812, + "logps/rejected": -777.1211547851562, + "loss": 0.5016, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -2.64901065826416, + "rewards/margins": 1.1936619281768799, + "rewards/rejected": -3.8426718711853027, + "step": 12040 + }, + { + "epoch": 0.79, + "learning_rate": 6.51527629649055e-07, + "logits/chosen": -1.7846431732177734, + "logits/rejected": -1.7610366344451904, + "logps/chosen": -601.7432861328125, + "logps/rejected": -581.0494384765625, + "loss": 0.5333, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -2.1557106971740723, + "rewards/margins": 1.2338597774505615, + "rewards/rejected": -3.389570713043213, + "step": 12050 + }, + { + "epoch": 0.79, + "learning_rate": 6.476880942201824e-07, + "logits/chosen": -2.2647032737731934, + "logits/rejected": -1.8640931844711304, + "logps/chosen": -515.746337890625, + "logps/rejected": -597.1239013671875, + "loss": 0.5296, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -2.493758201599121, + "rewards/margins": 1.0930613279342651, + "rewards/rejected": -3.5868194103240967, + "step": 12060 + }, + { + "epoch": 0.79, + "learning_rate": 6.438582213575748e-07, + "logits/chosen": -1.9803247451782227, + "logits/rejected": -1.8715168237686157, + "logps/chosen": -485.79559326171875, + "logps/rejected": -644.2310791015625, + "loss": 0.3128, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -2.364370584487915, + "rewards/margins": 1.6224912405014038, + "rewards/rejected": -3.9868621826171875, + "step": 12070 + }, + { + "epoch": 0.79, + "learning_rate": 6.400380310397267e-07, + "logits/chosen": -2.011253833770752, + "logits/rejected": -1.855588674545288, + "logps/chosen": -585.7911987304688, + "logps/rejected": -683.5035400390625, + "loss": 0.5284, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -2.2106995582580566, + "rewards/margins": 0.894027590751648, + "rewards/rejected": -3.104727029800415, + "step": 12080 + }, + { + "epoch": 0.79, + "learning_rate": 6.362275431946202e-07, + "logits/chosen": -1.5855903625488281, + "logits/rejected": -2.165252923965454, + "logps/chosen": -506.32354736328125, + "logps/rejected": -659.8756103515625, + "loss": 0.4614, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -3.271911144256592, + "rewards/margins": 1.0305030345916748, + "rewards/rejected": -4.3024139404296875, + "step": 12090 + }, + { + "epoch": 0.79, + "learning_rate": 6.324267776996285e-07, + "logits/chosen": -1.8862168788909912, + "logits/rejected": -1.7354240417480469, + "logps/chosen": -625.648681640625, + "logps/rejected": -629.8529052734375, + "loss": 0.5204, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -2.6771509647369385, + "rewards/margins": 0.4423960745334625, + "rewards/rejected": -3.1195473670959473, + "step": 12100 + }, + { + "epoch": 0.79, + "learning_rate": 6.286357543814045e-07, + "logits/chosen": -2.4290318489074707, + "logits/rejected": -2.139944553375244, + "logps/chosen": -536.7593994140625, + "logps/rejected": -699.2761840820312, + "loss": 0.5809, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -2.0974652767181396, + "rewards/margins": 0.9861791729927063, + "rewards/rejected": -3.083644390106201, + "step": 12110 + }, + { + "epoch": 0.79, + "learning_rate": 6.248544930157838e-07, + "logits/chosen": -2.145630121231079, + "logits/rejected": -1.9923956394195557, + "logps/chosen": -544.7433471679688, + "logps/rejected": -650.2592163085938, + "loss": 0.5544, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.7388195991516113, + "rewards/margins": 1.1529513597488403, + "rewards/rejected": -3.8917713165283203, + "step": 12120 + }, + { + "epoch": 0.79, + "learning_rate": 6.21083013327678e-07, + "logits/chosen": -1.69477117061615, + "logits/rejected": -2.0762622356414795, + "logps/chosen": -535.0198364257812, + "logps/rejected": -640.6212158203125, + "loss": 0.3952, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -2.796827793121338, + "rewards/margins": 1.0736668109893799, + "rewards/rejected": -3.8704943656921387, + "step": 12130 + }, + { + "epoch": 0.79, + "learning_rate": 6.17321334990973e-07, + "logits/chosen": -1.894301176071167, + "logits/rejected": -2.067835807800293, + "logps/chosen": -597.6411743164062, + "logps/rejected": -640.9744262695312, + "loss": 0.5247, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -2.738687753677368, + "rewards/margins": 1.0224624872207642, + "rewards/rejected": -3.761150360107422, + "step": 12140 + }, + { + "epoch": 0.79, + "learning_rate": 6.135694776284243e-07, + "logits/chosen": -2.353419780731201, + "logits/rejected": -1.988925576210022, + "logps/chosen": -696.5867309570312, + "logps/rejected": -627.3082885742188, + "loss": 0.5669, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -2.1813197135925293, + "rewards/margins": 0.7509251832962036, + "rewards/rejected": -2.9322447776794434, + "step": 12150 + }, + { + "epoch": 0.8, + "learning_rate": 6.098274608115595e-07, + "logits/chosen": -1.694628357887268, + "logits/rejected": -1.076794981956482, + "logps/chosen": -584.422119140625, + "logps/rejected": -619.1571044921875, + "loss": 0.3698, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -3.3269131183624268, + "rewards/margins": 1.1638798713684082, + "rewards/rejected": -4.490792751312256, + "step": 12160 + }, + { + "epoch": 0.8, + "learning_rate": 6.060953040605697e-07, + "logits/chosen": -1.6705732345581055, + "logits/rejected": -1.7893667221069336, + "logps/chosen": -557.1876220703125, + "logps/rejected": -576.4094848632812, + "loss": 0.6879, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -2.9966461658477783, + "rewards/margins": 0.26419544219970703, + "rewards/rejected": -3.2608418464660645, + "step": 12170 + }, + { + "epoch": 0.8, + "learning_rate": 6.023730268442144e-07, + "logits/chosen": -1.9730228185653687, + "logits/rejected": -1.8565328121185303, + "logps/chosen": -528.3025512695312, + "logps/rejected": -596.0540771484375, + "loss": 0.4897, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.5707285404205322, + "rewards/margins": 1.0148826837539673, + "rewards/rejected": -3.585610866546631, + "step": 12180 + }, + { + "epoch": 0.8, + "learning_rate": 5.986606485797131e-07, + "logits/chosen": -2.4206955432891846, + "logits/rejected": -2.0519917011260986, + "logps/chosen": -650.331787109375, + "logps/rejected": -709.4451293945312, + "loss": 0.5735, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -2.7515320777893066, + "rewards/margins": 1.218911051750183, + "rewards/rejected": -3.9704430103302, + "step": 12190 + }, + { + "epoch": 0.8, + "learning_rate": 5.949581886326511e-07, + "logits/chosen": -2.2249879837036133, + "logits/rejected": -2.350133180618286, + "logps/chosen": -580.1309814453125, + "logps/rejected": -681.6095581054688, + "loss": 0.6241, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.244213342666626, + "rewards/margins": 1.0740654468536377, + "rewards/rejected": -3.3182787895202637, + "step": 12200 + }, + { + "epoch": 0.8, + "learning_rate": 5.912656663168717e-07, + "logits/chosen": -1.710314154624939, + "logits/rejected": -1.9418649673461914, + "logps/chosen": -567.335205078125, + "logps/rejected": -693.5281982421875, + "loss": 0.5183, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -2.8011791706085205, + "rewards/margins": 0.9482982754707336, + "rewards/rejected": -3.7494773864746094, + "step": 12210 + }, + { + "epoch": 0.8, + "learning_rate": 5.875831008943817e-07, + "logits/chosen": -1.989551305770874, + "logits/rejected": -2.0580105781555176, + "logps/chosen": -510.07159423828125, + "logps/rejected": -602.3060302734375, + "loss": 0.5503, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.1793808937072754, + "rewards/margins": 1.047814965248108, + "rewards/rejected": -3.227196216583252, + "step": 12220 + }, + { + "epoch": 0.8, + "learning_rate": 5.839105115752442e-07, + "logits/chosen": -2.4286434650421143, + "logits/rejected": -2.187541961669922, + "logps/chosen": -653.9439086914062, + "logps/rejected": -668.8286743164062, + "loss": 0.58, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -2.599398612976074, + "rewards/margins": 0.5656057596206665, + "rewards/rejected": -3.165004253387451, + "step": 12230 + }, + { + "epoch": 0.8, + "learning_rate": 5.802479175174855e-07, + "logits/chosen": -2.245690107345581, + "logits/rejected": -1.6115894317626953, + "logps/chosen": -503.63055419921875, + "logps/rejected": -713.26513671875, + "loss": 0.4681, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.519134521484375, + "rewards/margins": 1.466404914855957, + "rewards/rejected": -3.985539197921753, + "step": 12240 + }, + { + "epoch": 0.8, + "learning_rate": 5.765953378269901e-07, + "logits/chosen": -1.9824949502944946, + "logits/rejected": -2.0004501342773438, + "logps/chosen": -565.99755859375, + "logps/rejected": -666.8563232421875, + "loss": 0.7208, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -2.5569775104522705, + "rewards/margins": 0.556684136390686, + "rewards/rejected": -3.113661766052246, + "step": 12250 + }, + { + "epoch": 0.8, + "learning_rate": 5.729527915574037e-07, + "logits/chosen": -2.390829086303711, + "logits/rejected": -1.928675651550293, + "logps/chosen": -580.5411376953125, + "logps/rejected": -638.8826293945312, + "loss": 0.4666, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -2.4753901958465576, + "rewards/margins": 1.4707015752792358, + "rewards/rejected": -3.946091413497925, + "step": 12260 + }, + { + "epoch": 0.8, + "learning_rate": 5.693202977100304e-07, + "logits/chosen": -1.7291290760040283, + "logits/rejected": -2.0605156421661377, + "logps/chosen": -541.4114379882812, + "logps/rejected": -680.072998046875, + "loss": 0.4824, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -2.732806921005249, + "rewards/margins": 1.2888050079345703, + "rewards/rejected": -4.021612167358398, + "step": 12270 + }, + { + "epoch": 0.8, + "learning_rate": 5.656978752337389e-07, + "logits/chosen": -1.7255207300186157, + "logits/rejected": -2.1508889198303223, + "logps/chosen": -496.97125244140625, + "logps/rejected": -799.4225463867188, + "loss": 0.6514, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -2.5759778022766113, + "rewards/margins": 1.273808240890503, + "rewards/rejected": -3.8497862815856934, + "step": 12280 + }, + { + "epoch": 0.8, + "learning_rate": 5.620855430248581e-07, + "logits/chosen": -1.9934139251708984, + "logits/rejected": -1.6731551885604858, + "logps/chosen": -532.9713745117188, + "logps/rejected": -620.8679809570312, + "loss": 0.49, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -2.7751731872558594, + "rewards/margins": 0.825658917427063, + "rewards/rejected": -3.6008315086364746, + "step": 12290 + }, + { + "epoch": 0.8, + "learning_rate": 5.584833199270837e-07, + "logits/chosen": -1.892397165298462, + "logits/rejected": -1.9254367351531982, + "logps/chosen": -442.7142639160156, + "logps/rejected": -661.3265380859375, + "loss": 0.4687, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -2.443904399871826, + "rewards/margins": 1.6627155542373657, + "rewards/rejected": -4.1066203117370605, + "step": 12300 + }, + { + "epoch": 0.81, + "learning_rate": 5.548912247313742e-07, + "logits/chosen": -1.9033715724945068, + "logits/rejected": -1.8007004261016846, + "logps/chosen": -531.8680419921875, + "logps/rejected": -670.9957275390625, + "loss": 0.5136, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -2.654646396636963, + "rewards/margins": 1.2657206058502197, + "rewards/rejected": -3.9203670024871826, + "step": 12310 + }, + { + "epoch": 0.81, + "learning_rate": 5.513092761758596e-07, + "logits/chosen": -2.130232334136963, + "logits/rejected": -1.8414499759674072, + "logps/chosen": -601.0474243164062, + "logps/rejected": -640.6214599609375, + "loss": 0.4741, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -2.4780101776123047, + "rewards/margins": 0.6366981863975525, + "rewards/rejected": -3.114708423614502, + "step": 12320 + }, + { + "epoch": 0.81, + "learning_rate": 5.477374929457363e-07, + "logits/chosen": -2.0072124004364014, + "logits/rejected": -2.020477056503296, + "logps/chosen": -496.66497802734375, + "logps/rejected": -574.7835693359375, + "loss": 0.4712, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -2.09798526763916, + "rewards/margins": 1.52622389793396, + "rewards/rejected": -3.624208927154541, + "step": 12330 + }, + { + "epoch": 0.81, + "learning_rate": 5.441758936731772e-07, + "logits/chosen": -1.8541311025619507, + "logits/rejected": -1.9003242254257202, + "logps/chosen": -584.4236450195312, + "logps/rejected": -690.5775146484375, + "loss": 0.6396, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -2.6013457775115967, + "rewards/margins": 1.170657992362976, + "rewards/rejected": -3.7720043659210205, + "step": 12340 + }, + { + "epoch": 0.81, + "learning_rate": 5.406244969372273e-07, + "logits/chosen": -2.2977888584136963, + "logits/rejected": -2.159036636352539, + "logps/chosen": -559.5189208984375, + "logps/rejected": -704.7296752929688, + "loss": 0.4578, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -2.185288906097412, + "rewards/margins": 1.2700397968292236, + "rewards/rejected": -3.4553287029266357, + "step": 12350 + }, + { + "epoch": 0.81, + "learning_rate": 5.370833212637122e-07, + "logits/chosen": -1.5719448328018188, + "logits/rejected": -1.7477598190307617, + "logps/chosen": -432.41937255859375, + "logps/rejected": -650.3663330078125, + "loss": 0.618, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -3.04536509513855, + "rewards/margins": 0.9212772250175476, + "rewards/rejected": -3.966642379760742, + "step": 12360 + }, + { + "epoch": 0.81, + "learning_rate": 5.335523851251392e-07, + "logits/chosen": -2.318669319152832, + "logits/rejected": -1.7633874416351318, + "logps/chosen": -623.6515502929688, + "logps/rejected": -614.4036865234375, + "loss": 0.4593, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -2.4246182441711426, + "rewards/margins": 1.146517038345337, + "rewards/rejected": -3.5711350440979004, + "step": 12370 + }, + { + "epoch": 0.81, + "learning_rate": 5.300317069406003e-07, + "logits/chosen": -1.8068931102752686, + "logits/rejected": -1.7986596822738647, + "logps/chosen": -674.0821533203125, + "logps/rejected": -612.6988525390625, + "loss": 0.5668, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -3.4161953926086426, + "rewards/margins": 0.4039956033229828, + "rewards/rejected": -3.8201911449432373, + "step": 12380 + }, + { + "epoch": 0.81, + "learning_rate": 5.265213050756782e-07, + "logits/chosen": -2.1107888221740723, + "logits/rejected": -2.1840109825134277, + "logps/chosen": -764.8612060546875, + "logps/rejected": -786.1771240234375, + "loss": 0.3672, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -3.1414551734924316, + "rewards/margins": 1.0852000713348389, + "rewards/rejected": -4.226655006408691, + "step": 12390 + }, + { + "epoch": 0.81, + "learning_rate": 5.230211978423477e-07, + "logits/chosen": -1.7935606241226196, + "logits/rejected": -2.1159043312072754, + "logps/chosen": -557.9053955078125, + "logps/rejected": -678.38525390625, + "loss": 0.5979, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.4887566566467285, + "rewards/margins": 1.6448196172714233, + "rewards/rejected": -4.133576393127441, + "step": 12400 + }, + { + "epoch": 0.81, + "learning_rate": 5.195314034988835e-07, + "logits/chosen": -1.6798433065414429, + "logits/rejected": -1.9338346719741821, + "logps/chosen": -492.1327209472656, + "logps/rejected": -591.898681640625, + "loss": 0.6946, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -2.843254804611206, + "rewards/margins": 0.31330960988998413, + "rewards/rejected": -3.156564712524414, + "step": 12410 + }, + { + "epoch": 0.81, + "learning_rate": 5.160519402497616e-07, + "logits/chosen": -2.2170321941375732, + "logits/rejected": -1.2710682153701782, + "logps/chosen": -694.8778076171875, + "logps/rejected": -638.19287109375, + "loss": 0.3549, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -2.887610912322998, + "rewards/margins": 1.2259972095489502, + "rewards/rejected": -4.113608360290527, + "step": 12420 + }, + { + "epoch": 0.81, + "learning_rate": 5.125828262455679e-07, + "logits/chosen": -1.9972445964813232, + "logits/rejected": -1.5846986770629883, + "logps/chosen": -585.0384521484375, + "logps/rejected": -653.4993896484375, + "loss": 0.5703, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.0784976482391357, + "rewards/margins": 1.2749563455581665, + "rewards/rejected": -3.3534538745880127, + "step": 12430 + }, + { + "epoch": 0.81, + "learning_rate": 5.091240795828992e-07, + "logits/chosen": -2.2598729133605957, + "logits/rejected": -2.0431060791015625, + "logps/chosen": -642.5684814453125, + "logps/rejected": -509.0912170410156, + "loss": 0.794, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -2.7314934730529785, + "rewards/margins": 0.42480888962745667, + "rewards/rejected": -3.156301975250244, + "step": 12440 + }, + { + "epoch": 0.81, + "learning_rate": 5.056757183042732e-07, + "logits/chosen": -2.065981388092041, + "logits/rejected": -1.5648051500320435, + "logps/chosen": -562.4705810546875, + "logps/rejected": -604.2764892578125, + "loss": 0.6376, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -2.8458163738250732, + "rewards/margins": 0.7787143588066101, + "rewards/rejected": -3.624530792236328, + "step": 12450 + }, + { + "epoch": 0.82, + "learning_rate": 5.022377603980308e-07, + "logits/chosen": -1.9168132543563843, + "logits/rejected": -1.6493221521377563, + "logps/chosen": -689.4484252929688, + "logps/rejected": -724.8370361328125, + "loss": 0.4035, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -2.7570643424987793, + "rewards/margins": 1.5779832601547241, + "rewards/rejected": -4.335047721862793, + "step": 12460 + }, + { + "epoch": 0.82, + "learning_rate": 4.988102237982454e-07, + "logits/chosen": -1.7574495077133179, + "logits/rejected": -2.05273699760437, + "logps/chosen": -525.7030029296875, + "logps/rejected": -648.1375732421875, + "loss": 0.4259, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -2.600522994995117, + "rewards/margins": 1.2422122955322266, + "rewards/rejected": -3.8427352905273438, + "step": 12470 + }, + { + "epoch": 0.82, + "learning_rate": 4.953931263846251e-07, + "logits/chosen": -2.0417659282684326, + "logits/rejected": -1.957859754562378, + "logps/chosen": -608.4056396484375, + "logps/rejected": -740.5634155273438, + "loss": 0.5116, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -2.7717933654785156, + "rewards/margins": 0.4688115119934082, + "rewards/rejected": -3.240605115890503, + "step": 12480 + }, + { + "epoch": 0.82, + "learning_rate": 4.919864859824266e-07, + "logits/chosen": -1.8863499164581299, + "logits/rejected": -1.8030261993408203, + "logps/chosen": -553.4390869140625, + "logps/rejected": -671.4093627929688, + "loss": 0.5524, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -2.732145071029663, + "rewards/margins": 1.1588938236236572, + "rewards/rejected": -3.8910393714904785, + "step": 12490 + }, + { + "epoch": 0.82, + "learning_rate": 4.885903203623532e-07, + "logits/chosen": -1.9563652276992798, + "logits/rejected": -1.9688422679901123, + "logps/chosen": -550.7012939453125, + "logps/rejected": -624.7869873046875, + "loss": 0.4811, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -2.5207536220550537, + "rewards/margins": 0.9616984128952026, + "rewards/rejected": -3.482451915740967, + "step": 12500 + }, + { + "epoch": 0.82, + "learning_rate": 4.852046472404695e-07, + "logits/chosen": -2.0069515705108643, + "logits/rejected": -1.915759801864624, + "logps/chosen": -574.3663330078125, + "logps/rejected": -675.5836791992188, + "loss": 0.5176, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -2.898287534713745, + "rewards/margins": 1.1114394664764404, + "rewards/rejected": -4.0097270011901855, + "step": 12510 + }, + { + "epoch": 0.82, + "learning_rate": 4.818294842781035e-07, + "logits/chosen": -1.8245235681533813, + "logits/rejected": -1.6396923065185547, + "logps/chosen": -511.2354431152344, + "logps/rejected": -533.2288818359375, + "loss": 0.5954, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.424683094024658, + "rewards/margins": 0.762967586517334, + "rewards/rejected": -3.187650442123413, + "step": 12520 + }, + { + "epoch": 0.82, + "learning_rate": 4.784648490817601e-07, + "logits/chosen": -2.390002727508545, + "logits/rejected": -1.4948108196258545, + "logps/chosen": -599.1458740234375, + "logps/rejected": -640.5682373046875, + "loss": 0.5618, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.689018726348877, + "rewards/margins": 1.4242786169052124, + "rewards/rejected": -4.113297462463379, + "step": 12530 + }, + { + "epoch": 0.82, + "learning_rate": 4.751107592030235e-07, + "logits/chosen": -2.0932703018188477, + "logits/rejected": -1.7733566761016846, + "logps/chosen": -610.6126708984375, + "logps/rejected": -719.3922119140625, + "loss": 0.5577, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -2.9133660793304443, + "rewards/margins": 1.0601561069488525, + "rewards/rejected": -3.9735217094421387, + "step": 12540 + }, + { + "epoch": 0.82, + "learning_rate": 4.717672321384703e-07, + "logits/chosen": -1.5202213525772095, + "logits/rejected": -1.974769949913025, + "logps/chosen": -458.13836669921875, + "logps/rejected": -612.10986328125, + "loss": 0.4942, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -2.4996895790100098, + "rewards/margins": 1.1478599309921265, + "rewards/rejected": -3.6475493907928467, + "step": 12550 + }, + { + "epoch": 0.82, + "learning_rate": 4.684342853295748e-07, + "logits/chosen": -1.9798561334609985, + "logits/rejected": -1.7142130136489868, + "logps/chosen": -574.4808349609375, + "logps/rejected": -703.8031616210938, + "loss": 0.5167, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -2.6047985553741455, + "rewards/margins": 1.0268545150756836, + "rewards/rejected": -3.6316535472869873, + "step": 12560 + }, + { + "epoch": 0.82, + "learning_rate": 4.651119361626213e-07, + "logits/chosen": -2.03121018409729, + "logits/rejected": -1.7948925495147705, + "logps/chosen": -556.2425537109375, + "logps/rejected": -572.4705810546875, + "loss": 0.6544, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -2.823820114135742, + "rewards/margins": 0.2980334162712097, + "rewards/rejected": -3.1218535900115967, + "step": 12570 + }, + { + "epoch": 0.82, + "learning_rate": 4.618002019686091e-07, + "logits/chosen": -2.1452739238739014, + "logits/rejected": -1.3949072360992432, + "logps/chosen": -565.899169921875, + "logps/rejected": -617.302734375, + "loss": 0.4045, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -2.528507947921753, + "rewards/margins": 1.3577812910079956, + "rewards/rejected": -3.886289596557617, + "step": 12580 + }, + { + "epoch": 0.82, + "learning_rate": 4.5849910002316757e-07, + "logits/chosen": -2.036592483520508, + "logits/rejected": -2.1171741485595703, + "logps/chosen": -571.2657470703125, + "logps/rejected": -675.0497436523438, + "loss": 0.5223, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -2.716240644454956, + "rewards/margins": 1.0484824180603027, + "rewards/rejected": -3.764723300933838, + "step": 12590 + }, + { + "epoch": 0.82, + "learning_rate": 4.5520864754645984e-07, + "logits/chosen": -2.0893242359161377, + "logits/rejected": -2.1231119632720947, + "logps/chosen": -479.87066650390625, + "logps/rejected": -635.230224609375, + "loss": 0.5501, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -2.6188578605651855, + "rewards/margins": 0.9830479621887207, + "rewards/rejected": -3.6019058227539062, + "step": 12600 + }, + { + "epoch": 0.83, + "learning_rate": 4.5192886170309896e-07, + "logits/chosen": -1.8651552200317383, + "logits/rejected": -1.6246029138565063, + "logps/chosen": -573.304931640625, + "logps/rejected": -564.4774169921875, + "loss": 0.5632, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -3.2361462116241455, + "rewards/margins": 0.16420508921146393, + "rewards/rejected": -3.4003512859344482, + "step": 12610 + }, + { + "epoch": 0.83, + "learning_rate": 4.486597596020548e-07, + "logits/chosen": -1.7377746105194092, + "logits/rejected": -1.476535439491272, + "logps/chosen": -492.79290771484375, + "logps/rejected": -609.2454833984375, + "loss": 0.559, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.638640880584717, + "rewards/margins": 1.4015352725982666, + "rewards/rejected": -4.040175914764404, + "step": 12620 + }, + { + "epoch": 0.83, + "learning_rate": 4.454013582965644e-07, + "logits/chosen": -1.9276511669158936, + "logits/rejected": -1.6506223678588867, + "logps/chosen": -475.0560607910156, + "logps/rejected": -581.9962158203125, + "loss": 0.457, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.385908365249634, + "rewards/margins": 1.3607556819915771, + "rewards/rejected": -3.746664047241211, + "step": 12630 + }, + { + "epoch": 0.83, + "learning_rate": 4.4215367478404605e-07, + "logits/chosen": -1.850710153579712, + "logits/rejected": -1.9889118671417236, + "logps/chosen": -599.2052001953125, + "logps/rejected": -740.7207641601562, + "loss": 0.4639, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -2.972484827041626, + "rewards/margins": 1.1469919681549072, + "rewards/rejected": -4.119476795196533, + "step": 12640 + }, + { + "epoch": 0.83, + "learning_rate": 4.389167260060068e-07, + "logits/chosen": -2.1288907527923584, + "logits/rejected": -1.8164348602294922, + "logps/chosen": -669.3413696289062, + "logps/rejected": -723.665283203125, + "loss": 0.2786, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -2.3347713947296143, + "rewards/margins": 1.8705580234527588, + "rewards/rejected": -4.205329418182373, + "step": 12650 + }, + { + "epoch": 0.83, + "learning_rate": 4.356905288479579e-07, + "logits/chosen": -1.9049745798110962, + "logits/rejected": -2.047344207763672, + "logps/chosen": -539.6206665039062, + "logps/rejected": -662.4989013671875, + "loss": 0.5003, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.6522750854492188, + "rewards/margins": 1.5446138381958008, + "rewards/rejected": -4.1968889236450195, + "step": 12660 + }, + { + "epoch": 0.83, + "learning_rate": 4.3247510013932377e-07, + "logits/chosen": -1.9219976663589478, + "logits/rejected": -1.8390038013458252, + "logps/chosen": -500.98199462890625, + "logps/rejected": -563.2193603515625, + "loss": 0.6404, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -2.376319646835327, + "rewards/margins": 0.6220875978469849, + "rewards/rejected": -2.9984073638916016, + "step": 12670 + }, + { + "epoch": 0.83, + "learning_rate": 4.2927045665335594e-07, + "logits/chosen": -1.747137427330017, + "logits/rejected": -1.8025964498519897, + "logps/chosen": -678.0807495117188, + "logps/rejected": -724.901611328125, + "loss": 0.5937, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -3.0370450019836426, + "rewards/margins": 0.9841930270195007, + "rewards/rejected": -4.021238327026367, + "step": 12680 + }, + { + "epoch": 0.83, + "learning_rate": 4.260766151070439e-07, + "logits/chosen": -1.9573780298233032, + "logits/rejected": -1.9837071895599365, + "logps/chosen": -629.7701416015625, + "logps/rejected": -728.9971313476562, + "loss": 0.4825, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -2.3575403690338135, + "rewards/margins": 1.7600396871566772, + "rewards/rejected": -4.117579460144043, + "step": 12690 + }, + { + "epoch": 0.83, + "learning_rate": 4.228935921610308e-07, + "logits/chosen": -2.112596035003662, + "logits/rejected": -2.3389065265655518, + "logps/chosen": -511.53118896484375, + "logps/rejected": -678.299072265625, + "loss": 0.6924, + "rewards/accuracies": 0.5, + "rewards/chosen": -2.821420669555664, + "rewards/margins": 0.20472578704357147, + "rewards/rejected": -3.026146650314331, + "step": 12700 + }, + { + "epoch": 0.83, + "learning_rate": 4.1972140441952246e-07, + "logits/chosen": -2.140105962753296, + "logits/rejected": -1.904766321182251, + "logps/chosen": -576.4169311523438, + "logps/rejected": -638.1660766601562, + "loss": 0.582, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -2.951691150665283, + "rewards/margins": 0.9234949350357056, + "rewards/rejected": -3.8751864433288574, + "step": 12710 + }, + { + "epoch": 0.83, + "learning_rate": 4.165600684302046e-07, + "logits/chosen": -2.0236175060272217, + "logits/rejected": -2.025157928466797, + "logps/chosen": -698.0067749023438, + "logps/rejected": -652.054931640625, + "loss": 0.4429, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -1.9516820907592773, + "rewards/margins": 0.9500513076782227, + "rewards/rejected": -2.901733160018921, + "step": 12720 + }, + { + "epoch": 0.83, + "learning_rate": 4.13409600684154e-07, + "logits/chosen": -2.2495474815368652, + "logits/rejected": -1.8680133819580078, + "logps/chosen": -604.0130615234375, + "logps/rejected": -690.2644653320312, + "loss": 0.3482, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -2.2546908855438232, + "rewards/margins": 1.5465089082717896, + "rewards/rejected": -3.8011996746063232, + "step": 12730 + }, + { + "epoch": 0.83, + "learning_rate": 4.102700176157548e-07, + "logits/chosen": -2.0701842308044434, + "logits/rejected": -1.9402803182601929, + "logps/chosen": -646.2323608398438, + "logps/rejected": -698.7376708984375, + "loss": 0.4756, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -3.0139358043670654, + "rewards/margins": 0.7653390765190125, + "rewards/rejected": -3.7792751789093018, + "step": 12740 + }, + { + "epoch": 0.83, + "learning_rate": 4.0714133560260884e-07, + "logits/chosen": -1.6190227270126343, + "logits/rejected": -1.9803718328475952, + "logps/chosen": -571.6145629882812, + "logps/rejected": -689.69287109375, + "loss": 0.5, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -2.9178075790405273, + "rewards/margins": 1.2299944162368774, + "rewards/rejected": -4.147801876068115, + "step": 12750 + }, + { + "epoch": 0.83, + "learning_rate": 4.0402357096545527e-07, + "logits/chosen": -2.0898361206054688, + "logits/rejected": -2.194303035736084, + "logps/chosen": -600.5381469726562, + "logps/rejected": -672.1326293945312, + "loss": 0.4669, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.165907144546509, + "rewards/margins": 1.0310553312301636, + "rewards/rejected": -3.196962356567383, + "step": 12760 + }, + { + "epoch": 0.84, + "learning_rate": 4.0091673996808025e-07, + "logits/chosen": -2.037740468978882, + "logits/rejected": -1.6166813373565674, + "logps/chosen": -600.8524169921875, + "logps/rejected": -612.1278686523438, + "loss": 0.5125, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.304969549179077, + "rewards/margins": 1.0908288955688477, + "rewards/rejected": -3.395798444747925, + "step": 12770 + }, + { + "epoch": 0.84, + "learning_rate": 3.9782085881723776e-07, + "logits/chosen": -1.8339916467666626, + "logits/rejected": -2.191059112548828, + "logps/chosen": -540.3582153320312, + "logps/rejected": -670.88232421875, + "loss": 0.5695, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -3.28513765335083, + "rewards/margins": 0.8673833608627319, + "rewards/rejected": -4.152520656585693, + "step": 12780 + }, + { + "epoch": 0.84, + "learning_rate": 3.947359436625592e-07, + "logits/chosen": -1.8163385391235352, + "logits/rejected": -1.6801373958587646, + "logps/chosen": -467.11376953125, + "logps/rejected": -692.0794677734375, + "loss": 0.3275, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -2.140580654144287, + "rewards/margins": 2.241574287414551, + "rewards/rejected": -4.382154941558838, + "step": 12790 + }, + { + "epoch": 0.84, + "learning_rate": 3.9166201059647386e-07, + "logits/chosen": -2.2520198822021484, + "logits/rejected": -1.7607390880584717, + "logps/chosen": -490.7139587402344, + "logps/rejected": -604.9755859375, + "loss": 0.4137, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -2.212995767593384, + "rewards/margins": 1.3278906345367432, + "rewards/rejected": -3.540886402130127, + "step": 12800 + }, + { + "epoch": 0.84, + "learning_rate": 3.8859907565412194e-07, + "logits/chosen": -2.5652875900268555, + "logits/rejected": -1.9722869396209717, + "logps/chosen": -594.1384887695312, + "logps/rejected": -624.1168823242188, + "loss": 0.6008, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -2.7919609546661377, + "rewards/margins": 1.0597692728042603, + "rewards/rejected": -3.85172963142395, + "step": 12810 + }, + { + "epoch": 0.84, + "learning_rate": 3.8554715481327303e-07, + "logits/chosen": -2.1508398056030273, + "logits/rejected": -1.7388432025909424, + "logps/chosen": -571.0724487304688, + "logps/rejected": -676.97998046875, + "loss": 0.4576, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -2.288452386856079, + "rewards/margins": 1.5518442392349243, + "rewards/rejected": -3.8402962684631348, + "step": 12820 + }, + { + "epoch": 0.84, + "learning_rate": 3.8250626399424007e-07, + "logits/chosen": -2.3599681854248047, + "logits/rejected": -2.067962408065796, + "logps/chosen": -582.147216796875, + "logps/rejected": -634.3817749023438, + "loss": 0.6027, + "rewards/accuracies": 0.75, + "rewards/chosen": -1.914297103881836, + "rewards/margins": 0.8835035562515259, + "rewards/rejected": -2.7978005409240723, + "step": 12830 + }, + { + "epoch": 0.84, + "learning_rate": 3.7947641905980104e-07, + "logits/chosen": -2.2323431968688965, + "logits/rejected": -1.75969660282135, + "logps/chosen": -532.4102172851562, + "logps/rejected": -586.7337646484375, + "loss": 0.377, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -2.3225793838500977, + "rewards/margins": 1.504718542098999, + "rewards/rejected": -3.8272979259490967, + "step": 12840 + }, + { + "epoch": 0.84, + "learning_rate": 3.764576358151098e-07, + "logits/chosen": -1.6704959869384766, + "logits/rejected": -1.1709400415420532, + "logps/chosen": -492.19659423828125, + "logps/rejected": -505.697021484375, + "loss": 0.606, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -2.853659152984619, + "rewards/margins": 0.8451746106147766, + "rewards/rejected": -3.69883394241333, + "step": 12850 + }, + { + "epoch": 0.84, + "learning_rate": 3.7344993000761944e-07, + "logits/chosen": -2.033820390701294, + "logits/rejected": -2.1698267459869385, + "logps/chosen": -602.9301147460938, + "logps/rejected": -742.5383911132812, + "loss": 0.5597, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.648543119430542, + "rewards/margins": 0.9859378933906555, + "rewards/rejected": -3.634481430053711, + "step": 12860 + }, + { + "epoch": 0.84, + "learning_rate": 3.7045331732699585e-07, + "logits/chosen": -2.0151562690734863, + "logits/rejected": -1.9181638956069946, + "logps/chosen": -651.0270385742188, + "logps/rejected": -691.8004760742188, + "loss": 0.5002, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -2.936880111694336, + "rewards/margins": 1.0908117294311523, + "rewards/rejected": -4.027691841125488, + "step": 12870 + }, + { + "epoch": 0.84, + "learning_rate": 3.6746781340503993e-07, + "logits/chosen": -2.1583030223846436, + "logits/rejected": -1.5957120656967163, + "logps/chosen": -512.1683349609375, + "logps/rejected": -653.0682983398438, + "loss": 0.4046, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -2.6026124954223633, + "rewards/margins": 1.51370370388031, + "rewards/rejected": -4.116316318511963, + "step": 12880 + }, + { + "epoch": 0.84, + "learning_rate": 3.6449343381560116e-07, + "logits/chosen": -2.0830488204956055, + "logits/rejected": -1.7334263324737549, + "logps/chosen": -628.7766723632812, + "logps/rejected": -617.7608032226562, + "loss": 0.6826, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -3.146591901779175, + "rewards/margins": 0.6630234718322754, + "rewards/rejected": -3.80961537361145, + "step": 12890 + }, + { + "epoch": 0.84, + "learning_rate": 3.615301940745017e-07, + "logits/chosen": -2.1485512256622314, + "logits/rejected": -2.1845757961273193, + "logps/chosen": -547.0929565429688, + "logps/rejected": -692.17919921875, + "loss": 0.5561, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -2.4031753540039062, + "rewards/margins": 0.7044495344161987, + "rewards/rejected": -3.1076247692108154, + "step": 12900 + }, + { + "epoch": 0.84, + "learning_rate": 3.5857810963945084e-07, + "logits/chosen": -2.192819595336914, + "logits/rejected": -2.05318284034729, + "logps/chosen": -594.884765625, + "logps/rejected": -599.3443603515625, + "loss": 0.7412, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -2.6752190589904785, + "rewards/margins": 0.33076637983322144, + "rewards/rejected": -3.005985736846924, + "step": 12910 + }, + { + "epoch": 0.85, + "learning_rate": 3.556371959099678e-07, + "logits/chosen": -2.1900582313537598, + "logits/rejected": -1.5971273183822632, + "logps/chosen": -542.5919799804688, + "logps/rejected": -571.1941528320312, + "loss": 0.4925, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -2.249591112136841, + "rewards/margins": 0.8787922859191895, + "rewards/rejected": -3.1283833980560303, + "step": 12920 + }, + { + "epoch": 0.85, + "learning_rate": 3.5270746822729797e-07, + "logits/chosen": -1.9319312572479248, + "logits/rejected": -1.6746313571929932, + "logps/chosen": -597.1219482421875, + "logps/rejected": -586.4876708984375, + "loss": 0.4624, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -2.0332717895507812, + "rewards/margins": 1.236446499824524, + "rewards/rejected": -3.269718647003174, + "step": 12930 + }, + { + "epoch": 0.85, + "learning_rate": 3.4978894187433746e-07, + "logits/chosen": -2.0437123775482178, + "logits/rejected": -2.0197818279266357, + "logps/chosen": -532.19677734375, + "logps/rejected": -666.5894775390625, + "loss": 0.5365, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.8714423179626465, + "rewards/margins": 0.9335220456123352, + "rewards/rejected": -3.804964542388916, + "step": 12940 + }, + { + "epoch": 0.85, + "learning_rate": 3.468816320755486e-07, + "logits/chosen": -1.7541229724884033, + "logits/rejected": -1.7077795267105103, + "logps/chosen": -509.0663146972656, + "logps/rejected": -753.5894165039062, + "loss": 0.4303, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -2.6254935264587402, + "rewards/margins": 1.4224083423614502, + "rewards/rejected": -4.0479021072387695, + "step": 12950 + }, + { + "epoch": 0.85, + "learning_rate": 3.4398555399688336e-07, + "logits/chosen": -1.9851763248443604, + "logits/rejected": -1.3425769805908203, + "logps/chosen": -554.126953125, + "logps/rejected": -561.9085083007812, + "loss": 0.5907, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -3.192791700363159, + "rewards/margins": 0.6226320862770081, + "rewards/rejected": -3.8154244422912598, + "step": 12960 + }, + { + "epoch": 0.85, + "learning_rate": 3.411007227457047e-07, + "logits/chosen": -1.9916019439697266, + "logits/rejected": -1.8850574493408203, + "logps/chosen": -510.70733642578125, + "logps/rejected": -586.5621337890625, + "loss": 0.5224, + "rewards/accuracies": 0.4000000059604645, + "rewards/chosen": -2.755558967590332, + "rewards/margins": 0.21060097217559814, + "rewards/rejected": -2.9661598205566406, + "step": 12970 + }, + { + "epoch": 0.85, + "learning_rate": 3.382271533707043e-07, + "logits/chosen": -2.1298325061798096, + "logits/rejected": -1.8457624912261963, + "logps/chosen": -761.4664306640625, + "logps/rejected": -716.9132080078125, + "loss": 0.5527, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -2.6292195320129395, + "rewards/margins": 0.9699549674987793, + "rewards/rejected": -3.5991744995117188, + "step": 12980 + }, + { + "epoch": 0.85, + "learning_rate": 3.353648608618287e-07, + "logits/chosen": -2.0403294563293457, + "logits/rejected": -2.2515652179718018, + "logps/chosen": -613.3743896484375, + "logps/rejected": -729.22802734375, + "loss": 0.6022, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -2.769686222076416, + "rewards/margins": 0.8927669525146484, + "rewards/rejected": -3.6624526977539062, + "step": 12990 + }, + { + "epoch": 0.85, + "learning_rate": 3.3251386015019676e-07, + "logits/chosen": -2.323190927505493, + "logits/rejected": -1.8212988376617432, + "logps/chosen": -632.8485717773438, + "logps/rejected": -775.36181640625, + "loss": 0.5054, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -2.0626654624938965, + "rewards/margins": 1.7412147521972656, + "rewards/rejected": -3.803880214691162, + "step": 13000 + }, + { + "epoch": 0.85, + "learning_rate": 3.296741661080255e-07, + "logits/chosen": -1.7240066528320312, + "logits/rejected": -1.8471324443817139, + "logps/chosen": -528.8418579101562, + "logps/rejected": -653.22509765625, + "loss": 0.6246, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -2.734323024749756, + "rewards/margins": 0.8821514248847961, + "rewards/rejected": -3.6164746284484863, + "step": 13010 + }, + { + "epoch": 0.85, + "learning_rate": 3.2684579354854974e-07, + "logits/chosen": -1.852184534072876, + "logits/rejected": -1.9791618585586548, + "logps/chosen": -607.6898193359375, + "logps/rejected": -612.999267578125, + "loss": 0.6873, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -3.3641304969787598, + "rewards/margins": 0.4132850766181946, + "rewards/rejected": -3.7774155139923096, + "step": 13020 + }, + { + "epoch": 0.85, + "learning_rate": 3.2402875722594653e-07, + "logits/chosen": -1.6074268817901611, + "logits/rejected": -1.5102227926254272, + "logps/chosen": -469.12591552734375, + "logps/rejected": -599.9771118164062, + "loss": 0.3347, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -2.6753275394439697, + "rewards/margins": 1.4595181941986084, + "rewards/rejected": -4.13484525680542, + "step": 13030 + }, + { + "epoch": 0.85, + "learning_rate": 3.212230718352566e-07, + "logits/chosen": -2.4419732093811035, + "logits/rejected": -2.057793617248535, + "logps/chosen": -708.2984008789062, + "logps/rejected": -678.909912109375, + "loss": 0.52, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -2.55948543548584, + "rewards/margins": 0.5278710722923279, + "rewards/rejected": -3.0873570442199707, + "step": 13040 + }, + { + "epoch": 0.85, + "learning_rate": 3.1842875201231025e-07, + "logits/chosen": -1.3760806322097778, + "logits/rejected": -1.891939401626587, + "logps/chosen": -555.8878173828125, + "logps/rejected": -505.7021484375, + "loss": 0.6167, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -2.7588725090026855, + "rewards/margins": 0.5370644330978394, + "rewards/rejected": -3.2959365844726562, + "step": 13050 + }, + { + "epoch": 0.85, + "learning_rate": 3.156458123336478e-07, + "logits/chosen": -1.6771266460418701, + "logits/rejected": -1.8519872426986694, + "logps/chosen": -511.8056640625, + "logps/rejected": -585.6915893554688, + "loss": 0.6702, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -3.160909652709961, + "rewards/margins": 0.9589214324951172, + "rewards/rejected": -4.119831085205078, + "step": 13060 + }, + { + "epoch": 0.86, + "learning_rate": 3.128742673164459e-07, + "logits/chosen": -2.0616233348846436, + "logits/rejected": -2.001276969909668, + "logps/chosen": -525.9805908203125, + "logps/rejected": -554.439453125, + "loss": 0.6953, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -2.1127099990844727, + "rewards/margins": 0.6922973990440369, + "rewards/rejected": -2.805007219314575, + "step": 13070 + }, + { + "epoch": 0.86, + "learning_rate": 3.101141314184414e-07, + "logits/chosen": -1.6857885122299194, + "logits/rejected": -2.239570140838623, + "logps/chosen": -489.9275817871094, + "logps/rejected": -652.6864013671875, + "loss": 0.4121, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.383946657180786, + "rewards/margins": 1.0762474536895752, + "rewards/rejected": -3.4601943492889404, + "step": 13080 + }, + { + "epoch": 0.86, + "learning_rate": 3.0736541903785526e-07, + "logits/chosen": -1.9731498956680298, + "logits/rejected": -1.6244605779647827, + "logps/chosen": -529.0377197265625, + "logps/rejected": -638.1597900390625, + "loss": 0.3359, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -2.173402786254883, + "rewards/margins": 1.8639698028564453, + "rewards/rejected": -4.037372589111328, + "step": 13090 + }, + { + "epoch": 0.86, + "learning_rate": 3.0462814451331704e-07, + "logits/chosen": -2.0660529136657715, + "logits/rejected": -1.9338308572769165, + "logps/chosen": -512.8846435546875, + "logps/rejected": -600.3922119140625, + "loss": 0.5674, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -2.2265946865081787, + "rewards/margins": 1.1876270771026611, + "rewards/rejected": -3.4142215251922607, + "step": 13100 + }, + { + "epoch": 0.86, + "learning_rate": 3.019023221237927e-07, + "logits/chosen": -2.053157329559326, + "logits/rejected": -1.6921491622924805, + "logps/chosen": -498.1617126464844, + "logps/rejected": -530.1224365234375, + "loss": 0.4655, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.7820048332214355, + "rewards/margins": 0.839253306388855, + "rewards/rejected": -3.621258497238159, + "step": 13110 + }, + { + "epoch": 0.86, + "learning_rate": 2.991879660885058e-07, + "logits/chosen": -2.083878517150879, + "logits/rejected": -1.2553603649139404, + "logps/chosen": -486.6934509277344, + "logps/rejected": -607.5968017578125, + "loss": 0.5171, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -2.330781936645508, + "rewards/margins": 1.6225910186767578, + "rewards/rejected": -3.9533724784851074, + "step": 13120 + }, + { + "epoch": 0.86, + "learning_rate": 2.9648509056686786e-07, + "logits/chosen": -2.3199565410614014, + "logits/rejected": -1.7166868448257446, + "logps/chosen": -556.6590576171875, + "logps/rejected": -598.4984130859375, + "loss": 0.3818, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -1.919614553451538, + "rewards/margins": 1.4941420555114746, + "rewards/rejected": -3.4137566089630127, + "step": 13130 + }, + { + "epoch": 0.86, + "learning_rate": 2.937937096584012e-07, + "logits/chosen": -1.9594379663467407, + "logits/rejected": -1.6278324127197266, + "logps/chosen": -526.9962158203125, + "logps/rejected": -588.111572265625, + "loss": 0.4816, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -2.6737329959869385, + "rewards/margins": 0.8540897369384766, + "rewards/rejected": -3.527822494506836, + "step": 13140 + }, + { + "epoch": 0.86, + "learning_rate": 2.9111383740266756e-07, + "logits/chosen": -1.6279857158660889, + "logits/rejected": -2.1303136348724365, + "logps/chosen": -543.24853515625, + "logps/rejected": -681.8314208984375, + "loss": 0.5019, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -3.4309229850769043, + "rewards/margins": 0.676580548286438, + "rewards/rejected": -4.1075029373168945, + "step": 13150 + }, + { + "epoch": 0.86, + "learning_rate": 2.8844548777919255e-07, + "logits/chosen": -1.8237205743789673, + "logits/rejected": -1.8526685237884521, + "logps/chosen": -670.98486328125, + "logps/rejected": -855.240234375, + "loss": 0.5499, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -3.716068983078003, + "rewards/margins": 1.226020097732544, + "rewards/rejected": -4.942088603973389, + "step": 13160 + }, + { + "epoch": 0.86, + "learning_rate": 2.8578867470739594e-07, + "logits/chosen": -2.036724805831909, + "logits/rejected": -1.7551301717758179, + "logps/chosen": -541.3497314453125, + "logps/rejected": -731.2151489257812, + "loss": 0.4116, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.593327283859253, + "rewards/margins": 1.2935224771499634, + "rewards/rejected": -3.8868496417999268, + "step": 13170 + }, + { + "epoch": 0.86, + "learning_rate": 2.8314341204651484e-07, + "logits/chosen": -1.8775577545166016, + "logits/rejected": -1.483978033065796, + "logps/chosen": -518.1921997070312, + "logps/rejected": -620.164306640625, + "loss": 0.6077, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -2.475600242614746, + "rewards/margins": 1.0090327262878418, + "rewards/rejected": -3.484632968902588, + "step": 13180 + }, + { + "epoch": 0.86, + "learning_rate": 2.805097135955362e-07, + "logits/chosen": -1.7203683853149414, + "logits/rejected": -1.4739410877227783, + "logps/chosen": -573.33984375, + "logps/rejected": -617.6419677734375, + "loss": 0.6494, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -3.027435302734375, + "rewards/margins": 0.5865287780761719, + "rewards/rejected": -3.613964080810547, + "step": 13190 + }, + { + "epoch": 0.86, + "learning_rate": 2.778875930931213e-07, + "logits/chosen": -2.086585283279419, + "logits/rejected": -1.8391920328140259, + "logps/chosen": -534.3094482421875, + "logps/rejected": -591.4953002929688, + "loss": 0.5956, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -3.117051601409912, + "rewards/margins": 0.9744482040405273, + "rewards/rejected": -4.0914998054504395, + "step": 13200 + }, + { + "epoch": 0.86, + "learning_rate": 2.7527706421753426e-07, + "logits/chosen": -1.999886155128479, + "logits/rejected": -2.0072293281555176, + "logps/chosen": -597.1724853515625, + "logps/rejected": -748.3546142578125, + "loss": 0.3268, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -1.8634326457977295, + "rewards/margins": 1.959315299987793, + "rewards/rejected": -3.8227481842041016, + "step": 13210 + }, + { + "epoch": 0.86, + "learning_rate": 2.726781405865736e-07, + "logits/chosen": -1.7645056247711182, + "logits/rejected": -1.8943487405776978, + "logps/chosen": -550.005859375, + "logps/rejected": -743.2796630859375, + "loss": 0.528, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -3.024041175842285, + "rewards/margins": 0.9227795600891113, + "rewards/rejected": -3.9468207359313965, + "step": 13220 + }, + { + "epoch": 0.87, + "learning_rate": 2.7009083575749687e-07, + "logits/chosen": -1.7528998851776123, + "logits/rejected": -2.1692652702331543, + "logps/chosen": -533.8668212890625, + "logps/rejected": -633.972412109375, + "loss": 0.4979, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -2.3489327430725098, + "rewards/margins": 1.1471573114395142, + "rewards/rejected": -3.4960899353027344, + "step": 13230 + }, + { + "epoch": 0.87, + "learning_rate": 2.6751516322695457e-07, + "logits/chosen": -1.7719800472259521, + "logits/rejected": -2.000718355178833, + "logps/chosen": -583.4794921875, + "logps/rejected": -676.126708984375, + "loss": 0.4221, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -2.420154094696045, + "rewards/margins": 1.2737191915512085, + "rewards/rejected": -3.693873643875122, + "step": 13240 + }, + { + "epoch": 0.87, + "learning_rate": 2.649511364309154e-07, + "logits/chosen": -2.1271283626556396, + "logits/rejected": -1.6879568099975586, + "logps/chosen": -541.5220947265625, + "logps/rejected": -575.933837890625, + "loss": 0.7008, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -2.7217676639556885, + "rewards/margins": 0.6736433506011963, + "rewards/rejected": -3.3954110145568848, + "step": 13250 + }, + { + "epoch": 0.87, + "learning_rate": 2.6239876874460003e-07, + "logits/chosen": -1.8494091033935547, + "logits/rejected": -1.6988645792007446, + "logps/chosen": -529.8714599609375, + "logps/rejected": -623.36962890625, + "loss": 0.4867, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -2.4785268306732178, + "rewards/margins": 1.0714387893676758, + "rewards/rejected": -3.5499656200408936, + "step": 13260 + }, + { + "epoch": 0.87, + "learning_rate": 2.5985807348240744e-07, + "logits/chosen": -2.0470900535583496, + "logits/rejected": -2.0908408164978027, + "logps/chosen": -547.52783203125, + "logps/rejected": -677.0643310546875, + "loss": 0.3973, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -2.1159825325012207, + "rewards/margins": 1.9986178874969482, + "rewards/rejected": -4.11460018157959, + "step": 13270 + }, + { + "epoch": 0.87, + "learning_rate": 2.5732906389785014e-07, + "logits/chosen": -2.3125741481781006, + "logits/rejected": -2.243870735168457, + "logps/chosen": -578.3486938476562, + "logps/rejected": -613.689697265625, + "loss": 0.6074, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -2.6407647132873535, + "rewards/margins": 0.7890304923057556, + "rewards/rejected": -3.429795026779175, + "step": 13280 + }, + { + "epoch": 0.87, + "learning_rate": 2.5481175318347956e-07, + "logits/chosen": -2.2822792530059814, + "logits/rejected": -2.039731025695801, + "logps/chosen": -590.3525390625, + "logps/rejected": -729.5716552734375, + "loss": 0.6833, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -2.4964635372161865, + "rewards/margins": 0.7121137976646423, + "rewards/rejected": -3.2085776329040527, + "step": 13290 + }, + { + "epoch": 0.87, + "learning_rate": 2.5230615447082246e-07, + "logits/chosen": -2.2843728065490723, + "logits/rejected": -1.8483355045318604, + "logps/chosen": -563.0799560546875, + "logps/rejected": -601.3505249023438, + "loss": 0.4561, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -2.796326160430908, + "rewards/margins": 0.8913065195083618, + "rewards/rejected": -3.6876327991485596, + "step": 13300 + }, + { + "epoch": 0.87, + "learning_rate": 2.49812280830308e-07, + "logits/chosen": -1.7356340885162354, + "logits/rejected": -1.704167366027832, + "logps/chosen": -431.0927734375, + "logps/rejected": -556.7681884765625, + "loss": 0.5258, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -2.185537338256836, + "rewards/margins": 1.2141865491867065, + "rewards/rejected": -3.399723768234253, + "step": 13310 + }, + { + "epoch": 0.87, + "learning_rate": 2.4733014527120457e-07, + "logits/chosen": -2.1145758628845215, + "logits/rejected": -1.5190739631652832, + "logps/chosen": -648.5697021484375, + "logps/rejected": -629.7784423828125, + "loss": 0.6043, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -3.0062718391418457, + "rewards/margins": 0.17153212428092957, + "rewards/rejected": -3.1778042316436768, + "step": 13320 + }, + { + "epoch": 0.87, + "learning_rate": 2.4485976074154565e-07, + "logits/chosen": -2.2370784282684326, + "logits/rejected": -1.8612892627716064, + "logps/chosen": -573.6791381835938, + "logps/rejected": -620.290283203125, + "loss": 0.6516, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.7251853942871094, + "rewards/margins": 1.1538364887237549, + "rewards/rejected": -3.8790218830108643, + "step": 13330 + }, + { + "epoch": 0.87, + "learning_rate": 2.4240114012806763e-07, + "logits/chosen": -1.9059484004974365, + "logits/rejected": -1.699082374572754, + "logps/chosen": -480.8922424316406, + "logps/rejected": -609.4310302734375, + "loss": 0.5809, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -2.237086534500122, + "rewards/margins": 0.8183034658432007, + "rewards/rejected": -3.055389881134033, + "step": 13340 + }, + { + "epoch": 0.87, + "learning_rate": 2.399542962561399e-07, + "logits/chosen": -2.176151990890503, + "logits/rejected": -2.2230257987976074, + "logps/chosen": -635.1368408203125, + "logps/rejected": -658.0455322265625, + "loss": 0.4534, + "rewards/accuracies": 0.75, + "rewards/chosen": -3.0653738975524902, + "rewards/margins": 0.7946423292160034, + "rewards/rejected": -3.8600165843963623, + "step": 13350 + }, + { + "epoch": 0.87, + "learning_rate": 2.3751924188969876e-07, + "logits/chosen": -1.8235515356063843, + "logits/rejected": -1.7832295894622803, + "logps/chosen": -537.9699096679688, + "logps/rejected": -640.5518188476562, + "loss": 0.5523, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -2.814718246459961, + "rewards/margins": 0.8247028589248657, + "rewards/rejected": -3.639420747756958, + "step": 13360 + }, + { + "epoch": 0.87, + "learning_rate": 2.3509598973118024e-07, + "logits/chosen": -1.9352995157241821, + "logits/rejected": -2.125542163848877, + "logps/chosen": -612.2140502929688, + "logps/rejected": -807.5963134765625, + "loss": 0.5532, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -2.1127424240112305, + "rewards/margins": 2.1476805210113525, + "rewards/rejected": -4.260422706604004, + "step": 13370 + }, + { + "epoch": 0.88, + "learning_rate": 2.326845524214555e-07, + "logits/chosen": -2.003291368484497, + "logits/rejected": -1.985430359840393, + "logps/chosen": -632.3148193359375, + "logps/rejected": -715.447021484375, + "loss": 0.6007, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -2.773860216140747, + "rewards/margins": 0.8511343002319336, + "rewards/rejected": -3.6249947547912598, + "step": 13380 + }, + { + "epoch": 0.88, + "learning_rate": 2.3028494253976158e-07, + "logits/chosen": -1.909105896949768, + "logits/rejected": -1.9562370777130127, + "logps/chosen": -641.9075927734375, + "logps/rejected": -664.9573974609375, + "loss": 0.5771, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -2.8007686138153076, + "rewards/margins": 0.5579482913017273, + "rewards/rejected": -3.3587164878845215, + "step": 13390 + }, + { + "epoch": 0.88, + "learning_rate": 2.2789717260364026e-07, + "logits/chosen": -2.392245054244995, + "logits/rejected": -1.9540112018585205, + "logps/chosen": -666.5771484375, + "logps/rejected": -664.3335571289062, + "loss": 0.5076, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.7387795448303223, + "rewards/margins": 0.9567523002624512, + "rewards/rejected": -3.6955318450927734, + "step": 13400 + }, + { + "epoch": 0.88, + "learning_rate": 2.255212550688682e-07, + "logits/chosen": -1.743224859237671, + "logits/rejected": -2.1734211444854736, + "logps/chosen": -580.0703735351562, + "logps/rejected": -669.6419677734375, + "loss": 0.7062, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -2.4899613857269287, + "rewards/margins": 0.4612503945827484, + "rewards/rejected": -2.951211452484131, + "step": 13410 + }, + { + "epoch": 0.88, + "learning_rate": 2.2315720232939598e-07, + "logits/chosen": -2.154392719268799, + "logits/rejected": -1.6075624227523804, + "logps/chosen": -609.3424072265625, + "logps/rejected": -720.8203735351562, + "loss": 0.6591, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -2.9730067253112793, + "rewards/margins": 1.2910255193710327, + "rewards/rejected": -4.264031887054443, + "step": 13420 + }, + { + "epoch": 0.88, + "learning_rate": 2.2080502671727956e-07, + "logits/chosen": -1.985039472579956, + "logits/rejected": -2.019219398498535, + "logps/chosen": -604.9932250976562, + "logps/rejected": -675.3074951171875, + "loss": 0.4031, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -2.4360015392303467, + "rewards/margins": 1.0569236278533936, + "rewards/rejected": -3.492924928665161, + "step": 13430 + }, + { + "epoch": 0.88, + "learning_rate": 2.1846474050262078e-07, + "logits/chosen": -1.9477293491363525, + "logits/rejected": -1.6500422954559326, + "logps/chosen": -503.76171875, + "logps/rejected": -555.336181640625, + "loss": 0.52, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -2.2972493171691895, + "rewards/margins": 1.2646000385284424, + "rewards/rejected": -3.5618491172790527, + "step": 13440 + }, + { + "epoch": 0.88, + "learning_rate": 2.1613635589349756e-07, + "logits/chosen": -1.9945507049560547, + "logits/rejected": -1.4509528875350952, + "logps/chosen": -587.4645385742188, + "logps/rejected": -687.7674560546875, + "loss": 0.5055, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -3.0522592067718506, + "rewards/margins": 1.0424975156784058, + "rewards/rejected": -4.094756603240967, + "step": 13450 + }, + { + "epoch": 0.88, + "learning_rate": 2.1381988503590578e-07, + "logits/chosen": -1.9841388463974, + "logits/rejected": -2.04860258102417, + "logps/chosen": -541.0191650390625, + "logps/rejected": -620.4351196289062, + "loss": 0.5912, + "rewards/accuracies": 0.75, + "rewards/chosen": -3.111358165740967, + "rewards/margins": 0.8610742688179016, + "rewards/rejected": -3.9724323749542236, + "step": 13460 + }, + { + "epoch": 0.88, + "learning_rate": 2.11515340013691e-07, + "logits/chosen": -1.5378882884979248, + "logits/rejected": -1.428012490272522, + "logps/chosen": -571.2432861328125, + "logps/rejected": -693.9453125, + "loss": 0.6854, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -3.346834659576416, + "rewards/margins": 0.9279208183288574, + "rewards/rejected": -4.274755001068115, + "step": 13470 + }, + { + "epoch": 0.88, + "learning_rate": 2.092227328484897e-07, + "logits/chosen": -2.31776762008667, + "logits/rejected": -1.6877338886260986, + "logps/chosen": -628.2243041992188, + "logps/rejected": -663.4994506835938, + "loss": 0.4897, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -1.9990774393081665, + "rewards/margins": 1.315735101699829, + "rewards/rejected": -3.314812421798706, + "step": 13480 + }, + { + "epoch": 0.88, + "learning_rate": 2.0694207549966345e-07, + "logits/chosen": -2.096724033355713, + "logits/rejected": -2.0771172046661377, + "logps/chosen": -475.6868591308594, + "logps/rejected": -588.9679565429688, + "loss": 0.5565, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.0010933876037598, + "rewards/margins": 1.167586088180542, + "rewards/rejected": -3.1686794757843018, + "step": 13490 + }, + { + "epoch": 0.88, + "learning_rate": 2.0467337986423864e-07, + "logits/chosen": -1.917855978012085, + "logits/rejected": -1.869166612625122, + "logps/chosen": -600.2056274414062, + "logps/rejected": -674.4396362304688, + "loss": 0.4465, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -2.6695377826690674, + "rewards/margins": 1.2429989576339722, + "rewards/rejected": -3.912536144256592, + "step": 13500 + }, + { + "epoch": 0.88, + "learning_rate": 2.0241665777684272e-07, + "logits/chosen": -2.0944838523864746, + "logits/rejected": -1.9450299739837646, + "logps/chosen": -550.61083984375, + "logps/rejected": -558.5845947265625, + "loss": 0.5995, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -2.613074541091919, + "rewards/margins": 0.7126724720001221, + "rewards/rejected": -3.32574725151062, + "step": 13510 + }, + { + "epoch": 0.88, + "learning_rate": 2.0017192100964366e-07, + "logits/chosen": -1.9588878154754639, + "logits/rejected": -1.8809268474578857, + "logps/chosen": -587.5408325195312, + "logps/rejected": -636.9974365234375, + "loss": 0.5443, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -2.8207666873931885, + "rewards/margins": 1.385787010192871, + "rewards/rejected": -4.206553936004639, + "step": 13520 + }, + { + "epoch": 0.89, + "learning_rate": 1.9793918127228777e-07, + "logits/chosen": -2.2451016902923584, + "logits/rejected": -1.945010781288147, + "logps/chosen": -692.6990356445312, + "logps/rejected": -717.7359619140625, + "loss": 0.3511, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -2.3657596111297607, + "rewards/margins": 1.6406257152557373, + "rewards/rejected": -4.00638484954834, + "step": 13530 + }, + { + "epoch": 0.89, + "learning_rate": 1.9571845021184005e-07, + "logits/chosen": -1.9744113683700562, + "logits/rejected": -2.2573609352111816, + "logps/chosen": -567.6195068359375, + "logps/rejected": -639.2779541015625, + "loss": 0.6477, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -2.4648168087005615, + "rewards/margins": 0.5581859350204468, + "rewards/rejected": -3.0230026245117188, + "step": 13540 + }, + { + "epoch": 0.89, + "learning_rate": 1.9350973941272027e-07, + "logits/chosen": -2.211911678314209, + "logits/rejected": -2.039405345916748, + "logps/chosen": -504.38165283203125, + "logps/rejected": -585.1155395507812, + "loss": 0.6163, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -3.1378955841064453, + "rewards/margins": 0.6837860941886902, + "rewards/rejected": -3.821681499481201, + "step": 13550 + }, + { + "epoch": 0.89, + "learning_rate": 1.9131306039664676e-07, + "logits/chosen": -1.8577501773834229, + "logits/rejected": -1.7375942468643188, + "logps/chosen": -654.9981689453125, + "logps/rejected": -676.6926879882812, + "loss": 0.6042, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -3.0645787715911865, + "rewards/margins": 0.8455365896224976, + "rewards/rejected": -3.9101154804229736, + "step": 13560 + }, + { + "epoch": 0.89, + "learning_rate": 1.8912842462257358e-07, + "logits/chosen": -2.2314064502716064, + "logits/rejected": -1.6110010147094727, + "logps/chosen": -546.2057495117188, + "logps/rejected": -739.7926635742188, + "loss": 0.357, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -1.6191498041152954, + "rewards/margins": 1.8934110403060913, + "rewards/rejected": -3.512561082839966, + "step": 13570 + }, + { + "epoch": 0.89, + "learning_rate": 1.869558434866303e-07, + "logits/chosen": -1.8159793615341187, + "logits/rejected": -1.8199894428253174, + "logps/chosen": -567.0482788085938, + "logps/rejected": -632.1754760742188, + "loss": 0.4181, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.8042659759521484, + "rewards/margins": 0.914924144744873, + "rewards/rejected": -3.7191901206970215, + "step": 13580 + }, + { + "epoch": 0.89, + "learning_rate": 1.847953283220652e-07, + "logits/chosen": -1.9711805582046509, + "logits/rejected": -1.8406652212142944, + "logps/chosen": -500.529541015625, + "logps/rejected": -594.8906860351562, + "loss": 0.4779, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -2.4555704593658447, + "rewards/margins": 0.9383051991462708, + "rewards/rejected": -3.3938755989074707, + "step": 13590 + }, + { + "epoch": 0.89, + "learning_rate": 1.8264689039918265e-07, + "logits/chosen": -2.391815662384033, + "logits/rejected": -2.154350757598877, + "logps/chosen": -658.3121337890625, + "logps/rejected": -721.5520629882812, + "loss": 0.563, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -2.005051612854004, + "rewards/margins": 1.2892992496490479, + "rewards/rejected": -3.294351100921631, + "step": 13600 + }, + { + "epoch": 0.89, + "learning_rate": 1.8051054092528857e-07, + "logits/chosen": -2.197561025619507, + "logits/rejected": -2.2787792682647705, + "logps/chosen": -591.5797729492188, + "logps/rejected": -601.7685546875, + "loss": 0.6596, + "rewards/accuracies": 0.4000000059604645, + "rewards/chosen": -2.650155544281006, + "rewards/margins": -0.006956362631171942, + "rewards/rejected": -2.6431994438171387, + "step": 13610 + }, + { + "epoch": 0.89, + "learning_rate": 1.783862910446271e-07, + "logits/chosen": -1.7571876049041748, + "logits/rejected": -1.4969539642333984, + "logps/chosen": -516.4697265625, + "logps/rejected": -669.2288208007812, + "loss": 0.4461, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -2.4835076332092285, + "rewards/margins": 1.576086401939392, + "rewards/rejected": -4.05959415435791, + "step": 13620 + }, + { + "epoch": 0.89, + "learning_rate": 1.762741518383271e-07, + "logits/chosen": -1.9321539402008057, + "logits/rejected": -1.8940976858139038, + "logps/chosen": -470.38592529296875, + "logps/rejected": -621.544921875, + "loss": 0.4946, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.25473690032959, + "rewards/margins": 0.9184789657592773, + "rewards/rejected": -3.173215389251709, + "step": 13630 + }, + { + "epoch": 0.89, + "learning_rate": 1.7417413432434082e-07, + "logits/chosen": -2.0840368270874023, + "logits/rejected": -1.9253311157226562, + "logps/chosen": -502.5130920410156, + "logps/rejected": -672.5440673828125, + "loss": 0.4869, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.920154094696045, + "rewards/margins": 1.1422127485275269, + "rewards/rejected": -4.062366962432861, + "step": 13640 + }, + { + "epoch": 0.89, + "learning_rate": 1.7208624945738855e-07, + "logits/chosen": -2.071925640106201, + "logits/rejected": -2.2120015621185303, + "logps/chosen": -634.158447265625, + "logps/rejected": -607.0089721679688, + "loss": 0.7913, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -2.838249921798706, + "rewards/margins": -0.061018358916044235, + "rewards/rejected": -2.7772316932678223, + "step": 13650 + }, + { + "epoch": 0.89, + "learning_rate": 1.7001050812889995e-07, + "logits/chosen": -2.302963972091675, + "logits/rejected": -1.9083874225616455, + "logps/chosen": -608.5677490234375, + "logps/rejected": -685.5296020507812, + "loss": 0.5522, + "rewards/accuracies": 0.75, + "rewards/chosen": -3.0623526573181152, + "rewards/margins": 1.2272964715957642, + "rewards/rejected": -4.28964900970459, + "step": 13660 + }, + { + "epoch": 0.89, + "learning_rate": 1.679469211669596e-07, + "logits/chosen": -2.027047634124756, + "logits/rejected": -2.0308895111083984, + "logps/chosen": -660.1836547851562, + "logps/rejected": -792.7001953125, + "loss": 0.5356, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.6164486408233643, + "rewards/margins": 0.7289638519287109, + "rewards/rejected": -3.345412492752075, + "step": 13670 + }, + { + "epoch": 0.9, + "learning_rate": 1.6589549933624715e-07, + "logits/chosen": -2.072457790374756, + "logits/rejected": -1.5405828952789307, + "logps/chosen": -585.9480590820312, + "logps/rejected": -636.3859252929688, + "loss": 0.4296, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -2.647041082382202, + "rewards/margins": 1.593322992324829, + "rewards/rejected": -4.240364074707031, + "step": 13680 + }, + { + "epoch": 0.9, + "learning_rate": 1.638562533379845e-07, + "logits/chosen": -1.9048030376434326, + "logits/rejected": -1.890991449356079, + "logps/chosen": -559.7710571289062, + "logps/rejected": -668.1385498046875, + "loss": 0.5207, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -2.792799711227417, + "rewards/margins": 1.087615728378296, + "rewards/rejected": -3.880415439605713, + "step": 13690 + }, + { + "epoch": 0.9, + "learning_rate": 1.6182919380987676e-07, + "logits/chosen": -2.223335027694702, + "logits/rejected": -1.4407284259796143, + "logps/chosen": -591.7991943359375, + "logps/rejected": -634.6351318359375, + "loss": 0.3976, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -2.7539069652557373, + "rewards/margins": 1.480650782585144, + "rewards/rejected": -4.234557628631592, + "step": 13700 + }, + { + "epoch": 0.9, + "learning_rate": 1.598143313260603e-07, + "logits/chosen": -2.0772881507873535, + "logits/rejected": -1.9280656576156616, + "logps/chosen": -591.2493896484375, + "logps/rejected": -637.0396728515625, + "loss": 0.6488, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -2.5404486656188965, + "rewards/margins": 0.6507530212402344, + "rewards/rejected": -3.1912014484405518, + "step": 13710 + }, + { + "epoch": 0.9, + "learning_rate": 1.5781167639704415e-07, + "logits/chosen": -1.6263024806976318, + "logits/rejected": -1.94978928565979, + "logps/chosen": -597.3123779296875, + "logps/rejected": -677.3203125, + "loss": 0.5913, + "rewards/accuracies": 0.75, + "rewards/chosen": -3.080565929412842, + "rewards/margins": 0.8565647006034851, + "rewards/rejected": -3.9371306896209717, + "step": 13720 + }, + { + "epoch": 0.9, + "learning_rate": 1.5582123946965787e-07, + "logits/chosen": -2.0462169647216797, + "logits/rejected": -2.0411927700042725, + "logps/chosen": -604.1297607421875, + "logps/rejected": -650.912109375, + "loss": 0.5571, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -2.8162455558776855, + "rewards/margins": 0.8516273498535156, + "rewards/rejected": -3.6678733825683594, + "step": 13730 + }, + { + "epoch": 0.9, + "learning_rate": 1.5384303092699504e-07, + "logits/chosen": -1.9073169231414795, + "logits/rejected": -1.6117340326309204, + "logps/chosen": -558.4398193359375, + "logps/rejected": -686.1032104492188, + "loss": 0.436, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -2.904715061187744, + "rewards/margins": 1.413425087928772, + "rewards/rejected": -4.318139553070068, + "step": 13740 + }, + { + "epoch": 0.9, + "learning_rate": 1.518770610883613e-07, + "logits/chosen": -1.9963598251342773, + "logits/rejected": -1.8562904596328735, + "logps/chosen": -521.17333984375, + "logps/rejected": -635.5724487304688, + "loss": 0.5527, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.555896520614624, + "rewards/margins": 1.0598012208938599, + "rewards/rejected": -3.6156983375549316, + "step": 13750 + }, + { + "epoch": 0.9, + "learning_rate": 1.4992334020921735e-07, + "logits/chosen": -1.9674097299575806, + "logits/rejected": -1.5446860790252686, + "logps/chosen": -520.31298828125, + "logps/rejected": -600.7852172851562, + "loss": 0.4879, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -2.430971622467041, + "rewards/margins": 1.3507161140441895, + "rewards/rejected": -3.7816879749298096, + "step": 13760 + }, + { + "epoch": 0.9, + "learning_rate": 1.4798187848112905e-07, + "logits/chosen": -2.195971965789795, + "logits/rejected": -2.413776397705078, + "logps/chosen": -617.2885131835938, + "logps/rejected": -696.2974853515625, + "loss": 0.6088, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -2.3813633918762207, + "rewards/margins": 1.0204932689666748, + "rewards/rejected": -3.4018566608428955, + "step": 13770 + }, + { + "epoch": 0.9, + "learning_rate": 1.460526860317113e-07, + "logits/chosen": -2.0938820838928223, + "logits/rejected": -1.679978370666504, + "logps/chosen": -563.26806640625, + "logps/rejected": -562.6168212890625, + "loss": 0.7427, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -2.4968013763427734, + "rewards/margins": 0.9434688687324524, + "rewards/rejected": -3.440270185470581, + "step": 13780 + }, + { + "epoch": 0.9, + "learning_rate": 1.441357729245771e-07, + "logits/chosen": -1.9362106323242188, + "logits/rejected": -1.9356101751327515, + "logps/chosen": -622.1790771484375, + "logps/rejected": -744.4317626953125, + "loss": 0.4152, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.6212239265441895, + "rewards/margins": 0.9055881500244141, + "rewards/rejected": -3.5268120765686035, + "step": 13790 + }, + { + "epoch": 0.9, + "learning_rate": 1.4223114915928482e-07, + "logits/chosen": -1.968400239944458, + "logits/rejected": -2.054955005645752, + "logps/chosen": -483.0773010253906, + "logps/rejected": -671.42919921875, + "loss": 0.4554, + "rewards/accuracies": 0.75, + "rewards/chosen": -3.0358223915100098, + "rewards/margins": 1.2512052059173584, + "rewards/rejected": -4.287027359008789, + "step": 13800 + }, + { + "epoch": 0.9, + "learning_rate": 1.403388246712842e-07, + "logits/chosen": -2.0551671981811523, + "logits/rejected": -2.219425678253174, + "logps/chosen": -569.8794555664062, + "logps/rejected": -619.9769287109375, + "loss": 0.4425, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.722309112548828, + "rewards/margins": 0.9440568685531616, + "rewards/rejected": -3.6663658618927, + "step": 13810 + }, + { + "epoch": 0.9, + "learning_rate": 1.3845880933186757e-07, + "logits/chosen": -1.735666036605835, + "logits/rejected": -1.5795748233795166, + "logps/chosen": -581.3074340820312, + "logps/rejected": -585.5355224609375, + "loss": 0.5682, + "rewards/accuracies": 0.75, + "rewards/chosen": -3.425495147705078, + "rewards/margins": 0.8304556012153625, + "rewards/rejected": -4.255950450897217, + "step": 13820 + }, + { + "epoch": 0.9, + "learning_rate": 1.3659111294811457e-07, + "logits/chosen": -2.090566396713257, + "logits/rejected": -1.9472938776016235, + "logps/chosen": -597.61865234375, + "logps/rejected": -634.379638671875, + "loss": 0.4947, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.5898215770721436, + "rewards/margins": 0.7958720922470093, + "rewards/rejected": -3.3856937885284424, + "step": 13830 + }, + { + "epoch": 0.91, + "learning_rate": 1.347357452628459e-07, + "logits/chosen": -1.9464733600616455, + "logits/rejected": -2.106271743774414, + "logps/chosen": -465.6495056152344, + "logps/rejected": -636.1798095703125, + "loss": 0.4478, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.5175442695617676, + "rewards/margins": 1.1137826442718506, + "rewards/rejected": -3.6313271522521973, + "step": 13840 + }, + { + "epoch": 0.91, + "learning_rate": 1.3289271595456732e-07, + "logits/chosen": -1.9521602392196655, + "logits/rejected": -2.1690216064453125, + "logps/chosen": -469.918212890625, + "logps/rejected": -746.4378051757812, + "loss": 0.4991, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.1301827430725098, + "rewards/margins": 1.5656712055206299, + "rewards/rejected": -3.6958537101745605, + "step": 13850 + }, + { + "epoch": 0.91, + "learning_rate": 1.310620346374228e-07, + "logits/chosen": -1.9243173599243164, + "logits/rejected": -1.9282804727554321, + "logps/chosen": -574.6932373046875, + "logps/rejected": -654.2211303710938, + "loss": 0.4016, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -2.658046245574951, + "rewards/margins": 1.051507592201233, + "rewards/rejected": -3.7095534801483154, + "step": 13860 + }, + { + "epoch": 0.91, + "learning_rate": 1.2924371086114274e-07, + "logits/chosen": -2.149164915084839, + "logits/rejected": -1.7731244564056396, + "logps/chosen": -556.2320556640625, + "logps/rejected": -671.5202026367188, + "loss": 0.5376, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -2.921588897705078, + "rewards/margins": 1.4650413990020752, + "rewards/rejected": -4.386630058288574, + "step": 13870 + }, + { + "epoch": 0.91, + "learning_rate": 1.274377541109953e-07, + "logits/chosen": -1.784868597984314, + "logits/rejected": -1.7221359014511108, + "logps/chosen": -615.0252075195312, + "logps/rejected": -646.4591674804688, + "loss": 0.4736, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -3.1848056316375732, + "rewards/margins": 1.1370110511779785, + "rewards/rejected": -4.321816921234131, + "step": 13880 + }, + { + "epoch": 0.91, + "learning_rate": 1.2564417380773435e-07, + "logits/chosen": -2.166748046875, + "logits/rejected": -2.1211986541748047, + "logps/chosen": -525.5357666015625, + "logps/rejected": -550.85400390625, + "loss": 0.7374, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -2.6531982421875, + "rewards/margins": 0.2655898928642273, + "rewards/rejected": -2.918788194656372, + "step": 13890 + }, + { + "epoch": 0.91, + "learning_rate": 1.2386297930755436e-07, + "logits/chosen": -2.168666362762451, + "logits/rejected": -1.830987572669983, + "logps/chosen": -577.5469970703125, + "logps/rejected": -601.9207153320312, + "loss": 0.5228, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -3.034702777862549, + "rewards/margins": 0.8586205244064331, + "rewards/rejected": -3.8933234214782715, + "step": 13900 + }, + { + "epoch": 0.91, + "learning_rate": 1.220941799020378e-07, + "logits/chosen": -2.2160067558288574, + "logits/rejected": -2.0485739707946777, + "logps/chosen": -487.3202209472656, + "logps/rejected": -670.6369018554688, + "loss": 0.531, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.3113012313842773, + "rewards/margins": 1.3632745742797852, + "rewards/rejected": -3.6745758056640625, + "step": 13910 + }, + { + "epoch": 0.91, + "learning_rate": 1.2033778481810975e-07, + "logits/chosen": -2.11628794670105, + "logits/rejected": -2.22096848487854, + "logps/chosen": -599.7377319335938, + "logps/rejected": -669.64013671875, + "loss": 0.5778, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -2.5265109539031982, + "rewards/margins": 1.0575026273727417, + "rewards/rejected": -3.5840137004852295, + "step": 13920 + }, + { + "epoch": 0.91, + "learning_rate": 1.1859380321798591e-07, + "logits/chosen": -2.1361804008483887, + "logits/rejected": -1.877889633178711, + "logps/chosen": -502.3155212402344, + "logps/rejected": -577.39990234375, + "loss": 0.479, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -2.577942132949829, + "rewards/margins": 0.9486045837402344, + "rewards/rejected": -3.5265464782714844, + "step": 13930 + }, + { + "epoch": 0.91, + "learning_rate": 1.1686224419912989e-07, + "logits/chosen": -2.0007095336914062, + "logits/rejected": -1.7303388118743896, + "logps/chosen": -581.5795288085938, + "logps/rejected": -736.5750732421875, + "loss": 0.505, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -2.580915689468384, + "rewards/margins": 1.4071890115737915, + "rewards/rejected": -3.9881045818328857, + "step": 13940 + }, + { + "epoch": 0.91, + "learning_rate": 1.1514311679420104e-07, + "logits/chosen": -2.116205930709839, + "logits/rejected": -2.267324447631836, + "logps/chosen": -573.3087158203125, + "logps/rejected": -651.9915771484375, + "loss": 0.526, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -2.6597065925598145, + "rewards/margins": 0.9490043520927429, + "rewards/rejected": -3.6087112426757812, + "step": 13950 + }, + { + "epoch": 0.91, + "learning_rate": 1.1343642997101029e-07, + "logits/chosen": -2.0283255577087402, + "logits/rejected": -1.945887804031372, + "logps/chosen": -508.79248046875, + "logps/rejected": -639.9680786132812, + "loss": 0.687, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -2.1274921894073486, + "rewards/margins": 1.3700916767120361, + "rewards/rejected": -3.4975838661193848, + "step": 13960 + }, + { + "epoch": 0.91, + "learning_rate": 1.1174219263247188e-07, + "logits/chosen": -1.7178478240966797, + "logits/rejected": -1.9336674213409424, + "logps/chosen": -558.7196655273438, + "logps/rejected": -724.6270141601562, + "loss": 0.4229, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -3.0520660877227783, + "rewards/margins": 1.747532844543457, + "rewards/rejected": -4.7995991706848145, + "step": 13970 + }, + { + "epoch": 0.91, + "learning_rate": 1.1006041361655839e-07, + "logits/chosen": -1.9918807744979858, + "logits/rejected": -1.704734206199646, + "logps/chosen": -526.9503784179688, + "logps/rejected": -597.0250244140625, + "loss": 0.4088, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.31221342086792, + "rewards/margins": 1.1227428913116455, + "rewards/rejected": -3.4349560737609863, + "step": 13980 + }, + { + "epoch": 0.92, + "learning_rate": 1.0839110169625189e-07, + "logits/chosen": -1.6949018239974976, + "logits/rejected": -2.197110414505005, + "logps/chosen": -538.4711303710938, + "logps/rejected": -767.4324951171875, + "loss": 0.585, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -2.4817819595336914, + "rewards/margins": 1.339092493057251, + "rewards/rejected": -3.8208746910095215, + "step": 13990 + }, + { + "epoch": 0.92, + "learning_rate": 1.06734265579502e-07, + "logits/chosen": -1.6315670013427734, + "logits/rejected": -1.8602148294448853, + "logps/chosen": -520.1702880859375, + "logps/rejected": -650.0765380859375, + "loss": 0.4227, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -2.6304497718811035, + "rewards/margins": 1.2677826881408691, + "rewards/rejected": -3.8982322216033936, + "step": 14000 + }, + { + "epoch": 0.92, + "learning_rate": 1.050899139091771e-07, + "logits/chosen": -2.0828208923339844, + "logits/rejected": -1.7871806621551514, + "logps/chosen": -535.5015258789062, + "logps/rejected": -591.5552978515625, + "loss": 0.5932, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -2.899904727935791, + "rewards/margins": 0.5952678918838501, + "rewards/rejected": -3.4951725006103516, + "step": 14010 + }, + { + "epoch": 0.92, + "learning_rate": 1.0345805526302072e-07, + "logits/chosen": -1.8452575206756592, + "logits/rejected": -1.7843999862670898, + "logps/chosen": -602.8890380859375, + "logps/rejected": -660.4598388671875, + "loss": 0.7046, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -3.1601126194000244, + "rewards/margins": 0.25720563530921936, + "rewards/rejected": -3.4173178672790527, + "step": 14020 + }, + { + "epoch": 0.92, + "learning_rate": 1.0183869815360764e-07, + "logits/chosen": -1.950768232345581, + "logits/rejected": -1.9863277673721313, + "logps/chosen": -510.62371826171875, + "logps/rejected": -669.0475463867188, + "loss": 0.4555, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.1132547855377197, + "rewards/margins": 1.230995774269104, + "rewards/rejected": -3.344250440597534, + "step": 14030 + }, + { + "epoch": 0.92, + "learning_rate": 1.0023185102829763e-07, + "logits/chosen": -2.395341396331787, + "logits/rejected": -1.7982008457183838, + "logps/chosen": -520.4229125976562, + "logps/rejected": -565.4215087890625, + "loss": 0.5862, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -2.729526996612549, + "rewards/margins": 1.086969256401062, + "rewards/rejected": -3.8164963722229004, + "step": 14040 + }, + { + "epoch": 0.92, + "learning_rate": 9.863752226919182e-08, + "logits/chosen": -2.1661365032196045, + "logits/rejected": -1.8107774257659912, + "logps/chosen": -597.0890502929688, + "logps/rejected": -642.457275390625, + "loss": 0.4836, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -2.7160167694091797, + "rewards/margins": 1.1926524639129639, + "rewards/rejected": -3.9086689949035645, + "step": 14050 + }, + { + "epoch": 0.92, + "learning_rate": 9.705572019309107e-08, + "logits/chosen": -2.0592494010925293, + "logits/rejected": -1.9266738891601562, + "logps/chosen": -540.168701171875, + "logps/rejected": -708.5350341796875, + "loss": 0.5257, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -1.9037212133407593, + "rewards/margins": 1.595682144165039, + "rewards/rejected": -3.499403476715088, + "step": 14060 + }, + { + "epoch": 0.92, + "learning_rate": 9.548645305144849e-08, + "logits/chosen": -1.9899635314941406, + "logits/rejected": -1.475678563117981, + "logps/chosen": -504.0489807128906, + "logps/rejected": -611.5504150390625, + "loss": 0.6257, + "rewards/accuracies": 0.75, + "rewards/chosen": -3.3494484424591064, + "rewards/margins": 0.8685885667800903, + "rewards/rejected": -4.218036651611328, + "step": 14070 + }, + { + "epoch": 0.92, + "learning_rate": 9.392972903033149e-08, + "logits/chosen": -2.318446636199951, + "logits/rejected": -2.0666234493255615, + "logps/chosen": -551.0264892578125, + "logps/rejected": -684.6751708984375, + "loss": 0.4677, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -2.280172824859619, + "rewards/margins": 1.5428760051727295, + "rewards/rejected": -3.8230483531951904, + "step": 14080 + }, + { + "epoch": 0.92, + "learning_rate": 9.238555625037449e-08, + "logits/chosen": -2.116685390472412, + "logits/rejected": -1.8756097555160522, + "logps/chosen": -518.8292236328125, + "logps/rejected": -609.4307861328125, + "loss": 0.6441, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -2.84653639793396, + "rewards/margins": 0.8913130760192871, + "rewards/rejected": -3.737849473953247, + "step": 14090 + }, + { + "epoch": 0.92, + "learning_rate": 9.085394276673903e-08, + "logits/chosen": -2.1735680103302, + "logits/rejected": -2.0650429725646973, + "logps/chosen": -649.1590576171875, + "logps/rejected": -814.1696166992188, + "loss": 0.5992, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -2.727769374847412, + "rewards/margins": 1.1337007284164429, + "rewards/rejected": -3.8614699840545654, + "step": 14100 + }, + { + "epoch": 0.92, + "learning_rate": 8.933489656907157e-08, + "logits/chosen": -2.0548338890075684, + "logits/rejected": -2.1526238918304443, + "logps/chosen": -484.33282470703125, + "logps/rejected": -654.7472534179688, + "loss": 0.3397, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -2.680506467819214, + "rewards/margins": 1.4911242723464966, + "rewards/rejected": -4.171631336212158, + "step": 14110 + }, + { + "epoch": 0.92, + "learning_rate": 8.782842558146127e-08, + "logits/chosen": -2.1364803314208984, + "logits/rejected": -1.8520491123199463, + "logps/chosen": -480.78509521484375, + "logps/rejected": -608.3148803710938, + "loss": 0.5262, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -2.6304593086242676, + "rewards/margins": 1.3878322839736938, + "rewards/rejected": -4.01829195022583, + "step": 14120 + }, + { + "epoch": 0.92, + "learning_rate": 8.633453766239836e-08, + "logits/chosen": -1.675296425819397, + "logits/rejected": -2.0876195430755615, + "logps/chosen": -495.19342041015625, + "logps/rejected": -622.8082885742188, + "loss": 0.5607, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -3.1442830562591553, + "rewards/margins": 0.6814892292022705, + "rewards/rejected": -3.825772523880005, + "step": 14130 + }, + { + "epoch": 0.93, + "learning_rate": 8.485324060473448e-08, + "logits/chosen": -2.0316433906555176, + "logits/rejected": -1.7985146045684814, + "logps/chosen": -577.4540405273438, + "logps/rejected": -663.97119140625, + "loss": 0.5518, + "rewards/accuracies": 0.75, + "rewards/chosen": -3.1149959564208984, + "rewards/margins": 0.9152809381484985, + "rewards/rejected": -4.030276775360107, + "step": 14140 + }, + { + "epoch": 0.93, + "learning_rate": 8.338454213564052e-08, + "logits/chosen": -2.191765546798706, + "logits/rejected": -2.410991907119751, + "logps/chosen": -818.0296630859375, + "logps/rejected": -747.2487182617188, + "loss": 0.6268, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -2.624410390853882, + "rewards/margins": 0.8381460905075073, + "rewards/rejected": -3.4625561237335205, + "step": 14150 + }, + { + "epoch": 0.93, + "learning_rate": 8.192844991656679e-08, + "logits/chosen": -1.954979658126831, + "logits/rejected": -1.678969383239746, + "logps/chosen": -613.8314208984375, + "logps/rejected": -630.6569213867188, + "loss": 0.6434, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.841416835784912, + "rewards/margins": 1.1300991773605347, + "rewards/rejected": -3.9715161323547363, + "step": 14160 + }, + { + "epoch": 0.93, + "learning_rate": 8.048497154320434e-08, + "logits/chosen": -2.226532459259033, + "logits/rejected": -1.5135737657546997, + "logps/chosen": -638.7493896484375, + "logps/rejected": -624.1115112304688, + "loss": 0.5291, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.5119431018829346, + "rewards/margins": 0.841154932975769, + "rewards/rejected": -3.353097915649414, + "step": 14170 + }, + { + "epoch": 0.93, + "learning_rate": 7.905411454544265e-08, + "logits/chosen": -1.9530729055404663, + "logits/rejected": -2.051448106765747, + "logps/chosen": -494.66705322265625, + "logps/rejected": -704.9006958007812, + "loss": 0.5769, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -2.5946383476257324, + "rewards/margins": 1.4650105237960815, + "rewards/rejected": -4.0596489906311035, + "step": 14180 + }, + { + "epoch": 0.93, + "learning_rate": 7.763588638733332e-08, + "logits/chosen": -2.0100789070129395, + "logits/rejected": -2.243278741836548, + "logps/chosen": -556.4869384765625, + "logps/rejected": -712.3450927734375, + "loss": 0.4168, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -2.4392809867858887, + "rewards/margins": 2.094257354736328, + "rewards/rejected": -4.533538341522217, + "step": 14190 + }, + { + "epoch": 0.93, + "learning_rate": 7.623029446704899e-08, + "logits/chosen": -2.285468578338623, + "logits/rejected": -1.9770171642303467, + "logps/chosen": -600.0869140625, + "logps/rejected": -674.52294921875, + "loss": 0.3591, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -2.4405174255371094, + "rewards/margins": 1.3958009481430054, + "rewards/rejected": -3.8363184928894043, + "step": 14200 + }, + { + "epoch": 0.93, + "learning_rate": 7.483734611684557e-08, + "logits/chosen": -2.212456226348877, + "logits/rejected": -1.953904151916504, + "logps/chosen": -514.7883911132812, + "logps/rejected": -627.7686157226562, + "loss": 0.4992, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -2.890023946762085, + "rewards/margins": 1.129746675491333, + "rewards/rejected": -4.019770622253418, + "step": 14210 + }, + { + "epoch": 0.93, + "learning_rate": 7.345704860302366e-08, + "logits/chosen": -1.8617370128631592, + "logits/rejected": -1.75583016872406, + "logps/chosen": -460.3050231933594, + "logps/rejected": -571.10302734375, + "loss": 0.5618, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.465756893157959, + "rewards/margins": 1.1497135162353516, + "rewards/rejected": -3.6154708862304688, + "step": 14220 + }, + { + "epoch": 0.93, + "learning_rate": 7.208940912589224e-08, + "logits/chosen": -2.284726619720459, + "logits/rejected": -1.6230722665786743, + "logps/chosen": -520.0506591796875, + "logps/rejected": -572.2464599609375, + "loss": 0.5166, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.474360704421997, + "rewards/margins": 0.902630627155304, + "rewards/rejected": -3.376990795135498, + "step": 14230 + }, + { + "epoch": 0.93, + "learning_rate": 7.073443481972753e-08, + "logits/chosen": -1.8819868564605713, + "logits/rejected": -2.2348008155822754, + "logps/chosen": -547.7752685546875, + "logps/rejected": -621.5697021484375, + "loss": 0.5359, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -2.5357489585876465, + "rewards/margins": 0.810367226600647, + "rewards/rejected": -3.346116304397583, + "step": 14240 + }, + { + "epoch": 0.93, + "learning_rate": 6.939213275274027e-08, + "logits/chosen": -1.5495518445968628, + "logits/rejected": -1.8627188205718994, + "logps/chosen": -553.0704345703125, + "logps/rejected": -624.6507568359375, + "loss": 0.515, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -2.8459842205047607, + "rewards/margins": 0.6540545225143433, + "rewards/rejected": -3.5000386238098145, + "step": 14250 + }, + { + "epoch": 0.93, + "learning_rate": 6.806250992703461e-08, + "logits/chosen": -2.2263686656951904, + "logits/rejected": -1.7921960353851318, + "logps/chosen": -630.40234375, + "logps/rejected": -653.8508911132812, + "loss": 0.5446, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -2.4244742393493652, + "rewards/margins": 1.3381140232086182, + "rewards/rejected": -3.7625880241394043, + "step": 14260 + }, + { + "epoch": 0.93, + "learning_rate": 6.674557327857572e-08, + "logits/chosen": -2.0738942623138428, + "logits/rejected": -1.8814947605133057, + "logps/chosen": -545.239501953125, + "logps/rejected": -628.984619140625, + "loss": 0.5258, + "rewards/accuracies": 0.75, + "rewards/chosen": -3.1627590656280518, + "rewards/margins": 0.5774599313735962, + "rewards/rejected": -3.7402186393737793, + "step": 14270 + }, + { + "epoch": 0.93, + "learning_rate": 6.544132967714917e-08, + "logits/chosen": -1.8430073261260986, + "logits/rejected": -1.827848196029663, + "logps/chosen": -531.9650268554688, + "logps/rejected": -599.7696533203125, + "loss": 0.7531, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -3.364248275756836, + "rewards/margins": 0.6611557602882385, + "rewards/rejected": -4.0254034996032715, + "step": 14280 + }, + { + "epoch": 0.93, + "learning_rate": 6.414978592632932e-08, + "logits/chosen": -1.9486316442489624, + "logits/rejected": -1.8632211685180664, + "logps/chosen": -615.6444702148438, + "logps/rejected": -666.4421997070312, + "loss": 0.5636, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -2.3053977489471436, + "rewards/margins": 1.1161303520202637, + "rewards/rejected": -3.4215283393859863, + "step": 14290 + }, + { + "epoch": 0.94, + "learning_rate": 6.287094876344046e-08, + "logits/chosen": -1.9855334758758545, + "logits/rejected": -1.8690643310546875, + "logps/chosen": -536.8694458007812, + "logps/rejected": -622.2525634765625, + "loss": 0.5167, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.398989200592041, + "rewards/margins": 1.2469675540924072, + "rewards/rejected": -3.645956516265869, + "step": 14300 + }, + { + "epoch": 0.94, + "learning_rate": 6.160482485952413e-08, + "logits/chosen": -1.8896257877349854, + "logits/rejected": -1.643678069114685, + "logps/chosen": -560.6286010742188, + "logps/rejected": -716.4464111328125, + "loss": 0.6004, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -2.47556734085083, + "rewards/margins": 1.5186498165130615, + "rewards/rejected": -3.9942169189453125, + "step": 14310 + }, + { + "epoch": 0.94, + "learning_rate": 6.035142081930234e-08, + "logits/chosen": -2.1216776371002197, + "logits/rejected": -1.4286061525344849, + "logps/chosen": -650.0715942382812, + "logps/rejected": -665.3375244140625, + "loss": 0.4731, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -2.341348648071289, + "rewards/margins": 1.6465059518814087, + "rewards/rejected": -3.9878551959991455, + "step": 14320 + }, + { + "epoch": 0.94, + "learning_rate": 5.911074318114496e-08, + "logits/chosen": -1.494458794593811, + "logits/rejected": -1.7939786911010742, + "logps/chosen": -537.7138671875, + "logps/rejected": -615.3954467773438, + "loss": 0.6188, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -2.2225029468536377, + "rewards/margins": 0.672461748123169, + "rewards/rejected": -2.8949646949768066, + "step": 14330 + }, + { + "epoch": 0.94, + "learning_rate": 5.788279841703381e-08, + "logits/chosen": -1.9612480401992798, + "logits/rejected": -2.061548948287964, + "logps/chosen": -582.6454467773438, + "logps/rejected": -694.6004638671875, + "loss": 0.6293, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -3.083461284637451, + "rewards/margins": 0.7667292356491089, + "rewards/rejected": -3.8501904010772705, + "step": 14340 + }, + { + "epoch": 0.94, + "learning_rate": 5.66675929325311e-08, + "logits/chosen": -2.2754197120666504, + "logits/rejected": -2.2194416522979736, + "logps/chosen": -477.0834045410156, + "logps/rejected": -582.92333984375, + "loss": 0.467, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.0185394287109375, + "rewards/margins": 1.1576719284057617, + "rewards/rejected": -3.17621111869812, + "step": 14350 + }, + { + "epoch": 0.94, + "learning_rate": 5.546513306674301e-08, + "logits/chosen": -2.2503647804260254, + "logits/rejected": -1.8498117923736572, + "logps/chosen": -567.9696044921875, + "logps/rejected": -627.1837158203125, + "loss": 0.6846, + "rewards/accuracies": 0.75, + "rewards/chosen": -3.134516954421997, + "rewards/margins": 0.9543534517288208, + "rewards/rejected": -4.088870048522949, + "step": 14360 + }, + { + "epoch": 0.94, + "learning_rate": 5.4275425092290004e-08, + "logits/chosen": -2.0579543113708496, + "logits/rejected": -1.4061779975891113, + "logps/chosen": -577.0407104492188, + "logps/rejected": -603.1378173828125, + "loss": 0.4578, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.449418783187866, + "rewards/margins": 1.5462074279785156, + "rewards/rejected": -3.995626449584961, + "step": 14370 + }, + { + "epoch": 0.94, + "learning_rate": 5.309847521527078e-08, + "logits/chosen": -1.9693748950958252, + "logits/rejected": -1.7713311910629272, + "logps/chosen": -477.5189514160156, + "logps/rejected": -606.2625732421875, + "loss": 0.5151, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.545546770095825, + "rewards/margins": 1.240389108657837, + "rewards/rejected": -3.785935878753662, + "step": 14380 + }, + { + "epoch": 0.94, + "learning_rate": 5.1934289575233385e-08, + "logits/chosen": -2.1927027702331543, + "logits/rejected": -2.040621042251587, + "logps/chosen": -565.4403076171875, + "logps/rejected": -641.1866455078125, + "loss": 0.3722, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -2.2529892921447754, + "rewards/margins": 1.0782588720321655, + "rewards/rejected": -3.3312485218048096, + "step": 14390 + }, + { + "epoch": 0.94, + "learning_rate": 5.078287424513994e-08, + "logits/chosen": -2.3554036617279053, + "logits/rejected": -1.893204689025879, + "logps/chosen": -580.8216552734375, + "logps/rejected": -546.94677734375, + "loss": 0.5845, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -2.0198404788970947, + "rewards/margins": 1.1553198099136353, + "rewards/rejected": -3.1751601696014404, + "step": 14400 + }, + { + "epoch": 0.94, + "learning_rate": 4.964423523133671e-08, + "logits/chosen": -1.8956031799316406, + "logits/rejected": -1.57084321975708, + "logps/chosen": -596.0135498046875, + "logps/rejected": -654.08544921875, + "loss": 0.5841, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -2.9032137393951416, + "rewards/margins": 0.8584194183349609, + "rewards/rejected": -3.7616333961486816, + "step": 14410 + }, + { + "epoch": 0.94, + "learning_rate": 4.8518378473522976e-08, + "logits/chosen": -1.9264984130859375, + "logits/rejected": -1.9649795293807983, + "logps/chosen": -718.8721313476562, + "logps/rejected": -723.98974609375, + "loss": 0.5798, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -2.825594425201416, + "rewards/margins": 0.8478279113769531, + "rewards/rejected": -3.673422336578369, + "step": 14420 + }, + { + "epoch": 0.94, + "learning_rate": 4.7405309844718584e-08, + "logits/chosen": -1.9904693365097046, + "logits/rejected": -2.0691580772399902, + "logps/chosen": -662.9415893554688, + "logps/rejected": -656.4781494140625, + "loss": 0.5028, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -2.6238646507263184, + "rewards/margins": 1.5457340478897095, + "rewards/rejected": -4.1695990562438965, + "step": 14430 + }, + { + "epoch": 0.94, + "learning_rate": 4.630503515123508e-08, + "logits/chosen": -2.187486410140991, + "logits/rejected": -1.6711041927337646, + "logps/chosen": -578.8890991210938, + "logps/rejected": -642.47216796875, + "loss": 0.4737, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -2.4145071506500244, + "rewards/margins": 1.5308291912078857, + "rewards/rejected": -3.9453365802764893, + "step": 14440 + }, + { + "epoch": 0.95, + "learning_rate": 4.5217560132644056e-08, + "logits/chosen": -2.026479959487915, + "logits/rejected": -2.1466727256774902, + "logps/chosen": -517.5628051757812, + "logps/rejected": -748.7684326171875, + "loss": 0.4446, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -3.1509206295013428, + "rewards/margins": 1.1940914392471313, + "rewards/rejected": -4.3450117111206055, + "step": 14450 + }, + { + "epoch": 0.95, + "learning_rate": 4.41428904617483e-08, + "logits/chosen": -1.8300641775131226, + "logits/rejected": -1.963171362876892, + "logps/chosen": -551.1915283203125, + "logps/rejected": -632.8893432617188, + "loss": 0.4552, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -2.4853625297546387, + "rewards/margins": 1.235141634941101, + "rewards/rejected": -3.72050404548645, + "step": 14460 + }, + { + "epoch": 0.95, + "learning_rate": 4.3081031744550696e-08, + "logits/chosen": -2.18308687210083, + "logits/rejected": -1.511857271194458, + "logps/chosen": -482.4222717285156, + "logps/rejected": -541.9293212890625, + "loss": 0.6497, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -2.4646048545837402, + "rewards/margins": 0.9028294682502747, + "rewards/rejected": -3.3674347400665283, + "step": 14470 + }, + { + "epoch": 0.95, + "learning_rate": 4.2031989520227025e-08, + "logits/chosen": -1.781324028968811, + "logits/rejected": -1.8688886165618896, + "logps/chosen": -539.0523071289062, + "logps/rejected": -607.6040649414062, + "loss": 0.4513, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -2.917088747024536, + "rewards/margins": 1.1266096830368042, + "rewards/rejected": -4.043698310852051, + "step": 14480 + }, + { + "epoch": 0.95, + "learning_rate": 4.099576926109461e-08, + "logits/chosen": -1.958184003829956, + "logits/rejected": -2.033747434616089, + "logps/chosen": -712.1417236328125, + "logps/rejected": -733.5404052734375, + "loss": 0.6166, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -3.5497589111328125, + "rewards/margins": 0.6533251404762268, + "rewards/rejected": -4.2030839920043945, + "step": 14490 + }, + { + "epoch": 0.95, + "learning_rate": 3.997237637258705e-08, + "logits/chosen": -1.6168861389160156, + "logits/rejected": -1.8724403381347656, + "logps/chosen": -546.1658325195312, + "logps/rejected": -616.4589233398438, + "loss": 0.6491, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -2.780271530151367, + "rewards/margins": 0.6067410111427307, + "rewards/rejected": -3.387012481689453, + "step": 14500 + }, + { + "epoch": 0.95, + "learning_rate": 3.8961816193222035e-08, + "logits/chosen": -2.250966787338257, + "logits/rejected": -1.8082072734832764, + "logps/chosen": -601.9894409179688, + "logps/rejected": -652.6393432617188, + "loss": 0.4912, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.557422161102295, + "rewards/margins": 1.5952788591384888, + "rewards/rejected": -4.152700901031494, + "step": 14510 + }, + { + "epoch": 0.95, + "learning_rate": 3.79640939945769e-08, + "logits/chosen": -2.213221311569214, + "logits/rejected": -1.8191182613372803, + "logps/chosen": -567.0030517578125, + "logps/rejected": -648.6131591796875, + "loss": 0.532, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -2.401536703109741, + "rewards/margins": 1.0345122814178467, + "rewards/rejected": -3.436048984527588, + "step": 14520 + }, + { + "epoch": 0.95, + "learning_rate": 3.697921498125895e-08, + "logits/chosen": -2.357787609100342, + "logits/rejected": -1.991206407546997, + "logps/chosen": -641.5526123046875, + "logps/rejected": -709.0422973632812, + "loss": 0.5707, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -2.414332628250122, + "rewards/margins": 1.1486154794692993, + "rewards/rejected": -3.562948226928711, + "step": 14530 + }, + { + "epoch": 0.95, + "learning_rate": 3.6007184290880456e-08, + "logits/chosen": -2.375516176223755, + "logits/rejected": -2.1332132816314697, + "logps/chosen": -608.3237915039062, + "logps/rejected": -620.3967895507812, + "loss": 0.5038, + "rewards/accuracies": 0.75, + "rewards/chosen": -1.9436047077178955, + "rewards/margins": 1.2093195915222168, + "rewards/rejected": -3.1529242992401123, + "step": 14540 + }, + { + "epoch": 0.95, + "learning_rate": 3.504800699402872e-08, + "logits/chosen": -2.153402090072632, + "logits/rejected": -1.7231197357177734, + "logps/chosen": -558.6926879882812, + "logps/rejected": -608.3848876953125, + "loss": 0.4761, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -1.9543708562850952, + "rewards/margins": 0.9569627642631531, + "rewards/rejected": -2.9113335609436035, + "step": 14550 + }, + { + "epoch": 0.95, + "learning_rate": 3.4101688094242967e-08, + "logits/chosen": -1.902076005935669, + "logits/rejected": -1.8242212533950806, + "logps/chosen": -474.19512939453125, + "logps/rejected": -666.9522705078125, + "loss": 0.4113, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -2.4757742881774902, + "rewards/margins": 1.9288349151611328, + "rewards/rejected": -4.404609203338623, + "step": 14560 + }, + { + "epoch": 0.95, + "learning_rate": 3.3168232527985564e-08, + "logits/chosen": -2.121044397354126, + "logits/rejected": -1.6909615993499756, + "logps/chosen": -657.7474365234375, + "logps/rejected": -793.020263671875, + "loss": 0.4025, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -2.607120990753174, + "rewards/margins": 1.5403274297714233, + "rewards/rejected": -4.147448539733887, + "step": 14570 + }, + { + "epoch": 0.95, + "learning_rate": 3.224764516461892e-08, + "logits/chosen": -1.758429765701294, + "logits/rejected": -1.5632776021957397, + "logps/chosen": -597.2870483398438, + "logps/rejected": -627.7225341796875, + "loss": 0.5881, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.684779405593872, + "rewards/margins": 1.027318000793457, + "rewards/rejected": -3.712097644805908, + "step": 14580 + }, + { + "epoch": 0.95, + "learning_rate": 3.133993080637665e-08, + "logits/chosen": -2.2052054405212402, + "logits/rejected": -1.766552209854126, + "logps/chosen": -565.1602783203125, + "logps/rejected": -716.3389892578125, + "loss": 0.3828, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.767854690551758, + "rewards/margins": 1.580475091934204, + "rewards/rejected": -4.348330020904541, + "step": 14590 + }, + { + "epoch": 0.96, + "learning_rate": 3.0445094188342186e-08, + "logits/chosen": -2.425266742706299, + "logits/rejected": -2.240478992462158, + "logps/chosen": -547.5765991210938, + "logps/rejected": -691.012939453125, + "loss": 0.498, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -2.614175796508789, + "rewards/margins": 1.1199251413345337, + "rewards/rejected": -3.734100341796875, + "step": 14600 + }, + { + "epoch": 0.96, + "learning_rate": 2.9563139978421028e-08, + "logits/chosen": -2.029474973678589, + "logits/rejected": -1.9663759469985962, + "logps/chosen": -583.2051391601562, + "logps/rejected": -761.0662841796875, + "loss": 0.6742, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.939052104949951, + "rewards/margins": 1.2332971096038818, + "rewards/rejected": -4.172348976135254, + "step": 14610 + }, + { + "epoch": 0.96, + "learning_rate": 2.869407277731939e-08, + "logits/chosen": -1.5976159572601318, + "logits/rejected": -1.4359140396118164, + "logps/chosen": -448.70831298828125, + "logps/rejected": -568.3868408203125, + "loss": 0.4283, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -2.6614177227020264, + "rewards/margins": 1.1558550596237183, + "rewards/rejected": -3.817272901535034, + "step": 14620 + }, + { + "epoch": 0.96, + "learning_rate": 2.783789711851642e-08, + "logits/chosen": -1.7493689060211182, + "logits/rejected": -1.9442659616470337, + "logps/chosen": -503.8953552246094, + "logps/rejected": -651.5677490234375, + "loss": 0.4684, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.3239119052886963, + "rewards/margins": 1.5627952814102173, + "rewards/rejected": -3.886706829071045, + "step": 14630 + }, + { + "epoch": 0.96, + "learning_rate": 2.6994617468244778e-08, + "logits/chosen": -1.977817177772522, + "logits/rejected": -2.0746657848358154, + "logps/chosen": -481.6962890625, + "logps/rejected": -559.5147705078125, + "loss": 0.6108, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -2.6540279388427734, + "rewards/margins": 0.7342770099639893, + "rewards/rejected": -3.3883049488067627, + "step": 14640 + }, + { + "epoch": 0.96, + "learning_rate": 2.6164238225463155e-08, + "logits/chosen": -1.8128788471221924, + "logits/rejected": -2.017542600631714, + "logps/chosen": -582.6580200195312, + "logps/rejected": -668.75, + "loss": 0.5317, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -2.9285879135131836, + "rewards/margins": 1.08481764793396, + "rewards/rejected": -4.013405799865723, + "step": 14650 + }, + { + "epoch": 0.96, + "learning_rate": 2.534676372183742e-08, + "logits/chosen": -1.7201662063598633, + "logits/rejected": -1.8715013265609741, + "logps/chosen": -576.0623779296875, + "logps/rejected": -882.0549926757812, + "loss": 0.5135, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.6352553367614746, + "rewards/margins": 1.320225477218628, + "rewards/rejected": -3.9554810523986816, + "step": 14660 + }, + { + "epoch": 0.96, + "learning_rate": 2.4542198221714218e-08, + "logits/chosen": -1.8759219646453857, + "logits/rejected": -1.9353220462799072, + "logps/chosen": -608.5661010742188, + "logps/rejected": -724.3257446289062, + "loss": 0.5991, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.945322036743164, + "rewards/margins": 1.2213268280029297, + "rewards/rejected": -4.166649341583252, + "step": 14670 + }, + { + "epoch": 0.96, + "learning_rate": 2.3750545922101854e-08, + "logits/chosen": -2.0961718559265137, + "logits/rejected": -1.6841129064559937, + "logps/chosen": -521.6822509765625, + "logps/rejected": -638.8639526367188, + "loss": 0.411, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -2.7227702140808105, + "rewards/margins": 1.513890027999878, + "rewards/rejected": -4.236660003662109, + "step": 14680 + }, + { + "epoch": 0.96, + "learning_rate": 2.2971810952646112e-08, + "logits/chosen": -2.2837064266204834, + "logits/rejected": -2.1549346446990967, + "logps/chosen": -638.1166381835938, + "logps/rejected": -623.2474975585938, + "loss": 0.6452, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -2.713475227355957, + "rewards/margins": 0.6098706126213074, + "rewards/rejected": -3.323345899581909, + "step": 14690 + }, + { + "epoch": 0.96, + "learning_rate": 2.2205997375610576e-08, + "logits/chosen": -2.247194290161133, + "logits/rejected": -1.9691559076309204, + "logps/chosen": -541.24609375, + "logps/rejected": -600.1187744140625, + "loss": 0.5547, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -2.379558801651001, + "rewards/margins": 0.8371645212173462, + "rewards/rejected": -3.2167232036590576, + "step": 14700 + }, + { + "epoch": 0.96, + "learning_rate": 2.1453109185853304e-08, + "logits/chosen": -1.9703853130340576, + "logits/rejected": -2.1267032623291016, + "logps/chosen": -538.843017578125, + "logps/rejected": -636.073974609375, + "loss": 0.5679, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -2.287182331085205, + "rewards/margins": 0.865347683429718, + "rewards/rejected": -3.152529716491699, + "step": 14710 + }, + { + "epoch": 0.96, + "learning_rate": 2.0713150310808784e-08, + "logits/chosen": -1.9766517877578735, + "logits/rejected": -2.010066509246826, + "logps/chosen": -465.16204833984375, + "logps/rejected": -689.02783203125, + "loss": 0.5972, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -2.1616501808166504, + "rewards/margins": 1.6034317016601562, + "rewards/rejected": -3.7650814056396484, + "step": 14720 + }, + { + "epoch": 0.96, + "learning_rate": 1.9986124610464064e-08, + "logits/chosen": -1.9305881261825562, + "logits/rejected": -1.5496737957000732, + "logps/chosen": -587.0402221679688, + "logps/rejected": -731.3446044921875, + "loss": 0.4927, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -2.2727527618408203, + "rewards/margins": 1.5993540287017822, + "rewards/rejected": -3.8721065521240234, + "step": 14730 + }, + { + "epoch": 0.96, + "learning_rate": 1.927203587734211e-08, + "logits/chosen": -2.180502414703369, + "logits/rejected": -1.900294542312622, + "logps/chosen": -609.2461547851562, + "logps/rejected": -682.5765380859375, + "loss": 0.5784, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.084608554840088, + "rewards/margins": 0.7377563714981079, + "rewards/rejected": -3.8223655223846436, + "step": 14740 + }, + { + "epoch": 0.97, + "learning_rate": 1.8570887836479034e-08, + "logits/chosen": -2.027315855026245, + "logits/rejected": -1.824481725692749, + "logps/chosen": -556.1491088867188, + "logps/rejected": -595.5225830078125, + "loss": 0.4913, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.941615581512451, + "rewards/margins": 0.7568367123603821, + "rewards/rejected": -3.6984519958496094, + "step": 14750 + }, + { + "epoch": 0.97, + "learning_rate": 1.7882684145406616e-08, + "logits/chosen": -1.7561118602752686, + "logits/rejected": -2.1623549461364746, + "logps/chosen": -552.8467407226562, + "logps/rejected": -614.5779418945312, + "loss": 0.5921, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -2.634575605392456, + "rewards/margins": 1.004473328590393, + "rewards/rejected": -3.6390490531921387, + "step": 14760 + }, + { + "epoch": 0.97, + "learning_rate": 1.7207428394132865e-08, + "logits/chosen": -1.8452908992767334, + "logits/rejected": -2.2522010803222656, + "logps/chosen": -595.1798095703125, + "logps/rejected": -801.8895263671875, + "loss": 0.4474, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -2.8258135318756104, + "rewards/margins": 1.2675896883010864, + "rewards/rejected": -4.093403339385986, + "step": 14770 + }, + { + "epoch": 0.97, + "learning_rate": 1.654512410512177e-08, + "logits/chosen": -1.8162853717803955, + "logits/rejected": -1.6008058786392212, + "logps/chosen": -591.1409912109375, + "logps/rejected": -628.3958129882812, + "loss": 0.5372, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.6309876441955566, + "rewards/margins": 0.3401626646518707, + "rewards/rejected": -3.9711506366729736, + "step": 14780 + }, + { + "epoch": 0.97, + "learning_rate": 1.5895774733277468e-08, + "logits/chosen": -1.8977062702178955, + "logits/rejected": -1.8740453720092773, + "logps/chosen": -610.1856689453125, + "logps/rejected": -745.1798095703125, + "loss": 0.642, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -3.373018741607666, + "rewards/margins": 0.5555127263069153, + "rewards/rejected": -3.9285316467285156, + "step": 14790 + }, + { + "epoch": 0.97, + "learning_rate": 1.5259383665924e-08, + "logits/chosen": -1.9771171808242798, + "logits/rejected": -2.1021008491516113, + "logps/chosen": -546.7236328125, + "logps/rejected": -603.7360229492188, + "loss": 0.4806, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.3632724285125732, + "rewards/margins": 0.8726188540458679, + "rewards/rejected": -3.235891342163086, + "step": 14800 + }, + { + "epoch": 0.97, + "learning_rate": 1.4635954222789461e-08, + "logits/chosen": -1.9596999883651733, + "logits/rejected": -2.053947687149048, + "logps/chosen": -546.943359375, + "logps/rejected": -663.4627075195312, + "loss": 0.5462, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.808772087097168, + "rewards/margins": 0.8841426968574524, + "rewards/rejected": -3.6929144859313965, + "step": 14810 + }, + { + "epoch": 0.97, + "learning_rate": 1.402548965598688e-08, + "logits/chosen": -2.0539464950561523, + "logits/rejected": -2.116236925125122, + "logps/chosen": -515.5634155273438, + "logps/rejected": -638.9573364257812, + "loss": 0.6564, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -2.3546359539031982, + "rewards/margins": 0.9324806928634644, + "rewards/rejected": -3.287116527557373, + "step": 14820 + }, + { + "epoch": 0.97, + "learning_rate": 1.3427993149998375e-08, + "logits/chosen": -1.9696085453033447, + "logits/rejected": -1.6930809020996094, + "logps/chosen": -572.9739990234375, + "logps/rejected": -665.8734130859375, + "loss": 0.4738, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -2.9606966972351074, + "rewards/margins": 1.3416447639465332, + "rewards/rejected": -4.302341938018799, + "step": 14830 + }, + { + "epoch": 0.97, + "learning_rate": 1.2843467821658518e-08, + "logits/chosen": -1.533319115638733, + "logits/rejected": -1.516014814376831, + "logps/chosen": -584.8552856445312, + "logps/rejected": -655.9161376953125, + "loss": 0.4087, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -2.864767074584961, + "rewards/margins": 1.255389928817749, + "rewards/rejected": -4.120157241821289, + "step": 14840 + }, + { + "epoch": 0.97, + "learning_rate": 1.2271916720137666e-08, + "logits/chosen": -2.2170162200927734, + "logits/rejected": -2.1248416900634766, + "logps/chosen": -568.857177734375, + "logps/rejected": -635.1595458984375, + "loss": 0.6052, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.8181326389312744, + "rewards/margins": 0.6834885478019714, + "rewards/rejected": -3.5016212463378906, + "step": 14850 + }, + { + "epoch": 0.97, + "learning_rate": 1.171334282692671e-08, + "logits/chosen": -1.637953519821167, + "logits/rejected": -2.0385165214538574, + "logps/chosen": -564.2327270507812, + "logps/rejected": -709.8792114257812, + "loss": 0.3442, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -2.607111692428589, + "rewards/margins": 1.3842123746871948, + "rewards/rejected": -3.9913246631622314, + "step": 14860 + }, + { + "epoch": 0.97, + "learning_rate": 1.116774905582041e-08, + "logits/chosen": -1.9809516668319702, + "logits/rejected": -1.9079357385635376, + "logps/chosen": -535.7376708984375, + "logps/rejected": -629.07568359375, + "loss": 0.5655, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -2.8107266426086426, + "rewards/margins": 1.4259259700775146, + "rewards/rejected": -4.236652851104736, + "step": 14870 + }, + { + "epoch": 0.97, + "learning_rate": 1.0635138252902966e-08, + "logits/chosen": -1.839346170425415, + "logits/rejected": -1.8110500574111938, + "logps/chosen": -553.4337768554688, + "logps/rejected": -578.0079956054688, + "loss": 0.5583, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -2.765519618988037, + "rewards/margins": 0.7037255764007568, + "rewards/rejected": -3.469245195388794, + "step": 14880 + }, + { + "epoch": 0.97, + "learning_rate": 1.0115513196533589e-08, + "logits/chosen": -2.424407482147217, + "logits/rejected": -1.7454407215118408, + "logps/chosen": -595.8634033203125, + "logps/rejected": -676.6961669921875, + "loss": 0.4608, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -2.504723072052002, + "rewards/margins": 1.2304394245147705, + "rewards/rejected": -3.7351627349853516, + "step": 14890 + }, + { + "epoch": 0.97, + "learning_rate": 9.608876597330952e-09, + "logits/chosen": -2.0921707153320312, + "logits/rejected": -1.8799470663070679, + "logps/chosen": -651.5478515625, + "logps/rejected": -791.4033203125, + "loss": 0.5989, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.843231678009033, + "rewards/margins": 0.8656366467475891, + "rewards/rejected": -3.7088685035705566, + "step": 14900 + }, + { + "epoch": 0.98, + "learning_rate": 9.115231098159594e-09, + "logits/chosen": -1.8645089864730835, + "logits/rejected": -2.1675772666931152, + "logps/chosen": -625.9075927734375, + "logps/rejected": -554.9896850585938, + "loss": 0.5518, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -2.273899793624878, + "rewards/margins": 0.6048682928085327, + "rewards/rejected": -2.8787682056427, + "step": 14910 + }, + { + "epoch": 0.98, + "learning_rate": 8.634579274116317e-09, + "logits/chosen": -2.1418542861938477, + "logits/rejected": -2.051110029220581, + "logps/chosen": -646.91162109375, + "logps/rejected": -757.44189453125, + "loss": 0.6099, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.6040735244750977, + "rewards/margins": 0.9848331212997437, + "rewards/rejected": -3.5889065265655518, + "step": 14920 + }, + { + "epoch": 0.98, + "learning_rate": 8.166923632516865e-09, + "logits/chosen": -2.135305881500244, + "logits/rejected": -2.3295464515686035, + "logps/chosen": -550.7391967773438, + "logps/rejected": -599.1780395507812, + "loss": 0.5218, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -2.2799880504608154, + "rewards/margins": 1.2160379886627197, + "rewards/rejected": -3.4960262775421143, + "step": 14930 + }, + { + "epoch": 0.98, + "learning_rate": 7.712266612881492e-09, + "logits/chosen": -2.110206365585327, + "logits/rejected": -2.0911717414855957, + "logps/chosen": -646.8552856445312, + "logps/rejected": -775.9037475585938, + "loss": 0.6099, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.8884596824645996, + "rewards/margins": 1.1088417768478394, + "rewards/rejected": -3.9973015785217285, + "step": 14940 + }, + { + "epoch": 0.98, + "learning_rate": 7.270610586924687e-09, + "logits/chosen": -1.9851608276367188, + "logits/rejected": -1.7386525869369507, + "logps/chosen": -532.4805908203125, + "logps/rejected": -577.1221923828125, + "loss": 0.4293, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -2.726097583770752, + "rewards/margins": 1.0200824737548828, + "rewards/rejected": -3.7461800575256348, + "step": 14950 + }, + { + "epoch": 0.98, + "learning_rate": 6.841957858539916e-09, + "logits/chosen": -2.039259433746338, + "logits/rejected": -1.5415773391723633, + "logps/chosen": -574.57080078125, + "logps/rejected": -579.6693115234375, + "loss": 0.544, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -2.8482258319854736, + "rewards/margins": 0.8674120903015137, + "rewards/rejected": -3.7156379222869873, + "step": 14960 + }, + { + "epoch": 0.98, + "learning_rate": 6.426310663790181e-09, + "logits/chosen": -2.1447885036468506, + "logits/rejected": -1.8553613424301147, + "logps/chosen": -472.54345703125, + "logps/rejected": -681.0835571289062, + "loss": 0.4591, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -2.4107956886291504, + "rewards/margins": 1.6803451776504517, + "rewards/rejected": -4.091141223907471, + "step": 14970 + }, + { + "epoch": 0.98, + "learning_rate": 6.023671170894696e-09, + "logits/chosen": -2.3050427436828613, + "logits/rejected": -2.0280723571777344, + "logps/chosen": -523.9729614257812, + "logps/rejected": -675.7442626953125, + "loss": 0.3343, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -2.1255812644958496, + "rewards/margins": 1.6824712753295898, + "rewards/rejected": -3.8080525398254395, + "step": 14980 + }, + { + "epoch": 0.98, + "learning_rate": 5.634041480218344e-09, + "logits/chosen": -2.022458553314209, + "logits/rejected": -1.8785483837127686, + "logps/chosen": -538.9459228515625, + "logps/rejected": -625.7891235351562, + "loss": 0.5558, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.573493480682373, + "rewards/margins": 1.359083890914917, + "rewards/rejected": -3.932577133178711, + "step": 14990 + }, + { + "epoch": 0.98, + "learning_rate": 5.257423624260849e-09, + "logits/chosen": -2.1214098930358887, + "logits/rejected": -1.5633071660995483, + "logps/chosen": -530.5240478515625, + "logps/rejected": -629.9700317382812, + "loss": 0.544, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -3.113564968109131, + "rewards/margins": 1.1282984018325806, + "rewards/rejected": -4.241864204406738, + "step": 15000 + }, + { + "epoch": 0.98, + "eval_logits/chosen": -2.0103464126586914, + "eval_logits/rejected": -1.8321548700332642, + "eval_logps/chosen": -559.2841186523438, + "eval_logps/rejected": -652.72314453125, + "eval_loss": 0.5345566272735596, + "eval_rewards/accuracies": 0.7450000047683716, + "eval_rewards/chosen": -2.6383869647979736, + "eval_rewards/margins": 1.1347852945327759, + "eval_rewards/rejected": -3.773172616958618, + "eval_runtime": 464.8123, + "eval_samples_per_second": 4.303, + "eval_steps_per_second": 2.151, + "step": 15000 + }, + { + "epoch": 0.98, + "learning_rate": 4.893819567644564e-09, + "logits/chosen": -2.589524030685425, + "logits/rejected": -2.085636854171753, + "logps/chosen": -627.2362060546875, + "logps/rejected": -616.60009765625, + "loss": 0.4448, + "rewards/accuracies": 0.75, + "rewards/chosen": -1.7485313415527344, + "rewards/margins": 0.941628098487854, + "rewards/rejected": -2.690159559249878, + "step": 15010 + }, + { + "epoch": 0.98, + "learning_rate": 4.543231207107257e-09, + "logits/chosen": -2.071953058242798, + "logits/rejected": -1.9068734645843506, + "logps/chosen": -564.134033203125, + "logps/rejected": -653.0672607421875, + "loss": 0.5597, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.419016122817993, + "rewards/margins": 1.1532005071640015, + "rewards/rejected": -3.572216510772705, + "step": 15020 + }, + { + "epoch": 0.98, + "learning_rate": 4.205660371488785e-09, + "logits/chosen": -1.7492027282714844, + "logits/rejected": -2.114201784133911, + "logps/chosen": -523.1380004882812, + "logps/rejected": -662.16650390625, + "loss": 0.5591, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -2.3801138401031494, + "rewards/margins": 0.8532201647758484, + "rewards/rejected": -3.2333340644836426, + "step": 15030 + }, + { + "epoch": 0.98, + "learning_rate": 3.88110882172471e-09, + "logits/chosen": -2.2043449878692627, + "logits/rejected": -1.5802637338638306, + "logps/chosen": -593.7720336914062, + "logps/rejected": -702.6158447265625, + "loss": 0.4519, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -2.357976198196411, + "rewards/margins": 1.9908418655395508, + "rewards/rejected": -4.348818302154541, + "step": 15040 + }, + { + "epoch": 0.98, + "learning_rate": 3.569578250834371e-09, + "logits/chosen": -2.2800846099853516, + "logits/rejected": -1.9660142660140991, + "logps/chosen": -635.9804077148438, + "logps/rejected": -608.4877319335938, + "loss": 0.4264, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -2.367474317550659, + "rewards/margins": 1.1003978252410889, + "rewards/rejected": -3.467872142791748, + "step": 15050 + }, + { + "epoch": 0.99, + "learning_rate": 3.2710702839139353e-09, + "logits/chosen": -1.9524681568145752, + "logits/rejected": -2.089827060699463, + "logps/chosen": -536.96533203125, + "logps/rejected": -729.6658935546875, + "loss": 0.5196, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -2.171668529510498, + "rewards/margins": 1.4687433242797852, + "rewards/rejected": -3.640411853790283, + "step": 15060 + }, + { + "epoch": 0.99, + "learning_rate": 2.9855864781272448e-09, + "logits/chosen": -1.7301270961761475, + "logits/rejected": -1.9880526065826416, + "logps/chosen": -502.5127868652344, + "logps/rejected": -669.14013671875, + "loss": 0.4803, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -2.6482887268066406, + "rewards/margins": 1.0087933540344238, + "rewards/rejected": -3.6570820808410645, + "step": 15070 + }, + { + "epoch": 0.99, + "learning_rate": 2.7131283226977665e-09, + "logits/chosen": -1.7450988292694092, + "logits/rejected": -1.6128599643707275, + "logps/chosen": -495.0038146972656, + "logps/rejected": -660.7210693359375, + "loss": 0.5607, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -2.581587076187134, + "rewards/margins": 1.8689552545547485, + "rewards/rejected": -4.450542449951172, + "step": 15080 + }, + { + "epoch": 0.99, + "learning_rate": 2.4536972389008205e-09, + "logits/chosen": -1.9724218845367432, + "logits/rejected": -1.832627296447754, + "logps/chosen": -507.7687072753906, + "logps/rejected": -597.588623046875, + "loss": 0.4921, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.8451650142669678, + "rewards/margins": 0.994085431098938, + "rewards/rejected": -3.8392510414123535, + "step": 15090 + }, + { + "epoch": 0.99, + "learning_rate": 2.20729458005553e-09, + "logits/chosen": -1.9598588943481445, + "logits/rejected": -2.226276159286499, + "logps/chosen": -603.4548950195312, + "logps/rejected": -636.9799194335938, + "loss": 0.6833, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -2.43165922164917, + "rewards/margins": 0.9997150301933289, + "rewards/rejected": -3.4313747882843018, + "step": 15100 + }, + { + "epoch": 0.99, + "learning_rate": 1.9739216315192712e-09, + "logits/chosen": -1.884840726852417, + "logits/rejected": -1.8147767782211304, + "logps/chosen": -549.3267822265625, + "logps/rejected": -673.274658203125, + "loss": 0.6796, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -2.2264692783355713, + "rewards/margins": 1.4674718379974365, + "rewards/rejected": -3.693941593170166, + "step": 15110 + }, + { + "epoch": 0.99, + "learning_rate": 1.7535796106796231e-09, + "logits/chosen": -1.8648507595062256, + "logits/rejected": -1.8699684143066406, + "logps/chosen": -559.1756591796875, + "logps/rejected": -737.6688842773438, + "loss": 0.5379, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -2.814243793487549, + "rewards/margins": 1.0435668230056763, + "rewards/rejected": -3.8578104972839355, + "step": 15120 + }, + { + "epoch": 0.99, + "learning_rate": 1.5462696669482636e-09, + "logits/chosen": -1.8213493824005127, + "logits/rejected": -1.9720951318740845, + "logps/chosen": -475.69268798828125, + "logps/rejected": -601.0528564453125, + "loss": 0.4822, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -2.752937078475952, + "rewards/margins": 1.1356090307235718, + "rewards/rejected": -3.8885464668273926, + "step": 15130 + }, + { + "epoch": 0.99, + "learning_rate": 1.3519928817556927e-09, + "logits/chosen": -2.2426865100860596, + "logits/rejected": -1.9893312454223633, + "logps/chosen": -523.9650268554688, + "logps/rejected": -693.4822998046875, + "loss": 0.588, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -2.4684181213378906, + "rewards/margins": 1.2350502014160156, + "rewards/rejected": -3.7034687995910645, + "step": 15140 + }, + { + "epoch": 0.99, + "learning_rate": 1.1707502685448512e-09, + "logits/chosen": -1.8712642192840576, + "logits/rejected": -1.9466218948364258, + "logps/chosen": -568.0076904296875, + "logps/rejected": -602.0161743164062, + "loss": 0.6481, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -2.9134345054626465, + "rewards/margins": 0.6558619737625122, + "rewards/rejected": -3.569296360015869, + "step": 15150 + }, + { + "epoch": 0.99, + "learning_rate": 1.002542772765569e-09, + "logits/chosen": -2.0638976097106934, + "logits/rejected": -1.9307587146759033, + "logps/chosen": -560.0218505859375, + "logps/rejected": -565.7151489257812, + "loss": 0.7394, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -3.1229779720306396, + "rewards/margins": 0.8032512664794922, + "rewards/rejected": -3.926229476928711, + "step": 15160 + }, + { + "epoch": 0.99, + "learning_rate": 8.473712718709559e-10, + "logits/chosen": -2.003127098083496, + "logits/rejected": -1.8916511535644531, + "logps/chosen": -585.7120361328125, + "logps/rejected": -612.2128295898438, + "loss": 0.4223, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -2.2510201930999756, + "rewards/margins": 1.000040888786316, + "rewards/rejected": -3.251060962677002, + "step": 15170 + }, + { + "epoch": 0.99, + "learning_rate": 7.052365753112966e-10, + "logits/chosen": -1.907179832458496, + "logits/rejected": -2.0109689235687256, + "logps/chosen": -516.2862548828125, + "logps/rejected": -582.3834228515625, + "loss": 0.6324, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -2.6489791870117188, + "rewards/margins": 0.7662774324417114, + "rewards/rejected": -3.4152565002441406, + "step": 15180 + }, + { + "epoch": 0.99, + "learning_rate": 5.761394245307195e-10, + "logits/chosen": -1.6526172161102295, + "logits/rejected": -1.8436660766601562, + "logps/chosen": -484.718017578125, + "logps/rejected": -684.2083740234375, + "loss": 0.4828, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -2.846797466278076, + "rewards/margins": 1.2969233989715576, + "rewards/rejected": -4.143720626831055, + "step": 15190 + }, + { + "epoch": 0.99, + "learning_rate": 4.6008049296358826e-10, + "logits/chosen": -1.6694622039794922, + "logits/rejected": -1.782187819480896, + "logps/chosen": -574.0707397460938, + "logps/rejected": -650.3125, + "loss": 0.7595, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -2.969832181930542, + "rewards/margins": 0.4895861744880676, + "rewards/rejected": -3.4594180583953857, + "step": 15200 + }, + { + "epoch": 1.0, + "learning_rate": 3.5706038603006146e-10, + "logits/chosen": -1.8179279565811157, + "logits/rejected": -1.777890920639038, + "logps/chosen": -544.8775024414062, + "logps/rejected": -649.1305541992188, + "loss": 0.6576, + "rewards/accuracies": 0.44999998807907104, + "rewards/chosen": -2.877633571624756, + "rewards/margins": 0.5929735898971558, + "rewards/rejected": -3.470606565475464, + "step": 15210 + }, + { + "epoch": 1.0, + "learning_rate": 2.670796411333165e-10, + "logits/chosen": -2.3590896129608154, + "logits/rejected": -2.0110230445861816, + "logps/chosen": -643.3109130859375, + "logps/rejected": -662.0802001953125, + "loss": 0.4969, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -2.8800227642059326, + "rewards/margins": 0.6459160447120667, + "rewards/rejected": -3.5259385108947754, + "step": 15220 + }, + { + "epoch": 1.0, + "learning_rate": 1.9013872765677455e-10, + "logits/chosen": -1.6060059070587158, + "logits/rejected": -1.6648750305175781, + "logps/chosen": -541.9176635742188, + "logps/rejected": -621.7283935546875, + "loss": 0.4782, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -3.0482802391052246, + "rewards/margins": 1.1852730512619019, + "rewards/rejected": -4.233553886413574, + "step": 15230 + }, + { + "epoch": 1.0, + "learning_rate": 1.262380469624347e-10, + "logits/chosen": -1.9589645862579346, + "logits/rejected": -1.5916931629180908, + "logps/chosen": -624.8267822265625, + "logps/rejected": -642.77490234375, + "loss": 0.4946, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -2.87965726852417, + "rewards/margins": 1.2809209823608398, + "rewards/rejected": -4.16057825088501, + "step": 15240 + }, + { + "epoch": 1.0, + "learning_rate": 7.53779323872661e-11, + "logits/chosen": -2.010342597961426, + "logits/rejected": -2.2257697582244873, + "logps/chosen": -536.9607543945312, + "logps/rejected": -731.04296875, + "loss": 0.5095, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.594208002090454, + "rewards/margins": 1.1701253652572632, + "rewards/rejected": -3.7643332481384277, + "step": 15250 + }, + { + "epoch": 1.0, + "learning_rate": 3.7558649242652734e-11, + "logits/chosen": -1.97809636592865, + "logits/rejected": -1.9274822473526, + "logps/chosen": -609.1251220703125, + "logps/rejected": -653.0401611328125, + "loss": 0.568, + "rewards/accuracies": 0.75, + "rewards/chosen": -3.0441153049468994, + "rewards/margins": 1.080944299697876, + "rewards/rejected": -4.125059604644775, + "step": 15260 + }, + { + "epoch": 1.0, + "learning_rate": 1.2780394812450526e-11, + "logits/chosen": -2.1693949699401855, + "logits/rejected": -1.7180089950561523, + "logps/chosen": -669.6077880859375, + "logps/rejected": -679.9315185546875, + "loss": 0.5132, + "rewards/accuracies": 0.75, + "rewards/chosen": -3.558014392852783, + "rewards/margins": 0.8520815968513489, + "rewards/rejected": -4.410096645355225, + "step": 15270 + }, + { + "epoch": 1.0, + "learning_rate": 1.0432983521546646e-12, + "logits/chosen": -2.0951781272888184, + "logits/rejected": -1.8845336437225342, + "logps/chosen": -670.9354248046875, + "logps/rejected": -788.4154052734375, + "loss": 0.4315, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -1.9909508228302002, + "rewards/margins": 1.453499436378479, + "rewards/rejected": -3.4444503784179688, + "step": 15280 + }, + { + "epoch": 1.0, + "step": 15284, + "total_flos": 0.0, + "train_loss": 0.5599543302822287, + "train_runtime": 34322.3587, + "train_samples_per_second": 1.781, + "train_steps_per_second": 0.445 + } + ], + "logging_steps": 10, + "max_steps": 15284, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 1000, + "total_flos": 0.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +}