{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.0, "eval_steps": 100, "global_step": 7642, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 6.535947712418301e-09, "logits/chosen": -2.795367956161499, "logits/rejected": -2.783267021179199, "logps/chosen": -1426.8870849609375, "logps/rejected": -1156.8780517578125, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1 }, { "epoch": 0.0, "learning_rate": 6.535947712418302e-08, "logits/chosen": -2.834068775177002, "logits/rejected": -2.826040267944336, "logps/chosen": -1849.96728515625, "logps/rejected": -1517.9075927734375, "loss": 0.6933, "rewards/accuracies": 0.2222222238779068, "rewards/chosen": 9.649489948060364e-05, "rewards/margins": -6.48392378934659e-05, "rewards/rejected": 0.0001613341155461967, "step": 10 }, { "epoch": 0.01, "learning_rate": 1.3071895424836603e-07, "logits/chosen": -2.8028738498687744, "logits/rejected": -2.802607297897339, "logps/chosen": -1647.7279052734375, "logps/rejected": -1586.53955078125, "loss": 0.6933, "rewards/accuracies": 0.5, "rewards/chosen": -0.0003318481321912259, "rewards/margins": -0.0006864747265353799, "rewards/rejected": 0.0003546266525518149, "step": 20 }, { "epoch": 0.01, "learning_rate": 1.9607843137254904e-07, "logits/chosen": -2.796626567840576, "logits/rejected": -2.784531593322754, "logps/chosen": -1275.7952880859375, "logps/rejected": -957.0416259765625, "loss": 0.6931, "rewards/accuracies": 0.5, "rewards/chosen": 0.00011474229540908709, "rewards/margins": 5.542132930713706e-05, "rewards/rejected": 5.932092244620435e-05, "step": 30 }, { "epoch": 0.01, "learning_rate": 2.6143790849673207e-07, "logits/chosen": -2.806631326675415, "logits/rejected": -2.8096060752868652, "logps/chosen": -1816.1331787109375, "logps/rejected": -1482.34375, "loss": 0.6931, "rewards/accuracies": 0.625, "rewards/chosen": 0.0008866718853823841, "rewards/margins": 0.0015330163296312094, "rewards/rejected": -0.0006463441532105207, "step": 40 }, { "epoch": 0.01, "learning_rate": 3.267973856209151e-07, "logits/chosen": -2.792966365814209, "logits/rejected": -2.794290542602539, "logps/chosen": -1384.16552734375, "logps/rejected": -1125.865966796875, "loss": 0.6932, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -0.0008044252172112465, "rewards/margins": -0.00011883594561368227, "rewards/rejected": -0.0006855892715975642, "step": 50 }, { "epoch": 0.02, "learning_rate": 3.921568627450981e-07, "logits/chosen": -2.7673702239990234, "logits/rejected": -2.7500195503234863, "logps/chosen": -1217.739501953125, "logps/rejected": -1133.6571044921875, "loss": 0.6933, "rewards/accuracies": 0.5, "rewards/chosen": -0.0003805880551226437, "rewards/margins": -0.0006572251440957189, "rewards/rejected": 0.0002766371180769056, "step": 60 }, { "epoch": 0.02, "learning_rate": 4.5751633986928105e-07, "logits/chosen": -2.7791049480438232, "logits/rejected": -2.78680419921875, "logps/chosen": -995.9110107421875, "logps/rejected": -1084.221923828125, "loss": 0.693, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.0002094264345942065, "rewards/margins": 0.0012642501387745142, "rewards/rejected": -0.001054823980666697, "step": 70 }, { "epoch": 0.02, "learning_rate": 5.228758169934641e-07, "logits/chosen": -2.808701753616333, "logits/rejected": -2.799790859222412, "logps/chosen": -1342.0267333984375, "logps/rejected": -1569.693603515625, "loss": 0.6933, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -0.0006827354663982987, "rewards/margins": -0.00042857881635427475, "rewards/rejected": -0.0002541565918363631, "step": 80 }, { "epoch": 0.02, "learning_rate": 5.882352941176471e-07, "logits/chosen": -2.8107619285583496, "logits/rejected": -2.781428575515747, "logps/chosen": -1558.470458984375, "logps/rejected": -1330.8447265625, "loss": 0.6931, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.0006004419410601258, "rewards/margins": 0.00014814567111898214, "rewards/rejected": 0.00045229625538922846, "step": 90 }, { "epoch": 0.03, "learning_rate": 6.535947712418302e-07, "logits/chosen": -2.791748285293579, "logits/rejected": -2.796267032623291, "logps/chosen": -1339.14013671875, "logps/rejected": -1190.2244873046875, "loss": 0.6932, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.00010777993884403259, "rewards/margins": 0.00042422435944899917, "rewards/rejected": -0.00031644434784539044, "step": 100 }, { "epoch": 0.03, "eval_logits/chosen": -2.798377513885498, "eval_logits/rejected": -2.790869951248169, "eval_logps/chosen": -1562.5352783203125, "eval_logps/rejected": -1351.64404296875, "eval_loss": 0.6931096911430359, "eval_rewards/accuracies": 0.494047611951828, "eval_rewards/chosen": 0.00013916695024818182, "eval_rewards/margins": -6.539197056554258e-05, "eval_rewards/rejected": 0.00020455890626180917, "eval_runtime": 222.1187, "eval_samples_per_second": 9.004, "eval_steps_per_second": 0.284, "step": 100 }, { "epoch": 0.03, "learning_rate": 7.189542483660131e-07, "logits/chosen": -2.8092565536499023, "logits/rejected": -2.8138070106506348, "logps/chosen": -1324.2572021484375, "logps/rejected": -977.5646362304688, "loss": 0.6929, "rewards/accuracies": 0.625, "rewards/chosen": 0.0004745650221593678, "rewards/margins": 0.0005655864952132106, "rewards/rejected": -9.102150943363085e-05, "step": 110 }, { "epoch": 0.03, "learning_rate": 7.843137254901962e-07, "logits/chosen": -2.7817893028259277, "logits/rejected": -2.7743403911590576, "logps/chosen": -1438.9947509765625, "logps/rejected": -1309.9793701171875, "loss": 0.693, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": -9.187074465444311e-05, "rewards/margins": -0.00023352287826128304, "rewards/rejected": 0.00014165208267513663, "step": 120 }, { "epoch": 0.03, "learning_rate": 8.496732026143792e-07, "logits/chosen": -2.798006772994995, "logits/rejected": -2.7805659770965576, "logps/chosen": -1641.3851318359375, "logps/rejected": -1417.8428955078125, "loss": 0.6928, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.0013305357424542308, "rewards/margins": 0.0011499880347400904, "rewards/rejected": 0.00018054773681797087, "step": 130 }, { "epoch": 0.04, "learning_rate": 9.150326797385621e-07, "logits/chosen": -2.837071180343628, "logits/rejected": -2.8371713161468506, "logps/chosen": -1430.638671875, "logps/rejected": -1356.1748046875, "loss": 0.6931, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -7.816695870133117e-05, "rewards/margins": -0.0002333349548280239, "rewards/rejected": 0.0001551680325064808, "step": 140 }, { "epoch": 0.04, "learning_rate": 9.80392156862745e-07, "logits/chosen": -2.828702449798584, "logits/rejected": -2.8166086673736572, "logps/chosen": -1465.7197265625, "logps/rejected": -1289.9031982421875, "loss": 0.6931, "rewards/accuracies": 0.5, "rewards/chosen": 0.0003954515850637108, "rewards/margins": -0.00013864324137102813, "rewards/rejected": 0.0005340948118828237, "step": 150 }, { "epoch": 0.04, "learning_rate": 1.0457516339869283e-06, "logits/chosen": -2.8205227851867676, "logits/rejected": -2.8256192207336426, "logps/chosen": -1326.330322265625, "logps/rejected": -1407.6767578125, "loss": 0.6928, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.0017105670413002372, "rewards/margins": 0.001616004854440689, "rewards/rejected": 9.456199040869251e-05, "step": 160 }, { "epoch": 0.04, "learning_rate": 1.111111111111111e-06, "logits/chosen": -2.7866125106811523, "logits/rejected": -2.7868504524230957, "logps/chosen": -1298.046630859375, "logps/rejected": -1401.292236328125, "loss": 0.6927, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.0013844614150002599, "rewards/margins": 0.00066300731850788, "rewards/rejected": 0.0007214541547000408, "step": 170 }, { "epoch": 0.05, "learning_rate": 1.1764705882352942e-06, "logits/chosen": -2.7920944690704346, "logits/rejected": -2.785459280014038, "logps/chosen": -1213.7120361328125, "logps/rejected": -1024.6798095703125, "loss": 0.6928, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.0017878736834973097, "rewards/margins": 0.00030651717679575086, "rewards/rejected": 0.001481356448493898, "step": 180 }, { "epoch": 0.05, "learning_rate": 1.2418300653594772e-06, "logits/chosen": -2.7856953144073486, "logits/rejected": -2.788532257080078, "logps/chosen": -1345.113525390625, "logps/rejected": -1323.069091796875, "loss": 0.6923, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.0006524743512272835, "rewards/margins": 0.00080809963401407, "rewards/rejected": -0.00015562539920210838, "step": 190 }, { "epoch": 0.05, "learning_rate": 1.3071895424836604e-06, "logits/chosen": -2.819009304046631, "logits/rejected": -2.8260154724121094, "logps/chosen": -1667.3140869140625, "logps/rejected": -1410.6632080078125, "loss": 0.6923, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.0064869350753724575, "rewards/margins": 0.005160582717508078, "rewards/rejected": 0.0013263521250337362, "step": 200 }, { "epoch": 0.05, "eval_logits/chosen": -2.797353744506836, "eval_logits/rejected": -2.78989839553833, "eval_logps/chosen": -1562.09912109375, "eval_logps/rejected": -1351.3734130859375, "eval_loss": 0.6924985647201538, "eval_rewards/accuracies": 0.511904776096344, "eval_rewards/chosen": 0.00449884170666337, "eval_rewards/margins": 0.0015855859965085983, "eval_rewards/rejected": 0.0029132559429854155, "eval_runtime": 221.9883, "eval_samples_per_second": 9.009, "eval_steps_per_second": 0.284, "step": 200 }, { "epoch": 0.05, "learning_rate": 1.3725490196078434e-06, "logits/chosen": -2.767564058303833, "logits/rejected": -2.7736399173736572, "logps/chosen": -1639.3736572265625, "logps/rejected": -1532.915771484375, "loss": 0.6926, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": 0.004849494434893131, "rewards/margins": 0.0012285599950700998, "rewards/rejected": 0.0036209344398230314, "step": 210 }, { "epoch": 0.06, "learning_rate": 1.4379084967320261e-06, "logits/chosen": -2.792647361755371, "logits/rejected": -2.795532703399658, "logps/chosen": -1155.707763671875, "logps/rejected": -1254.2430419921875, "loss": 0.692, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.00366068072617054, "rewards/margins": 0.0027366329450160265, "rewards/rejected": 0.0009240478393621743, "step": 220 }, { "epoch": 0.06, "learning_rate": 1.5032679738562091e-06, "logits/chosen": -2.823070764541626, "logits/rejected": -2.8033041954040527, "logps/chosen": -1567.356689453125, "logps/rejected": -1238.0029296875, "loss": 0.6909, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.0061655775643885136, "rewards/margins": 0.006119781639426947, "rewards/rejected": 4.579539017868228e-05, "step": 230 }, { "epoch": 0.06, "learning_rate": 1.5686274509803923e-06, "logits/chosen": -2.8057596683502197, "logits/rejected": -2.79594349861145, "logps/chosen": -1635.199951171875, "logps/rejected": -1150.596435546875, "loss": 0.692, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": 0.005689248908311129, "rewards/margins": 0.0024390851613134146, "rewards/rejected": 0.0032501642126590014, "step": 240 }, { "epoch": 0.07, "learning_rate": 1.6339869281045753e-06, "logits/chosen": -2.831570863723755, "logits/rejected": -2.8272597789764404, "logps/chosen": -1722.236328125, "logps/rejected": -1391.227294921875, "loss": 0.6926, "rewards/accuracies": 0.5, "rewards/chosen": 0.006255643907934427, "rewards/margins": 0.000186113640666008, "rewards/rejected": 0.006069529801607132, "step": 250 }, { "epoch": 0.07, "learning_rate": 1.6993464052287585e-06, "logits/chosen": -2.7985095977783203, "logits/rejected": -2.804990768432617, "logps/chosen": -1589.3802490234375, "logps/rejected": -1299.8837890625, "loss": 0.6928, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": 0.0035744800698012114, "rewards/margins": -0.003007827326655388, "rewards/rejected": 0.006582307629287243, "step": 260 }, { "epoch": 0.07, "learning_rate": 1.7647058823529414e-06, "logits/chosen": -2.798835277557373, "logits/rejected": -2.7780652046203613, "logps/chosen": -1849.673583984375, "logps/rejected": -1373.615478515625, "loss": 0.6927, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.012151990085840225, "rewards/margins": 0.010117733851075172, "rewards/rejected": 0.0020342557691037655, "step": 270 }, { "epoch": 0.07, "learning_rate": 1.8300653594771242e-06, "logits/chosen": -2.7835683822631836, "logits/rejected": -2.7650108337402344, "logps/chosen": -1215.5987548828125, "logps/rejected": -1194.396240234375, "loss": 0.6909, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.012225615791976452, "rewards/margins": 0.0057212356477975845, "rewards/rejected": 0.006504380609840155, "step": 280 }, { "epoch": 0.08, "learning_rate": 1.8954248366013072e-06, "logits/chosen": -2.7740135192871094, "logits/rejected": -2.766909122467041, "logps/chosen": -1559.8885498046875, "logps/rejected": -1529.93994140625, "loss": 0.6913, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": 0.004790668375790119, "rewards/margins": 0.0015975991263985634, "rewards/rejected": 0.003193069249391556, "step": 290 }, { "epoch": 0.08, "learning_rate": 1.96078431372549e-06, "logits/chosen": -2.8152096271514893, "logits/rejected": -2.797621965408325, "logps/chosen": -1500.9361572265625, "logps/rejected": -1118.470947265625, "loss": 0.6937, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.003361351788043976, "rewards/margins": -0.00021784492128062993, "rewards/rejected": 0.0035791967529803514, "step": 300 }, { "epoch": 0.08, "eval_logits/chosen": -2.794529438018799, "eval_logits/rejected": -2.7872204780578613, "eval_logps/chosen": -1561.58154296875, "eval_logps/rejected": -1351.146240234375, "eval_loss": 0.6909098625183105, "eval_rewards/accuracies": 0.5376983880996704, "eval_rewards/chosen": 0.009676768444478512, "eval_rewards/margins": 0.004492546897381544, "eval_rewards/rejected": 0.00518422294408083, "eval_runtime": 222.0362, "eval_samples_per_second": 9.008, "eval_steps_per_second": 0.284, "step": 300 }, { "epoch": 0.08, "learning_rate": 2.0261437908496734e-06, "logits/chosen": -2.746243953704834, "logits/rejected": -2.7564194202423096, "logps/chosen": -1628.013427734375, "logps/rejected": -1673.1204833984375, "loss": 0.6906, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": 0.004660730250179768, "rewards/margins": -0.0001595167414052412, "rewards/rejected": 0.004820247646421194, "step": 310 }, { "epoch": 0.08, "learning_rate": 2.0915032679738565e-06, "logits/chosen": -2.7773218154907227, "logits/rejected": -2.7734968662261963, "logps/chosen": -1595.344482421875, "logps/rejected": -1299.114013671875, "loss": 0.6908, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.011113145388662815, "rewards/margins": 0.008930252864956856, "rewards/rejected": 0.002182893455028534, "step": 320 }, { "epoch": 0.09, "learning_rate": 2.1568627450980393e-06, "logits/chosen": -2.7626965045928955, "logits/rejected": -2.78471040725708, "logps/chosen": -1494.483154296875, "logps/rejected": -1481.1407470703125, "loss": 0.6928, "rewards/accuracies": 0.5, "rewards/chosen": 0.006823359522968531, "rewards/margins": -0.0029601803980767727, "rewards/rejected": 0.009783540852367878, "step": 330 }, { "epoch": 0.09, "learning_rate": 2.222222222222222e-06, "logits/chosen": -2.7922797203063965, "logits/rejected": -2.797437906265259, "logps/chosen": -1879.9134521484375, "logps/rejected": -1432.9617919921875, "loss": 0.6896, "rewards/accuracies": 0.625, "rewards/chosen": 0.024633025750517845, "rewards/margins": 0.02049541473388672, "rewards/rejected": 0.004137612413614988, "step": 340 }, { "epoch": 0.09, "learning_rate": 2.2875816993464053e-06, "logits/chosen": -2.7698655128479004, "logits/rejected": -2.767660140991211, "logps/chosen": -1492.0418701171875, "logps/rejected": -1487.5615234375, "loss": 0.6915, "rewards/accuracies": 0.375, "rewards/chosen": 0.012676766142249107, "rewards/margins": -0.007590385619550943, "rewards/rejected": 0.020267153158783913, "step": 350 }, { "epoch": 0.09, "learning_rate": 2.3529411764705885e-06, "logits/chosen": -2.8128461837768555, "logits/rejected": -2.8049893379211426, "logps/chosen": -1627.913818359375, "logps/rejected": -1413.0662841796875, "loss": 0.6892, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.009338868781924248, "rewards/margins": 0.01759433187544346, "rewards/rejected": -0.00825546495616436, "step": 360 }, { "epoch": 0.1, "learning_rate": 2.4183006535947716e-06, "logits/chosen": -2.766038179397583, "logits/rejected": -2.777884006500244, "logps/chosen": -1519.193359375, "logps/rejected": -1519.871337890625, "loss": 0.6928, "rewards/accuracies": 0.375, "rewards/chosen": 0.009928613901138306, "rewards/margins": -0.002825252478942275, "rewards/rejected": 0.012753868475556374, "step": 370 }, { "epoch": 0.1, "learning_rate": 2.4836601307189544e-06, "logits/chosen": -2.766292095184326, "logits/rejected": -2.7770702838897705, "logps/chosen": -1192.345703125, "logps/rejected": -1111.7589111328125, "loss": 0.6884, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.015065248124301434, "rewards/margins": 0.004281006287783384, "rewards/rejected": 0.010784241370856762, "step": 380 }, { "epoch": 0.1, "learning_rate": 2.549019607843137e-06, "logits/chosen": -2.7778379917144775, "logits/rejected": -2.7809524536132812, "logps/chosen": -1430.665283203125, "logps/rejected": -1653.656005859375, "loss": 0.6901, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.022431906312704086, "rewards/margins": 0.010017070919275284, "rewards/rejected": 0.012414836324751377, "step": 390 }, { "epoch": 0.1, "learning_rate": 2.6143790849673208e-06, "logits/chosen": -2.756230115890503, "logits/rejected": -2.7588517665863037, "logps/chosen": -1730.5394287109375, "logps/rejected": -1502.6300048828125, "loss": 0.6867, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.01862536557018757, "rewards/margins": 0.007056856993585825, "rewards/rejected": 0.011568508110940456, "step": 400 }, { "epoch": 0.1, "eval_logits/chosen": -2.792309522628784, "eval_logits/rejected": -2.7853169441223145, "eval_logps/chosen": -1561.1024169921875, "eval_logps/rejected": -1351.063232421875, "eval_loss": 0.6892846822738647, "eval_rewards/accuracies": 0.5595238208770752, "eval_rewards/chosen": 0.014464985579252243, "eval_rewards/margins": 0.008450279943645, "eval_rewards/rejected": 0.006014703772962093, "eval_runtime": 222.1309, "eval_samples_per_second": 9.004, "eval_steps_per_second": 0.284, "step": 400 }, { "epoch": 0.11, "learning_rate": 2.6797385620915036e-06, "logits/chosen": -2.8415043354034424, "logits/rejected": -2.7838778495788574, "logps/chosen": -1729.2962646484375, "logps/rejected": -1401.625732421875, "loss": 0.6906, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": 0.011543015018105507, "rewards/margins": -0.007434485945850611, "rewards/rejected": 0.01897750422358513, "step": 410 }, { "epoch": 0.11, "learning_rate": 2.7450980392156867e-06, "logits/chosen": -2.779257297515869, "logits/rejected": -2.76314640045166, "logps/chosen": -1407.1253662109375, "logps/rejected": -882.9191284179688, "loss": 0.6872, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.008474389091134071, "rewards/margins": 0.0015680404612794518, "rewards/rejected": 0.006906348280608654, "step": 420 }, { "epoch": 0.11, "learning_rate": 2.8104575163398695e-06, "logits/chosen": -2.761399745941162, "logits/rejected": -2.765044927597046, "logps/chosen": -1092.9505615234375, "logps/rejected": -1225.6053466796875, "loss": 0.6872, "rewards/accuracies": 0.625, "rewards/chosen": 0.015711713582277298, "rewards/margins": 0.01204732246696949, "rewards/rejected": 0.0036643915809690952, "step": 430 }, { "epoch": 0.12, "learning_rate": 2.8758169934640523e-06, "logits/chosen": -2.7783565521240234, "logits/rejected": -2.770191192626953, "logps/chosen": -1729.1246337890625, "logps/rejected": -1355.4561767578125, "loss": 0.6848, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.030698176473379135, "rewards/margins": 0.025134120136499405, "rewards/rejected": 0.005564060527831316, "step": 440 }, { "epoch": 0.12, "learning_rate": 2.9411764705882355e-06, "logits/chosen": -2.7931742668151855, "logits/rejected": -2.782345771789551, "logps/chosen": -1868.706787109375, "logps/rejected": -1616.277099609375, "loss": 0.6943, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.013443054631352425, "rewards/margins": -0.013905840925872326, "rewards/rejected": 0.0004627843445632607, "step": 450 }, { "epoch": 0.12, "learning_rate": 3.0065359477124182e-06, "logits/chosen": -2.793461322784424, "logits/rejected": -2.778320550918579, "logps/chosen": -1475.04541015625, "logps/rejected": -1274.9873046875, "loss": 0.6861, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.010366128757596016, "rewards/margins": 0.004415568895637989, "rewards/rejected": -0.01478169672191143, "step": 460 }, { "epoch": 0.12, "learning_rate": 3.071895424836602e-06, "logits/chosen": -2.7877297401428223, "logits/rejected": -2.7807395458221436, "logps/chosen": -1361.2353515625, "logps/rejected": -1279.2861328125, "loss": 0.6818, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.012626223266124725, "rewards/margins": 0.018246522173285484, "rewards/rejected": -0.005620299372822046, "step": 470 }, { "epoch": 0.13, "learning_rate": 3.1372549019607846e-06, "logits/chosen": -2.812326192855835, "logits/rejected": -2.7740156650543213, "logps/chosen": -1300.698486328125, "logps/rejected": -972.8448486328125, "loss": 0.6845, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.001179039478302002, "rewards/margins": 0.026014486327767372, "rewards/rejected": -0.027193522080779076, "step": 480 }, { "epoch": 0.13, "learning_rate": 3.2026143790849674e-06, "logits/chosen": -2.7627980709075928, "logits/rejected": -2.761810541152954, "logps/chosen": -1492.3681640625, "logps/rejected": -1233.1475830078125, "loss": 0.6853, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.02171669341623783, "rewards/margins": 0.015219648368656635, "rewards/rejected": 0.006497042719274759, "step": 490 }, { "epoch": 0.13, "learning_rate": 3.2679738562091506e-06, "logits/chosen": -2.781360149383545, "logits/rejected": -2.7785849571228027, "logps/chosen": -1810.2314453125, "logps/rejected": -1465.8914794921875, "loss": 0.6921, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": 0.0031786567997187376, "rewards/margins": -0.0039799753576517105, "rewards/rejected": 0.00715863099321723, "step": 500 }, { "epoch": 0.13, "eval_logits/chosen": -2.789332389831543, "eval_logits/rejected": -2.782862663269043, "eval_logps/chosen": -1562.4755859375, "eval_logps/rejected": -1352.8848876953125, "eval_loss": 0.686707615852356, "eval_rewards/accuracies": 0.5734127163887024, "eval_rewards/chosen": 0.00073534733382985, "eval_rewards/margins": 0.01293771993368864, "eval_rewards/rejected": -0.012202374637126923, "eval_runtime": 222.0162, "eval_samples_per_second": 9.008, "eval_steps_per_second": 0.284, "step": 500 }, { "epoch": 0.13, "learning_rate": 3.3333333333333333e-06, "logits/chosen": -2.7887978553771973, "logits/rejected": -2.782435655593872, "logps/chosen": -1512.6820068359375, "logps/rejected": -1341.0167236328125, "loss": 0.6922, "rewards/accuracies": 0.5, "rewards/chosen": 0.008763214573264122, "rewards/margins": 0.010548645630478859, "rewards/rejected": -0.001785430358722806, "step": 510 }, { "epoch": 0.14, "learning_rate": 3.398692810457517e-06, "logits/chosen": -2.7754645347595215, "logits/rejected": -2.7587666511535645, "logps/chosen": -1542.706298828125, "logps/rejected": -1310.3466796875, "loss": 0.6867, "rewards/accuracies": 0.75, "rewards/chosen": 0.0118903499096632, "rewards/margins": 0.02525492012500763, "rewards/rejected": -0.01336456835269928, "step": 520 }, { "epoch": 0.14, "learning_rate": 3.4640522875816997e-06, "logits/chosen": -2.809363842010498, "logits/rejected": -2.8009610176086426, "logps/chosen": -1153.473388671875, "logps/rejected": -1087.641357421875, "loss": 0.6878, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.005253266543149948, "rewards/margins": 0.028111198917031288, "rewards/rejected": -0.02285792864859104, "step": 530 }, { "epoch": 0.14, "learning_rate": 3.529411764705883e-06, "logits/chosen": -2.827617883682251, "logits/rejected": -2.8249759674072266, "logps/chosen": -1407.7529296875, "logps/rejected": -1284.2222900390625, "loss": 0.6853, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.006767097860574722, "rewards/margins": 0.021953441202640533, "rewards/rejected": -0.015186344273388386, "step": 540 }, { "epoch": 0.14, "learning_rate": 3.5947712418300657e-06, "logits/chosen": -2.7767438888549805, "logits/rejected": -2.7729332447052, "logps/chosen": -1286.873291015625, "logps/rejected": -1123.163818359375, "loss": 0.68, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.008051171898841858, "rewards/margins": 0.0015671855071559548, "rewards/rejected": 0.006483986973762512, "step": 550 }, { "epoch": 0.15, "learning_rate": 3.6601307189542484e-06, "logits/chosen": -2.774984359741211, "logits/rejected": -2.764968156814575, "logps/chosen": -1587.454833984375, "logps/rejected": -1288.8699951171875, "loss": 0.6843, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": 0.0070315636694431305, "rewards/margins": 0.024187782779335976, "rewards/rejected": -0.017156217247247696, "step": 560 }, { "epoch": 0.15, "learning_rate": 3.7254901960784316e-06, "logits/chosen": -2.7530901432037354, "logits/rejected": -2.7850308418273926, "logps/chosen": -1379.649658203125, "logps/rejected": -1344.728515625, "loss": 0.6877, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": -0.021333372220396996, "rewards/margins": -0.03623160347342491, "rewards/rejected": 0.014898233115673065, "step": 570 }, { "epoch": 0.15, "learning_rate": 3.7908496732026144e-06, "logits/chosen": -2.7495524883270264, "logits/rejected": -2.741302251815796, "logps/chosen": -1309.9893798828125, "logps/rejected": -1294.010498046875, "loss": 0.694, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.002793100429698825, "rewards/margins": 0.03543297201395035, "rewards/rejected": -0.03822607547044754, "step": 580 }, { "epoch": 0.15, "learning_rate": 3.856209150326798e-06, "logits/chosen": -2.803638458251953, "logits/rejected": -2.794506549835205, "logps/chosen": -1869.0966796875, "logps/rejected": -1490.4566650390625, "loss": 0.6817, "rewards/accuracies": 0.625, "rewards/chosen": 0.0023705377243459225, "rewards/margins": 0.050473470240831375, "rewards/rejected": -0.048102933913469315, "step": 590 }, { "epoch": 0.16, "learning_rate": 3.92156862745098e-06, "logits/chosen": -2.742553472518921, "logits/rejected": -2.748706102371216, "logps/chosen": -1474.915771484375, "logps/rejected": -1218.5316162109375, "loss": 0.6895, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.0037573534063994884, "rewards/margins": 0.022884074598550797, "rewards/rejected": -0.026641424745321274, "step": 600 }, { "epoch": 0.16, "eval_logits/chosen": -2.780553102493286, "eval_logits/rejected": -2.7740375995635986, "eval_logps/chosen": -1562.0875244140625, "eval_logps/rejected": -1353.28662109375, "eval_loss": 0.6838135123252869, "eval_rewards/accuracies": 0.591269850730896, "eval_rewards/chosen": 0.004615093115717173, "eval_rewards/margins": 0.02083371952176094, "eval_rewards/rejected": -0.016218625009059906, "eval_runtime": 221.9789, "eval_samples_per_second": 9.01, "eval_steps_per_second": 0.284, "step": 600 }, { "epoch": 0.16, "learning_rate": 3.986928104575164e-06, "logits/chosen": -2.796996593475342, "logits/rejected": -2.7961318492889404, "logps/chosen": -1551.861328125, "logps/rejected": -1349.908203125, "loss": 0.6787, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.00037096330197528005, "rewards/margins": 0.019331419840455055, "rewards/rejected": -0.01896045356988907, "step": 610 }, { "epoch": 0.16, "learning_rate": 4.052287581699347e-06, "logits/chosen": -2.764712333679199, "logits/rejected": -2.7671523094177246, "logps/chosen": -1459.205078125, "logps/rejected": -1273.398681640625, "loss": 0.6829, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.004301647190004587, "rewards/margins": 0.02450401708483696, "rewards/rejected": -0.02020237222313881, "step": 620 }, { "epoch": 0.16, "learning_rate": 4.11764705882353e-06, "logits/chosen": -2.781093120574951, "logits/rejected": -2.7770438194274902, "logps/chosen": -1245.2060546875, "logps/rejected": -1376.152099609375, "loss": 0.6892, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.001100041321478784, "rewards/margins": 0.004019447136670351, "rewards/rejected": -0.002919405000284314, "step": 630 }, { "epoch": 0.17, "learning_rate": 4.183006535947713e-06, "logits/chosen": -2.7561535835266113, "logits/rejected": -2.755764961242676, "logps/chosen": -1589.4029541015625, "logps/rejected": -1410.949951171875, "loss": 0.6752, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.03143695369362831, "rewards/margins": 0.008360566571354866, "rewards/rejected": -0.03979751467704773, "step": 640 }, { "epoch": 0.17, "learning_rate": 4.2483660130718954e-06, "logits/chosen": -2.775606393814087, "logits/rejected": -2.7552859783172607, "logps/chosen": -1316.2952880859375, "logps/rejected": -1200.760986328125, "loss": 0.6912, "rewards/accuracies": 0.42500001192092896, "rewards/chosen": 0.008813906461000443, "rewards/margins": 0.0012561812764033675, "rewards/rejected": 0.007557724602520466, "step": 650 }, { "epoch": 0.17, "learning_rate": 4.313725490196079e-06, "logits/chosen": -2.7824950218200684, "logits/rejected": -2.780513048171997, "logps/chosen": -1619.6287841796875, "logps/rejected": -1157.877685546875, "loss": 0.6842, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.0035666965413838625, "rewards/margins": 0.019938554614782333, "rewards/rejected": -0.023505253717303276, "step": 660 }, { "epoch": 0.18, "learning_rate": 4.379084967320262e-06, "logits/chosen": -2.773609161376953, "logits/rejected": -2.773920774459839, "logps/chosen": -1857.786376953125, "logps/rejected": -1201.499755859375, "loss": 0.6815, "rewards/accuracies": 0.625, "rewards/chosen": -0.022376973181962967, "rewards/margins": 0.019164763391017914, "rewards/rejected": -0.04154173657298088, "step": 670 }, { "epoch": 0.18, "learning_rate": 4.444444444444444e-06, "logits/chosen": -2.773653745651245, "logits/rejected": -2.783552885055542, "logps/chosen": -1578.38525390625, "logps/rejected": -1208.565673828125, "loss": 0.6913, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.01020563580095768, "rewards/margins": 0.015890780836343765, "rewards/rejected": -0.026096414774656296, "step": 680 }, { "epoch": 0.18, "learning_rate": 4.509803921568628e-06, "logits/chosen": -2.759702205657959, "logits/rejected": -2.7624175548553467, "logps/chosen": -1402.7972412109375, "logps/rejected": -1246.9429931640625, "loss": 0.6752, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.0054585887119174, "rewards/margins": 0.015420796349644661, "rewards/rejected": -0.020879384130239487, "step": 690 }, { "epoch": 0.18, "learning_rate": 4.5751633986928105e-06, "logits/chosen": -2.7728593349456787, "logits/rejected": -2.776383876800537, "logps/chosen": -1766.4078369140625, "logps/rejected": -1340.6552734375, "loss": 0.6792, "rewards/accuracies": 0.5, "rewards/chosen": -0.01140767615288496, "rewards/margins": 0.010001585818827152, "rewards/rejected": -0.021409258246421814, "step": 700 }, { "epoch": 0.18, "eval_logits/chosen": -2.765734910964966, "eval_logits/rejected": -2.7592179775238037, "eval_logps/chosen": -1564.490966796875, "eval_logps/rejected": -1356.0621337890625, "eval_loss": 0.6818892359733582, "eval_rewards/accuracies": 0.5992063283920288, "eval_rewards/chosen": -0.019420143216848373, "eval_rewards/margins": 0.024553872644901276, "eval_rewards/rejected": -0.04397401958703995, "eval_runtime": 221.9951, "eval_samples_per_second": 9.009, "eval_steps_per_second": 0.284, "step": 700 }, { "epoch": 0.19, "learning_rate": 4.640522875816994e-06, "logits/chosen": -2.780527353286743, "logits/rejected": -2.7740638256073, "logps/chosen": -1199.1182861328125, "logps/rejected": -1168.7066650390625, "loss": 0.6812, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.0392436683177948, "rewards/margins": 0.012266580015420914, "rewards/rejected": -0.05151023715734482, "step": 710 }, { "epoch": 0.19, "learning_rate": 4.705882352941177e-06, "logits/chosen": -2.743067979812622, "logits/rejected": -2.721900224685669, "logps/chosen": -1631.0428466796875, "logps/rejected": -1451.7489013671875, "loss": 0.689, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.027515646070241928, "rewards/margins": 0.017311880365014076, "rewards/rejected": -0.044827524572610855, "step": 720 }, { "epoch": 0.19, "learning_rate": 4.77124183006536e-06, "logits/chosen": -2.7711644172668457, "logits/rejected": -2.7652924060821533, "logps/chosen": -1598.7469482421875, "logps/rejected": -1494.751708984375, "loss": 0.6858, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.016739655286073685, "rewards/margins": 0.026004815474152565, "rewards/rejected": -0.0427444651722908, "step": 730 }, { "epoch": 0.19, "learning_rate": 4.836601307189543e-06, "logits/chosen": -2.7872672080993652, "logits/rejected": -2.7747020721435547, "logps/chosen": -1697.3082275390625, "logps/rejected": -1134.6143798828125, "loss": 0.6815, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.005160794127732515, "rewards/margins": 0.048423707485198975, "rewards/rejected": -0.0535845048725605, "step": 740 }, { "epoch": 0.2, "learning_rate": 4.901960784313726e-06, "logits/chosen": -2.790926694869995, "logits/rejected": -2.770566940307617, "logps/chosen": -1459.629150390625, "logps/rejected": -1280.535888671875, "loss": 0.6791, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -0.036373939365148544, "rewards/margins": 0.00627900892868638, "rewards/rejected": -0.04265294969081879, "step": 750 }, { "epoch": 0.2, "learning_rate": 4.967320261437909e-06, "logits/chosen": -2.7549121379852295, "logits/rejected": -2.7587850093841553, "logps/chosen": -1506.0882568359375, "logps/rejected": -1087.849853515625, "loss": 0.6907, "rewards/accuracies": 0.5, "rewards/chosen": -0.062040358781814575, "rewards/margins": 0.02471708320081234, "rewards/rejected": -0.08675744384527206, "step": 760 }, { "epoch": 0.2, "learning_rate": 4.99999347843947e-06, "logits/chosen": -2.738149642944336, "logits/rejected": -2.7390432357788086, "logps/chosen": -1282.12353515625, "logps/rejected": -1458.2039794921875, "loss": 0.6781, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.029346242547035217, "rewards/margins": 0.007969383150339127, "rewards/rejected": -0.037315625697374344, "step": 770 }, { "epoch": 0.2, "learning_rate": 4.999941306159375e-06, "logits/chosen": -2.7938692569732666, "logits/rejected": -2.78912615776062, "logps/chosen": -1432.209716796875, "logps/rejected": -1223.073486328125, "loss": 0.6693, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.023907741531729698, "rewards/margins": 0.02465171553194523, "rewards/rejected": -0.04855945706367493, "step": 780 }, { "epoch": 0.21, "learning_rate": 4.999836962687967e-06, "logits/chosen": -2.7668633460998535, "logits/rejected": -2.784122943878174, "logps/chosen": -1346.76953125, "logps/rejected": -1334.90185546875, "loss": 0.6859, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.08885478973388672, "rewards/margins": 0.02054060809314251, "rewards/rejected": -0.10939540714025497, "step": 790 }, { "epoch": 0.21, "learning_rate": 4.999680450202786e-06, "logits/chosen": -2.7835853099823, "logits/rejected": -2.7726187705993652, "logps/chosen": -1530.6263427734375, "logps/rejected": -1177.0797119140625, "loss": 0.6802, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.06294900178909302, "rewards/margins": 0.0048830811865627766, "rewards/rejected": -0.0678320825099945, "step": 800 }, { "epoch": 0.21, "eval_logits/chosen": -2.7611026763916016, "eval_logits/rejected": -2.755068302154541, "eval_logps/chosen": -1567.8170166015625, "eval_logps/rejected": -1359.8597412109375, "eval_loss": 0.6791194081306458, "eval_rewards/accuracies": 0.5813491940498352, "eval_rewards/chosen": -0.05267925187945366, "eval_rewards/margins": 0.02927049808204174, "eval_rewards/rejected": -0.08194974809885025, "eval_runtime": 222.0481, "eval_samples_per_second": 9.007, "eval_steps_per_second": 0.284, "step": 800 }, { "epoch": 0.21, "learning_rate": 4.999471771970087e-06, "logits/chosen": -2.7775638103485107, "logits/rejected": -2.7644567489624023, "logps/chosen": -1436.937255859375, "logps/rejected": -1275.5181884765625, "loss": 0.6842, "rewards/accuracies": 0.5, "rewards/chosen": -0.07279221713542938, "rewards/margins": 0.000938097364269197, "rewards/rejected": -0.07373031228780746, "step": 810 }, { "epoch": 0.21, "learning_rate": 4.999210932344767e-06, "logits/chosen": -2.761268377304077, "logits/rejected": -2.7647528648376465, "logps/chosen": -1623.6156005859375, "logps/rejected": -1342.4853515625, "loss": 0.6801, "rewards/accuracies": 0.625, "rewards/chosen": -0.033265478909015656, "rewards/margins": 0.06735644489526749, "rewards/rejected": -0.10062190145254135, "step": 820 }, { "epoch": 0.22, "learning_rate": 4.998897936770281e-06, "logits/chosen": -2.695216655731201, "logits/rejected": -2.7059473991394043, "logps/chosen": -1521.4244384765625, "logps/rejected": -1069.4229736328125, "loss": 0.6838, "rewards/accuracies": 0.5, "rewards/chosen": -0.05603489279747009, "rewards/margins": 0.02252401039004326, "rewards/rejected": -0.07855890691280365, "step": 830 }, { "epoch": 0.22, "learning_rate": 4.998532791778521e-06, "logits/chosen": -2.764151096343994, "logits/rejected": -2.7464096546173096, "logps/chosen": -1710.6044921875, "logps/rejected": -1356.273681640625, "loss": 0.6778, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.053830236196517944, "rewards/margins": 0.028797442093491554, "rewards/rejected": -0.08262769132852554, "step": 840 }, { "epoch": 0.22, "learning_rate": 4.9981155049896885e-06, "logits/chosen": -2.762856960296631, "logits/rejected": -2.757084846496582, "logps/chosen": -1510.125244140625, "logps/rejected": -1310.7681884765625, "loss": 0.6705, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.02246144786477089, "rewards/margins": 0.04654809087514877, "rewards/rejected": -0.06900953501462936, "step": 850 }, { "epoch": 0.23, "learning_rate": 4.997646085112126e-06, "logits/chosen": -2.7250123023986816, "logits/rejected": -2.733142137527466, "logps/chosen": -1746.1890869140625, "logps/rejected": -1475.557373046875, "loss": 0.6779, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.03953806310892105, "rewards/margins": 0.04774565249681473, "rewards/rejected": -0.08728370070457458, "step": 860 }, { "epoch": 0.23, "learning_rate": 4.997124541942141e-06, "logits/chosen": -2.733189105987549, "logits/rejected": -2.755445957183838, "logps/chosen": -1365.820556640625, "logps/rejected": -1418.225830078125, "loss": 0.6864, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.041343413293361664, "rewards/margins": 0.023359699174761772, "rewards/rejected": -0.06470310688018799, "step": 870 }, { "epoch": 0.23, "learning_rate": 4.996550886363801e-06, "logits/chosen": -2.7360308170318604, "logits/rejected": -2.7451579570770264, "logps/chosen": -1361.407958984375, "logps/rejected": -1424.7965087890625, "loss": 0.6817, "rewards/accuracies": 0.5, "rewards/chosen": -0.03000304475426674, "rewards/margins": 0.032740574330091476, "rewards/rejected": -0.06274361908435822, "step": 880 }, { "epoch": 0.23, "learning_rate": 4.995925130348706e-06, "logits/chosen": -2.7535760402679443, "logits/rejected": -2.753044366836548, "logps/chosen": -1625.3104248046875, "logps/rejected": -1407.3009033203125, "loss": 0.6788, "rewards/accuracies": 0.42500001192092896, "rewards/chosen": -0.03740937262773514, "rewards/margins": 0.04210934415459633, "rewards/rejected": -0.07951872050762177, "step": 890 }, { "epoch": 0.24, "learning_rate": 4.995247286955734e-06, "logits/chosen": -2.752267837524414, "logits/rejected": -2.7573249340057373, "logps/chosen": -1691.3726806640625, "logps/rejected": -1371.1131591796875, "loss": 0.6812, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.03671065717935562, "rewards/margins": 0.020868580788373947, "rewards/rejected": -0.05757923796772957, "step": 900 }, { "epoch": 0.24, "eval_logits/chosen": -2.7654597759246826, "eval_logits/rejected": -2.758779525756836, "eval_logps/chosen": -1566.5771484375, "eval_logps/rejected": -1359.92431640625, "eval_loss": 0.6772189140319824, "eval_rewards/accuracies": 0.5714285969734192, "eval_rewards/chosen": -0.04028034210205078, "eval_rewards/margins": 0.04231574013829231, "eval_rewards/rejected": -0.08259608596563339, "eval_runtime": 222.1282, "eval_samples_per_second": 9.004, "eval_steps_per_second": 0.284, "step": 900 }, { "epoch": 0.24, "learning_rate": 4.994517370330779e-06, "logits/chosen": -2.726576566696167, "logits/rejected": -2.7278590202331543, "logps/chosen": -1630.087646484375, "logps/rejected": -1243.8406982421875, "loss": 0.6606, "rewards/accuracies": 0.625, "rewards/chosen": -0.008724043145775795, "rewards/margins": 0.08514805883169174, "rewards/rejected": -0.09387209266424179, "step": 910 }, { "epoch": 0.24, "learning_rate": 4.993735395706446e-06, "logits/chosen": -2.746229648590088, "logits/rejected": -2.7480199337005615, "logps/chosen": -1577.779052734375, "logps/rejected": -1403.44970703125, "loss": 0.6856, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": -0.06129397824406624, "rewards/margins": 0.010011469945311546, "rewards/rejected": -0.07130544632673264, "step": 920 }, { "epoch": 0.24, "learning_rate": 4.992901379401737e-06, "logits/chosen": -2.7393672466278076, "logits/rejected": -2.749816417694092, "logps/chosen": -1264.5728759765625, "logps/rejected": -1142.581787109375, "loss": 0.6765, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.052227932959795, "rewards/margins": 0.07175948470830917, "rewards/rejected": -0.12398741394281387, "step": 930 }, { "epoch": 0.25, "learning_rate": 4.992015338821711e-06, "logits/chosen": -2.7358882427215576, "logits/rejected": -2.728848934173584, "logps/chosen": -1451.732666015625, "logps/rejected": -1161.045654296875, "loss": 0.6771, "rewards/accuracies": 0.625, "rewards/chosen": -0.06770779937505722, "rewards/margins": 0.08663908392190933, "rewards/rejected": -0.15434686839580536, "step": 940 }, { "epoch": 0.25, "learning_rate": 4.991077292457117e-06, "logits/chosen": -2.7200193405151367, "logits/rejected": -2.7250800132751465, "logps/chosen": -1698.7503662109375, "logps/rejected": -1226.952880859375, "loss": 0.6747, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.09897660464048386, "rewards/margins": 0.03143421933054924, "rewards/rejected": -0.1304108202457428, "step": 950 }, { "epoch": 0.25, "learning_rate": 4.990087259884016e-06, "logits/chosen": -2.7504963874816895, "logits/rejected": -2.7447590827941895, "logps/chosen": -1224.8485107421875, "logps/rejected": -1163.18408203125, "loss": 0.6793, "rewards/accuracies": 0.5, "rewards/chosen": -0.09718232601881027, "rewards/margins": 0.026497045531868935, "rewards/rejected": -0.12367937713861465, "step": 960 }, { "epoch": 0.25, "learning_rate": 4.989045261763362e-06, "logits/chosen": -2.722668409347534, "logits/rejected": -2.7087206840515137, "logps/chosen": -1633.8773193359375, "logps/rejected": -1421.2122802734375, "loss": 0.6863, "rewards/accuracies": 0.5, "rewards/chosen": -0.10039496421813965, "rewards/margins": 0.020698342472314835, "rewards/rejected": -0.12109329551458359, "step": 970 }, { "epoch": 0.26, "learning_rate": 4.98795131984058e-06, "logits/chosen": -2.7724738121032715, "logits/rejected": -2.7552850246429443, "logps/chosen": -1841.216796875, "logps/rejected": -1583.920654296875, "loss": 0.6713, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.006210956256836653, "rewards/margins": 0.09263283014297485, "rewards/rejected": -0.08642186224460602, "step": 980 }, { "epoch": 0.26, "learning_rate": 4.986805456945107e-06, "logits/chosen": -2.721271514892578, "logits/rejected": -2.737414836883545, "logps/chosen": -1505.755859375, "logps/rejected": -1432.326416015625, "loss": 0.6811, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.07596326619386673, "rewards/margins": 0.058528609573841095, "rewards/rejected": -0.13449189066886902, "step": 990 }, { "epoch": 0.26, "learning_rate": 4.985607696989919e-06, "logits/chosen": -2.7340989112854004, "logits/rejected": -2.7340407371520996, "logps/chosen": -1303.5263671875, "logps/rejected": -1050.8271484375, "loss": 0.6714, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.06350848078727722, "rewards/margins": 0.08284474909305573, "rewards/rejected": -0.14635322988033295, "step": 1000 }, { "epoch": 0.26, "eval_logits/chosen": -2.7476096153259277, "eval_logits/rejected": -2.741795539855957, "eval_logps/chosen": -1571.4063720703125, "eval_logps/rejected": -1365.27587890625, "eval_loss": 0.6746096014976501, "eval_rewards/accuracies": 0.5714285969734192, "eval_rewards/chosen": -0.08857344090938568, "eval_rewards/margins": 0.04753944277763367, "eval_rewards/rejected": -0.13611288368701935, "eval_runtime": 222.1714, "eval_samples_per_second": 9.002, "eval_steps_per_second": 0.284, "step": 1000 }, { "epoch": 0.26, "learning_rate": 4.984358064971026e-06, "logits/chosen": -2.7566912174224854, "logits/rejected": -2.7654871940612793, "logps/chosen": -1185.7261962890625, "logps/rejected": -1369.632080078125, "loss": 0.6657, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.050084006041288376, "rewards/margins": 0.06669258326292038, "rewards/rejected": -0.11677658557891846, "step": 1010 }, { "epoch": 0.27, "learning_rate": 4.983056586966958e-06, "logits/chosen": -2.7586092948913574, "logits/rejected": -2.7438788414001465, "logps/chosen": -1385.3212890625, "logps/rejected": -1269.158935546875, "loss": 0.6855, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": -0.10389542579650879, "rewards/margins": 0.043193086981773376, "rewards/rejected": -0.14708851277828217, "step": 1020 }, { "epoch": 0.27, "learning_rate": 4.981703290138215e-06, "logits/chosen": -2.7322840690612793, "logits/rejected": -2.7069272994995117, "logps/chosen": -1495.0472412109375, "logps/rejected": -1233.1636962890625, "loss": 0.6619, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.0797240361571312, "rewards/margins": 0.03941266983747482, "rewards/rejected": -0.11913671344518661, "step": 1030 }, { "epoch": 0.27, "learning_rate": 4.980298202726706e-06, "logits/chosen": -2.7719852924346924, "logits/rejected": -2.77016544342041, "logps/chosen": -1452.3310546875, "logps/rejected": -1250.4979248046875, "loss": 0.6874, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.09233850240707397, "rewards/margins": 0.043667055666446686, "rewards/rejected": -0.13600555062294006, "step": 1040 }, { "epoch": 0.27, "learning_rate": 4.978841354055148e-06, "logits/chosen": -2.734018325805664, "logits/rejected": -2.733374834060669, "logps/chosen": -1476.0380859375, "logps/rejected": -1346.449951171875, "loss": 0.6578, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.022154351696372032, "rewards/margins": 0.14931711554527283, "rewards/rejected": -0.17147144675254822, "step": 1050 }, { "epoch": 0.28, "learning_rate": 4.977332774526471e-06, "logits/chosen": -2.7099456787109375, "logits/rejected": -2.7068095207214355, "logps/chosen": -1252.1337890625, "logps/rejected": -1303.8621826171875, "loss": 0.6841, "rewards/accuracies": 0.625, "rewards/chosen": -0.1030571460723877, "rewards/margins": 0.04800555855035782, "rewards/rejected": -0.1510627120733261, "step": 1060 }, { "epoch": 0.28, "learning_rate": 4.97577249562317e-06, "logits/chosen": -2.7273364067077637, "logits/rejected": -2.71481990814209, "logps/chosen": -1782.357666015625, "logps/rejected": -1272.0413818359375, "loss": 0.6679, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.04302068054676056, "rewards/margins": 0.08528807759284973, "rewards/rejected": -0.12830877304077148, "step": 1070 }, { "epoch": 0.28, "learning_rate": 4.974160549906652e-06, "logits/chosen": -2.7501015663146973, "logits/rejected": -2.725672960281372, "logps/chosen": -1465.035888671875, "logps/rejected": -1474.959228515625, "loss": 0.6817, "rewards/accuracies": 0.625, "rewards/chosen": -0.11869201809167862, "rewards/margins": 0.05353887006640434, "rewards/rejected": -0.17223089933395386, "step": 1080 }, { "epoch": 0.29, "learning_rate": 4.972496971016559e-06, "logits/chosen": -2.7162396907806396, "logits/rejected": -2.726938486099243, "logps/chosen": -1366.2523193359375, "logps/rejected": -1355.443603515625, "loss": 0.6651, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.052674632519483566, "rewards/margins": 0.05954190343618393, "rewards/rejected": -0.11221654713153839, "step": 1090 }, { "epoch": 0.29, "learning_rate": 4.9707817936700635e-06, "logits/chosen": -2.7875306606292725, "logits/rejected": -2.7665858268737793, "logps/chosen": -1312.6688232421875, "logps/rejected": -1187.6529541015625, "loss": 0.676, "rewards/accuracies": 0.42500001192092896, "rewards/chosen": -0.14929260313510895, "rewards/margins": -0.008267087861895561, "rewards/rejected": -0.14102551341056824, "step": 1100 }, { "epoch": 0.29, "eval_logits/chosen": -2.7504940032958984, "eval_logits/rejected": -2.7433109283447266, "eval_logps/chosen": -1573.961669921875, "eval_logps/rejected": -1368.9942626953125, "eval_loss": 0.6743620038032532, "eval_rewards/accuracies": 0.5892857313156128, "eval_rewards/chosen": -0.11412478238344193, "eval_rewards/margins": 0.05917017161846161, "eval_rewards/rejected": -0.17329494655132294, "eval_runtime": 221.9771, "eval_samples_per_second": 9.01, "eval_steps_per_second": 0.284, "step": 1100 }, { "epoch": 0.29, "learning_rate": 4.969015053661142e-06, "logits/chosen": -2.769805669784546, "logits/rejected": -2.75061297416687, "logps/chosen": -1684.818603515625, "logps/rejected": -1462.84033203125, "loss": 0.668, "rewards/accuracies": 0.625, "rewards/chosen": -0.07076840102672577, "rewards/margins": 0.1183507889509201, "rewards/rejected": -0.18911918997764587, "step": 1110 }, { "epoch": 0.29, "learning_rate": 4.967196787859835e-06, "logits/chosen": -2.7415719032287598, "logits/rejected": -2.7441704273223877, "logps/chosen": -1657.051025390625, "logps/rejected": -1375.3583984375, "loss": 0.6699, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.1343439519405365, "rewards/margins": 0.05747341364622116, "rewards/rejected": -0.19181737303733826, "step": 1120 }, { "epoch": 0.3, "learning_rate": 4.965327034211469e-06, "logits/chosen": -2.735103130340576, "logits/rejected": -2.7535860538482666, "logps/chosen": -1481.1834716796875, "logps/rejected": -1282.894775390625, "loss": 0.6657, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.08777041733264923, "rewards/margins": 0.08276239037513733, "rewards/rejected": -0.17053279280662537, "step": 1130 }, { "epoch": 0.3, "learning_rate": 4.96340583173587e-06, "logits/chosen": -2.752781867980957, "logits/rejected": -2.717355728149414, "logps/chosen": -1356.7645263671875, "logps/rejected": -978.78759765625, "loss": 0.6581, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.08919491618871689, "rewards/margins": 0.07804575562477112, "rewards/rejected": -0.1672406792640686, "step": 1140 }, { "epoch": 0.3, "learning_rate": 4.96143322052655e-06, "logits/chosen": -2.7316513061523438, "logits/rejected": -2.7153308391571045, "logps/chosen": -1602.01513671875, "logps/rejected": -1217.658935546875, "loss": 0.6696, "rewards/accuracies": 0.625, "rewards/chosen": -0.08242569863796234, "rewards/margins": 0.12858238816261292, "rewards/rejected": -0.21100810170173645, "step": 1150 }, { "epoch": 0.3, "learning_rate": 4.959409241749864e-06, "logits/chosen": -2.7121639251708984, "logits/rejected": -2.709986448287964, "logps/chosen": -1387.0302734375, "logps/rejected": -1208.765380859375, "loss": 0.665, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.15488040447235107, "rewards/margins": 0.045979466289281845, "rewards/rejected": -0.20085985958576202, "step": 1160 }, { "epoch": 0.31, "learning_rate": 4.957333937644159e-06, "logits/chosen": -2.7244646549224854, "logits/rejected": -2.733808994293213, "logps/chosen": -1502.994873046875, "logps/rejected": -1184.745849609375, "loss": 0.6854, "rewards/accuracies": 0.42500001192092896, "rewards/chosen": -0.18733811378479004, "rewards/margins": 0.021027732640504837, "rewards/rejected": -0.20836582779884338, "step": 1170 }, { "epoch": 0.31, "learning_rate": 4.955207351518885e-06, "logits/chosen": -2.7539525032043457, "logits/rejected": -2.7505831718444824, "logps/chosen": -1319.450927734375, "logps/rejected": -1050.664794921875, "loss": 0.6786, "rewards/accuracies": 0.625, "rewards/chosen": -0.13988730311393738, "rewards/margins": 0.050179190933704376, "rewards/rejected": -0.19006650149822235, "step": 1180 }, { "epoch": 0.31, "learning_rate": 4.953029527753699e-06, "logits/chosen": -2.720780611038208, "logits/rejected": -2.7177436351776123, "logps/chosen": -1440.0672607421875, "logps/rejected": -1173.736083984375, "loss": 0.6724, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.10402397066354752, "rewards/margins": 0.10216061025857925, "rewards/rejected": -0.20618458092212677, "step": 1190 }, { "epoch": 0.31, "learning_rate": 4.95080051179753e-06, "logits/chosen": -2.7565178871154785, "logits/rejected": -2.74467396736145, "logps/chosen": -1439.5458984375, "logps/rejected": -1342.901611328125, "loss": 0.6779, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": -0.11544078588485718, "rewards/margins": 0.02371135726571083, "rewards/rejected": -0.1391521394252777, "step": 1200 }, { "epoch": 0.31, "eval_logits/chosen": -2.7510786056518555, "eval_logits/rejected": -2.743079423904419, "eval_logps/chosen": -1573.1090087890625, "eval_logps/rejected": -1368.6934814453125, "eval_loss": 0.6702868938446045, "eval_rewards/accuracies": 0.5932539701461792, "eval_rewards/chosen": -0.10559960454702377, "eval_rewards/margins": 0.06468784809112549, "eval_rewards/rejected": -0.17028746008872986, "eval_runtime": 221.8656, "eval_samples_per_second": 9.014, "eval_steps_per_second": 0.284, "step": 1200 }, { "epoch": 0.32, "learning_rate": 4.948520350167637e-06, "logits/chosen": -2.7409512996673584, "logits/rejected": -2.7256882190704346, "logps/chosen": -1389.34033203125, "logps/rejected": -1342.9583740234375, "loss": 0.6709, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.11397924274206161, "rewards/margins": 0.09994085133075714, "rewards/rejected": -0.21392011642456055, "step": 1210 }, { "epoch": 0.32, "learning_rate": 4.946189090448639e-06, "logits/chosen": -2.728647470474243, "logits/rejected": -2.724257707595825, "logps/chosen": -1304.4896240234375, "logps/rejected": -1695.031494140625, "loss": 0.675, "rewards/accuracies": 0.625, "rewards/chosen": -0.12990212440490723, "rewards/margins": 0.12769030034542084, "rewards/rejected": -0.25759243965148926, "step": 1220 }, { "epoch": 0.32, "learning_rate": 4.943806781291515e-06, "logits/chosen": -2.717841386795044, "logits/rejected": -2.705570697784424, "logps/chosen": -1433.613525390625, "logps/rejected": -1162.338134765625, "loss": 0.648, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.10121190547943115, "rewards/margins": 0.07192480564117432, "rewards/rejected": -0.17313668131828308, "step": 1230 }, { "epoch": 0.32, "learning_rate": 4.941373472412595e-06, "logits/chosen": -2.719426155090332, "logits/rejected": -2.707892894744873, "logps/chosen": -1468.894775390625, "logps/rejected": -1463.6505126953125, "loss": 0.6706, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.09925106167793274, "rewards/margins": 0.07948430627584457, "rewards/rejected": -0.17873536050319672, "step": 1240 }, { "epoch": 0.33, "learning_rate": 4.938889214592521e-06, "logits/chosen": -2.7018826007843018, "logits/rejected": -2.7066359519958496, "logps/chosen": -1199.9141845703125, "logps/rejected": -1288.7686767578125, "loss": 0.6584, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.14441849291324615, "rewards/margins": 0.06486638635396957, "rewards/rejected": -0.20928487181663513, "step": 1250 }, { "epoch": 0.33, "learning_rate": 4.936354059675186e-06, "logits/chosen": -2.7446064949035645, "logits/rejected": -2.7585690021514893, "logps/chosen": -1435.941162109375, "logps/rejected": -1301.364501953125, "loss": 0.6616, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.1579599678516388, "rewards/margins": 0.09013622999191284, "rewards/rejected": -0.24809618294239044, "step": 1260 }, { "epoch": 0.33, "learning_rate": 4.933768060566654e-06, "logits/chosen": -2.7097179889678955, "logits/rejected": -2.712663412094116, "logps/chosen": -1599.5322265625, "logps/rejected": -1429.913330078125, "loss": 0.6781, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.10585727542638779, "rewards/margins": 0.09449507296085358, "rewards/rejected": -0.20035234093666077, "step": 1270 }, { "epoch": 0.33, "learning_rate": 4.931131271234052e-06, "logits/chosen": -2.702580451965332, "logits/rejected": -2.705609083175659, "logps/chosen": -2137.19091796875, "logps/rejected": -1472.0059814453125, "loss": 0.6608, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.15121182799339294, "rewards/margins": 0.07554563879966736, "rewards/rejected": -0.2267574816942215, "step": 1280 }, { "epoch": 0.34, "learning_rate": 4.928443746704448e-06, "logits/chosen": -2.7274062633514404, "logits/rejected": -2.7447800636291504, "logps/chosen": -1391.8414306640625, "logps/rejected": -1164.763916015625, "loss": 0.6842, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.1704031229019165, "rewards/margins": 0.01789279095828533, "rewards/rejected": -0.1882958859205246, "step": 1290 }, { "epoch": 0.34, "learning_rate": 4.925705543063703e-06, "logits/chosen": -2.7310848236083984, "logits/rejected": -2.7340548038482666, "logps/chosen": -1622.049560546875, "logps/rejected": -1287.524658203125, "loss": 0.6888, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.12351367622613907, "rewards/margins": 0.08872373402118683, "rewards/rejected": -0.2122374027967453, "step": 1300 }, { "epoch": 0.34, "eval_logits/chosen": -2.745222568511963, "eval_logits/rejected": -2.7375075817108154, "eval_logps/chosen": -1573.912109375, "eval_logps/rejected": -1370.159912109375, "eval_loss": 0.6676135659217834, "eval_rewards/accuracies": 0.5972222089767456, "eval_rewards/chosen": -0.1136305034160614, "eval_rewards/margins": 0.07132188230752945, "eval_rewards/rejected": -0.18495237827301025, "eval_runtime": 221.931, "eval_samples_per_second": 9.012, "eval_steps_per_second": 0.284, "step": 1300 }, { "epoch": 0.34, "learning_rate": 4.922916717455297e-06, "logits/chosen": -2.7452569007873535, "logits/rejected": -2.7377490997314453, "logps/chosen": -1249.689697265625, "logps/rejected": -1024.716064453125, "loss": 0.6612, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.11122635751962662, "rewards/margins": 0.10638532787561417, "rewards/rejected": -0.2176116704940796, "step": 1310 }, { "epoch": 0.35, "learning_rate": 4.920077328079136e-06, "logits/chosen": -2.7663183212280273, "logits/rejected": -2.7657904624938965, "logps/chosen": -1489.3701171875, "logps/rejected": -1184.8206787109375, "loss": 0.6627, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.09591363370418549, "rewards/margins": 0.10784796625375748, "rewards/rejected": -0.20376160740852356, "step": 1320 }, { "epoch": 0.35, "learning_rate": 4.9171874341903445e-06, "logits/chosen": -2.7501184940338135, "logits/rejected": -2.7545647621154785, "logps/chosen": -1703.434814453125, "logps/rejected": -1121.3775634765625, "loss": 0.6766, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.11108261346817017, "rewards/margins": 0.11215372383594513, "rewards/rejected": -0.2232363522052765, "step": 1330 }, { "epoch": 0.35, "learning_rate": 4.914247096098019e-06, "logits/chosen": -2.755143642425537, "logits/rejected": -2.736085891723633, "logps/chosen": -1771.996826171875, "logps/rejected": -1301.4976806640625, "loss": 0.672, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": -0.15492898225784302, "rewards/margins": 0.040635328739881516, "rewards/rejected": -0.19556431472301483, "step": 1340 }, { "epoch": 0.35, "learning_rate": 4.911256375163977e-06, "logits/chosen": -2.7278788089752197, "logits/rejected": -2.7153897285461426, "logps/chosen": -1302.1484375, "logps/rejected": -1428.9267578125, "loss": 0.6697, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.1328674554824829, "rewards/margins": 0.05615769699215889, "rewards/rejected": -0.1890251487493515, "step": 1350 }, { "epoch": 0.36, "learning_rate": 4.908215333801474e-06, "logits/chosen": -2.714822292327881, "logits/rejected": -2.7064924240112305, "logps/chosen": -1233.5889892578125, "logps/rejected": -1167.036376953125, "loss": 0.6868, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": -0.16341665387153625, "rewards/margins": 0.04497160390019417, "rewards/rejected": -0.20838825404644012, "step": 1360 }, { "epoch": 0.36, "learning_rate": 4.9051240354739004e-06, "logits/chosen": -2.747804880142212, "logits/rejected": -2.756502866744995, "logps/chosen": -1667.056640625, "logps/rejected": -1420.6734619140625, "loss": 0.6721, "rewards/accuracies": 0.625, "rewards/chosen": -0.14670798182487488, "rewards/margins": 0.09335624426603317, "rewards/rejected": -0.24006421864032745, "step": 1370 }, { "epoch": 0.36, "learning_rate": 4.901982544693457e-06, "logits/chosen": -2.784402370452881, "logits/rejected": -2.7820801734924316, "logps/chosen": -1775.5872802734375, "logps/rejected": -1598.1461181640625, "loss": 0.6679, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.08600615710020065, "rewards/margins": 0.07457789778709412, "rewards/rejected": -0.16058406233787537, "step": 1380 }, { "epoch": 0.36, "learning_rate": 4.898790927019809e-06, "logits/chosen": -2.728027820587158, "logits/rejected": -2.7303080558776855, "logps/chosen": -1424.24755859375, "logps/rejected": -1356.0853271484375, "loss": 0.672, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.1718912571668625, "rewards/margins": 0.11604329198598862, "rewards/rejected": -0.2879345417022705, "step": 1390 }, { "epoch": 0.37, "learning_rate": 4.895549249058718e-06, "logits/chosen": -2.6968743801116943, "logits/rejected": -2.695655584335327, "logps/chosen": -1699.045166015625, "logps/rejected": -1486.0692138671875, "loss": 0.6664, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.12043057382106781, "rewards/margins": 0.08103757351636887, "rewards/rejected": -0.20146813988685608, "step": 1400 }, { "epoch": 0.37, "eval_logits/chosen": -2.737523317337036, "eval_logits/rejected": -2.730177879333496, "eval_logps/chosen": -1576.802734375, "eval_logps/rejected": -1373.31103515625, "eval_loss": 0.6669156551361084, "eval_rewards/accuracies": 0.6071428656578064, "eval_rewards/chosen": -0.1425366997718811, "eval_rewards/margins": 0.07392816990613937, "eval_rewards/rejected": -0.21646487712860107, "eval_runtime": 221.9953, "eval_samples_per_second": 9.009, "eval_steps_per_second": 0.284, "step": 1400 }, { "epoch": 0.37, "learning_rate": 4.892257578460656e-06, "logits/chosen": -2.736672878265381, "logits/rejected": -2.731189250946045, "logps/chosen": -1334.677001953125, "logps/rejected": -1220.300048828125, "loss": 0.6567, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.19311638176441193, "rewards/margins": 0.0334378182888031, "rewards/rejected": -0.22655422985553741, "step": 1410 }, { "epoch": 0.37, "learning_rate": 4.888915983919383e-06, "logits/chosen": -2.751110553741455, "logits/rejected": -2.7499165534973145, "logps/chosen": -1545.2374267578125, "logps/rejected": -1497.2535400390625, "loss": 0.6665, "rewards/accuracies": 0.5, "rewards/chosen": -0.17348986864089966, "rewards/margins": 0.014758164063096046, "rewards/rejected": -0.18824802339076996, "step": 1420 }, { "epoch": 0.37, "learning_rate": 4.885524535170525e-06, "logits/chosen": -2.716099500656128, "logits/rejected": -2.7249863147735596, "logps/chosen": -1443.5123291015625, "logps/rejected": -1279.0321044921875, "loss": 0.6687, "rewards/accuracies": 0.625, "rewards/chosen": -0.19876326620578766, "rewards/margins": 0.085506372153759, "rewards/rejected": -0.28426963090896606, "step": 1430 }, { "epoch": 0.38, "learning_rate": 4.882083302990113e-06, "logits/chosen": -2.7442541122436523, "logits/rejected": -2.7311859130859375, "logps/chosen": -1410.497802734375, "logps/rejected": -1370.2880859375, "loss": 0.6697, "rewards/accuracies": 0.625, "rewards/chosen": -0.1760236769914627, "rewards/margins": 0.07774855941534042, "rewards/rejected": -0.25377222895622253, "step": 1440 }, { "epoch": 0.38, "learning_rate": 4.878592359193104e-06, "logits/chosen": -2.7766852378845215, "logits/rejected": -2.7610340118408203, "logps/chosen": -1116.6553955078125, "logps/rejected": -992.8492431640625, "loss": 0.6491, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.10915567725896835, "rewards/margins": 0.18461118638515472, "rewards/rejected": -0.29376688599586487, "step": 1450 }, { "epoch": 0.38, "learning_rate": 4.875051776631888e-06, "logits/chosen": -2.7420220375061035, "logits/rejected": -2.7181191444396973, "logps/chosen": -2061.544921875, "logps/rejected": -1855.9967041015625, "loss": 0.6683, "rewards/accuracies": 0.625, "rewards/chosen": -0.14548955857753754, "rewards/margins": 0.05113809555768967, "rewards/rejected": -0.1966276466846466, "step": 1460 }, { "epoch": 0.38, "learning_rate": 4.871461629194764e-06, "logits/chosen": -2.7094175815582275, "logits/rejected": -2.7036585807800293, "logps/chosen": -1583.8709716796875, "logps/rejected": -1170.502685546875, "loss": 0.67, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.24922314286231995, "rewards/margins": 0.04640321061015129, "rewards/rejected": -0.29562637209892273, "step": 1470 }, { "epoch": 0.39, "learning_rate": 4.8678219918043984e-06, "logits/chosen": -2.713094711303711, "logits/rejected": -2.7234363555908203, "logps/chosen": -1121.4586181640625, "logps/rejected": -1276.220458984375, "loss": 0.6404, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.18884606659412384, "rewards/margins": 0.07648530602455139, "rewards/rejected": -0.26533135771751404, "step": 1480 }, { "epoch": 0.39, "learning_rate": 4.864132940416262e-06, "logits/chosen": -2.7564730644226074, "logits/rejected": -2.7708687782287598, "logps/chosen": -1380.2691650390625, "logps/rejected": -1315.7677001953125, "loss": 0.6967, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.24362365901470184, "rewards/margins": 0.0016543098026886582, "rewards/rejected": -0.24527797102928162, "step": 1490 }, { "epoch": 0.39, "learning_rate": 4.860394552017044e-06, "logits/chosen": -2.774509906768799, "logits/rejected": -2.7636191844940186, "logps/chosen": -1612.9970703125, "logps/rejected": -1335.4024658203125, "loss": 0.6705, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.1847611963748932, "rewards/margins": 0.07922552525997162, "rewards/rejected": -0.263986736536026, "step": 1500 }, { "epoch": 0.39, "eval_logits/chosen": -2.7545900344848633, "eval_logits/rejected": -2.748063802719116, "eval_logps/chosen": -1580.59130859375, "eval_logps/rejected": -1378.6722412109375, "eval_loss": 0.666502058506012, "eval_rewards/accuracies": 0.6071428656578064, "eval_rewards/chosen": -0.18042320013046265, "eval_rewards/margins": 0.0896516814827919, "eval_rewards/rejected": -0.27007487416267395, "eval_runtime": 222.0107, "eval_samples_per_second": 9.009, "eval_steps_per_second": 0.284, "step": 1500 }, { "epoch": 0.4, "learning_rate": 4.856606904623047e-06, "logits/chosen": -2.731475591659546, "logits/rejected": -2.735136032104492, "logps/chosen": -1508.71142578125, "logps/rejected": -1404.9801025390625, "loss": 0.6814, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.19713035225868225, "rewards/margins": 0.09451910108327866, "rewards/rejected": -0.2916494607925415, "step": 1510 }, { "epoch": 0.4, "learning_rate": 4.852770077278557e-06, "logits/chosen": -2.726473808288574, "logits/rejected": -2.717942714691162, "logps/chosen": -1575.031005859375, "logps/rejected": -1413.8336181640625, "loss": 0.6629, "rewards/accuracies": 0.625, "rewards/chosen": -0.18052199482917786, "rewards/margins": 0.1278577744960785, "rewards/rejected": -0.30837976932525635, "step": 1520 }, { "epoch": 0.4, "learning_rate": 4.848884150054196e-06, "logits/chosen": -2.718505382537842, "logits/rejected": -2.7180285453796387, "logps/chosen": -1285.03125, "logps/rejected": -1022.1572265625, "loss": 0.6622, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.2162628471851349, "rewards/margins": 0.07336204499006271, "rewards/rejected": -0.2896248698234558, "step": 1530 }, { "epoch": 0.4, "learning_rate": 4.8449492040452495e-06, "logits/chosen": -2.719595432281494, "logits/rejected": -2.7203640937805176, "logps/chosen": -1711.162109375, "logps/rejected": -1532.151611328125, "loss": 0.6709, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.2285035401582718, "rewards/margins": 0.13888053596019745, "rewards/rejected": -0.36738404631614685, "step": 1540 }, { "epoch": 0.41, "learning_rate": 4.840965321369973e-06, "logits/chosen": -2.737882137298584, "logits/rejected": -2.7262706756591797, "logps/chosen": -1420.25439453125, "logps/rejected": -1141.2373046875, "loss": 0.689, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -0.19759336113929749, "rewards/margins": 0.03492305055260658, "rewards/rejected": -0.23251643776893616, "step": 1550 }, { "epoch": 0.41, "learning_rate": 4.8369325851678795e-06, "logits/chosen": -2.737092971801758, "logits/rejected": -2.7480967044830322, "logps/chosen": -1542.2943115234375, "logps/rejected": -1442.6156005859375, "loss": 0.6566, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.14410072565078735, "rewards/margins": 0.07033728063106537, "rewards/rejected": -0.21443800628185272, "step": 1560 }, { "epoch": 0.41, "learning_rate": 4.832851079598007e-06, "logits/chosen": -2.722095012664795, "logits/rejected": -2.7370333671569824, "logps/chosen": -1584.534912109375, "logps/rejected": -1324.1781005859375, "loss": 0.673, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.2010543793439865, "rewards/margins": 0.03377969563007355, "rewards/rejected": -0.23483404517173767, "step": 1570 }, { "epoch": 0.41, "learning_rate": 4.828720889837158e-06, "logits/chosen": -2.731977701187134, "logits/rejected": -2.7154393196105957, "logps/chosen": -1614.5052490234375, "logps/rejected": -1165.898193359375, "loss": 0.6693, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.21724538505077362, "rewards/margins": 0.11410681903362274, "rewards/rejected": -0.33135223388671875, "step": 1580 }, { "epoch": 0.42, "learning_rate": 4.824542102078125e-06, "logits/chosen": -2.726020336151123, "logits/rejected": -2.737281322479248, "logps/chosen": -1595.6234130859375, "logps/rejected": -1460.412841796875, "loss": 0.6655, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.13410621881484985, "rewards/margins": 0.13845457136631012, "rewards/rejected": -0.2725607752799988, "step": 1590 }, { "epoch": 0.42, "learning_rate": 4.820314803527888e-06, "logits/chosen": -2.759023904800415, "logits/rejected": -2.7574801445007324, "logps/chosen": -1377.1361083984375, "logps/rejected": -1252.229736328125, "loss": 0.6411, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.13430440425872803, "rewards/margins": 0.14161941409111023, "rewards/rejected": -0.27592384815216064, "step": 1600 }, { "epoch": 0.42, "eval_logits/chosen": -2.731712818145752, "eval_logits/rejected": -2.7248520851135254, "eval_logps/chosen": -1581.7911376953125, "eval_logps/rejected": -1378.941650390625, "eval_loss": 0.6652618646621704, "eval_rewards/accuracies": 0.6329365372657776, "eval_rewards/chosen": -0.1924203485250473, "eval_rewards/margins": 0.08035055547952652, "eval_rewards/rejected": -0.2727709412574768, "eval_runtime": 222.0254, "eval_samples_per_second": 9.008, "eval_steps_per_second": 0.284, "step": 1600 }, { "epoch": 0.42, "learning_rate": 4.816039082405799e-06, "logits/chosen": -2.7569971084594727, "logits/rejected": -2.735839366912842, "logps/chosen": -1658.815673828125, "logps/rejected": -1198.2847900390625, "loss": 0.665, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.21991725265979767, "rewards/margins": 0.11259231716394424, "rewards/rejected": -0.3325095772743225, "step": 1610 }, { "epoch": 0.42, "learning_rate": 4.81171502794174e-06, "logits/chosen": -2.7295005321502686, "logits/rejected": -2.718212604522705, "logps/chosen": -1451.7779541015625, "logps/rejected": -1099.2412109375, "loss": 0.6443, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.11921729892492294, "rewards/margins": 0.14378681778907776, "rewards/rejected": -0.2630041241645813, "step": 1620 }, { "epoch": 0.43, "learning_rate": 4.8073427303742584e-06, "logits/chosen": -2.743295907974243, "logits/rejected": -2.7398316860198975, "logps/chosen": -1610.1199951171875, "logps/rejected": -1429.06591796875, "loss": 0.6755, "rewards/accuracies": 0.625, "rewards/chosen": -0.18854033946990967, "rewards/margins": 0.029284056276082993, "rewards/rejected": -0.21782438457012177, "step": 1630 }, { "epoch": 0.43, "learning_rate": 4.802922280948685e-06, "logits/chosen": -2.7415010929107666, "logits/rejected": -2.735572338104248, "logps/chosen": -1472.3653564453125, "logps/rejected": -1607.3350830078125, "loss": 0.6738, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": -0.19889327883720398, "rewards/margins": -0.004034703131765127, "rewards/rejected": -0.1948585957288742, "step": 1640 }, { "epoch": 0.43, "learning_rate": 4.798453771915231e-06, "logits/chosen": -2.673447370529175, "logits/rejected": -2.6908771991729736, "logps/chosen": -1097.9010009765625, "logps/rejected": -1113.3797607421875, "loss": 0.6662, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": -0.20751328766345978, "rewards/margins": 0.0491621233522892, "rewards/rejected": -0.2566754221916199, "step": 1650 }, { "epoch": 0.43, "learning_rate": 4.793937296527062e-06, "logits/chosen": -2.7060468196868896, "logits/rejected": -2.696061849594116, "logps/chosen": -1091.1229248046875, "logps/rejected": -950.5789184570312, "loss": 0.6589, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.2101879119873047, "rewards/margins": 0.07423336803913116, "rewards/rejected": -0.28442126512527466, "step": 1660 }, { "epoch": 0.44, "learning_rate": 4.78937294903835e-06, "logits/chosen": -2.7246451377868652, "logits/rejected": -2.7175862789154053, "logps/chosen": -1581.0328369140625, "logps/rejected": -1174.4051513671875, "loss": 0.6562, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.11526259034872055, "rewards/margins": 0.12269117683172226, "rewards/rejected": -0.23795375227928162, "step": 1670 }, { "epoch": 0.44, "learning_rate": 4.78476082470231e-06, "logits/chosen": -2.7286760807037354, "logits/rejected": -2.7100577354431152, "logps/chosen": -1294.1153564453125, "logps/rejected": -1143.6041259765625, "loss": 0.6792, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.22615864872932434, "rewards/margins": 0.050124991685152054, "rewards/rejected": -0.2762836217880249, "step": 1680 }, { "epoch": 0.44, "learning_rate": 4.780101019769212e-06, "logits/chosen": -2.736978054046631, "logits/rejected": -2.736088275909424, "logps/chosen": -1240.342041015625, "logps/rejected": -1338.625732421875, "loss": 0.6746, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": -0.20585688948631287, "rewards/margins": 0.09276419878005981, "rewards/rejected": -0.29862111806869507, "step": 1690 }, { "epoch": 0.44, "learning_rate": 4.775393631484368e-06, "logits/chosen": -2.7205305099487305, "logits/rejected": -2.7118542194366455, "logps/chosen": -1580.404541015625, "logps/rejected": -1334.212646484375, "loss": 0.665, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.1873815357685089, "rewards/margins": 0.06995360553264618, "rewards/rejected": -0.2573351263999939, "step": 1700 }, { "epoch": 0.44, "eval_logits/chosen": -2.742210626602173, "eval_logits/rejected": -2.7354586124420166, "eval_logps/chosen": -1582.2147216796875, "eval_logps/rejected": -1379.5565185546875, "eval_loss": 0.6644400358200073, "eval_rewards/accuracies": 0.613095223903656, "eval_rewards/chosen": -0.19665634632110596, "eval_rewards/margins": 0.08226174861192703, "eval_rewards/rejected": -0.2789180874824524, "eval_runtime": 221.9276, "eval_samples_per_second": 9.012, "eval_steps_per_second": 0.284, "step": 1700 }, { "epoch": 0.45, "learning_rate": 4.770638758086105e-06, "logits/chosen": -2.7356081008911133, "logits/rejected": -2.731968641281128, "logps/chosen": -1441.8275146484375, "logps/rejected": -1499.3004150390625, "loss": 0.6697, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.2256285697221756, "rewards/margins": 0.053405504673719406, "rewards/rejected": -0.2790340781211853, "step": 1710 }, { "epoch": 0.45, "learning_rate": 4.7658364988037184e-06, "logits/chosen": -2.7387094497680664, "logits/rejected": -2.7272191047668457, "logps/chosen": -1506.8668212890625, "logps/rejected": -1364.9649658203125, "loss": 0.6646, "rewards/accuracies": 0.625, "rewards/chosen": -0.1541571319103241, "rewards/margins": 0.14272871613502502, "rewards/rejected": -0.2968858480453491, "step": 1720 }, { "epoch": 0.45, "learning_rate": 4.760986953855395e-06, "logits/chosen": -2.7447781562805176, "logits/rejected": -2.731154203414917, "logps/chosen": -1475.6947021484375, "logps/rejected": -1100.525146484375, "loss": 0.6701, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.2217240035533905, "rewards/margins": 0.06821813434362411, "rewards/rejected": -0.2899421155452728, "step": 1730 }, { "epoch": 0.46, "learning_rate": 4.756090224446127e-06, "logits/chosen": -2.780522108078003, "logits/rejected": -2.782181978225708, "logps/chosen": -1490.47412109375, "logps/rejected": -1478.94775390625, "loss": 0.676, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.25417935848236084, "rewards/margins": 0.084381103515625, "rewards/rejected": -0.3385604918003082, "step": 1740 }, { "epoch": 0.46, "learning_rate": 4.7511464127655945e-06, "logits/chosen": -2.737290143966675, "logits/rejected": -2.7433362007141113, "logps/chosen": -1266.33056640625, "logps/rejected": -1185.47802734375, "loss": 0.6604, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.19823206961154938, "rewards/margins": 0.0937265157699585, "rewards/rejected": -0.29195863008499146, "step": 1750 }, { "epoch": 0.46, "learning_rate": 4.74615562198604e-06, "logits/chosen": -2.7513489723205566, "logits/rejected": -2.7504875659942627, "logps/chosen": -1600.952880859375, "logps/rejected": -1202.5400390625, "loss": 0.6775, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.19948622584342957, "rewards/margins": 0.025036226958036423, "rewards/rejected": -0.2245224416255951, "step": 1760 }, { "epoch": 0.46, "learning_rate": 4.741117956260107e-06, "logits/chosen": -2.7026009559631348, "logits/rejected": -2.7085928916931152, "logps/chosen": -1406.4818115234375, "logps/rejected": -1320.4462890625, "loss": 0.6576, "rewards/accuracies": 0.5, "rewards/chosen": -0.22521424293518066, "rewards/margins": 0.03839210420846939, "rewards/rejected": -0.26360636949539185, "step": 1770 }, { "epoch": 0.47, "learning_rate": 4.736033520718672e-06, "logits/chosen": -2.690847158432007, "logits/rejected": -2.710789918899536, "logps/chosen": -1289.645263671875, "logps/rejected": -1037.477294921875, "loss": 0.6612, "rewards/accuracies": 0.625, "rewards/chosen": -0.21732059121131897, "rewards/margins": 0.03371669352054596, "rewards/rejected": -0.25103726983070374, "step": 1780 }, { "epoch": 0.47, "learning_rate": 4.730902421468652e-06, "logits/chosen": -2.6835672855377197, "logits/rejected": -2.672365665435791, "logps/chosen": -1312.6485595703125, "logps/rejected": -1355.291748046875, "loss": 0.6555, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.1490660011768341, "rewards/margins": 0.14994415640830994, "rewards/rejected": -0.29901012778282166, "step": 1790 }, { "epoch": 0.47, "learning_rate": 4.7257247655907854e-06, "logits/chosen": -2.7463784217834473, "logits/rejected": -2.750366449356079, "logps/chosen": -1203.7630615234375, "logps/rejected": -1130.248291015625, "loss": 0.6563, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.2655632495880127, "rewards/margins": 0.054849814623594284, "rewards/rejected": -0.3204130530357361, "step": 1800 }, { "epoch": 0.47, "eval_logits/chosen": -2.73252534866333, "eval_logits/rejected": -2.725741147994995, "eval_logps/chosen": -1583.275146484375, "eval_logps/rejected": -1381.0634765625, "eval_loss": 0.6638898849487305, "eval_rewards/accuracies": 0.6210317611694336, "eval_rewards/chosen": -0.20726004242897034, "eval_rewards/margins": 0.08672784268856049, "eval_rewards/rejected": -0.29398787021636963, "eval_runtime": 221.9932, "eval_samples_per_second": 9.009, "eval_steps_per_second": 0.284, "step": 1800 }, { "epoch": 0.47, "learning_rate": 4.720500661137397e-06, "logits/chosen": -2.744943380355835, "logits/rejected": -2.7473301887512207, "logps/chosen": -1646.609619140625, "logps/rejected": -1309.5911865234375, "loss": 0.6391, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.18750329315662384, "rewards/margins": 0.09809277206659317, "rewards/rejected": -0.2855960726737976, "step": 1810 }, { "epoch": 0.48, "learning_rate": 4.71523021713015e-06, "logits/chosen": -2.7237682342529297, "logits/rejected": -2.7082934379577637, "logps/chosen": -1719.270751953125, "logps/rejected": -1236.6881103515625, "loss": 0.6651, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.20721980929374695, "rewards/margins": 0.06654822826385498, "rewards/rejected": -0.27376803755760193, "step": 1820 }, { "epoch": 0.48, "learning_rate": 4.709913543557761e-06, "logits/chosen": -2.7400131225585938, "logits/rejected": -2.740361452102661, "logps/chosen": -1691.213623046875, "logps/rejected": -1475.6851806640625, "loss": 0.6585, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.18694502115249634, "rewards/margins": 0.13363699615001678, "rewards/rejected": -0.3205820322036743, "step": 1830 }, { "epoch": 0.48, "learning_rate": 4.704550751373715e-06, "logits/chosen": -2.7532076835632324, "logits/rejected": -2.75807523727417, "logps/chosen": -1608.8062744140625, "logps/rejected": -1378.0643310546875, "loss": 0.6639, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.2479935884475708, "rewards/margins": 0.10084398090839386, "rewards/rejected": -0.34883755445480347, "step": 1840 }, { "epoch": 0.48, "learning_rate": 4.699141952493941e-06, "logits/chosen": -2.7423033714294434, "logits/rejected": -2.73795223236084, "logps/chosen": -1499.7720947265625, "logps/rejected": -1187.2939453125, "loss": 0.649, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.23238825798034668, "rewards/margins": 0.04651142284274101, "rewards/rejected": -0.2788996994495392, "step": 1850 }, { "epoch": 0.49, "learning_rate": 4.6936872597944814e-06, "logits/chosen": -2.7192182540893555, "logits/rejected": -2.7208571434020996, "logps/chosen": -1266.417236328125, "logps/rejected": -1494.069091796875, "loss": 0.6431, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.2008957415819168, "rewards/margins": 0.16491912305355072, "rewards/rejected": -0.3658148944377899, "step": 1860 }, { "epoch": 0.49, "learning_rate": 4.688186787109136e-06, "logits/chosen": -2.7188289165496826, "logits/rejected": -2.701460361480713, "logps/chosen": -1499.3271484375, "logps/rejected": -1498.5401611328125, "loss": 0.6625, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.2466460019350052, "rewards/margins": 0.04578220099210739, "rewards/rejected": -0.29242822527885437, "step": 1870 }, { "epoch": 0.49, "learning_rate": 4.682640649227085e-06, "logits/chosen": -2.7496445178985596, "logits/rejected": -2.7551424503326416, "logps/chosen": -1668.005126953125, "logps/rejected": -1245.0250244140625, "loss": 0.6558, "rewards/accuracies": 0.625, "rewards/chosen": -0.24418357014656067, "rewards/margins": 0.12311581522226334, "rewards/rejected": -0.3672993779182434, "step": 1880 }, { "epoch": 0.49, "learning_rate": 4.677048961890492e-06, "logits/chosen": -2.733494997024536, "logits/rejected": -2.7331326007843018, "logps/chosen": -1348.33203125, "logps/rejected": -1327.357177734375, "loss": 0.6894, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": -0.31300076842308044, "rewards/margins": -0.028551051393151283, "rewards/rejected": -0.2844497561454773, "step": 1890 }, { "epoch": 0.5, "learning_rate": 4.671411841792096e-06, "logits/chosen": -2.7191219329833984, "logits/rejected": -2.7150659561157227, "logps/chosen": -1335.385986328125, "logps/rejected": -1472.392333984375, "loss": 0.6668, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.2669922113418579, "rewards/margins": 0.07704712450504303, "rewards/rejected": -0.34403929114341736, "step": 1900 }, { "epoch": 0.5, "eval_logits/chosen": -2.742595672607422, "eval_logits/rejected": -2.7350475788116455, "eval_logps/chosen": -1585.14697265625, "eval_logps/rejected": -1384.1845703125, "eval_loss": 0.6620241403579712, "eval_rewards/accuracies": 0.6170634627342224, "eval_rewards/chosen": -0.225979283452034, "eval_rewards/margins": 0.09921804070472717, "eval_rewards/rejected": -0.3251972794532776, "eval_runtime": 222.0637, "eval_samples_per_second": 9.006, "eval_steps_per_second": 0.284, "step": 1900 }, { "epoch": 0.5, "learning_rate": 4.665729406572764e-06, "logits/chosen": -2.7427189350128174, "logits/rejected": -2.753286838531494, "logps/chosen": -1317.99560546875, "logps/rejected": -1335.033203125, "loss": 0.6874, "rewards/accuracies": 0.625, "rewards/chosen": -0.32792624831199646, "rewards/margins": 0.041652776300907135, "rewards/rejected": -0.3695790767669678, "step": 1910 }, { "epoch": 0.5, "learning_rate": 4.660001774819048e-06, "logits/chosen": -2.7054402828216553, "logits/rejected": -2.7036356925964355, "logps/chosen": -1334.2403564453125, "logps/rejected": -1241.926513671875, "loss": 0.6782, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.3110745847225189, "rewards/margins": 0.018700579181313515, "rewards/rejected": -0.3297751545906067, "step": 1920 }, { "epoch": 0.51, "learning_rate": 4.654229066060702e-06, "logits/chosen": -2.7333807945251465, "logits/rejected": -2.742948055267334, "logps/chosen": -1359.8046875, "logps/rejected": -1150.7369384765625, "loss": 0.6598, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.23911187052726746, "rewards/margins": 0.02121734246611595, "rewards/rejected": -0.2603291869163513, "step": 1930 }, { "epoch": 0.51, "learning_rate": 4.648411400768193e-06, "logits/chosen": -2.73518705368042, "logits/rejected": -2.725837230682373, "logps/chosen": -1309.609130859375, "logps/rejected": -1176.9456787109375, "loss": 0.6567, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.264873206615448, "rewards/margins": 0.09074047952890396, "rewards/rejected": -0.355613648891449, "step": 1940 }, { "epoch": 0.51, "learning_rate": 4.642548900350182e-06, "logits/chosen": -2.7353572845458984, "logits/rejected": -2.7142481803894043, "logps/chosen": -1753.3665771484375, "logps/rejected": -1410.8369140625, "loss": 0.6604, "rewards/accuracies": 0.625, "rewards/chosen": -0.19214151799678802, "rewards/margins": 0.10666815936565399, "rewards/rejected": -0.298809677362442, "step": 1950 }, { "epoch": 0.51, "learning_rate": 4.636641687150994e-06, "logits/chosen": -2.7391467094421387, "logits/rejected": -2.717474937438965, "logps/chosen": -1374.9207763671875, "logps/rejected": -1112.2822265625, "loss": 0.6682, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.11961638927459717, "rewards/margins": 0.14005030691623688, "rewards/rejected": -0.25966668128967285, "step": 1960 }, { "epoch": 0.52, "learning_rate": 4.6306898844480615e-06, "logits/chosen": -2.7787628173828125, "logits/rejected": -2.751187324523926, "logps/chosen": -1711.725830078125, "logps/rejected": -1327.750732421875, "loss": 0.6574, "rewards/accuracies": 0.625, "rewards/chosen": -0.2270718514919281, "rewards/margins": 0.06859883666038513, "rewards/rejected": -0.29567068815231323, "step": 1970 }, { "epoch": 0.52, "learning_rate": 4.624693616449358e-06, "logits/chosen": -2.7466940879821777, "logits/rejected": -2.7169535160064697, "logps/chosen": -1387.667724609375, "logps/rejected": -1170.1529541015625, "loss": 0.6672, "rewards/accuracies": 0.5, "rewards/chosen": -0.2690119445323944, "rewards/margins": 0.009896782226860523, "rewards/rejected": -0.27890869975090027, "step": 1980 }, { "epoch": 0.52, "learning_rate": 4.6186530082908e-06, "logits/chosen": -2.7376532554626465, "logits/rejected": -2.7437498569488525, "logps/chosen": -1513.381103515625, "logps/rejected": -1244.107421875, "loss": 0.6808, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.20415648818016052, "rewards/margins": 0.024326255545020103, "rewards/rejected": -0.22848275303840637, "step": 1990 }, { "epoch": 0.52, "learning_rate": 4.612568186033633e-06, "logits/chosen": -2.779754638671875, "logits/rejected": -2.765529155731201, "logps/chosen": -1780.3160400390625, "logps/rejected": -1493.633056640625, "loss": 0.6632, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.18064382672309875, "rewards/margins": 0.1619485467672348, "rewards/rejected": -0.34259235858917236, "step": 2000 }, { "epoch": 0.52, "eval_logits/chosen": -2.744863986968994, "eval_logits/rejected": -2.737123966217041, "eval_logps/chosen": -1581.7919921875, "eval_logps/rejected": -1379.9453125, "eval_loss": 0.660542905330658, "eval_rewards/accuracies": 0.6329365372657776, "eval_rewards/chosen": -0.19242867827415466, "eval_rewards/margins": 0.09037821739912033, "eval_rewards/rejected": -0.2828068733215332, "eval_runtime": 222.0239, "eval_samples_per_second": 9.008, "eval_steps_per_second": 0.284, "step": 2000 }, { "epoch": 0.53, "learning_rate": 4.6064392766618125e-06, "logits/chosen": -2.7288384437561035, "logits/rejected": -2.7342894077301025, "logps/chosen": -1477.8890380859375, "logps/rejected": -1176.8448486328125, "loss": 0.6426, "rewards/accuracies": 0.75, "rewards/chosen": -0.17168466746807098, "rewards/margins": 0.12829741835594177, "rewards/rejected": -0.29998213052749634, "step": 2010 }, { "epoch": 0.53, "learning_rate": 4.60026640807934e-06, "logits/chosen": -2.7314982414245605, "logits/rejected": -2.721731424331665, "logps/chosen": -1486.63818359375, "logps/rejected": -1138.0081787109375, "loss": 0.6408, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.19623301923274994, "rewards/margins": 0.20260627567768097, "rewards/rejected": -0.3988392949104309, "step": 2020 }, { "epoch": 0.53, "learning_rate": 4.594049709107604e-06, "logits/chosen": -2.7123606204986572, "logits/rejected": -2.7007033824920654, "logps/chosen": -1565.0352783203125, "logps/rejected": -1272.5074462890625, "loss": 0.6605, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.25973600149154663, "rewards/margins": 0.08567461371421814, "rewards/rejected": -0.3454105854034424, "step": 2030 }, { "epoch": 0.53, "learning_rate": 4.587789309482687e-06, "logits/chosen": -2.734696626663208, "logits/rejected": -2.7014527320861816, "logps/chosen": -1527.3094482421875, "logps/rejected": -1281.611083984375, "loss": 0.6817, "rewards/accuracies": 0.5, "rewards/chosen": -0.3249433636665344, "rewards/margins": 0.0034400448203086853, "rewards/rejected": -0.3283833861351013, "step": 2040 }, { "epoch": 0.54, "learning_rate": 4.581485339852659e-06, "logits/chosen": -2.7503104209899902, "logits/rejected": -2.744598865509033, "logps/chosen": -1337.9700927734375, "logps/rejected": -1396.9603271484375, "loss": 0.6815, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.26085740327835083, "rewards/margins": 0.07063094526529312, "rewards/rejected": -0.33148834109306335, "step": 2050 }, { "epoch": 0.54, "learning_rate": 4.5751379317748514e-06, "logits/chosen": -2.682860851287842, "logits/rejected": -2.688174247741699, "logps/chosen": -1799.8050537109375, "logps/rejected": -1376.067626953125, "loss": 0.6568, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.16199791431427002, "rewards/margins": 0.1496579945087433, "rewards/rejected": -0.3116559088230133, "step": 2060 }, { "epoch": 0.54, "learning_rate": 4.56874721771311e-06, "logits/chosen": -2.7304446697235107, "logits/rejected": -2.731867551803589, "logps/chosen": -1426.6251220703125, "logps/rejected": -1212.8525390625, "loss": 0.6755, "rewards/accuracies": 0.625, "rewards/chosen": -0.22788569331169128, "rewards/margins": 0.08013808727264404, "rewards/rejected": -0.3080237805843353, "step": 2070 }, { "epoch": 0.54, "learning_rate": 4.562313331035032e-06, "logits/chosen": -2.7180185317993164, "logits/rejected": -2.710294246673584, "logps/chosen": -1632.967041015625, "logps/rejected": -1406.680419921875, "loss": 0.6626, "rewards/accuracies": 0.5, "rewards/chosen": -0.2711654007434845, "rewards/margins": 0.04249387979507446, "rewards/rejected": -0.31365928053855896, "step": 2080 }, { "epoch": 0.55, "learning_rate": 4.555836406009183e-06, "logits/chosen": -2.7600905895233154, "logits/rejected": -2.750833749771118, "logps/chosen": -1816.625244140625, "logps/rejected": -1559.4515380859375, "loss": 0.6585, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.25869041681289673, "rewards/margins": 0.05229368805885315, "rewards/rejected": -0.3109840750694275, "step": 2090 }, { "epoch": 0.55, "learning_rate": 4.5493165778022945e-06, "logits/chosen": -2.732236385345459, "logits/rejected": -2.7228591442108154, "logps/chosen": -1642.137451171875, "logps/rejected": -1425.29296875, "loss": 0.6427, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.20737795531749725, "rewards/margins": 0.16400590538978577, "rewards/rejected": -0.3713838756084442, "step": 2100 }, { "epoch": 0.55, "eval_logits/chosen": -2.7333147525787354, "eval_logits/rejected": -2.726013660430908, "eval_logps/chosen": -1583.61376953125, "eval_logps/rejected": -1382.8006591796875, "eval_loss": 0.6596797108650208, "eval_rewards/accuracies": 0.6230158805847168, "eval_rewards/chosen": -0.21064533293247223, "eval_rewards/margins": 0.10071565955877304, "eval_rewards/rejected": -0.31136101484298706, "eval_runtime": 221.9246, "eval_samples_per_second": 9.012, "eval_steps_per_second": 0.284, "step": 2100 }, { "epoch": 0.55, "learning_rate": 4.542753982476443e-06, "logits/chosen": -2.731502056121826, "logits/rejected": -2.7018508911132812, "logps/chosen": -1598.5562744140625, "logps/rejected": -1559.6695556640625, "loss": 0.6689, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.1821993887424469, "rewards/margins": 0.11268649995326996, "rewards/rejected": -0.29488590359687805, "step": 2110 }, { "epoch": 0.55, "learning_rate": 4.53614875698621e-06, "logits/chosen": -2.695430278778076, "logits/rejected": -2.695596933364868, "logps/chosen": -1473.1234130859375, "logps/rejected": -1316.5185546875, "loss": 0.6676, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.21487128734588623, "rewards/margins": 0.08438606560230255, "rewards/rejected": -0.2992573380470276, "step": 2120 }, { "epoch": 0.56, "learning_rate": 4.529501039175824e-06, "logits/chosen": -2.7299904823303223, "logits/rejected": -2.7255425453186035, "logps/chosen": -1602.1400146484375, "logps/rejected": -1485.03759765625, "loss": 0.6502, "rewards/accuracies": 0.625, "rewards/chosen": -0.18949568271636963, "rewards/margins": 0.15136829018592834, "rewards/rejected": -0.34086400270462036, "step": 2130 }, { "epoch": 0.56, "learning_rate": 4.522810967776287e-06, "logits/chosen": -2.7604377269744873, "logits/rejected": -2.750189781188965, "logps/chosen": -1660.505615234375, "logps/rejected": -1412.712890625, "loss": 0.6334, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.18246378004550934, "rewards/margins": 0.1688774675130844, "rewards/rejected": -0.35134127736091614, "step": 2140 }, { "epoch": 0.56, "learning_rate": 4.516078682402473e-06, "logits/chosen": -2.7063987255096436, "logits/rejected": -2.71260666847229, "logps/chosen": -1543.899169921875, "logps/rejected": -1228.962158203125, "loss": 0.672, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.23417046666145325, "rewards/margins": 0.06204131245613098, "rewards/rejected": -0.29621177911758423, "step": 2150 }, { "epoch": 0.57, "learning_rate": 4.509304323550221e-06, "logits/chosen": -2.761087417602539, "logits/rejected": -2.753976821899414, "logps/chosen": -1480.669921875, "logps/rejected": -1370.41796875, "loss": 0.6594, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.1856047511100769, "rewards/margins": 0.15950414538383484, "rewards/rejected": -0.34510886669158936, "step": 2160 }, { "epoch": 0.57, "learning_rate": 4.502488032593398e-06, "logits/chosen": -2.7342491149902344, "logits/rejected": -2.7242026329040527, "logps/chosen": -1605.33447265625, "logps/rejected": -1437.894287109375, "loss": 0.672, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.30404919385910034, "rewards/margins": 0.0675443634390831, "rewards/rejected": -0.37159356474876404, "step": 2170 }, { "epoch": 0.57, "learning_rate": 4.495629951780951e-06, "logits/chosen": -2.753080129623413, "logits/rejected": -2.7350218296051025, "logps/chosen": -1460.925048828125, "logps/rejected": -1252.1539306640625, "loss": 0.6588, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.24314546585083008, "rewards/margins": 0.0389077290892601, "rewards/rejected": -0.2820531725883484, "step": 2180 }, { "epoch": 0.57, "learning_rate": 4.488730224233941e-06, "logits/chosen": -2.7286741733551025, "logits/rejected": -2.719348430633545, "logps/chosen": -1473.2852783203125, "logps/rejected": -1500.265380859375, "loss": 0.6556, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.2850351929664612, "rewards/margins": 0.12426628917455673, "rewards/rejected": -0.4093014597892761, "step": 2190 }, { "epoch": 0.58, "learning_rate": 4.481788993942547e-06, "logits/chosen": -2.7154481410980225, "logits/rejected": -2.722774028778076, "logps/chosen": -1465.152587890625, "logps/rejected": -1138.082763671875, "loss": 0.6923, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.25783294439315796, "rewards/margins": 0.05801212787628174, "rewards/rejected": -0.3158450722694397, "step": 2200 }, { "epoch": 0.58, "eval_logits/chosen": -2.7242798805236816, "eval_logits/rejected": -2.717473030090332, "eval_logps/chosen": -1583.8399658203125, "eval_logps/rejected": -1383.4486083984375, "eval_loss": 0.659185528755188, "eval_rewards/accuracies": 0.6230158805847168, "eval_rewards/chosen": -0.2129082977771759, "eval_rewards/margins": 0.10493296384811401, "eval_rewards/rejected": -0.3178412616252899, "eval_runtime": 221.9944, "eval_samples_per_second": 9.009, "eval_steps_per_second": 0.284, "step": 2200 }, { "epoch": 0.58, "learning_rate": 4.474806405763076e-06, "logits/chosen": -2.7109556198120117, "logits/rejected": -2.6972789764404297, "logps/chosen": -1306.2476806640625, "logps/rejected": -899.9601440429688, "loss": 0.6719, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.30284592509269714, "rewards/margins": 0.037451691925525665, "rewards/rejected": -0.3402976095676422, "step": 2210 }, { "epoch": 0.58, "learning_rate": 4.4677826054149235e-06, "logits/chosen": -2.637697696685791, "logits/rejected": -2.6475868225097656, "logps/chosen": -1411.45458984375, "logps/rejected": -1260.859130859375, "loss": 0.669, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.23996862769126892, "rewards/margins": 0.13470852375030518, "rewards/rejected": -0.3746771514415741, "step": 2220 }, { "epoch": 0.58, "learning_rate": 4.460717739477543e-06, "logits/chosen": -2.740626096725464, "logits/rejected": -2.7197346687316895, "logps/chosen": -1452.495361328125, "logps/rejected": -1311.1324462890625, "loss": 0.6558, "rewards/accuracies": 0.625, "rewards/chosen": -0.1936483234167099, "rewards/margins": 0.14396671950817108, "rewards/rejected": -0.337615042924881, "step": 2230 }, { "epoch": 0.59, "learning_rate": 4.4536119553873866e-06, "logits/chosen": -2.691063642501831, "logits/rejected": -2.698746681213379, "logps/chosen": -1228.3648681640625, "logps/rejected": -996.9989013671875, "loss": 0.6491, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.21332640945911407, "rewards/margins": 0.14366553723812103, "rewards/rejected": -0.3569919466972351, "step": 2240 }, { "epoch": 0.59, "learning_rate": 4.446465401434824e-06, "logits/chosen": -2.701746940612793, "logits/rejected": -2.6930394172668457, "logps/chosen": -1635.9879150390625, "logps/rejected": -1463.801025390625, "loss": 0.6599, "rewards/accuracies": 0.75, "rewards/chosen": -0.17793354392051697, "rewards/margins": 0.11402539908885956, "rewards/rejected": -0.2919589579105377, "step": 2250 }, { "epoch": 0.59, "learning_rate": 4.43927822676105e-06, "logits/chosen": -2.720634937286377, "logits/rejected": -2.705869436264038, "logps/chosen": -1196.5458984375, "logps/rejected": -1118.008544921875, "loss": 0.662, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.20704011619091034, "rewards/margins": 0.11822967231273651, "rewards/rejected": -0.32526981830596924, "step": 2260 }, { "epoch": 0.59, "learning_rate": 4.432050581354972e-06, "logits/chosen": -2.7368626594543457, "logits/rejected": -2.7253921031951904, "logps/chosen": -1714.7447509765625, "logps/rejected": -1365.946044921875, "loss": 0.6638, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.208714097738266, "rewards/margins": 0.05404313653707504, "rewards/rejected": -0.26275724172592163, "step": 2270 }, { "epoch": 0.6, "learning_rate": 4.424782616050078e-06, "logits/chosen": -2.7176461219787598, "logits/rejected": -2.700206756591797, "logps/chosen": -1514.9925537109375, "logps/rejected": -1475.500732421875, "loss": 0.6488, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.21896132826805115, "rewards/margins": 0.11795832961797714, "rewards/rejected": -0.33691972494125366, "step": 2280 }, { "epoch": 0.6, "learning_rate": 4.4174744825212954e-06, "logits/chosen": -2.7620787620544434, "logits/rejected": -2.7197585105895996, "logps/chosen": -1456.243408203125, "logps/rejected": -1030.5003662109375, "loss": 0.6494, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.2278098315000534, "rewards/margins": 0.1794498711824417, "rewards/rejected": -0.4072597026824951, "step": 2290 }, { "epoch": 0.6, "learning_rate": 4.410126333281815e-06, "logits/chosen": -2.676535129547119, "logits/rejected": -2.6870205402374268, "logps/chosen": -1891.846435546875, "logps/rejected": -1446.581298828125, "loss": 0.6496, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.19136743247509003, "rewards/margins": 0.2605608105659485, "rewards/rejected": -0.4519282281398773, "step": 2300 }, { "epoch": 0.6, "eval_logits/chosen": -2.7234508991241455, "eval_logits/rejected": -2.715916633605957, "eval_logps/chosen": -1586.070556640625, "eval_logps/rejected": -1386.091552734375, "eval_loss": 0.6581032872200012, "eval_rewards/accuracies": 0.6289682388305664, "eval_rewards/chosen": -0.2352151870727539, "eval_rewards/margins": 0.10905227065086365, "eval_rewards/rejected": -0.3442673981189728, "eval_runtime": 222.0195, "eval_samples_per_second": 9.008, "eval_steps_per_second": 0.284, "step": 2300 }, { "epoch": 0.6, "learning_rate": 4.402738321679918e-06, "logits/chosen": -2.74485182762146, "logits/rejected": -2.7311596870422363, "logps/chosen": -1372.6346435546875, "logps/rejected": -1097.726806640625, "loss": 0.6901, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.35119324922561646, "rewards/margins": -0.023585880175232887, "rewards/rejected": -0.3276073634624481, "step": 2310 }, { "epoch": 0.61, "learning_rate": 4.395310601895772e-06, "logits/chosen": -2.713000774383545, "logits/rejected": -2.712918281555176, "logps/chosen": -1417.783935546875, "logps/rejected": -1562.4224853515625, "loss": 0.6689, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.28538185358047485, "rewards/margins": 0.11929192394018173, "rewards/rejected": -0.4046737551689148, "step": 2320 }, { "epoch": 0.61, "learning_rate": 4.38784332893821e-06, "logits/chosen": -2.646317481994629, "logits/rejected": -2.679353713989258, "logps/chosen": -1165.4190673828125, "logps/rejected": -1229.4197998046875, "loss": 0.6631, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.3009629249572754, "rewards/margins": 0.004149970598518848, "rewards/rejected": -0.30511292815208435, "step": 2330 }, { "epoch": 0.61, "learning_rate": 4.380336658641503e-06, "logits/chosen": -2.692584276199341, "logits/rejected": -2.7181296348571777, "logps/chosen": -1560.274169921875, "logps/rejected": -1453.550537109375, "loss": 0.6464, "rewards/accuracies": 0.625, "rewards/chosen": -0.21228572726249695, "rewards/margins": 0.10219845920801163, "rewards/rejected": -0.31448420882225037, "step": 2340 }, { "epoch": 0.62, "learning_rate": 4.372790747662101e-06, "logits/chosen": -2.6889491081237793, "logits/rejected": -2.6906516551971436, "logps/chosen": -1620.80517578125, "logps/rejected": -1415.034423828125, "loss": 0.6674, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.3593459725379944, "rewards/margins": 0.0033436850644648075, "rewards/rejected": -0.36268967390060425, "step": 2350 }, { "epoch": 0.62, "learning_rate": 4.365205753475367e-06, "logits/chosen": -2.675593852996826, "logits/rejected": -2.6628692150115967, "logps/chosen": -1526.362060546875, "logps/rejected": -1447.8023681640625, "loss": 0.6473, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.22609886527061462, "rewards/margins": 0.1290428638458252, "rewards/rejected": -0.3551417291164398, "step": 2360 }, { "epoch": 0.62, "learning_rate": 4.35758183437229e-06, "logits/chosen": -2.7212536334991455, "logits/rejected": -2.7167136669158936, "logps/chosen": -1591.127197265625, "logps/rejected": -1531.5855712890625, "loss": 0.6667, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.22146666049957275, "rewards/margins": 0.12169722467660904, "rewards/rejected": -0.3431639075279236, "step": 2370 }, { "epoch": 0.62, "learning_rate": 4.3499191494561835e-06, "logits/chosen": -2.760378360748291, "logits/rejected": -2.7481420040130615, "logps/chosen": -1869.5081787109375, "logps/rejected": -1497.3836669921875, "loss": 0.6601, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.21052077412605286, "rewards/margins": 0.1314498633146286, "rewards/rejected": -0.34197065234184265, "step": 2380 }, { "epoch": 0.63, "learning_rate": 4.3422178586393615e-06, "logits/chosen": -2.730377197265625, "logits/rejected": -2.743986129760742, "logps/chosen": -1398.206787109375, "logps/rejected": -1236.90673828125, "loss": 0.6555, "rewards/accuracies": 0.625, "rewards/chosen": -0.18501418828964233, "rewards/margins": 0.15820163488388062, "rewards/rejected": -0.34321585297584534, "step": 2390 }, { "epoch": 0.63, "learning_rate": 4.334478122639804e-06, "logits/chosen": -2.745204448699951, "logits/rejected": -2.724517345428467, "logps/chosen": -1631.51318359375, "logps/rejected": -1574.7022705078125, "loss": 0.6668, "rewards/accuracies": 0.625, "rewards/chosen": -0.23339959979057312, "rewards/margins": 0.09116321802139282, "rewards/rejected": -0.32456284761428833, "step": 2400 }, { "epoch": 0.63, "eval_logits/chosen": -2.740978479385376, "eval_logits/rejected": -2.7320845127105713, "eval_logps/chosen": -1587.576904296875, "eval_logps/rejected": -1387.298095703125, "eval_loss": 0.6576688885688782, "eval_rewards/accuracies": 0.6289682388305664, "eval_rewards/chosen": -0.25027817487716675, "eval_rewards/margins": 0.10605475306510925, "eval_rewards/rejected": -0.356332927942276, "eval_runtime": 222.0763, "eval_samples_per_second": 9.006, "eval_steps_per_second": 0.284, "step": 2400 }, { "epoch": 0.63, "learning_rate": 4.3267001029778015e-06, "logits/chosen": -2.757209300994873, "logits/rejected": -2.7432403564453125, "logps/chosen": -1993.5084228515625, "logps/rejected": -1199.38720703125, "loss": 0.651, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.23706431686878204, "rewards/margins": 0.13261531293392181, "rewards/rejected": -0.36967962980270386, "step": 2410 }, { "epoch": 0.63, "learning_rate": 4.318883961972585e-06, "logits/chosen": -2.734022378921509, "logits/rejected": -2.7461204528808594, "logps/chosen": -1391.851806640625, "logps/rejected": -1220.397216796875, "loss": 0.65, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.1621847301721573, "rewards/margins": 0.11607532203197479, "rewards/rejected": -0.2782600522041321, "step": 2420 }, { "epoch": 0.64, "learning_rate": 4.311029862738942e-06, "logits/chosen": -2.7122702598571777, "logits/rejected": -2.7062220573425293, "logps/chosen": -1442.494873046875, "logps/rejected": -1381.0362548828125, "loss": 0.6813, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.2430756539106369, "rewards/margins": 0.1406748741865158, "rewards/rejected": -0.3837505280971527, "step": 2430 }, { "epoch": 0.64, "learning_rate": 4.303137969183804e-06, "logits/chosen": -2.718060255050659, "logits/rejected": -2.7174715995788574, "logps/chosen": -1650.6988525390625, "logps/rejected": -1207.530517578125, "loss": 0.631, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.1915169060230255, "rewards/margins": 0.1620643436908722, "rewards/rejected": -0.3535812199115753, "step": 2440 }, { "epoch": 0.64, "learning_rate": 4.295208446002832e-06, "logits/chosen": -2.736496925354004, "logits/rejected": -2.727999210357666, "logps/chosen": -1543.1021728515625, "logps/rejected": -1205.076416015625, "loss": 0.6544, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.28645798563957214, "rewards/margins": 0.09804262965917587, "rewards/rejected": -0.38450056314468384, "step": 2450 }, { "epoch": 0.64, "learning_rate": 4.287241458676981e-06, "logits/chosen": -2.69891619682312, "logits/rejected": -2.700446367263794, "logps/chosen": -1317.9898681640625, "logps/rejected": -1057.5445556640625, "loss": 0.6642, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.3027532994747162, "rewards/margins": 0.06918701529502869, "rewards/rejected": -0.3719402551651001, "step": 2460 }, { "epoch": 0.65, "learning_rate": 4.279237173469043e-06, "logits/chosen": -2.7122912406921387, "logits/rejected": -2.704481840133667, "logps/chosen": -1588.9444580078125, "logps/rejected": -1400.419921875, "loss": 0.6461, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.16931328177452087, "rewards/margins": 0.1601758748292923, "rewards/rejected": -0.32948917150497437, "step": 2470 }, { "epoch": 0.65, "learning_rate": 4.271195757420177e-06, "logits/chosen": -2.7225699424743652, "logits/rejected": -2.688791275024414, "logps/chosen": -1543.356201171875, "logps/rejected": -1652.3414306640625, "loss": 0.6565, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.22034041583538055, "rewards/margins": 0.047962792217731476, "rewards/rejected": -0.26830318570137024, "step": 2480 }, { "epoch": 0.65, "learning_rate": 4.263117378346425e-06, "logits/chosen": -2.7116904258728027, "logits/rejected": -2.720613956451416, "logps/chosen": -1549.628173828125, "logps/rejected": -1367.11181640625, "loss": 0.6472, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.28746548295021057, "rewards/margins": 0.13900980353355408, "rewards/rejected": -0.4264752268791199, "step": 2490 }, { "epoch": 0.65, "learning_rate": 4.255002204835208e-06, "logits/chosen": -2.7465109825134277, "logits/rejected": -2.7485060691833496, "logps/chosen": -1488.91259765625, "logps/rejected": -1472.1722412109375, "loss": 0.6477, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.2156025469303131, "rewards/margins": 0.14002607762813568, "rewards/rejected": -0.3556286692619324, "step": 2500 }, { "epoch": 0.65, "eval_logits/chosen": -2.7370338439941406, "eval_logits/rejected": -2.728743076324463, "eval_logps/chosen": -1589.1619873046875, "eval_logps/rejected": -1390.239990234375, "eval_loss": 0.6559935808181763, "eval_rewards/accuracies": 0.6309523582458496, "eval_rewards/chosen": -0.2661284804344177, "eval_rewards/margins": 0.11962475627660751, "eval_rewards/rejected": -0.38575324416160583, "eval_runtime": 221.9192, "eval_samples_per_second": 9.012, "eval_steps_per_second": 0.284, "step": 2500 }, { "epoch": 0.66, "learning_rate": 4.246850406241812e-06, "logits/chosen": -2.7424569129943848, "logits/rejected": -2.721717357635498, "logps/chosen": -1479.636962890625, "logps/rejected": -1123.767822265625, "loss": 0.6845, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.3048413395881653, "rewards/margins": 0.08976776897907257, "rewards/rejected": -0.39460912346839905, "step": 2510 }, { "epoch": 0.66, "learning_rate": 4.2386621526858465e-06, "logits/chosen": -2.7249627113342285, "logits/rejected": -2.708723306655884, "logps/chosen": -1231.6832275390625, "logps/rejected": -1143.8583984375, "loss": 0.6696, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.21194536983966827, "rewards/margins": 0.18466496467590332, "rewards/rejected": -0.3966103196144104, "step": 2520 }, { "epoch": 0.66, "learning_rate": 4.2304376150477015e-06, "logits/chosen": -2.738765239715576, "logits/rejected": -2.727811098098755, "logps/chosen": -1535.489990234375, "logps/rejected": -1317.7042236328125, "loss": 0.6717, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.19936420023441315, "rewards/margins": 0.14922715723514557, "rewards/rejected": -0.3485913574695587, "step": 2530 }, { "epoch": 0.66, "learning_rate": 4.222176964964977e-06, "logits/chosen": -2.722945213317871, "logits/rejected": -2.7192633152008057, "logps/chosen": -1187.5484619140625, "logps/rejected": -1205.326416015625, "loss": 0.6474, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.24626651406288147, "rewards/margins": 0.13393202424049377, "rewards/rejected": -0.38019853830337524, "step": 2540 }, { "epoch": 0.67, "learning_rate": 4.213880374828903e-06, "logits/chosen": -2.7221715450286865, "logits/rejected": -2.716590404510498, "logps/chosen": -1716.3052978515625, "logps/rejected": -1616.6943359375, "loss": 0.6642, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.20805975794792175, "rewards/margins": 0.14398939907550812, "rewards/rejected": -0.35204917192459106, "step": 2550 }, { "epoch": 0.67, "learning_rate": 4.2055480177807406e-06, "logits/chosen": -2.6929428577423096, "logits/rejected": -2.6970746517181396, "logps/chosen": -1225.0126953125, "logps/rejected": -1057.3948974609375, "loss": 0.6528, "rewards/accuracies": 0.625, "rewards/chosen": -0.30539470911026, "rewards/margins": 0.0778200551867485, "rewards/rejected": -0.3832147717475891, "step": 2560 }, { "epoch": 0.67, "learning_rate": 4.1971800677081696e-06, "logits/chosen": -2.745729446411133, "logits/rejected": -2.7569854259490967, "logps/chosen": -1561.8831787109375, "logps/rejected": -1445.382568359375, "loss": 0.6349, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.34451374411582947, "rewards/margins": 0.07771845161914825, "rewards/rejected": -0.4222322106361389, "step": 2570 }, { "epoch": 0.68, "learning_rate": 4.188776699241661e-06, "logits/chosen": -2.6861016750335693, "logits/rejected": -2.659060001373291, "logps/chosen": -1728.2652587890625, "logps/rejected": -1673.9222412109375, "loss": 0.6365, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.23023895919322968, "rewards/margins": 0.20911893248558044, "rewards/rejected": -0.4393579065799713, "step": 2580 }, { "epoch": 0.68, "learning_rate": 4.180338087750827e-06, "logits/chosen": -2.753697156906128, "logits/rejected": -2.727743148803711, "logps/chosen": -1934.5179443359375, "logps/rejected": -1417.443115234375, "loss": 0.6394, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.27421361207962036, "rewards/margins": 0.25228267908096313, "rewards/rejected": -0.5264962315559387, "step": 2590 }, { "epoch": 0.68, "learning_rate": 4.1718644093407704e-06, "logits/chosen": -2.7282567024230957, "logits/rejected": -2.704284906387329, "logps/chosen": -1521.0718994140625, "logps/rejected": -1392.9830322265625, "loss": 0.6444, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.2527593672275543, "rewards/margins": 0.17600814998149872, "rewards/rejected": -0.42876753211021423, "step": 2600 }, { "epoch": 0.68, "eval_logits/chosen": -2.7330162525177, "eval_logits/rejected": -2.7239952087402344, "eval_logps/chosen": -1590.8504638671875, "eval_logps/rejected": -1391.5975341796875, "eval_loss": 0.6549809575080872, "eval_rewards/accuracies": 0.6269841194152832, "eval_rewards/chosen": -0.283012717962265, "eval_rewards/margins": 0.11631587892770767, "eval_rewards/rejected": -0.39932864904403687, "eval_runtime": 221.9289, "eval_samples_per_second": 9.012, "eval_steps_per_second": 0.284, "step": 2600 }, { "epoch": 0.68, "learning_rate": 4.163355840848401e-06, "logits/chosen": -2.73819899559021, "logits/rejected": -2.7327933311462402, "logps/chosen": -1506.4268798828125, "logps/rejected": -1232.0145263671875, "loss": 0.6505, "rewards/accuracies": 0.625, "rewards/chosen": -0.2977539896965027, "rewards/margins": 0.10095224529504776, "rewards/rejected": -0.39870625734329224, "step": 2610 }, { "epoch": 0.69, "learning_rate": 4.154812559838748e-06, "logits/chosen": -2.7333149909973145, "logits/rejected": -2.719111919403076, "logps/chosen": -1742.7320556640625, "logps/rejected": -1420.9490966796875, "loss": 0.6734, "rewards/accuracies": 0.5, "rewards/chosen": -0.29098597168922424, "rewards/margins": 0.03577999770641327, "rewards/rejected": -0.3267659544944763, "step": 2620 }, { "epoch": 0.69, "learning_rate": 4.146234744601259e-06, "logits/chosen": -2.731231451034546, "logits/rejected": -2.7138350009918213, "logps/chosen": -1467.53662109375, "logps/rejected": -1297.028076171875, "loss": 0.6314, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.2917880415916443, "rewards/margins": 0.1302862912416458, "rewards/rejected": -0.4220743775367737, "step": 2630 }, { "epoch": 0.69, "learning_rate": 4.137622574146071e-06, "logits/chosen": -2.7165801525115967, "logits/rejected": -2.716395616531372, "logps/chosen": -1543.955322265625, "logps/rejected": -1588.169921875, "loss": 0.6301, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.2739286720752716, "rewards/margins": 0.16008147597312927, "rewards/rejected": -0.4340101182460785, "step": 2640 }, { "epoch": 0.69, "learning_rate": 4.12897622820028e-06, "logits/chosen": -2.6904263496398926, "logits/rejected": -2.6798410415649414, "logps/chosen": -1184.8038330078125, "logps/rejected": -1180.9886474609375, "loss": 0.6495, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.3079219460487366, "rewards/margins": 0.14843253791332245, "rewards/rejected": -0.4563544690608978, "step": 2650 }, { "epoch": 0.7, "learning_rate": 4.120295887204191e-06, "logits/chosen": -2.707143545150757, "logits/rejected": -2.6978511810302734, "logps/chosen": -1218.4644775390625, "logps/rejected": -1025.177490234375, "loss": 0.6622, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.3178409934043884, "rewards/margins": 0.09260964393615723, "rewards/rejected": -0.41045063734054565, "step": 2660 }, { "epoch": 0.7, "learning_rate": 4.111581732307548e-06, "logits/chosen": -2.7155771255493164, "logits/rejected": -2.7257542610168457, "logps/chosen": -1921.700439453125, "logps/rejected": -1344.607421875, "loss": 0.6604, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.24331799149513245, "rewards/margins": 0.26474809646606445, "rewards/rejected": -0.5080660581588745, "step": 2670 }, { "epoch": 0.7, "learning_rate": 4.1028339453657595e-06, "logits/chosen": -2.734602212905884, "logits/rejected": -2.6983163356781006, "logps/chosen": -1651.4810791015625, "logps/rejected": -1358.7333984375, "loss": 0.657, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.2954995036125183, "rewards/margins": 0.22179126739501953, "rewards/rejected": -0.5172907710075378, "step": 2680 }, { "epoch": 0.7, "learning_rate": 4.094052708936096e-06, "logits/chosen": -2.6809024810791016, "logits/rejected": -2.6818182468414307, "logps/chosen": -1520.92626953125, "logps/rejected": -1463.026611328125, "loss": 0.6325, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.37137606739997864, "rewards/margins": 0.08747304975986481, "rewards/rejected": -0.45884910225868225, "step": 2690 }, { "epoch": 0.71, "learning_rate": 4.0852382062738874e-06, "logits/chosen": -2.6768171787261963, "logits/rejected": -2.684788227081299, "logps/chosen": -1481.054931640625, "logps/rejected": -1389.98193359375, "loss": 0.6594, "rewards/accuracies": 0.625, "rewards/chosen": -0.30067363381385803, "rewards/margins": 0.04770331829786301, "rewards/rejected": -0.34837692975997925, "step": 2700 }, { "epoch": 0.71, "eval_logits/chosen": -2.6817612648010254, "eval_logits/rejected": -2.6747777462005615, "eval_logps/chosen": -1598.0084228515625, "eval_logps/rejected": -1400.2867431640625, "eval_loss": 0.6565902829170227, "eval_rewards/accuracies": 0.6190476417541504, "eval_rewards/chosen": -0.3545936048030853, "eval_rewards/margins": 0.1316264271736145, "eval_rewards/rejected": -0.4862200617790222, "eval_runtime": 221.9819, "eval_samples_per_second": 9.01, "eval_steps_per_second": 0.284, "step": 2700 }, { "epoch": 0.71, "learning_rate": 4.076390621328693e-06, "logits/chosen": -2.6784491539001465, "logits/rejected": -2.6636245250701904, "logps/chosen": -1459.29052734375, "logps/rejected": -1261.677001953125, "loss": 0.6739, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.3422325849533081, "rewards/margins": 0.013906337320804596, "rewards/rejected": -0.3561389446258545, "step": 2710 }, { "epoch": 0.71, "learning_rate": 4.067510138740467e-06, "logits/chosen": -2.71091890335083, "logits/rejected": -2.700723171234131, "logps/chosen": -1594.1536865234375, "logps/rejected": -1248.9345703125, "loss": 0.6468, "rewards/accuracies": 0.5, "rewards/chosen": -0.3426397144794464, "rewards/margins": 0.05318804457783699, "rewards/rejected": -0.3958277404308319, "step": 2720 }, { "epoch": 0.71, "learning_rate": 4.058596943835703e-06, "logits/chosen": -2.6955184936523438, "logits/rejected": -2.6890547275543213, "logps/chosen": -1438.625732421875, "logps/rejected": -1318.1397705078125, "loss": 0.6567, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.26787054538726807, "rewards/margins": 0.04538039490580559, "rewards/rejected": -0.31325095891952515, "step": 2730 }, { "epoch": 0.72, "learning_rate": 4.049651222623568e-06, "logits/chosen": -2.7120392322540283, "logits/rejected": -2.705559730529785, "logps/chosen": -1674.390380859375, "logps/rejected": -1225.760009765625, "loss": 0.6136, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.19621030986309052, "rewards/margins": 0.2308935821056366, "rewards/rejected": -0.4271039068698883, "step": 2740 }, { "epoch": 0.72, "learning_rate": 4.040673161792014e-06, "logits/chosen": -2.700012683868408, "logits/rejected": -2.6899571418762207, "logps/chosen": -972.44140625, "logps/rejected": -753.1502685546875, "loss": 0.6859, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.30196860432624817, "rewards/margins": 0.11960021406412125, "rewards/rejected": -0.42156878113746643, "step": 2750 }, { "epoch": 0.72, "learning_rate": 4.031662948703896e-06, "logits/chosen": -2.650031328201294, "logits/rejected": -2.641418933868408, "logps/chosen": -1580.4583740234375, "logps/rejected": -1196.228759765625, "loss": 0.6657, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.29348674416542053, "rewards/margins": 0.051687635481357574, "rewards/rejected": -0.3451744019985199, "step": 2760 }, { "epoch": 0.72, "learning_rate": 4.022620771393047e-06, "logits/chosen": -2.7186498641967773, "logits/rejected": -2.690887928009033, "logps/chosen": -1672.505859375, "logps/rejected": -1422.82080078125, "loss": 0.6726, "rewards/accuracies": 0.625, "rewards/chosen": -0.30704042315483093, "rewards/margins": 0.3519892692565918, "rewards/rejected": -0.6590296030044556, "step": 2770 }, { "epoch": 0.73, "learning_rate": 4.013546818560362e-06, "logits/chosen": -2.704613447189331, "logits/rejected": -2.6747727394104004, "logps/chosen": -1655.041748046875, "logps/rejected": -1089.197265625, "loss": 0.665, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.34044402837753296, "rewards/margins": 0.08583029359579086, "rewards/rejected": -0.4262743592262268, "step": 2780 }, { "epoch": 0.73, "learning_rate": 4.00444127956986e-06, "logits/chosen": -2.7101197242736816, "logits/rejected": -2.6946587562561035, "logps/chosen": -1646.9859619140625, "logps/rejected": -1348.275390625, "loss": 0.6428, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -0.21547237038612366, "rewards/margins": 0.07543666660785675, "rewards/rejected": -0.2909089922904968, "step": 2790 }, { "epoch": 0.73, "learning_rate": 3.9953043444447255e-06, "logits/chosen": -2.6812894344329834, "logits/rejected": -2.666337251663208, "logps/chosen": -1310.0408935546875, "logps/rejected": -1114.087158203125, "loss": 0.6329, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.2714948058128357, "rewards/margins": 0.21535822749137878, "rewards/rejected": -0.48685306310653687, "step": 2800 }, { "epoch": 0.73, "eval_logits/chosen": -2.706270694732666, "eval_logits/rejected": -2.698537588119507, "eval_logps/chosen": -1590.024658203125, "eval_logps/rejected": -1391.0291748046875, "eval_loss": 0.6544455289840698, "eval_rewards/accuracies": 0.625, "eval_rewards/chosen": -0.2747553884983063, "eval_rewards/margins": 0.11888986825942993, "eval_rewards/rejected": -0.3936452269554138, "eval_runtime": 221.9495, "eval_samples_per_second": 9.011, "eval_steps_per_second": 0.284, "step": 2800 }, { "epoch": 0.74, "learning_rate": 3.986136203863355e-06, "logits/chosen": -2.709186553955078, "logits/rejected": -2.710569381713867, "logps/chosen": -1612.9268798828125, "logps/rejected": -1281.6539306640625, "loss": 0.6538, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.1813502460718155, "rewards/margins": 0.16553938388824463, "rewards/rejected": -0.3468896448612213, "step": 2810 }, { "epoch": 0.74, "learning_rate": 3.976937049155365e-06, "logits/chosen": -2.707552909851074, "logits/rejected": -2.7168171405792236, "logps/chosen": -1346.50634765625, "logps/rejected": -1408.813720703125, "loss": 0.6427, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.3057866394519806, "rewards/margins": 0.14527741074562073, "rewards/rejected": -0.45106402039527893, "step": 2820 }, { "epoch": 0.74, "learning_rate": 3.967707072297608e-06, "logits/chosen": -2.720702886581421, "logits/rejected": -2.704594135284424, "logps/chosen": -1848.3275146484375, "logps/rejected": -1304.785888671875, "loss": 0.6654, "rewards/accuracies": 0.625, "rewards/chosen": -0.23301962018013, "rewards/margins": 0.12841656804084778, "rewards/rejected": -0.36143621802330017, "step": 2830 }, { "epoch": 0.74, "learning_rate": 3.958446465910159e-06, "logits/chosen": -2.7212958335876465, "logits/rejected": -2.711705207824707, "logps/chosen": -1518.9945068359375, "logps/rejected": -1312.5887451171875, "loss": 0.6328, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.24658803641796112, "rewards/margins": 0.18430814146995544, "rewards/rejected": -0.43089619278907776, "step": 2840 }, { "epoch": 0.75, "learning_rate": 3.9491554232523066e-06, "logits/chosen": -2.6986780166625977, "logits/rejected": -2.7011802196502686, "logps/chosen": -1586.0, "logps/rejected": -1219.230712890625, "loss": 0.6506, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.3162277638912201, "rewards/margins": 0.15653644502162933, "rewards/rejected": -0.472764253616333, "step": 2850 }, { "epoch": 0.75, "learning_rate": 3.939834138218505e-06, "logits/chosen": -2.7183499336242676, "logits/rejected": -2.7136991024017334, "logps/chosen": -1092.1768798828125, "logps/rejected": -1095.692138671875, "loss": 0.6804, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.3108363747596741, "rewards/margins": 0.07240144908428192, "rewards/rejected": -0.3832378387451172, "step": 2860 }, { "epoch": 0.75, "learning_rate": 3.930482805334339e-06, "logits/chosen": -2.716367721557617, "logits/rejected": -2.7113070487976074, "logps/chosen": -1651.617431640625, "logps/rejected": -1553.928955078125, "loss": 0.6551, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.23448964953422546, "rewards/margins": 0.1761372834444046, "rewards/rejected": -0.41062694787979126, "step": 2870 }, { "epoch": 0.75, "learning_rate": 3.921101619752464e-06, "logits/chosen": -2.7226855754852295, "logits/rejected": -2.7159783840179443, "logps/chosen": -1491.2879638671875, "logps/rejected": -1291.7816162109375, "loss": 0.6551, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.3070013225078583, "rewards/margins": 0.17114727199077606, "rewards/rejected": -0.47814860939979553, "step": 2880 }, { "epoch": 0.76, "learning_rate": 3.911690777248525e-06, "logits/chosen": -2.703758478164673, "logits/rejected": -2.702188491821289, "logps/chosen": -1669.4017333984375, "logps/rejected": -1359.0408935546875, "loss": 0.6651, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.32670092582702637, "rewards/margins": 0.0898340493440628, "rewards/rejected": -0.41653499007225037, "step": 2890 }, { "epoch": 0.76, "learning_rate": 3.902250474217079e-06, "logits/chosen": -2.722134590148926, "logits/rejected": -2.7083685398101807, "logps/chosen": -1339.631591796875, "logps/rejected": -1110.2490234375, "loss": 0.6351, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.299724817276001, "rewards/margins": 0.12701234221458435, "rewards/rejected": -0.4267371594905853, "step": 2900 }, { "epoch": 0.76, "eval_logits/chosen": -2.713578701019287, "eval_logits/rejected": -2.7050318717956543, "eval_logps/chosen": -1591.8255615234375, "eval_logps/rejected": -1393.1846923828125, "eval_loss": 0.6545432209968567, "eval_rewards/accuracies": 0.6269841194152832, "eval_rewards/chosen": -0.292764276266098, "eval_rewards/margins": 0.1224350854754448, "eval_rewards/rejected": -0.4151993691921234, "eval_runtime": 222.0124, "eval_samples_per_second": 9.009, "eval_steps_per_second": 0.284, "step": 2900 }, { "epoch": 0.76, "learning_rate": 3.892780907667495e-06, "logits/chosen": -2.6744871139526367, "logits/rejected": -2.677670478820801, "logps/chosen": -1297.406005859375, "logps/rejected": -1041.806640625, "loss": 0.6526, "rewards/accuracies": 0.625, "rewards/chosen": -0.3169351816177368, "rewards/margins": 0.08249001950025558, "rewards/rejected": -0.399425208568573, "step": 2910 }, { "epoch": 0.76, "learning_rate": 3.883282275219837e-06, "logits/chosen": -2.680647611618042, "logits/rejected": -2.674455165863037, "logps/chosen": -1549.8154296875, "logps/rejected": -1308.754638671875, "loss": 0.6657, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.27796778082847595, "rewards/margins": 0.0887480154633522, "rewards/rejected": -0.36671575903892517, "step": 2920 }, { "epoch": 0.77, "learning_rate": 3.873754775100751e-06, "logits/chosen": -2.7278220653533936, "logits/rejected": -2.7124266624450684, "logps/chosen": -1649.2249755859375, "logps/rejected": -1332.158203125, "loss": 0.6359, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.22023046016693115, "rewards/margins": 0.2440110146999359, "rewards/rejected": -0.46424150466918945, "step": 2930 }, { "epoch": 0.77, "learning_rate": 3.8641986061393145e-06, "logits/chosen": -2.695570468902588, "logits/rejected": -2.6847481727600098, "logps/chosen": -1939.2047119140625, "logps/rejected": -1484.7591552734375, "loss": 0.6457, "rewards/accuracies": 0.75, "rewards/chosen": -0.2145908772945404, "rewards/margins": 0.16744297742843628, "rewards/rejected": -0.3820338845252991, "step": 2940 }, { "epoch": 0.77, "learning_rate": 3.854613967762898e-06, "logits/chosen": -2.713531017303467, "logits/rejected": -2.7223057746887207, "logps/chosen": -1435.392333984375, "logps/rejected": -1526.7779541015625, "loss": 0.6368, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.32843753695487976, "rewards/margins": 0.14957351982593536, "rewards/rejected": -0.4780110716819763, "step": 2950 }, { "epoch": 0.77, "learning_rate": 3.845001059992999e-06, "logits/chosen": -2.70269513130188, "logits/rejected": -2.6946027278900146, "logps/chosen": -1615.549072265625, "logps/rejected": -1480.8746337890625, "loss": 0.6529, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.287524551153183, "rewards/margins": 0.06467927992343903, "rewards/rejected": -0.3522038459777832, "step": 2960 }, { "epoch": 0.78, "learning_rate": 3.835360083441067e-06, "logits/chosen": -2.7028274536132812, "logits/rejected": -2.6871161460876465, "logps/chosen": -1583.056396484375, "logps/rejected": -1453.07568359375, "loss": 0.6492, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.33619558811187744, "rewards/margins": 0.10127142816781998, "rewards/rejected": -0.4374670088291168, "step": 2970 }, { "epoch": 0.78, "learning_rate": 3.825691239304318e-06, "logits/chosen": -2.68107008934021, "logits/rejected": -2.671574592590332, "logps/chosen": -1545.6512451171875, "logps/rejected": -1043.787109375, "loss": 0.6621, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.3671846091747284, "rewards/margins": 0.12449660152196884, "rewards/rejected": -0.4916812479496002, "step": 2980 }, { "epoch": 0.78, "learning_rate": 3.8159947293615385e-06, "logits/chosen": -2.6950478553771973, "logits/rejected": -2.6778013706207275, "logps/chosen": -1619.7373046875, "logps/rejected": -1588.569091796875, "loss": 0.6453, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.2479819506406784, "rewards/margins": 0.2056024968624115, "rewards/rejected": -0.4535844922065735, "step": 2990 }, { "epoch": 0.79, "learning_rate": 3.806270755968866e-06, "logits/chosen": -2.706171989440918, "logits/rejected": -2.684222936630249, "logps/chosen": -1458.9422607421875, "logps/rejected": -1434.80078125, "loss": 0.6724, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.38695085048675537, "rewards/margins": 0.0020756437443196774, "rewards/rejected": -0.3890264630317688, "step": 3000 }, { "epoch": 0.79, "eval_logits/chosen": -2.70687198638916, "eval_logits/rejected": -2.698601484298706, "eval_logps/chosen": -1593.22021484375, "eval_logps/rejected": -1395.8458251953125, "eval_loss": 0.6528115272521973, "eval_rewards/accuracies": 0.6448412537574768, "eval_rewards/chosen": -0.30671125650405884, "eval_rewards/margins": 0.13510096073150635, "eval_rewards/rejected": -0.4418122470378876, "eval_runtime": 222.0026, "eval_samples_per_second": 9.009, "eval_steps_per_second": 0.284, "step": 3000 }, { "epoch": 0.79, "learning_rate": 3.7965195220555784e-06, "logits/chosen": -2.7062435150146484, "logits/rejected": -2.6944046020507812, "logps/chosen": -1621.0589599609375, "logps/rejected": -1340.968994140625, "loss": 0.6426, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.27050885558128357, "rewards/margins": 0.11764784157276154, "rewards/rejected": -0.3881567120552063, "step": 3010 }, { "epoch": 0.79, "learning_rate": 3.786741231119847e-06, "logits/chosen": -2.723007917404175, "logits/rejected": -2.71048903465271, "logps/chosen": -1695.354248046875, "logps/rejected": -1328.361083984375, "loss": 0.6515, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.27845701575279236, "rewards/margins": 0.2164488285779953, "rewards/rejected": -0.4949057996273041, "step": 3020 }, { "epoch": 0.79, "learning_rate": 3.7769360872244992e-06, "logits/chosen": -2.736701488494873, "logits/rejected": -2.733898878097534, "logps/chosen": -1670.4505615234375, "logps/rejected": -1539.7659912109375, "loss": 0.6553, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.20875000953674316, "rewards/margins": 0.13187198340892792, "rewards/rejected": -0.3406220078468323, "step": 3030 }, { "epoch": 0.8, "learning_rate": 3.767104294992754e-06, "logits/chosen": -2.680217742919922, "logits/rejected": -2.673816442489624, "logps/chosen": -1585.65380859375, "logps/rejected": -1541.0491943359375, "loss": 0.638, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.28602826595306396, "rewards/margins": 0.14265409111976624, "rewards/rejected": -0.428682416677475, "step": 3040 }, { "epoch": 0.8, "learning_rate": 3.7572460596039524e-06, "logits/chosen": -2.6832826137542725, "logits/rejected": -2.6885409355163574, "logps/chosen": -1698.9482421875, "logps/rejected": -1336.911865234375, "loss": 0.6208, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.281388521194458, "rewards/margins": 0.2917434573173523, "rewards/rejected": -0.5731319785118103, "step": 3050 }, { "epoch": 0.8, "learning_rate": 3.74736158678928e-06, "logits/chosen": -2.7269351482391357, "logits/rejected": -2.7052764892578125, "logps/chosen": -1629.1763916015625, "logps/rejected": -1432.0966796875, "loss": 0.6493, "rewards/accuracies": 0.625, "rewards/chosen": -0.3689160943031311, "rewards/margins": 0.16314366459846497, "rewards/rejected": -0.5320597290992737, "step": 3060 }, { "epoch": 0.8, "learning_rate": 3.7374510828274673e-06, "logits/chosen": -2.672687530517578, "logits/rejected": -2.6832921504974365, "logps/chosen": -1510.4393310546875, "logps/rejected": -1656.064208984375, "loss": 0.6482, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.4276893734931946, "rewards/margins": 0.14145776629447937, "rewards/rejected": -0.5691471099853516, "step": 3070 }, { "epoch": 0.81, "learning_rate": 3.72751475454049e-06, "logits/chosen": -2.6742148399353027, "logits/rejected": -2.6767220497131348, "logps/chosen": -1322.834716796875, "logps/rejected": -1213.99951171875, "loss": 0.6702, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.31878334283828735, "rewards/margins": 0.08807969093322754, "rewards/rejected": -0.4068630337715149, "step": 3080 }, { "epoch": 0.81, "learning_rate": 3.7175528092892503e-06, "logits/chosen": -2.6825454235076904, "logits/rejected": -2.672701597213745, "logps/chosen": -1248.2161865234375, "logps/rejected": -943.9337768554688, "loss": 0.6487, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.3250463306903839, "rewards/margins": 0.2221144735813141, "rewards/rejected": -0.5471608638763428, "step": 3090 }, { "epoch": 0.81, "learning_rate": 3.7075654549692498e-06, "logits/chosen": -2.683488368988037, "logits/rejected": -2.6680476665496826, "logps/chosen": -1318.1368408203125, "logps/rejected": -1133.6009521484375, "loss": 0.6413, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.33140167593955994, "rewards/margins": 0.13203348219394684, "rewards/rejected": -0.46343517303466797, "step": 3100 }, { "epoch": 0.81, "eval_logits/chosen": -2.6984853744506836, "eval_logits/rejected": -2.689216375350952, "eval_logps/chosen": -1594.0811767578125, "eval_logps/rejected": -1397.078125, "eval_loss": 0.6514426469802856, "eval_rewards/accuracies": 0.6547619104385376, "eval_rewards/chosen": -0.31532174348831177, "eval_rewards/margins": 0.13881219923496246, "eval_rewards/rejected": -0.4541339576244354, "eval_runtime": 222.0759, "eval_samples_per_second": 9.006, "eval_steps_per_second": 0.284, "step": 3100 }, { "epoch": 0.81, "learning_rate": 3.697552900006249e-06, "logits/chosen": -2.7257251739501953, "logits/rejected": -2.721527099609375, "logps/chosen": -1413.564697265625, "logps/rejected": -1260.1339111328125, "loss": 0.6703, "rewards/accuracies": 0.625, "rewards/chosen": -0.3458861708641052, "rewards/margins": 0.1819792091846466, "rewards/rejected": -0.5278654098510742, "step": 3110 }, { "epoch": 0.82, "learning_rate": 3.6875153533519244e-06, "logits/chosen": -2.700303554534912, "logits/rejected": -2.700843095779419, "logps/chosen": -1726.0238037109375, "logps/rejected": -1657.860107421875, "loss": 0.6642, "rewards/accuracies": 0.5, "rewards/chosen": -0.2840934693813324, "rewards/margins": 0.05369790643453598, "rewards/rejected": -0.337791383266449, "step": 3120 }, { "epoch": 0.82, "learning_rate": 3.6774530244794992e-06, "logits/chosen": -2.7197420597076416, "logits/rejected": -2.7181406021118164, "logps/chosen": -1546.3883056640625, "logps/rejected": -1361.9136962890625, "loss": 0.6529, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.3362138867378235, "rewards/margins": 0.11709228903055191, "rewards/rejected": -0.4533061385154724, "step": 3130 }, { "epoch": 0.82, "learning_rate": 3.667366123379378e-06, "logits/chosen": -2.69582200050354, "logits/rejected": -2.7219948768615723, "logps/chosen": -1652.6771240234375, "logps/rejected": -1682.2486572265625, "loss": 0.6973, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.39420560002326965, "rewards/margins": -0.011835318990051746, "rewards/rejected": -0.382370263338089, "step": 3140 }, { "epoch": 0.82, "learning_rate": 3.6572548605547607e-06, "logits/chosen": -2.697044849395752, "logits/rejected": -2.711665153503418, "logps/chosen": -1529.5, "logps/rejected": -1450.67333984375, "loss": 0.6736, "rewards/accuracies": 0.5, "rewards/chosen": -0.39427393674850464, "rewards/margins": 0.12939773499965668, "rewards/rejected": -0.5236716866493225, "step": 3150 }, { "epoch": 0.83, "learning_rate": 3.6471194470172538e-06, "logits/chosen": -2.699864625930786, "logits/rejected": -2.7058236598968506, "logps/chosen": -1312.325439453125, "logps/rejected": -1181.29296875, "loss": 0.6343, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.2711292803287506, "rewards/margins": 0.16613033413887024, "rewards/rejected": -0.43725961446762085, "step": 3160 }, { "epoch": 0.83, "learning_rate": 3.636960094282461e-06, "logits/chosen": -2.713305950164795, "logits/rejected": -2.7030246257781982, "logps/chosen": -1462.4920654296875, "logps/rejected": -1178.053466796875, "loss": 0.6655, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.3815879225730896, "rewards/margins": 0.024677610024809837, "rewards/rejected": -0.4062655568122864, "step": 3170 }, { "epoch": 0.83, "learning_rate": 3.6267770143655743e-06, "logits/chosen": -2.726931095123291, "logits/rejected": -2.735182285308838, "logps/chosen": -1840.1080322265625, "logps/rejected": -1748.771240234375, "loss": 0.6537, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.2249617874622345, "rewards/margins": 0.2421763390302658, "rewards/rejected": -0.4671381413936615, "step": 3180 }, { "epoch": 0.83, "learning_rate": 3.6165704197769484e-06, "logits/chosen": -2.7426035404205322, "logits/rejected": -2.739197254180908, "logps/chosen": -1690.318603515625, "logps/rejected": -1485.94384765625, "loss": 0.6508, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.24536451697349548, "rewards/margins": 0.17550361156463623, "rewards/rejected": -0.4208681583404541, "step": 3190 }, { "epoch": 0.84, "learning_rate": 3.606340523517663e-06, "logits/chosen": -2.6690893173217773, "logits/rejected": -2.6679508686065674, "logps/chosen": -1618.514404296875, "logps/rejected": -1375.9447021484375, "loss": 0.6242, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.2654856741428375, "rewards/margins": 0.12073127925395966, "rewards/rejected": -0.386216938495636, "step": 3200 }, { "epoch": 0.84, "eval_logits/chosen": -2.720609188079834, "eval_logits/rejected": -2.7122809886932373, "eval_logps/chosen": -1594.5162353515625, "eval_logps/rejected": -1397.845947265625, "eval_loss": 0.652283787727356, "eval_rewards/accuracies": 0.6349206566810608, "eval_rewards/chosen": -0.3196706473827362, "eval_rewards/margins": 0.14214123785495758, "eval_rewards/rejected": -0.461811900138855, "eval_runtime": 222.0708, "eval_samples_per_second": 9.006, "eval_steps_per_second": 0.284, "step": 3200 }, { "epoch": 0.84, "learning_rate": 3.5960875390750793e-06, "logits/chosen": -2.683267831802368, "logits/rejected": -2.6658530235290527, "logps/chosen": -1604.8580322265625, "logps/rejected": -1078.8558349609375, "loss": 0.6726, "rewards/accuracies": 0.5, "rewards/chosen": -0.3753311038017273, "rewards/margins": 0.055336564779281616, "rewards/rejected": -0.4306676983833313, "step": 3210 }, { "epoch": 0.84, "learning_rate": 3.585811680418386e-06, "logits/chosen": -2.718259334564209, "logits/rejected": -2.700469732284546, "logps/chosen": -1195.2596435546875, "logps/rejected": -1219.2757568359375, "loss": 0.6469, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.32439124584198, "rewards/margins": 0.08395363390445709, "rewards/rejected": -0.40834489464759827, "step": 3220 }, { "epoch": 0.85, "learning_rate": 3.5755131619941347e-06, "logits/chosen": -2.730173349380493, "logits/rejected": -2.741326332092285, "logps/chosen": -1548.9466552734375, "logps/rejected": -1413.3919677734375, "loss": 0.6699, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.28420156240463257, "rewards/margins": 0.03134525939822197, "rewards/rejected": -0.31554684042930603, "step": 3230 }, { "epoch": 0.85, "learning_rate": 3.565192198721759e-06, "logits/chosen": -2.7104811668395996, "logits/rejected": -2.698362350463867, "logps/chosen": -1477.9722900390625, "logps/rejected": -1029.33251953125, "loss": 0.6688, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.40409666299819946, "rewards/margins": 0.008787902072072029, "rewards/rejected": -0.41288453340530396, "step": 3240 }, { "epoch": 0.85, "learning_rate": 3.5548490059890965e-06, "logits/chosen": -2.718234062194824, "logits/rejected": -2.7058792114257812, "logps/chosen": -1958.7900390625, "logps/rejected": -1565.6016845703125, "loss": 0.6694, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.2763881981372833, "rewards/margins": 0.1657334268093109, "rewards/rejected": -0.44212159514427185, "step": 3250 }, { "epoch": 0.85, "learning_rate": 3.5444837996478903e-06, "logits/chosen": -2.7599310874938965, "logits/rejected": -2.730556011199951, "logps/chosen": -1544.931640625, "logps/rejected": -1118.84912109375, "loss": 0.6464, "rewards/accuracies": 0.625, "rewards/chosen": -0.36453551054000854, "rewards/margins": 0.16067495942115784, "rewards/rejected": -0.5252104997634888, "step": 3260 }, { "epoch": 0.86, "learning_rate": 3.534096796009282e-06, "logits/chosen": -2.7185637950897217, "logits/rejected": -2.7207720279693604, "logps/chosen": -1313.7662353515625, "logps/rejected": -1141.20947265625, "loss": 0.6594, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.26091524958610535, "rewards/margins": 0.19384625554084778, "rewards/rejected": -0.4547615647315979, "step": 3270 }, { "epoch": 0.86, "learning_rate": 3.5236882118393046e-06, "logits/chosen": -2.723361015319824, "logits/rejected": -2.715217113494873, "logps/chosen": -1645.950439453125, "logps/rejected": -1256.7423095703125, "loss": 0.6612, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.3042986989021301, "rewards/margins": 0.16423553228378296, "rewards/rejected": -0.4685342311859131, "step": 3280 }, { "epoch": 0.86, "learning_rate": 3.5132582643543513e-06, "logits/chosen": -2.6780922412872314, "logits/rejected": -2.669283390045166, "logps/chosen": -1180.63427734375, "logps/rejected": -1273.1810302734375, "loss": 0.6426, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.3003751337528229, "rewards/margins": 0.14592352509498596, "rewards/rejected": -0.44629865884780884, "step": 3290 }, { "epoch": 0.86, "learning_rate": 3.5028071712166456e-06, "logits/chosen": -2.7153820991516113, "logits/rejected": -2.699248790740967, "logps/chosen": -1570.4791259765625, "logps/rejected": -1684.700439453125, "loss": 0.6773, "rewards/accuracies": 0.5, "rewards/chosen": -0.4282234311103821, "rewards/margins": -0.009249694645404816, "rewards/rejected": -0.41897374391555786, "step": 3300 }, { "epoch": 0.86, "eval_logits/chosen": -2.713590621948242, "eval_logits/rejected": -2.704197883605957, "eval_logps/chosen": -1592.927978515625, "eval_logps/rejected": -1395.993896484375, "eval_loss": 0.6505909562110901, "eval_rewards/accuracies": 0.6507936716079712, "eval_rewards/chosen": -0.30378803610801697, "eval_rewards/margins": 0.1395045667886734, "eval_rewards/rejected": -0.44329264760017395, "eval_runtime": 222.0133, "eval_samples_per_second": 9.008, "eval_steps_per_second": 0.284, "step": 3300 }, { "epoch": 0.87, "learning_rate": 3.4923351505297008e-06, "logits/chosen": -2.7215983867645264, "logits/rejected": -2.7041127681732178, "logps/chosen": -1594.6470947265625, "logps/rejected": -1549.53369140625, "loss": 0.6382, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.33198311924934387, "rewards/margins": 0.22456741333007812, "rewards/rejected": -0.5565505623817444, "step": 3310 }, { "epoch": 0.87, "learning_rate": 3.481842420833766e-06, "logits/chosen": -2.6897029876708984, "logits/rejected": -2.705749273300171, "logps/chosen": -1553.8692626953125, "logps/rejected": -1572.8138427734375, "loss": 0.6448, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.2950561046600342, "rewards/margins": 0.21469798684120178, "rewards/rejected": -0.5097540616989136, "step": 3320 }, { "epoch": 0.87, "learning_rate": 3.4713292011012645e-06, "logits/chosen": -2.738208055496216, "logits/rejected": -2.7477622032165527, "logps/chosen": -1472.1783447265625, "logps/rejected": -1493.874267578125, "loss": 0.6701, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.3312566876411438, "rewards/margins": 0.14993061125278473, "rewards/rejected": -0.48118728399276733, "step": 3330 }, { "epoch": 0.87, "learning_rate": 3.4607957107322277e-06, "logits/chosen": -2.6952614784240723, "logits/rejected": -2.699683666229248, "logps/chosen": -1731.744140625, "logps/rejected": -1487.7821044921875, "loss": 0.6656, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.3338416814804077, "rewards/margins": 0.04807500168681145, "rewards/rejected": -0.38191670179367065, "step": 3340 }, { "epoch": 0.88, "learning_rate": 3.4502421695497112e-06, "logits/chosen": -2.6966915130615234, "logits/rejected": -2.6960909366607666, "logps/chosen": -1478.6884765625, "logps/rejected": -1387.267578125, "loss": 0.6344, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.3261907696723938, "rewards/margins": 0.17302435636520386, "rewards/rejected": -0.49921512603759766, "step": 3350 }, { "epoch": 0.88, "learning_rate": 3.4396687977952137e-06, "logits/chosen": -2.667715072631836, "logits/rejected": -2.67319655418396, "logps/chosen": -1495.124755859375, "logps/rejected": -1264.6702880859375, "loss": 0.6615, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.3378712236881256, "rewards/margins": 0.07796554267406464, "rewards/rejected": -0.41583672165870667, "step": 3360 }, { "epoch": 0.88, "learning_rate": 3.429075816124075e-06, "logits/chosen": -2.6785030364990234, "logits/rejected": -2.679802179336548, "logps/chosen": -1791.1708984375, "logps/rejected": -1383.6787109375, "loss": 0.6622, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.377760112285614, "rewards/margins": 0.13859833776950836, "rewards/rejected": -0.516358494758606, "step": 3370 }, { "epoch": 0.88, "learning_rate": 3.418463445600874e-06, "logits/chosen": -2.671396493911743, "logits/rejected": -2.685154676437378, "logps/chosen": -1854.9521484375, "logps/rejected": -1450.7947998046875, "loss": 0.6512, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.25897151231765747, "rewards/margins": 0.2364932745695114, "rewards/rejected": -0.4954647421836853, "step": 3380 }, { "epoch": 0.89, "learning_rate": 3.4078319076948173e-06, "logits/chosen": -2.6842753887176514, "logits/rejected": -2.6870522499084473, "logps/chosen": -1303.2357177734375, "logps/rejected": -1232.04638671875, "loss": 0.6741, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.35404831171035767, "rewards/margins": -0.003727942705154419, "rewards/rejected": -0.35032039880752563, "step": 3390 }, { "epoch": 0.89, "learning_rate": 3.3971814242751123e-06, "logits/chosen": -2.6884913444519043, "logits/rejected": -2.6751790046691895, "logps/chosen": -1439.244140625, "logps/rejected": -1143.121826171875, "loss": 0.6531, "rewards/accuracies": 0.625, "rewards/chosen": -0.22703692317008972, "rewards/margins": 0.15256431698799133, "rewards/rejected": -0.37960129976272583, "step": 3400 }, { "epoch": 0.89, "eval_logits/chosen": -2.6711924076080322, "eval_logits/rejected": -2.6620049476623535, "eval_logps/chosen": -1592.909912109375, "eval_logps/rejected": -1395.920654296875, "eval_loss": 0.6505374908447266, "eval_rewards/accuracies": 0.6329365372657776, "eval_rewards/chosen": -0.3036077916622162, "eval_rewards/margins": 0.13895148038864136, "eval_rewards/rejected": -0.44255930185317993, "eval_runtime": 222.0417, "eval_samples_per_second": 9.007, "eval_steps_per_second": 0.284, "step": 3400 }, { "epoch": 0.89, "learning_rate": 3.386512217606339e-06, "logits/chosen": -2.6748526096343994, "logits/rejected": -2.6637487411499023, "logps/chosen": -1303.639892578125, "logps/rejected": -1194.2991943359375, "loss": 0.6513, "rewards/accuracies": 0.5, "rewards/chosen": -0.37782102823257446, "rewards/margins": 0.053777169436216354, "rewards/rejected": -0.4315981864929199, "step": 3410 }, { "epoch": 0.9, "learning_rate": 3.375824510343816e-06, "logits/chosen": -2.66794753074646, "logits/rejected": -2.668147563934326, "logps/chosen": -1223.28271484375, "logps/rejected": -1204.585205078125, "loss": 0.6627, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.4264448285102844, "rewards/margins": 0.11545290797948837, "rewards/rejected": -0.5418976545333862, "step": 3420 }, { "epoch": 0.9, "learning_rate": 3.3651185255289466e-06, "logits/chosen": -2.698690176010132, "logits/rejected": -2.7189040184020996, "logps/chosen": -1692.838134765625, "logps/rejected": -1683.9605712890625, "loss": 0.6314, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.2621743977069855, "rewards/margins": 0.2654629349708557, "rewards/rejected": -0.5276373624801636, "step": 3430 }, { "epoch": 0.9, "learning_rate": 3.354394486584568e-06, "logits/chosen": -2.6886143684387207, "logits/rejected": -2.6708929538726807, "logps/chosen": -1850.3284912109375, "logps/rejected": -1404.5838623046875, "loss": 0.6484, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.2724509835243225, "rewards/margins": 0.15381048619747162, "rewards/rejected": -0.42626142501831055, "step": 3440 }, { "epoch": 0.9, "learning_rate": 3.3436526173102913e-06, "logits/chosen": -2.6439101696014404, "logits/rejected": -2.632524013519287, "logps/chosen": -1454.0938720703125, "logps/rejected": -1376.771728515625, "loss": 0.6482, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.32575541734695435, "rewards/margins": 0.10104187577962875, "rewards/rejected": -0.4267973005771637, "step": 3450 }, { "epoch": 0.91, "learning_rate": 3.3328931418778254e-06, "logits/chosen": -2.676669120788574, "logits/rejected": -2.6826610565185547, "logps/chosen": -1268.5804443359375, "logps/rejected": -1398.8740234375, "loss": 0.6505, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": -0.38719624280929565, "rewards/margins": 0.012085462920367718, "rewards/rejected": -0.39928168058395386, "step": 3460 }, { "epoch": 0.91, "learning_rate": 3.3221162848263028e-06, "logits/chosen": -2.6567957401275635, "logits/rejected": -2.6414246559143066, "logps/chosen": -1245.016357421875, "logps/rejected": -1106.606201171875, "loss": 0.6277, "rewards/accuracies": 0.625, "rewards/chosen": -0.2696836590766907, "rewards/margins": 0.2176477015018463, "rewards/rejected": -0.4873313903808594, "step": 3470 }, { "epoch": 0.91, "learning_rate": 3.3113222710575914e-06, "logits/chosen": -2.642732858657837, "logits/rejected": -2.637019395828247, "logps/chosen": -1382.54296875, "logps/rejected": -1240.7161865234375, "loss": 0.6648, "rewards/accuracies": 0.625, "rewards/chosen": -0.34380394220352173, "rewards/margins": 0.14207597076892853, "rewards/rejected": -0.48587995767593384, "step": 3480 }, { "epoch": 0.91, "learning_rate": 3.300511325831603e-06, "logits/chosen": -2.6817688941955566, "logits/rejected": -2.6702680587768555, "logps/chosen": -1511.686767578125, "logps/rejected": -1441.002197265625, "loss": 0.651, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.4115941524505615, "rewards/margins": 0.09103700518608093, "rewards/rejected": -0.5026311278343201, "step": 3490 }, { "epoch": 0.92, "learning_rate": 3.289683674761592e-06, "logits/chosen": -2.6555826663970947, "logits/rejected": -2.642547607421875, "logps/chosen": -1455.566162109375, "logps/rejected": -1279.9700927734375, "loss": 0.6499, "rewards/accuracies": 0.625, "rewards/chosen": -0.2933773994445801, "rewards/margins": 0.18951551616191864, "rewards/rejected": -0.4828929007053375, "step": 3500 }, { "epoch": 0.92, "eval_logits/chosen": -2.6700594425201416, "eval_logits/rejected": -2.661106824874878, "eval_logps/chosen": -1597.6368408203125, "eval_logps/rejected": -1401.417724609375, "eval_loss": 0.6504107713699341, "eval_rewards/accuracies": 0.6448412537574768, "eval_rewards/chosen": -0.3508760929107666, "eval_rewards/margins": 0.14665423333644867, "eval_rewards/rejected": -0.49753034114837646, "eval_runtime": 222.1254, "eval_samples_per_second": 9.004, "eval_steps_per_second": 0.284, "step": 3500 }, { "epoch": 0.92, "learning_rate": 3.2788395438094444e-06, "logits/chosen": -2.6743717193603516, "logits/rejected": -2.65002179145813, "logps/chosen": -1744.8306884765625, "logps/rejected": -1472.7691650390625, "loss": 0.6635, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.3447830080986023, "rewards/margins": 0.14540955424308777, "rewards/rejected": -0.4901925027370453, "step": 3510 }, { "epoch": 0.92, "learning_rate": 3.2679791592809653e-06, "logits/chosen": -2.6821513175964355, "logits/rejected": -2.6835196018218994, "logps/chosen": -1547.2332763671875, "logps/rejected": -1329.255615234375, "loss": 0.6465, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.288739413022995, "rewards/margins": 0.187089741230011, "rewards/rejected": -0.47582918405532837, "step": 3520 }, { "epoch": 0.92, "learning_rate": 3.257102747821157e-06, "logits/chosen": -2.6712074279785156, "logits/rejected": -2.665398120880127, "logps/chosen": -1841.3148193359375, "logps/rejected": -1583.763427734375, "loss": 0.6481, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.3188974857330322, "rewards/margins": 0.1695767343044281, "rewards/rejected": -0.4884742200374603, "step": 3530 }, { "epoch": 0.93, "learning_rate": 3.246210536409484e-06, "logits/chosen": -2.649895668029785, "logits/rejected": -2.667114019393921, "logps/chosen": -1591.7431640625, "logps/rejected": -1639.8531494140625, "loss": 0.6405, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.34883302450180054, "rewards/margins": 0.20935270190238953, "rewards/rejected": -0.5581857562065125, "step": 3540 }, { "epoch": 0.93, "learning_rate": 3.235302752355142e-06, "logits/chosen": -2.6782820224761963, "logits/rejected": -2.6591145992279053, "logps/chosen": -1445.5625, "logps/rejected": -1209.8031005859375, "loss": 0.6472, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.33042627573013306, "rewards/margins": 0.14861580729484558, "rewards/rejected": -0.47904205322265625, "step": 3550 }, { "epoch": 0.93, "learning_rate": 3.2243796232923097e-06, "logits/chosen": -2.6708455085754395, "logits/rejected": -2.6628527641296387, "logps/chosen": -1832.881591796875, "logps/rejected": -1392.3641357421875, "loss": 0.6558, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.3827039301395416, "rewards/margins": 0.20069828629493713, "rewards/rejected": -0.583402156829834, "step": 3560 }, { "epoch": 0.93, "learning_rate": 3.2134413771754037e-06, "logits/chosen": -2.646705150604248, "logits/rejected": -2.6478943824768066, "logps/chosen": -1485.1029052734375, "logps/rejected": -1430.926025390625, "loss": 0.6465, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.3916592597961426, "rewards/margins": 0.14465120434761047, "rewards/rejected": -0.5363104939460754, "step": 3570 }, { "epoch": 0.94, "learning_rate": 3.2024882422743118e-06, "logits/chosen": -2.6661245822906494, "logits/rejected": -2.630502223968506, "logps/chosen": -1536.616943359375, "logps/rejected": -995.7938232421875, "loss": 0.6477, "rewards/accuracies": 0.5, "rewards/chosen": -0.39404481649398804, "rewards/margins": 0.10233994573354721, "rewards/rejected": -0.49638479948043823, "step": 3580 }, { "epoch": 0.94, "learning_rate": 3.1915204471696425e-06, "logits/chosen": -2.7028446197509766, "logits/rejected": -2.6725103855133057, "logps/chosen": -1654.890380859375, "logps/rejected": -1495.13916015625, "loss": 0.6671, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.2148229330778122, "rewards/margins": 0.2208261936903, "rewards/rejected": -0.4356490969657898, "step": 3590 }, { "epoch": 0.94, "learning_rate": 3.180538220747943e-06, "logits/chosen": -2.67252779006958, "logits/rejected": -2.651607036590576, "logps/chosen": -1619.154541015625, "logps/rejected": -1425.920166015625, "loss": 0.6439, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.3832196593284607, "rewards/margins": 0.1527005434036255, "rewards/rejected": -0.535920262336731, "step": 3600 }, { "epoch": 0.94, "eval_logits/chosen": -2.684110164642334, "eval_logits/rejected": -2.6758134365081787, "eval_logps/chosen": -1597.77294921875, "eval_logps/rejected": -1401.4176025390625, "eval_loss": 0.6509248614311218, "eval_rewards/accuracies": 0.6349206566810608, "eval_rewards/chosen": -0.3522396683692932, "eval_rewards/margins": 0.1452905237674713, "eval_rewards/rejected": -0.4975302219390869, "eval_runtime": 221.9749, "eval_samples_per_second": 9.01, "eval_steps_per_second": 0.284, "step": 3600 }, { "epoch": 0.94, "learning_rate": 3.1695417921969287e-06, "logits/chosen": -2.679503917694092, "logits/rejected": -2.6619620323181152, "logps/chosen": -1550.0435791015625, "logps/rejected": -1238.9837646484375, "loss": 0.6431, "rewards/accuracies": 0.5, "rewards/chosen": -0.37688523530960083, "rewards/margins": 0.07274709641933441, "rewards/rejected": -0.44963231682777405, "step": 3610 }, { "epoch": 0.95, "learning_rate": 3.158531391000697e-06, "logits/chosen": -2.697350025177002, "logits/rejected": -2.6872169971466064, "logps/chosen": -1467.5635986328125, "logps/rejected": -1384.6875, "loss": 0.6375, "rewards/accuracies": 0.625, "rewards/chosen": -0.3753679394721985, "rewards/margins": 0.08869564533233643, "rewards/rejected": -0.4640636444091797, "step": 3620 }, { "epoch": 0.95, "learning_rate": 3.147507246934943e-06, "logits/chosen": -2.6543407440185547, "logits/rejected": -2.6734654903411865, "logps/chosen": -1608.889892578125, "logps/rejected": -1336.0, "loss": 0.6437, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.3899288773536682, "rewards/margins": 0.1723022758960724, "rewards/rejected": -0.562231183052063, "step": 3630 }, { "epoch": 0.95, "learning_rate": 3.136469590062158e-06, "logits/chosen": -2.702857494354248, "logits/rejected": -2.6768500804901123, "logps/chosen": -1747.8372802734375, "logps/rejected": -1472.735107421875, "loss": 0.6332, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.21806052327156067, "rewards/margins": 0.20363029837608337, "rewards/rejected": -0.42169085144996643, "step": 3640 }, { "epoch": 0.96, "learning_rate": 3.1254186507268354e-06, "logits/chosen": -2.7096176147460938, "logits/rejected": -2.6751861572265625, "logps/chosen": -1822.2640380859375, "logps/rejected": -1292.9925537109375, "loss": 0.6645, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.3251684606075287, "rewards/margins": 0.17529870569705963, "rewards/rejected": -0.5004671812057495, "step": 3650 }, { "epoch": 0.96, "learning_rate": 3.114354659550656e-06, "logits/chosen": -2.6707568168640137, "logits/rejected": -2.656738758087158, "logps/chosen": -1582.6048583984375, "logps/rejected": -1370.0179443359375, "loss": 0.6368, "rewards/accuracies": 0.625, "rewards/chosen": -0.4510306417942047, "rewards/margins": 0.1725241243839264, "rewards/rejected": -0.6235548257827759, "step": 3660 }, { "epoch": 0.96, "learning_rate": 3.1032778474276816e-06, "logits/chosen": -2.6743695735931396, "logits/rejected": -2.6786069869995117, "logps/chosen": -1812.0972900390625, "logps/rejected": -1499.954833984375, "loss": 0.6378, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.42497795820236206, "rewards/margins": 0.13020361959934235, "rewards/rejected": -0.5551815629005432, "step": 3670 }, { "epoch": 0.96, "learning_rate": 3.092188445519532e-06, "logits/chosen": -2.6756932735443115, "logits/rejected": -2.6719775199890137, "logps/chosen": -1629.5001220703125, "logps/rejected": -1444.050048828125, "loss": 0.6518, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.39225250482559204, "rewards/margins": 0.19171519577503204, "rewards/rejected": -0.5839677453041077, "step": 3680 }, { "epoch": 0.97, "learning_rate": 3.081086685250565e-06, "logits/chosen": -2.71132755279541, "logits/rejected": -2.7033679485321045, "logps/chosen": -1543.3043212890625, "logps/rejected": -1258.254638671875, "loss": 0.6459, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.3387759327888489, "rewards/margins": 0.23766681551933289, "rewards/rejected": -0.5764427781105042, "step": 3690 }, { "epoch": 0.97, "learning_rate": 3.0699727983030434e-06, "logits/chosen": -2.6973066329956055, "logits/rejected": -2.660385847091675, "logps/chosen": -1589.33544921875, "logps/rejected": -1132.1702880859375, "loss": 0.6279, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.3620646297931671, "rewards/margins": 0.19742536544799805, "rewards/rejected": -0.5594899654388428, "step": 3700 }, { "epoch": 0.97, "eval_logits/chosen": -2.7011826038360596, "eval_logits/rejected": -2.691807746887207, "eval_logps/chosen": -1602.89501953125, "eval_logps/rejected": -1406.66748046875, "eval_loss": 0.6505332589149475, "eval_rewards/accuracies": 0.6309523582458496, "eval_rewards/chosen": -0.4034595787525177, "eval_rewards/margins": 0.14656895399093628, "eval_rewards/rejected": -0.5500285625457764, "eval_runtime": 222.0396, "eval_samples_per_second": 9.007, "eval_steps_per_second": 0.284, "step": 3700 }, { "epoch": 0.97, "learning_rate": 3.058847016612301e-06, "logits/chosen": -2.7201943397521973, "logits/rejected": -2.7136852741241455, "logps/chosen": -1556.614501953125, "logps/rejected": -1279.77587890625, "loss": 0.6211, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.3983418047428131, "rewards/margins": 0.14020150899887085, "rewards/rejected": -0.5385433435440063, "step": 3710 }, { "epoch": 0.97, "learning_rate": 3.0477095723619034e-06, "logits/chosen": -2.7106566429138184, "logits/rejected": -2.7031445503234863, "logps/chosen": -1471.138916015625, "logps/rejected": -1194.182373046875, "loss": 0.636, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.31719970703125, "rewards/margins": 0.25403183698654175, "rewards/rejected": -0.571231484413147, "step": 3720 }, { "epoch": 0.98, "learning_rate": 3.0365606979788003e-06, "logits/chosen": -2.7038567066192627, "logits/rejected": -2.717050552368164, "logps/chosen": -1422.19091796875, "logps/rejected": -1356.302001953125, "loss": 0.6515, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.3847157061100006, "rewards/margins": 0.13767775893211365, "rewards/rejected": -0.5223934650421143, "step": 3730 }, { "epoch": 0.98, "learning_rate": 3.0254006261284786e-06, "logits/chosen": -2.7265188694000244, "logits/rejected": -2.696443557739258, "logps/chosen": -1579.3212890625, "logps/rejected": -1215.633056640625, "loss": 0.6335, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.3812587261199951, "rewards/margins": 0.18493662774562836, "rewards/rejected": -0.5661953687667847, "step": 3740 }, { "epoch": 0.98, "learning_rate": 3.0142295897101032e-06, "logits/chosen": -2.677203416824341, "logits/rejected": -2.663817882537842, "logps/chosen": -1657.607421875, "logps/rejected": -1373.084716796875, "loss": 0.6332, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.44950050115585327, "rewards/margins": 0.10747332870960236, "rewards/rejected": -0.5569738149642944, "step": 3750 }, { "epoch": 0.98, "learning_rate": 3.0030478218516578e-06, "logits/chosen": -2.752816677093506, "logits/rejected": -2.7207674980163574, "logps/chosen": -1824.684326171875, "logps/rejected": -1472.426513671875, "loss": 0.6291, "rewards/accuracies": 0.75, "rewards/chosen": -0.31192246079444885, "rewards/margins": 0.19837240874767303, "rewards/rejected": -0.5102948546409607, "step": 3760 }, { "epoch": 0.99, "learning_rate": 2.9918555559050826e-06, "logits/chosen": -2.711655378341675, "logits/rejected": -2.718435287475586, "logps/chosen": -1729.4273681640625, "logps/rejected": -1415.324951171875, "loss": 0.6432, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.34275177121162415, "rewards/margins": 0.1975889950990677, "rewards/rejected": -0.540340781211853, "step": 3770 }, { "epoch": 0.99, "learning_rate": 2.980653025441399e-06, "logits/chosen": -2.6924350261688232, "logits/rejected": -2.675720691680908, "logps/chosen": -1520.789794921875, "logps/rejected": -1284.784423828125, "loss": 0.6459, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": -0.5108143091201782, "rewards/margins": 0.06858311593532562, "rewards/rejected": -0.5793974995613098, "step": 3780 }, { "epoch": 0.99, "learning_rate": 2.969440464245841e-06, "logits/chosen": -2.6855132579803467, "logits/rejected": -2.6786370277404785, "logps/chosen": -1267.9501953125, "logps/rejected": -1170.1279296875, "loss": 0.6634, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.5093327760696411, "rewards/margins": -0.006477591581642628, "rewards/rejected": -0.5028551816940308, "step": 3790 }, { "epoch": 0.99, "learning_rate": 2.95821810631297e-06, "logits/chosen": -2.69722580909729, "logits/rejected": -2.6795449256896973, "logps/chosen": -1494.9287109375, "logps/rejected": -1520.502685546875, "loss": 0.6443, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.5035138726234436, "rewards/margins": 0.12939448654651642, "rewards/rejected": -0.6329083442687988, "step": 3800 }, { "epoch": 0.99, "eval_logits/chosen": -2.696512460708618, "eval_logits/rejected": -2.6875741481781006, "eval_logps/chosen": -1602.2508544921875, "eval_logps/rejected": -1406.07275390625, "eval_loss": 0.6497198343276978, "eval_rewards/accuracies": 0.6289682388305664, "eval_rewards/chosen": -0.3970177173614502, "eval_rewards/margins": 0.14706376194953918, "eval_rewards/rejected": -0.544081449508667, "eval_runtime": 221.9365, "eval_samples_per_second": 9.012, "eval_steps_per_second": 0.284, "step": 3800 }, { "epoch": 1.0, "learning_rate": 2.946986185841801e-06, "logits/chosen": -2.7158286571502686, "logits/rejected": -2.702641248703003, "logps/chosen": -1648.909423828125, "logps/rejected": -1226.8458251953125, "loss": 0.6522, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.35285401344299316, "rewards/margins": 0.07958800345659256, "rewards/rejected": -0.4324420392513275, "step": 3810 }, { "epoch": 1.0, "learning_rate": 2.935744937230903e-06, "logits/chosen": -2.6734156608581543, "logits/rejected": -2.685497760772705, "logps/chosen": -1302.2877197265625, "logps/rejected": -1202.2618408203125, "loss": 0.6363, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.5028305649757385, "rewards/margins": 0.08032174408435822, "rewards/rejected": -0.5831522941589355, "step": 3820 }, { "epoch": 1.0, "learning_rate": 2.924494595073517e-06, "logits/chosen": -2.6925551891326904, "logits/rejected": -2.7131991386413574, "logps/chosen": -1666.137939453125, "logps/rejected": -1432.2186279296875, "loss": 0.6262, "rewards/accuracies": 0.625, "rewards/chosen": -0.37226518988609314, "rewards/margins": 0.2526193857192993, "rewards/rejected": -0.6248846054077148, "step": 3830 }, { "epoch": 1.0, "learning_rate": 2.9132353941526575e-06, "logits/chosen": -2.679866313934326, "logits/rejected": -2.6491472721099854, "logps/chosen": -1389.10986328125, "logps/rejected": -1554.234130859375, "loss": 0.6387, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.35313764214515686, "rewards/margins": 0.24915650486946106, "rewards/rejected": -0.6022941470146179, "step": 3840 }, { "epoch": 1.01, "learning_rate": 2.901967569436209e-06, "logits/chosen": -2.6779770851135254, "logits/rejected": -2.6733503341674805, "logps/chosen": -1629.6099853515625, "logps/rejected": -1241.42333984375, "loss": 0.623, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.405801922082901, "rewards/margins": 0.10763572156429291, "rewards/rejected": -0.5134376287460327, "step": 3850 }, { "epoch": 1.01, "learning_rate": 2.89069135607203e-06, "logits/chosen": -2.718233823776245, "logits/rejected": -2.6924235820770264, "logps/chosen": -1733.679443359375, "logps/rejected": -1611.1083984375, "loss": 0.6483, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.25229984521865845, "rewards/margins": 0.31755656003952026, "rewards/rejected": -0.5698564648628235, "step": 3860 }, { "epoch": 1.01, "learning_rate": 2.8794069893830386e-06, "logits/chosen": -2.715327024459839, "logits/rejected": -2.715574264526367, "logps/chosen": -1536.84765625, "logps/rejected": -1138.0296630859375, "loss": 0.6242, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.4053262770175934, "rewards/margins": 0.10040046274662018, "rewards/rejected": -0.50572669506073, "step": 3870 }, { "epoch": 1.02, "learning_rate": 2.8681147048623038e-06, "logits/chosen": -2.6821646690368652, "logits/rejected": -2.6729283332824707, "logps/chosen": -1832.4056396484375, "logps/rejected": -1504.1866455078125, "loss": 0.6356, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.3265860378742218, "rewards/margins": 0.22155019640922546, "rewards/rejected": -0.5481362342834473, "step": 3880 }, { "epoch": 1.02, "learning_rate": 2.8568147381681333e-06, "logits/chosen": -2.717892646789551, "logits/rejected": -2.7001843452453613, "logps/chosen": -1530.6292724609375, "logps/rejected": -1179.5780029296875, "loss": 0.6367, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.31728649139404297, "rewards/margins": 0.2593171000480652, "rewards/rejected": -0.5766035318374634, "step": 3890 }, { "epoch": 1.02, "learning_rate": 2.8455073251191533e-06, "logits/chosen": -2.6777820587158203, "logits/rejected": -2.677088737487793, "logps/chosen": -1128.9188232421875, "logps/rejected": -1106.5223388671875, "loss": 0.6355, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.4586946368217468, "rewards/margins": 0.09128829836845398, "rewards/rejected": -0.5499829649925232, "step": 3900 }, { "epoch": 1.02, "eval_logits/chosen": -2.7038865089416504, "eval_logits/rejected": -2.6949610710144043, "eval_logps/chosen": -1597.9246826171875, "eval_logps/rejected": -1401.5294189453125, "eval_loss": 0.6484230756759644, "eval_rewards/accuracies": 0.6349206566810608, "eval_rewards/chosen": -0.3537573218345642, "eval_rewards/margins": 0.14489062130451202, "eval_rewards/rejected": -0.49864792823791504, "eval_runtime": 222.0835, "eval_samples_per_second": 9.006, "eval_steps_per_second": 0.284, "step": 3900 }, { "epoch": 1.02, "learning_rate": 2.8341927016893887e-06, "logits/chosen": -2.691997528076172, "logits/rejected": -2.696678400039673, "logps/chosen": -1353.6199951171875, "logps/rejected": -1345.487548828125, "loss": 0.6653, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.397295743227005, "rewards/margins": 0.09288805723190308, "rewards/rejected": -0.4901837706565857, "step": 3910 }, { "epoch": 1.03, "learning_rate": 2.822871104003335e-06, "logits/chosen": -2.702699661254883, "logits/rejected": -2.679598331451416, "logps/chosen": -1657.781982421875, "logps/rejected": -1315.1046142578125, "loss": 0.6034, "rewards/accuracies": 0.75, "rewards/chosen": -0.319889098405838, "rewards/margins": 0.31032368540763855, "rewards/rejected": -0.6302127838134766, "step": 3920 }, { "epoch": 1.03, "learning_rate": 2.8115427683310355e-06, "logits/chosen": -2.693666696548462, "logits/rejected": -2.680574893951416, "logps/chosen": -1606.5465087890625, "logps/rejected": -1350.802734375, "loss": 0.6385, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.41127365827560425, "rewards/margins": 0.09982170164585114, "rewards/rejected": -0.511095404624939, "step": 3930 }, { "epoch": 1.03, "learning_rate": 2.8002079310831477e-06, "logits/chosen": -2.656543016433716, "logits/rejected": -2.647818088531494, "logps/chosen": -1718.5107421875, "logps/rejected": -1301.51416015625, "loss": 0.658, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.3816748261451721, "rewards/margins": 0.1260094940662384, "rewards/rejected": -0.5076843500137329, "step": 3940 }, { "epoch": 1.03, "learning_rate": 2.7888668288060095e-06, "logits/chosen": -2.704258680343628, "logits/rejected": -2.706493377685547, "logps/chosen": -1798.8916015625, "logps/rejected": -1319.958251953125, "loss": 0.633, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.3321087956428528, "rewards/margins": 0.25544315576553345, "rewards/rejected": -0.5875519514083862, "step": 3950 }, { "epoch": 1.04, "learning_rate": 2.7775196981767044e-06, "logits/chosen": -2.705000400543213, "logits/rejected": -2.6998260021209717, "logps/chosen": -1702.5657958984375, "logps/rejected": -1557.700927734375, "loss": 0.6578, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.4326794147491455, "rewards/margins": 0.19303780794143677, "rewards/rejected": -0.6257172226905823, "step": 3960 }, { "epoch": 1.04, "learning_rate": 2.7661667759981213e-06, "logits/chosen": -2.7508487701416016, "logits/rejected": -2.741401433944702, "logps/chosen": -1811.778564453125, "logps/rejected": -1451.1256103515625, "loss": 0.6158, "rewards/accuracies": 0.75, "rewards/chosen": -0.2792471945285797, "rewards/margins": 0.21910421550273895, "rewards/rejected": -0.49835139513015747, "step": 3970 }, { "epoch": 1.04, "learning_rate": 2.7548082991940137e-06, "logits/chosen": -2.695704936981201, "logits/rejected": -2.6940112113952637, "logps/chosen": -1396.2000732421875, "logps/rejected": -1184.29638671875, "loss": 0.6579, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.515252947807312, "rewards/margins": 0.2268890142440796, "rewards/rejected": -0.7421420216560364, "step": 3980 }, { "epoch": 1.04, "learning_rate": 2.743444504804051e-06, "logits/chosen": -2.6837940216064453, "logits/rejected": -2.6780648231506348, "logps/chosen": -1294.56884765625, "logps/rejected": -1170.3057861328125, "loss": 0.6325, "rewards/accuracies": 0.625, "rewards/chosen": -0.42278486490249634, "rewards/margins": 0.1340743750333786, "rewards/rejected": -0.5568591952323914, "step": 3990 }, { "epoch": 1.05, "learning_rate": 2.7320756299788788e-06, "logits/chosen": -2.723829746246338, "logits/rejected": -2.7320613861083984, "logps/chosen": -1578.4306640625, "logps/rejected": -1222.0570068359375, "loss": 0.6683, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -0.5014371275901794, "rewards/margins": -0.04650117829442024, "rewards/rejected": -0.4549359679222107, "step": 4000 }, { "epoch": 1.05, "eval_logits/chosen": -2.7079918384552, "eval_logits/rejected": -2.699162244796753, "eval_logps/chosen": -1598.626220703125, "eval_logps/rejected": -1402.8544921875, "eval_loss": 0.6481702923774719, "eval_rewards/accuracies": 0.6349206566810608, "eval_rewards/chosen": -0.3607728183269501, "eval_rewards/margins": 0.1511262059211731, "eval_rewards/rejected": -0.5118989944458008, "eval_runtime": 221.9957, "eval_samples_per_second": 9.009, "eval_steps_per_second": 0.284, "step": 4000 }, { "epoch": 1.05, "learning_rate": 2.7207019119751644e-06, "logits/chosen": -2.692671298980713, "logits/rejected": -2.6763689517974854, "logps/chosen": -1635.9403076171875, "logps/rejected": -1283.6893310546875, "loss": 0.6155, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.24907469749450684, "rewards/margins": 0.21768009662628174, "rewards/rejected": -0.46675482392311096, "step": 4010 }, { "epoch": 1.05, "learning_rate": 2.7093235881506474e-06, "logits/chosen": -2.6583075523376465, "logits/rejected": -2.6455483436584473, "logps/chosen": -1569.704833984375, "logps/rejected": -1339.1483154296875, "loss": 0.648, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.4154641628265381, "rewards/margins": 0.11932597309350967, "rewards/rejected": -0.5347901582717896, "step": 4020 }, { "epoch": 1.05, "learning_rate": 2.6979408959591863e-06, "logits/chosen": -2.676906108856201, "logits/rejected": -2.654895305633545, "logps/chosen": -1454.9947509765625, "logps/rejected": -1063.083740234375, "loss": 0.6484, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.3592832684516907, "rewards/margins": 0.1434701383113861, "rewards/rejected": -0.5027534365653992, "step": 4030 }, { "epoch": 1.06, "learning_rate": 2.6865540729458034e-06, "logits/chosen": -2.722224235534668, "logits/rejected": -2.6849489212036133, "logps/chosen": -1687.6962890625, "logps/rejected": -1196.1148681640625, "loss": 0.6302, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.3025377690792084, "rewards/margins": 0.15057072043418884, "rewards/rejected": -0.453108549118042, "step": 4040 }, { "epoch": 1.06, "learning_rate": 2.675163356741726e-06, "logits/chosen": -2.6830029487609863, "logits/rejected": -2.683077812194824, "logps/chosen": -1607.4324951171875, "logps/rejected": -1472.721435546875, "loss": 0.6434, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.3175775408744812, "rewards/margins": 0.12574470043182373, "rewards/rejected": -0.44332224130630493, "step": 4050 }, { "epoch": 1.06, "learning_rate": 2.6637689850594285e-06, "logits/chosen": -2.6846535205841064, "logits/rejected": -2.6847877502441406, "logps/chosen": -1888.1953125, "logps/rejected": -1392.2398681640625, "loss": 0.6614, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.35660284757614136, "rewards/margins": 0.07980741560459137, "rewards/rejected": -0.4364103376865387, "step": 4060 }, { "epoch": 1.07, "learning_rate": 2.652371195687671e-06, "logits/chosen": -2.6865925788879395, "logits/rejected": -2.6678271293640137, "logps/chosen": -1839.5250244140625, "logps/rejected": -1401.562744140625, "loss": 0.6581, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.3077114224433899, "rewards/margins": 0.15223054587841034, "rewards/rejected": -0.45994195342063904, "step": 4070 }, { "epoch": 1.07, "learning_rate": 2.64097022648654e-06, "logits/chosen": -2.7195897102355957, "logits/rejected": -2.709167957305908, "logps/chosen": -1437.06103515625, "logps/rejected": -1188.230712890625, "loss": 0.6318, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.2933391332626343, "rewards/margins": 0.14199650287628174, "rewards/rejected": -0.435335636138916, "step": 4080 }, { "epoch": 1.07, "learning_rate": 2.6295663153824774e-06, "logits/chosen": -2.676091194152832, "logits/rejected": -2.6590778827667236, "logps/chosen": -1539.490478515625, "logps/rejected": -1512.3614501953125, "loss": 0.6538, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.3669421076774597, "rewards/margins": 0.07918572425842285, "rewards/rejected": -0.44612783193588257, "step": 4090 }, { "epoch": 1.07, "learning_rate": 2.6181597003633218e-06, "logits/chosen": -2.722808599472046, "logits/rejected": -2.7022972106933594, "logps/chosen": -1721.559326171875, "logps/rejected": -1385.15625, "loss": 0.6459, "rewards/accuracies": 0.625, "rewards/chosen": -0.2962713837623596, "rewards/margins": 0.18515335023403168, "rewards/rejected": -0.4814247190952301, "step": 4100 }, { "epoch": 1.07, "eval_logits/chosen": -2.694403886795044, "eval_logits/rejected": -2.6851539611816406, "eval_logps/chosen": -1595.5987548828125, "eval_logps/rejected": -1399.263427734375, "eval_loss": 0.6475256085395813, "eval_rewards/accuracies": 0.6448412537574768, "eval_rewards/chosen": -0.33049485087394714, "eval_rewards/margins": 0.14549362659454346, "eval_rewards/rejected": -0.4759885370731354, "eval_runtime": 222.0313, "eval_samples_per_second": 9.008, "eval_steps_per_second": 0.284, "step": 4100 }, { "epoch": 1.08, "learning_rate": 2.606750619473342e-06, "logits/chosen": -2.6989099979400635, "logits/rejected": -2.6851353645324707, "logps/chosen": -1384.610595703125, "logps/rejected": -1350.0355224609375, "loss": 0.6369, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.3065218925476074, "rewards/margins": 0.11955137550830841, "rewards/rejected": -0.42607325315475464, "step": 4110 }, { "epoch": 1.08, "learning_rate": 2.595339310808262e-06, "logits/chosen": -2.6756765842437744, "logits/rejected": -2.6787614822387695, "logps/chosen": -1460.1578369140625, "logps/rejected": -1376.394775390625, "loss": 0.6227, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.3293229937553406, "rewards/margins": 0.15691342949867249, "rewards/rejected": -0.48623642325401306, "step": 4120 }, { "epoch": 1.08, "learning_rate": 2.5839260125103004e-06, "logits/chosen": -2.656978130340576, "logits/rejected": -2.6542904376983643, "logps/chosen": -1446.76171875, "logps/rejected": -1587.48828125, "loss": 0.6449, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.29374203085899353, "rewards/margins": 0.17694918811321259, "rewards/rejected": -0.4706912636756897, "step": 4130 }, { "epoch": 1.08, "learning_rate": 2.5725109627631984e-06, "logits/chosen": -2.7323098182678223, "logits/rejected": -2.71343731880188, "logps/chosen": -1746.254150390625, "logps/rejected": -1433.3355712890625, "loss": 0.6762, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.3805330693721771, "rewards/margins": 0.14356563985347748, "rewards/rejected": -0.5240987539291382, "step": 4140 }, { "epoch": 1.09, "learning_rate": 2.5610943997872443e-06, "logits/chosen": -2.714146852493286, "logits/rejected": -2.6969714164733887, "logps/chosen": -1565.2064208984375, "logps/rejected": -1335.5848388671875, "loss": 0.6345, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.2877258360385895, "rewards/margins": 0.21380770206451416, "rewards/rejected": -0.501533567905426, "step": 4150 }, { "epoch": 1.09, "learning_rate": 2.5496765618343096e-06, "logits/chosen": -2.703857898712158, "logits/rejected": -2.6917083263397217, "logps/chosen": -1705.0570068359375, "logps/rejected": -1651.3447265625, "loss": 0.6253, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.31827279925346375, "rewards/margins": 0.18771009147167206, "rewards/rejected": -0.5059828758239746, "step": 4160 }, { "epoch": 1.09, "learning_rate": 2.538257687182871e-06, "logits/chosen": -2.7111072540283203, "logits/rejected": -2.7166075706481934, "logps/chosen": -1474.952880859375, "logps/rejected": -1338.7607421875, "loss": 0.6475, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.3775080144405365, "rewards/margins": 0.05340402573347092, "rewards/rejected": -0.4309120178222656, "step": 4170 }, { "epoch": 1.09, "learning_rate": 2.526838014133041e-06, "logits/chosen": -2.7166781425476074, "logits/rejected": -2.681563377380371, "logps/chosen": -1745.790283203125, "logps/rejected": -1185.437744140625, "loss": 0.6368, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.34735316038131714, "rewards/margins": 0.14297917485237122, "rewards/rejected": -0.4903322756290436, "step": 4180 }, { "epoch": 1.1, "learning_rate": 2.515417781001594e-06, "logits/chosen": -2.6650118827819824, "logits/rejected": -2.6872620582580566, "logps/chosen": -1447.572021484375, "logps/rejected": -1491.62548828125, "loss": 0.6357, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.3270224332809448, "rewards/margins": 0.08632199466228485, "rewards/rejected": -0.41334444284439087, "step": 4190 }, { "epoch": 1.1, "learning_rate": 2.503997226116992e-06, "logits/chosen": -2.685615062713623, "logits/rejected": -2.6830313205718994, "logps/chosen": -1447.239990234375, "logps/rejected": -1051.3704833984375, "loss": 0.6451, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.313309907913208, "rewards/margins": 0.2056921422481537, "rewards/rejected": -0.5190020799636841, "step": 4200 }, { "epoch": 1.1, "eval_logits/chosen": -2.704213857650757, "eval_logits/rejected": -2.6953837871551514, "eval_logps/chosen": -1597.2633056640625, "eval_logps/rejected": -1401.5712890625, "eval_loss": 0.6471446752548218, "eval_rewards/accuracies": 0.636904776096344, "eval_rewards/chosen": -0.34714046120643616, "eval_rewards/margins": 0.1519256830215454, "eval_rewards/rejected": -0.49906620383262634, "eval_runtime": 222.0001, "eval_samples_per_second": 9.009, "eval_steps_per_second": 0.284, "step": 4200 }, { "epoch": 1.1, "learning_rate": 2.4925765878144115e-06, "logits/chosen": -2.709895372390747, "logits/rejected": -2.6850790977478027, "logps/chosen": -1786.897216796875, "logps/rejected": -1272.6116943359375, "loss": 0.6101, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.2910698354244232, "rewards/margins": 0.18431541323661804, "rewards/rejected": -0.47538524866104126, "step": 4210 }, { "epoch": 1.1, "learning_rate": 2.4811561044307727e-06, "logits/chosen": -2.7279367446899414, "logits/rejected": -2.703131675720215, "logps/chosen": -1588.3284912109375, "logps/rejected": -1520.607421875, "loss": 0.6235, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.33380937576293945, "rewards/margins": 0.14665281772613525, "rewards/rejected": -0.4804622232913971, "step": 4220 }, { "epoch": 1.11, "learning_rate": 2.469736014299758e-06, "logits/chosen": -2.682407855987549, "logits/rejected": -2.672849416732788, "logps/chosen": -1425.3299560546875, "logps/rejected": -1198.2889404296875, "loss": 0.664, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.38793474435806274, "rewards/margins": 0.05479978397488594, "rewards/rejected": -0.4427345395088196, "step": 4230 }, { "epoch": 1.11, "learning_rate": 2.458316555746846e-06, "logits/chosen": -2.693711757659912, "logits/rejected": -2.674509048461914, "logps/chosen": -1880.191650390625, "logps/rejected": -1725.5045166015625, "loss": 0.6343, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.3580625057220459, "rewards/margins": 0.14049111306667328, "rewards/rejected": -0.498553603887558, "step": 4240 }, { "epoch": 1.11, "learning_rate": 2.446897967084334e-06, "logits/chosen": -2.7132728099823, "logits/rejected": -2.7284951210021973, "logps/chosen": -1449.487548828125, "logps/rejected": -1492.966064453125, "loss": 0.6573, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.4193345606327057, "rewards/margins": 0.08319560438394547, "rewards/rejected": -0.5025301575660706, "step": 4250 }, { "epoch": 1.11, "learning_rate": 2.4354804866063684e-06, "logits/chosen": -2.6998226642608643, "logits/rejected": -2.7050068378448486, "logps/chosen": -1740.021240234375, "logps/rejected": -1726.16796875, "loss": 0.6485, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.3352826237678528, "rewards/margins": 0.14481016993522644, "rewards/rejected": -0.480092853307724, "step": 4260 }, { "epoch": 1.12, "learning_rate": 2.424064352583964e-06, "logits/chosen": -2.661332607269287, "logits/rejected": -2.6545464992523193, "logps/chosen": -1448.39794921875, "logps/rejected": -1272.430908203125, "loss": 0.649, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.486703097820282, "rewards/margins": 0.09392253309488297, "rewards/rejected": -0.5806256532669067, "step": 4270 }, { "epoch": 1.12, "learning_rate": 2.4126498032600403e-06, "logits/chosen": -2.678957223892212, "logits/rejected": -2.666158437728882, "logps/chosen": -1762.4287109375, "logps/rejected": -1390.131103515625, "loss": 0.6115, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.3684222102165222, "rewards/margins": 0.2715838551521301, "rewards/rejected": -0.6400061249732971, "step": 4280 }, { "epoch": 1.12, "learning_rate": 2.401237076844445e-06, "logits/chosen": -2.700265407562256, "logits/rejected": -2.6984000205993652, "logps/chosen": -1472.801025390625, "logps/rejected": -1357.495849609375, "loss": 0.653, "rewards/accuracies": 0.625, "rewards/chosen": -0.3983010947704315, "rewards/margins": 0.22106428444385529, "rewards/rejected": -0.619365394115448, "step": 4290 }, { "epoch": 1.13, "learning_rate": 2.38982641150898e-06, "logits/chosen": -2.73397159576416, "logits/rejected": -2.705148696899414, "logps/chosen": -1355.8466796875, "logps/rejected": -1200.568603515625, "loss": 0.6744, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.34279078245162964, "rewards/margins": 0.10427943617105484, "rewards/rejected": -0.4470701813697815, "step": 4300 }, { "epoch": 1.13, "eval_logits/chosen": -2.7094671726226807, "eval_logits/rejected": -2.7008376121520996, "eval_logps/chosen": -1598.7427978515625, "eval_logps/rejected": -1402.7869873046875, "eval_loss": 0.6482938528060913, "eval_rewards/accuracies": 0.6428571343421936, "eval_rewards/chosen": -0.361937552690506, "eval_rewards/margins": 0.14928516745567322, "eval_rewards/rejected": -0.5112226605415344, "eval_runtime": 221.8751, "eval_samples_per_second": 9.014, "eval_steps_per_second": 0.284, "step": 4300 }, { "epoch": 1.13, "learning_rate": 2.3784180453824414e-06, "logits/chosen": -2.6598381996154785, "logits/rejected": -2.6440939903259277, "logps/chosen": -1149.7069091796875, "logps/rejected": -1251.6026611328125, "loss": 0.6159, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.3814155161380768, "rewards/margins": 0.27214717864990234, "rewards/rejected": -0.6535626649856567, "step": 4310 }, { "epoch": 1.13, "learning_rate": 2.367012216545638e-06, "logits/chosen": -2.7360281944274902, "logits/rejected": -2.704324722290039, "logps/chosen": -1364.4183349609375, "logps/rejected": -1276.6588134765625, "loss": 0.6425, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.3955642879009247, "rewards/margins": 0.0812029018998146, "rewards/rejected": -0.4767672121524811, "step": 4320 }, { "epoch": 1.13, "learning_rate": 2.3556091630264294e-06, "logits/chosen": -2.708026885986328, "logits/rejected": -2.715130567550659, "logps/chosen": -1570.659912109375, "logps/rejected": -1440.263427734375, "loss": 0.6338, "rewards/accuracies": 0.625, "rewards/chosen": -0.4111559987068176, "rewards/margins": 0.19120827317237854, "rewards/rejected": -0.6023643016815186, "step": 4330 }, { "epoch": 1.14, "learning_rate": 2.344209122794757e-06, "logits/chosen": -2.731186866760254, "logits/rejected": -2.7232601642608643, "logps/chosen": -1690.5205078125, "logps/rejected": -1551.1507568359375, "loss": 0.6155, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.395660936832428, "rewards/margins": 0.2794150114059448, "rewards/rejected": -0.675075888633728, "step": 4340 }, { "epoch": 1.14, "learning_rate": 2.3328123337576787e-06, "logits/chosen": -2.6726715564727783, "logits/rejected": -2.6711506843566895, "logps/chosen": -1203.287109375, "logps/rejected": -1226.397705078125, "loss": 0.6488, "rewards/accuracies": 0.625, "rewards/chosen": -0.4064127504825592, "rewards/margins": 0.12786249816417694, "rewards/rejected": -0.534275233745575, "step": 4350 }, { "epoch": 1.14, "learning_rate": 2.3214190337544017e-06, "logits/chosen": -2.7237210273742676, "logits/rejected": -2.7012486457824707, "logps/chosen": -1373.9417724609375, "logps/rejected": -1111.353271484375, "loss": 0.6324, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.3576543927192688, "rewards/margins": 0.24221619963645935, "rewards/rejected": -0.5998705625534058, "step": 4360 }, { "epoch": 1.14, "learning_rate": 2.310029460551323e-06, "logits/chosen": -2.670630693435669, "logits/rejected": -2.6639411449432373, "logps/chosen": -1617.3814697265625, "logps/rejected": -1109.9146728515625, "loss": 0.6155, "rewards/accuracies": 0.625, "rewards/chosen": -0.3107014298439026, "rewards/margins": 0.20053979754447937, "rewards/rejected": -0.5112412571907043, "step": 4370 }, { "epoch": 1.15, "learning_rate": 2.2986438518370645e-06, "logits/chosen": -2.6928963661193848, "logits/rejected": -2.675933361053467, "logps/chosen": -1699.2435302734375, "logps/rejected": -1564.343017578125, "loss": 0.6306, "rewards/accuracies": 0.625, "rewards/chosen": -0.40069809556007385, "rewards/margins": 0.17530310153961182, "rewards/rejected": -0.5760011672973633, "step": 4380 }, { "epoch": 1.15, "learning_rate": 2.2872624452175123e-06, "logits/chosen": -2.687253475189209, "logits/rejected": -2.6814191341400146, "logps/chosen": -1764.602783203125, "logps/rejected": -1355.3795166015625, "loss": 0.6642, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.4385541081428528, "rewards/margins": 0.19212810695171356, "rewards/rejected": -0.6306821703910828, "step": 4390 }, { "epoch": 1.15, "learning_rate": 2.2758854782108584e-06, "logits/chosen": -2.6809728145599365, "logits/rejected": -2.6859257221221924, "logps/chosen": -1223.7222900390625, "logps/rejected": -1274.3193359375, "loss": 0.6355, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.5250788927078247, "rewards/margins": 0.08925594389438629, "rewards/rejected": -0.6143348217010498, "step": 4400 }, { "epoch": 1.15, "eval_logits/chosen": -2.7000913619995117, "eval_logits/rejected": -2.691587209701538, "eval_logps/chosen": -1602.953125, "eval_logps/rejected": -1407.248046875, "eval_loss": 0.6476736068725586, "eval_rewards/accuracies": 0.6269841194152832, "eval_rewards/chosen": -0.4040408134460449, "eval_rewards/margins": 0.15179233253002167, "eval_rewards/rejected": -0.5558331608772278, "eval_runtime": 221.9412, "eval_samples_per_second": 9.011, "eval_steps_per_second": 0.284, "step": 4400 }, { "epoch": 1.15, "learning_rate": 2.2645131882426458e-06, "logits/chosen": -2.6515867710113525, "logits/rejected": -2.6461291313171387, "logps/chosen": -1675.511962890625, "logps/rejected": -1306.486083984375, "loss": 0.6526, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.4507780969142914, "rewards/margins": 0.0885946974158287, "rewards/rejected": -0.5393728017807007, "step": 4410 }, { "epoch": 1.16, "learning_rate": 2.2531458126408154e-06, "logits/chosen": -2.696350574493408, "logits/rejected": -2.671020984649658, "logps/chosen": -1504.6510009765625, "logps/rejected": -1417.209228515625, "loss": 0.6406, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.44704046845436096, "rewards/margins": 0.16284236311912537, "rewards/rejected": -0.6098828315734863, "step": 4420 }, { "epoch": 1.16, "learning_rate": 2.2417835886307452e-06, "logits/chosen": -2.6964614391326904, "logits/rejected": -2.693498134613037, "logps/chosen": -1594.204345703125, "logps/rejected": -1482.2950439453125, "loss": 0.6424, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.412257581949234, "rewards/margins": 0.11970730125904083, "rewards/rejected": -0.5319648385047913, "step": 4430 }, { "epoch": 1.16, "learning_rate": 2.2304267533303075e-06, "logits/chosen": -2.7227253913879395, "logits/rejected": -2.717461109161377, "logps/chosen": -1820.9801025390625, "logps/rejected": -1700.9222412109375, "loss": 0.6213, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.4056100845336914, "rewards/margins": 0.19048570096492767, "rewards/rejected": -0.5960958003997803, "step": 4440 }, { "epoch": 1.16, "learning_rate": 2.219075543744918e-06, "logits/chosen": -2.6950507164001465, "logits/rejected": -2.686513662338257, "logps/chosen": -1661.6273193359375, "logps/rejected": -1578.125, "loss": 0.6407, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.3556327819824219, "rewards/margins": 0.23430833220481873, "rewards/rejected": -0.589941143989563, "step": 4450 }, { "epoch": 1.17, "learning_rate": 2.207730196762589e-06, "logits/chosen": -2.685410976409912, "logits/rejected": -2.6853833198547363, "logps/chosen": -1636.17236328125, "logps/rejected": -1485.104736328125, "loss": 0.6347, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.3346371650695801, "rewards/margins": 0.1579541265964508, "rewards/rejected": -0.49259132146835327, "step": 4460 }, { "epoch": 1.17, "learning_rate": 2.1963909491489846e-06, "logits/chosen": -2.629254102706909, "logits/rejected": -2.631855010986328, "logps/chosen": -1437.640625, "logps/rejected": -1336.6815185546875, "loss": 0.6243, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.540132462978363, "rewards/margins": 0.1326352059841156, "rewards/rejected": -0.672767698764801, "step": 4470 }, { "epoch": 1.17, "learning_rate": 2.185058037542486e-06, "logits/chosen": -2.6665568351745605, "logits/rejected": -2.672071933746338, "logps/chosen": -1345.5667724609375, "logps/rejected": -1157.662109375, "loss": 0.6238, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.3904740512371063, "rewards/margins": 0.15949508547782898, "rewards/rejected": -0.5499691367149353, "step": 4480 }, { "epoch": 1.18, "learning_rate": 2.173731698449244e-06, "logits/chosen": -2.7047767639160156, "logits/rejected": -2.7059977054595947, "logps/chosen": -1669.259765625, "logps/rejected": -1611.8948974609375, "loss": 0.6342, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.39055395126342773, "rewards/margins": 0.24455150961875916, "rewards/rejected": -0.6351054310798645, "step": 4490 }, { "epoch": 1.18, "learning_rate": 2.1624121682382495e-06, "logits/chosen": -2.6913721561431885, "logits/rejected": -2.680323839187622, "logps/chosen": -1435.3963623046875, "logps/rejected": -1044.6759033203125, "loss": 0.6187, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.42885956168174744, "rewards/margins": 0.17149262130260468, "rewards/rejected": -0.6003522276878357, "step": 4500 }, { "epoch": 1.18, "eval_logits/chosen": -2.6962525844573975, "eval_logits/rejected": -2.6882517337799072, "eval_logps/chosen": -1603.0440673828125, "eval_logps/rejected": -1407.0084228515625, "eval_loss": 0.6472293138504028, "eval_rewards/accuracies": 0.6349206566810608, "eval_rewards/chosen": -0.40495049953460693, "eval_rewards/margins": 0.14848746359348297, "eval_rewards/rejected": -0.5534379482269287, "eval_runtime": 222.0323, "eval_samples_per_second": 9.008, "eval_steps_per_second": 0.284, "step": 4500 }, { "epoch": 1.18, "learning_rate": 2.1510996831363993e-06, "logits/chosen": -2.654839277267456, "logits/rejected": -2.654435157775879, "logps/chosen": -1602.6588134765625, "logps/rejected": -1460.040771484375, "loss": 0.6251, "rewards/accuracies": 0.75, "rewards/chosen": -0.31933337450027466, "rewards/margins": 0.2544369697570801, "rewards/rejected": -0.5737703442573547, "step": 4510 }, { "epoch": 1.18, "learning_rate": 2.139794479223565e-06, "logits/chosen": -2.6878533363342285, "logits/rejected": -2.700411319732666, "logps/chosen": -1505.231201171875, "logps/rejected": -1499.4599609375, "loss": 0.6241, "rewards/accuracies": 0.625, "rewards/chosen": -0.3987555503845215, "rewards/margins": 0.07872674614191055, "rewards/rejected": -0.47748225927352905, "step": 4520 }, { "epoch": 1.19, "learning_rate": 2.128496792427669e-06, "logits/chosen": -2.702573299407959, "logits/rejected": -2.6977837085723877, "logps/chosen": -1317.4459228515625, "logps/rejected": -1278.5416259765625, "loss": 0.6258, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.5213514566421509, "rewards/margins": 0.08871433138847351, "rewards/rejected": -0.610065758228302, "step": 4530 }, { "epoch": 1.19, "learning_rate": 2.117206858519758e-06, "logits/chosen": -2.70503568649292, "logits/rejected": -2.6909327507019043, "logps/chosen": -2104.00927734375, "logps/rejected": -1710.705078125, "loss": 0.627, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.29126936197280884, "rewards/margins": 0.23307795822620392, "rewards/rejected": -0.5243473052978516, "step": 4540 }, { "epoch": 1.19, "learning_rate": 2.1059249131090844e-06, "logits/chosen": -2.7203164100646973, "logits/rejected": -2.7143332958221436, "logps/chosen": -1671.8541259765625, "logps/rejected": -1462.7237548828125, "loss": 0.6644, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.35251596570014954, "rewards/margins": 0.12573233246803284, "rewards/rejected": -0.4782482981681824, "step": 4550 }, { "epoch": 1.19, "learning_rate": 2.094651191638189e-06, "logits/chosen": -2.7122576236724854, "logits/rejected": -2.712846517562866, "logps/chosen": -1451.910888671875, "logps/rejected": -1314.6732177734375, "loss": 0.6382, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.3421178162097931, "rewards/margins": 0.19824132323265076, "rewards/rejected": -0.5403591990470886, "step": 4560 }, { "epoch": 1.2, "learning_rate": 2.0833859293779867e-06, "logits/chosen": -2.73149037361145, "logits/rejected": -2.7085766792297363, "logps/chosen": -1928.4527587890625, "logps/rejected": -1434.140380859375, "loss": 0.6435, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.3224185109138489, "rewards/margins": 0.23348590731620789, "rewards/rejected": -0.5559044480323792, "step": 4570 }, { "epoch": 1.2, "learning_rate": 2.0721293614228568e-06, "logits/chosen": -2.691683292388916, "logits/rejected": -2.675884246826172, "logps/chosen": -1427.173828125, "logps/rejected": -1134.955078125, "loss": 0.6332, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.2849566340446472, "rewards/margins": 0.2647608816623688, "rewards/rejected": -0.5497175455093384, "step": 4580 }, { "epoch": 1.2, "learning_rate": 2.060881722685742e-06, "logits/chosen": -2.730750560760498, "logits/rejected": -2.7235236167907715, "logps/chosen": -1560.0443115234375, "logps/rejected": -1230.85693359375, "loss": 0.6558, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.38310301303863525, "rewards/margins": 0.09956183284521103, "rewards/rejected": -0.48266488313674927, "step": 4590 }, { "epoch": 1.2, "learning_rate": 2.049643247893235e-06, "logits/chosen": -2.7023532390594482, "logits/rejected": -2.7045681476593018, "logps/chosen": -1575.8134765625, "logps/rejected": -1411.7012939453125, "loss": 0.6555, "rewards/accuracies": 0.5, "rewards/chosen": -0.5016661882400513, "rewards/margins": 0.09470875561237335, "rewards/rejected": -0.5963749289512634, "step": 4600 }, { "epoch": 1.2, "eval_logits/chosen": -2.716783285140991, "eval_logits/rejected": -2.707549810409546, "eval_logps/chosen": -1601.382568359375, "eval_logps/rejected": -1405.2078857421875, "eval_loss": 0.6472097635269165, "eval_rewards/accuracies": 0.6309523582458496, "eval_rewards/chosen": -0.38833513855934143, "eval_rewards/margins": 0.14709699153900146, "eval_rewards/rejected": -0.5354321002960205, "eval_runtime": 221.9388, "eval_samples_per_second": 9.011, "eval_steps_per_second": 0.284, "step": 4600 }, { "epoch": 1.21, "learning_rate": 2.0384141715806903e-06, "logits/chosen": -2.6752729415893555, "logits/rejected": -2.6681485176086426, "logps/chosen": -1369.5797119140625, "logps/rejected": -1186.574462890625, "loss": 0.6345, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.3633851408958435, "rewards/margins": 0.12907521426677704, "rewards/rejected": -0.49246034026145935, "step": 4610 }, { "epoch": 1.21, "learning_rate": 2.0271947280873255e-06, "logits/chosen": -2.70173978805542, "logits/rejected": -2.6857197284698486, "logps/chosen": -1928.5341796875, "logps/rejected": -1503.6873779296875, "loss": 0.6447, "rewards/accuracies": 0.75, "rewards/chosen": -0.3070146143436432, "rewards/margins": 0.21328814327716827, "rewards/rejected": -0.5203026533126831, "step": 4620 }, { "epoch": 1.21, "learning_rate": 2.0159851515513302e-06, "logits/chosen": -2.7439606189727783, "logits/rejected": -2.725101947784424, "logps/chosen": -1525.56787109375, "logps/rejected": -1400.937255859375, "loss": 0.65, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.3897179365158081, "rewards/margins": 0.20567400753498077, "rewards/rejected": -0.5953919291496277, "step": 4630 }, { "epoch": 1.21, "learning_rate": 2.004785675904982e-06, "logits/chosen": -2.7022523880004883, "logits/rejected": -2.7149405479431152, "logps/chosen": -1141.4708251953125, "logps/rejected": -1319.975341796875, "loss": 0.6594, "rewards/accuracies": 0.625, "rewards/chosen": -0.40314921736717224, "rewards/margins": 0.05639972165226936, "rewards/rejected": -0.4595489501953125, "step": 4640 }, { "epoch": 1.22, "learning_rate": 1.9935965348697624e-06, "logits/chosen": -2.7120418548583984, "logits/rejected": -2.6966214179992676, "logps/chosen": -1568.3426513671875, "logps/rejected": -1232.408447265625, "loss": 0.6226, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.382880836725235, "rewards/margins": 0.1441594809293747, "rewards/rejected": -0.5270403027534485, "step": 4650 }, { "epoch": 1.22, "learning_rate": 1.9824179619514807e-06, "logits/chosen": -2.712540864944458, "logits/rejected": -2.70318341255188, "logps/chosen": -1457.201904296875, "logps/rejected": -1480.9552001953125, "loss": 0.6325, "rewards/accuracies": 0.5, "rewards/chosen": -0.4275636076927185, "rewards/margins": 0.03508736938238144, "rewards/rejected": -0.46265095472335815, "step": 4660 }, { "epoch": 1.22, "learning_rate": 1.9712501904354004e-06, "logits/chosen": -2.711000919342041, "logits/rejected": -2.7133917808532715, "logps/chosen": -1551.674072265625, "logps/rejected": -1135.0037841796875, "loss": 0.6527, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.4668704867362976, "rewards/margins": 0.1215348094701767, "rewards/rejected": -0.5884053111076355, "step": 4670 }, { "epoch": 1.22, "learning_rate": 1.960093453381369e-06, "logits/chosen": -2.6892762184143066, "logits/rejected": -2.684814214706421, "logps/chosen": -1495.062255859375, "logps/rejected": -1371.4951171875, "loss": 0.6442, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.5431568026542664, "rewards/margins": 0.0739276260137558, "rewards/rejected": -0.6170844435691833, "step": 4680 }, { "epoch": 1.23, "learning_rate": 1.948947983618962e-06, "logits/chosen": -2.6771562099456787, "logits/rejected": -2.682755708694458, "logps/chosen": -1703.9365234375, "logps/rejected": -1336.865234375, "loss": 0.6401, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.4650822579860687, "rewards/margins": 0.1413726508617401, "rewards/rejected": -0.6064549088478088, "step": 4690 }, { "epoch": 1.23, "learning_rate": 1.937814013742611e-06, "logits/chosen": -2.6763648986816406, "logits/rejected": -2.669142246246338, "logps/chosen": -1396.1729736328125, "logps/rejected": -1169.147705078125, "loss": 0.6178, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.45917263627052307, "rewards/margins": 0.19413240253925323, "rewards/rejected": -0.6533050537109375, "step": 4700 }, { "epoch": 1.23, "eval_logits/chosen": -2.700589179992676, "eval_logits/rejected": -2.6911513805389404, "eval_logps/chosen": -1602.476318359375, "eval_logps/rejected": -1405.8092041015625, "eval_loss": 0.6476128101348877, "eval_rewards/accuracies": 0.6190476417541504, "eval_rewards/chosen": -0.39927202463150024, "eval_rewards/margins": 0.1421724110841751, "eval_rewards/rejected": -0.5414443612098694, "eval_runtime": 222.0165, "eval_samples_per_second": 9.008, "eval_steps_per_second": 0.284, "step": 4700 }, { "epoch": 1.23, "learning_rate": 1.9266917761067617e-06, "logits/chosen": -2.6928534507751465, "logits/rejected": -2.695483922958374, "logps/chosen": -1367.698974609375, "logps/rejected": -1266.385986328125, "loss": 0.6361, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.3876931071281433, "rewards/margins": 0.1019570380449295, "rewards/rejected": -0.489650160074234, "step": 4710 }, { "epoch": 1.24, "learning_rate": 1.915581502821017e-06, "logits/chosen": -2.7133240699768066, "logits/rejected": -2.7143406867980957, "logps/chosen": -1590.260009765625, "logps/rejected": -1481.0340576171875, "loss": 0.6478, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.43118634819984436, "rewards/margins": 0.1839209347963333, "rewards/rejected": -0.6151072978973389, "step": 4720 }, { "epoch": 1.24, "learning_rate": 1.9044834257452997e-06, "logits/chosen": -2.6916699409484863, "logits/rejected": -2.6897857189178467, "logps/chosen": -1501.4925537109375, "logps/rejected": -1478.397216796875, "loss": 0.6044, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.3613813817501068, "rewards/margins": 0.3248251974582672, "rewards/rejected": -0.6862064599990845, "step": 4730 }, { "epoch": 1.24, "learning_rate": 1.893397776485006e-06, "logits/chosen": -2.7368080615997314, "logits/rejected": -2.731428861618042, "logps/chosen": -1889.243408203125, "logps/rejected": -1275.2314453125, "loss": 0.6557, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.35338732600212097, "rewards/margins": 0.1933208405971527, "rewards/rejected": -0.5467082262039185, "step": 4740 }, { "epoch": 1.24, "learning_rate": 1.8823247863861804e-06, "logits/chosen": -2.7085061073303223, "logits/rejected": -2.6927361488342285, "logps/chosen": -1784.7685546875, "logps/rejected": -1247.2198486328125, "loss": 0.6375, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.37581080198287964, "rewards/margins": 0.2545829117298126, "rewards/rejected": -0.6303936839103699, "step": 4750 }, { "epoch": 1.25, "learning_rate": 1.8712646865306822e-06, "logits/chosen": -2.6886403560638428, "logits/rejected": -2.6807615756988525, "logps/chosen": -1332.3145751953125, "logps/rejected": -1414.520751953125, "loss": 0.6221, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.40824103355407715, "rewards/margins": 0.08367902040481567, "rewards/rejected": -0.4919200539588928, "step": 4760 }, { "epoch": 1.25, "learning_rate": 1.8602177077313631e-06, "logits/chosen": -2.7041351795196533, "logits/rejected": -2.6894426345825195, "logps/chosen": -1375.96435546875, "logps/rejected": -1346.1580810546875, "loss": 0.636, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.24864885210990906, "rewards/margins": 0.2993265390396118, "rewards/rejected": -0.5479754209518433, "step": 4770 }, { "epoch": 1.25, "learning_rate": 1.8491840805272546e-06, "logits/chosen": -2.7112724781036377, "logits/rejected": -2.718292474746704, "logps/chosen": -1560.8367919921875, "logps/rejected": -1538.97802734375, "loss": 0.6279, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.43749088048934937, "rewards/margins": 0.12982013821601868, "rewards/rejected": -0.5673110485076904, "step": 4780 }, { "epoch": 1.25, "learning_rate": 1.8381640351787516e-06, "logits/chosen": -2.7069056034088135, "logits/rejected": -2.6937079429626465, "logps/chosen": -1586.96630859375, "logps/rejected": -1298.62890625, "loss": 0.6174, "rewards/accuracies": 0.75, "rewards/chosen": -0.38084354996681213, "rewards/margins": 0.25583982467651367, "rewards/rejected": -0.6366834044456482, "step": 4790 }, { "epoch": 1.26, "learning_rate": 1.8271578016628122e-06, "logits/chosen": -2.700934886932373, "logits/rejected": -2.695272207260132, "logps/chosen": -1409.424560546875, "logps/rejected": -1458.7320556640625, "loss": 0.6242, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.39179927110671997, "rewards/margins": 0.16118505597114563, "rewards/rejected": -0.552984356880188, "step": 4800 }, { "epoch": 1.26, "eval_logits/chosen": -2.701568841934204, "eval_logits/rejected": -2.6917405128479004, "eval_logps/chosen": -1605.5714111328125, "eval_logps/rejected": -1409.126708984375, "eval_loss": 0.6477026343345642, "eval_rewards/accuracies": 0.625, "eval_rewards/chosen": -0.4302244782447815, "eval_rewards/margins": 0.14439751207828522, "eval_rewards/rejected": -0.5746219158172607, "eval_runtime": 222.0078, "eval_samples_per_second": 9.009, "eval_steps_per_second": 0.284, "step": 4800 }, { "epoch": 1.26, "learning_rate": 1.8161656096681546e-06, "logits/chosen": -2.6858129501342773, "logits/rejected": -2.65826416015625, "logps/chosen": -1292.4794921875, "logps/rejected": -886.6090087890625, "loss": 0.635, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.5286234021186829, "rewards/margins": 0.1071949228644371, "rewards/rejected": -0.6358182430267334, "step": 4810 }, { "epoch": 1.26, "learning_rate": 1.8051876885904645e-06, "logits/chosen": -2.6940348148345947, "logits/rejected": -2.668518304824829, "logps/chosen": -1209.8846435546875, "logps/rejected": -1209.081298828125, "loss": 0.6463, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": -0.4978295862674713, "rewards/margins": 0.07090970128774643, "rewards/rejected": -0.5687392950057983, "step": 4820 }, { "epoch": 1.26, "learning_rate": 1.7942242675276098e-06, "logits/chosen": -2.691037893295288, "logits/rejected": -2.698622941970825, "logps/chosen": -1303.959228515625, "logps/rejected": -1136.2342529296875, "loss": 0.6618, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.4843706488609314, "rewards/margins": 0.10510985553264618, "rewards/rejected": -0.5894805788993835, "step": 4830 }, { "epoch": 1.27, "learning_rate": 1.783275575274856e-06, "logits/chosen": -2.7178115844726562, "logits/rejected": -2.704846143722534, "logps/chosen": -1262.106689453125, "logps/rejected": -1242.8951416015625, "loss": 0.645, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.4795067310333252, "rewards/margins": 0.03653149679303169, "rewards/rejected": -0.5160382390022278, "step": 4840 }, { "epoch": 1.27, "learning_rate": 1.7723418403200943e-06, "logits/chosen": -2.6975011825561523, "logits/rejected": -2.6939289569854736, "logps/chosen": -1763.947265625, "logps/rejected": -1696.672607421875, "loss": 0.6684, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.46184077858924866, "rewards/margins": 0.04751107469201088, "rewards/rejected": -0.509351909160614, "step": 4850 }, { "epoch": 1.27, "learning_rate": 1.7614232908390748e-06, "logits/chosen": -2.7259678840637207, "logits/rejected": -2.714102268218994, "logps/chosen": -1708.26953125, "logps/rejected": -1277.268798828125, "loss": 0.6668, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.339743971824646, "rewards/margins": 0.13007903099060059, "rewards/rejected": -0.4698229730129242, "step": 4860 }, { "epoch": 1.27, "learning_rate": 1.7505201546906398e-06, "logits/chosen": -2.728283405303955, "logits/rejected": -2.717923641204834, "logps/chosen": -1363.5814208984375, "logps/rejected": -1250.927734375, "loss": 0.6323, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.3779984414577484, "rewards/margins": 0.18015049397945404, "rewards/rejected": -0.5581489205360413, "step": 4870 }, { "epoch": 1.28, "learning_rate": 1.7396326594119717e-06, "logits/chosen": -2.637516975402832, "logits/rejected": -2.659529209136963, "logps/chosen": -1411.4888916015625, "logps/rejected": -1366.10205078125, "loss": 0.6638, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.36083534359931946, "rewards/margins": 0.07913025468587875, "rewards/rejected": -0.4399656355381012, "step": 4880 }, { "epoch": 1.28, "learning_rate": 1.7287610322138449e-06, "logits/chosen": -2.7112040519714355, "logits/rejected": -2.6832072734832764, "logps/chosen": -1514.82177734375, "logps/rejected": -1104.4573974609375, "loss": 0.6283, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.35094964504241943, "rewards/margins": 0.20894651114940643, "rewards/rejected": -0.5598961710929871, "step": 4890 }, { "epoch": 1.28, "learning_rate": 1.7179054999758817e-06, "logits/chosen": -2.6823577880859375, "logits/rejected": -2.677356243133545, "logps/chosen": -1602.299072265625, "logps/rejected": -1288.768310546875, "loss": 0.6221, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.35732200741767883, "rewards/margins": 0.25401774048805237, "rewards/rejected": -0.6113396883010864, "step": 4900 }, { "epoch": 1.28, "eval_logits/chosen": -2.716733694076538, "eval_logits/rejected": -2.707334041595459, "eval_logps/chosen": -1601.0272216796875, "eval_logps/rejected": -1404.6871337890625, "eval_loss": 0.6463930606842041, "eval_rewards/accuracies": 0.6349206566810608, "eval_rewards/chosen": -0.38478225469589233, "eval_rewards/margins": 0.14544257521629333, "eval_rewards/rejected": -0.5302248001098633, "eval_runtime": 221.9549, "eval_samples_per_second": 9.011, "eval_steps_per_second": 0.284, "step": 4900 }, { "epoch": 1.29, "learning_rate": 1.7070662892418225e-06, "logits/chosen": -2.7210943698883057, "logits/rejected": -2.6983580589294434, "logps/chosen": -1570.259521484375, "logps/rejected": -1662.6558837890625, "loss": 0.6416, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.4329853951931, "rewards/margins": 0.20024752616882324, "rewards/rejected": -0.6332329511642456, "step": 4910 }, { "epoch": 1.29, "learning_rate": 1.6962436262147913e-06, "logits/chosen": -2.7212061882019043, "logits/rejected": -2.7054669857025146, "logps/chosen": -1921.957763671875, "logps/rejected": -1697.683349609375, "loss": 0.6421, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.2484970986843109, "rewards/margins": 0.2698536217212677, "rewards/rejected": -0.5183507204055786, "step": 4920 }, { "epoch": 1.29, "learning_rate": 1.6854377367525814e-06, "logits/chosen": -2.6766185760498047, "logits/rejected": -2.678335428237915, "logps/chosen": -1425.6380615234375, "logps/rejected": -1139.2139892578125, "loss": 0.6755, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.5290840268135071, "rewards/margins": 0.013314229436218739, "rewards/rejected": -0.54239821434021, "step": 4930 }, { "epoch": 1.29, "learning_rate": 1.6746488463629362e-06, "logits/chosen": -2.6874043941497803, "logits/rejected": -2.697096824645996, "logps/chosen": -1432.6015625, "logps/rejected": -1319.1143798828125, "loss": 0.6488, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.4031291902065277, "rewards/margins": 0.18114542961120605, "rewards/rejected": -0.5842746496200562, "step": 4940 }, { "epoch": 1.3, "learning_rate": 1.6638771801988483e-06, "logits/chosen": -2.7338335514068604, "logits/rejected": -2.7252652645111084, "logps/chosen": -1705.234375, "logps/rejected": -1308.44384765625, "loss": 0.6515, "rewards/accuracies": 0.625, "rewards/chosen": -0.38323846459388733, "rewards/margins": 0.1837080419063568, "rewards/rejected": -0.5669465065002441, "step": 4950 }, { "epoch": 1.3, "learning_rate": 1.653122963053857e-06, "logits/chosen": -2.679515838623047, "logits/rejected": -2.6913959980010986, "logps/chosen": -1297.17138671875, "logps/rejected": -1433.731201171875, "loss": 0.663, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.36984187364578247, "rewards/margins": 0.11996430158615112, "rewards/rejected": -0.48980623483657837, "step": 4960 }, { "epoch": 1.3, "learning_rate": 1.6423864193573606e-06, "logits/chosen": -2.732271909713745, "logits/rejected": -2.7238974571228027, "logps/chosen": -1768.8033447265625, "logps/rejected": -1411.5208740234375, "loss": 0.6216, "rewards/accuracies": 0.625, "rewards/chosen": -0.5462476015090942, "rewards/margins": 0.15834124386310577, "rewards/rejected": -0.7045888304710388, "step": 4970 }, { "epoch": 1.3, "learning_rate": 1.6316677731699286e-06, "logits/chosen": -2.7058351039886475, "logits/rejected": -2.678009271621704, "logps/chosen": -1341.9267578125, "logps/rejected": -1204.485595703125, "loss": 0.614, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.4298137128353119, "rewards/margins": 0.18093213438987732, "rewards/rejected": -0.6107458472251892, "step": 4980 }, { "epoch": 1.31, "learning_rate": 1.6209672481786302e-06, "logits/chosen": -2.707846164703369, "logits/rejected": -2.6903176307678223, "logps/chosen": -1449.9866943359375, "logps/rejected": -1365.045166015625, "loss": 0.6434, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.43810850381851196, "rewards/margins": 0.18651129305362701, "rewards/rejected": -0.6246197819709778, "step": 4990 }, { "epoch": 1.31, "learning_rate": 1.6102850676923616e-06, "logits/chosen": -2.7575032711029053, "logits/rejected": -2.7509520053863525, "logps/chosen": -1555.2666015625, "logps/rejected": -1557.8531494140625, "loss": 0.6582, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.43144112825393677, "rewards/margins": 0.06042899936437607, "rewards/rejected": -0.49187007546424866, "step": 5000 }, { "epoch": 1.31, "eval_logits/chosen": -2.7267844676971436, "eval_logits/rejected": -2.717449903488159, "eval_logps/chosen": -1602.501220703125, "eval_logps/rejected": -1406.292724609375, "eval_loss": 0.6459673643112183, "eval_rewards/accuracies": 0.6309523582458496, "eval_rewards/chosen": -0.39952224493026733, "eval_rewards/margins": 0.14675946533679962, "eval_rewards/rejected": -0.5462816953659058, "eval_runtime": 222.1193, "eval_samples_per_second": 9.004, "eval_steps_per_second": 0.284, "step": 5000 }, { "epoch": 1.31, "learning_rate": 1.5996214546371888e-06, "logits/chosen": -2.741490602493286, "logits/rejected": -2.730355978012085, "logps/chosen": -1695.7152099609375, "logps/rejected": -1342.337158203125, "loss": 0.6385, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.36179646849632263, "rewards/margins": 0.20229323208332062, "rewards/rejected": -0.5640896558761597, "step": 5010 }, { "epoch": 1.31, "learning_rate": 1.588976631551697e-06, "logits/chosen": -2.6824703216552734, "logits/rejected": -2.6855571269989014, "logps/chosen": -1382.963623046875, "logps/rejected": -1372.467041015625, "loss": 0.6429, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.4259725511074066, "rewards/margins": 0.19692833721637726, "rewards/rejected": -0.6229008436203003, "step": 5020 }, { "epoch": 1.32, "learning_rate": 1.5783508205823412e-06, "logits/chosen": -2.7435383796691895, "logits/rejected": -2.740779399871826, "logps/chosen": -1508.678466796875, "logps/rejected": -1334.25, "loss": 0.6218, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.382813036441803, "rewards/margins": 0.24260124564170837, "rewards/rejected": -0.625414252281189, "step": 5030 }, { "epoch": 1.32, "learning_rate": 1.5677442434788143e-06, "logits/chosen": -2.7308874130249023, "logits/rejected": -2.7374587059020996, "logps/chosen": -1877.0191650390625, "logps/rejected": -1829.716796875, "loss": 0.6812, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.4555627703666687, "rewards/margins": 0.05523302033543587, "rewards/rejected": -0.5107957720756531, "step": 5040 }, { "epoch": 1.32, "learning_rate": 1.5571571215894181e-06, "logits/chosen": -2.740858316421509, "logits/rejected": -2.7242140769958496, "logps/chosen": -1411.8358154296875, "logps/rejected": -1487.0919189453125, "loss": 0.6256, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.36944401264190674, "rewards/margins": 0.280577152967453, "rewards/rejected": -0.6500211954116821, "step": 5050 }, { "epoch": 1.32, "learning_rate": 1.5465896758564452e-06, "logits/chosen": -2.7520554065704346, "logits/rejected": -2.75130295753479, "logps/chosen": -1459.7698974609375, "logps/rejected": -1445.945556640625, "loss": 0.6289, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.2901294529438019, "rewards/margins": 0.2617509663105011, "rewards/rejected": -0.551880419254303, "step": 5060 }, { "epoch": 1.33, "learning_rate": 1.5360421268115653e-06, "logits/chosen": -2.719130277633667, "logits/rejected": -2.718916177749634, "logps/chosen": -1367.1160888671875, "logps/rejected": -1231.0194091796875, "loss": 0.6296, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.273499459028244, "rewards/margins": 0.24340489506721497, "rewards/rejected": -0.516904354095459, "step": 5070 }, { "epoch": 1.33, "learning_rate": 1.5255146945712267e-06, "logits/chosen": -2.7351174354553223, "logits/rejected": -2.7053210735321045, "logps/chosen": -1379.465576171875, "logps/rejected": -1388.5853271484375, "loss": 0.6134, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.40679627656936646, "rewards/margins": 0.19303181767463684, "rewards/rejected": -0.5998281240463257, "step": 5080 }, { "epoch": 1.33, "learning_rate": 1.5150075988320594e-06, "logits/chosen": -2.7461514472961426, "logits/rejected": -2.732804536819458, "logps/chosen": -1556.8673095703125, "logps/rejected": -1557.4312744140625, "loss": 0.6263, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.39684033393859863, "rewards/margins": 0.14313337206840515, "rewards/rejected": -0.539973795413971, "step": 5090 }, { "epoch": 1.33, "learning_rate": 1.5045210588662929e-06, "logits/chosen": -2.723940849304199, "logits/rejected": -2.7165207862854004, "logps/chosen": -1319.94970703125, "logps/rejected": -1179.8677978515625, "loss": 0.6276, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.3562172055244446, "rewards/margins": 0.20867054164409637, "rewards/rejected": -0.5648878216743469, "step": 5100 }, { "epoch": 1.33, "eval_logits/chosen": -2.728083372116089, "eval_logits/rejected": -2.7191505432128906, "eval_logps/chosen": -1603.0245361328125, "eval_logps/rejected": -1407.0914306640625, "eval_loss": 0.6458316445350647, "eval_rewards/accuracies": 0.6309523582458496, "eval_rewards/chosen": -0.4047529995441437, "eval_rewards/margins": 0.1495141237974167, "eval_rewards/rejected": -0.5542671084403992, "eval_runtime": 221.9733, "eval_samples_per_second": 9.01, "eval_steps_per_second": 0.284, "step": 5100 }, { "epoch": 1.34, "learning_rate": 1.4940552935171781e-06, "logits/chosen": -2.7510228157043457, "logits/rejected": -2.733030319213867, "logps/chosen": -1656.7183837890625, "logps/rejected": -1355.869140625, "loss": 0.6528, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.3592923879623413, "rewards/margins": 0.11819101870059967, "rewards/rejected": -0.4774834215641022, "step": 5110 }, { "epoch": 1.34, "learning_rate": 1.483610521194419e-06, "logits/chosen": -2.7261557579040527, "logits/rejected": -2.69697642326355, "logps/chosen": -1745.705322265625, "logps/rejected": -1580.485595703125, "loss": 0.6212, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.409809410572052, "rewards/margins": 0.17661479115486145, "rewards/rejected": -0.5864241719245911, "step": 5120 }, { "epoch": 1.34, "learning_rate": 1.4731869598696226e-06, "logits/chosen": -2.73225998878479, "logits/rejected": -2.716576099395752, "logps/chosen": -1597.790771484375, "logps/rejected": -1318.602294921875, "loss": 0.6156, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.3756243586540222, "rewards/margins": 0.22246117889881134, "rewards/rejected": -0.5980855226516724, "step": 5130 }, { "epoch": 1.35, "learning_rate": 1.4627848270717387e-06, "logits/chosen": -2.7219340801239014, "logits/rejected": -2.7200512886047363, "logps/chosen": -1253.752197265625, "logps/rejected": -1163.3043212890625, "loss": 0.6312, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.45713797211647034, "rewards/margins": 0.22213175892829895, "rewards/rejected": -0.6792697310447693, "step": 5140 }, { "epoch": 1.35, "learning_rate": 1.4524043398825277e-06, "logits/chosen": -2.761448621749878, "logits/rejected": -2.7348995208740234, "logps/chosen": -1840.7236328125, "logps/rejected": -1707.0986328125, "loss": 0.6369, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.2975284457206726, "rewards/margins": 0.1861899346113205, "rewards/rejected": -0.4837183952331543, "step": 5150 }, { "epoch": 1.35, "learning_rate": 1.4420457149320299e-06, "logits/chosen": -2.703780174255371, "logits/rejected": -2.7139551639556885, "logps/chosen": -1718.882080078125, "logps/rejected": -1578.334716796875, "loss": 0.6357, "rewards/accuracies": 0.625, "rewards/chosen": -0.3426080644130707, "rewards/margins": 0.20360854268074036, "rewards/rejected": -0.5462166666984558, "step": 5160 }, { "epoch": 1.35, "learning_rate": 1.431709168394042e-06, "logits/chosen": -2.7347803115844727, "logits/rejected": -2.735466480255127, "logps/chosen": -1229.0181884765625, "logps/rejected": -1180.7528076171875, "loss": 0.6334, "rewards/accuracies": 0.625, "rewards/chosen": -0.385633647441864, "rewards/margins": 0.1824651062488556, "rewards/rejected": -0.5680987238883972, "step": 5170 }, { "epoch": 1.36, "learning_rate": 1.4213949159816059e-06, "logits/chosen": -2.721846580505371, "logits/rejected": -2.7203266620635986, "logps/chosen": -1505.1744384765625, "logps/rejected": -1412.5858154296875, "loss": 0.6487, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.3777450621128082, "rewards/margins": 0.15947818756103516, "rewards/rejected": -0.537223219871521, "step": 5180 }, { "epoch": 1.36, "learning_rate": 1.4111031729425103e-06, "logits/chosen": -2.702693462371826, "logits/rejected": -2.708313226699829, "logps/chosen": -1602.787109375, "logps/rejected": -1448.283447265625, "loss": 0.6541, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.5254753232002258, "rewards/margins": 0.0998118668794632, "rewards/rejected": -0.6252871751785278, "step": 5190 }, { "epoch": 1.36, "learning_rate": 1.4008341540547965e-06, "logits/chosen": -2.7064757347106934, "logits/rejected": -2.6883506774902344, "logps/chosen": -1634.73828125, "logps/rejected": -1369.5660400390625, "loss": 0.6573, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.37241652607917786, "rewards/margins": 0.14691108465194702, "rewards/rejected": -0.5193276405334473, "step": 5200 }, { "epoch": 1.36, "eval_logits/chosen": -2.7229835987091064, "eval_logits/rejected": -2.7142302989959717, "eval_logps/chosen": -1603.234375, "eval_logps/rejected": -1407.468017578125, "eval_loss": 0.6451988816261292, "eval_rewards/accuracies": 0.6289682388305664, "eval_rewards/chosen": -0.40685272216796875, "eval_rewards/margins": 0.15118181705474854, "eval_rewards/rejected": -0.5580345392227173, "eval_runtime": 221.9736, "eval_samples_per_second": 9.01, "eval_steps_per_second": 0.284, "step": 5200 }, { "epoch": 1.36, "learning_rate": 1.3905880736222737e-06, "logits/chosen": -2.7218070030212402, "logits/rejected": -2.7174899578094482, "logps/chosen": -1455.731689453125, "logps/rejected": -1164.8289794921875, "loss": 0.6396, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.40525612235069275, "rewards/margins": 0.162841796875, "rewards/rejected": -0.5680979490280151, "step": 5210 }, { "epoch": 1.37, "learning_rate": 1.3803651454700531e-06, "logits/chosen": -2.705242872238159, "logits/rejected": -2.692960023880005, "logps/chosen": -1358.7576904296875, "logps/rejected": -1273.019287109375, "loss": 0.6561, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.42546719312667847, "rewards/margins": 0.03913971036672592, "rewards/rejected": -0.4646069407463074, "step": 5220 }, { "epoch": 1.37, "learning_rate": 1.3701655829400773e-06, "logits/chosen": -2.7341837882995605, "logits/rejected": -2.7173619270324707, "logps/chosen": -1405.4193115234375, "logps/rejected": -1325.1807861328125, "loss": 0.6365, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -0.5209168195724487, "rewards/margins": 0.011190772987902164, "rewards/rejected": -0.5321077108383179, "step": 5230 }, { "epoch": 1.37, "learning_rate": 1.3599895988866756e-06, "logits/chosen": -2.714791774749756, "logits/rejected": -2.696171283721924, "logps/chosen": -1703.659912109375, "logps/rejected": -1553.957275390625, "loss": 0.6615, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.39020878076553345, "rewards/margins": 0.12071947753429413, "rewards/rejected": -0.5109282732009888, "step": 5240 }, { "epoch": 1.37, "learning_rate": 1.3498374056721198e-06, "logits/chosen": -2.6696650981903076, "logits/rejected": -2.650538682937622, "logps/chosen": -1428.1317138671875, "logps/rejected": -1197.024658203125, "loss": 0.6233, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.4180733561515808, "rewards/margins": 0.23636317253112793, "rewards/rejected": -0.6544365286827087, "step": 5250 }, { "epoch": 1.38, "learning_rate": 1.3397092151621883e-06, "logits/chosen": -2.7149243354797363, "logits/rejected": -2.6980550289154053, "logps/chosen": -1731.099609375, "logps/rejected": -1579.0975341796875, "loss": 0.6072, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.41332024335861206, "rewards/margins": 0.19287510216236115, "rewards/rejected": -0.606195330619812, "step": 5260 }, { "epoch": 1.38, "learning_rate": 1.3296052387217484e-06, "logits/chosen": -2.7171783447265625, "logits/rejected": -2.72312331199646, "logps/chosen": -1473.4296875, "logps/rejected": -1383.3031005859375, "loss": 0.6632, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.37961164116859436, "rewards/margins": 0.08137451112270355, "rewards/rejected": -0.4609861373901367, "step": 5270 }, { "epoch": 1.38, "learning_rate": 1.3195256872103476e-06, "logits/chosen": -2.7354798316955566, "logits/rejected": -2.7498269081115723, "logps/chosen": -1588.6763916015625, "logps/rejected": -1422.8865966796875, "loss": 0.6343, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.42312923073768616, "rewards/margins": 0.16869792342185974, "rewards/rejected": -0.5918271541595459, "step": 5280 }, { "epoch": 1.38, "learning_rate": 1.3094707709778068e-06, "logits/chosen": -2.6907153129577637, "logits/rejected": -2.6785695552825928, "logps/chosen": -1365.05224609375, "logps/rejected": -1313.2744140625, "loss": 0.6147, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.47066640853881836, "rewards/margins": 0.15152886509895325, "rewards/rejected": -0.622195303440094, "step": 5290 }, { "epoch": 1.39, "learning_rate": 1.2994406998598364e-06, "logits/chosen": -2.7071175575256348, "logits/rejected": -2.7032291889190674, "logps/chosen": -1262.134765625, "logps/rejected": -1245.59765625, "loss": 0.6672, "rewards/accuracies": 0.5, "rewards/chosen": -0.5115293860435486, "rewards/margins": 0.11049805581569672, "rewards/rejected": -0.6220273971557617, "step": 5300 }, { "epoch": 1.39, "eval_logits/chosen": -2.707981586456299, "eval_logits/rejected": -2.6997311115264893, "eval_logps/chosen": -1602.744140625, "eval_logps/rejected": -1406.7059326171875, "eval_loss": 0.6457715034484863, "eval_rewards/accuracies": 0.6329365372657776, "eval_rewards/chosen": -0.4019514322280884, "eval_rewards/margins": 0.14846062660217285, "eval_rewards/rejected": -0.5504120588302612, "eval_runtime": 222.0084, "eval_samples_per_second": 9.009, "eval_steps_per_second": 0.284, "step": 5300 }, { "epoch": 1.39, "learning_rate": 1.2894356831736558e-06, "logits/chosen": -2.7404913902282715, "logits/rejected": -2.7101473808288574, "logps/chosen": -1680.30859375, "logps/rejected": -1509.7115478515625, "loss": 0.6541, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.5058525800704956, "rewards/margins": 0.10094550997018814, "rewards/rejected": -0.6067981123924255, "step": 5310 }, { "epoch": 1.39, "learning_rate": 1.2794559297136203e-06, "logits/chosen": -2.7266457080841064, "logits/rejected": -2.7247345447540283, "logps/chosen": -1590.14111328125, "logps/rejected": -1474.99755859375, "loss": 0.6407, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.41412800550460815, "rewards/margins": 0.21516411006450653, "rewards/rejected": -0.6292921304702759, "step": 5320 }, { "epoch": 1.39, "learning_rate": 1.2695016477468724e-06, "logits/chosen": -2.690953493118286, "logits/rejected": -2.6827735900878906, "logps/chosen": -1386.6346435546875, "logps/rejected": -1365.217041015625, "loss": 0.6586, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.48004859685897827, "rewards/margins": 0.21122178435325623, "rewards/rejected": -0.6912704110145569, "step": 5330 }, { "epoch": 1.4, "learning_rate": 1.2595730450089874e-06, "logits/chosen": -2.700653076171875, "logits/rejected": -2.708189010620117, "logps/chosen": -1437.6949462890625, "logps/rejected": -1425.35546875, "loss": 0.6465, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.37449318170547485, "rewards/margins": 0.10607191175222397, "rewards/rejected": -0.4805651307106018, "step": 5340 }, { "epoch": 1.4, "learning_rate": 1.2496703286996433e-06, "logits/chosen": -2.662972927093506, "logits/rejected": -2.651554584503174, "logps/chosen": -1539.100830078125, "logps/rejected": -1469.132080078125, "loss": 0.6183, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.42751431465148926, "rewards/margins": 0.19100254774093628, "rewards/rejected": -0.6185168623924255, "step": 5350 }, { "epoch": 1.4, "learning_rate": 1.2397937054782961e-06, "logits/chosen": -2.6878347396850586, "logits/rejected": -2.7087759971618652, "logps/chosen": -1573.5694580078125, "logps/rejected": -1411.982666015625, "loss": 0.6867, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.4747236669063568, "rewards/margins": 0.060199446976184845, "rewards/rejected": -0.5349230766296387, "step": 5360 }, { "epoch": 1.41, "learning_rate": 1.2299433814598635e-06, "logits/chosen": -2.719141960144043, "logits/rejected": -2.701843738555908, "logps/chosen": -1555.9644775390625, "logps/rejected": -1262.3304443359375, "loss": 0.6165, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.47410351037979126, "rewards/margins": 0.08914776146411896, "rewards/rejected": -0.5632511973381042, "step": 5370 }, { "epoch": 1.41, "learning_rate": 1.2201195622104265e-06, "logits/chosen": -2.7186007499694824, "logits/rejected": -2.7116668224334717, "logps/chosen": -1402.317138671875, "logps/rejected": -1303.7572021484375, "loss": 0.6388, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.3963562846183777, "rewards/margins": 0.08322183787822723, "rewards/rejected": -0.4795781672000885, "step": 5380 }, { "epoch": 1.41, "learning_rate": 1.2103224527429417e-06, "logits/chosen": -2.724838972091675, "logits/rejected": -2.7124266624450684, "logps/chosen": -1474.955322265625, "logps/rejected": -1274.307861328125, "loss": 0.6386, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.4477899670600891, "rewards/margins": 0.18380063772201538, "rewards/rejected": -0.6315906047821045, "step": 5390 }, { "epoch": 1.41, "learning_rate": 1.2005522575129559e-06, "logits/chosen": -2.6977944374084473, "logits/rejected": -2.684950113296509, "logps/chosen": -1461.167724609375, "logps/rejected": -1330.77490234375, "loss": 0.6112, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.3553297817707062, "rewards/margins": 0.2838284373283386, "rewards/rejected": -0.6391581892967224, "step": 5400 }, { "epoch": 1.41, "eval_logits/chosen": -2.70361590385437, "eval_logits/rejected": -2.695265054702759, "eval_logps/chosen": -1602.899658203125, "eval_logps/rejected": -1406.76318359375, "eval_loss": 0.6459503173828125, "eval_rewards/accuracies": 0.6289682388305664, "eval_rewards/chosen": -0.4035067558288574, "eval_rewards/margins": 0.14747834205627441, "eval_rewards/rejected": -0.5509850978851318, "eval_runtime": 221.9833, "eval_samples_per_second": 9.01, "eval_steps_per_second": 0.284, "step": 5400 }, { "epoch": 1.42, "learning_rate": 1.1908091804143469e-06, "logits/chosen": -2.6938157081604004, "logits/rejected": -2.686267375946045, "logps/chosen": -1382.980224609375, "logps/rejected": -1162.074951171875, "loss": 0.6195, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.3829803168773651, "rewards/margins": 0.22588439285755157, "rewards/rejected": -0.6088647246360779, "step": 5410 }, { "epoch": 1.42, "learning_rate": 1.1810934247750649e-06, "logits/chosen": -2.6722521781921387, "logits/rejected": -2.6724560260772705, "logps/chosen": -1091.403076171875, "logps/rejected": -973.5833740234375, "loss": 0.6671, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.44891077280044556, "rewards/margins": 0.09966419637203217, "rewards/rejected": -0.5485749244689941, "step": 5420 }, { "epoch": 1.42, "learning_rate": 1.1714051933528881e-06, "logits/chosen": -2.7134673595428467, "logits/rejected": -2.686100482940674, "logps/chosen": -1661.9583740234375, "logps/rejected": -1192.046875, "loss": 0.6279, "rewards/accuracies": 0.625, "rewards/chosen": -0.36968302726745605, "rewards/margins": 0.10744525492191315, "rewards/rejected": -0.477128267288208, "step": 5430 }, { "epoch": 1.42, "learning_rate": 1.161744688331192e-06, "logits/chosen": -2.691920518875122, "logits/rejected": -2.6858606338500977, "logps/chosen": -1821.1956787109375, "logps/rejected": -1636.1937255859375, "loss": 0.6193, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.44910621643066406, "rewards/margins": 0.11931423842906952, "rewards/rejected": -0.56842041015625, "step": 5440 }, { "epoch": 1.43, "learning_rate": 1.152112111314733e-06, "logits/chosen": -2.6686320304870605, "logits/rejected": -2.6621997356414795, "logps/chosen": -1850.287841796875, "logps/rejected": -1289.6005859375, "loss": 0.6289, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.37908318638801575, "rewards/margins": 0.15685948729515076, "rewards/rejected": -0.5359426736831665, "step": 5450 }, { "epoch": 1.43, "learning_rate": 1.142507663325439e-06, "logits/chosen": -2.682161808013916, "logits/rejected": -2.6786532402038574, "logps/chosen": -1570.059814453125, "logps/rejected": -1518.0584716796875, "loss": 0.6434, "rewards/accuracies": 0.625, "rewards/chosen": -0.3491072356700897, "rewards/margins": 0.21276946365833282, "rewards/rejected": -0.5618767142295837, "step": 5460 }, { "epoch": 1.43, "learning_rate": 1.132931544798211e-06, "logits/chosen": -2.7180349826812744, "logits/rejected": -2.691465139389038, "logps/chosen": -1448.842041015625, "logps/rejected": -1093.453369140625, "loss": 0.635, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.3692038655281067, "rewards/margins": 0.17854595184326172, "rewards/rejected": -0.5477498173713684, "step": 5470 }, { "epoch": 1.43, "learning_rate": 1.1233839555767482e-06, "logits/chosen": -2.695664405822754, "logits/rejected": -2.6881296634674072, "logps/chosen": -1042.5235595703125, "logps/rejected": -1147.806884765625, "loss": 0.6391, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.34199172258377075, "rewards/margins": 0.10589548200368881, "rewards/rejected": -0.44788724184036255, "step": 5480 }, { "epoch": 1.44, "learning_rate": 1.1138650949093668e-06, "logits/chosen": -2.66930890083313, "logits/rejected": -2.6495189666748047, "logps/chosen": -1223.517333984375, "logps/rejected": -1291.786376953125, "loss": 0.6465, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.35710740089416504, "rewards/margins": 0.2592705488204956, "rewards/rejected": -0.6163779497146606, "step": 5490 }, { "epoch": 1.44, "learning_rate": 1.1043751614448543e-06, "logits/chosen": -2.736130475997925, "logits/rejected": -2.7363991737365723, "logps/chosen": -1665.0625, "logps/rejected": -1605.57861328125, "loss": 0.6421, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.3382241725921631, "rewards/margins": 0.17074397206306458, "rewards/rejected": -0.5089680552482605, "step": 5500 }, { "epoch": 1.44, "eval_logits/chosen": -2.708101987838745, "eval_logits/rejected": -2.6991231441497803, "eval_logps/chosen": -1601.6962890625, "eval_logps/rejected": -1405.801025390625, "eval_loss": 0.6449205875396729, "eval_rewards/accuracies": 0.6408730149269104, "eval_rewards/chosen": -0.3914722204208374, "eval_rewards/margins": 0.14989058673381805, "eval_rewards/rejected": -0.5413628220558167, "eval_runtime": 222.0008, "eval_samples_per_second": 9.009, "eval_steps_per_second": 0.284, "step": 5500 }, { "epoch": 1.44, "learning_rate": 1.0949143532283107e-06, "logits/chosen": -2.72534441947937, "logits/rejected": -2.692267656326294, "logps/chosen": -1723.418701171875, "logps/rejected": -1653.270751953125, "loss": 0.6283, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.31096896529197693, "rewards/margins": 0.22809436917304993, "rewards/rejected": -0.5390633344650269, "step": 5510 }, { "epoch": 1.44, "learning_rate": 1.0854828676970275e-06, "logits/chosen": -2.719973087310791, "logits/rejected": -2.6971845626831055, "logps/chosen": -1367.62109375, "logps/rejected": -1190.7325439453125, "loss": 0.6439, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.4206356108188629, "rewards/margins": 0.029691871255636215, "rewards/rejected": -0.45032748579978943, "step": 5520 }, { "epoch": 1.45, "learning_rate": 1.076080901676361e-06, "logits/chosen": -2.710170269012451, "logits/rejected": -2.699993848800659, "logps/chosen": -1609.293212890625, "logps/rejected": -1397.969970703125, "loss": 0.6515, "rewards/accuracies": 0.625, "rewards/chosen": -0.4713289141654968, "rewards/margins": 0.12276891618967056, "rewards/rejected": -0.5940978527069092, "step": 5530 }, { "epoch": 1.45, "learning_rate": 1.0667086513756234e-06, "logits/chosen": -2.6901488304138184, "logits/rejected": -2.6973462104797363, "logps/chosen": -1350.40283203125, "logps/rejected": -1136.89599609375, "loss": 0.6265, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.3778039813041687, "rewards/margins": 0.18645079433918, "rewards/rejected": -0.5642547607421875, "step": 5540 }, { "epoch": 1.45, "learning_rate": 1.0573663123839912e-06, "logits/chosen": -2.696373224258423, "logits/rejected": -2.69925856590271, "logps/chosen": -1330.9554443359375, "logps/rejected": -1100.8270263671875, "loss": 0.6099, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.4058258533477783, "rewards/margins": 0.26610302925109863, "rewards/rejected": -0.671928882598877, "step": 5550 }, { "epoch": 1.46, "learning_rate": 1.0480540796664251e-06, "logits/chosen": -2.6961722373962402, "logits/rejected": -2.693441152572632, "logps/chosen": -1421.529052734375, "logps/rejected": -1489.1346435546875, "loss": 0.6441, "rewards/accuracies": 0.625, "rewards/chosen": -0.4659119248390198, "rewards/margins": 0.11813143640756607, "rewards/rejected": -0.5840433239936829, "step": 5560 }, { "epoch": 1.46, "learning_rate": 1.0387721475595978e-06, "logits/chosen": -2.7045772075653076, "logits/rejected": -2.6836910247802734, "logps/chosen": -1542.04248046875, "logps/rejected": -1230.510986328125, "loss": 0.6117, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.41788873076438904, "rewards/margins": 0.1756511926651001, "rewards/rejected": -0.5935398936271667, "step": 5570 }, { "epoch": 1.46, "learning_rate": 1.0295207097678378e-06, "logits/chosen": -2.71760892868042, "logits/rejected": -2.69905424118042, "logps/chosen": -1756.976318359375, "logps/rejected": -1334.25146484375, "loss": 0.6428, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.4227485656738281, "rewards/margins": 0.22662191092967987, "rewards/rejected": -0.6493704915046692, "step": 5580 }, { "epoch": 1.46, "learning_rate": 1.0202999593590924e-06, "logits/chosen": -2.721705675125122, "logits/rejected": -2.6900992393493652, "logps/chosen": -1567.1680908203125, "logps/rejected": -1261.202880859375, "loss": 0.628, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.42403268814086914, "rewards/margins": 0.19053298234939575, "rewards/rejected": -0.6145657300949097, "step": 5590 }, { "epoch": 1.47, "learning_rate": 1.011110088760891e-06, "logits/chosen": -2.667708396911621, "logits/rejected": -2.6748549938201904, "logps/chosen": -1374.74658203125, "logps/rejected": -1232.95849609375, "loss": 0.658, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.28120842576026917, "rewards/margins": 0.15552183985710144, "rewards/rejected": -0.436730295419693, "step": 5600 }, { "epoch": 1.47, "eval_logits/chosen": -2.7027201652526855, "eval_logits/rejected": -2.693756103515625, "eval_logps/chosen": -1602.7802734375, "eval_logps/rejected": -1407.1986083984375, "eval_loss": 0.6451008319854736, "eval_rewards/accuracies": 0.6428571343421936, "eval_rewards/chosen": -0.40230992436408997, "eval_rewards/margins": 0.15302817523479462, "eval_rewards/rejected": -0.5553380846977234, "eval_runtime": 221.8522, "eval_samples_per_second": 9.015, "eval_steps_per_second": 0.284, "step": 5600 }, { "epoch": 1.47, "learning_rate": 1.0019512897563347e-06, "logits/chosen": -2.687178134918213, "logits/rejected": -2.696901798248291, "logps/chosen": -1677.1754150390625, "logps/rejected": -1356.8736572265625, "loss": 0.6386, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.41005244851112366, "rewards/margins": 0.22635486721992493, "rewards/rejected": -0.6364073753356934, "step": 5610 }, { "epoch": 1.47, "learning_rate": 9.928237534800935e-07, "logits/chosen": -2.709001064300537, "logits/rejected": -2.7000620365142822, "logps/chosen": -1754.002197265625, "logps/rejected": -1632.352294921875, "loss": 0.5925, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.41502103209495544, "rewards/margins": 0.1642555445432663, "rewards/rejected": -0.5792765617370605, "step": 5620 }, { "epoch": 1.47, "learning_rate": 9.837276704144174e-07, "logits/chosen": -2.6920666694641113, "logits/rejected": -2.690368175506592, "logps/chosen": -1598.8590087890625, "logps/rejected": -1336.1282958984375, "loss": 0.6366, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.4579346179962158, "rewards/margins": 0.12214440107345581, "rewards/rejected": -0.5800789594650269, "step": 5630 }, { "epoch": 1.48, "learning_rate": 9.746632303851569e-07, "logits/chosen": -2.7054312229156494, "logits/rejected": -2.6833667755126953, "logps/chosen": -1388.1898193359375, "logps/rejected": -1140.6986083984375, "loss": 0.6353, "rewards/accuracies": 0.625, "rewards/chosen": -0.4576742649078369, "rewards/margins": 0.07921469211578369, "rewards/rejected": -0.5368889570236206, "step": 5640 }, { "epoch": 1.48, "learning_rate": 9.65630622557809e-07, "logits/chosen": -2.681272029876709, "logits/rejected": -2.6905956268310547, "logps/chosen": -1354.0238037109375, "logps/rejected": -1291.4276123046875, "loss": 0.6635, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.45517176389694214, "rewards/margins": 0.15478548407554626, "rewards/rejected": -0.6099572777748108, "step": 5650 }, { "epoch": 1.48, "learning_rate": 9.56630035433561e-07, "logits/chosen": -2.6686363220214844, "logits/rejected": -2.6861915588378906, "logps/chosen": -1292.4571533203125, "logps/rejected": -1379.446044921875, "loss": 0.6768, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.47962379455566406, "rewards/margins": 0.084124356508255, "rewards/rejected": -0.5637482404708862, "step": 5660 }, { "epoch": 1.48, "learning_rate": 9.476616568453659e-07, "logits/chosen": -2.692587375640869, "logits/rejected": -2.6729211807250977, "logps/chosen": -1336.821533203125, "logps/rejected": -1283.6484375, "loss": 0.6387, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.34590721130371094, "rewards/margins": 0.15082183480262756, "rewards/rejected": -0.4967290759086609, "step": 5670 }, { "epoch": 1.49, "learning_rate": 9.387256739540162e-07, "logits/chosen": -2.7087090015411377, "logits/rejected": -2.679457664489746, "logps/chosen": -1880.095947265625, "logps/rejected": -1294.837890625, "loss": 0.649, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.32686570286750793, "rewards/margins": 0.17968794703483582, "rewards/rejected": -0.506553590297699, "step": 5680 }, { "epoch": 1.49, "learning_rate": 9.298222732442377e-07, "logits/chosen": -2.685485363006592, "logits/rejected": -2.6599249839782715, "logps/chosen": -1625.441650390625, "logps/rejected": -1470.1597900390625, "loss": 0.6498, "rewards/accuracies": 0.625, "rewards/chosen": -0.36153444647789, "rewards/margins": 0.1952248066663742, "rewards/rejected": -0.5567591786384583, "step": 5690 }, { "epoch": 1.49, "learning_rate": 9.20951640520803e-07, "logits/chosen": -2.6474082469940186, "logits/rejected": -2.6487858295440674, "logps/chosen": -1660.1871337890625, "logps/rejected": -1259.378662109375, "loss": 0.6437, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.3772795796394348, "rewards/margins": 0.26258450746536255, "rewards/rejected": -0.6398640871047974, "step": 5700 }, { "epoch": 1.49, "eval_logits/chosen": -2.6972296237945557, "eval_logits/rejected": -2.6883459091186523, "eval_logps/chosen": -1603.052734375, "eval_logps/rejected": -1407.21630859375, "eval_loss": 0.6453641653060913, "eval_rewards/accuracies": 0.6388888955116272, "eval_rewards/chosen": -0.4050370156764984, "eval_rewards/margins": 0.15047885477542877, "eval_rewards/rejected": -0.5555158853530884, "eval_runtime": 221.8964, "eval_samples_per_second": 9.013, "eval_steps_per_second": 0.284, "step": 5700 }, { "epoch": 1.49, "learning_rate": 9.121139609046484e-07, "logits/chosen": -2.665215015411377, "logits/rejected": -2.653277635574341, "logps/chosen": -1373.9349365234375, "logps/rejected": -847.8890380859375, "loss": 0.6369, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.4055308401584625, "rewards/margins": 0.24549469351768494, "rewards/rejected": -0.6510254740715027, "step": 5710 }, { "epoch": 1.5, "learning_rate": 9.033094188290121e-07, "logits/chosen": -2.7135136127471924, "logits/rejected": -2.6930954456329346, "logps/chosen": -1529.5589599609375, "logps/rejected": -1347.803466796875, "loss": 0.6189, "rewards/accuracies": 0.625, "rewards/chosen": -0.42335405945777893, "rewards/margins": 0.19795912504196167, "rewards/rejected": -0.621313214302063, "step": 5720 }, { "epoch": 1.5, "learning_rate": 8.945381980355889e-07, "logits/chosen": -2.6918070316314697, "logits/rejected": -2.694427013397217, "logps/chosen": -1536.6533203125, "logps/rejected": -1356.972412109375, "loss": 0.6446, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.35820502042770386, "rewards/margins": 0.21345534920692444, "rewards/rejected": -0.5716603994369507, "step": 5730 }, { "epoch": 1.5, "learning_rate": 8.858004815706919e-07, "logits/chosen": -2.695432186126709, "logits/rejected": -2.684980869293213, "logps/chosen": -1508.866943359375, "logps/rejected": -1638.4742431640625, "loss": 0.637, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.40777310729026794, "rewards/margins": 0.200104758143425, "rewards/rejected": -0.6078779101371765, "step": 5740 }, { "epoch": 1.5, "learning_rate": 8.77096451781432e-07, "logits/chosen": -2.720970869064331, "logits/rejected": -2.725588321685791, "logps/chosen": -1464.5390625, "logps/rejected": -1301.757568359375, "loss": 0.653, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.3903093636035919, "rewards/margins": 0.22096291184425354, "rewards/rejected": -0.6112722158432007, "step": 5750 }, { "epoch": 1.51, "learning_rate": 8.684262903119165e-07, "logits/chosen": -2.6991419792175293, "logits/rejected": -2.6937973499298096, "logps/chosen": -1612.3106689453125, "logps/rejected": -1525.4544677734375, "loss": 0.6388, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.38578009605407715, "rewards/margins": 0.16267183423042297, "rewards/rejected": -0.5484519600868225, "step": 5760 }, { "epoch": 1.51, "learning_rate": 8.597901780994525e-07, "logits/chosen": -2.7179884910583496, "logits/rejected": -2.6874876022338867, "logps/chosen": -1531.790283203125, "logps/rejected": -1408.367919921875, "loss": 0.6342, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.4343856871128082, "rewards/margins": 0.11403163522481918, "rewards/rejected": -0.548417329788208, "step": 5770 }, { "epoch": 1.51, "learning_rate": 8.511882953707773e-07, "logits/chosen": -2.71705961227417, "logits/rejected": -2.721656560897827, "logps/chosen": -1742.287841796875, "logps/rejected": -1498.193115234375, "loss": 0.6361, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.40885210037231445, "rewards/margins": 0.07669506967067719, "rewards/rejected": -0.48554715514183044, "step": 5780 }, { "epoch": 1.52, "learning_rate": 8.426208216382944e-07, "logits/chosen": -2.681305170059204, "logits/rejected": -2.688035726547241, "logps/chosen": -1341.0355224609375, "logps/rejected": -1515.4603271484375, "loss": 0.6474, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.39439091086387634, "rewards/margins": 0.21398195624351501, "rewards/rejected": -0.6083729267120361, "step": 5790 }, { "epoch": 1.52, "learning_rate": 8.340879356963245e-07, "logits/chosen": -2.6872317790985107, "logits/rejected": -2.678461790084839, "logps/chosen": -1467.8450927734375, "logps/rejected": -1348.451904296875, "loss": 0.6289, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.3274436891078949, "rewards/margins": 0.23764677345752716, "rewards/rejected": -0.565090537071228, "step": 5800 }, { "epoch": 1.52, "eval_logits/chosen": -2.7093505859375, "eval_logits/rejected": -2.7007176876068115, "eval_logps/chosen": -1602.4105224609375, "eval_logps/rejected": -1406.861083984375, "eval_loss": 0.6442674398422241, "eval_rewards/accuracies": 0.64682537317276, "eval_rewards/chosen": -0.3986143171787262, "eval_rewards/margins": 0.15335094928741455, "eval_rewards/rejected": -0.5519652366638184, "eval_runtime": 221.9954, "eval_samples_per_second": 9.009, "eval_steps_per_second": 0.284, "step": 5800 }, { "epoch": 1.52, "learning_rate": 8.255898156173777e-07, "logits/chosen": -2.721546173095703, "logits/rejected": -2.7181859016418457, "logps/chosen": -1618.231201171875, "logps/rejected": -1498.933837890625, "loss": 0.6084, "rewards/accuracies": 0.75, "rewards/chosen": -0.32977229356765747, "rewards/margins": 0.3294047713279724, "rewards/rejected": -0.6591770648956299, "step": 5810 }, { "epoch": 1.52, "learning_rate": 8.171266387484389e-07, "logits/chosen": -2.716600179672241, "logits/rejected": -2.7120096683502197, "logps/chosen": -1584.3115234375, "logps/rejected": -1282.3509521484375, "loss": 0.6282, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.40199166536331177, "rewards/margins": 0.1491709053516388, "rewards/rejected": -0.5511625409126282, "step": 5820 }, { "epoch": 1.53, "learning_rate": 8.086985817072604e-07, "logits/chosen": -2.7204251289367676, "logits/rejected": -2.71130108833313, "logps/chosen": -1321.2801513671875, "logps/rejected": -1095.7039794921875, "loss": 0.6577, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.47304391860961914, "rewards/margins": 0.024396944791078568, "rewards/rejected": -0.4974408745765686, "step": 5830 }, { "epoch": 1.53, "learning_rate": 8.003058203786835e-07, "logits/chosen": -2.6963999271392822, "logits/rejected": -2.680232048034668, "logps/chosen": -1220.0439453125, "logps/rejected": -1195.7838134765625, "loss": 0.6135, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.43111515045166016, "rewards/margins": 0.16377350687980652, "rewards/rejected": -0.5948886871337891, "step": 5840 }, { "epoch": 1.53, "learning_rate": 7.91948529910963e-07, "logits/chosen": -2.681727409362793, "logits/rejected": -2.673245906829834, "logps/chosen": -1630.849853515625, "logps/rejected": -1601.662353515625, "loss": 0.6352, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.36390581727027893, "rewards/margins": 0.21514716744422913, "rewards/rejected": -0.5790529847145081, "step": 5850 }, { "epoch": 1.53, "learning_rate": 7.836268847121126e-07, "logits/chosen": -2.7189762592315674, "logits/rejected": -2.7372474670410156, "logps/chosen": -1822.910888671875, "logps/rejected": -1758.435546875, "loss": 0.651, "rewards/accuracies": 0.625, "rewards/chosen": -0.4066740572452545, "rewards/margins": 0.16316178441047668, "rewards/rejected": -0.569835901260376, "step": 5860 }, { "epoch": 1.54, "learning_rate": 7.753410584462681e-07, "logits/chosen": -2.733602285385132, "logits/rejected": -2.725095272064209, "logps/chosen": -1537.243896484375, "logps/rejected": -1504.42431640625, "loss": 0.6489, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.3789084851741791, "rewards/margins": 0.12120094150304794, "rewards/rejected": -0.5001094937324524, "step": 5870 }, { "epoch": 1.54, "learning_rate": 7.670912240300596e-07, "logits/chosen": -2.6847469806671143, "logits/rejected": -2.678020715713501, "logps/chosen": -1495.7813720703125, "logps/rejected": -1345.4945068359375, "loss": 0.6543, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.3611437976360321, "rewards/margins": 0.15165142714977264, "rewards/rejected": -0.5127952694892883, "step": 5880 }, { "epoch": 1.54, "learning_rate": 7.588775536290035e-07, "logits/chosen": -2.6968045234680176, "logits/rejected": -2.6823792457580566, "logps/chosen": -1608.9373779296875, "logps/rejected": -1043.4354248046875, "loss": 0.63, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.34363892674446106, "rewards/margins": 0.16888666152954102, "rewards/rejected": -0.5125256776809692, "step": 5890 }, { "epoch": 1.54, "learning_rate": 7.507002186539147e-07, "logits/chosen": -2.6805570125579834, "logits/rejected": -2.675029993057251, "logps/chosen": -1435.6429443359375, "logps/rejected": -1435.0908203125, "loss": 0.6361, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.39160555601119995, "rewards/margins": 0.1981893628835678, "rewards/rejected": -0.589794933795929, "step": 5900 }, { "epoch": 1.54, "eval_logits/chosen": -2.7046704292297363, "eval_logits/rejected": -2.696174383163452, "eval_logps/chosen": -1602.9124755859375, "eval_logps/rejected": -1407.40869140625, "eval_loss": 0.6442207098007202, "eval_rewards/accuracies": 0.6408730149269104, "eval_rewards/chosen": -0.4036337435245514, "eval_rewards/margins": 0.1538066416978836, "eval_rewards/rejected": -0.5574404001235962, "eval_runtime": 221.9282, "eval_samples_per_second": 9.012, "eval_steps_per_second": 0.284, "step": 5900 }, { "epoch": 1.55, "learning_rate": 7.425593897573216e-07, "logits/chosen": -2.693079948425293, "logits/rejected": -2.6898703575134277, "logps/chosen": -1721.556396484375, "logps/rejected": -1643.6937255859375, "loss": 0.632, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.3537727892398834, "rewards/margins": 0.3143417239189148, "rewards/rejected": -0.6681144833564758, "step": 5910 }, { "epoch": 1.55, "learning_rate": 7.344552368299088e-07, "logits/chosen": -2.6862666606903076, "logits/rejected": -2.694638967514038, "logps/chosen": -1307.9896240234375, "logps/rejected": -1450.29443359375, "loss": 0.6364, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.4261978268623352, "rewards/margins": 0.1255144625902176, "rewards/rejected": -0.5517122745513916, "step": 5920 }, { "epoch": 1.55, "learning_rate": 7.26387928996973e-07, "logits/chosen": -2.692228317260742, "logits/rejected": -2.698779344558716, "logps/chosen": -1149.926025390625, "logps/rejected": -1233.862548828125, "loss": 0.6409, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.46561938524246216, "rewards/margins": 0.2557544708251953, "rewards/rejected": -0.7213739156723022, "step": 5930 }, { "epoch": 1.55, "learning_rate": 7.183576346148899e-07, "logits/chosen": -2.6880781650543213, "logits/rejected": -2.6710593700408936, "logps/chosen": -1776.842529296875, "logps/rejected": -1320.5303955078125, "loss": 0.639, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.30308040976524353, "rewards/margins": 0.35630732774734497, "rewards/rejected": -0.6593877673149109, "step": 5940 }, { "epoch": 1.56, "learning_rate": 7.103645212676044e-07, "logits/chosen": -2.6921093463897705, "logits/rejected": -2.6773791313171387, "logps/chosen": -1392.0758056640625, "logps/rejected": -1509.171875, "loss": 0.6312, "rewards/accuracies": 0.5, "rewards/chosen": -0.4809054434299469, "rewards/margins": 0.11313720047473907, "rewards/rejected": -0.5940426588058472, "step": 5950 }, { "epoch": 1.56, "learning_rate": 7.024087557631318e-07, "logits/chosen": -2.702073097229004, "logits/rejected": -2.7095000743865967, "logps/chosen": -1254.189697265625, "logps/rejected": -1364.05615234375, "loss": 0.6292, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.44157737493515015, "rewards/margins": 0.08752346783876419, "rewards/rejected": -0.5291008353233337, "step": 5960 }, { "epoch": 1.56, "learning_rate": 6.944905041300739e-07, "logits/chosen": -2.6660048961639404, "logits/rejected": -2.6542418003082275, "logps/chosen": -1623.0982666015625, "logps/rejected": -1455.97802734375, "loss": 0.603, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.40555793046951294, "rewards/margins": 0.33708950877189636, "rewards/rejected": -0.7426473498344421, "step": 5970 }, { "epoch": 1.57, "learning_rate": 6.866099316141606e-07, "logits/chosen": -2.702817678451538, "logits/rejected": -2.716486930847168, "logps/chosen": -1421.0152587890625, "logps/rejected": -1557.4527587890625, "loss": 0.6305, "rewards/accuracies": 0.625, "rewards/chosen": -0.42785245180130005, "rewards/margins": 0.20500314235687256, "rewards/rejected": -0.6328555941581726, "step": 5980 }, { "epoch": 1.57, "learning_rate": 6.787672026747946e-07, "logits/chosen": -2.698267698287964, "logits/rejected": -2.677493095397949, "logps/chosen": -1327.359375, "logps/rejected": -1614.907470703125, "loss": 0.6448, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.5610150098800659, "rewards/margins": 0.19009160995483398, "rewards/rejected": -0.7511066198348999, "step": 5990 }, { "epoch": 1.57, "learning_rate": 6.709624809816223e-07, "logits/chosen": -2.701934337615967, "logits/rejected": -2.692894458770752, "logps/chosen": -1514.8740234375, "logps/rejected": -1444.414794921875, "loss": 0.6374, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.35164642333984375, "rewards/margins": 0.16492925584316254, "rewards/rejected": -0.5165756940841675, "step": 6000 }, { "epoch": 1.57, "eval_logits/chosen": -2.7047617435455322, "eval_logits/rejected": -2.6962783336639404, "eval_logps/chosen": -1604.185302734375, "eval_logps/rejected": -1408.8310546875, "eval_loss": 0.644648551940918, "eval_rewards/accuracies": 0.6428571343421936, "eval_rewards/chosen": -0.4163608253002167, "eval_rewards/margins": 0.15530355274677277, "eval_rewards/rejected": -0.5716643929481506, "eval_runtime": 221.9315, "eval_samples_per_second": 9.012, "eval_steps_per_second": 0.284, "step": 6000 }, { "epoch": 1.57, "learning_rate": 6.6319592941112e-07, "logits/chosen": -2.7169597148895264, "logits/rejected": -2.6866252422332764, "logps/chosen": -1711.214111328125, "logps/rejected": -1542.6513671875, "loss": 0.656, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.2974657416343689, "rewards/margins": 0.2659049928188324, "rewards/rejected": -0.5633708238601685, "step": 6010 }, { "epoch": 1.58, "learning_rate": 6.554677100431927e-07, "logits/chosen": -2.733557939529419, "logits/rejected": -2.715567111968994, "logps/chosen": -1610.2353515625, "logps/rejected": -1287.1390380859375, "loss": 0.6268, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.3757496178150177, "rewards/margins": 0.29556283354759216, "rewards/rejected": -0.6713123917579651, "step": 6020 }, { "epoch": 1.58, "learning_rate": 6.4777798415779e-07, "logits/chosen": -2.7137365341186523, "logits/rejected": -2.7267062664031982, "logps/chosen": -1452.125, "logps/rejected": -1292.182373046875, "loss": 0.5904, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.3056414723396301, "rewards/margins": 0.2070426493883133, "rewards/rejected": -0.512684166431427, "step": 6030 }, { "epoch": 1.58, "learning_rate": 6.401269122315451e-07, "logits/chosen": -2.7094180583953857, "logits/rejected": -2.7037181854248047, "logps/chosen": -1874.9847412109375, "logps/rejected": -1374.64697265625, "loss": 0.6367, "rewards/accuracies": 0.75, "rewards/chosen": -0.3752003312110901, "rewards/margins": 0.28431040048599243, "rewards/rejected": -0.6595107316970825, "step": 6040 }, { "epoch": 1.58, "learning_rate": 6.325146539344196e-07, "logits/chosen": -2.6920981407165527, "logits/rejected": -2.6991913318634033, "logps/chosen": -1592.9559326171875, "logps/rejected": -1228.34375, "loss": 0.6741, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.42658185958862305, "rewards/margins": 0.15396739542484283, "rewards/rejected": -0.5805492997169495, "step": 6050 }, { "epoch": 1.59, "learning_rate": 6.249413681263782e-07, "logits/chosen": -2.6854660511016846, "logits/rejected": -2.689037561416626, "logps/chosen": -1591.2857666015625, "logps/rejected": -1392.5634765625, "loss": 0.6136, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.3609497547149658, "rewards/margins": 0.27985721826553345, "rewards/rejected": -0.6408069133758545, "step": 6060 }, { "epoch": 1.59, "learning_rate": 6.174072128540686e-07, "logits/chosen": -2.7083308696746826, "logits/rejected": -2.6896657943725586, "logps/chosen": -1535.8818359375, "logps/rejected": -1262.798095703125, "loss": 0.6266, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.35114628076553345, "rewards/margins": 0.3368942141532898, "rewards/rejected": -0.6880404949188232, "step": 6070 }, { "epoch": 1.59, "learning_rate": 6.099123453475245e-07, "logits/chosen": -2.713439464569092, "logits/rejected": -2.681724786758423, "logps/chosen": -1554.7132568359375, "logps/rejected": -1333.768798828125, "loss": 0.6509, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.4823225438594818, "rewards/margins": 0.2098710536956787, "rewards/rejected": -0.6921936273574829, "step": 6080 }, { "epoch": 1.59, "learning_rate": 6.024569220168836e-07, "logits/chosen": -2.6846044063568115, "logits/rejected": -2.6816964149475098, "logps/chosen": -1673.781494140625, "logps/rejected": -1116.6241455078125, "loss": 0.6224, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.39769247174263, "rewards/margins": 0.258812814950943, "rewards/rejected": -0.6565052270889282, "step": 6090 }, { "epoch": 1.6, "learning_rate": 5.950410984491268e-07, "logits/chosen": -2.684141159057617, "logits/rejected": -2.654639720916748, "logps/chosen": -1464.31103515625, "logps/rejected": -1539.7857666015625, "loss": 0.6423, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.4491928517818451, "rewards/margins": 0.1657174527645111, "rewards/rejected": -0.6149102449417114, "step": 6100 }, { "epoch": 1.6, "eval_logits/chosen": -2.6991634368896484, "eval_logits/rejected": -2.6905128955841064, "eval_logps/chosen": -1604.669189453125, "eval_logps/rejected": -1409.4735107421875, "eval_loss": 0.6447591781616211, "eval_rewards/accuracies": 0.6349206566810608, "eval_rewards/chosen": -0.42120110988616943, "eval_rewards/margins": 0.15688644349575043, "eval_rewards/rejected": -0.5780875086784363, "eval_runtime": 221.9398, "eval_samples_per_second": 9.011, "eval_steps_per_second": 0.284, "step": 6100 }, { "epoch": 1.6, "learning_rate": 5.876650294048262e-07, "logits/chosen": -2.6803715229034424, "logits/rejected": -2.6851279735565186, "logps/chosen": -1618.722900390625, "logps/rejected": -1358.609619140625, "loss": 0.6144, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.3551791310310364, "rewards/margins": 0.24444207549095154, "rewards/rejected": -0.5996211767196655, "step": 6110 }, { "epoch": 1.6, "learning_rate": 5.8032886881492e-07, "logits/chosen": -2.7037949562072754, "logits/rejected": -2.673633575439453, "logps/chosen": -1692.9459228515625, "logps/rejected": -1603.280517578125, "loss": 0.6317, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.4877226948738098, "rewards/margins": 0.14314484596252441, "rewards/rejected": -0.6308675408363342, "step": 6120 }, { "epoch": 1.6, "learning_rate": 5.730327697774988e-07, "logits/chosen": -2.7081751823425293, "logits/rejected": -2.6860859394073486, "logps/chosen": -1419.7940673828125, "logps/rejected": -1213.22412109375, "loss": 0.6271, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.3978734016418457, "rewards/margins": 0.1460903137922287, "rewards/rejected": -0.5439636707305908, "step": 6130 }, { "epoch": 1.61, "learning_rate": 5.657768845546068e-07, "logits/chosen": -2.7000532150268555, "logits/rejected": -2.697673797607422, "logps/chosen": -1345.072265625, "logps/rejected": -1430.123046875, "loss": 0.6313, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.4374998211860657, "rewards/margins": 0.19011881947517395, "rewards/rejected": -0.627618670463562, "step": 6140 }, { "epoch": 1.61, "learning_rate": 5.585613645690713e-07, "logits/chosen": -2.675696849822998, "logits/rejected": -2.6756703853607178, "logps/chosen": -1513.296630859375, "logps/rejected": -1221.71435546875, "loss": 0.6418, "rewards/accuracies": 0.625, "rewards/chosen": -0.5287090539932251, "rewards/margins": 0.09966927766799927, "rewards/rejected": -0.6283783912658691, "step": 6150 }, { "epoch": 1.61, "learning_rate": 5.513863604013355e-07, "logits/chosen": -2.7069761753082275, "logits/rejected": -2.719494581222534, "logps/chosen": -1532.029052734375, "logps/rejected": -1490.412353515625, "loss": 0.6218, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.4088051915168762, "rewards/margins": 0.22951212525367737, "rewards/rejected": -0.6383172869682312, "step": 6160 }, { "epoch": 1.61, "learning_rate": 5.442520217863215e-07, "logits/chosen": -2.7155890464782715, "logits/rejected": -2.706444263458252, "logps/chosen": -1841.702392578125, "logps/rejected": -1519.7572021484375, "loss": 0.6097, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.2945634722709656, "rewards/margins": 0.31701231002807617, "rewards/rejected": -0.6115757822990417, "step": 6170 }, { "epoch": 1.62, "learning_rate": 5.371584976103034e-07, "logits/chosen": -2.6755757331848145, "logits/rejected": -2.682091236114502, "logps/chosen": -1305.5174560546875, "logps/rejected": -1578.818603515625, "loss": 0.6232, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.4950660169124603, "rewards/margins": 0.0794348269701004, "rewards/rejected": -0.5745008587837219, "step": 6180 }, { "epoch": 1.62, "learning_rate": 5.301059359077987e-07, "logits/chosen": -2.680753231048584, "logits/rejected": -2.6786160469055176, "logps/chosen": -1549.20654296875, "logps/rejected": -1367.5374755859375, "loss": 0.6435, "rewards/accuracies": 0.625, "rewards/chosen": -0.44760221242904663, "rewards/margins": 0.1415398269891739, "rewards/rejected": -0.5891419649124146, "step": 6190 }, { "epoch": 1.62, "learning_rate": 5.230944838584806e-07, "logits/chosen": -2.7030324935913086, "logits/rejected": -2.6957592964172363, "logps/chosen": -1537.458740234375, "logps/rejected": -1226.929443359375, "loss": 0.6611, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.5085504651069641, "rewards/margins": 0.0372183658182621, "rewards/rejected": -0.5457688570022583, "step": 6200 }, { "epoch": 1.62, "eval_logits/chosen": -2.7009968757629395, "eval_logits/rejected": -2.6925363540649414, "eval_logps/chosen": -1605.986572265625, "eval_logps/rejected": -1410.8238525390625, "eval_loss": 0.6452645063400269, "eval_rewards/accuracies": 0.625, "eval_rewards/chosen": -0.4343767464160919, "eval_rewards/margins": 0.1572161614894867, "eval_rewards/rejected": -0.5915929675102234, "eval_runtime": 221.9554, "eval_samples_per_second": 9.011, "eval_steps_per_second": 0.284, "step": 6200 }, { "epoch": 1.63, "learning_rate": 5.161242877841083e-07, "logits/chosen": -2.710780620574951, "logits/rejected": -2.714012622833252, "logps/chosen": -1271.187255859375, "logps/rejected": -1383.121337890625, "loss": 0.6161, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.4201042652130127, "rewards/margins": 0.12770195305347443, "rewards/rejected": -0.5478062629699707, "step": 6210 }, { "epoch": 1.63, "learning_rate": 5.091954931454682e-07, "logits/chosen": -2.6867432594299316, "logits/rejected": -2.6809990406036377, "logps/chosen": -1415.892333984375, "logps/rejected": -1244.710693359375, "loss": 0.6515, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.4569178521633148, "rewards/margins": 0.14460769295692444, "rewards/rejected": -0.601525604724884, "step": 6220 }, { "epoch": 1.63, "learning_rate": 5.023082445393446e-07, "logits/chosen": -2.648237705230713, "logits/rejected": -2.663208246231079, "logps/chosen": -1238.7562255859375, "logps/rejected": -1183.0709228515625, "loss": 0.6458, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.41196736693382263, "rewards/margins": 0.1221848726272583, "rewards/rejected": -0.5341522097587585, "step": 6230 }, { "epoch": 1.63, "learning_rate": 4.95462685695498e-07, "logits/chosen": -2.716076135635376, "logits/rejected": -2.702650547027588, "logps/chosen": -1689.1253662109375, "logps/rejected": -1441.07373046875, "loss": 0.6646, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.4031899869441986, "rewards/margins": 0.01048656739294529, "rewards/rejected": -0.41367655992507935, "step": 6240 }, { "epoch": 1.64, "learning_rate": 4.88658959473666e-07, "logits/chosen": -2.6755599975585938, "logits/rejected": -2.678421974182129, "logps/chosen": -1729.2132568359375, "logps/rejected": -1477.9561767578125, "loss": 0.6439, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.4346516728401184, "rewards/margins": 0.22722020745277405, "rewards/rejected": -0.6618717908859253, "step": 6250 }, { "epoch": 1.64, "learning_rate": 4.818972078605821e-07, "logits/chosen": -2.67181134223938, "logits/rejected": -2.669654130935669, "logps/chosen": -1462.231689453125, "logps/rejected": -1391.5391845703125, "loss": 0.625, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.4106716215610504, "rewards/margins": 0.10334018617868423, "rewards/rejected": -0.514011800289154, "step": 6260 }, { "epoch": 1.64, "learning_rate": 4.7517757196701514e-07, "logits/chosen": -2.7111282348632812, "logits/rejected": -2.699073314666748, "logps/chosen": -2008.9390869140625, "logps/rejected": -1729.846923828125, "loss": 0.6381, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.38991931080818176, "rewards/margins": 0.37259799242019653, "rewards/rejected": -0.7625172734260559, "step": 6270 }, { "epoch": 1.64, "learning_rate": 4.6850019202482193e-07, "logits/chosen": -2.7020959854125977, "logits/rejected": -2.7092764377593994, "logps/chosen": -1485.4185791015625, "logps/rejected": -1375.3634033203125, "loss": 0.6131, "rewards/accuracies": 0.75, "rewards/chosen": -0.49320369958877563, "rewards/margins": 0.2785857915878296, "rewards/rejected": -0.7717894315719604, "step": 6280 }, { "epoch": 1.65, "learning_rate": 4.618652073840188e-07, "logits/chosen": -2.690502405166626, "logits/rejected": -2.7097690105438232, "logps/chosen": -1226.995361328125, "logps/rejected": -1234.814697265625, "loss": 0.6014, "rewards/accuracies": 0.625, "rewards/chosen": -0.3699643611907959, "rewards/margins": 0.2853412628173828, "rewards/rejected": -0.6553056240081787, "step": 6290 }, { "epoch": 1.65, "learning_rate": 4.5527275650987965e-07, "logits/chosen": -2.6978249549865723, "logits/rejected": -2.7032647132873535, "logps/chosen": -1490.8839111328125, "logps/rejected": -1340.5758056640625, "loss": 0.6355, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.4975271224975586, "rewards/margins": 0.1745329648256302, "rewards/rejected": -0.6720601320266724, "step": 6300 }, { "epoch": 1.65, "eval_logits/chosen": -2.7008187770843506, "eval_logits/rejected": -2.6922249794006348, "eval_logps/chosen": -1605.803466796875, "eval_logps/rejected": -1410.7569580078125, "eval_loss": 0.6450992226600647, "eval_rewards/accuracies": 0.625, "eval_rewards/chosen": -0.43254372477531433, "eval_rewards/margins": 0.15837757289409637, "eval_rewards/rejected": -0.5909213423728943, "eval_runtime": 221.9031, "eval_samples_per_second": 9.013, "eval_steps_per_second": 0.284, "step": 6300 }, { "epoch": 1.65, "learning_rate": 4.487229769800394e-07, "logits/chosen": -2.652451515197754, "logits/rejected": -2.6576945781707764, "logps/chosen": -1265.826171875, "logps/rejected": -1329.8004150390625, "loss": 0.6298, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.4063987135887146, "rewards/margins": 0.31934913992881775, "rewards/rejected": -0.72574782371521, "step": 6310 }, { "epoch": 1.65, "learning_rate": 4.422160054816285e-07, "logits/chosen": -2.680227756500244, "logits/rejected": -2.671940803527832, "logps/chosen": -1844.2279052734375, "logps/rejected": -1543.527587890625, "loss": 0.6514, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.42796629667282104, "rewards/margins": 0.08723724633455276, "rewards/rejected": -0.515203595161438, "step": 6320 }, { "epoch": 1.66, "learning_rate": 4.35751977808416e-07, "logits/chosen": -2.69242262840271, "logits/rejected": -2.6834845542907715, "logps/chosen": -1286.2861328125, "logps/rejected": -1022.7605590820312, "loss": 0.6204, "rewards/accuracies": 0.625, "rewards/chosen": -0.41723084449768066, "rewards/margins": 0.20962531864643097, "rewards/rejected": -0.6268561482429504, "step": 6330 }, { "epoch": 1.66, "learning_rate": 4.293310288579794e-07, "logits/chosen": -2.7137248516082764, "logits/rejected": -2.697584390640259, "logps/chosen": -1610.983154296875, "logps/rejected": -1428.408935546875, "loss": 0.6083, "rewards/accuracies": 0.75, "rewards/chosen": -0.3962358832359314, "rewards/margins": 0.24171388149261475, "rewards/rejected": -0.6379498243331909, "step": 6340 }, { "epoch": 1.66, "learning_rate": 4.2295329262888733e-07, "logits/chosen": -2.681994676589966, "logits/rejected": -2.6608827114105225, "logps/chosen": -1370.623779296875, "logps/rejected": -1160.845703125, "loss": 0.6433, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.448493093252182, "rewards/margins": 0.1068970188498497, "rewards/rejected": -0.5553901195526123, "step": 6350 }, { "epoch": 1.66, "learning_rate": 4.1661890221790316e-07, "logits/chosen": -2.685898780822754, "logits/rejected": -2.6794886589050293, "logps/chosen": -1368.075439453125, "logps/rejected": -1313.705078125, "loss": 0.6254, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.4664105474948883, "rewards/margins": 0.07501848042011261, "rewards/rejected": -0.5414290428161621, "step": 6360 }, { "epoch": 1.67, "learning_rate": 4.103279898172072e-07, "logits/chosen": -2.738821268081665, "logits/rejected": -2.7228472232818604, "logps/chosen": -1854.404052734375, "logps/rejected": -1583.7427978515625, "loss": 0.6266, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.25630882382392883, "rewards/margins": 0.2646670341491699, "rewards/rejected": -0.5209758281707764, "step": 6370 }, { "epoch": 1.67, "learning_rate": 4.040806867116401e-07, "logits/chosen": -2.72967791557312, "logits/rejected": -2.698141098022461, "logps/chosen": -1598.608154296875, "logps/rejected": -1285.6024169921875, "loss": 0.6149, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.44886884093284607, "rewards/margins": 0.24276570975780487, "rewards/rejected": -0.6916345953941345, "step": 6380 }, { "epoch": 1.67, "learning_rate": 3.978771232759615e-07, "logits/chosen": -2.6666808128356934, "logits/rejected": -2.676840305328369, "logps/chosen": -1426.3568115234375, "logps/rejected": -1619.660888671875, "loss": 0.6252, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.4658185839653015, "rewards/margins": 0.22935962677001953, "rewards/rejected": -0.6951782703399658, "step": 6390 }, { "epoch": 1.67, "learning_rate": 3.917174289721276e-07, "logits/chosen": -2.674344301223755, "logits/rejected": -2.6882286071777344, "logps/chosen": -1507.402587890625, "logps/rejected": -1146.4400634765625, "loss": 0.6555, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.5148229598999023, "rewards/margins": 0.18879520893096924, "rewards/rejected": -0.7036181688308716, "step": 6400 }, { "epoch": 1.67, "eval_logits/chosen": -2.7020721435546875, "eval_logits/rejected": -2.6935031414031982, "eval_logps/chosen": -1605.8125, "eval_logps/rejected": -1410.7894287109375, "eval_loss": 0.6451132297515869, "eval_rewards/accuracies": 0.6230158805847168, "eval_rewards/chosen": -0.43263548612594604, "eval_rewards/margins": 0.15861284732818604, "eval_rewards/rejected": -0.5912482142448425, "eval_runtime": 222.0105, "eval_samples_per_second": 9.009, "eval_steps_per_second": 0.284, "step": 6400 }, { "epoch": 1.68, "learning_rate": 3.856017323465938e-07, "logits/chosen": -2.706326723098755, "logits/rejected": -2.713017225265503, "logps/chosen": -1699.1292724609375, "logps/rejected": -1701.164794921875, "loss": 0.6221, "rewards/accuracies": 0.75, "rewards/chosen": -0.49689310789108276, "rewards/margins": 0.24944552779197693, "rewards/rejected": -0.7463387250900269, "step": 6410 }, { "epoch": 1.68, "learning_rate": 3.7953016102762695e-07, "logits/chosen": -2.691145420074463, "logits/rejected": -2.677729845046997, "logps/chosen": -1404.14501953125, "logps/rejected": -1107.845458984375, "loss": 0.6219, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.4841841757297516, "rewards/margins": 0.13497625291347504, "rewards/rejected": -0.6191604137420654, "step": 6420 }, { "epoch": 1.68, "learning_rate": 3.7350284172264493e-07, "logits/chosen": -2.677605390548706, "logits/rejected": -2.670762538909912, "logps/chosen": -1495.4324951171875, "logps/rejected": -1399.22021484375, "loss": 0.6222, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.4789021909236908, "rewards/margins": 0.1323304921388626, "rewards/rejected": -0.6112326979637146, "step": 6430 }, { "epoch": 1.69, "learning_rate": 3.67519900215573e-07, "logits/chosen": -2.718522310256958, "logits/rejected": -2.7079195976257324, "logps/chosen": -1269.6953125, "logps/rejected": -1307.8406982421875, "loss": 0.6652, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.43117189407348633, "rewards/margins": 0.12488353252410889, "rewards/rejected": -0.5560554265975952, "step": 6440 }, { "epoch": 1.69, "learning_rate": 3.615814613642174e-07, "logits/chosen": -2.695288896560669, "logits/rejected": -2.6770596504211426, "logps/chosen": -1622.7061767578125, "logps/rejected": -1300.5322265625, "loss": 0.613, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.35286465287208557, "rewards/margins": 0.2980819344520569, "rewards/rejected": -0.6509465575218201, "step": 6450 }, { "epoch": 1.69, "learning_rate": 3.5568764909765795e-07, "logits/chosen": -2.6688880920410156, "logits/rejected": -2.663649797439575, "logps/chosen": -1231.46923828125, "logps/rejected": -1042.579833984375, "loss": 0.6217, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.4582611620426178, "rewards/margins": 0.24432484805583954, "rewards/rejected": -0.7025860548019409, "step": 6460 }, { "epoch": 1.69, "learning_rate": 3.498385864136672e-07, "logits/chosen": -2.68622088432312, "logits/rejected": -2.699253559112549, "logps/chosen": -1828.1165771484375, "logps/rejected": -1496.741943359375, "loss": 0.6386, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.43254002928733826, "rewards/margins": 0.17335142195224762, "rewards/rejected": -0.6058914065361023, "step": 6470 }, { "epoch": 1.7, "learning_rate": 3.440343953761363e-07, "logits/chosen": -2.7274928092956543, "logits/rejected": -2.7285075187683105, "logps/chosen": -1574.9361572265625, "logps/rejected": -1374.0662841796875, "loss": 0.6368, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.5280033349990845, "rewards/margins": 0.18731360137462616, "rewards/rejected": -0.715316891670227, "step": 6480 }, { "epoch": 1.7, "learning_rate": 3.382751971125345e-07, "logits/chosen": -2.6805925369262695, "logits/rejected": -2.6991703510284424, "logps/chosen": -1428.466552734375, "logps/rejected": -1461.8367919921875, "loss": 0.6573, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -0.44011393189430237, "rewards/margins": 0.03761814907193184, "rewards/rejected": -0.47773200273513794, "step": 6490 }, { "epoch": 1.7, "learning_rate": 3.3256111181137753e-07, "logits/chosen": -2.701589584350586, "logits/rejected": -2.7013587951660156, "logps/chosen": -1421.63134765625, "logps/rejected": -1250.77880859375, "loss": 0.6584, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.4727795720100403, "rewards/margins": 0.12564000487327576, "rewards/rejected": -0.5984196066856384, "step": 6500 }, { "epoch": 1.7, "eval_logits/chosen": -2.6986544132232666, "eval_logits/rejected": -2.6900033950805664, "eval_logps/chosen": -1605.6461181640625, "eval_logps/rejected": -1410.715087890625, "eval_loss": 0.6449440121650696, "eval_rewards/accuracies": 0.6269841194152832, "eval_rewards/chosen": -0.43097057938575745, "eval_rewards/margins": 0.15953212976455688, "eval_rewards/rejected": -0.5905026793479919, "eval_runtime": 221.9035, "eval_samples_per_second": 9.013, "eval_steps_per_second": 0.284, "step": 6500 }, { "epoch": 1.7, "learning_rate": 3.2689225871971905e-07, "logits/chosen": -2.681798219680786, "logits/rejected": -2.6716222763061523, "logps/chosen": -1813.3060302734375, "logps/rejected": -1428.790283203125, "loss": 0.6145, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.3622683584690094, "rewards/margins": 0.19917793571949005, "rewards/rejected": -0.5614463090896606, "step": 6510 }, { "epoch": 1.71, "learning_rate": 3.2126875614066523e-07, "logits/chosen": -2.688297986984253, "logits/rejected": -2.6739754676818848, "logps/chosen": -1608.40673828125, "logps/rejected": -1535.1259765625, "loss": 0.6206, "rewards/accuracies": 0.625, "rewards/chosen": -0.42029422521591187, "rewards/margins": 0.12229009717702866, "rewards/rejected": -0.5425843000411987, "step": 6520 }, { "epoch": 1.71, "learning_rate": 3.156907214309024e-07, "logits/chosen": -2.684985637664795, "logits/rejected": -2.6793723106384277, "logps/chosen": -1526.4219970703125, "logps/rejected": -1399.494140625, "loss": 0.6199, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.3454342484474182, "rewards/margins": 0.2345781773328781, "rewards/rejected": -0.5800124406814575, "step": 6530 }, { "epoch": 1.71, "learning_rate": 3.1015827099824923e-07, "logits/chosen": -2.6967806816101074, "logits/rejected": -2.685199737548828, "logps/chosen": -1596.5302734375, "logps/rejected": -1385.5205078125, "loss": 0.6364, "rewards/accuracies": 0.625, "rewards/chosen": -0.3673211634159088, "rewards/margins": 0.29226335883140564, "rewards/rejected": -0.6595844626426697, "step": 6540 }, { "epoch": 1.71, "learning_rate": 3.0467152029922926e-07, "logits/chosen": -2.720731258392334, "logits/rejected": -2.7024600505828857, "logps/chosen": -1980.4075927734375, "logps/rejected": -1463.03515625, "loss": 0.6366, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.37323513627052307, "rewards/margins": 0.40360528230667114, "rewards/rejected": -0.7768403887748718, "step": 6550 }, { "epoch": 1.72, "learning_rate": 2.992305838366591e-07, "logits/chosen": -2.6530847549438477, "logits/rejected": -2.667252540588379, "logps/chosen": -1265.091552734375, "logps/rejected": -1235.925537109375, "loss": 0.6338, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.43354421854019165, "rewards/margins": 0.2392309159040451, "rewards/rejected": -0.672775149345398, "step": 6560 }, { "epoch": 1.72, "learning_rate": 2.938355751572583e-07, "logits/chosen": -2.712062358856201, "logits/rejected": -2.7131266593933105, "logps/chosen": -1501.1148681640625, "logps/rejected": -1550.7257080078125, "loss": 0.6582, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.5113368630409241, "rewards/margins": 0.08612775057554245, "rewards/rejected": -0.5974645614624023, "step": 6570 }, { "epoch": 1.72, "learning_rate": 2.8848660684928307e-07, "logits/chosen": -2.6969046592712402, "logits/rejected": -2.697132110595703, "logps/chosen": -1749.205810546875, "logps/rejected": -1346.0675048828125, "loss": 0.6395, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.36252182722091675, "rewards/margins": 0.21485641598701477, "rewards/rejected": -0.5773781538009644, "step": 6580 }, { "epoch": 1.72, "learning_rate": 2.8318379054017383e-07, "logits/chosen": -2.666499376296997, "logits/rejected": -2.661734104156494, "logps/chosen": -1770.1800537109375, "logps/rejected": -1656.2496337890625, "loss": 0.6425, "rewards/accuracies": 0.625, "rewards/chosen": -0.3796055018901825, "rewards/margins": 0.18472369015216827, "rewards/rejected": -0.5643291473388672, "step": 6590 }, { "epoch": 1.73, "learning_rate": 2.779272368942246e-07, "logits/chosen": -2.6835289001464844, "logits/rejected": -2.713869094848633, "logps/chosen": -1644.5257568359375, "logps/rejected": -1615.684326171875, "loss": 0.6371, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.4157450199127197, "rewards/margins": 0.14883123338222504, "rewards/rejected": -0.564576268196106, "step": 6600 }, { "epoch": 1.73, "eval_logits/chosen": -2.6984503269195557, "eval_logits/rejected": -2.689661741256714, "eval_logps/chosen": -1605.211181640625, "eval_logps/rejected": -1410.3033447265625, "eval_loss": 0.644827663898468, "eval_rewards/accuracies": 0.6309523582458496, "eval_rewards/chosen": -0.4266229569911957, "eval_rewards/margins": 0.15976297855377197, "eval_rewards/rejected": -0.5863860249519348, "eval_runtime": 222.1021, "eval_samples_per_second": 9.005, "eval_steps_per_second": 0.284, "step": 6600 }, { "epoch": 1.73, "learning_rate": 2.7271705561027986e-07, "logits/chosen": -2.6922175884246826, "logits/rejected": -2.691366195678711, "logps/chosen": -1414.966796875, "logps/rejected": -1288.3101806640625, "loss": 0.6145, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.48495978116989136, "rewards/margins": 0.1690906137228012, "rewards/rejected": -0.6540504097938538, "step": 6610 }, { "epoch": 1.73, "learning_rate": 2.6755335541943677e-07, "logits/chosen": -2.6796720027923584, "logits/rejected": -2.670698642730713, "logps/chosen": -1534.7186279296875, "logps/rejected": -1293.262939453125, "loss": 0.6729, "rewards/accuracies": 0.5, "rewards/chosen": -0.531518280506134, "rewards/margins": 0.024871502071619034, "rewards/rejected": -0.5563897490501404, "step": 6620 }, { "epoch": 1.74, "learning_rate": 2.62436244082781e-07, "logits/chosen": -2.723823308944702, "logits/rejected": -2.705821990966797, "logps/chosen": -1626.5982666015625, "logps/rejected": -1413.130615234375, "loss": 0.6512, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.4747316241264343, "rewards/margins": 0.1358988732099533, "rewards/rejected": -0.6106305122375488, "step": 6630 }, { "epoch": 1.74, "learning_rate": 2.5736582838913836e-07, "logits/chosen": -2.7102105617523193, "logits/rejected": -2.702176809310913, "logps/chosen": -1340.4083251953125, "logps/rejected": -1251.639892578125, "loss": 0.6995, "rewards/accuracies": 0.5, "rewards/chosen": -0.5650926232337952, "rewards/margins": 0.00785607099533081, "rewards/rejected": -0.5729486346244812, "step": 6640 }, { "epoch": 1.74, "learning_rate": 2.5234221415284363e-07, "logits/chosen": -2.7118804454803467, "logits/rejected": -2.708615303039551, "logps/chosen": -1556.7886962890625, "logps/rejected": -1601.3795166015625, "loss": 0.6605, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.4189126491546631, "rewards/margins": 0.17687158286571503, "rewards/rejected": -0.5957843065261841, "step": 6650 }, { "epoch": 1.74, "learning_rate": 2.4736550621153375e-07, "logits/chosen": -2.7197377681732178, "logits/rejected": -2.713139057159424, "logps/chosen": -1586.169677734375, "logps/rejected": -1249.861083984375, "loss": 0.6616, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.4749983251094818, "rewards/margins": 0.15875229239463806, "rewards/rejected": -0.6337506771087646, "step": 6660 }, { "epoch": 1.75, "learning_rate": 2.424358084239609e-07, "logits/chosen": -2.7143969535827637, "logits/rejected": -2.7137789726257324, "logps/chosen": -1450.5582275390625, "logps/rejected": -1345.9810791015625, "loss": 0.5912, "rewards/accuracies": 0.625, "rewards/chosen": -0.4128552973270416, "rewards/margins": 0.13145416975021362, "rewards/rejected": -0.5443094968795776, "step": 6670 }, { "epoch": 1.75, "learning_rate": 2.3755322366782158e-07, "logits/chosen": -2.655879497528076, "logits/rejected": -2.6556153297424316, "logps/chosen": -1635.095703125, "logps/rejected": -1260.944091796875, "loss": 0.6132, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.36572757363319397, "rewards/margins": 0.18873073160648346, "rewards/rejected": -0.5544583201408386, "step": 6680 }, { "epoch": 1.75, "learning_rate": 2.3271785383761431e-07, "logits/chosen": -2.7286901473999023, "logits/rejected": -2.691357374191284, "logps/chosen": -1588.2955322265625, "logps/rejected": -1274.890869140625, "loss": 0.6173, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.39285022020339966, "rewards/margins": 0.18746483325958252, "rewards/rejected": -0.580315113067627, "step": 6690 }, { "epoch": 1.75, "learning_rate": 2.2792979984250978e-07, "logits/chosen": -2.6657614707946777, "logits/rejected": -2.6573615074157715, "logps/chosen": -1582.229736328125, "logps/rejected": -956.1892700195312, "loss": 0.6051, "rewards/accuracies": 0.625, "rewards/chosen": -0.43431225419044495, "rewards/margins": 0.2427477389574051, "rewards/rejected": -0.6770600080490112, "step": 6700 }, { "epoch": 1.75, "eval_logits/chosen": -2.701227903366089, "eval_logits/rejected": -2.6926941871643066, "eval_logps/chosen": -1604.7469482421875, "eval_logps/rejected": -1409.8746337890625, "eval_loss": 0.6445795893669128, "eval_rewards/accuracies": 0.6329365372657776, "eval_rewards/chosen": -0.42197802662849426, "eval_rewards/margins": 0.16012054681777954, "eval_rewards/rejected": -0.5820986032485962, "eval_runtime": 221.9478, "eval_samples_per_second": 9.011, "eval_steps_per_second": 0.284, "step": 6700 }, { "epoch": 1.76, "learning_rate": 2.231891616042453e-07, "logits/chosen": -2.701498508453369, "logits/rejected": -2.704812526702881, "logps/chosen": -1382.1441650390625, "logps/rejected": -1292.918212890625, "loss": 0.669, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": -0.4683159291744232, "rewards/margins": 0.08218617737293243, "rewards/rejected": -0.5505021810531616, "step": 6710 }, { "epoch": 1.76, "learning_rate": 2.1849603805504328e-07, "logits/chosen": -2.680833339691162, "logits/rejected": -2.694401979446411, "logps/chosen": -1717.7662353515625, "logps/rejected": -1386.9920654296875, "loss": 0.6116, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.29614511132240295, "rewards/margins": 0.2935263216495514, "rewards/rejected": -0.5896713733673096, "step": 6720 }, { "epoch": 1.76, "learning_rate": 2.1385052713554066e-07, "logits/chosen": -2.6653406620025635, "logits/rejected": -2.6641170978546143, "logps/chosen": -1424.972412109375, "logps/rejected": -1280.713623046875, "loss": 0.6394, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.36866647005081177, "rewards/margins": 0.2155831754207611, "rewards/rejected": -0.5842496156692505, "step": 6730 }, { "epoch": 1.76, "learning_rate": 2.0925272579274873e-07, "logits/chosen": -2.6768558025360107, "logits/rejected": -2.669332504272461, "logps/chosen": -1590.535888671875, "logps/rejected": -1380.893310546875, "loss": 0.6491, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.4337334632873535, "rewards/margins": 0.05396395921707153, "rewards/rejected": -0.48769742250442505, "step": 6740 }, { "epoch": 1.77, "learning_rate": 2.047027299780302e-07, "logits/chosen": -2.7149715423583984, "logits/rejected": -2.712569236755371, "logps/chosen": -1647.9619140625, "logps/rejected": -1534.022705078125, "loss": 0.6521, "rewards/accuracies": 0.625, "rewards/chosen": -0.36180227994918823, "rewards/margins": 0.08609117567539215, "rewards/rejected": -0.44789353013038635, "step": 6750 }, { "epoch": 1.77, "learning_rate": 2.0020063464509492e-07, "logits/chosen": -2.6872036457061768, "logits/rejected": -2.6867897510528564, "logps/chosen": -1633.364990234375, "logps/rejected": -1352.14111328125, "loss": 0.6569, "rewards/accuracies": 0.625, "rewards/chosen": -0.3849290907382965, "rewards/margins": 0.17690841853618622, "rewards/rejected": -0.5618374943733215, "step": 6760 }, { "epoch": 1.77, "learning_rate": 1.957465337480191e-07, "logits/chosen": -2.6997156143188477, "logits/rejected": -2.7141852378845215, "logps/chosen": -1325.0081787109375, "logps/rejected": -1167.6669921875, "loss": 0.6109, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.4304627478122711, "rewards/margins": 0.1625189185142517, "rewards/rejected": -0.5929816365242004, "step": 6770 }, { "epoch": 1.77, "learning_rate": 1.9134052023928622e-07, "logits/chosen": -2.725517749786377, "logits/rejected": -2.714160203933716, "logps/chosen": -1786.7330322265625, "logps/rejected": -1655.354736328125, "loss": 0.6293, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.39226698875427246, "rewards/margins": 0.18630550801753998, "rewards/rejected": -0.5785725116729736, "step": 6780 }, { "epoch": 1.78, "learning_rate": 1.8698268606784392e-07, "logits/chosen": -2.6865007877349854, "logits/rejected": -2.686338424682617, "logps/chosen": -1578.5491943359375, "logps/rejected": -1273.3411865234375, "loss": 0.6167, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.35822856426239014, "rewards/margins": 0.25197267532348633, "rewards/rejected": -0.6102012395858765, "step": 6790 }, { "epoch": 1.78, "learning_rate": 1.826731221771866e-07, "logits/chosen": -2.6549112796783447, "logits/rejected": -2.6449966430664062, "logps/chosen": -1780.359375, "logps/rejected": -1476.2601318359375, "loss": 0.6136, "rewards/accuracies": 0.625, "rewards/chosen": -0.3461179733276367, "rewards/margins": 0.23195484280586243, "rewards/rejected": -0.5780729055404663, "step": 6800 }, { "epoch": 1.78, "eval_logits/chosen": -2.702434539794922, "eval_logits/rejected": -2.694024085998535, "eval_logps/chosen": -1604.7393798828125, "eval_logps/rejected": -1409.8861083984375, "eval_loss": 0.6445672512054443, "eval_rewards/accuracies": 0.6309523582458496, "eval_rewards/chosen": -0.4219011664390564, "eval_rewards/margins": 0.16031363606452942, "eval_rewards/rejected": -0.5822148323059082, "eval_runtime": 221.9949, "eval_samples_per_second": 9.009, "eval_steps_per_second": 0.284, "step": 6800 }, { "epoch": 1.78, "learning_rate": 1.7841191850345967e-07, "logits/chosen": -2.7168614864349365, "logits/rejected": -2.7038416862487793, "logps/chosen": -1741.0836181640625, "logps/rejected": -1419.6201171875, "loss": 0.6262, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.44240108132362366, "rewards/margins": 0.20116452872753143, "rewards/rejected": -0.6435655951499939, "step": 6810 }, { "epoch": 1.78, "learning_rate": 1.7419916397357905e-07, "logits/chosen": -2.6968274116516113, "logits/rejected": -2.6936259269714355, "logps/chosen": -1782.3134765625, "logps/rejected": -1579.941162109375, "loss": 0.6335, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.38433465361595154, "rewards/margins": 0.12539786100387573, "rewards/rejected": -0.5097325444221497, "step": 6820 }, { "epoch": 1.79, "learning_rate": 1.700349465033782e-07, "logits/chosen": -2.6470327377319336, "logits/rejected": -2.642458438873291, "logps/chosen": -1636.154296875, "logps/rejected": -1551.156982421875, "loss": 0.6415, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.3473636507987976, "rewards/margins": 0.3328436017036438, "rewards/rejected": -0.6802071928977966, "step": 6830 }, { "epoch": 1.79, "learning_rate": 1.6591935299577227e-07, "logits/chosen": -2.7135472297668457, "logits/rejected": -2.7110230922698975, "logps/chosen": -1444.708984375, "logps/rejected": -1374.4935302734375, "loss": 0.6437, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.4015157222747803, "rewards/margins": 0.1597764492034912, "rewards/rejected": -0.5612921118736267, "step": 6840 }, { "epoch": 1.79, "learning_rate": 1.6185246933894338e-07, "logits/chosen": -2.6957876682281494, "logits/rejected": -2.7021939754486084, "logps/chosen": -1620.359619140625, "logps/rejected": -1610.036865234375, "loss": 0.6066, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.3133171796798706, "rewards/margins": 0.36562633514404297, "rewards/rejected": -0.6789435148239136, "step": 6850 }, { "epoch": 1.8, "learning_rate": 1.5783438040455097e-07, "logits/chosen": -2.7235379219055176, "logits/rejected": -2.7061972618103027, "logps/chosen": -1601.1849365234375, "logps/rejected": -1397.846923828125, "loss": 0.6452, "rewards/accuracies": 0.75, "rewards/chosen": -0.4237557351589203, "rewards/margins": 0.19110596179962158, "rewards/rejected": -0.6148617267608643, "step": 6860 }, { "epoch": 1.8, "learning_rate": 1.538651700459576e-07, "logits/chosen": -2.691257953643799, "logits/rejected": -2.6782546043395996, "logps/chosen": -1259.538818359375, "logps/rejected": -1211.7760009765625, "loss": 0.6237, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.46185874938964844, "rewards/margins": 0.11213238537311554, "rewards/rejected": -0.5739911198616028, "step": 6870 }, { "epoch": 1.8, "learning_rate": 1.4994492109648151e-07, "logits/chosen": -2.708364486694336, "logits/rejected": -2.6761398315429688, "logps/chosen": -1647.710693359375, "logps/rejected": -1118.4906005859375, "loss": 0.6351, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.5295812487602234, "rewards/margins": 0.058738600462675095, "rewards/rejected": -0.5883198976516724, "step": 6880 }, { "epoch": 1.8, "learning_rate": 1.4607371536766695e-07, "logits/chosen": -2.714329481124878, "logits/rejected": -2.723923921585083, "logps/chosen": -1706.525146484375, "logps/rejected": -1742.4144287109375, "loss": 0.6241, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.40046390891075134, "rewards/margins": 0.2260729968547821, "rewards/rejected": -0.6265369057655334, "step": 6890 }, { "epoch": 1.81, "learning_rate": 1.4225163364757655e-07, "logits/chosen": -2.7081897258758545, "logits/rejected": -2.6999526023864746, "logps/chosen": -1839.845703125, "logps/rejected": -1673.989013671875, "loss": 0.6503, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.35931825637817383, "rewards/margins": 0.24749819934368134, "rewards/rejected": -0.6068164706230164, "step": 6900 }, { "epoch": 1.81, "eval_logits/chosen": -2.7030396461486816, "eval_logits/rejected": -2.694699287414551, "eval_logps/chosen": -1604.7735595703125, "eval_logps/rejected": -1409.9207763671875, "eval_loss": 0.644517719745636, "eval_rewards/accuracies": 0.6349206566810608, "eval_rewards/chosen": -0.42224541306495667, "eval_rewards/margins": 0.16031552851200104, "eval_rewards/rejected": -0.5825609564781189, "eval_runtime": 221.8784, "eval_samples_per_second": 9.014, "eval_steps_per_second": 0.284, "step": 6900 }, { "epoch": 1.81, "learning_rate": 1.3847875569910462e-07, "logits/chosen": -2.725914716720581, "logits/rejected": -2.7312119007110596, "logps/chosen": -1705.2923583984375, "logps/rejected": -1547.054443359375, "loss": 0.6337, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.3760010004043579, "rewards/margins": 0.27584725618362427, "rewards/rejected": -0.6518482565879822, "step": 6910 }, { "epoch": 1.81, "learning_rate": 1.3475516025831552e-07, "logits/chosen": -2.7132887840270996, "logits/rejected": -2.7062325477600098, "logps/chosen": -1419.587646484375, "logps/rejected": -1096.178466796875, "loss": 0.6266, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.4135599136352539, "rewards/margins": 0.21963760256767273, "rewards/rejected": -0.633197546005249, "step": 6920 }, { "epoch": 1.81, "learning_rate": 1.310809250327974e-07, "logits/chosen": -2.7178874015808105, "logits/rejected": -2.687227487564087, "logps/chosen": -1344.69091796875, "logps/rejected": -1032.5728759765625, "loss": 0.6345, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.42153486609458923, "rewards/margins": 0.18064364790916443, "rewards/rejected": -0.6021785140037537, "step": 6930 }, { "epoch": 1.82, "learning_rate": 1.2745612670004153e-07, "logits/chosen": -2.6864330768585205, "logits/rejected": -2.6985878944396973, "logps/chosen": -1467.687744140625, "logps/rejected": -1250.664306640625, "loss": 0.6185, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.4021376669406891, "rewards/margins": 0.30139079689979553, "rewards/rejected": -0.7035284638404846, "step": 6940 }, { "epoch": 1.82, "learning_rate": 1.2388084090584395e-07, "logits/chosen": -2.6969826221466064, "logits/rejected": -2.687559127807617, "logps/chosen": -1678.2601318359375, "logps/rejected": -1455.1302490234375, "loss": 0.6654, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.5606560111045837, "rewards/margins": 0.09997959434986115, "rewards/rejected": -0.6606355905532837, "step": 6950 }, { "epoch": 1.82, "learning_rate": 1.2035514226272305e-07, "logits/chosen": -2.63489031791687, "logits/rejected": -2.646099805831909, "logps/chosen": -1444.5469970703125, "logps/rejected": -1406.29541015625, "loss": 0.6466, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.43905216455459595, "rewards/margins": 0.15955057740211487, "rewards/rejected": -0.5986027717590332, "step": 6960 }, { "epoch": 1.82, "learning_rate": 1.1687910434836607e-07, "logits/chosen": -2.6887829303741455, "logits/rejected": -2.684683084487915, "logps/chosen": -1515.1943359375, "logps/rejected": -1243.822998046875, "loss": 0.6022, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.3628450036048889, "rewards/margins": 0.33365586400032043, "rewards/rejected": -0.696500837802887, "step": 6970 }, { "epoch": 1.83, "learning_rate": 1.1345279970409128e-07, "logits/chosen": -2.6827330589294434, "logits/rejected": -2.6747491359710693, "logps/chosen": -1549.365966796875, "logps/rejected": -1341.9677734375, "loss": 0.639, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.497032105922699, "rewards/margins": 0.08594363182783127, "rewards/rejected": -0.5829757452011108, "step": 6980 }, { "epoch": 1.83, "learning_rate": 1.1007629983333629e-07, "logits/chosen": -2.6825199127197266, "logits/rejected": -2.682945728302002, "logps/chosen": -1833.2259521484375, "logps/rejected": -1173.13427734375, "loss": 0.6461, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.431971937417984, "rewards/margins": 0.20928311347961426, "rewards/rejected": -0.6412550210952759, "step": 6990 }, { "epoch": 1.83, "learning_rate": 1.067496752001626e-07, "logits/chosen": -2.7132515907287598, "logits/rejected": -2.7097418308258057, "logps/chosen": -1519.0521240234375, "logps/rejected": -1273.3426513671875, "loss": 0.6318, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.4385862350463867, "rewards/margins": 0.07204887270927429, "rewards/rejected": -0.5106351375579834, "step": 7000 }, { "epoch": 1.83, "eval_logits/chosen": -2.7010436058044434, "eval_logits/rejected": -2.692471742630005, "eval_logps/chosen": -1604.7110595703125, "eval_logps/rejected": -1409.8387451171875, "eval_loss": 0.6445296406745911, "eval_rewards/accuracies": 0.6329365372657776, "eval_rewards/chosen": -0.4216185212135315, "eval_rewards/margins": 0.1601227968931198, "eval_rewards/rejected": -0.581741213798523, "eval_runtime": 222.0778, "eval_samples_per_second": 9.006, "eval_steps_per_second": 0.284, "step": 7000 }, { "epoch": 1.83, "learning_rate": 1.0347299522778909e-07, "logits/chosen": -2.683096408843994, "logits/rejected": -2.65643572807312, "logps/chosen": -1366.478759765625, "logps/rejected": -1104.1004638671875, "loss": 0.6471, "rewards/accuracies": 0.625, "rewards/chosen": -0.4307782053947449, "rewards/margins": 0.23234939575195312, "rewards/rejected": -0.663127601146698, "step": 7010 }, { "epoch": 1.84, "learning_rate": 1.0024632829713971e-07, "logits/chosen": -2.6967613697052, "logits/rejected": -2.6798155307769775, "logps/chosen": -1273.128173828125, "logps/rejected": -1150.955078125, "loss": 0.6447, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.39100247621536255, "rewards/margins": 0.12937206029891968, "rewards/rejected": -0.5203745365142822, "step": 7020 }, { "epoch": 1.84, "learning_rate": 9.706974174541889e-08, "logits/chosen": -2.6890580654144287, "logits/rejected": -2.6844520568847656, "logps/chosen": -1574.6275634765625, "logps/rejected": -1529.202880859375, "loss": 0.6431, "rewards/accuracies": 0.5, "rewards/chosen": -0.4391850531101227, "rewards/margins": 0.01236086804419756, "rewards/rejected": -0.4515458941459656, "step": 7030 }, { "epoch": 1.84, "learning_rate": 9.39433018647043e-08, "logits/chosen": -2.6954872608184814, "logits/rejected": -2.686889171600342, "logps/chosen": -1622.2542724609375, "logps/rejected": -1530.85888671875, "loss": 0.6221, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.32484784722328186, "rewards/margins": 0.20420944690704346, "rewards/rejected": -0.5290572643280029, "step": 7040 }, { "epoch": 1.85, "learning_rate": 9.086707390056543e-08, "logits/chosen": -2.716411828994751, "logits/rejected": -2.7104315757751465, "logps/chosen": -1577.9422607421875, "logps/rejected": -1285.207763671875, "loss": 0.6442, "rewards/accuracies": 0.625, "rewards/chosen": -0.37132635712623596, "rewards/margins": 0.163702130317688, "rewards/rejected": -0.5350284576416016, "step": 7050 }, { "epoch": 1.85, "learning_rate": 8.784112205070083e-08, "logits/chosen": -2.6795337200164795, "logits/rejected": -2.689896583557129, "logps/chosen": -1706.0953369140625, "logps/rejected": -1669.0413818359375, "loss": 0.6446, "rewards/accuracies": 0.625, "rewards/chosen": -0.45116591453552246, "rewards/margins": 0.21349339187145233, "rewards/rejected": -0.6646592617034912, "step": 7060 }, { "epoch": 1.85, "learning_rate": 8.486550946359779e-08, "logits/chosen": -2.7108492851257324, "logits/rejected": -2.698906421661377, "logps/chosen": -1502.298095703125, "logps/rejected": -1157.237060546875, "loss": 0.6307, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.48375552892684937, "rewards/margins": 0.08426637947559357, "rewards/rejected": -0.5680218935012817, "step": 7070 }, { "epoch": 1.85, "learning_rate": 8.194029823721556e-08, "logits/chosen": -2.697640895843506, "logits/rejected": -2.6725358963012695, "logps/chosen": -1769.990234375, "logps/rejected": -1637.005126953125, "loss": 0.6669, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.3244650363922119, "rewards/margins": 0.21794219315052032, "rewards/rejected": -0.542407214641571, "step": 7080 }, { "epoch": 1.86, "learning_rate": 7.906554941768896e-08, "logits/chosen": -2.7125067710876465, "logits/rejected": -2.7144935131073, "logps/chosen": -1664.8450927734375, "logps/rejected": -1524.383544921875, "loss": 0.6529, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.49117860198020935, "rewards/margins": 0.029221346601843834, "rewards/rejected": -0.5203999876976013, "step": 7090 }, { "epoch": 1.86, "learning_rate": 7.624132299805575e-08, "logits/chosen": -2.6807377338409424, "logits/rejected": -2.694531202316284, "logps/chosen": -1521.4224853515625, "logps/rejected": -1667.6331787109375, "loss": 0.6493, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.5217372179031372, "rewards/margins": 0.11972247064113617, "rewards/rejected": -0.641459584236145, "step": 7100 }, { "epoch": 1.86, "eval_logits/chosen": -2.702463388442993, "eval_logits/rejected": -2.6940252780914307, "eval_logps/chosen": -1604.70263671875, "eval_logps/rejected": -1409.81787109375, "eval_loss": 0.6445424556732178, "eval_rewards/accuracies": 0.6329365372657776, "eval_rewards/chosen": -0.42153695225715637, "eval_rewards/margins": 0.15999405086040497, "eval_rewards/rejected": -0.5815309882164001, "eval_runtime": 221.796, "eval_samples_per_second": 9.017, "eval_steps_per_second": 0.284, "step": 7100 }, { "epoch": 1.86, "learning_rate": 7.346767791700127e-08, "logits/chosen": -2.706827163696289, "logits/rejected": -2.714289665222168, "logps/chosen": -1348.616455078125, "logps/rejected": -1254.7540283203125, "loss": 0.6332, "rewards/accuracies": 0.625, "rewards/chosen": -0.47383370995521545, "rewards/margins": 0.15515919029712677, "rewards/rejected": -0.6289928555488586, "step": 7110 }, { "epoch": 1.86, "learning_rate": 7.07446720576327e-08, "logits/chosen": -2.7369492053985596, "logits/rejected": -2.73970890045166, "logps/chosen": -1749.2347412109375, "logps/rejected": -1596.642333984375, "loss": 0.6375, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.46583041548728943, "rewards/margins": 0.057837147265672684, "rewards/rejected": -0.523667573928833, "step": 7120 }, { "epoch": 1.87, "learning_rate": 6.807236224626701e-08, "logits/chosen": -2.689509868621826, "logits/rejected": -2.672372341156006, "logps/chosen": -1553.8916015625, "logps/rejected": -1422.921875, "loss": 0.6388, "rewards/accuracies": 0.625, "rewards/chosen": -0.4380861222743988, "rewards/margins": 0.23138687014579773, "rewards/rejected": -0.6694729924201965, "step": 7130 }, { "epoch": 1.87, "learning_rate": 6.545080425124888e-08, "logits/chosen": -2.7350094318389893, "logits/rejected": -2.7102842330932617, "logps/chosen": -1565.059814453125, "logps/rejected": -983.6837768554688, "loss": 0.6344, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.3876848518848419, "rewards/margins": 0.2866832911968231, "rewards/rejected": -0.6743682026863098, "step": 7140 }, { "epoch": 1.87, "learning_rate": 6.288005278178382e-08, "logits/chosen": -2.7340786457061768, "logits/rejected": -2.7070319652557373, "logps/chosen": -1651.146240234375, "logps/rejected": -1310.5484619140625, "loss": 0.6414, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.4129902720451355, "rewards/margins": 0.18922848999500275, "rewards/rejected": -0.6022188067436218, "step": 7150 }, { "epoch": 1.87, "learning_rate": 6.036016148679825e-08, "logits/chosen": -2.685917377471924, "logits/rejected": -2.673793315887451, "logps/chosen": -1528.411865234375, "logps/rejected": -1308.5904541015625, "loss": 0.6236, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.4521363377571106, "rewards/margins": 0.14088140428066254, "rewards/rejected": -0.5930176973342896, "step": 7160 }, { "epoch": 1.88, "learning_rate": 5.7891182953819235e-08, "logits/chosen": -2.68344783782959, "logits/rejected": -2.680868625640869, "logps/chosen": -1653.4996337890625, "logps/rejected": -1406.031494140625, "loss": 0.6425, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.5184036493301392, "rewards/margins": 0.12173338234424591, "rewards/rejected": -0.6401370167732239, "step": 7170 }, { "epoch": 1.88, "learning_rate": 5.547316870787689e-08, "logits/chosen": -2.7209701538085938, "logits/rejected": -2.7010300159454346, "logps/chosen": -1677.9332275390625, "logps/rejected": -1209.544677734375, "loss": 0.6339, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.5062988996505737, "rewards/margins": 0.18664419651031494, "rewards/rejected": -0.6929429769515991, "step": 7180 }, { "epoch": 1.88, "learning_rate": 5.310616921042927e-08, "logits/chosen": -2.660794734954834, "logits/rejected": -2.6702022552490234, "logps/chosen": -1407.279052734375, "logps/rejected": -1222.977294921875, "loss": 0.6418, "rewards/accuracies": 0.625, "rewards/chosen": -0.39660215377807617, "rewards/margins": 0.10538413375616074, "rewards/rejected": -0.5019862651824951, "step": 7190 }, { "epoch": 1.88, "learning_rate": 5.079023385830939e-08, "logits/chosen": -2.6666197776794434, "logits/rejected": -2.6605982780456543, "logps/chosen": -1460.6802978515625, "logps/rejected": -1327.25244140625, "loss": 0.6292, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.31100255250930786, "rewards/margins": 0.2334582507610321, "rewards/rejected": -0.5444608926773071, "step": 7200 }, { "epoch": 1.88, "eval_logits/chosen": -2.702709674835205, "eval_logits/rejected": -2.69431209564209, "eval_logps/chosen": -1604.719482421875, "eval_logps/rejected": -1409.822265625, "eval_loss": 0.6445853114128113, "eval_rewards/accuracies": 0.6329365372657776, "eval_rewards/chosen": -0.4217035472393036, "eval_rewards/margins": 0.15987294912338257, "eval_rewards/rejected": -0.5815765261650085, "eval_runtime": 221.9519, "eval_samples_per_second": 9.011, "eval_steps_per_second": 0.284, "step": 7200 }, { "epoch": 1.89, "learning_rate": 4.8525410982695476e-08, "logits/chosen": -2.654919147491455, "logits/rejected": -2.6503231525421143, "logps/chosen": -1528.534423828125, "logps/rejected": -1125.8482666015625, "loss": 0.6448, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.33400973677635193, "rewards/margins": 0.23223035037517548, "rewards/rejected": -0.5662400722503662, "step": 7210 }, { "epoch": 1.89, "learning_rate": 4.6311747848099e-08, "logits/chosen": -2.6991829872131348, "logits/rejected": -2.6920909881591797, "logps/chosen": -1656.469970703125, "logps/rejected": -1294.0587158203125, "loss": 0.6691, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.45552200078964233, "rewards/margins": 0.07368168979883194, "rewards/rejected": -0.5292037725448608, "step": 7220 }, { "epoch": 1.89, "learning_rate": 4.4149290651382405e-08, "logits/chosen": -2.652975082397461, "logits/rejected": -2.6496188640594482, "logps/chosen": -1439.092041015625, "logps/rejected": -1270.6805419921875, "loss": 0.6202, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": -0.49305492639541626, "rewards/margins": 0.0813017338514328, "rewards/rejected": -0.5743566155433655, "step": 7230 }, { "epoch": 1.89, "learning_rate": 4.203808452079211e-08, "logits/chosen": -2.737536907196045, "logits/rejected": -2.713040828704834, "logps/chosen": -1595.1773681640625, "logps/rejected": -1512.947021484375, "loss": 0.6126, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.3274495005607605, "rewards/margins": 0.3223855197429657, "rewards/rejected": -0.6498350501060486, "step": 7240 }, { "epoch": 1.9, "learning_rate": 3.9978173515018427e-08, "logits/chosen": -2.7084734439849854, "logits/rejected": -2.69303035736084, "logps/chosen": -1404.4498291015625, "logps/rejected": -1423.9259033203125, "loss": 0.6338, "rewards/accuracies": 0.625, "rewards/chosen": -0.5082755088806152, "rewards/margins": 0.11508085578680038, "rewards/rejected": -0.623356282711029, "step": 7250 }, { "epoch": 1.9, "learning_rate": 3.7969600622274614e-08, "logits/chosen": -2.7099738121032715, "logits/rejected": -2.6851272583007812, "logps/chosen": -1558.6534423828125, "logps/rejected": -1573.78076171875, "loss": 0.6469, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.38475877046585083, "rewards/margins": 0.12393616139888763, "rewards/rejected": -0.5086949467658997, "step": 7260 }, { "epoch": 1.9, "learning_rate": 3.601240775940151e-08, "logits/chosen": -2.683004856109619, "logits/rejected": -2.681644916534424, "logps/chosen": -1144.97802734375, "logps/rejected": -1189.7843017578125, "loss": 0.6497, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.4038308560848236, "rewards/margins": 0.14985907077789307, "rewards/rejected": -0.5536898970603943, "step": 7270 }, { "epoch": 1.91, "learning_rate": 3.410663577099071e-08, "logits/chosen": -2.709052562713623, "logits/rejected": -2.668560266494751, "logps/chosen": -1598.82861328125, "logps/rejected": -1156.0931396484375, "loss": 0.6498, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.44105497002601624, "rewards/margins": 0.27089494466781616, "rewards/rejected": -0.7119500041007996, "step": 7280 }, { "epoch": 1.91, "learning_rate": 3.2252324428534986e-08, "logits/chosen": -2.734286069869995, "logits/rejected": -2.7319412231445312, "logps/chosen": -1638.3583984375, "logps/rejected": -1431.2901611328125, "loss": 0.6428, "rewards/accuracies": 0.625, "rewards/chosen": -0.44060295820236206, "rewards/margins": 0.13731783628463745, "rewards/rejected": -0.5779208540916443, "step": 7290 }, { "epoch": 1.91, "learning_rate": 3.0449512429594486e-08, "logits/chosen": -2.7285077571868896, "logits/rejected": -2.7121500968933105, "logps/chosen": -1547.3211669921875, "logps/rejected": -1328.563720703125, "loss": 0.625, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.39875486493110657, "rewards/margins": 0.27655380964279175, "rewards/rejected": -0.6753085851669312, "step": 7300 }, { "epoch": 1.91, "eval_logits/chosen": -2.702165365219116, "eval_logits/rejected": -2.693725109100342, "eval_logps/chosen": -1604.7012939453125, "eval_logps/rejected": -1409.8218994140625, "eval_loss": 0.6445257067680359, "eval_rewards/accuracies": 0.6329365372657776, "eval_rewards/chosen": -0.42152103781700134, "eval_rewards/margins": 0.16004998981952667, "eval_rewards/rejected": -0.5815710425376892, "eval_runtime": 221.9011, "eval_samples_per_second": 9.013, "eval_steps_per_second": 0.284, "step": 7300 }, { "epoch": 1.91, "learning_rate": 2.8698237396992956e-08, "logits/chosen": -2.7125720977783203, "logits/rejected": -2.710803508758545, "logps/chosen": -1958.1751708984375, "logps/rejected": -1697.903076171875, "loss": 0.6355, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.3638436198234558, "rewards/margins": 0.1362782120704651, "rewards/rejected": -0.5001217722892761, "step": 7310 }, { "epoch": 1.92, "learning_rate": 2.6998535878030584e-08, "logits/chosen": -2.687643527984619, "logits/rejected": -2.6909823417663574, "logps/chosen": -1759.569091796875, "logps/rejected": -1552.631591796875, "loss": 0.6333, "rewards/accuracies": 0.625, "rewards/chosen": -0.39130860567092896, "rewards/margins": 0.19722767174243927, "rewards/rejected": -0.5885363221168518, "step": 7320 }, { "epoch": 1.92, "learning_rate": 2.535044334372072e-08, "logits/chosen": -2.698178768157959, "logits/rejected": -2.7111730575561523, "logps/chosen": -1635.5369873046875, "logps/rejected": -1726.328369140625, "loss": 0.649, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.42986369132995605, "rewards/margins": 0.16456125676631927, "rewards/rejected": -0.5944249033927917, "step": 7330 }, { "epoch": 1.92, "learning_rate": 2.3753994188051853e-08, "logits/chosen": -2.7270450592041016, "logits/rejected": -2.7080130577087402, "logps/chosen": -1637.684814453125, "logps/rejected": -1292.521484375, "loss": 0.615, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.38862666487693787, "rewards/margins": 0.2333899438381195, "rewards/rejected": -0.6220166087150574, "step": 7340 }, { "epoch": 1.92, "learning_rate": 2.220922172726764e-08, "logits/chosen": -2.6930315494537354, "logits/rejected": -2.685628652572632, "logps/chosen": -1648.4945068359375, "logps/rejected": -1635.29248046875, "loss": 0.6388, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": -0.4903673529624939, "rewards/margins": 0.09882920235395432, "rewards/rejected": -0.5891965627670288, "step": 7350 }, { "epoch": 1.93, "learning_rate": 2.071615819917244e-08, "logits/chosen": -2.726003885269165, "logits/rejected": -2.7233633995056152, "logps/chosen": -1906.630615234375, "logps/rejected": -1663.8984375, "loss": 0.6585, "rewards/accuracies": 0.625, "rewards/chosen": -0.3871636986732483, "rewards/margins": 0.10992630571126938, "rewards/rejected": -0.49709001183509827, "step": 7360 }, { "epoch": 1.93, "learning_rate": 1.9274834762459393e-08, "logits/chosen": -2.6870107650756836, "logits/rejected": -2.6754543781280518, "logps/chosen": -1455.197509765625, "logps/rejected": -1335.19140625, "loss": 0.6257, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.4132939279079437, "rewards/margins": 0.26970240473747253, "rewards/rejected": -0.682996392250061, "step": 7370 }, { "epoch": 1.93, "learning_rate": 1.7885281496058947e-08, "logits/chosen": -2.7087411880493164, "logits/rejected": -2.6738803386688232, "logps/chosen": -1738.7806396484375, "logps/rejected": -1245.742919921875, "loss": 0.6571, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.3533250391483307, "rewards/margins": 0.21966774761676788, "rewards/rejected": -0.5729928016662598, "step": 7380 }, { "epoch": 1.93, "learning_rate": 1.654752739851134e-08, "logits/chosen": -2.7014052867889404, "logits/rejected": -2.693253993988037, "logps/chosen": -1632.851806640625, "logps/rejected": -1454.01416015625, "loss": 0.6522, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.36678558588027954, "rewards/margins": 0.1973138153553009, "rewards/rejected": -0.5640994310379028, "step": 7390 }, { "epoch": 1.94, "learning_rate": 1.526160038736235e-08, "logits/chosen": -2.6802146434783936, "logits/rejected": -2.6759490966796875, "logps/chosen": -1576.840576171875, "logps/rejected": -1372.6458740234375, "loss": 0.6306, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.5172873139381409, "rewards/margins": 0.12491671741008759, "rewards/rejected": -0.6422039270401001, "step": 7400 }, { "epoch": 1.94, "eval_logits/chosen": -2.702148914337158, "eval_logits/rejected": -2.6937015056610107, "eval_logps/chosen": -1604.724365234375, "eval_logps/rejected": -1409.8013916015625, "eval_loss": 0.6445860862731934, "eval_rewards/accuracies": 0.6289682388305664, "eval_rewards/chosen": -0.4217517077922821, "eval_rewards/margins": 0.15961501002311707, "eval_rewards/rejected": -0.5813668370246887, "eval_runtime": 221.8611, "eval_samples_per_second": 9.015, "eval_steps_per_second": 0.284, "step": 7400 }, { "epoch": 1.94, "learning_rate": 1.402752729857959e-08, "logits/chosen": -2.701080322265625, "logits/rejected": -2.704324722290039, "logps/chosen": -1448.97900390625, "logps/rejected": -1565.101806640625, "loss": 0.6573, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.45339909195899963, "rewards/margins": 0.009469692595303059, "rewards/rejected": -0.4628687798976898, "step": 7410 }, { "epoch": 1.94, "learning_rate": 1.2845333885992683e-08, "logits/chosen": -2.6895458698272705, "logits/rejected": -2.691756010055542, "logps/chosen": -1508.894287109375, "logps/rejected": -1402.9140625, "loss": 0.6277, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.39680013060569763, "rewards/margins": 0.17420104146003723, "rewards/rejected": -0.5710011720657349, "step": 7420 }, { "epoch": 1.94, "learning_rate": 1.171504482075675e-08, "logits/chosen": -2.687668800354004, "logits/rejected": -2.6926398277282715, "logps/chosen": -1556.4830322265625, "logps/rejected": -1661.039794921875, "loss": 0.613, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.4860979914665222, "rewards/margins": 0.2525389492511749, "rewards/rejected": -0.7386370301246643, "step": 7430 }, { "epoch": 1.95, "learning_rate": 1.0636683690836147e-08, "logits/chosen": -2.7024621963500977, "logits/rejected": -2.6893680095672607, "logps/chosen": -1638.73046875, "logps/rejected": -1485.549072265625, "loss": 0.6639, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.5557451248168945, "rewards/margins": -0.012395946308970451, "rewards/rejected": -0.5433492064476013, "step": 7440 }, { "epoch": 1.95, "learning_rate": 9.610273000513203e-09, "logits/chosen": -2.6708273887634277, "logits/rejected": -2.675523281097412, "logps/chosen": -1538.5250244140625, "logps/rejected": -1047.2149658203125, "loss": 0.6475, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.4156258702278137, "rewards/margins": 0.1110345870256424, "rewards/rejected": -0.5266603827476501, "step": 7450 }, { "epoch": 1.95, "learning_rate": 8.635834169918312e-09, "logits/chosen": -2.7205698490142822, "logits/rejected": -2.7201004028320312, "logps/chosen": -1408.6478271484375, "logps/rejected": -1399.743408203125, "loss": 0.6223, "rewards/accuracies": 0.625, "rewards/chosen": -0.4861675798892975, "rewards/margins": 0.19877155125141144, "rewards/rejected": -0.6849390864372253, "step": 7460 }, { "epoch": 1.95, "learning_rate": 7.713387534582506e-09, "logits/chosen": -2.6804215908050537, "logits/rejected": -2.6637444496154785, "logps/chosen": -1541.433349609375, "logps/rejected": -1571.2962646484375, "loss": 0.6215, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.3475147783756256, "rewards/margins": 0.16994670033454895, "rewards/rejected": -0.5174614787101746, "step": 7470 }, { "epoch": 1.96, "learning_rate": 6.84295234501392e-09, "logits/chosen": -2.7089502811431885, "logits/rejected": -2.712880849838257, "logps/chosen": -1744.5091552734375, "logps/rejected": -1501.75244140625, "loss": 0.624, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.4259180426597595, "rewards/margins": 0.2688801884651184, "rewards/rejected": -0.6947982907295227, "step": 7480 }, { "epoch": 1.96, "learning_rate": 6.024546766295325e-09, "logits/chosen": -2.668334484100342, "logits/rejected": -2.6527392864227295, "logps/chosen": -1190.046142578125, "logps/rejected": -1228.147216796875, "loss": 0.6477, "rewards/accuracies": 0.625, "rewards/chosen": -0.5390468835830688, "rewards/margins": 0.11457918584346771, "rewards/rejected": -0.653626024723053, "step": 7490 }, { "epoch": 1.96, "learning_rate": 5.2581878777049895e-09, "logits/chosen": -2.663644552230835, "logits/rejected": -2.6548047065734863, "logps/chosen": -1755.512451171875, "logps/rejected": -1462.0538330078125, "loss": 0.6446, "rewards/accuracies": 0.625, "rewards/chosen": -0.4531777501106262, "rewards/margins": 0.19328074157238007, "rewards/rejected": -0.6464585065841675, "step": 7500 }, { "epoch": 1.96, "eval_logits/chosen": -2.702141284942627, "eval_logits/rejected": -2.6936912536621094, "eval_logps/chosen": -1604.7235107421875, "eval_logps/rejected": -1409.80029296875, "eval_loss": 0.6445866227149963, "eval_rewards/accuracies": 0.6289682388305664, "eval_rewards/chosen": -0.4217440187931061, "eval_rewards/margins": 0.15961241722106934, "eval_rewards/rejected": -0.5813564658164978, "eval_runtime": 221.8894, "eval_samples_per_second": 9.013, "eval_steps_per_second": 0.284, "step": 7500 }, { "epoch": 1.97, "learning_rate": 4.543891672361411e-09, "logits/chosen": -2.713366746902466, "logits/rejected": -2.734687328338623, "logps/chosen": -1524.438232421875, "logps/rejected": -1415.349365234375, "loss": 0.6461, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.3279759883880615, "rewards/margins": 0.17746631801128387, "rewards/rejected": -0.5054423213005066, "step": 7510 }, { "epoch": 1.97, "learning_rate": 3.881673056887747e-09, "logits/chosen": -2.6779403686523438, "logits/rejected": -2.6494932174682617, "logps/chosen": -1766.4049072265625, "logps/rejected": -1750.7711181640625, "loss": 0.6198, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.4086696207523346, "rewards/margins": 0.3214671015739441, "rewards/rejected": -0.7301367521286011, "step": 7520 }, { "epoch": 1.97, "learning_rate": 3.2715458511023425e-09, "logits/chosen": -2.7254676818847656, "logits/rejected": -2.7072927951812744, "logps/chosen": -1588.3248291015625, "logps/rejected": -1312.022705078125, "loss": 0.6114, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.3616897463798523, "rewards/margins": 0.27325281500816345, "rewards/rejected": -0.6349425315856934, "step": 7530 }, { "epoch": 1.97, "learning_rate": 2.7135227877289617e-09, "logits/chosen": -2.699939727783203, "logits/rejected": -2.6992363929748535, "logps/chosen": -1314.650146484375, "logps/rejected": -1154.2691650390625, "loss": 0.6657, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -0.4900095462799072, "rewards/margins": 0.06088308244943619, "rewards/rejected": -0.55089271068573, "step": 7540 }, { "epoch": 1.98, "learning_rate": 2.2076155121328326e-09, "logits/chosen": -2.7241501808166504, "logits/rejected": -2.705294609069824, "logps/chosen": -1565.5633544921875, "logps/rejected": -1411.188720703125, "loss": 0.6372, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.36127161979675293, "rewards/margins": 0.26764652132987976, "rewards/rejected": -0.6289182305335999, "step": 7550 }, { "epoch": 1.98, "learning_rate": 1.7538345820755641e-09, "logits/chosen": -2.730531692504883, "logits/rejected": -2.7179620265960693, "logps/chosen": -1499.7626953125, "logps/rejected": -1195.4996337890625, "loss": 0.6541, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.4179004728794098, "rewards/margins": 0.2592325508594513, "rewards/rejected": -0.6771329641342163, "step": 7560 }, { "epoch": 1.98, "learning_rate": 1.3521894674961567e-09, "logits/chosen": -2.6926231384277344, "logits/rejected": -2.711402177810669, "logps/chosen": -1196.3023681640625, "logps/rejected": -1280.1029052734375, "loss": 0.6327, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.47418227791786194, "rewards/margins": 0.17429625988006592, "rewards/rejected": -0.6484785676002502, "step": 7570 }, { "epoch": 1.98, "learning_rate": 1.0026885503131023e-09, "logits/chosen": -2.729646921157837, "logits/rejected": -2.731315851211548, "logps/chosen": -1656.604248046875, "logps/rejected": -1487.271728515625, "loss": 0.6553, "rewards/accuracies": 0.625, "rewards/chosen": -0.5302258729934692, "rewards/margins": 0.10667018592357635, "rewards/rejected": -0.6368960738182068, "step": 7580 }, { "epoch": 1.99, "learning_rate": 7.053391242492491e-10, "logits/chosen": -2.6965746879577637, "logits/rejected": -2.7005808353424072, "logps/chosen": -1340.211181640625, "logps/rejected": -1152.303955078125, "loss": 0.6525, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.4404354691505432, "rewards/margins": 0.14564061164855957, "rewards/rejected": -0.5860761404037476, "step": 7590 }, { "epoch": 1.99, "learning_rate": 4.6014739467997725e-10, "logits/chosen": -2.7206387519836426, "logits/rejected": -2.7116284370422363, "logps/chosen": -1512.256103515625, "logps/rejected": -1473.5595703125, "loss": 0.6394, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.44393259286880493, "rewards/margins": 0.21005916595458984, "rewards/rejected": -0.6539917588233948, "step": 7600 }, { "epoch": 1.99, "eval_logits/chosen": -2.702141284942627, "eval_logits/rejected": -2.6936912536621094, "eval_logps/chosen": -1604.7235107421875, "eval_logps/rejected": -1409.80029296875, "eval_loss": 0.6445866227149963, "eval_rewards/accuracies": 0.6289682388305664, "eval_rewards/chosen": -0.4217440187931061, "eval_rewards/margins": 0.15961241722106934, "eval_rewards/rejected": -0.5813564658164978, "eval_runtime": 222.104, "eval_samples_per_second": 9.005, "eval_steps_per_second": 0.284, "step": 7600 }, { "epoch": 1.99, "learning_rate": 2.671184785033032e-10, "logits/chosen": -2.687058925628662, "logits/rejected": -2.6820104122161865, "logps/chosen": -1634.839599609375, "logps/rejected": -1303.875244140625, "loss": 0.6623, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.48275700211524963, "rewards/margins": -0.0027821571566164494, "rewards/rejected": -0.4799748957157135, "step": 7610 }, { "epoch": 1.99, "learning_rate": 1.2625640403302054e-10, "logits/chosen": -2.698061466217041, "logits/rejected": -2.6827805042266846, "logps/chosen": -1436.16748046875, "logps/rejected": -1356.511962890625, "loss": 0.6492, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.47642627358436584, "rewards/margins": 0.1004827618598938, "rewards/rejected": -0.5769090056419373, "step": 7620 }, { "epoch": 2.0, "learning_rate": 3.756411091515588e-11, "logits/chosen": -2.6850998401641846, "logits/rejected": -2.6807219982147217, "logps/chosen": -1597.2109375, "logps/rejected": -1336.142578125, "loss": 0.6746, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.43785446882247925, "rewards/margins": 0.1036500483751297, "rewards/rejected": -0.5415045022964478, "step": 7630 }, { "epoch": 2.0, "learning_rate": 1.0434500657963143e-12, "logits/chosen": -2.703892946243286, "logits/rejected": -2.7021007537841797, "logps/chosen": -1478.006591796875, "logps/rejected": -1315.51318359375, "loss": 0.6228, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.37986981868743896, "rewards/margins": 0.44364243745803833, "rewards/rejected": -0.8235122561454773, "step": 7640 }, { "epoch": 2.0, "step": 7642, "total_flos": 0.0, "train_loss": 0.6517634629204897, "train_runtime": 44544.264, "train_samples_per_second": 2.745, "train_steps_per_second": 0.172 } ], "logging_steps": 10, "max_steps": 7642, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 100, "total_flos": 0.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }