diff --git "a/checkpoint-2400/trainer_state.json" "b/checkpoint-2400/trainer_state.json" deleted file mode 100644--- "a/checkpoint-2400/trainer_state.json" +++ /dev/null @@ -1,33621 +0,0 @@ -{ - "best_metric": null, - "best_model_checkpoint": null, - "epoch": 0.9073724007561437, - "eval_steps": 500, - "global_step": 2400, - "is_hyper_param_search": false, - "is_local_process_zero": true, - "is_world_process_zero": true, - "log_history": [ - { - "epoch": 0.0, - "learning_rate": 0.0, - "logits/chosen": -2.754106044769287, - "logits/rejected": -4.593050479888916, - "logps/chosen": -319.7142333984375, - "logps/rejected": -104.09795379638672, - "loss": 0.6931, - "rewards/accuracies": 0.0, - "rewards/chosen": 0.0, - "rewards/margins": 0.0, - "rewards/rejected": 0.0, - "step": 1 - }, - { - "epoch": 0.0, - "learning_rate": 0.0, - "logits/chosen": -3.1771628856658936, - "logits/rejected": -3.680676221847534, - "logps/chosen": -397.6637878417969, - "logps/rejected": -373.21380615234375, - "loss": 0.6931, - "rewards/accuracies": 0.0, - "rewards/chosen": 0.0, - "rewards/margins": 0.0, - "rewards/rejected": 0.0, - "step": 2 - }, - { - "epoch": 0.0, - "learning_rate": 0.0, - "logits/chosen": -2.5064761638641357, - "logits/rejected": -3.782898426055908, - "logps/chosen": -254.76165771484375, - "logps/rejected": -186.08839416503906, - "loss": 0.6931, - "rewards/accuracies": 0.0, - "rewards/chosen": 0.0, - "rewards/margins": 0.0, - "rewards/rejected": 0.0, - "step": 3 - }, - { - "epoch": 0.0, - "learning_rate": 0.0, - "logits/chosen": -2.2775089740753174, - "logits/rejected": -3.833817958831787, - "logps/chosen": -304.353271484375, - "logps/rejected": -213.43109130859375, - "loss": 0.6931, - "rewards/accuracies": 0.0, - "rewards/chosen": 0.0, - "rewards/margins": 0.0, - "rewards/rejected": 0.0, - "step": 4 - }, - { - "epoch": 0.0, - "learning_rate": 0.0, - "logits/chosen": -5.249998092651367, - "logits/rejected": -2.7008368968963623, - "logps/chosen": -359.8149108886719, - "logps/rejected": -1406.999755859375, - "loss": 0.6931, - "rewards/accuracies": 0.0, - "rewards/chosen": 0.0, - "rewards/margins": 0.0, - "rewards/rejected": 0.0, - "step": 5 - }, - { - "epoch": 0.0, - "learning_rate": 0.0, - "logits/chosen": -1.0565773248672485, - "logits/rejected": -5.632026195526123, - "logps/chosen": -387.4065856933594, - "logps/rejected": -51.38404846191406, - "loss": 0.6931, - "rewards/accuracies": 0.0, - "rewards/chosen": 0.0, - "rewards/margins": 0.0, - "rewards/rejected": 0.0, - "step": 6 - }, - { - "epoch": 0.0, - "learning_rate": 0.0, - "logits/chosen": -4.299613952636719, - "logits/rejected": -2.990806818008423, - "logps/chosen": -245.5072784423828, - "logps/rejected": -565.55078125, - "loss": 0.6931, - "rewards/accuracies": 0.0, - "rewards/chosen": 0.0, - "rewards/margins": 0.0, - "rewards/rejected": 0.0, - "step": 7 - }, - { - "epoch": 0.0, - "learning_rate": 0.0, - "logits/chosen": -5.084601402282715, - "logits/rejected": -2.5835258960723877, - "logps/chosen": -526.484619140625, - "logps/rejected": -2664.6533203125, - "loss": 0.6931, - "rewards/accuracies": 0.0, - "rewards/chosen": 0.0, - "rewards/margins": 0.0, - "rewards/rejected": 0.0, - "step": 8 - }, - { - "epoch": 0.0, - "learning_rate": 0.0, - "logits/chosen": -2.468440294265747, - "logits/rejected": -5.035132884979248, - "logps/chosen": -462.2491149902344, - "logps/rejected": -33.14249801635742, - "loss": 0.6931, - "rewards/accuracies": 0.0, - "rewards/chosen": 0.0, - "rewards/margins": 0.0, - "rewards/rejected": 0.0, - "step": 9 - }, - { - "epoch": 0.0, - "learning_rate": 0.0, - "logits/chosen": -5.792623996734619, - "logits/rejected": -4.809487819671631, - "logps/chosen": -285.28399658203125, - "logps/rejected": -483.89483642578125, - "loss": 0.6931, - "rewards/accuracies": 0.0, - "rewards/chosen": 0.0, - "rewards/margins": 0.0, - "rewards/rejected": 0.0, - "step": 10 - }, - { - "epoch": 0.0, - "learning_rate": 0.0, - "logits/chosen": -2.0572822093963623, - "logits/rejected": -2.9805736541748047, - "logps/chosen": -533.1488037109375, - "logps/rejected": -438.1114501953125, - "loss": 0.6931, - "rewards/accuracies": 0.0, - "rewards/chosen": 0.0, - "rewards/margins": 0.0, - "rewards/rejected": 0.0, - "step": 11 - }, - { - "epoch": 0.0, - "learning_rate": 0.0, - "logits/chosen": -2.287609100341797, - "logits/rejected": -1.9588426351547241, - "logps/chosen": -401.70391845703125, - "logps/rejected": -587.4236450195312, - "loss": 0.6931, - "rewards/accuracies": 0.0, - "rewards/chosen": 0.0, - "rewards/margins": 0.0, - "rewards/rejected": 0.0, - "step": 12 - }, - { - "epoch": 0.0, - "learning_rate": 0.0, - "logits/chosen": -7.634253978729248, - "logits/rejected": -6.030396461486816, - "logps/chosen": -62.753334045410156, - "logps/rejected": -334.4805603027344, - "loss": 0.6931, - "rewards/accuracies": 0.0, - "rewards/chosen": 0.0, - "rewards/margins": 0.0, - "rewards/rejected": 0.0, - "step": 13 - }, - { - "epoch": 0.01, - "learning_rate": 0.0, - "logits/chosen": -6.540445804595947, - "logits/rejected": -3.5662786960601807, - "logps/chosen": -639.546875, - "logps/rejected": -2772.361083984375, - "loss": 0.6931, - "rewards/accuracies": 0.0, - "rewards/chosen": 0.0, - "rewards/margins": 0.0, - "rewards/rejected": 0.0, - "step": 14 - }, - { - "epoch": 0.01, - "learning_rate": 0.0, - "logits/chosen": -3.831629514694214, - "logits/rejected": -4.850943088531494, - "logps/chosen": -353.9005432128906, - "logps/rejected": -376.6553649902344, - "loss": 0.6931, - "rewards/accuracies": 0.0, - "rewards/chosen": 0.0, - "rewards/margins": 0.0, - "rewards/rejected": 0.0, - "step": 15 - }, - { - "epoch": 0.01, - "learning_rate": 0.0, - "logits/chosen": -7.671483993530273, - "logits/rejected": -3.282883644104004, - "logps/chosen": -626.7948608398438, - "logps/rejected": -2634.756591796875, - "loss": 0.6931, - "rewards/accuracies": 0.0, - "rewards/chosen": 0.0, - "rewards/margins": 0.0, - "rewards/rejected": 0.0, - "step": 16 - }, - { - "epoch": 0.01, - "learning_rate": 0.0, - "logits/chosen": -3.201785087585449, - "logits/rejected": -4.0167059898376465, - "logps/chosen": -261.7278137207031, - "logps/rejected": -62.94657897949219, - "loss": 0.6931, - "rewards/accuracies": 0.0, - "rewards/chosen": 0.0, - "rewards/margins": 0.0, - "rewards/rejected": 0.0, - "step": 17 - }, - { - "epoch": 0.01, - "learning_rate": 0.0, - "logits/chosen": -6.3873443603515625, - "logits/rejected": -5.318516254425049, - "logps/chosen": -235.19747924804688, - "logps/rejected": -996.428955078125, - "loss": 0.6931, - "rewards/accuracies": 0.0, - "rewards/chosen": 0.0, - "rewards/margins": 0.0, - "rewards/rejected": 0.0, - "step": 18 - }, - { - "epoch": 0.01, - "learning_rate": 0.0, - "logits/chosen": -1.6961345672607422, - "logits/rejected": -0.9290814995765686, - "logps/chosen": -253.11538696289062, - "logps/rejected": -441.8524169921875, - "loss": 0.6931, - "rewards/accuracies": 0.0, - "rewards/chosen": 0.0, - "rewards/margins": 0.0, - "rewards/rejected": 0.0, - "step": 19 - }, - { - "epoch": 0.01, - "learning_rate": 0.0, - "logits/chosen": -3.5792934894561768, - "logits/rejected": -7.104396343231201, - "logps/chosen": -948.2760009765625, - "logps/rejected": -188.777099609375, - "loss": 0.6931, - "rewards/accuracies": 0.0, - "rewards/chosen": 0.0, - "rewards/margins": 0.0, - "rewards/rejected": 0.0, - "step": 20 - }, - { - "epoch": 0.01, - "learning_rate": 0.0, - "logits/chosen": -3.410085678100586, - "logits/rejected": -4.809041500091553, - "logps/chosen": -314.84613037109375, - "logps/rejected": -379.8197021484375, - "loss": 0.6931, - "rewards/accuracies": 0.0, - "rewards/chosen": 0.0, - "rewards/margins": 0.0, - "rewards/rejected": 0.0, - "step": 21 - }, - { - "epoch": 0.01, - "learning_rate": 2.0000000000000003e-06, - "logits/chosen": -4.367404460906982, - "logits/rejected": -2.4189741611480713, - "logps/chosen": -304.67376708984375, - "logps/rejected": -701.9650268554688, - "loss": 0.6931, - "rewards/accuracies": 0.0, - "rewards/chosen": 0.0, - "rewards/margins": 0.0, - "rewards/rejected": 0.0, - "step": 22 - }, - { - "epoch": 0.01, - "learning_rate": 4.000000000000001e-06, - "logits/chosen": -1.2450003623962402, - "logits/rejected": -4.998924255371094, - "logps/chosen": -516.7353515625, - "logps/rejected": -90.0650634765625, - "loss": 0.6931, - "rewards/accuracies": 0.0, - "rewards/chosen": 0.0, - "rewards/margins": 0.0, - "rewards/rejected": 0.0, - "step": 23 - }, - { - "epoch": 0.01, - "learning_rate": 6e-06, - "logits/chosen": -3.260876417160034, - "logits/rejected": -3.143871545791626, - "logps/chosen": -213.996337890625, - "logps/rejected": -251.9791259765625, - "loss": 0.6906, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.004101562779396772, - "rewards/margins": 0.00343933142721653, - "rewards/rejected": 0.0006622314685955644, - "step": 24 - }, - { - "epoch": 0.01, - "learning_rate": 8.000000000000001e-06, - "logits/chosen": -4.295136451721191, - "logits/rejected": -6.048272609710693, - "logps/chosen": -628.1040649414062, - "logps/rejected": -241.5625, - "loss": 0.6883, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.012615966610610485, - "rewards/margins": 0.01692657545208931, - "rewards/rejected": -0.00431060791015625, - "step": 25 - }, - { - "epoch": 0.01, - "learning_rate": 1e-05, - "logits/chosen": -3.1094841957092285, - "logits/rejected": -4.578867435455322, - "logps/chosen": -573.9639892578125, - "logps/rejected": -311.6768493652344, - "loss": 0.6858, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.01740722730755806, - "rewards/margins": 0.02233581617474556, - "rewards/rejected": -0.0049285888671875, - "step": 26 - }, - { - "epoch": 0.01, - "learning_rate": 1e-05, - "logits/chosen": -3.15834379196167, - "logits/rejected": -6.178736209869385, - "logps/chosen": -547.9263305664062, - "logps/rejected": -87.46117401123047, - "loss": 0.6704, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.06633911281824112, - "rewards/margins": 0.06366043537855148, - "rewards/rejected": 0.0026786804664880037, - "step": 27 - }, - { - "epoch": 0.01, - "learning_rate": 1.2e-05, - "logits/chosen": -2.3840253353118896, - "logits/rejected": -1.4133598804473877, - "logps/chosen": -245.08758544921875, - "logps/rejected": -424.10626220703125, - "loss": 0.6679, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.007951355539262295, - "rewards/margins": 0.03429412841796875, - "rewards/rejected": -0.02634277381002903, - "step": 28 - }, - { - "epoch": 0.01, - "learning_rate": 1.4e-05, - "logits/chosen": -4.299242973327637, - "logits/rejected": -1.9464861154556274, - "logps/chosen": -196.5806884765625, - "logps/rejected": -598.736572265625, - "loss": 0.6535, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.002363586565479636, - "rewards/margins": 0.08014068752527237, - "rewards/rejected": -0.07777710258960724, - "step": 29 - }, - { - "epoch": 0.01, - "learning_rate": 1.6000000000000003e-05, - "logits/chosen": -5.861788272857666, - "logits/rejected": -3.32490611076355, - "logps/chosen": -172.5289306640625, - "logps/rejected": -670.6824951171875, - "loss": 0.6563, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.05348968505859375, - "rewards/margins": 0.03555755317211151, - "rewards/rejected": 0.01793213002383709, - "step": 30 - }, - { - "epoch": 0.01, - "learning_rate": 1.8e-05, - "logits/chosen": -3.3046510219573975, - "logits/rejected": -2.9898486137390137, - "logps/chosen": -417.6094665527344, - "logps/rejected": -598.5953979492188, - "loss": 0.6183, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.12948913872241974, - "rewards/margins": 0.1696319580078125, - "rewards/rejected": -0.04014282301068306, - "step": 31 - }, - { - "epoch": 0.01, - "learning_rate": 2e-05, - "logits/chosen": -2.9050347805023193, - "logits/rejected": -3.058382987976074, - "logps/chosen": -407.98974609375, - "logps/rejected": -489.9923095703125, - "loss": 0.5855, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.05571288987994194, - "rewards/margins": 0.21007080376148224, - "rewards/rejected": -0.15435791015625, - "step": 32 - }, - { - "epoch": 0.01, - "learning_rate": 1.9999992892637624e-05, - "logits/chosen": -4.2447509765625, - "logits/rejected": -2.3894360065460205, - "logps/chosen": -607.8339233398438, - "logps/rejected": -1271.582763671875, - "loss": 0.567, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.09686889499425888, - "rewards/margins": 0.596356213092804, - "rewards/rejected": -0.4994873106479645, - "step": 33 - }, - { - "epoch": 0.01, - "learning_rate": 1.9999971570560586e-05, - "logits/chosen": -4.606659889221191, - "logits/rejected": -5.297774314880371, - "logps/chosen": -123.59156799316406, - "logps/rejected": -121.62969970703125, - "loss": 0.5753, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.03215789794921875, - "rewards/margins": 0.07233963161706924, - "rewards/rejected": -0.040181733667850494, - "step": 34 - }, - { - "epoch": 0.01, - "learning_rate": 1.99999360337992e-05, - "logits/chosen": -2.618955135345459, - "logits/rejected": -4.968425273895264, - "logps/chosen": -296.46923828125, - "logps/rejected": -110.76615905761719, - "loss": 0.5248, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.12125854939222336, - "rewards/margins": 0.3984207212924957, - "rewards/rejected": -0.2771621644496918, - "step": 35 - }, - { - "epoch": 0.01, - "learning_rate": 1.9999886282403983e-05, - "logits/chosen": -2.9885246753692627, - "logits/rejected": -1.1557713747024536, - "logps/chosen": -285.805419921875, - "logps/rejected": -621.9231567382812, - "loss": 0.4964, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.253244012594223, - "rewards/margins": 0.46789246797561646, - "rewards/rejected": -0.21464844048023224, - "step": 36 - }, - { - "epoch": 0.01, - "learning_rate": 1.9999822316445652e-05, - "logits/chosen": -5.461811542510986, - "logits/rejected": -5.280825614929199, - "logps/chosen": -116.93585205078125, - "logps/rejected": -109.59356689453125, - "loss": 0.4606, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.04782715067267418, - "rewards/margins": 0.11667175590991974, - "rewards/rejected": -0.06884460896253586, - "step": 37 - }, - { - "epoch": 0.01, - "learning_rate": 1.9999744136015132e-05, - "logits/chosen": -4.099742412567139, - "logits/rejected": -6.354801177978516, - "logps/chosen": -401.599365234375, - "logps/rejected": -139.0527801513672, - "loss": 0.4284, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.12218628078699112, - "rewards/margins": 0.2756362855434418, - "rewards/rejected": -0.15345001220703125, - "step": 38 - }, - { - "epoch": 0.01, - "learning_rate": 1.9999651741223557e-05, - "logits/chosen": -1.981978416442871, - "logits/rejected": -4.168496608734131, - "logps/chosen": -402.2864990234375, - "logps/rejected": -122.88456726074219, - "loss": 0.3424, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.3137664794921875, - "rewards/margins": 0.2931221127510071, - "rewards/rejected": 0.020644379779696465, - "step": 39 - }, - { - "epoch": 0.02, - "learning_rate": 1.9999545132202263e-05, - "logits/chosen": -1.7157127857208252, - "logits/rejected": -3.8543760776519775, - "logps/chosen": -840.398193359375, - "logps/rejected": -633.2039184570312, - "loss": 0.335, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.2186279296875, - "rewards/margins": 2.1947388648986816, - "rewards/rejected": -0.9761108756065369, - "step": 40 - }, - { - "epoch": 0.02, - "learning_rate": 1.999942430910279e-05, - "logits/chosen": -8.137284278869629, - "logits/rejected": -4.068392753601074, - "logps/chosen": -204.39158630371094, - "logps/rejected": -2998.416015625, - "loss": 0.3229, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.2520645260810852, - "rewards/margins": 1.593226671218872, - "rewards/rejected": -1.341162085533142, - "step": 41 - }, - { - "epoch": 0.02, - "learning_rate": 1.9999289272096886e-05, - "logits/chosen": -2.077958583831787, - "logits/rejected": -2.173232078552246, - "logps/chosen": -327.01361083984375, - "logps/rejected": -425.9393310546875, - "loss": 0.3453, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.5012878775596619, - "rewards/margins": 1.2230744361877441, - "rewards/rejected": -0.7217864990234375, - "step": 42 - }, - { - "epoch": 0.02, - "learning_rate": 1.9999140021376505e-05, - "logits/chosen": -2.9561045169830322, - "logits/rejected": -4.785356521606445, - "logps/chosen": -357.7396240234375, - "logps/rejected": -49.677589416503906, - "loss": 0.3172, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.569580078125, - "rewards/margins": 1.063636064529419, - "rewards/rejected": -0.49405595660209656, - "step": 43 - }, - { - "epoch": 0.02, - "learning_rate": 1.9998976557153797e-05, - "logits/chosen": -7.935493469238281, - "logits/rejected": -4.468094825744629, - "logps/chosen": -346.6300964355469, - "logps/rejected": -3305.17724609375, - "loss": 0.3236, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.09026183933019638, - "rewards/margins": 1.3510284423828125, - "rewards/rejected": -1.260766625404358, - "step": 44 - }, - { - "epoch": 0.02, - "learning_rate": 1.9998798879661128e-05, - "logits/chosen": -5.771402359008789, - "logits/rejected": -3.7255914211273193, - "logps/chosen": -438.2363586425781, - "logps/rejected": -1131.2255859375, - "loss": 0.1864, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.308969110250473, - "rewards/margins": 1.8924590349197388, - "rewards/rejected": -2.201428174972534, - "step": 45 - }, - { - "epoch": 0.02, - "learning_rate": 1.9998606989151057e-05, - "logits/chosen": -4.269525527954102, - "logits/rejected": -2.764293909072876, - "logps/chosen": -699.1025390625, - "logps/rejected": -1219.428466796875, - "loss": 0.3091, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.658038318157196, - "rewards/margins": 1.9010682106018066, - "rewards/rejected": -1.2430298328399658, - "step": 46 - }, - { - "epoch": 0.02, - "learning_rate": 1.9998400885896355e-05, - "logits/chosen": -1.957014560699463, - "logits/rejected": -1.686045527458191, - "logps/chosen": -504.3639831542969, - "logps/rejected": -662.4091796875, - "loss": 0.119, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.3018432557582855, - "rewards/margins": 1.938439965248108, - "rewards/rejected": -1.6365966796875, - "step": 47 - }, - { - "epoch": 0.02, - "learning_rate": 1.9998180570189986e-05, - "logits/chosen": -5.8333210945129395, - "logits/rejected": -3.7435801029205322, - "logps/chosen": -450.4902648925781, - "logps/rejected": -1613.641357421875, - "loss": 0.1796, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.6597869992256165, - "rewards/margins": 3.2764618396759033, - "rewards/rejected": -2.6166749000549316, - "step": 48 - }, - { - "epoch": 0.02, - "learning_rate": 1.9997946042345128e-05, - "logits/chosen": -2.930065870285034, - "logits/rejected": -3.838714599609375, - "logps/chosen": -227.214111328125, - "logps/rejected": -207.2259521484375, - "loss": 0.1491, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.5689544677734375, - "rewards/margins": 1.280303955078125, - "rewards/rejected": -0.7113494873046875, - "step": 49 - }, - { - "epoch": 0.02, - "learning_rate": 1.9997697302695157e-05, - "logits/chosen": -4.785912990570068, - "logits/rejected": -5.55073881149292, - "logps/chosen": -299.2391357421875, - "logps/rejected": -515.567138671875, - "loss": 0.1097, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.4051147401332855, - "rewards/margins": 2.961871385574341, - "rewards/rejected": -2.5567567348480225, - "step": 50 - }, - { - "epoch": 0.02, - "learning_rate": 1.9997434351593648e-05, - "logits/chosen": -3.161895990371704, - "logits/rejected": -5.999098777770996, - "logps/chosen": -277.05999755859375, - "logps/rejected": -81.1312255859375, - "loss": 0.243, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.5358185172080994, - "rewards/margins": 1.7302308082580566, - "rewards/rejected": -1.1944122314453125, - "step": 51 - }, - { - "epoch": 0.02, - "learning_rate": 1.9997157189414373e-05, - "logits/chosen": -3.1607820987701416, - "logits/rejected": -3.652836322784424, - "logps/chosen": -346.51165771484375, - "logps/rejected": -413.726318359375, - "loss": 0.0817, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.1129150390625, - "rewards/margins": 2.8472230434417725, - "rewards/rejected": -2.7343080043792725, - "step": 52 - }, - { - "epoch": 0.02, - "learning_rate": 1.9996865816551317e-05, - "logits/chosen": -7.132923126220703, - "logits/rejected": -6.842004776000977, - "logps/chosen": -423.08282470703125, - "logps/rejected": -861.88671875, - "loss": 0.1189, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.5608459711074829, - "rewards/margins": 2.3332338333129883, - "rewards/rejected": -1.7723877429962158, - "step": 53 - }, - { - "epoch": 0.02, - "learning_rate": 1.999656023341866e-05, - "logits/chosen": -4.457879066467285, - "logits/rejected": -5.857478618621826, - "logps/chosen": -359.17169189453125, - "logps/rejected": -291.2760314941406, - "loss": 0.1012, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.8214569091796875, - "rewards/margins": 2.79449462890625, - "rewards/rejected": -1.9730377197265625, - "step": 54 - }, - { - "epoch": 0.02, - "learning_rate": 1.9996240440450773e-05, - "logits/chosen": -4.702744483947754, - "logits/rejected": -4.427407741546631, - "logps/chosen": -323.13714599609375, - "logps/rejected": -541.5930786132812, - "loss": 0.0685, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.912982165813446, - "rewards/margins": 2.6899170875549316, - "rewards/rejected": -1.7769348621368408, - "step": 55 - }, - { - "epoch": 0.02, - "learning_rate": 1.9995906438102238e-05, - "logits/chosen": -8.236844062805176, - "logits/rejected": -2.542020797729492, - "logps/chosen": -356.0667419433594, - "logps/rejected": -1764.3857421875, - "loss": 0.0487, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.56878662109375, - "rewards/margins": 4.100781440734863, - "rewards/rejected": -3.531994581222534, - "step": 56 - }, - { - "epoch": 0.02, - "learning_rate": 1.999555822684783e-05, - "logits/chosen": -8.46875286102295, - "logits/rejected": -2.5474486351013184, - "logps/chosen": -351.33233642578125, - "logps/rejected": -3705.753662109375, - "loss": 0.1296, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.134796142578125, - "rewards/margins": 4.877227783203125, - "rewards/rejected": -3.742431640625, - "step": 57 - }, - { - "epoch": 0.02, - "learning_rate": 1.999519580718252e-05, - "logits/chosen": -2.6453170776367188, - "logits/rejected": -4.194319248199463, - "logps/chosen": -351.42327880859375, - "logps/rejected": -223.67315673828125, - "loss": 0.036, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.885296642780304, - "rewards/margins": 3.9720687866210938, - "rewards/rejected": -3.0867722034454346, - "step": 58 - }, - { - "epoch": 0.02, - "learning_rate": 1.9994819179621477e-05, - "logits/chosen": -8.465778350830078, - "logits/rejected": -2.8213281631469727, - "logps/chosen": -437.4476013183594, - "logps/rejected": -2098.09619140625, - "loss": 0.0485, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.271890252828598, - "rewards/margins": 5.0482025146484375, - "rewards/rejected": -5.320092678070068, - "step": 59 - }, - { - "epoch": 0.02, - "learning_rate": 1.999442834470007e-05, - "logits/chosen": -5.800083637237549, - "logits/rejected": -3.958115339279175, - "logps/chosen": -341.32989501953125, - "logps/rejected": -699.39013671875, - "loss": 0.035, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.8531494140625, - "rewards/margins": 4.652521133422852, - "rewards/rejected": -2.7993714809417725, - "step": 60 - }, - { - "epoch": 0.02, - "learning_rate": 1.9994023302973854e-05, - "logits/chosen": -1.3375214338302612, - "logits/rejected": -4.949047088623047, - "logps/chosen": -318.719482421875, - "logps/rejected": -196.1619873046875, - "loss": 0.127, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.573590099811554, - "rewards/margins": 2.6428680419921875, - "rewards/rejected": -2.0692780017852783, - "step": 61 - }, - { - "epoch": 0.02, - "learning_rate": 1.999360405501859e-05, - "logits/chosen": -2.5050299167633057, - "logits/rejected": -1.7068266868591309, - "logps/chosen": -466.2985534667969, - "logps/rejected": -612.2509155273438, - "loss": 0.0489, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.8090179562568665, - "rewards/margins": 1.8272614479064941, - "rewards/rejected": -2.636279344558716, - "step": 62 - }, - { - "epoch": 0.02, - "learning_rate": 1.9993170601430233e-05, - "logits/chosen": -5.4187164306640625, - "logits/rejected": -2.909184217453003, - "logps/chosen": -394.2977294921875, - "logps/rejected": -968.8502197265625, - "loss": 0.0377, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.4638671875, - "rewards/margins": 8.662220001220703, - "rewards/rejected": -7.198352336883545, - "step": 63 - }, - { - "epoch": 0.02, - "learning_rate": 1.999272294282491e-05, - "logits/chosen": -4.725447654724121, - "logits/rejected": -4.099420547485352, - "logps/chosen": -1180.620361328125, - "logps/rejected": -1845.8486328125, - "loss": 0.0433, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.547228991985321, - "rewards/margins": 12.25494384765625, - "rewards/rejected": -11.707715034484863, - "step": 64 - }, - { - "epoch": 0.02, - "learning_rate": 1.9992261079838966e-05, - "logits/chosen": -1.8770055770874023, - "logits/rejected": -4.888939380645752, - "logps/chosen": -148.6839599609375, - "logps/rejected": -41.254581451416016, - "loss": 0.0517, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.7092728018760681, - "rewards/margins": 2.215604543685913, - "rewards/rejected": -1.5063318014144897, - "step": 65 - }, - { - "epoch": 0.02, - "learning_rate": 1.9991785013128922e-05, - "logits/chosen": -6.117021560668945, - "logits/rejected": -1.8913120031356812, - "logps/chosen": -266.7498779296875, - "logps/rejected": -923.9742431640625, - "loss": 0.0104, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.5719146728515625, - "rewards/margins": 10.152847290039062, - "rewards/rejected": -9.5809326171875, - "step": 66 - }, - { - "epoch": 0.03, - "learning_rate": 1.99912947433715e-05, - "logits/chosen": -4.465034484863281, - "logits/rejected": -6.284864902496338, - "logps/chosen": -256.6780090332031, - "logps/rejected": -123.60851287841797, - "loss": 0.0542, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.27767640352249146, - "rewards/margins": 1.4370644092559814, - "rewards/rejected": -1.7147407531738281, - "step": 67 - }, - { - "epoch": 0.03, - "learning_rate": 1.99907902712636e-05, - "logits/chosen": -5.843249320983887, - "logits/rejected": -3.1001908779144287, - "logps/chosen": -412.59503173828125, - "logps/rejected": -965.9727172851562, - "loss": 0.0549, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.11783447116613388, - "rewards/margins": 5.958258152008057, - "rewards/rejected": -5.840423583984375, - "step": 68 - }, - { - "epoch": 0.03, - "learning_rate": 1.9990271597522318e-05, - "logits/chosen": -7.12353515625, - "logits/rejected": -2.3208212852478027, - "logps/chosen": -200.72509765625, - "logps/rejected": -1436.088623046875, - "loss": 0.0162, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.17402343451976776, - "rewards/margins": 10.17996883392334, - "rewards/rejected": -10.005945205688477, - "step": 69 - }, - { - "epoch": 0.03, - "learning_rate": 1.998973872288493e-05, - "logits/chosen": -5.054675102233887, - "logits/rejected": -2.2859678268432617, - "logps/chosen": -524.64208984375, - "logps/rejected": -1112.5081787109375, - "loss": 0.0621, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.5274658203125, - "rewards/margins": 9.352404594421387, - "rewards/rejected": -8.824938774108887, - "step": 70 - }, - { - "epoch": 0.03, - "learning_rate": 1.9989191648108907e-05, - "logits/chosen": -5.076954364776611, - "logits/rejected": -3.6740002632141113, - "logps/chosen": -714.1792602539062, - "logps/rejected": -1353.0906982421875, - "loss": 0.0114, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.6124939322471619, - "rewards/margins": 10.613945960998535, - "rewards/rejected": -10.001452445983887, - "step": 71 - }, - { - "epoch": 0.03, - "learning_rate": 1.9988630373971896e-05, - "logits/chosen": -3.7075090408325195, - "logits/rejected": -4.695156097412109, - "logps/chosen": -327.32977294921875, - "logps/rejected": -342.9130859375, - "loss": 0.0213, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.6192840933799744, - "rewards/margins": 7.488104343414307, - "rewards/rejected": -8.107388496398926, - "step": 72 - }, - { - "epoch": 0.03, - "learning_rate": 1.998805490127174e-05, - "logits/chosen": -2.939310073852539, - "logits/rejected": -5.74268102645874, - "logps/chosen": -378.4302673339844, - "logps/rejected": -153.2266845703125, - "loss": 0.1097, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.07834777981042862, - "rewards/margins": 2.533526659011841, - "rewards/rejected": -2.45517897605896, - "step": 73 - }, - { - "epoch": 0.03, - "learning_rate": 1.998746523082645e-05, - "logits/chosen": -4.420934200286865, - "logits/rejected": -6.163727283477783, - "logps/chosen": -202.08009338378906, - "logps/rejected": -167.63909912109375, - "loss": 0.0221, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.0726394653320312, - "rewards/margins": 3.250605821609497, - "rewards/rejected": -2.177966356277466, - "step": 74 - }, - { - "epoch": 0.03, - "learning_rate": 1.998686136347423e-05, - "logits/chosen": -4.982058048248291, - "logits/rejected": -6.289169788360596, - "logps/chosen": -178.29364013671875, - "logps/rejected": -66.37471771240234, - "loss": 0.0144, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.8167358636856079, - "rewards/margins": 2.126889228820801, - "rewards/rejected": -1.3101532459259033, - "step": 75 - }, - { - "epoch": 0.03, - "learning_rate": 1.998624330007346e-05, - "logits/chosen": -3.948838472366333, - "logits/rejected": -1.9309172630310059, - "logps/chosen": -440.7708435058594, - "logps/rejected": -911.2651977539062, - "loss": 0.0094, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.6012512445449829, - "rewards/margins": 13.15230655670166, - "rewards/rejected": -13.753558158874512, - "step": 76 - }, - { - "epoch": 0.03, - "learning_rate": 1.9985611041502704e-05, - "logits/chosen": -5.814752578735352, - "logits/rejected": -1.8379935026168823, - "logps/chosen": -463.1951904296875, - "logps/rejected": -1368.0374755859375, - "loss": 0.0003, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.24691466987133026, - "rewards/margins": 16.09623908996582, - "rewards/rejected": -16.34315299987793, - "step": 77 - }, - { - "epoch": 0.03, - "learning_rate": 1.9984964588660692e-05, - "logits/chosen": -3.4870142936706543, - "logits/rejected": -4.108331680297852, - "logps/chosen": -230.69448852539062, - "logps/rejected": -295.8138427734375, - "loss": 0.038, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.185540795326233, - "rewards/margins": 4.064462184906006, - "rewards/rejected": -2.8789215087890625, - "step": 78 - }, - { - "epoch": 0.03, - "learning_rate": 1.9984303942466346e-05, - "logits/chosen": -8.335082054138184, - "logits/rejected": -1.7776228189468384, - "logps/chosen": -476.1983642578125, - "logps/rejected": -3532.11083984375, - "loss": 0.0008, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.626306176185608, - "rewards/margins": 20.244251251220703, - "rewards/rejected": -18.617944717407227, - "step": 79 - }, - { - "epoch": 0.03, - "learning_rate": 1.9983629103858754e-05, - "logits/chosen": -2.973788022994995, - "logits/rejected": -6.184659481048584, - "logps/chosen": -668.766357421875, - "logps/rejected": -290.8888854980469, - "loss": 0.0031, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.1106079816818237, - "rewards/margins": 7.420170783996582, - "rewards/rejected": -6.309562683105469, - "step": 80 - }, - { - "epoch": 0.03, - "learning_rate": 1.998294007379718e-05, - "logits/chosen": -8.47293472290039, - "logits/rejected": -5.01866340637207, - "logps/chosen": -344.4534912109375, - "logps/rejected": -3073.607421875, - "loss": 0.0011, - "rewards/accuracies": 1.0, - "rewards/chosen": 2.511312961578369, - "rewards/margins": 15.996030807495117, - "rewards/rejected": -13.48471736907959, - "step": 81 - }, - { - "epoch": 0.03, - "learning_rate": 1.9982236853261067e-05, - "logits/chosen": -5.896878242492676, - "logits/rejected": -3.157705783843994, - "logps/chosen": -584.26318359375, - "logps/rejected": -1114.5748291015625, - "loss": 0.0299, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.37855225801467896, - "rewards/margins": 10.648449897766113, - "rewards/rejected": -10.2698974609375, - "step": 82 - }, - { - "epoch": 0.03, - "learning_rate": 1.998151944325001e-05, - "logits/chosen": -7.370048999786377, - "logits/rejected": -2.7184062004089355, - "logps/chosen": -337.2394714355469, - "logps/rejected": -1099.166748046875, - "loss": 0.0126, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.7602936029434204, - "rewards/margins": 4.754629611968994, - "rewards/rejected": -3.994335889816284, - "step": 83 - }, - { - "epoch": 0.03, - "learning_rate": 1.99807878447838e-05, - "logits/chosen": -4.166452884674072, - "logits/rejected": -6.201119422912598, - "logps/chosen": -569.5947265625, - "logps/rejected": -492.9515380859375, - "loss": 0.0018, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.8138916492462158, - "rewards/margins": 9.166543960571289, - "rewards/rejected": -7.352652072906494, - "step": 84 - }, - { - "epoch": 0.03, - "learning_rate": 1.9980042058902383e-05, - "logits/chosen": -4.8434224128723145, - "logits/rejected": -3.8287124633789062, - "logps/chosen": -279.0879821777344, - "logps/rejected": -493.5381164550781, - "loss": 0.0649, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.3300904035568237, - "rewards/margins": 6.664444446563721, - "rewards/rejected": -5.334353923797607, - "step": 85 - }, - { - "epoch": 0.03, - "learning_rate": 1.9979282086665864e-05, - "logits/chosen": -7.2263898849487305, - "logits/rejected": -2.8624587059020996, - "logps/chosen": -440.3831481933594, - "logps/rejected": -1735.85400390625, - "loss": 0.0031, - "rewards/accuracies": 1.0, - "rewards/chosen": 2.0744476318359375, - "rewards/margins": 10.287643432617188, - "rewards/rejected": -8.21319580078125, - "step": 86 - }, - { - "epoch": 0.03, - "learning_rate": 1.9978507929154534e-05, - "logits/chosen": -6.36445951461792, - "logits/rejected": -2.4054019451141357, - "logps/chosen": -363.63494873046875, - "logps/rejected": -1116.3482666015625, - "loss": 0.03, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.910082995891571, - "rewards/margins": 8.690136909484863, - "rewards/rejected": -9.6002197265625, - "step": 87 - }, - { - "epoch": 0.03, - "learning_rate": 1.997771958746883e-05, - "logits/chosen": -4.886641979217529, - "logits/rejected": -4.535892486572266, - "logps/chosen": -315.1290283203125, - "logps/rejected": -446.9580078125, - "loss": 0.0084, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.9988526105880737, - "rewards/margins": 5.534271240234375, - "rewards/rejected": -7.533123970031738, - "step": 88 - }, - { - "epoch": 0.03, - "learning_rate": 1.997691706272936e-05, - "logits/chosen": -7.006683826446533, - "logits/rejected": -2.3233485221862793, - "logps/chosen": -361.67401123046875, - "logps/rejected": -1279.86181640625, - "loss": 0.0054, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.263723760843277, - "rewards/margins": 20.082082748413086, - "rewards/rejected": -19.818359375, - "step": 89 - }, - { - "epoch": 0.03, - "learning_rate": 1.997610035607689e-05, - "logits/chosen": -2.9294044971466064, - "logits/rejected": -3.7460992336273193, - "logps/chosen": -439.51556396484375, - "logps/rejected": -521.2588500976562, - "loss": 0.0105, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.1246674060821533, - "rewards/margins": 11.371420860290527, - "rewards/rejected": -12.496088027954102, - "step": 90 - }, - { - "epoch": 0.03, - "learning_rate": 1.9975269468672342e-05, - "logits/chosen": -6.103137493133545, - "logits/rejected": -4.462825298309326, - "logps/chosen": -354.447998046875, - "logps/rejected": -752.3604736328125, - "loss": 0.0047, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.17543946206569672, - "rewards/margins": 11.079156875610352, - "rewards/rejected": -10.903717041015625, - "step": 91 - }, - { - "epoch": 0.03, - "learning_rate": 1.997442440169681e-05, - "logits/chosen": -2.1691737174987793, - "logits/rejected": -5.466336250305176, - "logps/chosen": -586.4537353515625, - "logps/rejected": -253.18479919433594, - "loss": 0.1069, - "rewards/accuracies": 1.0, - "rewards/chosen": 2.555859327316284, - "rewards/margins": 9.776704788208008, - "rewards/rejected": -7.2208452224731445, - "step": 92 - }, - { - "epoch": 0.04, - "learning_rate": 1.9973565156351524e-05, - "logits/chosen": -6.339323043823242, - "logits/rejected": -1.6482622623443604, - "logps/chosen": -354.17095947265625, - "logps/rejected": -1213.8446044921875, - "loss": 0.0042, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.4057464599609375, - "rewards/margins": 13.397177696228027, - "rewards/rejected": -12.99143123626709, - "step": 93 - }, - { - "epoch": 0.04, - "learning_rate": 1.997269173385788e-05, - "logits/chosen": -3.5599794387817383, - "logits/rejected": -6.776650905609131, - "logps/chosen": -261.6131896972656, - "logps/rejected": -143.18701171875, - "loss": 0.0198, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.0962737798690796, - "rewards/margins": 2.923417568206787, - "rewards/rejected": -1.827143907546997, - "step": 94 - }, - { - "epoch": 0.04, - "learning_rate": 1.997180413545743e-05, - "logits/chosen": -5.571203231811523, - "logits/rejected": -2.574028968811035, - "logps/chosen": -405.7864990234375, - "logps/rejected": -1018.5718383789062, - "loss": 0.1205, - "rewards/accuracies": 1.0, - "rewards/chosen": -3.607830762863159, - "rewards/margins": 13.371386528015137, - "rewards/rejected": -16.979217529296875, - "step": 95 - }, - { - "epoch": 0.04, - "learning_rate": 1.997090236241186e-05, - "logits/chosen": -6.833282470703125, - "logits/rejected": -4.149623870849609, - "logps/chosen": -281.6329345703125, - "logps/rejected": -678.8135375976562, - "loss": 0.0264, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.21195678412914276, - "rewards/margins": 5.586633205413818, - "rewards/rejected": -5.798590183258057, - "step": 96 - }, - { - "epoch": 0.04, - "learning_rate": 1.9969986416003026e-05, - "logits/chosen": -2.5956637859344482, - "logits/rejected": -5.708453178405762, - "logps/chosen": -276.5341796875, - "logps/rejected": -173.77059936523438, - "loss": 0.0004, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.238311767578125, - "rewards/margins": 6.544538974761963, - "rewards/rejected": -6.306227207183838, - "step": 97 - }, - { - "epoch": 0.04, - "learning_rate": 1.9969056297532914e-05, - "logits/chosen": -3.285088062286377, - "logits/rejected": -5.798844814300537, - "logps/chosen": -289.45703125, - "logps/rejected": -336.13067626953125, - "loss": 0.0386, - "rewards/accuracies": 1.0, - "rewards/chosen": 2.126391649246216, - "rewards/margins": 9.15063762664795, - "rewards/rejected": -7.0242462158203125, - "step": 98 - }, - { - "epoch": 0.04, - "learning_rate": 1.996811200832366e-05, - "logits/chosen": -4.099182605743408, - "logits/rejected": -5.978377819061279, - "logps/chosen": -378.1873779296875, - "logps/rejected": -372.22882080078125, - "loss": 0.0024, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.48145753145217896, - "rewards/margins": 7.037066459655762, - "rewards/rejected": -7.518524169921875, - "step": 99 - }, - { - "epoch": 0.04, - "learning_rate": 1.996715354971755e-05, - "logits/chosen": -4.914427757263184, - "logits/rejected": -7.239197254180908, - "logps/chosen": -228.14027404785156, - "logps/rejected": -146.02520751953125, - "loss": 0.039, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.06816864013671875, - "rewards/margins": 6.2541046142578125, - "rewards/rejected": -6.185935974121094, - "step": 100 - }, - { - "epoch": 0.04, - "learning_rate": 1.996618092307701e-05, - "logits/chosen": -0.8135594129562378, - "logits/rejected": -4.607048034667969, - "logps/chosen": -298.3559875488281, - "logps/rejected": -221.80715942382812, - "loss": 0.0065, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.7041229605674744, - "rewards/margins": 6.720036506652832, - "rewards/rejected": -6.015913486480713, - "step": 101 - }, - { - "epoch": 0.04, - "learning_rate": 1.9965194129784597e-05, - "logits/chosen": -5.1363115310668945, - "logits/rejected": -6.352015495300293, - "logps/chosen": -476.09246826171875, - "logps/rejected": -355.73828125, - "loss": 0.0276, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.7141082882881165, - "rewards/margins": 3.9554901123046875, - "rewards/rejected": -3.241381883621216, - "step": 102 - }, - { - "epoch": 0.04, - "learning_rate": 1.9964193171243006e-05, - "logits/chosen": -3.880309820175171, - "logits/rejected": -6.312504768371582, - "logps/chosen": -389.19610595703125, - "logps/rejected": -479.43402099609375, - "loss": 0.0087, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.6238373517990112, - "rewards/margins": 13.71996784210205, - "rewards/rejected": -12.09613037109375, - "step": 103 - }, - { - "epoch": 0.04, - "learning_rate": 1.996317804887508e-05, - "logits/chosen": -8.193633079528809, - "logits/rejected": -2.855452060699463, - "logps/chosen": -448.0120544433594, - "logps/rejected": -1338.9013671875, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.4639434814453125, - "rewards/margins": 10.376986503601074, - "rewards/rejected": -10.840929985046387, - "step": 104 - }, - { - "epoch": 0.04, - "learning_rate": 1.9962148764123785e-05, - "logits/chosen": -1.7371668815612793, - "logits/rejected": -5.1052165031433105, - "logps/chosen": -208.36817932128906, - "logps/rejected": -208.4148406982422, - "loss": 0.0091, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.257348656654358, - "rewards/margins": 9.123275756835938, - "rewards/rejected": -7.865927219390869, - "step": 105 - }, - { - "epoch": 0.04, - "learning_rate": 1.996110531845222e-05, - "logits/chosen": -2.824465751647949, - "logits/rejected": -5.705981731414795, - "logps/chosen": -288.32879638671875, - "logps/rejected": -73.27459716796875, - "loss": 0.0182, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.758392333984375, - "rewards/margins": 2.5973732471466064, - "rewards/rejected": -3.3557655811309814, - "step": 106 - }, - { - "epoch": 0.04, - "learning_rate": 1.996004771334361e-05, - "logits/chosen": -2.699617624282837, - "logits/rejected": -6.487457752227783, - "logps/chosen": -204.4813232421875, - "logps/rejected": -38.926979064941406, - "loss": 0.0538, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.978271484375, - "rewards/margins": 0.6289657354354858, - "rewards/rejected": -1.6072372198104858, - "step": 107 - }, - { - "epoch": 0.04, - "learning_rate": 1.9958975950301322e-05, - "logits/chosen": -6.178709983825684, - "logits/rejected": -4.086261749267578, - "logps/chosen": -246.6056365966797, - "logps/rejected": -938.9789428710938, - "loss": 0.1077, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.37934112548828125, - "rewards/margins": 22.24030876159668, - "rewards/rejected": -21.8609676361084, - "step": 108 - }, - { - "epoch": 0.04, - "learning_rate": 1.9957890030848828e-05, - "logits/chosen": -9.278807640075684, - "logits/rejected": -3.858706474304199, - "logps/chosen": -198.94570922851562, - "logps/rejected": -1565.369873046875, - "loss": 0.041, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.5885452628135681, - "rewards/margins": 20.685909271240234, - "rewards/rejected": -20.09736442565918, - "step": 109 - }, - { - "epoch": 0.04, - "learning_rate": 1.9956789956529738e-05, - "logits/chosen": -3.9996559619903564, - "logits/rejected": -2.941821336746216, - "logps/chosen": -573.21142578125, - "logps/rejected": -1033.73876953125, - "loss": 0.0015, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.5092529654502869, - "rewards/margins": 20.24663734436035, - "rewards/rejected": -20.755889892578125, - "step": 110 - }, - { - "epoch": 0.04, - "learning_rate": 1.9955675728907776e-05, - "logits/chosen": -4.364656448364258, - "logits/rejected": -5.203469753265381, - "logps/chosen": -424.19268798828125, - "logps/rejected": -403.6236877441406, - "loss": 0.0209, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.545965552330017, - "rewards/margins": 2.9557557106018066, - "rewards/rejected": -4.501721382141113, - "step": 111 - }, - { - "epoch": 0.04, - "learning_rate": 1.9954547349566783e-05, - "logits/chosen": -1.3791449069976807, - "logits/rejected": -5.995753765106201, - "logps/chosen": -607.0721435546875, - "logps/rejected": -430.78558349609375, - "loss": 0.0004, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.3212769031524658, - "rewards/margins": 11.258502960205078, - "rewards/rejected": -12.579779624938965, - "step": 112 - }, - { - "epoch": 0.04, - "learning_rate": 1.9953404820110725e-05, - "logits/chosen": -3.817617416381836, - "logits/rejected": -7.156817436218262, - "logps/chosen": -124.04191589355469, - "logps/rejected": -85.6280288696289, - "loss": 0.0033, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.25736162066459656, - "rewards/margins": 4.0908203125, - "rewards/rejected": -3.833458662033081, - "step": 113 - }, - { - "epoch": 0.04, - "learning_rate": 1.9952248142163672e-05, - "logits/chosen": -4.1900739669799805, - "logits/rejected": -7.638676166534424, - "logps/chosen": -159.9197235107422, - "logps/rejected": -45.83409118652344, - "loss": 0.324, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.06466827541589737, - "rewards/margins": 1.9510859251022339, - "rewards/rejected": -2.015754222869873, - "step": 114 - }, - { - "epoch": 0.04, - "learning_rate": 1.995107731736981e-05, - "logits/chosen": -5.0567426681518555, - "logits/rejected": -4.86803674697876, - "logps/chosen": -235.42257690429688, - "logps/rejected": -577.4153442382812, - "loss": 0.0096, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.705731213092804, - "rewards/margins": 16.18417739868164, - "rewards/rejected": -15.478446006774902, - "step": 115 - }, - { - "epoch": 0.04, - "learning_rate": 1.9949892347393438e-05, - "logits/chosen": -7.559631824493408, - "logits/rejected": -1.2290022373199463, - "logps/chosen": -355.69525146484375, - "logps/rejected": -1776.329833984375, - "loss": 0.0053, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.47410890460014343, - "rewards/margins": 28.063819885253906, - "rewards/rejected": -27.589710235595703, - "step": 116 - }, - { - "epoch": 0.04, - "learning_rate": 1.994869323391895e-05, - "logits/chosen": -3.846001625061035, - "logits/rejected": -6.981395721435547, - "logps/chosen": -116.50575256347656, - "logps/rejected": -107.03923034667969, - "loss": 0.0281, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.350015252828598, - "rewards/margins": 4.863354682922363, - "rewards/rejected": -4.513339519500732, - "step": 117 - }, - { - "epoch": 0.04, - "learning_rate": 1.9947479978650867e-05, - "logits/chosen": -6.463746547698975, - "logits/rejected": -2.580820322036743, - "logps/chosen": -694.028076171875, - "logps/rejected": -1898.532958984375, - "loss": 0.0291, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.14639893174171448, - "rewards/margins": 14.086291313171387, - "rewards/rejected": -13.939892768859863, - "step": 118 - }, - { - "epoch": 0.04, - "learning_rate": 1.994625258331378e-05, - "logits/chosen": -8.094979286193848, - "logits/rejected": -3.081444025039673, - "logps/chosen": -593.2371826171875, - "logps/rejected": -2224.9375, - "loss": 0.0052, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.7167297601699829, - "rewards/margins": 8.547198295593262, - "rewards/rejected": -7.830468654632568, - "step": 119 - }, - { - "epoch": 0.05, - "learning_rate": 1.994501104965241e-05, - "logits/chosen": -3.2045087814331055, - "logits/rejected": -2.7506906986236572, - "logps/chosen": -766.0457763671875, - "logps/rejected": -1009.9659423828125, - "loss": 0.018, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.112274169921875, - "rewards/margins": 10.478827476501465, - "rewards/rejected": -11.59110164642334, - "step": 120 - }, - { - "epoch": 0.05, - "learning_rate": 1.994375537943156e-05, - "logits/chosen": -2.4099833965301514, - "logits/rejected": -5.945492267608643, - "logps/chosen": -233.18411254882812, - "logps/rejected": -125.41255187988281, - "loss": 0.0134, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.2875869870185852, - "rewards/margins": 6.934247016906738, - "rewards/rejected": -6.646659851074219, - "step": 121 - }, - { - "epoch": 0.05, - "learning_rate": 1.994248557443613e-05, - "logits/chosen": -2.8987021446228027, - "logits/rejected": -3.156189203262329, - "logps/chosen": -252.7705841064453, - "logps/rejected": -530.3934936523438, - "loss": 0.0154, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.2576645016670227, - "rewards/margins": 14.279845237731934, - "rewards/rejected": -14.022180557250977, - "step": 122 - }, - { - "epoch": 0.05, - "learning_rate": 1.9941201636471115e-05, - "logits/chosen": -2.90376877784729, - "logits/rejected": -2.5515480041503906, - "logps/chosen": -238.89016723632812, - "logps/rejected": -412.4499206542969, - "loss": 0.0004, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.5056930780410767, - "rewards/margins": 9.685683250427246, - "rewards/rejected": -9.1799898147583, - "step": 123 - }, - { - "epoch": 0.05, - "learning_rate": 1.9939903567361594e-05, - "logits/chosen": -5.251322269439697, - "logits/rejected": -2.473566770553589, - "logps/chosen": -262.9138488769531, - "logps/rejected": -761.151611328125, - "loss": 0.0182, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.7202240228652954, - "rewards/margins": 9.854819297790527, - "rewards/rejected": -9.134594917297363, - "step": 124 - }, - { - "epoch": 0.05, - "learning_rate": 1.993859136895274e-05, - "logits/chosen": -3.6007041931152344, - "logits/rejected": -1.590465784072876, - "logps/chosen": -291.8656005859375, - "logps/rejected": -511.55517578125, - "loss": 0.006, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.857928454875946, - "rewards/margins": 5.39561128616333, - "rewards/rejected": -4.537683010101318, - "step": 125 - }, - { - "epoch": 0.05, - "learning_rate": 1.99372650431098e-05, - "logits/chosen": -2.874293804168701, - "logits/rejected": -5.137197017669678, - "logps/chosen": -477.43548583984375, - "logps/rejected": -285.9767150878906, - "loss": 0.0051, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.3102874755859375, - "rewards/margins": 4.013699531555176, - "rewards/rejected": -4.323987007141113, - "step": 126 - }, - { - "epoch": 0.05, - "learning_rate": 1.993592459171812e-05, - "logits/chosen": -5.449049472808838, - "logits/rejected": -2.013814687728882, - "logps/chosen": -541.7307739257812, - "logps/rejected": -1822.1513671875, - "loss": 0.0326, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.469451904296875, - "rewards/margins": 27.358564376831055, - "rewards/rejected": -26.88911247253418, - "step": 127 - }, - { - "epoch": 0.05, - "learning_rate": 1.993457001668311e-05, - "logits/chosen": -2.8777334690093994, - "logits/rejected": -4.764051914215088, - "logps/chosen": -298.2347717285156, - "logps/rejected": -223.43673706054688, - "loss": 0.0017, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.5006011724472046, - "rewards/margins": 9.105242729187012, - "rewards/rejected": -7.604641914367676, - "step": 128 - }, - { - "epoch": 0.05, - "learning_rate": 1.9933201319930257e-05, - "logits/chosen": -0.8786669969558716, - "logits/rejected": -1.079286813735962, - "logps/chosen": -243.9727020263672, - "logps/rejected": -286.10943603515625, - "loss": 0.0139, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.2899520993232727, - "rewards/margins": 2.9015610218048096, - "rewards/rejected": -2.6116089820861816, - "step": 129 - }, - { - "epoch": 0.05, - "learning_rate": 1.9931818503405132e-05, - "logits/chosen": -3.1726222038269043, - "logits/rejected": -7.029757499694824, - "logps/chosen": -725.6841430664062, - "logps/rejected": -436.59765625, - "loss": 0.0034, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.4380737245082855, - "rewards/margins": 11.597082138061523, - "rewards/rejected": -12.03515625, - "step": 130 - }, - { - "epoch": 0.05, - "learning_rate": 1.9930421569073365e-05, - "logits/chosen": -5.526457786560059, - "logits/rejected": -2.6745102405548096, - "logps/chosen": -251.55682373046875, - "logps/rejected": -831.2916259765625, - "loss": 0.0622, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.1374787092208862, - "rewards/margins": 12.763589859008789, - "rewards/rejected": -11.626111030578613, - "step": 131 - }, - { - "epoch": 0.05, - "learning_rate": 1.9929010518920667e-05, - "logits/chosen": -4.108985900878906, - "logits/rejected": -3.664000988006592, - "logps/chosen": -136.60552978515625, - "logps/rejected": -401.3514709472656, - "loss": 0.0127, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.20086669921875, - "rewards/margins": 11.173727989196777, - "rewards/rejected": -10.972861289978027, - "step": 132 - }, - { - "epoch": 0.05, - "learning_rate": 1.99275853549528e-05, - "logits/chosen": -3.661506175994873, - "logits/rejected": -4.885263442993164, - "logps/chosen": -430.8046875, - "logps/rejected": -812.9200439453125, - "loss": 0.0899, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.8764466047286987, - "rewards/margins": 19.05877113342285, - "rewards/rejected": -17.18232536315918, - "step": 133 - }, - { - "epoch": 0.05, - "learning_rate": 1.9926146079195597e-05, - "logits/chosen": -6.3614420890808105, - "logits/rejected": -2.295203924179077, - "logps/chosen": -237.1094207763672, - "logps/rejected": -941.857421875, - "loss": 0.0035, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.6770416498184204, - "rewards/margins": 12.837052345275879, - "rewards/rejected": -11.16001033782959, - "step": 134 - }, - { - "epoch": 0.05, - "learning_rate": 1.9924692693694953e-05, - "logits/chosen": -4.513888835906982, - "logits/rejected": -2.6905908584594727, - "logps/chosen": -360.6258544921875, - "logps/rejected": -762.69140625, - "loss": 0.0098, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.916271984577179, - "rewards/margins": 10.630724906921387, - "rewards/rejected": -11.5469970703125, - "step": 135 - }, - { - "epoch": 0.05, - "learning_rate": 1.992322520051681e-05, - "logits/chosen": -3.7957234382629395, - "logits/rejected": -4.347226619720459, - "logps/chosen": -313.5115966796875, - "logps/rejected": -323.4020080566406, - "loss": 0.0149, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.391519159078598, - "rewards/margins": 2.2844057083129883, - "rewards/rejected": -2.675924777984619, - "step": 136 - }, - { - "epoch": 0.05, - "learning_rate": 1.992174360174717e-05, - "logits/chosen": -8.195249557495117, - "logits/rejected": -2.40106463432312, - "logps/chosen": -453.20880126953125, - "logps/rejected": -2613.9404296875, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.14409790933132172, - "rewards/margins": 23.3531436920166, - "rewards/rejected": -23.497241973876953, - "step": 137 - }, - { - "epoch": 0.05, - "learning_rate": 1.992024789949209e-05, - "logits/chosen": -3.189124584197998, - "logits/rejected": -6.69189453125, - "logps/chosen": -333.4563903808594, - "logps/rejected": -136.7754364013672, - "loss": 0.0004, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.7327759265899658, - "rewards/margins": 8.296027183532715, - "rewards/rejected": -6.563251495361328, - "step": 138 - }, - { - "epoch": 0.05, - "learning_rate": 1.9918738095877665e-05, - "logits/chosen": -7.812728404998779, - "logits/rejected": -1.2284549474716187, - "logps/chosen": -556.6259155273438, - "logps/rejected": -3377.249267578125, - "loss": 0.0623, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.12672118842601776, - "rewards/margins": 46.400230407714844, - "rewards/rejected": -46.27350997924805, - "step": 139 - }, - { - "epoch": 0.05, - "learning_rate": 1.991721419305004e-05, - "logits/chosen": -3.3961496353149414, - "logits/rejected": -1.738175630569458, - "logps/chosen": -281.2100524902344, - "logps/rejected": -690.3389892578125, - "loss": 0.0267, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.4554901123046875, - "rewards/margins": 13.505343437194824, - "rewards/rejected": -12.049853324890137, - "step": 140 - }, - { - "epoch": 0.05, - "learning_rate": 1.9915676193175405e-05, - "logits/chosen": -1.7703593969345093, - "logits/rejected": -4.792140960693359, - "logps/chosen": -358.58551025390625, - "logps/rejected": -293.1186218261719, - "loss": 0.0035, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.1877288818359375, - "rewards/margins": 7.846686363220215, - "rewards/rejected": -8.034415245056152, - "step": 141 - }, - { - "epoch": 0.05, - "learning_rate": 1.9914124098439976e-05, - "logits/chosen": -2.230325937271118, - "logits/rejected": -3.2845518589019775, - "logps/chosen": -375.7663879394531, - "logps/rejected": -374.44451904296875, - "loss": 0.0015, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.20063476264476776, - "rewards/margins": 4.775799751281738, - "rewards/rejected": -4.575164794921875, - "step": 142 - }, - { - "epoch": 0.05, - "learning_rate": 1.991255791105002e-05, - "logits/chosen": -9.214265823364258, - "logits/rejected": -4.469902992248535, - "logps/chosen": -355.7351379394531, - "logps/rejected": -1765.747802734375, - "loss": 0.0041, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.0269378423690796, - "rewards/margins": 14.130648612976074, - "rewards/rejected": -13.103711128234863, - "step": 143 - }, - { - "epoch": 0.05, - "learning_rate": 1.9910977633231826e-05, - "logits/chosen": -3.669193983078003, - "logits/rejected": -4.649416446685791, - "logps/chosen": -419.5503845214844, - "logps/rejected": -237.3112030029297, - "loss": 0.0441, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.5722748041152954, - "rewards/margins": 4.830793857574463, - "rewards/rejected": -4.258519172668457, - "step": 144 - }, - { - "epoch": 0.05, - "learning_rate": 1.9909383267231715e-05, - "logits/chosen": -4.114679336547852, - "logits/rejected": -4.341124057769775, - "logps/chosen": -367.02557373046875, - "logps/rejected": -429.9582824707031, - "loss": 0.0005, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.3209381103515625, - "rewards/margins": 6.079761028289795, - "rewards/rejected": -4.758822917938232, - "step": 145 - }, - { - "epoch": 0.06, - "learning_rate": 1.9907774815316037e-05, - "logits/chosen": -5.551944255828857, - "logits/rejected": -2.111201524734497, - "logps/chosen": -242.19117736816406, - "logps/rejected": -738.878662109375, - "loss": 0.0648, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.3620315790176392, - "rewards/margins": 14.285921096801758, - "rewards/rejected": -12.92388916015625, - "step": 146 - }, - { - "epoch": 0.06, - "learning_rate": 1.9906152279771162e-05, - "logits/chosen": -2.1124191284179688, - "logits/rejected": -2.961057186126709, - "logps/chosen": -216.24392700195312, - "logps/rejected": -333.4666442871094, - "loss": 0.0004, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.3373947143554688, - "rewards/margins": 7.484163284301758, - "rewards/rejected": -8.821557998657227, - "step": 147 - }, - { - "epoch": 0.06, - "learning_rate": 1.990451566290348e-05, - "logits/chosen": -2.937197208404541, - "logits/rejected": -3.7667832374572754, - "logps/chosen": -423.0109558105469, - "logps/rejected": -607.331298828125, - "loss": 0.002, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.2836090326309204, - "rewards/margins": 9.759415626525879, - "rewards/rejected": -8.47580623626709, - "step": 148 - }, - { - "epoch": 0.06, - "learning_rate": 1.990286496703939e-05, - "logits/chosen": -6.163055896759033, - "logits/rejected": -1.2630265951156616, - "logps/chosen": -422.18121337890625, - "logps/rejected": -1839.30078125, - "loss": 0.0006, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.3711365461349487, - "rewards/margins": 24.928752899169922, - "rewards/rejected": -23.5576171875, - "step": 149 - }, - { - "epoch": 0.06, - "learning_rate": 1.990120019452532e-05, - "logits/chosen": -1.2292606830596924, - "logits/rejected": -1.3154398202896118, - "logps/chosen": -407.78863525390625, - "logps/rejected": -687.8659057617188, - "loss": 0.0008, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.8954498171806335, - "rewards/margins": 11.373434066772461, - "rewards/rejected": -10.477984428405762, - "step": 150 - }, - { - "epoch": 0.06, - "learning_rate": 1.989952134772769e-05, - "logits/chosen": -8.306554794311523, - "logits/rejected": -3.28462290763855, - "logps/chosen": -390.5511169433594, - "logps/rejected": -1799.1591796875, - "loss": 0.0033, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.493551641702652, - "rewards/margins": 5.297506809234619, - "rewards/rejected": -4.803955078125, - "step": 151 - }, - { - "epoch": 0.06, - "learning_rate": 1.9897828429032946e-05, - "logits/chosen": -8.621878623962402, - "logits/rejected": -3.114506483078003, - "logps/chosen": -373.916748046875, - "logps/rejected": -1532.2066650390625, - "loss": 0.0028, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.535479724407196, - "rewards/margins": 16.68242835998535, - "rewards/rejected": -17.21790885925293, - "step": 152 - }, - { - "epoch": 0.06, - "learning_rate": 1.9896121440847515e-05, - "logits/chosen": -3.3127572536468506, - "logits/rejected": -4.020143985748291, - "logps/chosen": -307.5784912109375, - "logps/rejected": -395.43292236328125, - "loss": 0.0009, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.6476837396621704, - "rewards/margins": 6.847116470336914, - "rewards/rejected": -7.494800090789795, - "step": 153 - }, - { - "epoch": 0.06, - "learning_rate": 1.9894400385597835e-05, - "logits/chosen": -3.450198173522949, - "logits/rejected": -3.523461103439331, - "logps/chosen": -255.005126953125, - "logps/rejected": -292.20904541015625, - "loss": 0.0092, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.7154815793037415, - "rewards/margins": 5.051710605621338, - "rewards/rejected": -4.336228847503662, - "step": 154 - }, - { - "epoch": 0.06, - "learning_rate": 1.9892665265730344e-05, - "logits/chosen": -4.420480728149414, - "logits/rejected": -4.083358287811279, - "logps/chosen": -184.41064453125, - "logps/rejected": -484.8167724609375, - "loss": 0.0006, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.03351898118853569, - "rewards/margins": 14.970805168151855, - "rewards/rejected": -14.937286376953125, - "step": 155 - }, - { - "epoch": 0.06, - "learning_rate": 1.9890916083711463e-05, - "logits/chosen": -6.118536949157715, - "logits/rejected": -3.876357316970825, - "logps/chosen": -160.71884155273438, - "logps/rejected": -687.8800048828125, - "loss": 0.0617, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.738751232624054, - "rewards/margins": 16.53217124938965, - "rewards/rejected": -15.793420791625977, - "step": 156 - }, - { - "epoch": 0.06, - "learning_rate": 1.9889152842027607e-05, - "logits/chosen": -2.4313807487487793, - "logits/rejected": -5.11497163772583, - "logps/chosen": -236.02615356445312, - "logps/rejected": -230.71542358398438, - "loss": 0.0126, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.6474655270576477, - "rewards/margins": 8.285856246948242, - "rewards/rejected": -8.933321952819824, - "step": 157 - }, - { - "epoch": 0.06, - "learning_rate": 1.988737554318517e-05, - "logits/chosen": -8.369000434875488, - "logits/rejected": -2.9735963344573975, - "logps/chosen": -533.4119873046875, - "logps/rejected": -2814.5654296875, - "loss": 0.0172, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.2786498963832855, - "rewards/margins": 20.611339569091797, - "rewards/rejected": -20.889989852905273, - "step": 158 - }, - { - "epoch": 0.06, - "learning_rate": 1.9885584189710546e-05, - "logits/chosen": -2.569835662841797, - "logits/rejected": -2.9975032806396484, - "logps/chosen": -258.51507568359375, - "logps/rejected": -341.2959899902344, - "loss": 0.0018, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.7340744137763977, - "rewards/margins": 4.3500657081604, - "rewards/rejected": -5.084140300750732, - "step": 159 - }, - { - "epoch": 0.06, - "learning_rate": 1.9883778784150083e-05, - "logits/chosen": -5.044733047485352, - "logits/rejected": -6.348597049713135, - "logps/chosen": -192.9871826171875, - "logps/rejected": -199.52085876464844, - "loss": 0.0071, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.0106201171875, - "rewards/margins": 2.904179334640503, - "rewards/rejected": -2.893559217453003, - "step": 160 - }, - { - "epoch": 0.06, - "learning_rate": 1.9881959329070123e-05, - "logits/chosen": -6.081510543823242, - "logits/rejected": -2.9104228019714355, - "logps/chosen": -288.66845703125, - "logps/rejected": -1179.3680419921875, - "loss": 0.0095, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.12438354641199112, - "rewards/margins": 31.422710418701172, - "rewards/rejected": -31.547094345092773, - "step": 161 - }, - { - "epoch": 0.06, - "learning_rate": 1.9880125827056967e-05, - "logits/chosen": -7.948723793029785, - "logits/rejected": -3.8333396911621094, - "logps/chosen": -353.84075927734375, - "logps/rejected": -1001.7994995117188, - "loss": 0.0007, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.53643798828125, - "rewards/margins": 7.552117824554443, - "rewards/rejected": -7.015679836273193, - "step": 162 - }, - { - "epoch": 0.06, - "learning_rate": 1.9878278280716885e-05, - "logits/chosen": -3.2980899810791016, - "logits/rejected": -6.869227409362793, - "logps/chosen": -110.28030395507812, - "logps/rejected": -102.10591125488281, - "loss": 0.0064, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.1113319396972656, - "rewards/margins": 6.142398357391357, - "rewards/rejected": -5.031066417694092, - "step": 163 - }, - { - "epoch": 0.06, - "learning_rate": 1.9876416692676123e-05, - "logits/chosen": -2.372762441635132, - "logits/rejected": -1.9667719602584839, - "logps/chosen": -294.3548583984375, - "logps/rejected": -392.60980224609375, - "loss": 0.0029, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.158111572265625, - "rewards/margins": 3.737692356109619, - "rewards/rejected": -3.895803928375244, - "step": 164 - }, - { - "epoch": 0.06, - "learning_rate": 1.9874541065580865e-05, - "logits/chosen": -2.092344045639038, - "logits/rejected": -3.7284915447235107, - "logps/chosen": -275.8125305175781, - "logps/rejected": -220.84234619140625, - "loss": 0.016, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.5344207882881165, - "rewards/margins": 2.1774537563323975, - "rewards/rejected": -2.711874485015869, - "step": 165 - }, - { - "epoch": 0.06, - "learning_rate": 1.9872651402097273e-05, - "logits/chosen": -1.6237424612045288, - "logits/rejected": -3.9445581436157227, - "logps/chosen": -309.572509765625, - "logps/rejected": -440.19677734375, - "loss": 0.0028, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.4499267637729645, - "rewards/margins": 18.04232406616211, - "rewards/rejected": -17.592397689819336, - "step": 166 - }, - { - "epoch": 0.06, - "learning_rate": 1.987074770491145e-05, - "logits/chosen": -1.8493833541870117, - "logits/rejected": -5.813567161560059, - "logps/chosen": -191.8631591796875, - "logps/rejected": -85.19422149658203, - "loss": 0.0028, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.4924560487270355, - "rewards/margins": 3.9601709842681885, - "rewards/rejected": -3.46771502494812, - "step": 167 - }, - { - "epoch": 0.06, - "learning_rate": 1.9868829976729444e-05, - "logits/chosen": -8.995770454406738, - "logits/rejected": -2.4487664699554443, - "logps/chosen": -213.33364868164062, - "logps/rejected": -3227.0302734375, - "loss": 0.0033, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.16425171494483948, - "rewards/margins": 35.90923309326172, - "rewards/rejected": -36.073486328125, - "step": 168 - }, - { - "epoch": 0.06, - "learning_rate": 1.9866898220277256e-05, - "logits/chosen": -2.8629541397094727, - "logits/rejected": -5.268250942230225, - "logps/chosen": -264.5804138183594, - "logps/rejected": -255.40504455566406, - "loss": 0.0003, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.8336731195449829, - "rewards/margins": 8.165120124816895, - "rewards/rejected": -7.331447124481201, - "step": 169 - }, - { - "epoch": 0.06, - "learning_rate": 1.9864952438300826e-05, - "logits/chosen": -6.068964958190918, - "logits/rejected": -2.378211498260498, - "logps/chosen": -387.261962890625, - "logps/rejected": -1537.1881103515625, - "loss": 0.0009, - "rewards/accuracies": 1.0, - "rewards/chosen": 2.2820465564727783, - "rewards/margins": 24.64109230041504, - "rewards/rejected": -22.359045028686523, - "step": 170 - }, - { - "epoch": 0.06, - "learning_rate": 1.9862992633566026e-05, - "logits/chosen": -5.577524185180664, - "logits/rejected": -2.5075387954711914, - "logps/chosen": -401.24542236328125, - "logps/rejected": -1048.162353515625, - "loss": 0.0005, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.021081566810608, - "rewards/margins": 14.714086532592773, - "rewards/rejected": -15.73516845703125, - "step": 171 - }, - { - "epoch": 0.07, - "learning_rate": 1.986101880885867e-05, - "logits/chosen": -3.850322723388672, - "logits/rejected": -2.597146511077881, - "logps/chosen": -293.26361083984375, - "logps/rejected": -616.14892578125, - "loss": 0.0055, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.0422455072402954, - "rewards/margins": 14.525101661682129, - "rewards/rejected": -13.482855796813965, - "step": 172 - }, - { - "epoch": 0.07, - "learning_rate": 1.985903096698449e-05, - "logits/chosen": -4.59067964553833, - "logits/rejected": -2.2413883209228516, - "logps/chosen": -409.08868408203125, - "logps/rejected": -750.541015625, - "loss": 0.0144, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.6738128662109375, - "rewards/margins": 7.120761394500732, - "rewards/rejected": -6.446948528289795, - "step": 173 - }, - { - "epoch": 0.07, - "learning_rate": 1.9857029110769154e-05, - "logits/chosen": -2.6793322563171387, - "logits/rejected": -5.3941874504089355, - "logps/chosen": -395.7564697265625, - "logps/rejected": -171.42434692382812, - "loss": 0.0075, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.2582275867462158, - "rewards/margins": 7.455705642700195, - "rewards/rejected": -6.1974778175354, - "step": 174 - }, - { - "epoch": 0.07, - "learning_rate": 1.985501324305824e-05, - "logits/chosen": -3.66511607170105, - "logits/rejected": -1.4760938882827759, - "logps/chosen": -498.1602783203125, - "logps/rejected": -1009.25, - "loss": 0.0057, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.20118407905101776, - "rewards/margins": 13.255602836608887, - "rewards/rejected": -13.456787109375, - "step": 175 - }, - { - "epoch": 0.07, - "learning_rate": 1.9852983366717248e-05, - "logits/chosen": -2.6965932846069336, - "logits/rejected": -5.393041133880615, - "logps/chosen": -355.7259521484375, - "logps/rejected": -305.15869140625, - "loss": 0.0009, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.291265845298767, - "rewards/margins": 5.251739501953125, - "rewards/rejected": -6.543005466461182, - "step": 176 - }, - { - "epoch": 0.07, - "learning_rate": 1.9850939484631598e-05, - "logits/chosen": -7.777625560760498, - "logits/rejected": -8.358348846435547, - "logps/chosen": -67.8677978515625, - "logps/rejected": -191.492919921875, - "loss": 0.0005, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.045133210718631744, - "rewards/margins": 8.026688575744629, - "rewards/rejected": -8.071822166442871, - "step": 177 - }, - { - "epoch": 0.07, - "learning_rate": 1.9848881599706605e-05, - "logits/chosen": -5.980942249298096, - "logits/rejected": -4.311253070831299, - "logps/chosen": -280.119140625, - "logps/rejected": -876.177490234375, - "loss": 0.0009, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.7807037234306335, - "rewards/margins": 12.656015396118164, - "rewards/rejected": -13.436718940734863, - "step": 178 - }, - { - "epoch": 0.07, - "learning_rate": 1.98468097148675e-05, - "logits/chosen": -2.7399306297302246, - "logits/rejected": -2.287069320678711, - "logps/chosen": -460.60626220703125, - "logps/rejected": -651.4832763671875, - "loss": 0.0242, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.7429291009902954, - "rewards/margins": 6.282223701477051, - "rewards/rejected": -7.025152683258057, - "step": 179 - }, - { - "epoch": 0.07, - "learning_rate": 1.984472383305941e-05, - "logits/chosen": -1.8354194164276123, - "logits/rejected": -4.021738052368164, - "logps/chosen": -288.276611328125, - "logps/rejected": -374.2378234863281, - "loss": 0.0005, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.3299499452114105, - "rewards/margins": 10.874655723571777, - "rewards/rejected": -10.544705390930176, - "step": 180 - }, - { - "epoch": 0.07, - "learning_rate": 1.9842623957247355e-05, - "logits/chosen": -1.3504747152328491, - "logits/rejected": -5.7311787605285645, - "logps/chosen": -376.89361572265625, - "logps/rejected": -88.35364532470703, - "loss": 0.0215, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.17180481553077698, - "rewards/margins": 2.2426369190216064, - "rewards/rejected": -2.4144418239593506, - "step": 181 - }, - { - "epoch": 0.07, - "learning_rate": 1.984051009041626e-05, - "logits/chosen": -8.129803657531738, - "logits/rejected": -3.1656816005706787, - "logps/chosen": -359.45892333984375, - "logps/rejected": -1564.0634765625, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.6740784049034119, - "rewards/margins": 17.618366241455078, - "rewards/rejected": -16.94428825378418, - "step": 182 - }, - { - "epoch": 0.07, - "learning_rate": 1.9838382235570915e-05, - "logits/chosen": -4.0679216384887695, - "logits/rejected": -4.819329261779785, - "logps/chosen": -370.0264892578125, - "logps/rejected": -506.97222900390625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.09187927097082138, - "rewards/margins": 9.739950180053711, - "rewards/rejected": -9.6480712890625, - "step": 183 - }, - { - "epoch": 0.07, - "learning_rate": 1.9836240395736017e-05, - "logits/chosen": -3.6352694034576416, - "logits/rejected": -3.520509958267212, - "logps/chosen": -250.33023071289062, - "logps/rejected": -421.6981201171875, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.11604004353284836, - "rewards/margins": 14.707324028015137, - "rewards/rejected": -14.8233642578125, - "step": 184 - }, - { - "epoch": 0.07, - "learning_rate": 1.983408457395613e-05, - "logits/chosen": -9.05216121673584, - "logits/rejected": -2.64894700050354, - "logps/chosen": -386.36627197265625, - "logps/rejected": -4212.3447265625, - "loss": 0.0089, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.0009826660389080644, - "rewards/margins": 18.297210693359375, - "rewards/rejected": -18.298192977905273, - "step": 185 - }, - { - "epoch": 0.07, - "learning_rate": 1.983191477329569e-05, - "logits/chosen": -2.4788763523101807, - "logits/rejected": -6.9549994468688965, - "logps/chosen": -408.62860107421875, - "logps/rejected": -198.49244689941406, - "loss": 0.0057, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.1621917486190796, - "rewards/margins": 4.250373840332031, - "rewards/rejected": -5.4125657081604, - "step": 186 - }, - { - "epoch": 0.07, - "learning_rate": 1.982973099683902e-05, - "logits/chosen": -3.3027853965759277, - "logits/rejected": -2.2976858615875244, - "logps/chosen": -105.06053161621094, - "logps/rejected": -266.1261901855469, - "loss": 0.0027, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.8288528323173523, - "rewards/margins": 7.478427886962891, - "rewards/rejected": -8.307280540466309, - "step": 187 - }, - { - "epoch": 0.07, - "learning_rate": 1.982753324769029e-05, - "logits/chosen": -1.5049316883087158, - "logits/rejected": -8.731840133666992, - "logps/chosen": -319.7847900390625, - "logps/rejected": -110.75350952148438, - "loss": 0.0008, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.7532593011856079, - "rewards/margins": 5.142917633056641, - "rewards/rejected": -5.896176815032959, - "step": 188 - }, - { - "epoch": 0.07, - "learning_rate": 1.982532152897354e-05, - "logits/chosen": -7.725649833679199, - "logits/rejected": -0.28556370735168457, - "logps/chosen": -412.23931884765625, - "logps/rejected": -2150.07568359375, - "loss": 0.0018, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.5142852663993835, - "rewards/margins": 17.97124481201172, - "rewards/rejected": -17.456958770751953, - "step": 189 - }, - { - "epoch": 0.07, - "learning_rate": 1.982309584383267e-05, - "logits/chosen": -3.406536817550659, - "logits/rejected": -7.212151050567627, - "logps/chosen": -422.91192626953125, - "logps/rejected": -199.07369995117188, - "loss": 0.003, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.21325074136257172, - "rewards/margins": 8.6825532913208, - "rewards/rejected": -8.895804405212402, - "step": 190 - }, - { - "epoch": 0.07, - "learning_rate": 1.9820856195431428e-05, - "logits/chosen": -6.6533331871032715, - "logits/rejected": -3.472789764404297, - "logps/chosen": -439.40576171875, - "logps/rejected": -1155.353271484375, - "loss": 0.0097, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.5655884146690369, - "rewards/margins": 14.923376083374023, - "rewards/rejected": -14.3577880859375, - "step": 191 - }, - { - "epoch": 0.07, - "learning_rate": 1.9818602586953414e-05, - "logits/chosen": -4.3159942626953125, - "logits/rejected": -2.875436544418335, - "logps/chosen": -325.07257080078125, - "logps/rejected": -619.9034423828125, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.0984314680099487, - "rewards/margins": 14.382837295532227, - "rewards/rejected": -15.481268882751465, - "step": 192 - }, - { - "epoch": 0.07, - "learning_rate": 1.9816335021602072e-05, - "logits/chosen": -8.52599811553955, - "logits/rejected": -2.1589479446411133, - "logps/chosen": -625.4209594726562, - "logps/rejected": -3145.490478515625, - "loss": 0.0031, - "rewards/accuracies": 1.0, - "rewards/chosen": 2.365826368331909, - "rewards/margins": 14.196906089782715, - "rewards/rejected": -11.831079483032227, - "step": 193 - }, - { - "epoch": 0.07, - "learning_rate": 1.9814053502600683e-05, - "logits/chosen": -5.908425331115723, - "logits/rejected": -1.2289998531341553, - "logps/chosen": -406.826904296875, - "logps/rejected": -1174.3826904296875, - "loss": 0.0002, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.5022644400596619, - "rewards/margins": 14.70859432220459, - "rewards/rejected": -15.210858345031738, - "step": 194 - }, - { - "epoch": 0.07, - "learning_rate": 1.981175803319236e-05, - "logits/chosen": -2.88136887550354, - "logits/rejected": -5.14921236038208, - "logps/chosen": -507.61846923828125, - "logps/rejected": -457.48236083984375, - "loss": 0.001, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.7327148914337158, - "rewards/margins": 8.915643692016602, - "rewards/rejected": -10.648358345031738, - "step": 195 - }, - { - "epoch": 0.07, - "learning_rate": 1.9809448616640052e-05, - "logits/chosen": -2.678927421569824, - "logits/rejected": -7.6108198165893555, - "logps/chosen": -366.68023681640625, - "logps/rejected": -54.70207977294922, - "loss": 0.0152, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.15222778916358948, - "rewards/margins": 2.0476887226104736, - "rewards/rejected": -2.1999166011810303, - "step": 196 - }, - { - "epoch": 0.07, - "learning_rate": 1.9807125256226532e-05, - "logits/chosen": -7.610185623168945, - "logits/rejected": -4.743247032165527, - "logps/chosen": -397.09088134765625, - "logps/rejected": -1453.7967529296875, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.7479461431503296, - "rewards/margins": 25.450260162353516, - "rewards/rejected": -27.198205947875977, - "step": 197 - }, - { - "epoch": 0.07, - "learning_rate": 1.980478795525439e-05, - "logits/chosen": -6.55247163772583, - "logits/rejected": -2.896327018737793, - "logps/chosen": -188.98086547851562, - "logps/rejected": -1036.889404296875, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.6238388419151306, - "rewards/margins": 33.31098175048828, - "rewards/rejected": -32.68714141845703, - "step": 198 - }, - { - "epoch": 0.08, - "learning_rate": 1.980243671704604e-05, - "logits/chosen": -4.10009765625, - "logits/rejected": -5.240772724151611, - "logps/chosen": -193.93084716796875, - "logps/rejected": -199.46267700195312, - "loss": 0.0008, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.4506806135177612, - "rewards/margins": 5.731407642364502, - "rewards/rejected": -7.182088375091553, - "step": 199 - }, - { - "epoch": 0.08, - "learning_rate": 1.9800071544943696e-05, - "logits/chosen": -2.9615426063537598, - "logits/rejected": -2.3044207096099854, - "logps/chosen": -333.75030517578125, - "logps/rejected": -453.90557861328125, - "loss": 0.0476, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.347930908203125, - "rewards/margins": 1.0842865705490112, - "rewards/rejected": -1.4322174787521362, - "step": 200 - }, - { - "epoch": 0.08, - "learning_rate": 1.9797692442309387e-05, - "logits/chosen": -8.1212797164917, - "logits/rejected": -2.651017189025879, - "logps/chosen": -488.1044921875, - "logps/rejected": -2586.7822265625, - "loss": 0.0085, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.4600463807582855, - "rewards/margins": 38.443668365478516, - "rewards/rejected": -38.90371322631836, - "step": 201 - }, - { - "epoch": 0.08, - "learning_rate": 1.9795299412524948e-05, - "logits/chosen": -3.3703055381774902, - "logits/rejected": -2.727461338043213, - "logps/chosen": -429.4046325683594, - "logps/rejected": -764.3721923828125, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.26870423555374146, - "rewards/margins": 17.790298461914062, - "rewards/rejected": -17.521595001220703, - "step": 202 - }, - { - "epoch": 0.08, - "learning_rate": 1.9792892458991995e-05, - "logits/chosen": -3.9010159969329834, - "logits/rejected": -5.513745307922363, - "logps/chosen": -216.68804931640625, - "logps/rejected": -94.18428802490234, - "loss": 0.0005, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.9100891351699829, - "rewards/margins": 5.699523448944092, - "rewards/rejected": -4.789434432983398, - "step": 203 - }, - { - "epoch": 0.08, - "learning_rate": 1.9790471585131956e-05, - "logits/chosen": -7.830135822296143, - "logits/rejected": -3.2409822940826416, - "logps/chosen": -259.99322509765625, - "logps/rejected": -1040.86962890625, - "loss": 0.0076, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.15823058784008026, - "rewards/margins": 8.230228424072266, - "rewards/rejected": -8.07199764251709, - "step": 204 - }, - { - "epoch": 0.08, - "learning_rate": 1.978803679438603e-05, - "logits/chosen": -7.001162052154541, - "logits/rejected": -2.5105347633361816, - "logps/chosen": -374.54302978515625, - "logps/rejected": -1520.896240234375, - "loss": 0.0149, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.9148162603378296, - "rewards/margins": 25.04726219177246, - "rewards/rejected": -23.1324462890625, - "step": 205 - }, - { - "epoch": 0.08, - "learning_rate": 1.9785588090215205e-05, - "logits/chosen": -0.9297477006912231, - "logits/rejected": -2.357343912124634, - "logps/chosen": -233.85256958007812, - "logps/rejected": -315.9771728515625, - "loss": 0.0029, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.1520187854766846, - "rewards/margins": 8.61760425567627, - "rewards/rejected": -7.465585231781006, - "step": 206 - }, - { - "epoch": 0.08, - "learning_rate": 1.9783125476100254e-05, - "logits/chosen": -2.8025968074798584, - "logits/rejected": -4.3868327140808105, - "logps/chosen": -222.6510772705078, - "logps/rejected": -253.1702423095703, - "loss": 0.0288, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.18386231362819672, - "rewards/margins": 3.0013811588287354, - "rewards/rejected": -3.1852433681488037, - "step": 207 - }, - { - "epoch": 0.08, - "learning_rate": 1.9780648955541706e-05, - "logits/chosen": -2.937283515930176, - "logits/rejected": -6.30315637588501, - "logps/chosen": -321.6627197265625, - "logps/rejected": -363.42987060546875, - "loss": 0.0844, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.669970691204071, - "rewards/margins": 15.681699752807617, - "rewards/rejected": -15.01172924041748, - "step": 208 - }, - { - "epoch": 0.08, - "learning_rate": 1.977815853205987e-05, - "logits/chosen": -9.523770332336426, - "logits/rejected": -3.6942975521087646, - "logps/chosen": -254.48019409179688, - "logps/rejected": -2888.037841796875, - "loss": 0.0044, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.357696533203125, - "rewards/margins": 52.62412643432617, - "rewards/rejected": -52.26642990112305, - "step": 209 - }, - { - "epoch": 0.08, - "learning_rate": 1.9775654209194816e-05, - "logits/chosen": -3.0402774810791016, - "logits/rejected": -2.2941737174987793, - "logps/chosen": -284.1275939941406, - "logps/rejected": -633.8983764648438, - "loss": 0.024, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.0965301990509033, - "rewards/margins": 13.201400756835938, - "rewards/rejected": -12.104870796203613, - "step": 210 - }, - { - "epoch": 0.08, - "learning_rate": 1.977313599050637e-05, - "logits/chosen": -0.9662387371063232, - "logits/rejected": -0.633746325969696, - "logps/chosen": -226.57901000976562, - "logps/rejected": -321.0753173828125, - "loss": 0.0018, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.19714049994945526, - "rewards/margins": 4.444582939147949, - "rewards/rejected": -4.6417236328125, - "step": 211 - }, - { - "epoch": 0.08, - "learning_rate": 1.9770603879574108e-05, - "logits/chosen": -2.90165114402771, - "logits/rejected": -7.469164848327637, - "logps/chosen": -233.10696411132812, - "logps/rejected": -128.65391540527344, - "loss": 0.0002, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.2784057557582855, - "rewards/margins": 6.836155891418457, - "rewards/rejected": -6.557750225067139, - "step": 212 - }, - { - "epoch": 0.08, - "learning_rate": 1.976805787999736e-05, - "logits/chosen": -8.283646583557129, - "logits/rejected": -3.619302272796631, - "logps/chosen": -461.98004150390625, - "logps/rejected": -3100.5966796875, - "loss": 0.0027, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.6447845697402954, - "rewards/margins": 23.960166931152344, - "rewards/rejected": -22.31538200378418, - "step": 213 - }, - { - "epoch": 0.08, - "learning_rate": 1.976549799539519e-05, - "logits/chosen": -9.7518892288208, - "logits/rejected": -4.7590484619140625, - "logps/chosen": -320.98065185546875, - "logps/rejected": -2742.08837890625, - "loss": 0.0039, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.2752288579940796, - "rewards/margins": 18.431528091430664, - "rewards/rejected": -17.156299591064453, - "step": 214 - }, - { - "epoch": 0.08, - "learning_rate": 1.9762924229406405e-05, - "logits/chosen": -1.6029655933380127, - "logits/rejected": -5.326481342315674, - "logps/chosen": -349.75421142578125, - "logps/rejected": -259.12603759765625, - "loss": 0.0009, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.3629058599472046, - "rewards/margins": 8.943779945373535, - "rewards/rejected": -10.306685447692871, - "step": 215 - }, - { - "epoch": 0.08, - "learning_rate": 1.9760336585689544e-05, - "logits/chosen": -1.3811391592025757, - "logits/rejected": -7.038777828216553, - "logps/chosen": -695.1199951171875, - "logps/rejected": -267.850830078125, - "loss": 0.0009, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.135009765625, - "rewards/margins": 9.307385444641113, - "rewards/rejected": -8.172375679016113, - "step": 216 - }, - { - "epoch": 0.08, - "learning_rate": 1.975773506792287e-05, - "logits/chosen": -1.9570508003234863, - "logits/rejected": -6.327121257781982, - "logps/chosen": -565.9732666015625, - "logps/rejected": -355.22491455078125, - "loss": 0.0023, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.764379858970642, - "rewards/margins": 13.536457061767578, - "rewards/rejected": -15.300836563110352, - "step": 217 - }, - { - "epoch": 0.08, - "learning_rate": 1.975511967980437e-05, - "logits/chosen": -1.6149832010269165, - "logits/rejected": -6.215789794921875, - "logps/chosen": -265.8162536621094, - "logps/rejected": -118.79293060302734, - "loss": 0.0005, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.9128296375274658, - "rewards/margins": 6.717551231384277, - "rewards/rejected": -4.804721355438232, - "step": 218 - }, - { - "epoch": 0.08, - "learning_rate": 1.975249042505174e-05, - "logits/chosen": -2.339282751083374, - "logits/rejected": -4.212043285369873, - "logps/chosen": -235.06414794921875, - "logps/rejected": -303.6802978515625, - "loss": 0.0003, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.890069603919983, - "rewards/margins": 10.531424522399902, - "rewards/rejected": -8.64135456085205, - "step": 219 - }, - { - "epoch": 0.08, - "learning_rate": 1.9749847307402406e-05, - "logits/chosen": -2.9903106689453125, - "logits/rejected": -5.144967079162598, - "logps/chosen": -278.4345703125, - "logps/rejected": -316.28179931640625, - "loss": 0.0106, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.18435059487819672, - "rewards/margins": 9.565449714660645, - "rewards/rejected": -9.749800682067871, - "step": 220 - }, - { - "epoch": 0.08, - "learning_rate": 1.9747190330613475e-05, - "logits/chosen": -3.3171494007110596, - "logits/rejected": -4.460268020629883, - "logps/chosen": -216.46469116210938, - "logps/rejected": -279.6846923828125, - "loss": 0.0347, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.9818267822265625, - "rewards/margins": 10.35650634765625, - "rewards/rejected": -9.374679565429688, - "step": 221 - }, - { - "epoch": 0.08, - "learning_rate": 1.974451949846177e-05, - "logits/chosen": -3.9362246990203857, - "logits/rejected": -6.718437194824219, - "logps/chosen": -304.8087158203125, - "logps/rejected": -240.50701904296875, - "loss": 0.0004, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.1988953351974487, - "rewards/margins": 5.827456951141357, - "rewards/rejected": -4.628561496734619, - "step": 222 - }, - { - "epoch": 0.08, - "learning_rate": 1.9741834814743812e-05, - "logits/chosen": -0.5278604030609131, - "logits/rejected": -3.1780545711517334, - "logps/chosen": -242.968017578125, - "logps/rejected": -278.20538330078125, - "loss": 0.0052, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.15605317056179047, - "rewards/margins": 9.14519214630127, - "rewards/rejected": -8.98913860321045, - "step": 223 - }, - { - "epoch": 0.08, - "learning_rate": 1.9739136283275794e-05, - "logits/chosen": -6.977881908416748, - "logits/rejected": -5.775460720062256, - "logps/chosen": -177.75933837890625, - "logps/rejected": -612.8715209960938, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.4641662538051605, - "rewards/margins": 13.52030086517334, - "rewards/rejected": -13.056134223937988, - "step": 224 - }, - { - "epoch": 0.09, - "learning_rate": 1.9736423907893612e-05, - "logits/chosen": -7.275716781616211, - "logits/rejected": -3.902735948562622, - "logps/chosen": -632.85009765625, - "logps/rejected": -1444.0628662109375, - "loss": 0.0004, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.3178649842739105, - "rewards/margins": 8.141253471374512, - "rewards/rejected": -8.459118843078613, - "step": 225 - }, - { - "epoch": 0.09, - "learning_rate": 1.973369769245283e-05, - "logits/chosen": -4.321075916290283, - "logits/rejected": -2.6683924198150635, - "logps/chosen": -254.5089111328125, - "logps/rejected": -474.41461181640625, - "loss": 0.0022, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.014710999093949795, - "rewards/margins": 4.052857875823975, - "rewards/rejected": -4.03814697265625, - "step": 226 - }, - { - "epoch": 0.09, - "learning_rate": 1.973095764082869e-05, - "logits/chosen": -3.1059930324554443, - "logits/rejected": -0.2087278664112091, - "logps/chosen": -1116.303955078125, - "logps/rejected": -1813.02099609375, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -3.69384765625, - "rewards/margins": 19.996679306030273, - "rewards/rejected": -23.690526962280273, - "step": 227 - }, - { - "epoch": 0.09, - "learning_rate": 1.97282037569161e-05, - "logits/chosen": -4.995663166046143, - "logits/rejected": -3.2643632888793945, - "logps/chosen": -267.8795471191406, - "logps/rejected": -825.7095947265625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.17983093857765198, - "rewards/margins": 17.491939544677734, - "rewards/rejected": -17.312108993530273, - "step": 228 - }, - { - "epoch": 0.09, - "learning_rate": 1.9725436044629625e-05, - "logits/chosen": -5.404122829437256, - "logits/rejected": -3.7910895347595215, - "logps/chosen": -227.9188995361328, - "logps/rejected": -605.9249877929688, - "loss": 0.0009, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.2415755987167358, - "rewards/margins": 17.184764862060547, - "rewards/rejected": -15.943188667297363, - "step": 229 - }, - { - "epoch": 0.09, - "learning_rate": 1.9722654507903497e-05, - "logits/chosen": -2.91347074508667, - "logits/rejected": -2.402101993560791, - "logps/chosen": -382.3998107910156, - "logps/rejected": -620.5694580078125, - "loss": 0.001, - "rewards/accuracies": 1.0, - "rewards/chosen": 2.5711822509765625, - "rewards/margins": 9.525131225585938, - "rewards/rejected": -6.953948974609375, - "step": 230 - }, - { - "epoch": 0.09, - "learning_rate": 1.9719859150691595e-05, - "logits/chosen": -3.664963960647583, - "logits/rejected": -3.6253843307495117, - "logps/chosen": -268.11456298828125, - "logps/rejected": -477.5546569824219, - "loss": 0.061, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.6224395632743835, - "rewards/margins": 17.205554962158203, - "rewards/rejected": -16.583114624023438, - "step": 231 - }, - { - "epoch": 0.09, - "learning_rate": 1.9717049976967437e-05, - "logits/chosen": -4.596846103668213, - "logits/rejected": -3.573773145675659, - "logps/chosen": -494.466552734375, - "logps/rejected": -1101.575439453125, - "loss": 0.006, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.6730774641036987, - "rewards/margins": 23.6186466217041, - "rewards/rejected": -25.291723251342773, - "step": 232 - }, - { - "epoch": 0.09, - "learning_rate": 1.971422699072419e-05, - "logits/chosen": -3.0640206336975098, - "logits/rejected": -1.0860474109649658, - "logps/chosen": -434.69964599609375, - "logps/rejected": -702.1586303710938, - "loss": 0.0011, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.33436891436576843, - "rewards/margins": 5.688776016235352, - "rewards/rejected": -6.023144721984863, - "step": 233 - }, - { - "epoch": 0.09, - "learning_rate": 1.971139019597465e-05, - "logits/chosen": -3.706866979598999, - "logits/rejected": -3.194345235824585, - "logps/chosen": -329.77923583984375, - "logps/rejected": -666.0379028320312, - "loss": 0.0015, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.39984130859375, - "rewards/margins": 23.54984474182129, - "rewards/rejected": -23.94968605041504, - "step": 234 - }, - { - "epoch": 0.09, - "learning_rate": 1.9708539596751245e-05, - "logits/chosen": -5.293097019195557, - "logits/rejected": -3.43345046043396, - "logps/chosen": -486.6744384765625, - "logps/rejected": -1293.5399169921875, - "loss": 0.0004, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.07324524223804474, - "rewards/margins": 39.06473159790039, - "rewards/rejected": -38.991485595703125, - "step": 235 - }, - { - "epoch": 0.09, - "learning_rate": 1.970567519710602e-05, - "logits/chosen": -3.9698286056518555, - "logits/rejected": -5.5913286209106445, - "logps/chosen": -425.1443176269531, - "logps/rejected": -205.31881713867188, - "loss": 0.0005, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.292596459388733, - "rewards/margins": 5.494947910308838, - "rewards/rejected": -6.787544250488281, - "step": 236 - }, - { - "epoch": 0.09, - "learning_rate": 1.9702797001110642e-05, - "logits/chosen": -1.3065253496170044, - "logits/rejected": -2.75030255317688, - "logps/chosen": -456.0109558105469, - "logps/rejected": -431.6904602050781, - "loss": 0.0088, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.6279266476631165, - "rewards/margins": 6.143713474273682, - "rewards/rejected": -5.515786647796631, - "step": 237 - }, - { - "epoch": 0.09, - "learning_rate": 1.9699905012856383e-05, - "logits/chosen": -7.714234352111816, - "logits/rejected": -2.4495439529418945, - "logps/chosen": -388.8133850097656, - "logps/rejected": -2504.844970703125, - "loss": 0.0017, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.8114502429962158, - "rewards/margins": 37.7327880859375, - "rewards/rejected": -39.54423904418945, - "step": 238 - }, - { - "epoch": 0.09, - "learning_rate": 1.9696999236454135e-05, - "logits/chosen": -5.474602222442627, - "logits/rejected": -2.1276626586914062, - "logps/chosen": -235.34889221191406, - "logps/rejected": -517.561279296875, - "loss": 0.007, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.12571869790554047, - "rewards/margins": 6.956019401550293, - "rewards/rejected": -6.830300807952881, - "step": 239 - }, - { - "epoch": 0.09, - "learning_rate": 1.969407967603437e-05, - "logits/chosen": -1.7901273965835571, - "logits/rejected": -5.262421607971191, - "logps/chosen": -386.22186279296875, - "logps/rejected": -343.69384765625, - "loss": 0.0004, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.34796142578125, - "rewards/margins": 13.011324882507324, - "rewards/rejected": -11.663363456726074, - "step": 240 - }, - { - "epoch": 0.09, - "learning_rate": 1.9691146335747165e-05, - "logits/chosen": -2.6722230911254883, - "logits/rejected": -3.0324225425720215, - "logps/chosen": -345.09490966796875, - "logps/rejected": -597.5801391601562, - "loss": 0.0372, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.152124047279358, - "rewards/margins": 13.38535213470459, - "rewards/rejected": -12.233227729797363, - "step": 241 - }, - { - "epoch": 0.09, - "learning_rate": 1.9688199219762183e-05, - "logits/chosen": -1.3375697135925293, - "logits/rejected": -4.744736194610596, - "logps/chosen": -497.58172607421875, - "logps/rejected": -458.78094482421875, - "loss": 0.001, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.08443603664636612, - "rewards/margins": 10.353615760803223, - "rewards/rejected": -10.4380521774292, - "step": 242 - }, - { - "epoch": 0.09, - "learning_rate": 1.968523833226867e-05, - "logits/chosen": -3.165595769882202, - "logits/rejected": -4.689352035522461, - "logps/chosen": -182.01136779785156, - "logps/rejected": -425.4637145996094, - "loss": 0.006, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.1033645868301392, - "rewards/margins": 17.280824661254883, - "rewards/rejected": -18.38418960571289, - "step": 243 - }, - { - "epoch": 0.09, - "learning_rate": 1.9682263677475442e-05, - "logits/chosen": -3.144899606704712, - "logits/rejected": -6.575649261474609, - "logps/chosen": -264.81829833984375, - "logps/rejected": -239.96615600585938, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.9414398074150085, - "rewards/margins": 7.588090419769287, - "rewards/rejected": -6.646650791168213, - "step": 244 - }, - { - "epoch": 0.09, - "learning_rate": 1.9679275259610894e-05, - "logits/chosen": -1.9692946672439575, - "logits/rejected": -4.109780788421631, - "logps/chosen": -177.8677978515625, - "logps/rejected": -246.0126953125, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.5445343255996704, - "rewards/margins": 12.646831512451172, - "rewards/rejected": -12.102296829223633, - "step": 245 - }, - { - "epoch": 0.09, - "learning_rate": 1.9676273082922973e-05, - "logits/chosen": -2.733067274093628, - "logits/rejected": -6.679800987243652, - "logps/chosen": -589.105224609375, - "logps/rejected": -279.3554382324219, - "loss": 0.0086, - "rewards/accuracies": 1.0, - "rewards/chosen": -3.0100953578948975, - "rewards/margins": 6.413027763366699, - "rewards/rejected": -9.423123359680176, - "step": 246 - }, - { - "epoch": 0.09, - "learning_rate": 1.96732571516792e-05, - "logits/chosen": -6.535902976989746, - "logits/rejected": -1.9025511741638184, - "logps/chosen": -248.27008056640625, - "logps/rejected": -1053.286376953125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.8177154660224915, - "rewards/margins": 24.602035522460938, - "rewards/rejected": -23.784320831298828, - "step": 247 - }, - { - "epoch": 0.09, - "learning_rate": 1.967022747016663e-05, - "logits/chosen": -7.557191371917725, - "logits/rejected": -1.594400405883789, - "logps/chosen": -341.0063781738281, - "logps/rejected": -2379.299560546875, - "loss": 0.0873, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.0022796392440796, - "rewards/margins": 48.70755386352539, - "rewards/rejected": -47.70527267456055, - "step": 248 - }, - { - "epoch": 0.09, - "learning_rate": 1.9667184042691877e-05, - "logits/chosen": -4.067295074462891, - "logits/rejected": -4.620616912841797, - "logps/chosen": -114.40190887451172, - "logps/rejected": -208.31654357910156, - "loss": 0.0007, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.4538719356060028, - "rewards/margins": 6.723642826080322, - "rewards/rejected": -6.269771099090576, - "step": 249 - }, - { - "epoch": 0.09, - "learning_rate": 1.9664126873581086e-05, - "logits/chosen": -5.157289981842041, - "logits/rejected": -4.8302130699157715, - "logps/chosen": -299.16522216796875, - "logps/rejected": -482.7085876464844, - "loss": 0.0008, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.6876434683799744, - "rewards/margins": 11.691794395446777, - "rewards/rejected": -12.379437446594238, - "step": 250 - }, - { - "epoch": 0.09, - "learning_rate": 1.966105596717994e-05, - "logits/chosen": -7.689395427703857, - "logits/rejected": -3.666313886642456, - "logps/chosen": -245.95938110351562, - "logps/rejected": -1020.7611083984375, - "loss": 0.0002, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.367788702249527, - "rewards/margins": 17.744461059570312, - "rewards/rejected": -17.376672744750977, - "step": 251 - }, - { - "epoch": 0.1, - "learning_rate": 1.9657971327853644e-05, - "logits/chosen": -8.108428955078125, - "logits/rejected": -2.68985915184021, - "logps/chosen": -286.0048522949219, - "logps/rejected": -1834.869384765625, - "loss": 0.0346, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.2014191150665283, - "rewards/margins": 23.94683074951172, - "rewards/rejected": -22.745410919189453, - "step": 252 - }, - { - "epoch": 0.1, - "learning_rate": 1.9654872959986936e-05, - "logits/chosen": -7.23150110244751, - "logits/rejected": -8.8958740234375, - "logps/chosen": -349.76666259765625, - "logps/rejected": -668.0003662109375, - "loss": 0.0002, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.835992455482483, - "rewards/margins": 9.002814292907715, - "rewards/rejected": -7.166821479797363, - "step": 253 - }, - { - "epoch": 0.1, - "learning_rate": 1.9651760867984054e-05, - "logits/chosen": -6.457620620727539, - "logits/rejected": -1.6513458490371704, - "logps/chosen": -392.4010009765625, - "logps/rejected": -1052.50048828125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.549224853515625, - "rewards/margins": 8.177997589111328, - "rewards/rejected": -7.628772258758545, - "step": 254 - }, - { - "epoch": 0.1, - "learning_rate": 1.9648635056268757e-05, - "logits/chosen": -7.361368656158447, - "logits/rejected": -8.071610450744629, - "logps/chosen": -232.84912109375, - "logps/rejected": -40.907867431640625, - "loss": 0.0586, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.5723525881767273, - "rewards/margins": 0.8164082169532776, - "rewards/rejected": -1.3887608051300049, - "step": 255 - }, - { - "epoch": 0.1, - "learning_rate": 1.9645495529284292e-05, - "logits/chosen": -2.1296608448028564, - "logits/rejected": -0.37244468927383423, - "logps/chosen": -503.26470947265625, - "logps/rejected": -1000.8273315429688, - "loss": 0.0231, - "rewards/accuracies": 1.0, - "rewards/chosen": 2.1069397926330566, - "rewards/margins": 26.286998748779297, - "rewards/rejected": -24.1800594329834, - "step": 256 - }, - { - "epoch": 0.1, - "learning_rate": 1.964234229149342e-05, - "logits/chosen": -8.99018383026123, - "logits/rejected": -1.420329213142395, - "logps/chosen": -324.853515625, - "logps/rejected": -2215.4521484375, - "loss": 0.0132, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.0025970458518713713, - "rewards/margins": 28.680320739746094, - "rewards/rejected": -28.677722930908203, - "step": 257 - }, - { - "epoch": 0.1, - "learning_rate": 1.9639175347378378e-05, - "logits/chosen": -3.7456955909729004, - "logits/rejected": -4.533328056335449, - "logps/chosen": -352.77716064453125, - "logps/rejected": -490.74310302734375, - "loss": 0.0023, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.6015808582305908, - "rewards/margins": 11.900713920593262, - "rewards/rejected": -10.29913330078125, - "step": 258 - }, - { - "epoch": 0.1, - "learning_rate": 1.9635994701440882e-05, - "logits/chosen": -3.4464943408966064, - "logits/rejected": -2.3265132904052734, - "logps/chosen": -568.973876953125, - "logps/rejected": -925.4170532226562, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -4.287060737609863, - "rewards/margins": 12.539294242858887, - "rewards/rejected": -16.82635498046875, - "step": 259 - }, - { - "epoch": 0.1, - "learning_rate": 1.9632800358202142e-05, - "logits/chosen": -5.094399452209473, - "logits/rejected": -3.550612449645996, - "logps/chosen": -380.9692077636719, - "logps/rejected": -894.42431640625, - "loss": 0.0287, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.7369109988212585, - "rewards/margins": 15.4911527633667, - "rewards/rejected": -14.754241943359375, - "step": 260 - }, - { - "epoch": 0.1, - "learning_rate": 1.962959232220283e-05, - "logits/chosen": -3.4354069232940674, - "logits/rejected": -2.991022825241089, - "logps/chosen": -193.85801696777344, - "logps/rejected": -369.20306396484375, - "loss": 0.0019, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.6792984008789062, - "rewards/margins": 8.367705345153809, - "rewards/rejected": -9.047003746032715, - "step": 261 - }, - { - "epoch": 0.1, - "learning_rate": 1.962637059800307e-05, - "logits/chosen": -0.535505473613739, - "logits/rejected": -0.5510726571083069, - "logps/chosen": -405.626708984375, - "logps/rejected": -641.4395751953125, - "loss": 0.003, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.5946015119552612, - "rewards/margins": 13.981478691101074, - "rewards/rejected": -15.576080322265625, - "step": 262 - }, - { - "epoch": 0.1, - "learning_rate": 1.962313519018247e-05, - "logits/chosen": -0.610321581363678, - "logits/rejected": -4.570650577545166, - "logps/chosen": -281.57769775390625, - "logps/rejected": -195.302734375, - "loss": 0.0052, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.08159179985523224, - "rewards/margins": 3.2200164794921875, - "rewards/rejected": -3.1384246349334717, - "step": 263 - }, - { - "epoch": 0.1, - "learning_rate": 1.9619886103340056e-05, - "logits/chosen": -7.566278457641602, - "logits/rejected": -2.108807325363159, - "logps/chosen": -489.6228332519531, - "logps/rejected": -1883.3173828125, - "loss": 0.0141, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.3326354920864105, - "rewards/margins": 17.51837921142578, - "rewards/rejected": -17.18574333190918, - "step": 264 - }, - { - "epoch": 0.1, - "learning_rate": 1.9616623342094328e-05, - "logits/chosen": -5.340235233306885, - "logits/rejected": -9.162593841552734, - "logps/chosen": -501.9441833496094, - "logps/rejected": -60.32843780517578, - "loss": 0.0286, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.07918091118335724, - "rewards/margins": 1.5361828804016113, - "rewards/rejected": -1.4570019245147705, - "step": 265 - }, - { - "epoch": 0.1, - "learning_rate": 1.961334691108321e-05, - "logits/chosen": -2.1387059688568115, - "logits/rejected": -1.2864676713943481, - "logps/chosen": -162.6860809326172, - "logps/rejected": -323.9210205078125, - "loss": 0.0024, - "rewards/accuracies": 1.0, - "rewards/chosen": 2.111637830734253, - "rewards/margins": 4.325492858886719, - "rewards/rejected": -2.213855028152466, - "step": 266 - }, - { - "epoch": 0.1, - "learning_rate": 1.9610056814964053e-05, - "logits/chosen": -8.217029571533203, - "logits/rejected": -1.9880825281143188, - "logps/chosen": -248.58444213867188, - "logps/rejected": -1480.9061279296875, - "loss": 0.0232, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.8662872314453125, - "rewards/margins": 10.709732055664062, - "rewards/rejected": -9.84344482421875, - "step": 267 - }, - { - "epoch": 0.1, - "learning_rate": 1.960675305841364e-05, - "logits/chosen": -4.9772539138793945, - "logits/rejected": -0.6124101281166077, - "logps/chosen": -519.309326171875, - "logps/rejected": -1607.982666015625, - "loss": 0.0006, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.302398681640625, - "rewards/margins": 21.0633487701416, - "rewards/rejected": -22.365747451782227, - "step": 268 - }, - { - "epoch": 0.1, - "learning_rate": 1.9603435646128172e-05, - "logits/chosen": -1.294649362564087, - "logits/rejected": -6.297434329986572, - "logps/chosen": -386.611572265625, - "logps/rejected": -49.10198211669922, - "loss": 0.0214, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.314239501953125, - "rewards/margins": 1.8006935119628906, - "rewards/rejected": -2.1149330139160156, - "step": 269 - }, - { - "epoch": 0.1, - "learning_rate": 1.960010458282326e-05, - "logits/chosen": -8.656562805175781, - "logits/rejected": -2.2799808979034424, - "logps/chosen": -396.7870178222656, - "logps/rejected": -2346.756103515625, - "loss": 0.0027, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.9642486572265625, - "rewards/margins": 26.25663185119629, - "rewards/rejected": -25.292383193969727, - "step": 270 - }, - { - "epoch": 0.1, - "learning_rate": 1.9596759873233916e-05, - "logits/chosen": -7.87455940246582, - "logits/rejected": -9.119364738464355, - "logps/chosen": -176.22854614257812, - "logps/rejected": -1072.4676513671875, - "loss": 0.0008, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.35290834307670593, - "rewards/margins": 5.879153251647949, - "rewards/rejected": -5.5262451171875, - "step": 271 - }, - { - "epoch": 0.1, - "learning_rate": 1.959340152211455e-05, - "logits/chosen": -2.820374011993408, - "logits/rejected": -5.396991729736328, - "logps/chosen": -242.45513916015625, - "logps/rejected": -273.45782470703125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.5933288931846619, - "rewards/margins": 10.992707252502441, - "rewards/rejected": -10.399378776550293, - "step": 272 - }, - { - "epoch": 0.1, - "learning_rate": 1.9590029534238977e-05, - "logits/chosen": -2.7121033668518066, - "logits/rejected": -3.118978261947632, - "logps/chosen": -533.172607421875, - "logps/rejected": -567.2615966796875, - "loss": 0.0012, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.4570556581020355, - "rewards/margins": 6.540256023406982, - "rewards/rejected": -6.997311592102051, - "step": 273 - }, - { - "epoch": 0.1, - "learning_rate": 1.9586643914400372e-05, - "logits/chosen": -3.704258441925049, - "logits/rejected": -2.508335590362549, - "logps/chosen": -701.3446044921875, - "logps/rejected": -1166.908447265625, - "loss": 0.0039, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.5737060904502869, - "rewards/margins": 26.603174209594727, - "rewards/rejected": -26.029468536376953, - "step": 274 - }, - { - "epoch": 0.1, - "learning_rate": 1.9583244667411306e-05, - "logits/chosen": -2.723653793334961, - "logits/rejected": -2.5562963485717773, - "logps/chosen": -354.6741943359375, - "logps/rejected": -529.8425903320312, - "loss": 0.0014, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.70794677734375, - "rewards/margins": 10.518160820007324, - "rewards/rejected": -11.226107597351074, - "step": 275 - }, - { - "epoch": 0.1, - "learning_rate": 1.9579831798103716e-05, - "logits/chosen": -3.7720911502838135, - "logits/rejected": -2.5868115425109863, - "logps/chosen": -231.26309204101562, - "logps/rejected": -437.2344970703125, - "loss": 0.0032, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.3235809803009033, - "rewards/margins": 10.090311050415039, - "rewards/rejected": -8.766730308532715, - "step": 276 - }, - { - "epoch": 0.1, - "learning_rate": 1.95764053113289e-05, - "logits/chosen": -7.6099677085876465, - "logits/rejected": -3.0796751976013184, - "logps/chosen": -644.1988525390625, - "logps/rejected": -2130.50927734375, - "loss": 0.0097, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.835235595703125, - "rewards/margins": 29.8552303314209, - "rewards/rejected": -29.019994735717773, - "step": 277 - }, - { - "epoch": 0.11, - "learning_rate": 1.9572965211957515e-05, - "logits/chosen": -7.2586822509765625, - "logits/rejected": -3.6364636421203613, - "logps/chosen": -451.0693359375, - "logps/rejected": -1550.6422119140625, - "loss": 0.0009, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.536279320716858, - "rewards/margins": 18.496240615844727, - "rewards/rejected": -16.9599609375, - "step": 278 - }, - { - "epoch": 0.11, - "learning_rate": 1.956951150487957e-05, - "logits/chosen": -1.9905290603637695, - "logits/rejected": -4.720874786376953, - "logps/chosen": -495.6787109375, - "logps/rejected": -664.4122314453125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.819732666015625, - "rewards/margins": 22.198556900024414, - "rewards/rejected": -21.37882423400879, - "step": 279 - }, - { - "epoch": 0.11, - "learning_rate": 1.956604419500441e-05, - "logits/chosen": -6.534451007843018, - "logits/rejected": -1.9393223524093628, - "logps/chosen": -209.6212158203125, - "logps/rejected": -842.0643310546875, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.24153748154640198, - "rewards/margins": 12.67211627960205, - "rewards/rejected": -12.43057918548584, - "step": 280 - }, - { - "epoch": 0.11, - "learning_rate": 1.9562563287260724e-05, - "logits/chosen": -3.5247364044189453, - "logits/rejected": -2.196540117263794, - "logps/chosen": -209.09710693359375, - "logps/rejected": -319.91717529296875, - "loss": 0.0894, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.2017868012189865, - "rewards/margins": 4.131980895996094, - "rewards/rejected": -3.930194139480591, - "step": 281 - }, - { - "epoch": 0.11, - "learning_rate": 1.9559068786596526e-05, - "logits/chosen": -5.849279880523682, - "logits/rejected": -1.6964237689971924, - "logps/chosen": -232.2515869140625, - "logps/rejected": -649.9090576171875, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.45100709795951843, - "rewards/margins": 10.987768173217773, - "rewards/rejected": -10.536761283874512, - "step": 282 - }, - { - "epoch": 0.11, - "learning_rate": 1.9555560697979147e-05, - "logits/chosen": -5.69732141494751, - "logits/rejected": -2.071847438812256, - "logps/chosen": -326.60882568359375, - "logps/rejected": -1006.7421875, - "loss": 0.0002, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.1670349836349487, - "rewards/margins": 21.840967178344727, - "rewards/rejected": -23.00800132751465, - "step": 283 - }, - { - "epoch": 0.11, - "learning_rate": 1.955203902639525e-05, - "logits/chosen": -4.794885158538818, - "logits/rejected": -4.268442153930664, - "logps/chosen": -337.8975524902344, - "logps/rejected": -385.6924133300781, - "loss": 0.0354, - "rewards/accuracies": 1.0, - "rewards/chosen": -3.32169508934021, - "rewards/margins": 2.79537034034729, - "rewards/rejected": -6.1170654296875, - "step": 284 - }, - { - "epoch": 0.11, - "learning_rate": 1.9548503776850785e-05, - "logits/chosen": -8.051592826843262, - "logits/rejected": -2.6632823944091797, - "logps/chosen": -286.995849609375, - "logps/rejected": -1242.20849609375, - "loss": 0.0075, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.19722290337085724, - "rewards/margins": 9.135578155517578, - "rewards/rejected": -9.33280086517334, - "step": 285 - }, - { - "epoch": 0.11, - "learning_rate": 1.9544954954371013e-05, - "logits/chosen": -8.050623893737793, - "logits/rejected": -7.244107723236084, - "logps/chosen": -401.236328125, - "logps/rejected": -1110.3507080078125, - "loss": 0.0183, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.02996826171875, - "rewards/margins": 11.015490531921387, - "rewards/rejected": -12.045458793640137, - "step": 286 - }, - { - "epoch": 0.11, - "learning_rate": 1.954139256400049e-05, - "logits/chosen": -8.212361335754395, - "logits/rejected": -6.819372177124023, - "logps/chosen": -130.84515380859375, - "logps/rejected": -617.4396362304688, - "loss": 0.0322, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.25203704833984375, - "rewards/margins": 16.122589111328125, - "rewards/rejected": -15.870551109313965, - "step": 287 - }, - { - "epoch": 0.11, - "learning_rate": 1.9537816610803056e-05, - "logits/chosen": -3.638612747192383, - "logits/rejected": -3.0054240226745605, - "logps/chosen": -199.0789794921875, - "logps/rejected": -370.83966064453125, - "loss": 0.0129, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.1095551252365112, - "rewards/margins": 8.935214042663574, - "rewards/rejected": -7.825659275054932, - "step": 288 - }, - { - "epoch": 0.11, - "learning_rate": 1.9534227099861827e-05, - "logits/chosen": -5.0737385749816895, - "logits/rejected": -2.442038059234619, - "logps/chosen": -225.429931640625, - "logps/rejected": -660.71826171875, - "loss": 0.0197, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.004296898841858, - "rewards/margins": 11.46286678314209, - "rewards/rejected": -10.458569526672363, - "step": 289 - }, - { - "epoch": 0.11, - "learning_rate": 1.95306240362792e-05, - "logits/chosen": -4.301759243011475, - "logits/rejected": -3.555366039276123, - "logps/chosen": -209.15554809570312, - "logps/rejected": -488.2810974121094, - "loss": 0.0012, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.5225830078125, - "rewards/margins": 17.84149742126465, - "rewards/rejected": -18.36408042907715, - "step": 290 - }, - { - "epoch": 0.11, - "learning_rate": 1.952700742517682e-05, - "logits/chosen": -2.444464921951294, - "logits/rejected": -3.520402669906616, - "logps/chosen": -283.9183654785156, - "logps/rejected": -412.84259033203125, - "loss": 0.0016, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.3997467756271362, - "rewards/margins": 12.237277030944824, - "rewards/rejected": -13.63702392578125, - "step": 291 - }, - { - "epoch": 0.11, - "learning_rate": 1.952337727169561e-05, - "logits/chosen": -3.599322557449341, - "logits/rejected": -4.479174613952637, - "logps/chosen": -458.8424377441406, - "logps/rejected": -635.3580322265625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.2857849597930908, - "rewards/margins": 17.78999137878418, - "rewards/rejected": -16.50420570373535, - "step": 292 - }, - { - "epoch": 0.11, - "learning_rate": 1.951973358099573e-05, - "logits/chosen": -4.035405158996582, - "logits/rejected": -5.226729393005371, - "logps/chosen": -247.89447021484375, - "logps/rejected": -306.2225341796875, - "loss": 0.0134, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.7962799072265625, - "rewards/margins": 11.155728340148926, - "rewards/rejected": -12.952008247375488, - "step": 293 - }, - { - "epoch": 0.11, - "learning_rate": 1.9516076358256585e-05, - "logits/chosen": -8.488781929016113, - "logits/rejected": -2.363119602203369, - "logps/chosen": -471.072265625, - "logps/rejected": -3206.093017578125, - "loss": 0.0004, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.04809875413775444, - "rewards/margins": 22.213895797729492, - "rewards/rejected": -22.165796279907227, - "step": 294 - }, - { - "epoch": 0.11, - "learning_rate": 1.9512405608676822e-05, - "logits/chosen": -1.380361795425415, - "logits/rejected": -6.132800102233887, - "logps/chosen": -186.3629150390625, - "logps/rejected": -96.19561004638672, - "loss": 0.0005, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.027252197265625, - "rewards/margins": 5.6264238357543945, - "rewards/rejected": -5.6536760330200195, - "step": 295 - }, - { - "epoch": 0.11, - "learning_rate": 1.95087213374743e-05, - "logits/chosen": -7.068907260894775, - "logits/rejected": -7.572510242462158, - "logps/chosen": -197.3795166015625, - "logps/rejected": -401.05865478515625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.7110046744346619, - "rewards/margins": 10.704198837280273, - "rewards/rejected": -9.993194580078125, - "step": 296 - }, - { - "epoch": 0.11, - "learning_rate": 1.950502354988612e-05, - "logits/chosen": -4.882904529571533, - "logits/rejected": -2.237173557281494, - "logps/chosen": -500.27618408203125, - "logps/rejected": -908.949951171875, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": 2.389129638671875, - "rewards/margins": 9.829376220703125, - "rewards/rejected": -7.44024658203125, - "step": 297 - }, - { - "epoch": 0.11, - "learning_rate": 1.9501312251168574e-05, - "logits/chosen": -2.0154199600219727, - "logits/rejected": -1.2746981382369995, - "logps/chosen": -348.03240966796875, - "logps/rejected": -502.97552490234375, - "loss": 0.0006, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.3075927793979645, - "rewards/margins": 7.082550048828125, - "rewards/rejected": -7.390142917633057, - "step": 298 - }, - { - "epoch": 0.11, - "learning_rate": 1.9497587446597185e-05, - "logits/chosen": -5.967316150665283, - "logits/rejected": -2.518038034439087, - "logps/chosen": -361.13897705078125, - "logps/rejected": -1049.320068359375, - "loss": 0.0007, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.9055420160293579, - "rewards/margins": 18.198205947875977, - "rewards/rejected": -17.29266357421875, - "step": 299 - }, - { - "epoch": 0.11, - "learning_rate": 1.949384914146665e-05, - "logits/chosen": -6.13284158706665, - "logits/rejected": -1.5196619033813477, - "logps/chosen": -449.7428283691406, - "logps/rejected": -1305.279541015625, - "loss": 0.0036, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.6152466535568237, - "rewards/margins": 18.732067108154297, - "rewards/rejected": -17.1168212890625, - "step": 300 - }, - { - "epoch": 0.11, - "learning_rate": 1.9490097341090868e-05, - "logits/chosen": -4.918934345245361, - "logits/rejected": -4.299050331115723, - "logps/chosen": -330.3537902832031, - "logps/rejected": -658.9326171875, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.07766418904066086, - "rewards/margins": 11.400161743164062, - "rewards/rejected": -11.322497367858887, - "step": 301 - }, - { - "epoch": 0.11, - "learning_rate": 1.948633205080292e-05, - "logits/chosen": -9.543011665344238, - "logits/rejected": -1.3891898393630981, - "logps/chosen": -181.64300537109375, - "logps/rejected": -5245.20068359375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.2041671723127365, - "rewards/margins": 29.166568756103516, - "rewards/rejected": -28.96240234375, - "step": 302 - }, - { - "epoch": 0.11, - "learning_rate": 1.9482553275955067e-05, - "logits/chosen": -4.05569314956665, - "logits/rejected": -2.976581573486328, - "logps/chosen": -159.00161743164062, - "logps/rejected": -395.6207275390625, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.205169677734375, - "rewards/margins": 11.707674980163574, - "rewards/rejected": -11.5025053024292, - "step": 303 - }, - { - "epoch": 0.11, - "learning_rate": 1.947876102191873e-05, - "logits/chosen": -3.6118414402008057, - "logits/rejected": -8.6051025390625, - "logps/chosen": -243.01698303222656, - "logps/rejected": -173.8104248046875, - "loss": 0.0011, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.1484939604997635, - "rewards/margins": 7.624300956726074, - "rewards/rejected": -7.475807189941406, - "step": 304 - }, - { - "epoch": 0.12, - "learning_rate": 1.9474955294084492e-05, - "logits/chosen": -2.26293683052063, - "logits/rejected": -6.367428779602051, - "logps/chosen": -341.7620544433594, - "logps/rejected": -202.28475952148438, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.8392486572265625, - "rewards/margins": 8.305843353271484, - "rewards/rejected": -7.466594696044922, - "step": 305 - }, - { - "epoch": 0.12, - "learning_rate": 1.9471136097862093e-05, - "logits/chosen": -1.97501802444458, - "logits/rejected": -1.975164532661438, - "logps/chosen": -223.29612731933594, - "logps/rejected": -653.9149169921875, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.8356064558029175, - "rewards/margins": 15.661503791809082, - "rewards/rejected": -13.825897216796875, - "step": 306 - }, - { - "epoch": 0.12, - "learning_rate": 1.9467303438680414e-05, - "logits/chosen": -6.115452766418457, - "logits/rejected": -2.9663705825805664, - "logps/chosen": -330.524658203125, - "logps/rejected": -781.5615234375, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.7296600341796875, - "rewards/margins": 7.695792198181152, - "rewards/rejected": -8.42545223236084, - "step": 307 - }, - { - "epoch": 0.12, - "learning_rate": 1.946345732198748e-05, - "logits/chosen": -2.3268234729766846, - "logits/rejected": -1.23207688331604, - "logps/chosen": -549.3635864257812, - "logps/rejected": -1206.457275390625, - "loss": 0.0107, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.8054016828536987, - "rewards/margins": 28.181421279907227, - "rewards/rejected": -29.9868221282959, - "step": 308 - }, - { - "epoch": 0.12, - "learning_rate": 1.945959775325043e-05, - "logits/chosen": -0.6930338740348816, - "logits/rejected": -5.327260494232178, - "logps/chosen": -397.9732971191406, - "logps/rejected": -422.291259765625, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.23824158310890198, - "rewards/margins": 17.38691520690918, - "rewards/rejected": -17.14867401123047, - "step": 309 - }, - { - "epoch": 0.12, - "learning_rate": 1.945572473795554e-05, - "logits/chosen": -2.092176914215088, - "logits/rejected": -5.683188438415527, - "logps/chosen": -767.407958984375, - "logps/rejected": -824.3773803710938, - "loss": 0.0007, - "rewards/accuracies": 1.0, - "rewards/chosen": -4.130731105804443, - "rewards/margins": 22.961231231689453, - "rewards/rejected": -27.091962814331055, - "step": 310 - }, - { - "epoch": 0.12, - "learning_rate": 1.94518382816082e-05, - "logits/chosen": -3.5396840572357178, - "logits/rejected": -2.916445732116699, - "logps/chosen": -370.7032775878906, - "logps/rejected": -659.40087890625, - "loss": 0.0009, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.1912139654159546, - "rewards/margins": 17.5407657623291, - "rewards/rejected": -16.349552154541016, - "step": 311 - }, - { - "epoch": 0.12, - "learning_rate": 1.944793838973289e-05, - "logits/chosen": -4.348126411437988, - "logits/rejected": -2.7369344234466553, - "logps/chosen": -325.99639892578125, - "logps/rejected": -1022.3756713867188, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.8018829226493835, - "rewards/margins": 31.69717788696289, - "rewards/rejected": -30.895294189453125, - "step": 312 - }, - { - "epoch": 0.12, - "learning_rate": 1.9444025067873205e-05, - "logits/chosen": -7.305613040924072, - "logits/rejected": -3.4861063957214355, - "logps/chosen": -242.69305419921875, - "logps/rejected": -1169.4019775390625, - "loss": 0.0067, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.23016051948070526, - "rewards/margins": 10.601767539978027, - "rewards/rejected": -10.371606826782227, - "step": 313 - }, - { - "epoch": 0.12, - "learning_rate": 1.9440098321591825e-05, - "logits/chosen": -2.436505079269409, - "logits/rejected": -2.638185739517212, - "logps/chosen": -853.64208984375, - "logps/rejected": -933.1539306640625, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -4.959448337554932, - "rewards/margins": 7.992468357086182, - "rewards/rejected": -12.951916694641113, - "step": 314 - }, - { - "epoch": 0.12, - "learning_rate": 1.943615815647051e-05, - "logits/chosen": -2.109037399291992, - "logits/rejected": -2.7056519985198975, - "logps/chosen": -241.2688446044922, - "logps/rejected": -222.62811279296875, - "loss": 0.0368, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.1905258893966675, - "rewards/margins": 1.1679672002792358, - "rewards/rejected": -2.3584930896759033, - "step": 315 - }, - { - "epoch": 0.12, - "learning_rate": 1.9432204578110094e-05, - "logits/chosen": -2.890486717224121, - "logits/rejected": -2.3419370651245117, - "logps/chosen": -667.6866455078125, - "logps/rejected": -1166.7139892578125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.6397949457168579, - "rewards/margins": 31.1612548828125, - "rewards/rejected": -30.521459579467773, - "step": 316 - }, - { - "epoch": 0.12, - "learning_rate": 1.9428237592130487e-05, - "logits/chosen": -5.568444728851318, - "logits/rejected": -3.8215956687927246, - "logps/chosen": -446.5609436035156, - "logps/rejected": -934.8877563476562, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.16151733696460724, - "rewards/margins": 11.150177001953125, - "rewards/rejected": -11.311694145202637, - "step": 317 - }, - { - "epoch": 0.12, - "learning_rate": 1.9424257204170643e-05, - "logits/chosen": -3.118877410888672, - "logits/rejected": -2.9128105640411377, - "logps/chosen": -210.49111938476562, - "logps/rejected": -415.01702880859375, - "loss": 0.0003, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.149078369140625, - "rewards/margins": 12.625213623046875, - "rewards/rejected": -12.47613525390625, - "step": 318 - }, - { - "epoch": 0.12, - "learning_rate": 1.9420263419888577e-05, - "logits/chosen": -2.839186668395996, - "logits/rejected": -3.219777822494507, - "logps/chosen": -258.25518798828125, - "logps/rejected": -472.0949401855469, - "loss": 0.0011, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.9810974597930908, - "rewards/margins": 15.024744987487793, - "rewards/rejected": -17.005842208862305, - "step": 319 - }, - { - "epoch": 0.12, - "learning_rate": 1.9416256244961348e-05, - "logits/chosen": -2.5475549697875977, - "logits/rejected": -5.940627098083496, - "logps/chosen": -429.63165283203125, - "logps/rejected": -275.93341064453125, - "loss": 0.0002, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.261016845703125, - "rewards/margins": 7.080895900726318, - "rewards/rejected": -6.819879055023193, - "step": 320 - }, - { - "epoch": 0.12, - "learning_rate": 1.9412235685085034e-05, - "logits/chosen": -5.264909744262695, - "logits/rejected": -3.6032392978668213, - "logps/chosen": -345.74041748046875, - "logps/rejected": -642.5079345703125, - "loss": 0.0002, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.16685791313648224, - "rewards/margins": 11.536731719970703, - "rewards/rejected": -11.70358943939209, - "step": 321 - }, - { - "epoch": 0.12, - "learning_rate": 1.940820174597476e-05, - "logits/chosen": -4.423511981964111, - "logits/rejected": -2.607524871826172, - "logps/chosen": -233.60691833496094, - "logps/rejected": -654.545654296875, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.3998458981513977, - "rewards/margins": 26.590517044067383, - "rewards/rejected": -26.9903621673584, - "step": 322 - }, - { - "epoch": 0.12, - "learning_rate": 1.9404154433364652e-05, - "logits/chosen": -8.690425872802734, - "logits/rejected": -3.4995670318603516, - "logps/chosen": -187.46099853515625, - "logps/rejected": -1658.1966552734375, - "loss": 0.0007, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.47650909423828125, - "rewards/margins": 17.5351619720459, - "rewards/rejected": -18.01167106628418, - "step": 323 - }, - { - "epoch": 0.12, - "learning_rate": 1.940009375300786e-05, - "logits/chosen": -5.1698317527771, - "logits/rejected": -2.7681467533111572, - "logps/chosen": -270.297119140625, - "logps/rejected": -647.24267578125, - "loss": 0.0033, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.05178222805261612, - "rewards/margins": 11.529278755187988, - "rewards/rejected": -11.581061363220215, - "step": 324 - }, - { - "epoch": 0.12, - "learning_rate": 1.9396019710676527e-05, - "logits/chosen": -0.7420932054519653, - "logits/rejected": -3.2544236183166504, - "logps/chosen": -350.4496154785156, - "logps/rejected": -640.4312133789062, - "loss": 0.0033, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.37941285967826843, - "rewards/margins": 23.928491592407227, - "rewards/rejected": -23.5490779876709, - "step": 325 - }, - { - "epoch": 0.12, - "learning_rate": 1.9391932312161786e-05, - "logits/chosen": -7.17381477355957, - "logits/rejected": -1.4625076055526733, - "logps/chosen": -204.75762939453125, - "logps/rejected": -1226.260009765625, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.7570419311523438, - "rewards/margins": 24.837553024291992, - "rewards/rejected": -24.08051109313965, - "step": 326 - }, - { - "epoch": 0.12, - "learning_rate": 1.9387831563273775e-05, - "logits/chosen": -5.796679973602295, - "logits/rejected": -2.7690789699554443, - "logps/chosen": -777.336181640625, - "logps/rejected": -2132.86181640625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.1631592512130737, - "rewards/margins": 38.83137130737305, - "rewards/rejected": -37.668212890625, - "step": 327 - }, - { - "epoch": 0.12, - "learning_rate": 1.938371746984158e-05, - "logits/chosen": -4.427911758422852, - "logits/rejected": -4.0133771896362305, - "logps/chosen": -493.2630310058594, - "logps/rejected": -652.042724609375, - "loss": 0.0002, - "rewards/accuracies": 1.0, - "rewards/chosen": -3.3798675537109375, - "rewards/margins": 11.252020835876465, - "rewards/rejected": -14.631888389587402, - "step": 328 - }, - { - "epoch": 0.12, - "learning_rate": 1.9379590037713287e-05, - "logits/chosen": -1.9013030529022217, - "logits/rejected": -1.415598750114441, - "logps/chosen": -265.5330810546875, - "logps/rejected": -434.684814453125, - "loss": 0.0005, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.11155395954847336, - "rewards/margins": 10.221399307250977, - "rewards/rejected": -10.109845161437988, - "step": 329 - }, - { - "epoch": 0.12, - "learning_rate": 1.9375449272755917e-05, - "logits/chosen": -0.8637969493865967, - "logits/rejected": -6.422396183013916, - "logps/chosen": -543.60693359375, - "logps/rejected": -260.0291442871094, - "loss": 0.0003, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.543621838092804, - "rewards/margins": 13.374262809753418, - "rewards/rejected": -13.917884826660156, - "step": 330 - }, - { - "epoch": 0.13, - "learning_rate": 1.9371295180855454e-05, - "logits/chosen": -1.5606319904327393, - "logits/rejected": -1.8227932453155518, - "logps/chosen": -393.7984619140625, - "logps/rejected": -632.1459350585938, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.8271331787109375, - "rewards/margins": 16.47892189025879, - "rewards/rejected": -18.306055068969727, - "step": 331 - }, - { - "epoch": 0.13, - "learning_rate": 1.9367127767916828e-05, - "logits/chosen": -7.250857353210449, - "logits/rejected": -3.675037145614624, - "logps/chosen": -175.639892578125, - "logps/rejected": -1037.8695068359375, - "loss": 0.0267, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.753796398639679, - "rewards/margins": 32.865379333496094, - "rewards/rejected": -32.1115837097168, - "step": 332 - }, - { - "epoch": 0.13, - "learning_rate": 1.93629470398639e-05, - "logits/chosen": -7.677213191986084, - "logits/rejected": -7.71237850189209, - "logps/chosen": -208.28216552734375, - "logps/rejected": -646.439453125, - "loss": 0.0025, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.5369583368301392, - "rewards/margins": 10.837141990661621, - "rewards/rejected": -10.300183296203613, - "step": 333 - }, - { - "epoch": 0.13, - "learning_rate": 1.9358753002639466e-05, - "logits/chosen": -4.739018440246582, - "logits/rejected": -1.9397497177124023, - "logps/chosen": -558.03955078125, - "logps/rejected": -1133.945556640625, - "loss": 0.0002, - "rewards/accuracies": 1.0, - "rewards/chosen": -3.0794007778167725, - "rewards/margins": 10.730364799499512, - "rewards/rejected": -13.809765815734863, - "step": 334 - }, - { - "epoch": 0.13, - "learning_rate": 1.935454566220522e-05, - "logits/chosen": -4.667544364929199, - "logits/rejected": -2.050302267074585, - "logps/chosen": -308.16619873046875, - "logps/rejected": -853.5905151367188, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.1075897216796875, - "rewards/margins": 12.327865600585938, - "rewards/rejected": -11.22027587890625, - "step": 335 - }, - { - "epoch": 0.13, - "learning_rate": 1.93503250245418e-05, - "logits/chosen": -3.9327495098114014, - "logits/rejected": -5.563740253448486, - "logps/chosen": -449.06622314453125, - "logps/rejected": -746.483642578125, - "loss": 0.0016, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.866290271282196, - "rewards/margins": 30.158618927001953, - "rewards/rejected": -29.292327880859375, - "step": 336 - }, - { - "epoch": 0.13, - "learning_rate": 1.9346091095648712e-05, - "logits/chosen": -5.7023797035217285, - "logits/rejected": -3.6795384883880615, - "logps/chosen": -213.393310546875, - "logps/rejected": -493.5028991699219, - "loss": 0.0002, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.08349609375, - "rewards/margins": 6.366662502288818, - "rewards/rejected": -6.450158596038818, - "step": 337 - }, - { - "epoch": 0.13, - "learning_rate": 1.9341843881544372e-05, - "logits/chosen": -0.2316390573978424, - "logits/rejected": -6.352477073669434, - "logps/chosen": -349.21441650390625, - "logps/rejected": -240.19174194335938, - "loss": 0.0044, - "rewards/accuracies": 1.0, - "rewards/chosen": 2.4324281215667725, - "rewards/margins": 13.192056655883789, - "rewards/rejected": -10.759628295898438, - "step": 338 - }, - { - "epoch": 0.13, - "learning_rate": 1.933758338826608e-05, - "logits/chosen": -2.509366512298584, - "logits/rejected": -1.7552622556686401, - "logps/chosen": -351.93096923828125, - "logps/rejected": -823.3248291015625, - "loss": 0.0008, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.7213806509971619, - "rewards/margins": 33.32902526855469, - "rewards/rejected": -32.607643127441406, - "step": 339 - }, - { - "epoch": 0.13, - "learning_rate": 1.9333309621870012e-05, - "logits/chosen": -6.730276584625244, - "logits/rejected": -2.204925775527954, - "logps/chosen": -188.78482055664062, - "logps/rejected": -1626.8280029296875, - "loss": 0.0069, - "rewards/accuracies": 1.0, - "rewards/chosen": 2.442622423171997, - "rewards/margins": 38.02360534667969, - "rewards/rejected": -35.58098220825195, - "step": 340 - }, - { - "epoch": 0.13, - "learning_rate": 1.9329022588431204e-05, - "logits/chosen": -4.823989391326904, - "logits/rejected": -2.465388059616089, - "logps/chosen": -261.28228759765625, - "logps/rejected": -604.3908081054688, - "loss": 0.0021, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.8808655142784119, - "rewards/margins": 8.830774307250977, - "rewards/rejected": -9.711639404296875, - "step": 341 - }, - { - "epoch": 0.13, - "learning_rate": 1.932472229404356e-05, - "logits/chosen": -4.707321643829346, - "logits/rejected": -2.19405198097229, - "logps/chosen": -353.96990966796875, - "logps/rejected": -1000.1801147460938, - "loss": 0.0041, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.47747802734375, - "rewards/margins": 21.73895835876465, - "rewards/rejected": -21.2614803314209, - "step": 342 - }, - { - "epoch": 0.13, - "learning_rate": 1.932040874481983e-05, - "logits/chosen": -6.038529872894287, - "logits/rejected": -3.4694740772247314, - "logps/chosen": -471.4718322753906, - "logps/rejected": -1175.583984375, - "loss": 0.0056, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.09964294731616974, - "rewards/margins": 18.34982681274414, - "rewards/rejected": -18.25018310546875, - "step": 343 - }, - { - "epoch": 0.13, - "learning_rate": 1.9316081946891604e-05, - "logits/chosen": -2.5451455116271973, - "logits/rejected": -4.259688854217529, - "logps/chosen": -507.01336669921875, - "logps/rejected": -674.1217651367188, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.839245617389679, - "rewards/margins": 22.381000518798828, - "rewards/rejected": -23.220245361328125, - "step": 344 - }, - { - "epoch": 0.13, - "learning_rate": 1.9311741906409302e-05, - "logits/chosen": -1.6816555261611938, - "logits/rejected": -5.229318141937256, - "logps/chosen": -364.65972900390625, - "logps/rejected": -128.00352478027344, - "loss": 0.0007, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.8855316638946533, - "rewards/margins": 8.309100151062012, - "rewards/rejected": -6.4235687255859375, - "step": 345 - }, - { - "epoch": 0.13, - "learning_rate": 1.9307388629542183e-05, - "logits/chosen": -7.656329154968262, - "logits/rejected": -3.97381329536438, - "logps/chosen": -291.7689208984375, - "logps/rejected": -1302.314208984375, - "loss": 0.0231, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.8825135231018066, - "rewards/margins": 25.089460372924805, - "rewards/rejected": -27.971973419189453, - "step": 346 - }, - { - "epoch": 0.13, - "learning_rate": 1.9303022122478303e-05, - "logits/chosen": -6.182886123657227, - "logits/rejected": -2.5624897480010986, - "logps/chosen": -686.7092895507812, - "logps/rejected": -2217.40380859375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.22898559272289276, - "rewards/margins": 34.91237258911133, - "rewards/rejected": -34.683387756347656, - "step": 347 - }, - { - "epoch": 0.13, - "learning_rate": 1.929864239142453e-05, - "logits/chosen": -2.6123838424682617, - "logits/rejected": -2.614593982696533, - "logps/chosen": -268.7187805175781, - "logps/rejected": -456.35052490234375, - "loss": 0.0025, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.16933289170265198, - "rewards/margins": 13.42307186126709, - "rewards/rejected": -13.59240436553955, - "step": 348 - }, - { - "epoch": 0.13, - "learning_rate": 1.9294249442606533e-05, - "logits/chosen": -1.2794644832611084, - "logits/rejected": -5.711861610412598, - "logps/chosen": -238.87908935546875, - "logps/rejected": -205.06155395507812, - "loss": 0.0003, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.20274963974952698, - "rewards/margins": 13.170267105102539, - "rewards/rejected": -13.373016357421875, - "step": 349 - }, - { - "epoch": 0.13, - "learning_rate": 1.9289843282268773e-05, - "logits/chosen": -4.411020278930664, - "logits/rejected": -1.5999016761779785, - "logps/chosen": -499.2969970703125, - "logps/rejected": -976.3939208984375, - "loss": 0.0125, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.430328369140625, - "rewards/margins": 10.60880184173584, - "rewards/rejected": -9.178473472595215, - "step": 350 - }, - { - "epoch": 0.13, - "learning_rate": 1.9285423916674478e-05, - "logits/chosen": -8.922526359558105, - "logits/rejected": -2.5974960327148438, - "logps/chosen": -333.0882568359375, - "logps/rejected": -2818.788818359375, - "loss": 0.0012, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.4355407655239105, - "rewards/margins": 29.5596981048584, - "rewards/rejected": -29.9952392578125, - "step": 351 - }, - { - "epoch": 0.13, - "learning_rate": 1.9280991352105656e-05, - "logits/chosen": -2.4533276557922363, - "logits/rejected": -3.8968493938446045, - "logps/chosen": -307.1236572265625, - "logps/rejected": -262.802490234375, - "loss": 0.0051, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.10091247409582138, - "rewards/margins": 3.1717560291290283, - "rewards/rejected": -3.2726686000823975, - "step": 352 - }, - { - "epoch": 0.13, - "learning_rate": 1.927654559486308e-05, - "logits/chosen": -7.0699262619018555, - "logits/rejected": -1.8858680725097656, - "logps/chosen": -504.637451171875, - "logps/rejected": -2591.6435546875, - "loss": 0.0049, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.172186255455017, - "rewards/margins": 28.229461669921875, - "rewards/rejected": -27.057275772094727, - "step": 353 - }, - { - "epoch": 0.13, - "learning_rate": 1.927208665126627e-05, - "logits/chosen": -3.584186553955078, - "logits/rejected": -3.0603528022766113, - "logps/chosen": -197.27706909179688, - "logps/rejected": -397.0185546875, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.18690796196460724, - "rewards/margins": 7.651928901672363, - "rewards/rejected": -7.838836669921875, - "step": 354 - }, - { - "epoch": 0.13, - "learning_rate": 1.926761452765349e-05, - "logits/chosen": -6.5029072761535645, - "logits/rejected": -2.0958657264709473, - "logps/chosen": -603.8170166015625, - "logps/rejected": -2024.755615234375, - "loss": 0.0392, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.8675903677940369, - "rewards/margins": 19.171154022216797, - "rewards/rejected": -18.303564071655273, - "step": 355 - }, - { - "epoch": 0.13, - "learning_rate": 1.9263129230381735e-05, - "logits/chosen": -6.303800582885742, - "logits/rejected": -1.9649847745895386, - "logps/chosen": -658.231689453125, - "logps/rejected": -1912.3948974609375, - "loss": 0.0005, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.3900024890899658, - "rewards/margins": 15.801355361938477, - "rewards/rejected": -14.41135311126709, - "step": 356 - }, - { - "epoch": 0.13, - "learning_rate": 1.925863076582674e-05, - "logits/chosen": -7.367771148681641, - "logits/rejected": -2.086721658706665, - "logps/chosen": -500.419189453125, - "logps/rejected": -2413.08251953125, - "loss": 0.0064, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.30502626299858093, - "rewards/margins": 36.12324142456055, - "rewards/rejected": -35.818214416503906, - "step": 357 - }, - { - "epoch": 0.14, - "learning_rate": 1.9254119140382952e-05, - "logits/chosen": -3.497753858566284, - "logits/rejected": -1.9581148624420166, - "logps/chosen": -456.22808837890625, - "logps/rejected": -1068.7186279296875, - "loss": 0.0003, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.8631744384765625, - "rewards/margins": 20.25156593322754, - "rewards/rejected": -19.388391494750977, - "step": 358 - }, - { - "epoch": 0.14, - "learning_rate": 1.9249594360463514e-05, - "logits/chosen": -7.509840488433838, - "logits/rejected": -2.3967173099517822, - "logps/chosen": -435.1610107421875, - "logps/rejected": -2123.129150390625, - "loss": 0.0026, - "rewards/accuracies": 1.0, - "rewards/chosen": 2.101879835128784, - "rewards/margins": 21.29814338684082, - "rewards/rejected": -19.196264266967773, - "step": 359 - }, - { - "epoch": 0.14, - "learning_rate": 1.9245056432500277e-05, - "logits/chosen": -2.8940625190734863, - "logits/rejected": -0.9739224314689636, - "logps/chosen": -304.03436279296875, - "logps/rejected": -665.3532104492188, - "loss": 0.0022, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.48992919921875, - "rewards/margins": 15.405023574829102, - "rewards/rejected": -13.915094375610352, - "step": 360 - }, - { - "epoch": 0.14, - "learning_rate": 1.9240505362943782e-05, - "logits/chosen": -6.813126564025879, - "logits/rejected": -2.939556121826172, - "logps/chosen": -309.0423278808594, - "logps/rejected": -1119.83203125, - "loss": 0.0024, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.9285613894462585, - "rewards/margins": 22.342214584350586, - "rewards/rejected": -23.270776748657227, - "step": 361 - }, - { - "epoch": 0.14, - "learning_rate": 1.9235941158263253e-05, - "logits/chosen": -4.576578617095947, - "logits/rejected": -3.2936928272247314, - "logps/chosen": -456.7904052734375, - "logps/rejected": -781.5634765625, - "loss": 0.0004, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.332244873046875, - "rewards/margins": 8.385126113891602, - "rewards/rejected": -9.717370986938477, - "step": 362 - }, - { - "epoch": 0.14, - "learning_rate": 1.9231363824946572e-05, - "logits/chosen": -1.4657233953475952, - "logits/rejected": -6.670277118682861, - "logps/chosen": -423.5279541015625, - "logps/rejected": -241.24807739257812, - "loss": 0.0606, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.7622498273849487, - "rewards/margins": 12.672340393066406, - "rewards/rejected": -10.910090446472168, - "step": 363 - }, - { - "epoch": 0.14, - "learning_rate": 1.9226773369500305e-05, - "logits/chosen": -1.6894512176513672, - "logits/rejected": -6.201028823852539, - "logps/chosen": -374.6197509765625, - "logps/rejected": -205.92245483398438, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.37404176592826843, - "rewards/margins": 8.655425071716309, - "rewards/rejected": -9.02946662902832, - "step": 364 - }, - { - "epoch": 0.14, - "learning_rate": 1.9222169798449647e-05, - "logits/chosen": -1.8199821710586548, - "logits/rejected": -5.15178108215332, - "logps/chosen": -407.5299377441406, - "logps/rejected": -334.7701110839844, - "loss": 0.0002, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.9960601925849915, - "rewards/margins": 10.320606231689453, - "rewards/rejected": -9.324545860290527, - "step": 365 - }, - { - "epoch": 0.14, - "learning_rate": 1.9217553118338453e-05, - "logits/chosen": -8.401362419128418, - "logits/rejected": -3.3829004764556885, - "logps/chosen": -678.8208618164062, - "logps/rejected": -3863.07763671875, - "loss": 0.0006, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.762847900390625, - "rewards/margins": 27.8994083404541, - "rewards/rejected": -28.662256240844727, - "step": 366 - }, - { - "epoch": 0.14, - "learning_rate": 1.9212923335729206e-05, - "logits/chosen": -2.4323601722717285, - "logits/rejected": -4.273120880126953, - "logps/chosen": -411.44281005859375, - "logps/rejected": -297.5990905761719, - "loss": 0.0149, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.10345458984375, - "rewards/margins": 2.0696046352386475, - "rewards/rejected": -3.1730592250823975, - "step": 367 - }, - { - "epoch": 0.14, - "learning_rate": 1.9208280457203014e-05, - "logits/chosen": -1.632767677307129, - "logits/rejected": -1.3044661283493042, - "logps/chosen": -517.171630859375, - "logps/rejected": -1002.2053833007812, - "loss": 0.0089, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.7814667224884033, - "rewards/margins": 25.844820022583008, - "rewards/rejected": -28.62628746032715, - "step": 368 - }, - { - "epoch": 0.14, - "learning_rate": 1.92036244893596e-05, - "logits/chosen": -8.23952865600586, - "logits/rejected": -2.668055534362793, - "logps/chosen": -273.4283447265625, - "logps/rejected": -1442.694091796875, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.05177002027630806, - "rewards/margins": 11.290539741516113, - "rewards/rejected": -11.23876953125, - "step": 369 - }, - { - "epoch": 0.14, - "learning_rate": 1.9198955438817294e-05, - "logits/chosen": -3.216826915740967, - "logits/rejected": -4.808554172515869, - "logps/chosen": -301.5583801269531, - "logps/rejected": -357.06744384765625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.12592773139476776, - "rewards/margins": 15.06092357635498, - "rewards/rejected": -15.186851501464844, - "step": 370 - }, - { - "epoch": 0.14, - "learning_rate": 1.9194273312213027e-05, - "logits/chosen": -7.990997791290283, - "logits/rejected": -2.5507452487945557, - "logps/chosen": -253.86105346679688, - "logps/rejected": -1820.97998046875, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.28812867403030396, - "rewards/margins": 25.46843910217285, - "rewards/rejected": -25.18031120300293, - "step": 371 - }, - { - "epoch": 0.14, - "learning_rate": 1.918957811620231e-05, - "logits/chosen": -2.6918020248413086, - "logits/rejected": -7.75776481628418, - "logps/chosen": -392.65679931640625, - "logps/rejected": -73.29937744140625, - "loss": 0.0058, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.001361084054224193, - "rewards/margins": 3.0611770153045654, - "rewards/rejected": -3.0625381469726562, - "step": 372 - }, - { - "epoch": 0.14, - "learning_rate": 1.9184869857459233e-05, - "logits/chosen": -3.9216556549072266, - "logits/rejected": -1.0932897329330444, - "logps/chosen": -429.61627197265625, - "logps/rejected": -992.0496826171875, - "loss": 0.0965, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.4586670398712158, - "rewards/margins": 10.632922172546387, - "rewards/rejected": -9.17425537109375, - "step": 373 - }, - { - "epoch": 0.14, - "learning_rate": 1.9180148542676456e-05, - "logits/chosen": -0.16134928166866302, - "logits/rejected": -4.151867389678955, - "logps/chosen": -403.6969299316406, - "logps/rejected": -227.33425903320312, - "loss": 0.0012, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.6419586539268494, - "rewards/margins": 8.884243965148926, - "rewards/rejected": -9.526202201843262, - "step": 374 - }, - { - "epoch": 0.14, - "learning_rate": 1.9175414178565205e-05, - "logits/chosen": -3.557157278060913, - "logits/rejected": -4.749056816101074, - "logps/chosen": -231.4237060546875, - "logps/rejected": -583.1112060546875, - "loss": 0.0035, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.04617614671587944, - "rewards/margins": 31.442907333374023, - "rewards/rejected": -31.489084243774414, - "step": 375 - }, - { - "epoch": 0.14, - "learning_rate": 1.9170666771855242e-05, - "logits/chosen": -4.467682838439941, - "logits/rejected": -4.8326191902160645, - "logps/chosen": -203.1243896484375, - "logps/rejected": -385.133056640625, - "loss": 0.0002, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.7043182253837585, - "rewards/margins": 11.695101737976074, - "rewards/rejected": -10.99078369140625, - "step": 376 - }, - { - "epoch": 0.14, - "learning_rate": 1.9165906329294875e-05, - "logits/chosen": -4.80072021484375, - "logits/rejected": -4.020634174346924, - "logps/chosen": -190.05245971679688, - "logps/rejected": -484.3847961425781, - "loss": 0.0002, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.1784698963165283, - "rewards/margins": 17.776437759399414, - "rewards/rejected": -18.95490837097168, - "step": 377 - }, - { - "epoch": 0.14, - "learning_rate": 1.9161132857650945e-05, - "logits/chosen": -5.269837379455566, - "logits/rejected": -1.9111785888671875, - "logps/chosen": -293.98779296875, - "logps/rejected": -802.8192138671875, - "loss": 0.0003, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.60162353515625, - "rewards/margins": 10.771679878234863, - "rewards/rejected": -10.170056343078613, - "step": 378 - }, - { - "epoch": 0.14, - "learning_rate": 1.915634636370881e-05, - "logits/chosen": -1.715701699256897, - "logits/rejected": -3.1870853900909424, - "logps/chosen": -637.7576293945312, - "logps/rejected": -637.1826171875, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.894866943359375, - "rewards/margins": 11.809332847595215, - "rewards/rejected": -13.70419979095459, - "step": 379 - }, - { - "epoch": 0.14, - "learning_rate": 1.9151546854272335e-05, - "logits/chosen": -6.529548168182373, - "logits/rejected": -3.289215087890625, - "logps/chosen": -496.2402648925781, - "logps/rejected": -1223.47509765625, - "loss": 0.0003, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.0889739990234375, - "rewards/margins": 8.296292304992676, - "rewards/rejected": -8.385266304016113, - "step": 380 - }, - { - "epoch": 0.14, - "learning_rate": 1.9146734336163898e-05, - "logits/chosen": -2.587442398071289, - "logits/rejected": -0.2401040941476822, - "logps/chosen": -526.640869140625, - "logps/rejected": -1137.6756591796875, - "loss": 0.0016, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.7641357779502869, - "rewards/margins": 25.100482940673828, - "rewards/rejected": -25.8646183013916, - "step": 381 - }, - { - "epoch": 0.14, - "learning_rate": 1.9141908816224356e-05, - "logits/chosen": -5.16765022277832, - "logits/rejected": -0.8285416960716248, - "logps/chosen": -498.6813659667969, - "logps/rejected": -1128.57666015625, - "loss": 0.0003, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.5024017095565796, - "rewards/margins": 11.058029174804688, - "rewards/rejected": -9.555627822875977, - "step": 382 - }, - { - "epoch": 0.14, - "learning_rate": 1.9137070301313053e-05, - "logits/chosen": -2.77731990814209, - "logits/rejected": -2.072970390319824, - "logps/chosen": -340.6893310546875, - "logps/rejected": -595.6341552734375, - "loss": 0.0005, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.01727294921875, - "rewards/margins": 19.039783477783203, - "rewards/rejected": -18.022510528564453, - "step": 383 - }, - { - "epoch": 0.15, - "learning_rate": 1.9132218798307806e-05, - "logits/chosen": -8.319737434387207, - "logits/rejected": -8.344383239746094, - "logps/chosen": -115.02408599853516, - "logps/rejected": -699.550537109375, - "loss": 0.0002, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.104589082300663, - "rewards/margins": 12.725500106811523, - "rewards/rejected": -12.62091064453125, - "step": 384 - }, - { - "epoch": 0.15, - "learning_rate": 1.9127354314104892e-05, - "logits/chosen": -8.46863842010498, - "logits/rejected": -2.489837646484375, - "logps/chosen": -329.40264892578125, - "logps/rejected": -1637.842529296875, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.565966784954071, - "rewards/margins": 15.08905029296875, - "rewards/rejected": -14.523083686828613, - "step": 385 - }, - { - "epoch": 0.15, - "learning_rate": 1.9122476855619045e-05, - "logits/chosen": -3.646260976791382, - "logits/rejected": -1.7653422355651855, - "logps/chosen": -135.30172729492188, - "logps/rejected": -447.30426025390625, - "loss": 0.0014, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.5277755856513977, - "rewards/margins": 12.698326110839844, - "rewards/rejected": -12.170550346374512, - "step": 386 - }, - { - "epoch": 0.15, - "learning_rate": 1.9117586429783433e-05, - "logits/chosen": -2.881721258163452, - "logits/rejected": -1.3261570930480957, - "logps/chosen": -340.158203125, - "logps/rejected": -726.4378051757812, - "loss": 0.0091, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.1729888916015625, - "rewards/margins": 19.360584259033203, - "rewards/rejected": -21.533573150634766, - "step": 387 - }, - { - "epoch": 0.15, - "learning_rate": 1.9112683043549665e-05, - "logits/chosen": -2.1125400066375732, - "logits/rejected": -1.3697036504745483, - "logps/chosen": -371.3809814453125, - "logps/rejected": -487.43731689453125, - "loss": 0.0004, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.0140564441680908, - "rewards/margins": 5.829974174499512, - "rewards/rejected": -6.844030857086182, - "step": 388 - }, - { - "epoch": 0.15, - "learning_rate": 1.9107766703887764e-05, - "logits/chosen": -3.523296594619751, - "logits/rejected": -1.298742651939392, - "logps/chosen": -421.9143371582031, - "logps/rejected": -864.9102783203125, - "loss": 0.0052, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.581957995891571, - "rewards/margins": 15.76190185546875, - "rewards/rejected": -15.179944038391113, - "step": 389 - }, - { - "epoch": 0.15, - "learning_rate": 1.910283741778618e-05, - "logits/chosen": -1.9755581617355347, - "logits/rejected": -6.524284362792969, - "logps/chosen": -410.8788757324219, - "logps/rejected": -273.62042236328125, - "loss": 0.0015, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.31778261065483093, - "rewards/margins": 10.359100341796875, - "rewards/rejected": -10.0413179397583, - "step": 390 - }, - { - "epoch": 0.15, - "learning_rate": 1.9097895192251754e-05, - "logits/chosen": -2.417311429977417, - "logits/rejected": -2.015655040740967, - "logps/chosen": -112.5438003540039, - "logps/rejected": -316.3580627441406, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.5168235898017883, - "rewards/margins": 10.913074493408203, - "rewards/rejected": -11.429898262023926, - "step": 391 - }, - { - "epoch": 0.15, - "learning_rate": 1.909294003430972e-05, - "logits/chosen": -6.737057685852051, - "logits/rejected": -1.665785551071167, - "logps/chosen": -434.89190673828125, - "logps/rejected": -1181.93115234375, - "loss": 0.0105, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.741613745689392, - "rewards/margins": 6.962487697601318, - "rewards/rejected": -8.7041015625, - "step": 392 - }, - { - "epoch": 0.15, - "learning_rate": 1.9087971951003705e-05, - "logits/chosen": -2.0020127296447754, - "logits/rejected": -6.10560941696167, - "logps/chosen": -325.82366943359375, - "logps/rejected": -351.1170654296875, - "loss": 0.0017, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.957348644733429, - "rewards/margins": 15.048338890075684, - "rewards/rejected": -14.09099006652832, - "step": 393 - }, - { - "epoch": 0.15, - "learning_rate": 1.9082990949395696e-05, - "logits/chosen": -4.520237922668457, - "logits/rejected": -5.96985387802124, - "logps/chosen": -959.1387329101562, - "logps/rejected": -850.906494140625, - "loss": 0.0006, - "rewards/accuracies": 1.0, - "rewards/chosen": -4.229913234710693, - "rewards/margins": 7.798712253570557, - "rewards/rejected": -12.02862548828125, - "step": 394 - }, - { - "epoch": 0.15, - "learning_rate": 1.9077997036566054e-05, - "logits/chosen": -4.185568332672119, - "logits/rejected": -1.7978451251983643, - "logps/chosen": -544.2710571289062, - "logps/rejected": -1181.0579833984375, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.1058349609375, - "rewards/margins": 21.706409454345703, - "rewards/rejected": -22.812244415283203, - "step": 395 - }, - { - "epoch": 0.15, - "learning_rate": 1.9072990219613486e-05, - "logits/chosen": -2.1687657833099365, - "logits/rejected": -5.548962593078613, - "logps/chosen": -404.9643249511719, - "logps/rejected": -227.08181762695312, - "loss": 0.0117, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.422558546066284, - "rewards/margins": 5.114542007446289, - "rewards/rejected": -7.537100315093994, - "step": 396 - }, - { - "epoch": 0.15, - "learning_rate": 1.906797050565505e-05, - "logits/chosen": -6.585293769836426, - "logits/rejected": -1.5060582160949707, - "logps/chosen": -318.05255126953125, - "logps/rejected": -941.860107421875, - "loss": 0.0003, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.7131744623184204, - "rewards/margins": 8.55114459991455, - "rewards/rejected": -10.26431941986084, - "step": 397 - }, - { - "epoch": 0.15, - "learning_rate": 1.9062937901826128e-05, - "logits/chosen": -2.0228304862976074, - "logits/rejected": -4.841838359832764, - "logps/chosen": -452.778076171875, - "logps/rejected": -287.383544921875, - "loss": 0.0007, - "rewards/accuracies": 1.0, - "rewards/chosen": 2.1051971912384033, - "rewards/margins": 14.611429214477539, - "rewards/rejected": -12.506232261657715, - "step": 398 - }, - { - "epoch": 0.15, - "learning_rate": 1.905789241528042e-05, - "logits/chosen": -4.577978610992432, - "logits/rejected": -1.7589720487594604, - "logps/chosen": -462.56170654296875, - "logps/rejected": -1112.792236328125, - "loss": 0.0012, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.6805633306503296, - "rewards/margins": 23.662155151367188, - "rewards/rejected": -25.34271812438965, - "step": 399 - }, - { - "epoch": 0.15, - "learning_rate": 1.9052834053189957e-05, - "logits/chosen": -1.5866811275482178, - "logits/rejected": -4.5787248611450195, - "logps/chosen": -562.594970703125, - "logps/rejected": -494.70306396484375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.641760230064392, - "rewards/margins": 16.411157608032227, - "rewards/rejected": -18.05291748046875, - "step": 400 - }, - { - "epoch": 0.15, - "learning_rate": 1.904776282274506e-05, - "logits/chosen": -1.788087248802185, - "logits/rejected": -5.5095343589782715, - "logps/chosen": -520.7947998046875, - "logps/rejected": -452.4825744628906, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.210778832435608, - "rewards/margins": 17.42300796508789, - "rewards/rejected": -16.212228775024414, - "step": 401 - }, - { - "epoch": 0.15, - "learning_rate": 1.9042678731154337e-05, - "logits/chosen": -1.6419007778167725, - "logits/rejected": -1.6971651315689087, - "logps/chosen": -341.969970703125, - "logps/rejected": -499.1156005859375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.0309113264083862, - "rewards/margins": 10.782114028930664, - "rewards/rejected": -9.751202583312988, - "step": 402 - }, - { - "epoch": 0.15, - "learning_rate": 1.9037581785644695e-05, - "logits/chosen": -2.746701240539551, - "logits/rejected": -7.228620529174805, - "logps/chosen": -393.29168701171875, - "logps/rejected": -181.1408233642578, - "loss": 0.0039, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.153222680091858, - "rewards/margins": 5.138834476470947, - "rewards/rejected": -6.292057037353516, - "step": 403 - }, - { - "epoch": 0.15, - "learning_rate": 1.903247199346129e-05, - "logits/chosen": -0.35627588629722595, - "logits/rejected": -5.678506851196289, - "logps/chosen": -589.4991455078125, - "logps/rejected": -214.80532836914062, - "loss": 0.0043, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.5629090070724487, - "rewards/margins": 11.136004447937012, - "rewards/rejected": -9.573095321655273, - "step": 404 - }, - { - "epoch": 0.15, - "learning_rate": 1.902734936186756e-05, - "logits/chosen": -7.253168106079102, - "logits/rejected": -1.3656036853790283, - "logps/chosen": -549.9807739257812, - "logps/rejected": -3108.45361328125, - "loss": 0.0974, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.3665710389614105, - "rewards/margins": 35.18943786621094, - "rewards/rejected": -35.556007385253906, - "step": 405 - }, - { - "epoch": 0.15, - "learning_rate": 1.9022213898145176e-05, - "logits/chosen": -2.7543628215789795, - "logits/rejected": -7.4471235275268555, - "logps/chosen": -219.87088012695312, - "logps/rejected": -63.85586166381836, - "loss": 0.0213, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.8081222772598267, - "rewards/margins": 2.072261333465576, - "rewards/rejected": -1.264139175415039, - "step": 406 - }, - { - "epoch": 0.15, - "learning_rate": 1.901706560959407e-05, - "logits/chosen": -2.7871274948120117, - "logits/rejected": -5.044607162475586, - "logps/chosen": -409.217529296875, - "logps/rejected": -537.2252197265625, - "loss": 0.0017, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.979095458984375, - "rewards/margins": 19.591604232788086, - "rewards/rejected": -20.57069969177246, - "step": 407 - }, - { - "epoch": 0.15, - "learning_rate": 1.901190450353238e-05, - "logits/chosen": -5.9041948318481445, - "logits/rejected": -4.654779434204102, - "logps/chosen": -266.25592041015625, - "logps/rejected": -522.0159912109375, - "loss": 0.0003, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.4195877313613892, - "rewards/margins": 6.155821323394775, - "rewards/rejected": -7.575408935546875, - "step": 408 - }, - { - "epoch": 0.15, - "learning_rate": 1.900673058729649e-05, - "logits/chosen": -5.101020336151123, - "logits/rejected": -1.8543164730072021, - "logps/chosen": -333.929443359375, - "logps/rejected": -801.4805908203125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.009448242373764515, - "rewards/margins": 9.167444229125977, - "rewards/rejected": -9.176892280578613, - "step": 409 - }, - { - "epoch": 0.16, - "learning_rate": 1.900154386824096e-05, - "logits/chosen": -6.672011375427246, - "logits/rejected": -3.396728515625, - "logps/chosen": -323.73651123046875, - "logps/rejected": -1128.746337890625, - "loss": 0.0067, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.065673828125, - "rewards/margins": 19.276525497436523, - "rewards/rejected": -21.342199325561523, - "step": 410 - }, - { - "epoch": 0.16, - "learning_rate": 1.8996344353738587e-05, - "logits/chosen": -4.387092113494873, - "logits/rejected": -4.875228404998779, - "logps/chosen": -249.63229370117188, - "logps/rejected": -305.28936767578125, - "loss": 0.0033, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.1456269025802612, - "rewards/margins": 9.117362976074219, - "rewards/rejected": -7.971736431121826, - "step": 411 - }, - { - "epoch": 0.16, - "learning_rate": 1.8991132051180332e-05, - "logits/chosen": -4.108353614807129, - "logits/rejected": -4.745050430297852, - "logps/chosen": -222.00929260253906, - "logps/rejected": -503.574462890625, - "loss": 0.0002, - "rewards/accuracies": 1.0, - "rewards/chosen": 2.1022613048553467, - "rewards/margins": 16.43148422241211, - "rewards/rejected": -14.3292236328125, - "step": 412 - }, - { - "epoch": 0.16, - "learning_rate": 1.8985906967975337e-05, - "logits/chosen": -7.425245761871338, - "logits/rejected": -3.897841453552246, - "logps/chosen": -380.1465759277344, - "logps/rejected": -1598.378173828125, - "loss": 0.0005, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.5311248898506165, - "rewards/margins": 34.07842254638672, - "rewards/rejected": -34.60954666137695, - "step": 413 - }, - { - "epoch": 0.16, - "learning_rate": 1.8980669111550917e-05, - "logits/chosen": -1.8048049211502075, - "logits/rejected": -1.457174301147461, - "logps/chosen": -403.2032470703125, - "logps/rejected": -920.0721435546875, - "loss": 0.0023, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.7361969351768494, - "rewards/margins": 26.147323608398438, - "rewards/rejected": -26.883520126342773, - "step": 414 - }, - { - "epoch": 0.16, - "learning_rate": 1.8975418489352534e-05, - "logits/chosen": -2.276806592941284, - "logits/rejected": -3.963989496231079, - "logps/chosen": -352.58038330078125, - "logps/rejected": -297.01263427734375, - "loss": 0.0059, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.36880189180374146, - "rewards/margins": 14.69040584564209, - "rewards/rejected": -14.321603775024414, - "step": 415 - }, - { - "epoch": 0.16, - "learning_rate": 1.8970155108843812e-05, - "logits/chosen": -1.6815211772918701, - "logits/rejected": -6.576498031616211, - "logps/chosen": -208.42041015625, - "logps/rejected": -125.70685577392578, - "loss": 0.0002, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.8506194949150085, - "rewards/margins": 7.152457237243652, - "rewards/rejected": -8.003076553344727, - "step": 416 - }, - { - "epoch": 0.16, - "learning_rate": 1.8964878977506496e-05, - "logits/chosen": -0.5271928906440735, - "logits/rejected": -3.5023090839385986, - "logps/chosen": -252.17576599121094, - "logps/rejected": -300.636962890625, - "loss": 0.0017, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.5109726190567017, - "rewards/margins": 7.7760515213012695, - "rewards/rejected": -7.265079021453857, - "step": 417 - }, - { - "epoch": 0.16, - "learning_rate": 1.8959590102840465e-05, - "logits/chosen": -4.276127815246582, - "logits/rejected": -2.9530324935913086, - "logps/chosen": -455.3387451171875, - "logps/rejected": -885.0732421875, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.888995349407196, - "rewards/margins": 15.208684921264648, - "rewards/rejected": -14.319689750671387, - "step": 418 - }, - { - "epoch": 0.16, - "learning_rate": 1.8954288492363708e-05, - "logits/chosen": -1.7489677667617798, - "logits/rejected": -7.219113826751709, - "logps/chosen": -216.8948974609375, - "logps/rejected": -191.17828369140625, - "loss": 0.0867, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.44423219561576843, - "rewards/margins": 7.342970848083496, - "rewards/rejected": -6.898738861083984, - "step": 419 - }, - { - "epoch": 0.16, - "learning_rate": 1.894897415361231e-05, - "logits/chosen": -0.4682503044605255, - "logits/rejected": -6.058073043823242, - "logps/chosen": -304.150634765625, - "logps/rejected": -139.3246307373047, - "loss": 0.023, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.1083557605743408, - "rewards/margins": 4.643180847167969, - "rewards/rejected": -3.534825086593628, - "step": 420 - }, - { - "epoch": 0.16, - "learning_rate": 1.8943647094140473e-05, - "logits/chosen": -8.515443801879883, - "logits/rejected": -1.1548266410827637, - "logps/chosen": -140.69969177246094, - "logps/rejected": -2358.606689453125, - "loss": 0.0047, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.238057017326355, - "rewards/margins": 25.019136428833008, - "rewards/rejected": -23.78108024597168, - "step": 421 - }, - { - "epoch": 0.16, - "learning_rate": 1.8938307321520453e-05, - "logits/chosen": -3.671024799346924, - "logits/rejected": -1.7941564321517944, - "logps/chosen": -311.15118408203125, - "logps/rejected": -791.539794921875, - "loss": 0.0004, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.040032982826233, - "rewards/margins": 19.175580978393555, - "rewards/rejected": -18.135547637939453, - "step": 422 - }, - { - "epoch": 0.16, - "learning_rate": 1.893295484334259e-05, - "logits/chosen": -3.9970366954803467, - "logits/rejected": -1.7180187702178955, - "logps/chosen": -400.042236328125, - "logps/rejected": -973.286865234375, - "loss": 0.0012, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.53509521484375, - "rewards/margins": 23.782129287719727, - "rewards/rejected": -25.317224502563477, - "step": 423 - }, - { - "epoch": 0.16, - "learning_rate": 1.8927589667215294e-05, - "logits/chosen": -7.349233150482178, - "logits/rejected": -1.3405590057373047, - "logps/chosen": -497.1449279785156, - "logps/rejected": -2619.29931640625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.25372621417045593, - "rewards/margins": 27.309099197387695, - "rewards/rejected": -27.05537223815918, - "step": 424 - }, - { - "epoch": 0.16, - "learning_rate": 1.8922211800765006e-05, - "logits/chosen": -0.988781750202179, - "logits/rejected": -6.106762886047363, - "logps/chosen": -173.17788696289062, - "logps/rejected": -198.62889099121094, - "loss": 0.0007, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.5048370361328125, - "rewards/margins": 11.232434272766113, - "rewards/rejected": -10.7275972366333, - "step": 425 - }, - { - "epoch": 0.16, - "learning_rate": 1.891682125163622e-05, - "logits/chosen": -1.7614474296569824, - "logits/rejected": -5.971066951751709, - "logps/chosen": -320.20208740234375, - "logps/rejected": -184.63241577148438, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": 2.0844147205352783, - "rewards/margins": 8.736037254333496, - "rewards/rejected": -6.651622772216797, - "step": 426 - }, - { - "epoch": 0.16, - "learning_rate": 1.8911418027491453e-05, - "logits/chosen": -1.6193233728408813, - "logits/rejected": -6.505433559417725, - "logps/chosen": -296.297119140625, - "logps/rejected": -199.94430541992188, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 2.544696092605591, - "rewards/margins": 9.450858116149902, - "rewards/rejected": -6.906161785125732, - "step": 427 - }, - { - "epoch": 0.16, - "learning_rate": 1.8906002136011235e-05, - "logits/chosen": -2.6609303951263428, - "logits/rejected": -2.5485079288482666, - "logps/chosen": -256.9081115722656, - "logps/rejected": -484.3341064453125, - "loss": 0.0049, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.5725616812705994, - "rewards/margins": 17.76239776611328, - "rewards/rejected": -18.334959030151367, - "step": 428 - }, - { - "epoch": 0.16, - "learning_rate": 1.8900573584894112e-05, - "logits/chosen": -2.714718818664551, - "logits/rejected": -3.365203857421875, - "logps/chosen": -527.8983154296875, - "logps/rejected": -702.9873046875, - "loss": 0.0003, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.5390869379043579, - "rewards/margins": 11.907855987548828, - "rewards/rejected": -11.368768692016602, - "step": 429 - }, - { - "epoch": 0.16, - "learning_rate": 1.8895132381856617e-05, - "logits/chosen": -2.1214723587036133, - "logits/rejected": -3.454636812210083, - "logps/chosen": -305.7988586425781, - "logps/rejected": -533.6102294921875, - "loss": 0.0004, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.12085876613855362, - "rewards/margins": 24.605512619018555, - "rewards/rejected": -24.48465347290039, - "step": 430 - }, - { - "epoch": 0.16, - "learning_rate": 1.888967853463327e-05, - "logits/chosen": -1.524285078048706, - "logits/rejected": -5.565269947052002, - "logps/chosen": -386.86865234375, - "logps/rejected": -230.86444091796875, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.7483459711074829, - "rewards/margins": 9.525350570678711, - "rewards/rejected": -10.273696899414062, - "step": 431 - }, - { - "epoch": 0.16, - "learning_rate": 1.8884212050976568e-05, - "logits/chosen": -2.097592353820801, - "logits/rejected": -2.4979641437530518, - "logps/chosen": -298.50140380859375, - "logps/rejected": -429.62420654296875, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.144866943359375, - "rewards/margins": 10.601788520812988, - "rewards/rejected": -11.746655464172363, - "step": 432 - }, - { - "epoch": 0.16, - "learning_rate": 1.8878732938656962e-05, - "logits/chosen": -1.8963249921798706, - "logits/rejected": -1.663374423980713, - "logps/chosen": -285.1385192871094, - "logps/rejected": -505.66448974609375, - "loss": 0.0004, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.18466491997241974, - "rewards/margins": 14.68957233428955, - "rewards/rejected": -14.874237060546875, - "step": 433 - }, - { - "epoch": 0.16, - "learning_rate": 1.8873241205462864e-05, - "logits/chosen": -7.3338165283203125, - "logits/rejected": -0.9352037310600281, - "logps/chosen": -290.92266845703125, - "logps/rejected": -1605.76513671875, - "loss": 0.0003, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.5558380484580994, - "rewards/margins": 17.638845443725586, - "rewards/rejected": -17.0830078125, - "step": 434 - }, - { - "epoch": 0.16, - "learning_rate": 1.886773685920062e-05, - "logits/chosen": -7.110229015350342, - "logits/rejected": -2.0725715160369873, - "logps/chosen": -322.15234375, - "logps/rejected": -1502.356201171875, - "loss": 0.0094, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.3340301513671875, - "rewards/margins": 26.901540756225586, - "rewards/rejected": -27.235570907592773, - "step": 435 - }, - { - "epoch": 0.16, - "learning_rate": 1.8862219907694505e-05, - "logits/chosen": -0.7096309661865234, - "logits/rejected": -6.688462734222412, - "logps/chosen": -378.899169921875, - "logps/rejected": -151.34130859375, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.10823364555835724, - "rewards/margins": 6.788919448852539, - "rewards/rejected": -6.897152900695801, - "step": 436 - }, - { - "epoch": 0.17, - "learning_rate": 1.885669035878672e-05, - "logits/chosen": -2.4732909202575684, - "logits/rejected": -4.0058979988098145, - "logps/chosen": -179.8793487548828, - "logps/rejected": -246.85415649414062, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.1344345808029175, - "rewards/margins": 9.753178596496582, - "rewards/rejected": -10.887613296508789, - "step": 437 - }, - { - "epoch": 0.17, - "learning_rate": 1.8851148220337357e-05, - "logits/chosen": -1.2151833772659302, - "logits/rejected": -7.541356086730957, - "logps/chosen": -765.6434326171875, - "logps/rejected": -330.2477111816406, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.3440430164337158, - "rewards/margins": 14.785351753234863, - "rewards/rejected": -13.441308975219727, - "step": 438 - }, - { - "epoch": 0.17, - "learning_rate": 1.8845593500224416e-05, - "logits/chosen": -7.096003532409668, - "logits/rejected": -4.138184547424316, - "logps/chosen": -504.13323974609375, - "logps/rejected": -1512.79345703125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.2062469720840454, - "rewards/margins": 32.98180389404297, - "rewards/rejected": -34.18804931640625, - "step": 439 - }, - { - "epoch": 0.17, - "learning_rate": 1.8840026206343786e-05, - "logits/chosen": -3.0418648719787598, - "logits/rejected": -2.1669418811798096, - "logps/chosen": -241.96511840820312, - "logps/rejected": -561.678955078125, - "loss": 0.0006, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.4748200178146362, - "rewards/margins": 16.463321685791016, - "rewards/rejected": -14.98850154876709, - "step": 440 - }, - { - "epoch": 0.17, - "learning_rate": 1.8834446346609216e-05, - "logits/chosen": -3.3075311183929443, - "logits/rejected": -3.2812392711639404, - "logps/chosen": -452.42584228515625, - "logps/rejected": -637.2973022460938, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.260473608970642, - "rewards/margins": 14.497193336486816, - "rewards/rejected": -15.75766658782959, - "step": 441 - }, - { - "epoch": 0.17, - "learning_rate": 1.882885392895232e-05, - "logits/chosen": -1.342651128768921, - "logits/rejected": -1.4768052101135254, - "logps/chosen": -173.84494018554688, - "logps/rejected": -503.17803955078125, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.8489273190498352, - "rewards/margins": 20.426246643066406, - "rewards/rejected": -19.577320098876953, - "step": 442 - }, - { - "epoch": 0.17, - "learning_rate": 1.8823248961322572e-05, - "logits/chosen": -0.8917423486709595, - "logits/rejected": -5.044703960418701, - "logps/chosen": -286.39129638671875, - "logps/rejected": -243.3427276611328, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.44542238116264343, - "rewards/margins": 10.292363166809082, - "rewards/rejected": -9.846940994262695, - "step": 443 - }, - { - "epoch": 0.17, - "learning_rate": 1.8817631451687277e-05, - "logits/chosen": -3.2632956504821777, - "logits/rejected": -4.126514911651611, - "logps/chosen": -296.8341979980469, - "logps/rejected": -562.301513671875, - "loss": 0.0258, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.7867034673690796, - "rewards/margins": 16.35341453552246, - "rewards/rejected": -14.56671142578125, - "step": 444 - }, - { - "epoch": 0.17, - "learning_rate": 1.881200140803157e-05, - "logits/chosen": -3.075791358947754, - "logits/rejected": -0.7688875198364258, - "logps/chosen": -518.9710083007812, - "logps/rejected": -952.6240234375, - "loss": 0.0006, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.419335961341858, - "rewards/margins": 13.835572242736816, - "rewards/rejected": -12.41623592376709, - "step": 445 - }, - { - "epoch": 0.17, - "learning_rate": 1.8806358838358403e-05, - "logits/chosen": -2.0741941928863525, - "logits/rejected": -1.6776167154312134, - "logps/chosen": -371.6049499511719, - "logps/rejected": -750.2793579101562, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.29491883516311646, - "rewards/margins": 22.41227149963379, - "rewards/rejected": -22.707189559936523, - "step": 446 - }, - { - "epoch": 0.17, - "learning_rate": 1.8800703750688536e-05, - "logits/chosen": -6.718072891235352, - "logits/rejected": -2.6455187797546387, - "logps/chosen": -250.63348388671875, - "logps/rejected": -850.3908081054688, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 2.296520948410034, - "rewards/margins": 19.754161834716797, - "rewards/rejected": -17.4576416015625, - "step": 447 - }, - { - "epoch": 0.17, - "learning_rate": 1.8795036153060518e-05, - "logits/chosen": -5.5424604415893555, - "logits/rejected": -1.2170997858047485, - "logps/chosen": -221.44821166992188, - "logps/rejected": -847.7346801757812, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.6617294549942017, - "rewards/margins": 23.701948165893555, - "rewards/rejected": -24.363677978515625, - "step": 448 - }, - { - "epoch": 0.17, - "learning_rate": 1.878935605353068e-05, - "logits/chosen": -0.6281782984733582, - "logits/rejected": -3.51938533782959, - "logps/chosen": -337.9883117675781, - "logps/rejected": -592.503173828125, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.46624451875686646, - "rewards/margins": 31.45853614807129, - "rewards/rejected": -31.924779891967773, - "step": 449 - }, - { - "epoch": 0.17, - "learning_rate": 1.8783663460173135e-05, - "logits/chosen": -3.215515375137329, - "logits/rejected": -6.171417236328125, - "logps/chosen": -309.47979736328125, - "logps/rejected": -225.05270385742188, - "loss": 0.0013, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.331512451171875, - "rewards/margins": 12.338467597961426, - "rewards/rejected": -12.00695514678955, - "step": 450 - }, - { - "epoch": 0.17, - "learning_rate": 1.8777958381079737e-05, - "logits/chosen": -0.40461957454681396, - "logits/rejected": -0.8598347306251526, - "logps/chosen": -306.0897216796875, - "logps/rejected": -770.5931396484375, - "loss": 0.0009, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.0736083984375, - "rewards/margins": 31.323150634765625, - "rewards/rejected": -31.396759033203125, - "step": 451 - }, - { - "epoch": 0.17, - "learning_rate": 1.877224082436011e-05, - "logits/chosen": -5.8007707595825195, - "logits/rejected": -4.79465913772583, - "logps/chosen": -1224.99169921875, - "logps/rejected": -288.1068115234375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.3715576231479645, - "rewards/margins": 14.323934555053711, - "rewards/rejected": -13.952377319335938, - "step": 452 - }, - { - "epoch": 0.17, - "learning_rate": 1.8766510798141597e-05, - "logits/chosen": -1.5908445119857788, - "logits/rejected": -5.5835041999816895, - "logps/chosen": -308.9297180175781, - "logps/rejected": -280.1705627441406, - "loss": 0.0003, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.1148041486740112, - "rewards/margins": 10.3467378616333, - "rewards/rejected": -11.461542129516602, - "step": 453 - }, - { - "epoch": 0.17, - "learning_rate": 1.8760768310569273e-05, - "logits/chosen": -1.410481333732605, - "logits/rejected": -2.0463008880615234, - "logps/chosen": -468.7310791015625, - "logps/rejected": -504.72808837890625, - "loss": 0.0004, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.2670532166957855, - "rewards/margins": 8.746651649475098, - "rewards/rejected": -9.013705253601074, - "step": 454 - }, - { - "epoch": 0.17, - "learning_rate": 1.8755013369805926e-05, - "logits/chosen": -2.248603105545044, - "logits/rejected": -2.107697010040283, - "logps/chosen": -203.88279724121094, - "logps/rejected": -293.1894836425781, - "loss": 0.0017, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.3666580319404602, - "rewards/margins": 4.763914585113525, - "rewards/rejected": -5.13057279586792, - "step": 455 - }, - { - "epoch": 0.17, - "learning_rate": 1.8749245984032053e-05, - "logits/chosen": 0.28424641489982605, - "logits/rejected": -3.8005645275115967, - "logps/chosen": -366.6492919921875, - "logps/rejected": -322.3013000488281, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.135467529296875, - "rewards/margins": 11.514655113220215, - "rewards/rejected": -10.37918758392334, - "step": 456 - }, - { - "epoch": 0.17, - "learning_rate": 1.8743466161445823e-05, - "logits/chosen": -7.962973117828369, - "logits/rejected": -2.6180105209350586, - "logps/chosen": -398.9927673339844, - "logps/rejected": -1785.1678466796875, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -4.505185127258301, - "rewards/margins": 11.7748441696167, - "rewards/rejected": -16.280029296875, - "step": 457 - }, - { - "epoch": 0.17, - "learning_rate": 1.87376739102631e-05, - "logits/chosen": -3.9884815216064453, - "logits/rejected": -2.153151750564575, - "logps/chosen": -307.6141662597656, - "logps/rejected": -503.521728515625, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.4134063720703125, - "rewards/margins": 7.408261299133301, - "rewards/rejected": -8.821667671203613, - "step": 458 - }, - { - "epoch": 0.17, - "learning_rate": 1.8731869238717413e-05, - "logits/chosen": -3.9104464054107666, - "logits/rejected": -7.424886226654053, - "logps/chosen": -979.438232421875, - "logps/rejected": -568.4903564453125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -4.6639404296875, - "rewards/margins": 24.672300338745117, - "rewards/rejected": -29.336240768432617, - "step": 459 - }, - { - "epoch": 0.17, - "learning_rate": 1.8726052155059937e-05, - "logits/chosen": -5.5335588455200195, - "logits/rejected": -2.8706483840942383, - "logps/chosen": -474.91094970703125, - "logps/rejected": -1743.385498046875, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.6674317121505737, - "rewards/margins": 38.84123611450195, - "rewards/rejected": -37.173805236816406, - "step": 460 - }, - { - "epoch": 0.17, - "learning_rate": 1.8720222667559498e-05, - "logits/chosen": -3.0038323402404785, - "logits/rejected": -8.079977989196777, - "logps/chosen": -559.8455810546875, - "logps/rejected": -167.636962890625, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.272930860519409, - "rewards/margins": 6.979534149169922, - "rewards/rejected": -9.25246524810791, - "step": 461 - }, - { - "epoch": 0.17, - "learning_rate": 1.8714380784502553e-05, - "logits/chosen": -1.7857357263565063, - "logits/rejected": -4.990614891052246, - "logps/chosen": -327.45147705078125, - "logps/rejected": -333.67108154296875, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.7401580810546875, - "rewards/margins": 10.6283540725708, - "rewards/rejected": -9.888195991516113, - "step": 462 - }, - { - "epoch": 0.18, - "learning_rate": 1.870852651419318e-05, - "logits/chosen": -6.3915486335754395, - "logits/rejected": -2.1159636974334717, - "logps/chosen": -241.7313995361328, - "logps/rejected": -933.0364379882812, - "loss": 0.0002, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.06273956596851349, - "rewards/margins": 31.554105758666992, - "rewards/rejected": -31.616846084594727, - "step": 463 - }, - { - "epoch": 0.18, - "learning_rate": 1.870265986495306e-05, - "logits/chosen": -2.446856737136841, - "logits/rejected": -1.2173336744308472, - "logps/chosen": -320.588623046875, - "logps/rejected": -751.8148803710938, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.12898559868335724, - "rewards/margins": 27.491113662719727, - "rewards/rejected": -27.620100021362305, - "step": 464 - }, - { - "epoch": 0.18, - "learning_rate": 1.869678084512147e-05, - "logits/chosen": -4.201777935028076, - "logits/rejected": -3.582885980606079, - "logps/chosen": -280.6660461425781, - "logps/rejected": -1020.893310546875, - "loss": 0.0006, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.5986480712890625, - "rewards/margins": 46.58845138549805, - "rewards/rejected": -45.989803314208984, - "step": 465 - }, - { - "epoch": 0.18, - "learning_rate": 1.8690889463055285e-05, - "logits/chosen": -4.074512481689453, - "logits/rejected": -3.271113634109497, - "logps/chosen": -434.25799560546875, - "logps/rejected": -738.258056640625, - "loss": 0.001, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.21128235757350922, - "rewards/margins": 11.221115112304688, - "rewards/rejected": -11.432397842407227, - "step": 466 - }, - { - "epoch": 0.18, - "learning_rate": 1.8684985727128936e-05, - "logits/chosen": -2.8499715328216553, - "logits/rejected": -3.7302215099334717, - "logps/chosen": -364.90179443359375, - "logps/rejected": -400.52734375, - "loss": 0.0002, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.886767566204071, - "rewards/margins": 9.870080947875977, - "rewards/rejected": -8.98331356048584, - "step": 467 - }, - { - "epoch": 0.18, - "learning_rate": 1.867906964573442e-05, - "logits/chosen": -0.6313357949256897, - "logits/rejected": -6.48175048828125, - "logps/chosen": -311.61773681640625, - "logps/rejected": -175.2715606689453, - "loss": 0.0002, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.7823638916015625, - "rewards/margins": 7.984996318817139, - "rewards/rejected": -7.202632427215576, - "step": 468 - }, - { - "epoch": 0.18, - "learning_rate": 1.8673141227281282e-05, - "logits/chosen": -4.191955089569092, - "logits/rejected": -1.5720137357711792, - "logps/chosen": -400.0830383300781, - "logps/rejected": -981.7062377929688, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.257742315530777, - "rewards/margins": 24.58388328552246, - "rewards/rejected": -24.326141357421875, - "step": 469 - }, - { - "epoch": 0.18, - "learning_rate": 1.866720048019661e-05, - "logits/chosen": -7.527488708496094, - "logits/rejected": -2.196230173110962, - "logps/chosen": -307.060791015625, - "logps/rejected": -1170.194091796875, - "loss": 0.0014, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.1723694801330566, - "rewards/margins": 10.511846542358398, - "rewards/rejected": -12.684216499328613, - "step": 470 - }, - { - "epoch": 0.18, - "learning_rate": 1.866124741292502e-05, - "logits/chosen": -1.8032370805740356, - "logits/rejected": -2.803600311279297, - "logps/chosen": -278.12957763671875, - "logps/rejected": -488.7651062011719, - "loss": 0.04, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.7995270490646362, - "rewards/margins": 17.328571319580078, - "rewards/rejected": -15.529044151306152, - "step": 471 - }, - { - "epoch": 0.18, - "learning_rate": 1.8655282033928618e-05, - "logits/chosen": -6.380809307098389, - "logits/rejected": -2.0284945964813232, - "logps/chosen": -606.6460571289062, - "logps/rejected": -2219.76416015625, - "loss": 0.0036, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.5096679925918579, - "rewards/margins": 32.344703674316406, - "rewards/rejected": -31.83503532409668, - "step": 472 - }, - { - "epoch": 0.18, - "learning_rate": 1.864930435168703e-05, - "logits/chosen": -1.3737916946411133, - "logits/rejected": -5.625222206115723, - "logps/chosen": -249.24095153808594, - "logps/rejected": -222.17819213867188, - "loss": 0.0008, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.759027123451233, - "rewards/margins": 7.9863715171813965, - "rewards/rejected": -9.74539852142334, - "step": 473 - }, - { - "epoch": 0.18, - "learning_rate": 1.8643314374697377e-05, - "logits/chosen": -5.28463077545166, - "logits/rejected": -3.2413501739501953, - "logps/chosen": -480.5890808105469, - "logps/rejected": -1084.0283203125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.6858795285224915, - "rewards/margins": 21.349538803100586, - "rewards/rejected": -20.663660049438477, - "step": 474 - }, - { - "epoch": 0.18, - "learning_rate": 1.8637312111474237e-05, - "logits/chosen": -3.746169328689575, - "logits/rejected": -6.870762825012207, - "logps/chosen": -242.78057861328125, - "logps/rejected": -352.48907470703125, - "loss": 0.0891, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.724725365638733, - "rewards/margins": 16.47006607055664, - "rewards/rejected": -14.745340347290039, - "step": 475 - }, - { - "epoch": 0.18, - "learning_rate": 1.863129757054966e-05, - "logits/chosen": -2.2041823863983154, - "logits/rejected": -1.2191299200057983, - "logps/chosen": -431.5123291015625, - "logps/rejected": -771.2766723632812, - "loss": 0.0003, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.7585205435752869, - "rewards/margins": 17.447620391845703, - "rewards/rejected": -18.206140518188477, - "step": 476 - }, - { - "epoch": 0.18, - "learning_rate": 1.8625270760473164e-05, - "logits/chosen": -1.5070966482162476, - "logits/rejected": -1.3566194772720337, - "logps/chosen": -517.6356201171875, - "logps/rejected": -1088.2353515625, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -3.270306348800659, - "rewards/margins": 30.995849609375, - "rewards/rejected": -34.26615524291992, - "step": 477 - }, - { - "epoch": 0.18, - "learning_rate": 1.8619231689811673e-05, - "logits/chosen": -2.17692232131958, - "logits/rejected": -1.5240905284881592, - "logps/chosen": -384.6960754394531, - "logps/rejected": -739.173095703125, - "loss": 0.0002, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.8079254627227783, - "rewards/margins": 20.096033096313477, - "rewards/rejected": -22.903959274291992, - "step": 478 - }, - { - "epoch": 0.18, - "learning_rate": 1.8613180367149577e-05, - "logits/chosen": -0.5022593140602112, - "logits/rejected": -6.124171733856201, - "logps/chosen": -399.64532470703125, - "logps/rejected": -119.0405044555664, - "loss": 0.0075, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.17191772162914276, - "rewards/margins": 5.94962215423584, - "rewards/rejected": -6.121540069580078, - "step": 479 - }, - { - "epoch": 0.18, - "learning_rate": 1.8607116801088658e-05, - "logits/chosen": -1.3017401695251465, - "logits/rejected": -2.1475467681884766, - "logps/chosen": -212.85659790039062, - "logps/rejected": -239.4335174560547, - "loss": 0.0002, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.8988372683525085, - "rewards/margins": 6.555700778961182, - "rewards/rejected": -7.454537868499756, - "step": 480 - }, - { - "epoch": 0.18, - "learning_rate": 1.860104100024811e-05, - "logits/chosen": -1.5702108144760132, - "logits/rejected": -1.5702108144760132, - "logps/chosen": 0.0, - "logps/rejected": 0.0, - "loss": 0.0867, - "rewards/accuracies": 0.0, - "rewards/chosen": 0.0, - "rewards/margins": 0.0, - "rewards/rejected": 0.0, - "step": 481 - }, - { - "epoch": 0.18, - "learning_rate": 1.8594952973264512e-05, - "logits/chosen": -5.505049228668213, - "logits/rejected": -1.7837852239608765, - "logps/chosen": -427.6378173828125, - "logps/rejected": -979.10986328125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.45597535371780396, - "rewards/margins": 11.286676406860352, - "rewards/rejected": -10.830700874328613, - "step": 482 - }, - { - "epoch": 0.18, - "learning_rate": 1.8588852728791833e-05, - "logits/chosen": -6.813527584075928, - "logits/rejected": -1.0365774631500244, - "logps/chosen": -459.361328125, - "logps/rejected": -1987.5306396484375, - "loss": 0.0002, - "rewards/accuracies": 1.0, - "rewards/chosen": 3.61102294921875, - "rewards/margins": 39.852394104003906, - "rewards/rejected": -36.241371154785156, - "step": 483 - }, - { - "epoch": 0.18, - "learning_rate": 1.85827402755014e-05, - "logits/chosen": -4.647629737854004, - "logits/rejected": -0.625507652759552, - "logps/chosen": -238.84814453125, - "logps/rejected": -863.0806884765625, - "loss": 0.005, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.4146347045898438, - "rewards/margins": 19.449277877807617, - "rewards/rejected": -18.034643173217773, - "step": 484 - }, - { - "epoch": 0.18, - "learning_rate": 1.8576615622081893e-05, - "logits/chosen": -1.3441673517227173, - "logits/rejected": -1.3441673517227173, - "logps/chosen": -742.8138427734375, - "logps/rejected": -742.8138427734375, - "loss": 0.0899, - "rewards/accuracies": 1.0, - "rewards/chosen": -6.92913818359375, - "rewards/margins": 6.198883056640625e-06, - "rewards/rejected": -6.929144382476807, - "step": 485 - }, - { - "epoch": 0.18, - "learning_rate": 1.8570478777239348e-05, - "logits/chosen": -5.392859935760498, - "logits/rejected": -1.8689581155776978, - "logps/chosen": -354.0369873046875, - "logps/rejected": -1126.500244140625, - "loss": 0.0112, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.2628601789474487, - "rewards/margins": 11.71343994140625, - "rewards/rejected": -10.450579643249512, - "step": 486 - }, - { - "epoch": 0.18, - "learning_rate": 1.856432974969711e-05, - "logits/chosen": -0.2526593804359436, - "logits/rejected": -7.667921543121338, - "logps/chosen": -480.8417053222656, - "logps/rejected": -83.43343353271484, - "loss": 0.0029, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.9028472900390625, - "rewards/margins": 5.180295944213867, - "rewards/rejected": -4.277448654174805, - "step": 487 - }, - { - "epoch": 0.18, - "learning_rate": 1.8558168548195858e-05, - "logits/chosen": -5.118981838226318, - "logits/rejected": -2.546862840652466, - "logps/chosen": -246.5930633544922, - "logps/rejected": -901.4186401367188, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": 2.320369005203247, - "rewards/margins": 21.37124252319336, - "rewards/rejected": -19.050872802734375, - "step": 488 - }, - { - "epoch": 0.18, - "learning_rate": 1.8551995181493574e-05, - "logits/chosen": -5.284242630004883, - "logits/rejected": -4.060103416442871, - "logps/chosen": -112.75390625, - "logps/rejected": -274.6876220703125, - "loss": 0.0075, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.1608787626028061, - "rewards/margins": 2.8374197483062744, - "rewards/rejected": -2.6765410900115967, - "step": 489 - }, - { - "epoch": 0.19, - "learning_rate": 1.8545809658365522e-05, - "logits/chosen": -6.7144317626953125, - "logits/rejected": -2.394881010055542, - "logps/chosen": -334.43988037109375, - "logps/rejected": -996.1290893554688, - "loss": 0.0007, - "rewards/accuracies": 1.0, - "rewards/chosen": 2.224957227706909, - "rewards/margins": 15.536731719970703, - "rewards/rejected": -13.311774253845215, - "step": 490 - }, - { - "epoch": 0.19, - "learning_rate": 1.853961198760426e-05, - "logits/chosen": -2.611962080001831, - "logits/rejected": -6.447724342346191, - "logps/chosen": -207.95050048828125, - "logps/rejected": -180.72714233398438, - "loss": 0.0002, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.01790618896484375, - "rewards/margins": 11.104878425598145, - "rewards/rejected": -11.122784614562988, - "step": 491 - }, - { - "epoch": 0.19, - "learning_rate": 1.8533402178019596e-05, - "logits/chosen": -6.636769771575928, - "logits/rejected": -6.150659084320068, - "logps/chosen": -309.5867919921875, - "logps/rejected": -728.4212646484375, - "loss": 0.0007, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.0930267572402954, - "rewards/margins": 25.20895767211914, - "rewards/rejected": -24.115930557250977, - "step": 492 - }, - { - "epoch": 0.19, - "learning_rate": 1.8527180238438612e-05, - "logits/chosen": -4.0965986251831055, - "logits/rejected": -4.649560451507568, - "logps/chosen": -208.7093963623047, - "logps/rejected": -559.2412109375, - "loss": 0.0148, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.6450912356376648, - "rewards/margins": 23.427165985107422, - "rewards/rejected": -22.782073974609375, - "step": 493 - }, - { - "epoch": 0.19, - "learning_rate": 1.8520946177705622e-05, - "logits/chosen": -1.9076694250106812, - "logits/rejected": -3.6880946159362793, - "logps/chosen": -332.3180236816406, - "logps/rejected": -435.79486083984375, - "loss": 0.0004, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.3841949701309204, - "rewards/margins": 10.781106948852539, - "rewards/rejected": -9.39691162109375, - "step": 494 - }, - { - "epoch": 0.19, - "learning_rate": 1.8514700004682172e-05, - "logits/chosen": -1.8818912506103516, - "logits/rejected": -5.3297858238220215, - "logps/chosen": -701.7162475585938, - "logps/rejected": -680.853759765625, - "loss": 0.0009, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.46619874238967896, - "rewards/margins": 27.386987686157227, - "rewards/rejected": -26.92078971862793, - "step": 495 - }, - { - "epoch": 0.19, - "learning_rate": 1.850844172824702e-05, - "logits/chosen": -2.828038215637207, - "logits/rejected": -3.9294145107269287, - "logps/chosen": -510.3059997558594, - "logps/rejected": -632.5616455078125, - "loss": 0.0008, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.9790679812431335, - "rewards/margins": 7.364614963531494, - "rewards/rejected": -6.385547161102295, - "step": 496 - }, - { - "epoch": 0.19, - "learning_rate": 1.8502171357296144e-05, - "logits/chosen": -4.848553657531738, - "logits/rejected": -3.5294456481933594, - "logps/chosen": -287.4178161621094, - "logps/rejected": -643.908203125, - "loss": 0.0014, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.5367950201034546, - "rewards/margins": 11.903531074523926, - "rewards/rejected": -10.36673641204834, - "step": 497 - }, - { - "epoch": 0.19, - "learning_rate": 1.8495888900742694e-05, - "logits/chosen": -4.8279242515563965, - "logits/rejected": -1.1618704795837402, - "logps/chosen": -226.64913940429688, - "logps/rejected": -918.291748046875, - "loss": 0.001, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.4937973022460938, - "rewards/margins": 25.971031188964844, - "rewards/rejected": -24.47723388671875, - "step": 498 - }, - { - "epoch": 0.19, - "learning_rate": 1.8489594367517015e-05, - "logits/chosen": -7.82919454574585, - "logits/rejected": -4.245132923126221, - "logps/chosen": -354.9734802246094, - "logps/rejected": -1716.0352783203125, - "loss": 0.0008, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.2381622791290283, - "rewards/margins": 31.568294525146484, - "rewards/rejected": -32.80645751953125, - "step": 499 - }, - { - "epoch": 0.19, - "learning_rate": 1.8483287766566607e-05, - "logits/chosen": -1.5840016603469849, - "logits/rejected": -6.148604393005371, - "logps/chosen": -506.7834777832031, - "logps/rejected": -168.74908447265625, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.16085205972194672, - "rewards/margins": 9.503379821777344, - "rewards/rejected": -9.66423225402832, - "step": 500 - }, - { - "epoch": 0.19, - "learning_rate": 1.847696910685613e-05, - "logits/chosen": -1.8980400562286377, - "logits/rejected": -3.538337469100952, - "logps/chosen": -342.414794921875, - "logps/rejected": -485.90753173828125, - "loss": 0.0003, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.931347668170929, - "rewards/margins": 10.925732612609863, - "rewards/rejected": -9.994384765625, - "step": 501 - }, - { - "epoch": 0.19, - "learning_rate": 1.8470638397367397e-05, - "logits/chosen": -1.7292606830596924, - "logits/rejected": -4.866494178771973, - "logps/chosen": -234.36692810058594, - "logps/rejected": -371.8988342285156, - "loss": 0.0007, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.3145096004009247, - "rewards/margins": 15.697493553161621, - "rewards/rejected": -16.01200294494629, - "step": 502 - }, - { - "epoch": 0.19, - "learning_rate": 1.8464295647099323e-05, - "logits/chosen": -2.9339241981506348, - "logits/rejected": -3.719318151473999, - "logps/chosen": -368.8980712890625, - "logps/rejected": -705.6353759765625, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.04468384012579918, - "rewards/margins": 22.3487548828125, - "rewards/rejected": -22.3934383392334, - "step": 503 - }, - { - "epoch": 0.19, - "learning_rate": 1.845794086506796e-05, - "logits/chosen": -3.1205103397369385, - "logits/rejected": -1.289431095123291, - "logps/chosen": -617.827392578125, - "logps/rejected": -1595.466064453125, - "loss": 0.0756, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.23504638671875, - "rewards/margins": 44.703468322753906, - "rewards/rejected": -44.468421936035156, - "step": 504 - }, - { - "epoch": 0.19, - "learning_rate": 1.8451574060306452e-05, - "logits/chosen": -4.525430202484131, - "logits/rejected": -2.995392084121704, - "logps/chosen": -387.36614990234375, - "logps/rejected": -789.5986938476562, - "loss": 0.003, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.883770763874054, - "rewards/margins": 9.830291748046875, - "rewards/rejected": -10.714062690734863, - "step": 505 - }, - { - "epoch": 0.19, - "learning_rate": 1.844519524186504e-05, - "logits/chosen": -2.9793100357055664, - "logits/rejected": -0.5216476321220398, - "logps/chosen": -551.1449584960938, - "logps/rejected": -1168.3404541015625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 2.696807861328125, - "rewards/margins": 13.549921035766602, - "rewards/rejected": -10.853113174438477, - "step": 506 - }, - { - "epoch": 0.19, - "learning_rate": 1.8438804418811038e-05, - "logits/chosen": -2.2566349506378174, - "logits/rejected": -2.4353830814361572, - "logps/chosen": -213.69830322265625, - "logps/rejected": -385.1478271484375, - "loss": 0.0041, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.4830383360385895, - "rewards/margins": 10.347949028015137, - "rewards/rejected": -9.864911079406738, - "step": 507 - }, - { - "epoch": 0.19, - "learning_rate": 1.8432401600228823e-05, - "logits/chosen": -5.523561000823975, - "logits/rejected": -3.7573940753936768, - "logps/chosen": -355.0748291015625, - "logps/rejected": -811.0955810546875, - "loss": 0.0003, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.196874976158142, - "rewards/margins": 12.409838676452637, - "rewards/rejected": -11.212964057922363, - "step": 508 - }, - { - "epoch": 0.19, - "learning_rate": 1.8425986795219828e-05, - "logits/chosen": -2.8474180698394775, - "logits/rejected": -0.3679957687854767, - "logps/chosen": -475.2208251953125, - "logps/rejected": -930.6103515625, - "loss": 0.0011, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.9682861566543579, - "rewards/margins": 15.200562477111816, - "rewards/rejected": -14.23227596282959, - "step": 509 - }, - { - "epoch": 0.19, - "learning_rate": 1.841956001290252e-05, - "logits/chosen": -8.457695960998535, - "logits/rejected": -2.292834758758545, - "logps/chosen": -224.37063598632812, - "logps/rejected": -1489.00439453125, - "loss": 0.0008, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.555249035358429, - "rewards/margins": 23.4593505859375, - "rewards/rejected": -22.904102325439453, - "step": 510 - }, - { - "epoch": 0.19, - "learning_rate": 1.8413121262412395e-05, - "logits/chosen": -3.0410327911376953, - "logits/rejected": -5.722532749176025, - "logps/chosen": -162.31085205078125, - "logps/rejected": -142.38421630859375, - "loss": 0.0014, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.9838241934776306, - "rewards/margins": 4.476309299468994, - "rewards/rejected": -3.4924850463867188, - "step": 511 - }, - { - "epoch": 0.19, - "learning_rate": 1.8406670552901958e-05, - "logits/chosen": -1.9408811330795288, - "logits/rejected": -1.7978962659835815, - "logps/chosen": -344.2870178222656, - "logps/rejected": -419.29510498046875, - "loss": 0.002, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.07100830227136612, - "rewards/margins": 4.203125, - "rewards/rejected": -4.274133205413818, - "step": 512 - }, - { - "epoch": 0.19, - "learning_rate": 1.8400207893540714e-05, - "logits/chosen": -0.24440762400627136, - "logits/rejected": -7.446560859680176, - "logps/chosen": -848.430419921875, - "logps/rejected": -185.73036193847656, - "loss": 0.0003, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.835687279701233, - "rewards/margins": 6.187417507171631, - "rewards/rejected": -8.023104667663574, - "step": 513 - }, - { - "epoch": 0.19, - "learning_rate": 1.8393733293515158e-05, - "logits/chosen": -6.01030969619751, - "logits/rejected": -3.0736284255981445, - "logps/chosen": -556.3109130859375, - "logps/rejected": -2005.941162109375, - "loss": 0.0002, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.3525756895542145, - "rewards/margins": 11.15877628326416, - "rewards/rejected": -10.806200981140137, - "step": 514 - }, - { - "epoch": 0.19, - "learning_rate": 1.8387246762028758e-05, - "logits/chosen": -3.971057176589966, - "logits/rejected": -2.2303366661071777, - "logps/chosen": -1377.864990234375, - "logps/rejected": -2086.871826171875, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.4542359113693237, - "rewards/margins": 16.897693634033203, - "rewards/rejected": -18.3519287109375, - "step": 515 - }, - { - "epoch": 0.2, - "learning_rate": 1.838074830830193e-05, - "logits/chosen": -1.2144330739974976, - "logits/rejected": -3.5988993644714355, - "logps/chosen": -365.0831298828125, - "logps/rejected": -323.98211669921875, - "loss": 0.009, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.8415253162384033, - "rewards/margins": 9.378125190734863, - "rewards/rejected": -7.536599636077881, - "step": 516 - }, - { - "epoch": 0.2, - "learning_rate": 1.837423794157206e-05, - "logits/chosen": -7.245015621185303, - "logits/rejected": -0.6130936145782471, - "logps/chosen": -224.67971801757812, - "logps/rejected": -2021.2000732421875, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.3669312000274658, - "rewards/margins": 20.956762313842773, - "rewards/rejected": -22.323694229125977, - "step": 517 - }, - { - "epoch": 0.2, - "learning_rate": 1.836771567109344e-05, - "logits/chosen": -3.047506093978882, - "logits/rejected": -5.609082221984863, - "logps/chosen": -226.6710205078125, - "logps/rejected": -304.12493896484375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.7878875732421875, - "rewards/margins": 10.803529739379883, - "rewards/rejected": -11.59141731262207, - "step": 518 - }, - { - "epoch": 0.2, - "learning_rate": 1.836118150613732e-05, - "logits/chosen": -3.7543070316314697, - "logits/rejected": -1.4357898235321045, - "logps/chosen": -208.68002319335938, - "logps/rejected": -536.1229858398438, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.31215211749076843, - "rewards/margins": 16.28606605529785, - "rewards/rejected": -16.59821891784668, - "step": 519 - }, - { - "epoch": 0.2, - "learning_rate": 1.835463545599181e-05, - "logits/chosen": -7.3926825523376465, - "logits/rejected": -5.002445220947266, - "logps/chosen": -324.4959716796875, - "logps/rejected": -2099.5390625, - "loss": 0.0106, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.1758087873458862, - "rewards/margins": 17.979812622070312, - "rewards/rejected": -16.804004669189453, - "step": 520 - }, - { - "epoch": 0.2, - "learning_rate": 1.8348077529961957e-05, - "logits/chosen": -1.2497190237045288, - "logits/rejected": -3.445591449737549, - "logps/chosen": -416.0125732421875, - "logps/rejected": -510.63531494140625, - "loss": 0.0006, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.25584107637405396, - "rewards/margins": 21.74635887145996, - "rewards/rejected": -21.49051856994629, - "step": 521 - }, - { - "epoch": 0.2, - "learning_rate": 1.834150773736967e-05, - "logits/chosen": -1.3684824705123901, - "logits/rejected": -3.674882650375366, - "logps/chosen": -216.48898315429688, - "logps/rejected": -267.8467102050781, - "loss": 0.0004, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.1372894048690796, - "rewards/margins": 15.148710250854492, - "rewards/rejected": -14.011421203613281, - "step": 522 - }, - { - "epoch": 0.2, - "learning_rate": 1.8334926087553727e-05, - "logits/chosen": -7.340708255767822, - "logits/rejected": -2.8953452110290527, - "logps/chosen": -348.3976135253906, - "logps/rejected": -3604.280517578125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.13144226372241974, - "rewards/margins": 27.62722396850586, - "rewards/rejected": -27.7586669921875, - "step": 523 - }, - { - "epoch": 0.2, - "learning_rate": 1.8328332589869756e-05, - "logits/chosen": -2.037400722503662, - "logits/rejected": -5.185373306274414, - "logps/chosen": -330.36199951171875, - "logps/rejected": -342.9932556152344, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.02518920972943306, - "rewards/margins": 14.05530071258545, - "rewards/rejected": -14.080490112304688, - "step": 524 - }, - { - "epoch": 0.2, - "learning_rate": 1.832172725369024e-05, - "logits/chosen": -1.1205374002456665, - "logits/rejected": -4.782687187194824, - "logps/chosen": -246.422607421875, - "logps/rejected": -184.67697143554688, - "loss": 0.0025, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.288378953933716, - "rewards/margins": 9.27126407623291, - "rewards/rejected": -11.559642791748047, - "step": 525 - }, - { - "epoch": 0.2, - "learning_rate": 1.8315110088404484e-05, - "logits/chosen": -1.3424766063690186, - "logits/rejected": -1.0271644592285156, - "logps/chosen": -544.874267578125, - "logps/rejected": -873.9482421875, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -3.793212890625, - "rewards/margins": 22.331823348999023, - "rewards/rejected": -26.125036239624023, - "step": 526 - }, - { - "epoch": 0.2, - "learning_rate": 1.8308481103418597e-05, - "logits/chosen": -0.25343167781829834, - "logits/rejected": -6.808557510375977, - "logps/chosen": -219.12094116210938, - "logps/rejected": -66.56681823730469, - "loss": 0.0036, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.05886535719037056, - "rewards/margins": 5.514759540557861, - "rewards/rejected": -5.4558939933776855, - "step": 527 - }, - { - "epoch": 0.2, - "learning_rate": 1.8301840308155507e-05, - "logits/chosen": -2.939558744430542, - "logits/rejected": -1.4721148014068604, - "logps/chosen": -590.0701904296875, - "logps/rejected": -1442.297119140625, - "loss": 0.0005, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.598004162311554, - "rewards/margins": 31.223588943481445, - "rewards/rejected": -30.625585556030273, - "step": 528 - }, - { - "epoch": 0.2, - "learning_rate": 1.8295187712054922e-05, - "logits/chosen": -0.8132861256599426, - "logits/rejected": -6.210631370544434, - "logps/chosen": -294.7041015625, - "logps/rejected": -200.29046630859375, - "loss": 0.0031, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.8611419796943665, - "rewards/margins": 11.764322280883789, - "rewards/rejected": -10.903180122375488, - "step": 529 - }, - { - "epoch": 0.2, - "learning_rate": 1.8288523324573314e-05, - "logits/chosen": -6.003669738769531, - "logits/rejected": -3.871964454650879, - "logps/chosen": -562.4854736328125, - "logps/rejected": -1436.0438232421875, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 2.4898316860198975, - "rewards/margins": 23.731164932250977, - "rewards/rejected": -21.2413330078125, - "step": 530 - }, - { - "epoch": 0.2, - "learning_rate": 1.8281847155183944e-05, - "logits/chosen": -8.104728698730469, - "logits/rejected": -2.645951271057129, - "logps/chosen": -218.5018768310547, - "logps/rejected": -1222.37548828125, - "loss": 0.0002, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.2626876831054688, - "rewards/margins": 15.846916198730469, - "rewards/rejected": -14.584228515625, - "step": 531 - }, - { - "epoch": 0.2, - "learning_rate": 1.8275159213376783e-05, - "logits/chosen": -5.602217197418213, - "logits/rejected": -1.4709194898605347, - "logps/chosen": -419.915283203125, - "logps/rejected": -1110.4442138671875, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.9277862906455994, - "rewards/margins": 17.022262573242188, - "rewards/rejected": -16.0944766998291, - "step": 532 - }, - { - "epoch": 0.2, - "learning_rate": 1.826845950865857e-05, - "logits/chosen": -2.2074878215789795, - "logits/rejected": -1.751878261566162, - "logps/chosen": -333.3983154296875, - "logps/rejected": -612.3474731445312, - "loss": 0.0064, - "rewards/accuracies": 1.0, - "rewards/chosen": 3.2109131813049316, - "rewards/margins": 12.153888702392578, - "rewards/rejected": -8.942975044250488, - "step": 533 - }, - { - "epoch": 0.2, - "learning_rate": 1.8261748050552745e-05, - "logits/chosen": 0.4237339198589325, - "logits/rejected": -4.963410377502441, - "logps/chosen": -296.1436767578125, - "logps/rejected": -151.16946411132812, - "loss": 0.0015, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.05129394680261612, - "rewards/margins": 7.564389228820801, - "rewards/rejected": -7.513095378875732, - "step": 534 - }, - { - "epoch": 0.2, - "learning_rate": 1.8255024848599466e-05, - "logits/chosen": -0.4891347587108612, - "logits/rejected": -2.1287596225738525, - "logps/chosen": -478.0455627441406, - "logps/rejected": -572.2664184570312, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.3110748529434204, - "rewards/margins": 12.348954200744629, - "rewards/rejected": -11.03787899017334, - "step": 535 - }, - { - "epoch": 0.2, - "learning_rate": 1.8248289912355575e-05, - "logits/chosen": -0.7351173162460327, - "logits/rejected": -4.597818374633789, - "logps/chosen": -619.34765625, - "logps/rejected": -424.8450927734375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.777697741985321, - "rewards/margins": 12.927774429321289, - "rewards/rejected": -12.150076866149902, - "step": 536 - }, - { - "epoch": 0.2, - "learning_rate": 1.82415432513946e-05, - "logits/chosen": -5.291231632232666, - "logits/rejected": -0.84991455078125, - "logps/chosen": -161.45468139648438, - "logps/rejected": -795.4114990234375, - "loss": 0.0013, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.9781402945518494, - "rewards/margins": 15.183486938476562, - "rewards/rejected": -14.205347061157227, - "step": 537 - }, - { - "epoch": 0.2, - "learning_rate": 1.823478487530673e-05, - "logits/chosen": -2.250722646713257, - "logits/rejected": -4.798746109008789, - "logps/chosen": -688.4336547851562, - "logps/rejected": -750.181884765625, - "loss": 0.0004, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.684735119342804, - "rewards/margins": 16.2925968170166, - "rewards/rejected": -15.607861518859863, - "step": 538 - }, - { - "epoch": 0.2, - "learning_rate": 1.8228014793698817e-05, - "logits/chosen": -0.9090595245361328, - "logits/rejected": -2.472172975540161, - "logps/chosen": -212.4492645263672, - "logps/rejected": -509.9908142089844, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.3769470155239105, - "rewards/margins": 21.069015502929688, - "rewards/rejected": -20.692068099975586, - "step": 539 - }, - { - "epoch": 0.2, - "learning_rate": 1.8221233016194343e-05, - "logits/chosen": -4.533653259277344, - "logits/rejected": -2.102548837661743, - "logps/chosen": -400.68609619140625, - "logps/rejected": -1112.031982421875, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.5408753156661987, - "rewards/margins": 19.633317947387695, - "rewards/rejected": -18.092443466186523, - "step": 540 - }, - { - "epoch": 0.2, - "learning_rate": 1.8214439552433417e-05, - "logits/chosen": -6.79493522644043, - "logits/rejected": -4.882000923156738, - "logps/chosen": -181.14895629882812, - "logps/rejected": -634.11328125, - "loss": 0.0005, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.07769928127527237, - "rewards/margins": 16.827760696411133, - "rewards/rejected": -16.75006103515625, - "step": 541 - }, - { - "epoch": 0.2, - "learning_rate": 1.8207634412072765e-05, - "logits/chosen": -0.33013641834259033, - "logits/rejected": -5.228031635284424, - "logps/chosen": -450.4801940917969, - "logps/rejected": -258.6798095703125, - "loss": 0.0022, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.8264404535293579, - "rewards/margins": 8.396499633789062, - "rewards/rejected": -9.222940444946289, - "step": 542 - }, - { - "epoch": 0.21, - "learning_rate": 1.8200817604785702e-05, - "logits/chosen": -4.3762383460998535, - "logits/rejected": -3.1154062747955322, - "logps/chosen": -310.3004150390625, - "logps/rejected": -721.2971801757812, - "loss": 0.0034, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.7406433820724487, - "rewards/margins": 17.632537841796875, - "rewards/rejected": -15.891894340515137, - "step": 543 - }, - { - "epoch": 0.21, - "learning_rate": 1.8193989140262133e-05, - "logits/chosen": -2.6018009185791016, - "logits/rejected": -1.2198255062103271, - "logps/chosen": -361.6746826171875, - "logps/rejected": -673.7255249023438, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.38009950518608093, - "rewards/margins": 11.588571548461914, - "rewards/rejected": -11.20847225189209, - "step": 544 - }, - { - "epoch": 0.21, - "learning_rate": 1.818714902820853e-05, - "logits/chosen": -1.8416721820831299, - "logits/rejected": -3.6070590019226074, - "logps/chosen": -291.1146545410156, - "logps/rejected": -279.27557373046875, - "loss": 0.0009, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.0467681884765625, - "rewards/margins": 7.41119384765625, - "rewards/rejected": -6.3644256591796875, - "step": 545 - }, - { - "epoch": 0.21, - "learning_rate": 1.818029727834793e-05, - "logits/chosen": -6.460853576660156, - "logits/rejected": -3.1649489402770996, - "logps/chosen": -362.93975830078125, - "logps/rejected": -1094.3189697265625, - "loss": 0.0126, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.2089966535568237, - "rewards/margins": 24.420557022094727, - "rewards/rejected": -25.629552841186523, - "step": 546 - }, - { - "epoch": 0.21, - "learning_rate": 1.81734339004199e-05, - "logits/chosen": -7.044636249542236, - "logits/rejected": -2.846511125564575, - "logps/chosen": -168.1865234375, - "logps/rejected": -1354.34375, - "loss": 0.0003, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.6194427609443665, - "rewards/margins": 26.41963768005371, - "rewards/rejected": -25.800195693969727, - "step": 547 - }, - { - "epoch": 0.21, - "learning_rate": 1.8166558904180547e-05, - "logits/chosen": -0.9192439913749695, - "logits/rejected": -3.482778787612915, - "logps/chosen": -169.8677520751953, - "logps/rejected": -330.76678466796875, - "loss": 0.0012, - "rewards/accuracies": 1.0, - "rewards/chosen": 2.775805711746216, - "rewards/margins": 14.212145805358887, - "rewards/rejected": -11.43634033203125, - "step": 548 - }, - { - "epoch": 0.21, - "learning_rate": 1.8159672299402493e-05, - "logits/chosen": -2.844250202178955, - "logits/rejected": -6.653464317321777, - "logps/chosen": -173.38343811035156, - "logps/rejected": -105.87471008300781, - "loss": 0.0003, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.5012298822402954, - "rewards/margins": 6.297842025756836, - "rewards/rejected": -5.79661226272583, - "step": 549 - }, - { - "epoch": 0.21, - "learning_rate": 1.815277409587485e-05, - "logits/chosen": -6.777444362640381, - "logits/rejected": -5.37290620803833, - "logps/chosen": -317.3393249511719, - "logps/rejected": -824.9920654296875, - "loss": 0.0004, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.0133453607559204, - "rewards/margins": 17.173423767089844, - "rewards/rejected": -16.160078048706055, - "step": 550 - }, - { - "epoch": 0.21, - "learning_rate": 1.8145864303403224e-05, - "logits/chosen": -4.393347263336182, - "logits/rejected": -2.5896003246307373, - "logps/chosen": -487.972900390625, - "logps/rejected": -1332.7869873046875, - "loss": 0.0026, - "rewards/accuracies": 1.0, - "rewards/chosen": -3.0735137462615967, - "rewards/margins": 5.312143325805664, - "rewards/rejected": -8.38565731048584, - "step": 551 - }, - { - "epoch": 0.21, - "learning_rate": 1.8138942931809702e-05, - "logits/chosen": -3.8123319149017334, - "logits/rejected": -3.507751226425171, - "logps/chosen": -317.624267578125, - "logps/rejected": -708.466552734375, - "loss": 0.0872, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.4023376405239105, - "rewards/margins": 16.7216854095459, - "rewards/rejected": -17.1240234375, - "step": 552 - }, - { - "epoch": 0.21, - "learning_rate": 1.8132009990932816e-05, - "logits/chosen": -5.5161614418029785, - "logits/rejected": -2.8703815937042236, - "logps/chosen": -382.48834228515625, - "logps/rejected": -1604.397216796875, - "loss": 0.0028, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.783105492591858, - "rewards/margins": 22.932703018188477, - "rewards/rejected": -21.14959716796875, - "step": 553 - }, - { - "epoch": 0.21, - "learning_rate": 1.8125065490627556e-05, - "logits/chosen": -2.7332863807678223, - "logits/rejected": -7.624388217926025, - "logps/chosen": -269.72479248046875, - "logps/rejected": -152.63211059570312, - "loss": 0.0006, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.010595679283142, - "rewards/margins": 11.470826148986816, - "rewards/rejected": -10.460230827331543, - "step": 554 - }, - { - "epoch": 0.21, - "learning_rate": 1.8118109440765334e-05, - "logits/chosen": -2.023850917816162, - "logits/rejected": -1.5872293710708618, - "logps/chosen": -693.7640991210938, - "logps/rejected": -980.0971069335938, - "loss": 0.0093, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.3330872058868408, - "rewards/margins": 13.253076553344727, - "rewards/rejected": -14.586163520812988, - "step": 555 - }, - { - "epoch": 0.21, - "learning_rate": 1.8111141851233986e-05, - "logits/chosen": -5.307364463806152, - "logits/rejected": -7.559116840362549, - "logps/chosen": -1140.57080078125, - "logps/rejected": -209.02142333984375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.6824463605880737, - "rewards/margins": 12.240979194641113, - "rewards/rejected": -13.923425674438477, - "step": 556 - }, - { - "epoch": 0.21, - "learning_rate": 1.8104162731937746e-05, - "logits/chosen": -4.049077033996582, - "logits/rejected": -4.468079090118408, - "logps/chosen": -199.86764526367188, - "logps/rejected": -45.22489929199219, - "loss": 0.0031, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.2006438970565796, - "rewards/margins": 3.7428789138793945, - "rewards/rejected": -2.5422348976135254, - "step": 557 - }, - { - "epoch": 0.21, - "learning_rate": 1.8097172092797244e-05, - "logits/chosen": -7.783496856689453, - "logits/rejected": -0.8009142875671387, - "logps/chosen": -367.01251220703125, - "logps/rejected": -2045.6085205078125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.11164551228284836, - "rewards/margins": 29.830995559692383, - "rewards/rejected": -29.94264030456543, - "step": 558 - }, - { - "epoch": 0.21, - "learning_rate": 1.8090169943749477e-05, - "logits/chosen": -4.227811813354492, - "logits/rejected": -3.271345615386963, - "logps/chosen": -388.1712646484375, - "logps/rejected": -824.1966552734375, - "loss": 0.006, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.8462952375411987, - "rewards/margins": 19.78753089904785, - "rewards/rejected": -17.94123649597168, - "step": 559 - }, - { - "epoch": 0.21, - "learning_rate": 1.8083156294747807e-05, - "logits/chosen": -1.5266149044036865, - "logits/rejected": -3.7964906692504883, - "logps/chosen": -262.49560546875, - "logps/rejected": -299.08251953125, - "loss": 0.0003, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.33357545733451843, - "rewards/margins": 18.700212478637695, - "rewards/rejected": -18.366636276245117, - "step": 560 - }, - { - "epoch": 0.21, - "learning_rate": 1.807613115576195e-05, - "logits/chosen": -2.361276388168335, - "logits/rejected": -1.763494610786438, - "logps/chosen": -361.91546630859375, - "logps/rejected": -625.711181640625, - "loss": 0.0006, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.6172515749931335, - "rewards/margins": 15.782944679260254, - "rewards/rejected": -16.400196075439453, - "step": 561 - }, - { - "epoch": 0.21, - "learning_rate": 1.8069094536777938e-05, - "logits/chosen": -1.8309279680252075, - "logits/rejected": -4.845731735229492, - "logps/chosen": -203.95599365234375, - "logps/rejected": -233.74989318847656, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.0733642578125, - "rewards/margins": 13.051241874694824, - "rewards/rejected": -15.124606132507324, - "step": 562 - }, - { - "epoch": 0.21, - "learning_rate": 1.806204644779814e-05, - "logits/chosen": -3.50203013420105, - "logits/rejected": -2.4582619667053223, - "logps/chosen": -359.376708984375, - "logps/rejected": -1230.62060546875, - "loss": 0.0029, - "rewards/accuracies": 1.0, - "rewards/chosen": 4.158410549163818, - "rewards/margins": 47.12646484375, - "rewards/rejected": -42.968055725097656, - "step": 563 - }, - { - "epoch": 0.21, - "learning_rate": 1.8054986898841217e-05, - "logits/chosen": -1.7340952157974243, - "logits/rejected": -7.8657636642456055, - "logps/chosen": -262.5947265625, - "logps/rejected": -87.59368133544922, - "loss": 0.0002, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.6566528677940369, - "rewards/margins": 6.6015424728393555, - "rewards/rejected": -5.944889545440674, - "step": 564 - }, - { - "epoch": 0.21, - "learning_rate": 1.804791589994212e-05, - "logits/chosen": -5.09386682510376, - "logits/rejected": -0.49742573499679565, - "logps/chosen": -554.7858276367188, - "logps/rejected": -1192.97607421875, - "loss": 0.0034, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.260870337486267, - "rewards/margins": 19.874536514282227, - "rewards/rejected": -21.135406494140625, - "step": 565 - }, - { - "epoch": 0.21, - "learning_rate": 1.804083346115208e-05, - "logits/chosen": -6.62583065032959, - "logits/rejected": -2.4023303985595703, - "logps/chosen": -324.38043212890625, - "logps/rejected": -1394.967529296875, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -3.80645751953125, - "rewards/margins": 25.70643424987793, - "rewards/rejected": -29.51289176940918, - "step": 566 - }, - { - "epoch": 0.21, - "learning_rate": 1.8033739592538598e-05, - "logits/chosen": -6.168702602386475, - "logits/rejected": 0.000661405036225915, - "logps/chosen": -412.6188659667969, - "logps/rejected": -3426.073486328125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 3.4442689418792725, - "rewards/margins": 73.66541290283203, - "rewards/rejected": -70.22114562988281, - "step": 567 - }, - { - "epoch": 0.21, - "learning_rate": 1.80266343041854e-05, - "logits/chosen": -4.149428367614746, - "logits/rejected": -0.5466620922088623, - "logps/chosen": -351.4603271484375, - "logps/rejected": -941.34619140625, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.766510009765625, - "rewards/margins": 23.09819984436035, - "rewards/rejected": -21.331689834594727, - "step": 568 - }, - { - "epoch": 0.22, - "learning_rate": 1.8019517606192467e-05, - "logits/chosen": -7.353021144866943, - "logits/rejected": -1.4993327856063843, - "logps/chosen": -163.24771118164062, - "logps/rejected": -830.7023315429688, - "loss": 0.0005, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.2117919921875, - "rewards/margins": 10.796673774719238, - "rewards/rejected": -11.008465766906738, - "step": 569 - }, - { - "epoch": 0.22, - "learning_rate": 1.8012389508675984e-05, - "logits/chosen": -7.83337926864624, - "logits/rejected": -1.1194822788238525, - "logps/chosen": -233.73605346679688, - "logps/rejected": -2332.679931640625, - "loss": 0.0003, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.9800262451171875, - "rewards/margins": 27.284835815429688, - "rewards/rejected": -26.3048095703125, - "step": 570 - }, - { - "epoch": 0.22, - "learning_rate": 1.800525002176835e-05, - "logits/chosen": -1.461614727973938, - "logits/rejected": -4.1684441566467285, - "logps/chosen": -285.38720703125, - "logps/rejected": -336.76226806640625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.381231665611267, - "rewards/margins": 13.577509880065918, - "rewards/rejected": -12.19627857208252, - "step": 571 - }, - { - "epoch": 0.22, - "learning_rate": 1.7998099155618147e-05, - "logits/chosen": -7.589863300323486, - "logits/rejected": -1.8673220872879028, - "logps/chosen": -352.4530944824219, - "logps/rejected": -1420.571533203125, - "loss": 0.0336, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.7822906374931335, - "rewards/margins": 22.37522315979004, - "rewards/rejected": -21.592931747436523, - "step": 572 - }, - { - "epoch": 0.22, - "learning_rate": 1.7990936920390133e-05, - "logits/chosen": -4.324038982391357, - "logits/rejected": -5.3409342765808105, - "logps/chosen": -102.9288330078125, - "logps/rejected": -222.77462768554688, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.14238739013671875, - "rewards/margins": 12.871185302734375, - "rewards/rejected": -13.013572692871094, - "step": 573 - }, - { - "epoch": 0.22, - "learning_rate": 1.798376332626523e-05, - "logits/chosen": -6.026737213134766, - "logits/rejected": -1.7478866577148438, - "logps/chosen": -286.732666015625, - "logps/rejected": -866.2655029296875, - "loss": 0.0004, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.3834228515625, - "rewards/margins": 13.753686904907227, - "rewards/rejected": -15.137109756469727, - "step": 574 - }, - { - "epoch": 0.22, - "learning_rate": 1.7976578383440502e-05, - "logits/chosen": -1.0817394256591797, - "logits/rejected": -2.6853787899017334, - "logps/chosen": -424.3298645019531, - "logps/rejected": -590.3975830078125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 2.2302186489105225, - "rewards/margins": 27.60320472717285, - "rewards/rejected": -25.37298583984375, - "step": 575 - }, - { - "epoch": 0.22, - "learning_rate": 1.7969382102129153e-05, - "logits/chosen": -2.297451972961426, - "logits/rejected": -5.826806545257568, - "logps/chosen": -498.6026611328125, - "logps/rejected": -195.45956420898438, - "loss": 0.0007, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.477868676185608, - "rewards/margins": 7.544229030609131, - "rewards/rejected": -6.0663604736328125, - "step": 576 - }, - { - "epoch": 0.22, - "learning_rate": 1.7962174492560492e-05, - "logits/chosen": -6.030299186706543, - "logits/rejected": -3.414401054382324, - "logps/chosen": -445.29425048828125, - "logps/rejected": -1069.66943359375, - "loss": 0.0018, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.3811584413051605, - "rewards/margins": 31.970142364501953, - "rewards/rejected": -31.5889835357666, - "step": 577 - }, - { - "epoch": 0.22, - "learning_rate": 1.7954955564979944e-05, - "logits/chosen": -6.361799716949463, - "logits/rejected": -4.249096393585205, - "logps/chosen": -194.58592224121094, - "logps/rejected": -754.918212890625, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.4863204956054688, - "rewards/margins": 22.265974044799805, - "rewards/rejected": -23.752294540405273, - "step": 578 - }, - { - "epoch": 0.22, - "learning_rate": 1.794772532964901e-05, - "logits/chosen": -1.7985516786575317, - "logits/rejected": -7.110722541809082, - "logps/chosen": -348.68963623046875, - "logps/rejected": -188.10369873046875, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.809869408607483, - "rewards/margins": 9.874857902526855, - "rewards/rejected": -11.684727668762207, - "step": 579 - }, - { - "epoch": 0.22, - "learning_rate": 1.7940483796845275e-05, - "logits/chosen": -3.2984209060668945, - "logits/rejected": -3.8523476123809814, - "logps/chosen": -207.65823364257812, - "logps/rejected": -453.6859130859375, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.1020218133926392, - "rewards/margins": 19.029273986816406, - "rewards/rejected": -20.131296157836914, - "step": 580 - }, - { - "epoch": 0.22, - "learning_rate": 1.7933230976862375e-05, - "logits/chosen": -2.1843764781951904, - "logits/rejected": -4.411350727081299, - "logps/chosen": -222.86062622070312, - "logps/rejected": -387.65435791015625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.554583728313446, - "rewards/margins": 21.97037696838379, - "rewards/rejected": -21.41579246520996, - "step": 581 - }, - { - "epoch": 0.22, - "learning_rate": 1.7925966880009998e-05, - "logits/chosen": -3.1807262897491455, - "logits/rejected": -3.2121236324310303, - "logps/chosen": -244.95884704589844, - "logps/rejected": -302.326171875, - "loss": 0.0015, - "rewards/accuracies": 1.0, - "rewards/chosen": -3.9493119716644287, - "rewards/margins": 4.432844161987305, - "rewards/rejected": -8.382156372070312, - "step": 582 - }, - { - "epoch": 0.22, - "learning_rate": 1.7918691516613855e-05, - "logits/chosen": -3.9167046546936035, - "logits/rejected": -5.817309856414795, - "logps/chosen": -252.84483337402344, - "logps/rejected": -380.48602294921875, - "loss": 0.0004, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.40672767162323, - "rewards/margins": 14.041007995605469, - "rewards/rejected": -15.447735786437988, - "step": 583 - }, - { - "epoch": 0.22, - "learning_rate": 1.7911404897015674e-05, - "logits/chosen": -4.499810218811035, - "logits/rejected": -3.09801983833313, - "logps/chosen": -414.2567138671875, - "logps/rejected": -789.5499267578125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.4321686029434204, - "rewards/margins": 17.33902359008789, - "rewards/rejected": -15.906854629516602, - "step": 584 - }, - { - "epoch": 0.22, - "learning_rate": 1.7904107031573186e-05, - "logits/chosen": -4.746896743774414, - "logits/rejected": -4.28007173538208, - "logps/chosen": -163.59323120117188, - "logps/rejected": -322.733642578125, - "loss": 0.0006, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.7243179678916931, - "rewards/margins": 12.895644187927246, - "rewards/rejected": -13.619961738586426, - "step": 585 - }, - { - "epoch": 0.22, - "learning_rate": 1.7896797930660107e-05, - "logits/chosen": -4.66705322265625, - "logits/rejected": -3.4477527141571045, - "logps/chosen": -414.440185546875, - "logps/rejected": -703.8245849609375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.465249627828598, - "rewards/margins": 16.788528442382812, - "rewards/rejected": -16.323278427124023, - "step": 586 - }, - { - "epoch": 0.22, - "learning_rate": 1.7889477604666124e-05, - "logits/chosen": -2.9418630599975586, - "logits/rejected": -4.266529083251953, - "logps/chosen": -365.00335693359375, - "logps/rejected": -486.6495056152344, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": 2.1656830310821533, - "rewards/margins": 21.212099075317383, - "rewards/rejected": -19.046415328979492, - "step": 587 - }, - { - "epoch": 0.22, - "learning_rate": 1.788214606399687e-05, - "logits/chosen": -1.1088074445724487, - "logits/rejected": -2.8742878437042236, - "logps/chosen": -811.2130126953125, - "logps/rejected": -798.576416015625, - "loss": 0.0013, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.4213500916957855, - "rewards/margins": 18.88909339904785, - "rewards/rejected": -18.467742919921875, - "step": 588 - }, - { - "epoch": 0.22, - "learning_rate": 1.787480331907394e-05, - "logits/chosen": -3.0855913162231445, - "logits/rejected": -1.9655506610870361, - "logps/chosen": -490.10845947265625, - "logps/rejected": -912.6057739257812, - "loss": 0.0039, - "rewards/accuracies": 1.0, - "rewards/chosen": 2.13690185546875, - "rewards/margins": 12.030688285827637, - "rewards/rejected": -9.893786430358887, - "step": 589 - }, - { - "epoch": 0.22, - "learning_rate": 1.7867449380334834e-05, - "logits/chosen": -1.886336326599121, - "logits/rejected": -4.050681114196777, - "logps/chosen": -390.534423828125, - "logps/rejected": -563.8286743164062, - "loss": 0.0087, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.8975311517715454, - "rewards/margins": 19.572757720947266, - "rewards/rejected": -18.67522621154785, - "step": 590 - }, - { - "epoch": 0.22, - "learning_rate": 1.7860084258232978e-05, - "logits/chosen": -1.0469175577163696, - "logits/rejected": -3.028642416000366, - "logps/chosen": -543.2937622070312, - "logps/rejected": -899.8175048828125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.797210693359375, - "rewards/margins": 40.10227584838867, - "rewards/rejected": -40.89948654174805, - "step": 591 - }, - { - "epoch": 0.22, - "learning_rate": 1.785270796323769e-05, - "logits/chosen": -4.71299409866333, - "logits/rejected": -0.9760003089904785, - "logps/chosen": -329.8174133300781, - "logps/rejected": -1034.6302490234375, - "loss": 0.0019, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.48411867022514343, - "rewards/margins": 28.02851104736328, - "rewards/rejected": -27.544391632080078, - "step": 592 - }, - { - "epoch": 0.22, - "learning_rate": 1.7845320505834176e-05, - "logits/chosen": -5.148956775665283, - "logits/rejected": -0.8134559988975525, - "logps/chosen": -482.59332275390625, - "logps/rejected": -952.4432373046875, - "loss": 0.0002, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.36461183428764343, - "rewards/margins": 7.814910888671875, - "rewards/rejected": -8.179522514343262, - "step": 593 - }, - { - "epoch": 0.22, - "learning_rate": 1.783792189652349e-05, - "logits/chosen": -1.6458574533462524, - "logits/rejected": -5.542053699493408, - "logps/chosen": -268.4090881347656, - "logps/rejected": -114.87380981445312, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.3845581114292145, - "rewards/margins": 7.0193023681640625, - "rewards/rejected": -7.403860569000244, - "step": 594 - }, - { - "epoch": 0.22, - "learning_rate": 1.7830512145822564e-05, - "logits/chosen": -3.5950493812561035, - "logits/rejected": -2.3685460090637207, - "logps/chosen": -249.09414672851562, - "logps/rejected": -393.5234680175781, - "loss": 0.0003, - "rewards/accuracies": 1.0, - "rewards/chosen": -5.131717205047607, - "rewards/margins": 6.746127605438232, - "rewards/rejected": -11.87784481048584, - "step": 595 - }, - { - "epoch": 0.23, - "learning_rate": 1.782309126426415e-05, - "logits/chosen": -1.1601828336715698, - "logits/rejected": -5.438084602355957, - "logps/chosen": -592.7842407226562, - "logps/rejected": -604.918212890625, - "loss": 0.0067, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.23218993842601776, - "rewards/margins": 31.43593978881836, - "rewards/rejected": -31.203750610351562, - "step": 596 - }, - { - "epoch": 0.23, - "learning_rate": 1.7815659262396825e-05, - "logits/chosen": -5.701388835906982, - "logits/rejected": -2.231379270553589, - "logps/chosen": -399.8741455078125, - "logps/rejected": -1311.821044921875, - "loss": 0.0074, - "rewards/accuracies": 1.0, - "rewards/chosen": 2.6389405727386475, - "rewards/margins": 40.383750915527344, - "rewards/rejected": -37.74481201171875, - "step": 597 - }, - { - "epoch": 0.23, - "learning_rate": 1.7808216150784977e-05, - "logits/chosen": -5.63054895401001, - "logits/rejected": -3.1565186977386475, - "logps/chosen": -237.09976196289062, - "logps/rejected": -545.7921752929688, - "loss": 0.0002, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.6226593255996704, - "rewards/margins": 6.444650173187256, - "rewards/rejected": -8.067309379577637, - "step": 598 - }, - { - "epoch": 0.23, - "learning_rate": 1.780076194000879e-05, - "logits/chosen": 0.3118344247341156, - "logits/rejected": -1.887911319732666, - "logps/chosen": -268.84197998046875, - "logps/rejected": -624.7257690429688, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.3687866926193237, - "rewards/margins": 25.880056381225586, - "rewards/rejected": -24.51127052307129, - "step": 599 - }, - { - "epoch": 0.23, - "learning_rate": 1.7793296640664205e-05, - "logits/chosen": -8.321109771728516, - "logits/rejected": -1.9085779190063477, - "logps/chosen": -556.7095336914062, - "logps/rejected": -3132.025634765625, - "loss": 0.0098, - "rewards/accuracies": 1.0, - "rewards/chosen": -4.084393501281738, - "rewards/margins": 22.90068817138672, - "rewards/rejected": -26.985082626342773, - "step": 600 - }, - { - "epoch": 0.23, - "learning_rate": 1.7785820263362953e-05, - "logits/chosen": -2.1745243072509766, - "logits/rejected": -1.0488945245742798, - "logps/chosen": -360.0451354980469, - "logps/rejected": -768.971435546875, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.9955536127090454, - "rewards/margins": 20.873336791992188, - "rewards/rejected": -22.8688907623291, - "step": 601 - }, - { - "epoch": 0.23, - "learning_rate": 1.7778332818732492e-05, - "logits/chosen": -2.480034828186035, - "logits/rejected": -1.6205447912216187, - "logps/chosen": -403.77130126953125, - "logps/rejected": -777.2459716796875, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.8835022449493408, - "rewards/margins": 22.1325626373291, - "rewards/rejected": -24.01606559753418, - "step": 602 - }, - { - "epoch": 0.23, - "learning_rate": 1.7770834317416018e-05, - "logits/chosen": -0.8606796264648438, - "logits/rejected": -2.425614833831787, - "logps/chosen": -220.40367126464844, - "logps/rejected": -588.5374755859375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.155842587351799, - "rewards/margins": 28.0528564453125, - "rewards/rejected": -28.208698272705078, - "step": 603 - }, - { - "epoch": 0.23, - "learning_rate": 1.776332477007245e-05, - "logits/chosen": -2.8262128829956055, - "logits/rejected": -1.8838363885879517, - "logps/chosen": -359.1390380859375, - "logps/rejected": -718.4265747070312, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 2.036407470703125, - "rewards/margins": 14.98590087890625, - "rewards/rejected": -12.949493408203125, - "step": 604 - }, - { - "epoch": 0.23, - "learning_rate": 1.7755804187376398e-05, - "logits/chosen": -1.9789873361587524, - "logits/rejected": -7.126310348510742, - "logps/chosen": -457.28411865234375, - "logps/rejected": -299.10089111328125, - "loss": 0.0048, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.4227325916290283, - "rewards/margins": 13.960465431213379, - "rewards/rejected": -16.383197784423828, - "step": 605 - }, - { - "epoch": 0.23, - "learning_rate": 1.7748272580018168e-05, - "logits/chosen": -2.8707852363586426, - "logits/rejected": -2.914442539215088, - "logps/chosen": -306.2437744140625, - "logps/rejected": -590.7698974609375, - "loss": 0.0004, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.442657470703125, - "rewards/margins": 16.748165130615234, - "rewards/rejected": -15.305508613586426, - "step": 606 - }, - { - "epoch": 0.23, - "learning_rate": 1.7740729958703725e-05, - "logits/chosen": -3.298382043838501, - "logits/rejected": -4.1950364112854, - "logps/chosen": -180.51834106445312, - "logps/rejected": -254.51312255859375, - "loss": 0.0004, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.3006347715854645, - "rewards/margins": 5.644601821899414, - "rewards/rejected": -5.343966960906982, - "step": 607 - }, - { - "epoch": 0.23, - "learning_rate": 1.7733176334154704e-05, - "logits/chosen": -1.4107491970062256, - "logits/rejected": -4.005028247833252, - "logps/chosen": -544.162841796875, - "logps/rejected": -500.4497985839844, - "loss": 0.0002, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.10710449516773224, - "rewards/margins": 23.39683723449707, - "rewards/rejected": -23.289731979370117, - "step": 608 - }, - { - "epoch": 0.23, - "learning_rate": 1.772561171710837e-05, - "logits/chosen": -3.600465774536133, - "logits/rejected": -0.6459125876426697, - "logps/chosen": -531.4168701171875, - "logps/rejected": -1013.6046142578125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.3988891541957855, - "rewards/margins": 14.139562606811523, - "rewards/rejected": -14.5384521484375, - "step": 609 - }, - { - "epoch": 0.23, - "learning_rate": 1.771803611831762e-05, - "logits/chosen": -6.557182788848877, - "logits/rejected": -6.526876449584961, - "logps/chosen": -290.445068359375, - "logps/rejected": -613.3724365234375, - "loss": 0.0016, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.910296618938446, - "rewards/margins": 11.2015380859375, - "rewards/rejected": -12.111834526062012, - "step": 610 - }, - { - "epoch": 0.23, - "learning_rate": 1.7710449548550964e-05, - "logits/chosen": -1.1337122917175293, - "logits/rejected": -0.47098079323768616, - "logps/chosen": -363.74468994140625, - "logps/rejected": -815.2286376953125, - "loss": 0.0014, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.6891968250274658, - "rewards/margins": 26.479707717895508, - "rewards/rejected": -24.790510177612305, - "step": 611 - }, - { - "epoch": 0.23, - "learning_rate": 1.7702852018592493e-05, - "logits/chosen": -0.6965864300727844, - "logits/rejected": -2.9704384803771973, - "logps/chosen": -301.7499084472656, - "logps/rejected": -552.3301391601562, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.8778106570243835, - "rewards/margins": 21.2437801361084, - "rewards/rejected": -22.121591567993164, - "step": 612 - }, - { - "epoch": 0.23, - "learning_rate": 1.7695243539241893e-05, - "logits/chosen": -1.5946873426437378, - "logits/rejected": -3.5659189224243164, - "logps/chosen": -281.73931884765625, - "logps/rejected": -434.27099609375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.9795593619346619, - "rewards/margins": 17.4081974029541, - "rewards/rejected": -16.428638458251953, - "step": 613 - }, - { - "epoch": 0.23, - "learning_rate": 1.7687624121314406e-05, - "logits/chosen": -4.959830284118652, - "logits/rejected": -1.010427474975586, - "logps/chosen": -329.1566162109375, - "logps/rejected": -1037.301025390625, - "loss": 0.0895, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.12200317531824112, - "rewards/margins": 17.257898330688477, - "rewards/rejected": -17.379901885986328, - "step": 614 - }, - { - "epoch": 0.23, - "learning_rate": 1.7679993775640824e-05, - "logits/chosen": -0.8079631328582764, - "logits/rejected": -6.402887344360352, - "logps/chosen": -342.4170837402344, - "logps/rejected": -271.05145263671875, - "loss": 0.0016, - "rewards/accuracies": 1.0, - "rewards/chosen": 3.1043426990509033, - "rewards/margins": 18.089012145996094, - "rewards/rejected": -14.98466968536377, - "step": 615 - }, - { - "epoch": 0.23, - "learning_rate": 1.7672352513067476e-05, - "logits/chosen": -1.020118236541748, - "logits/rejected": -1.1442584991455078, - "logps/chosen": -510.94427490234375, - "logps/rejected": -807.9567260742188, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.8538970947265625, - "rewards/margins": 22.67612648010254, - "rewards/rejected": -23.5300235748291, - "step": 616 - }, - { - "epoch": 0.23, - "learning_rate": 1.7664700344456198e-05, - "logits/chosen": -3.9293148517608643, - "logits/rejected": -1.6672194004058838, - "logps/chosen": -417.0105895996094, - "logps/rejected": -999.2802124023438, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.0485016107559204, - "rewards/margins": 22.630699157714844, - "rewards/rejected": -21.582197189331055, - "step": 617 - }, - { - "epoch": 0.23, - "learning_rate": 1.7657037280684352e-05, - "logits/chosen": -2.1778604984283447, - "logits/rejected": -2.234915018081665, - "logps/chosen": -656.6778564453125, - "logps/rejected": -1004.0933837890625, - "loss": 0.0018, - "rewards/accuracies": 1.0, - "rewards/chosen": 2.798510789871216, - "rewards/margins": 22.997095108032227, - "rewards/rejected": -20.198583602905273, - "step": 618 - }, - { - "epoch": 0.23, - "learning_rate": 1.764936333264476e-05, - "logits/chosen": -1.914002537727356, - "logits/rejected": -7.452852249145508, - "logps/chosen": -376.7113037109375, - "logps/rejected": -194.604736328125, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -4.025475978851318, - "rewards/margins": 7.173206806182861, - "rewards/rejected": -11.19868278503418, - "step": 619 - }, - { - "epoch": 0.23, - "learning_rate": 1.7641678511245733e-05, - "logits/chosen": -3.208407402038574, - "logits/rejected": -2.077291965484619, - "logps/chosen": -290.2919921875, - "logps/rejected": -926.8406372070312, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.7011383175849915, - "rewards/margins": 34.316226959228516, - "rewards/rejected": -35.017364501953125, - "step": 620 - }, - { - "epoch": 0.23, - "learning_rate": 1.763398282741103e-05, - "logits/chosen": -7.715721607208252, - "logits/rejected": -2.230985164642334, - "logps/chosen": -385.50628662109375, - "logps/rejected": -1742.132568359375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.43357545137405396, - "rewards/margins": 20.367387771606445, - "rewards/rejected": -19.933813095092773, - "step": 621 - }, - { - "epoch": 0.24, - "learning_rate": 1.762627629207986e-05, - "logits/chosen": -1.933334469795227, - "logits/rejected": -1.0404918193817139, - "logps/chosen": -318.9725341796875, - "logps/rejected": -672.8564453125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.15422669053077698, - "rewards/margins": 23.646142959594727, - "rewards/rejected": -23.49191665649414, - "step": 622 - }, - { - "epoch": 0.24, - "learning_rate": 1.761855891620684e-05, - "logits/chosen": -6.225937366485596, - "logits/rejected": -6.048980236053467, - "logps/chosen": -254.11605834960938, - "logps/rejected": -2351.12353515625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.47086793184280396, - "rewards/margins": 36.648799896240234, - "rewards/rejected": -37.119667053222656, - "step": 623 - }, - { - "epoch": 0.24, - "learning_rate": 1.7610830710762022e-05, - "logits/chosen": -2.1580445766448975, - "logits/rejected": -2.2113239765167236, - "logps/chosen": -298.8319396972656, - "logps/rejected": -431.4958801269531, - "loss": 0.0012, - "rewards/accuracies": 1.0, - "rewards/chosen": -3.14725661277771, - "rewards/margins": 13.06667709350586, - "rewards/rejected": -16.21393394470215, - "step": 624 - }, - { - "epoch": 0.24, - "learning_rate": 1.7603091686730827e-05, - "logits/chosen": -5.646581649780273, - "logits/rejected": -3.5668866634368896, - "logps/chosen": -287.5299987792969, - "logps/rejected": -618.4278564453125, - "loss": 0.0012, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.6093658208847046, - "rewards/margins": 12.173056602478027, - "rewards/rejected": -13.782422065734863, - "step": 625 - }, - { - "epoch": 0.24, - "learning_rate": 1.7595341855114065e-05, - "logits/chosen": -8.032143592834473, - "logits/rejected": -2.526287794113159, - "logps/chosen": -333.04052734375, - "logps/rejected": -1235.408447265625, - "loss": 0.0002, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.002227783203125, - "rewards/margins": 9.193804740905762, - "rewards/rejected": -9.191576957702637, - "step": 626 - }, - { - "epoch": 0.24, - "learning_rate": 1.758758122692791e-05, - "logits/chosen": -2.0191597938537598, - "logits/rejected": -3.6637351512908936, - "logps/chosen": -370.69818115234375, - "logps/rejected": -455.6361083984375, - "loss": 0.0093, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.2663697004318237, - "rewards/margins": 18.79053497314453, - "rewards/rejected": -20.056903839111328, - "step": 627 - }, - { - "epoch": 0.24, - "learning_rate": 1.7579809813203883e-05, - "logits/chosen": -8.414055824279785, - "logits/rejected": -1.3566728830337524, - "logps/chosen": -258.2325439453125, - "logps/rejected": -1733.51806640625, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.8750961422920227, - "rewards/margins": 44.18911361694336, - "rewards/rejected": -45.064208984375, - "step": 628 - }, - { - "epoch": 0.24, - "learning_rate": 1.757202762498883e-05, - "logits/chosen": -2.2922956943511963, - "logits/rejected": -1.5358130931854248, - "logps/chosen": -345.1392822265625, - "logps/rejected": -586.3059692382812, - "loss": 0.0002, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.2161743640899658, - "rewards/margins": 13.481119155883789, - "rewards/rejected": -12.264945030212402, - "step": 629 - }, - { - "epoch": 0.24, - "learning_rate": 1.7564234673344928e-05, - "logits/chosen": -7.610987663269043, - "logits/rejected": -1.6441690921783447, - "logps/chosen": -416.06903076171875, - "logps/rejected": -1658.59814453125, - "loss": 0.0154, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.503265380859375, - "rewards/margins": 17.770124435424805, - "rewards/rejected": -18.27338981628418, - "step": 630 - }, - { - "epoch": 0.24, - "learning_rate": 1.7556430969349633e-05, - "logits/chosen": -2.1394073963165283, - "logits/rejected": -2.8358421325683594, - "logps/chosen": -180.03988647460938, - "logps/rejected": -274.5108337402344, - "loss": 0.0006, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.3660888671875, - "rewards/margins": 7.692752361297607, - "rewards/rejected": -7.326663494110107, - "step": 631 - }, - { - "epoch": 0.24, - "learning_rate": 1.7548616524095697e-05, - "logits/chosen": -1.9897600412368774, - "logits/rejected": -2.42767596244812, - "logps/chosen": -223.23834228515625, - "logps/rejected": -329.88677978515625, - "loss": 0.022, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.619494616985321, - "rewards/margins": 11.165613174438477, - "rewards/rejected": -10.54611873626709, - "step": 632 - }, - { - "epoch": 0.24, - "learning_rate": 1.7540791348691144e-05, - "logits/chosen": -1.9182827472686768, - "logits/rejected": -1.5233778953552246, - "logps/chosen": -245.22543334960938, - "logps/rejected": -476.4537353515625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.3609436750411987, - "rewards/margins": 16.051530838012695, - "rewards/rejected": -17.412473678588867, - "step": 633 - }, - { - "epoch": 0.24, - "learning_rate": 1.7532955454259236e-05, - "logits/chosen": -5.199847221374512, - "logits/rejected": -1.3395872116088867, - "logps/chosen": -219.94390869140625, - "logps/rejected": -842.659423828125, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.769305408000946, - "rewards/margins": 27.030855178833008, - "rewards/rejected": -26.26154899597168, - "step": 634 - }, - { - "epoch": 0.24, - "learning_rate": 1.752510885193849e-05, - "logits/chosen": -6.310189247131348, - "logits/rejected": -3.0161986351013184, - "logps/chosen": -360.376220703125, - "logps/rejected": -794.9297485351562, - "loss": 0.0053, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.8986145257949829, - "rewards/margins": 12.747730255126953, - "rewards/rejected": -11.849115371704102, - "step": 635 - }, - { - "epoch": 0.24, - "learning_rate": 1.751725155288263e-05, - "logits/chosen": -4.768376350402832, - "logits/rejected": -1.3203065395355225, - "logps/chosen": -432.12738037109375, - "logps/rejected": -1262.419921875, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.3121276795864105, - "rewards/margins": 25.678730010986328, - "rewards/rejected": -25.366601943969727, - "step": 636 - }, - { - "epoch": 0.24, - "learning_rate": 1.7509383568260597e-05, - "logits/chosen": -1.120279312133789, - "logits/rejected": -6.187751293182373, - "logps/chosen": -344.60693359375, - "logps/rejected": -156.02438354492188, - "loss": 0.0002, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.3782592713832855, - "rewards/margins": 9.498139381408691, - "rewards/rejected": -9.876399040222168, - "step": 637 - }, - { - "epoch": 0.24, - "learning_rate": 1.750150490925651e-05, - "logits/chosen": -4.59021520614624, - "logits/rejected": -2.013643264770508, - "logps/chosen": -231.194580078125, - "logps/rejected": -555.2468872070312, - "loss": 0.0021, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.6313018798828125, - "rewards/margins": 4.7390594482421875, - "rewards/rejected": -7.370361328125, - "step": 638 - }, - { - "epoch": 0.24, - "learning_rate": 1.749361558706966e-05, - "logits/chosen": -6.648839950561523, - "logits/rejected": -1.668926477432251, - "logps/chosen": -223.76112365722656, - "logps/rejected": -1310.030517578125, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.48860475420951843, - "rewards/margins": 30.1833553314209, - "rewards/rejected": -30.671960830688477, - "step": 639 - }, - { - "epoch": 0.24, - "learning_rate": 1.7485715612914513e-05, - "logits/chosen": -9.654121398925781, - "logits/rejected": -3.8681106567382812, - "logps/chosen": -258.4185791015625, - "logps/rejected": -1621.77099609375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.21313171088695526, - "rewards/margins": 12.89587688446045, - "rewards/rejected": -13.1090087890625, - "step": 640 - }, - { - "epoch": 0.24, - "learning_rate": 1.7477804998020658e-05, - "logits/chosen": -6.618179798126221, - "logits/rejected": -2.536348581314087, - "logps/chosen": -1007.6934204101562, - "logps/rejected": -2230.646240234375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.001470923423767, - "rewards/margins": 14.984394073486328, - "rewards/rejected": -15.985864639282227, - "step": 641 - }, - { - "epoch": 0.24, - "learning_rate": 1.7469883753632817e-05, - "logits/chosen": -1.6359635591506958, - "logits/rejected": -4.660618782043457, - "logps/chosen": -341.1754150390625, - "logps/rejected": -290.53875732421875, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.7224792838096619, - "rewards/margins": 10.75232219696045, - "rewards/rejected": -10.0298433303833, - "step": 642 - }, - { - "epoch": 0.24, - "learning_rate": 1.7461951891010822e-05, - "logits/chosen": -5.430933952331543, - "logits/rejected": -5.824768543243408, - "logps/chosen": -207.43661499023438, - "logps/rejected": -369.89703369140625, - "loss": 0.0004, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.9891998171806335, - "rewards/margins": 13.34110164642334, - "rewards/rejected": -14.330301284790039, - "step": 643 - }, - { - "epoch": 0.24, - "learning_rate": 1.74540094214296e-05, - "logits/chosen": -3.6327030658721924, - "logits/rejected": -6.041994094848633, - "logps/chosen": -205.18148803710938, - "logps/rejected": -70.79945373535156, - "loss": 0.0021, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.14862060546875, - "rewards/margins": 4.264625072479248, - "rewards/rejected": -4.116004467010498, - "step": 644 - }, - { - "epoch": 0.24, - "learning_rate": 1.7446056356179145e-05, - "logits/chosen": -1.233156681060791, - "logits/rejected": -2.879155397415161, - "logps/chosen": -266.29205322265625, - "logps/rejected": -414.60784912109375, - "loss": 0.0188, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.2966766357421875, - "rewards/margins": 16.91193962097168, - "rewards/rejected": -16.615262985229492, - "step": 645 - }, - { - "epoch": 0.24, - "learning_rate": 1.7438092706564527e-05, - "logits/chosen": -1.606119990348816, - "logits/rejected": -2.8255615234375, - "logps/chosen": -280.57110595703125, - "logps/rejected": -388.74658203125, - "loss": 0.0002, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.4557128846645355, - "rewards/margins": 14.927692413330078, - "rewards/rejected": -14.471979141235352, - "step": 646 - }, - { - "epoch": 0.24, - "learning_rate": 1.743011848390585e-05, - "logits/chosen": -1.5382044315338135, - "logits/rejected": -1.8334705829620361, - "logps/chosen": -487.60888671875, - "logps/rejected": -618.751220703125, - "loss": 0.0034, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.5333709716796875, - "rewards/margins": 11.725634574890137, - "rewards/rejected": -12.259005546569824, - "step": 647 - }, - { - "epoch": 0.24, - "learning_rate": 1.742213369953826e-05, - "logits/chosen": -3.008532762527466, - "logits/rejected": -6.571540355682373, - "logps/chosen": -259.11138916015625, - "logps/rejected": -304.4980773925781, - "loss": 0.0006, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.81640625, - "rewards/margins": 16.874265670776367, - "rewards/rejected": -16.057859420776367, - "step": 648 - }, - { - "epoch": 0.25, - "learning_rate": 1.74141383648119e-05, - "logits/chosen": -2.6308560371398926, - "logits/rejected": -0.8025249242782593, - "logps/chosen": -438.10504150390625, - "logps/rejected": -1186.93115234375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.063909888267517, - "rewards/margins": 35.71025848388672, - "rewards/rejected": -36.774169921875, - "step": 649 - }, - { - "epoch": 0.25, - "learning_rate": 1.7406132491091922e-05, - "logits/chosen": -5.578739643096924, - "logits/rejected": -1.5422563552856445, - "logps/chosen": -497.98785400390625, - "logps/rejected": -1110.036376953125, - "loss": 0.0011, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.270654320716858, - "rewards/margins": 10.285876274108887, - "rewards/rejected": -11.556530952453613, - "step": 650 - }, - { - "epoch": 0.25, - "learning_rate": 1.7398116089758454e-05, - "logits/chosen": -2.6152985095977783, - "logits/rejected": -6.606853485107422, - "logps/chosen": -676.7291259765625, - "logps/rejected": -551.9659423828125, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.37324219942092896, - "rewards/margins": 17.288654327392578, - "rewards/rejected": -17.661895751953125, - "step": 651 - }, - { - "epoch": 0.25, - "learning_rate": 1.7390089172206594e-05, - "logits/chosen": -3.9234859943389893, - "logits/rejected": -2.6594905853271484, - "logps/chosen": -347.507080078125, - "logps/rejected": -526.62158203125, - "loss": 0.0051, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.3192627429962158, - "rewards/margins": 6.810635566711426, - "rewards/rejected": -5.491372585296631, - "step": 652 - }, - { - "epoch": 0.25, - "learning_rate": 1.7382051749846376e-05, - "logits/chosen": -7.418234825134277, - "logits/rejected": 0.194276362657547, - "logps/chosen": -544.8983764648438, - "logps/rejected": -5036.2138671875, - "loss": 0.0364, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.4989686012268066, - "rewards/margins": 23.828474044799805, - "rewards/rejected": -26.327442169189453, - "step": 653 - }, - { - "epoch": 0.25, - "learning_rate": 1.737400383410278e-05, - "logits/chosen": -4.00059700012207, - "logits/rejected": -0.19530144333839417, - "logps/chosen": -610.3568115234375, - "logps/rejected": -1309.3865966796875, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.8656005859375, - "rewards/margins": 28.570165634155273, - "rewards/rejected": -30.435766220092773, - "step": 654 - }, - { - "epoch": 0.25, - "learning_rate": 1.73659454364157e-05, - "logits/chosen": -4.506490707397461, - "logits/rejected": -4.619430065155029, - "logps/chosen": -452.5097961425781, - "logps/rejected": -635.1160888671875, - "loss": 0.0008, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.1781829595565796, - "rewards/margins": 11.146814346313477, - "rewards/rejected": -12.324996948242188, - "step": 655 - }, - { - "epoch": 0.25, - "learning_rate": 1.735787656823992e-05, - "logits/chosen": -1.525413990020752, - "logits/rejected": -2.8204712867736816, - "logps/chosen": -428.48016357421875, - "logps/rejected": -510.46990966796875, - "loss": 0.037, - "rewards/accuracies": 1.0, - "rewards/chosen": -9.099505424499512, - "rewards/margins": 9.30533504486084, - "rewards/rejected": -18.40484046936035, - "step": 656 - }, - { - "epoch": 0.25, - "learning_rate": 1.7349797241045115e-05, - "logits/chosen": -1.1284661293029785, - "logits/rejected": -2.3114497661590576, - "logps/chosen": -276.3497314453125, - "logps/rejected": -406.58642578125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.118811011314392, - "rewards/margins": 16.886207580566406, - "rewards/rejected": -18.00501823425293, - "step": 657 - }, - { - "epoch": 0.25, - "learning_rate": 1.734170746631583e-05, - "logits/chosen": -6.107041358947754, - "logits/rejected": -2.8482630252838135, - "logps/chosen": -465.742431640625, - "logps/rejected": -1746.845458984375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -3.5360169410705566, - "rewards/margins": 28.70015525817871, - "rewards/rejected": -32.23617172241211, - "step": 658 - }, - { - "epoch": 0.25, - "learning_rate": 1.7333607255551455e-05, - "logits/chosen": -5.604310512542725, - "logits/rejected": -1.229734182357788, - "logps/chosen": -460.3715515136719, - "logps/rejected": -1718.7154541015625, - "loss": 0.0003, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.288308709859848, - "rewards/margins": 22.790340423583984, - "rewards/rejected": -23.078649520874023, - "step": 659 - }, - { - "epoch": 0.25, - "learning_rate": 1.7325496620266216e-05, - "logits/chosen": -4.041955947875977, - "logits/rejected": -3.9457030296325684, - "logps/chosen": -237.9458465576172, - "logps/rejected": -560.9512939453125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.2628525495529175, - "rewards/margins": 25.46318244934082, - "rewards/rejected": -24.20033073425293, - "step": 660 - }, - { - "epoch": 0.25, - "learning_rate": 1.7317375571989158e-05, - "logits/chosen": -1.7954823970794678, - "logits/rejected": -6.3270158767700195, - "logps/chosen": -305.9674072265625, - "logps/rejected": -80.21904754638672, - "loss": 0.0019, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.09733276814222336, - "rewards/margins": 4.172031879425049, - "rewards/rejected": -4.0746989250183105, - "step": 661 - }, - { - "epoch": 0.25, - "learning_rate": 1.730924412226413e-05, - "logits/chosen": -5.378231048583984, - "logits/rejected": -3.5127172470092773, - "logps/chosen": -276.0059814453125, - "logps/rejected": -964.9885864257812, - "loss": 0.0272, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.6604340076446533, - "rewards/margins": 15.208730697631836, - "rewards/rejected": -13.548296928405762, - "step": 662 - }, - { - "epoch": 0.25, - "learning_rate": 1.730110228264976e-05, - "logits/chosen": -6.0053229331970215, - "logits/rejected": -2.675398826599121, - "logps/chosen": -242.06602478027344, - "logps/rejected": -1052.033203125, - "loss": 0.0002, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.22454071044921875, - "rewards/margins": 25.942903518676758, - "rewards/rejected": -26.167444229125977, - "step": 663 - }, - { - "epoch": 0.25, - "learning_rate": 1.729295006471945e-05, - "logits/chosen": -3.877349376678467, - "logits/rejected": -4.186975479125977, - "logps/chosen": -393.15570068359375, - "logps/rejected": -663.8441772460938, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.6116577386856079, - "rewards/margins": 18.32368278503418, - "rewards/rejected": -17.712024688720703, - "step": 664 - }, - { - "epoch": 0.25, - "learning_rate": 1.728478748006136e-05, - "logits/chosen": -7.561477184295654, - "logits/rejected": -3.999514579772949, - "logps/chosen": -496.5167236328125, - "logps/rejected": -2194.84033203125, - "loss": 0.0002, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.3365662097930908, - "rewards/margins": 22.887712478637695, - "rewards/rejected": -24.224279403686523, - "step": 665 - }, - { - "epoch": 0.25, - "learning_rate": 1.7276614540278368e-05, - "logits/chosen": -2.176785707473755, - "logits/rejected": -3.778254985809326, - "logps/chosen": -418.7000427246094, - "logps/rejected": -301.8463439941406, - "loss": 0.006, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.2320709228515625, - "rewards/margins": 6.2770538330078125, - "rewards/rejected": -6.509124755859375, - "step": 666 - }, - { - "epoch": 0.25, - "learning_rate": 1.726843125698809e-05, - "logits/chosen": -2.1555655002593994, - "logits/rejected": -2.7033848762512207, - "logps/chosen": -312.88433837890625, - "logps/rejected": -348.588623046875, - "loss": 0.0026, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.492767333984375, - "rewards/margins": 3.8591065406799316, - "rewards/rejected": -3.3663392066955566, - "step": 667 - }, - { - "epoch": 0.25, - "learning_rate": 1.726023764182284e-05, - "logits/chosen": -7.718526840209961, - "logits/rejected": -1.6335408687591553, - "logps/chosen": -225.19232177734375, - "logps/rejected": -1458.875732421875, - "loss": 0.0002, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.697839379310608, - "rewards/margins": 25.04107666015625, - "rewards/rejected": -26.738916397094727, - "step": 668 - }, - { - "epoch": 0.25, - "learning_rate": 1.725203370642961e-05, - "logits/chosen": -3.915968894958496, - "logits/rejected": -1.7060285806655884, - "logps/chosen": -460.2361145019531, - "logps/rejected": -1165.484130859375, - "loss": 0.019, - "rewards/accuracies": 1.0, - "rewards/chosen": 2.724325656890869, - "rewards/margins": 16.986557006835938, - "rewards/rejected": -14.262231826782227, - "step": 669 - }, - { - "epoch": 0.25, - "learning_rate": 1.724381946247007e-05, - "logits/chosen": -1.4632774591445923, - "logits/rejected": -1.2713277339935303, - "logps/chosen": -256.00946044921875, - "logps/rejected": -399.7527160644531, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.8116104006767273, - "rewards/margins": 7.383467674255371, - "rewards/rejected": -8.195077896118164, - "step": 670 - }, - { - "epoch": 0.25, - "learning_rate": 1.7235594921620546e-05, - "logits/chosen": -3.467059850692749, - "logits/rejected": -6.370800495147705, - "logps/chosen": -458.33660888671875, - "logps/rejected": -95.6766128540039, - "loss": 0.0008, - "rewards/accuracies": 1.0, - "rewards/chosen": 2.625082492828369, - "rewards/margins": 8.856714248657227, - "rewards/rejected": -6.231631755828857, - "step": 671 - }, - { - "epoch": 0.25, - "learning_rate": 1.7227360095571992e-05, - "logits/chosen": -7.9844651222229, - "logits/rejected": -3.4122095108032227, - "logps/chosen": -244.75006103515625, - "logps/rejected": -2003.67822265625, - "loss": 0.0005, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.3159439265727997, - "rewards/margins": 9.37885856628418, - "rewards/rejected": -9.062914848327637, - "step": 672 - }, - { - "epoch": 0.25, - "learning_rate": 1.721911499602999e-05, - "logits/chosen": -2.6655774116516113, - "logits/rejected": -2.33178448677063, - "logps/chosen": -321.5491943359375, - "logps/rejected": -587.204833984375, - "loss": 0.0002, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.4501098692417145, - "rewards/margins": 8.394842147827148, - "rewards/rejected": -7.944732666015625, - "step": 673 - }, - { - "epoch": 0.25, - "learning_rate": 1.721085963471472e-05, - "logits/chosen": -4.816535472869873, - "logits/rejected": -4.671488285064697, - "logps/chosen": -341.3226318359375, - "logps/rejected": -477.1842956542969, - "loss": 0.0394, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.214599609375, - "rewards/margins": 1.9037842750549316, - "rewards/rejected": -2.1183838844299316, - "step": 674 - }, - { - "epoch": 0.26, - "learning_rate": 1.720259402336095e-05, - "logits/chosen": -3.264086961746216, - "logits/rejected": -1.7249469757080078, - "logps/chosen": -332.5718078613281, - "logps/rejected": -756.4112548828125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.21798400580883026, - "rewards/margins": 14.970138549804688, - "rewards/rejected": -14.752154350280762, - "step": 675 - }, - { - "epoch": 0.26, - "learning_rate": 1.719431817371802e-05, - "logits/chosen": -4.147507667541504, - "logits/rejected": -6.148311138153076, - "logps/chosen": -309.98193359375, - "logps/rejected": -287.96539306640625, - "loss": 0.0002, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.18107910454273224, - "rewards/margins": 15.251852989196777, - "rewards/rejected": -15.432931900024414, - "step": 676 - }, - { - "epoch": 0.26, - "learning_rate": 1.7186032097549822e-05, - "logits/chosen": -3.3798635005950928, - "logits/rejected": -0.6999723315238953, - "logps/chosen": -381.7336120605469, - "logps/rejected": -815.008544921875, - "loss": 0.0055, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.08869323879480362, - "rewards/margins": 10.21105670928955, - "rewards/rejected": -10.122363090515137, - "step": 677 - }, - { - "epoch": 0.26, - "learning_rate": 1.717773580663479e-05, - "logits/chosen": -0.750411331653595, - "logits/rejected": -1.4365054368972778, - "logps/chosen": -498.19683837890625, - "logps/rejected": -669.1280517578125, - "loss": 0.0006, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.3713440001010895, - "rewards/margins": 15.818804740905762, - "rewards/rejected": -15.447461128234863, - "step": 678 - }, - { - "epoch": 0.26, - "learning_rate": 1.7169429312765863e-05, - "logits/chosen": -2.0954372882843018, - "logits/rejected": -4.320059299468994, - "logps/chosen": -487.8276672363281, - "logps/rejected": -397.45361328125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.45050048828125, - "rewards/margins": 10.534875869750977, - "rewards/rejected": -11.985376358032227, - "step": 679 - }, - { - "epoch": 0.26, - "learning_rate": 1.7161112627750503e-05, - "logits/chosen": -7.169397354125977, - "logits/rejected": -2.2640182971954346, - "logps/chosen": -341.2708435058594, - "logps/rejected": -1489.523193359375, - "loss": 0.0072, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.7482361197471619, - "rewards/margins": 13.632489204406738, - "rewards/rejected": -12.88425350189209, - "step": 680 - }, - { - "epoch": 0.26, - "learning_rate": 1.7152785763410648e-05, - "logits/chosen": -5.968217849731445, - "logits/rejected": -0.852644681930542, - "logps/chosen": -289.194091796875, - "logps/rejected": -1303.7685546875, - "loss": 0.0021, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.928570568561554, - "rewards/margins": 22.985490798950195, - "rewards/rejected": -22.056921005249023, - "step": 681 - }, - { - "epoch": 0.26, - "learning_rate": 1.7144448731582698e-05, - "logits/chosen": -7.365390777587891, - "logits/rejected": -1.1501580476760864, - "logps/chosen": -316.4031982421875, - "logps/rejected": -3049.7783203125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.48590394854545593, - "rewards/margins": 33.553802490234375, - "rewards/rejected": -33.06789779663086, - "step": 682 - }, - { - "epoch": 0.26, - "learning_rate": 1.7136101544117526e-05, - "logits/chosen": -1.4336435794830322, - "logits/rejected": -5.288978099822998, - "logps/chosen": -241.58567810058594, - "logps/rejected": -155.36366271972656, - "loss": 0.0016, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.0369430780410767, - "rewards/margins": 6.893486022949219, - "rewards/rejected": -7.930428981781006, - "step": 683 - }, - { - "epoch": 0.26, - "learning_rate": 1.712774421288042e-05, - "logits/chosen": -3.3552134037017822, - "logits/rejected": -2.7561256885528564, - "logps/chosen": -208.76080322265625, - "logps/rejected": -524.01953125, - "loss": 0.0005, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.2493561506271362, - "rewards/margins": 10.339630126953125, - "rewards/rejected": -9.0902738571167, - "step": 684 - }, - { - "epoch": 0.26, - "learning_rate": 1.71193767497511e-05, - "logits/chosen": -7.108851909637451, - "logits/rejected": -1.5226227045059204, - "logps/chosen": -223.05796813964844, - "logps/rejected": -1132.6387939453125, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.6555420160293579, - "rewards/margins": 10.01773738861084, - "rewards/rejected": -9.362195014953613, - "step": 685 - }, - { - "epoch": 0.26, - "learning_rate": 1.7110999166623683e-05, - "logits/chosen": -2.2619681358337402, - "logits/rejected": -5.361622333526611, - "logps/chosen": -201.83566284179688, - "logps/rejected": -185.79730224609375, - "loss": 0.0032, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.8291824460029602, - "rewards/margins": 11.550539016723633, - "rewards/rejected": -10.721356391906738, - "step": 686 - }, - { - "epoch": 0.26, - "learning_rate": 1.7102611475406676e-05, - "logits/chosen": -7.384420394897461, - "logits/rejected": -1.423459529876709, - "logps/chosen": -336.954833984375, - "logps/rejected": -1533.978759765625, - "loss": 0.0073, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.0307098627090454, - "rewards/margins": 8.773733139038086, - "rewards/rejected": -9.804443359375, - "step": 687 - }, - { - "epoch": 0.26, - "learning_rate": 1.7094213688022947e-05, - "logits/chosen": -2.1432104110717773, - "logits/rejected": -2.165012836456299, - "logps/chosen": -356.0462341308594, - "logps/rejected": -441.36322021484375, - "loss": 0.0005, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.2070770263671875, - "rewards/margins": 8.040692329406738, - "rewards/rejected": -8.247769355773926, - "step": 688 - }, - { - "epoch": 0.26, - "learning_rate": 1.7085805816409723e-05, - "logits/chosen": -3.2450180053710938, - "logits/rejected": -6.16736364364624, - "logps/chosen": -289.12799072265625, - "logps/rejected": -189.16525268554688, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.8650116324424744, - "rewards/margins": 7.747148036956787, - "rewards/rejected": -8.612159729003906, - "step": 689 - }, - { - "epoch": 0.26, - "learning_rate": 1.707738787251856e-05, - "logits/chosen": -5.492600917816162, - "logits/rejected": -1.0638905763626099, - "logps/chosen": -558.4315795898438, - "logps/rejected": -1561.69189453125, - "loss": 0.0023, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.2475708723068237, - "rewards/margins": 12.42535400390625, - "rewards/rejected": -13.672924995422363, - "step": 690 - }, - { - "epoch": 0.26, - "learning_rate": 1.7068959868315334e-05, - "logits/chosen": -5.063064098358154, - "logits/rejected": -1.4261212348937988, - "logps/chosen": -426.5548095703125, - "logps/rejected": -986.6898803710938, - "loss": 0.0015, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.225738525390625, - "rewards/margins": 19.858814239501953, - "rewards/rejected": -22.084552764892578, - "step": 691 - }, - { - "epoch": 0.26, - "learning_rate": 1.7060521815780225e-05, - "logits/chosen": -1.0726594924926758, - "logits/rejected": -2.78525710105896, - "logps/chosen": -546.0614013671875, - "logps/rejected": -664.278076171875, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 2.33148193359375, - "rewards/margins": 19.494979858398438, - "rewards/rejected": -17.163497924804688, - "step": 692 - }, - { - "epoch": 0.26, - "learning_rate": 1.705207372690769e-05, - "logits/chosen": -3.9307587146759033, - "logits/rejected": -4.1442365646362305, - "logps/chosen": -496.78515625, - "logps/rejected": -673.98876953125, - "loss": 0.0008, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.057586669921875, - "rewards/margins": 11.817193984985352, - "rewards/rejected": -11.874780654907227, - "step": 693 - }, - { - "epoch": 0.26, - "learning_rate": 1.7043615613706448e-05, - "logits/chosen": -6.248693943023682, - "logits/rejected": -1.0073909759521484, - "logps/chosen": -286.62060546875, - "logps/rejected": -1209.8709716796875, - "loss": 0.0002, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.985064685344696, - "rewards/margins": 15.539593696594238, - "rewards/rejected": -14.554529190063477, - "step": 694 - }, - { - "epoch": 0.26, - "learning_rate": 1.703514748819948e-05, - "logits/chosen": -7.600813865661621, - "logits/rejected": -3.583566427230835, - "logps/chosen": -277.004638671875, - "logps/rejected": -1249.855224609375, - "loss": 0.0029, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.5608184933662415, - "rewards/margins": 21.61245346069336, - "rewards/rejected": -21.0516357421875, - "step": 695 - }, - { - "epoch": 0.26, - "learning_rate": 1.7026669362423995e-05, - "logits/chosen": -1.6758710145950317, - "logits/rejected": -5.313637733459473, - "logps/chosen": -171.49398803710938, - "logps/rejected": -134.11346435546875, - "loss": 0.0045, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.2915467023849487, - "rewards/margins": 3.338815212249756, - "rewards/rejected": -4.630362033843994, - "step": 696 - }, - { - "epoch": 0.26, - "learning_rate": 1.7018181248431416e-05, - "logits/chosen": -1.57595694065094, - "logits/rejected": -1.7519766092300415, - "logps/chosen": -300.9286804199219, - "logps/rejected": -302.22259521484375, - "loss": 0.0186, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.24215392768383026, - "rewards/margins": 1.89080810546875, - "rewards/rejected": -1.6486542224884033, - "step": 697 - }, - { - "epoch": 0.26, - "learning_rate": 1.700968315828736e-05, - "logits/chosen": -3.732140064239502, - "logits/rejected": -0.7815713286399841, - "logps/chosen": -406.01483154296875, - "logps/rejected": -1237.029296875, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.6373047232627869, - "rewards/margins": 32.93692398071289, - "rewards/rejected": -32.299617767333984, - "step": 698 - }, - { - "epoch": 0.26, - "learning_rate": 1.7001175104071625e-05, - "logits/chosen": -1.5581010580062866, - "logits/rejected": -2.671450138092041, - "logps/chosen": -318.7662658691406, - "logps/rejected": -395.1690673828125, - "loss": 0.001, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.37240907549858093, - "rewards/margins": 6.803094387054443, - "rewards/rejected": -6.430685520172119, - "step": 699 - }, - { - "epoch": 0.26, - "learning_rate": 1.6992657097878184e-05, - "logits/chosen": -2.8144049644470215, - "logits/rejected": -1.615714192390442, - "logps/chosen": -282.4354248046875, - "logps/rejected": -522.515380859375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.6378418207168579, - "rewards/margins": 13.842307090759277, - "rewards/rejected": -13.20446491241455, - "step": 700 - }, - { - "epoch": 0.27, - "learning_rate": 1.6984129151815147e-05, - "logits/chosen": -5.551654815673828, - "logits/rejected": -1.8232406377792358, - "logps/chosen": -421.0106201171875, - "logps/rejected": -1204.8480224609375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.059423804283142, - "rewards/margins": 9.827564239501953, - "rewards/rejected": -10.886987686157227, - "step": 701 - }, - { - "epoch": 0.27, - "learning_rate": 1.6975591278004747e-05, - "logits/chosen": -5.32221794128418, - "logits/rejected": -4.646737575531006, - "logps/chosen": -239.39906311035156, - "logps/rejected": -307.70245361328125, - "loss": 0.007, - "rewards/accuracies": 1.0, - "rewards/chosen": -4.992961406707764, - "rewards/margins": 2.872201442718506, - "rewards/rejected": -7.8651628494262695, - "step": 702 - }, - { - "epoch": 0.27, - "learning_rate": 1.6967043488583342e-05, - "logits/chosen": -6.248621940612793, - "logits/rejected": -6.5932769775390625, - "logps/chosen": -185.95118713378906, - "logps/rejected": -441.3093566894531, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.5526139736175537, - "rewards/margins": 9.839055061340332, - "rewards/rejected": -12.391669273376465, - "step": 703 - }, - { - "epoch": 0.27, - "learning_rate": 1.695848579570138e-05, - "logits/chosen": -1.825717568397522, - "logits/rejected": -2.1628971099853516, - "logps/chosen": -190.9292449951172, - "logps/rejected": -337.6517333984375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.6116974353790283, - "rewards/margins": 14.94410228729248, - "rewards/rejected": -13.332405090332031, - "step": 704 - }, - { - "epoch": 0.27, - "learning_rate": 1.6949918211523386e-05, - "logits/chosen": -2.4471869468688965, - "logits/rejected": -1.4813789129257202, - "logps/chosen": -454.0907287597656, - "logps/rejected": -728.9014282226562, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.829821765422821, - "rewards/margins": 11.780120849609375, - "rewards/rejected": -12.609942436218262, - "step": 705 - }, - { - "epoch": 0.27, - "learning_rate": 1.6941340748227942e-05, - "logits/chosen": -2.9255776405334473, - "logits/rejected": -4.513108253479004, - "logps/chosen": -699.802978515625, - "logps/rejected": -180.8344268798828, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.322509765625, - "rewards/margins": 9.209749221801758, - "rewards/rejected": -8.887239456176758, - "step": 706 - }, - { - "epoch": 0.27, - "learning_rate": 1.6932753418007683e-05, - "logits/chosen": -3.1287879943847656, - "logits/rejected": -2.4017393589019775, - "logps/chosen": -355.36920166015625, - "logps/rejected": -630.623779296875, - "loss": 0.0002, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.6114104986190796, - "rewards/margins": 13.431191444396973, - "rewards/rejected": -11.819781303405762, - "step": 707 - }, - { - "epoch": 0.27, - "learning_rate": 1.6924156233069253e-05, - "logits/chosen": -2.595036745071411, - "logits/rejected": -2.4733176231384277, - "logps/chosen": -187.68057250976562, - "logps/rejected": -570.1896362304688, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.7045349478721619, - "rewards/margins": 18.824390411376953, - "rewards/rejected": -19.5289249420166, - "step": 708 - }, - { - "epoch": 0.27, - "learning_rate": 1.691554920563332e-05, - "logits/chosen": -7.1791582107543945, - "logits/rejected": -1.1827946901321411, - "logps/chosen": -566.02734375, - "logps/rejected": -2538.2119140625, - "loss": 0.0881, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.619903564453125, - "rewards/margins": 25.36834144592285, - "rewards/rejected": -23.748437881469727, - "step": 709 - }, - { - "epoch": 0.27, - "learning_rate": 1.690693234793453e-05, - "logits/chosen": -6.728582859039307, - "logits/rejected": -3.498844623565674, - "logps/chosen": -600.5430908203125, - "logps/rejected": -3323.6728515625, - "loss": 0.0021, - "rewards/accuracies": 1.0, - "rewards/chosen": 2.1414246559143066, - "rewards/margins": 37.67555618286133, - "rewards/rejected": -35.53413009643555, - "step": 710 - }, - { - "epoch": 0.27, - "learning_rate": 1.6898305672221523e-05, - "logits/chosen": -1.803963541984558, - "logits/rejected": -2.905219554901123, - "logps/chosen": -357.8081359863281, - "logps/rejected": -411.31646728515625, - "loss": 0.0004, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.8502228260040283, - "rewards/margins": 21.934776306152344, - "rewards/rejected": -23.78499984741211, - "step": 711 - }, - { - "epoch": 0.27, - "learning_rate": 1.688966919075687e-05, - "logits/chosen": -7.886115550994873, - "logits/rejected": -1.848327398300171, - "logps/chosen": -248.46701049804688, - "logps/rejected": -1334.9296875, - "loss": 0.0048, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.5108475089073181, - "rewards/margins": 14.790901184082031, - "rewards/rejected": -14.280054092407227, - "step": 712 - }, - { - "epoch": 0.27, - "learning_rate": 1.6881022915817088e-05, - "logits/chosen": -4.47295618057251, - "logits/rejected": -2.132091522216797, - "logps/chosen": -193.4853515625, - "logps/rejected": -857.948486328125, - "loss": 0.0014, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.532911777496338, - "rewards/margins": 31.60203742980957, - "rewards/rejected": -34.13494873046875, - "step": 713 - }, - { - "epoch": 0.27, - "learning_rate": 1.687236685969263e-05, - "logits/chosen": -5.270910263061523, - "logits/rejected": -0.6704632043838501, - "logps/chosen": -467.14324951171875, - "logps/rejected": -1213.954345703125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.4516724348068237, - "rewards/margins": 17.327966690063477, - "rewards/rejected": -18.779638290405273, - "step": 714 - }, - { - "epoch": 0.27, - "learning_rate": 1.686370103468783e-05, - "logits/chosen": -1.5605521202087402, - "logits/rejected": -0.39452898502349854, - "logps/chosen": -685.9468994140625, - "logps/rejected": -1011.4760131835938, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.67724609375, - "rewards/margins": 10.851788520812988, - "rewards/rejected": -9.174542427062988, - "step": 715 - }, - { - "epoch": 0.27, - "learning_rate": 1.6855025453120935e-05, - "logits/chosen": -4.035589218139648, - "logits/rejected": -2.326416254043579, - "logps/chosen": -263.6562805175781, - "logps/rejected": -571.73974609375, - "loss": 0.0002, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.6136810183525085, - "rewards/margins": 11.6162109375, - "rewards/rejected": -12.229891777038574, - "step": 716 - }, - { - "epoch": 0.27, - "learning_rate": 1.684634012732403e-05, - "logits/chosen": -0.8075994253158569, - "logits/rejected": -2.757901191711426, - "logps/chosen": -433.3796081542969, - "logps/rejected": -428.50762939453125, - "loss": 0.0019, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.5488921999931335, - "rewards/margins": 13.491131782531738, - "rewards/rejected": -14.040023803710938, - "step": 717 - }, - { - "epoch": 0.27, - "learning_rate": 1.6837645069643075e-05, - "logits/chosen": -2.2662925720214844, - "logits/rejected": -0.9995133876800537, - "logps/chosen": -403.158203125, - "logps/rejected": -828.2935180664062, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.1633728742599487, - "rewards/margins": 20.727832794189453, - "rewards/rejected": -21.891204833984375, - "step": 718 - }, - { - "epoch": 0.27, - "learning_rate": 1.682894029243785e-05, - "logits/chosen": -2.4700393676757812, - "logits/rejected": -0.8557465672492981, - "logps/chosen": -221.49099731445312, - "logps/rejected": -378.8505859375, - "loss": 0.0873, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.325636386871338, - "rewards/margins": 5.263758659362793, - "rewards/rejected": -7.589395046234131, - "step": 719 - }, - { - "epoch": 0.27, - "learning_rate": 1.682022580808196e-05, - "logits/chosen": -5.197132110595703, - "logits/rejected": -4.135492324829102, - "logps/chosen": -379.8403015136719, - "logps/rejected": -861.22802734375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.41915589570999146, - "rewards/margins": 16.25882911682129, - "rewards/rejected": -16.6779842376709, - "step": 720 - }, - { - "epoch": 0.27, - "learning_rate": 1.6811501628962807e-05, - "logits/chosen": -2.0057950019836426, - "logits/rejected": -5.599956512451172, - "logps/chosen": -420.8204345703125, - "logps/rejected": -174.01870727539062, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.8718994855880737, - "rewards/margins": 7.913557052612305, - "rewards/rejected": -9.785456657409668, - "step": 721 - }, - { - "epoch": 0.27, - "learning_rate": 1.680276776748157e-05, - "logits/chosen": -5.410819053649902, - "logits/rejected": -0.7267279624938965, - "logps/chosen": -654.6044921875, - "logps/rejected": -1328.849853515625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.08795776218175888, - "rewards/margins": 14.531720161437988, - "rewards/rejected": -14.619677543640137, - "step": 722 - }, - { - "epoch": 0.27, - "learning_rate": 1.6794024236053186e-05, - "logits/chosen": -3.508352041244507, - "logits/rejected": -1.7799149751663208, - "logps/chosen": -357.1905212402344, - "logps/rejected": -954.135986328125, - "loss": 0.0025, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.8022400140762329, - "rewards/margins": 29.645551681518555, - "rewards/rejected": -28.843311309814453, - "step": 723 - }, - { - "epoch": 0.27, - "learning_rate": 1.6785271047106352e-05, - "logits/chosen": -1.3170005083084106, - "logits/rejected": -5.732455730438232, - "logps/chosen": -250.426025390625, - "logps/rejected": -230.35177612304688, - "loss": 0.0003, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.4114990234375, - "rewards/margins": 14.54101276397705, - "rewards/rejected": -13.12951374053955, - "step": 724 - }, - { - "epoch": 0.27, - "learning_rate": 1.6776508213083484e-05, - "logits/chosen": -2.7980289459228516, - "logits/rejected": -3.203319787979126, - "logps/chosen": -402.34808349609375, - "logps/rejected": -499.70672607421875, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -3.043478488922119, - "rewards/margins": 10.313405990600586, - "rewards/rejected": -13.356884956359863, - "step": 725 - }, - { - "epoch": 0.27, - "learning_rate": 1.6767735746440705e-05, - "logits/chosen": -1.7136080265045166, - "logits/rejected": -1.9550880193710327, - "logps/chosen": -619.388671875, - "logps/rejected": -906.086181640625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -6.104028224945068, - "rewards/margins": 21.845928192138672, - "rewards/rejected": -27.9499568939209, - "step": 726 - }, - { - "epoch": 0.27, - "learning_rate": 1.6758953659647838e-05, - "logits/chosen": -2.4055259227752686, - "logits/rejected": -4.056077003479004, - "logps/chosen": -274.1620178222656, - "logps/rejected": -343.8797302246094, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.2727478742599487, - "rewards/margins": 13.714673042297363, - "rewards/rejected": -14.987421035766602, - "step": 727 - }, - { - "epoch": 0.28, - "learning_rate": 1.6750161965188376e-05, - "logits/chosen": -5.345430374145508, - "logits/rejected": -4.038645267486572, - "logps/chosen": -653.8165893554688, - "logps/rejected": -1444.276123046875, - "loss": 0.0011, - "rewards/accuracies": 1.0, - "rewards/chosen": 3.0512757301330566, - "rewards/margins": 12.374738693237305, - "rewards/rejected": -9.32346248626709, - "step": 728 - }, - { - "epoch": 0.28, - "learning_rate": 1.6741360675559475e-05, - "logits/chosen": -1.0482676029205322, - "logits/rejected": -3.0113179683685303, - "logps/chosen": -376.4202880859375, - "logps/rejected": -433.37860107421875, - "loss": 0.0093, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.09488525241613388, - "rewards/margins": 25.203838348388672, - "rewards/rejected": -25.10895347595215, - "step": 729 - }, - { - "epoch": 0.28, - "learning_rate": 1.6732549803271922e-05, - "logits/chosen": -1.29671311378479, - "logits/rejected": -6.392415523529053, - "logps/chosen": -280.53192138671875, - "logps/rejected": -138.61099243164062, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.14643554389476776, - "rewards/margins": 8.812490463256836, - "rewards/rejected": -8.9589262008667, - "step": 730 - }, - { - "epoch": 0.28, - "learning_rate": 1.672372936085013e-05, - "logits/chosen": -4.837675094604492, - "logits/rejected": -2.7766854763031006, - "logps/chosen": -259.9169616699219, - "logps/rejected": -614.4256591796875, - "loss": 0.0897, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.39731141924858093, - "rewards/margins": 14.975540161132812, - "rewards/rejected": -14.578228950500488, - "step": 731 - }, - { - "epoch": 0.28, - "learning_rate": 1.6714899360832118e-05, - "logits/chosen": -0.9481593370437622, - "logits/rejected": -1.4933174848556519, - "logps/chosen": -505.4759216308594, - "logps/rejected": -794.1666259765625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.12649230659008026, - "rewards/margins": 23.96952247619629, - "rewards/rejected": -24.09601402282715, - "step": 732 - }, - { - "epoch": 0.28, - "learning_rate": 1.6706059815769483e-05, - "logits/chosen": -3.2809460163116455, - "logits/rejected": -2.5982532501220703, - "logps/chosen": -304.6788635253906, - "logps/rejected": -754.1346435546875, - "loss": 0.0003, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.008306860923767, - "rewards/margins": 24.444599151611328, - "rewards/rejected": -25.452905654907227, - "step": 733 - }, - { - "epoch": 0.28, - "learning_rate": 1.6697210738227402e-05, - "logits/chosen": -2.268296003341675, - "logits/rejected": -2.650717258453369, - "logps/chosen": -317.7857666015625, - "logps/rejected": -607.4154052734375, - "loss": 0.0005, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.03351440653204918, - "rewards/margins": 21.807987213134766, - "rewards/rejected": -21.841501235961914, - "step": 734 - }, - { - "epoch": 0.28, - "learning_rate": 1.6688352140784587e-05, - "logits/chosen": -1.9802178144454956, - "logits/rejected": -3.751408338546753, - "logps/chosen": -492.43121337890625, - "logps/rejected": -499.94830322265625, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.6364166736602783, - "rewards/margins": 10.835989952087402, - "rewards/rejected": -13.472406387329102, - "step": 735 - }, - { - "epoch": 0.28, - "learning_rate": 1.6679484036033295e-05, - "logits/chosen": -1.0307416915893555, - "logits/rejected": -3.869518518447876, - "logps/chosen": -913.45458984375, - "logps/rejected": -893.57861328125, - "loss": 0.0007, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.8814697265625, - "rewards/margins": 31.51312255859375, - "rewards/rejected": -32.39459228515625, - "step": 736 - }, - { - "epoch": 0.28, - "learning_rate": 1.667060643657929e-05, - "logits/chosen": -1.4882934093475342, - "logits/rejected": -4.647378444671631, - "logps/chosen": -424.96099853515625, - "logps/rejected": -166.98245239257812, - "loss": 0.0002, - "rewards/accuracies": 1.0, - "rewards/chosen": -3.384930372238159, - "rewards/margins": 6.697458267211914, - "rewards/rejected": -10.082388877868652, - "step": 737 - }, - { - "epoch": 0.28, - "learning_rate": 1.6661719355041838e-05, - "logits/chosen": -2.9079060554504395, - "logits/rejected": -3.5321450233459473, - "logps/chosen": -294.7632751464844, - "logps/rejected": -483.8546142578125, - "loss": 0.0002, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.146737813949585, - "rewards/margins": 17.9483585357666, - "rewards/rejected": -20.095096588134766, - "step": 738 - }, - { - "epoch": 0.28, - "learning_rate": 1.6652822804053683e-05, - "logits/chosen": -0.7997777462005615, - "logits/rejected": -1.5887805223464966, - "logps/chosen": -214.48959350585938, - "logps/rejected": -614.5391235351562, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -3.196420431137085, - "rewards/margins": 30.649213790893555, - "rewards/rejected": -33.84563446044922, - "step": 739 - }, - { - "epoch": 0.28, - "learning_rate": 1.6643916796261025e-05, - "logits/chosen": -4.012192249298096, - "logits/rejected": -1.0709916353225708, - "logps/chosen": -262.1194152832031, - "logps/rejected": -1112.0849609375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.2436065673828125, - "rewards/margins": 44.176517486572266, - "rewards/rejected": -45.42012405395508, - "step": 740 - }, - { - "epoch": 0.28, - "learning_rate": 1.6635001344323506e-05, - "logits/chosen": -3.5924742221832275, - "logits/rejected": -2.5488359928131104, - "logps/chosen": -627.5142822265625, - "logps/rejected": -1043.454345703125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.773303270339966, - "rewards/margins": 15.912549018859863, - "rewards/rejected": -18.68585205078125, - "step": 741 - }, - { - "epoch": 0.28, - "learning_rate": 1.66260764609142e-05, - "logits/chosen": -3.302394390106201, - "logits/rejected": -7.155035972595215, - "logps/chosen": -397.827392578125, - "logps/rejected": -196.67816162109375, - "loss": 0.0005, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.19298096001148224, - "rewards/margins": 5.532763957977295, - "rewards/rejected": -5.725744724273682, - "step": 742 - }, - { - "epoch": 0.28, - "learning_rate": 1.6617142158719577e-05, - "logits/chosen": -5.042423248291016, - "logits/rejected": -1.0992001295089722, - "logps/chosen": -741.4293212890625, - "logps/rejected": -2014.248291015625, - "loss": 0.0005, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.788293480873108, - "rewards/margins": 43.12869644165039, - "rewards/rejected": -41.34040451049805, - "step": 743 - }, - { - "epoch": 0.28, - "learning_rate": 1.660819845043951e-05, - "logits/chosen": -1.4309355020523071, - "logits/rejected": -2.14861798286438, - "logps/chosen": -323.4967041015625, - "logps/rejected": -451.2830810546875, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.948449730873108, - "rewards/margins": 11.409067153930664, - "rewards/rejected": -9.460617065429688, - "step": 744 - }, - { - "epoch": 0.28, - "learning_rate": 1.659924534878723e-05, - "logits/chosen": -7.259391784667969, - "logits/rejected": -1.8473713397979736, - "logps/chosen": -305.21697998046875, - "logps/rejected": -1409.363037109375, - "loss": 0.009, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.480917364358902, - "rewards/margins": 17.977279663085938, - "rewards/rejected": -17.496362686157227, - "step": 745 - }, - { - "epoch": 0.28, - "learning_rate": 1.659028286648932e-05, - "logits/chosen": -6.477328300476074, - "logits/rejected": -1.2379205226898193, - "logps/chosen": -512.1973266601562, - "logps/rejected": -1650.969482421875, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.487542748451233, - "rewards/margins": 17.61212921142578, - "rewards/rejected": -16.12458610534668, - "step": 746 - }, - { - "epoch": 0.28, - "learning_rate": 1.658131101628571e-05, - "logits/chosen": -1.008010745048523, - "logits/rejected": -6.7385125160217285, - "logps/chosen": -524.937744140625, - "logps/rejected": -139.02023315429688, - "loss": 0.0004, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.969097912311554, - "rewards/margins": 9.068971633911133, - "rewards/rejected": -10.038069725036621, - "step": 747 - }, - { - "epoch": 0.28, - "learning_rate": 1.6572329810929635e-05, - "logits/chosen": -4.521857261657715, - "logits/rejected": -6.178060531616211, - "logps/chosen": -338.6219482421875, - "logps/rejected": -215.32589721679688, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.4313751459121704, - "rewards/margins": 9.910443305969238, - "rewards/rejected": -8.4790678024292, - "step": 748 - }, - { - "epoch": 0.28, - "learning_rate": 1.656333926318763e-05, - "logits/chosen": -4.0655083656311035, - "logits/rejected": -1.6797527074813843, - "logps/chosen": -440.51837158203125, - "logps/rejected": -1016.5387573242188, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.08831787109375, - "rewards/margins": 22.128009796142578, - "rewards/rejected": -22.039691925048828, - "step": 749 - }, - { - "epoch": 0.28, - "learning_rate": 1.655433938583951e-05, - "logits/chosen": -1.4807703495025635, - "logits/rejected": -2.9845468997955322, - "logps/chosen": -290.8880615234375, - "logps/rejected": -350.45867919921875, - "loss": 0.0162, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.710174560546875, - "rewards/margins": 23.727344512939453, - "rewards/rejected": -22.017169952392578, - "step": 750 - }, - { - "epoch": 0.28, - "learning_rate": 1.6545330191678356e-05, - "logits/chosen": -3.7606027126312256, - "logits/rejected": -1.4596397876739502, - "logps/chosen": -296.53680419921875, - "logps/rejected": -730.53662109375, - "loss": 0.0003, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.24176636338233948, - "rewards/margins": 17.2329044342041, - "rewards/rejected": -17.47467041015625, - "step": 751 - }, - { - "epoch": 0.28, - "learning_rate": 1.653631169351049e-05, - "logits/chosen": -3.171327590942383, - "logits/rejected": -4.459228515625, - "logps/chosen": -540.5325317382812, - "logps/rejected": -852.0968017578125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.46794435381889343, - "rewards/margins": 27.913354873657227, - "rewards/rejected": -28.38129997253418, - "step": 752 - }, - { - "epoch": 0.28, - "learning_rate": 1.6527283904155457e-05, - "logits/chosen": -3.0404584407806396, - "logits/rejected": -5.81345272064209, - "logps/chosen": -416.8873291015625, - "logps/rejected": -287.58050537109375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -3.413433790206909, - "rewards/margins": 8.75798511505127, - "rewards/rejected": -12.171419143676758, - "step": 753 - }, - { - "epoch": 0.29, - "learning_rate": 1.651824683644601e-05, - "logits/chosen": -2.629298686981201, - "logits/rejected": -4.151940822601318, - "logps/chosen": -317.50360107421875, - "logps/rejected": -403.55291748046875, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.09168090671300888, - "rewards/margins": 16.478641510009766, - "rewards/rejected": -16.570322036743164, - "step": 754 - }, - { - "epoch": 0.29, - "learning_rate": 1.6509200503228092e-05, - "logits/chosen": -3.084336280822754, - "logits/rejected": -5.623964309692383, - "logps/chosen": -296.84405517578125, - "logps/rejected": -356.5215759277344, - "loss": 0.0003, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.0062286853790283, - "rewards/margins": 6.187276840209961, - "rewards/rejected": -7.193505764007568, - "step": 755 - }, - { - "epoch": 0.29, - "learning_rate": 1.650014491736082e-05, - "logits/chosen": -1.5668869018554688, - "logits/rejected": -6.7708024978637695, - "logps/chosen": -793.7401123046875, - "logps/rejected": -366.92572021484375, - "loss": 0.0003, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.908496081829071, - "rewards/margins": 14.652948379516602, - "rewards/rejected": -13.744452476501465, - "step": 756 - }, - { - "epoch": 0.29, - "learning_rate": 1.6491080091716457e-05, - "logits/chosen": -3.1339025497436523, - "logits/rejected": -4.423862457275391, - "logps/chosen": -429.38623046875, - "logps/rejected": -464.411865234375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.01855163648724556, - "rewards/margins": 17.533655166625977, - "rewards/rejected": -17.55220603942871, - "step": 757 - }, - { - "epoch": 0.29, - "learning_rate": 1.6482006039180406e-05, - "logits/chosen": -8.536022186279297, - "logits/rejected": -0.9206287860870361, - "logps/chosen": -320.07550048828125, - "logps/rejected": -3055.25, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.3215759992599487, - "rewards/margins": 42.05509567260742, - "rewards/rejected": -40.7335205078125, - "step": 758 - }, - { - "epoch": 0.29, - "learning_rate": 1.6472922772651182e-05, - "logits/chosen": -3.7859623432159424, - "logits/rejected": -3.0358211994171143, - "logps/chosen": -389.77642822265625, - "logps/rejected": -724.1123046875, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.05036010965704918, - "rewards/margins": 15.816608428955078, - "rewards/rejected": -15.866968154907227, - "step": 759 - }, - { - "epoch": 0.29, - "learning_rate": 1.6463830305040395e-05, - "logits/chosen": -6.790244102478027, - "logits/rejected": -2.921854019165039, - "logps/chosen": -523.9609375, - "logps/rejected": -1951.5819091796875, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.185797095298767, - "rewards/margins": 11.404499053955078, - "rewards/rejected": -12.590295791625977, - "step": 760 - }, - { - "epoch": 0.29, - "learning_rate": 1.6454728649272743e-05, - "logits/chosen": -6.677464485168457, - "logits/rejected": -3.0527684688568115, - "logps/chosen": -299.6720275878906, - "logps/rejected": -1250.169921875, - "loss": 0.0011, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.9979614615440369, - "rewards/margins": 18.269445419311523, - "rewards/rejected": -17.271484375, - "step": 761 - }, - { - "epoch": 0.29, - "learning_rate": 1.6445617818285974e-05, - "logits/chosen": -3.1088364124298096, - "logits/rejected": -2.1398839950561523, - "logps/chosen": -351.7869873046875, - "logps/rejected": -507.45794677734375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.2864623963832855, - "rewards/margins": 8.892786026000977, - "rewards/rejected": -8.6063232421875, - "step": 762 - }, - { - "epoch": 0.29, - "learning_rate": 1.6436497825030886e-05, - "logits/chosen": -4.2516961097717285, - "logits/rejected": -1.795404314994812, - "logps/chosen": -314.21380615234375, - "logps/rejected": -731.7727661132812, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.9144043922424316, - "rewards/margins": 18.177021026611328, - "rewards/rejected": -21.0914249420166, - "step": 763 - }, - { - "epoch": 0.29, - "learning_rate": 1.6427368682471302e-05, - "logits/chosen": -3.6858198642730713, - "logits/rejected": -5.718173980712891, - "logps/chosen": -489.0721740722656, - "logps/rejected": -346.6761169433594, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.960638403892517, - "rewards/margins": 16.491788864135742, - "rewards/rejected": -18.45242691040039, - "step": 764 - }, - { - "epoch": 0.29, - "learning_rate": 1.6418230403584034e-05, - "logits/chosen": -5.8222808837890625, - "logits/rejected": -3.979322671890259, - "logps/chosen": -291.46221923828125, - "logps/rejected": -663.2265625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.12462158501148224, - "rewards/margins": 9.898303031921387, - "rewards/rejected": -9.773681640625, - "step": 765 - }, - { - "epoch": 0.29, - "learning_rate": 1.640908300135891e-05, - "logits/chosen": -1.5854555368423462, - "logits/rejected": -3.6421701908111572, - "logps/chosen": -217.95594787597656, - "logps/rejected": -192.5752716064453, - "loss": 0.0006, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.3439895808696747, - "rewards/margins": 5.358835220336914, - "rewards/rejected": -5.702824592590332, - "step": 766 - }, - { - "epoch": 0.29, - "learning_rate": 1.6399926488798702e-05, - "logits/chosen": -1.2902547121047974, - "logits/rejected": -3.3586442470550537, - "logps/chosen": -237.71847534179688, - "logps/rejected": -559.20654296875, - "loss": 0.0211, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.2599228024482727, - "rewards/margins": 30.077707290649414, - "rewards/rejected": -30.337629318237305, - "step": 767 - }, - { - "epoch": 0.29, - "learning_rate": 1.639076087891914e-05, - "logits/chosen": -6.021817684173584, - "logits/rejected": -1.885598063468933, - "logps/chosen": -361.33563232421875, - "logps/rejected": -1756.6925048828125, - "loss": 0.0009, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.7967773675918579, - "rewards/margins": 32.620140075683594, - "rewards/rejected": -31.8233642578125, - "step": 768 - }, - { - "epoch": 0.29, - "learning_rate": 1.638158618474889e-05, - "logits/chosen": -7.092784881591797, - "logits/rejected": -3.761108160018921, - "logps/chosen": -254.60650634765625, - "logps/rejected": -1178.305908203125, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": 2.134542942047119, - "rewards/margins": 27.31985092163086, - "rewards/rejected": -25.1853084564209, - "step": 769 - }, - { - "epoch": 0.29, - "learning_rate": 1.6372402419329523e-05, - "logits/chosen": -4.837840557098389, - "logits/rejected": -1.7779072523117065, - "logps/chosen": -220.13153076171875, - "logps/rejected": -840.228759765625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.6702789664268494, - "rewards/margins": 26.719242095947266, - "rewards/rejected": -27.3895206451416, - "step": 770 - }, - { - "epoch": 0.29, - "learning_rate": 1.6363209595715518e-05, - "logits/chosen": -0.10277184844017029, - "logits/rejected": -3.4453649520874023, - "logps/chosen": -228.67208862304688, - "logps/rejected": -243.7440643310547, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.4406143426895142, - "rewards/margins": 10.229911804199219, - "rewards/rejected": -11.670526504516602, - "step": 771 - }, - { - "epoch": 0.29, - "learning_rate": 1.6354007726974205e-05, - "logits/chosen": -1.9416308403015137, - "logits/rejected": -2.8910257816314697, - "logps/chosen": -481.803955078125, - "logps/rejected": -786.351318359375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 2.861132860183716, - "rewards/margins": 28.916481018066406, - "rewards/rejected": -26.055347442626953, - "step": 772 - }, - { - "epoch": 0.29, - "learning_rate": 1.63447968261858e-05, - "logits/chosen": -5.755320072174072, - "logits/rejected": -0.8890361785888672, - "logps/chosen": -760.85400390625, - "logps/rejected": -2915.73974609375, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.24379883706569672, - "rewards/margins": 31.81865119934082, - "rewards/rejected": -32.06245040893555, - "step": 773 - }, - { - "epoch": 0.29, - "learning_rate": 1.633557690644334e-05, - "logits/chosen": -2.2114498615264893, - "logits/rejected": -6.549992561340332, - "logps/chosen": -370.17578125, - "logps/rejected": -311.0666809082031, - "loss": 0.0003, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.04825439676642418, - "rewards/margins": 15.277871131896973, - "rewards/rejected": -15.22961711883545, - "step": 774 - }, - { - "epoch": 0.29, - "learning_rate": 1.632634798085269e-05, - "logits/chosen": -3.3943893909454346, - "logits/rejected": -3.9570932388305664, - "logps/chosen": -230.46713256835938, - "logps/rejected": -416.1595764160156, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.7120270133018494, - "rewards/margins": 17.86307716369629, - "rewards/rejected": -17.151050567626953, - "step": 775 - }, - { - "epoch": 0.29, - "learning_rate": 1.631711006253251e-05, - "logits/chosen": -2.5766513347625732, - "logits/rejected": -3.3013370037078857, - "logps/chosen": -174.65118408203125, - "logps/rejected": -515.9585571289062, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.6028854250907898, - "rewards/margins": 19.51067543029785, - "rewards/rejected": -18.90778923034668, - "step": 776 - }, - { - "epoch": 0.29, - "learning_rate": 1.630786316461425e-05, - "logits/chosen": -5.480849742889404, - "logits/rejected": -3.1930243968963623, - "logps/chosen": -256.44744873046875, - "logps/rejected": -645.13525390625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.3727600574493408, - "rewards/margins": 15.760828971862793, - "rewards/rejected": -17.133588790893555, - "step": 777 - }, - { - "epoch": 0.29, - "learning_rate": 1.6298607300242117e-05, - "logits/chosen": -2.4163951873779297, - "logits/rejected": -1.2295106649398804, - "logps/chosen": -454.0774841308594, - "logps/rejected": -754.18212890625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.8069671988487244, - "rewards/margins": 9.945950508117676, - "rewards/rejected": -10.752917289733887, - "step": 778 - }, - { - "epoch": 0.29, - "learning_rate": 1.6289342482573073e-05, - "logits/chosen": -5.15702486038208, - "logits/rejected": -1.2840055227279663, - "logps/chosen": -403.0402526855469, - "logps/rejected": -1478.33203125, - "loss": 0.0011, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.9119354486465454, - "rewards/margins": 40.10382843017578, - "rewards/rejected": -39.19189453125, - "step": 779 - }, - { - "epoch": 0.29, - "learning_rate": 1.6280068724776795e-05, - "logits/chosen": -3.9899415969848633, - "logits/rejected": -2.992215156555176, - "logps/chosen": -174.97976684570312, - "logps/rejected": -383.09326171875, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.36665040254592896, - "rewards/margins": 9.987622261047363, - "rewards/rejected": -9.6209716796875, - "step": 780 - }, - { - "epoch": 0.3, - "learning_rate": 1.6270786040035678e-05, - "logits/chosen": -1.6648508310317993, - "logits/rejected": -2.293607234954834, - "logps/chosen": -277.5532531738281, - "logps/rejected": -636.8568115234375, - "loss": 0.0003, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.48236083984375, - "rewards/margins": 28.657651901245117, - "rewards/rejected": -29.140012741088867, - "step": 781 - }, - { - "epoch": 0.3, - "learning_rate": 1.6261494441544805e-05, - "logits/chosen": -3.438751697540283, - "logits/rejected": -1.4906314611434937, - "logps/chosen": -472.0419006347656, - "logps/rejected": -900.9793701171875, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.05062561109662056, - "rewards/margins": 12.194198608398438, - "rewards/rejected": -12.244824409484863, - "step": 782 - }, - { - "epoch": 0.3, - "learning_rate": 1.625219394251192e-05, - "logits/chosen": -4.199590682983398, - "logits/rejected": -4.59289026260376, - "logps/chosen": -319.0233154296875, - "logps/rejected": -536.275634765625, - "loss": 0.0043, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.6190277338027954, - "rewards/margins": 9.539846420288086, - "rewards/rejected": -10.15887451171875, - "step": 783 - }, - { - "epoch": 0.3, - "learning_rate": 1.6242884556157438e-05, - "logits/chosen": -2.46726393699646, - "logits/rejected": -5.448080539703369, - "logps/chosen": -427.6426086425781, - "logps/rejected": -336.4337463378906, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.7685333490371704, - "rewards/margins": 19.87978744506836, - "rewards/rejected": -19.11125373840332, - "step": 784 - }, - { - "epoch": 0.3, - "learning_rate": 1.6233566295714384e-05, - "logits/chosen": -5.057069778442383, - "logits/rejected": -3.5298192501068115, - "logps/chosen": -344.87359619140625, - "logps/rejected": -728.463134765625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.05980835109949112, - "rewards/margins": 12.098638534545898, - "rewards/rejected": -12.158447265625, - "step": 785 - }, - { - "epoch": 0.3, - "learning_rate": 1.6224239174428414e-05, - "logits/chosen": -5.174770832061768, - "logits/rejected": -0.41070085763931274, - "logps/chosen": -273.7756652832031, - "logps/rejected": -1185.6744384765625, - "loss": 0.0005, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.1984161138534546, - "rewards/margins": 16.33536720275879, - "rewards/rejected": -15.136950492858887, - "step": 786 - }, - { - "epoch": 0.3, - "learning_rate": 1.6214903205557774e-05, - "logits/chosen": -7.066616535186768, - "logits/rejected": -3.1128032207489014, - "logps/chosen": -272.1332702636719, - "logps/rejected": -1441.5966796875, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.6448028683662415, - "rewards/margins": 38.094608306884766, - "rewards/rejected": -37.449806213378906, - "step": 787 - }, - { - "epoch": 0.3, - "learning_rate": 1.6205558402373286e-05, - "logits/chosen": -7.665639400482178, - "logits/rejected": -1.3756868839263916, - "logps/chosen": -210.74618530273438, - "logps/rejected": -3291.150390625, - "loss": 0.0085, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.02528991736471653, - "rewards/margins": 55.30327224731445, - "rewards/rejected": -55.32856369018555, - "step": 788 - }, - { - "epoch": 0.3, - "learning_rate": 1.6196204778158334e-05, - "logits/chosen": -0.7077654600143433, - "logits/rejected": -1.042932152748108, - "logps/chosen": -329.28369140625, - "logps/rejected": -747.8328857421875, - "loss": 0.0005, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.6062134504318237, - "rewards/margins": 28.11546516418457, - "rewards/rejected": -26.509252548217773, - "step": 789 - }, - { - "epoch": 0.3, - "learning_rate": 1.618684234620883e-05, - "logits/chosen": -1.3271952867507935, - "logits/rejected": -4.8737592697143555, - "logps/chosen": -382.88580322265625, - "logps/rejected": -351.80279541015625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.8138214349746704, - "rewards/margins": 22.483495712280273, - "rewards/rejected": -24.297317504882812, - "step": 790 - }, - { - "epoch": 0.3, - "learning_rate": 1.617747111983322e-05, - "logits/chosen": -6.548660755157471, - "logits/rejected": -4.272058486938477, - "logps/chosen": -742.1351318359375, - "logps/rejected": -3468.774169921875, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.245214819908142, - "rewards/margins": 34.19089126586914, - "rewards/rejected": -35.43610763549805, - "step": 791 - }, - { - "epoch": 0.3, - "learning_rate": 1.6168091112352443e-05, - "logits/chosen": -6.76589822769165, - "logits/rejected": -2.4912450313568115, - "logps/chosen": -289.30902099609375, - "logps/rejected": -1330.173583984375, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.9955291748046875, - "rewards/margins": 33.42867660522461, - "rewards/rejected": -35.4242057800293, - "step": 792 - }, - { - "epoch": 0.3, - "learning_rate": 1.615870233709992e-05, - "logits/chosen": -3.644087076187134, - "logits/rejected": -4.015446186065674, - "logps/chosen": -239.07470703125, - "logps/rejected": -157.90333557128906, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.7601760625839233, - "rewards/margins": 8.107540130615234, - "rewards/rejected": -9.867715835571289, - "step": 793 - }, - { - "epoch": 0.3, - "learning_rate": 1.6149304807421535e-05, - "logits/chosen": -5.1412200927734375, - "logits/rejected": -1.7745084762573242, - "logps/chosen": -215.99441528320312, - "logps/rejected": -835.800537109375, - "loss": 0.087, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.740313708782196, - "rewards/margins": 20.99567413330078, - "rewards/rejected": -20.255359649658203, - "step": 794 - }, - { - "epoch": 0.3, - "learning_rate": 1.6139898536675622e-05, - "logits/chosen": -2.9629063606262207, - "logits/rejected": -1.7653652429580688, - "logps/chosen": -223.21502685546875, - "logps/rejected": -395.5135192871094, - "loss": 0.0017, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.651043713092804, - "rewards/margins": 9.66589069366455, - "rewards/rejected": -9.014846801757812, - "step": 795 - }, - { - "epoch": 0.3, - "learning_rate": 1.6130483538232932e-05, - "logits/chosen": -0.097666434943676, - "logits/rejected": -0.3089295029640198, - "logps/chosen": -384.6640625, - "logps/rejected": -685.3489379882812, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.3523803651332855, - "rewards/margins": 22.024059295654297, - "rewards/rejected": -22.376440048217773, - "step": 796 - }, - { - "epoch": 0.3, - "learning_rate": 1.612105982547663e-05, - "logits/chosen": -1.9937857389450073, - "logits/rejected": -3.5350871086120605, - "logps/chosen": -352.47625732421875, - "logps/rejected": -663.90966796875, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.14528809487819672, - "rewards/margins": 16.38580322265625, - "rewards/rejected": -16.531091690063477, - "step": 797 - }, - { - "epoch": 0.3, - "learning_rate": 1.6111627411802263e-05, - "logits/chosen": -3.2542929649353027, - "logits/rejected": -2.5157625675201416, - "logps/chosen": -241.06210327148438, - "logps/rejected": -366.85028076171875, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.8045746088027954, - "rewards/margins": 7.498868465423584, - "rewards/rejected": -9.30344295501709, - "step": 798 - }, - { - "epoch": 0.3, - "learning_rate": 1.6102186310617744e-05, - "logits/chosen": -4.34858512878418, - "logits/rejected": -3.527116060256958, - "logps/chosen": -206.34115600585938, - "logps/rejected": -531.5035400390625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.49491578340530396, - "rewards/margins": 17.44683837890625, - "rewards/rejected": -16.951923370361328, - "step": 799 - }, - { - "epoch": 0.3, - "learning_rate": 1.6092736535343343e-05, - "logits/chosen": -3.5342018604278564, - "logits/rejected": -4.215993404388428, - "logps/chosen": -387.1755676269531, - "logps/rejected": -458.9976806640625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.6986786127090454, - "rewards/margins": 11.985857963562012, - "rewards/rejected": -12.684536933898926, - "step": 800 - }, - { - "epoch": 0.3, - "learning_rate": 1.608327809941165e-05, - "logits/chosen": -4.510397911071777, - "logits/rejected": -2.965515613555908, - "logps/chosen": -270.3390808105469, - "logps/rejected": -718.6063232421875, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.5448700189590454, - "rewards/margins": 22.899930953979492, - "rewards/rejected": -22.355060577392578, - "step": 801 - }, - { - "epoch": 0.3, - "learning_rate": 1.607381101626758e-05, - "logits/chosen": -7.3468804359436035, - "logits/rejected": -2.4850189685821533, - "logps/chosen": -222.68963623046875, - "logps/rejected": -978.3743286132812, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.1141265630722046, - "rewards/margins": 10.9750337600708, - "rewards/rejected": -9.860907554626465, - "step": 802 - }, - { - "epoch": 0.3, - "learning_rate": 1.6064335299368322e-05, - "logits/chosen": -6.1096086502075195, - "logits/rejected": -4.00395393371582, - "logps/chosen": -368.1821594238281, - "logps/rejected": -1195.984130859375, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -3.1359925270080566, - "rewards/margins": 37.286895751953125, - "rewards/rejected": -40.422889709472656, - "step": 803 - }, - { - "epoch": 0.3, - "learning_rate": 1.6054850962183354e-05, - "logits/chosen": -4.652542591094971, - "logits/rejected": -1.9516109228134155, - "logps/chosen": -313.04901123046875, - "logps/rejected": -895.265625, - "loss": 0.0002, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.0925140380859375, - "rewards/margins": 24.84531021118164, - "rewards/rejected": -23.752796173095703, - "step": 804 - }, - { - "epoch": 0.3, - "learning_rate": 1.60453580181944e-05, - "logits/chosen": -3.1109652519226074, - "logits/rejected": -2.738631010055542, - "logps/chosen": -499.7956848144531, - "logps/rejected": -758.4774169921875, - "loss": 0.0007, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.159942626953125, - "rewards/margins": 31.558286666870117, - "rewards/rejected": -30.398344039916992, - "step": 805 - }, - { - "epoch": 0.3, - "learning_rate": 1.603585648089541e-05, - "logits/chosen": -1.9091475009918213, - "logits/rejected": -4.603617191314697, - "logps/chosen": -472.3627014160156, - "logps/rejected": -500.9920349121094, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 2.138693332672119, - "rewards/margins": 22.369518280029297, - "rewards/rejected": -20.230825424194336, - "step": 806 - }, - { - "epoch": 0.31, - "learning_rate": 1.6026346363792565e-05, - "logits/chosen": -0.445425420999527, - "logits/rejected": -3.9230422973632812, - "logps/chosen": -526.1372680664062, - "logps/rejected": -407.92926025390625, - "loss": 0.0096, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.18742065131664276, - "rewards/margins": 21.76679229736328, - "rewards/rejected": -21.954212188720703, - "step": 807 - }, - { - "epoch": 0.31, - "learning_rate": 1.6016827680404236e-05, - "logits/chosen": -3.002941131591797, - "logits/rejected": -2.732555389404297, - "logps/chosen": -319.80279541015625, - "logps/rejected": -460.23095703125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.20740357041358948, - "rewards/margins": 10.146051406860352, - "rewards/rejected": -10.35345458984375, - "step": 808 - }, - { - "epoch": 0.31, - "learning_rate": 1.6007300444260963e-05, - "logits/chosen": -2.4952282905578613, - "logits/rejected": -2.9697513580322266, - "logps/chosen": -362.8951721191406, - "logps/rejected": -598.4901123046875, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 2.1694488525390625, - "rewards/margins": 22.724454879760742, - "rewards/rejected": -20.55500602722168, - "step": 809 - }, - { - "epoch": 0.31, - "learning_rate": 1.5997764668905455e-05, - "logits/chosen": -4.9964919090271, - "logits/rejected": -3.1070444583892822, - "logps/chosen": -419.26318359375, - "logps/rejected": -1209.7076416015625, - "loss": 0.0031, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.8183075189590454, - "rewards/margins": 28.99880599975586, - "rewards/rejected": -29.817113876342773, - "step": 810 - }, - { - "epoch": 0.31, - "learning_rate": 1.598822036789255e-05, - "logits/chosen": -2.9376604557037354, - "logits/rejected": -4.623376846313477, - "logps/chosen": -301.24798583984375, - "logps/rejected": -283.6089782714844, - "loss": 0.0866, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.940814197063446, - "rewards/margins": 14.918098449707031, - "rewards/rejected": -13.97728443145752, - "step": 811 - }, - { - "epoch": 0.31, - "learning_rate": 1.5978667554789216e-05, - "logits/chosen": -3.7958860397338867, - "logits/rejected": -2.0545969009399414, - "logps/chosen": -622.6834716796875, - "logps/rejected": -1291.67041015625, - "loss": 0.0002, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.15335693955421448, - "rewards/margins": 29.005273818969727, - "rewards/rejected": -29.15863037109375, - "step": 812 - }, - { - "epoch": 0.31, - "learning_rate": 1.596910624317451e-05, - "logits/chosen": -2.507129430770874, - "logits/rejected": -1.0849545001983643, - "logps/chosen": -641.7704467773438, - "logps/rejected": -1200.5030517578125, - "loss": 0.0029, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.21812133491039276, - "rewards/margins": 19.339149475097656, - "rewards/rejected": -19.557270050048828, - "step": 813 - }, - { - "epoch": 0.31, - "learning_rate": 1.5959536446639572e-05, - "logits/chosen": -1.664772629737854, - "logits/rejected": -5.44380521774292, - "logps/chosen": -207.00840759277344, - "logps/rejected": -121.90606689453125, - "loss": 0.0003, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.7445968985557556, - "rewards/margins": 6.901907444000244, - "rewards/rejected": -7.6465044021606445, - "step": 814 - }, - { - "epoch": 0.31, - "learning_rate": 1.5949958178787605e-05, - "logits/chosen": -0.9752120971679688, - "logits/rejected": -4.872575759887695, - "logps/chosen": -593.0572509765625, - "logps/rejected": -165.6619873046875, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.338433861732483, - "rewards/margins": 9.087616920471191, - "rewards/rejected": -10.426051139831543, - "step": 815 - }, - { - "epoch": 0.31, - "learning_rate": 1.5940371453233853e-05, - "logits/chosen": -1.8007574081420898, - "logits/rejected": -4.526155471801758, - "logps/chosen": -472.9814147949219, - "logps/rejected": -396.85882568359375, - "loss": 0.0006, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.9931915402412415, - "rewards/margins": 28.1702938079834, - "rewards/rejected": -27.17710304260254, - "step": 816 - }, - { - "epoch": 0.31, - "learning_rate": 1.5930776283605585e-05, - "logits/chosen": -1.1878782510757446, - "logits/rejected": -3.4006805419921875, - "logps/chosen": -511.861083984375, - "logps/rejected": -443.22247314453125, - "loss": 0.0866, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.5716155767440796, - "rewards/margins": 17.481321334838867, - "rewards/rejected": -19.052936553955078, - "step": 817 - }, - { - "epoch": 0.31, - "learning_rate": 1.592117268354207e-05, - "logits/chosen": -2.3879332542419434, - "logits/rejected": -2.031684637069702, - "logps/chosen": -525.6354370117188, - "logps/rejected": -855.9268188476562, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.317144751548767, - "rewards/margins": 18.55097770690918, - "rewards/rejected": -19.868122100830078, - "step": 818 - }, - { - "epoch": 0.31, - "learning_rate": 1.5911560666694557e-05, - "logits/chosen": -8.071041107177734, - "logits/rejected": -2.9481041431427, - "logps/chosen": -407.79632568359375, - "logps/rejected": -1860.628662109375, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.13347779214382172, - "rewards/margins": 21.28325843811035, - "rewards/rejected": -21.416736602783203, - "step": 819 - }, - { - "epoch": 0.31, - "learning_rate": 1.5901940246726268e-05, - "logits/chosen": -5.898058891296387, - "logits/rejected": -1.3636616468429565, - "logps/chosen": -306.17376708984375, - "logps/rejected": -1088.62158203125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -4.478335857391357, - "rewards/margins": 28.70340347290039, - "rewards/rejected": -33.181739807128906, - "step": 820 - }, - { - "epoch": 0.31, - "learning_rate": 1.589231143731236e-05, - "logits/chosen": -8.526997566223145, - "logits/rejected": -2.5751583576202393, - "logps/chosen": -259.5245056152344, - "logps/rejected": -1421.736083984375, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.575579822063446, - "rewards/margins": 17.4554386138916, - "rewards/rejected": -18.03101921081543, - "step": 821 - }, - { - "epoch": 0.31, - "learning_rate": 1.5882674252139928e-05, - "logits/chosen": -1.321479082107544, - "logits/rejected": -2.486997604370117, - "logps/chosen": -322.658447265625, - "logps/rejected": -864.9168701171875, - "loss": 0.0003, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.7682159543037415, - "rewards/margins": 46.052574157714844, - "rewards/rejected": -46.8207893371582, - "step": 822 - }, - { - "epoch": 0.31, - "learning_rate": 1.587302870490796e-05, - "logits/chosen": -4.022005081176758, - "logits/rejected": -4.536108493804932, - "logps/chosen": -180.2249755859375, - "logps/rejected": -343.42840576171875, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.259237766265869, - "rewards/margins": 9.98453140258789, - "rewards/rejected": -12.243768692016602, - "step": 823 - }, - { - "epoch": 0.31, - "learning_rate": 1.5863374809327337e-05, - "logits/chosen": -1.053084135055542, - "logits/rejected": -7.417935371398926, - "logps/chosen": -415.84649658203125, - "logps/rejected": -176.75160217285156, - "loss": 0.0002, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.2855377197265625, - "rewards/margins": 10.702688217163086, - "rewards/rejected": -10.417150497436523, - "step": 824 - }, - { - "epoch": 0.31, - "learning_rate": 1.5853712579120807e-05, - "logits/chosen": -6.871009826660156, - "logits/rejected": -3.9546196460723877, - "logps/chosen": -471.247802734375, - "logps/rejected": -2433.048095703125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.908001720905304, - "rewards/margins": 25.36528968811035, - "rewards/rejected": -26.273290634155273, - "step": 825 - }, - { - "epoch": 0.31, - "learning_rate": 1.584404202802296e-05, - "logits/chosen": -1.0297231674194336, - "logits/rejected": -5.345248222351074, - "logps/chosen": -376.39202880859375, - "logps/rejected": -157.58497619628906, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.571362316608429, - "rewards/margins": 10.245321273803711, - "rewards/rejected": -10.816683769226074, - "step": 826 - }, - { - "epoch": 0.31, - "learning_rate": 1.5834363169780227e-05, - "logits/chosen": -1.6756938695907593, - "logits/rejected": -6.668999195098877, - "logps/chosen": -548.9986572265625, - "logps/rejected": -50.37418746948242, - "loss": 0.0019, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.363134741783142, - "rewards/margins": 4.39694881439209, - "rewards/rejected": -3.033813953399658, - "step": 827 - }, - { - "epoch": 0.31, - "learning_rate": 1.582467601815083e-05, - "logits/chosen": -2.129729986190796, - "logits/rejected": -2.3951292037963867, - "logps/chosen": -327.4803466796875, - "logps/rejected": -405.30816650390625, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.257092237472534, - "rewards/margins": 7.135955810546875, - "rewards/rejected": -9.393048286437988, - "step": 828 - }, - { - "epoch": 0.31, - "learning_rate": 1.5814980586904795e-05, - "logits/chosen": -2.01412034034729, - "logits/rejected": -3.969203233718872, - "logps/chosen": -256.5850524902344, - "logps/rejected": -370.134033203125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.6597182750701904, - "rewards/margins": 15.943577766418457, - "rewards/rejected": -18.603296279907227, - "step": 829 - }, - { - "epoch": 0.31, - "learning_rate": 1.5805276889823903e-05, - "logits/chosen": -6.280507564544678, - "logits/rejected": -1.9800963401794434, - "logps/chosen": -420.91876220703125, - "logps/rejected": -1746.6287841796875, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.7392852902412415, - "rewards/margins": 28.84898567199707, - "rewards/rejected": -29.58827018737793, - "step": 830 - }, - { - "epoch": 0.31, - "learning_rate": 1.57955649407017e-05, - "logits/chosen": -8.139763832092285, - "logits/rejected": -2.426115036010742, - "logps/chosen": -472.60174560546875, - "logps/rejected": -3606.685791015625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.715802013874054, - "rewards/margins": 19.09052276611328, - "rewards/rejected": -19.806324005126953, - "step": 831 - }, - { - "epoch": 0.31, - "learning_rate": 1.578584475334345e-05, - "logits/chosen": -1.8968262672424316, - "logits/rejected": -3.9264087677001953, - "logps/chosen": -651.0703125, - "logps/rejected": -688.00390625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.621417224407196, - "rewards/margins": 18.7185001373291, - "rewards/rejected": -18.097082138061523, - "step": 832 - }, - { - "epoch": 0.31, - "learning_rate": 1.5776116341566134e-05, - "logits/chosen": -4.418582916259766, - "logits/rejected": -4.712064743041992, - "logps/chosen": -411.7496337890625, - "logps/rejected": -394.34259033203125, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.0629334449768066, - "rewards/margins": 16.631868362426758, - "rewards/rejected": -18.694801330566406, - "step": 833 - }, - { - "epoch": 0.32, - "learning_rate": 1.5766379719198418e-05, - "logits/chosen": -1.9360095262527466, - "logits/rejected": -3.178572177886963, - "logps/chosen": -263.3213806152344, - "logps/rejected": -451.5997619628906, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.803802490234375, - "rewards/margins": 22.223703384399414, - "rewards/rejected": -23.02750587463379, - "step": 834 - }, - { - "epoch": 0.32, - "learning_rate": 1.575663490008065e-05, - "logits/chosen": -4.077666759490967, - "logits/rejected": -1.8610451221466064, - "logps/chosen": -462.70989990234375, - "logps/rejected": -843.4554443359375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.1932740211486816, - "rewards/margins": 12.242181777954102, - "rewards/rejected": -14.435455322265625, - "step": 835 - }, - { - "epoch": 0.32, - "learning_rate": 1.5746881898064813e-05, - "logits/chosen": -5.416261672973633, - "logits/rejected": -2.2519419193267822, - "logps/chosen": -291.50006103515625, - "logps/rejected": -727.4002685546875, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.12103271484375, - "rewards/margins": 7.306933879852295, - "rewards/rejected": -7.427966594696045, - "step": 836 - }, - { - "epoch": 0.32, - "learning_rate": 1.5737120727014535e-05, - "logits/chosen": -5.042913913726807, - "logits/rejected": -1.7876508235931396, - "logps/chosen": -358.8048095703125, - "logps/rejected": -1309.3016357421875, - "loss": 0.0005, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.3845306634902954, - "rewards/margins": 31.179996490478516, - "rewards/rejected": -29.79546546936035, - "step": 837 - }, - { - "epoch": 0.32, - "learning_rate": 1.5727351400805054e-05, - "logits/chosen": -2.026035785675049, - "logits/rejected": -2.3170177936553955, - "logps/chosen": -216.13653564453125, - "logps/rejected": -625.8255004882812, - "loss": 0.0009, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.5007171630859375, - "rewards/margins": 21.025043487548828, - "rewards/rejected": -22.525760650634766, - "step": 838 - }, - { - "epoch": 0.32, - "learning_rate": 1.5717573933323195e-05, - "logits/chosen": -2.6867482662200928, - "logits/rejected": -5.330452919006348, - "logps/chosen": -950.5338134765625, - "logps/rejected": -1060.8502197265625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.255895972251892, - "rewards/margins": 35.925987243652344, - "rewards/rejected": -37.181884765625, - "step": 839 - }, - { - "epoch": 0.32, - "learning_rate": 1.5707788338467362e-05, - "logits/chosen": -2.495518684387207, - "logits/rejected": -4.428520679473877, - "logps/chosen": -467.8257141113281, - "logps/rejected": -528.2054443359375, - "loss": 0.0002, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.1033233404159546, - "rewards/margins": 18.650461196899414, - "rewards/rejected": -17.547138214111328, - "step": 840 - }, - { - "epoch": 0.32, - "learning_rate": 1.569799463014751e-05, - "logits/chosen": -3.3007867336273193, - "logits/rejected": -1.7791675329208374, - "logps/chosen": -423.23309326171875, - "logps/rejected": -827.2571411132812, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -3.7110657691955566, - "rewards/margins": 16.074451446533203, - "rewards/rejected": -19.7855167388916, - "step": 841 - }, - { - "epoch": 0.32, - "learning_rate": 1.5688192822285116e-05, - "logits/chosen": -3.776355266571045, - "logits/rejected": -4.486720561981201, - "logps/chosen": -299.7957763671875, - "logps/rejected": -251.15805053710938, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.25777894258499146, - "rewards/margins": 9.480243682861328, - "rewards/rejected": -9.222464561462402, - "step": 842 - }, - { - "epoch": 0.32, - "learning_rate": 1.567838292881319e-05, - "logits/chosen": -5.305450916290283, - "logits/rejected": -2.128270149230957, - "logps/chosen": -259.5921630859375, - "logps/rejected": -590.1390380859375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.675537109375, - "rewards/margins": 11.091708183288574, - "rewards/rejected": -10.416171073913574, - "step": 843 - }, - { - "epoch": 0.32, - "learning_rate": 1.5668564963676224e-05, - "logits/chosen": -2.058183431625366, - "logits/rejected": -3.5151748657226562, - "logps/chosen": -195.92266845703125, - "logps/rejected": -312.98681640625, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.07503052055835724, - "rewards/margins": 12.405105590820312, - "rewards/rejected": -12.33007526397705, - "step": 844 - }, - { - "epoch": 0.32, - "learning_rate": 1.5658738940830185e-05, - "logits/chosen": -6.005126953125, - "logits/rejected": -2.0511069297790527, - "logps/chosen": -408.6481628417969, - "logps/rejected": -1536.20263671875, - "loss": 0.0002, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.7392975091934204, - "rewards/margins": 27.13854217529297, - "rewards/rejected": -26.39924430847168, - "step": 845 - }, - { - "epoch": 0.32, - "learning_rate": 1.564890487424249e-05, - "logits/chosen": -1.6810226440429688, - "logits/rejected": -5.924104690551758, - "logps/chosen": -326.43609619140625, - "logps/rejected": -227.30767822265625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.8205932974815369, - "rewards/margins": 12.17119312286377, - "rewards/rejected": -11.350600242614746, - "step": 846 - }, - { - "epoch": 0.32, - "learning_rate": 1.5639062777892e-05, - "logits/chosen": -2.604872465133667, - "logits/rejected": -5.122077465057373, - "logps/chosen": -371.59912109375, - "logps/rejected": -265.2523498535156, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.7601258754730225, - "rewards/margins": 14.81422233581543, - "rewards/rejected": -17.57434844970703, - "step": 847 - }, - { - "epoch": 0.32, - "learning_rate": 1.562921266576898e-05, - "logits/chosen": -5.161761283874512, - "logits/rejected": -3.8619983196258545, - "logps/chosen": -227.81454467773438, - "logps/rejected": -603.5448608398438, - "loss": 0.0023, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.601452648639679, - "rewards/margins": 19.33453369140625, - "rewards/rejected": -18.733081817626953, - "step": 848 - }, - { - "epoch": 0.32, - "learning_rate": 1.5619354551875093e-05, - "logits/chosen": -4.2889533042907715, - "logits/rejected": -2.7206385135650635, - "logps/chosen": -413.000244140625, - "logps/rejected": -862.193603515625, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.4971405267715454, - "rewards/margins": 23.733177185058594, - "rewards/rejected": -22.23603630065918, - "step": 849 - }, - { - "epoch": 0.32, - "learning_rate": 1.560948845022338e-05, - "logits/chosen": -3.668792724609375, - "logits/rejected": -3.829651117324829, - "logps/chosen": -271.0925598144531, - "logps/rejected": -445.53106689453125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.9110870361328125, - "rewards/margins": 15.29003620147705, - "rewards/rejected": -14.378949165344238, - "step": 850 - }, - { - "epoch": 0.32, - "learning_rate": 1.5599614374838226e-05, - "logits/chosen": -0.8207170367240906, - "logits/rejected": -4.205193996429443, - "logps/chosen": -473.301025390625, - "logps/rejected": -316.5613098144531, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.297027587890625, - "rewards/margins": 18.34845733642578, - "rewards/rejected": -20.645484924316406, - "step": 851 - }, - { - "epoch": 0.32, - "learning_rate": 1.5589732339755362e-05, - "logits/chosen": -0.9136608242988586, - "logits/rejected": -2.5364437103271484, - "logps/chosen": -569.7882080078125, - "logps/rejected": -678.9422607421875, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -3.422375440597534, - "rewards/margins": 33.007747650146484, - "rewards/rejected": -36.43012237548828, - "step": 852 - }, - { - "epoch": 0.32, - "learning_rate": 1.5579842359021834e-05, - "logits/chosen": -3.3029675483703613, - "logits/rejected": -5.13830041885376, - "logps/chosen": -397.5853271484375, - "logps/rejected": -411.7459411621094, - "loss": 0.065, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.02186889760196209, - "rewards/margins": 13.825244903564453, - "rewards/rejected": -13.847113609313965, - "step": 853 - }, - { - "epoch": 0.32, - "learning_rate": 1.556994444669597e-05, - "logits/chosen": -4.216238975524902, - "logits/rejected": -2.5456314086914062, - "logps/chosen": -423.8450927734375, - "logps/rejected": -1266.9285888671875, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.265188604593277, - "rewards/margins": 58.49336242675781, - "rewards/rejected": -58.75855255126953, - "step": 854 - }, - { - "epoch": 0.32, - "learning_rate": 1.5560038616847384e-05, - "logits/chosen": -5.478473663330078, - "logits/rejected": -2.065547466278076, - "logps/chosen": -454.577392578125, - "logps/rejected": -1500.744384765625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.9310547113418579, - "rewards/margins": 37.43203353881836, - "rewards/rejected": -38.36308670043945, - "step": 855 - }, - { - "epoch": 0.32, - "learning_rate": 1.5550124883556938e-05, - "logits/chosen": -3.98691725730896, - "logits/rejected": -1.7813276052474976, - "logps/chosen": -256.0030517578125, - "logps/rejected": -456.83868408203125, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.8561813831329346, - "rewards/margins": 7.179762840270996, - "rewards/rejected": -10.035943984985352, - "step": 856 - }, - { - "epoch": 0.32, - "learning_rate": 1.5540203260916728e-05, - "logits/chosen": -3.7890679836273193, - "logits/rejected": -6.827752113342285, - "logps/chosen": -492.2259216308594, - "logps/rejected": -327.25238037109375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.4145141541957855, - "rewards/margins": 15.710092544555664, - "rewards/rejected": -16.12460708618164, - "step": 857 - }, - { - "epoch": 0.32, - "learning_rate": 1.5530273763030076e-05, - "logits/chosen": -3.6219873428344727, - "logits/rejected": -2.7720799446105957, - "logps/chosen": -108.37477111816406, - "logps/rejected": -424.0787048339844, - "loss": 0.0003, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.978642225265503, - "rewards/margins": 18.087448120117188, - "rewards/rejected": -21.066089630126953, - "step": 858 - }, - { - "epoch": 0.32, - "learning_rate": 1.5520336404011483e-05, - "logits/chosen": -6.9780168533325195, - "logits/rejected": -3.4680612087249756, - "logps/chosen": -302.52276611328125, - "logps/rejected": -1159.0478515625, - "loss": 0.0005, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.32588502764701843, - "rewards/margins": 19.12647247314453, - "rewards/rejected": -18.800586700439453, - "step": 859 - }, - { - "epoch": 0.33, - "learning_rate": 1.5510391197986635e-05, - "logits/chosen": -7.237054824829102, - "logits/rejected": -2.5692591667175293, - "logps/chosen": -334.5677490234375, - "logps/rejected": -1555.8720703125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.4963775873184204, - "rewards/margins": 39.85943603515625, - "rewards/rejected": -41.355812072753906, - "step": 860 - }, - { - "epoch": 0.33, - "learning_rate": 1.550043815909237e-05, - "logits/chosen": -3.906733512878418, - "logits/rejected": -1.3963533639907837, - "logps/chosen": -206.9257049560547, - "logps/rejected": -935.4628295898438, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.7000686526298523, - "rewards/margins": 27.500289916992188, - "rewards/rejected": -26.800220489501953, - "step": 861 - }, - { - "epoch": 0.33, - "learning_rate": 1.5490477301476648e-05, - "logits/chosen": -5.008357048034668, - "logits/rejected": -0.6581388115882874, - "logps/chosen": -242.65151977539062, - "logps/rejected": -1024.26171875, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.300959825515747, - "rewards/margins": 34.511600494384766, - "rewards/rejected": -35.81256103515625, - "step": 862 - }, - { - "epoch": 0.33, - "learning_rate": 1.548050863929857e-05, - "logits/chosen": -2.6241095066070557, - "logits/rejected": -2.585268259048462, - "logps/chosen": -271.29541015625, - "logps/rejected": -460.15570068359375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.05080566555261612, - "rewards/margins": 15.01916217803955, - "rewards/rejected": -14.968356132507324, - "step": 863 - }, - { - "epoch": 0.33, - "learning_rate": 1.5470532186728303e-05, - "logits/chosen": -3.9810075759887695, - "logits/rejected": -2.3980348110198975, - "logps/chosen": -483.07781982421875, - "logps/rejected": -1061.829345703125, - "loss": 0.0007, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.5045074820518494, - "rewards/margins": 28.478965759277344, - "rewards/rejected": -28.98347282409668, - "step": 864 - }, - { - "epoch": 0.33, - "learning_rate": 1.5460547957947105e-05, - "logits/chosen": -4.974402904510498, - "logits/rejected": -3.913308620452881, - "logps/chosen": -405.50787353515625, - "logps/rejected": -1220.338623046875, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.414947509765625, - "rewards/margins": 34.831939697265625, - "rewards/rejected": -34.4169921875, - "step": 865 - }, - { - "epoch": 0.33, - "learning_rate": 1.5450555967147282e-05, - "logits/chosen": -7.992591857910156, - "logits/rejected": -2.055237054824829, - "logps/chosen": -476.5001220703125, - "logps/rejected": -2731.689697265625, - "loss": 0.0867, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.7684448957443237, - "rewards/margins": 26.959924697875977, - "rewards/rejected": -28.728368759155273, - "step": 866 - }, - { - "epoch": 0.33, - "learning_rate": 1.5440556228532168e-05, - "logits/chosen": -6.126350402832031, - "logits/rejected": -1.2156282663345337, - "logps/chosen": -339.02679443359375, - "logps/rejected": -1959.933837890625, - "loss": 0.0082, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.2154022455215454, - "rewards/margins": 57.13853454589844, - "rewards/rejected": -55.923133850097656, - "step": 867 - }, - { - "epoch": 0.33, - "learning_rate": 1.5430548756316127e-05, - "logits/chosen": -2.161606550216675, - "logits/rejected": -1.638352870941162, - "logps/chosen": -212.90170288085938, - "logps/rejected": -642.54296875, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.2382782697677612, - "rewards/margins": 27.210786819458008, - "rewards/rejected": -25.972509384155273, - "step": 868 - }, - { - "epoch": 0.33, - "learning_rate": 1.5420533564724495e-05, - "logits/chosen": -1.163578748703003, - "logits/rejected": -4.60414981842041, - "logps/chosen": -318.51922607421875, - "logps/rejected": -295.2796630859375, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.060522437095642, - "rewards/margins": 12.193768501281738, - "rewards/rejected": -13.254290580749512, - "step": 869 - }, - { - "epoch": 0.33, - "learning_rate": 1.5410510667993596e-05, - "logits/chosen": -3.301091432571411, - "logits/rejected": -5.634791374206543, - "logps/chosen": -293.32220458984375, - "logps/rejected": -232.11996459960938, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.3225158751010895, - "rewards/margins": 15.398974418640137, - "rewards/rejected": -15.076458930969238, - "step": 870 - }, - { - "epoch": 0.33, - "learning_rate": 1.5400480080370702e-05, - "logits/chosen": -2.285127878189087, - "logits/rejected": -3.123353958129883, - "logps/chosen": -311.93914794921875, - "logps/rejected": -465.8014831542969, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.374176025390625, - "rewards/margins": 14.945703506469727, - "rewards/rejected": -17.31987953186035, - "step": 871 - }, - { - "epoch": 0.33, - "learning_rate": 1.5390441816114022e-05, - "logits/chosen": -3.4536755084991455, - "logits/rejected": -4.241966247558594, - "logps/chosen": -147.86575317382812, - "logps/rejected": -264.36346435546875, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.03914184495806694, - "rewards/margins": 9.763114929199219, - "rewards/rejected": -9.80225658416748, - "step": 872 - }, - { - "epoch": 0.33, - "learning_rate": 1.538039588949266e-05, - "logits/chosen": -1.6937050819396973, - "logits/rejected": -4.509896755218506, - "logps/chosen": -289.23486328125, - "logps/rejected": -402.58465576171875, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 3.64487624168396, - "rewards/margins": 19.313066482543945, - "rewards/rejected": -15.668190002441406, - "step": 873 - }, - { - "epoch": 0.33, - "learning_rate": 1.5370342314786638e-05, - "logits/chosen": -1.6857430934906006, - "logits/rejected": -4.449159145355225, - "logps/chosen": -293.3109130859375, - "logps/rejected": -397.1782531738281, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.0577545166015625, - "rewards/margins": 16.325061798095703, - "rewards/rejected": -17.382816314697266, - "step": 874 - }, - { - "epoch": 0.33, - "learning_rate": 1.5360281106286823e-05, - "logits/chosen": -4.560848236083984, - "logits/rejected": -2.8477694988250732, - "logps/chosen": -306.7747497558594, - "logps/rejected": -683.18896484375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.230841040611267, - "rewards/margins": 12.764978408813477, - "rewards/rejected": -13.995819091796875, - "step": 875 - }, - { - "epoch": 0.33, - "learning_rate": 1.5350212278294952e-05, - "logits/chosen": -4.318857669830322, - "logits/rejected": -0.9654077291488647, - "logps/chosen": -408.21978759765625, - "logps/rejected": -1189.36767578125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.5729736685752869, - "rewards/margins": 35.761436462402344, - "rewards/rejected": -36.33441162109375, - "step": 876 - }, - { - "epoch": 0.33, - "learning_rate": 1.534013584512359e-05, - "logits/chosen": -1.8323723077774048, - "logits/rejected": -5.895205974578857, - "logps/chosen": -366.6181640625, - "logps/rejected": -134.21282958984375, - "loss": 0.0005, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.10950317233800888, - "rewards/margins": 5.42671537399292, - "rewards/rejected": -5.317212104797363, - "step": 877 - }, - { - "epoch": 0.33, - "learning_rate": 1.5330051821096107e-05, - "logits/chosen": -5.22997522354126, - "logits/rejected": -4.446528911590576, - "logps/chosen": -240.1925048828125, - "logps/rejected": -640.077392578125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.3977386951446533, - "rewards/margins": 15.059002876281738, - "rewards/rejected": -13.661264419555664, - "step": 878 - }, - { - "epoch": 0.33, - "learning_rate": 1.531996022054666e-05, - "logits/chosen": -6.072815418243408, - "logits/rejected": -0.8177178502082825, - "logps/chosen": -503.99090576171875, - "logps/rejected": -1558.0277099609375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.1953338384628296, - "rewards/margins": 19.37139129638672, - "rewards/rejected": -20.56672477722168, - "step": 879 - }, - { - "epoch": 0.33, - "learning_rate": 1.530986105782019e-05, - "logits/chosen": -0.5721476674079895, - "logits/rejected": -2.7547388076782227, - "logps/chosen": -426.71087646484375, - "logps/rejected": -660.0233154296875, - "loss": 0.0011, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.572759985923767, - "rewards/margins": 24.424423217773438, - "rewards/rejected": -25.997182846069336, - "step": 880 - }, - { - "epoch": 0.33, - "learning_rate": 1.5299754347272375e-05, - "logits/chosen": -4.013236045837402, - "logits/rejected": -7.850884437561035, - "logps/chosen": -360.09814453125, - "logps/rejected": -155.79550170898438, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.5574615597724915, - "rewards/margins": 9.282767295837402, - "rewards/rejected": -8.725305557250977, - "step": 881 - }, - { - "epoch": 0.33, - "learning_rate": 1.5289640103269626e-05, - "logits/chosen": -1.9382398128509521, - "logits/rejected": -0.8236268758773804, - "logps/chosen": -414.5980224609375, - "logps/rejected": -919.985595703125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.220330834388733, - "rewards/margins": 30.21649742126465, - "rewards/rejected": -31.43682861328125, - "step": 882 - }, - { - "epoch": 0.33, - "learning_rate": 1.5279518340189063e-05, - "logits/chosen": -8.336654663085938, - "logits/rejected": -3.1586127281188965, - "logps/chosen": -374.1446533203125, - "logps/rejected": -5130.20947265625, - "loss": 0.0134, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.17225037515163422, - "rewards/margins": 37.922576904296875, - "rewards/rejected": -38.094825744628906, - "step": 883 - }, - { - "epoch": 0.33, - "learning_rate": 1.5269389072418488e-05, - "logits/chosen": -1.5406309366226196, - "logits/rejected": -3.406872510910034, - "logps/chosen": -318.64105224609375, - "logps/rejected": -257.80999755859375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.5805420279502869, - "rewards/margins": 12.047988891601562, - "rewards/rejected": -11.467447280883789, - "step": 884 - }, - { - "epoch": 0.33, - "learning_rate": 1.5259252314356387e-05, - "logits/chosen": -1.4499844312667847, - "logits/rejected": -3.7957615852355957, - "logps/chosen": -195.1678009033203, - "logps/rejected": -215.16415405273438, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.6485443115234375, - "rewards/margins": 8.04131031036377, - "rewards/rejected": -8.689854621887207, - "step": 885 - }, - { - "epoch": 0.33, - "learning_rate": 1.5249108080411876e-05, - "logits/chosen": -6.908774375915527, - "logits/rejected": -2.440603256225586, - "logps/chosen": -356.6402587890625, - "logps/rejected": -1403.4959716796875, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.18400268256664276, - "rewards/margins": 19.789093017578125, - "rewards/rejected": -19.605091094970703, - "step": 886 - }, - { - "epoch": 0.34, - "learning_rate": 1.5238956385004703e-05, - "logits/chosen": -7.209001541137695, - "logits/rejected": -1.7143363952636719, - "logps/chosen": -451.779052734375, - "logps/rejected": -1665.705078125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.13414612412452698, - "rewards/margins": 12.603232383728027, - "rewards/rejected": -12.737378120422363, - "step": 887 - }, - { - "epoch": 0.34, - "learning_rate": 1.5228797242565229e-05, - "logits/chosen": -0.9409687519073486, - "logits/rejected": -5.026749134063721, - "logps/chosen": -352.6836853027344, - "logps/rejected": -199.2499542236328, - "loss": 0.0013, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.6768218874931335, - "rewards/margins": 6.417109966278076, - "rewards/rejected": -7.093931674957275, - "step": 888 - }, - { - "epoch": 0.34, - "learning_rate": 1.5218630667534391e-05, - "logits/chosen": -5.383164882659912, - "logits/rejected": -3.9134581089019775, - "logps/chosen": -219.7340545654297, - "logps/rejected": -907.1258544921875, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.5143997073173523, - "rewards/margins": 26.90420150756836, - "rewards/rejected": -26.389801025390625, - "step": 889 - }, - { - "epoch": 0.34, - "learning_rate": 1.5208456674363693e-05, - "logits/chosen": -6.853000640869141, - "logits/rejected": -2.1987650394439697, - "logps/chosen": -297.2287902832031, - "logps/rejected": -1069.776123046875, - "loss": 0.0017, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.8401458859443665, - "rewards/margins": 10.803359985351562, - "rewards/rejected": -11.643506050109863, - "step": 890 - }, - { - "epoch": 0.34, - "learning_rate": 1.5198275277515195e-05, - "logits/chosen": -1.300344467163086, - "logits/rejected": -4.986264228820801, - "logps/chosen": -596.2808227539062, - "logps/rejected": -244.8779296875, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.7883362770080566, - "rewards/margins": 7.609256267547607, - "rewards/rejected": -10.397592544555664, - "step": 891 - }, - { - "epoch": 0.34, - "learning_rate": 1.5188086491461467e-05, - "logits/chosen": -4.239584445953369, - "logits/rejected": -1.7836834192276, - "logps/chosen": -360.65057373046875, - "logps/rejected": -1129.69189453125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.06791992485523224, - "rewards/margins": 22.494922637939453, - "rewards/rejected": -22.427001953125, - "step": 892 - }, - { - "epoch": 0.34, - "learning_rate": 1.5177890330685588e-05, - "logits/chosen": -2.4390780925750732, - "logits/rejected": -3.3958945274353027, - "logps/chosen": -213.98358154296875, - "logps/rejected": -377.6634826660156, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.0452606678009033, - "rewards/margins": 13.67615032196045, - "rewards/rejected": -12.630889892578125, - "step": 893 - }, - { - "epoch": 0.34, - "learning_rate": 1.5167686809681117e-05, - "logits/chosen": -0.5720546841621399, - "logits/rejected": -5.236979961395264, - "logps/chosen": -435.97625732421875, - "logps/rejected": -291.8106384277344, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.778063952922821, - "rewards/margins": 14.139485359191895, - "rewards/rejected": -14.917549133300781, - "step": 894 - }, - { - "epoch": 0.34, - "learning_rate": 1.5157475942952085e-05, - "logits/chosen": -0.28485938906669617, - "logits/rejected": -4.179095268249512, - "logps/chosen": -409.39910888671875, - "logps/rejected": -285.3432922363281, - "loss": 0.0018, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.6145049929618835, - "rewards/margins": 17.049488067626953, - "rewards/rejected": -16.434982299804688, - "step": 895 - }, - { - "epoch": 0.34, - "learning_rate": 1.5147257745012956e-05, - "logits/chosen": -7.216695308685303, - "logits/rejected": -3.152127981185913, - "logps/chosen": -649.3301391601562, - "logps/rejected": -3019.4345703125, - "loss": 0.0003, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.742706298828125, - "rewards/margins": 50.086116790771484, - "rewards/rejected": -49.34341049194336, - "step": 896 - }, - { - "epoch": 0.34, - "learning_rate": 1.5137032230388613e-05, - "logits/chosen": -2.763584852218628, - "logits/rejected": -2.677694320678711, - "logps/chosen": -426.7242126464844, - "logps/rejected": -796.480224609375, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.5464508533477783, - "rewards/margins": 21.998666763305664, - "rewards/rejected": -20.45221519470215, - "step": 897 - }, - { - "epoch": 0.34, - "learning_rate": 1.5126799413614346e-05, - "logits/chosen": -5.893125534057617, - "logits/rejected": -1.8952137231826782, - "logps/chosen": -369.0450744628906, - "logps/rejected": -1704.76806640625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.0026336670853197575, - "rewards/margins": 34.43241500854492, - "rewards/rejected": -34.4350471496582, - "step": 898 - }, - { - "epoch": 0.34, - "learning_rate": 1.5116559309235825e-05, - "logits/chosen": -7.13116455078125, - "logits/rejected": -1.3636746406555176, - "logps/chosen": -255.88970947265625, - "logps/rejected": -1494.09716796875, - "loss": 0.0031, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.4472412168979645, - "rewards/margins": 22.304271697998047, - "rewards/rejected": -21.857030868530273, - "step": 899 - }, - { - "epoch": 0.34, - "learning_rate": 1.510631193180907e-05, - "logits/chosen": -1.4112637042999268, - "logits/rejected": -5.231418609619141, - "logps/chosen": -205.43472290039062, - "logps/rejected": -152.79437255859375, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.3796372413635254, - "rewards/margins": 8.962566375732422, - "rewards/rejected": -11.342203140258789, - "step": 900 - }, - { - "epoch": 0.34, - "learning_rate": 1.5096057295900455e-05, - "logits/chosen": -3.182907819747925, - "logits/rejected": -1.8152530193328857, - "logps/chosen": -319.88482666015625, - "logps/rejected": -706.4393310546875, - "loss": 0.0043, - "rewards/accuracies": 1.0, - "rewards/chosen": -4.600274562835693, - "rewards/margins": 18.19818687438965, - "rewards/rejected": -22.7984619140625, - "step": 901 - }, - { - "epoch": 0.34, - "learning_rate": 1.5085795416086655e-05, - "logits/chosen": -2.2809267044067383, - "logits/rejected": -3.9823617935180664, - "logps/chosen": -408.90631103515625, - "logps/rejected": -171.31219482421875, - "loss": 0.0002, - "rewards/accuracies": 1.0, - "rewards/chosen": 2.366802930831909, - "rewards/margins": 13.005465507507324, - "rewards/rejected": -10.638662338256836, - "step": 902 - }, - { - "epoch": 0.34, - "learning_rate": 1.5075526306954653e-05, - "logits/chosen": -0.6451720595359802, - "logits/rejected": -1.6351882219314575, - "logps/chosen": -319.67779541015625, - "logps/rejected": -580.7993774414062, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.495828241109848, - "rewards/margins": 23.76743507385254, - "rewards/rejected": -23.2716064453125, - "step": 903 - }, - { - "epoch": 0.34, - "learning_rate": 1.5065249983101706e-05, - "logits/chosen": -1.29038667678833, - "logits/rejected": -3.3314504623413086, - "logps/chosen": -122.61613464355469, - "logps/rejected": -237.10507202148438, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.3837051391601562, - "rewards/margins": 13.999025344848633, - "rewards/rejected": -12.615320205688477, - "step": 904 - }, - { - "epoch": 0.34, - "learning_rate": 1.5054966459135323e-05, - "logits/chosen": -5.725091934204102, - "logits/rejected": -1.646787166595459, - "logps/chosen": -381.50042724609375, - "logps/rejected": -1168.8607177734375, - "loss": 0.0004, - "rewards/accuracies": 1.0, - "rewards/chosen": -3.819500684738159, - "rewards/margins": 9.238250732421875, - "rewards/rejected": -13.057751655578613, - "step": 905 - }, - { - "epoch": 0.34, - "learning_rate": 1.5044675749673254e-05, - "logits/chosen": -2.4005472660064697, - "logits/rejected": -0.9966301918029785, - "logps/chosen": -537.9364624023438, - "logps/rejected": -1310.579833984375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.24152831733226776, - "rewards/margins": 37.50102615356445, - "rewards/rejected": -37.259498596191406, - "step": 906 - }, - { - "epoch": 0.34, - "learning_rate": 1.5034377869343453e-05, - "logits/chosen": -1.9725167751312256, - "logits/rejected": -4.97484827041626, - "logps/chosen": -432.0873718261719, - "logps/rejected": -546.626220703125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 2.2290711402893066, - "rewards/margins": 20.728271484375, - "rewards/rejected": -18.49920082092285, - "step": 907 - }, - { - "epoch": 0.34, - "learning_rate": 1.5024072832784078e-05, - "logits/chosen": -0.5373910665512085, - "logits/rejected": -4.4776997566223145, - "logps/chosen": -585.2174682617188, - "logps/rejected": -292.01470947265625, - "loss": 0.0033, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.1195862293243408, - "rewards/margins": 12.99110221862793, - "rewards/rejected": -11.871516227722168, - "step": 908 - }, - { - "epoch": 0.34, - "learning_rate": 1.5013760654643456e-05, - "logits/chosen": -1.7978912591934204, - "logits/rejected": -1.3184561729431152, - "logps/chosen": -410.81988525390625, - "logps/rejected": -852.6058349609375, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.6353393793106079, - "rewards/margins": 29.685266494750977, - "rewards/rejected": -30.320606231689453, - "step": 909 - }, - { - "epoch": 0.34, - "learning_rate": 1.5003441349580059e-05, - "logits/chosen": -1.2659696340560913, - "logits/rejected": -4.169007301330566, - "logps/chosen": -215.84915161132812, - "logps/rejected": -122.3460922241211, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 2.392657518386841, - "rewards/margins": 10.321593284606934, - "rewards/rejected": -7.928936004638672, - "step": 910 - }, - { - "epoch": 0.34, - "learning_rate": 1.49931149322625e-05, - "logits/chosen": -1.8287544250488281, - "logits/rejected": -5.199499607086182, - "logps/chosen": -188.19122314453125, - "logps/rejected": -197.75460815429688, - "loss": 0.0002, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.1947342157363892, - "rewards/margins": 11.622724533081055, - "rewards/rejected": -12.817459106445312, - "step": 911 - }, - { - "epoch": 0.34, - "learning_rate": 1.4982781417369496e-05, - "logits/chosen": -1.683445692062378, - "logits/rejected": -1.373482346534729, - "logps/chosen": -213.93756103515625, - "logps/rejected": -313.6580810546875, - "loss": 0.0005, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.0144928693771362, - "rewards/margins": 5.595654010772705, - "rewards/rejected": -6.610146999359131, - "step": 912 - }, - { - "epoch": 0.35, - "learning_rate": 1.4972440819589853e-05, - "logits/chosen": -2.609800100326538, - "logits/rejected": -2.061066150665283, - "logps/chosen": -396.8770446777344, - "logps/rejected": -1190.8271484375, - "loss": 0.0006, - "rewards/accuracies": 1.0, - "rewards/chosen": 2.94754958152771, - "rewards/margins": 47.17201232910156, - "rewards/rejected": -44.224464416503906, - "step": 913 - }, - { - "epoch": 0.35, - "learning_rate": 1.4962093153622445e-05, - "logits/chosen": -3.6813340187072754, - "logits/rejected": -1.8329112529754639, - "logps/chosen": -204.87258911132812, - "logps/rejected": -701.8762817382812, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.22202758491039276, - "rewards/margins": 18.8754825592041, - "rewards/rejected": -18.65345573425293, - "step": 914 - }, - { - "epoch": 0.35, - "learning_rate": 1.4951738434176197e-05, - "logits/chosen": -5.56651496887207, - "logits/rejected": -2.3376569747924805, - "logps/chosen": -290.3860168457031, - "logps/rejected": -1231.68115234375, - "loss": 0.0011, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.2493499517440796, - "rewards/margins": 37.84587478637695, - "rewards/rejected": -36.59652328491211, - "step": 915 - }, - { - "epoch": 0.35, - "learning_rate": 1.4941376675970058e-05, - "logits/chosen": -4.7152862548828125, - "logits/rejected": -5.296983242034912, - "logps/chosen": -425.88177490234375, - "logps/rejected": -699.2251586914062, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.19287720322608948, - "rewards/margins": 19.27280616760254, - "rewards/rejected": -19.465682983398438, - "step": 916 - }, - { - "epoch": 0.35, - "learning_rate": 1.4931007893732981e-05, - "logits/chosen": -6.687345504760742, - "logits/rejected": -0.7276544570922852, - "logps/chosen": -595.75537109375, - "logps/rejected": -3170.13330078125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.081939697265625, - "rewards/margins": 36.84430694580078, - "rewards/rejected": -36.926246643066406, - "step": 917 - }, - { - "epoch": 0.35, - "learning_rate": 1.4920632102203902e-05, - "logits/chosen": -1.3596229553222656, - "logits/rejected": -2.3815207481384277, - "logps/chosen": -461.3852233886719, - "logps/rejected": -695.6019287109375, - "loss": 0.0013, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.0194915533065796, - "rewards/margins": 24.222787857055664, - "rewards/rejected": -23.203296661376953, - "step": 918 - }, - { - "epoch": 0.35, - "learning_rate": 1.4910249316131728e-05, - "logits/chosen": -3.2185275554656982, - "logits/rejected": -6.7604498863220215, - "logps/chosen": -408.13690185546875, - "logps/rejected": -328.53985595703125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 2.0375123023986816, - "rewards/margins": 21.370906829833984, - "rewards/rejected": -19.33339500427246, - "step": 919 - }, - { - "epoch": 0.35, - "learning_rate": 1.48998595502753e-05, - "logits/chosen": -3.110208749771118, - "logits/rejected": -1.793789267539978, - "logps/chosen": -1017.9207153320312, - "logps/rejected": -1748.8553466796875, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 2.784466505050659, - "rewards/margins": 30.233917236328125, - "rewards/rejected": -27.449451446533203, - "step": 920 - }, - { - "epoch": 0.35, - "learning_rate": 1.4889462819403381e-05, - "logits/chosen": -0.8072516918182373, - "logits/rejected": -1.5271313190460205, - "logps/chosen": -379.4502258300781, - "logps/rejected": -606.3563232421875, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.1223907470703125, - "rewards/margins": 16.298025131225586, - "rewards/rejected": -16.175634384155273, - "step": 921 - }, - { - "epoch": 0.35, - "learning_rate": 1.4879059138294647e-05, - "logits/chosen": -1.9735182523727417, - "logits/rejected": -3.439201831817627, - "logps/chosen": -188.31427001953125, - "logps/rejected": -315.07080078125, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.4043686389923096, - "rewards/margins": 13.173322677612305, - "rewards/rejected": -11.768954277038574, - "step": 922 - }, - { - "epoch": 0.35, - "learning_rate": 1.4868648521737638e-05, - "logits/chosen": -3.0013887882232666, - "logits/rejected": -0.6388388872146606, - "logps/chosen": -289.14630126953125, - "logps/rejected": -698.8761596679688, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.071746826171875, - "rewards/margins": 17.03790855407715, - "rewards/rejected": -16.966161727905273, - "step": 923 - }, - { - "epoch": 0.35, - "learning_rate": 1.4858230984530759e-05, - "logits/chosen": -1.931453824043274, - "logits/rejected": -5.33122444152832, - "logps/chosen": -447.85028076171875, - "logps/rejected": -183.2857666015625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 2.8568055629730225, - "rewards/margins": 15.836688995361328, - "rewards/rejected": -12.979883193969727, - "step": 924 - }, - { - "epoch": 0.35, - "learning_rate": 1.4847806541482253e-05, - "logits/chosen": -5.970593452453613, - "logits/rejected": -6.6540069580078125, - "logps/chosen": -1090.04345703125, - "logps/rejected": -931.0345458984375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.502001941204071, - "rewards/margins": 37.88551712036133, - "rewards/rejected": -38.38751983642578, - "step": 925 - }, - { - "epoch": 0.35, - "learning_rate": 1.4837375207410182e-05, - "logits/chosen": -4.1180877685546875, - "logits/rejected": -1.226327657699585, - "logps/chosen": -301.70391845703125, - "logps/rejected": -892.9058227539062, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.9199493527412415, - "rewards/margins": 25.048213958740234, - "rewards/rejected": -24.128265380859375, - "step": 926 - }, - { - "epoch": 0.35, - "learning_rate": 1.4826936997142399e-05, - "logits/chosen": -6.920558452606201, - "logits/rejected": -3.3987865447998047, - "logps/chosen": -251.2444610595703, - "logps/rejected": -949.2733154296875, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.5944473147392273, - "rewards/margins": 9.736790657043457, - "rewards/rejected": -10.33123779296875, - "step": 927 - }, - { - "epoch": 0.35, - "learning_rate": 1.4816491925516528e-05, - "logits/chosen": -4.018407821655273, - "logits/rejected": -1.6899296045303345, - "logps/chosen": -297.4830017089844, - "logps/rejected": -759.1002197265625, - "loss": 0.0071, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.369439721107483, - "rewards/margins": 22.09815216064453, - "rewards/rejected": -20.72871208190918, - "step": 928 - }, - { - "epoch": 0.35, - "learning_rate": 1.4806040007379955e-05, - "logits/chosen": -0.9687780141830444, - "logits/rejected": -1.6776689291000366, - "logps/chosen": -215.95838928222656, - "logps/rejected": -287.6887512207031, - "loss": 0.0011, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.4471633434295654, - "rewards/margins": 9.045924186706543, - "rewards/rejected": -11.493087768554688, - "step": 929 - }, - { - "epoch": 0.35, - "learning_rate": 1.4795581257589793e-05, - "logits/chosen": -1.635133981704712, - "logits/rejected": -1.5519343614578247, - "logps/chosen": -346.2505798339844, - "logps/rejected": -531.6761474609375, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.7459839582443237, - "rewards/margins": 11.518585205078125, - "rewards/rejected": -9.772601127624512, - "step": 930 - }, - { - "epoch": 0.35, - "learning_rate": 1.4785115691012866e-05, - "logits/chosen": -6.720938205718994, - "logits/rejected": -4.589206695556641, - "logps/chosen": -366.4517822265625, - "logps/rejected": -3086.33544921875, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.02225646935403347, - "rewards/margins": 16.228286743164062, - "rewards/rejected": -16.206029891967773, - "step": 931 - }, - { - "epoch": 0.35, - "learning_rate": 1.4774643322525691e-05, - "logits/chosen": -3.3651816844940186, - "logits/rejected": -2.553265333175659, - "logps/chosen": -431.291748046875, - "logps/rejected": -838.2828369140625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 2.0520081520080566, - "rewards/margins": 23.076038360595703, - "rewards/rejected": -21.024030685424805, - "step": 932 - }, - { - "epoch": 0.35, - "learning_rate": 1.4764164167014451e-05, - "logits/chosen": 0.10280583798885345, - "logits/rejected": -4.846896648406982, - "logps/chosen": -348.87847900390625, - "logps/rejected": -130.92935180664062, - "loss": 0.0015, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.8069671988487244, - "rewards/margins": 6.768592834472656, - "rewards/rejected": -7.575560092926025, - "step": 933 - }, - { - "epoch": 0.35, - "learning_rate": 1.4753678239374977e-05, - "logits/chosen": -0.31799206137657166, - "logits/rejected": -1.040506362915039, - "logps/chosen": -279.45904541015625, - "logps/rejected": -648.6900024414062, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.9013031125068665, - "rewards/margins": 30.770166397094727, - "rewards/rejected": -31.67146873474121, - "step": 934 - }, - { - "epoch": 0.35, - "learning_rate": 1.4743185554512725e-05, - "logits/chosen": -2.9981422424316406, - "logits/rejected": -1.7812275886535645, - "logps/chosen": -246.555908203125, - "logps/rejected": -594.026123046875, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.33099365234375, - "rewards/margins": 16.868213653564453, - "rewards/rejected": -15.537219047546387, - "step": 935 - }, - { - "epoch": 0.35, - "learning_rate": 1.473268612734276e-05, - "logits/chosen": -1.5450061559677124, - "logits/rejected": -3.714916944503784, - "logps/chosen": -478.5575866699219, - "logps/rejected": -498.416259765625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.5783722400665283, - "rewards/margins": 14.650619506835938, - "rewards/rejected": -13.072247505187988, - "step": 936 - }, - { - "epoch": 0.35, - "learning_rate": 1.4722179972789725e-05, - "logits/chosen": -2.0316643714904785, - "logits/rejected": -2.123488187789917, - "logps/chosen": -295.619384765625, - "logps/rejected": -637.349609375, - "loss": 0.0003, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.355743408203125, - "rewards/margins": 24.22283935546875, - "rewards/rejected": -23.867095947265625, - "step": 937 - }, - { - "epoch": 0.35, - "learning_rate": 1.4711667105787835e-05, - "logits/chosen": -1.4225324392318726, - "logits/rejected": -3.482154369354248, - "logps/chosen": -215.02474975585938, - "logps/rejected": -212.89523315429688, - "loss": 0.0017, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.521136462688446, - "rewards/margins": 11.509700775146484, - "rewards/rejected": -10.988564491271973, - "step": 938 - }, - { - "epoch": 0.36, - "learning_rate": 1.4701147541280836e-05, - "logits/chosen": -1.6755808591842651, - "logits/rejected": -5.430564880371094, - "logps/chosen": -434.90484619140625, - "logps/rejected": -169.04161071777344, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": 2.110434055328369, - "rewards/margins": 10.943849563598633, - "rewards/rejected": -8.833415985107422, - "step": 939 - }, - { - "epoch": 0.36, - "learning_rate": 1.4690621294222002e-05, - "logits/chosen": -7.01036262512207, - "logits/rejected": -1.5060429573059082, - "logps/chosen": -526.5580444335938, - "logps/rejected": -2538.252197265625, - "loss": 0.0115, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.558624267578125, - "rewards/margins": 14.906890869140625, - "rewards/rejected": -14.3482666015625, - "step": 940 - }, - { - "epoch": 0.36, - "learning_rate": 1.4680088379574104e-05, - "logits/chosen": -0.7357252240180969, - "logits/rejected": -4.1422600746154785, - "logps/chosen": -311.8200378417969, - "logps/rejected": -212.6060791015625, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.20148010551929474, - "rewards/margins": 11.829365730285645, - "rewards/rejected": -11.627885818481445, - "step": 941 - }, - { - "epoch": 0.36, - "learning_rate": 1.466954881230939e-05, - "logits/chosen": -4.7827348709106445, - "logits/rejected": -2.1298561096191406, - "logps/chosen": -520.8544311523438, - "logps/rejected": -1125.160888671875, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.164770483970642, - "rewards/margins": 18.278797149658203, - "rewards/rejected": -17.11402702331543, - "step": 942 - }, - { - "epoch": 0.36, - "learning_rate": 1.4659002607409565e-05, - "logits/chosen": -6.292568683624268, - "logits/rejected": -2.7102150917053223, - "logps/chosen": -249.19680786132812, - "logps/rejected": -783.9668579101562, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.285491943359375, - "rewards/margins": 7.244623184204102, - "rewards/rejected": -8.530115127563477, - "step": 943 - }, - { - "epoch": 0.36, - "learning_rate": 1.4648449779865763e-05, - "logits/chosen": -2.410153865814209, - "logits/rejected": -2.4254887104034424, - "logps/chosen": -198.35765075683594, - "logps/rejected": -432.39691162109375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.4299026429653168, - "rewards/margins": 26.63361358642578, - "rewards/rejected": -26.203710556030273, - "step": 944 - }, - { - "epoch": 0.36, - "learning_rate": 1.4637890344678547e-05, - "logits/chosen": -6.3757243156433105, - "logits/rejected": -2.369504690170288, - "logps/chosen": -269.0664978027344, - "logps/rejected": -1117.068115234375, - "loss": 0.0014, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.8099884390830994, - "rewards/margins": 29.432205200195312, - "rewards/rejected": -30.2421932220459, - "step": 945 - }, - { - "epoch": 0.36, - "learning_rate": 1.4627324316857858e-05, - "logits/chosen": -2.138131618499756, - "logits/rejected": -2.4709665775299072, - "logps/chosen": -457.7879638671875, - "logps/rejected": -762.3670654296875, - "loss": 0.0002, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.613568067550659, - "rewards/margins": 23.193952560424805, - "rewards/rejected": -25.807519912719727, - "step": 946 - }, - { - "epoch": 0.36, - "learning_rate": 1.4616751711423016e-05, - "logits/chosen": -2.7858967781066895, - "logits/rejected": -4.5120110511779785, - "logps/chosen": -278.55645751953125, - "logps/rejected": -296.0784606933594, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -3.4595704078674316, - "rewards/margins": 9.18227767944336, - "rewards/rejected": -12.641847610473633, - "step": 947 - }, - { - "epoch": 0.36, - "learning_rate": 1.4606172543402685e-05, - "logits/chosen": -3.492466688156128, - "logits/rejected": -1.5979301929473877, - "logps/chosen": -241.41995239257812, - "logps/rejected": -610.647705078125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.16147156059741974, - "rewards/margins": 13.660745620727539, - "rewards/rejected": -13.822216987609863, - "step": 948 - }, - { - "epoch": 0.36, - "learning_rate": 1.4595586827834869e-05, - "logits/chosen": -3.2304980754852295, - "logits/rejected": -2.609030246734619, - "logps/chosen": -297.6733093261719, - "logps/rejected": -682.7982177734375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 2.6610352993011475, - "rewards/margins": 19.475616455078125, - "rewards/rejected": -16.8145809173584, - "step": 949 - }, - { - "epoch": 0.36, - "learning_rate": 1.4584994579766865e-05, - "logits/chosen": -1.2998052835464478, - "logits/rejected": -2.896683692932129, - "logps/chosen": -209.86712646484375, - "logps/rejected": -536.0645751953125, - "loss": 0.0002, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.18677063286304474, - "rewards/margins": 22.205657958984375, - "rewards/rejected": -22.39242935180664, - "step": 950 - }, - { - "epoch": 0.36, - "learning_rate": 1.457439581425526e-05, - "logits/chosen": -1.3030797243118286, - "logits/rejected": -3.43355655670166, - "logps/chosen": -1010.2011108398438, - "logps/rejected": -1272.205322265625, - "loss": 0.0006, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.618988037109375, - "rewards/margins": 48.50171661376953, - "rewards/rejected": -47.882728576660156, - "step": 951 - }, - { - "epoch": 0.36, - "learning_rate": 1.4563790546365914e-05, - "logits/chosen": -6.95395040512085, - "logits/rejected": -5.12584924697876, - "logps/chosen": -363.02960205078125, - "logps/rejected": -1082.7646484375, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.553576648235321, - "rewards/margins": 16.701562881469727, - "rewards/rejected": -17.25514030456543, - "step": 952 - }, - { - "epoch": 0.36, - "learning_rate": 1.455317879117392e-05, - "logits/chosen": -2.4081366062164307, - "logits/rejected": -5.528341770172119, - "logps/chosen": -661.9794921875, - "logps/rejected": -410.42822265625, - "loss": 0.0004, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.04556884989142418, - "rewards/margins": 23.448814392089844, - "rewards/rejected": -23.494382858276367, - "step": 953 - }, - { - "epoch": 0.36, - "learning_rate": 1.4542560563763594e-05, - "logits/chosen": -4.397747993469238, - "logits/rejected": -2.2336838245391846, - "logps/chosen": -296.47686767578125, - "logps/rejected": -681.2279052734375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.5827087759971619, - "rewards/margins": 13.103344917297363, - "rewards/rejected": -13.686053276062012, - "step": 954 - }, - { - "epoch": 0.36, - "learning_rate": 1.4531935879228457e-05, - "logits/chosen": -3.6849725246429443, - "logits/rejected": -2.4399795532226562, - "logps/chosen": -371.1570129394531, - "logps/rejected": -1030.7115478515625, - "loss": 0.0002, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.7608398795127869, - "rewards/margins": 33.75149154663086, - "rewards/rejected": -32.99065017700195, - "step": 955 - }, - { - "epoch": 0.36, - "learning_rate": 1.4521304752671209e-05, - "logits/chosen": -6.069145202636719, - "logits/rejected": -0.45738402009010315, - "logps/chosen": -653.0706787109375, - "logps/rejected": -2181.5947265625, - "loss": 0.002, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.31999513506889343, - "rewards/margins": 24.724193572998047, - "rewards/rejected": -25.044189453125, - "step": 956 - }, - { - "epoch": 0.36, - "learning_rate": 1.4510667199203697e-05, - "logits/chosen": -2.513505220413208, - "logits/rejected": -5.294083595275879, - "logps/chosen": -209.6301727294922, - "logps/rejected": -215.5802459716797, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -3.209599256515503, - "rewards/margins": 12.343477249145508, - "rewards/rejected": -15.55307674407959, - "step": 957 - }, - { - "epoch": 0.36, - "learning_rate": 1.4500023233946907e-05, - "logits/chosen": -4.993988513946533, - "logits/rejected": -1.938533902168274, - "logps/chosen": -607.239990234375, - "logps/rejected": -2836.8291015625, - "loss": 0.0002, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.08796997368335724, - "rewards/margins": 31.365739822387695, - "rewards/rejected": -31.453710556030273, - "step": 958 - }, - { - "epoch": 0.36, - "learning_rate": 1.4489372872030955e-05, - "logits/chosen": -2.3126866817474365, - "logits/rejected": -2.69526743888855, - "logps/chosen": -343.5238037109375, - "logps/rejected": -586.6883544921875, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 2.19913649559021, - "rewards/margins": 17.860000610351562, - "rewards/rejected": -15.66086483001709, - "step": 959 - }, - { - "epoch": 0.36, - "learning_rate": 1.4478716128595032e-05, - "logits/chosen": -1.4323378801345825, - "logits/rejected": -2.834601879119873, - "logps/chosen": -376.99737548828125, - "logps/rejected": -578.3153076171875, - "loss": 0.0002, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.9643829464912415, - "rewards/margins": 18.42348289489746, - "rewards/rejected": -17.4591007232666, - "step": 960 - }, - { - "epoch": 0.36, - "learning_rate": 1.4468053018787402e-05, - "logits/chosen": -4.0952067375183105, - "logits/rejected": -3.5162758827209473, - "logps/chosen": -337.9957580566406, - "logps/rejected": -534.09716796875, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -3.296774387359619, - "rewards/margins": 9.224075317382812, - "rewards/rejected": -12.52085018157959, - "step": 961 - }, - { - "epoch": 0.36, - "learning_rate": 1.4457383557765385e-05, - "logits/chosen": -0.9585447907447815, - "logits/rejected": -2.002443313598633, - "logps/chosen": -421.5686340332031, - "logps/rejected": -514.7720947265625, - "loss": 0.0008, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.3133544921875, - "rewards/margins": 6.656476020812988, - "rewards/rejected": -5.343121528625488, - "step": 962 - }, - { - "epoch": 0.36, - "learning_rate": 1.4446707760695324e-05, - "logits/chosen": -3.9082789421081543, - "logits/rejected": -3.5687286853790283, - "logps/chosen": -620.8314208984375, - "logps/rejected": -834.8570556640625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.3593383729457855, - "rewards/margins": 8.620434761047363, - "rewards/rejected": -8.97977352142334, - "step": 963 - }, - { - "epoch": 0.36, - "learning_rate": 1.4436025642752575e-05, - "logits/chosen": -0.7821770906448364, - "logits/rejected": -5.707052707672119, - "logps/chosen": -556.9736328125, - "logps/rejected": -255.58609008789062, - "loss": 0.0003, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.815197765827179, - "rewards/margins": 13.031036376953125, - "rewards/rejected": -13.846234321594238, - "step": 964 - }, - { - "epoch": 0.36, - "learning_rate": 1.442533721912147e-05, - "logits/chosen": -2.7041664123535156, - "logits/rejected": -1.294926643371582, - "logps/chosen": -354.6056213378906, - "logps/rejected": -690.2084350585938, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.8235321044921875, - "rewards/margins": 15.826376914978027, - "rewards/rejected": -15.00284481048584, - "step": 965 - }, - { - "epoch": 0.37, - "learning_rate": 1.4414642504995315e-05, - "logits/chosen": -0.09591107815504074, - "logits/rejected": -4.0736985206604, - "logps/chosen": -337.44805908203125, - "logps/rejected": -151.2340545654297, - "loss": 0.0005, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.8167328238487244, - "rewards/margins": 5.849796295166016, - "rewards/rejected": -6.666529178619385, - "step": 966 - }, - { - "epoch": 0.37, - "learning_rate": 1.4403941515576344e-05, - "logits/chosen": -1.5191148519515991, - "logits/rejected": -2.1808273792266846, - "logps/chosen": -417.52838134765625, - "logps/rejected": -467.9444274902344, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.735278367996216, - "rewards/margins": 10.993579864501953, - "rewards/rejected": -13.72885799407959, - "step": 967 - }, - { - "epoch": 0.37, - "learning_rate": 1.4393234266075725e-05, - "logits/chosen": -0.5347993969917297, - "logits/rejected": -5.19551420211792, - "logps/chosen": -387.82623291015625, - "logps/rejected": -117.23458862304688, - "loss": 0.014, - "rewards/accuracies": 1.0, - "rewards/chosen": -4.221930027008057, - "rewards/margins": 3.6562790870666504, - "rewards/rejected": -7.878209114074707, - "step": 968 - }, - { - "epoch": 0.37, - "learning_rate": 1.4382520771713517e-05, - "logits/chosen": -0.902773916721344, - "logits/rejected": -4.787004470825195, - "logps/chosen": -707.3365478515625, - "logps/rejected": -637.9859619140625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -5.107715129852295, - "rewards/margins": 32.9320068359375, - "rewards/rejected": -38.03972244262695, - "step": 969 - }, - { - "epoch": 0.37, - "learning_rate": 1.4371801047718654e-05, - "logits/chosen": -0.14976456761360168, - "logits/rejected": -3.933645725250244, - "logps/chosen": -430.348876953125, - "logps/rejected": -554.0485229492188, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.5648712515830994, - "rewards/margins": 31.153451919555664, - "rewards/rejected": -30.588581085205078, - "step": 970 - }, - { - "epoch": 0.37, - "learning_rate": 1.436107510932893e-05, - "logits/chosen": -6.935613632202148, - "logits/rejected": -1.1626415252685547, - "logps/chosen": -362.5122375488281, - "logps/rejected": -1783.176025390625, - "loss": 0.0002, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.943859815597534, - "rewards/margins": 42.49398422241211, - "rewards/rejected": -45.437843322753906, - "step": 971 - }, - { - "epoch": 0.37, - "learning_rate": 1.4350342971790979e-05, - "logits/chosen": -1.5910204648971558, - "logits/rejected": -3.897312879562378, - "logps/chosen": -324.562744140625, - "logps/rejected": -213.68722534179688, - "loss": 0.0003, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.03180847316980362, - "rewards/margins": 10.373655319213867, - "rewards/rejected": -10.405464172363281, - "step": 972 - }, - { - "epoch": 0.37, - "learning_rate": 1.4339604650360227e-05, - "logits/chosen": -5.742951393127441, - "logits/rejected": -0.9223275184631348, - "logps/chosen": -509.0324401855469, - "logps/rejected": -1585.996337890625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 3.74833083152771, - "rewards/margins": 30.899234771728516, - "rewards/rejected": -27.150903701782227, - "step": 973 - }, - { - "epoch": 0.37, - "learning_rate": 1.4328860160300909e-05, - "logits/chosen": -0.5703563690185547, - "logits/rejected": -5.75612735748291, - "logps/chosen": -487.19647216796875, - "logps/rejected": -276.25714111328125, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.16993407905101776, - "rewards/margins": 15.153292655944824, - "rewards/rejected": -15.323226928710938, - "step": 974 - }, - { - "epoch": 0.37, - "learning_rate": 1.431810951688602e-05, - "logits/chosen": -1.1373450756072998, - "logits/rejected": -3.3359339237213135, - "logps/chosen": -286.3591003417969, - "logps/rejected": -313.1551818847656, - "loss": 0.0003, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.8635987043380737, - "rewards/margins": 10.04223346710205, - "rewards/rejected": -11.905832290649414, - "step": 975 - }, - { - "epoch": 0.37, - "learning_rate": 1.4307352735397307e-05, - "logits/chosen": -5.122059345245361, - "logits/rejected": -2.0352985858917236, - "logps/chosen": -237.29872131347656, - "logps/rejected": -876.7222900390625, - "loss": 0.0094, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.5203689336776733, - "rewards/margins": 29.489091873168945, - "rewards/rejected": -31.00946044921875, - "step": 976 - }, - { - "epoch": 0.37, - "learning_rate": 1.4296589831125234e-05, - "logits/chosen": -4.747432231903076, - "logits/rejected": -6.8045196533203125, - "logps/chosen": -365.8202819824219, - "logps/rejected": -142.47076416015625, - "loss": 0.0013, - "rewards/accuracies": 1.0, - "rewards/chosen": -3.018310546875, - "rewards/margins": 4.613781929016113, - "rewards/rejected": -7.632092475891113, - "step": 977 - }, - { - "epoch": 0.37, - "learning_rate": 1.4285820819368976e-05, - "logits/chosen": -6.154963970184326, - "logits/rejected": -3.1258585453033447, - "logps/chosen": -824.7650146484375, - "logps/rejected": -2391.2109375, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.8550171256065369, - "rewards/margins": 30.587854385375977, - "rewards/rejected": -31.44287109375, - "step": 978 - }, - { - "epoch": 0.37, - "learning_rate": 1.4275045715436387e-05, - "logits/chosen": -4.106791019439697, - "logits/rejected": -2.1011226177215576, - "logps/chosen": -335.2095947265625, - "logps/rejected": -1172.172607421875, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.4040100574493408, - "rewards/margins": 38.92131423950195, - "rewards/rejected": -37.517303466796875, - "step": 979 - }, - { - "epoch": 0.37, - "learning_rate": 1.4264264534643979e-05, - "logits/chosen": -0.14621688425540924, - "logits/rejected": -3.608797788619995, - "logps/chosen": -413.65118408203125, - "logps/rejected": -445.98480224609375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.408059686422348, - "rewards/margins": 20.4057559967041, - "rewards/rejected": -19.997695922851562, - "step": 980 - }, - { - "epoch": 0.37, - "learning_rate": 1.4253477292316907e-05, - "logits/chosen": -8.106539726257324, - "logits/rejected": -0.5012695789337158, - "logps/chosen": -331.9234619140625, - "logps/rejected": -2335.183349609375, - "loss": 0.0018, - "rewards/accuracies": 1.0, - "rewards/chosen": -4.451135158538818, - "rewards/margins": 47.659584045410156, - "rewards/rejected": -52.1107177734375, - "step": 981 - }, - { - "epoch": 0.37, - "learning_rate": 1.4242684003788934e-05, - "logits/chosen": -6.169016361236572, - "logits/rejected": -0.5321062207221985, - "logps/chosen": -288.3209533691406, - "logps/rejected": -1377.708251953125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.8342193365097046, - "rewards/margins": 15.756065368652344, - "rewards/rejected": -17.59028434753418, - "step": 982 - }, - { - "epoch": 0.37, - "learning_rate": 1.4231884684402428e-05, - "logits/chosen": -2.3648436069488525, - "logits/rejected": -5.48659086227417, - "logps/chosen": -448.4335632324219, - "logps/rejected": -185.65428161621094, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.17282409965991974, - "rewards/margins": 12.49074649810791, - "rewards/rejected": -12.663570404052734, - "step": 983 - }, - { - "epoch": 0.37, - "learning_rate": 1.422107934950832e-05, - "logits/chosen": -6.388079643249512, - "logits/rejected": -3.9388558864593506, - "logps/chosen": -448.77496337890625, - "logps/rejected": -2308.2705078125, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.0009368896717205644, - "rewards/margins": 22.89708709716797, - "rewards/rejected": -22.89802360534668, - "step": 984 - }, - { - "epoch": 0.37, - "learning_rate": 1.4210268014466097e-05, - "logits/chosen": -8.243359565734863, - "logits/rejected": -1.9896653890609741, - "logps/chosen": -323.98236083984375, - "logps/rejected": -2849.650634765625, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -3.463665723800659, - "rewards/margins": 20.485578536987305, - "rewards/rejected": -23.949243545532227, - "step": 985 - }, - { - "epoch": 0.37, - "learning_rate": 1.4199450694643777e-05, - "logits/chosen": -2.451887369155884, - "logits/rejected": -6.395621299743652, - "logps/chosen": -615.8948364257812, - "logps/rejected": -459.3419189453125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.595251441001892, - "rewards/margins": 20.377262115478516, - "rewards/rejected": -21.97251319885254, - "step": 986 - }, - { - "epoch": 0.37, - "learning_rate": 1.418862740541788e-05, - "logits/chosen": -1.4531670808792114, - "logits/rejected": -2.44388484954834, - "logps/chosen": -255.3473663330078, - "logps/rejected": -294.8472595214844, - "loss": 0.0004, - "rewards/accuracies": 1.0, - "rewards/chosen": -3.987687826156616, - "rewards/margins": 5.740095138549805, - "rewards/rejected": -9.727783203125, - "step": 987 - }, - { - "epoch": 0.37, - "learning_rate": 1.4177798162173412e-05, - "logits/chosen": -0.5569493174552917, - "logits/rejected": -2.8548967838287354, - "logps/chosen": -219.53262329101562, - "logps/rejected": -357.0575256347656, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 2.318988084793091, - "rewards/margins": 18.057544708251953, - "rewards/rejected": -15.738555908203125, - "step": 988 - }, - { - "epoch": 0.37, - "learning_rate": 1.4166962980303849e-05, - "logits/chosen": -5.243434906005859, - "logits/rejected": -0.8927885293960571, - "logps/chosen": -329.51544189453125, - "logps/rejected": -1225.217529296875, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.295617699623108, - "rewards/margins": 16.486682891845703, - "rewards/rejected": -15.191064834594727, - "step": 989 - }, - { - "epoch": 0.37, - "learning_rate": 1.4156121875211101e-05, - "logits/chosen": -0.9568042159080505, - "logits/rejected": -2.723543405532837, - "logps/chosen": -238.6383514404297, - "logps/rejected": -468.4671936035156, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.373133897781372, - "rewards/margins": 20.20663070678711, - "rewards/rejected": -18.83349609375, - "step": 990 - }, - { - "epoch": 0.37, - "learning_rate": 1.4145274862305497e-05, - "logits/chosen": -0.9782654643058777, - "logits/rejected": -0.5671600699424744, - "logps/chosen": -852.022705078125, - "logps/rejected": -1177.446533203125, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -3.574310302734375, - "rewards/margins": 12.918188095092773, - "rewards/rejected": -16.49249839782715, - "step": 991 - }, - { - "epoch": 0.38, - "learning_rate": 1.4134421957005775e-05, - "logits/chosen": -2.2668161392211914, - "logits/rejected": -1.3725355863571167, - "logps/chosen": -302.86822509765625, - "logps/rejected": -704.8399047851562, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.9383911490440369, - "rewards/margins": 32.8414421081543, - "rewards/rejected": -31.903051376342773, - "step": 992 - }, - { - "epoch": 0.38, - "learning_rate": 1.4123563174739036e-05, - "logits/chosen": -3.512396812438965, - "logits/rejected": -1.4329320192337036, - "logps/chosen": -481.3402404785156, - "logps/rejected": -1597.1953125, - "loss": 0.002, - "rewards/accuracies": 1.0, - "rewards/chosen": 4.205356121063232, - "rewards/margins": 51.8219108581543, - "rewards/rejected": -47.616554260253906, - "step": 993 - }, - { - "epoch": 0.38, - "learning_rate": 1.411269853094074e-05, - "logits/chosen": -5.9788970947265625, - "logits/rejected": -1.286310076713562, - "logps/chosen": -281.9703063964844, - "logps/rejected": -876.2073974609375, - "loss": 0.0009, - "rewards/accuracies": 1.0, - "rewards/chosen": -5.726829528808594, - "rewards/margins": 5.218897819519043, - "rewards/rejected": -10.945727348327637, - "step": 994 - }, - { - "epoch": 0.38, - "learning_rate": 1.4101828041054682e-05, - "logits/chosen": -2.1339235305786133, - "logits/rejected": -3.4233672618865967, - "logps/chosen": -183.10403442382812, - "logps/rejected": -283.5545654296875, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.7935165762901306, - "rewards/margins": 16.07659149169922, - "rewards/rejected": -15.283075332641602, - "step": 995 - }, - { - "epoch": 0.38, - "learning_rate": 1.4090951720532965e-05, - "logits/chosen": -0.33472585678100586, - "logits/rejected": -3.777862548828125, - "logps/chosen": -599.930419921875, - "logps/rejected": -327.8896484375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.273584008216858, - "rewards/margins": 19.88855743408203, - "rewards/rejected": -18.614973068237305, - "step": 996 - }, - { - "epoch": 0.38, - "learning_rate": 1.4080069584835971e-05, - "logits/chosen": -0.9114305973052979, - "logits/rejected": -2.854731559753418, - "logps/chosen": -306.19171142578125, - "logps/rejected": -424.2364196777344, - "loss": 0.0036, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.5611785650253296, - "rewards/margins": 24.57465171813965, - "rewards/rejected": -23.013473510742188, - "step": 997 - }, - { - "epoch": 0.38, - "learning_rate": 1.4069181649432363e-05, - "logits/chosen": -3.532780170440674, - "logits/rejected": -0.8168178796768188, - "logps/chosen": -463.908447265625, - "logps/rejected": -1024.0799560546875, - "loss": 0.0002, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.1898010969161987, - "rewards/margins": 9.123968124389648, - "rewards/rejected": -10.313769340515137, - "step": 998 - }, - { - "epoch": 0.38, - "learning_rate": 1.4058287929799042e-05, - "logits/chosen": -3.7013845443725586, - "logits/rejected": -1.1599527597427368, - "logps/chosen": -219.0364990234375, - "logps/rejected": -566.4005126953125, - "loss": 0.0065, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.1802003383636475, - "rewards/margins": 3.8754212856292725, - "rewards/rejected": -6.05562162399292, - "step": 999 - }, - { - "epoch": 0.38, - "learning_rate": 1.404738844142113e-05, - "logits/chosen": -2.4721274375915527, - "logits/rejected": -5.417307376861572, - "logps/chosen": -303.1116027832031, - "logps/rejected": -178.76156616210938, - "loss": 0.0116, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.6826752424240112, - "rewards/margins": 9.157987594604492, - "rewards/rejected": -10.840662956237793, - "step": 1000 - }, - { - "epoch": 0.38, - "learning_rate": 1.4036483199791949e-05, - "logits/chosen": -3.0624146461486816, - "logits/rejected": -2.7537529468536377, - "logps/chosen": -600.91259765625, - "logps/rejected": -1203.921142578125, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.3818482160568237, - "rewards/margins": 36.98345184326172, - "rewards/rejected": -35.60160446166992, - "step": 1001 - }, - { - "epoch": 0.38, - "learning_rate": 1.4025572220412998e-05, - "logits/chosen": -7.292146682739258, - "logits/rejected": 0.09650199115276337, - "logps/chosen": -402.75775146484375, - "logps/rejected": -3615.43505859375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.9312225580215454, - "rewards/margins": 31.456052780151367, - "rewards/rejected": -30.524829864501953, - "step": 1002 - }, - { - "epoch": 0.38, - "learning_rate": 1.4014655518793936e-05, - "logits/chosen": -2.1712234020233154, - "logits/rejected": -1.6204249858856201, - "logps/chosen": -275.97052001953125, - "logps/rejected": -455.871337890625, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.6495819091796875, - "rewards/margins": 11.01193904876709, - "rewards/rejected": -10.362357139587402, - "step": 1003 - }, - { - "epoch": 0.38, - "learning_rate": 1.4003733110452556e-05, - "logits/chosen": -1.4722992181777954, - "logits/rejected": 0.6475964784622192, - "logps/chosen": -315.2167663574219, - "logps/rejected": -1029.041015625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.3764129877090454, - "rewards/margins": 37.45291519165039, - "rewards/rejected": -36.07650375366211, - "step": 1004 - }, - { - "epoch": 0.38, - "learning_rate": 1.3992805010914754e-05, - "logits/chosen": -4.631802558898926, - "logits/rejected": -1.6748734712600708, - "logps/chosen": -187.38107299804688, - "logps/rejected": -687.3441162109375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -4.601266384124756, - "rewards/margins": 23.725345611572266, - "rewards/rejected": -28.32661247253418, - "step": 1005 - }, - { - "epoch": 0.38, - "learning_rate": 1.3981871235714532e-05, - "logits/chosen": -4.1028056144714355, - "logits/rejected": -4.608330726623535, - "logps/chosen": -239.38011169433594, - "logps/rejected": -260.161865234375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.34771424531936646, - "rewards/margins": 9.559887886047363, - "rewards/rejected": -9.212173461914062, - "step": 1006 - }, - { - "epoch": 0.38, - "learning_rate": 1.3970931800393943e-05, - "logits/chosen": -1.9720077514648438, - "logits/rejected": -5.418359756469727, - "logps/chosen": -668.598876953125, - "logps/rejected": -185.4329376220703, - "loss": 0.0385, - "rewards/accuracies": 1.0, - "rewards/chosen": -9.968579292297363, - "rewards/margins": 1.0242586135864258, - "rewards/rejected": -10.992837905883789, - "step": 1007 - }, - { - "epoch": 0.38, - "learning_rate": 1.3959986720503093e-05, - "logits/chosen": -1.8341708183288574, - "logits/rejected": -3.278458595275879, - "logps/chosen": -307.84942626953125, - "logps/rejected": -525.0643310546875, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.4821960926055908, - "rewards/margins": 31.499481201171875, - "rewards/rejected": -32.9816780090332, - "step": 1008 - }, - { - "epoch": 0.38, - "learning_rate": 1.394903601160012e-05, - "logits/chosen": -3.0271027088165283, - "logits/rejected": -2.192362070083618, - "logps/chosen": -369.08837890625, - "logps/rejected": -656.3759155273438, - "loss": 0.001, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.9422851800918579, - "rewards/margins": 14.374225616455078, - "rewards/rejected": -13.431940078735352, - "step": 1009 - }, - { - "epoch": 0.38, - "learning_rate": 1.3938079689251147e-05, - "logits/chosen": -2.9341766834259033, - "logits/rejected": -1.6565955877304077, - "logps/chosen": -490.674560546875, - "logps/rejected": -1028.91650390625, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": 2.399822950363159, - "rewards/margins": 18.422746658325195, - "rewards/rejected": -16.022924423217773, - "step": 1010 - }, - { - "epoch": 0.38, - "learning_rate": 1.3927117769030287e-05, - "logits/chosen": -2.6651930809020996, - "logits/rejected": -0.9578068852424622, - "logps/chosen": -441.23583984375, - "logps/rejected": -765.846435546875, - "loss": 0.0003, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.562335193157196, - "rewards/margins": 9.403863906860352, - "rewards/rejected": -8.84152889251709, - "step": 1011 - }, - { - "epoch": 0.38, - "learning_rate": 1.391615026651961e-05, - "logits/chosen": -0.856333315372467, - "logits/rejected": -1.7210720777511597, - "logps/chosen": -145.28973388671875, - "logps/rejected": -214.88851928710938, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.2010329961776733, - "rewards/margins": 6.9227447509765625, - "rewards/rejected": -5.7217116355896, - "step": 1012 - }, - { - "epoch": 0.38, - "learning_rate": 1.3905177197309116e-05, - "logits/chosen": -3.202805280685425, - "logits/rejected": -3.988278388977051, - "logps/chosen": -275.2820739746094, - "logps/rejected": -413.41949462890625, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.1741851568222046, - "rewards/margins": 10.518521308898926, - "rewards/rejected": -11.692706108093262, - "step": 1013 - }, - { - "epoch": 0.38, - "learning_rate": 1.3894198576996722e-05, - "logits/chosen": -1.9949290752410889, - "logits/rejected": -5.8443498611450195, - "logps/chosen": -351.6651916503906, - "logps/rejected": -112.48094177246094, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.690380871295929, - "rewards/margins": 7.27218770980835, - "rewards/rejected": -7.962568759918213, - "step": 1014 - }, - { - "epoch": 0.38, - "learning_rate": 1.3883214421188239e-05, - "logits/chosen": -5.00584077835083, - "logits/rejected": -2.4247450828552246, - "logps/chosen": -277.04058837890625, - "logps/rejected": -702.04638671875, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.863897681236267, - "rewards/margins": 16.12164306640625, - "rewards/rejected": -14.257745742797852, - "step": 1015 - }, - { - "epoch": 0.38, - "learning_rate": 1.3872224745497334e-05, - "logits/chosen": -1.8114231824874878, - "logits/rejected": -1.1281739473342896, - "logps/chosen": -469.98876953125, - "logps/rejected": -1029.0894775390625, - "loss": 0.0008, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.7946197986602783, - "rewards/margins": 22.666915893554688, - "rewards/rejected": -24.461536407470703, - "step": 1016 - }, - { - "epoch": 0.38, - "learning_rate": 1.3861229565545532e-05, - "logits/chosen": -7.73305082321167, - "logits/rejected": -2.750107526779175, - "logps/chosen": -716.1678466796875, - "logps/rejected": -3330.89306640625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.9768860340118408, - "rewards/margins": 12.444928169250488, - "rewards/rejected": -10.468042373657227, - "step": 1017 - }, - { - "epoch": 0.38, - "learning_rate": 1.3850228896962178e-05, - "logits/chosen": -1.4739198684692383, - "logits/rejected": -2.537524938583374, - "logps/chosen": -315.856201171875, - "logps/rejected": -412.126708984375, - "loss": 0.0003, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.13870850205421448, - "rewards/margins": 14.895703315734863, - "rewards/rejected": -15.034411430358887, - "step": 1018 - }, - { - "epoch": 0.39, - "learning_rate": 1.3839222755384428e-05, - "logits/chosen": -6.7007341384887695, - "logits/rejected": -2.0917322635650635, - "logps/chosen": -499.3362731933594, - "logps/rejected": -2508.2734375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.11696472018957138, - "rewards/margins": 29.71738624572754, - "rewards/rejected": -29.8343505859375, - "step": 1019 - }, - { - "epoch": 0.39, - "learning_rate": 1.3828211156457196e-05, - "logits/chosen": -5.392590045928955, - "logits/rejected": -2.9918274879455566, - "logps/chosen": -582.974365234375, - "logps/rejected": -2095.654296875, - "loss": 0.0002, - "rewards/accuracies": 1.0, - "rewards/chosen": 2.29632568359375, - "rewards/margins": 40.420387268066406, - "rewards/rejected": -38.124061584472656, - "step": 1020 - }, - { - "epoch": 0.39, - "learning_rate": 1.3817194115833174e-05, - "logits/chosen": -3.764953374862671, - "logits/rejected": -1.672505259513855, - "logps/chosen": -272.76226806640625, - "logps/rejected": -661.9170532226562, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.674731433391571, - "rewards/margins": 17.065929412841797, - "rewards/rejected": -17.74066162109375, - "step": 1021 - }, - { - "epoch": 0.39, - "learning_rate": 1.3806171649172782e-05, - "logits/chosen": -4.019514083862305, - "logits/rejected": -4.245757102966309, - "logps/chosen": -258.866455078125, - "logps/rejected": -536.646728515625, - "loss": 0.0007, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.2729904651641846, - "rewards/margins": 17.738239288330078, - "rewards/rejected": -19.01123046875, - "step": 1022 - }, - { - "epoch": 0.39, - "learning_rate": 1.3795143772144151e-05, - "logits/chosen": -4.8424601554870605, - "logits/rejected": -2.6867868900299072, - "logps/chosen": -324.433837890625, - "logps/rejected": -661.5651245117188, - "loss": 0.0003, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.8945984244346619, - "rewards/margins": 10.0274658203125, - "rewards/rejected": -9.132867813110352, - "step": 1023 - }, - { - "epoch": 0.39, - "learning_rate": 1.3784110500423104e-05, - "logits/chosen": -0.9080745577812195, - "logits/rejected": -2.309922456741333, - "logps/chosen": -477.60223388671875, - "logps/rejected": -768.552978515625, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": 2.184582471847534, - "rewards/margins": 23.411266326904297, - "rewards/rejected": -21.2266845703125, - "step": 1024 - }, - { - "epoch": 0.39, - "learning_rate": 1.3773071849693137e-05, - "logits/chosen": -5.200369358062744, - "logits/rejected": -2.3944461345672607, - "logps/chosen": -215.2384033203125, - "logps/rejected": -941.5869140625, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.860919177532196, - "rewards/margins": 13.852727890014648, - "rewards/rejected": -12.991808891296387, - "step": 1025 - }, - { - "epoch": 0.39, - "learning_rate": 1.3762027835645384e-05, - "logits/chosen": -3.7448651790618896, - "logits/rejected": 0.27946847677230835, - "logps/chosen": -282.65386962890625, - "logps/rejected": -686.133544921875, - "loss": 0.0004, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.02102050743997097, - "rewards/margins": 5.816272258758545, - "rewards/rejected": -5.837292671203613, - "step": 1026 - }, - { - "epoch": 0.39, - "learning_rate": 1.3750978473978611e-05, - "logits/chosen": -6.525028228759766, - "logits/rejected": -1.2257944345474243, - "logps/chosen": -281.3544921875, - "logps/rejected": -887.91943359375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.8811401724815369, - "rewards/margins": 8.319489479064941, - "rewards/rejected": -9.200629234313965, - "step": 1027 - }, - { - "epoch": 0.39, - "learning_rate": 1.3739923780399176e-05, - "logits/chosen": -2.9738574028015137, - "logits/rejected": -0.47758060693740845, - "logps/chosen": -483.6650390625, - "logps/rejected": -997.0036010742188, - "loss": 0.0007, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.2567169666290283, - "rewards/margins": 20.536983489990234, - "rewards/rejected": -21.793701171875, - "step": 1028 - }, - { - "epoch": 0.39, - "learning_rate": 1.3728863770621028e-05, - "logits/chosen": -1.7110992670059204, - "logits/rejected": -2.9042980670928955, - "logps/chosen": -292.16705322265625, - "logps/rejected": -470.59344482421875, - "loss": 0.0109, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.93121337890625, - "rewards/margins": 18.594161987304688, - "rewards/rejected": -17.662948608398438, - "step": 1029 - }, - { - "epoch": 0.39, - "learning_rate": 1.3717798460365663e-05, - "logits/chosen": -5.115594387054443, - "logits/rejected": -1.5771032571792603, - "logps/chosen": -247.2740478515625, - "logps/rejected": -551.9476318359375, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.497808814048767, - "rewards/margins": 7.517553806304932, - "rewards/rejected": -6.019744873046875, - "step": 1030 - }, - { - "epoch": 0.39, - "learning_rate": 1.3706727865362113e-05, - "logits/chosen": -6.1490302085876465, - "logits/rejected": -1.3107777833938599, - "logps/chosen": -357.81927490234375, - "logps/rejected": -1816.314208984375, - "loss": 0.0878, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.01189270056784153, - "rewards/margins": 21.196340560913086, - "rewards/rejected": -21.1844482421875, - "step": 1031 - }, - { - "epoch": 0.39, - "learning_rate": 1.3695652001346928e-05, - "logits/chosen": -2.350944995880127, - "logits/rejected": -1.3889005184173584, - "logps/chosen": -518.2063598632812, - "logps/rejected": -1033.9580078125, - "loss": 0.0002, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.5479004383087158, - "rewards/margins": 24.239845275878906, - "rewards/rejected": -22.691944122314453, - "step": 1032 - }, - { - "epoch": 0.39, - "learning_rate": 1.3684570884064146e-05, - "logits/chosen": -1.5868607759475708, - "logits/rejected": -4.526296138763428, - "logps/chosen": -225.25750732421875, - "logps/rejected": -235.4884033203125, - "loss": 0.0004, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.7182464599609375, - "rewards/margins": 11.646611213684082, - "rewards/rejected": -12.36485767364502, - "step": 1033 - }, - { - "epoch": 0.39, - "learning_rate": 1.3673484529265262e-05, - "logits/chosen": -1.0809530019760132, - "logits/rejected": -0.3582228124141693, - "logps/chosen": -285.44732666015625, - "logps/rejected": -674.721923828125, - "loss": 0.0866, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.7913818359375, - "rewards/margins": 18.407629013061523, - "rewards/rejected": -17.616247177124023, - "step": 1034 - }, - { - "epoch": 0.39, - "learning_rate": 1.366239295270923e-05, - "logits/chosen": -1.63909113407135, - "logits/rejected": -6.1439385414123535, - "logps/chosen": -776.6180419921875, - "logps/rejected": -315.73126220703125, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.992785632610321, - "rewards/margins": 9.583383560180664, - "rewards/rejected": -8.590598106384277, - "step": 1035 - }, - { - "epoch": 0.39, - "learning_rate": 1.3651296170162417e-05, - "logits/chosen": -6.699101448059082, - "logits/rejected": -2.6993324756622314, - "logps/chosen": -314.1218566894531, - "logps/rejected": -1541.227783203125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 3.3083558082580566, - "rewards/margins": 30.34417152404785, - "rewards/rejected": -27.035816192626953, - "step": 1036 - }, - { - "epoch": 0.39, - "learning_rate": 1.36401941973986e-05, - "logits/chosen": -4.741733074188232, - "logits/rejected": -2.712083339691162, - "logps/chosen": -466.66632080078125, - "logps/rejected": -1212.34423828125, - "loss": 0.0002, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.077423095703125, - "rewards/margins": 16.5057430267334, - "rewards/rejected": -17.583166122436523, - "step": 1037 - }, - { - "epoch": 0.39, - "learning_rate": 1.3629087050198924e-05, - "logits/chosen": -6.639388084411621, - "logits/rejected": -0.21148905158042908, - "logps/chosen": -298.365966796875, - "logps/rejected": -1479.205810546875, - "loss": 0.0003, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.0553162097930908, - "rewards/margins": 20.52881622314453, - "rewards/rejected": -19.473499298095703, - "step": 1038 - }, - { - "epoch": 0.39, - "learning_rate": 1.3617974744351891e-05, - "logits/chosen": -1.0965224504470825, - "logits/rejected": -5.285111427307129, - "logps/chosen": -707.7129516601562, - "logps/rejected": -377.6910705566406, - "loss": 0.0007, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.0186035633087158, - "rewards/margins": 19.365262985229492, - "rewards/rejected": -18.34665870666504, - "step": 1039 - }, - { - "epoch": 0.39, - "learning_rate": 1.3606857295653341e-05, - "logits/chosen": -3.1169984340667725, - "logits/rejected": -4.541771411895752, - "logps/chosen": -488.1170654296875, - "logps/rejected": -759.3621826171875, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.441473364830017, - "rewards/margins": 20.989025115966797, - "rewards/rejected": -19.54755210876465, - "step": 1040 - }, - { - "epoch": 0.39, - "learning_rate": 1.3595734719906421e-05, - "logits/chosen": -4.088079452514648, - "logits/rejected": -0.9518080353736877, - "logps/chosen": -226.03244018554688, - "logps/rejected": -784.3434448242188, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.8401153683662415, - "rewards/margins": 18.07090187072754, - "rewards/rejected": -17.23078727722168, - "step": 1041 - }, - { - "epoch": 0.39, - "learning_rate": 1.3584607032921566e-05, - "logits/chosen": -3.3595526218414307, - "logits/rejected": -4.405177593231201, - "logps/chosen": -315.1900634765625, - "logps/rejected": -166.933837890625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.30534669756889343, - "rewards/margins": 11.006978988647461, - "rewards/rejected": -11.312325477600098, - "step": 1042 - }, - { - "epoch": 0.39, - "learning_rate": 1.357347425051648e-05, - "logits/chosen": -2.887566566467285, - "logits/rejected": -1.9590818881988525, - "logps/chosen": -269.07904052734375, - "logps/rejected": -500.62652587890625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.4153259992599487, - "rewards/margins": 8.378561019897461, - "rewards/rejected": -9.7938871383667, - "step": 1043 - }, - { - "epoch": 0.39, - "learning_rate": 1.3562336388516097e-05, - "logits/chosen": -1.6409834623336792, - "logits/rejected": -8.120970726013184, - "logps/chosen": -410.63128662109375, - "logps/rejected": -147.87493896484375, - "loss": 0.0017, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.8047729730606079, - "rewards/margins": 10.71573543548584, - "rewards/rejected": -9.910962104797363, - "step": 1044 - }, - { - "epoch": 0.4, - "learning_rate": 1.3551193462752587e-05, - "logits/chosen": -7.243612289428711, - "logits/rejected": -0.8348671793937683, - "logps/chosen": -336.0624694824219, - "logps/rejected": -1781.331298828125, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.009320068173110485, - "rewards/margins": 25.035358428955078, - "rewards/rejected": -25.044677734375, - "step": 1045 - }, - { - "epoch": 0.4, - "learning_rate": 1.3540045489065318e-05, - "logits/chosen": -6.04591703414917, - "logits/rejected": -1.8742144107818604, - "logps/chosen": -460.9709777832031, - "logps/rejected": -2372.3779296875, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 2.063433885574341, - "rewards/margins": 42.76200866699219, - "rewards/rejected": -40.69857406616211, - "step": 1046 - }, - { - "epoch": 0.4, - "learning_rate": 1.3528892483300821e-05, - "logits/chosen": -0.9757571220397949, - "logits/rejected": -6.6868815422058105, - "logps/chosen": -539.764404296875, - "logps/rejected": -308.77764892578125, - "loss": 0.0009, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.888940453529358, - "rewards/margins": 15.340821266174316, - "rewards/rejected": -13.45188045501709, - "step": 1047 - }, - { - "epoch": 0.4, - "learning_rate": 1.3517734461312789e-05, - "logits/chosen": -0.30431821942329407, - "logits/rejected": -1.594966173171997, - "logps/chosen": -373.3133544921875, - "logps/rejected": -716.8109130859375, - "loss": 0.0005, - "rewards/accuracies": 1.0, - "rewards/chosen": 2.474017381668091, - "rewards/margins": 36.28046417236328, - "rewards/rejected": -33.80644607543945, - "step": 1048 - }, - { - "epoch": 0.4, - "learning_rate": 1.350657143896204e-05, - "logits/chosen": -5.908556938171387, - "logits/rejected": -2.747534990310669, - "logps/chosen": -412.3085021972656, - "logps/rejected": -1010.843017578125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.45277100801467896, - "rewards/margins": 10.531005859375, - "rewards/rejected": -10.983777046203613, - "step": 1049 - }, - { - "epoch": 0.4, - "learning_rate": 1.3495403432116507e-05, - "logits/chosen": -5.854925155639648, - "logits/rejected": -6.2280426025390625, - "logps/chosen": -243.63134765625, - "logps/rejected": -335.99700927734375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.5816468000411987, - "rewards/margins": 14.857315063476562, - "rewards/rejected": -13.275668144226074, - "step": 1050 - }, - { - "epoch": 0.4, - "learning_rate": 1.3484230456651202e-05, - "logits/chosen": -4.048248291015625, - "logits/rejected": -0.9287815093994141, - "logps/chosen": -267.6718444824219, - "logps/rejected": -899.255859375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.1912078857421875, - "rewards/margins": 19.63077163696289, - "rewards/rejected": -18.439563751220703, - "step": 1051 - }, - { - "epoch": 0.4, - "learning_rate": 1.3473052528448203e-05, - "logits/chosen": -7.593653202056885, - "logits/rejected": -2.705264091491699, - "logps/chosen": -340.9833984375, - "logps/rejected": -2138.71826171875, - "loss": 0.0024, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.8789856433868408, - "rewards/margins": 21.491802215576172, - "rewards/rejected": -23.37078857421875, - "step": 1052 - }, - { - "epoch": 0.4, - "learning_rate": 1.3461869663396629e-05, - "logits/chosen": -5.661430835723877, - "logits/rejected": -3.2392473220825195, - "logps/chosen": -274.144775390625, - "logps/rejected": -618.7837524414062, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.6991943717002869, - "rewards/margins": 7.879315376281738, - "rewards/rejected": -7.180120944976807, - "step": 1053 - }, - { - "epoch": 0.4, - "learning_rate": 1.345068187739261e-05, - "logits/chosen": -3.1377274990081787, - "logits/rejected": -1.9971827268600464, - "logps/chosen": -274.8274230957031, - "logps/rejected": -551.379150390625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.7776153087615967, - "rewards/margins": 13.63051700592041, - "rewards/rejected": -16.408132553100586, - "step": 1054 - }, - { - "epoch": 0.4, - "learning_rate": 1.3439489186339283e-05, - "logits/chosen": -4.053652286529541, - "logits/rejected": -3.406700849533081, - "logps/chosen": -490.4859619140625, - "logps/rejected": -743.4149169921875, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.267779588699341, - "rewards/margins": 11.157928466796875, - "rewards/rejected": -13.425707817077637, - "step": 1055 - }, - { - "epoch": 0.4, - "learning_rate": 1.3428291606146747e-05, - "logits/chosen": -8.37868881225586, - "logits/rejected": -2.8679420948028564, - "logps/chosen": -441.08660888671875, - "logps/rejected": -2435.060791015625, - "loss": 0.0313, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.692950487136841, - "rewards/margins": 18.872209548950195, - "rewards/rejected": -21.565160751342773, - "step": 1056 - }, - { - "epoch": 0.4, - "learning_rate": 1.3417089152732049e-05, - "logits/chosen": -4.8246331214904785, - "logits/rejected": -1.6701329946517944, - "logps/chosen": -464.1206970214844, - "logps/rejected": -1510.978515625, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.8388274908065796, - "rewards/margins": 30.58077049255371, - "rewards/rejected": -28.741943359375, - "step": 1057 - }, - { - "epoch": 0.4, - "learning_rate": 1.340588184201917e-05, - "logits/chosen": -0.44913873076438904, - "logits/rejected": -4.81514835357666, - "logps/chosen": -377.38299560546875, - "logps/rejected": -207.45474243164062, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.906030297279358, - "rewards/margins": 11.102437973022461, - "rewards/rejected": -9.196407318115234, - "step": 1058 - }, - { - "epoch": 0.4, - "learning_rate": 1.3394669689939002e-05, - "logits/chosen": -3.478986978530884, - "logits/rejected": -5.049716949462891, - "logps/chosen": -278.7183532714844, - "logps/rejected": -140.31541442871094, - "loss": 0.0007, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.8166412711143494, - "rewards/margins": 9.794564247131348, - "rewards/rejected": -8.977923393249512, - "step": 1059 - }, - { - "epoch": 0.4, - "learning_rate": 1.3383452712429304e-05, - "logits/chosen": -3.1661901473999023, - "logits/rejected": -2.0924301147460938, - "logps/chosen": -415.509765625, - "logps/rejected": -674.5863037109375, - "loss": 0.0028, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.67486572265625, - "rewards/margins": 10.53472900390625, - "rewards/rejected": -12.2095947265625, - "step": 1060 - }, - { - "epoch": 0.4, - "learning_rate": 1.3372230925434699e-05, - "logits/chosen": -7.224064826965332, - "logits/rejected": -2.1552162170410156, - "logps/chosen": -454.33782958984375, - "logps/rejected": -3850.68896484375, - "loss": 0.0019, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.21098022162914276, - "rewards/margins": 23.945148468017578, - "rewards/rejected": -24.1561279296875, - "step": 1061 - }, - { - "epoch": 0.4, - "learning_rate": 1.3361004344906652e-05, - "logits/chosen": -2.622727155685425, - "logits/rejected": -0.9823749661445618, - "logps/chosen": -325.0920715332031, - "logps/rejected": -797.4526977539062, - "loss": 0.0004, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.8201934695243835, - "rewards/margins": 21.379148483276367, - "rewards/rejected": -20.5589542388916, - "step": 1062 - }, - { - "epoch": 0.4, - "learning_rate": 1.3349772986803438e-05, - "logits/chosen": -1.421085000038147, - "logits/rejected": -2.7376458644866943, - "logps/chosen": -270.8910827636719, - "logps/rejected": -396.9358215332031, - "loss": 0.0006, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.7479767203330994, - "rewards/margins": 13.594869613647461, - "rewards/rejected": -12.846893310546875, - "step": 1063 - }, - { - "epoch": 0.4, - "learning_rate": 1.333853686709012e-05, - "logits/chosen": -1.2308385372161865, - "logits/rejected": -5.54002571105957, - "logps/chosen": -388.36767578125, - "logps/rejected": -304.82012939453125, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": 2.074597120285034, - "rewards/margins": 19.385164260864258, - "rewards/rejected": -17.31056785583496, - "step": 1064 - }, - { - "epoch": 0.4, - "learning_rate": 1.3327296001738536e-05, - "logits/chosen": -3.2881662845611572, - "logits/rejected": -5.616011619567871, - "logps/chosen": -178.5684814453125, - "logps/rejected": -59.88681411743164, - "loss": 0.0069, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.4943527281284332, - "rewards/margins": 2.8768067359924316, - "rewards/rejected": -2.3824539184570312, - "step": 1065 - }, - { - "epoch": 0.4, - "learning_rate": 1.331605040672727e-05, - "logits/chosen": -4.807661056518555, - "logits/rejected": -1.0234616994857788, - "logps/chosen": -333.339599609375, - "logps/rejected": -1154.42138671875, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.923431396484375, - "rewards/margins": 23.163959503173828, - "rewards/rejected": -25.087390899658203, - "step": 1066 - }, - { - "epoch": 0.4, - "learning_rate": 1.330480009804162e-05, - "logits/chosen": 0.6361247897148132, - "logits/rejected": -2.767072916030884, - "logps/chosen": -457.532470703125, - "logps/rejected": -276.7005615234375, - "loss": 0.0002, - "rewards/accuracies": 1.0, - "rewards/chosen": 2.6750550270080566, - "rewards/margins": 18.76803970336914, - "rewards/rejected": -16.092985153198242, - "step": 1067 - }, - { - "epoch": 0.4, - "learning_rate": 1.3293545091673587e-05, - "logits/chosen": -5.234868049621582, - "logits/rejected": -1.3794963359832764, - "logps/chosen": -269.54803466796875, - "logps/rejected": -764.97900390625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.301727294921875, - "rewards/margins": 9.33251953125, - "rewards/rejected": -10.634246826171875, - "step": 1068 - }, - { - "epoch": 0.4, - "learning_rate": 1.3282285403621864e-05, - "logits/chosen": -0.06963890790939331, - "logits/rejected": -1.587038516998291, - "logps/chosen": -252.76876831054688, - "logps/rejected": -572.5631713867188, - "loss": 0.0033, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.393820196390152, - "rewards/margins": 26.34421730041504, - "rewards/rejected": -25.950397491455078, - "step": 1069 - }, - { - "epoch": 0.4, - "learning_rate": 1.3271021049891778e-05, - "logits/chosen": -6.475007057189941, - "logits/rejected": -4.451111793518066, - "logps/chosen": -133.7144012451172, - "logps/rejected": -646.556884765625, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.7580238580703735, - "rewards/margins": 14.02757740020752, - "rewards/rejected": -14.785601615905762, - "step": 1070 - }, - { - "epoch": 0.4, - "learning_rate": 1.3259752046495304e-05, - "logits/chosen": -4.507430076599121, - "logits/rejected": -1.7698137760162354, - "logps/chosen": -532.137939453125, - "logps/rejected": -1024.7806396484375, - "loss": 0.0003, - "rewards/accuracies": 1.0, - "rewards/chosen": -4.353100776672363, - "rewards/margins": 7.086871147155762, - "rewards/rejected": -11.439971923828125, - "step": 1071 - }, - { - "epoch": 0.41, - "learning_rate": 1.3248478409451017e-05, - "logits/chosen": -2.8652334213256836, - "logits/rejected": -2.301222801208496, - "logps/chosen": -490.9986267089844, - "logps/rejected": -658.331787109375, - "loss": 0.0012, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.329510450363159, - "rewards/margins": 7.316583633422852, - "rewards/rejected": -9.64609432220459, - "step": 1072 - }, - { - "epoch": 0.41, - "learning_rate": 1.3237200154784083e-05, - "logits/chosen": -0.009559035301208496, - "logits/rejected": -4.238869667053223, - "logps/chosen": -243.70358276367188, - "logps/rejected": -174.7407684326172, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.6291961669921875, - "rewards/margins": 13.60997486114502, - "rewards/rejected": -11.980778694152832, - "step": 1073 - }, - { - "epoch": 0.41, - "learning_rate": 1.3225917298526225e-05, - "logits/chosen": -2.164818286895752, - "logits/rejected": -3.0247020721435547, - "logps/chosen": -304.9925537109375, - "logps/rejected": -359.9444580078125, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.903161644935608, - "rewards/margins": 6.901764392852783, - "rewards/rejected": -8.804925918579102, - "step": 1074 - }, - { - "epoch": 0.41, - "learning_rate": 1.3214629856715721e-05, - "logits/chosen": -4.703683853149414, - "logits/rejected": -2.5471978187561035, - "logps/chosen": -287.85357666015625, - "logps/rejected": -746.9094848632812, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": 2.0717499256134033, - "rewards/margins": 12.106979370117188, - "rewards/rejected": -10.035229682922363, - "step": 1075 - }, - { - "epoch": 0.41, - "learning_rate": 1.3203337845397358e-05, - "logits/chosen": -5.305564880371094, - "logits/rejected": -2.222858190536499, - "logps/chosen": -333.8046569824219, - "logps/rejected": -1001.754638671875, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.309274286031723, - "rewards/margins": 19.669437408447266, - "rewards/rejected": -19.97871208190918, - "step": 1076 - }, - { - "epoch": 0.41, - "learning_rate": 1.3192041280622409e-05, - "logits/chosen": -2.551710844039917, - "logits/rejected": -5.912822246551514, - "logps/chosen": -359.79498291015625, - "logps/rejected": -207.1944580078125, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.757739245891571, - "rewards/margins": 7.6972808837890625, - "rewards/rejected": -8.4550199508667, - "step": 1077 - }, - { - "epoch": 0.41, - "learning_rate": 1.3180740178448641e-05, - "logits/chosen": -3.5593738555908203, - "logits/rejected": -4.449256420135498, - "logps/chosen": -249.80108642578125, - "logps/rejected": -470.56414794921875, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.1005829572677612, - "rewards/margins": 21.395662307739258, - "rewards/rejected": -20.295080184936523, - "step": 1078 - }, - { - "epoch": 0.41, - "learning_rate": 1.3169434554940259e-05, - "logits/chosen": -3.1681036949157715, - "logits/rejected": -2.57877254486084, - "logps/chosen": -286.7291259765625, - "logps/rejected": -647.039794921875, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.6693450808525085, - "rewards/margins": 13.31375503540039, - "rewards/rejected": -13.983099937438965, - "step": 1079 - }, - { - "epoch": 0.41, - "learning_rate": 1.3158124426167891e-05, - "logits/chosen": -2.6833786964416504, - "logits/rejected": -0.7547755241394043, - "logps/chosen": -522.298095703125, - "logps/rejected": -1687.205322265625, - "loss": 0.0003, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.41887208819389343, - "rewards/margins": 33.7904052734375, - "rewards/rejected": -33.37153244018555, - "step": 1080 - }, - { - "epoch": 0.41, - "learning_rate": 1.3146809808208574e-05, - "logits/chosen": -4.144729137420654, - "logits/rejected": -1.8968626260757446, - "logps/chosen": -414.1907958984375, - "logps/rejected": -767.3989868164062, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": 2.7655274868011475, - "rewards/margins": 12.678498268127441, - "rewards/rejected": -9.912970542907715, - "step": 1081 - }, - { - "epoch": 0.41, - "learning_rate": 1.3135490717145726e-05, - "logits/chosen": -2.4580094814300537, - "logits/rejected": -2.6170897483825684, - "logps/chosen": -352.69329833984375, - "logps/rejected": -523.5527954101562, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.2453094720840454, - "rewards/margins": 12.646804809570312, - "rewards/rejected": -13.892114639282227, - "step": 1082 - }, - { - "epoch": 0.41, - "learning_rate": 1.312416716906913e-05, - "logits/chosen": -1.4675618410110474, - "logits/rejected": -2.6281511783599854, - "logps/chosen": -368.85235595703125, - "logps/rejected": -438.130615234375, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.5554962158203125, - "rewards/margins": 11.4912691116333, - "rewards/rejected": -10.935772895812988, - "step": 1083 - }, - { - "epoch": 0.41, - "learning_rate": 1.3112839180074892e-05, - "logits/chosen": -0.8671691417694092, - "logits/rejected": -5.955227851867676, - "logps/chosen": -548.879638671875, - "logps/rejected": -188.53860473632812, - "loss": 0.0004, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.16384278237819672, - "rewards/margins": 7.463743686676025, - "rewards/rejected": -7.299901008605957, - "step": 1084 - }, - { - "epoch": 0.41, - "learning_rate": 1.3101506766265435e-05, - "logits/chosen": -3.029693126678467, - "logits/rejected": -0.786283016204834, - "logps/chosen": -600.7635498046875, - "logps/rejected": -1290.2906494140625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.100579857826233, - "rewards/margins": 16.833904266357422, - "rewards/rejected": -15.733325004577637, - "step": 1085 - }, - { - "epoch": 0.41, - "learning_rate": 1.3090169943749475e-05, - "logits/chosen": -5.146925926208496, - "logits/rejected": -2.5236074924468994, - "logps/chosen": -658.7933959960938, - "logps/rejected": -1718.609130859375, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.576647937297821, - "rewards/margins": 23.188684463500977, - "rewards/rejected": -22.612035751342773, - "step": 1086 - }, - { - "epoch": 0.41, - "learning_rate": 1.3078828728641994e-05, - "logits/chosen": -3.229682445526123, - "logits/rejected": -3.844348192214966, - "logps/chosen": -151.19256591796875, - "logps/rejected": -477.687744140625, - "loss": 0.0011, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.30439454317092896, - "rewards/margins": 23.687719345092773, - "rewards/rejected": -23.99211311340332, - "step": 1087 - }, - { - "epoch": 0.41, - "learning_rate": 1.306748313706422e-05, - "logits/chosen": -0.7554544806480408, - "logits/rejected": -0.3032688498497009, - "logps/chosen": -238.5058135986328, - "logps/rejected": -453.8275451660156, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.309051513671875, - "rewards/margins": 13.269586563110352, - "rewards/rejected": -11.960535049438477, - "step": 1088 - }, - { - "epoch": 0.41, - "learning_rate": 1.3056133185143596e-05, - "logits/chosen": -6.700985431671143, - "logits/rejected": -2.9158053398132324, - "logps/chosen": -259.3643493652344, - "logps/rejected": -1387.7872314453125, - "loss": 0.0069, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.259796142578125, - "rewards/margins": 19.859663009643555, - "rewards/rejected": -19.59986686706543, - "step": 1089 - }, - { - "epoch": 0.41, - "learning_rate": 1.3044778889013764e-05, - "logits/chosen": -4.616621494293213, - "logits/rejected": -1.4895179271697998, - "logps/chosen": -305.7828063964844, - "logps/rejected": -990.5755004882812, - "loss": 0.0021, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.3175506591796875, - "rewards/margins": 15.919339179992676, - "rewards/rejected": -15.601788520812988, - "step": 1090 - }, - { - "epoch": 0.41, - "learning_rate": 1.3033420264814547e-05, - "logits/chosen": -7.966914176940918, - "logits/rejected": -1.0999888181686401, - "logps/chosen": -357.77178955078125, - "logps/rejected": -1975.625, - "loss": 0.0025, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.02996215783059597, - "rewards/margins": 13.918610572814941, - "rewards/rejected": -13.88864803314209, - "step": 1091 - }, - { - "epoch": 0.41, - "learning_rate": 1.3022057328691915e-05, - "logits/chosen": -0.7513139843940735, - "logits/rejected": -2.239795207977295, - "logps/chosen": -418.0223388671875, - "logps/rejected": -545.2529296875, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 3.0987184047698975, - "rewards/margins": 16.470129013061523, - "rewards/rejected": -13.371411323547363, - "step": 1092 - }, - { - "epoch": 0.41, - "learning_rate": 1.3010690096797971e-05, - "logits/chosen": -0.6502572894096375, - "logits/rejected": -2.01469087600708, - "logps/chosen": -137.29327392578125, - "logps/rejected": -383.7384033203125, - "loss": 0.0866, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.450582891702652, - "rewards/margins": 20.23545265197754, - "rewards/rejected": -19.784870147705078, - "step": 1093 - }, - { - "epoch": 0.41, - "learning_rate": 1.2999318585290919e-05, - "logits/chosen": -0.835849940776825, - "logits/rejected": -1.2420004606246948, - "logps/chosen": -413.9691162109375, - "logps/rejected": -722.2332763671875, - "loss": 0.0004, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.735382080078125, - "rewards/margins": 22.773969650268555, - "rewards/rejected": -21.03858757019043, - "step": 1094 - }, - { - "epoch": 0.41, - "learning_rate": 1.298794281033505e-05, - "logits/chosen": -2.7577309608459473, - "logits/rejected": -1.2668348550796509, - "logps/chosen": -286.522705078125, - "logps/rejected": -403.49407958984375, - "loss": 0.0101, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.479681372642517, - "rewards/margins": 2.470428466796875, - "rewards/rejected": -0.9907470941543579, - "step": 1095 - }, - { - "epoch": 0.41, - "learning_rate": 1.297656278810072e-05, - "logits/chosen": -5.745113372802734, - "logits/rejected": -0.8009029030799866, - "logps/chosen": -382.412841796875, - "logps/rejected": -1452.5916748046875, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.10209351032972336, - "rewards/margins": 20.1127872467041, - "rewards/rejected": -20.214879989624023, - "step": 1096 - }, - { - "epoch": 0.41, - "learning_rate": 1.2965178534764311e-05, - "logits/chosen": -2.500925064086914, - "logits/rejected": -6.216385364532471, - "logps/chosen": -196.88967895507812, - "logps/rejected": -126.87930297851562, - "loss": 0.0011, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.5568909049034119, - "rewards/margins": 8.069360733032227, - "rewards/rejected": -7.512470245361328, - "step": 1097 - }, - { - "epoch": 0.42, - "learning_rate": 1.295379006650823e-05, - "logits/chosen": -0.754546582698822, - "logits/rejected": -0.19943805038928986, - "logps/chosen": -150.1750030517578, - "logps/rejected": -450.1876525878906, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.08752899616956711, - "rewards/margins": 19.5770206451416, - "rewards/rejected": -19.489492416381836, - "step": 1098 - }, - { - "epoch": 0.42, - "learning_rate": 1.2942397399520867e-05, - "logits/chosen": -1.3250384330749512, - "logits/rejected": -2.7079107761383057, - "logps/chosen": -170.59304809570312, - "logps/rejected": -363.52099609375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.14096222817897797, - "rewards/margins": 18.689130783081055, - "rewards/rejected": -18.548168182373047, - "step": 1099 - }, - { - "epoch": 0.42, - "learning_rate": 1.293100054999659e-05, - "logits/chosen": -3.869577407836914, - "logits/rejected": -1.500819206237793, - "logps/chosen": -180.38839721679688, - "logps/rejected": -529.40771484375, - "loss": 0.0002, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.07531585544347763, - "rewards/margins": 8.635268211364746, - "rewards/rejected": -8.710583686828613, - "step": 1100 - }, - { - "epoch": 0.42, - "learning_rate": 1.2919599534135703e-05, - "logits/chosen": -4.593672275543213, - "logits/rejected": -1.4921895265579224, - "logps/chosen": -321.1290283203125, - "logps/rejected": -1103.5186767578125, - "loss": 0.0019, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.3069031238555908, - "rewards/margins": 31.915876388549805, - "rewards/rejected": -30.608972549438477, - "step": 1101 - }, - { - "epoch": 0.42, - "learning_rate": 1.2908194368144437e-05, - "logits/chosen": -1.0702483654022217, - "logits/rejected": -3.913233995437622, - "logps/chosen": -417.26324462890625, - "logps/rejected": -138.3511505126953, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.43470460176467896, - "rewards/margins": 8.15830135345459, - "rewards/rejected": -7.723597049713135, - "step": 1102 - }, - { - "epoch": 0.42, - "learning_rate": 1.2896785068234925e-05, - "logits/chosen": -2.384672164916992, - "logits/rejected": -2.797131061553955, - "logps/chosen": -185.6265411376953, - "logps/rejected": -318.35955810546875, - "loss": 0.0029, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.6907196044921875, - "rewards/margins": 12.818219184875488, - "rewards/rejected": -11.1274995803833, - "step": 1103 - }, - { - "epoch": 0.42, - "learning_rate": 1.2885371650625164e-05, - "logits/chosen": -7.058355808258057, - "logits/rejected": -3.492342233657837, - "logps/chosen": -252.09036254882812, - "logps/rejected": -2016.0462646484375, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": 2.95682692527771, - "rewards/margins": 9.998623847961426, - "rewards/rejected": -7.041797161102295, - "step": 1104 - }, - { - "epoch": 0.42, - "learning_rate": 1.2873954131539024e-05, - "logits/chosen": -0.8972697257995605, - "logits/rejected": -3.4349658489227295, - "logps/chosen": -260.8355712890625, - "logps/rejected": -196.04428100585938, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.11369019001722336, - "rewards/margins": 9.623065948486328, - "rewards/rejected": -9.50937557220459, - "step": 1105 - }, - { - "epoch": 0.42, - "learning_rate": 1.2862532527206186e-05, - "logits/chosen": -4.677799224853516, - "logits/rejected": -1.371017575263977, - "logps/chosen": -425.91363525390625, - "logps/rejected": -1410.0340576171875, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.778918445110321, - "rewards/margins": 18.295276641845703, - "rewards/rejected": -17.516357421875, - "step": 1106 - }, - { - "epoch": 0.42, - "learning_rate": 1.285110685386215e-05, - "logits/chosen": -6.3758721351623535, - "logits/rejected": -2.097581148147583, - "logps/chosen": -540.695556640625, - "logps/rejected": -3669.5830078125, - "loss": 0.0168, - "rewards/accuracies": 1.0, - "rewards/chosen": -4.8809356689453125, - "rewards/margins": 27.331832885742188, - "rewards/rejected": -32.2127685546875, - "step": 1107 - }, - { - "epoch": 0.42, - "learning_rate": 1.2839677127748195e-05, - "logits/chosen": -4.852079391479492, - "logits/rejected": -7.5017266273498535, - "logps/chosen": -604.3306884765625, - "logps/rejected": -245.99539184570312, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -6.131970405578613, - "rewards/margins": 7.681139945983887, - "rewards/rejected": -13.8131103515625, - "step": 1108 - }, - { - "epoch": 0.42, - "learning_rate": 1.2828243365111364e-05, - "logits/chosen": -6.330704212188721, - "logits/rejected": -3.023848533630371, - "logps/chosen": -174.14877319335938, - "logps/rejected": -4614.6064453125, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.43997499346733093, - "rewards/margins": 19.723989486694336, - "rewards/rejected": -20.163965225219727, - "step": 1109 - }, - { - "epoch": 0.42, - "learning_rate": 1.281680558220443e-05, - "logits/chosen": -3.2199175357818604, - "logits/rejected": -2.1951470375061035, - "logps/chosen": -225.0700225830078, - "logps/rejected": -588.3533325195312, - "loss": 0.0004, - "rewards/accuracies": 1.0, - "rewards/chosen": 2.041642904281616, - "rewards/margins": 14.33224105834961, - "rewards/rejected": -12.290597915649414, - "step": 1110 - }, - { - "epoch": 0.42, - "learning_rate": 1.2805363795285895e-05, - "logits/chosen": -7.055121421813965, - "logits/rejected": -1.0504878759384155, - "logps/chosen": -397.32305908203125, - "logps/rejected": -1804.312744140625, - "loss": 0.0012, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.02115783654153347, - "rewards/margins": 16.557693481445312, - "rewards/rejected": -16.536535263061523, - "step": 1111 - }, - { - "epoch": 0.42, - "learning_rate": 1.2793918020619937e-05, - "logits/chosen": -1.9037216901779175, - "logits/rejected": 0.5523313879966736, - "logps/chosen": -467.0695495605469, - "logps/rejected": -1129.9715576171875, - "loss": 0.0002, - "rewards/accuracies": 1.0, - "rewards/chosen": 2.928546190261841, - "rewards/margins": 26.11176872253418, - "rewards/rejected": -23.1832218170166, - "step": 1112 - }, - { - "epoch": 0.42, - "learning_rate": 1.2782468274476416e-05, - "logits/chosen": -1.5215874910354614, - "logits/rejected": -2.9140915870666504, - "logps/chosen": -564.8785400390625, - "logps/rejected": -674.627197265625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.5326660871505737, - "rewards/margins": 14.50632381439209, - "rewards/rejected": -12.973657608032227, - "step": 1113 - }, - { - "epoch": 0.42, - "learning_rate": 1.2771014573130822e-05, - "logits/chosen": -1.3914164304733276, - "logits/rejected": -4.691014289855957, - "logps/chosen": -260.34820556640625, - "logps/rejected": -187.49343872070312, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -4.151251316070557, - "rewards/margins": 7.127196788787842, - "rewards/rejected": -11.278448104858398, - "step": 1114 - }, - { - "epoch": 0.42, - "learning_rate": 1.2759556932864285e-05, - "logits/chosen": -3.054739236831665, - "logits/rejected": -2.791029214859009, - "logps/chosen": -589.0455322265625, - "logps/rejected": -1036.044677734375, - "loss": 0.0003, - "rewards/accuracies": 1.0, - "rewards/chosen": 2.348297119140625, - "rewards/margins": 19.0994930267334, - "rewards/rejected": -16.751195907592773, - "step": 1115 - }, - { - "epoch": 0.42, - "learning_rate": 1.2759556932864285e-05, - "logits/chosen": -5.032949447631836, - "logits/rejected": -2.7337567806243896, - "logps/chosen": -495.39410400390625, - "logps/rejected": -992.2042846679688, - "loss": 0.0286, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.928210437297821, - "rewards/margins": 16.895038604736328, - "rewards/rejected": -15.966827392578125, - "step": 1116 - }, - { - "epoch": 0.42, - "learning_rate": 1.2748095369963524e-05, - "logits/chosen": -5.359252452850342, - "logits/rejected": -4.069729328155518, - "logps/chosen": -472.4719543457031, - "logps/rejected": -1501.8515625, - "loss": 0.0008, - "rewards/accuracies": 1.0, - "rewards/chosen": 2.0341460704803467, - "rewards/margins": 25.996597290039062, - "rewards/rejected": -23.962451934814453, - "step": 1117 - }, - { - "epoch": 0.42, - "learning_rate": 1.2736629900720832e-05, - "logits/chosen": -3.3354239463806152, - "logits/rejected": -4.460382461547852, - "logps/chosen": -642.5178833007812, - "logps/rejected": -716.0924072265625, - "loss": 0.001, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.343048095703125, - "rewards/margins": 23.239465713500977, - "rewards/rejected": -24.5825138092041, - "step": 1118 - }, - { - "epoch": 0.42, - "learning_rate": 1.2725160541434053e-05, - "logits/chosen": -2.8234758377075195, - "logits/rejected": -0.9147989749908447, - "logps/chosen": -501.0015869140625, - "logps/rejected": -885.4539794921875, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 2.2164247035980225, - "rewards/margins": 12.164826393127441, - "rewards/rejected": -9.94840145111084, - "step": 1119 - }, - { - "epoch": 0.42, - "learning_rate": 1.2713687308406581e-05, - "logits/chosen": -2.374633550643921, - "logits/rejected": -2.386945962905884, - "logps/chosen": -481.55364990234375, - "logps/rejected": -720.0571899414062, - "loss": 0.0002, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.07646484673023224, - "rewards/margins": 13.388935089111328, - "rewards/rejected": -13.465399742126465, - "step": 1120 - }, - { - "epoch": 0.42, - "learning_rate": 1.2702210217947289e-05, - "logits/chosen": -1.7449915409088135, - "logits/rejected": -7.045405864715576, - "logps/chosen": -272.1461181640625, - "logps/rejected": -99.97683715820312, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.9063873291015625, - "rewards/margins": 8.776103973388672, - "rewards/rejected": -7.869716167449951, - "step": 1121 - }, - { - "epoch": 0.42, - "learning_rate": 1.2690729286370546e-05, - "logits/chosen": -3.9981560707092285, - "logits/rejected": -5.248332500457764, - "logps/chosen": -211.44635009765625, - "logps/rejected": -418.37969970703125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.5598877668380737, - "rewards/margins": 22.310928344726562, - "rewards/rejected": -20.751041412353516, - "step": 1122 - }, - { - "epoch": 0.42, - "learning_rate": 1.2679244529996182e-05, - "logits/chosen": -7.044925212860107, - "logits/rejected": -0.596447765827179, - "logps/chosen": -243.39254760742188, - "logps/rejected": -2112.652587890625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 2.266122579574585, - "rewards/margins": 44.76782989501953, - "rewards/rejected": -42.501708984375, - "step": 1123 - }, - { - "epoch": 0.42, - "learning_rate": 1.2667755965149464e-05, - "logits/chosen": -2.185353994369507, - "logits/rejected": -2.21224045753479, - "logps/chosen": -245.31051635742188, - "logps/rejected": -602.0286865234375, - "loss": 0.0003, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.077374219894409, - "rewards/margins": 27.09622573852539, - "rewards/rejected": -29.173599243164062, - "step": 1124 - }, - { - "epoch": 0.43, - "learning_rate": 1.2656263608161067e-05, - "logits/chosen": -6.660023212432861, - "logits/rejected": -3.717778444290161, - "logps/chosen": -474.206787109375, - "logps/rejected": -2197.8798828125, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.5222808718681335, - "rewards/margins": 26.136972427368164, - "rewards/rejected": -26.65925407409668, - "step": 1125 - }, - { - "epoch": 0.43, - "learning_rate": 1.2644767475367063e-05, - "logits/chosen": -3.444024085998535, - "logits/rejected": -0.809241533279419, - "logps/chosen": -229.66888427734375, - "logps/rejected": -883.20751953125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.52545166015625, - "rewards/margins": 31.01429557800293, - "rewards/rejected": -31.53974723815918, - "step": 1126 - }, - { - "epoch": 0.43, - "learning_rate": 1.263326758310889e-05, - "logits/chosen": -5.788127422332764, - "logits/rejected": -0.898065984249115, - "logps/chosen": -458.34832763671875, - "logps/rejected": -2404.224853515625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.12396240234375, - "rewards/margins": 28.576135635375977, - "rewards/rejected": -27.452173233032227, - "step": 1127 - }, - { - "epoch": 0.43, - "learning_rate": 1.262176394773332e-05, - "logits/chosen": -2.769296646118164, - "logits/rejected": -5.5914626121521, - "logps/chosen": -167.48385620117188, - "logps/rejected": -237.27565002441406, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.32806703448295593, - "rewards/margins": 16.183340072631836, - "rewards/rejected": -15.855273246765137, - "step": 1128 - }, - { - "epoch": 0.43, - "learning_rate": 1.2610256585592464e-05, - "logits/chosen": -0.4749630093574524, - "logits/rejected": -3.006800413131714, - "logps/chosen": -278.4504089355469, - "logps/rejected": -220.64361572265625, - "loss": 0.0415, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.8165619373321533, - "rewards/margins": 5.439175605773926, - "rewards/rejected": -7.2557373046875, - "step": 1129 - }, - { - "epoch": 0.43, - "learning_rate": 1.2598745513043716e-05, - "logits/chosen": -1.0064643621444702, - "logits/rejected": 0.04465543478727341, - "logps/chosen": -469.8092041015625, - "logps/rejected": -1184.11181640625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.8836395144462585, - "rewards/margins": 27.641036987304688, - "rewards/rejected": -28.524677276611328, - "step": 1130 - }, - { - "epoch": 0.43, - "learning_rate": 1.258723074644975e-05, - "logits/chosen": -7.405534267425537, - "logits/rejected": -0.45000159740448, - "logps/chosen": -267.06207275390625, - "logps/rejected": -1826.5301513671875, - "loss": 0.0003, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.685255527496338, - "rewards/margins": 26.87767219543457, - "rewards/rejected": -29.56292724609375, - "step": 1131 - }, - { - "epoch": 0.43, - "learning_rate": 1.2575712302178489e-05, - "logits/chosen": -3.81479549407959, - "logits/rejected": -1.2770291566848755, - "logps/chosen": -433.03204345703125, - "logps/rejected": -1039.0367431640625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -4.302487373352051, - "rewards/margins": 20.275283813476562, - "rewards/rejected": -24.57777214050293, - "step": 1132 - }, - { - "epoch": 0.43, - "learning_rate": 1.256419019660308e-05, - "logits/chosen": -3.5965490341186523, - "logits/rejected": -4.474925518035889, - "logps/chosen": -272.405029296875, - "logps/rejected": -487.23382568359375, - "loss": 0.0006, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.23576660454273224, - "rewards/margins": 19.561534881591797, - "rewards/rejected": -19.79730224609375, - "step": 1133 - }, - { - "epoch": 0.43, - "learning_rate": 1.2552664446101888e-05, - "logits/chosen": -2.2990190982818604, - "logits/rejected": -2.680142879486084, - "logps/chosen": -249.91416931152344, - "logps/rejected": -619.4161376953125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.9111557006835938, - "rewards/margins": 29.907859802246094, - "rewards/rejected": -31.819015502929688, - "step": 1134 - }, - { - "epoch": 0.43, - "learning_rate": 1.2541135067058443e-05, - "logits/chosen": -2.353527307510376, - "logits/rejected": -2.204014301300049, - "logps/chosen": -249.21420288085938, - "logps/rejected": -505.1107177734375, - "loss": 0.0223, - "rewards/accuracies": 1.0, - "rewards/chosen": -6.906108379364014, - "rewards/margins": 15.173713684082031, - "rewards/rejected": -22.079822540283203, - "step": 1135 - }, - { - "epoch": 0.43, - "learning_rate": 1.2529602075861443e-05, - "logits/chosen": -0.6777176856994629, - "logits/rejected": -3.3328840732574463, - "logps/chosen": -556.2242431640625, - "logps/rejected": -548.1979370117188, - "loss": 0.0128, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.6042847037315369, - "rewards/margins": 17.35321807861328, - "rewards/rejected": -17.957502365112305, - "step": 1136 - }, - { - "epoch": 0.43, - "learning_rate": 1.2518065488904717e-05, - "logits/chosen": -1.4410704374313354, - "logits/rejected": -5.188778400421143, - "logps/chosen": -522.768798828125, - "logps/rejected": -195.26568603515625, - "loss": 0.0019, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.590832531452179, - "rewards/margins": 12.599682807922363, - "rewards/rejected": -12.00885009765625, - "step": 1137 - }, - { - "epoch": 0.43, - "learning_rate": 1.2506525322587207e-05, - "logits/chosen": -3.254796266555786, - "logits/rejected": -3.504969358444214, - "logps/chosen": -288.87646484375, - "logps/rejected": -385.574951171875, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -6.374795436859131, - "rewards/margins": 11.063329696655273, - "rewards/rejected": -17.438125610351562, - "step": 1138 - }, - { - "epoch": 0.43, - "learning_rate": 1.249498159331294e-05, - "logits/chosen": -1.442971110343933, - "logits/rejected": -1.8728772401809692, - "logps/chosen": -1222.3599853515625, - "logps/rejected": -2096.55810546875, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.566662609577179, - "rewards/margins": 56.811485290527344, - "rewards/rejected": -56.24482345581055, - "step": 1139 - }, - { - "epoch": 0.43, - "learning_rate": 1.2483434317491008e-05, - "logits/chosen": -1.0277818441390991, - "logits/rejected": -0.1144632026553154, - "logps/chosen": -381.6443176269531, - "logps/rejected": -776.6334838867188, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.2010955810546875, - "rewards/margins": 21.478134155273438, - "rewards/rejected": -23.679229736328125, - "step": 1140 - }, - { - "epoch": 0.43, - "learning_rate": 1.2471883511535552e-05, - "logits/chosen": -0.8834692239761353, - "logits/rejected": -3.5211379528045654, - "logps/chosen": -440.1963195800781, - "logps/rejected": -454.5452880859375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -4.046865940093994, - "rewards/margins": 17.53101921081543, - "rewards/rejected": -21.577884674072266, - "step": 1141 - }, - { - "epoch": 0.43, - "learning_rate": 1.2460329191865716e-05, - "logits/chosen": -7.382632255554199, - "logits/rejected": -2.1080808639526367, - "logps/chosen": -369.12030029296875, - "logps/rejected": -2095.408203125, - "loss": 0.037, - "rewards/accuracies": 1.0, - "rewards/chosen": -4.345916748046875, - "rewards/margins": 37.196895599365234, - "rewards/rejected": -41.54281234741211, - "step": 1142 - }, - { - "epoch": 0.43, - "learning_rate": 1.2448771374905655e-05, - "logits/chosen": -5.672251224517822, - "logits/rejected": -1.8704372644424438, - "logps/chosen": -539.4666748046875, - "logps/rejected": -2834.58837890625, - "loss": 0.0057, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.609063744544983, - "rewards/margins": 48.684818267822266, - "rewards/rejected": -47.07575607299805, - "step": 1143 - }, - { - "epoch": 0.43, - "learning_rate": 1.2437210077084485e-05, - "logits/chosen": -2.918215751647949, - "logits/rejected": -0.8686614036560059, - "logps/chosen": -290.9164733886719, - "logps/rejected": -663.260498046875, - "loss": 0.0018, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.653942883014679, - "rewards/margins": 21.488733291625977, - "rewards/rejected": -20.83479118347168, - "step": 1144 - }, - { - "epoch": 0.43, - "learning_rate": 1.242564531483627e-05, - "logits/chosen": -4.7534990310668945, - "logits/rejected": -4.62734842300415, - "logps/chosen": -469.17498779296875, - "logps/rejected": -579.6329345703125, - "loss": 0.0137, - "rewards/accuracies": 1.0, - "rewards/chosen": -3.554617404937744, - "rewards/margins": 2.168221950531006, - "rewards/rejected": -5.72283935546875, - "step": 1145 - }, - { - "epoch": 0.43, - "learning_rate": 1.2414077104600004e-05, - "logits/chosen": -0.7571396231651306, - "logits/rejected": -4.009729862213135, - "logps/chosen": -380.7556457519531, - "logps/rejected": -438.08477783203125, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.2174072265625, - "rewards/margins": 29.372177124023438, - "rewards/rejected": -29.154769897460938, - "step": 1146 - }, - { - "epoch": 0.43, - "learning_rate": 1.240250546281958e-05, - "logits/chosen": -3.782992124557495, - "logits/rejected": -5.656318187713623, - "logps/chosen": -937.2815551757812, - "logps/rejected": -674.1420288085938, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -6.636517524719238, - "rewards/margins": 17.84783172607422, - "rewards/rejected": -24.484350204467773, - "step": 1147 - }, - { - "epoch": 0.43, - "learning_rate": 1.2390930405943766e-05, - "logits/chosen": -1.075545072555542, - "logits/rejected": -4.190987586975098, - "logps/chosen": -543.3314208984375, - "logps/rejected": -698.437744140625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 3.7799928188323975, - "rewards/margins": 32.66737365722656, - "rewards/rejected": -28.887380599975586, - "step": 1148 - }, - { - "epoch": 0.43, - "learning_rate": 1.2379351950426188e-05, - "logits/chosen": -1.7100398540496826, - "logits/rejected": -4.036055564880371, - "logps/chosen": -528.906494140625, - "logps/rejected": -669.427734375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.667340099811554, - "rewards/margins": 31.29547882080078, - "rewards/rejected": -31.962818145751953, - "step": 1149 - }, - { - "epoch": 0.43, - "learning_rate": 1.2367770112725303e-05, - "logits/chosen": 0.19935929775238037, - "logits/rejected": -3.8076767921447754, - "logps/chosen": -498.6068420410156, - "logps/rejected": -525.1956787109375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.7851775884628296, - "rewards/margins": 29.564716339111328, - "rewards/rejected": -27.779539108276367, - "step": 1150 - }, - { - "epoch": 0.44, - "learning_rate": 1.2356184909304373e-05, - "logits/chosen": -0.490835040807724, - "logits/rejected": -4.654898643493652, - "logps/chosen": -472.7083740234375, - "logps/rejected": -490.631103515625, - "loss": 0.0004, - "rewards/accuracies": 1.0, - "rewards/chosen": -4.079629421234131, - "rewards/margins": 20.7221622467041, - "rewards/rejected": -24.80179214477539, - "step": 1151 - }, - { - "epoch": 0.44, - "learning_rate": 1.2344596356631446e-05, - "logits/chosen": -2.9393248558044434, - "logits/rejected": -2.7837536334991455, - "logps/chosen": -217.8785400390625, - "logps/rejected": -331.8061828613281, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -3.908860921859741, - "rewards/margins": 8.517014503479004, - "rewards/rejected": -12.425875663757324, - "step": 1152 - }, - { - "epoch": 0.44, - "learning_rate": 1.233300447117933e-05, - "logits/chosen": -1.2213547229766846, - "logits/rejected": -1.1265474557876587, - "logps/chosen": -679.749267578125, - "logps/rejected": -1028.137451171875, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.692456007003784, - "rewards/margins": 23.807031631469727, - "rewards/rejected": -26.499486923217773, - "step": 1153 - }, - { - "epoch": 0.44, - "learning_rate": 1.2321409269425575e-05, - "logits/chosen": -5.760436534881592, - "logits/rejected": -2.8062448501586914, - "logps/chosen": -331.8685302734375, - "logps/rejected": -1296.1632080078125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.674542188644409, - "rewards/margins": 28.778547286987305, - "rewards/rejected": -31.453088760375977, - "step": 1154 - }, - { - "epoch": 0.44, - "learning_rate": 1.2309810767852435e-05, - "logits/chosen": -3.140184164047241, - "logits/rejected": -1.5093739032745361, - "logps/chosen": -742.6741943359375, - "logps/rejected": -1823.172119140625, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": 2.5026612281799316, - "rewards/margins": 35.534366607666016, - "rewards/rejected": -33.03170394897461, - "step": 1155 - }, - { - "epoch": 0.44, - "learning_rate": 1.2298208982946862e-05, - "logits/chosen": -2.4625556468963623, - "logits/rejected": -4.317638874053955, - "logps/chosen": -442.12603759765625, - "logps/rejected": -559.5013427734375, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.23256531357765198, - "rewards/margins": 19.98012351989746, - "rewards/rejected": -19.74755859375, - "step": 1156 - }, - { - "epoch": 0.44, - "learning_rate": 1.228660393120048e-05, - "logits/chosen": -4.125799655914307, - "logits/rejected": -0.5028548836708069, - "logps/chosen": -320.6873779296875, - "logps/rejected": -1162.01904296875, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.4133697748184204, - "rewards/margins": 26.622568130493164, - "rewards/rejected": -28.035938262939453, - "step": 1157 - }, - { - "epoch": 0.44, - "learning_rate": 1.2274995629109545e-05, - "logits/chosen": -1.3195765018463135, - "logits/rejected": -1.6367425918579102, - "logps/chosen": -461.9844970703125, - "logps/rejected": -651.1940307617188, - "loss": 0.0003, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.701251268386841, - "rewards/margins": 15.371542930603027, - "rewards/rejected": -18.07279396057129, - "step": 1158 - }, - { - "epoch": 0.44, - "learning_rate": 1.2263384093174939e-05, - "logits/chosen": -4.703484535217285, - "logits/rejected": -1.2023398876190186, - "logps/chosen": -399.1058044433594, - "logps/rejected": -1026.292724609375, - "loss": 0.0007, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.192596435546875, - "rewards/margins": 15.692365646362305, - "rewards/rejected": -17.88496208190918, - "step": 1159 - }, - { - "epoch": 0.44, - "learning_rate": 1.2251769339902143e-05, - "logits/chosen": -2.1344070434570312, - "logits/rejected": -7.102719783782959, - "logps/chosen": -276.5030517578125, - "logps/rejected": -92.22659301757812, - "loss": 0.0073, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.9575744867324829, - "rewards/margins": 2.8140029907226562, - "rewards/rejected": -3.7715775966644287, - "step": 1160 - }, - { - "epoch": 0.44, - "learning_rate": 1.224015138580121e-05, - "logits/chosen": -3.0315968990325928, - "logits/rejected": -4.174244403839111, - "logps/chosen": -407.3978271484375, - "logps/rejected": -386.07562255859375, - "loss": 0.0879, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.1943329572677612, - "rewards/margins": 5.478720188140869, - "rewards/rejected": -6.67305326461792, - "step": 1161 - }, - { - "epoch": 0.44, - "learning_rate": 1.2228530247386737e-05, - "logits/chosen": -5.818602561950684, - "logits/rejected": -0.5993756651878357, - "logps/chosen": -399.1173095703125, - "logps/rejected": -2452.197998046875, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.820574939250946, - "rewards/margins": 37.38505554199219, - "rewards/rejected": -36.56447982788086, - "step": 1162 - }, - { - "epoch": 0.44, - "learning_rate": 1.2216905941177854e-05, - "logits/chosen": -0.6253194808959961, - "logits/rejected": -3.8941264152526855, - "logps/chosen": -348.90411376953125, - "logps/rejected": -297.2229309082031, - "loss": 0.0868, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.6229248046875, - "rewards/margins": 14.896661758422852, - "rewards/rejected": -16.51958656311035, - "step": 1163 - }, - { - "epoch": 0.44, - "learning_rate": 1.22052784836982e-05, - "logits/chosen": -5.608143329620361, - "logits/rejected": -2.100320339202881, - "logps/chosen": -146.0672149658203, - "logps/rejected": -684.0809326171875, - "loss": 0.0015, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.2718978822231293, - "rewards/margins": 18.97266387939453, - "rewards/rejected": -19.24456214904785, - "step": 1164 - }, - { - "epoch": 0.44, - "learning_rate": 1.2193647891475873e-05, - "logits/chosen": -2.103553056716919, - "logits/rejected": -4.237771987915039, - "logps/chosen": -365.41448974609375, - "logps/rejected": -365.5233154296875, - "loss": 0.001, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.6535797119140625, - "rewards/margins": 11.324869155883789, - "rewards/rejected": -11.978448867797852, - "step": 1165 - }, - { - "epoch": 0.44, - "learning_rate": 1.218201418104345e-05, - "logits/chosen": 0.05024344101548195, - "logits/rejected": -4.185094356536865, - "logps/chosen": -584.7581176757812, - "logps/rejected": -371.8308410644531, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.328039526939392, - "rewards/margins": 14.785104751586914, - "rewards/rejected": -16.113143920898438, - "step": 1166 - }, - { - "epoch": 0.44, - "learning_rate": 1.2170377368937926e-05, - "logits/chosen": -5.427840709686279, - "logits/rejected": -0.7526771426200867, - "logps/chosen": -249.12060546875, - "logps/rejected": -729.1656494140625, - "loss": 0.0002, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.5849335193634033, - "rewards/margins": 12.71045207977295, - "rewards/rejected": -11.125518798828125, - "step": 1167 - }, - { - "epoch": 0.44, - "learning_rate": 1.215873747170071e-05, - "logits/chosen": -1.7905969619750977, - "logits/rejected": -2.227731943130493, - "logps/chosen": -409.57330322265625, - "logps/rejected": -620.711181640625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.322314500808716, - "rewards/margins": 14.115139961242676, - "rewards/rejected": -16.437454223632812, - "step": 1168 - }, - { - "epoch": 0.44, - "learning_rate": 1.2147094505877593e-05, - "logits/chosen": -2.839097499847412, - "logits/rejected": -2.500433921813965, - "logps/chosen": -359.69903564453125, - "logps/rejected": -777.8431396484375, - "loss": 0.0004, - "rewards/accuracies": 1.0, - "rewards/chosen": -4.483041286468506, - "rewards/margins": 23.204410552978516, - "rewards/rejected": -27.68745231628418, - "step": 1169 - }, - { - "epoch": 0.44, - "learning_rate": 1.2135448488018734e-05, - "logits/chosen": -3.093254804611206, - "logits/rejected": -2.6353464126586914, - "logps/chosen": -299.06365966796875, - "logps/rejected": -726.530029296875, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -5.195659160614014, - "rewards/margins": 29.080917358398438, - "rewards/rejected": -34.27657699584961, - "step": 1170 - }, - { - "epoch": 0.44, - "learning_rate": 1.2123799434678624e-05, - "logits/chosen": -2.5266950130462646, - "logits/rejected": 0.08432045578956604, - "logps/chosen": -670.3599243164062, - "logps/rejected": -1599.5531005859375, - "loss": 0.0008, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.22824707627296448, - "rewards/margins": 31.611034393310547, - "rewards/rejected": -31.382787704467773, - "step": 1171 - }, - { - "epoch": 0.44, - "learning_rate": 1.2112147362416076e-05, - "logits/chosen": -3.9635283946990967, - "logits/rejected": -6.985587120056152, - "logps/chosen": -444.500732421875, - "logps/rejected": -193.50437927246094, - "loss": 0.0004, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.25477296113967896, - "rewards/margins": 7.005821704864502, - "rewards/rejected": -7.260594844818115, - "step": 1172 - }, - { - "epoch": 0.44, - "learning_rate": 1.2100492287794186e-05, - "logits/chosen": -3.5550780296325684, - "logits/rejected": -1.5003652572631836, - "logps/chosen": -201.96929931640625, - "logps/rejected": -918.3556518554688, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.623828113079071, - "rewards/margins": 32.272705078125, - "rewards/rejected": -32.89653396606445, - "step": 1173 - }, - { - "epoch": 0.44, - "learning_rate": 1.2088834227380321e-05, - "logits/chosen": -1.9085578918457031, - "logits/rejected": -3.695871114730835, - "logps/chosen": -224.64659118652344, - "logps/rejected": -592.6260986328125, - "loss": 0.0045, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.2840743958950043, - "rewards/margins": 30.872299194335938, - "rewards/rejected": -30.588224411010742, - "step": 1174 - }, - { - "epoch": 0.44, - "learning_rate": 1.2077173197746097e-05, - "logits/chosen": -1.736525058746338, - "logits/rejected": -1.5584900379180908, - "logps/chosen": -285.4165954589844, - "logps/rejected": -590.7217407226562, - "loss": 0.0034, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.8729645013809204, - "rewards/margins": 19.018775939941406, - "rewards/rejected": -18.145811080932617, - "step": 1175 - }, - { - "epoch": 0.44, - "learning_rate": 1.2065509215467345e-05, - "logits/chosen": -4.971863746643066, - "logits/rejected": -0.9908391237258911, - "logps/chosen": -227.0366973876953, - "logps/rejected": -631.1319580078125, - "loss": 0.0004, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.1359574794769287, - "rewards/margins": 5.6541547775268555, - "rewards/rejected": -7.790112495422363, - "step": 1176 - }, - { - "epoch": 0.44, - "learning_rate": 1.2053842297124094e-05, - "logits/chosen": -4.588089466094971, - "logits/rejected": -1.884565830230713, - "logps/chosen": -287.06915283203125, - "logps/rejected": -958.41650390625, - "loss": 0.0033, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.22728577256202698, - "rewards/margins": 24.520980834960938, - "rewards/rejected": -24.2936954498291, - "step": 1177 - }, - { - "epoch": 0.45, - "learning_rate": 1.2042172459300546e-05, - "logits/chosen": -1.9216969013214111, - "logits/rejected": -1.3787574768066406, - "logps/chosen": -476.08282470703125, - "logps/rejected": -929.6199340820312, - "loss": 0.0007, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.5357910394668579, - "rewards/margins": 22.98821449279785, - "rewards/rejected": -23.524005889892578, - "step": 1178 - }, - { - "epoch": 0.45, - "learning_rate": 1.2030499718585054e-05, - "logits/chosen": -2.237445831298828, - "logits/rejected": -3.462423324584961, - "logps/chosen": -293.7458801269531, - "logps/rejected": -402.1707458496094, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -5.109994411468506, - "rewards/margins": 16.627779006958008, - "rewards/rejected": -21.737773895263672, - "step": 1179 - }, - { - "epoch": 0.45, - "learning_rate": 1.2018824091570103e-05, - "logits/chosen": -3.668961524963379, - "logits/rejected": -1.4978187084197998, - "logps/chosen": -201.79202270507812, - "logps/rejected": -667.302490234375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.687634229660034, - "rewards/margins": 22.753833770751953, - "rewards/rejected": -25.44146728515625, - "step": 1180 - }, - { - "epoch": 0.45, - "learning_rate": 1.2007145594852274e-05, - "logits/chosen": -5.842826843261719, - "logits/rejected": -3.505401134490967, - "logps/chosen": -341.021728515625, - "logps/rejected": -1231.347412109375, - "loss": 0.0025, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.2807587385177612, - "rewards/margins": 36.98685073852539, - "rewards/rejected": -35.706092834472656, - "step": 1181 - }, - { - "epoch": 0.45, - "learning_rate": 1.1995464245032222e-05, - "logits/chosen": -4.039808750152588, - "logits/rejected": -4.026541233062744, - "logps/chosen": -188.2400360107422, - "logps/rejected": -430.61370849609375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.6446823477745056, - "rewards/margins": 13.454262733459473, - "rewards/rejected": -14.098944664001465, - "step": 1182 - }, - { - "epoch": 0.45, - "learning_rate": 1.198378005871467e-05, - "logits/chosen": -4.903400897979736, - "logits/rejected": -3.4862070083618164, - "logps/chosen": -496.4410705566406, - "logps/rejected": -760.8304443359375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -4.752166748046875, - "rewards/margins": 13.755254745483398, - "rewards/rejected": -18.507421493530273, - "step": 1183 - }, - { - "epoch": 0.45, - "learning_rate": 1.197209305250837e-05, - "logits/chosen": -5.314198017120361, - "logits/rejected": -3.6010866165161133, - "logps/chosen": -159.140869140625, - "logps/rejected": -518.1849975585938, - "loss": 0.0007, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.2619385719299316, - "rewards/margins": 14.930280685424805, - "rewards/rejected": -17.192218780517578, - "step": 1184 - }, - { - "epoch": 0.45, - "learning_rate": 1.1960403243026072e-05, - "logits/chosen": -4.173173904418945, - "logits/rejected": -1.623190999031067, - "logps/chosen": -229.87060546875, - "logps/rejected": -744.065185546875, - "loss": 0.0055, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.287359595298767, - "rewards/margins": 30.47206687927246, - "rewards/rejected": -29.184707641601562, - "step": 1185 - }, - { - "epoch": 0.45, - "learning_rate": 1.1948710646884522e-05, - "logits/chosen": -2.5021495819091797, - "logits/rejected": -4.384222984313965, - "logps/chosen": -464.04986572265625, - "logps/rejected": -470.02435302734375, - "loss": 0.0002, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.5958892703056335, - "rewards/margins": 14.714896202087402, - "rewards/rejected": -15.310785293579102, - "step": 1186 - }, - { - "epoch": 0.45, - "learning_rate": 1.1937015280704425e-05, - "logits/chosen": -1.0675839185714722, - "logits/rejected": -5.78521203994751, - "logps/chosen": -345.3133239746094, - "logps/rejected": -279.2110595703125, - "loss": 0.0676, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.3748931884765625, - "rewards/margins": 17.459970474243164, - "rewards/rejected": -17.0850772857666, - "step": 1187 - }, - { - "epoch": 0.45, - "learning_rate": 1.192531716111042e-05, - "logits/chosen": -0.7958028316497803, - "logits/rejected": -2.199901819229126, - "logps/chosen": -409.6477966308594, - "logps/rejected": -544.80126953125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 2.6289093494415283, - "rewards/margins": 26.527576446533203, - "rewards/rejected": -23.898666381835938, - "step": 1188 - }, - { - "epoch": 0.45, - "learning_rate": 1.1913616304731064e-05, - "logits/chosen": -6.266024589538574, - "logits/rejected": -1.1352232694625854, - "logps/chosen": -426.7962646484375, - "logps/rejected": -1654.7379150390625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.0727112293243408, - "rewards/margins": 38.85114288330078, - "rewards/rejected": -37.7784309387207, - "step": 1189 - }, - { - "epoch": 0.45, - "learning_rate": 1.1901912728198802e-05, - "logits/chosen": -9.994468688964844, - "logits/rejected": -10.532854080200195, - "logps/chosen": -261.07489013671875, - "logps/rejected": -794.3466796875, - "loss": 0.0002, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.6014373898506165, - "rewards/margins": 10.45592975616455, - "rewards/rejected": -11.057367324829102, - "step": 1190 - }, - { - "epoch": 0.45, - "learning_rate": 1.1890206448149945e-05, - "logits/chosen": -1.7462763786315918, - "logits/rejected": -5.13332986831665, - "logps/chosen": -299.859130859375, - "logps/rejected": -208.13877868652344, - "loss": 0.0002, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.891699194908142, - "rewards/margins": 11.40499210357666, - "rewards/rejected": -9.513293266296387, - "step": 1191 - }, - { - "epoch": 0.45, - "learning_rate": 1.1878497481224649e-05, - "logits/chosen": 0.8573393821716309, - "logits/rejected": -2.5453219413757324, - "logps/chosen": -224.8390655517578, - "logps/rejected": -395.4967041015625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.0738571882247925, - "rewards/margins": 16.773401260375977, - "rewards/rejected": -17.847257614135742, - "step": 1192 - }, - { - "epoch": 0.45, - "learning_rate": 1.1866785844066884e-05, - "logits/chosen": -5.071150302886963, - "logits/rejected": -3.440109968185425, - "logps/chosen": -510.24383544921875, - "logps/rejected": -1229.379150390625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 3.383136034011841, - "rewards/margins": 24.262056350708008, - "rewards/rejected": -20.87891960144043, - "step": 1193 - }, - { - "epoch": 0.45, - "learning_rate": 1.1855071553324427e-05, - "logits/chosen": -3.09578537940979, - "logits/rejected": -1.67397940158844, - "logps/chosen": -247.6927947998047, - "logps/rejected": -612.3223876953125, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.0720138549804688, - "rewards/margins": 21.054439544677734, - "rewards/rejected": -22.126453399658203, - "step": 1194 - }, - { - "epoch": 0.45, - "learning_rate": 1.1843354625648813e-05, - "logits/chosen": -4.166008949279785, - "logits/rejected": -3.0577969551086426, - "logps/chosen": -227.05020141601562, - "logps/rejected": -557.1519775390625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.8818389773368835, - "rewards/margins": 11.553656578063965, - "rewards/rejected": -12.435495376586914, - "step": 1195 - }, - { - "epoch": 0.45, - "learning_rate": 1.1831635077695337e-05, - "logits/chosen": -3.743734359741211, - "logits/rejected": -5.195467948913574, - "logps/chosen": -539.0379028320312, - "logps/rejected": -355.0203857421875, - "loss": 0.0053, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.377948045730591, - "rewards/margins": 14.625020027160645, - "rewards/rejected": -17.002967834472656, - "step": 1196 - }, - { - "epoch": 0.45, - "learning_rate": 1.181991292612301e-05, - "logits/chosen": -2.132581949234009, - "logits/rejected": -4.491199016571045, - "logps/chosen": -375.2071533203125, - "logps/rejected": -487.858154296875, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.0602539777755737, - "rewards/margins": 24.370256423950195, - "rewards/rejected": -23.31000328063965, - "step": 1197 - }, - { - "epoch": 0.45, - "learning_rate": 1.1808188187594549e-05, - "logits/chosen": -2.857313632965088, - "logits/rejected": -3.586529016494751, - "logps/chosen": -201.60897827148438, - "logps/rejected": -528.2880859375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 2.011322021484375, - "rewards/margins": 22.5775089263916, - "rewards/rejected": -20.566186904907227, - "step": 1198 - }, - { - "epoch": 0.45, - "learning_rate": 1.179646087877635e-05, - "logits/chosen": -2.614720582962036, - "logits/rejected": -3.8811252117156982, - "logps/chosen": -339.57537841796875, - "logps/rejected": -412.3442077636719, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.972558617591858, - "rewards/margins": 10.867962837219238, - "rewards/rejected": -8.895403861999512, - "step": 1199 - }, - { - "epoch": 0.45, - "learning_rate": 1.1784731016338458e-05, - "logits/chosen": -3.0958468914031982, - "logits/rejected": -1.2193233966827393, - "logps/chosen": -856.5424194335938, - "logps/rejected": -1543.4876708984375, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.815600574016571, - "rewards/margins": 29.612634658813477, - "rewards/rejected": -28.797033309936523, - "step": 1200 - }, - { - "epoch": 0.45, - "learning_rate": 1.177299861695455e-05, - "logits/chosen": -4.4142069816589355, - "logits/rejected": -1.539219617843628, - "logps/chosen": -683.0384521484375, - "logps/rejected": -2034.587646484375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.4977783262729645, - "rewards/margins": 37.40629959106445, - "rewards/rejected": -36.90851974487305, - "step": 1201 - }, - { - "epoch": 0.45, - "learning_rate": 1.1761263697301906e-05, - "logits/chosen": -3.065619707107544, - "logits/rejected": -7.119437217712402, - "logps/chosen": -432.6429443359375, - "logps/rejected": -149.60523986816406, - "loss": 0.0003, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.105017066001892, - "rewards/margins": 6.045422554016113, - "rewards/rejected": -7.150439739227295, - "step": 1202 - }, - { - "epoch": 0.45, - "learning_rate": 1.1749526274061394e-05, - "logits/chosen": -3.2517364025115967, - "logits/rejected": -6.597442626953125, - "logps/chosen": -768.2076416015625, - "logps/rejected": -388.45550537109375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.731054663658142, - "rewards/margins": 10.929532051086426, - "rewards/rejected": -12.6605863571167, - "step": 1203 - }, - { - "epoch": 0.46, - "learning_rate": 1.1737786363917438e-05, - "logits/chosen": -5.4774250984191895, - "logits/rejected": -3.3798022270202637, - "logps/chosen": -478.6776428222656, - "logps/rejected": -983.644287109375, - "loss": 0.0004, - "rewards/accuracies": 1.0, - "rewards/chosen": 2.0967254638671875, - "rewards/margins": 17.564456939697266, - "rewards/rejected": -15.467730522155762, - "step": 1204 - }, - { - "epoch": 0.46, - "learning_rate": 1.1726043983557996e-05, - "logits/chosen": -2.1015162467956543, - "logits/rejected": -3.6721854209899902, - "logps/chosen": -175.21823120117188, - "logps/rejected": -519.1842041015625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 2.0122392177581787, - "rewards/margins": 28.828716278076172, - "rewards/rejected": -26.816476821899414, - "step": 1205 - }, - { - "epoch": 0.46, - "learning_rate": 1.1714299149674538e-05, - "logits/chosen": -1.6122280359268188, - "logits/rejected": -2.6413862705230713, - "logps/chosen": -303.6815185546875, - "logps/rejected": -312.7509460449219, - "loss": 0.0009, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.13716736435890198, - "rewards/margins": 4.946896553039551, - "rewards/rejected": -4.809729099273682, - "step": 1206 - }, - { - "epoch": 0.46, - "learning_rate": 1.1702551878962025e-05, - "logits/chosen": -2.744434356689453, - "logits/rejected": -5.75366735458374, - "logps/chosen": -336.67767333984375, - "logps/rejected": -237.2457733154297, - "loss": 0.001, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.5920898914337158, - "rewards/margins": 7.802729606628418, - "rewards/rejected": -6.210639953613281, - "step": 1207 - }, - { - "epoch": 0.46, - "learning_rate": 1.1690802188118878e-05, - "logits/chosen": -3.1545064449310303, - "logits/rejected": -3.7169978618621826, - "logps/chosen": -423.5762023925781, - "logps/rejected": -681.273681640625, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.6016815900802612, - "rewards/margins": 18.66624641418457, - "rewards/rejected": -17.064565658569336, - "step": 1208 - }, - { - "epoch": 0.46, - "learning_rate": 1.1679050093846956e-05, - "logits/chosen": -3.420034170150757, - "logits/rejected": -1.7094991207122803, - "logps/chosen": -402.3920593261719, - "logps/rejected": -938.212890625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.6360229849815369, - "rewards/margins": 19.679943084716797, - "rewards/rejected": -19.043920516967773, - "step": 1209 - }, - { - "epoch": 0.46, - "learning_rate": 1.166729561285154e-05, - "logits/chosen": -2.5682365894317627, - "logits/rejected": -3.535339117050171, - "logps/chosen": -190.56117248535156, - "logps/rejected": -347.09466552734375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 2.585005283355713, - "rewards/margins": 18.565885543823242, - "rewards/rejected": -15.980880737304688, - "step": 1210 - }, - { - "epoch": 0.46, - "learning_rate": 1.16555387618413e-05, - "logits/chosen": -0.2345745712518692, - "logits/rejected": -3.0612943172454834, - "logps/chosen": -612.090087890625, - "logps/rejected": -699.6430053710938, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.9296753406524658, - "rewards/margins": 30.467756271362305, - "rewards/rejected": -28.5380802154541, - "step": 1211 - }, - { - "epoch": 0.46, - "learning_rate": 1.1643779557528278e-05, - "logits/chosen": -3.946134328842163, - "logits/rejected": -6.681797027587891, - "logps/chosen": -286.6518249511719, - "logps/rejected": -151.9879608154297, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.4217376708984375, - "rewards/margins": 6.75641393661499, - "rewards/rejected": -7.178151607513428, - "step": 1212 - }, - { - "epoch": 0.46, - "learning_rate": 1.1632018016627859e-05, - "logits/chosen": -7.6296706199646, - "logits/rejected": -3.897444725036621, - "logps/chosen": -625.9835205078125, - "logps/rejected": -2159.799560546875, - "loss": 0.0003, - "rewards/accuracies": 1.0, - "rewards/chosen": 3.813244581222534, - "rewards/margins": 20.942052841186523, - "rewards/rejected": -17.128808975219727, - "step": 1213 - }, - { - "epoch": 0.46, - "learning_rate": 1.1620254155858752e-05, - "logits/chosen": -4.64841365814209, - "logits/rejected": -6.46793270111084, - "logps/chosen": -297.80194091796875, - "logps/rejected": -222.9084014892578, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 3.8201050758361816, - "rewards/margins": 11.131189346313477, - "rewards/rejected": -7.311084270477295, - "step": 1214 - }, - { - "epoch": 0.46, - "learning_rate": 1.1608487991942956e-05, - "logits/chosen": -6.696191310882568, - "logits/rejected": -6.001954555511475, - "logps/chosen": -140.09295654296875, - "logps/rejected": -321.6461486816406, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.0452911853790283, - "rewards/margins": 8.585975646972656, - "rewards/rejected": -7.540684700012207, - "step": 1215 - }, - { - "epoch": 0.46, - "learning_rate": 1.159671954160575e-05, - "logits/chosen": -3.7251906394958496, - "logits/rejected": -2.0619261264801025, - "logps/chosen": -422.68524169921875, - "logps/rejected": -703.4443359375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.732287585735321, - "rewards/margins": 8.01932430267334, - "rewards/rejected": -8.751611709594727, - "step": 1216 - }, - { - "epoch": 0.46, - "learning_rate": 1.1584948821575666e-05, - "logits/chosen": -0.08705443888902664, - "logits/rejected": -6.0769524574279785, - "logps/chosen": -280.951171875, - "logps/rejected": -140.14990234375, - "loss": 0.0397, - "rewards/accuracies": 1.0, - "rewards/chosen": 2.121368408203125, - "rewards/margins": 9.430610656738281, - "rewards/rejected": -7.309242248535156, - "step": 1217 - }, - { - "epoch": 0.46, - "learning_rate": 1.1573175848584455e-05, - "logits/chosen": -7.530139923095703, - "logits/rejected": -6.397308349609375, - "logps/chosen": -206.65533447265625, - "logps/rejected": -689.141357421875, - "loss": 0.0007, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.9940292835235596, - "rewards/margins": 11.357969284057617, - "rewards/rejected": -9.363940238952637, - "step": 1218 - }, - { - "epoch": 0.46, - "learning_rate": 1.1561400639367075e-05, - "logits/chosen": -1.6087194681167603, - "logits/rejected": -4.239225387573242, - "logps/chosen": -256.4360046386719, - "logps/rejected": -496.4027404785156, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.4694092273712158, - "rewards/margins": 23.60259246826172, - "rewards/rejected": -22.133182525634766, - "step": 1219 - }, - { - "epoch": 0.46, - "learning_rate": 1.1549623210661663e-05, - "logits/chosen": -2.1412835121154785, - "logits/rejected": -4.141549587249756, - "logps/chosen": -404.58453369140625, - "logps/rejected": -401.835693359375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.45163270831108093, - "rewards/margins": 20.43943214416504, - "rewards/rejected": -19.9877986907959, - "step": 1220 - }, - { - "epoch": 0.46, - "learning_rate": 1.1537843579209509e-05, - "logits/chosen": -4.948379993438721, - "logits/rejected": -0.8211522698402405, - "logps/chosen": -558.549072265625, - "logps/rejected": -1607.474853515625, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.049322485923767, - "rewards/margins": 21.123004913330078, - "rewards/rejected": -20.07368278503418, - "step": 1221 - }, - { - "epoch": 0.46, - "learning_rate": 1.1526061761755033e-05, - "logits/chosen": -5.91740608215332, - "logits/rejected": -1.6514525413513184, - "logps/chosen": -577.2767333984375, - "logps/rejected": -2857.416015625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.20989990234375, - "rewards/margins": 78.36617279052734, - "rewards/rejected": -79.5760726928711, - "step": 1222 - }, - { - "epoch": 0.46, - "learning_rate": 1.1514277775045768e-05, - "logits/chosen": -1.2285672426223755, - "logits/rejected": -7.848071098327637, - "logps/chosen": -351.508056640625, - "logps/rejected": -166.54498291015625, - "loss": 0.0002, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.23705749213695526, - "rewards/margins": 10.867002487182617, - "rewards/rejected": -11.104060173034668, - "step": 1223 - }, - { - "epoch": 0.46, - "learning_rate": 1.1502491635832327e-05, - "logits/chosen": -5.398290157318115, - "logits/rejected": -2.0728442668914795, - "logps/chosen": -496.80889892578125, - "logps/rejected": -1294.4130859375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.8444000482559204, - "rewards/margins": 30.962326049804688, - "rewards/rejected": -31.806726455688477, - "step": 1224 - }, - { - "epoch": 0.46, - "learning_rate": 1.1490703360868373e-05, - "logits/chosen": -2.8468382358551025, - "logits/rejected": -1.0015219449996948, - "logps/chosen": -627.5270385742188, - "logps/rejected": -1218.486083984375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.775854468345642, - "rewards/margins": 26.242406845092773, - "rewards/rejected": -24.466552734375, - "step": 1225 - }, - { - "epoch": 0.46, - "learning_rate": 1.1478912966910627e-05, - "logits/chosen": -1.184692621231079, - "logits/rejected": -3.590175151824951, - "logps/chosen": -314.3569641113281, - "logps/rejected": -289.51080322265625, - "loss": 0.0002, - "rewards/accuracies": 1.0, - "rewards/chosen": -4.823492527008057, - "rewards/margins": 10.045679092407227, - "rewards/rejected": -14.869171142578125, - "step": 1226 - }, - { - "epoch": 0.46, - "learning_rate": 1.1467120470718805e-05, - "logits/chosen": -2.249973773956299, - "logits/rejected": -2.4208872318267822, - "logps/chosen": -611.890380859375, - "logps/rejected": -1096.979248046875, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -6.50946044921875, - "rewards/margins": 36.51128005981445, - "rewards/rejected": -43.0207405090332, - "step": 1227 - }, - { - "epoch": 0.46, - "learning_rate": 1.1455325889055616e-05, - "logits/chosen": -3.165377616882324, - "logits/rejected": -2.222607135772705, - "logps/chosen": -346.3670654296875, - "logps/rejected": -644.11767578125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -4.13478422164917, - "rewards/margins": 16.877702713012695, - "rewards/rejected": -21.012487411499023, - "step": 1228 - }, - { - "epoch": 0.46, - "learning_rate": 1.1443529238686726e-05, - "logits/chosen": -7.263782978057861, - "logits/rejected": -2.6532366275787354, - "logps/chosen": -271.5249328613281, - "logps/rejected": -1098.90673828125, - "loss": 0.0129, - "rewards/accuracies": 1.0, - "rewards/chosen": -3.597639560699463, - "rewards/margins": 16.948766708374023, - "rewards/rejected": -20.546405792236328, - "step": 1229 - }, - { - "epoch": 0.47, - "learning_rate": 1.1431730536380759e-05, - "logits/chosen": -2.256208896636963, - "logits/rejected": -3.715451717376709, - "logps/chosen": -229.37652587890625, - "logps/rejected": -220.33731079101562, - "loss": 0.0007, - "rewards/accuracies": 1.0, - "rewards/chosen": -3.651864767074585, - "rewards/margins": 5.368472099304199, - "rewards/rejected": -9.020337104797363, - "step": 1230 - }, - { - "epoch": 0.47, - "learning_rate": 1.1419929798909241e-05, - "logits/chosen": -5.174729824066162, - "logits/rejected": -0.5369938611984253, - "logps/chosen": -406.2663879394531, - "logps/rejected": -1134.745849609375, - "loss": 0.0007, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.4872405529022217, - "rewards/margins": 15.021279335021973, - "rewards/rejected": -17.508520126342773, - "step": 1231 - }, - { - "epoch": 0.47, - "learning_rate": 1.1408127043046598e-05, - "logits/chosen": -0.36390239000320435, - "logits/rejected": -1.8649389743804932, - "logps/chosen": -625.920654296875, - "logps/rejected": -1159.055419921875, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.30961915850639343, - "rewards/margins": 52.54541015625, - "rewards/rejected": -52.23579025268555, - "step": 1232 - }, - { - "epoch": 0.47, - "learning_rate": 1.1396322285570119e-05, - "logits/chosen": -1.5347785949707031, - "logits/rejected": -1.263379454612732, - "logps/chosen": -320.1771240234375, - "logps/rejected": -804.876953125, - "loss": 0.0004, - "rewards/accuracies": 1.0, - "rewards/chosen": -3.0194365978240967, - "rewards/margins": 27.530637741088867, - "rewards/rejected": -30.550073623657227, - "step": 1233 - }, - { - "epoch": 0.47, - "learning_rate": 1.1384515543259943e-05, - "logits/chosen": -5.198877334594727, - "logits/rejected": -0.699828565120697, - "logps/chosen": -326.42828369140625, - "logps/rejected": -908.5909423828125, - "loss": 0.0055, - "rewards/accuracies": 1.0, - "rewards/chosen": -4.84138822555542, - "rewards/margins": 12.377985000610352, - "rewards/rejected": -17.21937370300293, - "step": 1234 - }, - { - "epoch": 0.47, - "learning_rate": 1.1372706832899027e-05, - "logits/chosen": -3.1902260780334473, - "logits/rejected": -2.062955141067505, - "logps/chosen": -384.331787109375, - "logps/rejected": -743.6978759765625, - "loss": 0.0031, - "rewards/accuracies": 1.0, - "rewards/chosen": -3.7051331996917725, - "rewards/margins": 15.362171173095703, - "rewards/rejected": -19.067304611206055, - "step": 1235 - }, - { - "epoch": 0.47, - "learning_rate": 1.1360896171273134e-05, - "logits/chosen": -0.807688295841217, - "logits/rejected": -6.129022598266602, - "logps/chosen": -338.1445617675781, - "logps/rejected": -157.67587280273438, - "loss": 0.0002, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.8751312494277954, - "rewards/margins": 7.539543628692627, - "rewards/rejected": -8.414674758911133, - "step": 1236 - }, - { - "epoch": 0.47, - "learning_rate": 1.134908357517079e-05, - "logits/chosen": -2.9184656143188477, - "logits/rejected": -3.422973871231079, - "logps/chosen": -300.191650390625, - "logps/rejected": -410.5356750488281, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -3.139971971511841, - "rewards/margins": 7.675403594970703, - "rewards/rejected": -10.815375328063965, - "step": 1237 - }, - { - "epoch": 0.47, - "learning_rate": 1.1337269061383278e-05, - "logits/chosen": -0.3020566403865814, - "logits/rejected": -0.26977595686912537, - "logps/chosen": -299.6357421875, - "logps/rejected": -612.1279296875, - "loss": 0.0002, - "rewards/accuracies": 1.0, - "rewards/chosen": -3.9455597400665283, - "rewards/margins": 19.110342025756836, - "rewards/rejected": -23.0559024810791, - "step": 1238 - }, - { - "epoch": 0.47, - "learning_rate": 1.1325452646704597e-05, - "logits/chosen": -1.4473555088043213, - "logits/rejected": -5.148748874664307, - "logps/chosen": -685.519775390625, - "logps/rejected": -250.88168334960938, - "loss": 0.259, - "rewards/accuracies": 1.0, - "rewards/chosen": -3.289623975753784, - "rewards/margins": 11.513372421264648, - "rewards/rejected": -14.802996635437012, - "step": 1239 - }, - { - "epoch": 0.47, - "learning_rate": 1.1313634347931466e-05, - "logits/chosen": -0.5642944574356079, - "logits/rejected": -0.47060948610305786, - "logps/chosen": -822.0537109375, - "logps/rejected": -1345.720947265625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -8.209851264953613, - "rewards/margins": 34.72978591918945, - "rewards/rejected": -42.93963623046875, - "step": 1240 - }, - { - "epoch": 0.47, - "learning_rate": 1.1301814181863265e-05, - "logits/chosen": -1.744125247001648, - "logits/rejected": -1.6612101793289185, - "logps/chosen": -352.3385009765625, - "logps/rejected": -462.9463195800781, - "loss": 0.1009, - "rewards/accuracies": 1.0, - "rewards/chosen": -3.52850341796875, - "rewards/margins": 7.1469879150390625, - "rewards/rejected": -10.675491333007812, - "step": 1241 - }, - { - "epoch": 0.47, - "learning_rate": 1.1289992165302036e-05, - "logits/chosen": -3.7203452587127686, - "logits/rejected": -2.581650733947754, - "logps/chosen": -380.1888427734375, - "logps/rejected": -770.3766479492188, - "loss": 0.0016, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.3484130799770355, - "rewards/margins": 18.115686416625977, - "rewards/rejected": -18.464099884033203, - "step": 1242 - }, - { - "epoch": 0.47, - "learning_rate": 1.1278168315052445e-05, - "logits/chosen": -0.9211792945861816, - "logits/rejected": -4.845757961273193, - "logps/chosen": -452.77081298828125, - "logps/rejected": -143.71807861328125, - "loss": 0.0055, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.982769727706909, - "rewards/margins": 6.849405288696289, - "rewards/rejected": -9.832175254821777, - "step": 1243 - }, - { - "epoch": 0.47, - "learning_rate": 1.126634264792178e-05, - "logits/chosen": -3.4765307903289795, - "logits/rejected": -1.4521522521972656, - "logps/chosen": -320.4718017578125, - "logps/rejected": -932.6845703125, - "loss": 0.0014, - "rewards/accuracies": 1.0, - "rewards/chosen": -5.514187812805176, - "rewards/margins": 19.292301177978516, - "rewards/rejected": -24.806488037109375, - "step": 1244 - }, - { - "epoch": 0.47, - "learning_rate": 1.1254515180719893e-05, - "logits/chosen": -5.955656051635742, - "logits/rejected": -2.217977285385132, - "logps/chosen": -267.5289611816406, - "logps/rejected": -719.5186767578125, - "loss": 0.0019, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.48732301592826843, - "rewards/margins": 9.912836074829102, - "rewards/rejected": -9.42551326751709, - "step": 1245 - }, - { - "epoch": 0.47, - "learning_rate": 1.1242685930259207e-05, - "logits/chosen": -0.32916969060897827, - "logits/rejected": -3.7286250591278076, - "logps/chosen": -511.46343994140625, - "logps/rejected": -387.4632568359375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -7.614654541015625, - "rewards/margins": 15.994659423828125, - "rewards/rejected": -23.60931396484375, - "step": 1246 - }, - { - "epoch": 0.47, - "learning_rate": 1.1230854913354674e-05, - "logits/chosen": -7.808509826660156, - "logits/rejected": -3.3010778427124023, - "logps/chosen": -209.6903533935547, - "logps/rejected": -983.203857421875, - "loss": 0.0013, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.3533554077148438, - "rewards/margins": 11.088685989379883, - "rewards/rejected": -13.442041397094727, - "step": 1247 - }, - { - "epoch": 0.47, - "learning_rate": 1.1219022146823762e-05, - "logits/chosen": 0.03362048417329788, - "logits/rejected": -6.671877861022949, - "logps/chosen": -704.6075439453125, - "logps/rejected": -123.51445007324219, - "loss": 0.0002, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.998632788658142, - "rewards/margins": 6.897927761077881, - "rewards/rejected": -8.896560668945312, - "step": 1248 - }, - { - "epoch": 0.47, - "learning_rate": 1.1207187647486418e-05, - "logits/chosen": -5.119964122772217, - "logits/rejected": -3.7130286693573, - "logps/chosen": -581.52197265625, - "logps/rejected": -1192.5994873046875, - "loss": 0.0017, - "rewards/accuracies": 1.0, - "rewards/chosen": -3.344897508621216, - "rewards/margins": 4.67887020111084, - "rewards/rejected": -8.023767471313477, - "step": 1249 - }, - { - "epoch": 0.47, - "learning_rate": 1.119535143216506e-05, - "logits/chosen": -6.692777156829834, - "logits/rejected": -1.716914415359497, - "logps/chosen": -417.79583740234375, - "logps/rejected": -2069.28271484375, - "loss": 0.0934, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.34495240449905396, - "rewards/margins": 34.67156219482422, - "rewards/rejected": -34.32661056518555, - "step": 1250 - }, - { - "epoch": 0.47, - "learning_rate": 1.1183513517684546e-05, - "logits/chosen": -1.6178444623947144, - "logits/rejected": -0.49735227227211, - "logps/chosen": -387.94415283203125, - "logps/rejected": -880.2200927734375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -3.1777284145355225, - "rewards/margins": 29.192975997924805, - "rewards/rejected": -32.370704650878906, - "step": 1251 - }, - { - "epoch": 0.47, - "learning_rate": 1.1171673920872134e-05, - "logits/chosen": -3.0098609924316406, - "logits/rejected": -1.570181965827942, - "logps/chosen": -359.942626953125, - "logps/rejected": -773.1083984375, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.120929002761841, - "rewards/margins": 9.138824462890625, - "rewards/rejected": -11.259753227233887, - "step": 1252 - }, - { - "epoch": 0.47, - "learning_rate": 1.1159832658557498e-05, - "logits/chosen": -5.00116491317749, - "logits/rejected": -3.119239330291748, - "logps/chosen": -373.4866027832031, - "logps/rejected": -1173.02783203125, - "loss": 0.0005, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.8834503293037415, - "rewards/margins": 24.853879928588867, - "rewards/rejected": -25.737329483032227, - "step": 1253 - }, - { - "epoch": 0.47, - "learning_rate": 1.1147989747572662e-05, - "logits/chosen": -2.549421548843384, - "logits/rejected": -0.7679648995399475, - "logps/chosen": -376.90625, - "logps/rejected": -686.64453125, - "loss": 0.0006, - "rewards/accuracies": 1.0, - "rewards/chosen": -3.151074171066284, - "rewards/margins": 5.472406387329102, - "rewards/rejected": -8.623480796813965, - "step": 1254 - }, - { - "epoch": 0.47, - "learning_rate": 1.1136145204751995e-05, - "logits/chosen": -3.5351333618164062, - "logits/rejected": -0.5108442902565002, - "logps/chosen": -361.3166809082031, - "logps/rejected": -1225.8604736328125, - "loss": 0.0003, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.5658050775527954, - "rewards/margins": 36.180362701416016, - "rewards/rejected": -37.74616622924805, - "step": 1255 - }, - { - "epoch": 0.47, - "learning_rate": 1.1124299046932192e-05, - "logits/chosen": -0.9042820930480957, - "logits/rejected": -1.8821868896484375, - "logps/chosen": -226.70884704589844, - "logps/rejected": -201.61627197265625, - "loss": 0.0169, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.835519552230835, - "rewards/margins": 1.9488370418548584, - "rewards/rejected": -4.784356594085693, - "step": 1256 - }, - { - "epoch": 0.48, - "learning_rate": 1.1112451290952238e-05, - "logits/chosen": -0.5807422995567322, - "logits/rejected": -1.467499017715454, - "logps/chosen": -193.2430877685547, - "logps/rejected": -326.402587890625, - "loss": 0.0003, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.1307967901229858, - "rewards/margins": 13.13283920288086, - "rewards/rejected": -14.263635635375977, - "step": 1257 - }, - { - "epoch": 0.48, - "learning_rate": 1.1100601953653393e-05, - "logits/chosen": -1.8101598024368286, - "logits/rejected": -1.3507134914398193, - "logps/chosen": -468.220703125, - "logps/rejected": -901.2989501953125, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -3.523120164871216, - "rewards/margins": 26.4522647857666, - "rewards/rejected": -29.975385665893555, - "step": 1258 - }, - { - "epoch": 0.48, - "learning_rate": 1.1088751051879166e-05, - "logits/chosen": -4.456623077392578, - "logits/rejected": -3.794877529144287, - "logps/chosen": -362.735107421875, - "logps/rejected": -625.5130004882812, - "loss": 0.0011, - "rewards/accuracies": 1.0, - "rewards/chosen": -3.6455140113830566, - "rewards/margins": 7.580926418304443, - "rewards/rejected": -11.2264404296875, - "step": 1259 - }, - { - "epoch": 0.48, - "learning_rate": 1.107689860247528e-05, - "logits/chosen": -5.8197021484375, - "logits/rejected": -0.4428431987762451, - "logps/chosen": -365.5262451171875, - "logps/rejected": -1279.00439453125, - "loss": 0.0072, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.01883239857852459, - "rewards/margins": 12.4146089553833, - "rewards/rejected": -12.395776748657227, - "step": 1260 - }, - { - "epoch": 0.48, - "learning_rate": 1.1065044622289675e-05, - "logits/chosen": -3.0976803302764893, - "logits/rejected": -1.7098884582519531, - "logps/chosen": -431.1121520996094, - "logps/rejected": -977.5443725585938, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.9539703726768494, - "rewards/margins": 19.286300659179688, - "rewards/rejected": -20.240270614624023, - "step": 1261 - }, - { - "epoch": 0.48, - "learning_rate": 1.1053189128172454e-05, - "logits/chosen": -1.4373173713684082, - "logits/rejected": -1.5192455053329468, - "logps/chosen": -240.08607482910156, - "logps/rejected": -533.2758178710938, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -3.7769393920898438, - "rewards/margins": 20.860530853271484, - "rewards/rejected": -24.637470245361328, - "step": 1262 - }, - { - "epoch": 0.48, - "learning_rate": 1.1041332136975874e-05, - "logits/chosen": -5.589410305023193, - "logits/rejected": -1.5720710754394531, - "logps/chosen": -289.95513916015625, - "logps/rejected": -1267.0072021484375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.7281128168106079, - "rewards/margins": 39.80013656616211, - "rewards/rejected": -40.52824783325195, - "step": 1263 - }, - { - "epoch": 0.48, - "learning_rate": 1.1029473665554327e-05, - "logits/chosen": -3.0074737071990967, - "logits/rejected": -2.1270592212677, - "logps/chosen": -203.11807250976562, - "logps/rejected": -541.0159912109375, - "loss": 0.0006, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.5576003789901733, - "rewards/margins": 19.294410705566406, - "rewards/rejected": -20.85201072692871, - "step": 1264 - }, - { - "epoch": 0.48, - "learning_rate": 1.1017613730764295e-05, - "logits/chosen": -1.9759199619293213, - "logits/rejected": -2.076465606689453, - "logps/chosen": -280.1798095703125, - "logps/rejected": -509.77508544921875, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -3.556654453277588, - "rewards/margins": 18.586803436279297, - "rewards/rejected": -22.143457412719727, - "step": 1265 - }, - { - "epoch": 0.48, - "learning_rate": 1.1005752349464353e-05, - "logits/chosen": -6.190802574157715, - "logits/rejected": -0.5048680901527405, - "logps/chosen": -283.6297607421875, - "logps/rejected": -1707.4053955078125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.1670684814453125, - "rewards/margins": 11.665170669555664, - "rewards/rejected": -12.832239151000977, - "step": 1266 - }, - { - "epoch": 0.48, - "learning_rate": 1.0993889538515135e-05, - "logits/chosen": -3.63555645942688, - "logits/rejected": -2.9763104915618896, - "logps/chosen": -119.47589874267578, - "logps/rejected": -232.42994689941406, - "loss": 0.0006, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.382582902908325, - "rewards/margins": 5.856725692749023, - "rewards/rejected": -8.23930835723877, - "step": 1267 - }, - { - "epoch": 0.48, - "learning_rate": 1.0982025314779287e-05, - "logits/chosen": -0.5350019335746765, - "logits/rejected": -3.6594367027282715, - "logps/chosen": -293.97052001953125, - "logps/rejected": -252.43988037109375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.4056762754917145, - "rewards/margins": 16.384567260742188, - "rewards/rejected": -16.79024314880371, - "step": 1268 - }, - { - "epoch": 0.48, - "learning_rate": 1.0970159695121488e-05, - "logits/chosen": -1.183760643005371, - "logits/rejected": -2.097731113433838, - "logps/chosen": -287.2699279785156, - "logps/rejected": -512.3817138671875, - "loss": 0.0083, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.5731475353240967, - "rewards/margins": 22.54865264892578, - "rewards/rejected": -25.12179946899414, - "step": 1269 - }, - { - "epoch": 0.48, - "learning_rate": 1.0958292696408381e-05, - "logits/chosen": -3.3501298427581787, - "logits/rejected": -2.7552146911621094, - "logps/chosen": -208.99752807617188, - "logps/rejected": -496.34661865234375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.7709380984306335, - "rewards/margins": 11.934518814086914, - "rewards/rejected": -12.705456733703613, - "step": 1270 - }, - { - "epoch": 0.48, - "learning_rate": 1.0946424335508585e-05, - "logits/chosen": -2.853306293487549, - "logits/rejected": -3.977323293685913, - "logps/chosen": -210.25563049316406, - "logps/rejected": -350.28271484375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.7244064807891846, - "rewards/margins": 21.177162170410156, - "rewards/rejected": -23.901569366455078, - "step": 1271 - }, - { - "epoch": 0.48, - "learning_rate": 1.0934554629292645e-05, - "logits/chosen": -7.369534969329834, - "logits/rejected": -4.15666389465332, - "logps/chosen": -270.39532470703125, - "logps/rejected": -1255.300048828125, - "loss": 0.001, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.8793823719024658, - "rewards/margins": 8.396717071533203, - "rewards/rejected": -10.27609920501709, - "step": 1272 - }, - { - "epoch": 0.48, - "learning_rate": 1.092268359463302e-05, - "logits/chosen": -0.5066320300102234, - "logits/rejected": -4.383983135223389, - "logps/chosen": -251.61575317382812, - "logps/rejected": -105.63009643554688, - "loss": 0.0002, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.9006240963935852, - "rewards/margins": 6.472357273101807, - "rewards/rejected": -7.372981548309326, - "step": 1273 - }, - { - "epoch": 0.48, - "learning_rate": 1.0910811248404064e-05, - "logits/chosen": 0.2519144117832184, - "logits/rejected": -4.311489105224609, - "logps/chosen": -568.7548217773438, - "logps/rejected": -244.18661499023438, - "loss": 0.0008, - "rewards/accuracies": 1.0, - "rewards/chosen": -7.2324371337890625, - "rewards/margins": 6.952516555786133, - "rewards/rejected": -14.184953689575195, - "step": 1274 - }, - { - "epoch": 0.48, - "learning_rate": 1.0898937607481985e-05, - "logits/chosen": 0.02246897667646408, - "logits/rejected": -1.9050300121307373, - "logps/chosen": -307.2270202636719, - "logps/rejected": -483.18939208984375, - "loss": 0.0002, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.2568787336349487, - "rewards/margins": 18.1039981842041, - "rewards/rejected": -19.360876083374023, - "step": 1275 - }, - { - "epoch": 0.48, - "learning_rate": 1.0887062688744843e-05, - "logits/chosen": -4.139034271240234, - "logits/rejected": -4.732961654663086, - "logps/chosen": -311.76544189453125, - "logps/rejected": -428.2138366699219, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -4.496203899383545, - "rewards/margins": 16.642621994018555, - "rewards/rejected": -21.138826370239258, - "step": 1276 - }, - { - "epoch": 0.48, - "learning_rate": 1.0875186509072502e-05, - "logits/chosen": -1.9770358800888062, - "logits/rejected": -2.9902775287628174, - "logps/chosen": -739.29052734375, - "logps/rejected": -1147.760986328125, - "loss": 0.0025, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.8924804925918579, - "rewards/margins": 32.17767333984375, - "rewards/rejected": -31.285192489624023, - "step": 1277 - }, - { - "epoch": 0.48, - "learning_rate": 1.086330908534663e-05, - "logits/chosen": -2.254669189453125, - "logits/rejected": -4.990213394165039, - "logps/chosen": -373.00213623046875, - "logps/rejected": -158.70338439941406, - "loss": 0.0012, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.7369781732559204, - "rewards/margins": 9.171810150146484, - "rewards/rejected": -9.908788681030273, - "step": 1278 - }, - { - "epoch": 0.48, - "learning_rate": 1.0851430434450652e-05, - "logits/chosen": -1.0078800916671753, - "logits/rejected": -2.927133798599243, - "logps/chosen": -594.5687866210938, - "logps/rejected": -566.8789672851562, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.370294213294983, - "rewards/margins": 15.565832138061523, - "rewards/rejected": -16.936126708984375, - "step": 1279 - }, - { - "epoch": 0.48, - "learning_rate": 1.0839550573269744e-05, - "logits/chosen": -1.4991881847381592, - "logits/rejected": -2.348588228225708, - "logps/chosen": -318.2377014160156, - "logps/rejected": -581.00927734375, - "loss": 0.0016, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.1706178188323975, - "rewards/margins": 24.43453025817871, - "rewards/rejected": -26.605148315429688, - "step": 1280 - }, - { - "epoch": 0.48, - "learning_rate": 1.0827669518690806e-05, - "logits/chosen": -5.541778564453125, - "logits/rejected": -3.127486228942871, - "logps/chosen": -320.8170471191406, - "logps/rejected": -1053.6414794921875, - "loss": 0.0035, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.6276520490646362, - "rewards/margins": 8.754959106445312, - "rewards/rejected": -10.382611274719238, - "step": 1281 - }, - { - "epoch": 0.48, - "learning_rate": 1.0815787287602428e-05, - "logits/chosen": -2.446776866912842, - "logits/rejected": -1.7258113622665405, - "logps/chosen": -337.93438720703125, - "logps/rejected": -688.8345336914062, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -5.796392917633057, - "rewards/margins": 13.604551315307617, - "rewards/rejected": -19.400943756103516, - "step": 1282 - }, - { - "epoch": 0.49, - "learning_rate": 1.0803903896894877e-05, - "logits/chosen": -3.9421989917755127, - "logits/rejected": -4.873663902282715, - "logps/chosen": -1367.187255859375, - "logps/rejected": -1522.39013671875, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.438574194908142, - "rewards/margins": 27.189735412597656, - "rewards/rejected": -28.62830924987793, - "step": 1283 - }, - { - "epoch": 0.49, - "learning_rate": 1.079201936346006e-05, - "logits/chosen": -5.843512535095215, - "logits/rejected": -2.036407470703125, - "logps/chosen": -278.35211181640625, - "logps/rejected": -863.0346069335938, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.608312964439392, - "rewards/margins": 8.198749542236328, - "rewards/rejected": -9.807062149047852, - "step": 1284 - }, - { - "epoch": 0.49, - "learning_rate": 1.0780133704191517e-05, - "logits/chosen": -2.068169593811035, - "logits/rejected": -5.396670818328857, - "logps/chosen": -440.5213928222656, - "logps/rejected": -580.8775634765625, - "loss": 0.0006, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.27691957354545593, - "rewards/margins": 28.06622314453125, - "rewards/rejected": -28.343143463134766, - "step": 1285 - }, - { - "epoch": 0.49, - "learning_rate": 1.0768246935984387e-05, - "logits/chosen": -1.2633936405181885, - "logits/rejected": -4.68316125869751, - "logps/chosen": -432.1666259765625, - "logps/rejected": -289.35516357421875, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -6.435110569000244, - "rewards/margins": 11.253299713134766, - "rewards/rejected": -17.68840980529785, - "step": 1286 - }, - { - "epoch": 0.49, - "learning_rate": 1.0756359075735385e-05, - "logits/chosen": -3.8450639247894287, - "logits/rejected": -1.2954113483428955, - "logps/chosen": -680.398681640625, - "logps/rejected": -1609.6475830078125, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.766308546066284, - "rewards/margins": 34.69478988647461, - "rewards/rejected": -37.461097717285156, - "step": 1287 - }, - { - "epoch": 0.49, - "learning_rate": 1.0744470140342775e-05, - "logits/chosen": -6.073845863342285, - "logits/rejected": -2.253239154815674, - "logps/chosen": -538.79296875, - "logps/rejected": -1349.6322021484375, - "loss": 0.001, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.6650390625, - "rewards/margins": 8.939172744750977, - "rewards/rejected": -11.604211807250977, - "step": 1288 - }, - { - "epoch": 0.49, - "learning_rate": 1.0732580146706347e-05, - "logits/chosen": -1.0317977666854858, - "logits/rejected": -0.8160890936851501, - "logps/chosen": -228.82298278808594, - "logps/rejected": -334.8452453613281, - "loss": 0.0008, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.043238878250122, - "rewards/margins": 6.9701128005981445, - "rewards/rejected": -8.013351440429688, - "step": 1289 - }, - { - "epoch": 0.49, - "learning_rate": 1.0720689111727407e-05, - "logits/chosen": -3.3394885063171387, - "logits/rejected": -1.3159115314483643, - "logps/chosen": -530.6771240234375, - "logps/rejected": -1350.77734375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.869982898235321, - "rewards/margins": 18.33099365234375, - "rewards/rejected": -19.200977325439453, - "step": 1290 - }, - { - "epoch": 0.49, - "learning_rate": 1.070879705230873e-05, - "logits/chosen": -4.052453994750977, - "logits/rejected": 0.32267263531684875, - "logps/chosen": -387.4433288574219, - "logps/rejected": -1542.7767333984375, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.6766144037246704, - "rewards/margins": 43.789306640625, - "rewards/rejected": -44.465919494628906, - "step": 1291 - }, - { - "epoch": 0.49, - "learning_rate": 1.0696903985354555e-05, - "logits/chosen": -0.9066455364227295, - "logits/rejected": 1.167014479637146, - "logps/chosen": -436.1910095214844, - "logps/rejected": -1071.1947021484375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.5121582746505737, - "rewards/margins": 30.6175537109375, - "rewards/rejected": -32.12971115112305, - "step": 1292 - }, - { - "epoch": 0.49, - "learning_rate": 1.0685009927770542e-05, - "logits/chosen": -7.137041091918945, - "logits/rejected": -1.2375147342681885, - "logps/chosen": -232.18255615234375, - "logps/rejected": -3067.96630859375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.948251485824585, - "rewards/margins": 15.53048324584961, - "rewards/rejected": -18.478734970092773, - "step": 1293 - }, - { - "epoch": 0.49, - "learning_rate": 1.0673114896463772e-05, - "logits/chosen": -0.2800365388393402, - "logits/rejected": -2.4363276958465576, - "logps/chosen": -255.8470001220703, - "logps/rejected": -195.70916748046875, - "loss": 0.0017, - "rewards/accuracies": 1.0, - "rewards/chosen": -4.0390825271606445, - "rewards/margins": 5.482638359069824, - "rewards/rejected": -9.521720886230469, - "step": 1294 - }, - { - "epoch": 0.49, - "learning_rate": 1.0661218908342705e-05, - "logits/chosen": -4.910533428192139, - "logits/rejected": -2.0674023628234863, - "logps/chosen": -292.8337707519531, - "logps/rejected": -991.3253173828125, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -3.14475417137146, - "rewards/margins": 22.961746215820312, - "rewards/rejected": -26.10650062561035, - "step": 1295 - }, - { - "epoch": 0.49, - "learning_rate": 1.0649321980317158e-05, - "logits/chosen": -1.2692612409591675, - "logits/rejected": -2.442415714263916, - "logps/chosen": -354.49884033203125, - "logps/rejected": -350.06158447265625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.21457214653491974, - "rewards/margins": 9.946782112121582, - "rewards/rejected": -10.161354064941406, - "step": 1296 - }, - { - "epoch": 0.49, - "learning_rate": 1.0637424129298288e-05, - "logits/chosen": -3.285691976547241, - "logits/rejected": -0.6411380171775818, - "logps/chosen": -470.93292236328125, - "logps/rejected": -1333.8487548828125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.648458957672119, - "rewards/margins": 19.290714263916016, - "rewards/rejected": -21.939172744750977, - "step": 1297 - }, - { - "epoch": 0.49, - "learning_rate": 1.0625525372198564e-05, - "logits/chosen": -5.760701656341553, - "logits/rejected": -1.9476691484451294, - "logps/chosen": -269.17742919921875, - "logps/rejected": -1006.0796508789062, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.49183350801467896, - "rewards/margins": 11.01279354095459, - "rewards/rejected": -10.520959854125977, - "step": 1298 - }, - { - "epoch": 0.49, - "learning_rate": 1.0613625725931738e-05, - "logits/chosen": -6.953944206237793, - "logits/rejected": -7.058317184448242, - "logps/chosen": -108.54988098144531, - "logps/rejected": -351.4346923828125, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.9787231683731079, - "rewards/margins": 13.463592529296875, - "rewards/rejected": -14.442316055297852, - "step": 1299 - }, - { - "epoch": 0.49, - "learning_rate": 1.0601725207412835e-05, - "logits/chosen": -3.1501476764678955, - "logits/rejected": -4.684582233428955, - "logps/chosen": -408.49127197265625, - "logps/rejected": -560.9812622070312, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -4.494189739227295, - "rewards/margins": 25.608339309692383, - "rewards/rejected": -30.102529525756836, - "step": 1300 - }, - { - "epoch": 0.49, - "learning_rate": 1.0589823833558111e-05, - "logits/chosen": -0.5244022607803345, - "logits/rejected": -2.2544713020324707, - "logps/chosen": -301.960693359375, - "logps/rejected": -308.64373779296875, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.5914214849472046, - "rewards/margins": 14.369490623474121, - "rewards/rejected": -15.960911750793457, - "step": 1301 - }, - { - "epoch": 0.49, - "learning_rate": 1.0577921621285041e-05, - "logits/chosen": -4.54231071472168, - "logits/rejected": -1.5771952867507935, - "logps/chosen": -271.4455871582031, - "logps/rejected": -1154.9063720703125, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.8681610226631165, - "rewards/margins": 23.416826248168945, - "rewards/rejected": -24.28498649597168, - "step": 1302 - }, - { - "epoch": 0.49, - "learning_rate": 1.056601858751229e-05, - "logits/chosen": -5.795045852661133, - "logits/rejected": 0.005209166556596756, - "logps/chosen": -332.9808349609375, - "logps/rejected": -2316.552490234375, - "loss": 0.0006, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.24313659965991974, - "rewards/margins": 34.23764419555664, - "rewards/rejected": -33.9945068359375, - "step": 1303 - }, - { - "epoch": 0.49, - "learning_rate": 1.05541147491597e-05, - "logits/chosen": 0.8391966819763184, - "logits/rejected": -2.9472744464874268, - "logps/chosen": -434.08416748046875, - "logps/rejected": -473.23345947265625, - "loss": 0.0016, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.24196167290210724, - "rewards/margins": 28.841848373413086, - "rewards/rejected": -29.083810806274414, - "step": 1304 - }, - { - "epoch": 0.49, - "learning_rate": 1.0542210123148246e-05, - "logits/chosen": -4.241867542266846, - "logits/rejected": -0.190777987241745, - "logps/chosen": -629.561767578125, - "logps/rejected": -2663.912109375, - "loss": 0.0024, - "rewards/accuracies": 1.0, - "rewards/chosen": -3.358428955078125, - "rewards/margins": 26.539276123046875, - "rewards/rejected": -29.897705078125, - "step": 1305 - }, - { - "epoch": 0.49, - "learning_rate": 1.0530304726400025e-05, - "logits/chosen": -3.7700467109680176, - "logits/rejected": 0.2720489203929901, - "logps/chosen": -532.0014038085938, - "logps/rejected": -1425.8271484375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.226574659347534, - "rewards/margins": 32.5689582824707, - "rewards/rejected": -34.7955322265625, - "step": 1306 - }, - { - "epoch": 0.49, - "learning_rate": 1.051839857583823e-05, - "logits/chosen": -1.064810037612915, - "logits/rejected": -0.9202111959457397, - "logps/chosen": -592.6732177734375, - "logps/rejected": -633.6718139648438, - "loss": 0.0066, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.46221923828125, - "rewards/margins": 2.966796875, - "rewards/rejected": -4.42901611328125, - "step": 1307 - }, - { - "epoch": 0.49, - "learning_rate": 1.0506491688387128e-05, - "logits/chosen": -0.8912250995635986, - "logits/rejected": -3.6157138347625732, - "logps/chosen": -283.6550598144531, - "logps/rejected": -122.33663940429688, - "loss": 0.0033, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.24750976264476776, - "rewards/margins": 5.43056583404541, - "rewards/rejected": -5.183055877685547, - "step": 1308 - }, - { - "epoch": 0.49, - "learning_rate": 1.0494584080972032e-05, - "logits/chosen": -3.667257785797119, - "logits/rejected": -1.516420602798462, - "logps/chosen": -316.3009948730469, - "logps/rejected": -522.5285034179688, - "loss": 0.0039, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.02909545972943306, - "rewards/margins": 8.715680122375488, - "rewards/rejected": -8.68658447265625, - "step": 1309 - }, - { - "epoch": 0.5, - "learning_rate": 1.0482675770519279e-05, - "logits/chosen": -4.128982067108154, - "logits/rejected": -3.642009973526001, - "logps/chosen": -96.68687438964844, - "logps/rejected": -342.46807861328125, - "loss": 0.0004, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.7125656604766846, - "rewards/margins": 11.040814399719238, - "rewards/rejected": -13.753379821777344, - "step": 1310 - }, - { - "epoch": 0.5, - "learning_rate": 1.0470766773956205e-05, - "logits/chosen": -4.435621738433838, - "logits/rejected": -2.0907270908355713, - "logps/chosen": -353.35491943359375, - "logps/rejected": -1268.729736328125, - "loss": 0.0008, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.5218505859375, - "rewards/margins": 29.410785675048828, - "rewards/rejected": -29.932636260986328, - "step": 1311 - }, - { - "epoch": 0.5, - "learning_rate": 1.0458857108211113e-05, - "logits/chosen": -3.8952796459198, - "logits/rejected": -2.514794111251831, - "logps/chosen": -358.97259521484375, - "logps/rejected": -900.1898193359375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.2756288051605225, - "rewards/margins": 28.177570343017578, - "rewards/rejected": -30.45319938659668, - "step": 1312 - }, - { - "epoch": 0.5, - "learning_rate": 1.0446946790213275e-05, - "logits/chosen": -6.19942045211792, - "logits/rejected": 0.2577512562274933, - "logps/chosen": -472.05084228515625, - "logps/rejected": -1889.521728515625, - "loss": 0.0006, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.9264892935752869, - "rewards/margins": 15.85200309753418, - "rewards/rejected": -16.778491973876953, - "step": 1313 - }, - { - "epoch": 0.5, - "learning_rate": 1.0435035836892879e-05, - "logits/chosen": -2.958664894104004, - "logits/rejected": -2.5664188861846924, - "logps/chosen": -194.85740661621094, - "logps/rejected": -671.2271728515625, - "loss": 0.01, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.3314285278320312, - "rewards/margins": 24.65435028076172, - "rewards/rejected": -25.98577880859375, - "step": 1314 - }, - { - "epoch": 0.5, - "learning_rate": 1.0423124265181012e-05, - "logits/chosen": -4.2458415031433105, - "logits/rejected": -0.3934740722179413, - "logps/chosen": -655.7218627929688, - "logps/rejected": -1504.15673828125, - "loss": 0.0003, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.426239013671875, - "rewards/margins": 11.020367622375488, - "rewards/rejected": -13.446606636047363, - "step": 1315 - }, - { - "epoch": 0.5, - "learning_rate": 1.0411212092009647e-05, - "logits/chosen": -0.5267317295074463, - "logits/rejected": -4.19296407699585, - "logps/chosen": -606.3734741210938, - "logps/rejected": -510.93310546875, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.342529296875, - "rewards/margins": 28.80250358581543, - "rewards/rejected": -27.45997428894043, - "step": 1316 - }, - { - "epoch": 0.5, - "learning_rate": 1.0399299334311615e-05, - "logits/chosen": -7.245974063873291, - "logits/rejected": -1.8178287744522095, - "logps/chosen": -385.6427001953125, - "logps/rejected": -4117.73779296875, - "loss": 0.0022, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.4940308332443237, - "rewards/margins": 22.844642639160156, - "rewards/rejected": -24.338672637939453, - "step": 1317 - }, - { - "epoch": 0.5, - "learning_rate": 1.0387386009020569e-05, - "logits/chosen": -4.134701251983643, - "logits/rejected": -5.404897689819336, - "logps/chosen": -285.5185852050781, - "logps/rejected": -455.7350158691406, - "loss": 0.0002, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.971099853515625, - "rewards/margins": 8.228732109069824, - "rewards/rejected": -10.19983196258545, - "step": 1318 - }, - { - "epoch": 0.5, - "learning_rate": 1.0375472133070969e-05, - "logits/chosen": -4.395586967468262, - "logits/rejected": -3.0859336853027344, - "logps/chosen": -179.65457153320312, - "logps/rejected": -553.945068359375, - "loss": 0.0007, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.43608400225639343, - "rewards/margins": 19.763206481933594, - "rewards/rejected": -19.32712173461914, - "step": 1319 - }, - { - "epoch": 0.5, - "learning_rate": 1.036355772339807e-05, - "logits/chosen": -3.009683847427368, - "logits/rejected": -1.8848950862884521, - "logps/chosen": -228.37030029296875, - "logps/rejected": -727.9234619140625, - "loss": 0.006, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.680578589439392, - "rewards/margins": 24.910465240478516, - "rewards/rejected": -26.59104347229004, - "step": 1320 - }, - { - "epoch": 0.5, - "learning_rate": 1.0351642796937873e-05, - "logits/chosen": -2.4179301261901855, - "logits/rejected": -3.7309060096740723, - "logps/chosen": -248.3048095703125, - "logps/rejected": -385.3970947265625, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.01669921912252903, - "rewards/margins": 15.995513916015625, - "rewards/rejected": -15.978815078735352, - "step": 1321 - }, - { - "epoch": 0.5, - "learning_rate": 1.0339727370627119e-05, - "logits/chosen": -3.8276212215423584, - "logits/rejected": -0.3609098792076111, - "logps/chosen": -271.1544494628906, - "logps/rejected": -614.3846435546875, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.3556152284145355, - "rewards/margins": 9.46527099609375, - "rewards/rejected": -9.820886611938477, - "step": 1322 - }, - { - "epoch": 0.5, - "learning_rate": 1.032781146140326e-05, - "logits/chosen": -1.0748018026351929, - "logits/rejected": -4.422270774841309, - "logps/chosen": -480.1663513183594, - "logps/rejected": -376.0444030761719, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.8176727294921875, - "rewards/margins": 10.689859390258789, - "rewards/rejected": -13.507532119750977, - "step": 1323 - }, - { - "epoch": 0.5, - "learning_rate": 1.031589508620443e-05, - "logits/chosen": -2.782715320587158, - "logits/rejected": -4.306484699249268, - "logps/chosen": -398.379638671875, - "logps/rejected": -343.57904052734375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.058197021484375, - "rewards/margins": 10.831768989562988, - "rewards/rejected": -10.773571968078613, - "step": 1324 - }, - { - "epoch": 0.5, - "learning_rate": 1.030397826196943e-05, - "logits/chosen": -1.2996553182601929, - "logits/rejected": -3.936270236968994, - "logps/chosen": -302.60888671875, - "logps/rejected": -172.57980346679688, - "loss": 0.0002, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.45361024141311646, - "rewards/margins": 11.737837791442871, - "rewards/rejected": -11.28422737121582, - "step": 1325 - }, - { - "epoch": 0.5, - "learning_rate": 1.0292061005637695e-05, - "logits/chosen": -6.114161491394043, - "logits/rejected": -0.19630113244056702, - "logps/chosen": -391.3878479003906, - "logps/rejected": -1754.29052734375, - "loss": 0.0004, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.35227370262146, - "rewards/margins": 21.64436912536621, - "rewards/rejected": -23.99664306640625, - "step": 1326 - }, - { - "epoch": 0.5, - "learning_rate": 1.028014333414928e-05, - "logits/chosen": 0.5620214939117432, - "logits/rejected": -5.003384590148926, - "logps/chosen": -255.6071319580078, - "logps/rejected": -298.4267272949219, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.21610260009765625, - "rewards/margins": 16.70489501953125, - "rewards/rejected": -16.488792419433594, - "step": 1327 - }, - { - "epoch": 0.5, - "learning_rate": 1.0268225264444829e-05, - "logits/chosen": -1.0472307205200195, - "logits/rejected": -0.358889102935791, - "logps/chosen": -238.5166015625, - "logps/rejected": -284.5572509765625, - "loss": 0.0807, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.055490255355835, - "rewards/margins": 0.09784531593322754, - "rewards/rejected": -2.1533355712890625, - "step": 1328 - }, - { - "epoch": 0.5, - "learning_rate": 1.0256306813465545e-05, - "logits/chosen": -3.5152876377105713, - "logits/rejected": -4.1824259757995605, - "logps/chosen": -280.3853454589844, - "logps/rejected": -433.38043212890625, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.5258057117462158, - "rewards/margins": 17.5538330078125, - "rewards/rejected": -19.079639434814453, - "step": 1329 - }, - { - "epoch": 0.5, - "learning_rate": 1.0244387998153179e-05, - "logits/chosen": -0.6295050382614136, - "logits/rejected": -0.2317010462284088, - "logps/chosen": -221.24441528320312, - "logps/rejected": -500.17041015625, - "loss": 0.0014, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.7390655279159546, - "rewards/margins": 14.101828575134277, - "rewards/rejected": -15.840893745422363, - "step": 1330 - }, - { - "epoch": 0.5, - "learning_rate": 1.0232468835450002e-05, - "logits/chosen": -1.3261841535568237, - "logits/rejected": -3.888812780380249, - "logps/chosen": -292.71160888671875, - "logps/rejected": -532.296875, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.4996337890625, - "rewards/margins": 30.29475212097168, - "rewards/rejected": -31.79438591003418, - "step": 1331 - }, - { - "epoch": 0.5, - "learning_rate": 1.0220549342298774e-05, - "logits/chosen": -4.490443229675293, - "logits/rejected": -2.527529001235962, - "logps/chosen": -452.61907958984375, - "logps/rejected": -1050.301025390625, - "loss": 0.0113, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.2365387678146362, - "rewards/margins": 17.44188117980957, - "rewards/rejected": -18.67841911315918, - "step": 1332 - }, - { - "epoch": 0.5, - "learning_rate": 1.0208629535642726e-05, - "logits/chosen": -0.6007236838340759, - "logits/rejected": -2.5034236907958984, - "logps/chosen": -260.8358154296875, - "logps/rejected": -202.8150634765625, - "loss": 0.0014, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.09091796725988388, - "rewards/margins": 5.294714450836182, - "rewards/rejected": -5.20379638671875, - "step": 1333 - }, - { - "epoch": 0.5, - "learning_rate": 1.0196709432425535e-05, - "logits/chosen": 0.6305313110351562, - "logits/rejected": -2.5132434368133545, - "logps/chosen": -494.77069091796875, - "logps/rejected": -689.3271484375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.051727294921875, - "rewards/margins": 28.04522705078125, - "rewards/rejected": -29.096954345703125, - "step": 1334 - }, - { - "epoch": 0.5, - "learning_rate": 1.01847890495913e-05, - "logits/chosen": -3.6589627265930176, - "logits/rejected": -1.9229307174682617, - "logps/chosen": -253.3970947265625, - "logps/rejected": -894.5450439453125, - "loss": 0.0026, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.1316651105880737, - "rewards/margins": 23.93010139465332, - "rewards/rejected": -22.798437118530273, - "step": 1335 - }, - { - "epoch": 0.51, - "learning_rate": 1.0172868404084518e-05, - "logits/chosen": -1.9070608615875244, - "logits/rejected": -4.328812122344971, - "logps/chosen": -199.66015625, - "logps/rejected": -336.3454284667969, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.5083557367324829, - "rewards/margins": 15.582886695861816, - "rewards/rejected": -15.074530601501465, - "step": 1336 - }, - { - "epoch": 0.51, - "learning_rate": 1.0160947512850057e-05, - "logits/chosen": -1.2656711339950562, - "logits/rejected": -4.80845308303833, - "logps/chosen": -321.651123046875, - "logps/rejected": -229.27377319335938, - "loss": 0.0002, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.3657654523849487, - "rewards/margins": 13.932779312133789, - "rewards/rejected": -15.298544883728027, - "step": 1337 - }, - { - "epoch": 0.51, - "learning_rate": 1.0149026392833137e-05, - "logits/chosen": 0.5400798916816711, - "logits/rejected": -3.816879987716675, - "logps/chosen": -340.10400390625, - "logps/rejected": -267.44586181640625, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.38621827960014343, - "rewards/margins": 11.956875801086426, - "rewards/rejected": -12.343093872070312, - "step": 1338 - }, - { - "epoch": 0.51, - "learning_rate": 1.0137105060979301e-05, - "logits/chosen": -1.2142035961151123, - "logits/rejected": -1.6242743730545044, - "logps/chosen": -390.258544921875, - "logps/rejected": -629.5867919921875, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.477789282798767, - "rewards/margins": 8.783617973327637, - "rewards/rejected": -7.305829048156738, - "step": 1339 - }, - { - "epoch": 0.51, - "learning_rate": 1.0125183534234392e-05, - "logits/chosen": -3.9639315605163574, - "logits/rejected": -1.959285855293274, - "logps/chosen": -241.66806030273438, - "logps/rejected": -821.484130859375, - "loss": 0.001, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.0753768682479858, - "rewards/margins": 17.494735717773438, - "rewards/rejected": -18.570112228393555, - "step": 1340 - }, - { - "epoch": 0.51, - "learning_rate": 1.0113261829544541e-05, - "logits/chosen": -4.17437744140625, - "logits/rejected": -0.6222302317619324, - "logps/chosen": -442.3927001953125, - "logps/rejected": -1271.966552734375, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.8129242658615112, - "rewards/margins": 18.16691017150879, - "rewards/rejected": -19.979833602905273, - "step": 1341 - }, - { - "epoch": 0.51, - "learning_rate": 1.0101339963856112e-05, - "logits/chosen": -0.8539913296699524, - "logits/rejected": -2.359447717666626, - "logps/chosen": -251.33543395996094, - "logps/rejected": -300.1622314453125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.240081787109375, - "rewards/margins": 17.016508102416992, - "rewards/rejected": -19.256589889526367, - "step": 1342 - }, - { - "epoch": 0.51, - "learning_rate": 1.0089417954115715e-05, - "logits/chosen": -2.3343873023986816, - "logits/rejected": -2.2072064876556396, - "logps/chosen": -276.1666564941406, - "logps/rejected": -441.65826416015625, - "loss": 0.0002, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.5784912109375, - "rewards/margins": 9.421722412109375, - "rewards/rejected": -12.000213623046875, - "step": 1343 - }, - { - "epoch": 0.51, - "learning_rate": 1.0077495817270155e-05, - "logits/chosen": -0.4331881105899811, - "logits/rejected": -2.027379274368286, - "logps/chosen": -362.382080078125, - "logps/rejected": -261.8318786621094, - "loss": 0.003, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.5097901821136475, - "rewards/margins": 3.734143018722534, - "rewards/rejected": -6.243933200836182, - "step": 1344 - }, - { - "epoch": 0.51, - "learning_rate": 1.0065573570266424e-05, - "logits/chosen": -4.2350945472717285, - "logits/rejected": -1.9643659591674805, - "logps/chosen": -349.0313415527344, - "logps/rejected": -1518.876220703125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.651324450969696, - "rewards/margins": 65.09342193603516, - "rewards/rejected": -64.44210052490234, - "step": 1345 - }, - { - "epoch": 0.51, - "learning_rate": 1.0053651230051671e-05, - "logits/chosen": 0.8670258522033691, - "logits/rejected": -1.494952917098999, - "logps/chosen": -296.26947021484375, - "logps/rejected": -494.9871520996094, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.4583190977573395, - "rewards/margins": 23.5811710357666, - "rewards/rejected": -24.03948974609375, - "step": 1346 - }, - { - "epoch": 0.51, - "learning_rate": 1.0041728813573168e-05, - "logits/chosen": -5.353938102722168, - "logits/rejected": -2.574118137359619, - "logps/chosen": -1132.83740234375, - "logps/rejected": -2646.646728515625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.2678955793380737, - "rewards/margins": 58.48808670043945, - "rewards/rejected": -57.220191955566406, - "step": 1347 - }, - { - "epoch": 0.51, - "learning_rate": 1.002980633777831e-05, - "logits/chosen": -3.5685110092163086, - "logits/rejected": -1.0083705186843872, - "logps/chosen": -213.99916076660156, - "logps/rejected": -672.0269775390625, - "loss": 0.0867, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.4462021589279175, - "rewards/margins": 9.442585945129395, - "rewards/rejected": -10.888788223266602, - "step": 1348 - }, - { - "epoch": 0.51, - "learning_rate": 1.0017883819614558e-05, - "logits/chosen": -2.1910576820373535, - "logits/rejected": -1.0846742391586304, - "logps/chosen": -306.5779113769531, - "logps/rejected": -1007.0957641601562, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.09293518215417862, - "rewards/margins": 39.69801712036133, - "rewards/rejected": -39.60508346557617, - "step": 1349 - }, - { - "epoch": 0.51, - "learning_rate": 1.000596127602945e-05, - "logits/chosen": -5.415533065795898, - "logits/rejected": -1.1362595558166504, - "logps/chosen": -415.520751953125, - "logps/rejected": -1217.1087646484375, - "loss": 0.0003, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.5640442371368408, - "rewards/margins": 10.726691246032715, - "rewards/rejected": -12.290735244750977, - "step": 1350 - }, - { - "epoch": 0.51, - "learning_rate": 9.994038723970551e-06, - "logits/chosen": 0.11993187665939331, - "logits/rejected": -3.1114375591278076, - "logps/chosen": -307.51690673828125, - "logps/rejected": -453.21624755859375, - "loss": 0.0867, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.08660888671875, - "rewards/margins": 24.93686866760254, - "rewards/rejected": -27.02347755432129, - "step": 1351 - }, - { - "epoch": 0.51, - "learning_rate": 9.982116180385444e-06, - "logits/chosen": -5.32357931137085, - "logits/rejected": 0.2991771399974823, - "logps/chosen": -388.173095703125, - "logps/rejected": -1535.46826171875, - "loss": 0.0011, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.555755615234375, - "rewards/margins": 12.299822807312012, - "rewards/rejected": -12.855578422546387, - "step": 1352 - }, - { - "epoch": 0.51, - "learning_rate": 9.970193662221694e-06, - "logits/chosen": 0.07111308723688126, - "logits/rejected": -3.6820671558380127, - "logps/chosen": -243.3407440185547, - "logps/rejected": -205.22470092773438, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.9126632809638977, - "rewards/margins": 13.967567443847656, - "rewards/rejected": -14.880230903625488, - "step": 1353 - }, - { - "epoch": 0.51, - "learning_rate": 9.958271186426834e-06, - "logits/chosen": -0.19162246584892273, - "logits/rejected": 0.24129405617713928, - "logps/chosen": -784.1917114257812, - "logps/rejected": -1371.9296875, - "loss": 0.0006, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.089508056640625, - "rewards/margins": 40.57111740112305, - "rewards/rejected": -40.66062545776367, - "step": 1354 - }, - { - "epoch": 0.51, - "learning_rate": 9.946348769948332e-06, - "logits/chosen": -6.623574256896973, - "logits/rejected": -3.7112369537353516, - "logps/chosen": -454.148681640625, - "logps/rejected": -1302.467529296875, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.20717163383960724, - "rewards/margins": 22.330854415893555, - "rewards/rejected": -22.123682022094727, - "step": 1355 - }, - { - "epoch": 0.51, - "learning_rate": 9.934426429733577e-06, - "logits/chosen": -1.9633090496063232, - "logits/rejected": -1.749502420425415, - "logps/chosen": -203.18580627441406, - "logps/rejected": -403.9772644042969, - "loss": 0.0003, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.3278671503067017, - "rewards/margins": 12.435746192932129, - "rewards/rejected": -13.7636137008667, - "step": 1356 - }, - { - "epoch": 0.51, - "learning_rate": 9.922504182729848e-06, - "logits/chosen": -2.235685348510742, - "logits/rejected": -3.877281427383423, - "logps/chosen": -398.06072998046875, - "logps/rejected": -426.84405517578125, - "loss": 0.0004, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.018316626548767, - "rewards/margins": 10.551996231079102, - "rewards/rejected": -11.5703125, - "step": 1357 - }, - { - "epoch": 0.51, - "learning_rate": 9.910582045884292e-06, - "logits/chosen": -1.5282480716705322, - "logits/rejected": 0.9971303343772888, - "logps/chosen": -303.85211181640625, - "logps/rejected": -841.227294921875, - "loss": 0.0009, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.7867645025253296, - "rewards/margins": 28.916446685791016, - "rewards/rejected": -30.703210830688477, - "step": 1358 - }, - { - "epoch": 0.51, - "learning_rate": 9.898660036143893e-06, - "logits/chosen": -5.616562366485596, - "logits/rejected": -1.7492655515670776, - "logps/chosen": -203.6559600830078, - "logps/rejected": -798.3328857421875, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.6792221069335938, - "rewards/margins": 16.95339012145996, - "rewards/rejected": -18.632612228393555, - "step": 1359 - }, - { - "epoch": 0.51, - "learning_rate": 9.886738170455464e-06, - "logits/chosen": -0.40428289771080017, - "logits/rejected": -0.5263437032699585, - "logps/chosen": -331.5045166015625, - "logps/rejected": -500.4013671875, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.615270972251892, - "rewards/margins": 9.743847846984863, - "rewards/rejected": -11.359118461608887, - "step": 1360 - }, - { - "epoch": 0.51, - "learning_rate": 9.874816465765607e-06, - "logits/chosen": -0.5189632177352905, - "logits/rejected": -0.9383388757705688, - "logps/chosen": -213.8059844970703, - "logps/rejected": -211.19711303710938, - "loss": 0.0252, - "rewards/accuracies": 1.0, - "rewards/chosen": -3.034010410308838, - "rewards/margins": 1.5025620460510254, - "rewards/rejected": -4.536572456359863, - "step": 1361 - }, - { - "epoch": 0.51, - "learning_rate": 9.862894939020702e-06, - "logits/chosen": -0.2998826205730438, - "logits/rejected": -3.7280094623565674, - "logps/chosen": -311.76483154296875, - "logps/rejected": -532.523681640625, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": 3.7198150157928467, - "rewards/margins": 28.40938949584961, - "rewards/rejected": -24.6895751953125, - "step": 1362 - }, - { - "epoch": 0.52, - "learning_rate": 9.850973607166865e-06, - "logits/chosen": -4.396988391876221, - "logits/rejected": -2.4324729442596436, - "logps/chosen": -333.40875244140625, - "logps/rejected": -659.9285278320312, - "loss": 0.0036, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.1698365211486816, - "rewards/margins": 4.710089206695557, - "rewards/rejected": -6.879925727844238, - "step": 1363 - }, - { - "epoch": 0.52, - "learning_rate": 9.839052487149947e-06, - "logits/chosen": -1.1253241300582886, - "logits/rejected": -0.3733006715774536, - "logps/chosen": -218.5584259033203, - "logps/rejected": -865.870361328125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -3.1736085414886475, - "rewards/margins": 39.68080520629883, - "rewards/rejected": -42.85441207885742, - "step": 1364 - }, - { - "epoch": 0.52, - "learning_rate": 9.827131595915486e-06, - "logits/chosen": -5.504205226898193, - "logits/rejected": -0.47448742389678955, - "logps/chosen": -582.1187744140625, - "logps/rejected": -2581.353515625, - "loss": 0.0002, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.410424828529358, - "rewards/margins": 47.09382629394531, - "rewards/rejected": -48.504249572753906, - "step": 1365 - }, - { - "epoch": 0.52, - "learning_rate": 9.815210950408703e-06, - "logits/chosen": -1.9483885765075684, - "logits/rejected": -1.7802988290786743, - "logps/chosen": -792.953857421875, - "logps/rejected": -1243.6737060546875, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 2.039904832839966, - "rewards/margins": 27.36933135986328, - "rewards/rejected": -25.329425811767578, - "step": 1366 - }, - { - "epoch": 0.52, - "learning_rate": 9.803290567574468e-06, - "logits/chosen": -6.253389358520508, - "logits/rejected": -1.903885006904602, - "logps/chosen": -305.96038818359375, - "logps/rejected": -1425.6181640625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.339611768722534, - "rewards/margins": 27.188697814941406, - "rewards/rejected": -29.528308868408203, - "step": 1367 - }, - { - "epoch": 0.52, - "learning_rate": 9.791370464357279e-06, - "logits/chosen": -1.1272023916244507, - "logits/rejected": -2.3883612155914307, - "logps/chosen": -504.93017578125, - "logps/rejected": -695.6619873046875, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.8745361566543579, - "rewards/margins": 26.057266235351562, - "rewards/rejected": -25.182729721069336, - "step": 1368 - }, - { - "epoch": 0.52, - "learning_rate": 9.779450657701227e-06, - "logits/chosen": -2.826051712036133, - "logits/rejected": -0.6443123817443848, - "logps/chosen": -153.48951721191406, - "logps/rejected": -655.7928466796875, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.17108917236328125, - "rewards/margins": 12.34727668762207, - "rewards/rejected": -12.518365859985352, - "step": 1369 - }, - { - "epoch": 0.52, - "learning_rate": 9.76753116455e-06, - "logits/chosen": -0.3129982650279999, - "logits/rejected": -1.5013436079025269, - "logps/chosen": -303.1639709472656, - "logps/rejected": -313.87237548828125, - "loss": 0.0018, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.3646453619003296, - "rewards/margins": 4.221859931945801, - "rewards/rejected": -5.58650541305542, - "step": 1370 - }, - { - "epoch": 0.52, - "learning_rate": 9.755612001846823e-06, - "logits/chosen": -2.3091955184936523, - "logits/rejected": -0.01998022012412548, - "logps/chosen": -524.89208984375, - "logps/rejected": -1546.0570068359375, - "loss": 0.0073, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.32252198457717896, - "rewards/margins": 38.620338439941406, - "rewards/rejected": -38.9428596496582, - "step": 1371 - }, - { - "epoch": 0.52, - "learning_rate": 9.743693186534458e-06, - "logits/chosen": -1.5153712034225464, - "logits/rejected": -0.9612948894500732, - "logps/chosen": -310.5956726074219, - "logps/rejected": -975.1083984375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.333148241043091, - "rewards/margins": 35.030364990234375, - "rewards/rejected": -37.3635139465332, - "step": 1372 - }, - { - "epoch": 0.52, - "learning_rate": 9.731774735555174e-06, - "logits/chosen": -1.2116597890853882, - "logits/rejected": -3.92856502532959, - "logps/chosen": -644.4063720703125, - "logps/rejected": -842.7720947265625, - "loss": 0.0041, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.7747131586074829, - "rewards/margins": 34.88938903808594, - "rewards/rejected": -35.664100646972656, - "step": 1373 - }, - { - "epoch": 0.52, - "learning_rate": 9.719856665850724e-06, - "logits/chosen": -1.5707687139511108, - "logits/rejected": -2.205592632293701, - "logps/chosen": -602.1619873046875, - "logps/rejected": -970.5755615234375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.3601928651332855, - "rewards/margins": 32.44535827636719, - "rewards/rejected": -32.80554962158203, - "step": 1374 - }, - { - "epoch": 0.52, - "learning_rate": 9.707938994362309e-06, - "logits/chosen": -3.398554563522339, - "logits/rejected": -2.501502275466919, - "logps/chosen": -239.32803344726562, - "logps/rejected": -414.2398986816406, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -3.6411101818084717, - "rewards/margins": 8.117846488952637, - "rewards/rejected": -11.758956909179688, - "step": 1375 - }, - { - "epoch": 0.52, - "learning_rate": 9.696021738030575e-06, - "logits/chosen": -5.178009986877441, - "logits/rejected": -4.164665222167969, - "logps/chosen": -234.50027465820312, - "logps/rejected": -564.38037109375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.9756622314453125, - "rewards/margins": 15.649930953979492, - "rewards/rejected": -18.625593185424805, - "step": 1376 - }, - { - "epoch": 0.52, - "learning_rate": 9.684104913795575e-06, - "logits/chosen": -2.1141860485076904, - "logits/rejected": -5.923010349273682, - "logps/chosen": -486.4467468261719, - "logps/rejected": -115.21525573730469, - "loss": 0.0002, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.1394011974334717, - "rewards/margins": 6.232440948486328, - "rewards/rejected": -8.371842384338379, - "step": 1377 - }, - { - "epoch": 0.52, - "learning_rate": 9.672188538596746e-06, - "logits/chosen": 0.8542755842208862, - "logits/rejected": -3.1283795833587646, - "logps/chosen": -388.1765441894531, - "logps/rejected": -490.2620849609375, - "loss": 0.0569, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.5249725580215454, - "rewards/margins": 32.548500061035156, - "rewards/rejected": -32.023529052734375, - "step": 1378 - }, - { - "epoch": 0.52, - "learning_rate": 9.660272629372881e-06, - "logits/chosen": -5.725246906280518, - "logits/rejected": -0.0256374329328537, - "logps/chosen": -451.0921630859375, - "logps/rejected": -1330.3487548828125, - "loss": 0.0015, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.42547607421875, - "rewards/margins": 19.448339462280273, - "rewards/rejected": -19.022863388061523, - "step": 1379 - }, - { - "epoch": 0.52, - "learning_rate": 9.648357203062127e-06, - "logits/chosen": -1.2038465738296509, - "logits/rejected": -1.4977223873138428, - "logps/chosen": -265.4911804199219, - "logps/rejected": -504.6771545410156, - "loss": 0.0021, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.41296693682670593, - "rewards/margins": 20.441171646118164, - "rewards/rejected": -20.85413932800293, - "step": 1380 - }, - { - "epoch": 0.52, - "learning_rate": 9.636442276601932e-06, - "logits/chosen": -0.8484458327293396, - "logits/rejected": -3.3453474044799805, - "logps/chosen": -304.26239013671875, - "logps/rejected": -381.5698547363281, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.42956238985061646, - "rewards/margins": 15.816479682922363, - "rewards/rejected": -16.246042251586914, - "step": 1381 - }, - { - "epoch": 0.52, - "learning_rate": 9.624527866929033e-06, - "logits/chosen": -2.468595504760742, - "logits/rejected": -2.467869758605957, - "logps/chosen": -351.0419006347656, - "logps/rejected": -660.8840942382812, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -5.257153511047363, - "rewards/margins": 19.424022674560547, - "rewards/rejected": -24.681177139282227, - "step": 1382 - }, - { - "epoch": 0.52, - "learning_rate": 9.612613990979436e-06, - "logits/chosen": -0.8773534893989563, - "logits/rejected": -3.423366069793701, - "logps/chosen": -470.15478515625, - "logps/rejected": -518.2952880859375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.8044647574424744, - "rewards/margins": 29.95741844177246, - "rewards/rejected": -29.1529541015625, - "step": 1383 - }, - { - "epoch": 0.52, - "learning_rate": 9.600700665688388e-06, - "logits/chosen": -1.9513957500457764, - "logits/rejected": -5.892955780029297, - "logps/chosen": -506.61883544921875, - "logps/rejected": -410.9945373535156, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.39750978350639343, - "rewards/margins": 25.14312171936035, - "rewards/rejected": -25.540632247924805, - "step": 1384 - }, - { - "epoch": 0.52, - "learning_rate": 9.588787907990356e-06, - "logits/chosen": -2.978391647338867, - "logits/rejected": -3.229759931564331, - "logps/chosen": -215.54580688476562, - "logps/rejected": -389.6518249511719, - "loss": 0.0005, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.613063097000122, - "rewards/margins": 13.646127700805664, - "rewards/rejected": -15.259190559387207, - "step": 1385 - }, - { - "epoch": 0.52, - "learning_rate": 9.576875734818993e-06, - "logits/chosen": -3.0199177265167236, - "logits/rejected": -0.4018506109714508, - "logps/chosen": -554.1763916015625, - "logps/rejected": -2007.7357177734375, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.4538818299770355, - "rewards/margins": 43.06562423706055, - "rewards/rejected": -42.61174392700195, - "step": 1386 - }, - { - "epoch": 0.52, - "learning_rate": 9.564964163107125e-06, - "logits/chosen": -0.07465499639511108, - "logits/rejected": -1.5485951900482178, - "logps/chosen": -436.7066650390625, - "logps/rejected": -784.00146484375, - "loss": 0.0006, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.15375366806983948, - "rewards/margins": 34.28337860107422, - "rewards/rejected": -34.12962341308594, - "step": 1387 - }, - { - "epoch": 0.52, - "learning_rate": 9.553053209786725e-06, - "logits/chosen": 1.0401384830474854, - "logits/rejected": -0.09289945662021637, - "logps/chosen": -431.6871032714844, - "logps/rejected": -762.8082275390625, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.4924042224884033, - "rewards/margins": 26.23330307006836, - "rewards/rejected": -27.7257080078125, - "step": 1388 - }, - { - "epoch": 0.53, - "learning_rate": 9.541142891788887e-06, - "logits/chosen": -0.9711090922355652, - "logits/rejected": -0.8682693839073181, - "logps/chosen": -274.8927307128906, - "logps/rejected": -770.8189697265625, - "loss": 0.004, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.2926971912384033, - "rewards/margins": 30.244760513305664, - "rewards/rejected": -31.537458419799805, - "step": 1389 - }, - { - "epoch": 0.53, - "learning_rate": 9.529233226043799e-06, - "logits/chosen": -2.7582361698150635, - "logits/rejected": -2.48246431350708, - "logps/chosen": -134.805419921875, - "logps/rejected": -246.8944091796875, - "loss": 0.0003, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.085012912750244, - "rewards/margins": 11.52964973449707, - "rewards/rejected": -13.614662170410156, - "step": 1390 - }, - { - "epoch": 0.53, - "learning_rate": 9.517324229480724e-06, - "logits/chosen": -4.977705955505371, - "logits/rejected": -1.990543246269226, - "logps/chosen": -476.51531982421875, - "logps/rejected": -1342.013671875, - "loss": 0.0002, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.013458251953125, - "rewards/margins": 24.967792510986328, - "rewards/rejected": -24.954334259033203, - "step": 1391 - }, - { - "epoch": 0.53, - "learning_rate": 9.505415919027971e-06, - "logits/chosen": -4.251014232635498, - "logits/rejected": -2.6103055477142334, - "logps/chosen": -430.73748779296875, - "logps/rejected": -795.9845581054688, - "loss": 0.0011, - "rewards/accuracies": 1.0, - "rewards/chosen": -3.0318543910980225, - "rewards/margins": 13.282953262329102, - "rewards/rejected": -16.314807891845703, - "step": 1392 - }, - { - "epoch": 0.53, - "learning_rate": 9.493508311612874e-06, - "logits/chosen": -2.7825961112976074, - "logits/rejected": -3.627495527267456, - "logps/chosen": -244.91380310058594, - "logps/rejected": -505.6843566894531, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.7845901250839233, - "rewards/margins": 22.36107063293457, - "rewards/rejected": -20.576480865478516, - "step": 1393 - }, - { - "epoch": 0.53, - "learning_rate": 9.481601424161772e-06, - "logits/chosen": 0.2608652710914612, - "logits/rejected": -2.080345869064331, - "logps/chosen": -442.85888671875, - "logps/rejected": -561.5452880859375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.8247559070587158, - "rewards/margins": 20.972448348999023, - "rewards/rejected": -22.797204971313477, - "step": 1394 - }, - { - "epoch": 0.53, - "learning_rate": 9.469695273599982e-06, - "logits/chosen": -0.41496092081069946, - "logits/rejected": -2.9532675743103027, - "logps/chosen": -235.93861389160156, - "logps/rejected": -267.1343994140625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.6360886096954346, - "rewards/margins": 16.394227981567383, - "rewards/rejected": -19.030317306518555, - "step": 1395 - }, - { - "epoch": 0.53, - "learning_rate": 9.457789876851759e-06, - "logits/chosen": -0.2511911988258362, - "logits/rejected": -0.267696350812912, - "logps/chosen": -560.3782348632812, - "logps/rejected": -1146.57666015625, - "loss": 0.0022, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.4072814881801605, - "rewards/margins": 41.922855377197266, - "rewards/rejected": -41.51557540893555, - "step": 1396 - }, - { - "epoch": 0.53, - "learning_rate": 9.445885250840301e-06, - "logits/chosen": -1.0343188047409058, - "logits/rejected": -1.6966255903244019, - "logps/chosen": -466.5814514160156, - "logps/rejected": -394.16680908203125, - "loss": 0.0064, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.845062255859375, - "rewards/margins": 2.94451904296875, - "rewards/rejected": -5.789581298828125, - "step": 1397 - }, - { - "epoch": 0.53, - "learning_rate": 9.433981412487711e-06, - "logits/chosen": -0.5486343502998352, - "logits/rejected": -1.1807539463043213, - "logps/chosen": -532.8990478515625, - "logps/rejected": -646.3995971679688, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.6432923078536987, - "rewards/margins": 10.467676162719727, - "rewards/rejected": -12.110968589782715, - "step": 1398 - }, - { - "epoch": 0.53, - "learning_rate": 9.422078378714962e-06, - "logits/chosen": -1.5763174295425415, - "logits/rejected": -3.105213165283203, - "logps/chosen": -287.15374755859375, - "logps/rejected": -450.3163757324219, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -3.4748291969299316, - "rewards/margins": 27.945541381835938, - "rewards/rejected": -31.42037010192871, - "step": 1399 - }, - { - "epoch": 0.53, - "learning_rate": 9.410176166441892e-06, - "logits/chosen": -3.689234495162964, - "logits/rejected": -1.1961305141448975, - "logps/chosen": -181.38763427734375, - "logps/rejected": -715.548583984375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.7172424793243408, - "rewards/margins": 26.585506439208984, - "rewards/rejected": -28.302749633789062, - "step": 1400 - }, - { - "epoch": 0.53, - "learning_rate": 9.398274792587169e-06, - "logits/chosen": -0.5021085143089294, - "logits/rejected": -3.009723663330078, - "logps/chosen": -239.87503051757812, - "logps/rejected": -131.58424377441406, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.2696624994277954, - "rewards/margins": 6.789994716644287, - "rewards/rejected": -8.059657096862793, - "step": 1401 - }, - { - "epoch": 0.53, - "learning_rate": 9.386374274068263e-06, - "logits/chosen": -3.0856668949127197, - "logits/rejected": -0.2268216907978058, - "logps/chosen": -460.6933898925781, - "logps/rejected": -1463.77783203125, - "loss": 0.0005, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.5265533924102783, - "rewards/margins": 49.82588195800781, - "rewards/rejected": -52.35243606567383, - "step": 1402 - }, - { - "epoch": 0.53, - "learning_rate": 9.374474627801439e-06, - "logits/chosen": -7.347964286804199, - "logits/rejected": 0.40778404474258423, - "logps/chosen": -329.0279846191406, - "logps/rejected": -2868.7607421875, - "loss": 0.0003, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.724356174468994, - "rewards/margins": 17.349594116210938, - "rewards/rejected": -20.073949813842773, - "step": 1403 - }, - { - "epoch": 0.53, - "learning_rate": 9.362575870701715e-06, - "logits/chosen": -0.3084821403026581, - "logits/rejected": -4.83336877822876, - "logps/chosen": -431.54498291015625, - "logps/rejected": -192.51889038085938, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.3565338850021362, - "rewards/margins": 9.360424041748047, - "rewards/rejected": -8.003890037536621, - "step": 1404 - }, - { - "epoch": 0.53, - "learning_rate": 9.350678019682847e-06, - "logits/chosen": -7.309391021728516, - "logits/rejected": -2.1236283779144287, - "logps/chosen": -239.61904907226562, - "logps/rejected": -1460.810302734375, - "loss": 0.0006, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.8235397338867188, - "rewards/margins": 24.391536712646484, - "rewards/rejected": -26.215076446533203, - "step": 1405 - }, - { - "epoch": 0.53, - "learning_rate": 9.338781091657297e-06, - "logits/chosen": -2.487541437149048, - "logits/rejected": -2.689911365509033, - "logps/chosen": -131.1120147705078, - "logps/rejected": -208.56838989257812, - "loss": 0.0005, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.6293998956680298, - "rewards/margins": 6.318467140197754, - "rewards/rejected": -7.947866916656494, - "step": 1406 - }, - { - "epoch": 0.53, - "learning_rate": 9.32688510353623e-06, - "logits/chosen": -1.1951215267181396, - "logits/rejected": -2.1052379608154297, - "logps/chosen": -373.6872863769531, - "logps/rejected": -604.4244384765625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 2.0253357887268066, - "rewards/margins": 24.140710830688477, - "rewards/rejected": -22.115375518798828, - "step": 1407 - }, - { - "epoch": 0.53, - "learning_rate": 9.314990072229461e-06, - "logits/chosen": -1.4606214761734009, - "logits/rejected": -4.711808681488037, - "logps/chosen": -544.0733642578125, - "logps/rejected": -336.9178771972656, - "loss": 0.0009, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.7322876453399658, - "rewards/margins": 16.063993453979492, - "rewards/rejected": -17.796281814575195, - "step": 1408 - }, - { - "epoch": 0.53, - "learning_rate": 9.30309601464545e-06, - "logits/chosen": -5.123553276062012, - "logits/rejected": -1.092147946357727, - "logps/chosen": -358.4889831542969, - "logps/rejected": -2294.5908203125, - "loss": 0.0007, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.216339111328125, - "rewards/margins": 40.20343780517578, - "rewards/rejected": -42.419776916503906, - "step": 1409 - }, - { - "epoch": 0.53, - "learning_rate": 9.291202947691272e-06, - "logits/chosen": -2.2372405529022217, - "logits/rejected": -1.4573708772659302, - "logps/chosen": -394.534912109375, - "logps/rejected": -901.4514770507812, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.206610083580017, - "rewards/margins": 20.293041229248047, - "rewards/rejected": -19.0864315032959, - "step": 1410 - }, - { - "epoch": 0.53, - "learning_rate": 9.279310888272596e-06, - "logits/chosen": -2.526177406311035, - "logits/rejected": 1.3092862367630005, - "logps/chosen": -501.7373046875, - "logps/rejected": -1448.5135498046875, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -3.663604736328125, - "rewards/margins": 16.200347900390625, - "rewards/rejected": -19.86395263671875, - "step": 1411 - }, - { - "epoch": 0.53, - "learning_rate": 9.267419853293656e-06, - "logits/chosen": -1.7808188199996948, - "logits/rejected": -3.9036924839019775, - "logps/chosen": -305.8198547363281, - "logps/rejected": -217.47348022460938, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.432672142982483, - "rewards/margins": 8.791152000427246, - "rewards/rejected": -10.223824501037598, - "step": 1412 - }, - { - "epoch": 0.53, - "learning_rate": 9.25552985965723e-06, - "logits/chosen": -1.1971449851989746, - "logits/rejected": -0.3426433503627777, - "logps/chosen": -541.4661865234375, - "logps/rejected": -903.4903564453125, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.334545850753784, - "rewards/margins": 21.792158126831055, - "rewards/rejected": -24.1267032623291, - "step": 1413 - }, - { - "epoch": 0.53, - "learning_rate": 9.243640924264619e-06, - "logits/chosen": -2.495596408843994, - "logits/rejected": -1.3633592128753662, - "logps/chosen": -797.9093017578125, - "logps/rejected": -1544.9742431640625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 2.7390382289886475, - "rewards/margins": 36.2558479309082, - "rewards/rejected": -33.51681137084961, - "step": 1414 - }, - { - "epoch": 0.53, - "learning_rate": 9.231753064015611e-06, - "logits/chosen": -8.065765380859375, - "logits/rejected": -2.366013526916504, - "logps/chosen": -408.583984375, - "logps/rejected": -2617.900390625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.306249976158142, - "rewards/margins": 34.18447494506836, - "rewards/rejected": -32.87822341918945, - "step": 1415 - }, - { - "epoch": 0.54, - "learning_rate": 9.219866295808481e-06, - "logits/chosen": -7.963124752044678, - "logits/rejected": -0.9629100561141968, - "logps/chosen": -349.0181579589844, - "logps/rejected": -2085.352783203125, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.5055510997772217, - "rewards/margins": 20.637638092041016, - "rewards/rejected": -23.1431884765625, - "step": 1416 - }, - { - "epoch": 0.54, - "learning_rate": 9.207980636539943e-06, - "logits/chosen": -2.4579551219940186, - "logits/rejected": -1.525899052619934, - "logps/chosen": -353.9599609375, - "logps/rejected": -863.6617431640625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.09361877292394638, - "rewards/margins": 18.429868698120117, - "rewards/rejected": -18.523487091064453, - "step": 1417 - }, - { - "epoch": 0.54, - "learning_rate": 9.196096103105127e-06, - "logits/chosen": -4.975925922393799, - "logits/rejected": -1.4829330444335938, - "logps/chosen": -526.4923095703125, - "logps/rejected": -1257.0048828125, - "loss": 0.003, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.7104248404502869, - "rewards/margins": 10.555737495422363, - "rewards/rejected": -11.266161918640137, - "step": 1418 - }, - { - "epoch": 0.54, - "learning_rate": 9.184212712397574e-06, - "logits/chosen": -7.381072044372559, - "logits/rejected": -2.3866305351257324, - "logps/chosen": -319.5765380859375, - "logps/rejected": -2020.4019775390625, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.0997406244277954, - "rewards/margins": 48.891475677490234, - "rewards/rejected": -47.7917366027832, - "step": 1419 - }, - { - "epoch": 0.54, - "learning_rate": 9.172330481309195e-06, - "logits/chosen": -4.319873332977295, - "logits/rejected": -1.7845252752304077, - "logps/chosen": -336.2357177734375, - "logps/rejected": -990.9310302734375, - "loss": 0.0015, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.572778344154358, - "rewards/margins": 25.224830627441406, - "rewards/rejected": -23.65205192565918, - "step": 1420 - }, - { - "epoch": 0.54, - "learning_rate": 9.160449426730261e-06, - "logits/chosen": -3.056992292404175, - "logits/rejected": -3.716557264328003, - "logps/chosen": -155.243408203125, - "logps/rejected": -514.6322631835938, - "loss": 0.0025, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.3017090559005737, - "rewards/margins": 26.6278018951416, - "rewards/rejected": -27.92951011657715, - "step": 1421 - }, - { - "epoch": 0.54, - "learning_rate": 9.148569565549355e-06, - "logits/chosen": -4.089067459106445, - "logits/rejected": -0.9669445753097534, - "logps/chosen": -400.6002197265625, - "logps/rejected": -1347.7685546875, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.0969818830490112, - "rewards/margins": 25.911100387573242, - "rewards/rejected": -27.008081436157227, - "step": 1422 - }, - { - "epoch": 0.54, - "learning_rate": 9.136690914653377e-06, - "logits/chosen": -1.4314879179000854, - "logits/rejected": -0.4614073634147644, - "logps/chosen": -272.7966003417969, - "logps/rejected": -582.378173828125, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.7938263416290283, - "rewards/margins": 16.308320999145508, - "rewards/rejected": -18.102148056030273, - "step": 1423 - }, - { - "epoch": 0.54, - "learning_rate": 9.1248134909275e-06, - "logits/chosen": -6.5750579833984375, - "logits/rejected": -1.2756199836730957, - "logps/chosen": -523.4326782226562, - "logps/rejected": -1971.968994140625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.645410180091858, - "rewards/margins": 26.476966857910156, - "rewards/rejected": -24.83155632019043, - "step": 1424 - }, - { - "epoch": 0.54, - "learning_rate": 9.112937311255158e-06, - "logits/chosen": -2.6717445850372314, - "logits/rejected": -0.5263413190841675, - "logps/chosen": -509.0849609375, - "logps/rejected": -1262.42578125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.0912872552871704, - "rewards/margins": 33.986541748046875, - "rewards/rejected": -35.07782745361328, - "step": 1425 - }, - { - "epoch": 0.54, - "learning_rate": 9.101062392518015e-06, - "logits/chosen": -7.776784896850586, - "logits/rejected": -1.851035237312317, - "logps/chosen": -231.8957977294922, - "logps/rejected": -4962.7529296875, - "loss": 0.0012, - "rewards/accuracies": 1.0, - "rewards/chosen": -3.8988234996795654, - "rewards/margins": 42.26372528076172, - "rewards/rejected": -46.16254806518555, - "step": 1426 - }, - { - "epoch": 0.54, - "learning_rate": 9.089188751595937e-06, - "logits/chosen": -1.806230068206787, - "logits/rejected": -0.6190628409385681, - "logps/chosen": -520.0235595703125, - "logps/rejected": -1334.640380859375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.0548218488693237, - "rewards/margins": 42.057029724121094, - "rewards/rejected": -41.0022087097168, - "step": 1427 - }, - { - "epoch": 0.54, - "learning_rate": 9.07731640536698e-06, - "logits/chosen": -1.7742931842803955, - "logits/rejected": -1.1294910907745361, - "logps/chosen": -278.8248596191406, - "logps/rejected": -716.0293579101562, - "loss": 0.0033, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.2073943614959717, - "rewards/margins": 23.659025192260742, - "rewards/rejected": -25.866418838500977, - "step": 1428 - }, - { - "epoch": 0.54, - "learning_rate": 9.065445370707359e-06, - "logits/chosen": -6.5483903884887695, - "logits/rejected": -1.234164834022522, - "logps/chosen": -464.7692565917969, - "logps/rejected": -1473.656982421875, - "loss": 0.0085, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.9220611453056335, - "rewards/margins": 9.19360065460205, - "rewards/rejected": -10.11566162109375, - "step": 1429 - }, - { - "epoch": 0.54, - "learning_rate": 9.053575664491417e-06, - "logits/chosen": 0.2027631402015686, - "logits/rejected": -3.6500024795532227, - "logps/chosen": -349.93890380859375, - "logps/rejected": -190.37686157226562, - "loss": 0.0003, - "rewards/accuracies": 1.0, - "rewards/chosen": -3.117169141769409, - "rewards/margins": 9.195055961608887, - "rewards/rejected": -12.312225341796875, - "step": 1430 - }, - { - "epoch": 0.54, - "learning_rate": 9.04170730359162e-06, - "logits/chosen": -4.281748294830322, - "logits/rejected": -3.3236238956451416, - "logps/chosen": -318.0118408203125, - "logps/rejected": -771.924560546875, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.35390931367874146, - "rewards/margins": 18.75138282775879, - "rewards/rejected": -19.10529136657715, - "step": 1431 - }, - { - "epoch": 0.54, - "learning_rate": 9.029840304878517e-06, - "logits/chosen": -1.352589726448059, - "logits/rejected": -1.5973891019821167, - "logps/chosen": -410.37225341796875, - "logps/rejected": -744.2418212890625, - "loss": 0.0003, - "rewards/accuracies": 1.0, - "rewards/chosen": -5.20003080368042, - "rewards/margins": 24.469263076782227, - "rewards/rejected": -29.669294357299805, - "step": 1432 - }, - { - "epoch": 0.54, - "learning_rate": 9.017974685220716e-06, - "logits/chosen": -4.586985111236572, - "logits/rejected": -2.0893301963806152, - "logps/chosen": -437.53582763671875, - "logps/rejected": -1408.9150390625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.983831763267517, - "rewards/margins": 23.511444091796875, - "rewards/rejected": -21.527612686157227, - "step": 1433 - }, - { - "epoch": 0.54, - "learning_rate": 9.006110461484869e-06, - "logits/chosen": -2.019224166870117, - "logits/rejected": -2.041138172149658, - "logps/chosen": -368.6097412109375, - "logps/rejected": -812.112060546875, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.4629760682582855, - "rewards/margins": 30.38376235961914, - "rewards/rejected": -30.846738815307617, - "step": 1434 - }, - { - "epoch": 0.54, - "learning_rate": 8.994247650535645e-06, - "logits/chosen": -2.8501462936401367, - "logits/rejected": -5.496240139007568, - "logps/chosen": -270.507568359375, - "logps/rejected": -303.9619140625, - "loss": 0.0003, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.742260754108429, - "rewards/margins": 17.865352630615234, - "rewards/rejected": -17.123092651367188, - "step": 1435 - }, - { - "epoch": 0.54, - "learning_rate": 8.982386269235706e-06, - "logits/chosen": -2.200164556503296, - "logits/rejected": -5.130789279937744, - "logps/chosen": -404.0867919921875, - "logps/rejected": -374.9920349121094, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.7653564810752869, - "rewards/margins": 25.044523239135742, - "rewards/rejected": -24.27916717529297, - "step": 1436 - }, - { - "epoch": 0.54, - "learning_rate": 8.970526334445675e-06, - "logits/chosen": -3.228536367416382, - "logits/rejected": -4.815658092498779, - "logps/chosen": -457.76715087890625, - "logps/rejected": -130.16851806640625, - "loss": 0.0003, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.6619293093681335, - "rewards/margins": 8.414311408996582, - "rewards/rejected": -9.076240539550781, - "step": 1437 - }, - { - "epoch": 0.54, - "learning_rate": 8.958667863024127e-06, - "logits/chosen": 0.7737511396408081, - "logits/rejected": -3.108632802963257, - "logps/chosen": -328.87860107421875, - "logps/rejected": -399.0677490234375, - "loss": 0.0867, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.4584289491176605, - "rewards/margins": 24.89116859436035, - "rewards/rejected": -24.4327392578125, - "step": 1438 - }, - { - "epoch": 0.54, - "learning_rate": 8.946810871827548e-06, - "logits/chosen": -4.388679027557373, - "logits/rejected": -1.0018852949142456, - "logps/chosen": -219.32493591308594, - "logps/rejected": -653.052978515625, - "loss": 0.0007, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.605976939201355, - "rewards/margins": 17.056283950805664, - "rewards/rejected": -18.662260055541992, - "step": 1439 - }, - { - "epoch": 0.54, - "learning_rate": 8.934955377710326e-06, - "logits/chosen": -2.4487311840057373, - "logits/rejected": -2.2031192779541016, - "logps/chosen": -160.94577026367188, - "logps/rejected": -394.8059997558594, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -4.314836025238037, - "rewards/margins": 16.25950813293457, - "rewards/rejected": -20.574344635009766, - "step": 1440 - }, - { - "epoch": 0.54, - "learning_rate": 8.923101397524721e-06, - "logits/chosen": -5.275384426116943, - "logits/rejected": -0.8135845065116882, - "logps/chosen": -219.04335021972656, - "logps/rejected": -1237.3818359375, - "loss": 0.001, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.8394729495048523, - "rewards/margins": 42.10513687133789, - "rewards/rejected": -42.944610595703125, - "step": 1441 - }, - { - "epoch": 0.55, - "learning_rate": 8.91124894812084e-06, - "logits/chosen": -7.265462398529053, - "logits/rejected": -8.100560188293457, - "logps/chosen": -106.83404541015625, - "logps/rejected": -276.36767578125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.5886589288711548, - "rewards/margins": 18.3309268951416, - "rewards/rejected": -16.742267608642578, - "step": 1442 - }, - { - "epoch": 0.55, - "learning_rate": 8.899398046346608e-06, - "logits/chosen": -1.4698399305343628, - "logits/rejected": -1.8556890487670898, - "logps/chosen": -335.93841552734375, - "logps/rejected": -671.5421142578125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.10394897311925888, - "rewards/margins": 24.52573013305664, - "rewards/rejected": -24.62967872619629, - "step": 1443 - }, - { - "epoch": 0.55, - "learning_rate": 8.887548709047765e-06, - "logits/chosen": -4.418691158294678, - "logits/rejected": -2.1169135570526123, - "logps/chosen": -294.69415283203125, - "logps/rejected": -650.6444702148438, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.6647247672080994, - "rewards/margins": 15.266427993774414, - "rewards/rejected": -15.93115234375, - "step": 1444 - }, - { - "epoch": 0.55, - "learning_rate": 8.87570095306781e-06, - "logits/chosen": -4.243619441986084, - "logits/rejected": -4.007175445556641, - "logps/chosen": -295.5057373046875, - "logps/rejected": -333.1319274902344, - "loss": 0.1082, - "rewards/accuracies": 0.0, - "rewards/chosen": -3.345410108566284, - "rewards/margins": -0.15515422821044922, - "rewards/rejected": -3.190255880355835, - "step": 1445 - }, - { - "epoch": 0.55, - "learning_rate": 8.863854795248007e-06, - "logits/chosen": -1.0931059122085571, - "logits/rejected": -1.3343820571899414, - "logps/chosen": -218.241943359375, - "logps/rejected": -359.4576416015625, - "loss": 0.0003, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.7806214094161987, - "rewards/margins": 10.589747428894043, - "rewards/rejected": -12.370368957519531, - "step": 1446 - }, - { - "epoch": 0.55, - "learning_rate": 8.85201025242734e-06, - "logits/chosen": -0.49189096689224243, - "logits/rejected": -5.222900867462158, - "logps/chosen": -595.024658203125, - "logps/rejected": -303.56622314453125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.4838074445724487, - "rewards/margins": 18.970590591430664, - "rewards/rejected": -17.486783981323242, - "step": 1447 - }, - { - "epoch": 0.55, - "learning_rate": 8.840167341442505e-06, - "logits/chosen": 0.7609790563583374, - "logits/rejected": -3.6377816200256348, - "logps/chosen": -667.794921875, - "logps/rejected": -641.3125610351562, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -3.9669435024261475, - "rewards/margins": 28.872163772583008, - "rewards/rejected": -32.839107513427734, - "step": 1448 - }, - { - "epoch": 0.55, - "learning_rate": 8.828326079127867e-06, - "logits/chosen": -1.6974968910217285, - "logits/rejected": -4.730013847351074, - "logps/chosen": -260.21954345703125, - "logps/rejected": -119.05999755859375, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.742169201374054, - "rewards/margins": 7.2451887130737305, - "rewards/rejected": -7.987358093261719, - "step": 1449 - }, - { - "epoch": 0.55, - "learning_rate": 8.816486482315459e-06, - "logits/chosen": -6.729095935821533, - "logits/rejected": -3.4218218326568604, - "logps/chosen": -406.1853332519531, - "logps/rejected": -1942.859130859375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.6136718988418579, - "rewards/margins": 25.810083389282227, - "rewards/rejected": -26.423755645751953, - "step": 1450 - }, - { - "epoch": 0.55, - "learning_rate": 8.804648567834943e-06, - "logits/chosen": 0.23474176228046417, - "logits/rejected": -4.250158309936523, - "logps/chosen": -529.8001098632812, - "logps/rejected": -477.4034423828125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.7050110101699829, - "rewards/margins": 28.07280921936035, - "rewards/rejected": -28.777820587158203, - "step": 1451 - }, - { - "epoch": 0.55, - "learning_rate": 8.792812352513584e-06, - "logits/chosen": -6.35849666595459, - "logits/rejected": -2.0309624671936035, - "logps/chosen": -544.7647094726562, - "logps/rejected": -3482.60888671875, - "loss": 0.0004, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.33416748046875, - "rewards/margins": 36.3183708190918, - "rewards/rejected": -36.65253829956055, - "step": 1452 - }, - { - "epoch": 0.55, - "learning_rate": 8.78097785317624e-06, - "logits/chosen": -6.1935601234436035, - "logits/rejected": -1.400423288345337, - "logps/chosen": -389.9393310546875, - "logps/rejected": -2022.272705078125, - "loss": 0.0006, - "rewards/accuracies": 1.0, - "rewards/chosen": -3.174694776535034, - "rewards/margins": 27.170351028442383, - "rewards/rejected": -30.34504508972168, - "step": 1453 - }, - { - "epoch": 0.55, - "learning_rate": 8.769145086645328e-06, - "logits/chosen": 0.2110893577337265, - "logits/rejected": -3.176065444946289, - "logps/chosen": -403.8567810058594, - "logps/rejected": -391.5015869140625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.9715301990509033, - "rewards/margins": 19.887676239013672, - "rewards/rejected": -22.859207153320312, - "step": 1454 - }, - { - "epoch": 0.55, - "learning_rate": 8.757314069740795e-06, - "logits/chosen": -0.98102867603302, - "logits/rejected": -4.890040397644043, - "logps/chosen": -273.9019775390625, - "logps/rejected": -175.9797821044922, - "loss": 0.0002, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.9734588861465454, - "rewards/margins": 10.018327713012695, - "rewards/rejected": -11.99178695678711, - "step": 1455 - }, - { - "epoch": 0.55, - "learning_rate": 8.745484819280108e-06, - "logits/chosen": -1.7585315704345703, - "logits/rejected": -3.6883246898651123, - "logps/chosen": -210.39697265625, - "logps/rejected": -502.3150939941406, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.5476531982421875, - "rewards/margins": 27.662540435791016, - "rewards/rejected": -27.114887237548828, - "step": 1456 - }, - { - "epoch": 0.55, - "learning_rate": 8.733657352078223e-06, - "logits/chosen": -0.969414234161377, - "logits/rejected": -0.8036404848098755, - "logps/chosen": -335.37371826171875, - "logps/rejected": -682.9301147460938, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.45355224609375, - "rewards/margins": 25.175186157226562, - "rewards/rejected": -24.721633911132812, - "step": 1457 - }, - { - "epoch": 0.55, - "learning_rate": 8.721831684947557e-06, - "logits/chosen": -5.022331237792969, - "logits/rejected": -2.1986992359161377, - "logps/chosen": -428.5692138671875, - "logps/rejected": -1224.442626953125, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.24091187119483948, - "rewards/margins": 18.868452072143555, - "rewards/rejected": -19.109363555908203, - "step": 1458 - }, - { - "epoch": 0.55, - "learning_rate": 8.71000783469797e-06, - "logits/chosen": -1.4495658874511719, - "logits/rejected": -1.8876972198486328, - "logps/chosen": -236.88558959960938, - "logps/rejected": -312.25067138671875, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.2418365478515625, - "rewards/margins": 9.326666831970215, - "rewards/rejected": -11.568503379821777, - "step": 1459 - }, - { - "epoch": 0.55, - "learning_rate": 8.69818581813674e-06, - "logits/chosen": 0.3250615894794464, - "logits/rejected": -2.2597873210906982, - "logps/chosen": -287.3686218261719, - "logps/rejected": -729.3392333984375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -5.126500129699707, - "rewards/margins": 38.36231994628906, - "rewards/rejected": -43.48881912231445, - "step": 1460 - }, - { - "epoch": 0.55, - "learning_rate": 8.686365652068536e-06, - "logits/chosen": -1.1039046049118042, - "logits/rejected": -1.1039046049118042, - "logps/chosen": 0.0, - "logps/rejected": 0.0, - "loss": 0.0868, - "rewards/accuracies": 0.0, - "rewards/chosen": 0.0, - "rewards/margins": 0.0, - "rewards/rejected": 0.0, - "step": 1461 - }, - { - "epoch": 0.55, - "learning_rate": 8.674547353295401e-06, - "logits/chosen": -0.9060045480728149, - "logits/rejected": -1.458185076713562, - "logps/chosen": -304.1993408203125, - "logps/rejected": -485.9407653808594, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.7079193592071533, - "rewards/margins": 15.88144588470459, - "rewards/rejected": -17.589365005493164, - "step": 1462 - }, - { - "epoch": 0.55, - "learning_rate": 8.662730938616724e-06, - "logits/chosen": -0.6602842211723328, - "logits/rejected": -2.1621041297912598, - "logps/chosen": -301.13397216796875, - "logps/rejected": -520.3472900390625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.6853821277618408, - "rewards/margins": 20.033788681030273, - "rewards/rejected": -21.71917152404785, - "step": 1463 - }, - { - "epoch": 0.55, - "learning_rate": 8.650916424829213e-06, - "logits/chosen": -5.772907733917236, - "logits/rejected": -1.5233550071716309, - "logps/chosen": -277.58831787109375, - "logps/rejected": -1448.4755859375, - "loss": 0.0003, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.512597680091858, - "rewards/margins": 20.384252548217773, - "rewards/rejected": -21.8968505859375, - "step": 1464 - }, - { - "epoch": 0.55, - "learning_rate": 8.63910382872687e-06, - "logits/chosen": -3.155045747756958, - "logits/rejected": -2.419130563735962, - "logps/chosen": -337.3587646484375, - "logps/rejected": -546.085205078125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.9741851687431335, - "rewards/margins": 15.641093254089355, - "rewards/rejected": -16.615278244018555, - "step": 1465 - }, - { - "epoch": 0.55, - "learning_rate": 8.627293167100975e-06, - "logits/chosen": -5.06260871887207, - "logits/rejected": -0.9777190685272217, - "logps/chosen": -256.7058410644531, - "logps/rejected": -1097.00146484375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.1155669689178467, - "rewards/margins": 30.558238983154297, - "rewards/rejected": -32.673805236816406, - "step": 1466 - }, - { - "epoch": 0.55, - "learning_rate": 8.615484456740062e-06, - "logits/chosen": -0.4878593683242798, - "logits/rejected": -1.4262142181396484, - "logps/chosen": -199.3361053466797, - "logps/rejected": -400.1070556640625, - "loss": 0.0102, - "rewards/accuracies": 1.0, - "rewards/chosen": -3.0250566005706787, - "rewards/margins": 17.23503303527832, - "rewards/rejected": -20.260089874267578, - "step": 1467 - }, - { - "epoch": 0.56, - "learning_rate": 8.603677714429888e-06, - "logits/chosen": -1.4639865159988403, - "logits/rejected": -3.1315414905548096, - "logps/chosen": -486.79638671875, - "logps/rejected": -411.34039306640625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.587432861328125, - "rewards/margins": 17.026350021362305, - "rewards/rejected": -19.61378288269043, - "step": 1468 - }, - { - "epoch": 0.56, - "learning_rate": 8.591872956953409e-06, - "logits/chosen": -5.1880669593811035, - "logits/rejected": -2.0571951866149902, - "logps/chosen": -459.00341796875, - "logps/rejected": -1659.6336669921875, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -3.04392409324646, - "rewards/margins": 24.97603416442871, - "rewards/rejected": -28.01995849609375, - "step": 1469 - }, - { - "epoch": 0.56, - "learning_rate": 8.580070201090759e-06, - "logits/chosen": -3.008727550506592, - "logits/rejected": -4.106616973876953, - "logps/chosen": -683.7614135742188, - "logps/rejected": -492.7938537597656, - "loss": 0.0006, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.932830810546875, - "rewards/margins": 25.596769332885742, - "rewards/rejected": -24.663938522338867, - "step": 1470 - }, - { - "epoch": 0.56, - "learning_rate": 8.568269463619243e-06, - "logits/chosen": -5.653758525848389, - "logits/rejected": -1.2169750928878784, - "logps/chosen": -339.90277099609375, - "logps/rejected": -1562.241943359375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.23734131455421448, - "rewards/margins": 38.9819221496582, - "rewards/rejected": -39.21926498413086, - "step": 1471 - }, - { - "epoch": 0.56, - "learning_rate": 8.556470761313275e-06, - "logits/chosen": -0.40801066160202026, - "logits/rejected": -5.328763008117676, - "logps/chosen": -205.79042053222656, - "logps/rejected": -191.63949584960938, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -4.013832092285156, - "rewards/margins": 10.220173835754395, - "rewards/rejected": -14.23400592803955, - "step": 1472 - }, - { - "epoch": 0.56, - "learning_rate": 8.54467411094439e-06, - "logits/chosen": -1.6956275701522827, - "logits/rejected": -4.267882347106934, - "logps/chosen": -729.9274291992188, - "logps/rejected": -845.6298828125, - "loss": 0.0003, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.512725830078125, - "rewards/margins": 30.94257926940918, - "rewards/rejected": -31.455305099487305, - "step": 1473 - }, - { - "epoch": 0.56, - "learning_rate": 8.532879529281199e-06, - "logits/chosen": -1.8710488080978394, - "logits/rejected": -1.1440980434417725, - "logps/chosen": -335.3839111328125, - "logps/rejected": -729.1693115234375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -7.012060642242432, - "rewards/margins": 20.213336944580078, - "rewards/rejected": -27.22539710998535, - "step": 1474 - }, - { - "epoch": 0.56, - "learning_rate": 8.521087033089374e-06, - "logits/chosen": -3.1621451377868652, - "logits/rejected": -3.3095428943634033, - "logps/chosen": -470.3033752441406, - "logps/rejected": -797.3219604492188, - "loss": 0.0003, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.930920362472534, - "rewards/margins": 16.472980499267578, - "rewards/rejected": -19.403900146484375, - "step": 1475 - }, - { - "epoch": 0.56, - "learning_rate": 8.509296639131628e-06, - "logits/chosen": -0.46047285199165344, - "logits/rejected": -3.8321115970611572, - "logps/chosen": -557.8777465820312, - "logps/rejected": -419.8601989746094, - "loss": 0.0022, - "rewards/accuracies": 1.0, - "rewards/chosen": 2.557281494140625, - "rewards/margins": 20.361358642578125, - "rewards/rejected": -17.8040771484375, - "step": 1476 - }, - { - "epoch": 0.56, - "learning_rate": 8.497508364167678e-06, - "logits/chosen": -1.6782448291778564, - "logits/rejected": -4.695511817932129, - "logps/chosen": -223.58404541015625, - "logps/rejected": -251.5269317626953, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.3990159034729004, - "rewards/margins": 16.110864639282227, - "rewards/rejected": -18.50988006591797, - "step": 1477 - }, - { - "epoch": 0.56, - "learning_rate": 8.485722224954237e-06, - "logits/chosen": -3.528632879257202, - "logits/rejected": -1.5426387786865234, - "logps/chosen": -508.7057189941406, - "logps/rejected": -819.1124267578125, - "loss": 0.0008, - "rewards/accuracies": 1.0, - "rewards/chosen": -3.8804383277893066, - "rewards/margins": 5.11268949508667, - "rewards/rejected": -8.993127822875977, - "step": 1478 - }, - { - "epoch": 0.56, - "learning_rate": 8.473938238244972e-06, - "logits/chosen": -2.3307816982269287, - "logits/rejected": -3.4964494705200195, - "logps/chosen": -249.86837768554688, - "logps/rejected": -623.6058959960938, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.6737183332443237, - "rewards/margins": 32.244693756103516, - "rewards/rejected": -30.570974349975586, - "step": 1479 - }, - { - "epoch": 0.56, - "learning_rate": 8.462156420790493e-06, - "logits/chosen": -2.6508371829986572, - "logits/rejected": -4.192511558532715, - "logps/chosen": -237.14065551757812, - "logps/rejected": -151.54379272460938, - "loss": 0.0003, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.4736099243164062, - "rewards/margins": 6.852564811706543, - "rewards/rejected": -9.32617473602295, - "step": 1480 - }, - { - "epoch": 0.56, - "learning_rate": 8.45037678933834e-06, - "logits/chosen": -5.880062580108643, - "logits/rejected": -1.149057388305664, - "logps/chosen": -252.50558471679688, - "logps/rejected": -2039.545654296875, - "loss": 0.0884, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.5618149042129517, - "rewards/margins": 27.14875602722168, - "rewards/rejected": -28.7105712890625, - "step": 1481 - }, - { - "epoch": 0.56, - "learning_rate": 8.438599360632927e-06, - "logits/chosen": -6.7383222579956055, - "logits/rejected": -1.2213771343231201, - "logps/chosen": -257.8107604980469, - "logps/rejected": -4028.93505859375, - "loss": 0.0136, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.486955404281616, - "rewards/margins": 42.1276969909668, - "rewards/rejected": -44.61465072631836, - "step": 1482 - }, - { - "epoch": 0.56, - "learning_rate": 8.426824151415548e-06, - "logits/chosen": -3.063838005065918, - "logits/rejected": -3.4095683097839355, - "logps/chosen": -240.94070434570312, - "logps/rejected": -400.8323669433594, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -3.9840729236602783, - "rewards/margins": 8.66339111328125, - "rewards/rejected": -12.64746379852295, - "step": 1483 - }, - { - "epoch": 0.56, - "learning_rate": 8.415051178424337e-06, - "logits/chosen": -0.33005186915397644, - "logits/rejected": -5.096457004547119, - "logps/chosen": -349.123046875, - "logps/rejected": -141.42849731445312, - "loss": 0.0006, - "rewards/accuracies": 1.0, - "rewards/chosen": -4.127871990203857, - "rewards/margins": 6.5944743156433105, - "rewards/rejected": -10.722346305847168, - "step": 1484 - }, - { - "epoch": 0.56, - "learning_rate": 8.403280458394255e-06, - "logits/chosen": -2.5906078815460205, - "logits/rejected": -2.23753023147583, - "logps/chosen": -453.9968566894531, - "logps/rejected": -815.1961059570312, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.02363281324505806, - "rewards/margins": 44.35409164428711, - "rewards/rejected": -44.377723693847656, - "step": 1485 - }, - { - "epoch": 0.56, - "learning_rate": 8.391512008057049e-06, - "logits/chosen": -3.4534084796905518, - "logits/rejected": -1.3773826360702515, - "logps/chosen": -592.8170166015625, - "logps/rejected": -1310.4326171875, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.9323364496231079, - "rewards/margins": 25.504053115844727, - "rewards/rejected": -24.57171630859375, - "step": 1486 - }, - { - "epoch": 0.56, - "learning_rate": 8.379745844141253e-06, - "logits/chosen": -1.4802844524383545, - "logits/rejected": -5.268544673919678, - "logps/chosen": -1338.887451171875, - "logps/rejected": -554.9526977539062, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -7.494604587554932, - "rewards/margins": 20.73023223876953, - "rewards/rejected": -28.224836349487305, - "step": 1487 - }, - { - "epoch": 0.56, - "learning_rate": 8.367981983372143e-06, - "logits/chosen": -1.6707241535186768, - "logits/rejected": -2.831881284713745, - "logps/chosen": -407.49127197265625, - "logps/rejected": -616.605712890625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.09112548828125, - "rewards/margins": 30.312625885009766, - "rewards/rejected": -30.221500396728516, - "step": 1488 - }, - { - "epoch": 0.56, - "learning_rate": 8.35622044247172e-06, - "logits/chosen": -0.9443669319152832, - "logits/rejected": -1.3126200437545776, - "logps/chosen": -276.55157470703125, - "logps/rejected": -420.4788818359375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -4.521890163421631, - "rewards/margins": 13.771108627319336, - "rewards/rejected": -18.292999267578125, - "step": 1489 - }, - { - "epoch": 0.56, - "learning_rate": 8.3444612381587e-06, - "logits/chosen": -6.658431529998779, - "logits/rejected": -6.135228633880615, - "logps/chosen": -306.56451416015625, - "logps/rejected": -1602.375, - "loss": 0.0918, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.896350145339966, - "rewards/margins": 15.866198539733887, - "rewards/rejected": -18.762548446655273, - "step": 1490 - }, - { - "epoch": 0.56, - "learning_rate": 8.332704387148463e-06, - "logits/chosen": -1.894669771194458, - "logits/rejected": -1.161506175994873, - "logps/chosen": -316.50091552734375, - "logps/rejected": -615.2626953125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.1370025873184204, - "rewards/margins": 19.443201065063477, - "rewards/rejected": -18.306198120117188, - "step": 1491 - }, - { - "epoch": 0.56, - "learning_rate": 8.320949906153048e-06, - "logits/chosen": -2.370445728302002, - "logits/rejected": 0.13371531665325165, - "logps/chosen": -427.516357421875, - "logps/rejected": -1302.03662109375, - "loss": 0.0005, - "rewards/accuracies": 1.0, - "rewards/chosen": -5.2269287109375, - "rewards/margins": 42.932613372802734, - "rewards/rejected": -48.159542083740234, - "step": 1492 - }, - { - "epoch": 0.56, - "learning_rate": 8.309197811881128e-06, - "logits/chosen": -4.0925612449646, - "logits/rejected": -0.6299101114273071, - "logps/chosen": -404.72833251953125, - "logps/rejected": -1116.7259521484375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.800329566001892, - "rewards/margins": 21.589618682861328, - "rewards/rejected": -23.38994789123535, - "step": 1493 - }, - { - "epoch": 0.56, - "learning_rate": 8.297448121037978e-06, - "logits/chosen": -7.933444023132324, - "logits/rejected": -2.6082351207733154, - "logps/chosen": -291.51092529296875, - "logps/rejected": -1575.977294921875, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -4.68446683883667, - "rewards/margins": 16.6235408782959, - "rewards/rejected": -21.308008193969727, - "step": 1494 - }, - { - "epoch": 0.57, - "learning_rate": 8.285700850325467e-06, - "logits/chosen": -0.45987242460250854, - "logits/rejected": -3.7477798461914062, - "logps/chosen": -381.69061279296875, - "logps/rejected": -306.0849914550781, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 2.060589551925659, - "rewards/margins": 23.179948806762695, - "rewards/rejected": -21.119359970092773, - "step": 1495 - }, - { - "epoch": 0.57, - "learning_rate": 8.27395601644201e-06, - "logits/chosen": -1.4436314105987549, - "logits/rejected": -2.5167529582977295, - "logps/chosen": -247.020263671875, - "logps/rejected": -581.4817504882812, - "loss": 0.0004, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.5934784412384033, - "rewards/margins": 34.97777557373047, - "rewards/rejected": -36.57125473022461, - "step": 1496 - }, - { - "epoch": 0.57, - "learning_rate": 8.262213636082567e-06, - "logits/chosen": -5.2864789962768555, - "logits/rejected": -0.8954302668571472, - "logps/chosen": -328.6760559082031, - "logps/rejected": -1187.61767578125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.121966600418091, - "rewards/margins": 18.578533172607422, - "rewards/rejected": -20.70050048828125, - "step": 1497 - }, - { - "epoch": 0.57, - "learning_rate": 8.250473725938608e-06, - "logits/chosen": -0.023029958829283714, - "logits/rejected": -4.389449119567871, - "logps/chosen": -542.4865112304688, - "logps/rejected": -548.865234375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.011633276939392, - "rewards/margins": 30.89813232421875, - "rewards/rejected": -29.886499404907227, - "step": 1498 - }, - { - "epoch": 0.57, - "learning_rate": 8.238736302698096e-06, - "logits/chosen": -3.8174619674682617, - "logits/rejected": -1.7038331031799316, - "logps/chosen": -407.4464111328125, - "logps/rejected": -1141.4879150390625, - "loss": 0.0015, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.21688233315944672, - "rewards/margins": 24.514934539794922, - "rewards/rejected": -24.7318172454834, - "step": 1499 - }, - { - "epoch": 0.57, - "learning_rate": 8.227001383045453e-06, - "logits/chosen": -2.0118937492370605, - "logits/rejected": -4.476647853851318, - "logps/chosen": -517.8910522460938, - "logps/rejected": -379.0518798828125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.3255554139614105, - "rewards/margins": 16.09726333618164, - "rewards/rejected": -16.422819137573242, - "step": 1500 - }, - { - "epoch": 0.57, - "learning_rate": 8.215268983661547e-06, - "logits/chosen": -3.3516018390655518, - "logits/rejected": -2.557251453399658, - "logps/chosen": -308.1563720703125, - "logps/rejected": -640.5975341796875, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -3.4383697509765625, - "rewards/margins": 21.369403839111328, - "rewards/rejected": -24.80777359008789, - "step": 1501 - }, - { - "epoch": 0.57, - "learning_rate": 8.203539121223653e-06, - "logits/chosen": -0.6763976812362671, - "logits/rejected": -2.38930606842041, - "logps/chosen": -239.28392028808594, - "logps/rejected": -451.5226745605469, - "loss": 0.0009, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.4041976928710938, - "rewards/margins": 24.4553279876709, - "rewards/rejected": -26.859525680541992, - "step": 1502 - }, - { - "epoch": 0.57, - "learning_rate": 8.191811812405453e-06, - "logits/chosen": -4.0055108070373535, - "logits/rejected": -0.515762448310852, - "logps/chosen": -901.7894287109375, - "logps/rejected": -1985.3251953125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.337487816810608, - "rewards/margins": 25.543811798095703, - "rewards/rejected": -26.88129997253418, - "step": 1503 - }, - { - "epoch": 0.57, - "learning_rate": 8.180087073876995e-06, - "logits/chosen": -5.485777378082275, - "logits/rejected": -3.1949920654296875, - "logps/chosen": -289.3025817871094, - "logps/rejected": -2921.007568359375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.7836549282073975, - "rewards/margins": 65.79005432128906, - "rewards/rejected": -68.5737075805664, - "step": 1504 - }, - { - "epoch": 0.57, - "learning_rate": 8.16836492230467e-06, - "logits/chosen": -6.101933002471924, - "logits/rejected": -0.8497409224510193, - "logps/chosen": -209.74822998046875, - "logps/rejected": -1600.123779296875, - "loss": 0.0002, - "rewards/accuracies": 1.0, - "rewards/chosen": -3.071820020675659, - "rewards/margins": 29.470294952392578, - "rewards/rejected": -32.5421142578125, - "step": 1505 - }, - { - "epoch": 0.57, - "learning_rate": 8.156645374351194e-06, - "logits/chosen": 0.3446216881275177, - "logits/rejected": -6.9101948738098145, - "logps/chosen": -541.53759765625, - "logps/rejected": -209.70639038085938, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.959698438644409, - "rewards/margins": 8.594461441040039, - "rewards/rejected": -11.554160118103027, - "step": 1506 - }, - { - "epoch": 0.57, - "learning_rate": 8.144928446675578e-06, - "logits/chosen": -4.731912612915039, - "logits/rejected": -2.817486047744751, - "logps/chosen": -591.4027099609375, - "logps/rejected": -1876.568603515625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.583941638469696, - "rewards/margins": 60.25221633911133, - "rewards/rejected": -60.836158752441406, - "step": 1507 - }, - { - "epoch": 0.57, - "learning_rate": 8.133214155933118e-06, - "logits/chosen": -1.6409392356872559, - "logits/rejected": -1.4019181728363037, - "logps/chosen": -266.6821594238281, - "logps/rejected": -459.6664733886719, - "loss": 0.0005, - "rewards/accuracies": 1.0, - "rewards/chosen": -3.5936906337738037, - "rewards/margins": 11.776154518127441, - "rewards/rejected": -15.369845390319824, - "step": 1508 - }, - { - "epoch": 0.57, - "learning_rate": 8.121502518775355e-06, - "logits/chosen": 1.0703518390655518, - "logits/rejected": -2.5025596618652344, - "logps/chosen": -546.90234375, - "logps/rejected": -424.235595703125, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.750152587890625, - "rewards/margins": 13.590681076049805, - "rewards/rejected": -16.34083366394043, - "step": 1509 - }, - { - "epoch": 0.57, - "learning_rate": 8.109793551850057e-06, - "logits/chosen": -3.014331579208374, - "logits/rejected": -2.1674439907073975, - "logps/chosen": -183.7606964111328, - "logps/rejected": -453.30230712890625, - "loss": 0.0076, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.212275743484497, - "rewards/margins": 12.324355125427246, - "rewards/rejected": -13.536630630493164, - "step": 1510 - }, - { - "epoch": 0.57, - "learning_rate": 8.0980872718012e-06, - "logits/chosen": -2.8088204860687256, - "logits/rejected": -1.6753779649734497, - "logps/chosen": -317.4294128417969, - "logps/rejected": -572.7408447265625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.1882904767990112, - "rewards/margins": 16.208030700683594, - "rewards/rejected": -17.396320343017578, - "step": 1511 - }, - { - "epoch": 0.57, - "learning_rate": 8.086383695268937e-06, - "logits/chosen": -1.082186222076416, - "logits/rejected": -1.108720064163208, - "logps/chosen": -180.197265625, - "logps/rejected": -258.24658203125, - "loss": 0.0034, - "rewards/accuracies": 1.0, - "rewards/chosen": -3.345611572265625, - "rewards/margins": 3.7445435523986816, - "rewards/rejected": -7.090155124664307, - "step": 1512 - }, - { - "epoch": 0.57, - "learning_rate": 8.074682838889581e-06, - "logits/chosen": -2.800203323364258, - "logits/rejected": -4.075171947479248, - "logps/chosen": -426.3487854003906, - "logps/rejected": -559.043212890625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.1673920154571533, - "rewards/margins": 20.531038284301758, - "rewards/rejected": -21.69843101501465, - "step": 1513 - }, - { - "epoch": 0.57, - "learning_rate": 8.062984719295576e-06, - "logits/chosen": -3.662621021270752, - "logits/rejected": -2.4066858291625977, - "logps/chosen": -315.55072021484375, - "logps/rejected": -1204.780029296875, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.23664550483226776, - "rewards/margins": 43.52602767944336, - "rewards/rejected": -43.762672424316406, - "step": 1514 - }, - { - "epoch": 0.57, - "learning_rate": 8.051289353115483e-06, - "logits/chosen": -1.7749648094177246, - "logits/rejected": -2.5902655124664307, - "logps/chosen": -341.08294677734375, - "logps/rejected": -795.7887573242188, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.21080322563648224, - "rewards/margins": 38.5504264831543, - "rewards/rejected": -38.339622497558594, - "step": 1515 - }, - { - "epoch": 0.57, - "learning_rate": 8.039596756973928e-06, - "logits/chosen": -1.826314926147461, - "logits/rejected": -1.7920678853988647, - "logps/chosen": -283.8103942871094, - "logps/rejected": -486.05633544921875, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.8669707775115967, - "rewards/margins": 17.8405704498291, - "rewards/rejected": -20.70754051208496, - "step": 1516 - }, - { - "epoch": 0.57, - "learning_rate": 8.027906947491634e-06, - "logits/chosen": -0.07085296511650085, - "logits/rejected": -4.792103290557861, - "logps/chosen": -488.1961669921875, - "logps/rejected": -434.44134521484375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.5771149396896362, - "rewards/margins": 31.168743133544922, - "rewards/rejected": -29.591629028320312, - "step": 1517 - }, - { - "epoch": 0.57, - "learning_rate": 8.01621994128533e-06, - "logits/chosen": -2.7837562561035156, - "logits/rejected": -0.30493754148483276, - "logps/chosen": -513.4822998046875, - "logps/rejected": -1032.013427734375, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.80877685546875, - "rewards/margins": 20.747669219970703, - "rewards/rejected": -19.938892364501953, - "step": 1518 - }, - { - "epoch": 0.57, - "learning_rate": 8.00453575496778e-06, - "logits/chosen": -7.238423824310303, - "logits/rejected": -2.316061496734619, - "logps/chosen": -328.71307373046875, - "logps/rejected": -3639.66357421875, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.8603088855743408, - "rewards/margins": 38.92189407348633, - "rewards/rejected": -40.782203674316406, - "step": 1519 - }, - { - "epoch": 0.57, - "learning_rate": 7.99285440514773e-06, - "logits/chosen": -2.028505563735962, - "logits/rejected": -2.3341732025146484, - "logps/chosen": -334.65771484375, - "logps/rejected": -552.7000122070312, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.339508056640625, - "rewards/margins": 18.905420303344727, - "rewards/rejected": -19.24492835998535, - "step": 1520 - }, - { - "epoch": 0.58, - "learning_rate": 7.9811759084299e-06, - "logits/chosen": -5.943637847900391, - "logits/rejected": -1.2549442052841187, - "logps/chosen": -508.6832275390625, - "logps/rejected": -2174.76171875, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.2716125547885895, - "rewards/margins": 23.744037628173828, - "rewards/rejected": -23.47242546081543, - "step": 1521 - }, - { - "epoch": 0.58, - "learning_rate": 7.969500281414947e-06, - "logits/chosen": -1.9748414754867554, - "logits/rejected": -2.145195960998535, - "logps/chosen": -443.12884521484375, - "logps/rejected": -632.2881469726562, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -4.281256198883057, - "rewards/margins": 11.822242736816406, - "rewards/rejected": -16.103498458862305, - "step": 1522 - }, - { - "epoch": 0.58, - "learning_rate": 7.95782754069946e-06, - "logits/chosen": -5.264256477355957, - "logits/rejected": -5.117517471313477, - "logps/chosen": -259.7140197753906, - "logps/rejected": -411.0413818359375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.3888076543807983, - "rewards/margins": 8.480229377746582, - "rewards/rejected": -9.869036674499512, - "step": 1523 - }, - { - "epoch": 0.58, - "learning_rate": 7.946157702875911e-06, - "logits/chosen": -1.4620404243469238, - "logits/rejected": -5.778438091278076, - "logps/chosen": -412.24432373046875, - "logps/rejected": -393.3521423339844, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.6259796023368835, - "rewards/margins": 26.744693756103516, - "rewards/rejected": -26.11871337890625, - "step": 1524 - }, - { - "epoch": 0.58, - "learning_rate": 7.934490784532657e-06, - "logits/chosen": -1.571434497833252, - "logits/rejected": -1.913867712020874, - "logps/chosen": -417.2557067871094, - "logps/rejected": -743.98193359375, - "loss": 0.0019, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.204290747642517, - "rewards/margins": 26.838600158691406, - "rewards/rejected": -28.042890548706055, - "step": 1525 - }, - { - "epoch": 0.58, - "learning_rate": 7.922826802253904e-06, - "logits/chosen": -1.0131378173828125, - "logits/rejected": -0.6249361038208008, - "logps/chosen": -786.2881469726562, - "logps/rejected": -1050.092529296875, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -3.419049024581909, - "rewards/margins": 11.866192817687988, - "rewards/rejected": -15.285242080688477, - "step": 1526 - }, - { - "epoch": 0.58, - "learning_rate": 7.91116577261968e-06, - "logits/chosen": -4.495415687561035, - "logits/rejected": 0.07045896351337433, - "logps/chosen": -282.1162109375, - "logps/rejected": -917.4540405273438, - "loss": 0.0004, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.997997999191284, - "rewards/margins": 17.324298858642578, - "rewards/rejected": -20.322296142578125, - "step": 1527 - }, - { - "epoch": 0.58, - "learning_rate": 7.899507712205818e-06, - "logits/chosen": -3.3515260219573975, - "logits/rejected": -1.6525427103042603, - "logps/chosen": -957.99853515625, - "logps/rejected": -2070.58837890625, - "loss": 0.0043, - "rewards/accuracies": 1.0, - "rewards/chosen": -6.057116985321045, - "rewards/margins": 54.983726501464844, - "rewards/rejected": -61.04084396362305, - "step": 1528 - }, - { - "epoch": 0.58, - "learning_rate": 7.887852637583927e-06, - "logits/chosen": -5.287898540496826, - "logits/rejected": -2.0853240489959717, - "logps/chosen": -311.18951416015625, - "logps/rejected": -2418.63818359375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.816278100013733, - "rewards/margins": 41.73582458496094, - "rewards/rejected": -43.552101135253906, - "step": 1529 - }, - { - "epoch": 0.58, - "learning_rate": 7.876200565321377e-06, - "logits/chosen": -0.9981927275657654, - "logits/rejected": -2.7408738136291504, - "logps/chosen": -279.983642578125, - "logps/rejected": -472.4093322753906, - "loss": 0.0033, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.015008568763733, - "rewards/margins": 23.08350372314453, - "rewards/rejected": -24.098512649536133, - "step": 1530 - }, - { - "epoch": 0.58, - "learning_rate": 7.864551511981269e-06, - "logits/chosen": -1.5522964000701904, - "logits/rejected": -4.2874627113342285, - "logps/chosen": -206.59005737304688, - "logps/rejected": -213.61386108398438, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.9623138904571533, - "rewards/margins": 11.827213287353516, - "rewards/rejected": -13.78952693939209, - "step": 1531 - }, - { - "epoch": 0.58, - "learning_rate": 7.852905494122412e-06, - "logits/chosen": -6.492083549499512, - "logits/rejected": -1.343627691268921, - "logps/chosen": -964.7706298828125, - "logps/rejected": -3958.1650390625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 2.0814454555511475, - "rewards/margins": 62.519920349121094, - "rewards/rejected": -60.4384765625, - "step": 1532 - }, - { - "epoch": 0.58, - "learning_rate": 7.841262528299296e-06, - "logits/chosen": -2.7377781867980957, - "logits/rejected": -1.4533097743988037, - "logps/chosen": -398.2926025390625, - "logps/rejected": -852.28125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.077056884765625, - "rewards/margins": 24.9825382232666, - "rewards/rejected": -27.059595108032227, - "step": 1533 - }, - { - "epoch": 0.58, - "learning_rate": 7.829622631062079e-06, - "logits/chosen": -4.0439677238464355, - "logits/rejected": -1.6597371101379395, - "logps/chosen": -353.3988342285156, - "logps/rejected": -1266.150390625, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.2855377197265625, - "rewards/margins": 35.74815368652344, - "rewards/rejected": -36.03369140625, - "step": 1534 - }, - { - "epoch": 0.58, - "learning_rate": 7.817985818956552e-06, - "logits/chosen": -2.9465513229370117, - "logits/rejected": -5.010274887084961, - "logps/chosen": -178.93177795410156, - "logps/rejected": -190.25180053710938, - "loss": 0.0005, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.839019775390625, - "rewards/margins": 5.725890636444092, - "rewards/rejected": -6.564910411834717, - "step": 1535 - }, - { - "epoch": 0.58, - "learning_rate": 7.806352108524129e-06, - "logits/chosen": -4.232742786407471, - "logits/rejected": -0.06477980315685272, - "logps/chosen": -419.11334228515625, - "logps/rejected": -1057.477294921875, - "loss": 0.0031, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.528106689453125, - "rewards/margins": 23.676776885986328, - "rewards/rejected": -26.204883575439453, - "step": 1536 - }, - { - "epoch": 0.58, - "learning_rate": 7.794721516301804e-06, - "logits/chosen": -4.121250152587891, - "logits/rejected": -1.1991345882415771, - "logps/chosen": -204.80245971679688, - "logps/rejected": -602.9429931640625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.978643774986267, - "rewards/margins": 17.710824966430664, - "rewards/rejected": -19.689468383789062, - "step": 1537 - }, - { - "epoch": 0.58, - "learning_rate": 7.783094058822147e-06, - "logits/chosen": -3.5083866119384766, - "logits/rejected": -0.8759223222732544, - "logps/chosen": -345.32684326171875, - "logps/rejected": -817.717041015625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.879736304283142, - "rewards/margins": 13.991864204406738, - "rewards/rejected": -15.871600151062012, - "step": 1538 - }, - { - "epoch": 0.58, - "learning_rate": 7.771469752613267e-06, - "logits/chosen": -3.854048728942871, - "logits/rejected": -1.8195035457611084, - "logps/chosen": -505.5870056152344, - "logps/rejected": -1059.429931640625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -5.523611545562744, - "rewards/margins": 23.160667419433594, - "rewards/rejected": -28.68427848815918, - "step": 1539 - }, - { - "epoch": 0.58, - "learning_rate": 7.759848614198795e-06, - "logits/chosen": -1.730194091796875, - "logits/rejected": -3.3386895656585693, - "logps/chosen": -288.2272644042969, - "logps/rejected": -240.7576904296875, - "loss": 0.0048, - "rewards/accuracies": 1.0, - "rewards/chosen": -5.473681926727295, - "rewards/margins": 3.246786594390869, - "rewards/rejected": -8.720468521118164, - "step": 1540 - }, - { - "epoch": 0.58, - "learning_rate": 7.74823066009786e-06, - "logits/chosen": -0.4520922303199768, - "logits/rejected": -2.877636432647705, - "logps/chosen": -499.2132568359375, - "logps/rejected": -282.96954345703125, - "loss": 0.0017, - "rewards/accuracies": 1.0, - "rewards/chosen": -3.3849732875823975, - "rewards/margins": 12.59760570526123, - "rewards/rejected": -15.982579231262207, - "step": 1541 - }, - { - "epoch": 0.58, - "learning_rate": 7.736615906825065e-06, - "logits/chosen": -0.21535944938659668, - "logits/rejected": -2.1929492950439453, - "logps/chosen": -315.3226318359375, - "logps/rejected": -572.0932006835938, - "loss": 0.0324, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.938952684402466, - "rewards/margins": 26.622344970703125, - "rewards/rejected": -29.561298370361328, - "step": 1542 - }, - { - "epoch": 0.58, - "learning_rate": 7.72500437089046e-06, - "logits/chosen": -2.157404661178589, - "logits/rejected": -1.9566930532455444, - "logps/chosen": -367.8067321777344, - "logps/rejected": -1015.9075927734375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.46029359102249146, - "rewards/margins": 46.77517318725586, - "rewards/rejected": -46.31488037109375, - "step": 1543 - }, - { - "epoch": 0.58, - "learning_rate": 7.713396068799521e-06, - "logits/chosen": -7.6717848777771, - "logits/rejected": -2.143686056137085, - "logps/chosen": -367.8266906738281, - "logps/rejected": -1528.6124267578125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.6530914306640625, - "rewards/margins": 9.113656997680664, - "rewards/rejected": -10.766748428344727, - "step": 1544 - }, - { - "epoch": 0.58, - "learning_rate": 7.701791017053137e-06, - "logits/chosen": -2.7715518474578857, - "logits/rejected": -1.4762383699417114, - "logps/chosen": -455.3709411621094, - "logps/rejected": -779.099609375, - "loss": 0.0004, - "rewards/accuracies": 1.0, - "rewards/chosen": -3.21360182762146, - "rewards/margins": 17.302791595458984, - "rewards/rejected": -20.516393661499023, - "step": 1545 - }, - { - "epoch": 0.58, - "learning_rate": 7.690189232147566e-06, - "logits/chosen": -2.855215311050415, - "logits/rejected": -1.9881147146224976, - "logps/chosen": -170.54232788085938, - "logps/rejected": -411.7854309082031, - "loss": 0.0009, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.445281982421875, - "rewards/margins": 8.378409385681152, - "rewards/rejected": -10.823691368103027, - "step": 1546 - }, - { - "epoch": 0.58, - "learning_rate": 7.678590730574429e-06, - "logits/chosen": -5.527812957763672, - "logits/rejected": -2.34340763092041, - "logps/chosen": -934.5831298828125, - "logps/rejected": -2871.02734375, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -4.174670696258545, - "rewards/margins": 21.746912002563477, - "rewards/rejected": -25.92158317565918, - "step": 1547 - }, - { - "epoch": 0.59, - "learning_rate": 7.666995528820673e-06, - "logits/chosen": -5.155070781707764, - "logits/rejected": -1.639756441116333, - "logps/chosen": -172.7230224609375, - "logps/rejected": -791.1527099609375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.9156434535980225, - "rewards/margins": 13.370819091796875, - "rewards/rejected": -16.286462783813477, - "step": 1548 - }, - { - "epoch": 0.59, - "learning_rate": 7.655403643368557e-06, - "logits/chosen": -1.6597506999969482, - "logits/rejected": -2.0333333015441895, - "logps/chosen": -323.15960693359375, - "logps/rejected": -463.3924865722656, - "loss": 0.0174, - "rewards/accuracies": 1.0, - "rewards/chosen": -5.180713176727295, - "rewards/margins": 13.410411834716797, - "rewards/rejected": -18.59112548828125, - "step": 1549 - }, - { - "epoch": 0.59, - "learning_rate": 7.64381509069563e-06, - "logits/chosen": -0.5316349864006042, - "logits/rejected": -0.9807306528091431, - "logps/chosen": -534.5026245117188, - "logps/rejected": -1568.662353515625, - "loss": 0.0022, - "rewards/accuracies": 1.0, - "rewards/chosen": 3.6227967739105225, - "rewards/margins": 81.13673400878906, - "rewards/rejected": -77.5139389038086, - "step": 1550 - }, - { - "epoch": 0.59, - "learning_rate": 7.632229887274699e-06, - "logits/chosen": -6.5421295166015625, - "logits/rejected": -1.0551657676696777, - "logps/chosen": -591.2350463867188, - "logps/rejected": -3408.3193359375, - "loss": 0.0009, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.442340135574341, - "rewards/margins": 30.727191925048828, - "rewards/rejected": -33.169532775878906, - "step": 1551 - }, - { - "epoch": 0.59, - "learning_rate": 7.620648049573815e-06, - "logits/chosen": -7.865602016448975, - "logits/rejected": -3.453221082687378, - "logps/chosen": -805.264404296875, - "logps/rejected": -2571.15185546875, - "loss": 0.0014, - "rewards/accuracies": 1.0, - "rewards/chosen": -4.737170696258545, - "rewards/margins": 27.077526092529297, - "rewards/rejected": -31.814697265625, - "step": 1552 - }, - { - "epoch": 0.59, - "learning_rate": 7.609069594056234e-06, - "logits/chosen": -2.328727960586548, - "logits/rejected": -1.6908949613571167, - "logps/chosen": -184.58843994140625, - "logps/rejected": -572.948974609375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.085601806640625, - "rewards/margins": 31.792236328125, - "rewards/rejected": -32.877838134765625, - "step": 1553 - }, - { - "epoch": 0.59, - "learning_rate": 7.597494537180423e-06, - "logits/chosen": -5.569035053253174, - "logits/rejected": -1.3577680587768555, - "logps/chosen": -803.976318359375, - "logps/rejected": -2380.745849609375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.848095715045929, - "rewards/margins": 26.337623596191406, - "rewards/rejected": -27.185718536376953, - "step": 1554 - }, - { - "epoch": 0.59, - "learning_rate": 7.585922895399997e-06, - "logits/chosen": -2.9743497371673584, - "logits/rejected": -1.1379618644714355, - "logps/chosen": -703.9278564453125, - "logps/rejected": -1663.11279296875, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.3938964903354645, - "rewards/margins": 36.608036041259766, - "rewards/rejected": -36.21413803100586, - "step": 1555 - }, - { - "epoch": 0.59, - "learning_rate": 7.574354685163732e-06, - "logits/chosen": -0.02684994786977768, - "logits/rejected": 0.33749833703041077, - "logps/chosen": -365.9678955078125, - "logps/rejected": -505.5304870605469, - "loss": 0.0002, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.839120626449585, - "rewards/margins": 8.6325044631958, - "rewards/rejected": -11.471625328063965, - "step": 1556 - }, - { - "epoch": 0.59, - "learning_rate": 7.562789922915518e-06, - "logits/chosen": -0.7896668910980225, - "logits/rejected": -1.485767126083374, - "logps/chosen": -325.8497314453125, - "logps/rejected": -839.407470703125, - "loss": 0.0012, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.3125367164611816, - "rewards/margins": 44.227630615234375, - "rewards/rejected": -46.54016876220703, - "step": 1557 - }, - { - "epoch": 0.59, - "learning_rate": 7.551228625094349e-06, - "logits/chosen": -5.956546306610107, - "logits/rejected": -1.0091776847839355, - "logps/chosen": -526.7860107421875, - "logps/rejected": -2023.370849609375, - "loss": 0.0867, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.12615357339382172, - "rewards/margins": 19.8864688873291, - "rewards/rejected": -20.012622833251953, - "step": 1558 - }, - { - "epoch": 0.59, - "learning_rate": 7.539670808134286e-06, - "logits/chosen": -2.4553489685058594, - "logits/rejected": -2.023653745651245, - "logps/chosen": -461.88623046875, - "logps/rejected": -742.8302001953125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.999505639076233, - "rewards/margins": 13.710174560546875, - "rewards/rejected": -15.709680557250977, - "step": 1559 - }, - { - "epoch": 0.59, - "learning_rate": 7.528116488464452e-06, - "logits/chosen": -6.628399848937988, - "logits/rejected": -3.2016875743865967, - "logps/chosen": -390.2359313964844, - "logps/rejected": -2919.82568359375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.7834381461143494, - "rewards/margins": 19.10196304321289, - "rewards/rejected": -19.885400772094727, - "step": 1560 - }, - { - "epoch": 0.59, - "learning_rate": 7.516565682508994e-06, - "logits/chosen": -6.404021739959717, - "logits/rejected": -2.0206518173217773, - "logps/chosen": -501.6492614746094, - "logps/rejected": -1973.329833984375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.6851959228515625, - "rewards/margins": 31.108266830444336, - "rewards/rejected": -30.423070907592773, - "step": 1561 - }, - { - "epoch": 0.59, - "learning_rate": 7.505018406687062e-06, - "logits/chosen": -6.178516387939453, - "logits/rejected": -2.228914737701416, - "logps/chosen": -236.89784240722656, - "logps/rejected": -1465.950439453125, - "loss": 0.0868, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.4486160278320312, - "rewards/margins": 40.395328521728516, - "rewards/rejected": -42.84394454956055, - "step": 1562 - }, - { - "epoch": 0.59, - "learning_rate": 7.493474677412795e-06, - "logits/chosen": -0.5094344019889832, - "logits/rejected": -4.971706390380859, - "logps/chosen": -793.8399047851562, - "logps/rejected": -419.20849609375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -4.84883451461792, - "rewards/margins": 23.355653762817383, - "rewards/rejected": -28.20448875427246, - "step": 1563 - }, - { - "epoch": 0.59, - "learning_rate": 7.481934511095286e-06, - "logits/chosen": -7.030279159545898, - "logits/rejected": -4.750469207763672, - "logps/chosen": -203.982421875, - "logps/rejected": -3339.2255859375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.8559814691543579, - "rewards/margins": 32.50862121582031, - "rewards/rejected": -33.364601135253906, - "step": 1564 - }, - { - "epoch": 0.59, - "learning_rate": 7.4703979241385595e-06, - "logits/chosen": -5.174955368041992, - "logits/rejected": -0.8516485095024109, - "logps/chosen": -358.6292724609375, - "logps/rejected": -1300.0260009765625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.5509979724884033, - "rewards/margins": 34.12861633300781, - "rewards/rejected": -36.67961502075195, - "step": 1565 - }, - { - "epoch": 0.59, - "learning_rate": 7.458864932941559e-06, - "logits/chosen": -1.7332209348678589, - "logits/rejected": -0.2043258100748062, - "logps/chosen": -283.80816650390625, - "logps/rejected": -637.918701171875, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.231131076812744, - "rewards/margins": 15.857004165649414, - "rewards/rejected": -18.088134765625, - "step": 1566 - }, - { - "epoch": 0.59, - "learning_rate": 7.447335553898115e-06, - "logits/chosen": -0.4980299770832062, - "logits/rejected": -4.107448101043701, - "logps/chosen": -447.74755859375, - "logps/rejected": -550.0767211914062, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.06426697224378586, - "rewards/margins": 26.841800689697266, - "rewards/rejected": -26.90606689453125, - "step": 1567 - }, - { - "epoch": 0.59, - "learning_rate": 7.435809803396923e-06, - "logits/chosen": -5.067383289337158, - "logits/rejected": -2.5526418685913086, - "logps/chosen": -698.3010864257812, - "logps/rejected": -2016.5216064453125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.4913513660430908, - "rewards/margins": 39.49626159667969, - "rewards/rejected": -38.00490951538086, - "step": 1568 - }, - { - "epoch": 0.59, - "learning_rate": 7.424287697821517e-06, - "logits/chosen": -4.180531024932861, - "logits/rejected": -0.8935380578041077, - "logps/chosen": -480.5188293457031, - "logps/rejected": -1190.9111328125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -3.3192412853240967, - "rewards/margins": 15.604727745056152, - "rewards/rejected": -18.923969268798828, - "step": 1569 - }, - { - "epoch": 0.59, - "learning_rate": 7.412769253550255e-06, - "logits/chosen": -1.2310600280761719, - "logits/rejected": -3.7645132541656494, - "logps/chosen": -246.97525024414062, - "logps/rejected": -267.49554443359375, - "loss": 0.0013, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.444561719894409, - "rewards/margins": 14.016203880310059, - "rewards/rejected": -16.460765838623047, - "step": 1570 - }, - { - "epoch": 0.59, - "learning_rate": 7.401254486956285e-06, - "logits/chosen": -1.5969663858413696, - "logits/rejected": -3.0649750232696533, - "logps/chosen": -245.178955078125, - "logps/rejected": -487.73834228515625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.07635803520679474, - "rewards/margins": 21.27572250366211, - "rewards/rejected": -21.352081298828125, - "step": 1571 - }, - { - "epoch": 0.59, - "learning_rate": 7.389743414407536e-06, - "logits/chosen": -6.579865455627441, - "logits/rejected": 0.08186475187540054, - "logps/chosen": -250.63214111328125, - "logps/rejected": -1639.9249267578125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -4.892331123352051, - "rewards/margins": 21.17339324951172, - "rewards/rejected": -26.065723419189453, - "step": 1572 - }, - { - "epoch": 0.59, - "learning_rate": 7.37823605226668e-06, - "logits/chosen": -1.073002815246582, - "logits/rejected": -0.9760008454322815, - "logps/chosen": -322.98016357421875, - "logps/rejected": -699.6650390625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.19411011040210724, - "rewards/margins": 27.583715438842773, - "rewards/rejected": -27.7778263092041, - "step": 1573 - }, - { - "epoch": 0.6, - "learning_rate": 7.3667324168911125e-06, - "logits/chosen": -0.3479287922382355, - "logits/rejected": -2.405710458755493, - "logps/chosen": -333.6055908203125, - "logps/rejected": -452.1396789550781, - "loss": 0.0012, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.691162109375, - "rewards/margins": 20.447750091552734, - "rewards/rejected": -19.756587982177734, - "step": 1574 - }, - { - "epoch": 0.6, - "learning_rate": 7.35523252463294e-06, - "logits/chosen": -3.4136853218078613, - "logits/rejected": -1.6073317527770996, - "logps/chosen": -324.6468200683594, - "logps/rejected": -1115.39111328125, - "loss": 0.0011, - "rewards/accuracies": 1.0, - "rewards/chosen": -3.1255462169647217, - "rewards/margins": 36.03092956542969, - "rewards/rejected": -39.15647506713867, - "step": 1575 - }, - { - "epoch": 0.6, - "learning_rate": 7.343736391838936e-06, - "logits/chosen": -4.989363670349121, - "logits/rejected": -4.5777130126953125, - "logps/chosen": -288.3857421875, - "logps/rejected": -1184.05419921875, - "loss": 0.0046, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.7982391715049744, - "rewards/margins": 38.673526763916016, - "rewards/rejected": -39.47176742553711, - "step": 1576 - }, - { - "epoch": 0.6, - "learning_rate": 7.33224403485054e-06, - "logits/chosen": 0.01232177671045065, - "logits/rejected": -5.335483074188232, - "logps/chosen": -588.2403564453125, - "logps/rejected": -538.1226196289062, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.49165651202201843, - "rewards/margins": 31.645179748535156, - "rewards/rejected": -31.153522491455078, - "step": 1577 - }, - { - "epoch": 0.6, - "learning_rate": 7.320755470003822e-06, - "logits/chosen": -3.441133737564087, - "logits/rejected": -1.946915864944458, - "logps/chosen": -272.1875, - "logps/rejected": -631.2158813476562, - "loss": 0.0011, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.5026168823242188, - "rewards/margins": 13.794404983520508, - "rewards/rejected": -16.297021865844727, - "step": 1578 - }, - { - "epoch": 0.6, - "learning_rate": 7.309270713629459e-06, - "logits/chosen": -6.814424514770508, - "logits/rejected": -1.9787676334381104, - "logps/chosen": -566.7262573242188, - "logps/rejected": -2034.51708984375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.6513733267784119, - "rewards/margins": 29.9980525970459, - "rewards/rejected": -29.3466796875, - "step": 1579 - }, - { - "epoch": 0.6, - "learning_rate": 7.297789782052716e-06, - "logits/chosen": -4.0982489585876465, - "logits/rejected": -1.3006724119186401, - "logps/chosen": -245.24790954589844, - "logps/rejected": -679.6924438476562, - "loss": 0.0068, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.9155731201171875, - "rewards/margins": 16.630380630493164, - "rewards/rejected": -18.54595375061035, - "step": 1580 - }, - { - "epoch": 0.6, - "learning_rate": 7.2863126915934215e-06, - "logits/chosen": -2.526946783065796, - "logits/rejected": -1.8055057525634766, - "logps/chosen": -326.3048095703125, - "logps/rejected": -623.9586181640625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.4968719482421875, - "rewards/margins": 18.764923095703125, - "rewards/rejected": -19.261795043945312, - "step": 1581 - }, - { - "epoch": 0.6, - "learning_rate": 7.274839458565945e-06, - "logits/chosen": -1.9011696577072144, - "logits/rejected": -5.125708103179932, - "logps/chosen": -271.0177001953125, - "logps/rejected": -86.53463745117188, - "loss": 0.0059, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.5367370843887329, - "rewards/margins": 4.1494245529174805, - "rewards/rejected": -4.686161518096924, - "step": 1582 - }, - { - "epoch": 0.6, - "learning_rate": 7.263370099279173e-06, - "logits/chosen": -4.930027961730957, - "logits/rejected": -1.2518097162246704, - "logps/chosen": -430.39617919921875, - "logps/rejected": -2454.764404296875, - "loss": 0.0003, - "rewards/accuracies": 1.0, - "rewards/chosen": 3.442242383956909, - "rewards/margins": 56.584625244140625, - "rewards/rejected": -53.14238357543945, - "step": 1583 - }, - { - "epoch": 0.6, - "learning_rate": 7.251904630036479e-06, - "logits/chosen": -2.13692307472229, - "logits/rejected": -1.1643685102462769, - "logps/chosen": -350.6458740234375, - "logps/rejected": -487.8671875, - "loss": 0.0002, - "rewards/accuracies": 1.0, - "rewards/chosen": -3.517163038253784, - "rewards/margins": 6.723943710327148, - "rewards/rejected": -10.241106986999512, - "step": 1584 - }, - { - "epoch": 0.6, - "learning_rate": 7.240443067135718e-06, - "logits/chosen": -1.0959930419921875, - "logits/rejected": -1.410110354423523, - "logps/chosen": -336.41778564453125, - "logps/rejected": -654.3931884765625, - "loss": 0.0013, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.3988494873046875, - "rewards/margins": 23.00490379333496, - "rewards/rejected": -25.40375328063965, - "step": 1585 - }, - { - "epoch": 0.6, - "learning_rate": 7.22898542686918e-06, - "logits/chosen": -6.445734977722168, - "logits/rejected": -1.0342777967453003, - "logps/chosen": -304.2590637207031, - "logps/rejected": -2820.710205078125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.48532411456108093, - "rewards/margins": 40.75820541381836, - "rewards/rejected": -41.2435302734375, - "step": 1586 - }, - { - "epoch": 0.6, - "learning_rate": 7.21753172552359e-06, - "logits/chosen": -3.835505485534668, - "logits/rejected": -3.489379644393921, - "logps/chosen": -174.67410278320312, - "logps/rejected": -441.36669921875, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.719409167766571, - "rewards/margins": 13.40240478515625, - "rewards/rejected": -14.121813774108887, - "step": 1587 - }, - { - "epoch": 0.6, - "learning_rate": 7.2060819793800665e-06, - "logits/chosen": 0.28955078125, - "logits/rejected": -0.32798969745635986, - "logps/chosen": -448.89886474609375, - "logps/rejected": -961.0678100585938, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -3.9474122524261475, - "rewards/margins": 31.92106819152832, - "rewards/rejected": -35.86848068237305, - "step": 1588 - }, - { - "epoch": 0.6, - "learning_rate": 7.194636204714112e-06, - "logits/chosen": -0.7367807626724243, - "logits/rejected": -1.1897401809692383, - "logps/chosen": -251.6015625, - "logps/rejected": -480.6746520996094, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.639195203781128, - "rewards/margins": 16.019479751586914, - "rewards/rejected": -18.658674240112305, - "step": 1589 - }, - { - "epoch": 0.6, - "learning_rate": 7.183194417795569e-06, - "logits/chosen": -3.1097919940948486, - "logits/rejected": -5.169285297393799, - "logps/chosen": -619.9364013671875, - "logps/rejected": -499.64703369140625, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -6.498083591461182, - "rewards/margins": 9.055627822875977, - "rewards/rejected": -15.5537109375, - "step": 1590 - }, - { - "epoch": 0.6, - "learning_rate": 7.171756634888641e-06, - "logits/chosen": -0.05086921527981758, - "logits/rejected": -6.80126428604126, - "logps/chosen": -392.25146484375, - "logps/rejected": -120.31922912597656, - "loss": 0.0011, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.585583448410034, - "rewards/margins": 4.762871742248535, - "rewards/rejected": -7.34845495223999, - "step": 1591 - }, - { - "epoch": 0.6, - "learning_rate": 7.160322872251807e-06, - "logits/chosen": -1.7702972888946533, - "logits/rejected": -2.0332882404327393, - "logps/chosen": -233.61236572265625, - "logps/rejected": -826.98681640625, - "loss": 0.0092, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.8317977786064148, - "rewards/margins": 38.130126953125, - "rewards/rejected": -38.9619255065918, - "step": 1592 - }, - { - "epoch": 0.6, - "learning_rate": 7.148893146137852e-06, - "logits/chosen": -6.972503662109375, - "logits/rejected": -0.37172335386276245, - "logps/chosen": -478.29498291015625, - "logps/rejected": -1576.6568603515625, - "loss": 0.0096, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.6324707269668579, - "rewards/margins": 17.386486053466797, - "rewards/rejected": -18.018957138061523, - "step": 1593 - }, - { - "epoch": 0.6, - "learning_rate": 7.137467472793815e-06, - "logits/chosen": -7.001683712005615, - "logits/rejected": -1.5833897590637207, - "logps/chosen": -480.3886413574219, - "logps/rejected": -2566.5703125, - "loss": 0.0016, - "rewards/accuracies": 1.0, - "rewards/chosen": -3.250714063644409, - "rewards/margins": 62.59836196899414, - "rewards/rejected": -65.84907531738281, - "step": 1594 - }, - { - "epoch": 0.6, - "learning_rate": 7.12604586846098e-06, - "logits/chosen": -4.386227607727051, - "logits/rejected": -1.2557260990142822, - "logps/chosen": -463.065673828125, - "logps/rejected": -1615.6396484375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.4875977039337158, - "rewards/margins": 43.755149841308594, - "rewards/rejected": -45.24274826049805, - "step": 1595 - }, - { - "epoch": 0.6, - "learning_rate": 7.114628349374838e-06, - "logits/chosen": -1.8186756372451782, - "logits/rejected": -4.931894779205322, - "logps/chosen": -166.6048583984375, - "logps/rejected": -160.39434814453125, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.2589691877365112, - "rewards/margins": 10.69517707824707, - "rewards/rejected": -11.954146385192871, - "step": 1596 - }, - { - "epoch": 0.6, - "learning_rate": 7.10321493176508e-06, - "logits/chosen": -1.5675048828125, - "logits/rejected": -2.582763671875, - "logps/chosen": -471.6644592285156, - "logps/rejected": -658.5045166015625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.1741180419921875, - "rewards/margins": 15.606206893920898, - "rewards/rejected": -17.780324935913086, - "step": 1597 - }, - { - "epoch": 0.6, - "learning_rate": 7.091805631855566e-06, - "logits/chosen": -7.330448150634766, - "logits/rejected": -1.1295831203460693, - "logps/chosen": -450.4561767578125, - "logps/rejected": -2300.302001953125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -4.414746284484863, - "rewards/margins": 28.839710235595703, - "rewards/rejected": -33.25445556640625, - "step": 1598 - }, - { - "epoch": 0.6, - "learning_rate": 7.0804004658642975e-06, - "logits/chosen": -4.238296985626221, - "logits/rejected": -7.213418960571289, - "logps/chosen": -590.1846923828125, - "logps/rejected": -413.4452819824219, - "loss": 0.0027, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.012127685360610485, - "rewards/margins": 10.632461547851562, - "rewards/rejected": -10.6445894241333, - "step": 1599 - }, - { - "epoch": 0.6, - "learning_rate": 7.068999450003411e-06, - "logits/chosen": -2.4760758876800537, - "logits/rejected": -3.1634016036987305, - "logps/chosen": -368.8224792480469, - "logps/rejected": -459.050537109375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.9831513166427612, - "rewards/margins": 12.824472427368164, - "rewards/rejected": -14.807623863220215, - "step": 1600 - }, - { - "epoch": 0.61, - "learning_rate": 7.0576026004791345e-06, - "logits/chosen": -7.741376876831055, - "logits/rejected": -3.8738889694213867, - "logps/chosen": -264.4184265136719, - "logps/rejected": -2031.20068359375, - "loss": 0.002, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.635577380657196, - "rewards/margins": 19.7016544342041, - "rewards/rejected": -20.33723258972168, - "step": 1601 - }, - { - "epoch": 0.61, - "learning_rate": 7.0462099334917745e-06, - "logits/chosen": -6.912502765655518, - "logits/rejected": -1.7697404623031616, - "logps/chosen": -368.468505859375, - "logps/rejected": -1201.627685546875, - "loss": 0.0028, - "rewards/accuracies": 1.0, - "rewards/chosen": -3.259625196456909, - "rewards/margins": 7.222516059875488, - "rewards/rejected": -10.482141494750977, - "step": 1602 - }, - { - "epoch": 0.61, - "learning_rate": 7.034821465235693e-06, - "logits/chosen": -1.1377036571502686, - "logits/rejected": -3.355588674545288, - "logps/chosen": -276.8458251953125, - "logps/rejected": -406.09893798828125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.8902252912521362, - "rewards/margins": 18.33322525024414, - "rewards/rejected": -20.22344970703125, - "step": 1603 - }, - { - "epoch": 0.61, - "learning_rate": 7.023437211899285e-06, - "logits/chosen": -6.614558696746826, - "logits/rejected": -2.3798842430114746, - "logps/chosen": -161.8936309814453, - "logps/rejected": -801.791015625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.5266662836074829, - "rewards/margins": 18.0902042388916, - "rewards/rejected": -18.616870880126953, - "step": 1604 - }, - { - "epoch": 0.61, - "learning_rate": 7.012057189664954e-06, - "logits/chosen": -4.584068775177002, - "logits/rejected": -0.1075434535741806, - "logps/chosen": -603.2547607421875, - "logps/rejected": -1710.9478759765625, - "loss": 0.0005, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.10883789509534836, - "rewards/margins": 20.42253303527832, - "rewards/rejected": -20.313695907592773, - "step": 1605 - }, - { - "epoch": 0.61, - "learning_rate": 7.000681414709086e-06, - "logits/chosen": -3.9886631965637207, - "logits/rejected": -1.3812199831008911, - "logps/chosen": -335.20782470703125, - "logps/rejected": -771.8572998046875, - "loss": 0.0004, - "rewards/accuracies": 1.0, - "rewards/chosen": 3.1885437965393066, - "rewards/margins": 10.975323677062988, - "rewards/rejected": -7.786779880523682, - "step": 1606 - }, - { - "epoch": 0.61, - "learning_rate": 6.989309903202035e-06, - "logits/chosen": -0.8469581604003906, - "logits/rejected": -6.86841344833374, - "logps/chosen": -345.54608154296875, - "logps/rejected": -238.42132568359375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.5644897818565369, - "rewards/margins": 13.624692916870117, - "rewards/rejected": -13.060203552246094, - "step": 1607 - }, - { - "epoch": 0.61, - "learning_rate": 6.977942671308087e-06, - "logits/chosen": -2.313453197479248, - "logits/rejected": -5.310988903045654, - "logps/chosen": -351.8242492675781, - "logps/rejected": -335.2683410644531, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.4137847423553467, - "rewards/margins": 19.637672424316406, - "rewards/rejected": -22.051456451416016, - "step": 1608 - }, - { - "epoch": 0.61, - "learning_rate": 6.966579735185455e-06, - "logits/chosen": -0.8271838426589966, - "logits/rejected": -1.1614004373550415, - "logps/chosen": -359.775390625, - "logps/rejected": -923.2001953125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.572845458984375, - "rewards/margins": 41.578243255615234, - "rewards/rejected": -44.15108871459961, - "step": 1609 - }, - { - "epoch": 0.61, - "learning_rate": 6.955221110986237e-06, - "logits/chosen": -4.092020034790039, - "logits/rejected": -3.20725679397583, - "logps/chosen": -299.7333679199219, - "logps/rejected": -533.2952270507812, - "loss": 0.0006, - "rewards/accuracies": 1.0, - "rewards/chosen": -3.0110931396484375, - "rewards/margins": 6.32358455657959, - "rewards/rejected": -9.334677696228027, - "step": 1610 - }, - { - "epoch": 0.61, - "learning_rate": 6.943866814856408e-06, - "logits/chosen": -2.954495906829834, - "logits/rejected": -5.447693824768066, - "logps/chosen": -375.2939758300781, - "logps/rejected": -392.0218200683594, - "loss": 0.0007, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.6067474484443665, - "rewards/margins": 18.599809646606445, - "rewards/rejected": -17.99306297302246, - "step": 1611 - }, - { - "epoch": 0.61, - "learning_rate": 6.932516862935783e-06, - "logits/chosen": -2.315122127532959, - "logits/rejected": -1.8990249633789062, - "logps/chosen": -180.82928466796875, - "logps/rejected": -293.0933837890625, - "loss": 0.0002, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.879718005657196, - "rewards/margins": 6.413864612579346, - "rewards/rejected": -7.293582439422607, - "step": 1612 - }, - { - "epoch": 0.61, - "learning_rate": 6.921171271358007e-06, - "logits/chosen": -2.5748836994171143, - "logits/rejected": -3.515620708465576, - "logps/chosen": -200.9853515625, - "logps/rejected": -520.934326171875, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.079089403152466, - "rewards/margins": 26.27935218811035, - "rewards/rejected": -28.358442306518555, - "step": 1613 - }, - { - "epoch": 0.61, - "learning_rate": 6.909830056250527e-06, - "logits/chosen": -4.195859909057617, - "logits/rejected": -0.6667245030403137, - "logps/chosen": -287.117919921875, - "logps/rejected": -788.5768432617188, - "loss": 0.0148, - "rewards/accuracies": 1.0, - "rewards/chosen": -3.8099305629730225, - "rewards/margins": 14.991125106811523, - "rewards/rejected": -18.801055908203125, - "step": 1614 - }, - { - "epoch": 0.61, - "learning_rate": 6.898493233734571e-06, - "logits/chosen": -0.020825982093811035, - "logits/rejected": -1.0777277946472168, - "logps/chosen": -219.62506103515625, - "logps/rejected": -456.2406921386719, - "loss": 0.0015, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.64276123046875, - "rewards/margins": 23.179931640625, - "rewards/rejected": -25.82269287109375, - "step": 1615 - }, - { - "epoch": 0.61, - "learning_rate": 6.8871608199251135e-06, - "logits/chosen": -0.38562777638435364, - "logits/rejected": -4.930355548858643, - "logps/chosen": -510.26348876953125, - "logps/rejected": -253.36032104492188, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.937695324420929, - "rewards/margins": 8.829505920410156, - "rewards/rejected": -9.76720142364502, - "step": 1616 - }, - { - "epoch": 0.61, - "learning_rate": 6.875832830930872e-06, - "logits/chosen": -4.021607398986816, - "logits/rejected": -4.154234886169434, - "logps/chosen": -950.1102294921875, - "logps/rejected": -1352.242431640625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -4.019336223602295, - "rewards/margins": 14.55961799621582, - "rewards/rejected": -18.578954696655273, - "step": 1617 - }, - { - "epoch": 0.61, - "learning_rate": 6.864509282854272e-06, - "logits/chosen": -2.5355474948883057, - "logits/rejected": -2.598653793334961, - "logps/chosen": -245.6525421142578, - "logps/rejected": -401.89593505859375, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.4187377989292145, - "rewards/margins": 7.479623317718506, - "rewards/rejected": -7.8983612060546875, - "step": 1618 - }, - { - "epoch": 0.61, - "learning_rate": 6.853190191791428e-06, - "logits/chosen": -2.1184585094451904, - "logits/rejected": -4.421306133270264, - "logps/chosen": -522.689208984375, - "logps/rejected": -711.7120361328125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.6567749381065369, - "rewards/margins": 28.329675674438477, - "rewards/rejected": -28.9864501953125, - "step": 1619 - }, - { - "epoch": 0.61, - "learning_rate": 6.841875573832111e-06, - "logits/chosen": -0.9530197381973267, - "logits/rejected": -3.308924436569214, - "logps/chosen": -542.1182250976562, - "logps/rejected": -720.2980346679688, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 2.8638978004455566, - "rewards/margins": 26.582244873046875, - "rewards/rejected": -23.718347549438477, - "step": 1620 - }, - { - "epoch": 0.61, - "learning_rate": 6.830565445059745e-06, - "logits/chosen": -1.5824698209762573, - "logits/rejected": -7.1667094230651855, - "logps/chosen": -453.87939453125, - "logps/rejected": -173.11138916015625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -3.534222364425659, - "rewards/margins": 9.242612838745117, - "rewards/rejected": -12.776835441589355, - "step": 1621 - }, - { - "epoch": 0.61, - "learning_rate": 6.819259821551361e-06, - "logits/chosen": -1.9068795442581177, - "logits/rejected": -4.2818803787231445, - "logps/chosen": -665.0640869140625, - "logps/rejected": -401.3212890625, - "loss": 0.0003, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.990362524986267, - "rewards/margins": 6.136218547821045, - "rewards/rejected": -8.126581192016602, - "step": 1622 - }, - { - "epoch": 0.61, - "learning_rate": 6.8079587193775935e-06, - "logits/chosen": -6.814977645874023, - "logits/rejected": -2.7388627529144287, - "logps/chosen": -213.8468475341797, - "logps/rejected": -3184.51513671875, - "loss": 0.0005, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.29169464111328125, - "rewards/margins": 89.64434051513672, - "rewards/rejected": -89.93603515625, - "step": 1623 - }, - { - "epoch": 0.61, - "learning_rate": 6.796662154602648e-06, - "logits/chosen": -1.7811553478240967, - "logits/rejected": -0.5898949503898621, - "logps/chosen": -423.5433654785156, - "logps/rejected": -966.5225830078125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.07438063621521, - "rewards/margins": 26.684804916381836, - "rewards/rejected": -28.759185791015625, - "step": 1624 - }, - { - "epoch": 0.61, - "learning_rate": 6.785370143284285e-06, - "logits/chosen": -2.6367197036743164, - "logits/rejected": -1.269245982170105, - "logps/chosen": -198.79689025878906, - "logps/rejected": -619.619384765625, - "loss": 0.0002, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.865434408187866, - "rewards/margins": 26.868595123291016, - "rewards/rejected": -29.73402976989746, - "step": 1625 - }, - { - "epoch": 0.61, - "learning_rate": 6.774082701473774e-06, - "logits/chosen": -5.417619705200195, - "logits/rejected": -0.9593294262886047, - "logps/chosen": -390.27947998046875, - "logps/rejected": -1528.3973388671875, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.191558837890625, - "rewards/margins": 40.802520751953125, - "rewards/rejected": -40.6109619140625, - "step": 1626 - }, - { - "epoch": 0.62, - "learning_rate": 6.76279984521592e-06, - "logits/chosen": -2.660972833633423, - "logits/rejected": -0.3752053678035736, - "logps/chosen": -320.961669921875, - "logps/rejected": -965.6904907226562, - "loss": 0.0003, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.2980499267578125, - "rewards/margins": 34.23040008544922, - "rewards/rejected": -35.52845001220703, - "step": 1627 - }, - { - "epoch": 0.62, - "learning_rate": 6.751521590548986e-06, - "logits/chosen": -1.9251103401184082, - "logits/rejected": -2.3604204654693604, - "logps/chosen": -214.30621337890625, - "logps/rejected": -358.94580078125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.6547012329101562, - "rewards/margins": 12.121541023254395, - "rewards/rejected": -13.77624225616455, - "step": 1628 - }, - { - "epoch": 0.62, - "learning_rate": 6.740247953504697e-06, - "logits/chosen": -2.881946563720703, - "logits/rejected": -1.7865315675735474, - "logps/chosen": -469.994140625, - "logps/rejected": -838.2109375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.28044435381889343, - "rewards/margins": 14.095001220703125, - "rewards/rejected": -14.375445365905762, - "step": 1629 - }, - { - "epoch": 0.62, - "learning_rate": 6.728978950108222e-06, - "logits/chosen": -6.343185901641846, - "logits/rejected": -0.13836582005023956, - "logps/chosen": -285.51849365234375, - "logps/rejected": -1615.0848388671875, - "loss": 0.0022, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.7556518912315369, - "rewards/margins": 28.296630859375, - "rewards/rejected": -29.052282333374023, - "step": 1630 - }, - { - "epoch": 0.62, - "learning_rate": 6.717714596378138e-06, - "logits/chosen": -5.846319198608398, - "logits/rejected": -2.0760202407836914, - "logps/chosen": -276.2255859375, - "logps/rejected": -1191.2342529296875, - "loss": 0.0023, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.421478271484375, - "rewards/margins": 7.927825927734375, - "rewards/rejected": -9.34930419921875, - "step": 1631 - }, - { - "epoch": 0.62, - "learning_rate": 6.706454908326414e-06, - "logits/chosen": -0.37354981899261475, - "logits/rejected": -2.18119478225708, - "logps/chosen": -383.34027099609375, - "logps/rejected": -698.9071044921875, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.672149658203125, - "rewards/margins": 38.28490447998047, - "rewards/rejected": -40.957054138183594, - "step": 1632 - }, - { - "epoch": 0.62, - "learning_rate": 6.695199901958386e-06, - "logits/chosen": -5.05733585357666, - "logits/rejected": -1.9443784952163696, - "logps/chosen": -231.24725341796875, - "logps/rejected": -1040.968017578125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.8944061994552612, - "rewards/margins": 42.539268493652344, - "rewards/rejected": -44.43367385864258, - "step": 1633 - }, - { - "epoch": 0.62, - "learning_rate": 6.683949593272734e-06, - "logits/chosen": -1.525512456893921, - "logits/rejected": -4.1160430908203125, - "logps/chosen": -461.4486389160156, - "logps/rejected": -665.0941162109375, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.3406524658203125, - "rewards/margins": 40.42951583862305, - "rewards/rejected": -40.77016830444336, - "step": 1634 - }, - { - "epoch": 0.62, - "learning_rate": 6.672703998261465e-06, - "logits/chosen": -2.258744716644287, - "logits/rejected": -2.0724124908447266, - "logps/chosen": -229.79183959960938, - "logps/rejected": -739.0274658203125, - "loss": 0.0019, - "rewards/accuracies": 1.0, - "rewards/chosen": -3.905651807785034, - "rewards/margins": 35.70095443725586, - "rewards/rejected": -39.606605529785156, - "step": 1635 - }, - { - "epoch": 0.62, - "learning_rate": 6.66146313290988e-06, - "logits/chosen": -0.2858636677265167, - "logits/rejected": -4.662731170654297, - "logps/chosen": -465.4156494140625, - "logps/rejected": -186.9219970703125, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.8428162336349487, - "rewards/margins": 9.744319915771484, - "rewards/rejected": -11.587136268615723, - "step": 1636 - }, - { - "epoch": 0.62, - "learning_rate": 6.650227013196564e-06, - "logits/chosen": -0.8936840891838074, - "logits/rejected": -2.924539804458618, - "logps/chosen": -384.6524658203125, - "logps/rejected": -375.93701171875, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.24408875405788422, - "rewards/margins": 7.240329265594482, - "rewards/rejected": -7.484417915344238, - "step": 1637 - }, - { - "epoch": 0.62, - "learning_rate": 6.638995655093351e-06, - "logits/chosen": -2.5788557529449463, - "logits/rejected": 0.5530962944030762, - "logps/chosen": -717.2229614257812, - "logps/rejected": -1469.582275390625, - "loss": 0.0002, - "rewards/accuracies": 1.0, - "rewards/chosen": -9.726409912109375, - "rewards/margins": 20.885040283203125, - "rewards/rejected": -30.6114501953125, - "step": 1638 - }, - { - "epoch": 0.62, - "learning_rate": 6.6277690745653044e-06, - "logits/chosen": -1.2873847484588623, - "logits/rejected": -4.520269393920898, - "logps/chosen": -200.77377319335938, - "logps/rejected": -279.40576171875, - "loss": 0.0002, - "rewards/accuracies": 1.0, - "rewards/chosen": -3.7460572719573975, - "rewards/margins": 13.404308319091797, - "rewards/rejected": -17.150365829467773, - "step": 1639 - }, - { - "epoch": 0.62, - "learning_rate": 6.6165472875707005e-06, - "logits/chosen": -1.104099154472351, - "logits/rejected": -1.367127537727356, - "logps/chosen": -465.8834533691406, - "logps/rejected": -787.41748046875, - "loss": 0.0056, - "rewards/accuracies": 1.0, - "rewards/chosen": -4.752829074859619, - "rewards/margins": 29.869569778442383, - "rewards/rejected": -34.622398376464844, - "step": 1640 - }, - { - "epoch": 0.62, - "learning_rate": 6.605330310061e-06, - "logits/chosen": -4.31076717376709, - "logits/rejected": -2.4973127841949463, - "logps/chosen": -163.83401489257812, - "logps/rejected": -381.114013671875, - "loss": 0.0015, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.7679460048675537, - "rewards/margins": 5.768010139465332, - "rewards/rejected": -8.535956382751465, - "step": 1641 - }, - { - "epoch": 0.62, - "learning_rate": 6.594118157980833e-06, - "logits/chosen": -2.0710978507995605, - "logits/rejected": -1.7742880582809448, - "logps/chosen": -970.9371337890625, - "logps/rejected": -1160.047607421875, - "loss": 0.0003, - "rewards/accuracies": 1.0, - "rewards/chosen": -9.755993843078613, - "rewards/margins": 12.081500053405762, - "rewards/rejected": -21.837493896484375, - "step": 1642 - }, - { - "epoch": 0.62, - "learning_rate": 6.582910847267957e-06, - "logits/chosen": 0.30041995644569397, - "logits/rejected": -4.25231409072876, - "logps/chosen": -273.6934509277344, - "logps/rejected": -197.57113647460938, - "loss": 0.0469, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.7348923683166504, - "rewards/margins": 7.738460063934326, - "rewards/rejected": -10.473352432250977, - "step": 1643 - }, - { - "epoch": 0.62, - "learning_rate": 6.57170839385326e-06, - "logits/chosen": -5.703388690948486, - "logits/rejected": -5.378255844116211, - "logps/chosen": -302.207763671875, - "logps/rejected": -900.282958984375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.5108703970909119, - "rewards/margins": 25.48308753967285, - "rewards/rejected": -25.99395751953125, - "step": 1644 - }, - { - "epoch": 0.62, - "learning_rate": 6.560510813660719e-06, - "logits/chosen": -0.5752767324447632, - "logits/rejected": -2.5579147338867188, - "logps/chosen": -216.8712921142578, - "logps/rejected": -511.866943359375, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.5307144522666931, - "rewards/margins": 27.277629852294922, - "rewards/rejected": -27.8083438873291, - "step": 1645 - }, - { - "epoch": 0.62, - "learning_rate": 6.549318122607389e-06, - "logits/chosen": -6.007167339324951, - "logits/rejected": -2.114438772201538, - "logps/chosen": -425.41009521484375, - "logps/rejected": -1089.08154296875, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.6288909912109375, - "rewards/margins": 17.654281616210938, - "rewards/rejected": -18.283172607421875, - "step": 1646 - }, - { - "epoch": 0.62, - "learning_rate": 6.538130336603372e-06, - "logits/chosen": -6.6259331703186035, - "logits/rejected": -0.7111470103263855, - "logps/chosen": -628.308349609375, - "logps/rejected": -3378.43603515625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.8756959438323975, - "rewards/margins": 17.268688201904297, - "rewards/rejected": -20.144384384155273, - "step": 1647 - }, - { - "epoch": 0.62, - "learning_rate": 6.526947471551799e-06, - "logits/chosen": -0.7082353830337524, - "logits/rejected": -1.7111907005310059, - "logps/chosen": -440.1306457519531, - "logps/rejected": -458.8555908203125, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.4242095947265625, - "rewards/margins": 10.95676326751709, - "rewards/rejected": -11.380972862243652, - "step": 1648 - }, - { - "epoch": 0.62, - "learning_rate": 6.5157695433488e-06, - "logits/chosen": -6.208556652069092, - "logits/rejected": -1.3524123430252075, - "logps/chosen": -324.49560546875, - "logps/rejected": -1157.1102294921875, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.6639556884765625, - "rewards/margins": 20.33894920349121, - "rewards/rejected": -21.002904891967773, - "step": 1649 - }, - { - "epoch": 0.62, - "learning_rate": 6.504596567883496e-06, - "logits/chosen": 0.3341713547706604, - "logits/rejected": -2.9971840381622314, - "logps/chosen": -283.3319396972656, - "logps/rejected": -530.6859130859375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.20925597846508026, - "rewards/margins": 22.095035552978516, - "rewards/rejected": -22.304290771484375, - "step": 1650 - }, - { - "epoch": 0.62, - "learning_rate": 6.4934285610379624e-06, - "logits/chosen": -5.524227619171143, - "logits/rejected": -5.481115818023682, - "logps/chosen": -400.20068359375, - "logps/rejected": -797.3330078125, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.4068206548690796, - "rewards/margins": 16.437437057495117, - "rewards/rejected": -17.844257354736328, - "step": 1651 - }, - { - "epoch": 0.62, - "learning_rate": 6.482265538687217e-06, - "logits/chosen": -2.534317970275879, - "logits/rejected": -5.681800842285156, - "logps/chosen": -448.27471923828125, - "logps/rejected": -170.06509399414062, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 3.5172119140625, - "rewards/margins": 15.973886489868164, - "rewards/rejected": -12.456674575805664, - "step": 1652 - }, - { - "epoch": 0.62, - "learning_rate": 6.471107516699183e-06, - "logits/chosen": -7.279799938201904, - "logits/rejected": -2.1103439331054688, - "logps/chosen": -199.7398681640625, - "logps/rejected": -4190.4228515625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.8526687622070312, - "rewards/margins": 27.04515838623047, - "rewards/rejected": -28.8978271484375, - "step": 1653 - }, - { - "epoch": 0.63, - "learning_rate": 6.459954510934684e-06, - "logits/chosen": -2.2792601585388184, - "logits/rejected": -4.837536811828613, - "logps/chosen": -697.7921752929688, - "logps/rejected": -232.38417053222656, - "loss": 0.0002, - "rewards/accuracies": 1.0, - "rewards/chosen": -3.3226380348205566, - "rewards/margins": 13.475088119506836, - "rewards/rejected": -16.797725677490234, - "step": 1654 - }, - { - "epoch": 0.63, - "learning_rate": 6.448806537247412e-06, - "logits/chosen": -5.633952617645264, - "logits/rejected": -1.0834228992462158, - "logps/chosen": -277.7080993652344, - "logps/rejected": -1604.7076416015625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -3.640402317047119, - "rewards/margins": 60.58203125, - "rewards/rejected": -64.2224349975586, - "step": 1655 - }, - { - "epoch": 0.63, - "learning_rate": 6.437663611483905e-06, - "logits/chosen": -2.9187839031219482, - "logits/rejected": -6.637794017791748, - "logps/chosen": -673.0725708007812, - "logps/rejected": -221.90249633789062, - "loss": 0.0008, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.9403809309005737, - "rewards/margins": 13.994245529174805, - "rewards/rejected": -12.053864479064941, - "step": 1656 - }, - { - "epoch": 0.63, - "learning_rate": 6.426525749483526e-06, - "logits/chosen": -2.853909730911255, - "logits/rejected": -0.8621125817298889, - "logps/chosen": -372.85589599609375, - "logps/rejected": -953.0328369140625, - "loss": 0.0002, - "rewards/accuracies": 1.0, - "rewards/chosen": -4.147220134735107, - "rewards/margins": 22.014266967773438, - "rewards/rejected": -26.161487579345703, - "step": 1657 - }, - { - "epoch": 0.63, - "learning_rate": 6.415392967078438e-06, - "logits/chosen": 0.10346759110689163, - "logits/rejected": -0.5095775723457336, - "logps/chosen": -288.09796142578125, - "logps/rejected": -476.52850341796875, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.042004346847534, - "rewards/margins": 16.14549446105957, - "rewards/rejected": -18.187498092651367, - "step": 1658 - }, - { - "epoch": 0.63, - "learning_rate": 6.404265280093581e-06, - "logits/chosen": -1.174609661102295, - "logits/rejected": -1.173991322517395, - "logps/chosen": -435.48663330078125, - "logps/rejected": -680.66064453125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 2.3849945068359375, - "rewards/margins": 17.039127349853516, - "rewards/rejected": -14.654131889343262, - "step": 1659 - }, - { - "epoch": 0.63, - "learning_rate": 6.393142704346661e-06, - "logits/chosen": -1.5537201166152954, - "logits/rejected": -1.8978173732757568, - "logps/chosen": -476.4170837402344, - "logps/rejected": -842.4365234375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 3.0350005626678467, - "rewards/margins": 24.661312103271484, - "rewards/rejected": -21.626312255859375, - "step": 1660 - }, - { - "epoch": 0.63, - "learning_rate": 6.382025255648112e-06, - "logits/chosen": -5.210333824157715, - "logits/rejected": -6.096562385559082, - "logps/chosen": -224.891845703125, - "logps/rejected": -803.820068359375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.0130035877227783, - "rewards/margins": 23.546859741210938, - "rewards/rejected": -24.559864044189453, - "step": 1661 - }, - { - "epoch": 0.63, - "learning_rate": 6.370912949801083e-06, - "logits/chosen": -1.8002431392669678, - "logits/rejected": -4.24111270904541, - "logps/chosen": -187.737060546875, - "logps/rejected": -167.95217895507812, - "loss": 0.0002, - "rewards/accuracies": 1.0, - "rewards/chosen": -5.432197570800781, - "rewards/margins": 6.676657676696777, - "rewards/rejected": -12.108855247497559, - "step": 1662 - }, - { - "epoch": 0.63, - "learning_rate": 6.3598058026013995e-06, - "logits/chosen": -2.99192214012146, - "logits/rejected": -5.2817487716674805, - "logps/chosen": -940.2864379882812, - "logps/rejected": -866.183837890625, - "loss": 0.0003, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.6881897449493408, - "rewards/margins": 39.75848388671875, - "rewards/rejected": -38.07029342651367, - "step": 1663 - }, - { - "epoch": 0.63, - "learning_rate": 6.3487038298375836e-06, - "logits/chosen": -3.1404807567596436, - "logits/rejected": -3.9030239582061768, - "logps/chosen": -260.29119873046875, - "logps/rejected": -311.6163635253906, - "loss": 0.0002, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.5322097539901733, - "rewards/margins": 12.571346282958984, - "rewards/rejected": -14.103555679321289, - "step": 1664 - }, - { - "epoch": 0.63, - "learning_rate": 6.337607047290774e-06, - "logits/chosen": -2.0557639598846436, - "logits/rejected": -3.1567771434783936, - "logps/chosen": -145.25706481933594, - "logps/rejected": -241.37423706054688, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.2646530866622925, - "rewards/margins": 9.160608291625977, - "rewards/rejected": -10.425261497497559, - "step": 1665 - }, - { - "epoch": 0.63, - "learning_rate": 6.326515470734741e-06, - "logits/chosen": -0.3121519386768341, - "logits/rejected": -2.2284302711486816, - "logps/chosen": -583.2218017578125, - "logps/rejected": -823.9343872070312, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 2.1509461402893066, - "rewards/margins": 19.407337188720703, - "rewards/rejected": -17.256391525268555, - "step": 1666 - }, - { - "epoch": 0.63, - "learning_rate": 6.315429115935858e-06, - "logits/chosen": -7.086632251739502, - "logits/rejected": -2.46244478225708, - "logps/chosen": -240.2942657470703, - "logps/rejected": -1999.4698486328125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.21274109184741974, - "rewards/margins": 25.086143493652344, - "rewards/rejected": -24.873401641845703, - "step": 1667 - }, - { - "epoch": 0.63, - "learning_rate": 6.304347998653074e-06, - "logits/chosen": -5.66143798828125, - "logits/rejected": -2.467033624649048, - "logps/chosen": -223.59869384765625, - "logps/rejected": -936.5045166015625, - "loss": 0.006, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.883392333984375, - "rewards/margins": 35.94584274291992, - "rewards/rejected": -36.8292350769043, - "step": 1668 - }, - { - "epoch": 0.63, - "learning_rate": 6.293272134637889e-06, - "logits/chosen": -5.80294132232666, - "logits/rejected": -1.9347738027572632, - "logps/chosen": -474.152587890625, - "logps/rejected": -1543.9368896484375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -3.782061815261841, - "rewards/margins": 35.61082077026367, - "rewards/rejected": -39.39288330078125, - "step": 1669 - }, - { - "epoch": 0.63, - "learning_rate": 6.282201539634341e-06, - "logits/chosen": -8.01689338684082, - "logits/rejected": -2.3350563049316406, - "logps/chosen": -228.18853759765625, - "logps/rejected": -1471.5599365234375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.20810866355896, - "rewards/margins": 25.89040184020996, - "rewards/rejected": -28.0985107421875, - "step": 1670 - }, - { - "epoch": 0.63, - "learning_rate": 6.271136229378976e-06, - "logits/chosen": -8.202219009399414, - "logits/rejected": -1.8033329248428345, - "logps/chosen": -303.03863525390625, - "logps/rejected": -3767.005859375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.5424255728721619, - "rewards/margins": 34.21050262451172, - "rewards/rejected": -34.7529296875, - "step": 1671 - }, - { - "epoch": 0.63, - "learning_rate": 6.260076219600823e-06, - "logits/chosen": -3.8898277282714844, - "logits/rejected": -1.0196908712387085, - "logps/chosen": -306.6249694824219, - "logps/rejected": -716.656982421875, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -5.835653781890869, - "rewards/margins": 18.45062255859375, - "rewards/rejected": -24.28627586364746, - "step": 1672 - }, - { - "epoch": 0.63, - "learning_rate": 6.24902152602139e-06, - "logits/chosen": -6.264642715454102, - "logits/rejected": -3.146395444869995, - "logps/chosen": -241.04527282714844, - "logps/rejected": -1180.294921875, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -3.4404067993164062, - "rewards/margins": 23.50066375732422, - "rewards/rejected": -26.941070556640625, - "step": 1673 - }, - { - "epoch": 0.63, - "learning_rate": 6.237972164354617e-06, - "logits/chosen": -5.356686115264893, - "logits/rejected": -2.455963611602783, - "logps/chosen": -267.02911376953125, - "logps/rejected": -1273.327880859375, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": 2.1732513904571533, - "rewards/margins": 23.978769302368164, - "rewards/rejected": -21.805517196655273, - "step": 1674 - }, - { - "epoch": 0.63, - "learning_rate": 6.226928150306866e-06, - "logits/chosen": -7.655035972595215, - "logits/rejected": -1.9131919145584106, - "logps/chosen": -375.0463562011719, - "logps/rejected": -2473.80419921875, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.468942254781723, - "rewards/margins": 14.996536254882812, - "rewards/rejected": -15.465478897094727, - "step": 1675 - }, - { - "epoch": 0.63, - "learning_rate": 6.215889499576898e-06, - "logits/chosen": -3.761094570159912, - "logits/rejected": -6.508837699890137, - "logps/chosen": -361.6702880859375, - "logps/rejected": -489.43768310546875, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.912182629108429, - "rewards/margins": 28.76152229309082, - "rewards/rejected": -27.849340438842773, - "step": 1676 - }, - { - "epoch": 0.63, - "learning_rate": 6.204856227855853e-06, - "logits/chosen": -2.6162524223327637, - "logits/rejected": -1.212105393409729, - "logps/chosen": -503.5686340332031, - "logps/rejected": -841.8724365234375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.455963134765625, - "rewards/margins": 10.286114692687988, - "rewards/rejected": -12.742077827453613, - "step": 1677 - }, - { - "epoch": 0.63, - "learning_rate": 6.193828350827222e-06, - "logits/chosen": -6.4068522453308105, - "logits/rejected": -0.8030967712402344, - "logps/chosen": -555.3776245117188, - "logps/rejected": -3761.210693359375, - "loss": 0.0004, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.3688903748989105, - "rewards/margins": 51.67625427246094, - "rewards/rejected": -52.045143127441406, - "step": 1678 - }, - { - "epoch": 0.63, - "learning_rate": 6.18280588416683e-06, - "logits/chosen": -1.069506287574768, - "logits/rejected": -2.680110216140747, - "logps/chosen": -355.2396240234375, - "logps/rejected": -419.91497802734375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.13221131265163422, - "rewards/margins": 13.195779800415039, - "rewards/rejected": -13.063568115234375, - "step": 1679 - }, - { - "epoch": 0.64, - "learning_rate": 6.171788843542809e-06, - "logits/chosen": -1.9981764554977417, - "logits/rejected": -0.5432388186454773, - "logps/chosen": -341.6300964355469, - "logps/rejected": -865.544921875, - "loss": 0.0193, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.28885194659233093, - "rewards/margins": 27.993776321411133, - "rewards/rejected": -28.282629013061523, - "step": 1680 - }, - { - "epoch": 0.64, - "learning_rate": 6.160777244615578e-06, - "logits/chosen": -1.1761738061904907, - "logits/rejected": -6.279900550842285, - "logps/chosen": -592.1866455078125, - "logps/rejected": -314.9005126953125, - "loss": 0.0016, - "rewards/accuracies": 1.0, - "rewards/chosen": 3.235302686691284, - "rewards/margins": 19.920225143432617, - "rewards/rejected": -16.68492317199707, - "step": 1681 - }, - { - "epoch": 0.64, - "learning_rate": 6.149771103037821e-06, - "logits/chosen": -2.2046091556549072, - "logits/rejected": -0.866398811340332, - "logps/chosen": -396.24346923828125, - "logps/rejected": -895.4492797851562, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.24393311142921448, - "rewards/margins": 25.634063720703125, - "rewards/rejected": -25.3901309967041, - "step": 1682 - }, - { - "epoch": 0.64, - "learning_rate": 6.1387704344544684e-06, - "logits/chosen": -1.545455813407898, - "logits/rejected": -2.0006535053253174, - "logps/chosen": -388.6431579589844, - "logps/rejected": -776.688720703125, - "loss": 0.0041, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.3201080560684204, - "rewards/margins": 38.14446258544922, - "rewards/rejected": -36.82435607910156, - "step": 1683 - }, - { - "epoch": 0.64, - "learning_rate": 6.127775254502668e-06, - "logits/chosen": -1.8607299327850342, - "logits/rejected": -2.9925074577331543, - "logps/chosen": -226.62562561035156, - "logps/rejected": -513.4922485351562, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.29925233125686646, - "rewards/margins": 24.389476776123047, - "rewards/rejected": -24.090225219726562, - "step": 1684 - }, - { - "epoch": 0.64, - "learning_rate": 6.1167855788117655e-06, - "logits/chosen": -5.781004428863525, - "logits/rejected": 0.41365060210227966, - "logps/chosen": -339.3544921875, - "logps/rejected": -1948.8927001953125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.639788806438446, - "rewards/margins": 37.60913848876953, - "rewards/rejected": -38.24892807006836, - "step": 1685 - }, - { - "epoch": 0.64, - "learning_rate": 6.1058014230032795e-06, - "logits/chosen": -1.0489879846572876, - "logits/rejected": -3.744781255722046, - "logps/chosen": -385.031005859375, - "logps/rejected": -231.09512329101562, - "loss": 0.0263, - "rewards/accuracies": 1.0, - "rewards/chosen": -5.89715576171875, - "rewards/margins": 1.4526491165161133, - "rewards/rejected": -7.349804878234863, - "step": 1686 - }, - { - "epoch": 0.64, - "learning_rate": 6.094822802690886e-06, - "logits/chosen": -1.4040993452072144, - "logits/rejected": -1.21030855178833, - "logps/chosen": -412.24359130859375, - "logps/rejected": -1054.7674560546875, - "loss": 0.0013, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.4352783262729645, - "rewards/margins": 43.740577697753906, - "rewards/rejected": -43.3052978515625, - "step": 1687 - }, - { - "epoch": 0.64, - "learning_rate": 6.083849733480394e-06, - "logits/chosen": -2.275726795196533, - "logits/rejected": -5.053549766540527, - "logps/chosen": -862.6427001953125, - "logps/rejected": -1050.68505859375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 2.0736451148986816, - "rewards/margins": 50.79206848144531, - "rewards/rejected": -48.718421936035156, - "step": 1688 - }, - { - "epoch": 0.64, - "learning_rate": 6.072882230969716e-06, - "logits/chosen": -4.568892478942871, - "logits/rejected": -0.4402199983596802, - "logps/chosen": -987.4176025390625, - "logps/rejected": -2693.52001953125, - "loss": 0.0015, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.105560302734375, - "rewards/margins": 44.13124465942383, - "rewards/rejected": -44.02568435668945, - "step": 1689 - }, - { - "epoch": 0.64, - "learning_rate": 6.061920310748858e-06, - "logits/chosen": -1.073793888092041, - "logits/rejected": -4.849191188812256, - "logps/chosen": -329.5784912109375, - "logps/rejected": -324.56719970703125, - "loss": 0.0003, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.5382202863693237, - "rewards/margins": 19.409624099731445, - "rewards/rejected": -20.947843551635742, - "step": 1690 - }, - { - "epoch": 0.64, - "learning_rate": 6.050963988399883e-06, - "logits/chosen": -7.334988117218018, - "logits/rejected": -6.274346351623535, - "logps/chosen": -399.7458801269531, - "logps/rejected": -1428.3427734375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.1978912353515625, - "rewards/margins": 24.494155883789062, - "rewards/rejected": -24.2962646484375, - "step": 1691 - }, - { - "epoch": 0.64, - "learning_rate": 6.040013279496908e-06, - "logits/chosen": -2.991533041000366, - "logits/rejected": -1.1843831539154053, - "logps/chosen": -404.7762451171875, - "logps/rejected": -1066.1328125, - "loss": 0.0003, - "rewards/accuracies": 1.0, - "rewards/chosen": 2.8052124977111816, - "rewards/margins": 21.41449546813965, - "rewards/rejected": -18.609283447265625, - "step": 1692 - }, - { - "epoch": 0.64, - "learning_rate": 6.0290681996060605e-06, - "logits/chosen": -2.343977212905884, - "logits/rejected": -2.6992716789245605, - "logps/chosen": -287.0457763671875, - "logps/rejected": -338.4598083496094, - "loss": 0.0003, - "rewards/accuracies": 1.0, - "rewards/chosen": -8.10023021697998, - "rewards/margins": 6.112889289855957, - "rewards/rejected": -14.213119506835938, - "step": 1693 - }, - { - "epoch": 0.64, - "learning_rate": 6.018128764285471e-06, - "logits/chosen": -0.0028490868862718344, - "logits/rejected": -5.134888172149658, - "logps/chosen": -403.8492126464844, - "logps/rejected": -126.13229370117188, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.157501220703125, - "rewards/margins": 7.868681907653809, - "rewards/rejected": -9.026183128356934, - "step": 1694 - }, - { - "epoch": 0.64, - "learning_rate": 6.007194989085247e-06, - "logits/chosen": -0.9599739909172058, - "logits/rejected": -0.6083469390869141, - "logps/chosen": -330.70721435546875, - "logps/rejected": -668.64501953125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -4.377691745758057, - "rewards/margins": 32.09558868408203, - "rewards/rejected": -36.47328186035156, - "step": 1695 - }, - { - "epoch": 0.64, - "learning_rate": 5.9962668895474486e-06, - "logits/chosen": -1.7451354265213013, - "logits/rejected": -7.366077423095703, - "logps/chosen": -616.0083618164062, - "logps/rejected": -220.5442352294922, - "loss": 0.0615, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.169219970703125, - "rewards/margins": 15.488226890563965, - "rewards/rejected": -15.31900691986084, - "step": 1696 - }, - { - "epoch": 0.64, - "learning_rate": 5.985344481206067e-06, - "logits/chosen": -1.00394868850708, - "logits/rejected": -0.9631649255752563, - "logps/chosen": -274.3924865722656, - "logps/rejected": -457.3146667480469, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.804516553878784, - "rewards/margins": 14.379733085632324, - "rewards/rejected": -17.184249877929688, - "step": 1697 - }, - { - "epoch": 0.64, - "learning_rate": 5.974427779587004e-06, - "logits/chosen": -3.9446239471435547, - "logits/rejected": -4.317941665649414, - "logps/chosen": -232.7604522705078, - "logps/rejected": -248.7587890625, - "loss": 0.0003, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.2263672351837158, - "rewards/margins": 5.965295791625977, - "rewards/rejected": -7.191662788391113, - "step": 1698 - }, - { - "epoch": 0.64, - "learning_rate": 5.963516800208056e-06, - "logits/chosen": -1.4661762714385986, - "logits/rejected": -1.0581659078598022, - "logps/chosen": -285.5640869140625, - "logps/rejected": -531.5936279296875, - "loss": 0.0208, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.9321869611740112, - "rewards/margins": 14.879288673400879, - "rewards/rejected": -16.81147575378418, - "step": 1699 - }, - { - "epoch": 0.64, - "learning_rate": 5.952611558578871e-06, - "logits/chosen": -1.8784122467041016, - "logits/rejected": -3.3402936458587646, - "logps/chosen": -297.2208557128906, - "logps/rejected": -447.7178955078125, - "loss": 0.0033, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.7326934933662415, - "rewards/margins": 20.425823211669922, - "rewards/rejected": -21.15851593017578, - "step": 1700 - }, - { - "epoch": 0.64, - "learning_rate": 5.9417120702009604e-06, - "logits/chosen": -0.8128055334091187, - "logits/rejected": -3.9373295307159424, - "logps/chosen": -257.2005920410156, - "logps/rejected": -402.0806884765625, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.6029205322265625, - "rewards/margins": 22.334598541259766, - "rewards/rejected": -20.731678009033203, - "step": 1701 - }, - { - "epoch": 0.64, - "learning_rate": 5.930818350567639e-06, - "logits/chosen": 0.18074460327625275, - "logits/rejected": -3.1597301959991455, - "logps/chosen": -306.119384765625, - "logps/rejected": -338.4993591308594, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -4.08192777633667, - "rewards/margins": 10.850471496582031, - "rewards/rejected": -14.932398796081543, - "step": 1702 - }, - { - "epoch": 0.64, - "learning_rate": 5.919930415164033e-06, - "logits/chosen": -3.108246088027954, - "logits/rejected": -2.5636677742004395, - "logps/chosen": -295.7674255371094, - "logps/rejected": -788.4189453125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.5995635986328125, - "rewards/margins": 29.998870849609375, - "rewards/rejected": -31.598434448242188, - "step": 1703 - }, - { - "epoch": 0.64, - "learning_rate": 5.90904827946704e-06, - "logits/chosen": -0.3066612780094147, - "logits/rejected": -4.315275192260742, - "logps/chosen": -570.4750366210938, - "logps/rejected": -306.8831481933594, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.09490356594324112, - "rewards/margins": 14.708466529846191, - "rewards/rejected": -14.61356258392334, - "step": 1704 - }, - { - "epoch": 0.64, - "learning_rate": 5.898171958945322e-06, - "logits/chosen": -3.4993770122528076, - "logits/rejected": -0.6863272786140442, - "logps/chosen": -257.6785888671875, - "logps/rejected": -974.980224609375, - "loss": 0.0866, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.2246856689453125, - "rewards/margins": 27.48769187927246, - "rewards/rejected": -28.712377548217773, - "step": 1705 - }, - { - "epoch": 0.64, - "learning_rate": 5.887301469059262e-06, - "logits/chosen": -4.546056270599365, - "logits/rejected": -1.5830382108688354, - "logps/chosen": -507.8575439453125, - "logps/rejected": -1004.9620971679688, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.3851380348205566, - "rewards/margins": 15.234283447265625, - "rewards/rejected": -17.619421005249023, - "step": 1706 - }, - { - "epoch": 0.65, - "learning_rate": 5.876436825260967e-06, - "logits/chosen": -1.835451602935791, - "logits/rejected": -4.931764125823975, - "logps/chosen": -781.8119506835938, - "logps/rejected": -811.8702392578125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.053192138671875, - "rewards/margins": 40.4068489074707, - "rewards/rejected": -40.46004104614258, - "step": 1707 - }, - { - "epoch": 0.65, - "learning_rate": 5.865578042994227e-06, - "logits/chosen": -1.0297907590866089, - "logits/rejected": -2.582143783569336, - "logps/chosen": -357.3160400390625, - "logps/rejected": -589.9931030273438, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.1854493618011475, - "rewards/margins": 24.67983055114746, - "rewards/rejected": -26.865280151367188, - "step": 1708 - }, - { - "epoch": 0.65, - "learning_rate": 5.854725137694501e-06, - "logits/chosen": -1.6184178590774536, - "logits/rejected": -3.2559609413146973, - "logps/chosen": -433.74249267578125, - "logps/rejected": -691.656005859375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.7029541730880737, - "rewards/margins": 36.04189682006836, - "rewards/rejected": -34.33894348144531, - "step": 1709 - }, - { - "epoch": 0.65, - "learning_rate": 5.8438781247889e-06, - "logits/chosen": -6.859554767608643, - "logits/rejected": -3.4363186359405518, - "logps/chosen": -295.6103515625, - "logps/rejected": -2593.1171875, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.4997802674770355, - "rewards/margins": 55.19601821899414, - "rewards/rejected": -54.69623947143555, - "step": 1710 - }, - { - "epoch": 0.65, - "learning_rate": 5.8330370196961506e-06, - "logits/chosen": -3.058332681655884, - "logits/rejected": -0.1997569054365158, - "logps/chosen": -335.8019714355469, - "logps/rejected": -1425.251220703125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.7452300786972046, - "rewards/margins": 51.35784912109375, - "rewards/rejected": -49.61261749267578, - "step": 1711 - }, - { - "epoch": 0.65, - "learning_rate": 5.822201837826589e-06, - "logits/chosen": -6.536210536956787, - "logits/rejected": -2.411839008331299, - "logps/chosen": -196.51077270507812, - "logps/rejected": -968.1102294921875, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.989776611328125, - "rewards/margins": 26.997394561767578, - "rewards/rejected": -29.987171173095703, - "step": 1712 - }, - { - "epoch": 0.65, - "learning_rate": 5.8113725945821245e-06, - "logits/chosen": -1.307266354560852, - "logits/rejected": -5.7484893798828125, - "logps/chosen": -530.3692016601562, - "logps/rejected": -192.31813049316406, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.576940894126892, - "rewards/margins": 12.947972297668457, - "rewards/rejected": -11.371031761169434, - "step": 1713 - }, - { - "epoch": 0.65, - "learning_rate": 5.800549305356224e-06, - "logits/chosen": -0.6616543531417847, - "logits/rejected": -0.6543686985969543, - "logps/chosen": -265.93572998046875, - "logps/rejected": -557.85986328125, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.37189027667045593, - "rewards/margins": 21.9896297454834, - "rewards/rejected": -22.361520767211914, - "step": 1714 - }, - { - "epoch": 0.65, - "learning_rate": 5.789731985533906e-06, - "logits/chosen": -7.6308722496032715, - "logits/rejected": -1.2498801946640015, - "logps/chosen": -326.3628845214844, - "logps/rejected": -2569.412109375, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.754809558391571, - "rewards/margins": 51.87331771850586, - "rewards/rejected": -51.118507385253906, - "step": 1715 - }, - { - "epoch": 0.65, - "learning_rate": 5.7789206504916815e-06, - "logits/chosen": -2.411745071411133, - "logits/rejected": -4.358522415161133, - "logps/chosen": -311.3616943359375, - "logps/rejected": -277.8157958984375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.7954071164131165, - "rewards/margins": 10.471548080444336, - "rewards/rejected": -11.266955375671387, - "step": 1716 - }, - { - "epoch": 0.65, - "learning_rate": 5.7681153155975755e-06, - "logits/chosen": 0.18474145233631134, - "logits/rejected": -1.9429086446762085, - "logps/chosen": -432.1939697265625, - "logps/rejected": -499.954833984375, - "loss": 0.0034, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.9763519763946533, - "rewards/margins": 24.279705047607422, - "rewards/rejected": -26.256057739257812, - "step": 1717 - }, - { - "epoch": 0.65, - "learning_rate": 5.757315996211066e-06, - "logits/chosen": 0.5864960551261902, - "logits/rejected": -4.0330491065979, - "logps/chosen": -248.7578125, - "logps/rejected": -211.5276336669922, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.47186279296875, - "rewards/margins": 10.642976760864258, - "rewards/rejected": -12.114839553833008, - "step": 1718 - }, - { - "epoch": 0.65, - "learning_rate": 5.746522707683093e-06, - "logits/chosen": -3.5051653385162354, - "logits/rejected": -1.6517860889434814, - "logps/chosen": -366.8246154785156, - "logps/rejected": -1142.8212890625, - "loss": 0.0002, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.2273805141448975, - "rewards/margins": 43.50662612915039, - "rewards/rejected": -45.734004974365234, - "step": 1719 - }, - { - "epoch": 0.65, - "learning_rate": 5.735735465356021e-06, - "logits/chosen": -2.441168785095215, - "logits/rejected": -2.3100473880767822, - "logps/chosen": -234.72665405273438, - "logps/rejected": -622.5770263671875, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.191502332687378, - "rewards/margins": 29.617055892944336, - "rewards/rejected": -31.808557510375977, - "step": 1720 - }, - { - "epoch": 0.65, - "learning_rate": 5.724954284563615e-06, - "logits/chosen": -7.179348945617676, - "logits/rejected": -0.970470666885376, - "logps/chosen": -410.2271423339844, - "logps/rejected": -3803.892578125, - "loss": 0.0002, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.6142669916152954, - "rewards/margins": 53.509578704833984, - "rewards/rejected": -52.89531326293945, - "step": 1721 - }, - { - "epoch": 0.65, - "learning_rate": 5.714179180631024e-06, - "logits/chosen": -6.4677886962890625, - "logits/rejected": -0.7362509965896606, - "logps/chosen": -931.3265991210938, - "logps/rejected": -3323.78955078125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.906451404094696, - "rewards/margins": 27.35431480407715, - "rewards/rejected": -28.260766983032227, - "step": 1722 - }, - { - "epoch": 0.65, - "learning_rate": 5.703410168874768e-06, - "logits/chosen": -0.23703859746456146, - "logits/rejected": -1.1847056150436401, - "logps/chosen": -379.09942626953125, - "logps/rejected": -537.1062622070312, - "loss": 0.015, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.104034423828125, - "rewards/margins": 15.9835844039917, - "rewards/rejected": -15.879549980163574, - "step": 1723 - }, - { - "epoch": 0.65, - "learning_rate": 5.692647264602698e-06, - "logits/chosen": -0.2667602598667145, - "logits/rejected": -3.766649007797241, - "logps/chosen": -335.6988830566406, - "logps/rejected": -188.14730834960938, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 2.092855930328369, - "rewards/margins": 13.228479385375977, - "rewards/rejected": -11.13562297821045, - "step": 1724 - }, - { - "epoch": 0.65, - "learning_rate": 5.681890483113982e-06, - "logits/chosen": -3.4023566246032715, - "logits/rejected": -1.8346593379974365, - "logps/chosen": -604.078857421875, - "logps/rejected": -1289.21435546875, - "loss": 0.0003, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.0407837629318237, - "rewards/margins": 27.022340774536133, - "rewards/rejected": -28.06312370300293, - "step": 1725 - }, - { - "epoch": 0.65, - "learning_rate": 5.671139839699096e-06, - "logits/chosen": 0.44624409079551697, - "logits/rejected": -4.859653949737549, - "logps/chosen": -263.59429931640625, - "logps/rejected": -188.28909301757812, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.8987823724746704, - "rewards/margins": 9.87797737121582, - "rewards/rejected": -11.77676010131836, - "step": 1726 - }, - { - "epoch": 0.65, - "learning_rate": 5.660395349639776e-06, - "logits/chosen": -0.5397381782531738, - "logits/rejected": -0.8799055218696594, - "logps/chosen": -284.9207763671875, - "logps/rejected": -554.6848754882812, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -3.062023878097534, - "rewards/margins": 18.976966857910156, - "rewards/rejected": -22.038990020751953, - "step": 1727 - }, - { - "epoch": 0.65, - "learning_rate": 5.649657028209024e-06, - "logits/chosen": -3.0067214965820312, - "logits/rejected": -2.9504787921905518, - "logps/chosen": -395.7308349609375, - "logps/rejected": -769.062744140625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -3.9670045375823975, - "rewards/margins": 20.439661026000977, - "rewards/rejected": -24.406665802001953, - "step": 1728 - }, - { - "epoch": 0.65, - "learning_rate": 5.638924890671069e-06, - "logits/chosen": -5.3224639892578125, - "logits/rejected": -1.8006831407546997, - "logps/chosen": -341.6611633300781, - "logps/rejected": -853.1162109375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.09031067043542862, - "rewards/margins": 12.080692291259766, - "rewards/rejected": -11.990381240844727, - "step": 1729 - }, - { - "epoch": 0.65, - "learning_rate": 5.62819895228135e-06, - "logits/chosen": 0.9985063672065735, - "logits/rejected": -0.37402015924453735, - "logps/chosen": -343.7839050292969, - "logps/rejected": -598.6947021484375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 2.1597352027893066, - "rewards/margins": 21.8049373626709, - "rewards/rejected": -19.64520263671875, - "step": 1730 - }, - { - "epoch": 0.65, - "learning_rate": 5.6174792282864865e-06, - "logits/chosen": -0.48314809799194336, - "logits/rejected": -2.3949620723724365, - "logps/chosen": -383.0843505859375, - "logps/rejected": -523.5531005859375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.232336401939392, - "rewards/margins": 24.044639587402344, - "rewards/rejected": -25.276975631713867, - "step": 1731 - }, - { - "epoch": 0.65, - "learning_rate": 5.6067657339242785e-06, - "logits/chosen": 0.4975649118423462, - "logits/rejected": -2.515326976776123, - "logps/chosen": -404.922607421875, - "logps/rejected": -653.9204711914062, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.6218658685684204, - "rewards/margins": 28.046457290649414, - "rewards/rejected": -28.668323516845703, - "step": 1732 - }, - { - "epoch": 0.66, - "learning_rate": 5.5960584844236565e-06, - "logits/chosen": -2.754732370376587, - "logits/rejected": -0.6402379274368286, - "logps/chosen": -341.06591796875, - "logps/rejected": -1398.905517578125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 2.53863525390625, - "rewards/margins": 71.5140609741211, - "rewards/rejected": -68.97542572021484, - "step": 1733 - }, - { - "epoch": 0.66, - "learning_rate": 5.585357495004689e-06, - "logits/chosen": -0.8237720727920532, - "logits/rejected": -1.0436326265335083, - "logps/chosen": -296.6575622558594, - "logps/rejected": -582.94140625, - "loss": 0.0012, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.7404999136924744, - "rewards/margins": 20.05325698852539, - "rewards/rejected": -20.79375648498535, - "step": 1734 - }, - { - "epoch": 0.66, - "learning_rate": 5.574662780878533e-06, - "logits/chosen": -6.23117733001709, - "logits/rejected": -1.9355411529541016, - "logps/chosen": -424.1733703613281, - "logps/rejected": -1782.2437744140625, - "loss": 0.0469, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.9764038324356079, - "rewards/margins": 23.668041229248047, - "rewards/rejected": -24.644445419311523, - "step": 1735 - }, - { - "epoch": 0.66, - "learning_rate": 5.563974357247428e-06, - "logits/chosen": -1.3437131643295288, - "logits/rejected": -3.978137731552124, - "logps/chosen": -372.833984375, - "logps/rejected": -364.8005065917969, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.49827271699905396, - "rewards/margins": 14.812323570251465, - "rewards/rejected": -14.314050674438477, - "step": 1736 - }, - { - "epoch": 0.66, - "learning_rate": 5.553292239304675e-06, - "logits/chosen": -2.4785349369049072, - "logits/rejected": -5.2335638999938965, - "logps/chosen": -279.3006896972656, - "logps/rejected": -338.93463134765625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 3.60137939453125, - "rewards/margins": 25.427404403686523, - "rewards/rejected": -21.826025009155273, - "step": 1737 - }, - { - "epoch": 0.66, - "learning_rate": 5.542616442234618e-06, - "logits/chosen": -3.6810057163238525, - "logits/rejected": 0.4394681751728058, - "logps/chosen": -389.0955810546875, - "logps/rejected": -1041.433837890625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -5.105569362640381, - "rewards/margins": 13.92959976196289, - "rewards/rejected": -19.03516960144043, - "step": 1738 - }, - { - "epoch": 0.66, - "learning_rate": 5.531946981212599e-06, - "logits/chosen": -3.677375316619873, - "logits/rejected": -0.17553775012493134, - "logps/chosen": -473.9128112792969, - "logps/rejected": -1151.04931640625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.433786004781723, - "rewards/margins": 14.476602554321289, - "rewards/rejected": -14.042816162109375, - "step": 1739 - }, - { - "epoch": 0.66, - "learning_rate": 5.521283871404972e-06, - "logits/chosen": -2.7584104537963867, - "logits/rejected": -0.11598027497529984, - "logps/chosen": -217.9664306640625, - "logps/rejected": -529.931884765625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.9028611183166504, - "rewards/margins": 17.009572982788086, - "rewards/rejected": -19.912433624267578, - "step": 1740 - }, - { - "epoch": 0.66, - "learning_rate": 5.510627127969048e-06, - "logits/chosen": -0.8263119459152222, - "logits/rejected": -2.5995640754699707, - "logps/chosen": -226.56475830078125, - "logps/rejected": -395.8642578125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.5159530639648438, - "rewards/margins": 22.186288833618164, - "rewards/rejected": -22.702241897583008, - "step": 1741 - }, - { - "epoch": 0.66, - "learning_rate": 5.4999767660530925e-06, - "logits/chosen": -1.9351311922073364, - "logits/rejected": -5.077252388000488, - "logps/chosen": -385.41595458984375, - "logps/rejected": -454.62298583984375, - "loss": 0.0006, - "rewards/accuracies": 1.0, - "rewards/chosen": -3.951641798019409, - "rewards/margins": 24.204790115356445, - "rewards/rejected": -28.156431198120117, - "step": 1742 - }, - { - "epoch": 0.66, - "learning_rate": 5.48933280079631e-06, - "logits/chosen": -5.69416618347168, - "logits/rejected": -2.7345945835113525, - "logps/chosen": -778.54541015625, - "logps/rejected": -2828.121337890625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.14365234971046448, - "rewards/margins": 36.92961502075195, - "rewards/rejected": -36.78596115112305, - "step": 1743 - }, - { - "epoch": 0.66, - "learning_rate": 5.478695247328798e-06, - "logits/chosen": -1.5393218994140625, - "logits/rejected": -5.081031799316406, - "logps/chosen": -318.43414306640625, - "logps/rejected": -638.7138671875, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.8223541975021362, - "rewards/margins": 41.932960510253906, - "rewards/rejected": -40.1106071472168, - "step": 1744 - }, - { - "epoch": 0.66, - "learning_rate": 5.468064120771544e-06, - "logits/chosen": -2.0533344745635986, - "logits/rejected": -1.652483582496643, - "logps/chosen": -334.259033203125, - "logps/rejected": -765.9406127929688, - "loss": 0.0867, - "rewards/accuracies": 1.0, - "rewards/chosen": -3.5858826637268066, - "rewards/margins": 25.989709854125977, - "rewards/rejected": -29.575592041015625, - "step": 1745 - }, - { - "epoch": 0.66, - "learning_rate": 5.457439436236407e-06, - "logits/chosen": -3.6500396728515625, - "logits/rejected": -0.5006041526794434, - "logps/chosen": -924.813232421875, - "logps/rejected": -3049.484375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.251983642578125, - "rewards/margins": 70.21269226074219, - "rewards/rejected": -72.46467590332031, - "step": 1746 - }, - { - "epoch": 0.66, - "learning_rate": 5.446821208826083e-06, - "logits/chosen": -1.6999647617340088, - "logits/rejected": -3.1266558170318604, - "logps/chosen": -433.2916259765625, - "logps/rejected": -628.2191162109375, - "loss": 0.1835, - "rewards/accuracies": 1.0, - "rewards/chosen": 2.611398458480835, - "rewards/margins": 18.220367431640625, - "rewards/rejected": -15.608969688415527, - "step": 1747 - }, - { - "epoch": 0.66, - "learning_rate": 5.436209453634087e-06, - "logits/chosen": -1.0648598670959473, - "logits/rejected": -2.1760690212249756, - "logps/chosen": -379.2437438964844, - "logps/rejected": -433.4465637207031, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.462533563375473, - "rewards/margins": 21.638391494750977, - "rewards/rejected": -22.10092544555664, - "step": 1748 - }, - { - "epoch": 0.66, - "learning_rate": 5.4256041857447415e-06, - "logits/chosen": -4.118107318878174, - "logits/rejected": -1.4412983655929565, - "logps/chosen": -347.444091796875, - "logps/rejected": -1024.676513671875, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.33584901690483093, - "rewards/margins": 30.33928108215332, - "rewards/rejected": -30.00343132019043, - "step": 1749 - }, - { - "epoch": 0.66, - "learning_rate": 5.415005420233141e-06, - "logits/chosen": -4.577075004577637, - "logits/rejected": 0.14357320964336395, - "logps/chosen": -521.1715087890625, - "logps/rejected": -1875.2530517578125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.2854737043380737, - "rewards/margins": 60.5932502746582, - "rewards/rejected": -61.87872314453125, - "step": 1750 - }, - { - "epoch": 0.66, - "learning_rate": 5.404413172165133e-06, - "logits/chosen": -2.672370433807373, - "logits/rejected": -3.145216703414917, - "logps/chosen": -304.58935546875, - "logps/rejected": -461.259521484375, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.7185088992118835, - "rewards/margins": 16.71059799194336, - "rewards/rejected": -17.429107666015625, - "step": 1751 - }, - { - "epoch": 0.66, - "learning_rate": 5.393827456597317e-06, - "logits/chosen": -4.497753143310547, - "logits/rejected": -0.9643060564994812, - "logps/chosen": -563.3421630859375, - "logps/rejected": -1837.3809814453125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.776727318763733, - "rewards/margins": 34.26161575317383, - "rewards/rejected": -36.0383415222168, - "step": 1752 - }, - { - "epoch": 0.66, - "learning_rate": 5.3832482885769855e-06, - "logits/chosen": -2.327847480773926, - "logits/rejected": -0.4126969277858734, - "logps/chosen": -298.5181884765625, - "logps/rejected": -846.773681640625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.8122100830078125, - "rewards/margins": 29.25453758239746, - "rewards/rejected": -31.066747665405273, - "step": 1753 - }, - { - "epoch": 0.66, - "learning_rate": 5.372675683142146e-06, - "logits/chosen": -3.030263662338257, - "logits/rejected": -2.033998966217041, - "logps/chosen": -261.2111511230469, - "logps/rejected": -727.0556640625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.6086700558662415, - "rewards/margins": 20.90117835998535, - "rewards/rejected": -21.50984764099121, - "step": 1754 - }, - { - "epoch": 0.66, - "learning_rate": 5.362109655321455e-06, - "logits/chosen": -1.2057136297225952, - "logits/rejected": -2.2413790225982666, - "logps/chosen": -258.9244384765625, - "logps/rejected": -517.4808349609375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.6581695675849915, - "rewards/margins": 25.336158752441406, - "rewards/rejected": -25.994327545166016, - "step": 1755 - }, - { - "epoch": 0.66, - "learning_rate": 5.35155022013424e-06, - "logits/chosen": -6.6896281242370605, - "logits/rejected": -3.9578843116760254, - "logps/chosen": -318.1531982421875, - "logps/rejected": -1316.64306640625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -3.4443604946136475, - "rewards/margins": 18.496484756469727, - "rewards/rejected": -21.940845489501953, - "step": 1756 - }, - { - "epoch": 0.66, - "learning_rate": 5.340997392590439e-06, - "logits/chosen": -4.764989376068115, - "logits/rejected": -2.2431788444519043, - "logps/chosen": -338.6490478515625, - "logps/rejected": -745.08642578125, - "loss": 0.0003, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.296838402748108, - "rewards/margins": 17.081995010375977, - "rewards/rejected": -18.378833770751953, - "step": 1757 - }, - { - "epoch": 0.66, - "learning_rate": 5.330451187690614e-06, - "logits/chosen": -3.9807426929473877, - "logits/rejected": -0.8604978322982788, - "logps/chosen": -222.23764038085938, - "logps/rejected": -901.5648193359375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.1089019775390625, - "rewards/margins": 29.641891479492188, - "rewards/rejected": -31.75079345703125, - "step": 1758 - }, - { - "epoch": 0.67, - "learning_rate": 5.319911620425897e-06, - "logits/chosen": -1.8099936246871948, - "logits/rejected": -4.163671016693115, - "logps/chosen": -310.73907470703125, - "logps/rejected": -290.95721435546875, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.36694031953811646, - "rewards/margins": 7.344932556152344, - "rewards/rejected": -7.7118730545043945, - "step": 1759 - }, - { - "epoch": 0.67, - "learning_rate": 5.309378705778e-06, - "logits/chosen": -5.257638454437256, - "logits/rejected": -1.233153223991394, - "logps/chosen": -253.22914123535156, - "logps/rejected": -1387.712890625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.5796005725860596, - "rewards/margins": 45.5827751159668, - "rewards/rejected": -44.003173828125, - "step": 1760 - }, - { - "epoch": 0.67, - "learning_rate": 5.298852458719168e-06, - "logits/chosen": -8.103473663330078, - "logits/rejected": -1.4661288261413574, - "logps/chosen": -290.53924560546875, - "logps/rejected": -1876.9315185546875, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.5095428824424744, - "rewards/margins": 36.9654426574707, - "rewards/rejected": -36.45589828491211, - "step": 1761 - }, - { - "epoch": 0.67, - "learning_rate": 5.288332894212168e-06, - "logits/chosen": -4.3305840492248535, - "logits/rejected": -0.5076600313186646, - "logps/chosen": -262.1795654296875, - "logps/rejected": -739.9126586914062, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.2800750732421875, - "rewards/margins": 12.575119018554688, - "rewards/rejected": -12.855194091796875, - "step": 1762 - }, - { - "epoch": 0.67, - "learning_rate": 5.277820027210279e-06, - "logits/chosen": -1.581613540649414, - "logits/rejected": -2.930393695831299, - "logps/chosen": -261.184814453125, - "logps/rejected": -579.8916015625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.266076683998108, - "rewards/margins": 35.35628128051758, - "rewards/rejected": -36.62235641479492, - "step": 1763 - }, - { - "epoch": 0.67, - "learning_rate": 5.267313872657242e-06, - "logits/chosen": -8.158690452575684, - "logits/rejected": -1.6164847612380981, - "logps/chosen": -199.25076293945312, - "logps/rejected": -1711.351318359375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.7523712515830994, - "rewards/margins": 42.44905471801758, - "rewards/rejected": -41.69668197631836, - "step": 1764 - }, - { - "epoch": 0.67, - "learning_rate": 5.256814445487275e-06, - "logits/chosen": -8.25692367553711, - "logits/rejected": -1.1138501167297363, - "logps/chosen": -216.93215942382812, - "logps/rejected": -2001.529052734375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.2920516729354858, - "rewards/margins": 28.158143997192383, - "rewards/rejected": -29.4501953125, - "step": 1765 - }, - { - "epoch": 0.67, - "learning_rate": 5.246321760625025e-06, - "logits/chosen": -6.778804302215576, - "logits/rejected": -2.553199291229248, - "logps/chosen": -341.9083557128906, - "logps/rejected": -1468.1851806640625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.4629058837890625, - "rewards/margins": 45.95420837402344, - "rewards/rejected": -45.491302490234375, - "step": 1766 - }, - { - "epoch": 0.67, - "learning_rate": 5.235835832985552e-06, - "logits/chosen": -1.3970905542373657, - "logits/rejected": -1.362433910369873, - "logps/chosen": -593.8340454101562, - "logps/rejected": -1290.249267578125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.674603283405304, - "rewards/margins": 50.56907272338867, - "rewards/rejected": -49.89447021484375, - "step": 1767 - }, - { - "epoch": 0.67, - "learning_rate": 5.225356677474309e-06, - "logits/chosen": -7.175508975982666, - "logits/rejected": -2.9235470294952393, - "logps/chosen": -631.67041015625, - "logps/rejected": -2803.40283203125, - "loss": 0.0005, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.13277588784694672, - "rewards/margins": 48.7631950378418, - "rewards/rejected": -48.63042068481445, - "step": 1768 - }, - { - "epoch": 0.67, - "learning_rate": 5.214884308987136e-06, - "logits/chosen": -5.417954921722412, - "logits/rejected": -1.1937623023986816, - "logps/chosen": -607.0240478515625, - "logps/rejected": -2568.634033203125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 3.1331238746643066, - "rewards/margins": 93.94155883789062, - "rewards/rejected": -90.80843353271484, - "step": 1769 - }, - { - "epoch": 0.67, - "learning_rate": 5.204418742410212e-06, - "logits/chosen": -0.5039923191070557, - "logits/rejected": -2.082688808441162, - "logps/chosen": -301.56134033203125, - "logps/rejected": -240.05804443359375, - "loss": 0.0057, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.469311475753784, - "rewards/margins": 3.0598056316375732, - "rewards/rejected": -5.529117107391357, - "step": 1770 - }, - { - "epoch": 0.67, - "learning_rate": 5.193959992620048e-06, - "logits/chosen": -3.011136293411255, - "logits/rejected": -6.369124889373779, - "logps/chosen": -163.808837890625, - "logps/rejected": -220.65260314941406, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.3329315185546875, - "rewards/margins": 13.250961303710938, - "rewards/rejected": -13.583892822265625, - "step": 1771 - }, - { - "epoch": 0.67, - "learning_rate": 5.183508074483478e-06, - "logits/chosen": -4.457911491394043, - "logits/rejected": 0.7299575805664062, - "logps/chosen": -341.7496032714844, - "logps/rejected": -1179.10986328125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.19287109375, - "rewards/margins": 26.179590225219727, - "rewards/rejected": -28.372461318969727, - "step": 1772 - }, - { - "epoch": 0.67, - "learning_rate": 5.1730630028576055e-06, - "logits/chosen": -1.5241948366165161, - "logits/rejected": -4.800647258758545, - "logps/chosen": -290.767578125, - "logps/rejected": -423.06689453125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.4488648176193237, - "rewards/margins": 24.00921630859375, - "rewards/rejected": -25.458080291748047, - "step": 1773 - }, - { - "epoch": 0.67, - "learning_rate": 5.1626247925898175e-06, - "logits/chosen": -5.857822895050049, - "logits/rejected": -0.14528757333755493, - "logps/chosen": -299.6321105957031, - "logps/rejected": -1952.9775390625, - "loss": 0.0002, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.8117249011993408, - "rewards/margins": 47.50923538208008, - "rewards/rejected": -49.320960998535156, - "step": 1774 - }, - { - "epoch": 0.67, - "learning_rate": 5.1521934585177465e-06, - "logits/chosen": -3.159721612930298, - "logits/rejected": 0.4843699038028717, - "logps/chosen": -394.40533447265625, - "logps/rejected": -1629.316162109375, - "loss": 0.0002, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.868670642375946, - "rewards/margins": 47.09440612792969, - "rewards/rejected": -46.22573471069336, - "step": 1775 - }, - { - "epoch": 0.67, - "learning_rate": 5.14176901546924e-06, - "logits/chosen": -6.017335891723633, - "logits/rejected": -4.121520519256592, - "logps/chosen": -667.0463256835938, - "logps/rejected": -2593.87939453125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.6738342642784119, - "rewards/margins": 23.12885284423828, - "rewards/rejected": -23.80268669128418, - "step": 1776 - }, - { - "epoch": 0.67, - "learning_rate": 5.131351478262364e-06, - "logits/chosen": -5.618100643157959, - "logits/rejected": -1.5163650512695312, - "logps/chosen": -344.2745361328125, - "logps/rejected": -2044.94287109375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.3595550060272217, - "rewards/margins": 49.19499969482422, - "rewards/rejected": -51.5545539855957, - "step": 1777 - }, - { - "epoch": 0.67, - "learning_rate": 5.120940861705357e-06, - "logits/chosen": -5.763235569000244, - "logits/rejected": -2.9696197509765625, - "logps/chosen": -755.506591796875, - "logps/rejected": -2536.38671875, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.7584717273712158, - "rewards/margins": 34.83291244506836, - "rewards/rejected": -33.074440002441406, - "step": 1778 - }, - { - "epoch": 0.67, - "learning_rate": 5.11053718059662e-06, - "logits/chosen": -1.5624281167984009, - "logits/rejected": -3.4344725608825684, - "logps/chosen": -470.7655029296875, - "logps/rejected": -381.8555603027344, - "loss": 0.0002, - "rewards/accuracies": 1.0, - "rewards/chosen": -3.8416993618011475, - "rewards/margins": 6.5770721435546875, - "rewards/rejected": -10.418771743774414, - "step": 1779 - }, - { - "epoch": 0.67, - "learning_rate": 5.1001404497247055e-06, - "logits/chosen": -2.5064516067504883, - "logits/rejected": -2.62270188331604, - "logps/chosen": -217.62765502929688, - "logps/rejected": -208.51629638671875, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.8327850699424744, - "rewards/margins": 16.15140724182129, - "rewards/rejected": -16.98419189453125, - "step": 1780 - }, - { - "epoch": 0.67, - "learning_rate": 5.089750683868279e-06, - "logits/chosen": -5.153730392456055, - "logits/rejected": -7.351088523864746, - "logps/chosen": -173.19888305664062, - "logps/rejected": -169.6979522705078, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -3.2135300636291504, - "rewards/margins": 7.790515422821045, - "rewards/rejected": -11.004045486450195, - "step": 1781 - }, - { - "epoch": 0.67, - "learning_rate": 5.079367897796102e-06, - "logits/chosen": -1.2591372728347778, - "logits/rejected": -1.2363108396530151, - "logps/chosen": -290.2259521484375, - "logps/rejected": -696.9443359375, - "loss": 0.0003, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.642742931842804, - "rewards/margins": 29.970691680908203, - "rewards/rejected": -30.613433837890625, - "step": 1782 - }, - { - "epoch": 0.67, - "learning_rate": 5.068992106267021e-06, - "logits/chosen": -6.175971508026123, - "logits/rejected": -3.329375743865967, - "logps/chosen": -193.19273376464844, - "logps/rejected": -785.8245849609375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -3.109736680984497, - "rewards/margins": 21.273283004760742, - "rewards/rejected": -24.383020401000977, - "step": 1783 - }, - { - "epoch": 0.67, - "learning_rate": 5.058623324029944e-06, - "logits/chosen": -2.732069253921509, - "logits/rejected": -3.5669267177581787, - "logps/chosen": -222.63998413085938, - "logps/rejected": -488.8447570800781, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.299432396888733, - "rewards/margins": 25.522109985351562, - "rewards/rejected": -26.821542739868164, - "step": 1784 - }, - { - "epoch": 0.67, - "learning_rate": 5.048261565823802e-06, - "logits/chosen": -0.6106477379798889, - "logits/rejected": -4.748279571533203, - "logps/chosen": -542.4224853515625, - "logps/rejected": -466.7742919921875, - "loss": 0.0028, - "rewards/accuracies": 1.0, - "rewards/chosen": 3.1070556640625, - "rewards/margins": 32.33407974243164, - "rewards/rejected": -29.22702407836914, - "step": 1785 - }, - { - "epoch": 0.68, - "learning_rate": 5.037906846377556e-06, - "logits/chosen": -1.911941409111023, - "logits/rejected": -2.6948354244232178, - "logps/chosen": -194.29721069335938, - "logps/rejected": -340.1499328613281, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.4902282655239105, - "rewards/margins": 15.83322525024414, - "rewards/rejected": -16.323453903198242, - "step": 1786 - }, - { - "epoch": 0.68, - "learning_rate": 5.027559180410151e-06, - "logits/chosen": -2.603952646255493, - "logits/rejected": -1.4494132995605469, - "logps/chosen": -886.608642578125, - "logps/rejected": -1311.8558349609375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -3.48529052734375, - "rewards/margins": 13.295825958251953, - "rewards/rejected": -16.781116485595703, - "step": 1787 - }, - { - "epoch": 0.68, - "learning_rate": 5.017218582630507e-06, - "logits/chosen": -0.40608084201812744, - "logits/rejected": 0.08247775584459305, - "logps/chosen": -551.1524658203125, - "logps/rejected": -1166.417236328125, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.3214355409145355, - "rewards/margins": 38.45084762573242, - "rewards/rejected": -38.12941360473633, - "step": 1788 - }, - { - "epoch": 0.68, - "learning_rate": 5.0068850677375036e-06, - "logits/chosen": -2.350843667984009, - "logits/rejected": -4.05448055267334, - "logps/chosen": -592.4766845703125, - "logps/rejected": -933.32568359375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 3.328991651535034, - "rewards/margins": 43.706077575683594, - "rewards/rejected": -40.3770866394043, - "step": 1789 - }, - { - "epoch": 0.68, - "learning_rate": 4.996558650419942e-06, - "logits/chosen": -1.4547593593597412, - "logits/rejected": -1.292366623878479, - "logps/chosen": -343.5788879394531, - "logps/rejected": -596.1007080078125, - "loss": 0.0002, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.660229504108429, - "rewards/margins": 16.110645294189453, - "rewards/rejected": -15.45041561126709, - "step": 1790 - }, - { - "epoch": 0.68, - "learning_rate": 4.98623934535655e-06, - "logits/chosen": -0.02113799937069416, - "logits/rejected": -1.1022510528564453, - "logps/chosen": -221.67713928222656, - "logps/rejected": -429.146240234375, - "loss": 0.0006, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.9314926862716675, - "rewards/margins": 19.634986877441406, - "rewards/rejected": -21.566478729248047, - "step": 1791 - }, - { - "epoch": 0.68, - "learning_rate": 4.975927167215924e-06, - "logits/chosen": -1.0292164087295532, - "logits/rejected": -5.711843967437744, - "logps/chosen": -634.62451171875, - "logps/rejected": -139.20840454101562, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 2.174816846847534, - "rewards/margins": 11.181873321533203, - "rewards/rejected": -9.00705623626709, - "step": 1792 - }, - { - "epoch": 0.68, - "learning_rate": 4.965622130656551e-06, - "logits/chosen": -7.814990043640137, - "logits/rejected": -1.425409197807312, - "logps/chosen": -169.67398071289062, - "logps/rejected": -5251.4912109375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.5339081287384033, - "rewards/margins": 27.795289993286133, - "rewards/rejected": -29.329198837280273, - "step": 1793 - }, - { - "epoch": 0.68, - "learning_rate": 4.955324250326749e-06, - "logits/chosen": -7.014678478240967, - "logits/rejected": -1.5729999542236328, - "logps/chosen": -700.013671875, - "logps/rejected": -2868.22119140625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.22561036050319672, - "rewards/margins": 50.98064041137695, - "rewards/rejected": -50.75503158569336, - "step": 1794 - }, - { - "epoch": 0.68, - "learning_rate": 4.94503354086468e-06, - "logits/chosen": -4.145731449127197, - "logits/rejected": -0.7114165425300598, - "logps/chosen": -322.12286376953125, - "logps/rejected": -913.5203857421875, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.799896240234375, - "rewards/margins": 20.11048698425293, - "rewards/rejected": -21.910383224487305, - "step": 1795 - }, - { - "epoch": 0.68, - "learning_rate": 4.934750016898295e-06, - "logits/chosen": -4.645314693450928, - "logits/rejected": -2.3184845447540283, - "logps/chosen": -384.08544921875, - "logps/rejected": -823.95654296875, - "loss": 0.0003, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.937371850013733, - "rewards/margins": 18.221160888671875, - "rewards/rejected": -20.158533096313477, - "step": 1796 - }, - { - "epoch": 0.68, - "learning_rate": 4.924473693045349e-06, - "logits/chosen": -1.201181411743164, - "logits/rejected": -4.701900005340576, - "logps/chosen": -977.2125244140625, - "logps/rejected": -139.7589111328125, - "loss": 0.0007, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.39830324053764343, - "rewards/margins": 8.91496753692627, - "rewards/rejected": -8.516664505004883, - "step": 1797 - }, - { - "epoch": 0.68, - "learning_rate": 4.914204583913349e-06, - "logits/chosen": -3.151472330093384, - "logits/rejected": -1.4581900835037231, - "logps/chosen": -334.74267578125, - "logps/rejected": -981.214111328125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -3.731213331222534, - "rewards/margins": 39.30555725097656, - "rewards/rejected": -43.03676986694336, - "step": 1798 - }, - { - "epoch": 0.68, - "learning_rate": 4.903942704099548e-06, - "logits/chosen": -5.663306713104248, - "logits/rejected": -4.064094066619873, - "logps/chosen": -235.14852905273438, - "logps/rejected": -1165.266357421875, - "loss": 0.0034, - "rewards/accuracies": 1.0, - "rewards/chosen": -4.646429538726807, - "rewards/margins": 42.30064010620117, - "rewards/rejected": -46.94707107543945, - "step": 1799 - }, - { - "epoch": 0.68, - "learning_rate": 4.893688068190933e-06, - "logits/chosen": -7.608615875244141, - "logits/rejected": -0.7558350563049316, - "logps/chosen": -342.8921813964844, - "logps/rejected": -1877.5799560546875, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -3.5304932594299316, - "rewards/margins": 17.81056022644043, - "rewards/rejected": -21.341053009033203, - "step": 1800 - }, - { - "epoch": 0.68, - "learning_rate": 4.8834406907641784e-06, - "logits/chosen": -0.931830644607544, - "logits/rejected": -2.3578991889953613, - "logps/chosen": -217.0418243408203, - "logps/rejected": -484.2434997558594, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.458282470703125, - "rewards/margins": 27.322126388549805, - "rewards/rejected": -27.78040885925293, - "step": 1801 - }, - { - "epoch": 0.68, - "learning_rate": 4.8732005863856545e-06, - "logits/chosen": -1.5923174619674683, - "logits/rejected": -1.623587727546692, - "logps/chosen": -167.0509490966797, - "logps/rejected": -402.196533203125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.3145248591899872, - "rewards/margins": 17.73283576965332, - "rewards/rejected": -17.418310165405273, - "step": 1802 - }, - { - "epoch": 0.68, - "learning_rate": 4.862967769611389e-06, - "logits/chosen": -2.6081738471984863, - "logits/rejected": -1.0419076681137085, - "logps/chosen": -864.0963134765625, - "logps/rejected": -1961.409423828125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -4.449304103851318, - "rewards/margins": 66.88028717041016, - "rewards/rejected": -71.32958984375, - "step": 1803 - }, - { - "epoch": 0.68, - "learning_rate": 4.8527422549870495e-06, - "logits/chosen": -5.4526896476745605, - "logits/rejected": -0.178870290517807, - "logps/chosen": -219.48004150390625, - "logps/rejected": -1374.4578857421875, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.5836944580078125, - "rewards/margins": 56.0459098815918, - "rewards/rejected": -57.62960433959961, - "step": 1804 - }, - { - "epoch": 0.68, - "learning_rate": 4.842524057047916e-06, - "logits/chosen": -3.933063507080078, - "logits/rejected": -2.308522939682007, - "logps/chosen": -254.37741088867188, - "logps/rejected": -431.0345458984375, - "loss": 0.0008, - "rewards/accuracies": 1.0, - "rewards/chosen": -3.246141195297241, - "rewards/margins": 5.005890846252441, - "rewards/rejected": -8.252032279968262, - "step": 1805 - }, - { - "epoch": 0.68, - "learning_rate": 4.832313190318886e-06, - "logits/chosen": -1.4868614673614502, - "logits/rejected": -3.2197275161743164, - "logps/chosen": -249.20806884765625, - "logps/rejected": -762.78955078125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.2022080421447754, - "rewards/margins": 52.34237289428711, - "rewards/rejected": -54.54458236694336, - "step": 1806 - }, - { - "epoch": 0.68, - "learning_rate": 4.822109669314419e-06, - "logits/chosen": -0.3995899558067322, - "logits/rejected": -0.6667672991752625, - "logps/chosen": -497.34716796875, - "logps/rejected": -689.0765380859375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.565106213092804, - "rewards/margins": 8.464605331420898, - "rewards/rejected": -7.899499416351318, - "step": 1807 - }, - { - "epoch": 0.68, - "learning_rate": 4.8119135085385375e-06, - "logits/chosen": -0.7545146942138672, - "logits/rejected": -4.779458522796631, - "logps/chosen": -348.8209533691406, - "logps/rejected": -146.5231475830078, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.07669983059167862, - "rewards/margins": 9.893877029418945, - "rewards/rejected": -9.817176818847656, - "step": 1808 - }, - { - "epoch": 0.68, - "learning_rate": 4.801724722484809e-06, - "logits/chosen": -0.030139315873384476, - "logits/rejected": -4.116797924041748, - "logps/chosen": -594.51123046875, - "logps/rejected": -372.91162109375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.12153320759534836, - "rewards/margins": 15.016738891601562, - "rewards/rejected": -14.8952054977417, - "step": 1809 - }, - { - "epoch": 0.68, - "learning_rate": 4.791543325636307e-06, - "logits/chosen": -1.6125855445861816, - "logits/rejected": -5.161073207855225, - "logps/chosen": -379.3472900390625, - "logps/rejected": -132.8565216064453, - "loss": 0.0872, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.080584764480591, - "rewards/margins": 5.503020286560059, - "rewards/rejected": -7.5836052894592285, - "step": 1810 - }, - { - "epoch": 0.68, - "learning_rate": 4.78136933246561e-06, - "logits/chosen": -6.653046131134033, - "logits/rejected": -0.6998561024665833, - "logps/chosen": -371.811279296875, - "logps/rejected": -2071.30908203125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 2.662628173828125, - "rewards/margins": 44.60377883911133, - "rewards/rejected": -41.9411506652832, - "step": 1811 - }, - { - "epoch": 0.69, - "learning_rate": 4.771202757434774e-06, - "logits/chosen": -7.486441135406494, - "logits/rejected": -0.7901666164398193, - "logps/chosen": -350.91259765625, - "logps/rejected": -2813.673828125, - "loss": 0.0014, - "rewards/accuracies": 1.0, - "rewards/chosen": -3.2662599086761475, - "rewards/margins": 90.9422607421875, - "rewards/rejected": -94.2085189819336, - "step": 1812 - }, - { - "epoch": 0.69, - "learning_rate": 4.7610436149953e-06, - "logits/chosen": -2.144787073135376, - "logits/rejected": -4.187251567840576, - "logps/chosen": -423.0302734375, - "logps/rejected": -353.0213928222656, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.2718689441680908, - "rewards/margins": 21.059114456176758, - "rewards/rejected": -22.330984115600586, - "step": 1813 - }, - { - "epoch": 0.69, - "learning_rate": 4.750891919588127e-06, - "logits/chosen": -5.047338962554932, - "logits/rejected": -1.1083347797393799, - "logps/chosen": -477.2331237792969, - "logps/rejected": -2319.9501953125, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.08137817680835724, - "rewards/margins": 95.88349914550781, - "rewards/rejected": -95.8021240234375, - "step": 1814 - }, - { - "epoch": 0.69, - "learning_rate": 4.7407476856436166e-06, - "logits/chosen": -6.378210544586182, - "logits/rejected": -0.9545819163322449, - "logps/chosen": -716.4472045898438, - "logps/rejected": -2907.512939453125, - "loss": 0.0002, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.049670457839966, - "rewards/margins": 38.3884391784668, - "rewards/rejected": -40.4381103515625, - "step": 1815 - }, - { - "epoch": 0.69, - "learning_rate": 4.730610927581511e-06, - "logits/chosen": 0.18303775787353516, - "logits/rejected": -2.7111763954162598, - "logps/chosen": -477.83099365234375, - "logps/rejected": -436.39892578125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.805816650390625, - "rewards/margins": 27.77083396911621, - "rewards/rejected": -26.965017318725586, - "step": 1816 - }, - { - "epoch": 0.69, - "learning_rate": 4.720481659810941e-06, - "logits/chosen": -2.455914258956909, - "logits/rejected": -4.596693515777588, - "logps/chosen": -350.6785888671875, - "logps/rejected": -901.6915283203125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.94073486328125, - "rewards/margins": 45.83295440673828, - "rewards/rejected": -43.89221954345703, - "step": 1817 - }, - { - "epoch": 0.69, - "learning_rate": 4.710359896730379e-06, - "logits/chosen": -6.922851085662842, - "logits/rejected": -0.20578278601169586, - "logps/chosen": -431.5528564453125, - "logps/rejected": -3703.52099609375, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.4829773008823395, - "rewards/margins": 39.573516845703125, - "rewards/rejected": -40.056495666503906, - "step": 1818 - }, - { - "epoch": 0.69, - "learning_rate": 4.700245652727623e-06, - "logits/chosen": -6.200436115264893, - "logits/rejected": -1.7215280532836914, - "logps/chosen": -265.3381652832031, - "logps/rejected": -1246.524658203125, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.074951171875, - "rewards/margins": 31.91999626159668, - "rewards/rejected": -31.99494743347168, - "step": 1819 - }, - { - "epoch": 0.69, - "learning_rate": 4.690138942179809e-06, - "logits/chosen": 0.09085731208324432, - "logits/rejected": -5.661259174346924, - "logps/chosen": -396.2265930175781, - "logps/rejected": -208.27667236328125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.32122498750686646, - "rewards/margins": 14.348169326782227, - "rewards/rejected": -14.026944160461426, - "step": 1820 - }, - { - "epoch": 0.69, - "learning_rate": 4.68003977945334e-06, - "logits/chosen": -1.723062515258789, - "logits/rejected": -1.6924176216125488, - "logps/chosen": -206.77432250976562, - "logps/rejected": -509.8285217285156, - "loss": 0.0008, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.4835876524448395, - "rewards/margins": 24.184326171875, - "rewards/rejected": -24.66791343688965, - "step": 1821 - }, - { - "epoch": 0.69, - "learning_rate": 4.669948178903895e-06, - "logits/chosen": -0.47459518909454346, - "logits/rejected": -3.2601730823516846, - "logps/chosen": -309.10650634765625, - "logps/rejected": -222.70689392089844, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.5741180181503296, - "rewards/margins": 16.329198837280273, - "rewards/rejected": -14.755080223083496, - "step": 1822 - }, - { - "epoch": 0.69, - "learning_rate": 4.659864154876411e-06, - "logits/chosen": -2.174567699432373, - "logits/rejected": -1.300948143005371, - "logps/chosen": -217.34767150878906, - "logps/rejected": -890.8056640625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.7035660147666931, - "rewards/margins": 44.4157829284668, - "rewards/rejected": -45.11935043334961, - "step": 1823 - }, - { - "epoch": 0.69, - "learning_rate": 4.6497877217050505e-06, - "logits/chosen": -6.294309139251709, - "logits/rejected": -0.04515340179204941, - "logps/chosen": -242.4598388671875, - "logps/rejected": -1560.0162353515625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.28930971026420593, - "rewards/margins": 25.36803436279297, - "rewards/rejected": -25.078723907470703, - "step": 1824 - }, - { - "epoch": 0.69, - "learning_rate": 4.6397188937131785e-06, - "logits/chosen": -0.7639487385749817, - "logits/rejected": -2.424086332321167, - "logps/chosen": -179.3721923828125, - "logps/rejected": -565.85693359375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -3.091935873031616, - "rewards/margins": 29.60981559753418, - "rewards/rejected": -32.701751708984375, - "step": 1825 - }, - { - "epoch": 0.69, - "learning_rate": 4.629657685213368e-06, - "logits/chosen": -1.372035026550293, - "logits/rejected": -5.939392566680908, - "logps/chosen": -686.3410034179688, - "logps/rejected": -231.19740295410156, - "loss": 0.0002, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.3249024152755737, - "rewards/margins": 15.298023223876953, - "rewards/rejected": -13.97312068939209, - "step": 1826 - }, - { - "epoch": 0.69, - "learning_rate": 4.6196041105073444e-06, - "logits/chosen": 0.09656502306461334, - "logits/rejected": 0.4495040476322174, - "logps/chosen": -479.449462890625, - "logps/rejected": -892.9567260742188, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.0176299810409546, - "rewards/margins": 34.323123931884766, - "rewards/rejected": -35.340755462646484, - "step": 1827 - }, - { - "epoch": 0.69, - "learning_rate": 4.609558183885979e-06, - "logits/chosen": -4.877511024475098, - "logits/rejected": -0.17634479701519012, - "logps/chosen": -458.6374816894531, - "logps/rejected": -1519.1837158203125, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": 2.7052552700042725, - "rewards/margins": 26.3991641998291, - "rewards/rejected": -23.69390869140625, - "step": 1828 - }, - { - "epoch": 0.69, - "learning_rate": 4.599519919629297e-06, - "logits/chosen": -1.4868851900100708, - "logits/rejected": -2.5410404205322266, - "logps/chosen": -353.3028869628906, - "logps/rejected": -651.607421875, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.4600830078125, - "rewards/margins": 22.001943588256836, - "rewards/rejected": -21.541860580444336, - "step": 1829 - }, - { - "epoch": 0.69, - "learning_rate": 4.589489332006406e-06, - "logits/chosen": -0.5410677790641785, - "logits/rejected": -5.370954990386963, - "logps/chosen": -223.5181427001953, - "logps/rejected": -246.821044921875, - "loss": 0.0037, - "rewards/accuracies": 1.0, - "rewards/chosen": -3.0298173427581787, - "rewards/margins": 13.571706771850586, - "rewards/rejected": -16.601524353027344, - "step": 1830 - }, - { - "epoch": 0.69, - "learning_rate": 4.579466435275506e-06, - "logits/chosen": -0.1575881838798523, - "logits/rejected": -1.8037645816802979, - "logps/chosen": -479.3609619140625, - "logps/rejected": -537.4598388671875, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.1161987781524658, - "rewards/margins": 23.1649112701416, - "rewards/rejected": -24.281110763549805, - "step": 1831 - }, - { - "epoch": 0.69, - "learning_rate": 4.569451243683878e-06, - "logits/chosen": -2.3294379711151123, - "logits/rejected": -2.691850185394287, - "logps/chosen": -204.509033203125, - "logps/rejected": -861.3653564453125, - "loss": 0.0866, - "rewards/accuracies": 1.0, - "rewards/chosen": 2.6184234619140625, - "rewards/margins": 38.43054962158203, - "rewards/rejected": -35.81212615966797, - "step": 1832 - }, - { - "epoch": 0.69, - "learning_rate": 4.559443771467833e-06, - "logits/chosen": -4.312575340270996, - "logits/rejected": -0.5730292797088623, - "logps/chosen": -508.9474182128906, - "logps/rejected": -1320.82177734375, - "loss": 0.0048, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.0417847633361816, - "rewards/margins": 18.799304962158203, - "rewards/rejected": -20.841089248657227, - "step": 1833 - }, - { - "epoch": 0.69, - "learning_rate": 4.549444032852724e-06, - "logits/chosen": -3.645270586013794, - "logits/rejected": -1.5512986183166504, - "logps/chosen": -745.1015625, - "logps/rejected": -1736.119873046875, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -3.773510694503784, - "rewards/margins": 37.532569885253906, - "rewards/rejected": -41.30607986450195, - "step": 1834 - }, - { - "epoch": 0.69, - "learning_rate": 4.539452042052901e-06, - "logits/chosen": -1.759537696838379, - "logits/rejected": -6.908955097198486, - "logps/chosen": -251.4094696044922, - "logps/rejected": -166.06549072265625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.5808181762695312, - "rewards/margins": 13.359933853149414, - "rewards/rejected": -11.779115676879883, - "step": 1835 - }, - { - "epoch": 0.69, - "learning_rate": 4.5294678132717e-06, - "logits/chosen": -0.2699359059333801, - "logits/rejected": -2.6944854259490967, - "logps/chosen": -296.5418395996094, - "logps/rejected": -690.6099853515625, - "loss": 0.0013, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.4496033191680908, - "rewards/margins": 42.03485107421875, - "rewards/rejected": -40.58524703979492, - "step": 1836 - }, - { - "epoch": 0.69, - "learning_rate": 4.519491360701435e-06, - "logits/chosen": -0.917007565498352, - "logits/rejected": -2.1895580291748047, - "logps/chosen": -244.0882568359375, - "logps/rejected": -706.8386840820312, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.1347793340682983, - "rewards/margins": 34.33639144897461, - "rewards/rejected": -33.20161056518555, - "step": 1837 - }, - { - "epoch": 0.69, - "learning_rate": 4.509522698523352e-06, - "logits/chosen": -2.6065919399261475, - "logits/rejected": -4.5618367195129395, - "logps/chosen": -193.2567138671875, - "logps/rejected": -334.5615234375, - "loss": 0.0869, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.277796983718872, - "rewards/margins": 20.205530166625977, - "rewards/rejected": -18.927732467651367, - "step": 1838 - }, - { - "epoch": 0.7, - "learning_rate": 4.499561840907633e-06, - "logits/chosen": -0.1391042172908783, - "logits/rejected": -2.9787442684173584, - "logps/chosen": -316.7406005859375, - "logps/rejected": -389.9903869628906, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.9085724353790283, - "rewards/margins": 17.760719299316406, - "rewards/rejected": -19.669292449951172, - "step": 1839 - }, - { - "epoch": 0.7, - "learning_rate": 4.489608802013367e-06, - "logits/chosen": -3.9857966899871826, - "logits/rejected": -0.8592444062232971, - "logps/chosen": -509.6048583984375, - "logps/rejected": -1231.0438232421875, - "loss": 0.0035, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.0052703619003296, - "rewards/margins": 18.76700782775879, - "rewards/rejected": -19.77227783203125, - "step": 1840 - }, - { - "epoch": 0.7, - "learning_rate": 4.4796635959885195e-06, - "logits/chosen": -2.2389025688171387, - "logits/rejected": -4.131963729858398, - "logps/chosen": -634.2930908203125, - "logps/rejected": -998.6859130859375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.874847412109375, - "rewards/margins": 27.515974044799805, - "rewards/rejected": -26.64112663269043, - "step": 1841 - }, - { - "epoch": 0.7, - "learning_rate": 4.469726236969926e-06, - "logits/chosen": -0.5657685995101929, - "logits/rejected": -2.441761016845703, - "logps/chosen": -676.22021484375, - "logps/rejected": -1347.3074951171875, - "loss": 0.0004, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.6399780511856079, - "rewards/margins": 59.38487243652344, - "rewards/rejected": -60.02484893798828, - "step": 1842 - }, - { - "epoch": 0.7, - "learning_rate": 4.4597967390832745e-06, - "logits/chosen": -2.2354347705841064, - "logits/rejected": -2.6366941928863525, - "logps/chosen": -194.59054565429688, - "logps/rejected": -393.9371032714844, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.6990157961845398, - "rewards/margins": 15.908583641052246, - "rewards/rejected": -16.60759925842285, - "step": 1843 - }, - { - "epoch": 0.7, - "learning_rate": 4.449875116443069e-06, - "logits/chosen": -2.7614638805389404, - "logits/rejected": -2.106297016143799, - "logps/chosen": -343.283447265625, - "logps/rejected": -487.02099609375, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.2152527570724487, - "rewards/margins": 7.522509574890137, - "rewards/rejected": -8.737762451171875, - "step": 1844 - }, - { - "epoch": 0.7, - "learning_rate": 4.43996138315262e-06, - "logits/chosen": -4.010688781738281, - "logits/rejected": -0.27307578921318054, - "logps/chosen": -392.30157470703125, - "logps/rejected": -1310.544189453125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.0598328113555908, - "rewards/margins": 27.70342445373535, - "rewards/rejected": -28.76325798034668, - "step": 1845 - }, - { - "epoch": 0.7, - "learning_rate": 4.430055553304034e-06, - "logits/chosen": -1.135157585144043, - "logits/rejected": -2.623311996459961, - "logps/chosen": -229.2406005859375, - "logps/rejected": -479.0504455566406, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.2445924282073975, - "rewards/margins": 24.405841827392578, - "rewards/rejected": -26.650434494018555, - "step": 1846 - }, - { - "epoch": 0.7, - "learning_rate": 4.420157640978169e-06, - "logits/chosen": -5.455322265625, - "logits/rejected": -2.838897705078125, - "logps/chosen": -317.462646484375, - "logps/rejected": -1205.0194091796875, - "loss": 0.0003, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.3835296630859375, - "rewards/margins": 27.059804916381836, - "rewards/rejected": -25.6762752532959, - "step": 1847 - }, - { - "epoch": 0.7, - "learning_rate": 4.4102676602446375e-06, - "logits/chosen": -1.8761216402053833, - "logits/rejected": -1.6292694807052612, - "logps/chosen": -247.2478790283203, - "logps/rejected": -956.0831298828125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.6116135120391846, - "rewards/margins": 49.50640869140625, - "rewards/rejected": -47.89479446411133, - "step": 1848 - }, - { - "epoch": 0.7, - "learning_rate": 4.4003856251617775e-06, - "logits/chosen": -0.7406859993934631, - "logits/rejected": -5.36078405380249, - "logps/chosen": -537.977783203125, - "logps/rejected": -381.5615234375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.897528052330017, - "rewards/margins": 19.804330825805664, - "rewards/rejected": -17.906803131103516, - "step": 1849 - }, - { - "epoch": 0.7, - "learning_rate": 4.390511549776628e-06, - "logits/chosen": -4.697878837585449, - "logits/rejected": -1.7119861841201782, - "logps/chosen": -387.4603271484375, - "logps/rejected": -1365.539306640625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.5312135219573975, - "rewards/margins": 47.92927551269531, - "rewards/rejected": -50.460487365722656, - "step": 1850 - }, - { - "epoch": 0.7, - "learning_rate": 4.380645448124911e-06, - "logits/chosen": -3.7739675045013428, - "logits/rejected": -6.241698265075684, - "logps/chosen": -243.83607482910156, - "logps/rejected": -273.8043212890625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.18426513671875, - "rewards/margins": 13.675707817077637, - "rewards/rejected": -13.859972953796387, - "step": 1851 - }, - { - "epoch": 0.7, - "learning_rate": 4.370787334231026e-06, - "logits/chosen": -6.63783073425293, - "logits/rejected": -1.8743464946746826, - "logps/chosen": -600.621337890625, - "logps/rejected": -1983.09619140625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -4.172875881195068, - "rewards/margins": 36.228248596191406, - "rewards/rejected": -40.401123046875, - "step": 1852 - }, - { - "epoch": 0.7, - "learning_rate": 4.360937222108002e-06, - "logits/chosen": -0.4482370913028717, - "logits/rejected": -5.760765552520752, - "logps/chosen": -363.556884765625, - "logps/rejected": -205.666015625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.354931592941284, - "rewards/margins": 13.66643238067627, - "rewards/rejected": -16.021364212036133, - "step": 1853 - }, - { - "epoch": 0.7, - "learning_rate": 4.351095125757513e-06, - "logits/chosen": -4.421876430511475, - "logits/rejected": -1.3338640928268433, - "logps/chosen": -490.8305358886719, - "logps/rejected": -1450.992431640625, - "loss": 0.0003, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.7070526480674744, - "rewards/margins": 21.52317237854004, - "rewards/rejected": -22.230224609375, - "step": 1854 - }, - { - "epoch": 0.7, - "learning_rate": 4.341261059169821e-06, - "logits/chosen": -3.646530866622925, - "logits/rejected": -2.7293355464935303, - "logps/chosen": -210.474853515625, - "logps/rejected": -662.4068603515625, - "loss": 0.0113, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.19145508110523224, - "rewards/margins": 23.029924392700195, - "rewards/rejected": -22.838468551635742, - "step": 1855 - }, - { - "epoch": 0.7, - "learning_rate": 4.331435036323778e-06, - "logits/chosen": -3.287266254425049, - "logits/rejected": -3.0058488845825195, - "logps/chosen": -496.1415100097656, - "logps/rejected": -489.11395263671875, - "loss": 0.0019, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.23254089057445526, - "rewards/margins": 4.495538234710693, - "rewards/rejected": -4.728079319000244, - "step": 1856 - }, - { - "epoch": 0.7, - "learning_rate": 4.32161707118681e-06, - "logits/chosen": -7.299655437469482, - "logits/rejected": -2.5375006198883057, - "logps/chosen": -241.7687225341797, - "logps/rejected": -1579.57861328125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.9335403442382812, - "rewards/margins": 45.703399658203125, - "rewards/rejected": -48.636940002441406, - "step": 1857 - }, - { - "epoch": 0.7, - "learning_rate": 4.3118071777148865e-06, - "logits/chosen": -7.546478748321533, - "logits/rejected": -3.371589183807373, - "logps/chosen": -252.00045776367188, - "logps/rejected": -1670.691650390625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.10276641696691513, - "rewards/margins": 39.02175521850586, - "rewards/rejected": -39.1245231628418, - "step": 1858 - }, - { - "epoch": 0.7, - "learning_rate": 4.3020053698524946e-06, - "logits/chosen": 0.29638171195983887, - "logits/rejected": -2.3139374256134033, - "logps/chosen": -781.8448486328125, - "logps/rejected": -578.4247436523438, - "loss": 0.0039, - "rewards/accuracies": 1.0, - "rewards/chosen": -4.308685302734375, - "rewards/margins": 3.4642672538757324, - "rewards/rejected": -7.772952556610107, - "step": 1859 - }, - { - "epoch": 0.7, - "learning_rate": 4.292211661532641e-06, - "logits/chosen": -5.027463912963867, - "logits/rejected": -1.7670899629592896, - "logps/chosen": -268.2157897949219, - "logps/rejected": -859.8193359375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.2540374994277954, - "rewards/margins": 18.544187545776367, - "rewards/rejected": -17.290149688720703, - "step": 1860 - }, - { - "epoch": 0.7, - "learning_rate": 4.282426066676808e-06, - "logits/chosen": -3.8437891006469727, - "logits/rejected": -2.557750701904297, - "logps/chosen": -488.9664306640625, - "logps/rejected": -1340.8546142578125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.8993347883224487, - "rewards/margins": 45.49981689453125, - "rewards/rejected": -43.60048294067383, - "step": 1861 - }, - { - "epoch": 0.7, - "learning_rate": 4.272648599194948e-06, - "logits/chosen": -1.6481658220291138, - "logits/rejected": -5.258236885070801, - "logps/chosen": -251.5970001220703, - "logps/rejected": -188.54232788085938, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -4.262797832489014, - "rewards/margins": 10.17623519897461, - "rewards/rejected": -14.439032554626465, - "step": 1862 - }, - { - "epoch": 0.7, - "learning_rate": 4.262879272985468e-06, - "logits/chosen": -0.9972822666168213, - "logits/rejected": -2.033628463745117, - "logps/chosen": -566.794189453125, - "logps/rejected": -1136.8896484375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.31099244952201843, - "rewards/margins": 65.66364288330078, - "rewards/rejected": -65.9746322631836, - "step": 1863 - }, - { - "epoch": 0.7, - "learning_rate": 4.253118101935193e-06, - "logits/chosen": -5.277408599853516, - "logits/rejected": -2.734797716140747, - "logps/chosen": -502.0417175292969, - "logps/rejected": -1656.882568359375, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.861126661300659, - "rewards/margins": 50.33284378051758, - "rewards/rejected": -53.1939697265625, - "step": 1864 - }, - { - "epoch": 0.71, - "learning_rate": 4.243365099919352e-06, - "logits/chosen": -5.914225101470947, - "logits/rejected": -2.323538303375244, - "logps/chosen": -275.4263000488281, - "logps/rejected": -1771.227783203125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.1053284406661987, - "rewards/margins": 91.08309936523438, - "rewards/rejected": -92.18843078613281, - "step": 1865 - }, - { - "epoch": 0.71, - "learning_rate": 4.233620280801582e-06, - "logits/chosen": -4.9086713790893555, - "logits/rejected": -2.0726544857025146, - "logps/chosen": -794.5337524414062, - "logps/rejected": -3027.62255859375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 2.245501756668091, - "rewards/margins": 57.12016296386719, - "rewards/rejected": -54.87466049194336, - "step": 1866 - }, - { - "epoch": 0.71, - "learning_rate": 4.223883658433869e-06, - "logits/chosen": -1.3316305875778198, - "logits/rejected": -2.0056636333465576, - "logps/chosen": -353.96783447265625, - "logps/rejected": -740.7555541992188, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.9209747314453125, - "rewards/margins": 27.793399810791016, - "rewards/rejected": -26.872425079345703, - "step": 1867 - }, - { - "epoch": 0.71, - "learning_rate": 4.21415524665655e-06, - "logits/chosen": -1.2712243795394897, - "logits/rejected": -3.4386417865753174, - "logps/chosen": -227.94451904296875, - "logps/rejected": -318.0835876464844, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -3.079197645187378, - "rewards/margins": 13.032601356506348, - "rewards/rejected": -16.111799240112305, - "step": 1868 - }, - { - "epoch": 0.71, - "learning_rate": 4.204435059298303e-06, - "logits/chosen": -2.469428777694702, - "logits/rejected": -2.141343355178833, - "logps/chosen": -683.1802368164062, - "logps/rejected": -887.4627685546875, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -8.718719482421875, - "rewards/margins": 11.418964385986328, - "rewards/rejected": -20.137683868408203, - "step": 1869 - }, - { - "epoch": 0.71, - "learning_rate": 4.1947231101761e-06, - "logits/chosen": -4.585773468017578, - "logits/rejected": -0.9028636813163757, - "logps/chosen": -214.91795349121094, - "logps/rejected": -893.468994140625, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.5825424194335938, - "rewards/margins": 25.08901596069336, - "rewards/rejected": -25.671558380126953, - "step": 1870 - }, - { - "epoch": 0.71, - "learning_rate": 4.185019413095208e-06, - "logits/chosen": -3.857048511505127, - "logits/rejected": -0.44613751769065857, - "logps/chosen": -388.6595458984375, - "logps/rejected": -1076.375244140625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 3.758188009262085, - "rewards/margins": 31.658309936523438, - "rewards/rejected": -27.900121688842773, - "step": 1871 - }, - { - "epoch": 0.71, - "learning_rate": 4.175323981849173e-06, - "logits/chosen": -5.432976245880127, - "logits/rejected": -0.6836386919021606, - "logps/chosen": -383.0931396484375, - "logps/rejected": -1453.9713134765625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.22044678032398224, - "rewards/margins": 30.86067008972168, - "rewards/rejected": -30.640222549438477, - "step": 1872 - }, - { - "epoch": 0.71, - "learning_rate": 4.165636830219776e-06, - "logits/chosen": -1.6057325601577759, - "logits/rejected": -4.691769123077393, - "logps/chosen": -194.26768493652344, - "logps/rejected": -333.9539794921875, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.132196068763733, - "rewards/margins": 19.61506462097168, - "rewards/rejected": -18.482868194580078, - "step": 1873 - }, - { - "epoch": 0.71, - "learning_rate": 4.155957971977038e-06, - "logits/chosen": -5.872292995452881, - "logits/rejected": -0.2876875400543213, - "logps/chosen": -265.7099304199219, - "logps/rejected": -1204.1829833984375, - "loss": 0.0003, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.33764344453811646, - "rewards/margins": 30.865543365478516, - "rewards/rejected": -31.20318603515625, - "step": 1874 - }, - { - "epoch": 0.71, - "learning_rate": 4.146287420879196e-06, - "logits/chosen": -6.900686740875244, - "logits/rejected": -2.9546597003936768, - "logps/chosen": -638.609619140625, - "logps/rejected": -3065.2958984375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.8307312726974487, - "rewards/margins": 64.17843627929688, - "rewards/rejected": -62.34770584106445, - "step": 1875 - }, - { - "epoch": 0.71, - "learning_rate": 4.136625190672664e-06, - "logits/chosen": -1.447135329246521, - "logits/rejected": -5.6490373611450195, - "logps/chosen": -369.4676513671875, - "logps/rejected": -279.0541076660156, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.0414246320724487, - "rewards/margins": 18.359764099121094, - "rewards/rejected": -17.318340301513672, - "step": 1876 - }, - { - "epoch": 0.71, - "learning_rate": 4.126971295092043e-06, - "logits/chosen": -0.7670559883117676, - "logits/rejected": -5.784207344055176, - "logps/chosen": -466.96826171875, - "logps/rejected": -239.92266845703125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 3.2524750232696533, - "rewards/margins": 20.80296516418457, - "rewards/rejected": -17.55048942565918, - "step": 1877 - }, - { - "epoch": 0.71, - "learning_rate": 4.117325747860077e-06, - "logits/chosen": -0.9381347894668579, - "logits/rejected": -4.907911777496338, - "logps/chosen": -496.28411865234375, - "logps/rejected": -487.7701721191406, - "loss": 0.0012, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.9187836050987244, - "rewards/margins": 19.57477569580078, - "rewards/rejected": -20.493558883666992, - "step": 1878 - }, - { - "epoch": 0.71, - "learning_rate": 4.107688562687642e-06, - "logits/chosen": -3.6538240909576416, - "logits/rejected": -3.1013553142547607, - "logps/chosen": -153.9764404296875, - "logps/rejected": -532.9971923828125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.9012786746025085, - "rewards/margins": 28.18499755859375, - "rewards/rejected": -29.08627700805664, - "step": 1879 - }, - { - "epoch": 0.71, - "learning_rate": 4.098059753273738e-06, - "logits/chosen": -5.251749515533447, - "logits/rejected": -0.7987976670265198, - "logps/chosen": -469.29180908203125, - "logps/rejected": -1513.325927734375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.2027955055236816, - "rewards/margins": 21.440174102783203, - "rewards/rejected": -23.642969131469727, - "step": 1880 - }, - { - "epoch": 0.71, - "learning_rate": 4.088439333305449e-06, - "logits/chosen": -7.72068977355957, - "logits/rejected": -0.3353556990623474, - "logps/chosen": -295.1380920410156, - "logps/rejected": -3700.294921875, - "loss": 0.0013, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.27821657061576843, - "rewards/margins": 46.847015380859375, - "rewards/rejected": -46.56879806518555, - "step": 1881 - }, - { - "epoch": 0.71, - "learning_rate": 4.078827316457935e-06, - "logits/chosen": -4.284322738647461, - "logits/rejected": -0.011526787653565407, - "logps/chosen": -252.10487365722656, - "logps/rejected": -1514.22119140625, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.0602707862854004, - "rewards/margins": 58.763179779052734, - "rewards/rejected": -60.82345199584961, - "step": 1882 - }, - { - "epoch": 0.71, - "learning_rate": 4.069223716394419e-06, - "logits/chosen": -2.6146857738494873, - "logits/rejected": -1.4723420143127441, - "logps/chosen": -306.7529296875, - "logps/rejected": -722.0062866210938, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.4107116758823395, - "rewards/margins": 18.5764217376709, - "rewards/rejected": -18.16571044921875, - "step": 1883 - }, - { - "epoch": 0.71, - "learning_rate": 4.059628546766149e-06, - "logits/chosen": -0.027659161016345024, - "logits/rejected": -4.654770851135254, - "logps/chosen": -409.6867980957031, - "logps/rejected": -295.5578918457031, - "loss": 0.0004, - "rewards/accuracies": 1.0, - "rewards/chosen": -5.2775421142578125, - "rewards/margins": 11.330633163452148, - "rewards/rejected": -16.60817527770996, - "step": 1884 - }, - { - "epoch": 0.71, - "learning_rate": 4.050041821212396e-06, - "logits/chosen": -2.913911819458008, - "logits/rejected": -0.3284571170806885, - "logps/chosen": -183.61758422851562, - "logps/rejected": -518.4000244140625, - "loss": 0.0008, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.1854110956192017, - "rewards/margins": 15.988628387451172, - "rewards/rejected": -14.803216934204102, - "step": 1885 - }, - { - "epoch": 0.71, - "learning_rate": 4.040463553360431e-06, - "logits/chosen": -2.9917759895324707, - "logits/rejected": -0.1012149378657341, - "logps/chosen": -1236.669921875, - "logps/rejected": -2144.68994140625, - "loss": 0.0008, - "rewards/accuracies": 1.0, - "rewards/chosen": -6.103247165679932, - "rewards/margins": 26.095338821411133, - "rewards/rejected": -32.198585510253906, - "step": 1886 - }, - { - "epoch": 0.71, - "learning_rate": 4.030893756825495e-06, - "logits/chosen": -0.21174019575119019, - "logits/rejected": -2.885315179824829, - "logps/chosen": -618.696044921875, - "logps/rejected": -797.1572265625, - "loss": 0.0061, - "rewards/accuracies": 1.0, - "rewards/chosen": -3.48590087890625, - "rewards/margins": 42.8339729309082, - "rewards/rejected": -46.31987380981445, - "step": 1887 - }, - { - "epoch": 0.71, - "learning_rate": 4.021332445210785e-06, - "logits/chosen": -1.871829628944397, - "logits/rejected": -3.3413329124450684, - "logps/chosen": -491.9841003417969, - "logps/rejected": -787.98486328125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.3420257568359375, - "rewards/margins": 31.20559310913086, - "rewards/rejected": -32.5476188659668, - "step": 1888 - }, - { - "epoch": 0.71, - "learning_rate": 4.011779632107451e-06, - "logits/chosen": -3.239790916442871, - "logits/rejected": -1.954984426498413, - "logps/chosen": -268.3683166503906, - "logps/rejected": -402.6103515625, - "loss": 0.0045, - "rewards/accuracies": 1.0, - "rewards/chosen": -3.675701856613159, - "rewards/margins": 3.3155887126922607, - "rewards/rejected": -6.99129056930542, - "step": 1889 - }, - { - "epoch": 0.71, - "learning_rate": 4.0022353310945474e-06, - "logits/chosen": -0.8209294080734253, - "logits/rejected": -1.2340625524520874, - "logps/chosen": -498.9014892578125, - "logps/rejected": -1033.2496337890625, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": 2.056842088699341, - "rewards/margins": 41.04039001464844, - "rewards/rejected": -38.98354721069336, - "step": 1890 - }, - { - "epoch": 0.71, - "learning_rate": 3.992699555739041e-06, - "logits/chosen": -5.345739841461182, - "logits/rejected": -0.7509390115737915, - "logps/chosen": -840.8822021484375, - "logps/rejected": -3237.827392578125, - "loss": 0.0166, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.129705786705017, - "rewards/margins": 37.09258270263672, - "rewards/rejected": -38.2222900390625, - "step": 1891 - }, - { - "epoch": 0.72, - "learning_rate": 3.98317231959577e-06, - "logits/chosen": -1.6163121461868286, - "logits/rejected": -3.5501222610473633, - "logps/chosen": -189.12371826171875, - "logps/rejected": -100.49417114257812, - "loss": 0.0002, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.066686987876892, - "rewards/margins": 6.252252578735352, - "rewards/rejected": -5.18556547164917, - "step": 1892 - }, - { - "epoch": 0.72, - "learning_rate": 3.973653636207437e-06, - "logits/chosen": -1.669709324836731, - "logits/rejected": -1.1210719347000122, - "logps/chosen": -401.30841064453125, - "logps/rejected": -486.6197814941406, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -4.27362060546875, - "rewards/margins": 8.888083457946777, - "rewards/rejected": -13.161704063415527, - "step": 1893 - }, - { - "epoch": 0.72, - "learning_rate": 3.964143519104591e-06, - "logits/chosen": -1.1618629693984985, - "logits/rejected": -0.8376163840293884, - "logps/chosen": -271.17254638671875, - "logps/rejected": -687.3753051757812, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.7536942958831787, - "rewards/margins": 33.35224151611328, - "rewards/rejected": -36.105934143066406, - "step": 1894 - }, - { - "epoch": 0.72, - "learning_rate": 3.9546419818056045e-06, - "logits/chosen": -2.0462515354156494, - "logits/rejected": -2.011657238006592, - "logps/chosen": -191.4320831298828, - "logps/rejected": -464.0657958984375, - "loss": 0.0002, - "rewards/accuracies": 1.0, - "rewards/chosen": -3.8037095069885254, - "rewards/margins": 22.17739486694336, - "rewards/rejected": -25.981103897094727, - "step": 1895 - }, - { - "epoch": 0.72, - "learning_rate": 3.945149037816644e-06, - "logits/chosen": -1.8713358640670776, - "logits/rejected": -2.5013177394866943, - "logps/chosen": -260.18212890625, - "logps/rejected": -369.5625305175781, - "loss": 0.0007, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.10663451999425888, - "rewards/margins": 12.217288970947266, - "rewards/rejected": -12.323923110961914, - "step": 1896 - }, - { - "epoch": 0.72, - "learning_rate": 3.935664700631679e-06, - "logits/chosen": 0.2614341974258423, - "logits/rejected": -5.710688591003418, - "logps/chosen": -441.7958984375, - "logps/rejected": -331.31036376953125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.11700439453125, - "rewards/margins": 19.580068588256836, - "rewards/rejected": -19.463064193725586, - "step": 1897 - }, - { - "epoch": 0.72, - "learning_rate": 3.9261889837324245e-06, - "logits/chosen": -0.42079856991767883, - "logits/rejected": -2.7750072479248047, - "logps/chosen": -380.7184753417969, - "logps/rejected": -286.6494140625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.5660614371299744, - "rewards/margins": 13.137253761291504, - "rewards/rejected": -12.571192741394043, - "step": 1898 - }, - { - "epoch": 0.72, - "learning_rate": 3.9167219005883495e-06, - "logits/chosen": -3.153935194015503, - "logits/rejected": -1.1370530128479004, - "logps/chosen": -237.45358276367188, - "logps/rejected": -623.3911743164062, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.377764940261841, - "rewards/margins": 16.138757705688477, - "rewards/rejected": -18.516523361206055, - "step": 1899 - }, - { - "epoch": 0.72, - "learning_rate": 3.907263464656662e-06, - "logits/chosen": -7.743753433227539, - "logits/rejected": -2.8455564975738525, - "logps/chosen": -286.8230285644531, - "logps/rejected": -2642.02587890625, - "loss": 0.0007, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.6095855832099915, - "rewards/margins": 53.93621826171875, - "rewards/rejected": -54.54580307006836, - "step": 1900 - }, - { - "epoch": 0.72, - "learning_rate": 3.897813689382262e-06, - "logits/chosen": -2.697617292404175, - "logits/rejected": -2.278156280517578, - "logps/chosen": -526.3107299804688, - "logps/rejected": -632.5075073242188, - "loss": 0.001, - "rewards/accuracies": 1.0, - "rewards/chosen": -5.924951076507568, - "rewards/margins": 4.87825345993042, - "rewards/rejected": -10.803204536437988, - "step": 1901 - }, - { - "epoch": 0.72, - "learning_rate": 3.888372588197738e-06, - "logits/chosen": -1.393394112586975, - "logits/rejected": -2.1514506340026855, - "logps/chosen": -205.42420959472656, - "logps/rejected": -288.194091796875, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.4945419430732727, - "rewards/margins": 9.207624435424805, - "rewards/rejected": -9.702166557312012, - "step": 1902 - }, - { - "epoch": 0.72, - "learning_rate": 3.878940174523371e-06, - "logits/chosen": -0.49766218662261963, - "logits/rejected": -5.330526351928711, - "logps/chosen": -406.71484375, - "logps/rejected": -131.280517578125, - "loss": 0.0002, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.899957299232483, - "rewards/margins": 6.921722888946533, - "rewards/rejected": -8.821680068969727, - "step": 1903 - }, - { - "epoch": 0.72, - "learning_rate": 3.86951646176707e-06, - "logits/chosen": -7.226356506347656, - "logits/rejected": -1.3621243238449097, - "logps/chosen": -247.6787109375, - "logps/rejected": -1706.3359375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.1874679327011108, - "rewards/margins": 22.11800193786621, - "rewards/rejected": -23.305469512939453, - "step": 1904 - }, - { - "epoch": 0.72, - "learning_rate": 3.860101463324379e-06, - "logits/chosen": -3.21982479095459, - "logits/rejected": -0.8485289812088013, - "logps/chosen": -292.1473693847656, - "logps/rejected": -588.8658447265625, - "loss": 0.0008, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.331658959388733, - "rewards/margins": 5.283502578735352, - "rewards/rejected": -6.615161418914795, - "step": 1905 - }, - { - "epoch": 0.72, - "learning_rate": 3.850695192578467e-06, - "logits/chosen": -0.09051563590765, - "logits/rejected": -0.3972039520740509, - "logps/chosen": -328.80859375, - "logps/rejected": -786.6580810546875, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.9084380865097046, - "rewards/margins": 29.197153091430664, - "rewards/rejected": -31.1055908203125, - "step": 1906 - }, - { - "epoch": 0.72, - "learning_rate": 3.841297662900085e-06, - "logits/chosen": -0.5840457677841187, - "logits/rejected": -3.8307323455810547, - "logps/chosen": -331.16424560546875, - "logps/rejected": -144.6468048095703, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.3629821836948395, - "rewards/margins": 11.4341402053833, - "rewards/rejected": -11.071158409118652, - "step": 1907 - }, - { - "epoch": 0.72, - "learning_rate": 3.8319088876475595e-06, - "logits/chosen": -0.5035596489906311, - "logits/rejected": -3.8853821754455566, - "logps/chosen": -219.20645141601562, - "logps/rejected": -196.85482788085938, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.21144257485866547, - "rewards/margins": 11.847269058227539, - "rewards/rejected": -11.635826110839844, - "step": 1908 - }, - { - "epoch": 0.72, - "learning_rate": 3.822528880166783e-06, - "logits/chosen": -1.182187557220459, - "logits/rejected": -4.119121551513672, - "logps/chosen": -195.44696044921875, - "logps/rejected": -220.92652893066406, - "loss": 0.0009, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.21604613959789276, - "rewards/margins": 10.081527709960938, - "rewards/rejected": -9.86548137664795, - "step": 1909 - }, - { - "epoch": 0.72, - "learning_rate": 3.813157653791171e-06, - "logits/chosen": -1.2607544660568237, - "logits/rejected": -4.906225204467773, - "logps/chosen": -817.327880859375, - "logps/rejected": -218.1146240234375, - "loss": 0.0002, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.3628295958042145, - "rewards/margins": 14.957161903381348, - "rewards/rejected": -14.594332695007324, - "step": 1910 - }, - { - "epoch": 0.72, - "learning_rate": 3.8037952218416672e-06, - "logits/chosen": -1.3638169765472412, - "logits/rejected": -5.0447306632995605, - "logps/chosen": -558.4675903320312, - "logps/rejected": -425.79388427734375, - "loss": 0.0003, - "rewards/accuracies": 1.0, - "rewards/chosen": 5.327404975891113, - "rewards/margins": 28.855865478515625, - "rewards/rejected": -23.528461456298828, - "step": 1911 - }, - { - "epoch": 0.72, - "learning_rate": 3.7944415976267147e-06, - "logits/chosen": -1.4999653100967407, - "logits/rejected": -2.895848035812378, - "logps/chosen": -220.618896484375, - "logps/rejected": -703.1197509765625, - "loss": 0.0044, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.1392440795898438, - "rewards/margins": 36.04635238647461, - "rewards/rejected": -38.18559646606445, - "step": 1912 - }, - { - "epoch": 0.72, - "learning_rate": 3.785096794442229e-06, - "logits/chosen": -6.389041900634766, - "logits/rejected": -1.636228084564209, - "logps/chosen": -552.14404296875, - "logps/rejected": -1759.0462646484375, - "loss": 0.0178, - "rewards/accuracies": 1.0, - "rewards/chosen": -3.279003858566284, - "rewards/margins": 15.19918155670166, - "rewards/rejected": -18.478185653686523, - "step": 1913 - }, - { - "epoch": 0.72, - "learning_rate": 3.775760825571587e-06, - "logits/chosen": -0.3765243589878082, - "logits/rejected": -4.158436298370361, - "logps/chosen": -302.96197509765625, - "logps/rejected": -143.88504028320312, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.5997284650802612, - "rewards/margins": 11.538044929504395, - "rewards/rejected": -9.938316345214844, - "step": 1914 - }, - { - "epoch": 0.72, - "learning_rate": 3.766433704285619e-06, - "logits/chosen": -5.711524486541748, - "logits/rejected": -2.337129831314087, - "logps/chosen": -319.3494567871094, - "logps/rejected": -1783.1112060546875, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 2.1546266078948975, - "rewards/margins": 56.50458908081055, - "rewards/rejected": -54.3499641418457, - "step": 1915 - }, - { - "epoch": 0.72, - "learning_rate": 3.757115443842565e-06, - "logits/chosen": -2.8972363471984863, - "logits/rejected": -3.2136948108673096, - "logps/chosen": -302.1571960449219, - "logps/rejected": -697.2440185546875, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.03723449632525444, - "rewards/margins": 31.077096939086914, - "rewards/rejected": -31.11433219909668, - "step": 1916 - }, - { - "epoch": 0.72, - "learning_rate": 3.7478060574880805e-06, - "logits/chosen": -3.688323497772217, - "logits/rejected": 0.7562795877456665, - "logps/chosen": -671.256591796875, - "logps/rejected": -2268.345703125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.19525146484375, - "rewards/margins": 51.142738342285156, - "rewards/rejected": -50.947486877441406, - "step": 1917 - }, - { - "epoch": 0.73, - "learning_rate": 3.7385055584552e-06, - "logits/chosen": -7.252142906188965, - "logits/rejected": -1.1561448574066162, - "logps/chosen": -321.529541015625, - "logps/rejected": -1951.6248779296875, - "loss": 0.0005, - "rewards/accuracies": 1.0, - "rewards/chosen": -5.411505222320557, - "rewards/margins": 19.451324462890625, - "rewards/rejected": -24.862829208374023, - "step": 1918 - }, - { - "epoch": 0.73, - "learning_rate": 3.729213959964323e-06, - "logits/chosen": -0.13725420832633972, - "logits/rejected": -2.4186527729034424, - "logps/chosen": -219.88548278808594, - "logps/rejected": -511.27069091796875, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.23739929497241974, - "rewards/margins": 32.819580078125, - "rewards/rejected": -33.05698013305664, - "step": 1919 - }, - { - "epoch": 0.73, - "learning_rate": 3.7199312752232053e-06, - "logits/chosen": -1.583250880241394, - "logits/rejected": -3.361196517944336, - "logps/chosen": -262.7091979980469, - "logps/rejected": -297.392578125, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.7253159284591675, - "rewards/margins": 12.778633117675781, - "rewards/rejected": -14.503949165344238, - "step": 1920 - }, - { - "epoch": 0.73, - "learning_rate": 3.71065751742693e-06, - "logits/chosen": -5.598901748657227, - "logits/rejected": -0.5883200764656067, - "logps/chosen": -266.4524230957031, - "logps/rejected": -2146.91650390625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.37088319659233093, - "rewards/margins": 87.0943832397461, - "rewards/rejected": -87.46526336669922, - "step": 1921 - }, - { - "epoch": 0.73, - "learning_rate": 3.701392699757882e-06, - "logits/chosen": -0.12099386006593704, - "logits/rejected": -1.7704758644104004, - "logps/chosen": -535.2523193359375, - "logps/rejected": -1335.5556640625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.20180664956569672, - "rewards/margins": 69.4649887084961, - "rewards/rejected": -69.26318359375, - "step": 1922 - }, - { - "epoch": 0.73, - "learning_rate": 3.6921368353857524e-06, - "logits/chosen": -5.791370868682861, - "logits/rejected": -2.803802728652954, - "logps/chosen": -176.12918090820312, - "logps/rejected": -1237.6522216796875, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.575714111328125, - "rewards/margins": 44.75620651245117, - "rewards/rejected": -44.18049240112305, - "step": 1923 - }, - { - "epoch": 0.73, - "learning_rate": 3.6828899374674933e-06, - "logits/chosen": -0.6377866268157959, - "logits/rejected": -3.280230760574341, - "logps/chosen": -605.7220458984375, - "logps/rejected": -370.99658203125, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -4.156103610992432, - "rewards/margins": 7.292028903961182, - "rewards/rejected": -11.448132514953613, - "step": 1924 - }, - { - "epoch": 0.73, - "learning_rate": 3.673652019147311e-06, - "logits/chosen": -3.35860276222229, - "logits/rejected": -1.340329885482788, - "logps/chosen": -208.54010009765625, - "logps/rejected": -776.8474731445312, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.712268054485321, - "rewards/margins": 18.961565017700195, - "rewards/rejected": -19.6738338470459, - "step": 1925 - }, - { - "epoch": 0.73, - "learning_rate": 3.6644230935566614e-06, - "logits/chosen": -1.4614757299423218, - "logits/rejected": -1.181511640548706, - "logps/chosen": -661.7703247070312, - "logps/rejected": -1175.797119140625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.614306628704071, - "rewards/margins": 31.255586624145508, - "rewards/rejected": -30.641279220581055, - "step": 1926 - }, - { - "epoch": 0.73, - "learning_rate": 3.6552031738142004e-06, - "logits/chosen": -0.44449254870414734, - "logits/rejected": -2.352458953857422, - "logps/chosen": -412.8030090332031, - "logps/rejected": -629.8385620117188, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.7032928466796875, - "rewards/margins": 29.816986083984375, - "rewards/rejected": -31.520278930664062, - "step": 1927 - }, - { - "epoch": 0.73, - "learning_rate": 3.645992273025797e-06, - "logits/chosen": -0.7768668532371521, - "logits/rejected": -3.003157377243042, - "logps/chosen": -319.5478820800781, - "logps/rejected": -218.9937744140625, - "loss": 0.0002, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.04881897196173668, - "rewards/margins": 6.578088760375977, - "rewards/rejected": -6.529269695281982, - "step": 1928 - }, - { - "epoch": 0.73, - "learning_rate": 3.6367904042844857e-06, - "logits/chosen": -2.3977346420288086, - "logits/rejected": -2.0377418994903564, - "logps/chosen": -237.2706298828125, - "logps/rejected": -873.2471923828125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.641729712486267, - "rewards/margins": 49.5307731628418, - "rewards/rejected": -51.17250442504883, - "step": 1929 - }, - { - "epoch": 0.73, - "learning_rate": 3.6275975806704777e-06, - "logits/chosen": -2.0135254859924316, - "logits/rejected": -1.8572206497192383, - "logps/chosen": -311.0742492675781, - "logps/rejected": -518.755126953125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.83917236328125, - "rewards/margins": 12.306536674499512, - "rewards/rejected": -15.145709037780762, - "step": 1930 - }, - { - "epoch": 0.73, - "learning_rate": 3.6184138152511107e-06, - "logits/chosen": -2.0143003463745117, - "logits/rejected": -1.1442662477493286, - "logps/chosen": -329.58526611328125, - "logps/rejected": -703.7149658203125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.09734497219324112, - "rewards/margins": 25.145397186279297, - "rewards/rejected": -25.24274253845215, - "step": 1931 - }, - { - "epoch": 0.73, - "learning_rate": 3.609239121080862e-06, - "logits/chosen": -1.3431981801986694, - "logits/rejected": -4.4475932121276855, - "logps/chosen": -294.4691467285156, - "logps/rejected": -179.55010986328125, - "loss": 0.0193, - "rewards/accuracies": 1.0, - "rewards/chosen": -3.50724196434021, - "rewards/margins": 8.518534660339355, - "rewards/rejected": -12.025776863098145, - "step": 1932 - }, - { - "epoch": 0.73, - "learning_rate": 3.6000735112012984e-06, - "logits/chosen": -3.4769818782806396, - "logits/rejected": -5.090476989746094, - "logps/chosen": -1061.9222412109375, - "logps/rejected": -952.4736328125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.8903687000274658, - "rewards/margins": 25.900453567504883, - "rewards/rejected": -24.01008415222168, - "step": 1933 - }, - { - "epoch": 0.73, - "learning_rate": 3.590916998641092e-06, - "logits/chosen": -7.718724250793457, - "logits/rejected": 0.5240508913993835, - "logps/chosen": -343.845458984375, - "logps/rejected": -2888.99951171875, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.3704164028167725, - "rewards/margins": 23.215373992919922, - "rewards/rejected": -25.585790634155273, - "step": 1934 - }, - { - "epoch": 0.73, - "learning_rate": 3.5817695964159673e-06, - "logits/chosen": -0.7185004353523254, - "logits/rejected": -3.595306873321533, - "logps/chosen": -310.396728515625, - "logps/rejected": -464.76397705078125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.783465564250946, - "rewards/margins": 32.497962951660156, - "rewards/rejected": -31.714496612548828, - "step": 1935 - }, - { - "epoch": 0.73, - "learning_rate": 3.572631317528703e-06, - "logits/chosen": -4.833860874176025, - "logits/rejected": -4.136399745941162, - "logps/chosen": -127.43238067626953, - "logps/rejected": -323.1672058105469, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.4772392213344574, - "rewards/margins": 17.570199966430664, - "rewards/rejected": -18.047439575195312, - "step": 1936 - }, - { - "epoch": 0.73, - "learning_rate": 3.563502174969117e-06, - "logits/chosen": -6.033393859863281, - "logits/rejected": -1.247074842453003, - "logps/chosen": -543.2245483398438, - "logps/rejected": -3463.841552734375, - "loss": 0.0002, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.7946716547012329, - "rewards/margins": 103.67453002929688, - "rewards/rejected": -102.8798599243164, - "step": 1937 - }, - { - "epoch": 0.73, - "learning_rate": 3.5543821817140313e-06, - "logits/chosen": -4.859708786010742, - "logits/rejected": -1.6156949996948242, - "logps/chosen": -281.4169616699219, - "logps/rejected": -791.7572021484375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.853033423423767, - "rewards/margins": 16.30078887939453, - "rewards/rejected": -18.15382194519043, - "step": 1938 - }, - { - "epoch": 0.73, - "learning_rate": 3.545271350727257e-06, - "logits/chosen": -0.8216797113418579, - "logits/rejected": -4.0567216873168945, - "logps/chosen": -303.87933349609375, - "logps/rejected": -278.98284912109375, - "loss": 0.0002, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.571832299232483, - "rewards/margins": 12.799107551574707, - "rewards/rejected": -14.370940208435059, - "step": 1939 - }, - { - "epoch": 0.73, - "learning_rate": 3.5361696949596046e-06, - "logits/chosen": -5.827478885650635, - "logits/rejected": -3.4797661304473877, - "logps/chosen": -310.0741271972656, - "logps/rejected": -1119.40771484375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -4.175229072570801, - "rewards/margins": 33.714447021484375, - "rewards/rejected": -37.88967514038086, - "step": 1940 - }, - { - "epoch": 0.73, - "learning_rate": 3.5270772273488206e-06, - "logits/chosen": -6.69118595123291, - "logits/rejected": -1.6526508331298828, - "logps/chosen": -417.9136962890625, - "logps/rejected": -2109.127197265625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -3.0408935546875, - "rewards/margins": 18.914697647094727, - "rewards/rejected": -21.955591201782227, - "step": 1941 - }, - { - "epoch": 0.73, - "learning_rate": 3.5179939608195935e-06, - "logits/chosen": -3.6409592628479004, - "logits/rejected": -1.1797510385513306, - "logps/chosen": -463.67218017578125, - "logps/rejected": -803.4417724609375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.7793670892715454, - "rewards/margins": 8.43139362335205, - "rewards/rejected": -10.210761070251465, - "step": 1942 - }, - { - "epoch": 0.73, - "learning_rate": 3.5089199082835436e-06, - "logits/chosen": -0.9062322974205017, - "logits/rejected": -3.571061849594116, - "logps/chosen": -328.7742919921875, - "logps/rejected": -268.7901916503906, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.332043409347534, - "rewards/margins": 10.476702690124512, - "rewards/rejected": -12.808746337890625, - "step": 1943 - }, - { - "epoch": 0.73, - "learning_rate": 3.499855082639183e-06, - "logits/chosen": -2.6902709007263184, - "logits/rejected": -3.9355685710906982, - "logps/chosen": -295.69085693359375, - "logps/rejected": -420.509765625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.029425024986267, - "rewards/margins": 15.952789306640625, - "rewards/rejected": -16.982213973999023, - "step": 1944 - }, - { - "epoch": 0.74, - "learning_rate": 3.4907994967719096e-06, - "logits/chosen": -2.711946964263916, - "logits/rejected": -0.5526231527328491, - "logps/chosen": -296.6507568359375, - "logps/rejected": -876.1757202148438, - "loss": 0.0002, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.039514183998108, - "rewards/margins": 31.395160675048828, - "rewards/rejected": -30.35564613342285, - "step": 1945 - }, - { - "epoch": 0.74, - "learning_rate": 3.4817531635539946e-06, - "logits/chosen": -1.6669038534164429, - "logits/rejected": -1.18873929977417, - "logps/chosen": -183.38406372070312, - "logps/rejected": -537.0557861328125, - "loss": 0.0006, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.314386010169983, - "rewards/margins": 23.76983070373535, - "rewards/rejected": -22.4554443359375, - "step": 1946 - }, - { - "epoch": 0.74, - "learning_rate": 3.472716095844547e-06, - "logits/chosen": -2.4951462745666504, - "logits/rejected": -2.7500405311584473, - "logps/chosen": -533.13720703125, - "logps/rejected": -848.06396484375, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.904095470905304, - "rewards/margins": 27.292707443237305, - "rewards/rejected": -28.196802139282227, - "step": 1947 - }, - { - "epoch": 0.74, - "learning_rate": 3.463688306489511e-06, - "logits/chosen": -3.071622371673584, - "logits/rejected": -3.4597480297088623, - "logps/chosen": -194.23109436035156, - "logps/rejected": -335.2392883300781, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.29619300365448, - "rewards/margins": 16.094594955444336, - "rewards/rejected": -17.39078712463379, - "step": 1948 - }, - { - "epoch": 0.74, - "learning_rate": 3.454669808321646e-06, - "logits/chosen": -0.9719797968864441, - "logits/rejected": -3.7119505405426025, - "logps/chosen": -256.125, - "logps/rejected": -417.7757873535156, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 2.780285596847534, - "rewards/margins": 29.456783294677734, - "rewards/rejected": -26.676498413085938, - "step": 1949 - }, - { - "epoch": 0.74, - "learning_rate": 3.4456606141604932e-06, - "logits/chosen": -2.9277498722076416, - "logits/rejected": -3.8973472118377686, - "logps/chosen": -264.3463439941406, - "logps/rejected": -374.4630126953125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.6384826898574829, - "rewards/margins": 15.392843246459961, - "rewards/rejected": -16.031326293945312, - "step": 1950 - }, - { - "epoch": 0.74, - "learning_rate": 3.4366607368123727e-06, - "logits/chosen": -2.098634719848633, - "logits/rejected": -3.6689350605010986, - "logps/chosen": -296.8970947265625, - "logps/rejected": -759.4542236328125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.22776488959789276, - "rewards/margins": 36.18858337402344, - "rewards/rejected": -35.960819244384766, - "step": 1951 - }, - { - "epoch": 0.74, - "learning_rate": 3.427670189070369e-06, - "logits/chosen": -3.1938133239746094, - "logits/rejected": -5.531611919403076, - "logps/chosen": -391.9106140136719, - "logps/rejected": -716.1698608398438, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.457022100687027, - "rewards/margins": 42.97883224487305, - "rewards/rejected": -43.435855865478516, - "step": 1952 - }, - { - "epoch": 0.74, - "learning_rate": 3.418688983714291e-06, - "logits/chosen": -6.342517375946045, - "logits/rejected": -1.8065346479415894, - "logps/chosen": -541.87646484375, - "logps/rejected": -2890.531005859375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.402416944503784, - "rewards/margins": 73.3724594116211, - "rewards/rejected": -75.7748794555664, - "step": 1953 - }, - { - "epoch": 0.74, - "learning_rate": 3.409717133510683e-06, - "logits/chosen": -0.8601275682449341, - "logits/rejected": -2.584455728530884, - "logps/chosen": -511.11749267578125, - "logps/rejected": -595.477294921875, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.7595916986465454, - "rewards/margins": 23.668148040771484, - "rewards/rejected": -25.4277400970459, - "step": 1954 - }, - { - "epoch": 0.74, - "learning_rate": 3.4007546512127764e-06, - "logits/chosen": -2.013322591781616, - "logits/rejected": -0.418317586183548, - "logps/chosen": -222.47317504882812, - "logps/rejected": -578.25341796875, - "loss": 0.0101, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.971447765827179, - "rewards/margins": 12.35247802734375, - "rewards/rejected": -13.323925971984863, - "step": 1955 - }, - { - "epoch": 0.74, - "learning_rate": 3.391801549560494e-06, - "logits/chosen": -6.984984397888184, - "logits/rejected": -1.1640088558197021, - "logps/chosen": -624.0125732421875, - "logps/rejected": -3123.228271484375, - "loss": 0.0084, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.7216004133224487, - "rewards/margins": 30.767440795898438, - "rewards/rejected": -32.48904037475586, - "step": 1956 - }, - { - "epoch": 0.74, - "learning_rate": 3.3828578412804235e-06, - "logits/chosen": -2.6566929817199707, - "logits/rejected": -2.5833821296691895, - "logps/chosen": -680.1951904296875, - "logps/rejected": -979.44287109375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.7529785633087158, - "rewards/margins": 16.43165397644043, - "rewards/rejected": -14.678674697875977, - "step": 1957 - }, - { - "epoch": 0.74, - "learning_rate": 3.373923539085805e-06, - "logits/chosen": -1.1947472095489502, - "logits/rejected": -3.9334208965301514, - "logps/chosen": -249.66270446777344, - "logps/rejected": -266.03875732421875, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.8237137198448181, - "rewards/margins": 15.935993194580078, - "rewards/rejected": -16.759706497192383, - "step": 1958 - }, - { - "epoch": 0.74, - "learning_rate": 3.364998655676496e-06, - "logits/chosen": -1.2589771747589111, - "logits/rejected": -1.3771530389785767, - "logps/chosen": -252.15911865234375, - "logps/rejected": -680.5824584960938, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.04502105712890625, - "rewards/margins": 31.799144744873047, - "rewards/rejected": -31.844165802001953, - "step": 1959 - }, - { - "epoch": 0.74, - "learning_rate": 3.356083203738978e-06, - "logits/chosen": -3.7612974643707275, - "logits/rejected": -2.161297082901001, - "logps/chosen": -128.36166381835938, - "logps/rejected": -536.3567504882812, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.1175994873046875, - "rewards/margins": 19.752044677734375, - "rewards/rejected": -20.869644165039062, - "step": 1960 - }, - { - "epoch": 0.74, - "learning_rate": 3.3471771959463195e-06, - "logits/chosen": -0.5168268084526062, - "logits/rejected": -3.8441989421844482, - "logps/chosen": -177.83291625976562, - "logps/rejected": -133.9058837890625, - "loss": 0.0026, - "rewards/accuracies": 1.0, - "rewards/chosen": -4.437924385070801, - "rewards/margins": 3.8741350173950195, - "rewards/rejected": -8.31205940246582, - "step": 1961 - }, - { - "epoch": 0.74, - "learning_rate": 3.338280644958162e-06, - "logits/chosen": -1.4293184280395508, - "logits/rejected": -4.036295413970947, - "logps/chosen": -245.1162109375, - "logps/rejected": -423.4963073730469, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.3629273176193237, - "rewards/margins": 27.589460372924805, - "rewards/rejected": -28.9523868560791, - "step": 1962 - }, - { - "epoch": 0.74, - "learning_rate": 3.329393563420713e-06, - "logits/chosen": -4.762890815734863, - "logits/rejected": -2.0832901000976562, - "logps/chosen": -491.62579345703125, - "logps/rejected": -1812.5185546875, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 3.2598206996917725, - "rewards/margins": 63.79758834838867, - "rewards/rejected": -60.53776931762695, - "step": 1963 - }, - { - "epoch": 0.74, - "learning_rate": 3.3205159639667117e-06, - "logits/chosen": -3.9511189460754395, - "logits/rejected": -4.89788818359375, - "logps/chosen": -433.3775634765625, - "logps/rejected": -593.6815185546875, - "loss": 0.0867, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.941735863685608, - "rewards/margins": 34.90850067138672, - "rewards/rejected": -32.966766357421875, - "step": 1964 - }, - { - "epoch": 0.74, - "learning_rate": 3.3116478592154177e-06, - "logits/chosen": -8.219178199768066, - "logits/rejected": -1.9404809474945068, - "logps/chosen": -277.7143859863281, - "logps/rejected": -2769.40869140625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.279702752828598, - "rewards/margins": 59.257877349853516, - "rewards/rejected": -58.97817611694336, - "step": 1965 - }, - { - "epoch": 0.74, - "learning_rate": 3.302789261772601e-06, - "logits/chosen": -1.0536539554595947, - "logits/rejected": -0.3519790768623352, - "logps/chosen": -364.3785400390625, - "logps/rejected": -1508.6229248046875, - "loss": 0.004, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.4324249029159546, - "rewards/margins": 85.35549926757812, - "rewards/rejected": -83.9230728149414, - "step": 1966 - }, - { - "epoch": 0.74, - "learning_rate": 3.2939401842305187e-06, - "logits/chosen": -0.5823772549629211, - "logits/rejected": -4.383185386657715, - "logps/chosen": -645.4178466796875, - "logps/rejected": -778.810302734375, - "loss": 0.0002, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.978240966796875, - "rewards/margins": 42.14130783081055, - "rewards/rejected": -40.16306686401367, - "step": 1967 - }, - { - "epoch": 0.74, - "learning_rate": 3.285100639167883e-06, - "logits/chosen": -5.4707932472229, - "logits/rejected": -0.4193522334098816, - "logps/chosen": -404.59185791015625, - "logps/rejected": -2045.4925537109375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 2.3280608654022217, - "rewards/margins": 47.96796417236328, - "rewards/rejected": -45.6399040222168, - "step": 1968 - }, - { - "epoch": 0.74, - "learning_rate": 3.2762706391498712e-06, - "logits/chosen": -3.8659543991088867, - "logits/rejected": -3.530654191970825, - "logps/chosen": -623.7296142578125, - "logps/rejected": -1232.587158203125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.15587158501148224, - "rewards/margins": 28.429346084594727, - "rewards/rejected": -28.273473739624023, - "step": 1969 - }, - { - "epoch": 0.74, - "learning_rate": 3.2674501967280814e-06, - "logits/chosen": -3.4437315464019775, - "logits/rejected": -1.1659972667694092, - "logps/chosen": -292.40771484375, - "logps/rejected": -897.8687133789062, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.647467017173767, - "rewards/margins": 27.514686584472656, - "rewards/rejected": -29.162153244018555, - "step": 1970 - }, - { - "epoch": 0.75, - "learning_rate": 3.258639324440527e-06, - "logits/chosen": -0.6262006163597107, - "logits/rejected": -4.22484827041626, - "logps/chosen": -397.551513671875, - "logps/rejected": -288.19390869140625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.6826446652412415, - "rewards/margins": 10.095418930053711, - "rewards/rejected": -10.778063774108887, - "step": 1971 - }, - { - "epoch": 0.75, - "learning_rate": 3.2498380348116264e-06, - "logits/chosen": -1.7937144041061401, - "logits/rejected": -4.632750034332275, - "logps/chosen": -353.37298583984375, - "logps/rejected": -424.07177734375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.551080346107483, - "rewards/margins": 21.90548324584961, - "rewards/rejected": -23.45656394958496, - "step": 1972 - }, - { - "epoch": 0.75, - "learning_rate": 3.2410463403521653e-06, - "logits/chosen": -0.6381181478500366, - "logits/rejected": -2.297272205352783, - "logps/chosen": -545.5927124023438, - "logps/rejected": -712.750732421875, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.778778076171875, - "rewards/margins": 25.582929611206055, - "rewards/rejected": -24.80415153503418, - "step": 1973 - }, - { - "epoch": 0.75, - "learning_rate": 3.2322642535592994e-06, - "logits/chosen": -0.7305485010147095, - "logits/rejected": -2.159433603286743, - "logps/chosen": -267.328369140625, - "logps/rejected": -500.19140625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.04729919508099556, - "rewards/margins": 19.83537483215332, - "rewards/rejected": -19.882673263549805, - "step": 1974 - }, - { - "epoch": 0.75, - "learning_rate": 3.2234917869165203e-06, - "logits/chosen": -7.125571250915527, - "logits/rejected": -0.9287546277046204, - "logps/chosen": -303.0826416015625, - "logps/rejected": -2235.83154296875, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.40118715167045593, - "rewards/margins": 31.696517944335938, - "rewards/rejected": -32.09770584106445, - "step": 1975 - }, - { - "epoch": 0.75, - "learning_rate": 3.214728952893649e-06, - "logits/chosen": -7.479025840759277, - "logits/rejected": -1.1966389417648315, - "logps/chosen": -211.61380004882812, - "logps/rejected": -2814.022216796875, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.5735901594161987, - "rewards/margins": 69.17210388183594, - "rewards/rejected": -67.5985107421875, - "step": 1976 - }, - { - "epoch": 0.75, - "learning_rate": 3.205975763946815e-06, - "logits/chosen": -4.9675679206848145, - "logits/rejected": -1.689422845840454, - "logps/chosen": -263.867431640625, - "logps/rejected": -913.1878662109375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.3836853504180908, - "rewards/margins": 20.854631423950195, - "rewards/rejected": -22.238317489624023, - "step": 1977 - }, - { - "epoch": 0.75, - "learning_rate": 3.1972322325184347e-06, - "logits/chosen": -3.699289083480835, - "logits/rejected": -1.2464148998260498, - "logps/chosen": -717.2927856445312, - "logps/rejected": -1762.42578125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.02974243275821209, - "rewards/margins": 47.35717010498047, - "rewards/rejected": -47.38691329956055, - "step": 1978 - }, - { - "epoch": 0.75, - "learning_rate": 3.188498371037193e-06, - "logits/chosen": -2.5939748287200928, - "logits/rejected": -0.8289831876754761, - "logps/chosen": -391.87945556640625, - "logps/rejected": -893.62353515625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.610650658607483, - "rewards/margins": 26.288654327392578, - "rewards/rejected": -27.89930534362793, - "step": 1979 - }, - { - "epoch": 0.75, - "learning_rate": 3.1797741919180403e-06, - "logits/chosen": -8.129655838012695, - "logits/rejected": -2.3382203578948975, - "logps/chosen": -237.22906494140625, - "logps/rejected": -3527.5537109375, - "loss": 0.0106, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.5490814447402954, - "rewards/margins": 35.44554138183594, - "rewards/rejected": -34.896461486816406, - "step": 1980 - }, - { - "epoch": 0.75, - "learning_rate": 3.171059707562153e-06, - "logits/chosen": -0.9435599446296692, - "logits/rejected": -2.6697120666503906, - "logps/chosen": -262.7886962890625, - "logps/rejected": -615.4583129882812, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.557623267173767, - "rewards/margins": 27.218067169189453, - "rewards/rejected": -25.660444259643555, - "step": 1981 - }, - { - "epoch": 0.75, - "learning_rate": 3.162354930356929e-06, - "logits/chosen": -1.1681488752365112, - "logits/rejected": -0.9773247241973877, - "logps/chosen": -246.91522216796875, - "logps/rejected": -849.9686889648438, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.321389764547348, - "rewards/margins": 41.18378829956055, - "rewards/rejected": -41.50517654418945, - "step": 1982 - }, - { - "epoch": 0.75, - "learning_rate": 3.1536598726759747e-06, - "logits/chosen": 0.8131954073905945, - "logits/rejected": -3.1843531131744385, - "logps/chosen": -540.0615234375, - "logps/rejected": -455.7146301269531, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.337347388267517, - "rewards/margins": 21.746400833129883, - "rewards/rejected": -20.409053802490234, - "step": 1983 - }, - { - "epoch": 0.75, - "learning_rate": 3.144974546879069e-06, - "logits/chosen": -1.3644497394561768, - "logits/rejected": -4.797958850860596, - "logps/chosen": -459.6089172363281, - "logps/rejected": -282.7518310546875, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -3.5647430419921875, - "rewards/margins": 15.487735748291016, - "rewards/rejected": -19.052478790283203, - "step": 1984 - }, - { - "epoch": 0.75, - "learning_rate": 3.136298965312168e-06, - "logits/chosen": -3.237760305404663, - "logits/rejected": -2.361407518386841, - "logps/chosen": -196.21871948242188, - "logps/rejected": -716.7791137695312, - "loss": 0.0002, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.4062042236328125, - "rewards/margins": 29.975561141967773, - "rewards/rejected": -30.381765365600586, - "step": 1985 - }, - { - "epoch": 0.75, - "learning_rate": 3.1276331403073733e-06, - "logits/chosen": -0.48928433656692505, - "logits/rejected": -6.186308860778809, - "logps/chosen": -593.2872314453125, - "logps/rejected": -175.21951293945312, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.579333543777466, - "rewards/margins": 9.809491157531738, - "rewards/rejected": -12.388824462890625, - "step": 1986 - }, - { - "epoch": 0.75, - "learning_rate": 3.1189770841829147e-06, - "logits/chosen": -8.187841415405273, - "logits/rejected": -1.6166013479232788, - "logps/chosen": -377.5539855957031, - "logps/rejected": -2386.034423828125, - "loss": 0.0866, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.7641022205352783, - "rewards/margins": 39.029136657714844, - "rewards/rejected": -40.79323959350586, - "step": 1987 - }, - { - "epoch": 0.75, - "learning_rate": 3.110330809243134e-06, - "logits/chosen": -1.1672779321670532, - "logits/rejected": -1.3173714876174927, - "logps/chosen": 0.0, - "logps/rejected": 0.0, - "loss": 0.0868, - "rewards/accuracies": 0.0, - "rewards/chosen": 0.0, - "rewards/margins": 0.0, - "rewards/rejected": 0.0, - "step": 1988 - }, - { - "epoch": 0.75, - "learning_rate": 3.1016943277784806e-06, - "logits/chosen": -2.75417160987854, - "logits/rejected": -1.7161575555801392, - "logps/chosen": -416.29119873046875, - "logps/rejected": -733.9482421875, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.7498779296875, - "rewards/margins": 12.91949462890625, - "rewards/rejected": -12.16961669921875, - "step": 1989 - }, - { - "epoch": 0.75, - "learning_rate": 3.093067652065468e-06, - "logits/chosen": -2.1742262840270996, - "logits/rejected": -3.5087194442749023, - "logps/chosen": -357.36346435546875, - "logps/rejected": -371.83160400390625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.7405213117599487, - "rewards/margins": 10.145233154296875, - "rewards/rejected": -8.404711723327637, - "step": 1990 - }, - { - "epoch": 0.75, - "learning_rate": 3.0844507943666834e-06, - "logits/chosen": -6.505568504333496, - "logits/rejected": -2.3073060512542725, - "logps/chosen": -254.13082885742188, - "logps/rejected": -1376.5711669921875, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.5472748279571533, - "rewards/margins": 17.502370834350586, - "rewards/rejected": -19.049646377563477, - "step": 1991 - }, - { - "epoch": 0.75, - "learning_rate": 3.0758437669307516e-06, - "logits/chosen": -0.8908612728118896, - "logits/rejected": -3.104081392288208, - "logps/chosen": -321.334228515625, - "logps/rejected": -262.93743896484375, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.5535461902618408, - "rewards/margins": 14.176408767700195, - "rewards/rejected": -15.729954719543457, - "step": 1992 - }, - { - "epoch": 0.75, - "learning_rate": 3.0672465819923215e-06, - "logits/chosen": -2.1705431938171387, - "logits/rejected": -1.9874480962753296, - "logps/chosen": -166.4521484375, - "logps/rejected": -556.6241455078125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -4.576942443847656, - "rewards/margins": 25.08367156982422, - "rewards/rejected": -29.660614013671875, - "step": 1993 - }, - { - "epoch": 0.75, - "learning_rate": 3.058659251772057e-06, - "logits/chosen": -3.2456886768341064, - "logits/rejected": -0.41667646169662476, - "logps/chosen": -659.5855102539062, - "logps/rejected": -1406.0029296875, - "loss": 0.0007, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.533233642578125, - "rewards/margins": 29.442352294921875, - "rewards/rejected": -29.9755859375, - "step": 1994 - }, - { - "epoch": 0.75, - "learning_rate": 3.0500817884766155e-06, - "logits/chosen": -5.813564300537109, - "logits/rejected": -0.2984953224658966, - "logps/chosen": -354.71868896484375, - "logps/rejected": -2178.41796875, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.381478875875473, - "rewards/margins": 51.85191345214844, - "rewards/rejected": -51.470436096191406, - "step": 1995 - }, - { - "epoch": 0.75, - "learning_rate": 3.041514204298619e-06, - "logits/chosen": -0.7901427745819092, - "logits/rejected": -4.955255508422852, - "logps/chosen": -476.66259765625, - "logps/rejected": -277.22613525390625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 2.255603075027466, - "rewards/margins": 19.006547927856445, - "rewards/rejected": -16.750944137573242, - "step": 1996 - }, - { - "epoch": 0.76, - "learning_rate": 3.0329565114166592e-06, - "logits/chosen": -2.9474494457244873, - "logits/rejected": -1.8188451528549194, - "logps/chosen": -444.1932373046875, - "logps/rejected": -1037.400390625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.4030303955078125, - "rewards/margins": 34.83576583862305, - "rewards/rejected": -35.23879623413086, - "step": 1997 - }, - { - "epoch": 0.76, - "learning_rate": 3.0244087219952565e-06, - "logits/chosen": -7.004361152648926, - "logits/rejected": -1.6952112913131714, - "logps/chosen": -54.55963897705078, - "logps/rejected": -4046.4931640625, - "loss": 0.0853, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.6823440790176392, - "rewards/margins": 44.207916259765625, - "rewards/rejected": -45.8902587890625, - "step": 1998 - }, - { - "epoch": 0.76, - "learning_rate": 3.0158708481848577e-06, - "logits/chosen": -0.47967955470085144, - "logits/rejected": -2.210216999053955, - "logps/chosen": -415.912353515625, - "logps/rejected": -731.31494140625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.1343291997909546, - "rewards/margins": 34.3548583984375, - "rewards/rejected": -35.48918914794922, - "step": 1999 - }, - { - "epoch": 0.76, - "learning_rate": 3.007342902121818e-06, - "logits/chosen": -4.02670955657959, - "logits/rejected": -1.9613860845565796, - "logps/chosen": -258.72808837890625, - "logps/rejected": -775.15087890625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.699310302734375, - "rewards/margins": 30.04373550415039, - "rewards/rejected": -30.743045806884766, - "step": 2000 - }, - { - "epoch": 0.76, - "learning_rate": 2.9988248959283784e-06, - "logits/chosen": -1.3930522203445435, - "logits/rejected": -2.4089736938476562, - "logps/chosen": -240.02520751953125, - "logps/rejected": -592.1458740234375, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.3800766170024872, - "rewards/margins": 31.603368759155273, - "rewards/rejected": -31.223291397094727, - "step": 2001 - }, - { - "epoch": 0.76, - "learning_rate": 2.990316841712644e-06, - "logits/chosen": -3.974255323410034, - "logits/rejected": -3.8863532543182373, - "logps/chosen": -191.7623291015625, - "logps/rejected": -285.1455078125, - "loss": 0.01, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.5801880359649658, - "rewards/margins": 6.959619522094727, - "rewards/rejected": -8.539807319641113, - "step": 2002 - }, - { - "epoch": 0.76, - "learning_rate": 2.981818751568586e-06, - "logits/chosen": -1.7680442333221436, - "logits/rejected": -3.1939315795898438, - "logps/chosen": -256.24481201171875, - "logps/rejected": -541.628173828125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 2.514605760574341, - "rewards/margins": 29.134437561035156, - "rewards/rejected": -26.619831085205078, - "step": 2003 - }, - { - "epoch": 0.76, - "learning_rate": 2.973330637576006e-06, - "logits/chosen": -2.9768922328948975, - "logits/rejected": -4.855275630950928, - "logps/chosen": -117.16426086425781, - "logps/rejected": -401.5535888671875, - "loss": 0.0081, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.6444061398506165, - "rewards/margins": 24.35091781616211, - "rewards/rejected": -24.995323181152344, - "step": 2004 - }, - { - "epoch": 0.76, - "learning_rate": 2.964852511800519e-06, - "logits/chosen": -1.568827509880066, - "logits/rejected": -6.293356895446777, - "logps/chosen": -380.3908386230469, - "logps/rejected": -142.88070678710938, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.114523410797119, - "rewards/margins": 8.460744857788086, - "rewards/rejected": -10.575268745422363, - "step": 2005 - }, - { - "epoch": 0.76, - "learning_rate": 2.956384386293555e-06, - "logits/chosen": -3.743525266647339, - "logits/rejected": -1.706878662109375, - "logps/chosen": -425.0890808105469, - "logps/rejected": -673.33544921875, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.04042358323931694, - "rewards/margins": 21.65277671813965, - "rewards/rejected": -21.693201065063477, - "step": 2006 - }, - { - "epoch": 0.76, - "learning_rate": 2.9479262730923165e-06, - "logits/chosen": -7.467337131500244, - "logits/rejected": -0.9161192178726196, - "logps/chosen": -384.5560302734375, - "logps/rejected": -3562.44970703125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.1992768049240112, - "rewards/margins": 42.980525970458984, - "rewards/rejected": -41.78125, - "step": 2007 - }, - { - "epoch": 0.76, - "learning_rate": 2.939478184219777e-06, - "logits/chosen": -3.8015987873077393, - "logits/rejected": -3.355889081954956, - "logps/chosen": -394.20477294921875, - "logps/rejected": -1010.9254150390625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 3.749957323074341, - "rewards/margins": 42.421199798583984, - "rewards/rejected": -38.671241760253906, - "step": 2008 - }, - { - "epoch": 0.76, - "learning_rate": 2.931040131684667e-06, - "logits/chosen": -2.0400683879852295, - "logits/rejected": -3.7588469982147217, - "logps/chosen": -389.4221496582031, - "logps/rejected": -489.8900451660156, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 2.0412018299102783, - "rewards/margins": 15.9058837890625, - "rewards/rejected": -13.8646821975708, - "step": 2009 - }, - { - "epoch": 0.76, - "learning_rate": 2.922612127481441e-06, - "logits/chosen": -0.374993234872818, - "logits/rejected": -1.145391583442688, - "logps/chosen": -422.76971435546875, - "logps/rejected": -653.828369140625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 2.026440382003784, - "rewards/margins": 20.881216049194336, - "rewards/rejected": -18.85477638244629, - "step": 2010 - }, - { - "epoch": 0.76, - "learning_rate": 2.9141941835902796e-06, - "logits/chosen": -0.39807677268981934, - "logits/rejected": -2.279176950454712, - "logps/chosen": -536.6282348632812, - "logps/rejected": -753.9058837890625, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.3032898008823395, - "rewards/margins": 36.831268310546875, - "rewards/rejected": -37.134559631347656, - "step": 2011 - }, - { - "epoch": 0.76, - "learning_rate": 2.905786311977055e-06, - "logits/chosen": -6.299012184143066, - "logits/rejected": -1.7033475637435913, - "logps/chosen": -437.4158935546875, - "logps/rejected": -3179.497314453125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.5221344232559204, - "rewards/margins": 56.7895393371582, - "rewards/rejected": -56.26740646362305, - "step": 2012 - }, - { - "epoch": 0.76, - "learning_rate": 2.8973885245933287e-06, - "logits/chosen": -6.501037120819092, - "logits/rejected": -1.474788784980774, - "logps/chosen": -286.69976806640625, - "logps/rejected": -1176.1138916015625, - "loss": 0.0032, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.1980469226837158, - "rewards/margins": 40.33083724975586, - "rewards/rejected": -39.132789611816406, - "step": 2013 - }, - { - "epoch": 0.76, - "learning_rate": 2.8890008333763187e-06, - "logits/chosen": -5.516674518585205, - "logits/rejected": -0.6548682451248169, - "logps/chosen": -590.4139404296875, - "logps/rejected": -1767.0103759765625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.8738464713096619, - "rewards/margins": 22.80866241455078, - "rewards/rejected": -23.68250846862793, - "step": 2014 - }, - { - "epoch": 0.76, - "learning_rate": 2.880623250248904e-06, - "logits/chosen": -7.02183198928833, - "logits/rejected": -2.6653809547424316, - "logps/chosen": -234.41201782226562, - "logps/rejected": -1027.01806640625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.000788927078247, - "rewards/margins": 23.32133674621582, - "rewards/rejected": -25.322126388549805, - "step": 2015 - }, - { - "epoch": 0.76, - "learning_rate": 2.8722557871195822e-06, - "logits/chosen": -0.5747987627983093, - "logits/rejected": -4.0804362297058105, - "logps/chosen": -394.87249755859375, - "logps/rejected": -641.5091552734375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.2996124029159546, - "rewards/margins": 29.397281646728516, - "rewards/rejected": -28.09766960144043, - "step": 2016 - }, - { - "epoch": 0.76, - "learning_rate": 2.8638984558824777e-06, - "logits/chosen": -3.264230251312256, - "logits/rejected": -3.084855556488037, - "logps/chosen": -489.6361083984375, - "logps/rejected": -992.9924926757812, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.6326660513877869, - "rewards/margins": 40.781185150146484, - "rewards/rejected": -40.14851760864258, - "step": 2017 - }, - { - "epoch": 0.76, - "learning_rate": 2.855551268417305e-06, - "logits/chosen": -1.2915282249450684, - "logits/rejected": -3.2898330688476562, - "logps/chosen": -442.0768737792969, - "logps/rejected": -596.450927734375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.8503143191337585, - "rewards/margins": 39.193336486816406, - "rewards/rejected": -40.04365158081055, - "step": 2018 - }, - { - "epoch": 0.76, - "learning_rate": 2.847214236589356e-06, - "logits/chosen": -6.574177265167236, - "logits/rejected": -1.8633754253387451, - "logps/chosen": -184.15567016601562, - "logps/rejected": -1077.659912109375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.13533936440944672, - "rewards/margins": 25.224695205688477, - "rewards/rejected": -25.08935546875, - "step": 2019 - }, - { - "epoch": 0.76, - "learning_rate": 2.838887372249499e-06, - "logits/chosen": -2.685364007949829, - "logits/rejected": -0.9737975001335144, - "logps/chosen": -278.7205505371094, - "logps/rejected": -632.0498657226562, - "loss": 0.0002, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.7469482421875, - "rewards/margins": 16.769601821899414, - "rewards/rejected": -17.516550064086914, - "step": 2020 - }, - { - "epoch": 0.76, - "learning_rate": 2.830570687234138e-06, - "logits/chosen": -0.7924260497093201, - "logits/rejected": -2.9502906799316406, - "logps/chosen": -224.30636596679688, - "logps/rejected": -511.43109130859375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.354669213294983, - "rewards/margins": 26.434253692626953, - "rewards/rejected": -27.788923263549805, - "step": 2021 - }, - { - "epoch": 0.76, - "learning_rate": 2.822264193365212e-06, - "logits/chosen": -0.36900874972343445, - "logits/rejected": -1.7771260738372803, - "logps/chosen": -532.9525756835938, - "logps/rejected": -820.81787109375, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.944244384765625, - "rewards/margins": 33.31663513183594, - "rewards/rejected": -32.37239074707031, - "step": 2022 - }, - { - "epoch": 0.76, - "learning_rate": 2.813967902450179e-06, - "logits/chosen": -1.9640469551086426, - "logits/rejected": -2.489625930786133, - "logps/chosen": -186.57186889648438, - "logps/rejected": -376.9082946777344, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.1401230096817017, - "rewards/margins": 14.288834571838379, - "rewards/rejected": -15.42895793914795, - "step": 2023 - }, - { - "epoch": 0.77, - "learning_rate": 2.805681826281983e-06, - "logits/chosen": -2.362490177154541, - "logits/rejected": -2.5784265995025635, - "logps/chosen": -134.09954833984375, - "logps/rejected": -422.6541442871094, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.31147003173828125, - "rewards/margins": 20.61512565612793, - "rewards/rejected": -20.30365562438965, - "step": 2024 - }, - { - "epoch": 0.77, - "learning_rate": 2.7974059766390527e-06, - "logits/chosen": -0.5146480798721313, - "logits/rejected": -3.1430628299713135, - "logps/chosen": -432.2975158691406, - "logps/rejected": -421.2666015625, - "loss": 0.0009, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.7590911984443665, - "rewards/margins": 23.947439193725586, - "rewards/rejected": -23.1883487701416, - "step": 2025 - }, - { - "epoch": 0.77, - "learning_rate": 2.7891403652852844e-06, - "logits/chosen": -1.3936281204223633, - "logits/rejected": -0.9446288347244263, - "logps/chosen": -459.7361755371094, - "logps/rejected": -1011.3463134765625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -3.8215057849884033, - "rewards/margins": 37.11551284790039, - "rewards/rejected": -40.93701934814453, - "step": 2026 - }, - { - "epoch": 0.77, - "learning_rate": 2.780885003970012e-06, - "logits/chosen": -3.8202178478240967, - "logits/rejected": -1.9701790809631348, - "logps/chosen": -222.71226501464844, - "logps/rejected": -1257.3052978515625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.458273410797119, - "rewards/margins": 53.29907989501953, - "rewards/rejected": -55.757354736328125, - "step": 2027 - }, - { - "epoch": 0.77, - "learning_rate": 2.7726399044280107e-06, - "logits/chosen": -5.620321750640869, - "logits/rejected": -2.025125741958618, - "logps/chosen": -326.1385192871094, - "logps/rejected": -980.8718872070312, - "loss": 0.0172, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.2313995361328125, - "rewards/margins": 20.845951080322266, - "rewards/rejected": -20.614551544189453, - "step": 2028 - }, - { - "epoch": 0.77, - "learning_rate": 2.7644050783794586e-06, - "logits/chosen": -5.812915325164795, - "logits/rejected": -1.2055566310882568, - "logps/chosen": -315.9110107421875, - "logps/rejected": -1311.798583984375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.10217895358800888, - "rewards/margins": 37.41019821166992, - "rewards/rejected": -37.51237869262695, - "step": 2029 - }, - { - "epoch": 0.77, - "learning_rate": 2.756180537529932e-06, - "logits/chosen": -5.249933242797852, - "logits/rejected": -0.910394012928009, - "logps/chosen": -337.6950988769531, - "logps/rejected": -1436.51123046875, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -3.4274139404296875, - "rewards/margins": 29.475845336914062, - "rewards/rejected": -32.90325927734375, - "step": 2030 - }, - { - "epoch": 0.77, - "learning_rate": 2.747966293570392e-06, - "logits/chosen": -1.822432279586792, - "logits/rejected": -1.815358281135559, - "logps/chosen": -309.8640441894531, - "logps/rejected": -555.0737915039062, - "loss": 0.011, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.8041350841522217, - "rewards/margins": 21.53704261779785, - "rewards/rejected": -24.341176986694336, - "step": 2031 - }, - { - "epoch": 0.77, - "learning_rate": 2.7397623581771638e-06, - "logits/chosen": -0.41081011295318604, - "logits/rejected": -5.995853900909424, - "logps/chosen": -340.83154296875, - "logps/rejected": -179.6573944091797, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.7691497802734375, - "rewards/margins": 11.323176383972168, - "rewards/rejected": -12.092326164245605, - "step": 2032 - }, - { - "epoch": 0.77, - "learning_rate": 2.7315687430119097e-06, - "logits/chosen": -1.7741508483886719, - "logits/rejected": -3.3042891025543213, - "logps/chosen": -239.40286254882812, - "logps/rejected": -531.3536376953125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.7134796380996704, - "rewards/margins": 33.50762939453125, - "rewards/rejected": -32.794151306152344, - "step": 2033 - }, - { - "epoch": 0.77, - "learning_rate": 2.7233854597216335e-06, - "logits/chosen": -3.1402902603149414, - "logits/rejected": -3.2154622077941895, - "logps/chosen": -375.9430236816406, - "logps/rejected": -746.2958984375, - "loss": 0.0062, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.24636231362819672, - "rewards/margins": 35.305694580078125, - "rewards/rejected": -35.05933380126953, - "step": 2034 - }, - { - "epoch": 0.77, - "learning_rate": 2.715212519938646e-06, - "logits/chosen": -3.3113653659820557, - "logits/rejected": -2.971541404724121, - "logps/chosen": -359.7942199707031, - "logps/rejected": -797.9228515625, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.936383068561554, - "rewards/margins": 30.212535858154297, - "rewards/rejected": -29.276153564453125, - "step": 2035 - }, - { - "epoch": 0.77, - "learning_rate": 2.70704993528055e-06, - "logits/chosen": -3.1282899379730225, - "logits/rejected": -0.7518507838249207, - "logps/chosen": -126.20652770996094, - "logps/rejected": -220.43490600585938, - "loss": 0.034, - "rewards/accuracies": 1.0, - "rewards/chosen": -3.3420631885528564, - "rewards/margins": 1.1633050441741943, - "rewards/rejected": -4.505368232727051, - "step": 2036 - }, - { - "epoch": 0.77, - "learning_rate": 2.698897717350243e-06, - "logits/chosen": -6.010307788848877, - "logits/rejected": -2.1820755004882812, - "logps/chosen": -366.1480712890625, - "logps/rejected": -958.127685546875, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.7902893424034119, - "rewards/margins": 11.711431503295898, - "rewards/rejected": -10.921142578125, - "step": 2037 - }, - { - "epoch": 0.77, - "learning_rate": 2.6907558777358756e-06, - "logits/chosen": -2.2156782150268555, - "logits/rejected": -1.2777655124664307, - "logps/chosen": -256.03076171875, - "logps/rejected": -794.8314208984375, - "loss": 0.0004, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.081201195716858, - "rewards/margins": 40.45905685424805, - "rewards/rejected": -39.37785720825195, - "step": 2038 - }, - { - "epoch": 0.77, - "learning_rate": 2.6826244280108438e-06, - "logits/chosen": -1.7276028394699097, - "logits/rejected": -2.8986222743988037, - "logps/chosen": -180.48876953125, - "logps/rejected": -444.9201965332031, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.7690460085868835, - "rewards/margins": 23.378097534179688, - "rewards/rejected": -24.147144317626953, - "step": 2039 - }, - { - "epoch": 0.77, - "learning_rate": 2.674503379733785e-06, - "logits/chosen": -0.46371719241142273, - "logits/rejected": -1.241154432296753, - "logps/chosen": -384.1119689941406, - "logps/rejected": -816.9044189453125, - "loss": 0.0008, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.3980560302734375, - "rewards/margins": 34.147518157958984, - "rewards/rejected": -32.74946212768555, - "step": 2040 - }, - { - "epoch": 0.77, - "learning_rate": 2.6663927444485484e-06, - "logits/chosen": -1.8364356756210327, - "logits/rejected": -1.6729893684387207, - "logps/chosen": -505.0125732421875, - "logps/rejected": -749.2285766601562, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -3.489605665206909, - "rewards/margins": 7.835138320922852, - "rewards/rejected": -11.32474422454834, - "step": 2041 - }, - { - "epoch": 0.77, - "learning_rate": 2.6582925336841705e-06, - "logits/chosen": -3.0334396362304688, - "logits/rejected": -0.691089391708374, - "logps/chosen": -348.38262939453125, - "logps/rejected": -785.8795166015625, - "loss": 0.0685, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.9363159537315369, - "rewards/margins": 17.21339225769043, - "rewards/rejected": -18.149707794189453, - "step": 2042 - }, - { - "epoch": 0.77, - "learning_rate": 2.650202758954886e-06, - "logits/chosen": -4.81291389465332, - "logits/rejected": -1.4612491130828857, - "logps/chosen": -229.08766174316406, - "logps/rejected": -1041.6552734375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.0428680181503296, - "rewards/margins": 29.237422943115234, - "rewards/rejected": -28.194555282592773, - "step": 2043 - }, - { - "epoch": 0.77, - "learning_rate": 2.6421234317600842e-06, - "logits/chosen": -3.873335361480713, - "logits/rejected": -3.7991931438446045, - "logps/chosen": -184.97097778320312, - "logps/rejected": -330.2119140625, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.867077648639679, - "rewards/margins": 7.052221775054932, - "rewards/rejected": -7.919299602508545, - "step": 2044 - }, - { - "epoch": 0.77, - "learning_rate": 2.6340545635843027e-06, - "logits/chosen": -2.327038288116455, - "logits/rejected": -2.3345303535461426, - "logps/chosen": -497.7186584472656, - "logps/rejected": -951.0357055664062, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.5139068961143494, - "rewards/margins": 39.35649490356445, - "rewards/rejected": -39.87040328979492, - "step": 2045 - }, - { - "epoch": 0.77, - "learning_rate": 2.625996165897222e-06, - "logits/chosen": -1.1922624111175537, - "logits/rejected": -1.9903278350830078, - "logps/chosen": -441.71038818359375, - "logps/rejected": -604.1984252929688, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.870886206626892, - "rewards/margins": 22.536840438842773, - "rewards/rejected": -20.66595458984375, - "step": 2046 - }, - { - "epoch": 0.77, - "learning_rate": 2.617948250153626e-06, - "logits/chosen": -2.3742737770080566, - "logits/rejected": -2.3687663078308105, - "logps/chosen": -379.116455078125, - "logps/rejected": -725.3763427734375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 3.555511474609375, - "rewards/margins": 23.526716232299805, - "rewards/rejected": -19.97120475769043, - "step": 2047 - }, - { - "epoch": 0.77, - "learning_rate": 2.6099108277934105e-06, - "logits/chosen": -1.859297752380371, - "logits/rejected": -3.9424850940704346, - "logps/chosen": -328.05255126953125, - "logps/rejected": -610.9749755859375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.715441882610321, - "rewards/margins": 31.840879440307617, - "rewards/rejected": -31.125436782836914, - "step": 2048 - }, - { - "epoch": 0.77, - "learning_rate": 2.601883910241546e-06, - "logits/chosen": -6.1048736572265625, - "logits/rejected": -1.5840675830841064, - "logps/chosen": -509.14947509765625, - "logps/rejected": -1804.411376953125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 2.550488233566284, - "rewards/margins": 50.1854362487793, - "rewards/rejected": -47.63494873046875, - "step": 2049 - }, - { - "epoch": 0.78, - "learning_rate": 2.59386750890808e-06, - "logits/chosen": -0.20739960670471191, - "logits/rejected": -0.7913656234741211, - "logps/chosen": -288.64630126953125, - "logps/rejected": -648.9595947265625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.661700427532196, - "rewards/margins": 29.01542854309082, - "rewards/rejected": -29.6771297454834, - "step": 2050 - }, - { - "epoch": 0.78, - "learning_rate": 2.5858616351881006e-06, - "logits/chosen": -4.049881458282471, - "logits/rejected": -3.958077907562256, - "logps/chosen": -412.56683349609375, - "logps/rejected": -745.994873046875, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.7388855218887329, - "rewards/margins": 25.8687686920166, - "rewards/rejected": -26.607654571533203, - "step": 2051 - }, - { - "epoch": 0.78, - "learning_rate": 2.577866300461743e-06, - "logits/chosen": -6.387950897216797, - "logits/rejected": -2.0488524436950684, - "logps/chosen": -951.4249267578125, - "logps/rejected": -3921.2451171875, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.79339599609375, - "rewards/margins": 65.42806243896484, - "rewards/rejected": -64.6346664428711, - "step": 2052 - }, - { - "epoch": 0.78, - "learning_rate": 2.5698815160941494e-06, - "logits/chosen": -0.01761952042579651, - "logits/rejected": -3.5805957317352295, - "logps/chosen": -336.976806640625, - "logps/rejected": -209.67578125, - "loss": 0.0008, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.0784577131271362, - "rewards/margins": 14.503808975219727, - "rewards/rejected": -13.4253511428833, - "step": 2053 - }, - { - "epoch": 0.78, - "learning_rate": 2.5619072934354762e-06, - "logits/chosen": 0.8075355291366577, - "logits/rejected": -2.2398972511291504, - "logps/chosen": -360.4146728515625, - "logps/rejected": -218.24307250976562, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 2.054962158203125, - "rewards/margins": 12.73265552520752, - "rewards/rejected": -10.677693367004395, - "step": 2054 - }, - { - "epoch": 0.78, - "learning_rate": 2.55394364382086e-06, - "logits/chosen": -0.9257726073265076, - "logits/rejected": -0.7327700853347778, - "logps/chosen": -646.819091796875, - "logps/rejected": -1023.4608764648438, - "loss": 0.0871, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.2682251036167145, - "rewards/margins": 23.0772705078125, - "rewards/rejected": -22.809045791625977, - "step": 2055 - }, - { - "epoch": 0.78, - "learning_rate": 2.545990578570404e-06, - "logits/chosen": -5.299840927124023, - "logits/rejected": -6.1605072021484375, - "logps/chosen": -646.0560302734375, - "logps/rejected": -452.24688720703125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.574090600013733, - "rewards/margins": 11.28505802154541, - "rewards/rejected": -12.859148979187012, - "step": 2056 - }, - { - "epoch": 0.78, - "learning_rate": 2.5380481089891804e-06, - "logits/chosen": -7.215548515319824, - "logits/rejected": -1.7240957021713257, - "logps/chosen": -264.21234130859375, - "logps/rejected": -2093.140625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.7918243408203125, - "rewards/margins": 22.25375747680664, - "rewards/rejected": -23.045581817626953, - "step": 2057 - }, - { - "epoch": 0.78, - "learning_rate": 2.5301162463671845e-06, - "logits/chosen": -6.358536720275879, - "logits/rejected": -0.1276591271162033, - "logps/chosen": -304.137939453125, - "logps/rejected": -2231.1845703125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.12906494736671448, - "rewards/margins": 65.20751190185547, - "rewards/rejected": -65.07844543457031, - "step": 2058 - }, - { - "epoch": 0.78, - "learning_rate": 2.522195001979343e-06, - "logits/chosen": -7.337869167327881, - "logits/rejected": -1.9543687105178833, - "logps/chosen": -325.6902770996094, - "logps/rejected": -4498.06640625, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.178955078125, - "rewards/margins": 42.6961669921875, - "rewards/rejected": -42.8751220703125, - "step": 2059 - }, - { - "epoch": 0.78, - "learning_rate": 2.514284387085488e-06, - "logits/chosen": -0.438471257686615, - "logits/rejected": -4.0801286697387695, - "logps/chosen": -348.87933349609375, - "logps/rejected": -317.67059326171875, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -4.032641887664795, - "rewards/margins": 13.941400527954102, - "rewards/rejected": -17.974042892456055, - "step": 2060 - }, - { - "epoch": 0.78, - "learning_rate": 2.5063844129303416e-06, - "logits/chosen": -2.4201149940490723, - "logits/rejected": -3.9810776710510254, - "logps/chosen": -437.5386962890625, - "logps/rejected": -682.4054565429688, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.6814484000205994, - "rewards/margins": 40.3072395324707, - "rewards/rejected": -39.625789642333984, - "step": 2061 - }, - { - "epoch": 0.78, - "learning_rate": 2.4984950907434934e-06, - "logits/chosen": -2.7039170265197754, - "logits/rejected": -1.9713698625564575, - "logps/chosen": -282.74560546875, - "logps/rejected": -484.2218933105469, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -5.1354217529296875, - "rewards/margins": 11.296716690063477, - "rewards/rejected": -16.432138442993164, - "step": 2062 - }, - { - "epoch": 0.78, - "learning_rate": 2.4906164317394067e-06, - "logits/chosen": -1.9408841133117676, - "logits/rejected": -4.331515789031982, - "logps/chosen": -257.49407958984375, - "logps/rejected": -263.1289367675781, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.8274993896484375, - "rewards/margins": 13.434313774108887, - "rewards/rejected": -15.261813163757324, - "step": 2063 - }, - { - "epoch": 0.78, - "learning_rate": 2.4827484471173726e-06, - "logits/chosen": -1.168167233467102, - "logits/rejected": -3.131889581680298, - "logps/chosen": -231.61526489257812, - "logps/rejected": -416.2285461425781, - "loss": 0.0038, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.6943618655204773, - "rewards/margins": 16.00962257385254, - "rewards/rejected": -16.7039852142334, - "step": 2064 - }, - { - "epoch": 0.78, - "learning_rate": 2.4748911480615135e-06, - "logits/chosen": -4.952648639678955, - "logits/rejected": -0.885107159614563, - "logps/chosen": -506.8075256347656, - "logps/rejected": -1059.7216796875, - "loss": 0.0005, - "rewards/accuracies": 1.0, - "rewards/chosen": -3.0172791481018066, - "rewards/margins": 7.030640125274658, - "rewards/rejected": -10.047919273376465, - "step": 2065 - }, - { - "epoch": 0.78, - "learning_rate": 2.467044545740769e-06, - "logits/chosen": -5.223948955535889, - "logits/rejected": -2.0807385444641113, - "logps/chosen": -432.38861083984375, - "logps/rejected": -1526.1624755859375, - "loss": 0.0003, - "rewards/accuracies": 1.0, - "rewards/chosen": -3.3236329555511475, - "rewards/margins": 55.35053253173828, - "rewards/rejected": -58.674163818359375, - "step": 2066 - }, - { - "epoch": 0.78, - "learning_rate": 2.459208651308862e-06, - "logits/chosen": -3.1898393630981445, - "logits/rejected": -1.225903868675232, - "logps/chosen": -225.53012084960938, - "logps/rejected": -592.3330688476562, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.2988662719726562, - "rewards/margins": 12.991917610168457, - "rewards/rejected": -14.290783882141113, - "step": 2067 - }, - { - "epoch": 0.78, - "learning_rate": 2.451383475904304e-06, - "logits/chosen": -6.76452112197876, - "logits/rejected": -3.5993845462799072, - "logps/chosen": -298.91485595703125, - "logps/rejected": -2544.089599609375, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.284036248922348, - "rewards/margins": 34.822731018066406, - "rewards/rejected": -34.5386962890625, - "step": 2068 - }, - { - "epoch": 0.78, - "learning_rate": 2.44356903065037e-06, - "logits/chosen": -3.5402581691741943, - "logits/rejected": -0.8540423512458801, - "logps/chosen": -419.29681396484375, - "logps/rejected": -1718.15234375, - "loss": 0.0002, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.15902100503444672, - "rewards/margins": 77.59398651123047, - "rewards/rejected": -77.75300598144531, - "step": 2069 - }, - { - "epoch": 0.78, - "learning_rate": 2.435765326655073e-06, - "logits/chosen": -3.091068983078003, - "logits/rejected": -4.4417548179626465, - "logps/chosen": -278.393798828125, - "logps/rejected": -490.9779357910156, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 2.3290956020355225, - "rewards/margins": 20.154335021972656, - "rewards/rejected": -17.825239181518555, - "step": 2070 - }, - { - "epoch": 0.78, - "learning_rate": 2.4279723750111683e-06, - "logits/chosen": -6.83774471282959, - "logits/rejected": -2.433586359024048, - "logps/chosen": -284.07977294921875, - "logps/rejected": -953.8971557617188, - "loss": 0.0002, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.6282623410224915, - "rewards/margins": 6.273221015930176, - "rewards/rejected": -5.64495849609375, - "step": 2071 - }, - { - "epoch": 0.78, - "learning_rate": 2.42019018679612e-06, - "logits/chosen": -4.192531108856201, - "logits/rejected": -3.4977006912231445, - "logps/chosen": -454.96612548828125, - "logps/rejected": -1175.24072265625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.0355132818222046, - "rewards/margins": 34.15941619873047, - "rewards/rejected": -33.1239013671875, - "step": 2072 - }, - { - "epoch": 0.78, - "learning_rate": 2.4124187730720916e-06, - "logits/chosen": -0.55638587474823, - "logits/rejected": -0.714698076248169, - "logps/chosen": -172.88299560546875, - "logps/rejected": -408.40631103515625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 3.241259813308716, - "rewards/margins": 17.877904891967773, - "rewards/rejected": -14.636645317077637, - "step": 2073 - }, - { - "epoch": 0.78, - "learning_rate": 2.4046581448859387e-06, - "logits/chosen": -4.652604579925537, - "logits/rejected": -4.0589375495910645, - "logps/chosen": -167.32069396972656, - "logps/rejected": -608.486572265625, - "loss": 0.0009, - "rewards/accuracies": 1.0, - "rewards/chosen": -3.9200973510742188, - "rewards/margins": 31.172725677490234, - "rewards/rejected": -35.09282302856445, - "step": 2074 - }, - { - "epoch": 0.78, - "learning_rate": 2.3969083132691785e-06, - "logits/chosen": -2.978842258453369, - "logits/rejected": -2.6856794357299805, - "logps/chosen": -239.9104461669922, - "logps/rejected": -510.3749694824219, - "loss": 0.0003, - "rewards/accuracies": 1.0, - "rewards/chosen": -3.3016250133514404, - "rewards/margins": 15.942194938659668, - "rewards/rejected": -19.243820190429688, - "step": 2075 - }, - { - "epoch": 0.78, - "learning_rate": 2.389169289237978e-06, - "logits/chosen": -3.1647543907165527, - "logits/rejected": -4.793643474578857, - "logps/chosen": -441.48907470703125, - "logps/rejected": -285.56011962890625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -3.124621629714966, - "rewards/margins": 16.93626594543457, - "rewards/rejected": -20.060888290405273, - "step": 2076 - }, - { - "epoch": 0.79, - "learning_rate": 2.3814410837931577e-06, - "logits/chosen": -1.7542860507965088, - "logits/rejected": -2.711688756942749, - "logps/chosen": -260.0954895019531, - "logps/rejected": -467.76910400390625, - "loss": 0.0012, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.9439544677734375, - "rewards/margins": 16.818527221679688, - "rewards/rejected": -17.762481689453125, - "step": 2077 - }, - { - "epoch": 0.79, - "learning_rate": 2.3737237079201437e-06, - "logits/chosen": -2.0535402297973633, - "logits/rejected": -5.039662837982178, - "logps/chosen": -219.90692138671875, - "logps/rejected": -238.93740844726562, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.2963806092739105, - "rewards/margins": 16.03484344482422, - "rewards/rejected": -15.738463401794434, - "step": 2078 - }, - { - "epoch": 0.79, - "learning_rate": 2.3660171725889703e-06, - "logits/chosen": -0.06923849135637283, - "logits/rejected": -2.721463441848755, - "logps/chosen": -551.541015625, - "logps/rejected": -607.7726440429688, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.4065795838832855, - "rewards/margins": 29.379919052124023, - "rewards/rejected": -29.7864990234375, - "step": 2079 - }, - { - "epoch": 0.79, - "learning_rate": 2.3583214887542705e-06, - "logits/chosen": -1.8504220247268677, - "logits/rejected": -4.223132133483887, - "logps/chosen": -286.3282470703125, - "logps/rejected": -329.7151184082031, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.59906005859375, - "rewards/margins": 17.830663681030273, - "rewards/rejected": -16.231603622436523, - "step": 2080 - }, - { - "epoch": 0.79, - "learning_rate": 2.350636667355244e-06, - "logits/chosen": -1.3018622398376465, - "logits/rejected": -1.5381653308868408, - "logps/chosen": -567.776611328125, - "logps/rejected": -775.5162353515625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.553472876548767, - "rewards/margins": 20.5023250579834, - "rewards/rejected": -18.9488525390625, - "step": 2081 - }, - { - "epoch": 0.79, - "learning_rate": 2.3429627193156513e-06, - "logits/chosen": -1.6187493801116943, - "logits/rejected": -3.7809059619903564, - "logps/chosen": -419.3045349121094, - "logps/rejected": -880.6094970703125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.02143249474465847, - "rewards/margins": 51.99675750732422, - "rewards/rejected": -52.0181884765625, - "step": 2082 - }, - { - "epoch": 0.79, - "learning_rate": 2.3352996555438036e-06, - "logits/chosen": -6.959888935089111, - "logits/rejected": -1.6375532150268555, - "logps/chosen": -563.7762451171875, - "logps/rejected": -2202.9013671875, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.54132080078125, - "rewards/margins": 22.695056915283203, - "rewards/rejected": -24.236377716064453, - "step": 2083 - }, - { - "epoch": 0.79, - "learning_rate": 2.3276474869325295e-06, - "logits/chosen": -2.819342613220215, - "logits/rejected": -3.1362972259521484, - "logps/chosen": -254.60025024414062, - "logps/rejected": -249.33912658691406, - "loss": 0.0006, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.7529510855674744, - "rewards/margins": 5.404779434204102, - "rewards/rejected": -4.651828289031982, - "step": 2084 - }, - { - "epoch": 0.79, - "learning_rate": 2.320006224359178e-06, - "logits/chosen": -7.7577409744262695, - "logits/rejected": -2.457014799118042, - "logps/chosen": -184.9550323486328, - "logps/rejected": -1377.55908203125, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.7253738641738892, - "rewards/margins": 22.079313278198242, - "rewards/rejected": -23.8046875, - "step": 2085 - }, - { - "epoch": 0.79, - "learning_rate": 2.312375878685598e-06, - "logits/chosen": -3.1620662212371826, - "logits/rejected": -6.305078506469727, - "logps/chosen": -226.24037170410156, - "logps/rejected": -242.87109375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.02111358754336834, - "rewards/margins": 12.256278991699219, - "rewards/rejected": -12.277392387390137, - "step": 2086 - }, - { - "epoch": 0.79, - "learning_rate": 2.304756460758111e-06, - "logits/chosen": -7.325742721557617, - "logits/rejected": -1.1917740106582642, - "logps/chosen": -403.650146484375, - "logps/rejected": -2381.0869140625, - "loss": 0.0002, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.0468292236328125, - "rewards/margins": 18.109525680541992, - "rewards/rejected": -17.06269645690918, - "step": 2087 - }, - { - "epoch": 0.79, - "learning_rate": 2.297147981407509e-06, - "logits/chosen": -5.699835300445557, - "logits/rejected": -1.1644998788833618, - "logps/chosen": -482.6474914550781, - "logps/rejected": -2470.57470703125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.19480286538600922, - "rewards/margins": 28.009933471679688, - "rewards/rejected": -28.204736709594727, - "step": 2088 - }, - { - "epoch": 0.79, - "learning_rate": 2.2895504514490397e-06, - "logits/chosen": -0.8321945071220398, - "logits/rejected": -1.7243307828903198, - "logps/chosen": -335.58251953125, - "logps/rejected": -349.31329345703125, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.187127709388733, - "rewards/margins": 6.8386383056640625, - "rewards/rejected": -5.651510715484619, - "step": 2089 - }, - { - "epoch": 0.79, - "learning_rate": 2.2819638816823796e-06, - "logits/chosen": -2.2127442359924316, - "logits/rejected": -4.491943836212158, - "logps/chosen": -420.54730224609375, - "logps/rejected": -353.2596130371094, - "loss": 0.0093, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.2816314697265625, - "rewards/margins": 17.136600494384766, - "rewards/rejected": -17.418231964111328, - "step": 2090 - }, - { - "epoch": 0.79, - "learning_rate": 2.274388282891632e-06, - "logits/chosen": -1.0292143821716309, - "logits/rejected": -0.7613568305969238, - "logps/chosen": -295.2241516113281, - "logps/rejected": -692.2030639648438, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.1579437255859375, - "rewards/margins": 26.977771759033203, - "rewards/rejected": -26.819828033447266, - "step": 2091 - }, - { - "epoch": 0.79, - "learning_rate": 2.2668236658453e-06, - "logits/chosen": -2.2357707023620605, - "logits/rejected": -4.121851921081543, - "logps/chosen": -317.7218322753906, - "logps/rejected": -706.6656494140625, - "loss": 0.0032, - "rewards/accuracies": 1.0, - "rewards/chosen": 2.0602447986602783, - "rewards/margins": 40.705482482910156, - "rewards/rejected": -38.64523696899414, - "step": 2092 - }, - { - "epoch": 0.79, - "learning_rate": 2.2592700412962775e-06, - "logits/chosen": -1.88284170627594, - "logits/rejected": -5.220235824584961, - "logps/chosen": -442.5682678222656, - "logps/rejected": -176.02816772460938, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.34320375323295593, - "rewards/margins": 11.831008911132812, - "rewards/rejected": -12.174212455749512, - "step": 2093 - }, - { - "epoch": 0.79, - "learning_rate": 2.251727419981836e-06, - "logits/chosen": -1.0973135232925415, - "logits/rejected": -2.1296019554138184, - "logps/chosen": -435.5177917480469, - "logps/rejected": -801.79296875, - "loss": 0.0007, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.8140533566474915, - "rewards/margins": 27.73934555053711, - "rewards/rejected": -26.92529296875, - "step": 2094 - }, - { - "epoch": 0.79, - "learning_rate": 2.244195812623602e-06, - "logits/chosen": -3.5322678089141846, - "logits/rejected": -2.284849166870117, - "logps/chosen": -367.3846740722656, - "logps/rejected": -887.5264892578125, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.3972809314727783, - "rewards/margins": 25.365556716918945, - "rewards/rejected": -23.96827507019043, - "step": 2095 - }, - { - "epoch": 0.79, - "learning_rate": 2.2366752299275497e-06, - "logits/chosen": -0.6034621000289917, - "logits/rejected": -2.4318063259124756, - "logps/chosen": -398.73944091796875, - "logps/rejected": -417.45086669921875, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.292227178812027, - "rewards/margins": 17.002803802490234, - "rewards/rejected": -16.7105770111084, - "step": 2096 - }, - { - "epoch": 0.79, - "learning_rate": 2.2291656825839814e-06, - "logits/chosen": -2.2182745933532715, - "logits/rejected": -2.391632556915283, - "logps/chosen": -220.5947265625, - "logps/rejected": -329.607666015625, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -5.251812934875488, - "rewards/margins": 8.2816801071167, - "rewards/rejected": -13.533493041992188, - "step": 2097 - }, - { - "epoch": 0.79, - "learning_rate": 2.2216671812675118e-06, - "logits/chosen": -4.11237096786499, - "logits/rejected": -2.333618640899658, - "logps/chosen": -296.3497314453125, - "logps/rejected": -934.0437622070312, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.5390716791152954, - "rewards/margins": 27.25272560119629, - "rewards/rejected": -25.713653564453125, - "step": 2098 - }, - { - "epoch": 0.79, - "learning_rate": 2.2141797366370498e-06, - "logits/chosen": -2.9408860206604004, - "logits/rejected": -0.08765152841806412, - "logps/chosen": -616.0072021484375, - "logps/rejected": -1502.7213134765625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.923583984375, - "rewards/margins": 55.316261291503906, - "rewards/rejected": -57.239845275878906, - "step": 2099 - }, - { - "epoch": 0.79, - "learning_rate": 2.2067033593357977e-06, - "logits/chosen": -5.8975067138671875, - "logits/rejected": -1.525479793548584, - "logps/chosen": -306.1859130859375, - "logps/rejected": -1817.8271484375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.0192962884902954, - "rewards/margins": 61.01721954345703, - "rewards/rejected": -59.9979248046875, - "step": 2100 - }, - { - "epoch": 0.79, - "learning_rate": 2.1992380599912176e-06, - "logits/chosen": -1.9379061460494995, - "logits/rejected": -1.9788299798965454, - "logps/chosen": -240.4343719482422, - "logps/rejected": -384.5659484863281, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.5738205313682556, - "rewards/margins": 12.97724723815918, - "rewards/rejected": -12.403427124023438, - "step": 2101 - }, - { - "epoch": 0.79, - "learning_rate": 2.1917838492150245e-06, - "logits/chosen": -0.8048962354660034, - "logits/rejected": -2.1385083198547363, - "logps/chosen": -406.3687438964844, - "logps/rejected": -719.41015625, - "loss": 0.0007, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.5096282958984375, - "rewards/margins": 39.7158317565918, - "rewards/rejected": -40.225460052490234, - "step": 2102 - }, - { - "epoch": 0.8, - "learning_rate": 2.184340737603178e-06, - "logits/chosen": -2.453047275543213, - "logits/rejected": -1.1362336874008179, - "logps/chosen": -422.0049743652344, - "logps/rejected": -974.1734619140625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.3599456548690796, - "rewards/margins": 38.09929656982422, - "rewards/rejected": -36.739349365234375, - "step": 2103 - }, - { - "epoch": 0.8, - "learning_rate": 2.176908735735852e-06, - "logits/chosen": -8.507084846496582, - "logits/rejected": -1.628042459487915, - "logps/chosen": -352.5149841308594, - "logps/rejected": -2588.445556640625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.322610467672348, - "rewards/margins": 61.45913314819336, - "rewards/rejected": -61.13652420043945, - "step": 2104 - }, - { - "epoch": 0.8, - "learning_rate": 2.1694878541774355e-06, - "logits/chosen": -1.2019176483154297, - "logits/rejected": -2.4975063800811768, - "logps/chosen": -470.2720947265625, - "logps/rejected": -796.449951171875, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.274423211812973, - "rewards/margins": 37.48170852661133, - "rewards/rejected": -37.2072868347168, - "step": 2105 - }, - { - "epoch": 0.8, - "learning_rate": 2.1620781034765106e-06, - "logits/chosen": -4.621382713317871, - "logits/rejected": -2.014563798904419, - "logps/chosen": -438.376708984375, - "logps/rejected": -915.7489013671875, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": 2.721762180328369, - "rewards/margins": 25.06229591369629, - "rewards/rejected": -22.340534210205078, - "step": 2106 - }, - { - "epoch": 0.8, - "learning_rate": 2.154679494165829e-06, - "logits/chosen": -3.1805272102355957, - "logits/rejected": -1.0792977809906006, - "logps/chosen": -263.4317932128906, - "logps/rejected": -690.5443115234375, - "loss": 0.0002, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.251516729593277, - "rewards/margins": 20.712194442749023, - "rewards/rejected": -20.460678100585938, - "step": 2107 - }, - { - "epoch": 0.8, - "learning_rate": 2.1472920367623094e-06, - "logits/chosen": -0.42877912521362305, - "logits/rejected": -1.7400295734405518, - "logps/chosen": -270.34814453125, - "logps/rejected": -505.499755859375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -3.3380889892578125, - "rewards/margins": 23.89133071899414, - "rewards/rejected": -27.229419708251953, - "step": 2108 - }, - { - "epoch": 0.8, - "learning_rate": 2.1399157417670233e-06, - "logits/chosen": -7.211507797241211, - "logits/rejected": -2.9751720428466797, - "logps/chosen": -231.95611572265625, - "logps/rejected": -1460.1790771484375, - "loss": 0.0006, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.1148239374160767, - "rewards/margins": 44.07508850097656, - "rewards/rejected": -42.96026611328125, - "step": 2109 - }, - { - "epoch": 0.8, - "learning_rate": 2.132550619665168e-06, - "logits/chosen": -2.6769721508026123, - "logits/rejected": -0.2934793531894684, - "logps/chosen": -191.62127685546875, - "logps/rejected": -356.6971130371094, - "loss": 0.001, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.117272973060608, - "rewards/margins": 4.982776165008545, - "rewards/rejected": -6.100049018859863, - "step": 2110 - }, - { - "epoch": 0.8, - "learning_rate": 2.1251966809260647e-06, - "logits/chosen": -1.9764457941055298, - "logits/rejected": -6.269716739654541, - "logps/chosen": -210.7904052734375, - "logps/rejected": -98.93195343017578, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.9377502799034119, - "rewards/margins": 9.14732837677002, - "rewards/rejected": -8.209578514099121, - "step": 2111 - }, - { - "epoch": 0.8, - "learning_rate": 2.1178539360031335e-06, - "logits/chosen": -5.895196914672852, - "logits/rejected": -0.35257479548454285, - "logps/chosen": -567.4801025390625, - "logps/rejected": -2415.657470703125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -3.044189453125, - "rewards/margins": 40.43244552612305, - "rewards/rejected": -43.47663497924805, - "step": 2112 - }, - { - "epoch": 0.8, - "learning_rate": 2.1105223953338805e-06, - "logits/chosen": -1.674513339996338, - "logits/rejected": -5.155325889587402, - "logps/chosen": -774.8364868164062, - "logps/rejected": -573.09765625, - "loss": 0.0002, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.417004346847534, - "rewards/margins": 36.279788970947266, - "rewards/rejected": -38.69679260253906, - "step": 2113 - }, - { - "epoch": 0.8, - "learning_rate": 2.1032020693398925e-06, - "logits/chosen": 0.277811199426651, - "logits/rejected": -4.699885845184326, - "logps/chosen": -271.97430419921875, - "logps/rejected": -262.9911804199219, - "loss": 0.0867, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.554058849811554, - "rewards/margins": 16.16545867919922, - "rewards/rejected": -16.71951675415039, - "step": 2114 - }, - { - "epoch": 0.8, - "learning_rate": 2.095892968426815e-06, - "logits/chosen": -4.909896373748779, - "logits/rejected": -3.287508726119995, - "logps/chosen": -201.95736694335938, - "logps/rejected": -621.6604614257812, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.6013473868370056, - "rewards/margins": 25.964937210083008, - "rewards/rejected": -25.363590240478516, - "step": 2115 - }, - { - "epoch": 0.8, - "learning_rate": 2.0885951029843256e-06, - "logits/chosen": -4.886452674865723, - "logits/rejected": -1.0904783010482788, - "logps/chosen": -1140.4691162109375, - "logps/rejected": -2371.009033203125, - "loss": 0.0002, - "rewards/accuracies": 1.0, - "rewards/chosen": -4.598388671875, - "rewards/margins": 22.0738525390625, - "rewards/rejected": -26.6722412109375, - "step": 2116 - }, - { - "epoch": 0.8, - "learning_rate": 2.0813084833861473e-06, - "logits/chosen": -5.282066822052002, - "logits/rejected": -0.7098106741905212, - "logps/chosen": -562.0419311523438, - "logps/rejected": -1926.237548828125, - "loss": 0.0002, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.7568603754043579, - "rewards/margins": 40.0743293762207, - "rewards/rejected": -39.31747055053711, - "step": 2117 - }, - { - "epoch": 0.8, - "learning_rate": 2.0740331199900053e-06, - "logits/chosen": -6.511928081512451, - "logits/rejected": -1.9565328359603882, - "logps/chosen": -365.5987548828125, - "logps/rejected": -1395.45263671875, - "loss": 0.0002, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.05498046800494194, - "rewards/margins": 29.967042922973633, - "rewards/rejected": -29.91206169128418, - "step": 2118 - }, - { - "epoch": 0.8, - "learning_rate": 2.0667690231376257e-06, - "logits/chosen": -1.060888409614563, - "logits/rejected": -5.6633381843566895, - "logps/chosen": -303.5273742675781, - "logps/rejected": -292.50592041015625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.804757833480835, - "rewards/margins": 21.745525360107422, - "rewards/rejected": -24.550283432006836, - "step": 2119 - }, - { - "epoch": 0.8, - "learning_rate": 2.0595162031547287e-06, - "logits/chosen": -0.24823559820652008, - "logits/rejected": -2.526217460632324, - "logps/chosen": -428.748046875, - "logps/rejected": -706.9737548828125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.10474243015050888, - "rewards/margins": 43.487918853759766, - "rewards/rejected": -43.5926628112793, - "step": 2120 - }, - { - "epoch": 0.8, - "learning_rate": 2.0522746703509944e-06, - "logits/chosen": -0.6845678091049194, - "logits/rejected": -5.047820568084717, - "logps/chosen": -325.85595703125, - "logps/rejected": -248.35667419433594, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.4357543885707855, - "rewards/margins": 17.309982299804688, - "rewards/rejected": -17.745737075805664, - "step": 2121 - }, - { - "epoch": 0.8, - "learning_rate": 2.0450444350200584e-06, - "logits/chosen": -0.7181488871574402, - "logits/rejected": -7.813004016876221, - "logps/chosen": -378.9914245605469, - "logps/rejected": -198.262939453125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.085626244544983, - "rewards/margins": 13.256381034851074, - "rewards/rejected": -14.342007637023926, - "step": 2122 - }, - { - "epoch": 0.8, - "learning_rate": 2.0378255074395094e-06, - "logits/chosen": -7.431499004364014, - "logits/rejected": -3.0292112827301025, - "logps/chosen": -402.1447448730469, - "logps/rejected": -2709.2880859375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.402227759361267, - "rewards/margins": 85.01150512695312, - "rewards/rejected": -83.6092758178711, - "step": 2123 - }, - { - "epoch": 0.8, - "learning_rate": 2.030617897870851e-06, - "logits/chosen": -2.25089168548584, - "logits/rejected": -4.994053363800049, - "logps/chosen": -162.33364868164062, - "logps/rejected": -212.4789581298828, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.2454665899276733, - "rewards/margins": 13.951544761657715, - "rewards/rejected": -15.19701099395752, - "step": 2124 - }, - { - "epoch": 0.8, - "learning_rate": 2.0234216165594988e-06, - "logits/chosen": -0.8648707270622253, - "logits/rejected": -3.5852203369140625, - "logps/chosen": -556.4296264648438, - "logps/rejected": -675.3077392578125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.20842896401882172, - "rewards/margins": 28.388452529907227, - "rewards/rejected": -28.596881866455078, - "step": 2125 - }, - { - "epoch": 0.8, - "learning_rate": 2.0162366737347738e-06, - "logits/chosen": -6.981208324432373, - "logits/rejected": -1.8925474882125854, - "logps/chosen": -491.0421447753906, - "logps/rejected": -1490.677734375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -3.62973952293396, - "rewards/margins": 20.428842544555664, - "rewards/rejected": -24.058582305908203, - "step": 2126 - }, - { - "epoch": 0.8, - "learning_rate": 2.0090630796098688e-06, - "logits/chosen": -6.1489057540893555, - "logits/rejected": -2.6207942962646484, - "logps/chosen": -332.46978759765625, - "logps/rejected": -2270.998779296875, - "loss": 0.0017, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.2243194580078125, - "rewards/margins": 63.84280014038086, - "rewards/rejected": -63.61848068237305, - "step": 2127 - }, - { - "epoch": 0.8, - "learning_rate": 2.001900844381857e-06, - "logits/chosen": -3.7135555744171143, - "logits/rejected": -1.5019259452819824, - "logps/chosen": -676.47900390625, - "logps/rejected": -1220.3673095703125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.1902039051055908, - "rewards/margins": 15.000311851501465, - "rewards/rejected": -16.190515518188477, - "step": 2128 - }, - { - "epoch": 0.8, - "learning_rate": 1.9947499782316537e-06, - "logits/chosen": -3.9743714332580566, - "logits/rejected": -0.9962400197982788, - "logps/chosen": -265.4380187988281, - "logps/rejected": -648.7703247070312, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -3.4850051403045654, - "rewards/margins": 13.52401065826416, - "rewards/rejected": -17.009016036987305, - "step": 2129 - }, - { - "epoch": 0.81, - "learning_rate": 1.9876104913240167e-06, - "logits/chosen": -8.416034698486328, - "logits/rejected": -2.5073649883270264, - "logps/chosen": -265.677490234375, - "logps/rejected": -1483.292724609375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.3378479480743408, - "rewards/margins": 21.370012283325195, - "rewards/rejected": -22.707860946655273, - "step": 2130 - }, - { - "epoch": 0.81, - "learning_rate": 1.9804823938075344e-06, - "logits/chosen": -0.9239839315414429, - "logits/rejected": -1.6034417152404785, - "logps/chosen": -186.77574157714844, - "logps/rejected": -512.6820068359375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.8987610340118408, - "rewards/margins": 26.215856552124023, - "rewards/rejected": -28.1146183013916, - "step": 2131 - }, - { - "epoch": 0.81, - "learning_rate": 1.973365695814602e-06, - "logits/chosen": -5.733143329620361, - "logits/rejected": -1.3006017208099365, - "logps/chosen": -182.87806701660156, - "logps/rejected": -1340.4365234375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -3.115678548812866, - "rewards/margins": 30.747907638549805, - "rewards/rejected": -33.86358642578125, - "step": 2132 - }, - { - "epoch": 0.81, - "learning_rate": 1.9662604074614044e-06, - "logits/chosen": -3.1730763912200928, - "logits/rejected": -4.148113250732422, - "logps/chosen": -313.8506774902344, - "logps/rejected": -722.3927001953125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.797314465045929, - "rewards/margins": 44.49383544921875, - "rewards/rejected": -43.6965217590332, - "step": 2133 - }, - { - "epoch": 0.81, - "learning_rate": 1.9591665388479196e-06, - "logits/chosen": -0.3091360032558441, - "logits/rejected": -5.483855724334717, - "logps/chosen": -659.3336181640625, - "logps/rejected": -324.945068359375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.22285766899585724, - "rewards/margins": 22.256494522094727, - "rewards/rejected": -22.03363609313965, - "step": 2134 - }, - { - "epoch": 0.81, - "learning_rate": 1.952084100057884e-06, - "logits/chosen": -2.690164566040039, - "logits/rejected": -3.9076757431030273, - "logps/chosen": -325.8565368652344, - "logps/rejected": -581.578125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.3533966541290283, - "rewards/margins": 22.27536964416504, - "rewards/rejected": -20.921972274780273, - "step": 2135 - }, - { - "epoch": 0.81, - "learning_rate": 1.945013101158787e-06, - "logits/chosen": -1.175093412399292, - "logits/rejected": -4.529608249664307, - "logps/chosen": -301.7174377441406, - "logps/rejected": -276.481689453125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.439932256937027, - "rewards/margins": 20.07421112060547, - "rewards/rejected": -20.514142990112305, - "step": 2136 - }, - { - "epoch": 0.81, - "learning_rate": 1.9379535522018623e-06, - "logits/chosen": -5.831496715545654, - "logits/rejected": -3.41617488861084, - "logps/chosen": -284.4322204589844, - "logps/rejected": -1046.45068359375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -4.416018962860107, - "rewards/margins": 24.767324447631836, - "rewards/rejected": -29.1833438873291, - "step": 2137 - }, - { - "epoch": 0.81, - "learning_rate": 1.9309054632220645e-06, - "logits/chosen": -6.8801751136779785, - "logits/rejected": -3.563842535018921, - "logps/chosen": -497.7008056640625, - "logps/rejected": -1733.4638671875, - "loss": 0.0014, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.26115113496780396, - "rewards/margins": 25.083099365234375, - "rewards/rejected": -24.821949005126953, - "step": 2138 - }, - { - "epoch": 0.81, - "learning_rate": 1.923868844238054e-06, - "logits/chosen": -1.7215806245803833, - "logits/rejected": -4.624431133270264, - "logps/chosen": -337.3413391113281, - "logps/rejected": -262.7047424316406, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.6006500720977783, - "rewards/margins": 14.298016548156738, - "rewards/rejected": -15.898666381835938, - "step": 2139 - }, - { - "epoch": 0.81, - "learning_rate": 1.916843705252195e-06, - "logits/chosen": -1.931428074836731, - "logits/rejected": -7.591612815856934, - "logps/chosen": -385.4437255859375, - "logps/rejected": -135.64625549316406, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.193359375, - "rewards/margins": 10.471430778503418, - "rewards/rejected": -9.278071403503418, - "step": 2140 - }, - { - "epoch": 0.81, - "learning_rate": 1.9098300562505266e-06, - "logits/chosen": -6.474215030670166, - "logits/rejected": -0.9985607862472534, - "logps/chosen": -440.0830383300781, - "logps/rejected": -3109.141845703125, - "loss": 0.0015, - "rewards/accuracies": 1.0, - "rewards/chosen": 2.914154052734375, - "rewards/margins": 45.168087005615234, - "rewards/rejected": -42.25393295288086, - "step": 2141 - }, - { - "epoch": 0.81, - "learning_rate": 1.9028279072027589e-06, - "logits/chosen": -1.9513916969299316, - "logits/rejected": -3.6912620067596436, - "logps/chosen": -217.56503295898438, - "logps/rejected": -411.7529296875, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.213002011179924, - "rewards/margins": 18.12883186340332, - "rewards/rejected": -18.341833114624023, - "step": 2142 - }, - { - "epoch": 0.81, - "learning_rate": 1.895837268062256e-06, - "logits/chosen": -5.759528636932373, - "logits/rejected": -2.8107707500457764, - "logps/chosen": -645.79443359375, - "logps/rejected": -2390.1103515625, - "loss": 0.0008, - "rewards/accuracies": 1.0, - "rewards/chosen": 3.1629700660705566, - "rewards/margins": 38.10715103149414, - "rewards/rejected": -34.94417953491211, - "step": 2143 - }, - { - "epoch": 0.81, - "learning_rate": 1.8888581487660185e-06, - "logits/chosen": -0.9871168732643127, - "logits/rejected": -4.150562286376953, - "logps/chosen": -263.9544982910156, - "logps/rejected": -201.8583984375, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -4.76947021484375, - "rewards/margins": 9.607375144958496, - "rewards/rejected": -14.376845359802246, - "step": 2144 - }, - { - "epoch": 0.81, - "learning_rate": 1.881890559234668e-06, - "logits/chosen": -0.300197035074234, - "logits/rejected": -4.06299352645874, - "logps/chosen": -329.00823974609375, - "logps/rejected": -354.8216247558594, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.0873658657073975, - "rewards/margins": 18.183666229248047, - "rewards/rejected": -20.271032333374023, - "step": 2145 - }, - { - "epoch": 0.81, - "learning_rate": 1.874934509372448e-06, - "logits/chosen": -2.699007749557495, - "logits/rejected": -1.9865895509719849, - "logps/chosen": -267.56842041015625, - "logps/rejected": -475.63720703125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.22834168374538422, - "rewards/margins": 10.588223457336426, - "rewards/rejected": -10.81656551361084, - "step": 2146 - }, - { - "epoch": 0.81, - "learning_rate": 1.8679900090671854e-06, - "logits/chosen": -3.1107611656188965, - "logits/rejected": -4.177247524261475, - "logps/chosen": -282.4669189453125, - "logps/rejected": -239.31582641601562, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -7.254599094390869, - "rewards/margins": 8.783517837524414, - "rewards/rejected": -16.038116455078125, - "step": 2147 - }, - { - "epoch": 0.81, - "learning_rate": 1.8610570681903018e-06, - "logits/chosen": -1.3796077966690063, - "logits/rejected": -4.392078399658203, - "logps/chosen": -467.26824951171875, - "logps/rejected": -184.85459899902344, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.974261462688446, - "rewards/margins": 12.419400215148926, - "rewards/rejected": -13.393661499023438, - "step": 2148 - }, - { - "epoch": 0.81, - "learning_rate": 1.8541356965967782e-06, - "logits/chosen": -1.185477614402771, - "logits/rejected": -0.7101513743400574, - "logps/chosen": -471.0119323730469, - "logps/rejected": -674.591552734375, - "loss": 0.0002, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.2414093017578125, - "rewards/margins": 7.146542549133301, - "rewards/rejected": -8.387951850891113, - "step": 2149 - }, - { - "epoch": 0.81, - "learning_rate": 1.8472259041251538e-06, - "logits/chosen": -5.515875816345215, - "logits/rejected": -1.180465817451477, - "logps/chosen": -207.96206665039062, - "logps/rejected": -979.82666015625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.383825659751892, - "rewards/margins": 13.843463897705078, - "rewards/rejected": -15.227289199829102, - "step": 2150 - }, - { - "epoch": 0.81, - "learning_rate": 1.8403277005975084e-06, - "logits/chosen": -2.995814323425293, - "logits/rejected": -2.2676546573638916, - "logps/chosen": -400.6909484863281, - "logps/rejected": -592.7871704101562, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.6056548953056335, - "rewards/margins": 12.314282417297363, - "rewards/rejected": -11.708627700805664, - "step": 2151 - }, - { - "epoch": 0.81, - "learning_rate": 1.8334410958194527e-06, - "logits/chosen": -3.1311638355255127, - "logits/rejected": -3.745844602584839, - "logps/chosen": -243.6134796142578, - "logps/rejected": -404.07293701171875, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.354882836341858, - "rewards/margins": 18.804977416992188, - "rewards/rejected": -20.159860610961914, - "step": 2152 - }, - { - "epoch": 0.81, - "learning_rate": 1.8265660995801004e-06, - "logits/chosen": 0.016447028145194054, - "logits/rejected": -4.668059825897217, - "logps/chosen": -323.1354675292969, - "logps/rejected": -214.80670166015625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.793817162513733, - "rewards/margins": 12.850900650024414, - "rewards/rejected": -11.057083129882812, - "step": 2153 - }, - { - "epoch": 0.81, - "learning_rate": 1.8197027216520734e-06, - "logits/chosen": -2.2203831672668457, - "logits/rejected": -4.294514179229736, - "logps/chosen": -255.44708251953125, - "logps/rejected": -285.251220703125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.269720435142517, - "rewards/margins": 15.982728958129883, - "rewards/rejected": -17.25244903564453, - "step": 2154 - }, - { - "epoch": 0.81, - "learning_rate": 1.8128509717914733e-06, - "logits/chosen": -6.347795486450195, - "logits/rejected": -4.592892646789551, - "logps/chosen": -242.39019775390625, - "logps/rejected": -3146.6240234375, - "loss": 0.0021, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.6413116455078125, - "rewards/margins": 39.96088790893555, - "rewards/rejected": -42.60219955444336, - "step": 2155 - }, - { - "epoch": 0.82, - "learning_rate": 1.8060108597378712e-06, - "logits/chosen": -5.250687122344971, - "logits/rejected": -1.4393023252487183, - "logps/chosen": -239.864990234375, - "logps/rejected": -1232.583251953125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.6235076785087585, - "rewards/margins": 43.18948745727539, - "rewards/rejected": -43.81299591064453, - "step": 2156 - }, - { - "epoch": 0.82, - "learning_rate": 1.7991823952143028e-06, - "logits/chosen": -6.7817230224609375, - "logits/rejected": -0.3044115900993347, - "logps/chosen": -258.61199951171875, - "logps/rejected": -2009.181640625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.8697327375411987, - "rewards/margins": 37.16899871826172, - "rewards/rejected": -35.29926681518555, - "step": 2157 - }, - { - "epoch": 0.82, - "learning_rate": 1.7923655879272395e-06, - "logits/chosen": -4.8040924072265625, - "logits/rejected": -2.7458598613739014, - "logps/chosen": -615.2598876953125, - "logps/rejected": -1530.768798828125, - "loss": 0.0003, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.31337282061576843, - "rewards/margins": 24.840730667114258, - "rewards/rejected": -24.52735710144043, - "step": 2158 - }, - { - "epoch": 0.82, - "learning_rate": 1.785560447566581e-06, - "logits/chosen": -1.404052972793579, - "logits/rejected": -4.6300129890441895, - "logps/chosen": -657.7168579101562, - "logps/rejected": -306.0697021484375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 2.008862257003784, - "rewards/margins": 21.94925880432129, - "rewards/rejected": -19.940397262573242, - "step": 2159 - }, - { - "epoch": 0.82, - "learning_rate": 1.7787669838056575e-06, - "logits/chosen": -6.192538261413574, - "logits/rejected": -3.391879081726074, - "logps/chosen": -553.7116088867188, - "logps/rejected": -2536.306640625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.3397949934005737, - "rewards/margins": 57.34554672241211, - "rewards/rejected": -58.685340881347656, - "step": 2160 - }, - { - "epoch": 0.82, - "learning_rate": 1.7719852063011844e-06, - "logits/chosen": -1.4363731145858765, - "logits/rejected": 0.02785993367433548, - "logps/chosen": -222.19607543945312, - "logps/rejected": -627.1380004882812, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.399148553609848, - "rewards/margins": 22.310007095336914, - "rewards/rejected": -22.709156036376953, - "step": 2161 - }, - { - "epoch": 0.82, - "learning_rate": 1.7652151246932703e-06, - "logits/chosen": -1.3252156972885132, - "logits/rejected": -4.08401346206665, - "logps/chosen": -214.94859313964844, - "logps/rejected": -217.69859313964844, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -3.3296494483947754, - "rewards/margins": 10.69986343383789, - "rewards/rejected": -14.029512405395508, - "step": 2162 - }, - { - "epoch": 0.82, - "learning_rate": 1.7584567486054039e-06, - "logits/chosen": -7.196615219116211, - "logits/rejected": -2.961714506149292, - "logps/chosen": -432.0120849609375, - "logps/rejected": -1607.501220703125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.7007598876953125, - "rewards/margins": 24.16715431213379, - "rewards/rejected": -22.466394424438477, - "step": 2163 - }, - { - "epoch": 0.82, - "learning_rate": 1.7517100876444294e-06, - "logits/chosen": -2.916400909423828, - "logits/rejected": -2.979529857635498, - "logps/chosen": -168.05111694335938, - "logps/rejected": -510.33050537109375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.43939515948295593, - "rewards/margins": 25.822172164916992, - "rewards/rejected": -25.382776260375977, - "step": 2164 - }, - { - "epoch": 0.82, - "learning_rate": 1.7449751514005365e-06, - "logits/chosen": -7.8300862312316895, - "logits/rejected": -3.4970738887786865, - "logps/chosen": -266.5867919921875, - "logps/rejected": -2241.251220703125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.6064239740371704, - "rewards/margins": 47.320560455322266, - "rewards/rejected": -45.71413803100586, - "step": 2165 - }, - { - "epoch": 0.82, - "learning_rate": 1.7382519494472571e-06, - "logits/chosen": -2.686544895172119, - "logits/rejected": -2.2427852153778076, - "logps/chosen": -382.6663818359375, - "logps/rejected": -772.1447143554688, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 2.1283538341522217, - "rewards/margins": 42.137046813964844, - "rewards/rejected": -40.00869369506836, - "step": 2166 - }, - { - "epoch": 0.82, - "learning_rate": 1.7315404913414325e-06, - "logits/chosen": -2.6364424228668213, - "logits/rejected": -1.9334850311279297, - "logps/chosen": -283.8428955078125, - "logps/rejected": -434.2028503417969, - "loss": 0.0004, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.8935791254043579, - "rewards/margins": 11.148404121398926, - "rewards/rejected": -10.2548246383667, - "step": 2167 - }, - { - "epoch": 0.82, - "learning_rate": 1.7248407866232175e-06, - "logits/chosen": -2.296135902404785, - "logits/rejected": -5.819711685180664, - "logps/chosen": -253.48623657226562, - "logps/rejected": -164.82826232910156, - "loss": 0.0007, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.0726609230041504, - "rewards/margins": 8.003303527832031, - "rewards/rejected": -10.07596492767334, - "step": 2168 - }, - { - "epoch": 0.82, - "learning_rate": 1.7181528448160611e-06, - "logits/chosen": -4.804635524749756, - "logits/rejected": -1.2114074230194092, - "logps/chosen": -218.5991973876953, - "logps/rejected": -1280.239013671875, - "loss": 0.0868, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.7359359860420227, - "rewards/margins": 42.64105224609375, - "rewards/rejected": -41.90511703491211, - "step": 2169 - }, - { - "epoch": 0.82, - "learning_rate": 1.7114766754266842e-06, - "logits/chosen": -3.1233129501342773, - "logits/rejected": -2.093444347381592, - "logps/chosen": -485.13751220703125, - "logps/rejected": -946.5291748046875, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.220056176185608, - "rewards/margins": 32.25655746459961, - "rewards/rejected": -33.47661209106445, - "step": 2170 - }, - { - "epoch": 0.82, - "learning_rate": 1.7048122879450812e-06, - "logits/chosen": -3.3885786533355713, - "logits/rejected": -3.868589401245117, - "logps/chosen": -165.28656005859375, - "logps/rejected": -654.167236328125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 2.1983582973480225, - "rewards/margins": 39.217010498046875, - "rewards/rejected": -37.018653869628906, - "step": 2171 - }, - { - "epoch": 0.82, - "learning_rate": 1.6981596918444953e-06, - "logits/chosen": -0.438136488199234, - "logits/rejected": -1.325783133506775, - "logps/chosen": -303.25091552734375, - "logps/rejected": -846.8998413085938, - "loss": 0.0002, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.19617004692554474, - "rewards/margins": 44.01392364501953, - "rewards/rejected": -43.817752838134766, - "step": 2172 - }, - { - "epoch": 0.82, - "learning_rate": 1.6915188965814034e-06, - "logits/chosen": -2.6953253746032715, - "logits/rejected": -0.9010257720947266, - "logps/chosen": -278.9245910644531, - "logps/rejected": -530.971923828125, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.803396701812744, - "rewards/margins": 11.901681900024414, - "rewards/rejected": -14.705078125, - "step": 2173 - }, - { - "epoch": 0.82, - "learning_rate": 1.6848899115955208e-06, - "logits/chosen": -7.028253555297852, - "logits/rejected": -2.0719263553619385, - "logps/chosen": -284.0481872558594, - "logps/rejected": -1587.53271484375, - "loss": 0.0059, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.8564361333847046, - "rewards/margins": 37.39539337158203, - "rewards/rejected": -39.2518310546875, - "step": 2174 - }, - { - "epoch": 0.82, - "learning_rate": 1.6782727463097626e-06, - "logits/chosen": -6.826747894287109, - "logits/rejected": -1.5250204801559448, - "logps/chosen": -370.3489990234375, - "logps/rejected": -3456.5537109375, - "loss": 0.002, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.3993866443634033, - "rewards/margins": 16.38913917541504, - "rewards/rejected": -18.78852653503418, - "step": 2175 - }, - { - "epoch": 0.82, - "learning_rate": 1.6716674101302465e-06, - "logits/chosen": -5.394894599914551, - "logits/rejected": -2.368241786956787, - "logps/chosen": -535.9547729492188, - "logps/rejected": -1295.4154052734375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.02098388783633709, - "rewards/margins": 32.59227752685547, - "rewards/rejected": -32.61326217651367, - "step": 2176 - }, - { - "epoch": 0.82, - "learning_rate": 1.6650739124462766e-06, - "logits/chosen": -1.8850988149642944, - "logits/rejected": -5.616618633270264, - "logps/chosen": -247.9589080810547, - "logps/rejected": -228.83834838867188, - "loss": 0.0012, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.910640001296997, - "rewards/margins": 13.077778816223145, - "rewards/rejected": -15.988418579101562, - "step": 2177 - }, - { - "epoch": 0.82, - "learning_rate": 1.6584922626303325e-06, - "logits/chosen": -6.48164176940918, - "logits/rejected": -4.170598030090332, - "logps/chosen": -279.1718444824219, - "logps/rejected": -1594.25244140625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.3241974115371704, - "rewards/margins": 32.167057037353516, - "rewards/rejected": -30.842859268188477, - "step": 2178 - }, - { - "epoch": 0.82, - "learning_rate": 1.6519224700380422e-06, - "logits/chosen": -0.4843965470790863, - "logits/rejected": -5.327138900756836, - "logps/chosen": -677.6439819335938, - "logps/rejected": -206.27931213378906, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.398004174232483, - "rewards/margins": 9.423958778381348, - "rewards/rejected": -10.8219633102417, - "step": 2179 - }, - { - "epoch": 0.82, - "learning_rate": 1.645364544008191e-06, - "logits/chosen": -1.4069541692733765, - "logits/rejected": -4.04749059677124, - "logps/chosen": -222.11279296875, - "logps/rejected": -135.2174835205078, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.8869431018829346, - "rewards/margins": 7.237256050109863, - "rewards/rejected": -10.124198913574219, - "step": 2180 - }, - { - "epoch": 0.82, - "learning_rate": 1.6388184938626871e-06, - "logits/chosen": -5.237401008605957, - "logits/rejected": -0.8545037508010864, - "logps/chosen": -340.1357421875, - "logps/rejected": -1412.8489990234375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.7092437744140625, - "rewards/margins": 32.360355377197266, - "rewards/rejected": -31.651111602783203, - "step": 2181 - }, - { - "epoch": 0.82, - "learning_rate": 1.6322843289065581e-06, - "logits/chosen": -3.1704647541046143, - "logits/rejected": -0.1508823037147522, - "logps/chosen": -292.4443054199219, - "logps/rejected": -1377.6357421875, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.6123046875, - "rewards/margins": 53.46234893798828, - "rewards/rejected": -55.07465362548828, - "step": 2182 - }, - { - "epoch": 0.83, - "learning_rate": 1.6257620584279454e-06, - "logits/chosen": -2.6040799617767334, - "logits/rejected": -5.142090320587158, - "logps/chosen": -264.6678466796875, - "logps/rejected": -215.78590393066406, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.756201148033142, - "rewards/margins": 13.052871704101562, - "rewards/rejected": -11.296670913696289, - "step": 2183 - }, - { - "epoch": 0.83, - "learning_rate": 1.6192516916980717e-06, - "logits/chosen": -2.4823808670043945, - "logits/rejected": -2.6652207374572754, - "logps/chosen": -360.378173828125, - "logps/rejected": -463.2483215332031, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.07846985012292862, - "rewards/margins": 13.645373344421387, - "rewards/rejected": -13.723843574523926, - "step": 2184 - }, - { - "epoch": 0.83, - "learning_rate": 1.6127532379712473e-06, - "logits/chosen": -7.045773029327393, - "logits/rejected": -3.0094175338745117, - "logps/chosen": -391.68597412109375, - "logps/rejected": -2820.79833984375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.519287109375, - "rewards/margins": 105.5496597290039, - "rewards/rejected": -107.0689468383789, - "step": 2185 - }, - { - "epoch": 0.83, - "learning_rate": 1.6062667064848424e-06, - "logits/chosen": -4.727768421173096, - "logits/rejected": -3.3431308269500732, - "logps/chosen": -320.9355773925781, - "logps/rejected": -859.908935546875, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.21462403237819672, - "rewards/margins": 38.07730484008789, - "rewards/rejected": -37.8626823425293, - "step": 2186 - }, - { - "epoch": 0.83, - "learning_rate": 1.599792106459288e-06, - "logits/chosen": -1.3903993368148804, - "logits/rejected": -4.114131927490234, - "logps/chosen": -727.578857421875, - "logps/rejected": -883.9095458984375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.6863159537315369, - "rewards/margins": 48.241432189941406, - "rewards/rejected": -47.55511474609375, - "step": 2187 - }, - { - "epoch": 0.83, - "learning_rate": 1.5933294470980443e-06, - "logits/chosen": -3.070483446121216, - "logits/rejected": -5.7589216232299805, - "logps/chosen": -475.415283203125, - "logps/rejected": -352.0951843261719, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.46049195528030396, - "rewards/margins": 18.790008544921875, - "rewards/rejected": -18.329517364501953, - "step": 2188 - }, - { - "epoch": 0.83, - "learning_rate": 1.5868787375876082e-06, - "logits/chosen": -3.847285032272339, - "logits/rejected": -2.095830202102661, - "logps/chosen": -171.3854522705078, - "logps/rejected": -716.1551513671875, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.0056472779251635075, - "rewards/margins": 21.594865798950195, - "rewards/rejected": -21.589218139648438, - "step": 2189 - }, - { - "epoch": 0.83, - "learning_rate": 1.5804399870974806e-06, - "logits/chosen": -3.5760655403137207, - "logits/rejected": -4.020259857177734, - "logps/chosen": -200.0517578125, - "logps/rejected": -400.6109619140625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.3175277709960938, - "rewards/margins": 23.56861114501953, - "rewards/rejected": -22.251083374023438, - "step": 2190 - }, - { - "epoch": 0.83, - "learning_rate": 1.5740132047801738e-06, - "logits/chosen": -5.028744697570801, - "logits/rejected": -3.3316166400909424, - "logps/chosen": -333.3387451171875, - "logps/rejected": -1402.877197265625, - "loss": 0.0003, - "rewards/accuracies": 1.0, - "rewards/chosen": -3.2979094982147217, - "rewards/margins": 41.16474914550781, - "rewards/rejected": -44.4626579284668, - "step": 2191 - }, - { - "epoch": 0.83, - "learning_rate": 1.5675983997711797e-06, - "logits/chosen": -0.28566408157348633, - "logits/rejected": -1.6371798515319824, - "logps/chosen": -252.701904296875, - "logps/rejected": -433.80926513671875, - "loss": 0.0008, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.417738437652588, - "rewards/margins": 18.05773162841797, - "rewards/rejected": -20.4754695892334, - "step": 2192 - }, - { - "epoch": 0.83, - "learning_rate": 1.5611955811889645e-06, - "logits/chosen": -2.4598312377929688, - "logits/rejected": -4.551436901092529, - "logps/chosen": -120.89640045166016, - "logps/rejected": -375.3503112792969, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.325781226158142, - "rewards/margins": 24.807453155517578, - "rewards/rejected": -26.13323402404785, - "step": 2193 - }, - { - "epoch": 0.83, - "learning_rate": 1.5548047581349624e-06, - "logits/chosen": -6.1162919998168945, - "logits/rejected": -1.730946660041809, - "logps/chosen": -408.08953857421875, - "logps/rejected": -1427.06787109375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.269415259361267, - "rewards/margins": 34.54872512817383, - "rewards/rejected": -35.81814193725586, - "step": 2194 - }, - { - "epoch": 0.83, - "learning_rate": 1.5484259396935508e-06, - "logits/chosen": -7.425093173980713, - "logits/rejected": -2.442121982574463, - "logps/chosen": -416.45074462890625, - "logps/rejected": -2949.70556640625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.2750427722930908, - "rewards/margins": 25.4489803314209, - "rewards/rejected": -26.724023818969727, - "step": 2195 - }, - { - "epoch": 0.83, - "learning_rate": 1.5420591349320401e-06, - "logits/chosen": -1.4089761972427368, - "logits/rejected": -3.802062511444092, - "logps/chosen": -1123.3475341796875, - "logps/rejected": -984.22998046875, - "loss": 0.023, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.1804322004318237, - "rewards/margins": 23.968854904174805, - "rewards/rejected": -25.1492862701416, - "step": 2196 - }, - { - "epoch": 0.83, - "learning_rate": 1.5357043529006777e-06, - "logits/chosen": -1.243674874305725, - "logits/rejected": -6.131026268005371, - "logps/chosen": -494.45452880859375, - "logps/rejected": -190.70692443847656, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.8026184439659119, - "rewards/margins": 13.93287181854248, - "rewards/rejected": -13.130253791809082, - "step": 2197 - }, - { - "epoch": 0.83, - "learning_rate": 1.5293616026326053e-06, - "logits/chosen": -0.42977580428123474, - "logits/rejected": -3.9861979484558105, - "logps/chosen": -512.2430419921875, - "logps/rejected": -605.8910522460938, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.924029529094696, - "rewards/margins": 35.46172332763672, - "rewards/rejected": -36.3857536315918, - "step": 2198 - }, - { - "epoch": 0.83, - "learning_rate": 1.5230308931438675e-06, - "logits/chosen": -4.401474475860596, - "logits/rejected": -4.8900957107543945, - "logps/chosen": -571.1080322265625, - "logps/rejected": -1279.721923828125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.009509277530014515, - "rewards/margins": 32.44387435913086, - "rewards/rejected": -32.434364318847656, - "step": 2199 - }, - { - "epoch": 0.83, - "learning_rate": 1.516712233433396e-06, - "logits/chosen": -1.7662047147750854, - "logits/rejected": -1.7673674821853638, - "logps/chosen": -225.043701171875, - "logps/rejected": -489.1319885253906, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.11003418266773224, - "rewards/margins": 17.960905075073242, - "rewards/rejected": -17.85087013244629, - "step": 2200 - }, - { - "epoch": 0.83, - "learning_rate": 1.5104056324829908e-06, - "logits/chosen": -5.957309246063232, - "logits/rejected": -1.4918344020843506, - "logps/chosen": -436.2004699707031, - "logps/rejected": -2620.50341796875, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 2.027273654937744, - "rewards/margins": 72.84915161132812, - "rewards/rejected": -70.8218765258789, - "step": 2201 - }, - { - "epoch": 0.83, - "learning_rate": 1.5041110992573083e-06, - "logits/chosen": -4.931337356567383, - "logits/rejected": -2.7432467937469482, - "logps/chosen": -400.52947998046875, - "logps/rejected": -1035.75146484375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.1923950910568237, - "rewards/margins": 15.579596519470215, - "rewards/rejected": -16.771991729736328, - "step": 2202 - }, - { - "epoch": 0.83, - "learning_rate": 1.4978286427038602e-06, - "logits/chosen": 0.01958826370537281, - "logits/rejected": -2.1927099227905273, - "logps/chosen": -468.8343505859375, - "logps/rejected": -763.3878784179688, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.534838855266571, - "rewards/margins": 27.821870803833008, - "rewards/rejected": -27.287031173706055, - "step": 2203 - }, - { - "epoch": 0.83, - "learning_rate": 1.4915582717529797e-06, - "logits/chosen": -2.276433229446411, - "logits/rejected": -0.4808266758918762, - "logps/chosen": -542.6409301757812, - "logps/rejected": -1052.7978515625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.22468872368335724, - "rewards/margins": 18.295196533203125, - "rewards/rejected": -18.519886016845703, - "step": 2204 - }, - { - "epoch": 0.83, - "learning_rate": 1.4852999953178293e-06, - "logits/chosen": -1.5964115858078003, - "logits/rejected": -0.8261826634407043, - "logps/chosen": -373.87554931640625, - "logps/rejected": -892.4783325195312, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.8854310512542725, - "rewards/margins": 37.450801849365234, - "rewards/rejected": -40.33623123168945, - "step": 2205 - }, - { - "epoch": 0.83, - "learning_rate": 1.4790538222943795e-06, - "logits/chosen": -2.0768446922302246, - "logits/rejected": -7.22473669052124, - "logps/chosen": -299.10296630859375, - "logps/rejected": -186.8145751953125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.08836060017347336, - "rewards/margins": 13.716285705566406, - "rewards/rejected": -13.804646492004395, - "step": 2206 - }, - { - "epoch": 0.83, - "learning_rate": 1.4728197615613905e-06, - "logits/chosen": -1.0206161737442017, - "logits/rejected": -4.03218936920166, - "logps/chosen": -426.20477294921875, - "logps/rejected": -394.36138916015625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.9400269985198975, - "rewards/margins": 14.59646987915039, - "rewards/rejected": -17.536497116088867, - "step": 2207 - }, - { - "epoch": 0.83, - "learning_rate": 1.4665978219804056e-06, - "logits/chosen": -0.5789293646812439, - "logits/rejected": -4.1383161544799805, - "logps/chosen": -220.2354736328125, - "logps/rejected": -207.3160400390625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.137487769126892, - "rewards/margins": 13.12740707397461, - "rewards/rejected": -14.264894485473633, - "step": 2208 - }, - { - "epoch": 0.84, - "learning_rate": 1.4603880123957448e-06, - "logits/chosen": -5.952966213226318, - "logits/rejected": -4.50251579284668, - "logps/chosen": -268.07196044921875, - "logps/rejected": -1268.142578125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.607690453529358, - "rewards/margins": 32.09912109375, - "rewards/rejected": -30.491430282592773, - "step": 2209 - }, - { - "epoch": 0.84, - "learning_rate": 1.4541903416344783e-06, - "logits/chosen": -2.2036566734313965, - "logits/rejected": -1.8573296070098877, - "logps/chosen": -433.9955749511719, - "logps/rejected": -1170.5260009765625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -3.7159454822540283, - "rewards/margins": 51.09064483642578, - "rewards/rejected": -54.80659103393555, - "step": 2210 - }, - { - "epoch": 0.84, - "learning_rate": 1.4480048185064288e-06, - "logits/chosen": -2.4949798583984375, - "logits/rejected": -2.4333205223083496, - "logps/chosen": -205.62786865234375, - "logps/rejected": -749.2101440429688, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.2631469666957855, - "rewards/margins": 42.143558502197266, - "rewards/rejected": -42.40670394897461, - "step": 2211 - }, - { - "epoch": 0.84, - "learning_rate": 1.4418314518041432e-06, - "logits/chosen": -3.0087456703186035, - "logits/rejected": -7.727070331573486, - "logps/chosen": -207.8667755126953, - "logps/rejected": -189.90664672851562, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.3630691468715668, - "rewards/margins": 13.629344940185547, - "rewards/rejected": -13.266275405883789, - "step": 2212 - }, - { - "epoch": 0.84, - "learning_rate": 1.435670250302892e-06, - "logits/chosen": -0.9203929901123047, - "logits/rejected": -1.3393627405166626, - "logps/chosen": -345.45013427734375, - "logps/rejected": -564.010498046875, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.30115967988967896, - "rewards/margins": 18.498605728149414, - "rewards/rejected": -18.197446823120117, - "step": 2213 - }, - { - "epoch": 0.84, - "learning_rate": 1.4295212227606548e-06, - "logits/chosen": -4.659112453460693, - "logits/rejected": -0.48180079460144043, - "logps/chosen": -700.3709716796875, - "logps/rejected": -1893.5400390625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.806665062904358, - "rewards/margins": 29.357776641845703, - "rewards/rejected": -31.16444206237793, - "step": 2214 - }, - { - "epoch": 0.84, - "learning_rate": 1.4233843779181079e-06, - "logits/chosen": -3.141840934753418, - "logits/rejected": -0.8513950109481812, - "logps/chosen": -314.6850280761719, - "logps/rejected": -1479.9189453125, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.16048888862133026, - "rewards/margins": 80.60372924804688, - "rewards/rejected": -80.76422119140625, - "step": 2215 - }, - { - "epoch": 0.84, - "learning_rate": 1.417259724498603e-06, - "logits/chosen": -0.6197142004966736, - "logits/rejected": -2.865276575088501, - "logps/chosen": -276.1446838378906, - "logps/rejected": -380.3441162109375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.6602661609649658, - "rewards/margins": 23.0997257232666, - "rewards/rejected": -24.759992599487305, - "step": 2216 - }, - { - "epoch": 0.84, - "learning_rate": 1.4111472712081698e-06, - "logits/chosen": -1.0578539371490479, - "logits/rejected": -4.243124008178711, - "logps/chosen": -250.4815216064453, - "logps/rejected": -334.02252197265625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.8028594851493835, - "rewards/margins": 15.84655475616455, - "rewards/rejected": -16.6494140625, - "step": 2217 - }, - { - "epoch": 0.84, - "learning_rate": 1.405047026735491e-06, - "logits/chosen": -1.8490979671478271, - "logits/rejected": 0.6788210272789001, - "logps/chosen": -419.46417236328125, - "logps/rejected": -952.075927734375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.919036865234375, - "rewards/margins": 20.2216854095459, - "rewards/rejected": -21.140722274780273, - "step": 2218 - }, - { - "epoch": 0.84, - "learning_rate": 1.3989589997518937e-06, - "logits/chosen": -0.13734520971775055, - "logits/rejected": -4.253185749053955, - "logps/chosen": -456.9971923828125, - "logps/rejected": -373.2747802734375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.6818298697471619, - "rewards/margins": 24.37848663330078, - "rewards/rejected": -25.06031608581543, - "step": 2219 - }, - { - "epoch": 0.84, - "learning_rate": 1.3928831989113444e-06, - "logits/chosen": -0.4878905117511749, - "logits/rejected": -2.49033784866333, - "logps/chosen": -314.1154479980469, - "logps/rejected": -394.54022216796875, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.767333984375, - "rewards/margins": 13.092856407165527, - "rewards/rejected": -14.860190391540527, - "step": 2220 - }, - { - "epoch": 0.84, - "learning_rate": 1.3868196328504258e-06, - "logits/chosen": -5.846794605255127, - "logits/rejected": -1.7469658851623535, - "logps/chosen": -273.5367431640625, - "logps/rejected": -1117.0206298828125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.5428435802459717, - "rewards/margins": 32.83214569091797, - "rewards/rejected": -35.3749885559082, - "step": 2221 - }, - { - "epoch": 0.84, - "learning_rate": 1.3807683101883285e-06, - "logits/chosen": -4.37021017074585, - "logits/rejected": -0.6901800632476807, - "logps/chosen": -726.9600219726562, - "logps/rejected": -2425.9453125, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.5963989496231079, - "rewards/margins": 48.64422607421875, - "rewards/rejected": -48.047828674316406, - "step": 2222 - }, - { - "epoch": 0.84, - "learning_rate": 1.3747292395268407e-06, - "logits/chosen": -1.256502628326416, - "logits/rejected": -4.363734722137451, - "logps/chosen": -301.1041259765625, - "logps/rejected": -358.0733642578125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.66568922996521, - "rewards/margins": 19.313573837280273, - "rewards/rejected": -21.979263305664062, - "step": 2223 - }, - { - "epoch": 0.84, - "learning_rate": 1.3687024294503392e-06, - "logits/chosen": -6.490797996520996, - "logits/rejected": -1.40803861618042, - "logps/chosen": -249.8697509765625, - "logps/rejected": -992.4462890625, - "loss": 0.001, - "rewards/accuracies": 1.0, - "rewards/chosen": -7.328579902648926, - "rewards/margins": 20.622398376464844, - "rewards/rejected": -27.950977325439453, - "step": 2224 - }, - { - "epoch": 0.84, - "learning_rate": 1.3626878885257644e-06, - "logits/chosen": -3.226681709289551, - "logits/rejected": -4.145928382873535, - "logps/chosen": -576.12353515625, - "logps/rejected": -390.0263671875, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 2.957934617996216, - "rewards/margins": 27.439085006713867, - "rewards/rejected": -24.481149673461914, - "step": 2225 - }, - { - "epoch": 0.84, - "learning_rate": 1.356685625302625e-06, - "logits/chosen": -5.612063884735107, - "logits/rejected": -2.2593657970428467, - "logps/chosen": -242.17042541503906, - "logps/rejected": -1001.1500854492188, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.08219756931066513, - "rewards/margins": 30.770618438720703, - "rewards/rejected": -30.68842124938965, - "step": 2226 - }, - { - "epoch": 0.84, - "learning_rate": 1.3506956483129674e-06, - "logits/chosen": 0.2890355587005615, - "logits/rejected": -3.8264904022216797, - "logps/chosen": -284.2366943359375, - "logps/rejected": -324.5194091796875, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.4302612245082855, - "rewards/margins": 24.633562088012695, - "rewards/rejected": -25.063823699951172, - "step": 2227 - }, - { - "epoch": 0.84, - "learning_rate": 1.344717966071385e-06, - "logits/chosen": -6.944352149963379, - "logits/rejected": -1.1617584228515625, - "logps/chosen": -448.88226318359375, - "logps/rejected": -1720.822265625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.568572998046875, - "rewards/margins": 25.0919132232666, - "rewards/rejected": -26.660486221313477, - "step": 2228 - }, - { - "epoch": 0.84, - "learning_rate": 1.3387525870749851e-06, - "logits/chosen": -1.005852460861206, - "logits/rejected": -1.005852460861206, - "logps/chosen": 0.0, - "logps/rejected": 0.0, - "loss": 0.0866, - "rewards/accuracies": 0.0, - "rewards/chosen": 0.0, - "rewards/margins": 0.0, - "rewards/rejected": 0.0, - "step": 2229 - }, - { - "epoch": 0.84, - "learning_rate": 1.3327995198033883e-06, - "logits/chosen": -1.602145791053772, - "logits/rejected": -3.8114840984344482, - "logps/chosen": -204.33306884765625, - "logps/rejected": -186.04049682617188, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.0225067138671875, - "rewards/margins": 12.022931098937988, - "rewards/rejected": -12.045437812805176, - "step": 2230 - }, - { - "epoch": 0.84, - "learning_rate": 1.3268587727187198e-06, - "logits/chosen": -2.786992073059082, - "logits/rejected": -6.028966903686523, - "logps/chosen": -497.0238342285156, - "logps/rejected": -625.3134765625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.03254089504480362, - "rewards/margins": 32.89570999145508, - "rewards/rejected": -32.8631706237793, - "step": 2231 - }, - { - "epoch": 0.84, - "learning_rate": 1.3209303542655837e-06, - "logits/chosen": -2.2372467517852783, - "logits/rejected": -1.941033959388733, - "logps/chosen": -353.1775817871094, - "logps/rejected": -893.7978515625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.0018463134765625, - "rewards/margins": 36.88767623901367, - "rewards/rejected": -35.88582992553711, - "step": 2232 - }, - { - "epoch": 0.84, - "learning_rate": 1.3150142728710669e-06, - "logits/chosen": -3.1155247688293457, - "logits/rejected": -1.7095829248428345, - "logps/chosen": -458.50628662109375, - "logps/rejected": -1315.4915771484375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.0145416259765625, - "rewards/margins": 55.88587951660156, - "rewards/rejected": -54.871337890625, - "step": 2233 - }, - { - "epoch": 0.84, - "learning_rate": 1.3091105369447166e-06, - "logits/chosen": -0.5469239354133606, - "logits/rejected": -3.5828378200531006, - "logps/chosen": -243.58828735351562, - "logps/rejected": -212.61708068847656, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.3975616693496704, - "rewards/margins": 16.131099700927734, - "rewards/rejected": -14.733538627624512, - "step": 2234 - }, - { - "epoch": 0.84, - "learning_rate": 1.30321915487853e-06, - "logits/chosen": -4.371665954589844, - "logits/rejected": -2.8172037601470947, - "logps/chosen": -442.9704895019531, - "logps/rejected": -977.5878295898438, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -4.192513942718506, - "rewards/margins": 29.71702766418457, - "rewards/rejected": -33.909542083740234, - "step": 2235 - }, - { - "epoch": 0.85, - "learning_rate": 1.2973401350469428e-06, - "logits/chosen": -2.0408191680908203, - "logits/rejected": -6.140677452087402, - "logps/chosen": -452.531982421875, - "logps/rejected": -255.84420776367188, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.511810302734375, - "rewards/margins": 14.844534873962402, - "rewards/rejected": -14.332724571228027, - "step": 2236 - }, - { - "epoch": 0.85, - "learning_rate": 1.2914734858068233e-06, - "logits/chosen": -2.4935526847839355, - "logits/rejected": -2.2622122764587402, - "logps/chosen": -310.5387268066406, - "logps/rejected": -902.96484375, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.20657959580421448, - "rewards/margins": 44.3311882019043, - "rewards/rejected": -44.53776931762695, - "step": 2237 - }, - { - "epoch": 0.85, - "learning_rate": 1.2856192154974488e-06, - "logits/chosen": -6.308655738830566, - "logits/rejected": -2.073174238204956, - "logps/chosen": -457.259521484375, - "logps/rejected": -2149.48046875, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.19822998344898224, - "rewards/margins": 32.0376091003418, - "rewards/rejected": -31.839380264282227, - "step": 2238 - }, - { - "epoch": 0.85, - "learning_rate": 1.2797773324405039e-06, - "logits/chosen": -2.930438280105591, - "logits/rejected": -1.4169089794158936, - "logps/chosen": -379.1475524902344, - "logps/rejected": -707.7786254882812, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.5652436017990112, - "rewards/margins": 18.90989112854004, - "rewards/rejected": -17.344648361206055, - "step": 2239 - }, - { - "epoch": 0.85, - "learning_rate": 1.2739478449400667e-06, - "logits/chosen": -1.9989264011383057, - "logits/rejected": -3.7880799770355225, - "logps/chosen": -276.7508850097656, - "logps/rejected": -561.9244384765625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.3761627674102783, - "rewards/margins": 27.64383888244629, - "rewards/rejected": -26.267675399780273, - "step": 2240 - }, - { - "epoch": 0.85, - "learning_rate": 1.26813076128259e-06, - "logits/chosen": -1.3961443901062012, - "logits/rejected": -5.480928897857666, - "logps/chosen": -1063.81640625, - "logps/rejected": -664.2459716796875, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.5071778297424316, - "rewards/margins": 34.556396484375, - "rewards/rejected": -37.063575744628906, - "step": 2241 - }, - { - "epoch": 0.85, - "learning_rate": 1.2623260897368994e-06, - "logits/chosen": -6.089954853057861, - "logits/rejected": -1.1274563074111938, - "logps/chosen": -305.41644287109375, - "logps/rejected": -1825.762939453125, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.989715576171875, - "rewards/margins": 53.69336700439453, - "rewards/rejected": -51.703651428222656, - "step": 2242 - }, - { - "epoch": 0.85, - "learning_rate": 1.2565338385541792e-06, - "logits/chosen": -6.682372093200684, - "logits/rejected": -2.0756826400756836, - "logps/chosen": -357.97369384765625, - "logps/rejected": -2654.28857421875, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.139032006263733, - "rewards/margins": 46.01334762573242, - "rewards/rejected": -44.87431716918945, - "step": 2243 - }, - { - "epoch": 0.85, - "learning_rate": 1.250754015967951e-06, - "logits/chosen": -3.4939236640930176, - "logits/rejected": -4.963113307952881, - "logps/chosen": -138.53097534179688, - "logps/rejected": -198.5266571044922, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.619337558746338, - "rewards/margins": 12.83079719543457, - "rewards/rejected": -15.45013427734375, - "step": 2244 - }, - { - "epoch": 0.85, - "learning_rate": 1.2449866301940738e-06, - "logits/chosen": -3.8201169967651367, - "logits/rejected": -2.8390557765960693, - "logps/chosen": -204.38363647460938, - "logps/rejected": -683.6322631835938, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.147761583328247, - "rewards/margins": 25.58298683166504, - "rewards/rejected": -26.730749130249023, - "step": 2245 - }, - { - "epoch": 0.85, - "learning_rate": 1.23923168943073e-06, - "logits/chosen": -1.1445800065994263, - "logits/rejected": -5.080644130706787, - "logps/chosen": -408.58148193359375, - "logps/rejected": -204.81024169921875, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.5039429068565369, - "rewards/margins": 11.637996673583984, - "rewards/rejected": -12.141939163208008, - "step": 2246 - }, - { - "epoch": 0.85, - "learning_rate": 1.2334892018584055e-06, - "logits/chosen": -7.156251907348633, - "logits/rejected": -1.5690451860427856, - "logps/chosen": -778.9625244140625, - "logps/rejected": -3987.880615234375, - "loss": 0.0104, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.81024169921875, - "rewards/margins": 33.05697250366211, - "rewards/rejected": -34.86721420288086, - "step": 2247 - }, - { - "epoch": 0.85, - "learning_rate": 1.2277591756398933e-06, - "logits/chosen": -5.178523540496826, - "logits/rejected": -2.679425001144409, - "logps/chosen": -285.7796936035156, - "logps/rejected": -1005.9774169921875, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.0869232416152954, - "rewards/margins": 20.191146850585938, - "rewards/rejected": -21.2780704498291, - "step": 2248 - }, - { - "epoch": 0.85, - "learning_rate": 1.2220416189202656e-06, - "logits/chosen": -1.6060764789581299, - "logits/rejected": -1.4491468667984009, - "logps/chosen": -333.32843017578125, - "logps/rejected": -663.93994140625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.628930687904358, - "rewards/margins": 19.76116943359375, - "rewards/rejected": -18.132238388061523, - "step": 2249 - }, - { - "epoch": 0.85, - "learning_rate": 1.21633653982687e-06, - "logits/chosen": -1.5970754623413086, - "logits/rejected": -4.734700679779053, - "logps/chosen": -447.4179992675781, - "logps/rejected": -540.889892578125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.6413726806640625, - "rewards/margins": 24.027524948120117, - "rewards/rejected": -26.66889762878418, - "step": 2250 - }, - { - "epoch": 0.85, - "learning_rate": 1.210643946469321e-06, - "logits/chosen": -2.488464593887329, - "logits/rejected": -7.130350112915039, - "logps/chosen": -570.6324462890625, - "logps/rejected": -189.4081268310547, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.232751488685608, - "rewards/margins": 14.972167015075684, - "rewards/rejected": -13.739415168762207, - "step": 2251 - }, - { - "epoch": 0.85, - "learning_rate": 1.2049638469394854e-06, - "logits/chosen": -2.6408350467681885, - "logits/rejected": -1.2502367496490479, - "logps/chosen": -224.9442138671875, - "logps/rejected": -783.0827026367188, - "loss": 0.0002, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.1794556379318237, - "rewards/margins": 32.29479217529297, - "rewards/rejected": -31.11533546447754, - "step": 2252 - }, - { - "epoch": 0.85, - "learning_rate": 1.1992962493114645e-06, - "logits/chosen": -7.277291774749756, - "logits/rejected": -2.1131107807159424, - "logps/chosen": -390.7980651855469, - "logps/rejected": -3442.328857421875, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.17531433701515198, - "rewards/margins": 48.19198989868164, - "rewards/rejected": -48.01667404174805, - "step": 2253 - }, - { - "epoch": 0.85, - "learning_rate": 1.1936411616415966e-06, - "logits/chosen": -6.69442081451416, - "logits/rejected": -0.37692728638648987, - "logps/chosen": -368.30682373046875, - "logps/rejected": -1378.567138671875, - "loss": 0.0144, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.376922607421875, - "rewards/margins": 15.489496231079102, - "rewards/rejected": -14.112573623657227, - "step": 2254 - }, - { - "epoch": 0.85, - "learning_rate": 1.1879985919684312e-06, - "logits/chosen": -6.51168155670166, - "logits/rejected": -0.5990697145462036, - "logps/chosen": -492.8675537109375, - "logps/rejected": -4971.62255859375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 2.1319580078125, - "rewards/margins": 119.20783996582031, - "rewards/rejected": -117.07588195800781, - "step": 2255 - }, - { - "epoch": 0.85, - "learning_rate": 1.1823685483127234e-06, - "logits/chosen": -1.272921085357666, - "logits/rejected": -0.4079817533493042, - "logps/chosen": -454.52435302734375, - "logps/rejected": -748.5831298828125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 2.2621917724609375, - "rewards/margins": 20.784818649291992, - "rewards/rejected": -18.522626876831055, - "step": 2256 - }, - { - "epoch": 0.85, - "learning_rate": 1.1767510386774294e-06, - "logits/chosen": -4.254648685455322, - "logits/rejected": -1.9705727100372314, - "logps/chosen": -305.5274963378906, - "logps/rejected": -998.1396484375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.6911407709121704, - "rewards/margins": 37.6380500793457, - "rewards/rejected": -38.32918930053711, - "step": 2257 - }, - { - "epoch": 0.85, - "learning_rate": 1.171146071047683e-06, - "logits/chosen": -3.6535181999206543, - "logits/rejected": -7.0043110847473145, - "logps/chosen": -456.8008117675781, - "logps/rejected": -294.7785949707031, - "loss": 0.0005, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.4955291748046875, - "rewards/margins": 15.460650444030762, - "rewards/rejected": -14.965121269226074, - "step": 2258 - }, - { - "epoch": 0.85, - "learning_rate": 1.1655536533907874e-06, - "logits/chosen": -5.783109664916992, - "logits/rejected": -1.3471473455429077, - "logps/chosen": -652.3795166015625, - "logps/rejected": -2149.039306640625, - "loss": 0.0798, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.548937976360321, - "rewards/margins": 33.13957595825195, - "rewards/rejected": -32.59063720703125, - "step": 2259 - }, - { - "epoch": 0.85, - "learning_rate": 1.159973793656215e-06, - "logits/chosen": -2.499814748764038, - "logits/rejected": -3.578882932662964, - "logps/chosen": -269.0263366699219, - "logps/rejected": -701.5299682617188, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.4971359968185425, - "rewards/margins": 38.62196350097656, - "rewards/rejected": -40.11909866333008, - "step": 2260 - }, - { - "epoch": 0.85, - "learning_rate": 1.1544064997755843e-06, - "logits/chosen": -7.851195812225342, - "logits/rejected": -1.8344640731811523, - "logps/chosen": -230.51480102539062, - "logps/rejected": -3161.884765625, - "loss": 0.0002, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.09514923393726349, - "rewards/margins": 68.6094970703125, - "rewards/rejected": -68.70464324951172, - "step": 2261 - }, - { - "epoch": 0.86, - "learning_rate": 1.1488517796626453e-06, - "logits/chosen": -2.3038082122802734, - "logits/rejected": -4.087804317474365, - "logps/chosen": -176.70755004882812, - "logps/rejected": -190.99606323242188, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.7612976431846619, - "rewards/margins": 14.280423164367676, - "rewards/rejected": -13.519125938415527, - "step": 2262 - }, - { - "epoch": 0.86, - "learning_rate": 1.1433096412132838e-06, - "logits/chosen": -5.295071125030518, - "logits/rejected": -4.0865254402160645, - "logps/chosen": -261.7884521484375, - "logps/rejected": -894.5604858398438, - "loss": 0.003, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.02170715294778347, - "rewards/margins": 20.56656837463379, - "rewards/rejected": -20.54486083984375, - "step": 2263 - }, - { - "epoch": 0.86, - "learning_rate": 1.1377800923054961e-06, - "logits/chosen": -0.5134826898574829, - "logits/rejected": -3.4286389350891113, - "logps/chosen": -273.2124328613281, - "logps/rejected": -422.10595703125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.862091064453125, - "rewards/margins": 25.539390563964844, - "rewards/rejected": -24.67729949951172, - "step": 2264 - }, - { - "epoch": 0.86, - "learning_rate": 1.132263140799381e-06, - "logits/chosen": -3.943814754486084, - "logits/rejected": -1.1392420530319214, - "logps/chosen": -393.4644775390625, - "logps/rejected": -1313.22412109375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 2.198617696762085, - "rewards/margins": 48.7969856262207, - "rewards/rejected": -46.59836959838867, - "step": 2265 - }, - { - "epoch": 0.86, - "learning_rate": 1.1267587945371383e-06, - "logits/chosen": -0.23222476243972778, - "logits/rejected": -3.3979880809783936, - "logps/chosen": -273.3726806640625, - "logps/rejected": -213.79380798339844, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.270941138267517, - "rewards/margins": 15.04863166809082, - "rewards/rejected": -16.31957244873047, - "step": 2266 - }, - { - "epoch": 0.86, - "learning_rate": 1.1212670613430388e-06, - "logits/chosen": -0.505266547203064, - "logits/rejected": -3.918585777282715, - "logps/chosen": -347.35113525390625, - "logps/rejected": -404.31768798828125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.8427581787109375, - "rewards/margins": 22.832157135009766, - "rewards/rejected": -25.674915313720703, - "step": 2267 - }, - { - "epoch": 0.86, - "learning_rate": 1.1157879490234346e-06, - "logits/chosen": -2.160755157470703, - "logits/rejected": -2.129856824874878, - "logps/chosen": -419.372314453125, - "logps/rejected": -744.62548828125, - "loss": 0.0022, - "rewards/accuracies": 1.0, - "rewards/chosen": 4.280358791351318, - "rewards/margins": 36.26066970825195, - "rewards/rejected": -31.980310440063477, - "step": 2268 - }, - { - "epoch": 0.86, - "learning_rate": 1.1103214653667306e-06, - "logits/chosen": -6.571373462677002, - "logits/rejected": -5.678745746612549, - "logps/chosen": -229.89894104003906, - "logps/rejected": -472.2058410644531, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.001667857170105, - "rewards/margins": 27.156187057495117, - "rewards/rejected": -26.15452003479004, - "step": 2269 - }, - { - "epoch": 0.86, - "learning_rate": 1.1048676181433837e-06, - "logits/chosen": 0.06945187598466873, - "logits/rejected": -3.6232879161834717, - "logps/chosen": -467.78912353515625, - "logps/rejected": -255.47976684570312, - "loss": 0.0002, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.1338471174240112, - "rewards/margins": 12.311088562011719, - "rewards/rejected": -11.177241325378418, - "step": 2270 - }, - { - "epoch": 0.86, - "learning_rate": 1.0994264151058897e-06, - "logits/chosen": -3.618767738342285, - "logits/rejected": -1.7863432168960571, - "logps/chosen": -280.8578796386719, - "logps/rejected": -569.9576416015625, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.9398590326309204, - "rewards/margins": 17.077138900756836, - "rewards/rejected": -19.016998291015625, - "step": 2271 - }, - { - "epoch": 0.86, - "learning_rate": 1.0939978639887661e-06, - "logits/chosen": -1.762033462524414, - "logits/rejected": -3.682478666305542, - "logps/chosen": -330.0191650390625, - "logps/rejected": -370.59033203125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.6222015619277954, - "rewards/margins": 14.053600311279297, - "rewards/rejected": -13.431398391723633, - "step": 2272 - }, - { - "epoch": 0.86, - "learning_rate": 1.088581972508549e-06, - "logits/chosen": -1.477115273475647, - "logits/rejected": -1.9515947103500366, - "logps/chosen": -388.0555419921875, - "logps/rejected": -453.5016784667969, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.578137218952179, - "rewards/margins": 8.95422649383545, - "rewards/rejected": -9.532363891601562, - "step": 2273 - }, - { - "epoch": 0.86, - "learning_rate": 1.0831787483637812e-06, - "logits/chosen": -6.459691524505615, - "logits/rejected": -3.743382692337036, - "logps/chosen": -339.5286865234375, - "logps/rejected": -1351.3175048828125, - "loss": 0.0007, - "rewards/accuracies": 1.0, - "rewards/chosen": -3.899212598800659, - "rewards/margins": 19.02216339111328, - "rewards/rejected": -22.921375274658203, - "step": 2274 - }, - { - "epoch": 0.86, - "learning_rate": 1.0777881992349959e-06, - "logits/chosen": -3.7766003608703613, - "logits/rejected": -0.8646678328514099, - "logps/chosen": -262.7241516113281, - "logps/rejected": -831.90771484375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.9218368530273438, - "rewards/margins": 24.32137680053711, - "rewards/rejected": -26.243213653564453, - "step": 2275 - }, - { - "epoch": 0.86, - "learning_rate": 1.072410332784708e-06, - "logits/chosen": -1.5227452516555786, - "logits/rejected": -1.6515361070632935, - "logps/chosen": -332.748046875, - "logps/rejected": -595.2559814453125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.5171478390693665, - "rewards/margins": 24.50957489013672, - "rewards/rejected": -25.026721954345703, - "step": 2276 - }, - { - "epoch": 0.86, - "learning_rate": 1.0670451566574102e-06, - "logits/chosen": -1.2542946338653564, - "logits/rejected": -1.1930973529815674, - "logps/chosen": -367.4902038574219, - "logps/rejected": -739.8037719726562, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -4.333181858062744, - "rewards/margins": 30.48616600036621, - "rewards/rejected": -34.8193473815918, - "step": 2277 - }, - { - "epoch": 0.86, - "learning_rate": 1.0616926784795511e-06, - "logits/chosen": -5.041102886199951, - "logits/rejected": -1.5250612497329712, - "logps/chosen": -946.2896728515625, - "logps/rejected": -2457.498046875, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.806140124797821, - "rewards/margins": 40.25448226928711, - "rewards/rejected": -39.448341369628906, - "step": 2278 - }, - { - "epoch": 0.86, - "learning_rate": 1.0563529058595302e-06, - "logits/chosen": -0.32851719856262207, - "logits/rejected": -4.285958290100098, - "logps/chosen": -280.95782470703125, - "logps/rejected": -135.22396850585938, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.6174103021621704, - "rewards/margins": 10.351758003234863, - "rewards/rejected": -9.734347343444824, - "step": 2279 - }, - { - "epoch": 0.86, - "learning_rate": 1.0510258463876899e-06, - "logits/chosen": -7.258376121520996, - "logits/rejected": -3.9765357971191406, - "logps/chosen": -313.92120361328125, - "logps/rejected": -1936.756591796875, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.452230840921402, - "rewards/margins": 54.026756286621094, - "rewards/rejected": -53.57452392578125, - "step": 2280 - }, - { - "epoch": 0.86, - "learning_rate": 1.0457115076362978e-06, - "logits/chosen": -0.9007105231285095, - "logits/rejected": -1.4582732915878296, - "logps/chosen": -239.14584350585938, - "logps/rejected": -524.07470703125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.9284591674804688, - "rewards/margins": 21.60409164428711, - "rewards/rejected": -22.532550811767578, - "step": 2281 - }, - { - "epoch": 0.86, - "learning_rate": 1.0404098971595378e-06, - "logits/chosen": -7.3122663497924805, - "logits/rejected": -1.2838518619537354, - "logps/chosen": -639.541015625, - "logps/rejected": -2829.213623046875, - "loss": 0.0165, - "rewards/accuracies": 1.0, - "rewards/chosen": 4.736456394195557, - "rewards/margins": 26.654693603515625, - "rewards/rejected": -21.918237686157227, - "step": 2282 - }, - { - "epoch": 0.86, - "learning_rate": 1.035121022493506e-06, - "logits/chosen": -1.7585283517837524, - "logits/rejected": -3.6983447074890137, - "logps/chosen": -191.35423278808594, - "logps/rejected": -293.7237243652344, - "loss": 0.0005, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.9375457763671875, - "rewards/margins": 18.08327293395996, - "rewards/rejected": -19.02081871032715, - "step": 2283 - }, - { - "epoch": 0.86, - "learning_rate": 1.0298448911561899e-06, - "logits/chosen": -6.034070014953613, - "logits/rejected": -0.8730331063270569, - "logps/chosen": -350.35052490234375, - "logps/rejected": -2921.55029296875, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.29811403155326843, - "rewards/margins": 33.628936767578125, - "rewards/rejected": -33.92705154418945, - "step": 2284 - }, - { - "epoch": 0.86, - "learning_rate": 1.0245815106474677e-06, - "logits/chosen": -3.077252149581909, - "logits/rejected": -3.240896463394165, - "logps/chosen": -484.8265075683594, - "logps/rejected": -1076.477294921875, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.2739533185958862, - "rewards/margins": 47.24322509765625, - "rewards/rejected": -48.51717758178711, - "step": 2285 - }, - { - "epoch": 0.86, - "learning_rate": 1.0193308884490894e-06, - "logits/chosen": 0.03100203163921833, - "logits/rejected": -2.2412993907928467, - "logps/chosen": -337.55877685546875, - "logps/rejected": -762.76513671875, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.53656005859375, - "rewards/margins": 42.55772018432617, - "rewards/rejected": -43.09428024291992, - "step": 2286 - }, - { - "epoch": 0.86, - "learning_rate": 1.0140930320246667e-06, - "logits/chosen": -4.261916637420654, - "logits/rejected": -1.0056160688400269, - "logps/chosen": -442.35113525390625, - "logps/rejected": -1433.869384765625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -3.000201463699341, - "rewards/margins": 31.59783172607422, - "rewards/rejected": -34.5980339050293, - "step": 2287 - }, - { - "epoch": 0.87, - "learning_rate": 1.0088679488196695e-06, - "logits/chosen": -4.3197922706604, - "logits/rejected": -5.14686393737793, - "logps/chosen": -229.71817016601562, - "logps/rejected": -346.9123229980469, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.6720215082168579, - "rewards/margins": 7.885208606719971, - "rewards/rejected": -8.557229995727539, - "step": 2288 - }, - { - "epoch": 0.87, - "learning_rate": 1.0036556462614133e-06, - "logits/chosen": -4.309444904327393, - "logits/rejected": -1.3892579078674316, - "logps/chosen": -705.7723388671875, - "logps/rejected": -1369.56005859375, - "loss": 0.0002, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.500701904296875, - "rewards/margins": 23.951129913330078, - "rewards/rejected": -25.451831817626953, - "step": 2289 - }, - { - "epoch": 0.87, - "learning_rate": 9.984561317590392e-07, - "logits/chosen": -3.141448736190796, - "logits/rejected": -1.7517937421798706, - "logps/chosen": -266.83843994140625, - "logps/rejected": -881.1564331054688, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 2.536144971847534, - "rewards/margins": 38.755393981933594, - "rewards/rejected": -36.2192497253418, - "step": 2290 - }, - { - "epoch": 0.87, - "learning_rate": 9.932694127035169e-07, - "logits/chosen": -2.668370008468628, - "logits/rejected": -3.9302573204040527, - "logps/chosen": -191.3590087890625, - "logps/rejected": -348.5333557128906, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.6222900748252869, - "rewards/margins": 19.237903594970703, - "rewards/rejected": -18.61561393737793, - "step": 2291 - }, - { - "epoch": 0.87, - "learning_rate": 9.880954964676226e-07, - "logits/chosen": -2.3191306591033936, - "logits/rejected": -3.4213898181915283, - "logps/chosen": -314.36590576171875, - "logps/rejected": -808.6525268554688, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.7578125, - "rewards/margins": 41.18349838256836, - "rewards/rejected": -40.42568588256836, - "step": 2292 - }, - { - "epoch": 0.87, - "learning_rate": 9.829343904059342e-07, - "logits/chosen": -2.123347520828247, - "logits/rejected": -3.6603732109069824, - "logps/chosen": -361.41473388671875, - "logps/rejected": -878.8825073242188, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.502465844154358, - "rewards/margins": 41.82550811767578, - "rewards/rejected": -43.327972412109375, - "step": 2293 - }, - { - "epoch": 0.87, - "learning_rate": 9.77786101854825e-07, - "logits/chosen": -5.545884132385254, - "logits/rejected": -1.493821620941162, - "logps/chosen": -763.0858154296875, - "logps/rejected": -3069.205322265625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.976342797279358, - "rewards/margins": 25.557031631469727, - "rewards/rejected": -27.533374786376953, - "step": 2294 - }, - { - "epoch": 0.87, - "learning_rate": 9.726506381324451e-07, - "logits/chosen": -7.060025691986084, - "logits/rejected": -2.468797445297241, - "logps/chosen": -517.9913940429688, - "logps/rejected": -1449.315673828125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.884716808795929, - "rewards/margins": 10.479101181030273, - "rewards/rejected": -11.363818168640137, - "step": 2295 - }, - { - "epoch": 0.87, - "learning_rate": 9.675280065387117e-07, - "logits/chosen": -4.1796956062316895, - "logits/rejected": -2.061783790588379, - "logps/chosen": -443.8509826660156, - "logps/rejected": -1024.5576171875, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -5.795172214508057, - "rewards/margins": 19.337249755859375, - "rewards/rejected": -25.132421493530273, - "step": 2296 - }, - { - "epoch": 0.87, - "learning_rate": 9.624182143553073e-07, - "logits/chosen": -1.6048811674118042, - "logits/rejected": -4.185052871704102, - "logps/chosen": -257.492431640625, - "logps/rejected": -440.2882385253906, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.5737321376800537, - "rewards/margins": 21.118791580200195, - "rewards/rejected": -23.692523956298828, - "step": 2297 - }, - { - "epoch": 0.87, - "learning_rate": 9.573212688456635e-07, - "logits/chosen": -5.717707633972168, - "logits/rejected": -3.6595535278320312, - "logps/chosen": -559.7548217773438, - "logps/rejected": -1270.9346923828125, - "loss": 0.2447, - "rewards/accuracies": 1.0, - "rewards/chosen": -4.776904582977295, - "rewards/margins": 30.384265899658203, - "rewards/rejected": -35.161170959472656, - "step": 2298 - }, - { - "epoch": 0.87, - "learning_rate": 9.522371772549421e-07, - "logits/chosen": -0.44397643208503723, - "logits/rejected": -3.8258841037750244, - "logps/chosen": -318.595458984375, - "logps/rejected": -338.06317138671875, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.4578918516635895, - "rewards/margins": 18.670066833496094, - "rewards/rejected": -18.212175369262695, - "step": 2299 - }, - { - "epoch": 0.87, - "learning_rate": 9.471659468100436e-07, - "logits/chosen": -4.057843208312988, - "logits/rejected": -4.384329795837402, - "logps/chosen": -878.1985473632812, - "logps/rejected": -1095.21142578125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.666143774986267, - "rewards/margins": 20.781780242919922, - "rewards/rejected": -19.115636825561523, - "step": 2300 - }, - { - "epoch": 0.87, - "learning_rate": 9.421075847195827e-07, - "logits/chosen": -5.152177810668945, - "logits/rejected": -3.0935699939727783, - "logps/chosen": -418.4200439453125, - "logps/rejected": -2029.23486328125, - "loss": 0.0866, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.44633179903030396, - "rewards/margins": 45.60253143310547, - "rewards/rejected": -45.15620040893555, - "step": 2301 - }, - { - "epoch": 0.87, - "learning_rate": 9.370620981738776e-07, - "logits/chosen": -1.4885820150375366, - "logits/rejected": -1.8916853666305542, - "logps/chosen": -563.937744140625, - "logps/rejected": -1087.7216796875, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.041656494140625, - "rewards/margins": 41.58717727661133, - "rewards/rejected": -42.62883377075195, - "step": 2302 - }, - { - "epoch": 0.87, - "learning_rate": 9.320294943449537e-07, - "logits/chosen": -2.24163556098938, - "logits/rejected": -1.5464329719543457, - "logps/chosen": -333.21099853515625, - "logps/rejected": -764.709228515625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.6636230945587158, - "rewards/margins": 22.49873161315918, - "rewards/rejected": -20.835107803344727, - "step": 2303 - }, - { - "epoch": 0.87, - "learning_rate": 9.270097803865141e-07, - "logits/chosen": -1.31155264377594, - "logits/rejected": -1.1909176111221313, - "logps/chosen": -450.0164794921875, - "logps/rejected": -953.36279296875, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.803741455078125, - "rewards/margins": 35.65708541870117, - "rewards/rejected": -33.85334396362305, - "step": 2304 - }, - { - "epoch": 0.87, - "learning_rate": 9.220029634339489e-07, - "logits/chosen": -4.2611188888549805, - "logits/rejected": -0.8429282903671265, - "logps/chosen": -355.2950134277344, - "logps/rejected": -1307.56787109375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.7326599359512329, - "rewards/margins": 32.59406661987305, - "rewards/rejected": -31.861408233642578, - "step": 2305 - }, - { - "epoch": 0.87, - "learning_rate": 9.170090506043061e-07, - "logits/chosen": -0.39333242177963257, - "logits/rejected": -3.908914089202881, - "logps/chosen": -504.52130126953125, - "logps/rejected": -214.48638916015625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 2.7119691371917725, - "rewards/margins": 17.645862579345703, - "rewards/rejected": -14.933893203735352, - "step": 2306 - }, - { - "epoch": 0.87, - "learning_rate": 9.120280489962985e-07, - "logits/chosen": -1.3771655559539795, - "logits/rejected": -1.565158724784851, - "logps/chosen": -322.5576477050781, - "logps/rejected": -527.8469848632812, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.7393769025802612, - "rewards/margins": 12.442184448242188, - "rewards/rejected": -10.702807426452637, - "step": 2307 - }, - { - "epoch": 0.87, - "learning_rate": 9.070599656902801e-07, - "logits/chosen": -3.324707508087158, - "logits/rejected": -2.503920793533325, - "logps/chosen": -1169.5438232421875, - "logps/rejected": -1708.55224609375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -3.6295411586761475, - "rewards/margins": 27.018774032592773, - "rewards/rejected": -30.6483154296875, - "step": 2308 - }, - { - "epoch": 0.87, - "learning_rate": 9.021048077482486e-07, - "logits/chosen": -0.6131764650344849, - "logits/rejected": -3.2342891693115234, - "logps/chosen": -313.1690673828125, - "logps/rejected": -237.1962890625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.06342162936925888, - "rewards/margins": 14.522299766540527, - "rewards/rejected": -14.585721015930176, - "step": 2309 - }, - { - "epoch": 0.87, - "learning_rate": 8.971625822138197e-07, - "logits/chosen": -4.201205253601074, - "logits/rejected": -3.102832555770874, - "logps/chosen": -256.83050537109375, - "logps/rejected": -668.2911376953125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.1717010736465454, - "rewards/margins": 11.80142593383789, - "rewards/rejected": -10.629724502563477, - "step": 2310 - }, - { - "epoch": 0.87, - "learning_rate": 8.92233296112236e-07, - "logits/chosen": -4.51686429977417, - "logits/rejected": 0.3305574059486389, - "logps/chosen": -466.7547302246094, - "logps/rejected": -1489.2841796875, - "loss": 0.0011, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.3552002012729645, - "rewards/margins": 17.320201873779297, - "rewards/rejected": -16.965002059936523, - "step": 2311 - }, - { - "epoch": 0.87, - "learning_rate": 8.873169564503392e-07, - "logits/chosen": -2.9971237182617188, - "logits/rejected": -0.6740178465843201, - "logps/chosen": -169.5089874267578, - "logps/rejected": -403.7099609375, - "loss": 0.0005, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.0056777955032885075, - "rewards/margins": 5.572828769683838, - "rewards/rejected": -5.567151069641113, - "step": 2312 - }, - { - "epoch": 0.87, - "learning_rate": 8.824135702165693e-07, - "logits/chosen": -2.7114274501800537, - "logits/rejected": -3.3536999225616455, - "logps/chosen": -142.05035400390625, - "logps/rejected": -482.97747802734375, - "loss": 0.0868, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.8881866335868835, - "rewards/margins": 22.93089485168457, - "rewards/rejected": -23.819082260131836, - "step": 2313 - }, - { - "epoch": 0.87, - "learning_rate": 8.775231443809574e-07, - "logits/chosen": -0.06403205543756485, - "logits/rejected": -6.1217570304870605, - "logps/chosen": -388.1910095214844, - "logps/rejected": -138.98724365234375, - "loss": 0.0014, - "rewards/accuracies": 1.0, - "rewards/chosen": -3.9916350841522217, - "rewards/margins": 4.459027290344238, - "rewards/rejected": -8.450662612915039, - "step": 2314 - }, - { - "epoch": 0.88, - "learning_rate": 8.726456858951082e-07, - "logits/chosen": -2.3519177436828613, - "logits/rejected": -2.405176877975464, - "logps/chosen": -243.60203552246094, - "logps/rejected": -402.179931640625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 3.8065078258514404, - "rewards/margins": 21.77128791809082, - "rewards/rejected": -17.964780807495117, - "step": 2315 - }, - { - "epoch": 0.88, - "learning_rate": 8.677812016921938e-07, - "logits/chosen": -0.20235072076320648, - "logits/rejected": -4.930386066436768, - "logps/chosen": -421.27301025390625, - "logps/rejected": -571.59521484375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 4.2578887939453125, - "rewards/margins": 35.892547607421875, - "rewards/rejected": -31.63465690612793, - "step": 2316 - }, - { - "epoch": 0.88, - "learning_rate": 8.629296986869473e-07, - "logits/chosen": -2.071054697036743, - "logits/rejected": -3.346135139465332, - "logps/chosen": -531.8040161132812, - "logps/rejected": -965.349853515625, - "loss": 0.0015, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.727838158607483, - "rewards/margins": 40.16640090942383, - "rewards/rejected": -38.43856430053711, - "step": 2317 - }, - { - "epoch": 0.88, - "learning_rate": 8.580911837756467e-07, - "logits/chosen": -5.685336589813232, - "logits/rejected": -2.1763417720794678, - "logps/chosen": -484.603271484375, - "logps/rejected": -1372.4329833984375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -3.4979798793792725, - "rewards/margins": 28.668474197387695, - "rewards/rejected": -32.16645431518555, - "step": 2318 - }, - { - "epoch": 0.88, - "learning_rate": 8.532656638361025e-07, - "logits/chosen": -6.973424911499023, - "logits/rejected": -2.375239610671997, - "logps/chosen": -340.166259765625, - "logps/rejected": -1958.986328125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.2509002685546875, - "rewards/margins": 59.163875579833984, - "rewards/rejected": -57.9129753112793, - "step": 2319 - }, - { - "epoch": 0.88, - "learning_rate": 8.484531457276657e-07, - "logits/chosen": -3.5835697650909424, - "logits/rejected": -1.0613313913345337, - "logps/chosen": -296.6109619140625, - "logps/rejected": -695.4609375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.0736206769943237, - "rewards/margins": 11.173962593078613, - "rewards/rejected": -12.247583389282227, - "step": 2320 - }, - { - "epoch": 0.88, - "learning_rate": 8.43653636291194e-07, - "logits/chosen": -5.966427803039551, - "logits/rejected": -0.6692860126495361, - "logps/chosen": -287.0304870605469, - "logps/rejected": -1664.0557861328125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.10862121731042862, - "rewards/margins": 50.33900451660156, - "rewards/rejected": -50.230384826660156, - "step": 2321 - }, - { - "epoch": 0.88, - "learning_rate": 8.388671423490568e-07, - "logits/chosen": -7.323575973510742, - "logits/rejected": -2.3044068813323975, - "logps/chosen": -375.88043212890625, - "logps/rejected": -1710.17431640625, - "loss": 0.0011, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.07911377400159836, - "rewards/margins": 34.45895004272461, - "rewards/rejected": -34.538063049316406, - "step": 2322 - }, - { - "epoch": 0.88, - "learning_rate": 8.340936707051273e-07, - "logits/chosen": -5.207639217376709, - "logits/rejected": -3.237111806869507, - "logps/chosen": -327.56365966796875, - "logps/rejected": -794.0745849609375, - "loss": 0.0002, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.132672071456909, - "rewards/margins": 19.777973175048828, - "rewards/rejected": -21.91064453125, - "step": 2323 - }, - { - "epoch": 0.88, - "learning_rate": 8.293332281447608e-07, - "logits/chosen": -0.18153002858161926, - "logits/rejected": -1.2705966234207153, - "logps/chosen": -230.04306030273438, - "logps/rejected": -604.530517578125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.7887146472930908, - "rewards/margins": 30.56643295288086, - "rewards/rejected": -32.35514831542969, - "step": 2324 - }, - { - "epoch": 0.88, - "learning_rate": 8.245858214347969e-07, - "logits/chosen": -1.420676589012146, - "logits/rejected": -0.24668218195438385, - "logps/chosen": -271.10260009765625, - "logps/rejected": -414.65216064453125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.215075731277466, - "rewards/margins": 8.731769561767578, - "rewards/rejected": -10.946845054626465, - "step": 2325 - }, - { - "epoch": 0.88, - "learning_rate": 8.19851457323545e-07, - "logits/chosen": -3.870201349258423, - "logits/rejected": -0.48729899525642395, - "logps/chosen": -535.246337890625, - "logps/rejected": -1306.30126953125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.11737060546875, - "rewards/margins": 27.68402099609375, - "rewards/rejected": -27.8013916015625, - "step": 2326 - }, - { - "epoch": 0.88, - "learning_rate": 8.151301425407699e-07, - "logits/chosen": -2.309642791748047, - "logits/rejected": -4.01458740234375, - "logps/chosen": -218.0791015625, - "logps/rejected": -338.5583801269531, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.6267456412315369, - "rewards/margins": 14.885883331298828, - "rewards/rejected": -15.512628555297852, - "step": 2327 - }, - { - "epoch": 0.88, - "learning_rate": 8.10421883797694e-07, - "logits/chosen": -5.290191173553467, - "logits/rejected": -3.3795011043548584, - "logps/chosen": -253.66575622558594, - "logps/rejected": -875.2525634765625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.3427718877792358, - "rewards/margins": 24.75499153137207, - "rewards/rejected": -23.412220001220703, - "step": 2328 - }, - { - "epoch": 0.88, - "learning_rate": 8.057266877869763e-07, - "logits/chosen": -3.9990885257720947, - "logits/rejected": 0.20298580825328827, - "logps/chosen": -304.1702575683594, - "logps/rejected": -958.3098754882812, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.0766022205352783, - "rewards/margins": 15.763199806213379, - "rewards/rejected": -16.839801788330078, - "step": 2329 - }, - { - "epoch": 0.88, - "learning_rate": 8.010445611827067e-07, - "logits/chosen": -8.00576400756836, - "logits/rejected": -2.9446816444396973, - "logps/chosen": -612.2886962890625, - "logps/rejected": -4046.4921875, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.40086671710014343, - "rewards/margins": 33.6917839050293, - "rewards/rejected": -34.0926513671875, - "step": 2330 - }, - { - "epoch": 0.88, - "learning_rate": 7.963755106404036e-07, - "logits/chosen": -5.4029717445373535, - "logits/rejected": -5.111543655395508, - "logps/chosen": -394.86651611328125, - "logps/rejected": -613.844482421875, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.2363128662109375, - "rewards/margins": 18.14336585998535, - "rewards/rejected": -17.907052993774414, - "step": 2331 - }, - { - "epoch": 0.88, - "learning_rate": 7.917195427969904e-07, - "logits/chosen": -1.0741639137268066, - "logits/rejected": -1.2915798425674438, - "logps/chosen": -376.41851806640625, - "logps/rejected": -750.1898803710938, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.7643890380859375, - "rewards/margins": 34.160762786865234, - "rewards/rejected": -32.3963737487793, - "step": 2332 - }, - { - "epoch": 0.88, - "learning_rate": 7.87076664270795e-07, - "logits/chosen": 0.08296038955450058, - "logits/rejected": -4.637684345245361, - "logps/chosen": -239.21856689453125, - "logps/rejected": -395.6277160644531, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.121978759765625, - "rewards/margins": 30.876972198486328, - "rewards/rejected": -29.754993438720703, - "step": 2333 - }, - { - "epoch": 0.88, - "learning_rate": 7.824468816615482e-07, - "logits/chosen": -2.145440101623535, - "logits/rejected": -3.074178457260132, - "logps/chosen": -399.4054870605469, - "logps/rejected": -786.4275512695312, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 2.1720428466796875, - "rewards/margins": 28.228769302368164, - "rewards/rejected": -26.056726455688477, - "step": 2334 - }, - { - "epoch": 0.88, - "learning_rate": 7.778302015503547e-07, - "logits/chosen": -1.548422932624817, - "logits/rejected": -3.44184947013855, - "logps/chosen": -470.721923828125, - "logps/rejected": -681.9287109375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -4.907873630523682, - "rewards/margins": 40.6845817565918, - "rewards/rejected": -45.59245681762695, - "step": 2335 - }, - { - "epoch": 0.88, - "learning_rate": 7.732266304996971e-07, - "logits/chosen": -6.749175548553467, - "logits/rejected": -3.0150744915008545, - "logps/chosen": -263.80133056640625, - "logps/rejected": -1920.39306640625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.58758544921875, - "rewards/margins": 51.22002029418945, - "rewards/rejected": -51.8076057434082, - "step": 2336 - }, - { - "epoch": 0.88, - "learning_rate": 7.686361750534266e-07, - "logits/chosen": -5.5892133712768555, - "logits/rejected": -1.587829828262329, - "logps/chosen": -252.70301818847656, - "logps/rejected": -1033.315673828125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.2084763050079346, - "rewards/margins": 22.046154022216797, - "rewards/rejected": -20.837677001953125, - "step": 2337 - }, - { - "epoch": 0.88, - "learning_rate": 7.6405884173675e-07, - "logits/chosen": -2.027827501296997, - "logits/rejected": -0.6390998363494873, - "logps/chosen": -345.99676513671875, - "logps/rejected": -1129.3232421875, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.915435791015625, - "rewards/margins": 47.01776885986328, - "rewards/rejected": -45.102333068847656, - "step": 2338 - }, - { - "epoch": 0.88, - "learning_rate": 7.594946370562184e-07, - "logits/chosen": -1.0639301538467407, - "logits/rejected": -5.899831771850586, - "logps/chosen": -583.204345703125, - "logps/rejected": -616.7481689453125, - "loss": 0.0007, - "rewards/accuracies": 1.0, - "rewards/chosen": 2.1683106422424316, - "rewards/margins": 32.82434844970703, - "rewards/rejected": -30.656036376953125, - "step": 2339 - }, - { - "epoch": 0.88, - "learning_rate": 7.549435674997252e-07, - "logits/chosen": -2.7614974975585938, - "logits/rejected": -2.852614164352417, - "logps/chosen": -400.82781982421875, - "logps/rejected": -535.8687744140625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 2.2629029750823975, - "rewards/margins": 10.733692169189453, - "rewards/rejected": -8.470788955688477, - "step": 2340 - }, - { - "epoch": 0.89, - "learning_rate": 7.504056395364879e-07, - "logits/chosen": -1.0356628894805908, - "logits/rejected": -3.786083936691284, - "logps/chosen": -278.0221862792969, - "logps/rejected": -388.71820068359375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.0060333013534546, - "rewards/margins": 20.015649795532227, - "rewards/rejected": -19.00961685180664, - "step": 2341 - }, - { - "epoch": 0.89, - "learning_rate": 7.458808596170497e-07, - "logits/chosen": -3.2123923301696777, - "logits/rejected": -1.5782068967819214, - "logps/chosen": -456.4811706542969, - "logps/rejected": -1530.595458984375, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.8237762451171875, - "rewards/margins": 71.79898834228516, - "rewards/rejected": -70.97521209716797, - "step": 2342 - }, - { - "epoch": 0.89, - "learning_rate": 7.413692341732582e-07, - "logits/chosen": -1.7389999628067017, - "logits/rejected": -5.177521228790283, - "logps/chosen": -473.18212890625, - "logps/rejected": -587.1492919921875, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.4197753965854645, - "rewards/margins": 39.05599594116211, - "rewards/rejected": -38.6362190246582, - "step": 2343 - }, - { - "epoch": 0.89, - "learning_rate": 7.368707696182653e-07, - "logits/chosen": -7.543111324310303, - "logits/rejected": -2.1864519119262695, - "logps/chosen": -137.47254943847656, - "logps/rejected": -2862.751220703125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 2.0814545154571533, - "rewards/margins": 33.271541595458984, - "rewards/rejected": -31.190088272094727, - "step": 2344 - }, - { - "epoch": 0.89, - "learning_rate": 7.32385472346514e-07, - "logits/chosen": -3.723020076751709, - "logits/rejected": -0.6367745995521545, - "logps/chosen": -277.1058349609375, - "logps/rejected": -1057.3499755859375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.944958508014679, - "rewards/margins": 41.437355041503906, - "rewards/rejected": -42.3823127746582, - "step": 2345 - }, - { - "epoch": 0.89, - "learning_rate": 7.279133487337342e-07, - "logits/chosen": -1.8446636199951172, - "logits/rejected": -2.2211804389953613, - "logps/chosen": -277.4748229980469, - "logps/rejected": -342.4508361816406, - "loss": 0.001, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.7512115240097046, - "rewards/margins": 8.119470596313477, - "rewards/rejected": -6.368258953094482, - "step": 2346 - }, - { - "epoch": 0.89, - "learning_rate": 7.234544051369218e-07, - "logits/chosen": -2.2384018898010254, - "logits/rejected": -2.80953049659729, - "logps/chosen": -255.4636993408203, - "logps/rejected": -642.1414184570312, - "loss": 0.0003, - "rewards/accuracies": 1.0, - "rewards/chosen": -4.344917297363281, - "rewards/margins": 35.80662536621094, - "rewards/rejected": -40.15154266357422, - "step": 2347 - }, - { - "epoch": 0.89, - "learning_rate": 7.190086478943459e-07, - "logits/chosen": -0.46599280834198, - "logits/rejected": -4.616573810577393, - "logps/chosen": -352.7833251953125, - "logps/rejected": -308.45343017578125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 3.4490203857421875, - "rewards/margins": 20.847057342529297, - "rewards/rejected": -17.39803695678711, - "step": 2348 - }, - { - "epoch": 0.89, - "learning_rate": 7.145760833255267e-07, - "logits/chosen": -3.1453356742858887, - "logits/rejected": -3.9098517894744873, - "logps/chosen": -320.032958984375, - "logps/rejected": -677.1593017578125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.0386199951171875, - "rewards/margins": 35.401214599609375, - "rewards/rejected": -36.43983459472656, - "step": 2349 - }, - { - "epoch": 0.89, - "learning_rate": 7.101567177312307e-07, - "logits/chosen": -6.107498645782471, - "logits/rejected": -3.6586313247680664, - "logps/chosen": -131.2852783203125, - "logps/rejected": -902.3433837890625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -3.981351613998413, - "rewards/margins": 26.729768753051758, - "rewards/rejected": -30.71112060546875, - "step": 2350 - }, - { - "epoch": 0.89, - "learning_rate": 7.057505573934686e-07, - "logits/chosen": -1.8153958320617676, - "logits/rejected": -2.3335437774658203, - "logps/chosen": -199.95851135253906, - "logps/rejected": -556.4393310546875, - "loss": 0.0002, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.809039294719696, - "rewards/margins": 27.153575897216797, - "rewards/rejected": -27.962615966796875, - "step": 2351 - }, - { - "epoch": 0.89, - "learning_rate": 7.013576085754725e-07, - "logits/chosen": -2.681821823120117, - "logits/rejected": -1.7183637619018555, - "logps/chosen": -288.5316467285156, - "logps/rejected": -653.1241455078125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.5050598382949829, - "rewards/margins": 23.643674850463867, - "rewards/rejected": -23.138614654541016, - "step": 2352 - }, - { - "epoch": 0.89, - "learning_rate": 6.969778775217007e-07, - "logits/chosen": -6.574576377868652, - "logits/rejected": -3.7327659130096436, - "logps/chosen": -664.3272705078125, - "logps/rejected": -4061.314697265625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 3.2143189907073975, - "rewards/margins": 52.852012634277344, - "rewards/rejected": -49.6376953125, - "step": 2353 - }, - { - "epoch": 0.89, - "learning_rate": 6.926113704578197e-07, - "logits/chosen": -1.752110481262207, - "logits/rejected": -3.3487026691436768, - "logps/chosen": -310.0791320800781, - "logps/rejected": -411.2723083496094, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -3.618695020675659, - "rewards/margins": 17.966386795043945, - "rewards/rejected": -21.585081100463867, - "step": 2354 - }, - { - "epoch": 0.89, - "learning_rate": 6.88258093590699e-07, - "logits/chosen": -1.511534333229065, - "logits/rejected": -1.6408616304397583, - "logps/chosen": -415.094970703125, - "logps/rejected": -1013.6376953125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.3346832990646362, - "rewards/margins": 44.210731506347656, - "rewards/rejected": -42.87604904174805, - "step": 2355 - }, - { - "epoch": 0.89, - "learning_rate": 6.839180531084e-07, - "logits/chosen": -0.02565096504986286, - "logits/rejected": -4.824843406677246, - "logps/chosen": -636.8899536132812, - "logps/rejected": -421.7391357421875, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.783892810344696, - "rewards/margins": 27.475709915161133, - "rewards/rejected": -26.691816329956055, - "step": 2356 - }, - { - "epoch": 0.89, - "learning_rate": 6.795912551801742e-07, - "logits/chosen": -6.1352410316467285, - "logits/rejected": -1.104781985282898, - "logps/chosen": -315.1269226074219, - "logps/rejected": -1106.72998046875, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.8403900265693665, - "rewards/margins": 13.3323392868042, - "rewards/rejected": -14.1727294921875, - "step": 2357 - }, - { - "epoch": 0.89, - "learning_rate": 6.752777059564431e-07, - "logits/chosen": -0.6025702953338623, - "logits/rejected": -3.507747173309326, - "logps/chosen": -352.34478759765625, - "logps/rejected": -561.8743286132812, - "loss": 0.0002, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.2887329161167145, - "rewards/margins": 33.147987365722656, - "rewards/rejected": -32.8592529296875, - "step": 2358 - }, - { - "epoch": 0.89, - "learning_rate": 6.709774115687983e-07, - "logits/chosen": -0.155378520488739, - "logits/rejected": -2.621126890182495, - "logps/chosen": -256.1181335449219, - "logps/rejected": -414.5680236816406, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.5415893793106079, - "rewards/margins": 23.478471755981445, - "rewards/rejected": -22.93688201904297, - "step": 2359 - }, - { - "epoch": 0.89, - "learning_rate": 6.666903781299927e-07, - "logits/chosen": -1.4430899620056152, - "logits/rejected": -2.569720983505249, - "logps/chosen": -221.693115234375, - "logps/rejected": -613.7940673828125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.1997833251953125, - "rewards/margins": 34.83052444458008, - "rewards/rejected": -35.03030776977539, - "step": 2360 - }, - { - "epoch": 0.89, - "learning_rate": 6.624166117339215e-07, - "logits/chosen": -1.5497829914093018, - "logits/rejected": -3.6055405139923096, - "logps/chosen": -575.9683837890625, - "logps/rejected": -728.7695922851562, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.0286133289337158, - "rewards/margins": 27.938251495361328, - "rewards/rejected": -26.909637451171875, - "step": 2361 - }, - { - "epoch": 0.89, - "learning_rate": 6.581561184556296e-07, - "logits/chosen": -5.362800121307373, - "logits/rejected": -4.473238945007324, - "logps/chosen": -242.24612426757812, - "logps/rejected": -771.7991333007812, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.1627655029296875, - "rewards/margins": 30.72566795349121, - "rewards/rejected": -30.562902450561523, - "step": 2362 - }, - { - "epoch": 0.89, - "learning_rate": 6.539089043512914e-07, - "logits/chosen": -1.4857169389724731, - "logits/rejected": -2.5166261196136475, - "logps/chosen": -191.19338989257812, - "logps/rejected": -326.05096435546875, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.1352508068084717, - "rewards/margins": 11.05931568145752, - "rewards/rejected": -13.19456672668457, - "step": 2363 - }, - { - "epoch": 0.89, - "learning_rate": 6.496749754582043e-07, - "logits/chosen": -1.0368132591247559, - "logits/rejected": -0.5650222897529602, - "logps/chosen": -421.27099609375, - "logps/rejected": -707.90087890625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 2.4567506313323975, - "rewards/margins": 18.257762908935547, - "rewards/rejected": -15.801012992858887, - "step": 2364 - }, - { - "epoch": 0.89, - "learning_rate": 6.454543377947786e-07, - "logits/chosen": -1.2518606185913086, - "logits/rejected": -1.746415376663208, - "logps/chosen": -240.85208129882812, - "logps/rejected": -430.487548828125, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.9763885736465454, - "rewards/margins": 18.90381622314453, - "rewards/rejected": -17.927427291870117, - "step": 2365 - }, - { - "epoch": 0.89, - "learning_rate": 6.412469973605384e-07, - "logits/chosen": -0.9507202506065369, - "logits/rejected": -0.8237573504447937, - "logps/chosen": -346.4190673828125, - "logps/rejected": -725.628662109375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.33473512530326843, - "rewards/margins": 28.578699111938477, - "rewards/rejected": -28.24396324157715, - "step": 2366 - }, - { - "epoch": 0.89, - "learning_rate": 6.370529601360997e-07, - "logits/chosen": -5.218628883361816, - "logits/rejected": 0.1669386625289917, - "logps/chosen": -318.1318054199219, - "logps/rejected": -1862.297607421875, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.9428680539131165, - "rewards/margins": 34.329341888427734, - "rewards/rejected": -33.386474609375, - "step": 2367 - }, - { - "epoch": 0.9, - "learning_rate": 6.328722320831737e-07, - "logits/chosen": -7.551084518432617, - "logits/rejected": -1.9319281578063965, - "logps/chosen": -230.1853485107422, - "logps/rejected": -1747.6917724609375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.7764129638671875, - "rewards/margins": 16.217741012573242, - "rewards/rejected": -16.99415397644043, - "step": 2368 - }, - { - "epoch": 0.9, - "learning_rate": 6.287048191445488e-07, - "logits/chosen": -8.177655220031738, - "logits/rejected": -3.2343544960021973, - "logps/chosen": -511.60845947265625, - "logps/rejected": -3275.6689453125, - "loss": 0.0007, - "rewards/accuracies": 1.0, - "rewards/chosen": 2.507794141769409, - "rewards/margins": 78.34873962402344, - "rewards/rejected": -75.8409423828125, - "step": 2369 - }, - { - "epoch": 0.9, - "learning_rate": 6.245507272440843e-07, - "logits/chosen": -2.9112703800201416, - "logits/rejected": -1.35981023311615, - "logps/chosen": -363.807861328125, - "logps/rejected": -1002.1993408203125, - "loss": 0.0005, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.0115203857421875, - "rewards/margins": 34.79312515258789, - "rewards/rejected": -33.7816047668457, - "step": 2370 - }, - { - "epoch": 0.9, - "learning_rate": 6.204099622867144e-07, - "logits/chosen": -5.976293087005615, - "logits/rejected": -1.6551592350006104, - "logps/chosen": -360.92742919921875, - "logps/rejected": -1635.67578125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.3519562482833862, - "rewards/margins": 39.93816375732422, - "rewards/rejected": -38.58620834350586, - "step": 2371 - }, - { - "epoch": 0.9, - "learning_rate": 6.162825301584185e-07, - "logits/chosen": -5.2943339347839355, - "logits/rejected": -4.005948066711426, - "logps/chosen": -1317.938232421875, - "logps/rejected": -2194.0302734375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.3647706508636475, - "rewards/margins": 16.195751190185547, - "rewards/rejected": -18.560522079467773, - "step": 2372 - }, - { - "epoch": 0.9, - "learning_rate": 6.121684367262271e-07, - "logits/chosen": -0.32377350330352783, - "logits/rejected": -2.2592782974243164, - "logps/chosen": -471.42254638671875, - "logps/rejected": -691.2590942382812, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.2192505598068237, - "rewards/margins": 41.96493911743164, - "rewards/rejected": -40.745689392089844, - "step": 2373 - }, - { - "epoch": 0.9, - "learning_rate": 6.08067687838213e-07, - "logits/chosen": -1.782834768295288, - "logits/rejected": -1.7111316919326782, - "logps/chosen": -197.5287628173828, - "logps/rejected": -545.363037109375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.6562728881835938, - "rewards/margins": 22.16127586364746, - "rewards/rejected": -23.817548751831055, - "step": 2374 - }, - { - "epoch": 0.9, - "learning_rate": 6.039802893234769e-07, - "logits/chosen": -4.001855850219727, - "logits/rejected": -2.9994966983795166, - "logps/chosen": -141.3135986328125, - "logps/rejected": -792.591796875, - "loss": 0.0002, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.2429535388946533, - "rewards/margins": 46.45053482055664, - "rewards/rejected": -47.69348907470703, - "step": 2375 - }, - { - "epoch": 0.9, - "learning_rate": 5.999062469921424e-07, - "logits/chosen": -0.4750763475894928, - "logits/rejected": -0.5439879298210144, - "logps/chosen": -385.9372253417969, - "logps/rejected": -1052.488525390625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.435556173324585, - "rewards/margins": 45.75150680541992, - "rewards/rejected": -48.18706130981445, - "step": 2376 - }, - { - "epoch": 0.9, - "learning_rate": 5.958455666353502e-07, - "logits/chosen": -1.7974542379379272, - "logits/rejected": -2.49981689453125, - "logps/chosen": -292.6661376953125, - "logps/rejected": -513.3067016601562, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -1.270025610923767, - "rewards/margins": 21.947975158691406, - "rewards/rejected": -23.218000411987305, - "step": 2377 - }, - { - "epoch": 0.9, - "learning_rate": 5.917982540252442e-07, - "logits/chosen": -8.009286880493164, - "logits/rejected": -1.2646898031234741, - "logps/chosen": -311.32379150390625, - "logps/rejected": -2222.409423828125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.9766845703125, - "rewards/margins": 32.700401306152344, - "rewards/rejected": -31.723718643188477, - "step": 2378 - }, - { - "epoch": 0.9, - "learning_rate": 5.877643149149669e-07, - "logits/chosen": -0.24228468537330627, - "logits/rejected": -0.8576930165290833, - "logps/chosen": -296.6073913574219, - "logps/rejected": -646.5339965820312, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.562243640422821, - "rewards/margins": 28.81206512451172, - "rewards/rejected": -28.249820709228516, - "step": 2379 - }, - { - "epoch": 0.9, - "learning_rate": 5.83743755038656e-07, - "logits/chosen": -7.819255828857422, - "logits/rejected": -1.4826085567474365, - "logps/chosen": -309.3973388671875, - "logps/rejected": -2654.467041015625, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.075958251953125, - "rewards/margins": 56.43125534057617, - "rewards/rejected": -56.35529708862305, - "step": 2380 - }, - { - "epoch": 0.9, - "learning_rate": 5.797365801114241e-07, - "logits/chosen": -2.7274816036224365, - "logits/rejected": -5.0622148513793945, - "logps/chosen": -382.4452209472656, - "logps/rejected": -395.6907653808594, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.429467797279358, - "rewards/margins": 26.238643646240234, - "rewards/rejected": -24.809175491333008, - "step": 2381 - }, - { - "epoch": 0.9, - "learning_rate": 5.757427958293582e-07, - "logits/chosen": -1.6177302598953247, - "logits/rejected": -1.8011095523834229, - "logps/chosen": -227.03347778320312, - "logps/rejected": -569.37939453125, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.11138916015625, - "rewards/margins": 28.575626373291016, - "rewards/rejected": -27.464237213134766, - "step": 2382 - }, - { - "epoch": 0.9, - "learning_rate": 5.71762407869515e-07, - "logits/chosen": -0.5988301634788513, - "logits/rejected": -5.8988542556762695, - "logps/chosen": -409.2900085449219, - "logps/rejected": -66.35623931884766, - "loss": 0.0013, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.7571258544921875, - "rewards/margins": 4.557746410369873, - "rewards/rejected": -5.3148722648620605, - "step": 2383 - }, - { - "epoch": 0.9, - "learning_rate": 5.677954218899063e-07, - "logits/chosen": -4.416251182556152, - "logits/rejected": -1.5236676931381226, - "logps/chosen": -490.188232421875, - "logps/rejected": -1212.5308837890625, - "loss": 0.0001, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.6377105712890625, - "rewards/margins": 20.057998657226562, - "rewards/rejected": -18.4202880859375, - "step": 2384 - }, - { - "epoch": 0.9, - "learning_rate": 5.638418435294935e-07, - "logits/chosen": -0.44939079880714417, - "logits/rejected": -6.43556547164917, - "logps/chosen": -291.51055908203125, - "logps/rejected": -274.438232421875, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.800335705280304, - "rewards/margins": 20.22844886779785, - "rewards/rejected": -19.42811393737793, - "step": 2385 - }, - { - "epoch": 0.9, - "learning_rate": 5.59901678408179e-07, - "logits/chosen": -3.6467642784118652, - "logits/rejected": -2.3286783695220947, - "logps/chosen": -399.10357666015625, - "logps/rejected": -527.3917846679688, - "loss": 0.0002, - "rewards/accuracies": 1.0, - "rewards/chosen": -3.147259473800659, - "rewards/margins": 6.519109725952148, - "rewards/rejected": -9.666369438171387, - "step": 2386 - }, - { - "epoch": 0.9, - "learning_rate": 5.559749321267971e-07, - "logits/chosen": -1.8998973369598389, - "logits/rejected": -0.249521404504776, - "logps/chosen": -258.3542785644531, - "logps/rejected": -611.3404541015625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.0807465314865112, - "rewards/margins": 16.739547729492188, - "rewards/rejected": -15.658801078796387, - "step": 2387 - }, - { - "epoch": 0.9, - "learning_rate": 5.520616102671128e-07, - "logits/chosen": -5.642346382141113, - "logits/rejected": -0.6250077486038208, - "logps/chosen": -320.78814697265625, - "logps/rejected": -1209.3543701171875, - "loss": 0.0004, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.739471435546875, - "rewards/margins": 12.815253257751465, - "rewards/rejected": -13.55472469329834, - "step": 2388 - }, - { - "epoch": 0.9, - "learning_rate": 5.481617183918053e-07, - "logits/chosen": -7.304417610168457, - "logits/rejected": -2.6942648887634277, - "logps/chosen": -306.61883544921875, - "logps/rejected": -4052.263427734375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 2.0089690685272217, - "rewards/margins": 39.618125915527344, - "rewards/rejected": -37.60915756225586, - "step": 2389 - }, - { - "epoch": 0.9, - "learning_rate": 5.442752620444602e-07, - "logits/chosen": -1.3176119327545166, - "logits/rejected": -1.6761837005615234, - "logps/chosen": -271.2281799316406, - "logps/rejected": -694.0188598632812, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.7472137808799744, - "rewards/margins": 32.52836227416992, - "rewards/rejected": -31.781147003173828, - "step": 2390 - }, - { - "epoch": 0.9, - "learning_rate": 5.404022467495728e-07, - "logits/chosen": -2.9949557781219482, - "logits/rejected": -1.2606483697891235, - "logps/chosen": -372.7286376953125, - "logps/rejected": -1009.853271484375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -4.2755126953125, - "rewards/margins": 37.3822135925293, - "rewards/rejected": -41.6577262878418, - "step": 2391 - }, - { - "epoch": 0.9, - "learning_rate": 5.36542678012525e-07, - "logits/chosen": -0.34744134545326233, - "logits/rejected": -3.3611700534820557, - "logps/chosen": -209.08482360839844, - "logps/rejected": -375.7100524902344, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": -2.230973958969116, - "rewards/margins": 19.80171012878418, - "rewards/rejected": -22.032684326171875, - "step": 2392 - }, - { - "epoch": 0.9, - "learning_rate": 5.326965613195867e-07, - "logits/chosen": -6.387984275817871, - "logits/rejected": -3.2590746879577637, - "logps/chosen": -588.99169921875, - "logps/rejected": -2586.48681640625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.31593018770217896, - "rewards/margins": 38.8325080871582, - "rewards/rejected": -38.516578674316406, - "step": 2393 - }, - { - "epoch": 0.91, - "learning_rate": 5.288639021379094e-07, - "logits/chosen": -5.345860004425049, - "logits/rejected": -0.3710087835788727, - "logps/chosen": -234.36422729492188, - "logps/rejected": -1458.4166259765625, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.7232636213302612, - "rewards/margins": 44.08561706542969, - "rewards/rejected": -42.36235427856445, - "step": 2394 - }, - { - "epoch": 0.91, - "learning_rate": 5.250447059155117e-07, - "logits/chosen": -3.8447399139404297, - "logits/rejected": -0.7453059554100037, - "logps/chosen": -407.6627502441406, - "logps/rejected": -1042.1722412109375, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 2.133261203765869, - "rewards/margins": 16.736581802368164, - "rewards/rejected": -14.603320121765137, - "step": 2395 - }, - { - "epoch": 0.91, - "learning_rate": 5.212389780812733e-07, - "logits/chosen": -1.3884098529815674, - "logits/rejected": -3.1272799968719482, - "logps/chosen": -322.47711181640625, - "logps/rejected": -604.1803588867188, - "loss": 0.0004, - "rewards/accuracies": 1.0, - "rewards/chosen": 3.4380249977111816, - "rewards/margins": 24.488910675048828, - "rewards/rejected": -21.050886154174805, - "step": 2396 - }, - { - "epoch": 0.91, - "learning_rate": 5.174467240449366e-07, - "logits/chosen": 0.15594661235809326, - "logits/rejected": -3.059762477874756, - "logps/chosen": -520.428466796875, - "logps/rejected": -479.2771911621094, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.4026001691818237, - "rewards/margins": 23.874170303344727, - "rewards/rejected": -22.47157096862793, - "step": 2397 - }, - { - "epoch": 0.91, - "learning_rate": 5.136679491970809e-07, - "logits/chosen": -2.7048544883728027, - "logits/rejected": -0.3631521165370941, - "logps/chosen": -275.9468994140625, - "logps/rejected": -692.6514892578125, - "loss": 0.0014, - "rewards/accuracies": 1.0, - "rewards/chosen": 0.40758973360061646, - "rewards/margins": 19.88968849182129, - "rewards/rejected": -19.482099533081055, - "step": 2398 - }, - { - "epoch": 0.91, - "learning_rate": 5.099026589091338e-07, - "logits/chosen": 0.0013115613255649805, - "logits/rejected": -2.8942501544952393, - "logps/chosen": -254.48011779785156, - "logps/rejected": -630.3706665039062, - "loss": 0.0, - "rewards/accuracies": 1.0, - "rewards/chosen": 2.2492752075195312, - "rewards/margins": 38.910072326660156, - "rewards/rejected": -36.660797119140625, - "step": 2399 - }, - { - "epoch": 0.91, - "learning_rate": 5.06150858533353e-07, - "logits/chosen": -2.319995403289795, - "logits/rejected": -2.219836473464966, - "logps/chosen": -425.2501220703125, - "logps/rejected": -750.6461181640625, - "loss": 0.0002, - "rewards/accuracies": 1.0, - "rewards/chosen": 1.527716040611267, - "rewards/margins": 23.081165313720703, - "rewards/rejected": -21.553449630737305, - "step": 2400 - } - ], - "logging_steps": 1, - "max_steps": 2645, - "num_input_tokens_seen": 0, - "num_train_epochs": 1, - "save_steps": 100, - "total_flos": 0.0, - "train_batch_size": 1, - "trial_name": null, - "trial_params": null -}