|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9997382884061764, |
|
"eval_steps": 500, |
|
"global_step": 1910, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0005234231876472127, |
|
"grad_norm": 7271.4244777785725, |
|
"learning_rate": 2.617801047120419e-09, |
|
"logits/chosen": 5870.685546875, |
|
"logits/rejected": 4942.87255859375, |
|
"logps/chosen": -300.06866455078125, |
|
"logps/rejected": -172.3806915283203, |
|
"loss": 502.7921, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.005234231876472127, |
|
"grad_norm": 8304.038811792496, |
|
"learning_rate": 2.6178010471204188e-08, |
|
"logits/chosen": 4513.291015625, |
|
"logits/rejected": 4184.9970703125, |
|
"logps/chosen": -237.91387939453125, |
|
"logps/rejected": -218.99322509765625, |
|
"loss": 517.5651, |
|
"rewards/accuracies": 0.4305555522441864, |
|
"rewards/chosen": 0.00014268612721934915, |
|
"rewards/margins": 7.825787179172039e-06, |
|
"rewards/rejected": 0.00013486042735166848, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.010468463752944255, |
|
"grad_norm": 7349.301686542035, |
|
"learning_rate": 5.2356020942408376e-08, |
|
"logits/chosen": 6489.048828125, |
|
"logits/rejected": 5857.85986328125, |
|
"logps/chosen": -313.1256408691406, |
|
"logps/rejected": -286.7991027832031, |
|
"loss": 576.4106, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 0.005085395649075508, |
|
"rewards/margins": 0.001057310844771564, |
|
"rewards/rejected": 0.004028084687888622, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.015702695629416383, |
|
"grad_norm": 6070.326194091692, |
|
"learning_rate": 7.853403141361257e-08, |
|
"logits/chosen": 6126.0244140625, |
|
"logits/rejected": 4615.61572265625, |
|
"logps/chosen": -283.33941650390625, |
|
"logps/rejected": -226.2187957763672, |
|
"loss": 536.6951, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": 0.03882580250501633, |
|
"rewards/margins": 0.0007929968414828181, |
|
"rewards/rejected": 0.038032807409763336, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.02093692750588851, |
|
"grad_norm": 4027.252620020275, |
|
"learning_rate": 1.0471204188481675e-07, |
|
"logits/chosen": 6232.24609375, |
|
"logits/rejected": 5138.41943359375, |
|
"logps/chosen": -303.6441345214844, |
|
"logps/rejected": -273.7585144042969, |
|
"loss": 521.8768, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": 0.10832098871469498, |
|
"rewards/margins": 0.001040009781718254, |
|
"rewards/rejected": 0.10728099197149277, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.02617115938236064, |
|
"grad_norm": 3273.4166142677173, |
|
"learning_rate": 1.3089005235602092e-07, |
|
"logits/chosen": 5814.5888671875, |
|
"logits/rejected": 4966.0185546875, |
|
"logps/chosen": -264.1505126953125, |
|
"logps/rejected": -246.9972381591797, |
|
"loss": 504.7053, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.1410144865512848, |
|
"rewards/margins": 0.008737658150494099, |
|
"rewards/rejected": 0.13227683305740356, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.031405391258832765, |
|
"grad_norm": 3405.5858728041726, |
|
"learning_rate": 1.5706806282722514e-07, |
|
"logits/chosen": 5904.3173828125, |
|
"logits/rejected": 4385.4423828125, |
|
"logps/chosen": -305.66455078125, |
|
"logps/rejected": -220.8424072265625, |
|
"loss": 504.0667, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": 0.162828266620636, |
|
"rewards/margins": 0.004079371690750122, |
|
"rewards/rejected": 0.15874889492988586, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.036639623135304895, |
|
"grad_norm": 3202.908216557707, |
|
"learning_rate": 1.8324607329842932e-07, |
|
"logits/chosen": 5761.95166015625, |
|
"logits/rejected": 5009.62744140625, |
|
"logps/chosen": -268.76776123046875, |
|
"logps/rejected": -242.82901000976562, |
|
"loss": 483.6057, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": 0.1770472377538681, |
|
"rewards/margins": 0.009785796515643597, |
|
"rewards/rejected": 0.16726145148277283, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.04187385501177702, |
|
"grad_norm": 3087.5358270471534, |
|
"learning_rate": 2.094240837696335e-07, |
|
"logits/chosen": 5641.09423828125, |
|
"logits/rejected": 4720.921875, |
|
"logps/chosen": -255.1355438232422, |
|
"logps/rejected": -223.3970947265625, |
|
"loss": 520.935, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.20566542446613312, |
|
"rewards/margins": 0.015274032950401306, |
|
"rewards/rejected": 0.1903913915157318, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.04710808688824915, |
|
"grad_norm": 3166.52121678684, |
|
"learning_rate": 2.356020942408377e-07, |
|
"logits/chosen": 5928.62060546875, |
|
"logits/rejected": 5229.50830078125, |
|
"logps/chosen": -273.92181396484375, |
|
"logps/rejected": -257.62664794921875, |
|
"loss": 504.3293, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.2296813279390335, |
|
"rewards/margins": 0.005136436782777309, |
|
"rewards/rejected": 0.22454488277435303, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.05234231876472128, |
|
"grad_norm": 2820.6105507964335, |
|
"learning_rate": 2.6178010471204185e-07, |
|
"logits/chosen": 5289.11279296875, |
|
"logits/rejected": 4693.4892578125, |
|
"logps/chosen": -223.42025756835938, |
|
"logps/rejected": -191.41561889648438, |
|
"loss": 462.7118, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.2503862977027893, |
|
"rewards/margins": 0.040689971297979355, |
|
"rewards/rejected": 0.20969633758068085, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.05757655064119341, |
|
"grad_norm": 2957.007752432132, |
|
"learning_rate": 2.879581151832461e-07, |
|
"logits/chosen": 4805.5546875, |
|
"logits/rejected": 3832.24560546875, |
|
"logps/chosen": -226.375732421875, |
|
"logps/rejected": -162.94354248046875, |
|
"loss": 468.0938, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 0.2314574271440506, |
|
"rewards/margins": 0.005531529895961285, |
|
"rewards/rejected": 0.22592587769031525, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.06281078251766553, |
|
"grad_norm": 2665.4354032186566, |
|
"learning_rate": 3.1413612565445027e-07, |
|
"logits/chosen": 5966.80615234375, |
|
"logits/rejected": 5375.4677734375, |
|
"logps/chosen": -266.8603210449219, |
|
"logps/rejected": -251.39871215820312, |
|
"loss": 478.3712, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.27219146490097046, |
|
"rewards/margins": 0.014115167781710625, |
|
"rewards/rejected": 0.2580762803554535, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.06804501439413765, |
|
"grad_norm": 2859.728706765586, |
|
"learning_rate": 3.4031413612565446e-07, |
|
"logits/chosen": 5990.4404296875, |
|
"logits/rejected": 4250.15087890625, |
|
"logps/chosen": -246.6409149169922, |
|
"logps/rejected": -190.3795928955078, |
|
"loss": 466.5569, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.3171687722206116, |
|
"rewards/margins": 0.03780997544527054, |
|
"rewards/rejected": 0.27935880422592163, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.07327924627060979, |
|
"grad_norm": 2930.1339861107, |
|
"learning_rate": 3.6649214659685864e-07, |
|
"logits/chosen": 5721.59375, |
|
"logits/rejected": 5571.9296875, |
|
"logps/chosen": -255.01687622070312, |
|
"logps/rejected": -260.22662353515625, |
|
"loss": 485.1082, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": 0.27502936124801636, |
|
"rewards/margins": -0.03979866951704025, |
|
"rewards/rejected": 0.3148280382156372, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.07851347814708191, |
|
"grad_norm": 3059.9679230109905, |
|
"learning_rate": 3.926701570680628e-07, |
|
"logits/chosen": 5372.61669921875, |
|
"logits/rejected": 4874.53759765625, |
|
"logps/chosen": -255.142333984375, |
|
"logps/rejected": -235.846435546875, |
|
"loss": 507.7281, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.29862216114997864, |
|
"rewards/margins": 0.015394523739814758, |
|
"rewards/rejected": 0.2832276523113251, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.08374771002355404, |
|
"grad_norm": 2926.3725797738743, |
|
"learning_rate": 4.18848167539267e-07, |
|
"logits/chosen": 5306.0595703125, |
|
"logits/rejected": 4798.4033203125, |
|
"logps/chosen": -218.11911010742188, |
|
"logps/rejected": -226.7350616455078, |
|
"loss": 462.329, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.31827059388160706, |
|
"rewards/margins": -0.01059560663998127, |
|
"rewards/rejected": 0.32886621356010437, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.08898194190002617, |
|
"grad_norm": 2736.45715870512, |
|
"learning_rate": 4.450261780104712e-07, |
|
"logits/chosen": 6529.0009765625, |
|
"logits/rejected": 5290.5166015625, |
|
"logps/chosen": -264.6250915527344, |
|
"logps/rejected": -233.53598022460938, |
|
"loss": 479.4374, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.2924504578113556, |
|
"rewards/margins": -0.042323701083660126, |
|
"rewards/rejected": 0.3347741663455963, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.0942161737764983, |
|
"grad_norm": 2984.3085207454747, |
|
"learning_rate": 4.712041884816754e-07, |
|
"logits/chosen": 5866.4013671875, |
|
"logits/rejected": 4263.2841796875, |
|
"logps/chosen": -253.30673217773438, |
|
"logps/rejected": -205.41256713867188, |
|
"loss": 500.1966, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.3619328439235687, |
|
"rewards/margins": 0.07482115924358368, |
|
"rewards/rejected": 0.28711163997650146, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.09945040565297043, |
|
"grad_norm": 2413.8772161958623, |
|
"learning_rate": 4.973821989528796e-07, |
|
"logits/chosen": 5497.056640625, |
|
"logits/rejected": 5329.40380859375, |
|
"logps/chosen": -232.9961700439453, |
|
"logps/rejected": -253.7998809814453, |
|
"loss": 460.8556, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": 0.3027707040309906, |
|
"rewards/margins": -0.005348903127014637, |
|
"rewards/rejected": 0.30811959505081177, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.10468463752944256, |
|
"grad_norm": 3189.2111842360887, |
|
"learning_rate": 4.999661831436498e-07, |
|
"logits/chosen": 5501.1474609375, |
|
"logits/rejected": 5489.7998046875, |
|
"logps/chosen": -236.9458465576172, |
|
"logps/rejected": -251.3883819580078, |
|
"loss": 475.5444, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": 0.30881690979003906, |
|
"rewards/margins": -0.020580019801855087, |
|
"rewards/rejected": 0.32939693331718445, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.10991886940591468, |
|
"grad_norm": 2699.749049929128, |
|
"learning_rate": 4.998492971140339e-07, |
|
"logits/chosen": 5460.18603515625, |
|
"logits/rejected": 5445.2724609375, |
|
"logps/chosen": -231.32339477539062, |
|
"logps/rejected": -259.4305114746094, |
|
"loss": 472.6169, |
|
"rewards/accuracies": 0.4000000059604645, |
|
"rewards/chosen": 0.32528847455978394, |
|
"rewards/margins": -0.03840009123086929, |
|
"rewards/rejected": 0.363688588142395, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.11515310128238682, |
|
"grad_norm": 2726.868782872802, |
|
"learning_rate": 4.996489634487865e-07, |
|
"logits/chosen": 5533.435546875, |
|
"logits/rejected": 4800.142578125, |
|
"logps/chosen": -251.1014404296875, |
|
"logps/rejected": -216.500244140625, |
|
"loss": 442.7733, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.3984920084476471, |
|
"rewards/margins": 0.058864910155534744, |
|
"rewards/rejected": 0.33962708711624146, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.12038733315885894, |
|
"grad_norm": 2717.1705033857043, |
|
"learning_rate": 4.993652490577246e-07, |
|
"logits/chosen": 5993.1884765625, |
|
"logits/rejected": 4839.08203125, |
|
"logps/chosen": -249.40988159179688, |
|
"logps/rejected": -219.46878051757812, |
|
"loss": 462.8714, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.32329297065734863, |
|
"rewards/margins": -0.012330831959843636, |
|
"rewards/rejected": 0.3356238007545471, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.12562156503533106, |
|
"grad_norm": 2891.5856762486524, |
|
"learning_rate": 4.9899824869915e-07, |
|
"logits/chosen": 5396.11376953125, |
|
"logits/rejected": 4066.21875, |
|
"logps/chosen": -242.0372314453125, |
|
"logps/rejected": -174.00357055664062, |
|
"loss": 448.6322, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 0.3511057496070862, |
|
"rewards/margins": 0.024918105453252792, |
|
"rewards/rejected": 0.3261876404285431, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.13085579691180318, |
|
"grad_norm": 2977.7608325122183, |
|
"learning_rate": 4.985480849482012e-07, |
|
"logits/chosen": 5365.56591796875, |
|
"logits/rejected": 5459.0966796875, |
|
"logps/chosen": -223.53158569335938, |
|
"logps/rejected": -243.6824951171875, |
|
"loss": 470.1929, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.3795216679573059, |
|
"rewards/margins": 0.03881485387682915, |
|
"rewards/rejected": 0.34070685505867004, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.1360900287882753, |
|
"grad_norm": 2844.886121573562, |
|
"learning_rate": 4.980149081559142e-07, |
|
"logits/chosen": 5969.5849609375, |
|
"logits/rejected": 5703.658203125, |
|
"logps/chosen": -279.28240966796875, |
|
"logps/rejected": -260.6078186035156, |
|
"loss": 453.171, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": 0.32865092158317566, |
|
"rewards/margins": -0.03916555643081665, |
|
"rewards/rejected": 0.3678165078163147, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.14132426066474746, |
|
"grad_norm": 2829.945413913836, |
|
"learning_rate": 4.973988963990065e-07, |
|
"logits/chosen": 4869.52001953125, |
|
"logits/rejected": 4211.67724609375, |
|
"logps/chosen": -224.86849975585938, |
|
"logps/rejected": -217.9200439453125, |
|
"loss": 457.8975, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.32791829109191895, |
|
"rewards/margins": 0.021049553528428078, |
|
"rewards/rejected": 0.3068687319755554, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.14655849254121958, |
|
"grad_norm": 2907.0916066466657, |
|
"learning_rate": 4.967002554204008e-07, |
|
"logits/chosen": 5243.541015625, |
|
"logits/rejected": 4437.04345703125, |
|
"logps/chosen": -247.62939453125, |
|
"logps/rejected": -211.838134765625, |
|
"loss": 450.3028, |
|
"rewards/accuracies": 0.4625000059604645, |
|
"rewards/chosen": 0.3431067168712616, |
|
"rewards/margins": 0.02685241959989071, |
|
"rewards/rejected": 0.31625431776046753, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.1517927244176917, |
|
"grad_norm": 2882.190469859016, |
|
"learning_rate": 4.959192185605087e-07, |
|
"logits/chosen": 5457.43994140625, |
|
"logits/rejected": 4840.53564453125, |
|
"logps/chosen": -238.77737426757812, |
|
"logps/rejected": -243.97128295898438, |
|
"loss": 468.4004, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.36954444646835327, |
|
"rewards/margins": 0.02929796651005745, |
|
"rewards/rejected": 0.3402464985847473, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.15702695629416383, |
|
"grad_norm": 3009.2833395331872, |
|
"learning_rate": 4.950560466792969e-07, |
|
"logits/chosen": 6089.1845703125, |
|
"logits/rejected": 4922.9873046875, |
|
"logps/chosen": -288.011962890625, |
|
"logps/rejected": -253.79428100585938, |
|
"loss": 496.6538, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.41366416215896606, |
|
"rewards/margins": 0.09003490954637527, |
|
"rewards/rejected": 0.323629230260849, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.16226118817063595, |
|
"grad_norm": 2429.430639056721, |
|
"learning_rate": 4.941110280691619e-07, |
|
"logits/chosen": 5420.87890625, |
|
"logits/rejected": 4362.85302734375, |
|
"logps/chosen": -246.7043914794922, |
|
"logps/rejected": -184.77566528320312, |
|
"loss": 450.8828, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.3455493152141571, |
|
"rewards/margins": -0.0095530916005373, |
|
"rewards/rejected": 0.3551023602485657, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.16749542004710807, |
|
"grad_norm": 2753.359736527446, |
|
"learning_rate": 4.930844783586424e-07, |
|
"logits/chosen": 4784.5322265625, |
|
"logits/rejected": 4547.6220703125, |
|
"logps/chosen": -184.01431274414062, |
|
"logps/rejected": -193.05662536621094, |
|
"loss": 415.4406, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 0.3396381139755249, |
|
"rewards/margins": -0.008304371498525143, |
|
"rewards/rejected": 0.3479425311088562, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.17272965192358022, |
|
"grad_norm": 2852.3716536653224, |
|
"learning_rate": 4.919767404070033e-07, |
|
"logits/chosen": 5849.3818359375, |
|
"logits/rejected": 4828.42822265625, |
|
"logps/chosen": -246.0577850341797, |
|
"logps/rejected": -209.41921997070312, |
|
"loss": 449.1766, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 0.4010588228702545, |
|
"rewards/margins": 0.042839743196964264, |
|
"rewards/rejected": 0.35821908712387085, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.17796388380005235, |
|
"grad_norm": 2456.2913708681403, |
|
"learning_rate": 4.907881841897216e-07, |
|
"logits/chosen": 5143.18994140625, |
|
"logits/rejected": 5371.2216796875, |
|
"logps/chosen": -232.52963256835938, |
|
"logps/rejected": -257.6844787597656, |
|
"loss": 454.2092, |
|
"rewards/accuracies": 0.38749998807907104, |
|
"rewards/chosen": 0.35724347829818726, |
|
"rewards/margins": -0.11005325615406036, |
|
"rewards/rejected": 0.4672967493534088, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.18319811567652447, |
|
"grad_norm": 2678.401269841126, |
|
"learning_rate": 4.895192066749189e-07, |
|
"logits/chosen": 5613.0537109375, |
|
"logits/rejected": 4274.7685546875, |
|
"logps/chosen": -235.586669921875, |
|
"logps/rejected": -212.4261474609375, |
|
"loss": 472.6165, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.4322528839111328, |
|
"rewards/margins": 0.11002279818058014, |
|
"rewards/rejected": 0.32223010063171387, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.1884323475529966, |
|
"grad_norm": 2532.990455889227, |
|
"learning_rate": 4.881702316907768e-07, |
|
"logits/chosen": 5681.9755859375, |
|
"logits/rejected": 4378.25390625, |
|
"logps/chosen": -240.4472198486328, |
|
"logps/rejected": -192.0093536376953, |
|
"loss": 458.877, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.371535062789917, |
|
"rewards/margins": 0.03572763875126839, |
|
"rewards/rejected": 0.3358073830604553, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.19366657942946872, |
|
"grad_norm": 2561.1977986473953, |
|
"learning_rate": 4.86741709783982e-07, |
|
"logits/chosen": 5142.1611328125, |
|
"logits/rejected": 4391.021484375, |
|
"logps/chosen": -221.49853515625, |
|
"logps/rejected": -208.3010711669922, |
|
"loss": 418.6793, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.36740997433662415, |
|
"rewards/margins": 0.024009237065911293, |
|
"rewards/rejected": 0.343400776386261, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.19890081130594087, |
|
"grad_norm": 3249.3946848792525, |
|
"learning_rate": 4.85234118069247e-07, |
|
"logits/chosen": 5851.6572265625, |
|
"logits/rejected": 5273.86572265625, |
|
"logps/chosen": -258.82733154296875, |
|
"logps/rejected": -225.1135711669922, |
|
"loss": 476.0678, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.3698350489139557, |
|
"rewards/margins": -0.05597928166389465, |
|
"rewards/rejected": 0.42581433057785034, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.204135043182413, |
|
"grad_norm": 6104.987197861906, |
|
"learning_rate": 4.836479600699578e-07, |
|
"logits/chosen": 5440.23828125, |
|
"logits/rejected": 5176.56787109375, |
|
"logps/chosen": -227.13626098632812, |
|
"logps/rejected": -237.4613800048828, |
|
"loss": 457.8458, |
|
"rewards/accuracies": 0.4124999940395355, |
|
"rewards/chosen": 0.4158262312412262, |
|
"rewards/margins": -0.06199340894818306, |
|
"rewards/rejected": 0.47781962156295776, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.2093692750588851, |
|
"grad_norm": 2913.855746159282, |
|
"learning_rate": 4.819837655500013e-07, |
|
"logits/chosen": 6080.09521484375, |
|
"logits/rejected": 6008.8671875, |
|
"logps/chosen": -255.96240234375, |
|
"logps/rejected": -260.66082763671875, |
|
"loss": 463.2553, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": 0.4074326157569885, |
|
"rewards/margins": -0.010780897922813892, |
|
"rewards/rejected": 0.41821345686912537, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.21460350693535724, |
|
"grad_norm": 2757.6002263120413, |
|
"learning_rate": 4.802420903368285e-07, |
|
"logits/chosen": 5520.74951171875, |
|
"logits/rejected": 4620.50830078125, |
|
"logps/chosen": -209.3979949951172, |
|
"logps/rejected": -202.67066955566406, |
|
"loss": 401.3251, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": 0.3946303129196167, |
|
"rewards/margins": 0.021963870152831078, |
|
"rewards/rejected": 0.37266644835472107, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.21983773881182936, |
|
"grad_norm": 2886.017303383734, |
|
"learning_rate": 4.784235161358123e-07, |
|
"logits/chosen": 6241.6337890625, |
|
"logits/rejected": 4859.7138671875, |
|
"logps/chosen": -258.38421630859375, |
|
"logps/rejected": -226.57174682617188, |
|
"loss": 474.339, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": 0.3764682412147522, |
|
"rewards/margins": -0.0009525719797238708, |
|
"rewards/rejected": 0.3774208426475525, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.22507197068830148, |
|
"grad_norm": 2694.1239844205243, |
|
"learning_rate": 4.7652865033596314e-07, |
|
"logits/chosen": 5915.04052734375, |
|
"logits/rejected": 4868.78125, |
|
"logps/chosen": -237.8837890625, |
|
"logps/rejected": -239.0323944091797, |
|
"loss": 479.7707, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.39550644159317017, |
|
"rewards/margins": 0.05114778131246567, |
|
"rewards/rejected": 0.3443586528301239, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.23030620256477363, |
|
"grad_norm": 2638.7505881805687, |
|
"learning_rate": 4.7455812580706534e-07, |
|
"logits/chosen": 5430.63525390625, |
|
"logits/rejected": 4377.5166015625, |
|
"logps/chosen": -228.3394012451172, |
|
"logps/rejected": -218.4453125, |
|
"loss": 477.3207, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.4782446026802063, |
|
"rewards/margins": 0.08502718061208725, |
|
"rewards/rejected": 0.39321738481521606, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.23554043444124576, |
|
"grad_norm": 2560.4922150003467, |
|
"learning_rate": 4.725126006883046e-07, |
|
"logits/chosen": 5066.28369140625, |
|
"logits/rejected": 4950.4150390625, |
|
"logps/chosen": -223.5402069091797, |
|
"logps/rejected": -229.3683624267578, |
|
"loss": 475.7734, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": 0.37004831433296204, |
|
"rewards/margins": -0.06600765883922577, |
|
"rewards/rejected": 0.4360559582710266, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.24077466631771788, |
|
"grad_norm": 2834.5464453746413, |
|
"learning_rate": 4.703927581684539e-07, |
|
"logits/chosen": 5400.9306640625, |
|
"logits/rejected": 5344.81640625, |
|
"logps/chosen": -234.24026489257812, |
|
"logps/rejected": -206.53564453125, |
|
"loss": 451.783, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": 0.3872218132019043, |
|
"rewards/margins": -0.023321902379393578, |
|
"rewards/rejected": 0.4105437397956848, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.24600889819419, |
|
"grad_norm": 2984.6307853878493, |
|
"learning_rate": 4.68199306257695e-07, |
|
"logits/chosen": 5166.0556640625, |
|
"logits/rejected": 4168.626953125, |
|
"logps/chosen": -228.40029907226562, |
|
"logps/rejected": -217.7026824951172, |
|
"loss": 436.8623, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": 0.3921186327934265, |
|
"rewards/margins": 0.03906525298953056, |
|
"rewards/rejected": 0.35305342078208923, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.2512431300706621, |
|
"grad_norm": 2709.863344047286, |
|
"learning_rate": 4.6593297755114776e-07, |
|
"logits/chosen": 5889.123046875, |
|
"logits/rejected": 5564.8466796875, |
|
"logps/chosen": -224.7196807861328, |
|
"logps/rejected": -254.2318572998047, |
|
"loss": 500.472, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 0.4287680983543396, |
|
"rewards/margins": 0.03177551180124283, |
|
"rewards/rejected": 0.39699262380599976, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.2564773619471343, |
|
"grad_norm": 2812.365883993442, |
|
"learning_rate": 4.635945289841902e-07, |
|
"logits/chosen": 4608.798828125, |
|
"logits/rejected": 4657.5869140625, |
|
"logps/chosen": -183.73062133789062, |
|
"logps/rejected": -228.1973876953125, |
|
"loss": 436.1543, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.3860538601875305, |
|
"rewards/margins": 0.007812491152435541, |
|
"rewards/rejected": 0.3782413601875305, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.26171159382360637, |
|
"grad_norm": 2964.3249647637385, |
|
"learning_rate": 4.611847415796476e-07, |
|
"logits/chosen": 5814.58544921875, |
|
"logits/rejected": 4944.06103515625, |
|
"logps/chosen": -253.06246948242188, |
|
"logps/rejected": -216.42373657226562, |
|
"loss": 449.1805, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": 0.37933146953582764, |
|
"rewards/margins": -0.004121728241443634, |
|
"rewards/rejected": 0.38345322012901306, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.2669458257000785, |
|
"grad_norm": 2468.7689710136347, |
|
"learning_rate": 4.5870442018693773e-07, |
|
"logits/chosen": 5567.94140625, |
|
"logits/rejected": 5069.24072265625, |
|
"logps/chosen": -238.51309204101562, |
|
"logps/rejected": -237.9658203125, |
|
"loss": 468.6761, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.39677414298057556, |
|
"rewards/margins": -0.018477313220500946, |
|
"rewards/rejected": 0.4152514338493347, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.2721800575765506, |
|
"grad_norm": 3135.9866296472474, |
|
"learning_rate": 4.5615439321325735e-07, |
|
"logits/chosen": 5763.81103515625, |
|
"logits/rejected": 4699.95361328125, |
|
"logps/chosen": -245.3778076171875, |
|
"logps/rejected": -236.6832733154297, |
|
"loss": 485.4475, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.3716925084590912, |
|
"rewards/margins": -0.019004570320248604, |
|
"rewards/rejected": 0.39069709181785583, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.27741428945302277, |
|
"grad_norm": 2697.1963242488955, |
|
"learning_rate": 4.535355123469008e-07, |
|
"logits/chosen": 5287.62890625, |
|
"logits/rejected": 4875.68505859375, |
|
"logps/chosen": -229.9176788330078, |
|
"logps/rejected": -223.1514892578125, |
|
"loss": 472.4326, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.3853301405906677, |
|
"rewards/margins": 0.00580341462045908, |
|
"rewards/rejected": 0.37952667474746704, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.2826485213294949, |
|
"grad_norm": 3065.9865354495937, |
|
"learning_rate": 4.5084865227280366e-07, |
|
"logits/chosen": 5233.0517578125, |
|
"logits/rejected": 4762.06787109375, |
|
"logps/chosen": -248.7694549560547, |
|
"logps/rejected": -221.34976196289062, |
|
"loss": 494.6735, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": 0.3382965922355652, |
|
"rewards/margins": -0.02283622696995735, |
|
"rewards/rejected": 0.36113283038139343, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.287882753205967, |
|
"grad_norm": 2581.098101318781, |
|
"learning_rate": 4.4809471038040437e-07, |
|
"logits/chosen": 5172.52978515625, |
|
"logits/rejected": 4110.0751953125, |
|
"logps/chosen": -249.93325805664062, |
|
"logps/rejected": -195.62350463867188, |
|
"loss": 445.9224, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.42613086104393005, |
|
"rewards/margins": 0.030070941895246506, |
|
"rewards/rejected": 0.39605993032455444, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.29311698508243916, |
|
"grad_norm": 2935.758261662807, |
|
"learning_rate": 4.4527460646392386e-07, |
|
"logits/chosen": 5275.1181640625, |
|
"logits/rejected": 4908.9013671875, |
|
"logps/chosen": -204.8629150390625, |
|
"logps/rejected": -213.5486297607422, |
|
"loss": 464.7498, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.3916018009185791, |
|
"rewards/margins": 0.01974121853709221, |
|
"rewards/rejected": 0.3718605637550354, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.29835121695891126, |
|
"grad_norm": 2317.2183869330697, |
|
"learning_rate": 4.4238928241516163e-07, |
|
"logits/chosen": 6298.4970703125, |
|
"logits/rejected": 4828.0947265625, |
|
"logps/chosen": -271.87530517578125, |
|
"logps/rejected": -210.5924835205078, |
|
"loss": 444.7044, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.4335819184780121, |
|
"rewards/margins": 0.005383139010518789, |
|
"rewards/rejected": 0.42819881439208984, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.3035854488353834, |
|
"grad_norm": 2476.766944172286, |
|
"learning_rate": 4.394397019089116e-07, |
|
"logits/chosen": 5619.259765625, |
|
"logits/rejected": 4530.4365234375, |
|
"logps/chosen": -253.644287109375, |
|
"logps/rejected": -210.2486572265625, |
|
"loss": 460.2462, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.3618123531341553, |
|
"rewards/margins": 0.0577847845852375, |
|
"rewards/rejected": 0.3040275275707245, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.30881968071185556, |
|
"grad_norm": 2592.791022013939, |
|
"learning_rate": 4.3642685008110246e-07, |
|
"logits/chosen": 5317.41064453125, |
|
"logits/rejected": 4190.921875, |
|
"logps/chosen": -215.50991821289062, |
|
"logps/rejected": -187.17164611816406, |
|
"loss": 413.2921, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.38392287492752075, |
|
"rewards/margins": -0.009419135749340057, |
|
"rewards/rejected": 0.3933420181274414, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.31405391258832765, |
|
"grad_norm": 2611.3201020461893, |
|
"learning_rate": 4.333517331997704e-07, |
|
"logits/chosen": 5873.51220703125, |
|
"logits/rejected": 5519.97119140625, |
|
"logps/chosen": -261.2032470703125, |
|
"logps/rejected": -246.95849609375, |
|
"loss": 476.4223, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": 0.3629549741744995, |
|
"rewards/margins": -0.021394124254584312, |
|
"rewards/rejected": 0.3843490779399872, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.3192881444647998, |
|
"grad_norm": 2757.807768530615, |
|
"learning_rate": 4.302153783289736e-07, |
|
"logits/chosen": 5596.8837890625, |
|
"logits/rejected": 4788.099609375, |
|
"logps/chosen": -225.3079071044922, |
|
"logps/rejected": -241.4241180419922, |
|
"loss": 469.7494, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 0.4231595993041992, |
|
"rewards/margins": -0.00441837077960372, |
|
"rewards/rejected": 0.42757803201675415, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.3245223763412719, |
|
"grad_norm": 3105.8012818905454, |
|
"learning_rate": 4.2701883298576124e-07, |
|
"logits/chosen": 5359.2724609375, |
|
"logits/rejected": 5001.88427734375, |
|
"logps/chosen": -240.8025360107422, |
|
"logps/rejected": -207.4171142578125, |
|
"loss": 475.3348, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.40704458951950073, |
|
"rewards/margins": 0.016931820660829544, |
|
"rewards/rejected": 0.3901127278804779, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.32975660821774405, |
|
"grad_norm": 2907.7644335294735, |
|
"learning_rate": 4.237631647903115e-07, |
|
"logits/chosen": 5390.2080078125, |
|
"logits/rejected": 4478.12353515625, |
|
"logps/chosen": -223.9457244873047, |
|
"logps/rejected": -204.24703979492188, |
|
"loss": 459.8224, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": 0.5224254727363586, |
|
"rewards/margins": 0.09427186101675034, |
|
"rewards/rejected": 0.4281536042690277, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.33499084009421615, |
|
"grad_norm": 2838.8774079471536, |
|
"learning_rate": 4.204494611093548e-07, |
|
"logits/chosen": 5652.3779296875, |
|
"logits/rejected": 4024.271484375, |
|
"logps/chosen": -262.69659423828125, |
|
"logps/rejected": -210.9382781982422, |
|
"loss": 457.168, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.5036084651947021, |
|
"rewards/margins": 0.1345875859260559, |
|
"rewards/rejected": 0.36902087926864624, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.3402250719706883, |
|
"grad_norm": 2841.283206358593, |
|
"learning_rate": 4.1707882869300235e-07, |
|
"logits/chosen": 5601.1298828125, |
|
"logits/rejected": 4618.71484375, |
|
"logps/chosen": -243.36392211914062, |
|
"logps/rejected": -188.28256225585938, |
|
"loss": 473.2414, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.37009182572364807, |
|
"rewards/margins": 0.020084170624613762, |
|
"rewards/rejected": 0.3500076234340668, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.34545930384716045, |
|
"grad_norm": 2990.557256988464, |
|
"learning_rate": 4.136523933051005e-07, |
|
"logits/chosen": 5809.75, |
|
"logits/rejected": 5187.5576171875, |
|
"logps/chosen": -242.94424438476562, |
|
"logps/rejected": -218.1668701171875, |
|
"loss": 430.7427, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": 0.3632182478904724, |
|
"rewards/margins": -0.019726304337382317, |
|
"rewards/rejected": 0.3829445242881775, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.35069353572363254, |
|
"grad_norm": 3030.6632095062805, |
|
"learning_rate": 4.101712993472348e-07, |
|
"logits/chosen": 5965.10986328125, |
|
"logits/rejected": 5161.24755859375, |
|
"logps/chosen": -239.2626190185547, |
|
"logps/rejected": -198.73562622070312, |
|
"loss": 445.511, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.375115305185318, |
|
"rewards/margins": 0.006631316151469946, |
|
"rewards/rejected": 0.36848393082618713, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.3559277676001047, |
|
"grad_norm": 2767.786914589451, |
|
"learning_rate": 4.066367094765091e-07, |
|
"logits/chosen": 5560.0693359375, |
|
"logits/rejected": 4629.181640625, |
|
"logps/chosen": -257.6699523925781, |
|
"logps/rejected": -211.1696319580078, |
|
"loss": 458.9728, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.35179638862609863, |
|
"rewards/margins": -0.009023250080645084, |
|
"rewards/rejected": 0.36081960797309875, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.3611619994765768, |
|
"grad_norm": 3018.8316167165076, |
|
"learning_rate": 4.0304980421722766e-07, |
|
"logits/chosen": 5494.3037109375, |
|
"logits/rejected": 5067.5087890625, |
|
"logps/chosen": -251.9346923828125, |
|
"logps/rejected": -231.23477172851562, |
|
"loss": 483.0792, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.39491140842437744, |
|
"rewards/margins": -0.004837697837501764, |
|
"rewards/rejected": 0.39974913001060486, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.36639623135304894, |
|
"grad_norm": 2804.3368058052815, |
|
"learning_rate": 3.994117815666095e-07, |
|
"logits/chosen": 5581.04638671875, |
|
"logits/rejected": 4282.5869140625, |
|
"logps/chosen": -290.048095703125, |
|
"logps/rejected": -219.49282836914062, |
|
"loss": 487.5479, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 0.4267527461051941, |
|
"rewards/margins": 0.01361342053860426, |
|
"rewards/rejected": 0.41313934326171875, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.3716304632295211, |
|
"grad_norm": 2463.6680284285494, |
|
"learning_rate": 3.957238565946671e-07, |
|
"logits/chosen": 5351.44384765625, |
|
"logits/rejected": 4493.7109375, |
|
"logps/chosen": -217.0452880859375, |
|
"logps/rejected": -190.09336853027344, |
|
"loss": 434.9337, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 0.3848896622657776, |
|
"rewards/margins": -0.02217109315097332, |
|
"rewards/rejected": 0.4070607125759125, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.3768646951059932, |
|
"grad_norm": 2789.400152363563, |
|
"learning_rate": 3.9198726103838306e-07, |
|
"logits/chosen": 5270.38037109375, |
|
"logits/rejected": 4837.1796875, |
|
"logps/chosen": -230.885986328125, |
|
"logps/rejected": -195.58602905273438, |
|
"loss": 426.7396, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": 0.3861168324947357, |
|
"rewards/margins": -0.029095202684402466, |
|
"rewards/rejected": 0.41521206498146057, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.38209892698246534, |
|
"grad_norm": 2579.476417492773, |
|
"learning_rate": 3.8820324289031946e-07, |
|
"logits/chosen": 5369.2978515625, |
|
"logits/rejected": 4761.36328125, |
|
"logps/chosen": -204.5257568359375, |
|
"logps/rejected": -204.974853515625, |
|
"loss": 453.2719, |
|
"rewards/accuracies": 0.4625000059604645, |
|
"rewards/chosen": 0.3813869059085846, |
|
"rewards/margins": 0.00393150607123971, |
|
"rewards/rejected": 0.37745538353919983, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.38733315885893743, |
|
"grad_norm": 2589.569755576345, |
|
"learning_rate": 3.84373065981799e-07, |
|
"logits/chosen": 6128.9091796875, |
|
"logits/rejected": 4719.29931640625, |
|
"logps/chosen": -250.7108154296875, |
|
"logps/rejected": -220.9887237548828, |
|
"loss": 459.4358, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": 0.42599329352378845, |
|
"rewards/margins": -0.052743494510650635, |
|
"rewards/rejected": 0.4787367880344391, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.3925673907354096, |
|
"grad_norm": 2833.9492734543496, |
|
"learning_rate": 3.8049800956079545e-07, |
|
"logits/chosen": 5706.271484375, |
|
"logits/rejected": 5021.3134765625, |
|
"logps/chosen": -249.65283203125, |
|
"logps/rejected": -209.9662322998047, |
|
"loss": 474.0779, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.4112626016139984, |
|
"rewards/margins": 0.01550484262406826, |
|
"rewards/rejected": 0.3957577347755432, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.39780162261188173, |
|
"grad_norm": 2832.6107588966483, |
|
"learning_rate": 3.7657936786467525e-07, |
|
"logits/chosen": 4986.99462890625, |
|
"logits/rejected": 4256.84765625, |
|
"logps/chosen": -212.7075958251953, |
|
"logps/rejected": -192.9168701171875, |
|
"loss": 433.4753, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": 0.39544767141342163, |
|
"rewards/margins": 0.020534038543701172, |
|
"rewards/rejected": 0.37491363286972046, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.40303585448835383, |
|
"grad_norm": 2774.458368416743, |
|
"learning_rate": 3.7261844968793226e-07, |
|
"logits/chosen": 4237.71337890625, |
|
"logits/rejected": 4375.86767578125, |
|
"logps/chosen": -183.19639587402344, |
|
"logps/rejected": -205.65249633789062, |
|
"loss": 429.399, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": 0.3700638711452484, |
|
"rewards/margins": -0.0648937076330185, |
|
"rewards/rejected": 0.4349575936794281, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.408270086364826, |
|
"grad_norm": 2924.863786054313, |
|
"learning_rate": 3.6861657794506187e-07, |
|
"logits/chosen": 4800.78125, |
|
"logits/rejected": 4496.0048828125, |
|
"logps/chosen": -206.91018676757812, |
|
"logps/rejected": -204.9410858154297, |
|
"loss": 467.7143, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": 0.3601909279823303, |
|
"rewards/margins": -0.02283461019396782, |
|
"rewards/rejected": 0.38302555680274963, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.4135043182412981, |
|
"grad_norm": 2883.394905438846, |
|
"learning_rate": 3.6457508922871777e-07, |
|
"logits/chosen": 5870.728515625, |
|
"logits/rejected": 4462.73876953125, |
|
"logps/chosen": -227.4943084716797, |
|
"logps/rejected": -195.0250701904297, |
|
"loss": 461.7544, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.41137704253196716, |
|
"rewards/margins": 0.02530388906598091, |
|
"rewards/rejected": 0.38607317209243774, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.4187385501177702, |
|
"grad_norm": 2905.6186537657873, |
|
"learning_rate": 3.6049533336330084e-07, |
|
"logits/chosen": 5861.5732421875, |
|
"logits/rejected": 4758.0703125, |
|
"logps/chosen": -244.23165893554688, |
|
"logps/rejected": -210.7204132080078, |
|
"loss": 469.5478, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.41407591104507446, |
|
"rewards/margins": 0.06709511578083038, |
|
"rewards/rejected": 0.3469807505607605, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.4239727819942423, |
|
"grad_norm": 2670.309278191828, |
|
"learning_rate": 3.56378672954129e-07, |
|
"logits/chosen": 6055.84130859375, |
|
"logits/rejected": 4388.7734375, |
|
"logps/chosen": -263.1472473144531, |
|
"logps/rejected": -195.3523712158203, |
|
"loss": 441.372, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.44812217354774475, |
|
"rewards/margins": 0.04222990199923515, |
|
"rewards/rejected": 0.4058922231197357, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.42920701387071447, |
|
"grad_norm": 3124.9754658062902, |
|
"learning_rate": 3.5222648293233803e-07, |
|
"logits/chosen": 6021.9658203125, |
|
"logits/rejected": 5607.9169921875, |
|
"logps/chosen": -244.96554565429688, |
|
"logps/rejected": -242.5836639404297, |
|
"loss": 472.9549, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": 0.38347047567367554, |
|
"rewards/margins": -0.00437899911776185, |
|
"rewards/rejected": 0.38784947991371155, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.4344412457471866, |
|
"grad_norm": 2576.472835509745, |
|
"learning_rate": 3.480401500956657e-07, |
|
"logits/chosen": 5213.6259765625, |
|
"logits/rejected": 4482.5400390625, |
|
"logps/chosen": -206.4659881591797, |
|
"logps/rejected": -217.31362915039062, |
|
"loss": 449.0575, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.38270142674446106, |
|
"rewards/margins": 0.010285294614732265, |
|
"rewards/rejected": 0.3724161386489868, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.4396754776236587, |
|
"grad_norm": 3109.274717902587, |
|
"learning_rate": 3.438210726452724e-07, |
|
"logits/chosen": 5968.18017578125, |
|
"logits/rejected": 5356.08447265625, |
|
"logps/chosen": -269.45263671875, |
|
"logps/rejected": -230.8134307861328, |
|
"loss": 482.8063, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.4160873293876648, |
|
"rewards/margins": -0.02526630461215973, |
|
"rewards/rejected": 0.44135361909866333, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.44490970950013087, |
|
"grad_norm": 2983.3753201080026, |
|
"learning_rate": 3.395706597187538e-07, |
|
"logits/chosen": 4604.427734375, |
|
"logits/rejected": 4566.62109375, |
|
"logps/chosen": -193.61697387695312, |
|
"logps/rejected": -188.32859802246094, |
|
"loss": 448.0851, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.36395174264907837, |
|
"rewards/margins": -0.0260360948741436, |
|
"rewards/rejected": 0.3899878263473511, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.45014394137660296, |
|
"grad_norm": 2768.9950966421284, |
|
"learning_rate": 3.3529033091949986e-07, |
|
"logits/chosen": 5541.9208984375, |
|
"logits/rejected": 5208.42041015625, |
|
"logps/chosen": -259.69183349609375, |
|
"logps/rejected": -261.2183837890625, |
|
"loss": 463.3188, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": 0.4210972189903259, |
|
"rewards/margins": -0.004580638371407986, |
|
"rewards/rejected": 0.4256778359413147, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.4553781732530751, |
|
"grad_norm": 2588.2368838304837, |
|
"learning_rate": 3.309815158425591e-07, |
|
"logits/chosen": 5310.1044921875, |
|
"logits/rejected": 5185.2841796875, |
|
"logps/chosen": -249.4232177734375, |
|
"logps/rejected": -236.345947265625, |
|
"loss": 492.8625, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 0.38777777552604675, |
|
"rewards/margins": -0.04184495285153389, |
|
"rewards/rejected": 0.42962273955345154, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.46061240512954726, |
|
"grad_norm": 2541.1798920816664, |
|
"learning_rate": 3.2664565359716536e-07, |
|
"logits/chosen": 5426.6923828125, |
|
"logits/rejected": 4499.73095703125, |
|
"logps/chosen": -225.3328399658203, |
|
"logps/rejected": -189.65623474121094, |
|
"loss": 472.1843, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": 0.391153484582901, |
|
"rewards/margins": -0.021805385127663612, |
|
"rewards/rejected": 0.4129588007926941, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.46584663700601936, |
|
"grad_norm": 2708.826125946467, |
|
"learning_rate": 3.222841923260869e-07, |
|
"logits/chosen": 5195.87939453125, |
|
"logits/rejected": 4546.89111328125, |
|
"logps/chosen": -222.4955596923828, |
|
"logps/rejected": -202.45382690429688, |
|
"loss": 436.797, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.4222163259983063, |
|
"rewards/margins": -0.005721543915569782, |
|
"rewards/rejected": 0.42793789505958557, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.4710808688824915, |
|
"grad_norm": 3327.9010011428068, |
|
"learning_rate": 3.1789858872195887e-07, |
|
"logits/chosen": 6197.4189453125, |
|
"logits/rejected": 5115.60205078125, |
|
"logps/chosen": -256.226318359375, |
|
"logps/rejected": -237.42318725585938, |
|
"loss": 463.1399, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.47410669922828674, |
|
"rewards/margins": 0.11127021163702011, |
|
"rewards/rejected": 0.36283645033836365, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.4763151007589636, |
|
"grad_norm": 3030.8407607963145, |
|
"learning_rate": 3.1349030754075937e-07, |
|
"logits/chosen": 5142.9736328125, |
|
"logits/rejected": 4237.6552734375, |
|
"logps/chosen": -213.5383758544922, |
|
"logps/rejected": -190.01211547851562, |
|
"loss": 441.7674, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": 0.39735934138298035, |
|
"rewards/margins": 0.007636462338268757, |
|
"rewards/rejected": 0.38972288370132446, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.48154933263543576, |
|
"grad_norm": 2438.3420207071586, |
|
"learning_rate": 3.090608211125931e-07, |
|
"logits/chosen": 5203.4697265625, |
|
"logits/rejected": 4523.6591796875, |
|
"logps/chosen": -209.59799194335938, |
|
"logps/rejected": -197.68603515625, |
|
"loss": 421.52, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 0.4184856414794922, |
|
"rewards/margins": 0.05746692419052124, |
|
"rewards/rejected": 0.36101871728897095, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.48678356451190785, |
|
"grad_norm": 2547.16792381473, |
|
"learning_rate": 3.0461160884994487e-07, |
|
"logits/chosen": 5516.6259765625, |
|
"logits/rejected": 4994.55078125, |
|
"logps/chosen": -227.75247192382812, |
|
"logps/rejected": -213.952392578125, |
|
"loss": 469.6991, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.4269241690635681, |
|
"rewards/margins": 0.04791535809636116, |
|
"rewards/rejected": 0.37900882959365845, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.49201779638838, |
|
"grad_norm": 2854.5481374547926, |
|
"learning_rate": 3.001441567535681e-07, |
|
"logits/chosen": 6194.80322265625, |
|
"logits/rejected": 5217.76171875, |
|
"logps/chosen": -246.4164581298828, |
|
"logps/rejected": -225.7456817626953, |
|
"loss": 476.4677, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": 0.45431065559387207, |
|
"rewards/margins": -0.05539635568857193, |
|
"rewards/rejected": 0.509706974029541, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.49725202826485215, |
|
"grad_norm": 2818.5725741299366, |
|
"learning_rate": 2.956599569161724e-07, |
|
"logits/chosen": 5136.03759765625, |
|
"logits/rejected": 4069.05126953125, |
|
"logps/chosen": -197.53506469726562, |
|
"logps/rejected": -187.42420959472656, |
|
"loss": 445.4503, |
|
"rewards/accuracies": 0.4625000059604645, |
|
"rewards/chosen": 0.3796132206916809, |
|
"rewards/margins": -0.05161357671022415, |
|
"rewards/rejected": 0.43122678995132446, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.5024862601413242, |
|
"grad_norm": 2889.166347908498, |
|
"learning_rate": 2.91160507024077e-07, |
|
"logits/chosen": 5450.57666015625, |
|
"logits/rejected": 4700.4873046875, |
|
"logps/chosen": -224.8113555908203, |
|
"logps/rejected": -205.53060913085938, |
|
"loss": 454.4613, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.3970748782157898, |
|
"rewards/margins": 0.05230867862701416, |
|
"rewards/rejected": 0.34476613998413086, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.5077204920177963, |
|
"grad_norm": 3284.552726470652, |
|
"learning_rate": 2.866473098569953e-07, |
|
"logits/chosen": 5638.99609375, |
|
"logits/rejected": 4810.56494140625, |
|
"logps/chosen": -246.9467010498047, |
|
"logps/rejected": -218.79812622070312, |
|
"loss": 466.0353, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": 0.40407705307006836, |
|
"rewards/margins": -0.02928379736840725, |
|
"rewards/rejected": 0.43336087465286255, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.5129547238942685, |
|
"grad_norm": 2623.610981766376, |
|
"learning_rate": 2.8212187278611905e-07, |
|
"logits/chosen": 5360.29296875, |
|
"logits/rejected": 4762.080078125, |
|
"logps/chosen": -241.3561553955078, |
|
"logps/rejected": -224.0301055908203, |
|
"loss": 451.5908, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": 0.4626719355583191, |
|
"rewards/margins": 0.05470385402441025, |
|
"rewards/rejected": 0.40796810388565063, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.5181889557707406, |
|
"grad_norm": 2798.9691821826846, |
|
"learning_rate": 2.775857072706684e-07, |
|
"logits/chosen": 5845.1748046875, |
|
"logits/rejected": 4390.95556640625, |
|
"logps/chosen": -243.54818725585938, |
|
"logps/rejected": -186.8525390625, |
|
"loss": 451.5862, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.4639175534248352, |
|
"rewards/margins": 0.07632891833782196, |
|
"rewards/rejected": 0.38758859038352966, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.5234231876472127, |
|
"grad_norm": 2635.202394275582, |
|
"learning_rate": 2.7304032835307667e-07, |
|
"logits/chosen": 6009.6083984375, |
|
"logits/rejected": 5336.86083984375, |
|
"logps/chosen": -233.775146484375, |
|
"logps/rejected": -256.08892822265625, |
|
"loss": 449.5508, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": 0.45101770758628845, |
|
"rewards/margins": 0.05462346225976944, |
|
"rewards/rejected": 0.396394282579422, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.528657419523685, |
|
"grad_norm": 3046.3512976746365, |
|
"learning_rate": 2.6848725415297884e-07, |
|
"logits/chosen": 5911.275390625, |
|
"logits/rejected": 5214.2880859375, |
|
"logps/chosen": -261.0115661621094, |
|
"logps/rejected": -205.8301239013672, |
|
"loss": 464.0305, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.4340108335018158, |
|
"rewards/margins": 0.017716465517878532, |
|
"rewards/rejected": 0.4162944257259369, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.533891651400157, |
|
"grad_norm": 2864.526004336298, |
|
"learning_rate": 2.6392800536017183e-07, |
|
"logits/chosen": 5202.76416015625, |
|
"logits/rejected": 4944.0068359375, |
|
"logps/chosen": -247.30435180664062, |
|
"logps/rejected": -236.41311645507812, |
|
"loss": 435.8117, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.40671029686927795, |
|
"rewards/margins": 0.028489042073488235, |
|
"rewards/rejected": 0.37822121381759644, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.5391258832766291, |
|
"grad_norm": 6187.448379451401, |
|
"learning_rate": 2.59364104726716e-07, |
|
"logits/chosen": 5676.6806640625, |
|
"logits/rejected": 5011.1064453125, |
|
"logps/chosen": -246.1743927001953, |
|
"logps/rejected": -241.49044799804688, |
|
"loss": 454.4778, |
|
"rewards/accuracies": 0.42500001192092896, |
|
"rewards/chosen": 0.4495580792427063, |
|
"rewards/margins": 0.018314603716135025, |
|
"rewards/rejected": 0.4312434792518616, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.5443601151531012, |
|
"grad_norm": 2691.628839857216, |
|
"learning_rate": 2.547970765583491e-07, |
|
"logits/chosen": 5299.375, |
|
"logits/rejected": 4855.00146484375, |
|
"logps/chosen": -215.9862060546875, |
|
"logps/rejected": -192.43870544433594, |
|
"loss": 461.7123, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": 0.4477197229862213, |
|
"rewards/margins": -0.008086870424449444, |
|
"rewards/rejected": 0.45580655336380005, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.5495943470295734, |
|
"grad_norm": 2495.233536183816, |
|
"learning_rate": 2.502284462053799e-07, |
|
"logits/chosen": 5878.501953125, |
|
"logits/rejected": 5847.0, |
|
"logps/chosen": -239.09317016601562, |
|
"logps/rejected": -223.63198852539062, |
|
"loss": 434.0518, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.4332825243473053, |
|
"rewards/margins": -0.021379027515649796, |
|
"rewards/rejected": 0.4546615183353424, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.5548285789060455, |
|
"grad_norm": 2637.28084366533, |
|
"learning_rate": 2.4565973955323374e-07, |
|
"logits/chosen": 5609.7353515625, |
|
"logits/rejected": 4916.1220703125, |
|
"logps/chosen": -250.21542358398438, |
|
"logps/rejected": -205.2766876220703, |
|
"loss": 434.1141, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.415800005197525, |
|
"rewards/margins": 0.021374240517616272, |
|
"rewards/rejected": 0.39442577958106995, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.5600628107825176, |
|
"grad_norm": 3027.24933722122, |
|
"learning_rate": 2.410924825128195e-07, |
|
"logits/chosen": 5264.02099609375, |
|
"logits/rejected": 5056.28564453125, |
|
"logps/chosen": -225.8623504638672, |
|
"logps/rejected": -233.9796905517578, |
|
"loss": 455.3113, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.424748033285141, |
|
"rewards/margins": -0.012741155922412872, |
|
"rewards/rejected": 0.43748918175697327, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.5652970426589898, |
|
"grad_norm": 3316.2490439100284, |
|
"learning_rate": 2.365282005108875e-07, |
|
"logits/chosen": 5667.7626953125, |
|
"logits/rejected": 4730.36328125, |
|
"logps/chosen": -207.9048614501953, |
|
"logps/rejected": -211.5628204345703, |
|
"loss": 447.5346, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.47996577620506287, |
|
"rewards/margins": 0.09519468992948532, |
|
"rewards/rejected": 0.38477107882499695, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.5705312745354619, |
|
"grad_norm": 2826.3995184578384, |
|
"learning_rate": 2.319684179805491e-07, |
|
"logits/chosen": 5481.96240234375, |
|
"logits/rejected": 4411.9677734375, |
|
"logps/chosen": -243.62890625, |
|
"logps/rejected": -192.74151611328125, |
|
"loss": 455.4142, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.46998849511146545, |
|
"rewards/margins": 0.031203698366880417, |
|
"rewards/rejected": 0.43878477811813354, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.575765506411934, |
|
"grad_norm": 2747.847648919441, |
|
"learning_rate": 2.2741465785212902e-07, |
|
"logits/chosen": 5132.85595703125, |
|
"logits/rejected": 4005.84423828125, |
|
"logps/chosen": -217.897216796875, |
|
"logps/rejected": -181.64852905273438, |
|
"loss": 430.0103, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 0.38213029503822327, |
|
"rewards/margins": -0.016666922718286514, |
|
"rewards/rejected": 0.39879724383354187, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.5809997382884062, |
|
"grad_norm": 2323.3465928198475, |
|
"learning_rate": 2.2286844104451843e-07, |
|
"logits/chosen": 5565.04443359375, |
|
"logits/rejected": 4898.17822265625, |
|
"logps/chosen": -242.07308959960938, |
|
"logps/rejected": -231.4122314453125, |
|
"loss": 449.538, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.4311472773551941, |
|
"rewards/margins": 0.0534161813557148, |
|
"rewards/rejected": 0.3777311444282532, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.5862339701648783, |
|
"grad_norm": 2731.5099834648036, |
|
"learning_rate": 2.183312859572008e-07, |
|
"logits/chosen": 6313.4072265625, |
|
"logits/rejected": 5429.6669921875, |
|
"logps/chosen": -251.57821655273438, |
|
"logps/rejected": -215.6925048828125, |
|
"loss": 439.9762, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.39398592710494995, |
|
"rewards/margins": -0.047116655856370926, |
|
"rewards/rejected": 0.4411025941371918, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.5914682020413504, |
|
"grad_norm": 2679.5180675952847, |
|
"learning_rate": 2.138047079631184e-07, |
|
"logits/chosen": 5296.7177734375, |
|
"logits/rejected": 5422.4072265625, |
|
"logps/chosen": -213.47006225585938, |
|
"logps/rejected": -225.93197631835938, |
|
"loss": 469.3134, |
|
"rewards/accuracies": 0.4124999940395355, |
|
"rewards/chosen": 0.4071172773838043, |
|
"rewards/margins": -0.12094493210315704, |
|
"rewards/rejected": 0.5280622243881226, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.5967024339178225, |
|
"grad_norm": 2582.940559778803, |
|
"learning_rate": 2.0929021890255068e-07, |
|
"logits/chosen": 6125.99462890625, |
|
"logits/rejected": 5399.9873046875, |
|
"logps/chosen": -257.4599609375, |
|
"logps/rejected": -255.30001831054688, |
|
"loss": 448.1145, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.4138859212398529, |
|
"rewards/margins": -0.1462526023387909, |
|
"rewards/rejected": 0.5601385831832886, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.6019366657942947, |
|
"grad_norm": 2839.4929770409203, |
|
"learning_rate": 2.0478932657817102e-07, |
|
"logits/chosen": 5059.7060546875, |
|
"logits/rejected": 4862.5732421875, |
|
"logps/chosen": -209.98721313476562, |
|
"logps/rejected": -213.7787628173828, |
|
"loss": 421.8651, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.3850982189178467, |
|
"rewards/margins": 0.013092848472297192, |
|
"rewards/rejected": 0.3720053732395172, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.6071708976707668, |
|
"grad_norm": 3064.4069962248286, |
|
"learning_rate": 2.0030353425145374e-07, |
|
"logits/chosen": 7047.43115234375, |
|
"logits/rejected": 6413.43994140625, |
|
"logps/chosen": -304.1640319824219, |
|
"logps/rejected": -276.45574951171875, |
|
"loss": 514.2635, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": 0.6004487872123718, |
|
"rewards/margins": -0.14154250919818878, |
|
"rewards/rejected": 0.7419912815093994, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.6124051295472389, |
|
"grad_norm": 2585.4669520145776, |
|
"learning_rate": 1.9583434014059635e-07, |
|
"logits/chosen": 5760.01904296875, |
|
"logits/rejected": 5066.72900390625, |
|
"logps/chosen": -234.2002410888672, |
|
"logps/rejected": -218.1713409423828, |
|
"loss": 446.8139, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": 0.3829045593738556, |
|
"rewards/margins": -0.20982496440410614, |
|
"rewards/rejected": 0.5927294492721558, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.6176393614237111, |
|
"grad_norm": 2895.216663375429, |
|
"learning_rate": 1.9138323692012733e-07, |
|
"logits/chosen": 5071.248046875, |
|
"logits/rejected": 5016.58203125, |
|
"logps/chosen": -232.4548797607422, |
|
"logps/rejected": -214.78939819335938, |
|
"loss": 427.5768, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.4009971022605896, |
|
"rewards/margins": -0.017197366803884506, |
|
"rewards/rejected": 0.4181944727897644, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.6228735933001832, |
|
"grad_norm": 3172.1217220248027, |
|
"learning_rate": 1.8695171122236442e-07, |
|
"logits/chosen": 5186.33447265625, |
|
"logits/rejected": 5195.64892578125, |
|
"logps/chosen": -219.36703491210938, |
|
"logps/rejected": -242.1404571533203, |
|
"loss": 492.8513, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 0.3699229657649994, |
|
"rewards/margins": -0.017510617151856422, |
|
"rewards/rejected": 0.38743358850479126, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.6281078251766553, |
|
"grad_norm": 2914.8049221104293, |
|
"learning_rate": 1.8254124314089223e-07, |
|
"logits/chosen": 5557.251953125, |
|
"logits/rejected": 5019.79541015625, |
|
"logps/chosen": -235.5565185546875, |
|
"logps/rejected": -225.5511016845703, |
|
"loss": 466.1685, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.48438769578933716, |
|
"rewards/margins": 0.052512697875499725, |
|
"rewards/rejected": 0.4318750500679016, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.6333420570531274, |
|
"grad_norm": 2637.0032356490447, |
|
"learning_rate": 1.7815330573622205e-07, |
|
"logits/chosen": 5739.81884765625, |
|
"logits/rejected": 5681.2822265625, |
|
"logps/chosen": -233.9698028564453, |
|
"logps/rejected": -262.9213562011719, |
|
"loss": 428.7625, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 0.4719967842102051, |
|
"rewards/margins": 0.026335466653108597, |
|
"rewards/rejected": 0.44566136598587036, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.6385762889295996, |
|
"grad_norm": 3181.9042887438923, |
|
"learning_rate": 1.7378936454380274e-07, |
|
"logits/chosen": 5621.00830078125, |
|
"logits/rejected": 4796.08740234375, |
|
"logps/chosen": -220.7882080078125, |
|
"logps/rejected": -207.47921752929688, |
|
"loss": 446.4105, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.43960586190223694, |
|
"rewards/margins": 0.08442094922065735, |
|
"rewards/rejected": 0.3551848828792572, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.6438105208060717, |
|
"grad_norm": 2986.626560530847, |
|
"learning_rate": 1.694508770845427e-07, |
|
"logits/chosen": 6612.59521484375, |
|
"logits/rejected": 5661.8095703125, |
|
"logps/chosen": -281.0216064453125, |
|
"logps/rejected": -243.0041961669922, |
|
"loss": 471.7892, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.5507138967514038, |
|
"rewards/margins": 0.09217164665460587, |
|
"rewards/rejected": 0.4585422873497009, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.6490447526825438, |
|
"grad_norm": 2491.6715549344503, |
|
"learning_rate": 1.651392923780105e-07, |
|
"logits/chosen": 6138.0517578125, |
|
"logits/rejected": 4998.80126953125, |
|
"logps/chosen": -237.0027618408203, |
|
"logps/rejected": -196.52346801757812, |
|
"loss": 439.6602, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.44304361939430237, |
|
"rewards/margins": 0.019446546211838722, |
|
"rewards/rejected": 0.4235970377922058, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.654278984559016, |
|
"grad_norm": 2930.0912987575693, |
|
"learning_rate": 1.6085605045847367e-07, |
|
"logits/chosen": 5613.2529296875, |
|
"logits/rejected": 4578.0126953125, |
|
"logps/chosen": -231.54763793945312, |
|
"logps/rejected": -231.14364624023438, |
|
"loss": 444.372, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": 0.41010889410972595, |
|
"rewards/margins": -0.021687136963009834, |
|
"rewards/rejected": 0.43179601430892944, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.6595132164354881, |
|
"grad_norm": 2685.020119553926, |
|
"learning_rate": 1.5660258189393944e-07, |
|
"logits/chosen": 5781.34375, |
|
"logits/rejected": 4609.04443359375, |
|
"logps/chosen": -247.92501831054688, |
|
"logps/rejected": -209.21481323242188, |
|
"loss": 470.8875, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.5003296732902527, |
|
"rewards/margins": -0.01339314877986908, |
|
"rewards/rejected": 0.5137227773666382, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.6647474483119602, |
|
"grad_norm": 3478.5401870274527, |
|
"learning_rate": 1.5238030730835577e-07, |
|
"logits/chosen": 5069.4560546875, |
|
"logits/rejected": 5332.6708984375, |
|
"logps/chosen": -198.8926544189453, |
|
"logps/rejected": -207.35888671875, |
|
"loss": 431.9828, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": 0.39420709013938904, |
|
"rewards/margins": -0.018875379115343094, |
|
"rewards/rejected": 0.41308245062828064, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.6699816801884323, |
|
"grad_norm": 2495.084991283364, |
|
"learning_rate": 1.4819063690713564e-07, |
|
"logits/chosen": 5791.67138671875, |
|
"logits/rejected": 4803.8017578125, |
|
"logps/chosen": -239.6625213623047, |
|
"logps/rejected": -215.32882690429688, |
|
"loss": 463.378, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": 0.38961488008499146, |
|
"rewards/margins": -0.013818919658660889, |
|
"rewards/rejected": 0.40343379974365234, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.6752159120649045, |
|
"grad_norm": 2929.168635003601, |
|
"learning_rate": 1.4403497000615883e-07, |
|
"logits/chosen": 5547.22119140625, |
|
"logits/rejected": 4953.2685546875, |
|
"logps/chosen": -271.31817626953125, |
|
"logps/rejected": -208.81826782226562, |
|
"loss": 444.6075, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": 0.40208378434181213, |
|
"rewards/margins": -0.03740546852350235, |
|
"rewards/rejected": 0.4394892156124115, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.6804501439413766, |
|
"grad_norm": 2581.5019746906396, |
|
"learning_rate": 1.3991469456441272e-07, |
|
"logits/chosen": 5376.77197265625, |
|
"logits/rejected": 5233.3271484375, |
|
"logps/chosen": -235.42138671875, |
|
"logps/rejected": -236.4017333984375, |
|
"loss": 420.9892, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 0.4205804467201233, |
|
"rewards/margins": -0.030592668801546097, |
|
"rewards/rejected": 0.4511730670928955, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.6856843758178487, |
|
"grad_norm": 3160.8658484895473, |
|
"learning_rate": 1.358311867204244e-07, |
|
"logits/chosen": 4565.5341796875, |
|
"logits/rejected": 4611.97998046875, |
|
"logps/chosen": -186.4778289794922, |
|
"logps/rejected": -192.1836700439453, |
|
"loss": 444.8648, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": 0.37066149711608887, |
|
"rewards/margins": -0.06482173502445221, |
|
"rewards/rejected": 0.43548327684402466, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.6909186076943209, |
|
"grad_norm": 2840.1793768299094, |
|
"learning_rate": 1.3178581033264216e-07, |
|
"logits/chosen": 6083.375, |
|
"logits/rejected": 5281.1484375, |
|
"logps/chosen": -248.0015106201172, |
|
"logps/rejected": -244.42868041992188, |
|
"loss": 492.9659, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.4658554196357727, |
|
"rewards/margins": 0.03714742138981819, |
|
"rewards/rejected": 0.42870792746543884, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.696152839570793, |
|
"grad_norm": 2999.3305501535006, |
|
"learning_rate": 1.2777991652391757e-07, |
|
"logits/chosen": 5277.3232421875, |
|
"logits/rejected": 4048.818359375, |
|
"logps/chosen": -233.933349609375, |
|
"logps/rejected": -182.76797485351562, |
|
"loss": 437.7016, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": 0.4525213837623596, |
|
"rewards/margins": 0.050732992589473724, |
|
"rewards/rejected": 0.4017884135246277, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.7013870714472651, |
|
"grad_norm": 2758.037544701704, |
|
"learning_rate": 1.2381484323024178e-07, |
|
"logits/chosen": 5822.4111328125, |
|
"logits/rejected": 5114.529296875, |
|
"logps/chosen": -239.6138916015625, |
|
"logps/rejected": -212.1165771484375, |
|
"loss": 445.3217, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.416238397359848, |
|
"rewards/margins": -0.008182978257536888, |
|
"rewards/rejected": 0.42442137002944946, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.7066213033237373, |
|
"grad_norm": 2322.8858217356333, |
|
"learning_rate": 1.1989191475388516e-07, |
|
"logits/chosen": 4871.5693359375, |
|
"logits/rejected": 4576.48779296875, |
|
"logps/chosen": -184.75506591796875, |
|
"logps/rejected": -198.5352020263672, |
|
"loss": 395.6979, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.42691653966903687, |
|
"rewards/margins": -0.034974582493305206, |
|
"rewards/rejected": 0.4618911147117615, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.7118555352002094, |
|
"grad_norm": 2504.7651931239393, |
|
"learning_rate": 1.1601244132109179e-07, |
|
"logits/chosen": 4925.0966796875, |
|
"logits/rejected": 4446.34521484375, |
|
"logps/chosen": -194.90817260742188, |
|
"logps/rejected": -204.82412719726562, |
|
"loss": 465.5208, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.4748953878879547, |
|
"rewards/margins": 0.097330242395401, |
|
"rewards/rejected": 0.3775652050971985, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.7170897670766815, |
|
"grad_norm": 3082.926945072268, |
|
"learning_rate": 1.1217771864447395e-07, |
|
"logits/chosen": 5598.09228515625, |
|
"logits/rejected": 4815.2041015625, |
|
"logps/chosen": -246.0078125, |
|
"logps/rejected": -249.9725341796875, |
|
"loss": 500.4969, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": 0.39293307065963745, |
|
"rewards/margins": -0.0007484182715415955, |
|
"rewards/rejected": 0.39368146657943726, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.7223239989531536, |
|
"grad_norm": 2722.3074395253057, |
|
"learning_rate": 1.0838902749025499e-07, |
|
"logits/chosen": 6814.609375, |
|
"logits/rejected": 5488.0498046875, |
|
"logps/chosen": -268.84417724609375, |
|
"logps/rejected": -234.26416015625, |
|
"loss": 458.2768, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.5655155777931213, |
|
"rewards/margins": 0.1583983451128006, |
|
"rewards/rejected": 0.4071172773838043, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.7275582308296258, |
|
"grad_norm": 3089.651688801927, |
|
"learning_rate": 1.0464763325050358e-07, |
|
"logits/chosen": 5166.2724609375, |
|
"logits/rejected": 4742.515625, |
|
"logps/chosen": -231.9933624267578, |
|
"logps/rejected": -210.8185577392578, |
|
"loss": 428.4296, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.49672383069992065, |
|
"rewards/margins": 0.027382072061300278, |
|
"rewards/rejected": 0.4693417549133301, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.7327924627060979, |
|
"grad_norm": 3265.9261458601773, |
|
"learning_rate": 1.0095478552050346e-07, |
|
"logits/chosen": 6062.771484375, |
|
"logits/rejected": 4171.9345703125, |
|
"logps/chosen": -269.94989013671875, |
|
"logps/rejected": -207.66567993164062, |
|
"loss": 476.396, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 0.4134543538093567, |
|
"rewards/margins": -0.02273373305797577, |
|
"rewards/rejected": 0.43618807196617126, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.73802669458257, |
|
"grad_norm": 2494.1100717249315, |
|
"learning_rate": 9.731171768139806e-08, |
|
"logits/chosen": 5687.302734375, |
|
"logits/rejected": 4657.4443359375, |
|
"logps/chosen": -224.1031036376953, |
|
"logps/rejected": -198.58413696289062, |
|
"loss": 443.6654, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": 0.4446600377559662, |
|
"rewards/margins": 0.015666846185922623, |
|
"rewards/rejected": 0.42899322509765625, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.7432609264590422, |
|
"grad_norm": 2932.950028085596, |
|
"learning_rate": 9.37196464882522e-08, |
|
"logits/chosen": 5420.83837890625, |
|
"logits/rejected": 4967.8408203125, |
|
"logps/chosen": -209.77053833007812, |
|
"logps/rejected": -207.4347686767578, |
|
"loss": 450.4227, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.43635910749435425, |
|
"rewards/margins": 0.013584541156888008, |
|
"rewards/rejected": 0.42277461290359497, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.7484951583355143, |
|
"grad_norm": 2755.115765364893, |
|
"learning_rate": 9.017977166366444e-08, |
|
"logits/chosen": 5591.40771484375, |
|
"logits/rejected": 5046.1015625, |
|
"logps/chosen": -239.6355438232422, |
|
"logps/rejected": -232.3363800048828, |
|
"loss": 399.9244, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": 0.44893360137939453, |
|
"rewards/margins": -0.023927757516503334, |
|
"rewards/rejected": 0.47286128997802734, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.7537293902119864, |
|
"grad_norm": 2758.9501579598164, |
|
"learning_rate": 8.669327549707095e-08, |
|
"logits/chosen": 5718.99560546875, |
|
"logits/rejected": 4905.77197265625, |
|
"logps/chosen": -254.92733764648438, |
|
"logps/rejected": -217.9339141845703, |
|
"loss": 459.4898, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.49462470412254333, |
|
"rewards/margins": 0.05205591768026352, |
|
"rewards/rejected": 0.442568838596344, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.7589636220884585, |
|
"grad_norm": 2916.421053999076, |
|
"learning_rate": 8.326132244986931e-08, |
|
"logits/chosen": 5092.185546875, |
|
"logits/rejected": 4410.7255859375, |
|
"logps/chosen": -227.4456329345703, |
|
"logps/rejected": -200.5583953857422, |
|
"loss": 443.7745, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.35959136486053467, |
|
"rewards/margins": 0.013029132969677448, |
|
"rewards/rejected": 0.3465622365474701, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.7641978539649307, |
|
"grad_norm": 2500.95110656443, |
|
"learning_rate": 7.988505876649862e-08, |
|
"logits/chosen": 5265.857421875, |
|
"logits/rejected": 4087.15283203125, |
|
"logps/chosen": -229.035888671875, |
|
"logps/rejected": -220.652099609375, |
|
"loss": 447.4743, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.39383992552757263, |
|
"rewards/margins": 0.018630001693964005, |
|
"rewards/rejected": 0.37520989775657654, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.7694320858414028, |
|
"grad_norm": 3061.8379378461154, |
|
"learning_rate": 7.656561209160248e-08, |
|
"logits/chosen": 5706.8154296875, |
|
"logits/rejected": 4945.07177734375, |
|
"logps/chosen": -261.44781494140625, |
|
"logps/rejected": -214.4395294189453, |
|
"loss": 466.3214, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.42837047576904297, |
|
"rewards/margins": 0.06265218555927277, |
|
"rewards/rejected": 0.3657183051109314, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.7746663177178749, |
|
"grad_norm": 2191.9761613126184, |
|
"learning_rate": 7.330409109340562e-08, |
|
"logits/chosen": 5812.962890625, |
|
"logits/rejected": 5259.6865234375, |
|
"logps/chosen": -275.3894958496094, |
|
"logps/rejected": -240.2283935546875, |
|
"loss": 436.1938, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 0.4367316663265228, |
|
"rewards/margins": 0.004428995307534933, |
|
"rewards/rejected": 0.43230265378952026, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.7799005495943471, |
|
"grad_norm": 3020.3364090207338, |
|
"learning_rate": 7.010158509342681e-08, |
|
"logits/chosen": 6394.3779296875, |
|
"logits/rejected": 4694.5419921875, |
|
"logps/chosen": -254.2501983642578, |
|
"logps/rejected": -196.20724487304688, |
|
"loss": 469.8941, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": 0.4546736776828766, |
|
"rewards/margins": -0.030942197889089584, |
|
"rewards/rejected": 0.4856158196926117, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.7851347814708192, |
|
"grad_norm": 2862.609703782646, |
|
"learning_rate": 6.695916370265527e-08, |
|
"logits/chosen": 5147.36279296875, |
|
"logits/rejected": 4634.7470703125, |
|
"logps/chosen": -220.52487182617188, |
|
"logps/rejected": -173.90330505371094, |
|
"loss": 483.0644, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 0.3716416358947754, |
|
"rewards/margins": -0.025338003411889076, |
|
"rewards/rejected": 0.3969796299934387, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.7903690133472913, |
|
"grad_norm": 2682.017468500409, |
|
"learning_rate": 6.387787646430853e-08, |
|
"logits/chosen": 6349.74951171875, |
|
"logits/rejected": 5803.51025390625, |
|
"logps/chosen": -253.1613311767578, |
|
"logps/rejected": -243.31069946289062, |
|
"loss": 465.5553, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.4580962657928467, |
|
"rewards/margins": 0.023849302902817726, |
|
"rewards/rejected": 0.4342469573020935, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.7956032452237635, |
|
"grad_norm": 2929.4259148387664, |
|
"learning_rate": 6.0858752503294e-08, |
|
"logits/chosen": 4991.2568359375, |
|
"logits/rejected": 4856.7724609375, |
|
"logps/chosen": -243.1151885986328, |
|
"logps/rejected": -218.1384735107422, |
|
"loss": 448.5571, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": 0.39313045144081116, |
|
"rewards/margins": -0.008735431358218193, |
|
"rewards/rejected": 0.4018658697605133, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.8008374771002356, |
|
"grad_norm": 2334.8289467051422, |
|
"learning_rate": 5.7902800182489385e-08, |
|
"logits/chosen": 5283.90966796875, |
|
"logits/rejected": 5065.044921875, |
|
"logps/chosen": -209.5236053466797, |
|
"logps/rejected": -185.2886505126953, |
|
"loss": 438.2237, |
|
"rewards/accuracies": 0.38749998807907104, |
|
"rewards/chosen": 0.4488741457462311, |
|
"rewards/margins": -0.02802448347210884, |
|
"rewards/rejected": 0.47689858078956604, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.8060717089767077, |
|
"grad_norm": 2835.446662936965, |
|
"learning_rate": 5.5011006765957604e-08, |
|
"logits/chosen": 6475.173828125, |
|
"logits/rejected": 5851.21044921875, |
|
"logps/chosen": -254.42550659179688, |
|
"logps/rejected": -276.20184326171875, |
|
"loss": 459.7479, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.5142590999603271, |
|
"rewards/margins": 0.06982637941837311, |
|
"rewards/rejected": 0.44443267583847046, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.8113059408531798, |
|
"grad_norm": 2645.7937944070086, |
|
"learning_rate": 5.218433808920883e-08, |
|
"logits/chosen": 5612.806640625, |
|
"logits/rejected": 5204.6416015625, |
|
"logps/chosen": -241.086669921875, |
|
"logps/rejected": -235.9331817626953, |
|
"loss": 477.5177, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 0.46159011125564575, |
|
"rewards/margins": 0.03268017619848251, |
|
"rewards/rejected": 0.42890992760658264, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.816540172729652, |
|
"grad_norm": 2799.218073861503, |
|
"learning_rate": 4.942373823661927e-08, |
|
"logits/chosen": 6671.35693359375, |
|
"logits/rejected": 5100.93115234375, |
|
"logps/chosen": -269.9798583984375, |
|
"logps/rejected": -219.4925079345703, |
|
"loss": 440.4742, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.49509739875793457, |
|
"rewards/margins": 0.023434173315763474, |
|
"rewards/rejected": 0.471663236618042, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.821774404606124, |
|
"grad_norm": 2778.404170104335, |
|
"learning_rate": 4.6730129226114354e-08, |
|
"logits/chosen": 5087.5205078125, |
|
"logits/rejected": 4740.5205078125, |
|
"logps/chosen": -213.2566375732422, |
|
"logps/rejected": -172.8238525390625, |
|
"loss": 456.5575, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 0.4261597990989685, |
|
"rewards/margins": -0.00964472908526659, |
|
"rewards/rejected": 0.4358045160770416, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.8270086364825961, |
|
"grad_norm": 2634.177798429675, |
|
"learning_rate": 4.41044107012227e-08, |
|
"logits/chosen": 6359.1689453125, |
|
"logits/rejected": 5187.23828125, |
|
"logps/chosen": -286.50201416015625, |
|
"logps/rejected": -236.1680145263672, |
|
"loss": 464.8844, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.5070956945419312, |
|
"rewards/margins": 0.03383489325642586, |
|
"rewards/rejected": 0.4732607901096344, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.8322428683590684, |
|
"grad_norm": 2799.6223555020733, |
|
"learning_rate": 4.1547459630601966e-08, |
|
"logits/chosen": 5616.50146484375, |
|
"logits/rejected": 5148.76025390625, |
|
"logps/chosen": -245.4374542236328, |
|
"logps/rejected": -224.58297729492188, |
|
"loss": 468.5303, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": 0.42994633316993713, |
|
"rewards/margins": -0.04849852994084358, |
|
"rewards/rejected": 0.47844481468200684, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.8374771002355405, |
|
"grad_norm": 2603.023786297724, |
|
"learning_rate": 3.9060130015138857e-08, |
|
"logits/chosen": 5224.2919921875, |
|
"logits/rejected": 4763.0263671875, |
|
"logps/chosen": -228.858642578125, |
|
"logps/rejected": -206.1272735595703, |
|
"loss": 441.8306, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": 0.532415509223938, |
|
"rewards/margins": 0.082635298371315, |
|
"rewards/rejected": 0.4497801661491394, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.8427113321120125, |
|
"grad_norm": 2952.0546467904624, |
|
"learning_rate": 3.664325260271953e-08, |
|
"logits/chosen": 5981.43505859375, |
|
"logits/rejected": 5149.3701171875, |
|
"logps/chosen": -264.82513427734375, |
|
"logps/rejected": -231.50088500976562, |
|
"loss": 476.4973, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.5294232368469238, |
|
"rewards/margins": 0.05757498741149902, |
|
"rewards/rejected": 0.4718483090400696, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.8479455639884846, |
|
"grad_norm": 2685.7294831210556, |
|
"learning_rate": 3.429763461076676e-08, |
|
"logits/chosen": 5783.91162109375, |
|
"logits/rejected": 5101.3291015625, |
|
"logps/chosen": -229.64187622070312, |
|
"logps/rejected": -208.2964324951172, |
|
"loss": 452.1323, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": 0.657304584980011, |
|
"rewards/margins": -0.1763857752084732, |
|
"rewards/rejected": 0.8336902856826782, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.8531797958649568, |
|
"grad_norm": 2730.708924158352, |
|
"learning_rate": 3.202405945663555e-08, |
|
"logits/chosen": 5812.5908203125, |
|
"logits/rejected": 4056.528076171875, |
|
"logps/chosen": -227.04598999023438, |
|
"logps/rejected": -165.66348266601562, |
|
"loss": 433.1286, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.44985729455947876, |
|
"rewards/margins": 0.0024114579427987337, |
|
"rewards/rejected": 0.4474458694458008, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.8584140277414289, |
|
"grad_norm": 2716.197853796788, |
|
"learning_rate": 2.9823286495958556e-08, |
|
"logits/chosen": 4791.669921875, |
|
"logits/rejected": 5499.1123046875, |
|
"logps/chosen": -198.6993865966797, |
|
"logps/rejected": -247.2404327392578, |
|
"loss": 452.7315, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": 0.4359843134880066, |
|
"rewards/margins": -0.5284264087677002, |
|
"rewards/rejected": 0.9644107818603516, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.863648259617901, |
|
"grad_norm": 2791.281847581253, |
|
"learning_rate": 2.769605076902695e-08, |
|
"logits/chosen": 6074.97802734375, |
|
"logits/rejected": 5591.9599609375, |
|
"logps/chosen": -239.0797882080078, |
|
"logps/rejected": -250.9207763671875, |
|
"loss": 429.5277, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.5212064981460571, |
|
"rewards/margins": 0.0025348193012177944, |
|
"rewards/rejected": 0.5186716318130493, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.8688824914943732, |
|
"grad_norm": 3093.882963200942, |
|
"learning_rate": 2.5643062755293403e-08, |
|
"logits/chosen": 5405.78759765625, |
|
"logits/rejected": 4668.48828125, |
|
"logps/chosen": -237.1522674560547, |
|
"logps/rejected": -197.37451171875, |
|
"loss": 469.6285, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.42427119612693787, |
|
"rewards/margins": 0.02963954210281372, |
|
"rewards/rejected": 0.39463168382644653, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.8741167233708453, |
|
"grad_norm": 2911.6598438916217, |
|
"learning_rate": 2.366500813607733e-08, |
|
"logits/chosen": 5930.2880859375, |
|
"logits/rejected": 4750.8203125, |
|
"logps/chosen": -233.2056427001953, |
|
"logps/rejected": -216.0753173828125, |
|
"loss": 461.4281, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": 0.453493595123291, |
|
"rewards/margins": 0.01780077815055847, |
|
"rewards/rejected": 0.43569284677505493, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.8793509552473174, |
|
"grad_norm": 2804.7017487072826, |
|
"learning_rate": 2.176254756555329e-08, |
|
"logits/chosen": 6307.5615234375, |
|
"logits/rejected": 5713.3017578125, |
|
"logps/chosen": -275.5252685546875, |
|
"logps/rejected": -247.7924346923828, |
|
"loss": 446.325, |
|
"rewards/accuracies": 0.42500001192092896, |
|
"rewards/chosen": 0.4305667281150818, |
|
"rewards/margins": 0.0016602992545813322, |
|
"rewards/rejected": 0.42890650033950806, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.8845851871237895, |
|
"grad_norm": 2756.2303664573706, |
|
"learning_rate": 1.9936316450097468e-08, |
|
"logits/chosen": 5031.6337890625, |
|
"logits/rejected": 4576.9560546875, |
|
"logps/chosen": -213.4817657470703, |
|
"logps/rejected": -188.27459716796875, |
|
"loss": 429.4412, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.42264145612716675, |
|
"rewards/margins": 0.02685725688934326, |
|
"rewards/rejected": 0.3957841992378235, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.8898194190002617, |
|
"grad_norm": 2590.5879445875785, |
|
"learning_rate": 1.8186924736067477e-08, |
|
"logits/chosen": 5710.92578125, |
|
"logits/rejected": 4436.71435546875, |
|
"logps/chosen": -245.94833374023438, |
|
"logps/rejected": -221.7904815673828, |
|
"loss": 449.8979, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.45010289549827576, |
|
"rewards/margins": 0.06335899978876114, |
|
"rewards/rejected": 0.3867438733577728, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.8950536508767338, |
|
"grad_norm": 2757.0934444677064, |
|
"learning_rate": 1.651495670608488e-08, |
|
"logits/chosen": 6424.537109375, |
|
"logits/rejected": 5109.24169921875, |
|
"logps/chosen": -250.62875366210938, |
|
"logps/rejected": -216.7704620361328, |
|
"loss": 430.7702, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": 0.5290960073471069, |
|
"rewards/margins": 0.08976821601390839, |
|
"rewards/rejected": 0.43932777643203735, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.9002878827532059, |
|
"grad_norm": 8507.84049039918, |
|
"learning_rate": 1.4920970783889737e-08, |
|
"logits/chosen": 6169.34375, |
|
"logits/rejected": 4740.4990234375, |
|
"logps/chosen": -254.0730438232422, |
|
"logps/rejected": -234.4978485107422, |
|
"loss": 463.4699, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.4609464704990387, |
|
"rewards/margins": -0.0022157118655741215, |
|
"rewards/rejected": 0.4631621837615967, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.9055221146296781, |
|
"grad_norm": 2899.8879312514678, |
|
"learning_rate": 1.340549934783164e-08, |
|
"logits/chosen": 5876.3447265625, |
|
"logits/rejected": 5633.6728515625, |
|
"logps/chosen": -254.2863311767578, |
|
"logps/rejected": -255.55599975585938, |
|
"loss": 446.1415, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": 0.43738800287246704, |
|
"rewards/margins": 0.03623257577419281, |
|
"rewards/rejected": 0.40115541219711304, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.9107563465061502, |
|
"grad_norm": 2759.8073515628384, |
|
"learning_rate": 1.1969048553059608e-08, |
|
"logits/chosen": 5478.740234375, |
|
"logits/rejected": 4825.607421875, |
|
"logps/chosen": -207.3783416748047, |
|
"logps/rejected": -196.50003051757812, |
|
"loss": 417.9901, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": 0.41129404306411743, |
|
"rewards/margins": -0.001239946810528636, |
|
"rewards/rejected": 0.4125339984893799, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.9159905783826223, |
|
"grad_norm": 2928.870279384255, |
|
"learning_rate": 1.06120981624703e-08, |
|
"logits/chosen": 5248.7646484375, |
|
"logits/rejected": 5659.0908203125, |
|
"logps/chosen": -234.52590942382812, |
|
"logps/rejected": -253.9241943359375, |
|
"loss": 473.0938, |
|
"rewards/accuracies": 0.4625000059604645, |
|
"rewards/chosen": 0.3923454284667969, |
|
"rewards/margins": -0.08389392495155334, |
|
"rewards/rejected": 0.4762393534183502, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.9212248102590945, |
|
"grad_norm": 2633.7390537249466, |
|
"learning_rate": 9.335101386471284e-09, |
|
"logits/chosen": 6008.14599609375, |
|
"logits/rejected": 5465.85791015625, |
|
"logps/chosen": -254.5990447998047, |
|
"logps/rejected": -225.7978973388672, |
|
"loss": 429.5775, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": 0.3928176462650299, |
|
"rewards/margins": -0.041309647262096405, |
|
"rewards/rejected": 0.4341272711753845, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.9264590421355666, |
|
"grad_norm": 2751.6746625086507, |
|
"learning_rate": 8.138484731612273e-09, |
|
"logits/chosen": 5772.14208984375, |
|
"logits/rejected": 4916.0166015625, |
|
"logps/chosen": -237.14111328125, |
|
"logps/rejected": -235.15902709960938, |
|
"loss": 449.8257, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 0.4163140654563904, |
|
"rewards/margins": -0.003167784307152033, |
|
"rewards/rejected": 0.4194818437099457, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.9316932740120387, |
|
"grad_norm": 2986.1836483001102, |
|
"learning_rate": 7.0226478581355e-09, |
|
"logits/chosen": 5796.9658203125, |
|
"logits/rejected": 5225.16015625, |
|
"logps/chosen": -233.0400390625, |
|
"logps/rejected": -211.41305541992188, |
|
"loss": 442.5229, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": 0.5166589021682739, |
|
"rewards/margins": -0.05622934550046921, |
|
"rewards/rejected": 0.5728882551193237, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.9369275058885108, |
|
"grad_norm": 2588.104995999543, |
|
"learning_rate": 5.987963446492383e-09, |
|
"logits/chosen": 5863.2470703125, |
|
"logits/rejected": 5275.2890625, |
|
"logps/chosen": -230.316162109375, |
|
"logps/rejected": -208.0933074951172, |
|
"loss": 434.7167, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.4742770791053772, |
|
"rewards/margins": 0.013745969161391258, |
|
"rewards/rejected": 0.4605311453342438, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.942161737764983, |
|
"grad_norm": 2706.6843129659187, |
|
"learning_rate": 5.0347770728713935e-09, |
|
"logits/chosen": 5820.26416015625, |
|
"logits/rejected": 4646.1689453125, |
|
"logps/chosen": -280.79437255859375, |
|
"logps/rejected": -203.54367065429688, |
|
"loss": 451.1318, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": 0.39324313402175903, |
|
"rewards/margins": -0.029782313853502274, |
|
"rewards/rejected": 0.4230254590511322, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.9473959696414551, |
|
"grad_norm": 2907.594423226152, |
|
"learning_rate": 4.1634070937782424e-09, |
|
"logits/chosen": 5797.3984375, |
|
"logits/rejected": 5349.8115234375, |
|
"logps/chosen": -250.1112518310547, |
|
"logps/rejected": -250.5971221923828, |
|
"loss": 469.334, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": 0.465669721364975, |
|
"rewards/margins": -0.11242341995239258, |
|
"rewards/rejected": 0.5780931711196899, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.9526302015179272, |
|
"grad_norm": 2594.3361342889198, |
|
"learning_rate": 3.3741445397075797e-09, |
|
"logits/chosen": 6125.84912109375, |
|
"logits/rejected": 5266.125, |
|
"logps/chosen": -265.88262939453125, |
|
"logps/rejected": -253.229736328125, |
|
"loss": 498.9627, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": 0.4644504487514496, |
|
"rewards/margins": 0.023336850106716156, |
|
"rewards/rejected": 0.44111356139183044, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.9578644333943994, |
|
"grad_norm": 2694.3106547281477, |
|
"learning_rate": 2.667253017941018e-09, |
|
"logits/chosen": 6051.47802734375, |
|
"logits/rejected": 4897.32177734375, |
|
"logps/chosen": -258.3802795410156, |
|
"logps/rejected": -225.14633178710938, |
|
"loss": 475.6425, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": 0.4497244358062744, |
|
"rewards/margins": -0.013121997937560081, |
|
"rewards/rejected": 0.46284645795822144, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.9630986652708715, |
|
"grad_norm": 2494.09151231596, |
|
"learning_rate": 2.0429686245045097e-09, |
|
"logits/chosen": 6027.98046875, |
|
"logits/rejected": 4803.41650390625, |
|
"logps/chosen": -286.62261962890625, |
|
"logps/rejected": -219.09426879882812, |
|
"loss": 477.4155, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": 0.5301491022109985, |
|
"rewards/margins": 0.06436850130558014, |
|
"rewards/rejected": 0.4657805562019348, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.9683328971473436, |
|
"grad_norm": 2754.330659901363, |
|
"learning_rate": 1.5014998653141708e-09, |
|
"logits/chosen": 5659.52490234375, |
|
"logits/rejected": 4881.109375, |
|
"logps/chosen": -252.7493896484375, |
|
"logps/rejected": -205.0902862548828, |
|
"loss": 449.6573, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.5084835290908813, |
|
"rewards/margins": 0.027627814561128616, |
|
"rewards/rejected": 0.4808557629585266, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.9735671290238157, |
|
"grad_norm": 2974.5475606563223, |
|
"learning_rate": 1.0430275865371263e-09, |
|
"logits/chosen": 5825.0693359375, |
|
"logits/rejected": 4934.61962890625, |
|
"logps/chosen": -208.9281768798828, |
|
"logps/rejected": -208.70657348632812, |
|
"loss": 450.0896, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.5317475199699402, |
|
"rewards/margins": 0.12720224261283875, |
|
"rewards/rejected": 0.40454530715942383, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.9788013609002879, |
|
"grad_norm": 2999.4619162797794, |
|
"learning_rate": 6.677049141901314e-10, |
|
"logits/chosen": 4800.06103515625, |
|
"logits/rejected": 4752.68701171875, |
|
"logps/chosen": -204.03689575195312, |
|
"logps/rejected": -210.6210174560547, |
|
"loss": 434.911, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": 0.3801441788673401, |
|
"rewards/margins": -0.11329132318496704, |
|
"rewards/rejected": 0.49343547224998474, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.98403559277676, |
|
"grad_norm": 2752.7633972308495, |
|
"learning_rate": 3.7565720299687077e-10, |
|
"logits/chosen": 6091.1953125, |
|
"logits/rejected": 5330.6806640625, |
|
"logps/chosen": -270.46978759765625, |
|
"logps/rejected": -219.2836456298828, |
|
"loss": 473.733, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": 0.4513503611087799, |
|
"rewards/margins": -0.0449112243950367, |
|
"rewards/rejected": 0.4962615966796875, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.9892698246532321, |
|
"grad_norm": 2528.626552808556, |
|
"learning_rate": 1.6698199452053197e-10, |
|
"logits/chosen": 4495.45458984375, |
|
"logits/rejected": 4576.0517578125, |
|
"logps/chosen": -214.46987915039062, |
|
"logps/rejected": -207.4097137451172, |
|
"loss": 442.3405, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": 0.38978248834609985, |
|
"rewards/margins": -0.048971425741910934, |
|
"rewards/rejected": 0.4387539029121399, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.9945040565297043, |
|
"grad_norm": 2585.4214547887354, |
|
"learning_rate": 4.174898458556009e-11, |
|
"logits/chosen": 5962.24658203125, |
|
"logits/rejected": 4326.06396484375, |
|
"logps/chosen": -230.4096221923828, |
|
"logps/rejected": -197.4384765625, |
|
"loss": 445.7363, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.42028751969337463, |
|
"rewards/margins": 0.060069866478443146, |
|
"rewards/rejected": 0.3602176308631897, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.9997382884061764, |
|
"grad_norm": 2873.788980614392, |
|
"learning_rate": 0.0, |
|
"logits/chosen": 6063.86083984375, |
|
"logits/rejected": 5068.90869140625, |
|
"logps/chosen": -257.5758361816406, |
|
"logps/rejected": -246.18759155273438, |
|
"loss": 479.5838, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": 0.4219503402709961, |
|
"rewards/margins": 0.048766739666461945, |
|
"rewards/rejected": 0.37318360805511475, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 0.9997382884061764, |
|
"step": 1910, |
|
"total_flos": 0.0, |
|
"train_loss": 459.2928345845008, |
|
"train_runtime": 17326.5441, |
|
"train_samples_per_second": 3.528, |
|
"train_steps_per_second": 0.11 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 1910, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 1000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|