|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9997382884061764, |
|
"eval_steps": 500, |
|
"global_step": 1910, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0005234231876472127, |
|
"grad_norm": 18.219385651116053, |
|
"learning_rate": 2.617801047120419e-09, |
|
"logits/chosen": 5870.685546875, |
|
"logits/rejected": 4942.87255859375, |
|
"logps/chosen": -300.06866455078125, |
|
"logps/rejected": -172.3806915283203, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.005234231876472127, |
|
"grad_norm": 17.235981665270252, |
|
"learning_rate": 2.6178010471204188e-08, |
|
"logits/chosen": 4513.25439453125, |
|
"logits/rejected": 4184.88818359375, |
|
"logps/chosen": -237.9716033935547, |
|
"logps/rejected": -219.00857543945312, |
|
"loss": 0.6933, |
|
"rewards/accuracies": 0.4166666567325592, |
|
"rewards/chosen": -0.00043410700163803995, |
|
"rewards/margins": -0.00041542822145856917, |
|
"rewards/rejected": -1.8678772903513163e-05, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.010468463752944255, |
|
"grad_norm": 16.065934368869318, |
|
"learning_rate": 5.2356020942408376e-08, |
|
"logits/chosen": 6490.0400390625, |
|
"logits/rejected": 5858.52490234375, |
|
"logps/chosen": -313.576171875, |
|
"logps/rejected": -287.2350158691406, |
|
"loss": 0.693, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.0005803096573799849, |
|
"rewards/margins": 0.0009115642169490457, |
|
"rewards/rejected": -0.0003312545013613999, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.015702695629416383, |
|
"grad_norm": 15.920025055683531, |
|
"learning_rate": 7.853403141361257e-08, |
|
"logits/chosen": 6130.9091796875, |
|
"logits/rejected": 4619.53173828125, |
|
"logps/chosen": -287.20556640625, |
|
"logps/rejected": -230.14352416992188, |
|
"loss": 0.6928, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.00016442504420410842, |
|
"rewards/margins": 0.0013787832576781511, |
|
"rewards/rejected": -0.0012143582571297884, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.02093692750588851, |
|
"grad_norm": 19.041793171522134, |
|
"learning_rate": 1.0471204188481675e-07, |
|
"logits/chosen": 6250.5380859375, |
|
"logits/rejected": 5154.09716796875, |
|
"logps/chosen": -314.29571533203125, |
|
"logps/rejected": -284.4984130859375, |
|
"loss": 0.6927, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.001805333187803626, |
|
"rewards/margins": 0.0019232326885685325, |
|
"rewards/rejected": -0.00011789942800533026, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.02617115938236064, |
|
"grad_norm": 15.141940304638405, |
|
"learning_rate": 1.3089005235602092e-07, |
|
"logits/chosen": 5869.23583984375, |
|
"logits/rejected": 5015.390625, |
|
"logps/chosen": -278.0210876464844, |
|
"logps/rejected": -260.28076171875, |
|
"loss": 0.6917, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.0023084317799657583, |
|
"rewards/margins": 0.0028667484875768423, |
|
"rewards/rejected": -0.0005583164747804403, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.031405391258832765, |
|
"grad_norm": 17.08217856519977, |
|
"learning_rate": 1.5706806282722514e-07, |
|
"logits/chosen": 5986.6494140625, |
|
"logits/rejected": 4455.423828125, |
|
"logps/chosen": -321.358154296875, |
|
"logps/rejected": -236.8417205810547, |
|
"loss": 0.6891, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.0058924416080117226, |
|
"rewards/margins": 0.0071367500349879265, |
|
"rewards/rejected": -0.0012443081941455603, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.036639623135304895, |
|
"grad_norm": 16.204216557148193, |
|
"learning_rate": 1.8324607329842932e-07, |
|
"logits/chosen": 5881.57421875, |
|
"logits/rejected": 5116.564453125, |
|
"logps/chosen": -285.27740478515625, |
|
"logps/rejected": -259.5113830566406, |
|
"loss": 0.6883, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.011950762942433357, |
|
"rewards/margins": 0.011513126082718372, |
|
"rewards/rejected": 0.0004376379365567118, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.04187385501177702, |
|
"grad_norm": 15.596476937669543, |
|
"learning_rate": 2.094240837696335e-07, |
|
"logits/chosen": 5791.3642578125, |
|
"logits/rejected": 4847.74462890625, |
|
"logps/chosen": -272.760009765625, |
|
"logps/rejected": -241.96463012695312, |
|
"loss": 0.6827, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.029420843347907066, |
|
"rewards/margins": 0.02470467798411846, |
|
"rewards/rejected": 0.004716166295111179, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.04710808688824915, |
|
"grad_norm": 16.155654611877022, |
|
"learning_rate": 2.356020942408377e-07, |
|
"logits/chosen": 6104.376953125, |
|
"logits/rejected": 5388.201171875, |
|
"logps/chosen": -293.25665283203125, |
|
"logps/rejected": -278.4584655761719, |
|
"loss": 0.6801, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.03633292764425278, |
|
"rewards/margins": 0.020106201991438866, |
|
"rewards/rejected": 0.01622672937810421, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.05234231876472128, |
|
"grad_norm": 16.685655525961554, |
|
"learning_rate": 2.6178010471204185e-07, |
|
"logits/chosen": 5451.1865234375, |
|
"logits/rejected": 4855.86181640625, |
|
"logps/chosen": -246.4558563232422, |
|
"logps/rejected": -211.8059844970703, |
|
"loss": 0.677, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.020030105486512184, |
|
"rewards/margins": 0.014237035997211933, |
|
"rewards/rejected": 0.005793069489300251, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.05757655064119341, |
|
"grad_norm": 17.053147091965794, |
|
"learning_rate": 2.879581151832461e-07, |
|
"logits/chosen": 4958.31884765625, |
|
"logits/rejected": 3970.31396484375, |
|
"logps/chosen": -246.61898803710938, |
|
"logps/rejected": -188.33499145507812, |
|
"loss": 0.6659, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.029025157913565636, |
|
"rewards/margins": 0.057013750076293945, |
|
"rewards/rejected": -0.02798858843743801, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.06281078251766553, |
|
"grad_norm": 16.545038194152365, |
|
"learning_rate": 3.1413612565445027e-07, |
|
"logits/chosen": 6173.68212890625, |
|
"logits/rejected": 5564.80078125, |
|
"logps/chosen": -292.63348388671875, |
|
"logps/rejected": -283.7936706542969, |
|
"loss": 0.662, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.014459408819675446, |
|
"rewards/margins": 0.08033261448144913, |
|
"rewards/rejected": -0.06587319076061249, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.06804501439413765, |
|
"grad_norm": 19.676877695748942, |
|
"learning_rate": 3.4031413612565446e-07, |
|
"logits/chosen": 6213.53125, |
|
"logits/rejected": 4406.7197265625, |
|
"logps/chosen": -277.0280456542969, |
|
"logps/rejected": -229.03775024414062, |
|
"loss": 0.6505, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 0.013297341763973236, |
|
"rewards/margins": 0.1205199584364891, |
|
"rewards/rejected": -0.10722261667251587, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.07327924627060979, |
|
"grad_norm": 19.250986468179725, |
|
"learning_rate": 3.6649214659685864e-07, |
|
"logits/chosen": 5914.55908203125, |
|
"logits/rejected": 5749.5546875, |
|
"logps/chosen": -303.387939453125, |
|
"logps/rejected": -319.0635681152344, |
|
"loss": 0.6504, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.20868118107318878, |
|
"rewards/margins": 0.06486045569181442, |
|
"rewards/rejected": -0.2735416293144226, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.07851347814708191, |
|
"grad_norm": 19.120241781934865, |
|
"learning_rate": 3.926701570680628e-07, |
|
"logits/chosen": 5598.470703125, |
|
"logits/rejected": 5063.0654296875, |
|
"logps/chosen": -288.4744873046875, |
|
"logps/rejected": -278.3064270019531, |
|
"loss": 0.6463, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.03469898924231529, |
|
"rewards/margins": 0.1066732183098793, |
|
"rewards/rejected": -0.14137223362922668, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.08374771002355404, |
|
"grad_norm": 20.879794925554606, |
|
"learning_rate": 4.18848167539267e-07, |
|
"logits/chosen": 5560.33642578125, |
|
"logits/rejected": 5010.7998046875, |
|
"logps/chosen": -242.72854614257812, |
|
"logps/rejected": -265.71160888671875, |
|
"loss": 0.6381, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.07217627763748169, |
|
"rewards/margins": 0.1330757886171341, |
|
"rewards/rejected": -0.060899507254362106, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.08898194190002617, |
|
"grad_norm": 19.704247956213553, |
|
"learning_rate": 4.450261780104712e-07, |
|
"logits/chosen": 6847.02587890625, |
|
"logits/rejected": 5505.11083984375, |
|
"logps/chosen": -308.2012634277344, |
|
"logps/rejected": -309.3101806640625, |
|
"loss": 0.6024, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.14331135153770447, |
|
"rewards/margins": 0.27965664863586426, |
|
"rewards/rejected": -0.4229680001735687, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.0942161737764983, |
|
"grad_norm": 24.516429995380633, |
|
"learning_rate": 4.712041884816754e-07, |
|
"logits/chosen": 6166.9541015625, |
|
"logits/rejected": 4428.91064453125, |
|
"logps/chosen": -315.04620361328125, |
|
"logps/rejected": -281.04083251953125, |
|
"loss": 0.6034, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.25546202063560486, |
|
"rewards/margins": 0.21370892226696014, |
|
"rewards/rejected": -0.4691709876060486, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.09945040565297043, |
|
"grad_norm": 25.97707933799451, |
|
"learning_rate": 4.973821989528796e-07, |
|
"logits/chosen": 5838.8359375, |
|
"logits/rejected": 5683.42529296875, |
|
"logps/chosen": -275.9669189453125, |
|
"logps/rejected": -315.14813232421875, |
|
"loss": 0.6116, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.12693677842617035, |
|
"rewards/margins": 0.17842599749565125, |
|
"rewards/rejected": -0.3053628206253052, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.10468463752944256, |
|
"grad_norm": 21.5648227076861, |
|
"learning_rate": 4.999661831436498e-07, |
|
"logits/chosen": 5913.36572265625, |
|
"logits/rejected": 5817.02001953125, |
|
"logps/chosen": -281.7383117675781, |
|
"logps/rejected": -325.94866943359375, |
|
"loss": 0.6186, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.1391076147556305, |
|
"rewards/margins": 0.27709800004959106, |
|
"rewards/rejected": -0.41620558500289917, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.10991886940591468, |
|
"grad_norm": 23.653917732151342, |
|
"learning_rate": 4.998492971140339e-07, |
|
"logits/chosen": 5833.1513671875, |
|
"logits/rejected": 5763.98828125, |
|
"logps/chosen": -301.4639587402344, |
|
"logps/rejected": -368.21435546875, |
|
"loss": 0.6093, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.3761170506477356, |
|
"rewards/margins": 0.3480328917503357, |
|
"rewards/rejected": -0.7241500020027161, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.11515310128238682, |
|
"grad_norm": 24.40546569275766, |
|
"learning_rate": 4.996489634487865e-07, |
|
"logits/chosen": 5946.09765625, |
|
"logits/rejected": 5071.6171875, |
|
"logps/chosen": -338.2851257324219, |
|
"logps/rejected": -332.00750732421875, |
|
"loss": 0.6073, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.4733448028564453, |
|
"rewards/margins": 0.3421005308628082, |
|
"rewards/rejected": -0.8154453039169312, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.12038733315885894, |
|
"grad_norm": 27.21646553864263, |
|
"learning_rate": 4.993652490577246e-07, |
|
"logits/chosen": 6565.8515625, |
|
"logits/rejected": 5242.6064453125, |
|
"logps/chosen": -319.81707763671875, |
|
"logps/rejected": -330.72802734375, |
|
"loss": 0.5751, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.38077861070632935, |
|
"rewards/margins": 0.39619022607803345, |
|
"rewards/rejected": -0.7769688367843628, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.12562156503533106, |
|
"grad_norm": 26.32441631739712, |
|
"learning_rate": 4.9899824869915e-07, |
|
"logits/chosen": 5868.26904296875, |
|
"logits/rejected": 4399.78662109375, |
|
"logps/chosen": -337.031982421875, |
|
"logps/rejected": -297.947998046875, |
|
"loss": 0.5993, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.5988413095474243, |
|
"rewards/margins": 0.3144153952598572, |
|
"rewards/rejected": -0.9132567644119263, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.13085579691180318, |
|
"grad_norm": 44.90708876251853, |
|
"learning_rate": 4.985480849482012e-07, |
|
"logits/chosen": 5798.130859375, |
|
"logits/rejected": 5872.59912109375, |
|
"logps/chosen": -307.9162902832031, |
|
"logps/rejected": -349.7005920410156, |
|
"loss": 0.5857, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.46432504057884216, |
|
"rewards/margins": 0.2551492154598236, |
|
"rewards/rejected": -0.7194742560386658, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.1360900287882753, |
|
"grad_norm": 45.19986115391165, |
|
"learning_rate": 4.980149081559142e-07, |
|
"logits/chosen": 6476.58447265625, |
|
"logits/rejected": 6131.4462890625, |
|
"logps/chosen": -367.92474365234375, |
|
"logps/rejected": -391.8291320800781, |
|
"loss": 0.5694, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.5577724575996399, |
|
"rewards/margins": 0.386624276638031, |
|
"rewards/rejected": -0.9443964958190918, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.14132426066474746, |
|
"grad_norm": 42.08733545483534, |
|
"learning_rate": 4.973988963990065e-07, |
|
"logits/chosen": 5284.0224609375, |
|
"logits/rejected": 4501.1884765625, |
|
"logps/chosen": -320.0960693359375, |
|
"logps/rejected": -377.3937683105469, |
|
"loss": 0.5544, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.6243572235107422, |
|
"rewards/margins": 0.6635113954544067, |
|
"rewards/rejected": -1.2878687381744385, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.14655849254121958, |
|
"grad_norm": 53.662262542495505, |
|
"learning_rate": 4.967002554204008e-07, |
|
"logits/chosen": 5689.02197265625, |
|
"logits/rejected": 4741.4453125, |
|
"logps/chosen": -367.5455627441406, |
|
"logps/rejected": -406.5661315917969, |
|
"loss": 0.5339, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.8560550808906555, |
|
"rewards/margins": 0.7749707698822021, |
|
"rewards/rejected": -1.6310256719589233, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.1517927244176917, |
|
"grad_norm": 46.63789766427997, |
|
"learning_rate": 4.959192185605087e-07, |
|
"logits/chosen": 5927.48388671875, |
|
"logits/rejected": 5238.05615234375, |
|
"logps/chosen": -354.1465148925781, |
|
"logps/rejected": -415.89349365234375, |
|
"loss": 0.5585, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.7841471433639526, |
|
"rewards/margins": 0.5948286056518555, |
|
"rewards/rejected": -1.3789756298065186, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.15702695629416383, |
|
"grad_norm": 34.308974438258886, |
|
"learning_rate": 4.950560466792969e-07, |
|
"logits/chosen": 6596.2265625, |
|
"logits/rejected": 5299.0927734375, |
|
"logps/chosen": -406.15313720703125, |
|
"logps/rejected": -429.3497009277344, |
|
"loss": 0.5435, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.7677477598190308, |
|
"rewards/margins": 0.6641772985458374, |
|
"rewards/rejected": -1.4319250583648682, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.16226118817063595, |
|
"grad_norm": 82.23748697014982, |
|
"learning_rate": 4.941110280691619e-07, |
|
"logits/chosen": 5986.08203125, |
|
"logits/rejected": 4740.41259765625, |
|
"logps/chosen": -357.654052734375, |
|
"logps/rejected": -365.6220703125, |
|
"loss": 0.5538, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.7639473676681519, |
|
"rewards/margins": 0.6894143223762512, |
|
"rewards/rejected": -1.4533617496490479, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.16749542004710807, |
|
"grad_norm": 41.19448913938724, |
|
"learning_rate": 4.930844783586424e-07, |
|
"logits/chosen": 5201.2353515625, |
|
"logits/rejected": 4921.05322265625, |
|
"logps/chosen": -310.82574462890625, |
|
"logps/rejected": -375.40509033203125, |
|
"loss": 0.5533, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.9284757375717163, |
|
"rewards/margins": 0.5470661520957947, |
|
"rewards/rejected": -1.4755420684814453, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.17272965192358022, |
|
"grad_norm": 76.38056074864738, |
|
"learning_rate": 4.919767404070033e-07, |
|
"logits/chosen": 6316.92236328125, |
|
"logits/rejected": 5181.3857421875, |
|
"logps/chosen": -405.858154296875, |
|
"logps/rejected": -420.537109375, |
|
"loss": 0.548, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.1969449520111084, |
|
"rewards/margins": 0.556014895439148, |
|
"rewards/rejected": -1.752959966659546, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.17796388380005235, |
|
"grad_norm": 67.01683401046546, |
|
"learning_rate": 4.907881841897216e-07, |
|
"logits/chosen": 5539.5302734375, |
|
"logits/rejected": 5639.63037109375, |
|
"logps/chosen": -424.65478515625, |
|
"logps/rejected": -517.6135864257812, |
|
"loss": 0.5626, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -1.5640079975128174, |
|
"rewards/margins": 0.5679855942726135, |
|
"rewards/rejected": -2.1319937705993652, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.18319811567652447, |
|
"grad_norm": 31.85827136868456, |
|
"learning_rate": 4.895192066749189e-07, |
|
"logits/chosen": 5924.69580078125, |
|
"logits/rejected": 4566.55419921875, |
|
"logps/chosen": -421.01739501953125, |
|
"logps/rejected": -438.85858154296875, |
|
"loss": 0.5291, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.4220540523529053, |
|
"rewards/margins": 0.5200406312942505, |
|
"rewards/rejected": -1.9420945644378662, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.1884323475529966, |
|
"grad_norm": 41.33731178691857, |
|
"learning_rate": 4.881702316907768e-07, |
|
"logits/chosen": 6177.900390625, |
|
"logits/rejected": 4649.4853515625, |
|
"logps/chosen": -359.7803039550781, |
|
"logps/rejected": -367.48541259765625, |
|
"loss": 0.5359, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.821795642375946, |
|
"rewards/margins": 0.5971574783325195, |
|
"rewards/rejected": -1.4189531803131104, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.19366657942946872, |
|
"grad_norm": 49.52794113034413, |
|
"learning_rate": 4.86741709783982e-07, |
|
"logits/chosen": 5590.2451171875, |
|
"logits/rejected": 4720.5322265625, |
|
"logps/chosen": -358.56243896484375, |
|
"logps/rejected": -439.32843017578125, |
|
"loss": 0.5541, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.0032289028167725, |
|
"rewards/margins": 0.9636434316635132, |
|
"rewards/rejected": -1.9668724536895752, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.19890081130594087, |
|
"grad_norm": 44.092881923343576, |
|
"learning_rate": 4.85234118069247e-07, |
|
"logits/chosen": 6412.9873046875, |
|
"logits/rejected": 5594.14306640625, |
|
"logps/chosen": -396.65447998046875, |
|
"logps/rejected": -428.21490478515625, |
|
"loss": 0.5464, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.0084364414215088, |
|
"rewards/margins": 0.5967626571655273, |
|
"rewards/rejected": -1.6051992177963257, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.204135043182413, |
|
"grad_norm": 76.52233652264678, |
|
"learning_rate": 4.836479600699578e-07, |
|
"logits/chosen": 5924.59326171875, |
|
"logits/rejected": 5504.5029296875, |
|
"logps/chosen": -342.6595153808594, |
|
"logps/rejected": -414.57427978515625, |
|
"loss": 0.59, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.7394064664840698, |
|
"rewards/margins": 0.5539022088050842, |
|
"rewards/rejected": -1.2933086156845093, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.2093692750588851, |
|
"grad_norm": 47.20888121465801, |
|
"learning_rate": 4.819837655500013e-07, |
|
"logits/chosen": 6445.34130859375, |
|
"logits/rejected": 6306.50390625, |
|
"logps/chosen": -414.34515380859375, |
|
"logps/rejected": -472.36212158203125, |
|
"loss": 0.5399, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.1763949394226074, |
|
"rewards/margins": 0.5224038362503052, |
|
"rewards/rejected": -1.6987988948822021, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.21460350693535724, |
|
"grad_norm": 39.87824487927702, |
|
"learning_rate": 4.802420903368285e-07, |
|
"logits/chosen": 5955.009765625, |
|
"logits/rejected": 4885.7529296875, |
|
"logps/chosen": -395.8122863769531, |
|
"logps/rejected": -484.23565673828125, |
|
"loss": 0.5291, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.4695124626159668, |
|
"rewards/margins": 0.973471462726593, |
|
"rewards/rejected": -2.442983865737915, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.21983773881182936, |
|
"grad_norm": 53.89952582024282, |
|
"learning_rate": 4.784235161358123e-07, |
|
"logits/chosen": 6697.92822265625, |
|
"logits/rejected": 5091.77685546875, |
|
"logps/chosen": -452.1192321777344, |
|
"logps/rejected": -489.41015625, |
|
"loss": 0.522, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.5608818531036377, |
|
"rewards/margins": 0.6900812983512878, |
|
"rewards/rejected": -2.2509632110595703, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.22507197068830148, |
|
"grad_norm": 53.64934742868112, |
|
"learning_rate": 4.7652865033596314e-07, |
|
"logits/chosen": 6347.36865234375, |
|
"logits/rejected": 5186.87109375, |
|
"logps/chosen": -429.01214599609375, |
|
"logps/rejected": -489.399169921875, |
|
"loss": 0.5164, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -1.5157774686813354, |
|
"rewards/margins": 0.6435315608978271, |
|
"rewards/rejected": -2.159308910369873, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.23030620256477363, |
|
"grad_norm": 43.316512493038786, |
|
"learning_rate": 4.7455812580706534e-07, |
|
"logits/chosen": 5819.4365234375, |
|
"logits/rejected": 4712.92431640625, |
|
"logps/chosen": -383.89447021484375, |
|
"logps/rejected": -428.0326232910156, |
|
"loss": 0.5051, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.0773056745529175, |
|
"rewards/margins": 0.6253499388694763, |
|
"rewards/rejected": -1.702655553817749, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.23554043444124576, |
|
"grad_norm": 40.32463549649978, |
|
"learning_rate": 4.725126006883046e-07, |
|
"logits/chosen": 5460.0400390625, |
|
"logits/rejected": 5187.6435546875, |
|
"logps/chosen": -382.9438781738281, |
|
"logps/rejected": -463.376220703125, |
|
"loss": 0.5456, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.2239887714385986, |
|
"rewards/margins": 0.6800339818000793, |
|
"rewards/rejected": -1.9040225744247437, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.24077466631771788, |
|
"grad_norm": 37.48349919523277, |
|
"learning_rate": 4.703927581684539e-07, |
|
"logits/chosen": 5890.5068359375, |
|
"logits/rejected": 5778.6552734375, |
|
"logps/chosen": -375.30609130859375, |
|
"logps/rejected": -398.3433532714844, |
|
"loss": 0.5675, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.0234365463256836, |
|
"rewards/margins": 0.4840970039367676, |
|
"rewards/rejected": -1.5075336694717407, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.24600889819419, |
|
"grad_norm": 33.46265791395517, |
|
"learning_rate": 4.68199306257695e-07, |
|
"logits/chosen": 5615.3662109375, |
|
"logits/rejected": 4484.6279296875, |
|
"logps/chosen": -362.4558410644531, |
|
"logps/rejected": -425.9368591308594, |
|
"loss": 0.5021, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -0.9484370350837708, |
|
"rewards/margins": 0.7808512449264526, |
|
"rewards/rejected": -1.729288101196289, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.2512431300706621, |
|
"grad_norm": 41.07649079601629, |
|
"learning_rate": 4.6593297755114776e-07, |
|
"logits/chosen": 6466.3056640625, |
|
"logits/rejected": 6035.984375, |
|
"logps/chosen": -378.1504821777344, |
|
"logps/rejected": -465.003173828125, |
|
"loss": 0.5469, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.1055399179458618, |
|
"rewards/margins": 0.6051799654960632, |
|
"rewards/rejected": -1.7107200622558594, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.2564773619471343, |
|
"grad_norm": 37.586937993939735, |
|
"learning_rate": 4.635945289841902e-07, |
|
"logits/chosen": 4972.3583984375, |
|
"logits/rejected": 5045.6435546875, |
|
"logps/chosen": -335.74884033203125, |
|
"logps/rejected": -420.6666564941406, |
|
"loss": 0.5707, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -1.1341283321380615, |
|
"rewards/margins": 0.4123230576515198, |
|
"rewards/rejected": -1.546451210975647, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.26171159382360637, |
|
"grad_norm": 42.61584498258183, |
|
"learning_rate": 4.611847415796476e-07, |
|
"logits/chosen": 6352.6376953125, |
|
"logits/rejected": 5433.37158203125, |
|
"logps/chosen": -395.6455383300781, |
|
"logps/rejected": -416.5750427246094, |
|
"loss": 0.5502, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.0464991331100464, |
|
"rewards/margins": 0.5715607404708862, |
|
"rewards/rejected": -1.6180601119995117, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.2669458257000785, |
|
"grad_norm": 31.96372069934404, |
|
"learning_rate": 4.5870442018693773e-07, |
|
"logits/chosen": 5973.494140625, |
|
"logits/rejected": 5411.9462890625, |
|
"logps/chosen": -370.54351806640625, |
|
"logps/rejected": -440.241943359375, |
|
"loss": 0.5114, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.9235299825668335, |
|
"rewards/margins": 0.6839796900749207, |
|
"rewards/rejected": -1.6075098514556885, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.2721800575765506, |
|
"grad_norm": 54.78695276780696, |
|
"learning_rate": 4.5615439321325735e-07, |
|
"logits/chosen": 6326.53125, |
|
"logits/rejected": 5008.32275390625, |
|
"logps/chosen": -359.27716064453125, |
|
"logps/rejected": -423.32672119140625, |
|
"loss": 0.5171, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.7673004269599915, |
|
"rewards/margins": 0.7084370255470276, |
|
"rewards/rejected": -1.475737452507019, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.27741428945302277, |
|
"grad_norm": 41.821437903417056, |
|
"learning_rate": 4.535355123469008e-07, |
|
"logits/chosen": 5782.46484375, |
|
"logits/rejected": 5206.86962890625, |
|
"logps/chosen": -348.8133850097656, |
|
"logps/rejected": -427.0050354003906, |
|
"loss": 0.5162, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.8036266565322876, |
|
"rewards/margins": 0.8553822636604309, |
|
"rewards/rejected": -1.6590089797973633, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.2826485213294949, |
|
"grad_norm": 36.104040296185744, |
|
"learning_rate": 4.5084865227280366e-07, |
|
"logits/chosen": 5758.5625, |
|
"logits/rejected": 5162.15185546875, |
|
"logps/chosen": -382.82147216796875, |
|
"logps/rejected": -439.8946838378906, |
|
"loss": 0.5233, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.0022233724594116, |
|
"rewards/margins": 0.8220928311347961, |
|
"rewards/rejected": -1.8243162631988525, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.287882753205967, |
|
"grad_norm": 73.05209001650977, |
|
"learning_rate": 4.4809471038040437e-07, |
|
"logits/chosen": 5572.75537109375, |
|
"logits/rejected": 4392.76708984375, |
|
"logps/chosen": -439.218994140625, |
|
"logps/rejected": -457.9751892089844, |
|
"loss": 0.5408, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -1.466726303100586, |
|
"rewards/margins": 0.7607309222221375, |
|
"rewards/rejected": -2.227457284927368, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.29311698508243916, |
|
"grad_norm": 48.9029845725239, |
|
"learning_rate": 4.4527460646392386e-07, |
|
"logits/chosen": 5651.72216796875, |
|
"logits/rejected": 5173.35986328125, |
|
"logps/chosen": -379.19842529296875, |
|
"logps/rejected": -442.13751220703125, |
|
"loss": 0.5675, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -1.3517531156539917, |
|
"rewards/margins": 0.562275230884552, |
|
"rewards/rejected": -1.9140284061431885, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.29835121695891126, |
|
"grad_norm": 43.677593505995624, |
|
"learning_rate": 4.4238928241516163e-07, |
|
"logits/chosen": 6816.3515625, |
|
"logits/rejected": 5143.58349609375, |
|
"logps/chosen": -437.1297912597656, |
|
"logps/rejected": -473.25128173828125, |
|
"loss": 0.5295, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.2189630270004272, |
|
"rewards/margins": 0.9794257879257202, |
|
"rewards/rejected": -2.1983885765075684, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.3035854488353834, |
|
"grad_norm": 63.11423854936817, |
|
"learning_rate": 4.394397019089116e-07, |
|
"logits/chosen": 6103.3896484375, |
|
"logits/rejected": 4841.986328125, |
|
"logps/chosen": -409.55291748046875, |
|
"logps/rejected": -423.4261779785156, |
|
"loss": 0.5156, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -1.1972742080688477, |
|
"rewards/margins": 0.6304734945297241, |
|
"rewards/rejected": -1.8277477025985718, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.30881968071185556, |
|
"grad_norm": 54.584236713891464, |
|
"learning_rate": 4.3642685008110246e-07, |
|
"logits/chosen": 5786.09765625, |
|
"logits/rejected": 4412.03515625, |
|
"logps/chosen": -372.55584716796875, |
|
"logps/rejected": -439.2442321777344, |
|
"loss": 0.5591, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -1.1865366697311401, |
|
"rewards/margins": 0.9408473968505859, |
|
"rewards/rejected": -2.1273841857910156, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.31405391258832765, |
|
"grad_norm": 40.053790563245464, |
|
"learning_rate": 4.333517331997704e-07, |
|
"logits/chosen": 6298.62158203125, |
|
"logits/rejected": 5869.5048828125, |
|
"logps/chosen": -428.19195556640625, |
|
"logps/rejected": -478.00067138671875, |
|
"loss": 0.5143, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.3069322109222412, |
|
"rewards/margins": 0.6191404461860657, |
|
"rewards/rejected": -1.9260727167129517, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.3192881444647998, |
|
"grad_norm": 35.0814337208229, |
|
"learning_rate": 4.302153783289736e-07, |
|
"logits/chosen": 6017.439453125, |
|
"logits/rejected": 5107.21435546875, |
|
"logps/chosen": -382.84521484375, |
|
"logps/rejected": -509.23162841796875, |
|
"loss": 0.4236, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.1522138118743896, |
|
"rewards/margins": 1.0982835292816162, |
|
"rewards/rejected": -2.250497341156006, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.3245223763412719, |
|
"grad_norm": 56.2222562736252, |
|
"learning_rate": 4.2701883298576124e-07, |
|
"logits/chosen": 5797.8349609375, |
|
"logits/rejected": 5281.35791015625, |
|
"logps/chosen": -443.8690490722656, |
|
"logps/rejected": -503.81103515625, |
|
"loss": 0.5353, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.6236203908920288, |
|
"rewards/margins": 0.9502062797546387, |
|
"rewards/rejected": -2.573826313018799, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.32975660821774405, |
|
"grad_norm": 55.63889247197654, |
|
"learning_rate": 4.237631647903115e-07, |
|
"logits/chosen": 5690.2646484375, |
|
"logits/rejected": 4674.8740234375, |
|
"logps/chosen": -455.040283203125, |
|
"logps/rejected": -506.5398864746094, |
|
"loss": 0.4961, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.7885202169418335, |
|
"rewards/margins": 0.806254506111145, |
|
"rewards/rejected": -2.5947747230529785, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.33499084009421615, |
|
"grad_norm": 50.31232935422226, |
|
"learning_rate": 4.204494611093548e-07, |
|
"logits/chosen": 6034.45556640625, |
|
"logits/rejected": 4252.986328125, |
|
"logps/chosen": -460.38092041015625, |
|
"logps/rejected": -486.3749084472656, |
|
"loss": 0.5231, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.4732345342636108, |
|
"rewards/margins": 0.9121103286743164, |
|
"rewards/rejected": -2.385344982147217, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.3402250719706883, |
|
"grad_norm": 45.5866926508609, |
|
"learning_rate": 4.1707882869300235e-07, |
|
"logits/chosen": 6080.8759765625, |
|
"logits/rejected": 4943.0146484375, |
|
"logps/chosen": -413.87408447265625, |
|
"logps/rejected": -431.1224670410156, |
|
"loss": 0.5014, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -1.335010051727295, |
|
"rewards/margins": 0.7433810830116272, |
|
"rewards/rejected": -2.0783913135528564, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.34545930384716045, |
|
"grad_norm": 49.967926368130314, |
|
"learning_rate": 4.136523933051005e-07, |
|
"logits/chosen": 6260.0546875, |
|
"logits/rejected": 5515.7265625, |
|
"logps/chosen": -427.33453369140625, |
|
"logps/rejected": -463.00830078125, |
|
"loss": 0.5026, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -1.4806843996047974, |
|
"rewards/margins": 0.5847845673561096, |
|
"rewards/rejected": -2.0654690265655518, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.35069353572363254, |
|
"grad_norm": 43.43487493207468, |
|
"learning_rate": 4.101712993472348e-07, |
|
"logits/chosen": 6464.7451171875, |
|
"logits/rejected": 5535.1884765625, |
|
"logps/chosen": -382.25323486328125, |
|
"logps/rejected": -416.734130859375, |
|
"loss": 0.5258, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -1.05479097366333, |
|
"rewards/margins": 0.7567101716995239, |
|
"rewards/rejected": -1.811500906944275, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.3559277676001047, |
|
"grad_norm": 75.11106325290936, |
|
"learning_rate": 4.066367094765091e-07, |
|
"logits/chosen": 6027.20458984375, |
|
"logits/rejected": 4826.53515625, |
|
"logps/chosen": -394.6514587402344, |
|
"logps/rejected": -457.4222717285156, |
|
"loss": 0.4753, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.0180187225341797, |
|
"rewards/margins": 1.0836880207061768, |
|
"rewards/rejected": -2.1017067432403564, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.3611619994765768, |
|
"grad_norm": 39.3035282380294, |
|
"learning_rate": 4.0304980421722766e-07, |
|
"logits/chosen": 5874.466796875, |
|
"logits/rejected": 5295.6796875, |
|
"logps/chosen": -425.7220153808594, |
|
"logps/rejected": -494.6651916503906, |
|
"loss": 0.4952, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.3429622650146484, |
|
"rewards/margins": 0.8915923833847046, |
|
"rewards/rejected": -2.2345547676086426, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.36639623135304894, |
|
"grad_norm": 72.63738048545449, |
|
"learning_rate": 3.994117815666095e-07, |
|
"logits/chosen": 5882.6201171875, |
|
"logits/rejected": 4352.89453125, |
|
"logps/chosen": -540.1171875, |
|
"logps/rejected": -573.46533203125, |
|
"loss": 0.5252, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.0739383697509766, |
|
"rewards/margins": 1.0526468753814697, |
|
"rewards/rejected": -3.1265854835510254, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.3716304632295211, |
|
"grad_norm": 43.821611769673716, |
|
"learning_rate": 3.957238565946671e-07, |
|
"logits/chosen": 5647.4677734375, |
|
"logits/rejected": 4672.6025390625, |
|
"logps/chosen": -402.78948974609375, |
|
"logps/rejected": -439.42181396484375, |
|
"loss": 0.5782, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.4725525379180908, |
|
"rewards/margins": 0.6136714816093445, |
|
"rewards/rejected": -2.08622407913208, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.3768646951059932, |
|
"grad_norm": 42.031950023528594, |
|
"learning_rate": 3.9198726103838306e-07, |
|
"logits/chosen": 5673.10546875, |
|
"logits/rejected": 5009.50537109375, |
|
"logps/chosen": -369.00616455078125, |
|
"logps/rejected": -408.2512512207031, |
|
"loss": 0.4932, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.9950849413871765, |
|
"rewards/margins": 0.7163550853729248, |
|
"rewards/rejected": -1.711439847946167, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.38209892698246534, |
|
"grad_norm": 40.901024619350444, |
|
"learning_rate": 3.8820324289031946e-07, |
|
"logits/chosen": 5839.904296875, |
|
"logits/rejected": 5013.7724609375, |
|
"logps/chosen": -351.48541259765625, |
|
"logps/rejected": -451.85772705078125, |
|
"loss": 0.4757, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.088209629058838, |
|
"rewards/margins": 1.003163456916809, |
|
"rewards/rejected": -2.0913729667663574, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.38733315885893743, |
|
"grad_norm": 67.27072060484619, |
|
"learning_rate": 3.84373065981799e-07, |
|
"logits/chosen": 6519.392578125, |
|
"logits/rejected": 4812.6298828125, |
|
"logps/chosen": -426.72235107421875, |
|
"logps/rejected": -512.9215087890625, |
|
"loss": 0.4597, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -1.334121584892273, |
|
"rewards/margins": 1.1064695119857788, |
|
"rewards/rejected": -2.440591335296631, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.3925673907354096, |
|
"grad_norm": 59.86556321970916, |
|
"learning_rate": 3.8049800956079545e-07, |
|
"logits/chosen": 6076.96533203125, |
|
"logits/rejected": 5167.3095703125, |
|
"logps/chosen": -461.48333740234375, |
|
"logps/rejected": -533.3235473632812, |
|
"loss": 0.5323, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.7070420980453491, |
|
"rewards/margins": 1.1307730674743652, |
|
"rewards/rejected": -2.837815046310425, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.39780162261188173, |
|
"grad_norm": 33.685203730626526, |
|
"learning_rate": 3.7657936786467525e-07, |
|
"logits/chosen": 5342.8798828125, |
|
"logits/rejected": 4421.5263671875, |
|
"logps/chosen": -402.7789001464844, |
|
"logps/rejected": -472.66015625, |
|
"loss": 0.4928, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.5052649974822998, |
|
"rewards/margins": 0.917253851890564, |
|
"rewards/rejected": -2.422518491744995, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.40303585448835383, |
|
"grad_norm": 40.729237557670544, |
|
"learning_rate": 3.7261844968793226e-07, |
|
"logits/chosen": 4545.2060546875, |
|
"logits/rejected": 4567.5732421875, |
|
"logps/chosen": -330.99951171875, |
|
"logps/rejected": -459.3143615722656, |
|
"loss": 0.5137, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.1079671382904053, |
|
"rewards/margins": 0.9936937093734741, |
|
"rewards/rejected": -2.101661205291748, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.408270086364826, |
|
"grad_norm": 41.80861205828446, |
|
"learning_rate": 3.6861657794506187e-07, |
|
"logits/chosen": 5142.6376953125, |
|
"logits/rejected": 4762.04296875, |
|
"logps/chosen": -388.6526794433594, |
|
"logps/rejected": -440.11773681640625, |
|
"loss": 0.5791, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.457234263420105, |
|
"rewards/margins": 0.5115066766738892, |
|
"rewards/rejected": -1.968740701675415, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.4135043182412981, |
|
"grad_norm": 33.75516962062128, |
|
"learning_rate": 3.6457508922871777e-07, |
|
"logits/chosen": 6393.16162109375, |
|
"logits/rejected": 4704.26171875, |
|
"logps/chosen": -405.71917724609375, |
|
"logps/rejected": -469.50787353515625, |
|
"loss": 0.4797, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.3708717823028564, |
|
"rewards/margins": 0.9878827929496765, |
|
"rewards/rejected": -2.3587546348571777, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.4187385501177702, |
|
"grad_norm": 69.41446168689265, |
|
"learning_rate": 3.6049533336330084e-07, |
|
"logits/chosen": 6274.9033203125, |
|
"logits/rejected": 4973.19140625, |
|
"logps/chosen": -432.57891845703125, |
|
"logps/rejected": -494.46240234375, |
|
"loss": 0.5065, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.4693964719772339, |
|
"rewards/margins": 1.0210431814193726, |
|
"rewards/rejected": -2.4904398918151855, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.4239727819942423, |
|
"grad_norm": 62.54198407820259, |
|
"learning_rate": 3.56378672954129e-07, |
|
"logits/chosen": 6452.43505859375, |
|
"logits/rejected": 4535.1796875, |
|
"logps/chosen": -467.36920166015625, |
|
"logps/rejected": -505.26416015625, |
|
"loss": 0.4896, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.5940972566604614, |
|
"rewards/margins": 1.0991283655166626, |
|
"rewards/rejected": -2.693225383758545, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.42920701387071447, |
|
"grad_norm": 60.6478894617012, |
|
"learning_rate": 3.5222648293233803e-07, |
|
"logits/chosen": 6424.5205078125, |
|
"logits/rejected": 5873.54150390625, |
|
"logps/chosen": -459.4623107910156, |
|
"logps/rejected": -547.5186767578125, |
|
"loss": 0.4902, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.7614972591400146, |
|
"rewards/margins": 0.9000027775764465, |
|
"rewards/rejected": -2.6614999771118164, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.4344412457471866, |
|
"grad_norm": 45.06218597777103, |
|
"learning_rate": 3.480401500956657e-07, |
|
"logits/chosen": 5537.083984375, |
|
"logits/rejected": 4656.86279296875, |
|
"logps/chosen": -401.616943359375, |
|
"logps/rejected": -469.45294189453125, |
|
"loss": 0.5468, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.5688083171844482, |
|
"rewards/margins": 0.580168604850769, |
|
"rewards/rejected": -2.1489768028259277, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.4396754776236587, |
|
"grad_norm": 54.52932769604021, |
|
"learning_rate": 3.438210726452724e-07, |
|
"logits/chosen": 6457.12548828125, |
|
"logits/rejected": 5661.3583984375, |
|
"logps/chosen": -436.4351501464844, |
|
"logps/rejected": -478.7416076660156, |
|
"loss": 0.5272, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.2537381649017334, |
|
"rewards/margins": 0.7841897010803223, |
|
"rewards/rejected": -2.0379281044006348, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.44490970950013087, |
|
"grad_norm": 50.065290886996, |
|
"learning_rate": 3.395706597187538e-07, |
|
"logits/chosen": 4831.3525390625, |
|
"logits/rejected": 4748.2353515625, |
|
"logps/chosen": -361.46270751953125, |
|
"logps/rejected": -433.6537170410156, |
|
"loss": 0.4847, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.3145052194595337, |
|
"rewards/margins": 0.74875807762146, |
|
"rewards/rejected": -2.063263416290283, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.45014394137660296, |
|
"grad_norm": 52.3633732314044, |
|
"learning_rate": 3.3529033091949986e-07, |
|
"logits/chosen": 5875.1552734375, |
|
"logits/rejected": 5419.3779296875, |
|
"logps/chosen": -449.91156005859375, |
|
"logps/rejected": -558.637451171875, |
|
"loss": 0.5165, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.4811006784439087, |
|
"rewards/margins": 1.0674123764038086, |
|
"rewards/rejected": -2.548513174057007, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.4553781732530751, |
|
"grad_norm": 73.44300921439607, |
|
"learning_rate": 3.309815158425591e-07, |
|
"logits/chosen": 5725.45703125, |
|
"logits/rejected": 5392.0048828125, |
|
"logps/chosen": -429.4095153808594, |
|
"logps/rejected": -524.0256958007812, |
|
"loss": 0.4983, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.4120854139328003, |
|
"rewards/margins": 1.0350888967514038, |
|
"rewards/rejected": -2.447174549102783, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.46061240512954726, |
|
"grad_norm": 52.13481785442569, |
|
"learning_rate": 3.2664565359716536e-07, |
|
"logits/chosen": 5756.67041015625, |
|
"logits/rejected": 4672.0849609375, |
|
"logps/chosen": -459.2340393066406, |
|
"logps/rejected": -534.3760375976562, |
|
"loss": 0.4838, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.9478585720062256, |
|
"rewards/margins": 1.086380124092102, |
|
"rewards/rejected": -3.034238576889038, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.46584663700601936, |
|
"grad_norm": 67.63237467819759, |
|
"learning_rate": 3.222841923260869e-07, |
|
"logits/chosen": 5340.1484375, |
|
"logits/rejected": 4598.82177734375, |
|
"logps/chosen": -484.25640869140625, |
|
"logps/rejected": -572.4221801757812, |
|
"loss": 0.4754, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -2.19539213180542, |
|
"rewards/margins": 1.0763537883758545, |
|
"rewards/rejected": -3.2717461585998535, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.4710808688824915, |
|
"grad_norm": 83.86420244213592, |
|
"learning_rate": 3.1789858872195887e-07, |
|
"logits/chosen": 6498.91650390625, |
|
"logits/rejected": 5262.67919921875, |
|
"logps/chosen": -523.2308349609375, |
|
"logps/rejected": -602.5567626953125, |
|
"loss": 0.4791, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -2.1959385871887207, |
|
"rewards/margins": 1.0925599336624146, |
|
"rewards/rejected": -3.288498640060425, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.4763151007589636, |
|
"grad_norm": 36.7937222415298, |
|
"learning_rate": 3.1349030754075937e-07, |
|
"logits/chosen": 5431.06005859375, |
|
"logits/rejected": 4285.5322265625, |
|
"logps/chosen": -431.21502685546875, |
|
"logps/rejected": -537.7532958984375, |
|
"loss": 0.5054, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.7794075012207031, |
|
"rewards/margins": 1.3082810640335083, |
|
"rewards/rejected": -3.087688446044922, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.48154933263543576, |
|
"grad_norm": 55.18035438094623, |
|
"learning_rate": 3.090608211125931e-07, |
|
"logits/chosen": 5392.5185546875, |
|
"logits/rejected": 4608.42236328125, |
|
"logps/chosen": -412.7171325683594, |
|
"logps/rejected": -508.71490478515625, |
|
"loss": 0.4741, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.6127049922943115, |
|
"rewards/margins": 1.1365652084350586, |
|
"rewards/rejected": -2.749270439147949, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.48678356451190785, |
|
"grad_norm": 44.58108503513362, |
|
"learning_rate": 3.0461160884994487e-07, |
|
"logits/chosen": 5840.9501953125, |
|
"logits/rejected": 5145.94580078125, |
|
"logps/chosen": -455.33843994140625, |
|
"logps/rejected": -515.6210327148438, |
|
"loss": 0.499, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.8489354848861694, |
|
"rewards/margins": 0.7887415885925293, |
|
"rewards/rejected": -2.637676954269409, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.49201779638838, |
|
"grad_norm": 57.73411435833942, |
|
"learning_rate": 3.001441567535681e-07, |
|
"logits/chosen": 6431.67626953125, |
|
"logits/rejected": 5249.001953125, |
|
"logps/chosen": -440.92095947265625, |
|
"logps/rejected": -529.418701171875, |
|
"loss": 0.4821, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -1.4907338619232178, |
|
"rewards/margins": 1.0362895727157593, |
|
"rewards/rejected": -2.5270237922668457, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.49725202826485215, |
|
"grad_norm": 59.18958027082939, |
|
"learning_rate": 2.956599569161724e-07, |
|
"logits/chosen": 5414.20458984375, |
|
"logits/rejected": 4187.3544921875, |
|
"logps/chosen": -389.37335205078125, |
|
"logps/rejected": -450.3434143066406, |
|
"loss": 0.5052, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.5387694835662842, |
|
"rewards/margins": 0.6591954231262207, |
|
"rewards/rejected": -2.197964906692505, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.5024862601413242, |
|
"grad_norm": 42.563050302084065, |
|
"learning_rate": 2.91160507024077e-07, |
|
"logits/chosen": 5768.6162109375, |
|
"logits/rejected": 4807.8056640625, |
|
"logps/chosen": -413.04205322265625, |
|
"logps/rejected": -478.17559814453125, |
|
"loss": 0.5195, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.485231637954712, |
|
"rewards/margins": 0.8964517712593079, |
|
"rewards/rejected": -2.381683826446533, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.5077204920177963, |
|
"grad_norm": 33.46251118490837, |
|
"learning_rate": 2.866473098569953e-07, |
|
"logits/chosen": 5825.630859375, |
|
"logits/rejected": 4860.0419921875, |
|
"logps/chosen": -423.99835205078125, |
|
"logps/rejected": -493.2699279785156, |
|
"loss": 0.4919, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.3664392232894897, |
|
"rewards/margins": 0.9449175596237183, |
|
"rewards/rejected": -2.311356782913208, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.5129547238942685, |
|
"grad_norm": 32.803451972147634, |
|
"learning_rate": 2.8212187278611905e-07, |
|
"logits/chosen": 5577.02197265625, |
|
"logits/rejected": 4832.1171875, |
|
"logps/chosen": -447.1991271972656, |
|
"logps/rejected": -527.7049560546875, |
|
"loss": 0.4697, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.5957579612731934, |
|
"rewards/margins": 1.033022165298462, |
|
"rewards/rejected": -2.628779888153076, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.5181889557707406, |
|
"grad_norm": 64.00615935239229, |
|
"learning_rate": 2.775857072706684e-07, |
|
"logits/chosen": 6070.87353515625, |
|
"logits/rejected": 4420.8466796875, |
|
"logps/chosen": -438.16644287109375, |
|
"logps/rejected": -482.08465576171875, |
|
"loss": 0.5398, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.4822652339935303, |
|
"rewards/margins": 1.0824673175811768, |
|
"rewards/rejected": -2.564732551574707, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.5234231876472127, |
|
"grad_norm": 46.86520969500558, |
|
"learning_rate": 2.7304032835307667e-07, |
|
"logits/chosen": 6216.6162109375, |
|
"logits/rejected": 5469.23974609375, |
|
"logps/chosen": -451.27020263671875, |
|
"logps/rejected": -555.1627807617188, |
|
"loss": 0.5136, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.7239328622817993, |
|
"rewards/margins": 0.8704110383987427, |
|
"rewards/rejected": -2.594343662261963, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.528657419523685, |
|
"grad_norm": 49.63995578440868, |
|
"learning_rate": 2.6848725415297884e-07, |
|
"logits/chosen": 6084.1416015625, |
|
"logits/rejected": 5248.6669921875, |
|
"logps/chosen": -470.7705078125, |
|
"logps/rejected": -499.703857421875, |
|
"loss": 0.5062, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.663577675819397, |
|
"rewards/margins": 0.8588649034500122, |
|
"rewards/rejected": -2.522442579269409, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.533891651400157, |
|
"grad_norm": 64.97333091332597, |
|
"learning_rate": 2.6392800536017183e-07, |
|
"logits/chosen": 5355.8505859375, |
|
"logits/rejected": 5051.5439453125, |
|
"logps/chosen": -488.87176513671875, |
|
"logps/rejected": -567.3258666992188, |
|
"loss": 0.4809, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.0089640617370605, |
|
"rewards/margins": 0.9219423532485962, |
|
"rewards/rejected": -2.930906295776367, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.5391258832766291, |
|
"grad_norm": 46.67126991156967, |
|
"learning_rate": 2.59364104726716e-07, |
|
"logits/chosen": 5887.8046875, |
|
"logits/rejected": 5121.62890625, |
|
"logps/chosen": -468.025146484375, |
|
"logps/rejected": -593.8919677734375, |
|
"loss": 0.4498, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -1.7689498662948608, |
|
"rewards/margins": 1.3238210678100586, |
|
"rewards/rejected": -3.092771053314209, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.5443601151531012, |
|
"grad_norm": 61.29392397902382, |
|
"learning_rate": 2.547970765583491e-07, |
|
"logits/chosen": 5582.82763671875, |
|
"logits/rejected": 4876.9638671875, |
|
"logps/chosen": -430.79541015625, |
|
"logps/rejected": -515.9193115234375, |
|
"loss": 0.5278, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -1.700372338294983, |
|
"rewards/margins": 1.078627347946167, |
|
"rewards/rejected": -2.7789998054504395, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.5495943470295734, |
|
"grad_norm": 41.39413522028797, |
|
"learning_rate": 2.502284462053799e-07, |
|
"logits/chosen": 6156.40283203125, |
|
"logits/rejected": 5941.8779296875, |
|
"logps/chosen": -476.8907165527344, |
|
"logps/rejected": -558.0145874023438, |
|
"loss": 0.508, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -1.9446933269500732, |
|
"rewards/margins": 0.9444707632064819, |
|
"rewards/rejected": -2.8891639709472656, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.5548285789060455, |
|
"grad_norm": 69.99679860346889, |
|
"learning_rate": 2.4565973955323374e-07, |
|
"logits/chosen": 5784.0166015625, |
|
"logits/rejected": 4964.3076171875, |
|
"logps/chosen": -465.17950439453125, |
|
"logps/rejected": -525.5794067382812, |
|
"loss": 0.5074, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -1.7338411808013916, |
|
"rewards/margins": 1.0747594833374023, |
|
"rewards/rejected": -2.808600902557373, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.5600628107825176, |
|
"grad_norm": 39.31688734230333, |
|
"learning_rate": 2.410924825128195e-07, |
|
"logits/chosen": 5454.869140625, |
|
"logits/rejected": 5118.14306640625, |
|
"logps/chosen": -430.4056701660156, |
|
"logps/rejected": -529.0426025390625, |
|
"loss": 0.4646, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.6206849813461304, |
|
"rewards/margins": 0.8924548029899597, |
|
"rewards/rejected": -2.5131397247314453, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.5652970426589898, |
|
"grad_norm": 52.77189181501683, |
|
"learning_rate": 2.365282005108875e-07, |
|
"logits/chosen": 5776.9716796875, |
|
"logits/rejected": 4836.4609375, |
|
"logps/chosen": -423.0970153808594, |
|
"logps/rejected": -519.367431640625, |
|
"loss": 0.4835, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -1.6719558238983154, |
|
"rewards/margins": 1.021319031715393, |
|
"rewards/rejected": -2.693274974822998, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.5705312745354619, |
|
"grad_norm": 70.82459551115117, |
|
"learning_rate": 2.319684179805491e-07, |
|
"logits/chosen": 5663.40283203125, |
|
"logits/rejected": 4413.01171875, |
|
"logps/chosen": -462.0267028808594, |
|
"logps/rejected": -538.9208984375, |
|
"loss": 0.5123, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.7139892578125, |
|
"rewards/margins": 1.3090190887451172, |
|
"rewards/rejected": -3.0230085849761963, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.575765506411934, |
|
"grad_norm": 33.8748559668582, |
|
"learning_rate": 2.2741465785212902e-07, |
|
"logits/chosen": 5301.47216796875, |
|
"logits/rejected": 3999.432373046875, |
|
"logps/chosen": -420.2606506347656, |
|
"logps/rejected": -517.6099243164062, |
|
"loss": 0.416, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.6415036916732788, |
|
"rewards/margins": 1.3193124532699585, |
|
"rewards/rejected": -2.9608161449432373, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.5809997382884062, |
|
"grad_norm": 50.83769917179278, |
|
"learning_rate": 2.2286844104451843e-07, |
|
"logits/chosen": 5784.0478515625, |
|
"logits/rejected": 5007.18017578125, |
|
"logps/chosen": -490.86505126953125, |
|
"logps/rejected": -576.6304931640625, |
|
"loss": 0.5079, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -2.056772470474243, |
|
"rewards/margins": 1.0176784992218018, |
|
"rewards/rejected": -3.074450969696045, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.5862339701648783, |
|
"grad_norm": 52.001448501596336, |
|
"learning_rate": 2.183312859572008e-07, |
|
"logits/chosen": 6639.57177734375, |
|
"logits/rejected": 5511.9033203125, |
|
"logps/chosen": -482.6524963378906, |
|
"logps/rejected": -556.4099731445312, |
|
"loss": 0.528, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.9167563915252686, |
|
"rewards/margins": 1.0493156909942627, |
|
"rewards/rejected": -2.9660720825195312, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.5914682020413504, |
|
"grad_norm": 44.25761412679498, |
|
"learning_rate": 2.138047079631184e-07, |
|
"logits/chosen": 5394.453125, |
|
"logits/rejected": 5371.2919921875, |
|
"logps/chosen": -488.195068359375, |
|
"logps/rejected": -600.7262573242188, |
|
"loss": 0.4819, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.340132474899292, |
|
"rewards/margins": 0.8797481656074524, |
|
"rewards/rejected": -3.2198805809020996, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.5967024339178225, |
|
"grad_norm": 55.393528463173325, |
|
"learning_rate": 2.0929021890255068e-07, |
|
"logits/chosen": 6330.7919921875, |
|
"logits/rejected": 5427.1728515625, |
|
"logps/chosen": -502.2682189941406, |
|
"logps/rejected": -618.1027221679688, |
|
"loss": 0.5048, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -2.034196615219116, |
|
"rewards/margins": 1.0336921215057373, |
|
"rewards/rejected": -3.0678887367248535, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.6019366657942947, |
|
"grad_norm": 50.46681050763751, |
|
"learning_rate": 2.0478932657817102e-07, |
|
"logits/chosen": 5141.21923828125, |
|
"logits/rejected": 4884.60009765625, |
|
"logps/chosen": -445.36236572265625, |
|
"logps/rejected": -531.31787109375, |
|
"loss": 0.5092, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.968653678894043, |
|
"rewards/margins": 0.8347317576408386, |
|
"rewards/rejected": -2.8033852577209473, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.6071708976707668, |
|
"grad_norm": 70.3269151760612, |
|
"learning_rate": 2.0030353425145374e-07, |
|
"logits/chosen": 7235.20947265625, |
|
"logits/rejected": 6419.9287109375, |
|
"logps/chosen": -583.9832153320312, |
|
"logps/rejected": -640.3153076171875, |
|
"loss": 0.561, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -2.1977429389953613, |
|
"rewards/margins": 0.6988611221313477, |
|
"rewards/rejected": -2.896604061126709, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.6124051295472389, |
|
"grad_norm": 42.0515600415498, |
|
"learning_rate": 1.9583434014059635e-07, |
|
"logits/chosen": 5870.0048828125, |
|
"logits/rejected": 4960.2783203125, |
|
"logps/chosen": -460.8169860839844, |
|
"logps/rejected": -575.4650268554688, |
|
"loss": 0.4764, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.8832632303237915, |
|
"rewards/margins": 1.0969445705413818, |
|
"rewards/rejected": -2.9802074432373047, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.6176393614237111, |
|
"grad_norm": 44.17752357905191, |
|
"learning_rate": 1.9138323692012733e-07, |
|
"logits/chosen": 5152.05322265625, |
|
"logits/rejected": 4995.10302734375, |
|
"logps/chosen": -465.43109130859375, |
|
"logps/rejected": -523.4608154296875, |
|
"loss": 0.4744, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.928765058517456, |
|
"rewards/margins": 0.7397549748420715, |
|
"rewards/rejected": -2.668519973754883, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.6228735933001832, |
|
"grad_norm": 61.75617287900426, |
|
"learning_rate": 1.8695171122236442e-07, |
|
"logits/chosen": 5305.31787109375, |
|
"logits/rejected": 5259.71630859375, |
|
"logps/chosen": -420.59771728515625, |
|
"logps/rejected": -538.1131591796875, |
|
"loss": 0.4765, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.6423835754394531, |
|
"rewards/margins": 0.9299103021621704, |
|
"rewards/rejected": -2.572293996810913, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.6281078251766553, |
|
"grad_norm": 70.3147665430388, |
|
"learning_rate": 1.8254124314089223e-07, |
|
"logits/chosen": 5743.0556640625, |
|
"logits/rejected": 5161.66015625, |
|
"logps/chosen": -456.74395751953125, |
|
"logps/rejected": -543.2876586914062, |
|
"loss": 0.5026, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.72748601436615, |
|
"rewards/margins": 1.018004059791565, |
|
"rewards/rejected": -2.745490074157715, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.6333420570531274, |
|
"grad_norm": 55.2814901627422, |
|
"learning_rate": 1.7815330573622205e-07, |
|
"logits/chosen": 5943.31103515625, |
|
"logits/rejected": 5791.52685546875, |
|
"logps/chosen": -441.3788146972656, |
|
"logps/rejected": -568.0220336914062, |
|
"loss": 0.4927, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.6020936965942383, |
|
"rewards/margins": 1.0032509565353394, |
|
"rewards/rejected": -2.605344533920288, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.6385762889295996, |
|
"grad_norm": 36.81078378897197, |
|
"learning_rate": 1.7378936454380274e-07, |
|
"logits/chosen": 5846.7255859375, |
|
"logits/rejected": 4917.35595703125, |
|
"logps/chosen": -435.710693359375, |
|
"logps/rejected": -514.1156616210938, |
|
"loss": 0.4601, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.7096188068389893, |
|
"rewards/margins": 1.0015608072280884, |
|
"rewards/rejected": -2.711179494857788, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.6438105208060717, |
|
"grad_norm": 67.37657075563799, |
|
"learning_rate": 1.694508770845427e-07, |
|
"logits/chosen": 6779.4072265625, |
|
"logits/rejected": 5683.87646484375, |
|
"logps/chosen": -540.6749267578125, |
|
"logps/rejected": -585.6129760742188, |
|
"loss": 0.503, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.0458195209503174, |
|
"rewards/margins": 0.9217261075973511, |
|
"rewards/rejected": -2.967545747756958, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.6490447526825438, |
|
"grad_norm": 56.03321414275303, |
|
"learning_rate": 1.651392923780105e-07, |
|
"logits/chosen": 6311.9423828125, |
|
"logits/rejected": 5025.9326171875, |
|
"logps/chosen": -482.51629638671875, |
|
"logps/rejected": -529.3140869140625, |
|
"loss": 0.4719, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.012092113494873, |
|
"rewards/margins": 0.8922163248062134, |
|
"rewards/rejected": -2.904308557510376, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.654278984559016, |
|
"grad_norm": 51.240934983951576, |
|
"learning_rate": 1.6085605045847367e-07, |
|
"logits/chosen": 5766.875, |
|
"logits/rejected": 4679.556640625, |
|
"logps/chosen": -484.06036376953125, |
|
"logps/rejected": -574.91943359375, |
|
"loss": 0.503, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -2.115018367767334, |
|
"rewards/margins": 0.8909432291984558, |
|
"rewards/rejected": -3.0059614181518555, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.6595132164354881, |
|
"grad_norm": 44.83155429296669, |
|
"learning_rate": 1.5660258189393944e-07, |
|
"logits/chosen": 6001.896484375, |
|
"logits/rejected": 4623.4814453125, |
|
"logps/chosen": -481.5863342285156, |
|
"logps/rejected": -554.1494140625, |
|
"loss": 0.4855, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -1.8362839221954346, |
|
"rewards/margins": 1.0993396043777466, |
|
"rewards/rejected": -2.9356234073638916, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.6647474483119602, |
|
"grad_norm": 58.86551962180224, |
|
"learning_rate": 1.5238030730835577e-07, |
|
"logits/chosen": 5332.787109375, |
|
"logits/rejected": 5467.3818359375, |
|
"logps/chosen": -398.0010986328125, |
|
"logps/rejected": -541.9666748046875, |
|
"loss": 0.4572, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.5968772172927856, |
|
"rewards/margins": 1.336118221282959, |
|
"rewards/rejected": -2.932995319366455, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.6699816801884323, |
|
"grad_norm": 53.78027682064507, |
|
"learning_rate": 1.4819063690713564e-07, |
|
"logits/chosen": 6006.59130859375, |
|
"logits/rejected": 4786.06982421875, |
|
"logps/chosen": -449.716796875, |
|
"logps/rejected": -553.7860107421875, |
|
"loss": 0.4604, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.7109276056289673, |
|
"rewards/margins": 1.27021062374115, |
|
"rewards/rejected": -2.981138229370117, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.6752159120649045, |
|
"grad_norm": 69.08546288730311, |
|
"learning_rate": 1.4403497000615883e-07, |
|
"logits/chosen": 5749.35546875, |
|
"logits/rejected": 5006.19580078125, |
|
"logps/chosen": -513.867431640625, |
|
"logps/rejected": -558.72509765625, |
|
"loss": 0.5407, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -2.0234084129333496, |
|
"rewards/margins": 1.0361706018447876, |
|
"rewards/rejected": -3.0595791339874268, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.6804501439413766, |
|
"grad_norm": 94.32166607912914, |
|
"learning_rate": 1.3991469456441272e-07, |
|
"logits/chosen": 5560.58642578125, |
|
"logits/rejected": 5246.12646484375, |
|
"logps/chosen": -432.19964599609375, |
|
"logps/rejected": -541.0390625, |
|
"loss": 0.4939, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.547202706336975, |
|
"rewards/margins": 1.0479974746704102, |
|
"rewards/rejected": -2.5952000617980957, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.6856843758178487, |
|
"grad_norm": 38.56424538056888, |
|
"learning_rate": 1.358311867204244e-07, |
|
"logits/chosen": 4675.93896484375, |
|
"logits/rejected": 4583.65625, |
|
"logps/chosen": -363.8262634277344, |
|
"logps/rejected": -492.0935974121094, |
|
"loss": 0.4581, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.402822732925415, |
|
"rewards/margins": 1.1607930660247803, |
|
"rewards/rejected": -2.5636157989501953, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.6909186076943209, |
|
"grad_norm": 70.48363118369693, |
|
"learning_rate": 1.3178581033264216e-07, |
|
"logits/chosen": 6256.1904296875, |
|
"logits/rejected": 5279.4775390625, |
|
"logps/chosen": -481.8662109375, |
|
"logps/rejected": -563.3885498046875, |
|
"loss": 0.5067, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.8727912902832031, |
|
"rewards/margins": 0.8880994915962219, |
|
"rewards/rejected": -2.7608909606933594, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.696152839570793, |
|
"grad_norm": 53.61690545001972, |
|
"learning_rate": 1.2777991652391757e-07, |
|
"logits/chosen": 5354.2919921875, |
|
"logits/rejected": 3956.951904296875, |
|
"logps/chosen": -457.558837890625, |
|
"logps/rejected": -519.9451904296875, |
|
"loss": 0.5103, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.7837330102920532, |
|
"rewards/margins": 1.186250925064087, |
|
"rewards/rejected": -2.9699840545654297, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.7013870714472651, |
|
"grad_norm": 85.94134232920602, |
|
"learning_rate": 1.2381484323024178e-07, |
|
"logits/chosen": 6099.58154296875, |
|
"logits/rejected": 5222.3310546875, |
|
"logps/chosen": -465.1321716308594, |
|
"logps/rejected": -540.4118041992188, |
|
"loss": 0.4825, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.8389440774917603, |
|
"rewards/margins": 1.0195863246917725, |
|
"rewards/rejected": -2.8585305213928223, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.7066213033237373, |
|
"grad_norm": 59.890970835357095, |
|
"learning_rate": 1.1989191475388516e-07, |
|
"logits/chosen": 5064.47119140625, |
|
"logits/rejected": 4606.1064453125, |
|
"logps/chosen": -396.0245056152344, |
|
"logps/rejected": -529.9171752929688, |
|
"loss": 0.4919, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.6857776641845703, |
|
"rewards/margins": 1.1661508083343506, |
|
"rewards/rejected": -2.851928234100342, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.7118555352002094, |
|
"grad_norm": 55.025653930575544, |
|
"learning_rate": 1.1601244132109179e-07, |
|
"logits/chosen": 5044.70556640625, |
|
"logits/rejected": 4524.1494140625, |
|
"logps/chosen": -439.0829162597656, |
|
"logps/rejected": -536.5711669921875, |
|
"loss": 0.4973, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.9668521881103516, |
|
"rewards/margins": 0.9730531573295593, |
|
"rewards/rejected": -2.9399051666259766, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.7170897670766815, |
|
"grad_norm": 50.42441473566833, |
|
"learning_rate": 1.1217771864447395e-07, |
|
"logits/chosen": 5791.28662109375, |
|
"logits/rejected": 4876.34228515625, |
|
"logps/chosen": -454.65106201171875, |
|
"logps/rejected": -576.1729736328125, |
|
"loss": 0.5116, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -1.693499207496643, |
|
"rewards/margins": 1.1748238801956177, |
|
"rewards/rejected": -2.8683230876922607, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.7223239989531536, |
|
"grad_norm": 45.59860401020204, |
|
"learning_rate": 1.0838902749025499e-07, |
|
"logits/chosen": 7000.99462890625, |
|
"logits/rejected": 5573.9833984375, |
|
"logps/chosen": -491.8744201660156, |
|
"logps/rejected": -530.9385986328125, |
|
"loss": 0.5155, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.6647865772247314, |
|
"rewards/margins": 0.8948407173156738, |
|
"rewards/rejected": -2.559627056121826, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.7275582308296258, |
|
"grad_norm": 68.88855255766244, |
|
"learning_rate": 1.0464763325050358e-07, |
|
"logits/chosen": 5260.3330078125, |
|
"logits/rejected": 4669.32958984375, |
|
"logps/chosen": -447.2159118652344, |
|
"logps/rejected": -515.1805419921875, |
|
"loss": 0.4911, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.6555016040802002, |
|
"rewards/margins": 0.9187766909599304, |
|
"rewards/rejected": -2.5742781162261963, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.7327924627060979, |
|
"grad_norm": 53.849978458276446, |
|
"learning_rate": 1.0095478552050346e-07, |
|
"logits/chosen": 6265.92041015625, |
|
"logits/rejected": 4139.224609375, |
|
"logps/chosen": -451.6341857910156, |
|
"logps/rejected": -501.037353515625, |
|
"loss": 0.4732, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.4033886194229126, |
|
"rewards/margins": 1.0941402912139893, |
|
"rewards/rejected": -2.4975287914276123, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.73802669458257, |
|
"grad_norm": 71.49005742239994, |
|
"learning_rate": 9.731171768139806e-08, |
|
"logits/chosen": 5820.0, |
|
"logits/rejected": 4671.0771484375, |
|
"logps/chosen": -401.1160583496094, |
|
"logps/rejected": -485.373291015625, |
|
"loss": 0.493, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.3254698514938354, |
|
"rewards/margins": 1.1134282350540161, |
|
"rewards/rejected": -2.4388980865478516, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.7432609264590422, |
|
"grad_norm": 61.38384481626538, |
|
"learning_rate": 9.37196464882522e-08, |
|
"logits/chosen": 5571.05078125, |
|
"logits/rejected": 5003.7001953125, |
|
"logps/chosen": -405.25677490234375, |
|
"logps/rejected": -501.10931396484375, |
|
"loss": 0.5245, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -1.5185030698776245, |
|
"rewards/margins": 0.9954677820205688, |
|
"rewards/rejected": -2.5139708518981934, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.7484951583355143, |
|
"grad_norm": 56.09557615458594, |
|
"learning_rate": 9.017977166366444e-08, |
|
"logits/chosen": 5765.5498046875, |
|
"logits/rejected": 5031.99169921875, |
|
"logps/chosen": -432.7581481933594, |
|
"logps/rejected": -531.5502319335938, |
|
"loss": 0.4958, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.4822924137115479, |
|
"rewards/margins": 1.0369850397109985, |
|
"rewards/rejected": -2.519277572631836, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.7537293902119864, |
|
"grad_norm": 43.93135285453503, |
|
"learning_rate": 8.669327549707095e-08, |
|
"logits/chosen": 5835.9599609375, |
|
"logits/rejected": 4902.2099609375, |
|
"logps/chosen": -467.08721923828125, |
|
"logps/rejected": -531.9814453125, |
|
"loss": 0.4596, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.62697434425354, |
|
"rewards/margins": 1.0709320306777954, |
|
"rewards/rejected": -2.697906494140625, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.7589636220884585, |
|
"grad_norm": 45.45268260539036, |
|
"learning_rate": 8.326132244986931e-08, |
|
"logits/chosen": 5231.73681640625, |
|
"logits/rejected": 4425.86572265625, |
|
"logps/chosen": -425.63995361328125, |
|
"logps/rejected": -521.69140625, |
|
"loss": 0.4698, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.6223514080047607, |
|
"rewards/margins": 1.2424169778823853, |
|
"rewards/rejected": -2.8647682666778564, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.7641978539649307, |
|
"grad_norm": 44.10137407870052, |
|
"learning_rate": 7.988505876649862e-08, |
|
"logits/chosen": 5436.15673828125, |
|
"logits/rejected": 4060.813232421875, |
|
"logps/chosen": -442.54400634765625, |
|
"logps/rejected": -547.1506958007812, |
|
"loss": 0.4985, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.7412408590316772, |
|
"rewards/margins": 1.1485346555709839, |
|
"rewards/rejected": -2.889775514602661, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.7694320858414028, |
|
"grad_norm": 34.2003751407198, |
|
"learning_rate": 7.656561209160248e-08, |
|
"logits/chosen": 5884.69921875, |
|
"logits/rejected": 4979.0634765625, |
|
"logps/chosen": -468.45892333984375, |
|
"logps/rejected": -524.2210693359375, |
|
"loss": 0.4535, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.6417407989501953, |
|
"rewards/margins": 1.0903558731079102, |
|
"rewards/rejected": -2.7320969104766846, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.7746663177178749, |
|
"grad_norm": 62.328404682337315, |
|
"learning_rate": 7.330409109340562e-08, |
|
"logits/chosen": 5976.05615234375, |
|
"logits/rejected": 5185.83984375, |
|
"logps/chosen": -475.17303466796875, |
|
"logps/rejected": -552.3870849609375, |
|
"loss": 0.4602, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.5611035823822021, |
|
"rewards/margins": 1.1281805038452148, |
|
"rewards/rejected": -2.689283847808838, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.7799005495943471, |
|
"grad_norm": 62.90207468441792, |
|
"learning_rate": 7.010158509342681e-08, |
|
"logits/chosen": 6559.21875, |
|
"logits/rejected": 4668.7568359375, |
|
"logps/chosen": -461.5740661621094, |
|
"logps/rejected": -515.0909423828125, |
|
"loss": 0.4662, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.6185649633407593, |
|
"rewards/margins": 1.0846556425094604, |
|
"rewards/rejected": -2.703220844268799, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.7851347814708192, |
|
"grad_norm": 52.65265815732916, |
|
"learning_rate": 6.695916370265527e-08, |
|
"logits/chosen": 5316.6923828125, |
|
"logits/rejected": 4581.3759765625, |
|
"logps/chosen": -423.22406005859375, |
|
"logps/rejected": -458.8834533691406, |
|
"loss": 0.5321, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.6553497314453125, |
|
"rewards/margins": 0.7974721789360046, |
|
"rewards/rejected": -2.452821731567383, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.7903690133472913, |
|
"grad_norm": 39.19744570522527, |
|
"learning_rate": 6.387787646430853e-08, |
|
"logits/chosen": 6557.60546875, |
|
"logits/rejected": 5875.27685546875, |
|
"logps/chosen": -476.264404296875, |
|
"logps/rejected": -544.9144897460938, |
|
"loss": 0.5219, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.7729346752166748, |
|
"rewards/margins": 0.808856189250946, |
|
"rewards/rejected": -2.5817906856536865, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.7956032452237635, |
|
"grad_norm": 53.35492473376416, |
|
"learning_rate": 6.0858752503294e-08, |
|
"logits/chosen": 5201.9482421875, |
|
"logits/rejected": 4884.1943359375, |
|
"logps/chosen": -451.56707763671875, |
|
"logps/rejected": -502.1494140625, |
|
"loss": 0.4745, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.6913883686065674, |
|
"rewards/margins": 0.7468551397323608, |
|
"rewards/rejected": -2.438243865966797, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.8008374771002356, |
|
"grad_norm": 37.5730139468933, |
|
"learning_rate": 5.7902800182489385e-08, |
|
"logits/chosen": 5386.0400390625, |
|
"logits/rejected": 5056.7646484375, |
|
"logps/chosen": -412.1158752441406, |
|
"logps/rejected": -504.4266052246094, |
|
"loss": 0.4652, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.577048659324646, |
|
"rewards/margins": 1.137432336807251, |
|
"rewards/rejected": -2.7144808769226074, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.8060717089767077, |
|
"grad_norm": 47.25236502782169, |
|
"learning_rate": 5.5011006765957604e-08, |
|
"logits/chosen": 6559.1689453125, |
|
"logits/rejected": 5847.15869140625, |
|
"logps/chosen": -477.906005859375, |
|
"logps/rejected": -593.715087890625, |
|
"loss": 0.4813, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.720546007156372, |
|
"rewards/margins": 1.0101532936096191, |
|
"rewards/rejected": -2.7306995391845703, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.8113059408531798, |
|
"grad_norm": 64.5306158701743, |
|
"learning_rate": 5.218433808920883e-08, |
|
"logits/chosen": 5732.14404296875, |
|
"logits/rejected": 5182.62109375, |
|
"logps/chosen": -454.5556640625, |
|
"logps/rejected": -543.537841796875, |
|
"loss": 0.4659, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.6730997562408447, |
|
"rewards/margins": 0.9740368723869324, |
|
"rewards/rejected": -2.6471364498138428, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.816540172729652, |
|
"grad_norm": 48.449579077266975, |
|
"learning_rate": 4.942373823661927e-08, |
|
"logits/chosen": 6836.04052734375, |
|
"logits/rejected": 5074.99169921875, |
|
"logps/chosen": -489.0601501464844, |
|
"logps/rejected": -550.8364868164062, |
|
"loss": 0.4693, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.6957050561904907, |
|
"rewards/margins": 1.1460716724395752, |
|
"rewards/rejected": -2.8417768478393555, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.821774404606124, |
|
"grad_norm": 47.436165301548996, |
|
"learning_rate": 4.6730129226114354e-08, |
|
"logits/chosen": 5166.6318359375, |
|
"logits/rejected": 4734.98779296875, |
|
"logps/chosen": -445.92669677734375, |
|
"logps/rejected": -492.08770751953125, |
|
"loss": 0.4721, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.9005409479141235, |
|
"rewards/margins": 0.8562926054000854, |
|
"rewards/rejected": -2.756833553314209, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.8270086364825961, |
|
"grad_norm": 46.42456774742332, |
|
"learning_rate": 4.41044107012227e-08, |
|
"logits/chosen": 6607.1123046875, |
|
"logits/rejected": 5159.0517578125, |
|
"logps/chosen": -491.15869140625, |
|
"logps/rejected": -544.934814453125, |
|
"loss": 0.4803, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.5394710302352905, |
|
"rewards/margins": 1.0749366283416748, |
|
"rewards/rejected": -2.614407777786255, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.8322428683590684, |
|
"grad_norm": 115.21349764331502, |
|
"learning_rate": 4.1547459630601966e-08, |
|
"logits/chosen": 5747.3759765625, |
|
"logits/rejected": 5143.2470703125, |
|
"logps/chosen": -473.9146423339844, |
|
"logps/rejected": -541.2026977539062, |
|
"loss": 0.5147, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.8548256158828735, |
|
"rewards/margins": 0.8329262733459473, |
|
"rewards/rejected": -2.6877522468566895, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.8374771002355405, |
|
"grad_norm": 46.00913012878008, |
|
"learning_rate": 3.9060130015138857e-08, |
|
"logits/chosen": 5326.37109375, |
|
"logits/rejected": 4686.98291015625, |
|
"logps/chosen": -470.4459533691406, |
|
"logps/rejected": -547.7535400390625, |
|
"loss": 0.496, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.8834571838378906, |
|
"rewards/margins": 1.0830243825912476, |
|
"rewards/rejected": -2.9664816856384277, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.8427113321120125, |
|
"grad_norm": 35.690049353036706, |
|
"learning_rate": 3.664325260271953e-08, |
|
"logits/chosen": 6072.751953125, |
|
"logits/rejected": 5098.45068359375, |
|
"logps/chosen": -512.0150146484375, |
|
"logps/rejected": -555.8195190429688, |
|
"loss": 0.4597, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.9424760341644287, |
|
"rewards/margins": 0.8288620114326477, |
|
"rewards/rejected": -2.7713379859924316, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.8479455639884846, |
|
"grad_norm": 46.255307725799796, |
|
"learning_rate": 3.429763461076676e-08, |
|
"logits/chosen": 5927.7353515625, |
|
"logits/rejected": 5079.93212890625, |
|
"logps/chosen": -456.045166015625, |
|
"logps/rejected": -560.6665649414062, |
|
"loss": 0.4653, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -1.6067283153533936, |
|
"rewards/margins": 1.0832823514938354, |
|
"rewards/rejected": -2.6900105476379395, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.8531797958649568, |
|
"grad_norm": 46.35436279492669, |
|
"learning_rate": 3.202405945663555e-08, |
|
"logits/chosen": 5855.36962890625, |
|
"logits/rejected": 3933.013671875, |
|
"logps/chosen": -460.8177185058594, |
|
"logps/rejected": -482.4466247558594, |
|
"loss": 0.489, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.8878599405288696, |
|
"rewards/margins": 0.8325251340866089, |
|
"rewards/rejected": -2.7203853130340576, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.8584140277414289, |
|
"grad_norm": 54.73002711133669, |
|
"learning_rate": 2.9823286495958556e-08, |
|
"logits/chosen": 4859.734375, |
|
"logits/rejected": 5366.44775390625, |
|
"logps/chosen": -439.7100524902344, |
|
"logps/rejected": -632.7385864257812, |
|
"loss": 0.4796, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.9741220474243164, |
|
"rewards/margins": 0.9164485931396484, |
|
"rewards/rejected": -2.890570640563965, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.863648259617901, |
|
"grad_norm": 43.01757031434862, |
|
"learning_rate": 2.769605076902695e-08, |
|
"logits/chosen": 6194.2392578125, |
|
"logits/rejected": 5666.4248046875, |
|
"logps/chosen": -463.29083251953125, |
|
"logps/rejected": -570.9111328125, |
|
"loss": 0.4745, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -1.7209036350250244, |
|
"rewards/margins": 0.9603285789489746, |
|
"rewards/rejected": -2.68123197555542, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.8688824914943732, |
|
"grad_norm": 41.534018608207546, |
|
"learning_rate": 2.5643062755293403e-08, |
|
"logits/chosen": 5478.0517578125, |
|
"logits/rejected": 4659.22412109375, |
|
"logps/chosen": -460.326416015625, |
|
"logps/rejected": -499.4408264160156, |
|
"loss": 0.4895, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.8074705600738525, |
|
"rewards/margins": 0.8185604214668274, |
|
"rewards/rejected": -2.626030921936035, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.8741167233708453, |
|
"grad_norm": 60.86434476583357, |
|
"learning_rate": 2.366500813607733e-08, |
|
"logits/chosen": 6139.50146484375, |
|
"logits/rejected": 4718.34619140625, |
|
"logps/chosen": -445.7151794433594, |
|
"logps/rejected": -558.4569091796875, |
|
"loss": 0.4855, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.6716018915176392, |
|
"rewards/margins": 1.3165209293365479, |
|
"rewards/rejected": -2.9881229400634766, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.8793509552473174, |
|
"grad_norm": 75.4275871069272, |
|
"learning_rate": 2.176254756555329e-08, |
|
"logits/chosen": 6512.9912109375, |
|
"logits/rejected": 5728.6318359375, |
|
"logps/chosen": -498.4745178222656, |
|
"logps/rejected": -584.0284423828125, |
|
"loss": 0.4563, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.7989261150360107, |
|
"rewards/margins": 1.1345270872116089, |
|
"rewards/rejected": -2.93345308303833, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.8845851871237895, |
|
"grad_norm": 47.66108370078102, |
|
"learning_rate": 1.9936316450097468e-08, |
|
"logits/chosen": 5179.87646484375, |
|
"logits/rejected": 4646.3017578125, |
|
"logps/chosen": -436.65118408203125, |
|
"logps/rejected": -480.0382385253906, |
|
"loss": 0.5015, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.8090522289276123, |
|
"rewards/margins": 0.7128003835678101, |
|
"rewards/rejected": -2.521852493286133, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.8898194190002617, |
|
"grad_norm": 49.60045372554933, |
|
"learning_rate": 1.8186924736067477e-08, |
|
"logits/chosen": 5840.21240234375, |
|
"logits/rejected": 4393.1689453125, |
|
"logps/chosen": -455.3392639160156, |
|
"logps/rejected": -550.1962280273438, |
|
"loss": 0.4651, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -1.6438064575195312, |
|
"rewards/margins": 1.2535064220428467, |
|
"rewards/rejected": -2.8973135948181152, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.8950536508767338, |
|
"grad_norm": 43.86855453315871, |
|
"learning_rate": 1.651495670608488e-08, |
|
"logits/chosen": 6719.01708984375, |
|
"logits/rejected": 5168.751953125, |
|
"logps/chosen": -477.3172912597656, |
|
"logps/rejected": -558.49560546875, |
|
"loss": 0.4331, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.7377887964248657, |
|
"rewards/margins": 1.2401338815689087, |
|
"rewards/rejected": -2.9779226779937744, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.9002878827532059, |
|
"grad_norm": 42.225143416295225, |
|
"learning_rate": 1.4920970783889737e-08, |
|
"logits/chosen": 6293.6005859375, |
|
"logits/rejected": 4633.51806640625, |
|
"logps/chosen": -493.03509521484375, |
|
"logps/rejected": -573.3130493164062, |
|
"loss": 0.4554, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.9286737442016602, |
|
"rewards/margins": 0.9963156580924988, |
|
"rewards/rejected": -2.9249894618988037, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.9055221146296781, |
|
"grad_norm": 33.92052291437684, |
|
"learning_rate": 1.340549934783164e-08, |
|
"logits/chosen": 6018.5830078125, |
|
"logits/rejected": 5687.3076171875, |
|
"logps/chosen": -481.969970703125, |
|
"logps/rejected": -572.0182495117188, |
|
"loss": 0.4615, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.8394489288330078, |
|
"rewards/margins": 0.924017608165741, |
|
"rewards/rejected": -2.7634665966033936, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.9107563465061502, |
|
"grad_norm": 44.157007984318106, |
|
"learning_rate": 1.1969048553059608e-08, |
|
"logits/chosen": 5706.2099609375, |
|
"logits/rejected": 4860.96533203125, |
|
"logps/chosen": -412.661865234375, |
|
"logps/rejected": -496.70074462890625, |
|
"loss": 0.4884, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.6415414810180664, |
|
"rewards/margins": 0.9479316473007202, |
|
"rewards/rejected": -2.589473009109497, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.9159905783826223, |
|
"grad_norm": 54.00838584546709, |
|
"learning_rate": 1.06120981624703e-08, |
|
"logits/chosen": 5393.56005859375, |
|
"logits/rejected": 5689.6533203125, |
|
"logps/chosen": -444.2010192871094, |
|
"logps/rejected": -582.6871948242188, |
|
"loss": 0.4802, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -1.7044061422348022, |
|
"rewards/margins": 1.1069849729537964, |
|
"rewards/rejected": -2.8113913536071777, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.9212248102590945, |
|
"grad_norm": 61.132927463591344, |
|
"learning_rate": 9.335101386471284e-09, |
|
"logits/chosen": 6236.1591796875, |
|
"logits/rejected": 5493.9794921875, |
|
"logps/chosen": -478.6182556152344, |
|
"logps/rejected": -549.0506591796875, |
|
"loss": 0.4714, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.8473739624023438, |
|
"rewards/margins": 0.9510253667831421, |
|
"rewards/rejected": -2.7983996868133545, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.9264590421355666, |
|
"grad_norm": 63.02028903638583, |
|
"learning_rate": 8.138484731612273e-09, |
|
"logits/chosen": 5896.7861328125, |
|
"logits/rejected": 4876.11328125, |
|
"logps/chosen": -457.31500244140625, |
|
"logps/rejected": -578.9633178710938, |
|
"loss": 0.4745, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.785424828529358, |
|
"rewards/margins": 1.2331361770629883, |
|
"rewards/rejected": -3.0185611248016357, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.9316932740120387, |
|
"grad_norm": 45.2465032405233, |
|
"learning_rate": 7.0226478581355e-09, |
|
"logits/chosen": 5961.98974609375, |
|
"logits/rejected": 5157.7978515625, |
|
"logps/chosen": -488.4525451660156, |
|
"logps/rejected": -565.0822143554688, |
|
"loss": 0.5262, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -2.037466287612915, |
|
"rewards/margins": 0.9263374209403992, |
|
"rewards/rejected": -2.963803768157959, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.9369275058885108, |
|
"grad_norm": 60.617493519779835, |
|
"learning_rate": 5.987963446492383e-09, |
|
"logits/chosen": 6006.6962890625, |
|
"logits/rejected": 5307.9892578125, |
|
"logps/chosen": -438.90753173828125, |
|
"logps/rejected": -521.6585693359375, |
|
"loss": 0.41, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.6116359233856201, |
|
"rewards/margins": 1.0634849071502686, |
|
"rewards/rejected": -2.6751208305358887, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.942161737764983, |
|
"grad_norm": 59.87948034425728, |
|
"learning_rate": 5.0347770728713935e-09, |
|
"logits/chosen": 5996.91455078125, |
|
"logits/rejected": 4601.3720703125, |
|
"logps/chosen": -487.2227478027344, |
|
"logps/rejected": -516.2828979492188, |
|
"loss": 0.4885, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.6710405349731445, |
|
"rewards/margins": 1.0333257913589478, |
|
"rewards/rejected": -2.7043662071228027, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.9473959696414551, |
|
"grad_norm": 43.04367777321277, |
|
"learning_rate": 4.1634070937782424e-09, |
|
"logits/chosen": 5986.92822265625, |
|
"logits/rejected": 5393.41259765625, |
|
"logps/chosen": -493.161376953125, |
|
"logps/rejected": -621.3470458984375, |
|
"loss": 0.5037, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.9648311138153076, |
|
"rewards/margins": 1.164574146270752, |
|
"rewards/rejected": -3.1294054985046387, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.9526302015179272, |
|
"grad_norm": 47.169800736825145, |
|
"learning_rate": 3.3741445397075797e-09, |
|
"logits/chosen": 6257.791015625, |
|
"logits/rejected": 5275.78759765625, |
|
"logps/chosen": -488.2510681152344, |
|
"logps/rejected": -594.5584106445312, |
|
"loss": 0.5021, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.759234070777893, |
|
"rewards/margins": 1.2129390239715576, |
|
"rewards/rejected": -2.9721732139587402, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.9578644333943994, |
|
"grad_norm": 68.54918801823914, |
|
"learning_rate": 2.667253017941018e-09, |
|
"logits/chosen": 6221.16015625, |
|
"logits/rejected": 4841.1064453125, |
|
"logps/chosen": -486.86309814453125, |
|
"logps/rejected": -553.6325073242188, |
|
"loss": 0.4657, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.8351036310195923, |
|
"rewards/margins": 0.9869117736816406, |
|
"rewards/rejected": -2.8220152854919434, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.9630986652708715, |
|
"grad_norm": 81.5491614635087, |
|
"learning_rate": 2.0429686245045097e-09, |
|
"logits/chosen": 6046.38037109375, |
|
"logits/rejected": 4651.4619140625, |
|
"logps/chosen": -524.8812255859375, |
|
"logps/rejected": -546.822265625, |
|
"loss": 0.519, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -1.8524370193481445, |
|
"rewards/margins": 0.9590626955032349, |
|
"rewards/rejected": -2.811499834060669, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.9683328971473436, |
|
"grad_norm": 44.064739740774314, |
|
"learning_rate": 1.5014998653141708e-09, |
|
"logits/chosen": 5743.37060546875, |
|
"logits/rejected": 4843.56884765625, |
|
"logps/chosen": -482.69586181640625, |
|
"logps/rejected": -556.3638916015625, |
|
"loss": 0.4936, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.790981650352478, |
|
"rewards/margins": 1.2408983707427979, |
|
"rewards/rejected": -3.0318799018859863, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.9735671290238157, |
|
"grad_norm": 42.50973909426173, |
|
"learning_rate": 1.0430275865371263e-09, |
|
"logits/chosen": 5961.98095703125, |
|
"logits/rejected": 4932.6357421875, |
|
"logps/chosen": -443.4388732910156, |
|
"logps/rejected": -550.4918212890625, |
|
"loss": 0.455, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.813359260559082, |
|
"rewards/margins": 1.199947476387024, |
|
"rewards/rejected": -3.0133066177368164, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.9788013609002879, |
|
"grad_norm": 56.66273501138923, |
|
"learning_rate": 6.677049141901314e-10, |
|
"logits/chosen": 4880.40576171875, |
|
"logits/rejected": 4685.27197265625, |
|
"logps/chosen": -416.60089111328125, |
|
"logps/rejected": -539.0184936523438, |
|
"loss": 0.4833, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.7454957962036133, |
|
"rewards/margins": 1.0450434684753418, |
|
"rewards/rejected": -2.790539264678955, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.98403559277676, |
|
"grad_norm": 38.32090090248521, |
|
"learning_rate": 3.7565720299687077e-10, |
|
"logits/chosen": 6260.2158203125, |
|
"logits/rejected": 5280.84912109375, |
|
"logps/chosen": -494.6107482910156, |
|
"logps/rejected": -552.1981201171875, |
|
"loss": 0.4459, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.7900596857070923, |
|
"rewards/margins": 1.042823076248169, |
|
"rewards/rejected": -2.8328824043273926, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.9892698246532321, |
|
"grad_norm": 64.27508021348248, |
|
"learning_rate": 1.6698199452053197e-10, |
|
"logits/chosen": 4530.2236328125, |
|
"logits/rejected": 4498.3388671875, |
|
"logps/chosen": -430.72576904296875, |
|
"logps/rejected": -518.1484985351562, |
|
"loss": 0.4666, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.7727760076522827, |
|
"rewards/margins": 0.8958579301834106, |
|
"rewards/rejected": -2.6686339378356934, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.9945040565297043, |
|
"grad_norm": 70.24208653050465, |
|
"learning_rate": 4.174898458556009e-11, |
|
"logits/chosen": 6094.0966796875, |
|
"logits/rejected": 4274.994140625, |
|
"logps/chosen": -462.11920166015625, |
|
"logps/rejected": -517.6158447265625, |
|
"loss": 0.4826, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.8968086242675781, |
|
"rewards/margins": 0.9447473287582397, |
|
"rewards/rejected": -2.8415558338165283, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.9997382884061764, |
|
"grad_norm": 90.73252809696429, |
|
"learning_rate": 0.0, |
|
"logits/chosen": 6198.6337890625, |
|
"logits/rejected": 5036.7548828125, |
|
"logps/chosen": -491.354736328125, |
|
"logps/rejected": -579.1531982421875, |
|
"loss": 0.497, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.9158384799957275, |
|
"rewards/margins": 1.0406345129013062, |
|
"rewards/rejected": -2.956472873687744, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 0.9997382884061764, |
|
"step": 1910, |
|
"total_flos": 0.0, |
|
"train_loss": 0.5203473493066758, |
|
"train_runtime": 16903.37, |
|
"train_samples_per_second": 3.617, |
|
"train_steps_per_second": 0.113 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 1910, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 1000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|