|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 100, |
|
"global_step": 3821, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.00026171159382360636, |
|
"grad_norm": 1.999703049659729, |
|
"learning_rate": 1.3054830287206266e-09, |
|
"logits/chosen": -2.9875593185424805, |
|
"logits/rejected": -2.936753749847412, |
|
"logps/chosen": -307.4898681640625, |
|
"logps/rejected": -392.088623046875, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0026171159382360636, |
|
"grad_norm": 1.9285504817962646, |
|
"learning_rate": 1.3054830287206264e-08, |
|
"logits/chosen": -2.8448944091796875, |
|
"logits/rejected": -2.83210825920105, |
|
"logps/chosen": -299.1453857421875, |
|
"logps/rejected": -260.9873352050781, |
|
"loss": 0.693, |
|
"rewards/accuracies": 0.4930555522441864, |
|
"rewards/chosen": -0.00014580304559785873, |
|
"rewards/margins": 0.0003282717370893806, |
|
"rewards/rejected": -0.00047407473903149366, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.005234231876472127, |
|
"grad_norm": 2.234384775161743, |
|
"learning_rate": 2.610966057441253e-08, |
|
"logits/chosen": -2.861093044281006, |
|
"logits/rejected": -2.826277732849121, |
|
"logps/chosen": -325.42889404296875, |
|
"logps/rejected": -252.72314453125, |
|
"loss": 0.6928, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": 0.00027085753390565515, |
|
"rewards/margins": 0.0006726925494149327, |
|
"rewards/rejected": -0.00040183504461310804, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.007851347814708191, |
|
"grad_norm": 2.5200695991516113, |
|
"learning_rate": 3.91644908616188e-08, |
|
"logits/chosen": -2.8650269508361816, |
|
"logits/rejected": -2.839594841003418, |
|
"logps/chosen": -269.79888916015625, |
|
"logps/rejected": -268.51544189453125, |
|
"loss": 0.6928, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.0004993680049665272, |
|
"rewards/margins": 0.0007416309672407806, |
|
"rewards/rejected": -0.00024226296227425337, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.010468463752944255, |
|
"grad_norm": 1.6392391920089722, |
|
"learning_rate": 5.221932114882506e-08, |
|
"logits/chosen": -2.8317809104919434, |
|
"logits/rejected": -2.8215935230255127, |
|
"logps/chosen": -233.3176727294922, |
|
"logps/rejected": -238.38671875, |
|
"loss": 0.6929, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -3.822711369139142e-05, |
|
"rewards/margins": 0.000457162968814373, |
|
"rewards/rejected": -0.0004953901516273618, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.01308557969118032, |
|
"grad_norm": 1.624583125114441, |
|
"learning_rate": 6.527415143603133e-08, |
|
"logits/chosen": -2.865053176879883, |
|
"logits/rejected": -2.852184295654297, |
|
"logps/chosen": -290.0357360839844, |
|
"logps/rejected": -253.96719360351562, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -0.00021998901502229273, |
|
"rewards/margins": 8.350692223757505e-05, |
|
"rewards/rejected": -0.00030349590815603733, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.015702695629416383, |
|
"grad_norm": 1.7673835754394531, |
|
"learning_rate": 7.83289817232376e-08, |
|
"logits/chosen": -2.8233509063720703, |
|
"logits/rejected": -2.809717893600464, |
|
"logps/chosen": -273.7070617675781, |
|
"logps/rejected": -246.9080352783203, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.00012425810564309359, |
|
"rewards/margins": 6.1127066146582365e-06, |
|
"rewards/rejected": -0.00013037076860200614, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.018319811567652448, |
|
"grad_norm": 1.7462002038955688, |
|
"learning_rate": 9.138381201044386e-08, |
|
"logits/chosen": -2.8822834491729736, |
|
"logits/rejected": -2.8470146656036377, |
|
"logps/chosen": -293.1849060058594, |
|
"logps/rejected": -266.12908935546875, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.00014021484821569175, |
|
"rewards/margins": 4.102182720089331e-05, |
|
"rewards/rejected": -0.00018123674090020359, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.02093692750588851, |
|
"grad_norm": 2.281116008758545, |
|
"learning_rate": 1.0443864229765012e-07, |
|
"logits/chosen": -2.820223331451416, |
|
"logits/rejected": -2.797712564468384, |
|
"logps/chosen": -279.3045959472656, |
|
"logps/rejected": -266.4049072265625, |
|
"loss": 0.6932, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -0.00035889382706955075, |
|
"rewards/margins": -3.7797075492562726e-05, |
|
"rewards/rejected": -0.00032109676976688206, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.023554043444124574, |
|
"grad_norm": 1.8048748970031738, |
|
"learning_rate": 1.174934725848564e-07, |
|
"logits/chosen": -2.834364652633667, |
|
"logits/rejected": -2.821197032928467, |
|
"logps/chosen": -270.66107177734375, |
|
"logps/rejected": -251.8137664794922, |
|
"loss": 0.693, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -2.5717377866385505e-05, |
|
"rewards/margins": 0.00027994689298793674, |
|
"rewards/rejected": -0.00030566431814804673, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.02617115938236064, |
|
"grad_norm": 1.8376109600067139, |
|
"learning_rate": 1.3054830287206266e-07, |
|
"logits/chosen": -2.8485753536224365, |
|
"logits/rejected": -2.8414525985717773, |
|
"logps/chosen": -267.0416259765625, |
|
"logps/rejected": -248.66622924804688, |
|
"loss": 0.6929, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.00016696630336809903, |
|
"rewards/margins": 0.0004185012076050043, |
|
"rewards/rejected": -0.0005854673800058663, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.02617115938236064, |
|
"eval_logits/chosen": -2.8661274909973145, |
|
"eval_logits/rejected": -2.8388071060180664, |
|
"eval_logps/chosen": -282.74957275390625, |
|
"eval_logps/rejected": -261.47882080078125, |
|
"eval_loss": 0.693004846572876, |
|
"eval_rewards/accuracies": 0.5249999761581421, |
|
"eval_rewards/chosen": -0.00011926326260436326, |
|
"eval_rewards/margins": 0.0002895805810112506, |
|
"eval_rewards/rejected": -0.00040884382906369865, |
|
"eval_runtime": 692.2735, |
|
"eval_samples_per_second": 2.889, |
|
"eval_steps_per_second": 0.361, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.028788275320596704, |
|
"grad_norm": 2.015868663787842, |
|
"learning_rate": 1.4360313315926893e-07, |
|
"logits/chosen": -2.856309652328491, |
|
"logits/rejected": -2.823089361190796, |
|
"logps/chosen": -307.3843994140625, |
|
"logps/rejected": -257.291015625, |
|
"loss": 0.6932, |
|
"rewards/accuracies": 0.48124998807907104, |
|
"rewards/chosen": -0.0002652711991686374, |
|
"rewards/margins": -0.00011966088641202077, |
|
"rewards/rejected": -0.00014561018906533718, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.031405391258832765, |
|
"grad_norm": 1.7159242630004883, |
|
"learning_rate": 1.566579634464752e-07, |
|
"logits/chosen": -2.869659423828125, |
|
"logits/rejected": -2.8464877605438232, |
|
"logps/chosen": -310.60089111328125, |
|
"logps/rejected": -287.7904357910156, |
|
"loss": 0.6929, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": 0.0001522126840427518, |
|
"rewards/margins": 0.0004031356074847281, |
|
"rewards/rejected": -0.00025092283613048494, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.03402250719706883, |
|
"grad_norm": 2.0958242416381836, |
|
"learning_rate": 1.6971279373368143e-07, |
|
"logits/chosen": -2.850337266921997, |
|
"logits/rejected": -2.8188374042510986, |
|
"logps/chosen": -271.6417236328125, |
|
"logps/rejected": -269.60174560546875, |
|
"loss": 0.6928, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": 0.00013120910443831235, |
|
"rewards/margins": 0.0006835443200543523, |
|
"rewards/rejected": -0.0005523352883756161, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.036639623135304895, |
|
"grad_norm": 1.8925613164901733, |
|
"learning_rate": 1.8276762402088773e-07, |
|
"logits/chosen": -2.8673295974731445, |
|
"logits/rejected": -2.8122167587280273, |
|
"logps/chosen": -291.46307373046875, |
|
"logps/rejected": -247.7669677734375, |
|
"loss": 0.6927, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.00036148293293081224, |
|
"rewards/margins": 0.0009279497899115086, |
|
"rewards/rejected": -0.0005664670607075095, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.03925673907354096, |
|
"grad_norm": 1.9597433805465698, |
|
"learning_rate": 1.95822454308094e-07, |
|
"logits/chosen": -2.8569109439849854, |
|
"logits/rejected": -2.837003707885742, |
|
"logps/chosen": -298.9459228515625, |
|
"logps/rejected": -256.0478515625, |
|
"loss": 0.6927, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": 0.0006078753503970802, |
|
"rewards/margins": 0.0009616016177460551, |
|
"rewards/rejected": -0.00035372626734897494, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.04187385501177702, |
|
"grad_norm": 1.913694977760315, |
|
"learning_rate": 2.0887728459530023e-07, |
|
"logits/chosen": -2.864971876144409, |
|
"logits/rejected": -2.8458945751190186, |
|
"logps/chosen": -275.124755859375, |
|
"logps/rejected": -275.0151062011719, |
|
"loss": 0.6926, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": 0.00012734555639326572, |
|
"rewards/margins": 0.001163811655715108, |
|
"rewards/rejected": -0.0010364660993218422, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.04449097095001309, |
|
"grad_norm": 2.1846537590026855, |
|
"learning_rate": 2.2193211488250652e-07, |
|
"logits/chosen": -2.822680950164795, |
|
"logits/rejected": -2.8042876720428467, |
|
"logps/chosen": -236.7074432373047, |
|
"logps/rejected": -238.3466339111328, |
|
"loss": 0.6927, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.0003129563410766423, |
|
"rewards/margins": 0.0008108107140287757, |
|
"rewards/rejected": -0.0011237671133130789, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.04710808688824915, |
|
"grad_norm": 1.6035895347595215, |
|
"learning_rate": 2.349869451697128e-07, |
|
"logits/chosen": -2.850816249847412, |
|
"logits/rejected": -2.823718309402466, |
|
"logps/chosen": -276.2500915527344, |
|
"logps/rejected": -259.9451904296875, |
|
"loss": 0.6927, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.0001872165739769116, |
|
"rewards/margins": 0.0008747532265260816, |
|
"rewards/rejected": -0.001061969785951078, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.04972520282648522, |
|
"grad_norm": 3.182461738586426, |
|
"learning_rate": 2.4804177545691903e-07, |
|
"logits/chosen": -2.8869190216064453, |
|
"logits/rejected": -2.8687491416931152, |
|
"logps/chosen": -290.9490661621094, |
|
"logps/rejected": -257.3797302246094, |
|
"loss": 0.6927, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.0006612293072976172, |
|
"rewards/margins": 0.000992011046037078, |
|
"rewards/rejected": -0.00033078185515478253, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.05234231876472128, |
|
"grad_norm": 1.8618322610855103, |
|
"learning_rate": 2.610966057441253e-07, |
|
"logits/chosen": -2.837772846221924, |
|
"logits/rejected": -2.8276214599609375, |
|
"logps/chosen": -267.96173095703125, |
|
"logps/rejected": -225.5831756591797, |
|
"loss": 0.6923, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.0006539617897942662, |
|
"rewards/margins": 0.0017792375292629004, |
|
"rewards/rejected": -0.0011252757394686341, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.05234231876472128, |
|
"eval_logits/chosen": -2.8652713298797607, |
|
"eval_logits/rejected": -2.837984800338745, |
|
"eval_logps/chosen": -282.66241455078125, |
|
"eval_logps/rejected": -261.5315856933594, |
|
"eval_loss": 0.6923088431358337, |
|
"eval_rewards/accuracies": 0.6050000190734863, |
|
"eval_rewards/chosen": 0.0007522286614403129, |
|
"eval_rewards/margins": 0.001688659773208201, |
|
"eval_rewards/rejected": -0.0009364310535602272, |
|
"eval_runtime": 693.0899, |
|
"eval_samples_per_second": 2.886, |
|
"eval_steps_per_second": 0.361, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.05495943470295734, |
|
"grad_norm": 1.7776113748550415, |
|
"learning_rate": 2.7415143603133156e-07, |
|
"logits/chosen": -2.8762500286102295, |
|
"logits/rejected": -2.8429489135742188, |
|
"logps/chosen": -275.98614501953125, |
|
"logps/rejected": -245.2783660888672, |
|
"loss": 0.6922, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.0009210329735651612, |
|
"rewards/margins": 0.0018816586816683412, |
|
"rewards/rejected": -0.0009606255334801972, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.05757655064119341, |
|
"grad_norm": 1.6921358108520508, |
|
"learning_rate": 2.8720626631853785e-07, |
|
"logits/chosen": -2.817211627960205, |
|
"logits/rejected": -2.811617851257324, |
|
"logps/chosen": -274.0498962402344, |
|
"logps/rejected": -242.93923950195312, |
|
"loss": 0.6919, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.0014726849040016532, |
|
"rewards/margins": 0.0024847507011145353, |
|
"rewards/rejected": -0.0010120656806975603, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.06019366657942947, |
|
"grad_norm": 2.0040206909179688, |
|
"learning_rate": 3.002610966057441e-07, |
|
"logits/chosen": -2.885439157485962, |
|
"logits/rejected": -2.86034893989563, |
|
"logps/chosen": -322.754150390625, |
|
"logps/rejected": -285.758056640625, |
|
"loss": 0.6922, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.0018624020740389824, |
|
"rewards/margins": 0.0018660586792975664, |
|
"rewards/rejected": -3.6565586469805567e-06, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.06281078251766553, |
|
"grad_norm": 1.809605360031128, |
|
"learning_rate": 3.133159268929504e-07, |
|
"logits/chosen": -2.8532462120056152, |
|
"logits/rejected": -2.8391811847686768, |
|
"logps/chosen": -312.47088623046875, |
|
"logps/rejected": -297.48907470703125, |
|
"loss": 0.6921, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": 0.0017323732608929276, |
|
"rewards/margins": 0.0021942437160760164, |
|
"rewards/rejected": -0.00046187033876776695, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.06542789845590159, |
|
"grad_norm": 1.6686596870422363, |
|
"learning_rate": 3.263707571801567e-07, |
|
"logits/chosen": -2.814990282058716, |
|
"logits/rejected": -2.81905198097229, |
|
"logps/chosen": -277.08941650390625, |
|
"logps/rejected": -249.03414916992188, |
|
"loss": 0.6915, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.001977517269551754, |
|
"rewards/margins": 0.003367725061252713, |
|
"rewards/rejected": -0.0013902074424549937, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.06804501439413765, |
|
"grad_norm": 1.5935229063034058, |
|
"learning_rate": 3.3942558746736286e-07, |
|
"logits/chosen": -2.8718338012695312, |
|
"logits/rejected": -2.8251404762268066, |
|
"logps/chosen": -297.3100280761719, |
|
"logps/rejected": -277.9830017089844, |
|
"loss": 0.6916, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.0025989424902945757, |
|
"rewards/margins": 0.0032064050901681185, |
|
"rewards/rejected": -0.00060746242525056, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.07066213033237373, |
|
"grad_norm": 1.4248483180999756, |
|
"learning_rate": 3.5248041775456916e-07, |
|
"logits/chosen": -2.8370590209960938, |
|
"logits/rejected": -2.8248658180236816, |
|
"logps/chosen": -281.2889709472656, |
|
"logps/rejected": -245.48855590820312, |
|
"loss": 0.6901, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": 0.003083507064729929, |
|
"rewards/margins": 0.006150919944047928, |
|
"rewards/rejected": -0.003067413344979286, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.07327924627060979, |
|
"grad_norm": 1.725456714630127, |
|
"learning_rate": 3.6553524804177545e-07, |
|
"logits/chosen": -2.8781139850616455, |
|
"logits/rejected": -2.8350632190704346, |
|
"logps/chosen": -276.51568603515625, |
|
"logps/rejected": -253.5542755126953, |
|
"loss": 0.6906, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.0024674157612025738, |
|
"rewards/margins": 0.005187267437577248, |
|
"rewards/rejected": -0.00271985144354403, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.07589636220884585, |
|
"grad_norm": 1.9681357145309448, |
|
"learning_rate": 3.785900783289817e-07, |
|
"logits/chosen": -2.849203586578369, |
|
"logits/rejected": -2.838613986968994, |
|
"logps/chosen": -304.06463623046875, |
|
"logps/rejected": -279.3326721191406, |
|
"loss": 0.6901, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.0034332734066993, |
|
"rewards/margins": 0.0062034172005951405, |
|
"rewards/rejected": -0.0027701437938958406, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.07851347814708191, |
|
"grad_norm": 2.0513315200805664, |
|
"learning_rate": 3.91644908616188e-07, |
|
"logits/chosen": -2.8060500621795654, |
|
"logits/rejected": -2.76236629486084, |
|
"logps/chosen": -266.20794677734375, |
|
"logps/rejected": -248.80886840820312, |
|
"loss": 0.6898, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": 0.0026042419485747814, |
|
"rewards/margins": 0.006667142268270254, |
|
"rewards/rejected": -0.004062901251018047, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.07851347814708191, |
|
"eval_logits/chosen": -2.8622689247131348, |
|
"eval_logits/rejected": -2.834963321685791, |
|
"eval_logps/chosen": -282.39178466796875, |
|
"eval_logps/rejected": -261.6759948730469, |
|
"eval_loss": 0.6902644038200378, |
|
"eval_rewards/accuracies": 0.6639999747276306, |
|
"eval_rewards/chosen": 0.0034584649838507175, |
|
"eval_rewards/margins": 0.0058389026671648026, |
|
"eval_rewards/rejected": -0.0023804374504834414, |
|
"eval_runtime": 692.5367, |
|
"eval_samples_per_second": 2.888, |
|
"eval_steps_per_second": 0.361, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.08113059408531798, |
|
"grad_norm": 2.1205692291259766, |
|
"learning_rate": 4.046997389033943e-07, |
|
"logits/chosen": -2.893097400665283, |
|
"logits/rejected": -2.87463641166687, |
|
"logps/chosen": -306.21636962890625, |
|
"logps/rejected": -250.2729949951172, |
|
"loss": 0.6888, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.004871034994721413, |
|
"rewards/margins": 0.008721152320504189, |
|
"rewards/rejected": -0.003850117791444063, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.08374771002355404, |
|
"grad_norm": 1.7468680143356323, |
|
"learning_rate": 4.1775456919060046e-07, |
|
"logits/chosen": -2.873706817626953, |
|
"logits/rejected": -2.8421998023986816, |
|
"logps/chosen": -272.94659423828125, |
|
"logps/rejected": -255.0898895263672, |
|
"loss": 0.6904, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": 0.00492675369605422, |
|
"rewards/margins": 0.005588999018073082, |
|
"rewards/rejected": -0.000662245147395879, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.08636482596179011, |
|
"grad_norm": 1.7784926891326904, |
|
"learning_rate": 4.3080939947780675e-07, |
|
"logits/chosen": -2.8389968872070312, |
|
"logits/rejected": -2.8390631675720215, |
|
"logps/chosen": -277.24652099609375, |
|
"logps/rejected": -250.9720458984375, |
|
"loss": 0.6892, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.007157427724450827, |
|
"rewards/margins": 0.00795576348900795, |
|
"rewards/rejected": -0.0007983351242728531, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.08898194190002617, |
|
"grad_norm": 2.0122432708740234, |
|
"learning_rate": 4.4386422976501305e-07, |
|
"logits/chosen": -2.868762254714966, |
|
"logits/rejected": -2.8562684059143066, |
|
"logps/chosen": -306.8142395019531, |
|
"logps/rejected": -284.90679931640625, |
|
"loss": 0.6886, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.00881933607161045, |
|
"rewards/margins": 0.009326713159680367, |
|
"rewards/rejected": -0.000507376913446933, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.09159905783826224, |
|
"grad_norm": 1.7484519481658936, |
|
"learning_rate": 4.569190600522193e-07, |
|
"logits/chosen": -2.824993848800659, |
|
"logits/rejected": -2.797851085662842, |
|
"logps/chosen": -309.11224365234375, |
|
"logps/rejected": -296.3442687988281, |
|
"loss": 0.6894, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.009017640724778175, |
|
"rewards/margins": 0.00765979802235961, |
|
"rewards/rejected": 0.0013578429352492094, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.0942161737764983, |
|
"grad_norm": 1.2647193670272827, |
|
"learning_rate": 4.699738903394256e-07, |
|
"logits/chosen": -2.8344480991363525, |
|
"logits/rejected": -2.816068649291992, |
|
"logps/chosen": -256.1959533691406, |
|
"logps/rejected": -236.88818359375, |
|
"loss": 0.6883, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.007074951194226742, |
|
"rewards/margins": 0.009867229498922825, |
|
"rewards/rejected": -0.0027922778390347958, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.09683328971473436, |
|
"grad_norm": 2.0885772705078125, |
|
"learning_rate": 4.830287206266319e-07, |
|
"logits/chosen": -2.8475875854492188, |
|
"logits/rejected": -2.8186795711517334, |
|
"logps/chosen": -295.1861572265625, |
|
"logps/rejected": -251.5151824951172, |
|
"loss": 0.6856, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 0.010460047982633114, |
|
"rewards/margins": 0.015231410041451454, |
|
"rewards/rejected": -0.004771359730511904, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.09945040565297043, |
|
"grad_norm": 1.8870456218719482, |
|
"learning_rate": 4.960835509138381e-07, |
|
"logits/chosen": -2.8488352298736572, |
|
"logits/rejected": -2.7997212409973145, |
|
"logps/chosen": -315.6346740722656, |
|
"logps/rejected": -279.5706481933594, |
|
"loss": 0.6871, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.011897383257746696, |
|
"rewards/margins": 0.012441580183804035, |
|
"rewards/rejected": -0.0005441965768113732, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.1020675215912065, |
|
"grad_norm": 2.3549890518188477, |
|
"learning_rate": 4.999948856244767e-07, |
|
"logits/chosen": -2.8280773162841797, |
|
"logits/rejected": -2.8224241733551025, |
|
"logps/chosen": -297.057373046875, |
|
"logps/rejected": -278.00421142578125, |
|
"loss": 0.6836, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 0.01873602904379368, |
|
"rewards/margins": 0.01945691928267479, |
|
"rewards/rejected": -0.0007208908209577203, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.10468463752944256, |
|
"grad_norm": 1.818867802619934, |
|
"learning_rate": 4.999698361256577e-07, |
|
"logits/chosen": -2.851010799407959, |
|
"logits/rejected": -2.8151259422302246, |
|
"logps/chosen": -279.1597900390625, |
|
"logps/rejected": -237.5978546142578, |
|
"loss": 0.6872, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.016593072563409805, |
|
"rewards/margins": 0.012265140190720558, |
|
"rewards/rejected": 0.004327933304011822, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.10468463752944256, |
|
"eval_logits/chosen": -2.857703685760498, |
|
"eval_logits/rejected": -2.830756425857544, |
|
"eval_logps/chosen": -281.0899963378906, |
|
"eval_logps/rejected": -261.22564697265625, |
|
"eval_loss": 0.6861628293991089, |
|
"eval_rewards/accuracies": 0.6669999957084656, |
|
"eval_rewards/chosen": 0.01647624559700489, |
|
"eval_rewards/margins": 0.014353430829942226, |
|
"eval_rewards/rejected": 0.002122814767062664, |
|
"eval_runtime": 692.2781, |
|
"eval_samples_per_second": 2.889, |
|
"eval_steps_per_second": 0.361, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.10730175346767862, |
|
"grad_norm": 1.9545940160751343, |
|
"learning_rate": 4.99923914217458e-07, |
|
"logits/chosen": -2.818399667739868, |
|
"logits/rejected": -2.802830457687378, |
|
"logps/chosen": -256.24957275390625, |
|
"logps/rejected": -256.09527587890625, |
|
"loss": 0.6893, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.013771469704806805, |
|
"rewards/margins": 0.008097216486930847, |
|
"rewards/rejected": 0.005674251355230808, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.10991886940591468, |
|
"grad_norm": 4.077869415283203, |
|
"learning_rate": 4.99857123734344e-07, |
|
"logits/chosen": -2.8153655529022217, |
|
"logits/rejected": -2.769317865371704, |
|
"logps/chosen": -244.53890991210938, |
|
"logps/rejected": -238.0004119873047, |
|
"loss": 0.6855, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": 0.015213017351925373, |
|
"rewards/margins": 0.015682024881243706, |
|
"rewards/rejected": -0.00046900735469534993, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.11253598534415074, |
|
"grad_norm": 2.243114471435547, |
|
"learning_rate": 4.997694702533016e-07, |
|
"logits/chosen": -2.837740182876587, |
|
"logits/rejected": -2.806856870651245, |
|
"logps/chosen": -293.7519836425781, |
|
"logps/rejected": -272.25494384765625, |
|
"loss": 0.6835, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.022876007482409477, |
|
"rewards/margins": 0.019848225638270378, |
|
"rewards/rejected": 0.0030277802143245935, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.11515310128238682, |
|
"grad_norm": 1.829640507698059, |
|
"learning_rate": 4.996609610933712e-07, |
|
"logits/chosen": -2.875370740890503, |
|
"logits/rejected": -2.8540024757385254, |
|
"logps/chosen": -285.1123962402344, |
|
"logps/rejected": -256.6170654296875, |
|
"loss": 0.6833, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.02266586944460869, |
|
"rewards/margins": 0.020275097340345383, |
|
"rewards/rejected": 0.0023907723370939493, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.11777021722062288, |
|
"grad_norm": 1.756147861480713, |
|
"learning_rate": 4.995316053150366e-07, |
|
"logits/chosen": -2.806842088699341, |
|
"logits/rejected": -2.8101210594177246, |
|
"logps/chosen": -288.1036376953125, |
|
"logps/rejected": -259.46014404296875, |
|
"loss": 0.6824, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.029574494808912277, |
|
"rewards/margins": 0.022273657843470573, |
|
"rewards/rejected": 0.007300837431102991, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.12038733315885894, |
|
"grad_norm": 3.1120874881744385, |
|
"learning_rate": 4.99381413719468e-07, |
|
"logits/chosen": -2.825704574584961, |
|
"logits/rejected": -2.81204891204834, |
|
"logps/chosen": -279.86334228515625, |
|
"logps/rejected": -268.80755615234375, |
|
"loss": 0.6796, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.029285842552781105, |
|
"rewards/margins": 0.027944009751081467, |
|
"rewards/rejected": 0.0013418343150988221, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.123004449097095, |
|
"grad_norm": 1.9212427139282227, |
|
"learning_rate": 4.992103988476205e-07, |
|
"logits/chosen": -2.83656644821167, |
|
"logits/rejected": -2.810007333755493, |
|
"logps/chosen": -257.7132873535156, |
|
"logps/rejected": -245.3390655517578, |
|
"loss": 0.6831, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.024322878569364548, |
|
"rewards/margins": 0.020839061588048935, |
|
"rewards/rejected": 0.003483818843960762, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.12562156503533106, |
|
"grad_norm": 2.0051708221435547, |
|
"learning_rate": 4.990185749791864e-07, |
|
"logits/chosen": -2.868682622909546, |
|
"logits/rejected": -2.836199998855591, |
|
"logps/chosen": -271.63922119140625, |
|
"logps/rejected": -274.00189208984375, |
|
"loss": 0.68, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": 0.027854889631271362, |
|
"rewards/margins": 0.0271223783493042, |
|
"rewards/rejected": 0.0007325109909288585, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.12823868097356714, |
|
"grad_norm": 2.0355913639068604, |
|
"learning_rate": 4.988059581314039e-07, |
|
"logits/chosen": -2.8479950428009033, |
|
"logits/rejected": -2.8285024166107178, |
|
"logps/chosen": -305.7145690917969, |
|
"logps/rejected": -269.5832214355469, |
|
"loss": 0.6789, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.02704049088060856, |
|
"rewards/margins": 0.029708972200751305, |
|
"rewards/rejected": -0.002668480621650815, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.13085579691180318, |
|
"grad_norm": 1.996235966682434, |
|
"learning_rate": 4.985725660577184e-07, |
|
"logits/chosen": -2.8617165088653564, |
|
"logits/rejected": -2.843017101287842, |
|
"logps/chosen": -288.36846923828125, |
|
"logps/rejected": -249.8210906982422, |
|
"loss": 0.6783, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": 0.023136448115110397, |
|
"rewards/margins": 0.031000768765807152, |
|
"rewards/rejected": -0.007864321582019329, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.13085579691180318, |
|
"eval_logits/chosen": -2.848633289337158, |
|
"eval_logits/rejected": -2.8214972019195557, |
|
"eval_logps/chosen": -280.6480712890625, |
|
"eval_logps/rejected": -262.0230407714844, |
|
"eval_loss": 0.6803756356239319, |
|
"eval_rewards/accuracies": 0.6834999918937683, |
|
"eval_rewards/chosen": 0.020895304158329964, |
|
"eval_rewards/margins": 0.026746317744255066, |
|
"eval_rewards/rejected": -0.005851015914231539, |
|
"eval_runtime": 691.0122, |
|
"eval_samples_per_second": 2.894, |
|
"eval_steps_per_second": 0.362, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.13347291285003926, |
|
"grad_norm": 2.2953689098358154, |
|
"learning_rate": 4.983184182463008e-07, |
|
"logits/chosen": -2.83900785446167, |
|
"logits/rejected": -2.8163068294525146, |
|
"logps/chosen": -292.3056335449219, |
|
"logps/rejected": -256.3818359375, |
|
"loss": 0.6779, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.0237285066395998, |
|
"rewards/margins": 0.03204946964979172, |
|
"rewards/rejected": -0.008320963010191917, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.1360900287882753, |
|
"grad_norm": 2.152860164642334, |
|
"learning_rate": 4.980435359184203e-07, |
|
"logits/chosen": -2.8620104789733887, |
|
"logits/rejected": -2.8637924194335938, |
|
"logps/chosen": -285.1622314453125, |
|
"logps/rejected": -270.9977722167969, |
|
"loss": 0.6791, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.026320820674300194, |
|
"rewards/margins": 0.029663830995559692, |
|
"rewards/rejected": -0.0033430135808885098, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.13870714472651138, |
|
"grad_norm": 2.3760368824005127, |
|
"learning_rate": 4.977479420266723e-07, |
|
"logits/chosen": -2.8074328899383545, |
|
"logits/rejected": -2.8127429485321045, |
|
"logps/chosen": -278.2021484375, |
|
"logps/rejected": -288.5596618652344, |
|
"loss": 0.6792, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.02414657548069954, |
|
"rewards/margins": 0.02932720258831978, |
|
"rewards/rejected": -0.005180628038942814, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.14132426066474746, |
|
"grad_norm": 1.8068273067474365, |
|
"learning_rate": 4.974316612530614e-07, |
|
"logits/chosen": -2.799464464187622, |
|
"logits/rejected": -2.781719446182251, |
|
"logps/chosen": -296.43017578125, |
|
"logps/rejected": -260.1778869628906, |
|
"loss": 0.6685, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": 0.03263556957244873, |
|
"rewards/margins": 0.05155158042907715, |
|
"rewards/rejected": -0.018916018307209015, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.1439413766029835, |
|
"grad_norm": 2.295518636703491, |
|
"learning_rate": 4.970947200069415e-07, |
|
"logits/chosen": -2.8136024475097656, |
|
"logits/rejected": -2.8002548217773438, |
|
"logps/chosen": -296.8650817871094, |
|
"logps/rejected": -277.0992431640625, |
|
"loss": 0.6793, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": 0.026846662163734436, |
|
"rewards/margins": 0.029769038781523705, |
|
"rewards/rejected": -0.0029223733581602573, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.14655849254121958, |
|
"grad_norm": 1.8040831089019775, |
|
"learning_rate": 4.967371464228095e-07, |
|
"logits/chosen": -2.8747551441192627, |
|
"logits/rejected": -2.8538835048675537, |
|
"logps/chosen": -269.18994140625, |
|
"logps/rejected": -272.37799072265625, |
|
"loss": 0.6782, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": 0.026889195665717125, |
|
"rewards/margins": 0.03184649348258972, |
|
"rewards/rejected": -0.004957299679517746, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.14917560847945563, |
|
"grad_norm": 2.131438970565796, |
|
"learning_rate": 4.963589703579569e-07, |
|
"logits/chosen": -2.899491310119629, |
|
"logits/rejected": -2.8730692863464355, |
|
"logps/chosen": -313.0187072753906, |
|
"logps/rejected": -280.3568420410156, |
|
"loss": 0.6752, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": 0.028542449697852135, |
|
"rewards/margins": 0.03851853683590889, |
|
"rewards/rejected": -0.009976087138056755, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.1517927244176917, |
|
"grad_norm": 1.8194427490234375, |
|
"learning_rate": 4.959602233899761e-07, |
|
"logits/chosen": -2.892979621887207, |
|
"logits/rejected": -2.8543694019317627, |
|
"logps/chosen": -311.68353271484375, |
|
"logps/rejected": -272.5694580078125, |
|
"loss": 0.673, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.035731758922338486, |
|
"rewards/margins": 0.04327362775802612, |
|
"rewards/rejected": -0.007541867904365063, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.15440984035592778, |
|
"grad_norm": 2.1900675296783447, |
|
"learning_rate": 4.955409388141243e-07, |
|
"logits/chosen": -2.8265955448150635, |
|
"logits/rejected": -2.8132894039154053, |
|
"logps/chosen": -273.9072265625, |
|
"logps/rejected": -251.5390167236328, |
|
"loss": 0.6752, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": 0.016455931589007378, |
|
"rewards/margins": 0.03850039094686508, |
|
"rewards/rejected": -0.022044459357857704, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.15702695629416383, |
|
"grad_norm": 1.8198952674865723, |
|
"learning_rate": 4.951011516405429e-07, |
|
"logits/chosen": -2.84102201461792, |
|
"logits/rejected": -2.84004807472229, |
|
"logps/chosen": -265.394775390625, |
|
"logps/rejected": -252.8574676513672, |
|
"loss": 0.6729, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.021321838721632957, |
|
"rewards/margins": 0.04377777501940727, |
|
"rewards/rejected": -0.022455941885709763, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.15702695629416383, |
|
"eval_logits/chosen": -2.8409736156463623, |
|
"eval_logits/rejected": -2.813835382461548, |
|
"eval_logps/chosen": -281.19580078125, |
|
"eval_logps/rejected": -264.16082763671875, |
|
"eval_loss": 0.6732848882675171, |
|
"eval_rewards/accuracies": 0.6840000152587891, |
|
"eval_rewards/chosen": 0.015417821705341339, |
|
"eval_rewards/margins": 0.04264672100543976, |
|
"eval_rewards/rejected": -0.02722889743745327, |
|
"eval_runtime": 691.9111, |
|
"eval_samples_per_second": 2.891, |
|
"eval_steps_per_second": 0.361, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.1596440722323999, |
|
"grad_norm": 2.117947578430176, |
|
"learning_rate": 4.946408985913344e-07, |
|
"logits/chosen": -2.834245204925537, |
|
"logits/rejected": -2.8125996589660645, |
|
"logps/chosen": -262.54144287109375, |
|
"logps/rejected": -246.34860229492188, |
|
"loss": 0.6734, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.020137300714850426, |
|
"rewards/margins": 0.04266170784831047, |
|
"rewards/rejected": -0.022524405270814896, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.16226118817063595, |
|
"grad_norm": 2.218667507171631, |
|
"learning_rate": 4.941602180974958e-07, |
|
"logits/chosen": -2.8357930183410645, |
|
"logits/rejected": -2.7973721027374268, |
|
"logps/chosen": -303.65606689453125, |
|
"logps/rejected": -245.33108520507812, |
|
"loss": 0.6696, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.019601870328187943, |
|
"rewards/margins": 0.049685824662446976, |
|
"rewards/rejected": -0.030083950608968735, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.16487830410887203, |
|
"grad_norm": 1.9840420484542847, |
|
"learning_rate": 4.936591502957101e-07, |
|
"logits/chosen": -2.8378233909606934, |
|
"logits/rejected": -2.8140475749969482, |
|
"logps/chosen": -261.1944580078125, |
|
"logps/rejected": -257.957763671875, |
|
"loss": 0.6647, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.024741780012845993, |
|
"rewards/margins": 0.06145521253347397, |
|
"rewards/rejected": -0.036713436245918274, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.16749542004710807, |
|
"grad_norm": 2.034658432006836, |
|
"learning_rate": 4.931377370249945e-07, |
|
"logits/chosen": -2.845576763153076, |
|
"logits/rejected": -2.78796124458313, |
|
"logps/chosen": -281.0826110839844, |
|
"logps/rejected": -263.23370361328125, |
|
"loss": 0.6673, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -6.524250056827441e-05, |
|
"rewards/margins": 0.05555204302072525, |
|
"rewards/rejected": -0.055617284029722214, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.17011253598534415, |
|
"grad_norm": 2.102283239364624, |
|
"learning_rate": 4.925960218232072e-07, |
|
"logits/chosen": -2.8266994953155518, |
|
"logits/rejected": -2.8046762943267822, |
|
"logps/chosen": -269.2861633300781, |
|
"logps/rejected": -264.4281005859375, |
|
"loss": 0.6646, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": 0.008663799613714218, |
|
"rewards/margins": 0.06168809533119202, |
|
"rewards/rejected": -0.0530242919921875, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.17272965192358022, |
|
"grad_norm": 3.1403772830963135, |
|
"learning_rate": 4.920340499234116e-07, |
|
"logits/chosen": -2.796461343765259, |
|
"logits/rejected": -2.757336139678955, |
|
"logps/chosen": -285.25445556640625, |
|
"logps/rejected": -251.8562469482422, |
|
"loss": 0.6684, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": 0.010964155197143555, |
|
"rewards/margins": 0.05367765575647354, |
|
"rewards/rejected": -0.04271350055932999, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.17534676786181627, |
|
"grad_norm": 1.932573914527893, |
|
"learning_rate": 4.914518682500995e-07, |
|
"logits/chosen": -2.870535373687744, |
|
"logits/rejected": -2.840186595916748, |
|
"logps/chosen": -297.72967529296875, |
|
"logps/rejected": -261.30780029296875, |
|
"loss": 0.661, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.0239148810505867, |
|
"rewards/margins": 0.07002829760313034, |
|
"rewards/rejected": -0.04611341655254364, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.17796388380005235, |
|
"grad_norm": 2.7643067836761475, |
|
"learning_rate": 4.90849525415273e-07, |
|
"logits/chosen": -2.830029249191284, |
|
"logits/rejected": -2.8078887462615967, |
|
"logps/chosen": -288.3429260253906, |
|
"logps/rejected": -245.07369995117188, |
|
"loss": 0.6589, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 0.02092517912387848, |
|
"rewards/margins": 0.07483113557100296, |
|
"rewards/rejected": -0.05390595644712448, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.1805809997382884, |
|
"grad_norm": 2.184591054916382, |
|
"learning_rate": 4.902270717143858e-07, |
|
"logits/chosen": -2.837787628173828, |
|
"logits/rejected": -2.8210721015930176, |
|
"logps/chosen": -255.417724609375, |
|
"logps/rejected": -272.31591796875, |
|
"loss": 0.6509, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": 0.005492637865245342, |
|
"rewards/margins": 0.09071613848209381, |
|
"rewards/rejected": -0.0852234959602356, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.18319811567652447, |
|
"grad_norm": 2.2565648555755615, |
|
"learning_rate": 4.895845591221426e-07, |
|
"logits/chosen": -2.833556652069092, |
|
"logits/rejected": -2.836822032928467, |
|
"logps/chosen": -269.5510559082031, |
|
"logps/rejected": -269.97686767578125, |
|
"loss": 0.6665, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.003929516766220331, |
|
"rewards/margins": 0.058883119374513626, |
|
"rewards/rejected": -0.06281263381242752, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.18319811567652447, |
|
"eval_logits/chosen": -2.8327224254608154, |
|
"eval_logits/rejected": -2.8060340881347656, |
|
"eval_logps/chosen": -283.0862731933594, |
|
"eval_logps/rejected": -268.32659912109375, |
|
"eval_loss": 0.6637989282608032, |
|
"eval_rewards/accuracies": 0.6754999756813049, |
|
"eval_rewards/chosen": -0.0034864526242017746, |
|
"eval_rewards/margins": 0.06540023535490036, |
|
"eval_rewards/rejected": -0.06888668984174728, |
|
"eval_runtime": 691.7822, |
|
"eval_samples_per_second": 2.891, |
|
"eval_steps_per_second": 0.361, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.18581523161476055, |
|
"grad_norm": 2.449979782104492, |
|
"learning_rate": 4.8892204128816e-07, |
|
"logits/chosen": -2.865187644958496, |
|
"logits/rejected": -2.8416965007781982, |
|
"logps/chosen": -281.83489990234375, |
|
"logps/rejected": -273.02984619140625, |
|
"loss": 0.6666, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.0031673975754529238, |
|
"rewards/margins": 0.059172265231609344, |
|
"rewards/rejected": -0.062339670956134796, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.1884323475529966, |
|
"grad_norm": 2.0199317932128906, |
|
"learning_rate": 4.882395735324863e-07, |
|
"logits/chosen": -2.840233325958252, |
|
"logits/rejected": -2.7969911098480225, |
|
"logps/chosen": -281.1783447265625, |
|
"logps/rejected": -274.934326171875, |
|
"loss": 0.6572, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.0017295643920078874, |
|
"rewards/margins": 0.08150311559438705, |
|
"rewards/rejected": -0.07977355271577835, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.19104946349123267, |
|
"grad_norm": 2.187190294265747, |
|
"learning_rate": 4.875372128409829e-07, |
|
"logits/chosen": -2.815016269683838, |
|
"logits/rejected": -2.7854647636413574, |
|
"logps/chosen": -285.82489013671875, |
|
"logps/rejected": -259.6023254394531, |
|
"loss": 0.6616, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.02074645273387432, |
|
"rewards/margins": 0.07150407880544662, |
|
"rewards/rejected": -0.0922505259513855, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.19366657942946872, |
|
"grad_norm": 2.0459957122802734, |
|
"learning_rate": 4.868150178605653e-07, |
|
"logits/chosen": -2.812069892883301, |
|
"logits/rejected": -2.7864902019500732, |
|
"logps/chosen": -246.3455352783203, |
|
"logps/rejected": -221.7488250732422, |
|
"loss": 0.6527, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.03750302642583847, |
|
"rewards/margins": 0.08891085535287857, |
|
"rewards/rejected": -0.12641388177871704, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.1962836953677048, |
|
"grad_norm": 2.3921523094177246, |
|
"learning_rate": 4.860730488943068e-07, |
|
"logits/chosen": -2.7749264240264893, |
|
"logits/rejected": -2.7638156414031982, |
|
"logps/chosen": -253.1526641845703, |
|
"logps/rejected": -256.56072998046875, |
|
"loss": 0.657, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.013170385733246803, |
|
"rewards/margins": 0.08013583719730377, |
|
"rewards/rejected": -0.09330622851848602, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.19890081130594087, |
|
"grad_norm": 2.7103869915008545, |
|
"learning_rate": 4.853113678964021e-07, |
|
"logits/chosen": -2.7963593006134033, |
|
"logits/rejected": -2.786759376525879, |
|
"logps/chosen": -295.2373962402344, |
|
"logps/rejected": -288.03070068359375, |
|
"loss": 0.6532, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.0016003316268324852, |
|
"rewards/margins": 0.09059783071279526, |
|
"rewards/rejected": -0.09219817072153091, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.20151792724417691, |
|
"grad_norm": 2.149914026260376, |
|
"learning_rate": 4.845300384669957e-07, |
|
"logits/chosen": -2.81345534324646, |
|
"logits/rejected": -2.783003807067871, |
|
"logps/chosen": -270.67730712890625, |
|
"logps/rejected": -254.6434326171875, |
|
"loss": 0.6605, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.006530989892780781, |
|
"rewards/margins": 0.07502902299165726, |
|
"rewards/rejected": -0.08156001567840576, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.204135043182413, |
|
"grad_norm": 2.4296960830688477, |
|
"learning_rate": 4.8372912584687e-07, |
|
"logits/chosen": -2.8353335857391357, |
|
"logits/rejected": -2.801575183868408, |
|
"logps/chosen": -300.9684143066406, |
|
"logps/rejected": -283.5567626953125, |
|
"loss": 0.6587, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": 0.0004579909145832062, |
|
"rewards/margins": 0.0798453614115715, |
|
"rewards/rejected": -0.079387366771698, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.20675215912064904, |
|
"grad_norm": 3.0373857021331787, |
|
"learning_rate": 4.829086969119983e-07, |
|
"logits/chosen": -2.8006482124328613, |
|
"logits/rejected": -2.8082146644592285, |
|
"logps/chosen": -276.4783020019531, |
|
"logps/rejected": -276.69720458984375, |
|
"loss": 0.6671, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.024218443781137466, |
|
"rewards/margins": 0.06143224984407425, |
|
"rewards/rejected": -0.08565069735050201, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.2093692750588851, |
|
"grad_norm": 2.1895201206207275, |
|
"learning_rate": 4.820688201679605e-07, |
|
"logits/chosen": -2.8546204566955566, |
|
"logits/rejected": -2.809619426727295, |
|
"logps/chosen": -277.23187255859375, |
|
"logps/rejected": -223.0809783935547, |
|
"loss": 0.6427, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": 0.0034676387440413237, |
|
"rewards/margins": 0.11413818597793579, |
|
"rewards/rejected": -0.1106705442070961, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.2093692750588851, |
|
"eval_logits/chosen": -2.828324317932129, |
|
"eval_logits/rejected": -2.8020219802856445, |
|
"eval_logps/chosen": -284.8824768066406, |
|
"eval_logps/rejected": -272.4747314453125, |
|
"eval_loss": 0.6546491980552673, |
|
"eval_rewards/accuracies": 0.6815000176429749, |
|
"eval_rewards/chosen": -0.02144855633378029, |
|
"eval_rewards/margins": 0.08891918510198593, |
|
"eval_rewards/rejected": -0.11036773025989532, |
|
"eval_runtime": 691.3571, |
|
"eval_samples_per_second": 2.893, |
|
"eval_steps_per_second": 0.362, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.21198639099712116, |
|
"grad_norm": 2.411094903945923, |
|
"learning_rate": 4.812095657442231e-07, |
|
"logits/chosen": -2.8379623889923096, |
|
"logits/rejected": -2.8474135398864746, |
|
"logps/chosen": -292.9294128417969, |
|
"logps/rejected": -291.79937744140625, |
|
"loss": 0.6657, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.03384638577699661, |
|
"rewards/margins": 0.06732925027608871, |
|
"rewards/rejected": -0.10117564350366592, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.21460350693535724, |
|
"grad_norm": 2.2789130210876465, |
|
"learning_rate": 4.803310053882831e-07, |
|
"logits/chosen": -2.820188522338867, |
|
"logits/rejected": -2.8341267108917236, |
|
"logps/chosen": -253.18002319335938, |
|
"logps/rejected": -271.46209716796875, |
|
"loss": 0.6585, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.04002877324819565, |
|
"rewards/margins": 0.08040440827608109, |
|
"rewards/rejected": -0.12043318897485733, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.2172206228735933, |
|
"grad_norm": 2.6294658184051514, |
|
"learning_rate": 4.794332124596775e-07, |
|
"logits/chosen": -2.8491604328155518, |
|
"logits/rejected": -2.8390445709228516, |
|
"logps/chosen": -288.0977478027344, |
|
"logps/rejected": -289.91839599609375, |
|
"loss": 0.6617, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.025598719716072083, |
|
"rewards/margins": 0.0781911239027977, |
|
"rewards/rejected": -0.10378985106945038, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.21983773881182936, |
|
"grad_norm": 2.718003273010254, |
|
"learning_rate": 4.785162619238574e-07, |
|
"logits/chosen": -2.7903778553009033, |
|
"logits/rejected": -2.750192880630493, |
|
"logps/chosen": -271.6007995605469, |
|
"logps/rejected": -255.642822265625, |
|
"loss": 0.6434, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.013516816310584545, |
|
"rewards/margins": 0.11254201829433441, |
|
"rewards/rejected": -0.12605881690979004, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.22245485475006543, |
|
"grad_norm": 2.693995714187622, |
|
"learning_rate": 4.775802303459287e-07, |
|
"logits/chosen": -2.7961440086364746, |
|
"logits/rejected": -2.782381534576416, |
|
"logps/chosen": -266.48406982421875, |
|
"logps/rejected": -271.54876708984375, |
|
"loss": 0.6543, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.025890201330184937, |
|
"rewards/margins": 0.09162938594818115, |
|
"rewards/rejected": -0.11751959472894669, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.22507197068830148, |
|
"grad_norm": 3.3223588466644287, |
|
"learning_rate": 4.766251958842589e-07, |
|
"logits/chosen": -2.770634174346924, |
|
"logits/rejected": -2.7624752521514893, |
|
"logps/chosen": -295.11322021484375, |
|
"logps/rejected": -291.52655029296875, |
|
"loss": 0.6493, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.03162473067641258, |
|
"rewards/margins": 0.10102814435958862, |
|
"rewards/rejected": -0.1326528638601303, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.22768908662653756, |
|
"grad_norm": 2.2951784133911133, |
|
"learning_rate": 4.756512382839506e-07, |
|
"logits/chosen": -2.792806625366211, |
|
"logits/rejected": -2.7687854766845703, |
|
"logps/chosen": -276.4913024902344, |
|
"logps/rejected": -288.6650390625, |
|
"loss": 0.6455, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.06362788379192352, |
|
"rewards/margins": 0.11521414667367935, |
|
"rewards/rejected": -0.17884202301502228, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.23030620256477363, |
|
"grad_norm": 2.3468611240386963, |
|
"learning_rate": 4.746584388701831e-07, |
|
"logits/chosen": -2.804765224456787, |
|
"logits/rejected": -2.8049676418304443, |
|
"logps/chosen": -284.9786071777344, |
|
"logps/rejected": -280.96392822265625, |
|
"loss": 0.6438, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.05107206106185913, |
|
"rewards/margins": 0.11629124730825424, |
|
"rewards/rejected": -0.16736331582069397, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.23292331850300968, |
|
"grad_norm": 3.075714588165283, |
|
"learning_rate": 4.736468805414218e-07, |
|
"logits/chosen": -2.77662992477417, |
|
"logits/rejected": -2.7775301933288574, |
|
"logps/chosen": -271.46368408203125, |
|
"logps/rejected": -293.26531982421875, |
|
"loss": 0.6421, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.033290714025497437, |
|
"rewards/margins": 0.12201287597417831, |
|
"rewards/rejected": -0.15530358254909515, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.23554043444124576, |
|
"grad_norm": 2.879183769226074, |
|
"learning_rate": 4.7261664776249595e-07, |
|
"logits/chosen": -2.7510781288146973, |
|
"logits/rejected": -2.7387068271636963, |
|
"logps/chosen": -250.3533477783203, |
|
"logps/rejected": -251.46630859375, |
|
"loss": 0.6428, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.04132508859038353, |
|
"rewards/margins": 0.12072241306304932, |
|
"rewards/rejected": -0.16204750537872314, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.23554043444124576, |
|
"eval_logits/chosen": -2.819901704788208, |
|
"eval_logits/rejected": -2.794234275817871, |
|
"eval_logps/chosen": -285.2049865722656, |
|
"eval_logps/rejected": -275.2684631347656, |
|
"eval_loss": 0.6458239555358887, |
|
"eval_rewards/accuracies": 0.6769999861717224, |
|
"eval_rewards/chosen": -0.024673735722899437, |
|
"eval_rewards/margins": 0.1136314645409584, |
|
"eval_rewards/rejected": -0.138305202126503, |
|
"eval_runtime": 690.9829, |
|
"eval_samples_per_second": 2.894, |
|
"eval_steps_per_second": 0.362, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.2381575503794818, |
|
"grad_norm": 2.7687416076660156, |
|
"learning_rate": 4.7156782655754624e-07, |
|
"logits/chosen": -2.8114147186279297, |
|
"logits/rejected": -2.772068977355957, |
|
"logps/chosen": -300.78826904296875, |
|
"logps/rejected": -255.8038330078125, |
|
"loss": 0.6426, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.002123198937624693, |
|
"rewards/margins": 0.1206832155585289, |
|
"rewards/rejected": -0.12280640751123428, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.24077466631771788, |
|
"grad_norm": 2.5618391036987305, |
|
"learning_rate": 4.705005045028414e-07, |
|
"logits/chosen": -2.765242338180542, |
|
"logits/rejected": -2.737863063812256, |
|
"logps/chosen": -287.15667724609375, |
|
"logps/rejected": -278.50726318359375, |
|
"loss": 0.6459, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.05967919901013374, |
|
"rewards/margins": 0.11529602855443954, |
|
"rewards/rejected": -0.1749752312898636, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.24339178225595393, |
|
"grad_norm": 2.9336323738098145, |
|
"learning_rate": 4.694147707194659e-07, |
|
"logits/chosen": -2.832733631134033, |
|
"logits/rejected": -2.8244283199310303, |
|
"logps/chosen": -294.346923828125, |
|
"logps/rejected": -287.9342346191406, |
|
"loss": 0.6366, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.06329428404569626, |
|
"rewards/margins": 0.1405760794878006, |
|
"rewards/rejected": -0.20387034118175507, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.24600889819419, |
|
"grad_norm": 3.908505439758301, |
|
"learning_rate": 4.683107158658781e-07, |
|
"logits/chosen": -2.7808585166931152, |
|
"logits/rejected": -2.763042688369751, |
|
"logps/chosen": -314.3782653808594, |
|
"logps/rejected": -299.661865234375, |
|
"loss": 0.6227, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.05701801925897598, |
|
"rewards/margins": 0.16755308210849762, |
|
"rewards/rejected": -0.2245711088180542, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.24862601413242608, |
|
"grad_norm": 3.2749459743499756, |
|
"learning_rate": 4.6718843213034066e-07, |
|
"logits/chosen": -2.7944037914276123, |
|
"logits/rejected": -2.77887225151062, |
|
"logps/chosen": -272.23724365234375, |
|
"logps/rejected": -273.14776611328125, |
|
"loss": 0.633, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.09230604767799377, |
|
"rewards/margins": 0.14217710494995117, |
|
"rewards/rejected": -0.23448316752910614, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.2512431300706621, |
|
"grad_norm": 3.0224010944366455, |
|
"learning_rate": 4.660480132232224e-07, |
|
"logits/chosen": -2.805572986602783, |
|
"logits/rejected": -2.80751371383667, |
|
"logps/chosen": -293.3813171386719, |
|
"logps/rejected": -280.83465576171875, |
|
"loss": 0.6507, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.0641000047326088, |
|
"rewards/margins": 0.10990612208843231, |
|
"rewards/rejected": -0.1740061342716217, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.25386024600889817, |
|
"grad_norm": 3.5039138793945312, |
|
"learning_rate": 4.64889554369174e-07, |
|
"logits/chosen": -2.805609941482544, |
|
"logits/rejected": -2.771754741668701, |
|
"logps/chosen": -298.55157470703125, |
|
"logps/rejected": -267.65087890625, |
|
"loss": 0.6166, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": 0.0059810527600348, |
|
"rewards/margins": 0.18814215064048767, |
|
"rewards/rejected": -0.1821610927581787, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.2564773619471343, |
|
"grad_norm": 2.8160240650177, |
|
"learning_rate": 4.637131522991764e-07, |
|
"logits/chosen": -2.7994441986083984, |
|
"logits/rejected": -2.7969179153442383, |
|
"logps/chosen": -309.35089111328125, |
|
"logps/rejected": -296.6192321777344, |
|
"loss": 0.6321, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.029499268159270287, |
|
"rewards/margins": 0.14880326390266418, |
|
"rewards/rejected": -0.17830254137516022, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.2590944778853703, |
|
"grad_norm": 3.782945156097412, |
|
"learning_rate": 4.6251890524246375e-07, |
|
"logits/chosen": -2.8050458431243896, |
|
"logits/rejected": -2.786475658416748, |
|
"logps/chosen": -262.4518737792969, |
|
"logps/rejected": -256.80792236328125, |
|
"loss": 0.6166, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.0701083093881607, |
|
"rewards/margins": 0.18339978158473969, |
|
"rewards/rejected": -0.253508061170578, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.26171159382360637, |
|
"grad_norm": 3.791015148162842, |
|
"learning_rate": 4.613069129183218e-07, |
|
"logits/chosen": -2.8377981185913086, |
|
"logits/rejected": -2.799161911010742, |
|
"logps/chosen": -328.35491943359375, |
|
"logps/rejected": -301.65679931640625, |
|
"loss": 0.6381, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.0686495453119278, |
|
"rewards/margins": 0.13748301565647125, |
|
"rewards/rejected": -0.20613256096839905, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.26171159382360637, |
|
"eval_logits/chosen": -2.813830852508545, |
|
"eval_logits/rejected": -2.7887284755706787, |
|
"eval_logps/chosen": -289.12060546875, |
|
"eval_logps/rejected": -282.1760559082031, |
|
"eval_loss": 0.635771632194519, |
|
"eval_rewards/accuracies": 0.6784999966621399, |
|
"eval_rewards/chosen": -0.06382979452610016, |
|
"eval_rewards/margins": 0.14355140924453735, |
|
"eval_rewards/rejected": -0.2073812186717987, |
|
"eval_runtime": 691.4427, |
|
"eval_samples_per_second": 2.893, |
|
"eval_steps_per_second": 0.362, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.2643287097618425, |
|
"grad_norm": 4.366467475891113, |
|
"learning_rate": 4.6007727652776065e-07, |
|
"logits/chosen": -2.7737021446228027, |
|
"logits/rejected": -2.7608792781829834, |
|
"logps/chosen": -254.6834259033203, |
|
"logps/rejected": -263.98565673828125, |
|
"loss": 0.6304, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.036558397114276886, |
|
"rewards/margins": 0.1544768214225769, |
|
"rewards/rejected": -0.1910352259874344, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.2669458257000785, |
|
"grad_norm": 3.2850377559661865, |
|
"learning_rate": 4.588300987450652e-07, |
|
"logits/chosen": -2.82348895072937, |
|
"logits/rejected": -2.7995572090148926, |
|
"logps/chosen": -271.41241455078125, |
|
"logps/rejected": -254.01864624023438, |
|
"loss": 0.6293, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.008820459246635437, |
|
"rewards/margins": 0.1594310700893402, |
|
"rewards/rejected": -0.16825154423713684, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.26956294163831457, |
|
"grad_norm": 3.3716328144073486, |
|
"learning_rate": 4.5756548370922134e-07, |
|
"logits/chosen": -2.781808853149414, |
|
"logits/rejected": -2.7637503147125244, |
|
"logps/chosen": -258.62860107421875, |
|
"logps/rejected": -260.2466125488281, |
|
"loss": 0.6508, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.024007773026823997, |
|
"rewards/margins": 0.11937548965215683, |
|
"rewards/rejected": -0.14338326454162598, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.2721800575765506, |
|
"grad_norm": 3.529965400695801, |
|
"learning_rate": 4.5628353701522047e-07, |
|
"logits/chosen": -2.815080404281616, |
|
"logits/rejected": -2.7873313426971436, |
|
"logps/chosen": -321.65435791015625, |
|
"logps/rejected": -310.28497314453125, |
|
"loss": 0.6072, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.017561940476298332, |
|
"rewards/margins": 0.2143036425113678, |
|
"rewards/rejected": -0.2318655550479889, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.2747971735147867, |
|
"grad_norm": 2.87839412689209, |
|
"learning_rate": 4.549843657052429e-07, |
|
"logits/chosen": -2.834746837615967, |
|
"logits/rejected": -2.808051347732544, |
|
"logps/chosen": -287.9942321777344, |
|
"logps/rejected": -302.9963684082031, |
|
"loss": 0.6048, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.03615923970937729, |
|
"rewards/margins": 0.21066415309906006, |
|
"rewards/rejected": -0.24682338535785675, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.27741428945302277, |
|
"grad_norm": 3.860949993133545, |
|
"learning_rate": 4.5366807825971907e-07, |
|
"logits/chosen": -2.780369758605957, |
|
"logits/rejected": -2.7750542163848877, |
|
"logps/chosen": -262.59075927734375, |
|
"logps/rejected": -269.21051025390625, |
|
"loss": 0.6437, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.08822160959243774, |
|
"rewards/margins": 0.14002035558223724, |
|
"rewards/rejected": -0.228241965174675, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.2800314053912588, |
|
"grad_norm": 6.0348801612854, |
|
"learning_rate": 4.5233478458827176e-07, |
|
"logits/chosen": -2.8092315196990967, |
|
"logits/rejected": -2.785090446472168, |
|
"logps/chosen": -316.466064453125, |
|
"logps/rejected": -282.1798400878906, |
|
"loss": 0.6104, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.08112485706806183, |
|
"rewards/margins": 0.2059168517589569, |
|
"rewards/rejected": -0.2870417535305023, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.2826485213294949, |
|
"grad_norm": 4.09010124206543, |
|
"learning_rate": 4.509845960205389e-07, |
|
"logits/chosen": -2.749141216278076, |
|
"logits/rejected": -2.753202438354492, |
|
"logps/chosen": -304.83111572265625, |
|
"logps/rejected": -288.3349304199219, |
|
"loss": 0.626, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.07901586592197418, |
|
"rewards/margins": 0.17285946011543274, |
|
"rewards/rejected": -0.2518753409385681, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.28526563726773096, |
|
"grad_norm": 4.772919654846191, |
|
"learning_rate": 4.4961762529687736e-07, |
|
"logits/chosen": -2.8033485412597656, |
|
"logits/rejected": -2.7844488620758057, |
|
"logps/chosen": -288.91998291015625, |
|
"logps/rejected": -284.6497802734375, |
|
"loss": 0.6324, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.09251121431589127, |
|
"rewards/margins": 0.15693159401416779, |
|
"rewards/rejected": -0.24944277107715607, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.287882753205967, |
|
"grad_norm": 4.188416957855225, |
|
"learning_rate": 4.482339865589492e-07, |
|
"logits/chosen": -2.8103842735290527, |
|
"logits/rejected": -2.768054962158203, |
|
"logps/chosen": -299.87091064453125, |
|
"logps/rejected": -267.5564880371094, |
|
"loss": 0.6488, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.16943010687828064, |
|
"rewards/margins": 0.12456401437520981, |
|
"rewards/rejected": -0.29399409890174866, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.287882753205967, |
|
"eval_logits/chosen": -2.8070549964904785, |
|
"eval_logits/rejected": -2.782604694366455, |
|
"eval_logps/chosen": -296.5137634277344, |
|
"eval_logps/rejected": -291.989013671875, |
|
"eval_loss": 0.6283535361289978, |
|
"eval_rewards/accuracies": 0.6790000200271606, |
|
"eval_rewards/chosen": -0.13776110112667084, |
|
"eval_rewards/margins": 0.16774973273277283, |
|
"eval_rewards/rejected": -0.30551087856292725, |
|
"eval_runtime": 691.0066, |
|
"eval_samples_per_second": 2.894, |
|
"eval_steps_per_second": 0.362, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.2904998691442031, |
|
"grad_norm": 4.440745830535889, |
|
"learning_rate": 4.4683379534019076e-07, |
|
"logits/chosen": -2.803920269012451, |
|
"logits/rejected": -2.8017265796661377, |
|
"logps/chosen": -300.3214111328125, |
|
"logps/rejected": -309.1615905761719, |
|
"loss": 0.6336, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.1395951509475708, |
|
"rewards/margins": 0.1519310027360916, |
|
"rewards/rejected": -0.2915261387825012, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.29311698508243916, |
|
"grad_norm": 3.8111138343811035, |
|
"learning_rate": 4.4541716855616593e-07, |
|
"logits/chosen": -2.7794926166534424, |
|
"logits/rejected": -2.7597875595092773, |
|
"logps/chosen": -264.9614562988281, |
|
"logps/rejected": -282.9358825683594, |
|
"loss": 0.6252, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.07038460671901703, |
|
"rewards/margins": 0.17066633701324463, |
|
"rewards/rejected": -0.24105095863342285, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.2957341010206752, |
|
"grad_norm": 5.494072914123535, |
|
"learning_rate": 4.4398422449480357e-07, |
|
"logits/chosen": -2.774218797683716, |
|
"logits/rejected": -2.725161075592041, |
|
"logps/chosen": -294.66448974609375, |
|
"logps/rejected": -311.0096740722656, |
|
"loss": 0.6402, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.1438552290201187, |
|
"rewards/margins": 0.14675047993659973, |
|
"rewards/rejected": -0.29060572385787964, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.29835121695891126, |
|
"grad_norm": 4.3281474113464355, |
|
"learning_rate": 4.4253508280652036e-07, |
|
"logits/chosen": -2.7951579093933105, |
|
"logits/rejected": -2.7520532608032227, |
|
"logps/chosen": -317.461181640625, |
|
"logps/rejected": -285.7931213378906, |
|
"loss": 0.6139, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.13621816039085388, |
|
"rewards/margins": 0.19606857001781464, |
|
"rewards/rejected": -0.3322867453098297, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.30096833289714736, |
|
"grad_norm": 6.221525192260742, |
|
"learning_rate": 4.410698644942302e-07, |
|
"logits/chosen": -2.8402047157287598, |
|
"logits/rejected": -2.816387176513672, |
|
"logps/chosen": -297.50286865234375, |
|
"logps/rejected": -292.28436279296875, |
|
"loss": 0.6183, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.10158324241638184, |
|
"rewards/margins": 0.19611066579818726, |
|
"rewards/rejected": -0.2976939082145691, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.3035854488353834, |
|
"grad_norm": 4.492012023925781, |
|
"learning_rate": 4.3958869190324057e-07, |
|
"logits/chosen": -2.76503586769104, |
|
"logits/rejected": -2.7254602909088135, |
|
"logps/chosen": -291.94873046875, |
|
"logps/rejected": -282.52880859375, |
|
"loss": 0.6221, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.12198346853256226, |
|
"rewards/margins": 0.18694952130317688, |
|
"rewards/rejected": -0.30893296003341675, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.30620256477361946, |
|
"grad_norm": 3.562570810317993, |
|
"learning_rate": 4.380916887110365e-07, |
|
"logits/chosen": -2.829111099243164, |
|
"logits/rejected": -2.800809383392334, |
|
"logps/chosen": -290.05316162109375, |
|
"logps/rejected": -266.3580017089844, |
|
"loss": 0.6199, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.14898671209812164, |
|
"rewards/margins": 0.19158688187599182, |
|
"rewards/rejected": -0.34057360887527466, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.30881968071185556, |
|
"grad_norm": 5.379666805267334, |
|
"learning_rate": 4.3657897991695394e-07, |
|
"logits/chosen": -2.7369437217712402, |
|
"logits/rejected": -2.7774927616119385, |
|
"logps/chosen": -281.9171142578125, |
|
"logps/rejected": -300.78912353515625, |
|
"loss": 0.6192, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.11646691709756851, |
|
"rewards/margins": 0.19827672839164734, |
|
"rewards/rejected": -0.31474363803863525, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.3114367966500916, |
|
"grad_norm": 4.079792499542236, |
|
"learning_rate": 4.350506918317416e-07, |
|
"logits/chosen": -2.8184256553649902, |
|
"logits/rejected": -2.788510799407959, |
|
"logps/chosen": -274.4839172363281, |
|
"logps/rejected": -287.8948669433594, |
|
"loss": 0.6194, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.12529827654361725, |
|
"rewards/margins": 0.19565680623054504, |
|
"rewards/rejected": -0.3209550976753235, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.31405391258832765, |
|
"grad_norm": 4.406829833984375, |
|
"learning_rate": 4.335069520670149e-07, |
|
"logits/chosen": -2.7956674098968506, |
|
"logits/rejected": -2.7690110206604004, |
|
"logps/chosen": -252.70156860351562, |
|
"logps/rejected": -279.14111328125, |
|
"loss": 0.6427, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.09379851073026657, |
|
"rewards/margins": 0.14501607418060303, |
|
"rewards/rejected": -0.2388145923614502, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.31405391258832765, |
|
"eval_logits/chosen": -2.8165299892425537, |
|
"eval_logits/rejected": -2.793107032775879, |
|
"eval_logps/chosen": -293.77850341796875, |
|
"eval_logps/rejected": -291.3028259277344, |
|
"eval_loss": 0.622346818447113, |
|
"eval_rewards/accuracies": 0.6834999918937683, |
|
"eval_rewards/chosen": -0.11040891706943512, |
|
"eval_rewards/margins": 0.18824002146720886, |
|
"eval_rewards/rejected": -0.2986489236354828, |
|
"eval_runtime": 690.8187, |
|
"eval_samples_per_second": 2.895, |
|
"eval_steps_per_second": 0.362, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.3166710285265637, |
|
"grad_norm": 4.730831146240234, |
|
"learning_rate": 4.319478895245999e-07, |
|
"logits/chosen": -2.8096089363098145, |
|
"logits/rejected": -2.781852960586548, |
|
"logps/chosen": -277.19305419921875, |
|
"logps/rejected": -268.88653564453125, |
|
"loss": 0.6189, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.12051185220479965, |
|
"rewards/margins": 0.19446460902690887, |
|
"rewards/rejected": -0.3149764835834503, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.3192881444647998, |
|
"grad_norm": 4.179198741912842, |
|
"learning_rate": 4.3037363438577036e-07, |
|
"logits/chosen": -2.8334312438964844, |
|
"logits/rejected": -2.796905517578125, |
|
"logps/chosen": -275.5434875488281, |
|
"logps/rejected": -309.56561279296875, |
|
"loss": 0.6074, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.03255675360560417, |
|
"rewards/margins": 0.21560052037239075, |
|
"rewards/rejected": -0.24815726280212402, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.32190526040303585, |
|
"grad_norm": 3.7570934295654297, |
|
"learning_rate": 4.2878431810037716e-07, |
|
"logits/chosen": -2.8290486335754395, |
|
"logits/rejected": -2.821361780166626, |
|
"logps/chosen": -317.92926025390625, |
|
"logps/rejected": -291.9640197753906, |
|
"loss": 0.6102, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.06272344291210175, |
|
"rewards/margins": 0.21540877223014832, |
|
"rewards/rejected": -0.27813225984573364, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.3245223763412719, |
|
"grad_norm": 5.973113536834717, |
|
"learning_rate": 4.271800733758729e-07, |
|
"logits/chosen": -2.801720380783081, |
|
"logits/rejected": -2.804701566696167, |
|
"logps/chosen": -308.4283142089844, |
|
"logps/rejected": -294.974609375, |
|
"loss": 0.6055, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.04135540500283241, |
|
"rewards/margins": 0.23178556561470032, |
|
"rewards/rejected": -0.27314096689224243, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.327139492279508, |
|
"grad_norm": 5.047220706939697, |
|
"learning_rate": 4.255610341662304e-07, |
|
"logits/chosen": -2.8307595252990723, |
|
"logits/rejected": -2.779573440551758, |
|
"logps/chosen": -282.5008239746094, |
|
"logps/rejected": -278.0930480957031, |
|
"loss": 0.6297, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.07180126756429672, |
|
"rewards/margins": 0.17990802228450775, |
|
"rewards/rejected": -0.2517092823982239, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.32975660821774405, |
|
"grad_norm": 4.12667179107666, |
|
"learning_rate": 4.2392733566075757e-07, |
|
"logits/chosen": -2.8080954551696777, |
|
"logits/rejected": -2.7833712100982666, |
|
"logps/chosen": -279.9812927246094, |
|
"logps/rejected": -274.603271484375, |
|
"loss": 0.6437, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.06445430964231491, |
|
"rewards/margins": 0.1353849321603775, |
|
"rewards/rejected": -0.19983923435211182, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.3323737241559801, |
|
"grad_norm": 3.241464138031006, |
|
"learning_rate": 4.2227911427280973e-07, |
|
"logits/chosen": -2.7715563774108887, |
|
"logits/rejected": -2.7483251094818115, |
|
"logps/chosen": -269.14215087890625, |
|
"logps/rejected": -254.9038543701172, |
|
"loss": 0.6275, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.029628584161400795, |
|
"rewards/margins": 0.1794588267803192, |
|
"rewards/rejected": -0.20908741652965546, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.33499084009421615, |
|
"grad_norm": 6.028203010559082, |
|
"learning_rate": 4.206165076283982e-07, |
|
"logits/chosen": -2.8015265464782715, |
|
"logits/rejected": -2.7831873893737793, |
|
"logps/chosen": -270.62139892578125, |
|
"logps/rejected": -273.0738830566406, |
|
"loss": 0.6107, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.09085245430469513, |
|
"rewards/margins": 0.2116876095533371, |
|
"rewards/rejected": -0.30254003405570984, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.33760795603245225, |
|
"grad_norm": 5.242630958557129, |
|
"learning_rate": 4.1893965455469946e-07, |
|
"logits/chosen": -2.8173327445983887, |
|
"logits/rejected": -2.7973732948303223, |
|
"logps/chosen": -279.14031982421875, |
|
"logps/rejected": -275.79638671875, |
|
"loss": 0.6269, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.14117182791233063, |
|
"rewards/margins": 0.18503603339195251, |
|
"rewards/rejected": -0.32620781660079956, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.3402250719706883, |
|
"grad_norm": 5.775106430053711, |
|
"learning_rate": 4.172486950684626e-07, |
|
"logits/chosen": -2.821103096008301, |
|
"logits/rejected": -2.814502477645874, |
|
"logps/chosen": -279.78289794921875, |
|
"logps/rejected": -298.9765930175781, |
|
"loss": 0.6131, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.11109775304794312, |
|
"rewards/margins": 0.21843478083610535, |
|
"rewards/rejected": -0.32953253388404846, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.3402250719706883, |
|
"eval_logits/chosen": -2.818049430847168, |
|
"eval_logits/rejected": -2.7951488494873047, |
|
"eval_logps/chosen": -297.3945007324219, |
|
"eval_logps/rejected": -296.5805969238281, |
|
"eval_loss": 0.6172210574150085, |
|
"eval_rewards/accuracies": 0.6865000128746033, |
|
"eval_rewards/chosen": -0.14656904339790344, |
|
"eval_rewards/margins": 0.2048574537038803, |
|
"eval_rewards/rejected": -0.35142648220062256, |
|
"eval_runtime": 691.9861, |
|
"eval_samples_per_second": 2.89, |
|
"eval_steps_per_second": 0.361, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.34284218790892435, |
|
"grad_norm": 8.304680824279785, |
|
"learning_rate": 4.155437703643181e-07, |
|
"logits/chosen": -2.841334581375122, |
|
"logits/rejected": -2.806217670440674, |
|
"logps/chosen": -272.61444091796875, |
|
"logps/rejected": -267.8605041503906, |
|
"loss": 0.6005, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.11493051052093506, |
|
"rewards/margins": 0.24178418517112732, |
|
"rewards/rejected": -0.35671466588974, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.34545930384716045, |
|
"grad_norm": 6.887094497680664, |
|
"learning_rate": 4.138250228029881e-07, |
|
"logits/chosen": -2.811464786529541, |
|
"logits/rejected": -2.797884941101074, |
|
"logps/chosen": -295.8591613769531, |
|
"logps/rejected": -319.4233703613281, |
|
"loss": 0.6383, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.2382466346025467, |
|
"rewards/margins": 0.16607843339443207, |
|
"rewards/rejected": -0.40432506799697876, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.3480764197853965, |
|
"grad_norm": 4.52334451675415, |
|
"learning_rate": 4.1209259589939935e-07, |
|
"logits/chosen": -2.8012988567352295, |
|
"logits/rejected": -2.8001253604888916, |
|
"logps/chosen": -262.8810119628906, |
|
"logps/rejected": -272.76788330078125, |
|
"loss": 0.6321, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.13144102692604065, |
|
"rewards/margins": 0.17341327667236328, |
|
"rewards/rejected": -0.30485430359840393, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.35069353572363254, |
|
"grad_norm": 3.246675729751587, |
|
"learning_rate": 4.103466343106998e-07, |
|
"logits/chosen": -2.8291964530944824, |
|
"logits/rejected": -2.824831247329712, |
|
"logps/chosen": -302.6276550292969, |
|
"logps/rejected": -286.753662109375, |
|
"loss": 0.6334, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.1298406422138214, |
|
"rewards/margins": 0.16963128745555878, |
|
"rewards/rejected": -0.2994719445705414, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.35331065166186865, |
|
"grad_norm": 4.933244705200195, |
|
"learning_rate": 4.085872838241796e-07, |
|
"logits/chosen": -2.767702102661133, |
|
"logits/rejected": -2.730109691619873, |
|
"logps/chosen": -311.7983703613281, |
|
"logps/rejected": -294.95294189453125, |
|
"loss": 0.6356, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.16958799958229065, |
|
"rewards/margins": 0.17119386792182922, |
|
"rewards/rejected": -0.3407818675041199, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.3559277676001047, |
|
"grad_norm": 6.131802082061768, |
|
"learning_rate": 4.06814691345098e-07, |
|
"logits/chosen": -2.7470338344573975, |
|
"logits/rejected": -2.722545862197876, |
|
"logps/chosen": -288.4170837402344, |
|
"logps/rejected": -289.61102294921875, |
|
"loss": 0.602, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.1348382532596588, |
|
"rewards/margins": 0.2365628182888031, |
|
"rewards/rejected": -0.3714010715484619, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.35854488353834074, |
|
"grad_norm": 4.9708638191223145, |
|
"learning_rate": 4.0502900488441707e-07, |
|
"logits/chosen": -2.7989072799682617, |
|
"logits/rejected": -2.789274215698242, |
|
"logps/chosen": -306.6829528808594, |
|
"logps/rejected": -320.0224304199219, |
|
"loss": 0.6285, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.20997491478919983, |
|
"rewards/margins": 0.1841730773448944, |
|
"rewards/rejected": -0.39414799213409424, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.3611619994765768, |
|
"grad_norm": 6.784174919128418, |
|
"learning_rate": 4.032303735464422e-07, |
|
"logits/chosen": -2.880401134490967, |
|
"logits/rejected": -2.835643768310547, |
|
"logps/chosen": -310.90679931640625, |
|
"logps/rejected": -308.8883361816406, |
|
"loss": 0.6053, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.20711734890937805, |
|
"rewards/margins": 0.24106808006763458, |
|
"rewards/rejected": -0.44818538427352905, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.3637791154148129, |
|
"grad_norm": 5.785353183746338, |
|
"learning_rate": 4.014189475163726e-07, |
|
"logits/chosen": -2.794342517852783, |
|
"logits/rejected": -2.7849628925323486, |
|
"logps/chosen": -297.41961669921875, |
|
"logps/rejected": -308.3134765625, |
|
"loss": 0.6053, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.24589386582374573, |
|
"rewards/margins": 0.2298090010881424, |
|
"rewards/rejected": -0.47570285201072693, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.36639623135304894, |
|
"grad_norm": 6.076969146728516, |
|
"learning_rate": 3.995948780477605e-07, |
|
"logits/chosen": -2.8259429931640625, |
|
"logits/rejected": -2.795186996459961, |
|
"logps/chosen": -306.1077880859375, |
|
"logps/rejected": -299.7892150878906, |
|
"loss": 0.6326, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.21178540587425232, |
|
"rewards/margins": 0.17982172966003418, |
|
"rewards/rejected": -0.3916071355342865, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.36639623135304894, |
|
"eval_logits/chosen": -2.814655065536499, |
|
"eval_logits/rejected": -2.7920358180999756, |
|
"eval_logps/chosen": -300.2596740722656, |
|
"eval_logps/rejected": -300.3965759277344, |
|
"eval_loss": 0.6155202388763428, |
|
"eval_rewards/accuracies": 0.6859999895095825, |
|
"eval_rewards/chosen": -0.175220787525177, |
|
"eval_rewards/margins": 0.2143653929233551, |
|
"eval_rewards/rejected": -0.3895862102508545, |
|
"eval_runtime": 692.0291, |
|
"eval_samples_per_second": 2.89, |
|
"eval_steps_per_second": 0.361, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.369013347291285, |
|
"grad_norm": 6.421947479248047, |
|
"learning_rate": 3.977583174498816e-07, |
|
"logits/chosen": -2.816697359085083, |
|
"logits/rejected": -2.8030014038085938, |
|
"logps/chosen": -300.00640869140625, |
|
"logps/rejected": -303.1688232421875, |
|
"loss": 0.5882, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -0.14107367396354675, |
|
"rewards/margins": 0.27628999948501587, |
|
"rewards/rejected": -0.41736364364624023, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.3716304632295211, |
|
"grad_norm": 4.980222225189209, |
|
"learning_rate": 3.9590941907501717e-07, |
|
"logits/chosen": -2.8284125328063965, |
|
"logits/rejected": -2.812608242034912, |
|
"logps/chosen": -307.8800354003906, |
|
"logps/rejected": -303.53021240234375, |
|
"loss": 0.6005, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.056650467216968536, |
|
"rewards/margins": 0.2519657611846924, |
|
"rewards/rejected": -0.3086162507534027, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.37424757916775714, |
|
"grad_norm": 5.049463272094727, |
|
"learning_rate": 3.9404833730564974e-07, |
|
"logits/chosen": -2.735870838165283, |
|
"logits/rejected": -2.722884178161621, |
|
"logps/chosen": -285.8304443359375, |
|
"logps/rejected": -297.43341064453125, |
|
"loss": 0.6055, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.12556666135787964, |
|
"rewards/margins": 0.2363204061985016, |
|
"rewards/rejected": -0.3618870973587036, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.3768646951059932, |
|
"grad_norm": 6.007881164550781, |
|
"learning_rate": 3.9217522754157117e-07, |
|
"logits/chosen": -2.8069920539855957, |
|
"logits/rejected": -2.80522084236145, |
|
"logps/chosen": -284.0002136230469, |
|
"logps/rejected": -286.4706115722656, |
|
"loss": 0.5941, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.15235498547554016, |
|
"rewards/margins": 0.26103848218917847, |
|
"rewards/rejected": -0.41339343786239624, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.37948181104422923, |
|
"grad_norm": 4.487087726593018, |
|
"learning_rate": 3.9029024618690785e-07, |
|
"logits/chosen": -2.8235816955566406, |
|
"logits/rejected": -2.7990283966064453, |
|
"logps/chosen": -266.3917541503906, |
|
"logps/rejected": -270.59381103515625, |
|
"loss": 0.6161, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.11356230825185776, |
|
"rewards/margins": 0.21844033896923065, |
|
"rewards/rejected": -0.3320026695728302, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.38209892698246534, |
|
"grad_norm": 3.7364535331726074, |
|
"learning_rate": 3.883935506370605e-07, |
|
"logits/chosen": -2.7793936729431152, |
|
"logits/rejected": -2.770378589630127, |
|
"logps/chosen": -278.8677062988281, |
|
"logps/rejected": -271.43145751953125, |
|
"loss": 0.6076, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.08725923299789429, |
|
"rewards/margins": 0.2380957156419754, |
|
"rewards/rejected": -0.3253549635410309, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.3847160429207014, |
|
"grad_norm": 4.045937538146973, |
|
"learning_rate": 3.864852992655616e-07, |
|
"logits/chosen": -2.7860310077667236, |
|
"logits/rejected": -2.7741951942443848, |
|
"logps/chosen": -279.3297119140625, |
|
"logps/rejected": -292.84356689453125, |
|
"loss": 0.5813, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.10104944556951523, |
|
"rewards/margins": 0.2876027524471283, |
|
"rewards/rejected": -0.38865217566490173, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.38733315885893743, |
|
"grad_norm": 5.180766582489014, |
|
"learning_rate": 3.845656514108515e-07, |
|
"logits/chosen": -2.8035526275634766, |
|
"logits/rejected": -2.784550189971924, |
|
"logps/chosen": -299.1927490234375, |
|
"logps/rejected": -258.96661376953125, |
|
"loss": 0.6143, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.18017061054706573, |
|
"rewards/margins": 0.21995961666107178, |
|
"rewards/rejected": -0.40013018250465393, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.38995027479717354, |
|
"grad_norm": 3.420503616333008, |
|
"learning_rate": 3.8263476736297375e-07, |
|
"logits/chosen": -2.8004748821258545, |
|
"logits/rejected": -2.755922794342041, |
|
"logps/chosen": -280.3719177246094, |
|
"logps/rejected": -276.71051025390625, |
|
"loss": 0.6096, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.11658191680908203, |
|
"rewards/margins": 0.22706842422485352, |
|
"rewards/rejected": -0.34365034103393555, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.3925673907354096, |
|
"grad_norm": 6.24570369720459, |
|
"learning_rate": 3.8069280835019055e-07, |
|
"logits/chosen": -2.7886569499969482, |
|
"logits/rejected": -2.757636070251465, |
|
"logps/chosen": -291.5840759277344, |
|
"logps/rejected": -290.7030334472656, |
|
"loss": 0.6128, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.07107678055763245, |
|
"rewards/margins": 0.2125014066696167, |
|
"rewards/rejected": -0.28357818722724915, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.3925673907354096, |
|
"eval_logits/chosen": -2.819805145263672, |
|
"eval_logits/rejected": -2.798032283782959, |
|
"eval_logps/chosen": -289.036865234375, |
|
"eval_logps/rejected": -288.3089904785156, |
|
"eval_loss": 0.6180471777915955, |
|
"eval_rewards/accuracies": 0.6890000104904175, |
|
"eval_rewards/chosen": -0.06299243867397308, |
|
"eval_rewards/margins": 0.20571817457675934, |
|
"eval_rewards/rejected": -0.2687106430530548, |
|
"eval_runtime": 691.9992, |
|
"eval_samples_per_second": 2.89, |
|
"eval_steps_per_second": 0.361, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.39518450667364563, |
|
"grad_norm": 7.418298721313477, |
|
"learning_rate": 3.7873993652552073e-07, |
|
"logits/chosen": -2.7985031604766846, |
|
"logits/rejected": -2.7847418785095215, |
|
"logps/chosen": -256.2576904296875, |
|
"logps/rejected": -263.3230895996094, |
|
"loss": 0.646, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.07267605513334274, |
|
"rewards/margins": 0.14168903231620789, |
|
"rewards/rejected": -0.21436509490013123, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.39780162261188173, |
|
"grad_norm": 3.0412213802337646, |
|
"learning_rate": 3.767763149531995e-07, |
|
"logits/chosen": -2.8065857887268066, |
|
"logits/rejected": -2.792532205581665, |
|
"logps/chosen": -282.3772888183594, |
|
"logps/rejected": -286.32757568359375, |
|
"loss": 0.6036, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.029223937541246414, |
|
"rewards/margins": 0.23573264479637146, |
|
"rewards/rejected": -0.26495662331581116, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.4004187385501178, |
|
"grad_norm": 6.914887428283691, |
|
"learning_rate": 3.7480210759506326e-07, |
|
"logits/chosen": -2.771960973739624, |
|
"logits/rejected": -2.769230365753174, |
|
"logps/chosen": -301.027099609375, |
|
"logps/rejected": -306.0934143066406, |
|
"loss": 0.6321, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.05497425049543381, |
|
"rewards/margins": 0.1824551671743393, |
|
"rewards/rejected": -0.2374294102191925, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.40303585448835383, |
|
"grad_norm": 5.229218006134033, |
|
"learning_rate": 3.728174792968582e-07, |
|
"logits/chosen": -2.7818996906280518, |
|
"logits/rejected": -2.753554582595825, |
|
"logps/chosen": -264.9828186035156, |
|
"logps/rejected": -266.6888122558594, |
|
"loss": 0.6304, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.10081575810909271, |
|
"rewards/margins": 0.1800784170627594, |
|
"rewards/rejected": -0.2808941900730133, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.4056529704265899, |
|
"grad_norm": 3.8269035816192627, |
|
"learning_rate": 3.70822595774476e-07, |
|
"logits/chosen": -2.8083198070526123, |
|
"logits/rejected": -2.7798688411712646, |
|
"logps/chosen": -294.8878479003906, |
|
"logps/rejected": -306.19659423828125, |
|
"loss": 0.5877, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.06873732060194016, |
|
"rewards/margins": 0.28800445795059204, |
|
"rewards/rejected": -0.3567417860031128, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.408270086364826, |
|
"grad_norm": 6.544018268585205, |
|
"learning_rate": 3.688176236001168e-07, |
|
"logits/chosen": -2.7987208366394043, |
|
"logits/rejected": -2.7670371532440186, |
|
"logps/chosen": -304.5577392578125, |
|
"logps/rejected": -289.78729248046875, |
|
"loss": 0.611, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.0676363930106163, |
|
"rewards/margins": 0.23785026371479034, |
|
"rewards/rejected": -0.30548661947250366, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.410887202303062, |
|
"grad_norm": 9.901212692260742, |
|
"learning_rate": 3.6680273018838016e-07, |
|
"logits/chosen": -2.8177802562713623, |
|
"logits/rejected": -2.806378126144409, |
|
"logps/chosen": -281.0837707519531, |
|
"logps/rejected": -286.8470153808594, |
|
"loss": 0.6035, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.11407822370529175, |
|
"rewards/margins": 0.25138336420059204, |
|
"rewards/rejected": -0.3654615879058838, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.4135043182412981, |
|
"grad_norm": 7.281955718994141, |
|
"learning_rate": 3.6477808378228596e-07, |
|
"logits/chosen": -2.787090539932251, |
|
"logits/rejected": -2.7860255241394043, |
|
"logps/chosen": -283.32928466796875, |
|
"logps/rejected": -338.25714111328125, |
|
"loss": 0.6043, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.12236142158508301, |
|
"rewards/margins": 0.2562143802642822, |
|
"rewards/rejected": -0.37857580184936523, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.4161214341795342, |
|
"grad_norm": 8.57088565826416, |
|
"learning_rate": 3.6274385343922674e-07, |
|
"logits/chosen": -2.8543007373809814, |
|
"logits/rejected": -2.8531434535980225, |
|
"logps/chosen": -267.55767822265625, |
|
"logps/rejected": -295.7901306152344, |
|
"loss": 0.6187, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.15387986600399017, |
|
"rewards/margins": 0.21341195702552795, |
|
"rewards/rejected": -0.36729180812835693, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.4187385501177702, |
|
"grad_norm": 5.7539849281311035, |
|
"learning_rate": 3.6070020901685057e-07, |
|
"logits/chosen": -2.7576816082000732, |
|
"logits/rejected": -2.769594669342041, |
|
"logps/chosen": -300.43572998046875, |
|
"logps/rejected": -298.788818359375, |
|
"loss": 0.6223, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.18216048181056976, |
|
"rewards/margins": 0.21212442219257355, |
|
"rewards/rejected": -0.3942849040031433, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.4187385501177702, |
|
"eval_logits/chosen": -2.8147764205932617, |
|
"eval_logits/rejected": -2.792606830596924, |
|
"eval_logps/chosen": -299.62200927734375, |
|
"eval_logps/rejected": -302.40740966796875, |
|
"eval_loss": 0.6088424324989319, |
|
"eval_rewards/accuracies": 0.6945000290870667, |
|
"eval_rewards/chosen": -0.16884401440620422, |
|
"eval_rewards/margins": 0.2408505380153656, |
|
"eval_rewards/rejected": -0.4096945822238922, |
|
"eval_runtime": 691.674, |
|
"eval_samples_per_second": 2.892, |
|
"eval_steps_per_second": 0.361, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.4213556660560063, |
|
"grad_norm": 6.157792568206787, |
|
"learning_rate": 3.5864732115887863e-07, |
|
"logits/chosen": -2.81066632270813, |
|
"logits/rejected": -2.802830219268799, |
|
"logps/chosen": -273.0591735839844, |
|
"logps/rejected": -307.04254150390625, |
|
"loss": 0.5896, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.12094251811504364, |
|
"rewards/margins": 0.2827422022819519, |
|
"rewards/rejected": -0.40368470549583435, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.4239727819942423, |
|
"grad_norm": 6.331284999847412, |
|
"learning_rate": 3.565853612808562e-07, |
|
"logits/chosen": -2.823272466659546, |
|
"logits/rejected": -2.794790744781494, |
|
"logps/chosen": -303.06683349609375, |
|
"logps/rejected": -291.0, |
|
"loss": 0.639, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.23127253353595734, |
|
"rewards/margins": 0.17943724989891052, |
|
"rewards/rejected": -0.41070979833602905, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.4265898979324784, |
|
"grad_norm": 9.121101379394531, |
|
"learning_rate": 3.5451450155583984e-07, |
|
"logits/chosen": -2.733624219894409, |
|
"logits/rejected": -2.7721478939056396, |
|
"logps/chosen": -277.8062744140625, |
|
"logps/rejected": -282.9922790527344, |
|
"loss": 0.623, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.28953424096107483, |
|
"rewards/margins": 0.21646256744861603, |
|
"rewards/rejected": -0.5059967041015625, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.42920701387071447, |
|
"grad_norm": 4.436567306518555, |
|
"learning_rate": 3.5243491490002055e-07, |
|
"logits/chosen": -2.817996025085449, |
|
"logits/rejected": -2.8122916221618652, |
|
"logps/chosen": -305.4420471191406, |
|
"logps/rejected": -318.54742431640625, |
|
"loss": 0.6265, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.32780542969703674, |
|
"rewards/margins": 0.21562886238098145, |
|
"rewards/rejected": -0.5434342622756958, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.4318241298089505, |
|
"grad_norm": 7.695457935333252, |
|
"learning_rate": 3.503467749582857e-07, |
|
"logits/chosen": -2.790708303451538, |
|
"logits/rejected": -2.7539708614349365, |
|
"logps/chosen": -298.7849426269531, |
|
"logps/rejected": -281.51995849609375, |
|
"loss": 0.6324, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.2754608690738678, |
|
"rewards/margins": 0.19722957909107208, |
|
"rewards/rejected": -0.47269049286842346, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.4344412457471866, |
|
"grad_norm": 8.035721778869629, |
|
"learning_rate": 3.482502560897194e-07, |
|
"logits/chosen": -2.7719411849975586, |
|
"logits/rejected": -2.762267589569092, |
|
"logps/chosen": -256.39263916015625, |
|
"logps/rejected": -276.6297607421875, |
|
"loss": 0.6336, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.19001971185207367, |
|
"rewards/margins": 0.172675222158432, |
|
"rewards/rejected": -0.3626949191093445, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.43705836168542267, |
|
"grad_norm": 4.791623115539551, |
|
"learning_rate": 3.4614553335304403e-07, |
|
"logits/chosen": -2.8094491958618164, |
|
"logits/rejected": -2.7578389644622803, |
|
"logps/chosen": -303.371337890625, |
|
"logps/rejected": -291.80615234375, |
|
"loss": 0.5957, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.12800468504428864, |
|
"rewards/margins": 0.26551762223243713, |
|
"rewards/rejected": -0.39352232217788696, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.4396754776236587, |
|
"grad_norm": 7.589243412017822, |
|
"learning_rate": 3.440327824920022e-07, |
|
"logits/chosen": -2.7957282066345215, |
|
"logits/rejected": -2.775707483291626, |
|
"logps/chosen": -309.8748474121094, |
|
"logps/rejected": -299.0494384765625, |
|
"loss": 0.5742, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.08200428634881973, |
|
"rewards/margins": 0.3152574598789215, |
|
"rewards/rejected": -0.39726167917251587, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.44229259356189476, |
|
"grad_norm": 6.186291694641113, |
|
"learning_rate": 3.4191217992068287e-07, |
|
"logits/chosen": -2.8362536430358887, |
|
"logits/rejected": -2.8137047290802, |
|
"logps/chosen": -306.2242431640625, |
|
"logps/rejected": -284.80548095703125, |
|
"loss": 0.6043, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.115182064473629, |
|
"rewards/margins": 0.25850868225097656, |
|
"rewards/rejected": -0.37369078397750854, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.44490970950013087, |
|
"grad_norm": 12.576449394226074, |
|
"learning_rate": 3.3978390270879056e-07, |
|
"logits/chosen": -2.7859883308410645, |
|
"logits/rejected": -2.7761070728302, |
|
"logps/chosen": -251.69168090820312, |
|
"logps/rejected": -273.64825439453125, |
|
"loss": 0.6338, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.23590262234210968, |
|
"rewards/margins": 0.1843734234571457, |
|
"rewards/rejected": -0.42027607560157776, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.44490970950013087, |
|
"eval_logits/chosen": -2.818115234375, |
|
"eval_logits/rejected": -2.7960946559906006, |
|
"eval_logps/chosen": -304.2535095214844, |
|
"eval_logps/rejected": -308.0869140625, |
|
"eval_loss": 0.6060847043991089, |
|
"eval_rewards/accuracies": 0.6924999952316284, |
|
"eval_rewards/chosen": -0.21515871584415436, |
|
"eval_rewards/margins": 0.2513309419155121, |
|
"eval_rewards/rejected": -0.46648964285850525, |
|
"eval_runtime": 691.2139, |
|
"eval_samples_per_second": 2.893, |
|
"eval_steps_per_second": 0.362, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.4475268254383669, |
|
"grad_norm": 8.074392318725586, |
|
"learning_rate": 3.376481285668599e-07, |
|
"logits/chosen": -2.8055875301361084, |
|
"logits/rejected": -2.8101181983947754, |
|
"logps/chosen": -259.6014404296875, |
|
"logps/rejected": -299.0648193359375, |
|
"loss": 0.6022, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.21092364192008972, |
|
"rewards/margins": 0.25584885478019714, |
|
"rewards/rejected": -0.4667724668979645, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.45014394137660296, |
|
"grad_norm": 9.234480857849121, |
|
"learning_rate": 3.355050358314172e-07, |
|
"logits/chosen": -2.838655948638916, |
|
"logits/rejected": -2.825796604156494, |
|
"logps/chosen": -299.0382995605469, |
|
"logps/rejected": -306.70733642578125, |
|
"loss": 0.5981, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.14200787246227264, |
|
"rewards/margins": 0.2596356272697449, |
|
"rewards/rejected": -0.40164345502853394, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.45276105731483907, |
|
"grad_norm": 6.1853437423706055, |
|
"learning_rate": 3.33354803450089e-07, |
|
"logits/chosen": -2.745539426803589, |
|
"logits/rejected": -2.7465980052948, |
|
"logps/chosen": -298.8321533203125, |
|
"logps/rejected": -300.1834411621094, |
|
"loss": 0.6179, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.14898642897605896, |
|
"rewards/margins": 0.23417282104492188, |
|
"rewards/rejected": -0.38315925002098083, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.4553781732530751, |
|
"grad_norm": 3.701824426651001, |
|
"learning_rate": 3.311976109666605e-07, |
|
"logits/chosen": -2.762765407562256, |
|
"logits/rejected": -2.745163917541504, |
|
"logps/chosen": -306.2688293457031, |
|
"logps/rejected": -297.1578369140625, |
|
"loss": 0.6142, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.11404751241207123, |
|
"rewards/margins": 0.22985681891441345, |
|
"rewards/rejected": -0.3439043462276459, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.45799528919131116, |
|
"grad_norm": 5.698086738586426, |
|
"learning_rate": 3.2903363850608317e-07, |
|
"logits/chosen": -2.8657941818237305, |
|
"logits/rejected": -2.8256325721740723, |
|
"logps/chosen": -286.952392578125, |
|
"logps/rejected": -288.02484130859375, |
|
"loss": 0.609, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.22551126778125763, |
|
"rewards/margins": 0.23173291981220245, |
|
"rewards/rejected": -0.45724421739578247, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.46061240512954726, |
|
"grad_norm": 7.6980085372924805, |
|
"learning_rate": 3.2686306675943477e-07, |
|
"logits/chosen": -2.792118549346924, |
|
"logits/rejected": -2.8060059547424316, |
|
"logps/chosen": -294.06951904296875, |
|
"logps/rejected": -291.16302490234375, |
|
"loss": 0.6134, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.20452764630317688, |
|
"rewards/margins": 0.24044232070446014, |
|
"rewards/rejected": -0.44496995210647583, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.4632295210677833, |
|
"grad_norm": 4.300843238830566, |
|
"learning_rate": 3.2468607696883145e-07, |
|
"logits/chosen": -2.7653212547302246, |
|
"logits/rejected": -2.756118059158325, |
|
"logps/chosen": -298.01544189453125, |
|
"logps/rejected": -333.34234619140625, |
|
"loss": 0.5883, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.2969765067100525, |
|
"rewards/margins": 0.29465410113334656, |
|
"rewards/rejected": -0.5916305780410767, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.46584663700601936, |
|
"grad_norm": 9.618111610412598, |
|
"learning_rate": 3.2250285091229435e-07, |
|
"logits/chosen": -2.825916290283203, |
|
"logits/rejected": -2.8047428131103516, |
|
"logps/chosen": -277.54571533203125, |
|
"logps/rejected": -286.90704345703125, |
|
"loss": 0.6269, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.2800549864768982, |
|
"rewards/margins": 0.20103518664836884, |
|
"rewards/rejected": -0.4810902178287506, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.4684637529442554, |
|
"grad_norm": 15.666852951049805, |
|
"learning_rate": 3.2031357088857083e-07, |
|
"logits/chosen": -2.8130288124084473, |
|
"logits/rejected": -2.8077621459960938, |
|
"logps/chosen": -317.0379333496094, |
|
"logps/rejected": -347.8671569824219, |
|
"loss": 0.6115, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.24338212609291077, |
|
"rewards/margins": 0.24569590389728546, |
|
"rewards/rejected": -0.4890781044960022, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.4710808688824915, |
|
"grad_norm": 6.9462571144104, |
|
"learning_rate": 3.1811841970191267e-07, |
|
"logits/chosen": -2.736687183380127, |
|
"logits/rejected": -2.714433193206787, |
|
"logps/chosen": -264.3397521972656, |
|
"logps/rejected": -324.6456604003906, |
|
"loss": 0.585, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.18001236021518707, |
|
"rewards/margins": 0.31897181272506714, |
|
"rewards/rejected": -0.4989841878414154, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.4710808688824915, |
|
"eval_logits/chosen": -2.8173904418945312, |
|
"eval_logits/rejected": -2.7949471473693848, |
|
"eval_logps/chosen": -296.00537109375, |
|
"eval_logps/rejected": -299.93682861328125, |
|
"eval_loss": 0.6049584746360779, |
|
"eval_rewards/accuracies": 0.6915000081062317, |
|
"eval_rewards/chosen": -0.1326776146888733, |
|
"eval_rewards/margins": 0.25231143832206726, |
|
"eval_rewards/rejected": -0.38498908281326294, |
|
"eval_runtime": 691.5153, |
|
"eval_samples_per_second": 2.892, |
|
"eval_steps_per_second": 0.362, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.47369798482072756, |
|
"grad_norm": 4.673962116241455, |
|
"learning_rate": 3.1591758064681257e-07, |
|
"logits/chosen": -2.7477469444274902, |
|
"logits/rejected": -2.7178540229797363, |
|
"logps/chosen": -282.83074951171875, |
|
"logps/rejected": -272.26715087890625, |
|
"loss": 0.5961, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.11454129219055176, |
|
"rewards/margins": 0.27904239296913147, |
|
"rewards/rejected": -0.3935837149620056, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.4763151007589636, |
|
"grad_norm": 7.684245586395264, |
|
"learning_rate": 3.13711237492698e-07, |
|
"logits/chosen": -2.7976129055023193, |
|
"logits/rejected": -2.7869057655334473, |
|
"logps/chosen": -313.35540771484375, |
|
"logps/rejected": -318.04559326171875, |
|
"loss": 0.6319, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.1507539302110672, |
|
"rewards/margins": 0.1945343315601349, |
|
"rewards/rejected": -0.3452882170677185, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.4789322166971997, |
|
"grad_norm": 4.426579475402832, |
|
"learning_rate": 3.1149957446858767e-07, |
|
"logits/chosen": -2.791010618209839, |
|
"logits/rejected": -2.807931423187256, |
|
"logps/chosen": -277.4505310058594, |
|
"logps/rejected": -279.3646240234375, |
|
"loss": 0.6403, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.12662403285503387, |
|
"rewards/margins": 0.16396556794643402, |
|
"rewards/rejected": -0.2905896306037903, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.48154933263543576, |
|
"grad_norm": 5.900054931640625, |
|
"learning_rate": 3.0928277624770736e-07, |
|
"logits/chosen": -2.843986988067627, |
|
"logits/rejected": -2.823529005050659, |
|
"logps/chosen": -312.50799560546875, |
|
"logps/rejected": -315.56402587890625, |
|
"loss": 0.5825, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.0948447436094284, |
|
"rewards/margins": 0.32227185368537903, |
|
"rewards/rejected": -0.41711658239364624, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.4841664485736718, |
|
"grad_norm": 4.000248908996582, |
|
"learning_rate": 3.0706102793207073e-07, |
|
"logits/chosen": -2.8290603160858154, |
|
"logits/rejected": -2.8024706840515137, |
|
"logps/chosen": -316.80023193359375, |
|
"logps/rejected": -323.507080078125, |
|
"loss": 0.5882, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.1260642558336258, |
|
"rewards/margins": 0.2963547706604004, |
|
"rewards/rejected": -0.422419011592865, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.48678356451190785, |
|
"grad_norm": 7.178162574768066, |
|
"learning_rate": 3.048345150370226e-07, |
|
"logits/chosen": -2.8230552673339844, |
|
"logits/rejected": -2.817823886871338, |
|
"logps/chosen": -320.08123779296875, |
|
"logps/rejected": -328.2519836425781, |
|
"loss": 0.6011, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.1783401370048523, |
|
"rewards/margins": 0.27760833501815796, |
|
"rewards/rejected": -0.45594844222068787, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.48940068045014395, |
|
"grad_norm": 5.042900562286377, |
|
"learning_rate": 3.0260342347574913e-07, |
|
"logits/chosen": -2.809600353240967, |
|
"logits/rejected": -2.78784441947937, |
|
"logps/chosen": -304.2792053222656, |
|
"logps/rejected": -314.709716796875, |
|
"loss": 0.5808, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -0.1620454490184784, |
|
"rewards/margins": 0.3016073703765869, |
|
"rewards/rejected": -0.4636527895927429, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.49201779638838, |
|
"grad_norm": 6.708124160766602, |
|
"learning_rate": 3.0036793954375357e-07, |
|
"logits/chosen": -2.840010643005371, |
|
"logits/rejected": -2.820410966873169, |
|
"logps/chosen": -301.98583984375, |
|
"logps/rejected": -291.33465576171875, |
|
"loss": 0.5776, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.15946264564990997, |
|
"rewards/margins": 0.32609638571739197, |
|
"rewards/rejected": -0.48555904626846313, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.49463491232661605, |
|
"grad_norm": 4.842483043670654, |
|
"learning_rate": 2.9812824990330085e-07, |
|
"logits/chosen": -2.8116726875305176, |
|
"logits/rejected": -2.8013501167297363, |
|
"logps/chosen": -312.96807861328125, |
|
"logps/rejected": -315.23675537109375, |
|
"loss": 0.5975, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.20859690010547638, |
|
"rewards/margins": 0.28837090730667114, |
|
"rewards/rejected": -0.4969678521156311, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.49725202826485215, |
|
"grad_norm": 11.47492790222168, |
|
"learning_rate": 2.958845415678316e-07, |
|
"logits/chosen": -2.8100364208221436, |
|
"logits/rejected": -2.7813189029693604, |
|
"logps/chosen": -317.1954650878906, |
|
"logps/rejected": -327.9840087890625, |
|
"loss": 0.577, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.21498079597949982, |
|
"rewards/margins": 0.32284659147262573, |
|
"rewards/rejected": -0.5378273725509644, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.49725202826485215, |
|
"eval_logits/chosen": -2.8176026344299316, |
|
"eval_logits/rejected": -2.7953593730926514, |
|
"eval_logps/chosen": -304.433349609375, |
|
"eval_logps/rejected": -310.2669677734375, |
|
"eval_loss": 0.6012681722640991, |
|
"eval_rewards/accuracies": 0.6965000033378601, |
|
"eval_rewards/chosen": -0.2169574648141861, |
|
"eval_rewards/margins": 0.27133309841156006, |
|
"eval_rewards/rejected": -0.4882905185222626, |
|
"eval_runtime": 691.3293, |
|
"eval_samples_per_second": 2.893, |
|
"eval_steps_per_second": 0.362, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.4998691442030882, |
|
"grad_norm": 8.036276817321777, |
|
"learning_rate": 2.936370018863459e-07, |
|
"logits/chosen": -2.833437442779541, |
|
"logits/rejected": -2.8240761756896973, |
|
"logps/chosen": -301.29473876953125, |
|
"logps/rejected": -287.30487060546875, |
|
"loss": 0.6058, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.2125242054462433, |
|
"rewards/margins": 0.2442711889743805, |
|
"rewards/rejected": -0.4567953944206238, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 0.5024862601413242, |
|
"grad_norm": 6.088084697723389, |
|
"learning_rate": 2.913858185277605e-07, |
|
"logits/chosen": -2.793074131011963, |
|
"logits/rejected": -2.7879836559295654, |
|
"logps/chosen": -291.63409423828125, |
|
"logps/rejected": -303.8699035644531, |
|
"loss": 0.5963, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.14563243091106415, |
|
"rewards/margins": 0.27376314997673035, |
|
"rewards/rejected": -0.4193955361843109, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.5051033760795604, |
|
"grad_norm": 6.633253574371338, |
|
"learning_rate": 2.89131179465238e-07, |
|
"logits/chosen": -2.763582706451416, |
|
"logits/rejected": -2.7273335456848145, |
|
"logps/chosen": -300.27764892578125, |
|
"logps/rejected": -291.0055236816406, |
|
"loss": 0.5841, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.12304127216339111, |
|
"rewards/margins": 0.3036150336265564, |
|
"rewards/rejected": -0.4266563355922699, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 0.5077204920177963, |
|
"grad_norm": 4.170144557952881, |
|
"learning_rate": 2.8687327296049125e-07, |
|
"logits/chosen": -2.803448438644409, |
|
"logits/rejected": -2.7855215072631836, |
|
"logps/chosen": -287.71673583984375, |
|
"logps/rejected": -312.64544677734375, |
|
"loss": 0.6077, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.14048686623573303, |
|
"rewards/margins": 0.2633481025695801, |
|
"rewards/rejected": -0.4038349688053131, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.5103376079560324, |
|
"grad_norm": 4.711779594421387, |
|
"learning_rate": 2.846122875480637e-07, |
|
"logits/chosen": -2.823185682296753, |
|
"logits/rejected": -2.7931466102600098, |
|
"logps/chosen": -301.4597473144531, |
|
"logps/rejected": -299.9159851074219, |
|
"loss": 0.6066, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.10608525574207306, |
|
"rewards/margins": 0.25272199511528015, |
|
"rewards/rejected": -0.3588072657585144, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.5129547238942685, |
|
"grad_norm": 5.881545543670654, |
|
"learning_rate": 2.8234841201958647e-07, |
|
"logits/chosen": -2.8165388107299805, |
|
"logits/rejected": -2.784043550491333, |
|
"logps/chosen": -311.29217529296875, |
|
"logps/rejected": -301.19964599609375, |
|
"loss": 0.5839, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.1093025654554367, |
|
"rewards/margins": 0.299915611743927, |
|
"rewards/rejected": -0.4092181622982025, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.5155718398325045, |
|
"grad_norm": 10.640946388244629, |
|
"learning_rate": 2.800818354080148e-07, |
|
"logits/chosen": -2.7974326610565186, |
|
"logits/rejected": -2.7710323333740234, |
|
"logps/chosen": -303.19610595703125, |
|
"logps/rejected": -281.1106872558594, |
|
"loss": 0.6138, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.13868093490600586, |
|
"rewards/margins": 0.2444918155670166, |
|
"rewards/rejected": -0.38317275047302246, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 0.5181889557707406, |
|
"grad_norm": 5.855273246765137, |
|
"learning_rate": 2.778127469718435e-07, |
|
"logits/chosen": -2.751603364944458, |
|
"logits/rejected": -2.7628543376922607, |
|
"logps/chosen": -261.6673278808594, |
|
"logps/rejected": -309.0796813964844, |
|
"loss": 0.5864, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.1476416289806366, |
|
"rewards/margins": 0.2927255630493164, |
|
"rewards/rejected": -0.4403671622276306, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.5208060717089767, |
|
"grad_norm": 5.992628574371338, |
|
"learning_rate": 2.755413361793039e-07, |
|
"logits/chosen": -2.7673847675323486, |
|
"logits/rejected": -2.7404510974884033, |
|
"logps/chosen": -280.890869140625, |
|
"logps/rejected": -294.01092529296875, |
|
"loss": 0.6048, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.15447109937667847, |
|
"rewards/margins": 0.2593531310558319, |
|
"rewards/rejected": -0.4138242304325104, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 0.5234231876472127, |
|
"grad_norm": 6.741150379180908, |
|
"learning_rate": 2.7326779269254356e-07, |
|
"logits/chosen": -2.826737880706787, |
|
"logits/rejected": -2.811283588409424, |
|
"logps/chosen": -320.9913024902344, |
|
"logps/rejected": -290.5726318359375, |
|
"loss": 0.5945, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.14564435184001923, |
|
"rewards/margins": 0.29357942938804626, |
|
"rewards/rejected": -0.4392237663269043, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.5234231876472127, |
|
"eval_logits/chosen": -2.812201976776123, |
|
"eval_logits/rejected": -2.7902560234069824, |
|
"eval_logps/chosen": -303.8027648925781, |
|
"eval_logps/rejected": -310.42926025390625, |
|
"eval_loss": 0.5991718173027039, |
|
"eval_rewards/accuracies": 0.6995000243186951, |
|
"eval_rewards/chosen": -0.21065115928649902, |
|
"eval_rewards/margins": 0.27926215529441833, |
|
"eval_rewards/rejected": -0.48991334438323975, |
|
"eval_runtime": 691.9553, |
|
"eval_samples_per_second": 2.89, |
|
"eval_steps_per_second": 0.361, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.5260403035854488, |
|
"grad_norm": 5.159753322601318, |
|
"learning_rate": 2.709923063517895e-07, |
|
"logits/chosen": -2.770754337310791, |
|
"logits/rejected": -2.7877042293548584, |
|
"logps/chosen": -297.4669494628906, |
|
"logps/rejected": -326.15008544921875, |
|
"loss": 0.5803, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.18324916064739227, |
|
"rewards/margins": 0.3264145255088806, |
|
"rewards/rejected": -0.5096637010574341, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 0.528657419523685, |
|
"grad_norm": 9.780900001525879, |
|
"learning_rate": 2.68715067159496e-07, |
|
"logits/chosen": -2.804417133331299, |
|
"logits/rejected": -2.7843241691589355, |
|
"logps/chosen": -287.03619384765625, |
|
"logps/rejected": -296.3020324707031, |
|
"loss": 0.5831, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.18021352589130402, |
|
"rewards/margins": 0.30431440472602844, |
|
"rewards/rejected": -0.4845278859138489, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 0.5312745354619209, |
|
"grad_norm": 7.88455867767334, |
|
"learning_rate": 2.664362652644806e-07, |
|
"logits/chosen": -2.820744514465332, |
|
"logits/rejected": -2.8191521167755127, |
|
"logps/chosen": -334.691650390625, |
|
"logps/rejected": -322.51885986328125, |
|
"loss": 0.5813, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.22317573428153992, |
|
"rewards/margins": 0.33498162031173706, |
|
"rewards/rejected": -0.5581573247909546, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 0.533891651400157, |
|
"grad_norm": 6.620345115661621, |
|
"learning_rate": 2.6415609094604555e-07, |
|
"logits/chosen": -2.802522659301758, |
|
"logits/rejected": -2.8061249256134033, |
|
"logps/chosen": -310.2366638183594, |
|
"logps/rejected": -317.20941162109375, |
|
"loss": 0.6023, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.22533388435840607, |
|
"rewards/margins": 0.28193774819374084, |
|
"rewards/rejected": -0.5072715878486633, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 0.5365087673383931, |
|
"grad_norm": 8.580389022827148, |
|
"learning_rate": 2.618747345980904e-07, |
|
"logits/chosen": -2.8094029426574707, |
|
"logits/rejected": -2.768106460571289, |
|
"logps/chosen": -293.4418029785156, |
|
"logps/rejected": -266.50897216796875, |
|
"loss": 0.6014, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.28857478499412537, |
|
"rewards/margins": 0.2732298970222473, |
|
"rewards/rejected": -0.5618046522140503, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.5391258832766291, |
|
"grad_norm": 11.197132110595703, |
|
"learning_rate": 2.595923867132136e-07, |
|
"logits/chosen": -2.8401012420654297, |
|
"logits/rejected": -2.835894823074341, |
|
"logps/chosen": -327.6039733886719, |
|
"logps/rejected": -335.93634033203125, |
|
"loss": 0.5892, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.29747676849365234, |
|
"rewards/margins": 0.3320815861225128, |
|
"rewards/rejected": -0.6295583844184875, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 0.5417429992148652, |
|
"grad_norm": 7.386964797973633, |
|
"learning_rate": 2.5730923786680667e-07, |
|
"logits/chosen": -2.820725917816162, |
|
"logits/rejected": -2.821699619293213, |
|
"logps/chosen": -294.2755432128906, |
|
"logps/rejected": -329.28900146484375, |
|
"loss": 0.6084, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.28539037704467773, |
|
"rewards/margins": 0.27198493480682373, |
|
"rewards/rejected": -0.5573753714561462, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 0.5443601151531012, |
|
"grad_norm": 10.91450023651123, |
|
"learning_rate": 2.5502547870114135e-07, |
|
"logits/chosen": -2.798468589782715, |
|
"logits/rejected": -2.764756441116333, |
|
"logps/chosen": -296.8208923339844, |
|
"logps/rejected": -290.93609619140625, |
|
"loss": 0.6123, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.25504210591316223, |
|
"rewards/margins": 0.26738548278808594, |
|
"rewards/rejected": -0.5224276185035706, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 0.5469772310913373, |
|
"grad_norm": 9.419450759887695, |
|
"learning_rate": 2.527412999094506e-07, |
|
"logits/chosen": -2.7591891288757324, |
|
"logits/rejected": -2.7384586334228516, |
|
"logps/chosen": -340.7040100097656, |
|
"logps/rejected": -353.3229064941406, |
|
"loss": 0.5947, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.2267749011516571, |
|
"rewards/margins": 0.2946481704711914, |
|
"rewards/rejected": -0.5214229822158813, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 0.5495943470295734, |
|
"grad_norm": 9.121070861816406, |
|
"learning_rate": 2.5045689222000636e-07, |
|
"logits/chosen": -2.748777151107788, |
|
"logits/rejected": -2.737816333770752, |
|
"logps/chosen": -279.33941650390625, |
|
"logps/rejected": -290.88262939453125, |
|
"loss": 0.5913, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.20830078423023224, |
|
"rewards/margins": 0.28861740231513977, |
|
"rewards/rejected": -0.4969182014465332, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.5495943470295734, |
|
"eval_logits/chosen": -2.8085484504699707, |
|
"eval_logits/rejected": -2.786346673965454, |
|
"eval_logps/chosen": -306.4640808105469, |
|
"eval_logps/rejected": -313.952880859375, |
|
"eval_loss": 0.5981019139289856, |
|
"eval_rewards/accuracies": 0.7024999856948853, |
|
"eval_rewards/chosen": -0.23726463317871094, |
|
"eval_rewards/margins": 0.2878848612308502, |
|
"eval_rewards/rejected": -0.5251494646072388, |
|
"eval_runtime": 690.4278, |
|
"eval_samples_per_second": 2.897, |
|
"eval_steps_per_second": 0.362, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.5522114629678094, |
|
"grad_norm": 7.360952854156494, |
|
"learning_rate": 2.481724463801933e-07, |
|
"logits/chosen": -2.7974154949188232, |
|
"logits/rejected": -2.7778165340423584, |
|
"logps/chosen": -320.70465087890625, |
|
"logps/rejected": -308.23455810546875, |
|
"loss": 0.5916, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.25429460406303406, |
|
"rewards/margins": 0.29730120301246643, |
|
"rewards/rejected": -0.5515958070755005, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 0.5548285789060455, |
|
"grad_norm": 9.077162742614746, |
|
"learning_rate": 2.4588815314058154e-07, |
|
"logits/chosen": -2.7863690853118896, |
|
"logits/rejected": -2.787247896194458, |
|
"logps/chosen": -283.7870788574219, |
|
"logps/rejected": -277.558837890625, |
|
"loss": 0.5976, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.24108314514160156, |
|
"rewards/margins": 0.27977603673934937, |
|
"rewards/rejected": -0.5208591818809509, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 0.5574456948442816, |
|
"grad_norm": 6.194889545440674, |
|
"learning_rate": 2.4360420323899917e-07, |
|
"logits/chosen": -2.7870755195617676, |
|
"logits/rejected": -2.779362916946411, |
|
"logps/chosen": -321.5159606933594, |
|
"logps/rejected": -313.3367614746094, |
|
"loss": 0.6106, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.25045931339263916, |
|
"rewards/margins": 0.27981314063072205, |
|
"rewards/rejected": -0.5302724242210388, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 0.5600628107825176, |
|
"grad_norm": 9.01162338256836, |
|
"learning_rate": 2.4132078738460583e-07, |
|
"logits/chosen": -2.821700096130371, |
|
"logits/rejected": -2.7977004051208496, |
|
"logps/chosen": -299.77734375, |
|
"logps/rejected": -288.15472412109375, |
|
"loss": 0.5911, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.2075999230146408, |
|
"rewards/margins": 0.2872273027896881, |
|
"rewards/rejected": -0.49482718110084534, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 0.5626799267207537, |
|
"grad_norm": 8.978148460388184, |
|
"learning_rate": 2.390380962419682e-07, |
|
"logits/chosen": -2.7910008430480957, |
|
"logits/rejected": -2.7853500843048096, |
|
"logps/chosen": -271.1761474609375, |
|
"logps/rejected": -258.0618896484375, |
|
"loss": 0.6279, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.2159349024295807, |
|
"rewards/margins": 0.2157304286956787, |
|
"rewards/rejected": -0.4316653609275818, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.5652970426589898, |
|
"grad_norm": 10.330108642578125, |
|
"learning_rate": 2.3675632041513977e-07, |
|
"logits/chosen": -2.8272249698638916, |
|
"logits/rejected": -2.781740427017212, |
|
"logps/chosen": -321.1408996582031, |
|
"logps/rejected": -290.31451416015625, |
|
"loss": 0.566, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.1839137077331543, |
|
"rewards/margins": 0.36078041791915894, |
|
"rewards/rejected": -0.5446941256523132, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 0.5679141585972258, |
|
"grad_norm": 4.827859401702881, |
|
"learning_rate": 2.344756504317453e-07, |
|
"logits/chosen": -2.7731990814208984, |
|
"logits/rejected": -2.739841938018799, |
|
"logps/chosen": -311.63385009765625, |
|
"logps/rejected": -300.05657958984375, |
|
"loss": 0.6069, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.37105852365493774, |
|
"rewards/margins": 0.2651851773262024, |
|
"rewards/rejected": -0.6362437009811401, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 0.5705312745354619, |
|
"grad_norm": 7.324320316314697, |
|
"learning_rate": 2.3219627672707237e-07, |
|
"logits/chosen": -2.7636940479278564, |
|
"logits/rejected": -2.7629504203796387, |
|
"logps/chosen": -312.3614196777344, |
|
"logps/rejected": -291.49920654296875, |
|
"loss": 0.6201, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.40163812041282654, |
|
"rewards/margins": 0.2354915589094162, |
|
"rewards/rejected": -0.6371296644210815, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 0.573148390473698, |
|
"grad_norm": 9.793487548828125, |
|
"learning_rate": 2.2991838962816918e-07, |
|
"logits/chosen": -2.760166645050049, |
|
"logits/rejected": -2.7421138286590576, |
|
"logps/chosen": -309.69378662109375, |
|
"logps/rejected": -330.1057434082031, |
|
"loss": 0.6189, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.391974538564682, |
|
"rewards/margins": 0.23559853434562683, |
|
"rewards/rejected": -0.6275731325149536, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 0.575765506411934, |
|
"grad_norm": 4.884433746337891, |
|
"learning_rate": 2.2764217933795297e-07, |
|
"logits/chosen": -2.7735462188720703, |
|
"logits/rejected": -2.7576115131378174, |
|
"logps/chosen": -306.01983642578125, |
|
"logps/rejected": -319.36273193359375, |
|
"loss": 0.5816, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.28672754764556885, |
|
"rewards/margins": 0.3387922942638397, |
|
"rewards/rejected": -0.625519871711731, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.575765506411934, |
|
"eval_logits/chosen": -2.806988000869751, |
|
"eval_logits/rejected": -2.7848920822143555, |
|
"eval_logps/chosen": -309.6146240234375, |
|
"eval_logps/rejected": -317.14105224609375, |
|
"eval_loss": 0.5989395976066589, |
|
"eval_rewards/accuracies": 0.6970000267028809, |
|
"eval_rewards/chosen": -0.26876989006996155, |
|
"eval_rewards/margins": 0.28826138377189636, |
|
"eval_rewards/rejected": -0.5570313334465027, |
|
"eval_runtime": 692.0182, |
|
"eval_samples_per_second": 2.89, |
|
"eval_steps_per_second": 0.361, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.5783826223501701, |
|
"grad_norm": 5.080691337585449, |
|
"learning_rate": 2.253678359193278e-07, |
|
"logits/chosen": -2.8626627922058105, |
|
"logits/rejected": -2.8227312564849854, |
|
"logps/chosen": -323.10284423828125, |
|
"logps/rejected": -324.9154968261719, |
|
"loss": 0.6192, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.28973332047462463, |
|
"rewards/margins": 0.24134087562561035, |
|
"rewards/rejected": -0.5310741662979126, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 0.5809997382884062, |
|
"grad_norm": 8.136847496032715, |
|
"learning_rate": 2.230955492793149e-07, |
|
"logits/chosen": -2.7363781929016113, |
|
"logits/rejected": -2.747398853302002, |
|
"logps/chosen": -315.01092529296875, |
|
"logps/rejected": -321.312744140625, |
|
"loss": 0.6301, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.2270394265651703, |
|
"rewards/margins": 0.22412936389446259, |
|
"rewards/rejected": -0.4511687755584717, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 0.5836168542266422, |
|
"grad_norm": 3.2636797428131104, |
|
"learning_rate": 2.2082550915319468e-07, |
|
"logits/chosen": -2.746173858642578, |
|
"logits/rejected": -2.7479488849639893, |
|
"logps/chosen": -311.60443115234375, |
|
"logps/rejected": -304.00933837890625, |
|
"loss": 0.5897, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.16526171565055847, |
|
"rewards/margins": 0.31148332357406616, |
|
"rewards/rejected": -0.47674503922462463, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 0.5862339701648783, |
|
"grad_norm": 7.513117790222168, |
|
"learning_rate": 2.1855790508866433e-07, |
|
"logits/chosen": -2.7626214027404785, |
|
"logits/rejected": -2.766356945037842, |
|
"logps/chosen": -345.93560791015625, |
|
"logps/rejected": -345.16632080078125, |
|
"loss": 0.6017, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.19639845192432404, |
|
"rewards/margins": 0.2772556245326996, |
|
"rewards/rejected": -0.473654180765152, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.5888510861031143, |
|
"grad_norm": 4.226502418518066, |
|
"learning_rate": 2.162929264300107e-07, |
|
"logits/chosen": -2.7443809509277344, |
|
"logits/rejected": -2.740731716156006, |
|
"logps/chosen": -298.61883544921875, |
|
"logps/rejected": -312.0686950683594, |
|
"loss": 0.5729, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.14046551287174225, |
|
"rewards/margins": 0.34373658895492554, |
|
"rewards/rejected": -0.4842020869255066, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.5914682020413504, |
|
"grad_norm": 5.33687162399292, |
|
"learning_rate": 2.1403076230230005e-07, |
|
"logits/chosen": -2.767137289047241, |
|
"logits/rejected": -2.7396111488342285, |
|
"logps/chosen": -312.28643798828125, |
|
"logps/rejected": -306.20172119140625, |
|
"loss": 0.616, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.19273105263710022, |
|
"rewards/margins": 0.26331207156181335, |
|
"rewards/rejected": -0.45604315400123596, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 0.5940853179795865, |
|
"grad_norm": 9.639008522033691, |
|
"learning_rate": 2.1177160159558596e-07, |
|
"logits/chosen": -2.7518250942230225, |
|
"logits/rejected": -2.7383649349212646, |
|
"logps/chosen": -321.7221374511719, |
|
"logps/rejected": -297.3667297363281, |
|
"loss": 0.6038, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.21679162979125977, |
|
"rewards/margins": 0.29109686613082886, |
|
"rewards/rejected": -0.5078884959220886, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 0.5967024339178225, |
|
"grad_norm": 6.384767055511475, |
|
"learning_rate": 2.0951563294913734e-07, |
|
"logits/chosen": -2.760425090789795, |
|
"logits/rejected": -2.7438526153564453, |
|
"logps/chosen": -299.39373779296875, |
|
"logps/rejected": -302.9912109375, |
|
"loss": 0.5717, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.20336699485778809, |
|
"rewards/margins": 0.3353096842765808, |
|
"rewards/rejected": -0.5386766791343689, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 0.5993195498560586, |
|
"grad_norm": 6.036366939544678, |
|
"learning_rate": 2.072630447356869e-07, |
|
"logits/chosen": -2.7959117889404297, |
|
"logits/rejected": -2.7956790924072266, |
|
"logps/chosen": -300.03179931640625, |
|
"logps/rejected": -291.49481201171875, |
|
"loss": 0.6001, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.23898771405220032, |
|
"rewards/margins": 0.26846712827682495, |
|
"rewards/rejected": -0.5074548125267029, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 0.6019366657942947, |
|
"grad_norm": 7.8020195960998535, |
|
"learning_rate": 2.0501402504570232e-07, |
|
"logits/chosen": -2.829082727432251, |
|
"logits/rejected": -2.772502899169922, |
|
"logps/chosen": -318.4316711425781, |
|
"logps/rejected": -315.959716796875, |
|
"loss": 0.5824, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.22740764915943146, |
|
"rewards/margins": 0.3216533958911896, |
|
"rewards/rejected": -0.5490610003471375, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.6019366657942947, |
|
"eval_logits/chosen": -2.80366849899292, |
|
"eval_logits/rejected": -2.7820827960968018, |
|
"eval_logps/chosen": -305.00982666015625, |
|
"eval_logps/rejected": -313.32330322265625, |
|
"eval_loss": 0.5960872769355774, |
|
"eval_rewards/accuracies": 0.6955000162124634, |
|
"eval_rewards/chosen": -0.2227218896150589, |
|
"eval_rewards/margins": 0.2961318790912628, |
|
"eval_rewards/rejected": -0.5188537836074829, |
|
"eval_runtime": 691.9375, |
|
"eval_samples_per_second": 2.89, |
|
"eval_steps_per_second": 0.361, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.6045537817325307, |
|
"grad_norm": 12.083107948303223, |
|
"learning_rate": 2.027687616716804e-07, |
|
"logits/chosen": -2.72344970703125, |
|
"logits/rejected": -2.7168376445770264, |
|
"logps/chosen": -268.31243896484375, |
|
"logps/rejected": -255.6737518310547, |
|
"loss": 0.6189, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.2189827412366867, |
|
"rewards/margins": 0.24416430294513702, |
|
"rewards/rejected": -0.46314701437950134, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 0.6071708976707668, |
|
"grad_norm": 8.845372200012207, |
|
"learning_rate": 2.005274420924668e-07, |
|
"logits/chosen": -2.790346145629883, |
|
"logits/rejected": -2.778743267059326, |
|
"logps/chosen": -295.9941711425781, |
|
"logps/rejected": -287.6865234375, |
|
"loss": 0.6086, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.25174736976623535, |
|
"rewards/margins": 0.2748829126358032, |
|
"rewards/rejected": -0.5266302824020386, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 0.6097880136090029, |
|
"grad_norm": 7.964311599731445, |
|
"learning_rate": 1.9829025345760121e-07, |
|
"logits/chosen": -2.7749578952789307, |
|
"logits/rejected": -2.7802319526672363, |
|
"logps/chosen": -315.29290771484375, |
|
"logps/rejected": -332.8951721191406, |
|
"loss": 0.6062, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.17806461453437805, |
|
"rewards/margins": 0.2752231955528259, |
|
"rewards/rejected": -0.4532877802848816, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 0.6124051295472389, |
|
"grad_norm": 8.214485168457031, |
|
"learning_rate": 1.960573825716911e-07, |
|
"logits/chosen": -2.743821620941162, |
|
"logits/rejected": -2.7305188179016113, |
|
"logps/chosen": -275.1949768066406, |
|
"logps/rejected": -297.45172119140625, |
|
"loss": 0.6016, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.23889228701591492, |
|
"rewards/margins": 0.29088443517684937, |
|
"rewards/rejected": -0.5297766923904419, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 0.615022245485475, |
|
"grad_norm": 7.783448696136475, |
|
"learning_rate": 1.9382901587881273e-07, |
|
"logits/chosen": -2.8195502758026123, |
|
"logits/rejected": -2.8172898292541504, |
|
"logps/chosen": -291.1629333496094, |
|
"logps/rejected": -292.11553955078125, |
|
"loss": 0.5555, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.15334704518318176, |
|
"rewards/margins": 0.37875789403915405, |
|
"rewards/rejected": -0.5321049094200134, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.6176393614237111, |
|
"grad_norm": 7.713850498199463, |
|
"learning_rate": 1.9160533944694364e-07, |
|
"logits/chosen": -2.802713394165039, |
|
"logits/rejected": -2.763248920440674, |
|
"logps/chosen": -297.48541259765625, |
|
"logps/rejected": -321.0580139160156, |
|
"loss": 0.5661, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.1875167191028595, |
|
"rewards/margins": 0.3671106696128845, |
|
"rewards/rejected": -0.5546274185180664, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 0.6202564773619471, |
|
"grad_norm": 7.275653839111328, |
|
"learning_rate": 1.8938653895242602e-07, |
|
"logits/chosen": -2.805842161178589, |
|
"logits/rejected": -2.7778079509735107, |
|
"logps/chosen": -301.32257080078125, |
|
"logps/rejected": -307.5292663574219, |
|
"loss": 0.569, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.22137200832366943, |
|
"rewards/margins": 0.3620893061161041, |
|
"rewards/rejected": -0.583461344242096, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 0.6228735933001832, |
|
"grad_norm": 7.8891282081604, |
|
"learning_rate": 1.8717279966446264e-07, |
|
"logits/chosen": -2.702014684677124, |
|
"logits/rejected": -2.6890392303466797, |
|
"logps/chosen": -299.67095947265625, |
|
"logps/rejected": -315.53125, |
|
"loss": 0.6047, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.3092700242996216, |
|
"rewards/margins": 0.2915950417518616, |
|
"rewards/rejected": -0.6008650660514832, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 0.6254907092384192, |
|
"grad_norm": 9.103086471557617, |
|
"learning_rate": 1.8496430642964694e-07, |
|
"logits/chosen": -2.7693662643432617, |
|
"logits/rejected": -2.749218702316284, |
|
"logps/chosen": -320.30596923828125, |
|
"logps/rejected": -322.6269226074219, |
|
"loss": 0.6135, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.2899993658065796, |
|
"rewards/margins": 0.2783369719982147, |
|
"rewards/rejected": -0.5683363676071167, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 0.6281078251766553, |
|
"grad_norm": 8.552151679992676, |
|
"learning_rate": 1.8276124365652855e-07, |
|
"logits/chosen": -2.796008586883545, |
|
"logits/rejected": -2.750042200088501, |
|
"logps/chosen": -308.24066162109375, |
|
"logps/rejected": -318.9580993652344, |
|
"loss": 0.602, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.283893346786499, |
|
"rewards/margins": 0.2797131836414337, |
|
"rewards/rejected": -0.5636065602302551, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.6281078251766553, |
|
"eval_logits/chosen": -2.796116352081299, |
|
"eval_logits/rejected": -2.774383783340454, |
|
"eval_logps/chosen": -309.5652160644531, |
|
"eval_logps/rejected": -318.12506103515625, |
|
"eval_loss": 0.5968618392944336, |
|
"eval_rewards/accuracies": 0.6990000009536743, |
|
"eval_rewards/chosen": -0.2682757079601288, |
|
"eval_rewards/margins": 0.2985955774784088, |
|
"eval_rewards/rejected": -0.5668712258338928, |
|
"eval_runtime": 690.9152, |
|
"eval_samples_per_second": 2.895, |
|
"eval_steps_per_second": 0.362, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.6307249411148914, |
|
"grad_norm": 10.884597778320312, |
|
"learning_rate": 1.805637953002149e-07, |
|
"logits/chosen": -2.806243658065796, |
|
"logits/rejected": -2.804234266281128, |
|
"logps/chosen": -287.49090576171875, |
|
"logps/rejected": -287.6014404296875, |
|
"loss": 0.6169, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.27734607458114624, |
|
"rewards/margins": 0.24837279319763184, |
|
"rewards/rejected": -0.5257189273834229, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 0.6333420570531274, |
|
"grad_norm": 9.013958930969238, |
|
"learning_rate": 1.7837214484701153e-07, |
|
"logits/chosen": -2.7953040599823, |
|
"logits/rejected": -2.7851452827453613, |
|
"logps/chosen": -289.382568359375, |
|
"logps/rejected": -297.02679443359375, |
|
"loss": 0.5733, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.2011108100414276, |
|
"rewards/margins": 0.34568914771080017, |
|
"rewards/rejected": -0.5468000173568726, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 0.6359591729913635, |
|
"grad_norm": 14.238588333129883, |
|
"learning_rate": 1.761864752991004e-07, |
|
"logits/chosen": -2.778735399246216, |
|
"logits/rejected": -2.759908437728882, |
|
"logps/chosen": -295.66241455078125, |
|
"logps/rejected": -312.7738952636719, |
|
"loss": 0.5791, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -0.21105961501598358, |
|
"rewards/margins": 0.3268287181854248, |
|
"rewards/rejected": -0.5378884077072144, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 0.6385762889295996, |
|
"grad_norm": 5.6600518226623535, |
|
"learning_rate": 1.7400696915925995e-07, |
|
"logits/chosen": -2.7974464893341064, |
|
"logits/rejected": -2.7732651233673096, |
|
"logps/chosen": -312.24798583984375, |
|
"logps/rejected": -279.251708984375, |
|
"loss": 0.5943, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.23233290016651154, |
|
"rewards/margins": 0.3078458309173584, |
|
"rewards/rejected": -0.5401787161827087, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 0.6411934048678356, |
|
"grad_norm": 11.058223724365234, |
|
"learning_rate": 1.718338084156254e-07, |
|
"logits/chosen": -2.7382242679595947, |
|
"logits/rejected": -2.727843761444092, |
|
"logps/chosen": -323.4954528808594, |
|
"logps/rejected": -317.99456787109375, |
|
"loss": 0.57, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -0.1701376736164093, |
|
"rewards/margins": 0.3507465720176697, |
|
"rewards/rejected": -0.5208842754364014, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 0.6438105208060717, |
|
"grad_norm": 14.676642417907715, |
|
"learning_rate": 1.696671745264937e-07, |
|
"logits/chosen": -2.799201488494873, |
|
"logits/rejected": -2.8146328926086426, |
|
"logps/chosen": -313.3539733886719, |
|
"logps/rejected": -290.71197509765625, |
|
"loss": 0.5616, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -0.154522106051445, |
|
"rewards/margins": 0.36096832156181335, |
|
"rewards/rejected": -0.5154904127120972, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 0.6464276367443078, |
|
"grad_norm": 7.134603500366211, |
|
"learning_rate": 1.67507248405171e-07, |
|
"logits/chosen": -2.786536693572998, |
|
"logits/rejected": -2.7716171741485596, |
|
"logps/chosen": -290.3885192871094, |
|
"logps/rejected": -317.96453857421875, |
|
"loss": 0.6052, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.17861530184745789, |
|
"rewards/margins": 0.2776513695716858, |
|
"rewards/rejected": -0.4562666416168213, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 0.6490447526825438, |
|
"grad_norm": 9.284005165100098, |
|
"learning_rate": 1.6535421040486683e-07, |
|
"logits/chosen": -2.695885181427002, |
|
"logits/rejected": -2.683889150619507, |
|
"logps/chosen": -292.3827209472656, |
|
"logps/rejected": -295.35003662109375, |
|
"loss": 0.5708, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.20486466586589813, |
|
"rewards/margins": 0.3616489768028259, |
|
"rewards/rejected": -0.5665136575698853, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 0.6516618686207799, |
|
"grad_norm": 11.596046447753906, |
|
"learning_rate": 1.6320824030363456e-07, |
|
"logits/chosen": -2.7673633098602295, |
|
"logits/rejected": -2.7697348594665527, |
|
"logps/chosen": -269.5127868652344, |
|
"logps/rejected": -284.500732421875, |
|
"loss": 0.5804, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.19040192663669586, |
|
"rewards/margins": 0.32062506675720215, |
|
"rewards/rejected": -0.5110269784927368, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 0.654278984559016, |
|
"grad_norm": 8.306464195251465, |
|
"learning_rate": 1.6106951728936024e-07, |
|
"logits/chosen": -2.8287737369537354, |
|
"logits/rejected": -2.785698413848877, |
|
"logps/chosen": -290.69586181640625, |
|
"logps/rejected": -315.9652404785156, |
|
"loss": 0.5792, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.17289450764656067, |
|
"rewards/margins": 0.32756882905960083, |
|
"rewards/rejected": -0.5004633069038391, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.654278984559016, |
|
"eval_logits/chosen": -2.7979679107666016, |
|
"eval_logits/rejected": -2.776271104812622, |
|
"eval_logps/chosen": -303.76153564453125, |
|
"eval_logps/rejected": -311.8429260253906, |
|
"eval_loss": 0.5962891578674316, |
|
"eval_rewards/accuracies": 0.6974999904632568, |
|
"eval_rewards/chosen": -0.2102394998073578, |
|
"eval_rewards/margins": 0.2938106954097748, |
|
"eval_rewards/rejected": -0.5040501952171326, |
|
"eval_runtime": 692.3854, |
|
"eval_samples_per_second": 2.889, |
|
"eval_steps_per_second": 0.361, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.656896100497252, |
|
"grad_norm": 6.3364176750183105, |
|
"learning_rate": 1.5893821994479994e-07, |
|
"logits/chosen": -2.8073089122772217, |
|
"logits/rejected": -2.7984962463378906, |
|
"logps/chosen": -307.6702880859375, |
|
"logps/rejected": -299.78192138671875, |
|
"loss": 0.583, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.15238206088542938, |
|
"rewards/margins": 0.3230430781841278, |
|
"rewards/rejected": -0.4754251539707184, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 0.6595132164354881, |
|
"grad_norm": 7.475069999694824, |
|
"learning_rate": 1.5681452623266867e-07, |
|
"logits/chosen": -2.788701057434082, |
|
"logits/rejected": -2.7505264282226562, |
|
"logps/chosen": -323.1575012207031, |
|
"logps/rejected": -304.9902038574219, |
|
"loss": 0.5469, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -0.1838502436876297, |
|
"rewards/margins": 0.4115122854709625, |
|
"rewards/rejected": -0.5953624844551086, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 0.6621303323737242, |
|
"grad_norm": 9.084112167358398, |
|
"learning_rate": 1.546986134807801e-07, |
|
"logits/chosen": -2.8091278076171875, |
|
"logits/rejected": -2.780764102935791, |
|
"logps/chosen": -293.3882751464844, |
|
"logps/rejected": -309.5545349121094, |
|
"loss": 0.5931, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.28720271587371826, |
|
"rewards/margins": 0.30004793405532837, |
|
"rewards/rejected": -0.5872506499290466, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 0.6647474483119602, |
|
"grad_norm": 7.817606449127197, |
|
"learning_rate": 1.5259065836724034e-07, |
|
"logits/chosen": -2.7307331562042236, |
|
"logits/rejected": -2.7140753269195557, |
|
"logps/chosen": -290.29443359375, |
|
"logps/rejected": -307.90399169921875, |
|
"loss": 0.5968, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.2654728889465332, |
|
"rewards/margins": 0.2819042205810547, |
|
"rewards/rejected": -0.5473771095275879, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 0.6673645642501963, |
|
"grad_norm": 8.136064529418945, |
|
"learning_rate": 1.5049083690569454e-07, |
|
"logits/chosen": -2.7462635040283203, |
|
"logits/rejected": -2.731522798538208, |
|
"logps/chosen": -279.6645812988281, |
|
"logps/rejected": -303.47857666015625, |
|
"loss": 0.6011, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.2667672336101532, |
|
"rewards/margins": 0.28752660751342773, |
|
"rewards/rejected": -0.5542938113212585, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 0.6699816801884323, |
|
"grad_norm": 5.6162896156311035, |
|
"learning_rate": 1.4839932443063056e-07, |
|
"logits/chosen": -2.7818315029144287, |
|
"logits/rejected": -2.754776954650879, |
|
"logps/chosen": -331.192626953125, |
|
"logps/rejected": -306.44342041015625, |
|
"loss": 0.5807, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.23324036598205566, |
|
"rewards/margins": 0.33265605568885803, |
|
"rewards/rejected": -0.5658964514732361, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 0.6725987961266684, |
|
"grad_norm": 15.203133583068848, |
|
"learning_rate": 1.46316295582738e-07, |
|
"logits/chosen": -2.755795955657959, |
|
"logits/rejected": -2.745166301727295, |
|
"logps/chosen": -288.94012451171875, |
|
"logps/rejected": -295.92974853515625, |
|
"loss": 0.63, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.30726075172424316, |
|
"rewards/margins": 0.21980533003807068, |
|
"rewards/rejected": -0.5270661115646362, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 0.6752159120649045, |
|
"grad_norm": 23.822792053222656, |
|
"learning_rate": 1.4424192429432655e-07, |
|
"logits/chosen": -2.783210515975952, |
|
"logits/rejected": -2.766979694366455, |
|
"logps/chosen": -291.4307556152344, |
|
"logps/rejected": -328.7579040527344, |
|
"loss": 0.5738, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.18577826023101807, |
|
"rewards/margins": 0.34509676694869995, |
|
"rewards/rejected": -0.5308750867843628, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 0.6778330280031405, |
|
"grad_norm": 9.544054985046387, |
|
"learning_rate": 1.4217638377480158e-07, |
|
"logits/chosen": -2.7744319438934326, |
|
"logits/rejected": -2.7644972801208496, |
|
"logps/chosen": -299.30975341796875, |
|
"logps/rejected": -312.57220458984375, |
|
"loss": 0.598, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.23222167789936066, |
|
"rewards/margins": 0.28205937147140503, |
|
"rewards/rejected": -0.5142810344696045, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 0.6804501439413766, |
|
"grad_norm": 7.35859489440918, |
|
"learning_rate": 1.401198464962021e-07, |
|
"logits/chosen": -2.7667133808135986, |
|
"logits/rejected": -2.7541134357452393, |
|
"logps/chosen": -305.63446044921875, |
|
"logps/rejected": -288.49676513671875, |
|
"loss": 0.6028, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.2002829611301422, |
|
"rewards/margins": 0.26447853446006775, |
|
"rewards/rejected": -0.4647614359855652, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.6804501439413766, |
|
"eval_logits/chosen": -2.793254852294922, |
|
"eval_logits/rejected": -2.771672010421753, |
|
"eval_logps/chosen": -301.69635009765625, |
|
"eval_logps/rejected": -309.3417053222656, |
|
"eval_loss": 0.5973595976829529, |
|
"eval_rewards/accuracies": 0.6919999718666077, |
|
"eval_rewards/chosen": -0.18958736956119537, |
|
"eval_rewards/margins": 0.289450466632843, |
|
"eval_rewards/rejected": -0.4790377914905548, |
|
"eval_runtime": 692.1987, |
|
"eval_samples_per_second": 2.889, |
|
"eval_steps_per_second": 0.361, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.6830672598796127, |
|
"grad_norm": 6.412085056304932, |
|
"learning_rate": 1.3807248417879894e-07, |
|
"logits/chosen": -2.799522638320923, |
|
"logits/rejected": -2.801234483718872, |
|
"logps/chosen": -304.61505126953125, |
|
"logps/rejected": -318.75360107421875, |
|
"loss": 0.5742, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.1567406803369522, |
|
"rewards/margins": 0.35466814041137695, |
|
"rewards/rejected": -0.511408805847168, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 0.6856843758178487, |
|
"grad_norm": 6.595985412597656, |
|
"learning_rate": 1.3603446777675665e-07, |
|
"logits/chosen": -2.7163891792297363, |
|
"logits/rejected": -2.6980533599853516, |
|
"logps/chosen": -301.43170166015625, |
|
"logps/rejected": -309.5948486328125, |
|
"loss": 0.5767, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.18890248239040375, |
|
"rewards/margins": 0.33902615308761597, |
|
"rewards/rejected": -0.5279285907745361, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 0.6883014917560848, |
|
"grad_norm": 5.626343250274658, |
|
"learning_rate": 1.3400596746385814e-07, |
|
"logits/chosen": -2.785409450531006, |
|
"logits/rejected": -2.7549426555633545, |
|
"logps/chosen": -305.23779296875, |
|
"logps/rejected": -306.29864501953125, |
|
"loss": 0.5866, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.17120136320590973, |
|
"rewards/margins": 0.3220587372779846, |
|
"rewards/rejected": -0.49326008558273315, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 0.6909186076943209, |
|
"grad_norm": 7.084354400634766, |
|
"learning_rate": 1.3198715261929586e-07, |
|
"logits/chosen": -2.8111932277679443, |
|
"logits/rejected": -2.7792601585388184, |
|
"logps/chosen": -269.24957275390625, |
|
"logps/rejected": -297.8160400390625, |
|
"loss": 0.5557, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.19386166334152222, |
|
"rewards/margins": 0.37062662839889526, |
|
"rewards/rejected": -0.5644882917404175, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 0.6935357236325569, |
|
"grad_norm": 6.301397800445557, |
|
"learning_rate": 1.299781918135282e-07, |
|
"logits/chosen": -2.780548095703125, |
|
"logits/rejected": -2.7463881969451904, |
|
"logps/chosen": -331.93035888671875, |
|
"logps/rejected": -346.24005126953125, |
|
"loss": 0.5488, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.12747205793857574, |
|
"rewards/margins": 0.4090425372123718, |
|
"rewards/rejected": -0.5365146398544312, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 0.696152839570793, |
|
"grad_norm": 4.976480007171631, |
|
"learning_rate": 1.279792527942045e-07, |
|
"logits/chosen": -2.7965517044067383, |
|
"logits/rejected": -2.7541985511779785, |
|
"logps/chosen": -308.75946044921875, |
|
"logps/rejected": -333.583251953125, |
|
"loss": 0.573, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.2170572280883789, |
|
"rewards/margins": 0.3559117913246155, |
|
"rewards/rejected": -0.5729690194129944, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 0.6987699555090291, |
|
"grad_norm": 7.420611381530762, |
|
"learning_rate": 1.259905024721576e-07, |
|
"logits/chosen": -2.7755208015441895, |
|
"logits/rejected": -2.7653794288635254, |
|
"logps/chosen": -297.36810302734375, |
|
"logps/rejected": -308.62139892578125, |
|
"loss": 0.574, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.21521492302417755, |
|
"rewards/margins": 0.3440507650375366, |
|
"rewards/rejected": -0.5592657327651978, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 0.7013870714472651, |
|
"grad_norm": 9.432327270507812, |
|
"learning_rate": 1.2401210690746703e-07, |
|
"logits/chosen": -2.7644107341766357, |
|
"logits/rejected": -2.7474875450134277, |
|
"logps/chosen": -305.26129150390625, |
|
"logps/rejected": -300.5979309082031, |
|
"loss": 0.5966, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.19491124153137207, |
|
"rewards/margins": 0.2979043126106262, |
|
"rewards/rejected": -0.4928155541419983, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 0.7040041873855012, |
|
"grad_norm": 13.687203407287598, |
|
"learning_rate": 1.2204423129559305e-07, |
|
"logits/chosen": -2.803926467895508, |
|
"logits/rejected": -2.8096935749053955, |
|
"logps/chosen": -304.5517272949219, |
|
"logps/rejected": -332.74627685546875, |
|
"loss": 0.5878, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.21299275755882263, |
|
"rewards/margins": 0.32770127058029175, |
|
"rewards/rejected": -0.540693998336792, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 0.7066213033237373, |
|
"grad_norm": 9.307769775390625, |
|
"learning_rate": 1.2008703995358299e-07, |
|
"logits/chosen": -2.7696948051452637, |
|
"logits/rejected": -2.7626984119415283, |
|
"logps/chosen": -305.66973876953125, |
|
"logps/rejected": -309.4637756347656, |
|
"loss": 0.5854, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.23966650664806366, |
|
"rewards/margins": 0.3371264636516571, |
|
"rewards/rejected": -0.5767929553985596, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.7066213033237373, |
|
"eval_logits/chosen": -2.7892041206359863, |
|
"eval_logits/rejected": -2.7675600051879883, |
|
"eval_logps/chosen": -307.9026794433594, |
|
"eval_logps/rejected": -317.58642578125, |
|
"eval_loss": 0.5930463671684265, |
|
"eval_rewards/accuracies": 0.7020000219345093, |
|
"eval_rewards/chosen": -0.2516505718231201, |
|
"eval_rewards/margins": 0.309834361076355, |
|
"eval_rewards/rejected": -0.5614849925041199, |
|
"eval_runtime": 692.1934, |
|
"eval_samples_per_second": 2.889, |
|
"eval_steps_per_second": 0.361, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.7092384192619733, |
|
"grad_norm": 7.60300874710083, |
|
"learning_rate": 1.1814069630635068e-07, |
|
"logits/chosen": -2.7490410804748535, |
|
"logits/rejected": -2.7561395168304443, |
|
"logps/chosen": -311.02667236328125, |
|
"logps/rejected": -334.8045349121094, |
|
"loss": 0.5936, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.2228337824344635, |
|
"rewards/margins": 0.31492942571640015, |
|
"rewards/rejected": -0.5377631783485413, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 0.7118555352002094, |
|
"grad_norm": 5.55739164352417, |
|
"learning_rate": 1.1620536287303051e-07, |
|
"logits/chosen": -2.7841482162475586, |
|
"logits/rejected": -2.7707200050354004, |
|
"logps/chosen": -330.66802978515625, |
|
"logps/rejected": -324.71453857421875, |
|
"loss": 0.6076, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.21253342926502228, |
|
"rewards/margins": 0.2718030512332916, |
|
"rewards/rejected": -0.4843364655971527, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 0.7144726511384454, |
|
"grad_norm": 4.946017742156982, |
|
"learning_rate": 1.1428120125340716e-07, |
|
"logits/chosen": -2.771012783050537, |
|
"logits/rejected": -2.751859188079834, |
|
"logps/chosen": -299.06195068359375, |
|
"logps/rejected": -291.7746276855469, |
|
"loss": 0.5414, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -0.18322893977165222, |
|
"rewards/margins": 0.4256429076194763, |
|
"rewards/rejected": -0.6088718175888062, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 0.7170897670766815, |
|
"grad_norm": 8.510547637939453, |
|
"learning_rate": 1.123683721144223e-07, |
|
"logits/chosen": -2.773465871810913, |
|
"logits/rejected": -2.750523328781128, |
|
"logps/chosen": -322.75030517578125, |
|
"logps/rejected": -322.23541259765625, |
|
"loss": 0.5924, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.2108650654554367, |
|
"rewards/margins": 0.3147924840450287, |
|
"rewards/rejected": -0.5256575345993042, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 0.7197068830149176, |
|
"grad_norm": 6.666440010070801, |
|
"learning_rate": 1.1046703517675845e-07, |
|
"logits/chosen": -2.792327642440796, |
|
"logits/rejected": -2.780276298522949, |
|
"logps/chosen": -292.0575256347656, |
|
"logps/rejected": -331.8373718261719, |
|
"loss": 0.5803, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.20287561416625977, |
|
"rewards/margins": 0.3353033661842346, |
|
"rewards/rejected": -0.5381789803504944, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 0.7223239989531536, |
|
"grad_norm": 3.823488712310791, |
|
"learning_rate": 1.085773492015028e-07, |
|
"logits/chosen": -2.7709414958953857, |
|
"logits/rejected": -2.7493114471435547, |
|
"logps/chosen": -284.67193603515625, |
|
"logps/rejected": -288.34991455078125, |
|
"loss": 0.5487, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.2005012482404709, |
|
"rewards/margins": 0.4104704260826111, |
|
"rewards/rejected": -0.6109716892242432, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 0.7249411148913897, |
|
"grad_norm": 10.498513221740723, |
|
"learning_rate": 1.0669947197689033e-07, |
|
"logits/chosen": -2.7609269618988037, |
|
"logits/rejected": -2.723078489303589, |
|
"logps/chosen": -316.71929931640625, |
|
"logps/rejected": -321.02239990234375, |
|
"loss": 0.5936, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.2569184899330139, |
|
"rewards/margins": 0.3084716498851776, |
|
"rewards/rejected": -0.5653902292251587, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 0.7275582308296258, |
|
"grad_norm": 9.501131057739258, |
|
"learning_rate": 1.048335603051291e-07, |
|
"logits/chosen": -2.7370448112487793, |
|
"logits/rejected": -2.730591058731079, |
|
"logps/chosen": -329.8760986328125, |
|
"logps/rejected": -340.55413818359375, |
|
"loss": 0.5523, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.2385425865650177, |
|
"rewards/margins": 0.41302841901779175, |
|
"rewards/rejected": -0.6515710353851318, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 0.7301753467678618, |
|
"grad_norm": 9.440362930297852, |
|
"learning_rate": 1.0297976998930663e-07, |
|
"logits/chosen": -2.787727117538452, |
|
"logits/rejected": -2.7839837074279785, |
|
"logps/chosen": -315.8175048828125, |
|
"logps/rejected": -321.4845275878906, |
|
"loss": 0.5551, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.23409982025623322, |
|
"rewards/margins": 0.4074832797050476, |
|
"rewards/rejected": -0.6415830850601196, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 0.7327924627060979, |
|
"grad_norm": 9.004974365234375, |
|
"learning_rate": 1.0113825582038077e-07, |
|
"logits/chosen": -2.7806646823883057, |
|
"logits/rejected": -2.770219326019287, |
|
"logps/chosen": -309.5851135253906, |
|
"logps/rejected": -321.6380310058594, |
|
"loss": 0.5994, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.2802024185657501, |
|
"rewards/margins": 0.2918320596218109, |
|
"rewards/rejected": -0.572034478187561, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.7327924627060979, |
|
"eval_logits/chosen": -2.785149335861206, |
|
"eval_logits/rejected": -2.7636430263519287, |
|
"eval_logps/chosen": -308.8106689453125, |
|
"eval_logps/rejected": -319.18377685546875, |
|
"eval_loss": 0.5920370221138, |
|
"eval_rewards/accuracies": 0.7045000195503235, |
|
"eval_rewards/chosen": -0.2607303559780121, |
|
"eval_rewards/margins": 0.31672805547714233, |
|
"eval_rewards/rejected": -0.577458381652832, |
|
"eval_runtime": 691.5482, |
|
"eval_samples_per_second": 2.892, |
|
"eval_steps_per_second": 0.362, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.735409578644334, |
|
"grad_norm": 5.153034687042236, |
|
"learning_rate": 9.930917156425475e-08, |
|
"logits/chosen": -2.7953689098358154, |
|
"logits/rejected": -2.7769198417663574, |
|
"logps/chosen": -307.6942443847656, |
|
"logps/rejected": -336.81036376953125, |
|
"loss": 0.5828, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.2690412104129791, |
|
"rewards/margins": 0.3371729254722595, |
|
"rewards/rejected": -0.6062140464782715, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 0.73802669458257, |
|
"grad_norm": 10.421857833862305, |
|
"learning_rate": 9.749266994893754e-08, |
|
"logits/chosen": -2.7286500930786133, |
|
"logits/rejected": -2.696841239929199, |
|
"logps/chosen": -283.78277587890625, |
|
"logps/rejected": -293.64666748046875, |
|
"loss": 0.6332, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.2909180223941803, |
|
"rewards/margins": 0.21305350959300995, |
|
"rewards/rejected": -0.5039715766906738, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 0.7406438105208061, |
|
"grad_norm": 14.213560104370117, |
|
"learning_rate": 9.568890265179128e-08, |
|
"logits/chosen": -2.7485554218292236, |
|
"logits/rejected": -2.7543232440948486, |
|
"logps/chosen": -308.8101806640625, |
|
"logps/rejected": -305.62347412109375, |
|
"loss": 0.609, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.2877466678619385, |
|
"rewards/margins": 0.28105878829956055, |
|
"rewards/rejected": -0.568805456161499, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 0.7432609264590422, |
|
"grad_norm": 5.577268600463867, |
|
"learning_rate": 9.389802028686616e-08, |
|
"logits/chosen": -2.7711002826690674, |
|
"logits/rejected": -2.7511260509490967, |
|
"logps/chosen": -308.267822265625, |
|
"logps/rejected": -295.8204650878906, |
|
"loss": 0.6301, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.292976438999176, |
|
"rewards/margins": 0.21805603802204132, |
|
"rewards/rejected": -0.5110324621200562, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 0.7458780423972782, |
|
"grad_norm": 5.392404556274414, |
|
"learning_rate": 9.212017239232426e-08, |
|
"logits/chosen": -2.7617223262786865, |
|
"logits/rejected": -2.7573046684265137, |
|
"logps/chosen": -312.38421630859375, |
|
"logps/rejected": -330.9461975097656, |
|
"loss": 0.5444, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.22561874985694885, |
|
"rewards/margins": 0.4286450445652008, |
|
"rewards/rejected": -0.6542637348175049, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 0.7484951583355143, |
|
"grad_norm": 6.394357681274414, |
|
"learning_rate": 9.035550741795328e-08, |
|
"logits/chosen": -2.7431981563568115, |
|
"logits/rejected": -2.7521939277648926, |
|
"logps/chosen": -295.7667541503906, |
|
"logps/rejected": -334.49688720703125, |
|
"loss": 0.5794, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.21194259822368622, |
|
"rewards/margins": 0.35274478793144226, |
|
"rewards/rejected": -0.5646874308586121, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 0.7511122742737504, |
|
"grad_norm": 9.479743003845215, |
|
"learning_rate": 8.860417271277065e-08, |
|
"logits/chosen": -2.819362163543701, |
|
"logits/rejected": -2.8213016986846924, |
|
"logps/chosen": -308.4556884765625, |
|
"logps/rejected": -324.0565490722656, |
|
"loss": 0.6036, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.23003943264484406, |
|
"rewards/margins": 0.26295268535614014, |
|
"rewards/rejected": -0.492992103099823, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 0.7537293902119864, |
|
"grad_norm": 9.29710865020752, |
|
"learning_rate": 8.686631451272029e-08, |
|
"logits/chosen": -2.7966079711914062, |
|
"logits/rejected": -2.7735276222229004, |
|
"logps/chosen": -297.5863952636719, |
|
"logps/rejected": -300.37908935546875, |
|
"loss": 0.6135, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.2746056914329529, |
|
"rewards/margins": 0.2642548680305481, |
|
"rewards/rejected": -0.5388606190681458, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 0.7563465061502225, |
|
"grad_norm": 9.630151748657227, |
|
"learning_rate": 8.514207792846168e-08, |
|
"logits/chosen": -2.7753801345825195, |
|
"logits/rejected": -2.775832414627075, |
|
"logps/chosen": -292.93609619140625, |
|
"logps/rejected": -292.79754638671875, |
|
"loss": 0.5907, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.2579854130744934, |
|
"rewards/margins": 0.3091490864753723, |
|
"rewards/rejected": -0.5671344995498657, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 0.7589636220884585, |
|
"grad_norm": 7.0608439445495605, |
|
"learning_rate": 8.343160693325355e-08, |
|
"logits/chosen": -2.7492966651916504, |
|
"logits/rejected": -2.7410671710968018, |
|
"logps/chosen": -293.8484802246094, |
|
"logps/rejected": -324.77001953125, |
|
"loss": 0.5837, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.22494366765022278, |
|
"rewards/margins": 0.3548448979854584, |
|
"rewards/rejected": -0.5797885656356812, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.7589636220884585, |
|
"eval_logits/chosen": -2.783421277999878, |
|
"eval_logits/rejected": -2.7619221210479736, |
|
"eval_logps/chosen": -308.137939453125, |
|
"eval_logps/rejected": -318.6510925292969, |
|
"eval_loss": 0.5913165211677551, |
|
"eval_rewards/accuracies": 0.7055000066757202, |
|
"eval_rewards/chosen": -0.2540031671524048, |
|
"eval_rewards/margins": 0.3181284964084625, |
|
"eval_rewards/rejected": -0.5721316933631897, |
|
"eval_runtime": 692.0731, |
|
"eval_samples_per_second": 2.89, |
|
"eval_steps_per_second": 0.361, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.7615807380266946, |
|
"grad_norm": 7.802112579345703, |
|
"learning_rate": 8.173504435093173e-08, |
|
"logits/chosen": -2.7537245750427246, |
|
"logits/rejected": -2.726355791091919, |
|
"logps/chosen": -290.5617980957031, |
|
"logps/rejected": -287.50799560546875, |
|
"loss": 0.5806, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.2531769275665283, |
|
"rewards/margins": 0.35345658659935, |
|
"rewards/rejected": -0.6066334843635559, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 0.7641978539649307, |
|
"grad_norm": 9.018595695495605, |
|
"learning_rate": 8.005253184398359e-08, |
|
"logits/chosen": -2.7553019523620605, |
|
"logits/rejected": -2.745943546295166, |
|
"logps/chosen": -320.03265380859375, |
|
"logps/rejected": -340.8626403808594, |
|
"loss": 0.6027, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.24576649069786072, |
|
"rewards/margins": 0.28911441564559937, |
|
"rewards/rejected": -0.5348808765411377, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 0.7668149699031667, |
|
"grad_norm": 6.111194133758545, |
|
"learning_rate": 7.838420990171926e-08, |
|
"logits/chosen": -2.789515972137451, |
|
"logits/rejected": -2.7570273876190186, |
|
"logps/chosen": -310.61224365234375, |
|
"logps/rejected": -312.87152099609375, |
|
"loss": 0.5865, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.22255787253379822, |
|
"rewards/margins": 0.31383711099624634, |
|
"rewards/rejected": -0.5363950133323669, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 0.7694320858414028, |
|
"grad_norm": 5.815800666809082, |
|
"learning_rate": 7.673021782854083e-08, |
|
"logits/chosen": -2.69783091545105, |
|
"logits/rejected": -2.6870310306549072, |
|
"logps/chosen": -311.68963623046875, |
|
"logps/rejected": -288.39215087890625, |
|
"loss": 0.5979, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.2525468170642853, |
|
"rewards/margins": 0.31668832898139954, |
|
"rewards/rejected": -0.5692351460456848, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 0.7720492017796389, |
|
"grad_norm": 10.589014053344727, |
|
"learning_rate": 7.509069373231039e-08, |
|
"logits/chosen": -2.742522716522217, |
|
"logits/rejected": -2.7218940258026123, |
|
"logps/chosen": -293.1689453125, |
|
"logps/rejected": -302.7828369140625, |
|
"loss": 0.6006, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.24362894892692566, |
|
"rewards/margins": 0.29250627756118774, |
|
"rewards/rejected": -0.536135196685791, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 0.7746663177178749, |
|
"grad_norm": 8.408040046691895, |
|
"learning_rate": 7.346577451281821e-08, |
|
"logits/chosen": -2.7488350868225098, |
|
"logits/rejected": -2.7583699226379395, |
|
"logps/chosen": -308.5254821777344, |
|
"logps/rejected": -321.6301574707031, |
|
"loss": 0.578, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.26960092782974243, |
|
"rewards/margins": 0.3470562696456909, |
|
"rewards/rejected": -0.6166571378707886, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 0.777283433656111, |
|
"grad_norm": 7.626022815704346, |
|
"learning_rate": 7.185559585035136e-08, |
|
"logits/chosen": -2.7650535106658936, |
|
"logits/rejected": -2.736623764038086, |
|
"logps/chosen": -327.43792724609375, |
|
"logps/rejected": -349.74005126953125, |
|
"loss": 0.5695, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.28370755910873413, |
|
"rewards/margins": 0.38453495502471924, |
|
"rewards/rejected": -0.6682425737380981, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 0.7799005495943471, |
|
"grad_norm": 8.664432525634766, |
|
"learning_rate": 7.026029219436502e-08, |
|
"logits/chosen": -2.7403178215026855, |
|
"logits/rejected": -2.726973533630371, |
|
"logps/chosen": -296.88629150390625, |
|
"logps/rejected": -320.1584167480469, |
|
"loss": 0.5807, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.2508087158203125, |
|
"rewards/margins": 0.3491096496582031, |
|
"rewards/rejected": -0.5999183058738708, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 0.7825176655325831, |
|
"grad_norm": 7.381548881530762, |
|
"learning_rate": 6.867999675225522e-08, |
|
"logits/chosen": -2.7898964881896973, |
|
"logits/rejected": -2.765493392944336, |
|
"logps/chosen": -269.5013427734375, |
|
"logps/rejected": -287.95318603515625, |
|
"loss": 0.577, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.22013553977012634, |
|
"rewards/margins": 0.3466190695762634, |
|
"rewards/rejected": -0.5667546391487122, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 0.7851347814708192, |
|
"grad_norm": 8.886544227600098, |
|
"learning_rate": 6.711484147823662e-08, |
|
"logits/chosen": -2.7362468242645264, |
|
"logits/rejected": -2.7374088764190674, |
|
"logps/chosen": -273.03204345703125, |
|
"logps/rejected": -309.46832275390625, |
|
"loss": 0.5858, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.22477373480796814, |
|
"rewards/margins": 0.3105041980743408, |
|
"rewards/rejected": -0.5352779626846313, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.7851347814708192, |
|
"eval_logits/chosen": -2.781898021697998, |
|
"eval_logits/rejected": -2.7604949474334717, |
|
"eval_logps/chosen": -308.9897766113281, |
|
"eval_logps/rejected": -319.7853088378906, |
|
"eval_loss": 0.5910181999206543, |
|
"eval_rewards/accuracies": 0.7055000066757202, |
|
"eval_rewards/chosen": -0.2625214755535126, |
|
"eval_rewards/margins": 0.32095208764076233, |
|
"eval_rewards/rejected": -0.5834735035896301, |
|
"eval_runtime": 691.7146, |
|
"eval_samples_per_second": 2.891, |
|
"eval_steps_per_second": 0.361, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.7877518974090553, |
|
"grad_norm": 12.21480655670166, |
|
"learning_rate": 6.556495706232412e-08, |
|
"logits/chosen": -2.7469980716705322, |
|
"logits/rejected": -2.7527496814727783, |
|
"logps/chosen": -316.41766357421875, |
|
"logps/rejected": -328.52532958984375, |
|
"loss": 0.5886, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.291492760181427, |
|
"rewards/margins": 0.32380086183547974, |
|
"rewards/rejected": -0.6152936816215515, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 0.7903690133472913, |
|
"grad_norm": 8.182783126831055, |
|
"learning_rate": 6.403047291942057e-08, |
|
"logits/chosen": -2.722087860107422, |
|
"logits/rejected": -2.6903903484344482, |
|
"logps/chosen": -275.5090637207031, |
|
"logps/rejected": -277.62420654296875, |
|
"loss": 0.5972, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.3089084327220917, |
|
"rewards/margins": 0.29682403802871704, |
|
"rewards/rejected": -0.6057325005531311, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 0.7929861292855274, |
|
"grad_norm": 8.147031784057617, |
|
"learning_rate": 6.251151717851021e-08, |
|
"logits/chosen": -2.743332624435425, |
|
"logits/rejected": -2.7332491874694824, |
|
"logps/chosen": -280.6979064941406, |
|
"logps/rejected": -292.1900329589844, |
|
"loss": 0.6154, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.29725611209869385, |
|
"rewards/margins": 0.27910858392715454, |
|
"rewards/rejected": -0.5763646960258484, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 0.7956032452237635, |
|
"grad_norm": 10.667434692382812, |
|
"learning_rate": 6.100821667196041e-08, |
|
"logits/chosen": -2.8258140087127686, |
|
"logits/rejected": -2.772840976715088, |
|
"logps/chosen": -316.3697204589844, |
|
"logps/rejected": -283.46575927734375, |
|
"loss": 0.5777, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.2557021975517273, |
|
"rewards/margins": 0.3566380739212036, |
|
"rewards/rejected": -0.6123403310775757, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 0.7982203611619995, |
|
"grad_norm": 11.156988143920898, |
|
"learning_rate": 5.952069692493061e-08, |
|
"logits/chosen": -2.7050204277038574, |
|
"logits/rejected": -2.7095789909362793, |
|
"logps/chosen": -266.9496154785156, |
|
"logps/rejected": -308.8603515625, |
|
"loss": 0.5668, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.21333126723766327, |
|
"rewards/margins": 0.3779350519180298, |
|
"rewards/rejected": -0.5912663340568542, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 0.8008374771002356, |
|
"grad_norm": 17.065628051757812, |
|
"learning_rate": 5.8049082144891794e-08, |
|
"logits/chosen": -2.702791452407837, |
|
"logits/rejected": -2.6872074604034424, |
|
"logps/chosen": -304.93463134765625, |
|
"logps/rejected": -380.0108642578125, |
|
"loss": 0.5933, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.24746175110340118, |
|
"rewards/margins": 0.32271090149879456, |
|
"rewards/rejected": -0.5701726675033569, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 0.8034545930384716, |
|
"grad_norm": 5.375977516174316, |
|
"learning_rate": 5.659349521125459e-08, |
|
"logits/chosen": -2.828627109527588, |
|
"logits/rejected": -2.8292970657348633, |
|
"logps/chosen": -323.8910827636719, |
|
"logps/rejected": -331.82403564453125, |
|
"loss": 0.5963, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.25231170654296875, |
|
"rewards/margins": 0.3079237937927246, |
|
"rewards/rejected": -0.5602355003356934, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 0.8060717089767077, |
|
"grad_norm": 10.280311584472656, |
|
"learning_rate": 5.5154057665109e-08, |
|
"logits/chosen": -2.772552490234375, |
|
"logits/rejected": -2.7637112140655518, |
|
"logps/chosen": -304.2619934082031, |
|
"logps/rejected": -313.9085998535156, |
|
"loss": 0.5688, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.28161460161209106, |
|
"rewards/margins": 0.3681698441505432, |
|
"rewards/rejected": -0.6497844457626343, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 0.8086888249149438, |
|
"grad_norm": 5.905206203460693, |
|
"learning_rate": 5.3730889699075853e-08, |
|
"logits/chosen": -2.790621280670166, |
|
"logits/rejected": -2.764768123626709, |
|
"logps/chosen": -320.5517272949219, |
|
"logps/rejected": -295.2154541015625, |
|
"loss": 0.5839, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.23616118729114532, |
|
"rewards/margins": 0.32327955961227417, |
|
"rewards/rejected": -0.5594406723976135, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 0.8113059408531798, |
|
"grad_norm": 5.722733974456787, |
|
"learning_rate": 5.2324110147270893e-08, |
|
"logits/chosen": -2.766014814376831, |
|
"logits/rejected": -2.758927583694458, |
|
"logps/chosen": -317.6996154785156, |
|
"logps/rejected": -342.97039794921875, |
|
"loss": 0.5685, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.17896804213523865, |
|
"rewards/margins": 0.3621399402618408, |
|
"rewards/rejected": -0.5411080121994019, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.8113059408531798, |
|
"eval_logits/chosen": -2.7776589393615723, |
|
"eval_logits/rejected": -2.7558252811431885, |
|
"eval_logps/chosen": -306.57073974609375, |
|
"eval_logps/rejected": -317.1507263183594, |
|
"eval_loss": 0.5914422869682312, |
|
"eval_rewards/accuracies": 0.7039999961853027, |
|
"eval_rewards/chosen": -0.23833158612251282, |
|
"eval_rewards/margins": 0.3187963366508484, |
|
"eval_rewards/rejected": -0.5571279525756836, |
|
"eval_runtime": 692.3976, |
|
"eval_samples_per_second": 2.889, |
|
"eval_steps_per_second": 0.361, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.8139230567914159, |
|
"grad_norm": 5.692158222198486, |
|
"learning_rate": 5.0933836475381795e-08, |
|
"logits/chosen": -2.773538827896118, |
|
"logits/rejected": -2.743774175643921, |
|
"logps/chosen": -323.03564453125, |
|
"logps/rejected": -339.22576904296875, |
|
"loss": 0.5839, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.20304706692695618, |
|
"rewards/margins": 0.33373111486434937, |
|
"rewards/rejected": -0.5367781519889832, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 0.816540172729652, |
|
"grad_norm": 6.522732734680176, |
|
"learning_rate": 4.956018477086005e-08, |
|
"logits/chosen": -2.7541415691375732, |
|
"logits/rejected": -2.7304270267486572, |
|
"logps/chosen": -312.82550048828125, |
|
"logps/rejected": -319.4942626953125, |
|
"loss": 0.5787, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.2153932750225067, |
|
"rewards/margins": 0.3583284020423889, |
|
"rewards/rejected": -0.5737215876579285, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 0.819157288667888, |
|
"grad_norm": 12.873359680175781, |
|
"learning_rate": 4.820326973322763e-08, |
|
"logits/chosen": -2.7611987590789795, |
|
"logits/rejected": -2.7416489124298096, |
|
"logps/chosen": -294.5945129394531, |
|
"logps/rejected": -322.9219055175781, |
|
"loss": 0.5902, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.26755794882774353, |
|
"rewards/margins": 0.30830827355384827, |
|
"rewards/rejected": -0.5758662223815918, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 0.821774404606124, |
|
"grad_norm": 6.0704731941223145, |
|
"learning_rate": 4.686320466449981e-08, |
|
"logits/chosen": -2.765129566192627, |
|
"logits/rejected": -2.712188482284546, |
|
"logps/chosen": -279.4689025878906, |
|
"logps/rejected": -308.8946533203125, |
|
"loss": 0.5878, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.21677632629871368, |
|
"rewards/margins": 0.3269808888435364, |
|
"rewards/rejected": -0.5437572598457336, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 0.8243915205443602, |
|
"grad_norm": 9.32778549194336, |
|
"learning_rate": 4.554010145972417e-08, |
|
"logits/chosen": -2.8120663166046143, |
|
"logits/rejected": -2.7678775787353516, |
|
"logps/chosen": -308.05975341796875, |
|
"logps/rejected": -326.4994812011719, |
|
"loss": 0.6037, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.28121477365493774, |
|
"rewards/margins": 0.3111681342124939, |
|
"rewards/rejected": -0.5923829078674316, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 0.8270086364825961, |
|
"grad_norm": 6.878976345062256, |
|
"learning_rate": 4.423407059763745e-08, |
|
"logits/chosen": -2.769566535949707, |
|
"logits/rejected": -2.754739999771118, |
|
"logps/chosen": -313.4940490722656, |
|
"logps/rejected": -338.7357482910156, |
|
"loss": 0.5795, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.22392907738685608, |
|
"rewards/margins": 0.3541107773780823, |
|
"rewards/rejected": -0.578039824962616, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 0.8296257524208323, |
|
"grad_norm": 8.941882133483887, |
|
"learning_rate": 4.294522113144078e-08, |
|
"logits/chosen": -2.7120773792266846, |
|
"logits/rejected": -2.676596164703369, |
|
"logps/chosen": -310.96600341796875, |
|
"logps/rejected": -309.7723083496094, |
|
"loss": 0.5784, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.23985318839550018, |
|
"rewards/margins": 0.3455398380756378, |
|
"rewards/rejected": -0.5853930115699768, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 0.8322428683590684, |
|
"grad_norm": 11.861396789550781, |
|
"learning_rate": 4.1673660679693804e-08, |
|
"logits/chosen": -2.759885311126709, |
|
"logits/rejected": -2.7518694400787354, |
|
"logps/chosen": -264.2064514160156, |
|
"logps/rejected": -315.90380859375, |
|
"loss": 0.6069, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.26855209469795227, |
|
"rewards/margins": 0.2709905505180359, |
|
"rewards/rejected": -0.539542555809021, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 0.8348599842973043, |
|
"grad_norm": 3.688720941543579, |
|
"learning_rate": 4.041949541732825e-08, |
|
"logits/chosen": -2.7698843479156494, |
|
"logits/rejected": -2.773341655731201, |
|
"logps/chosen": -306.61480712890625, |
|
"logps/rejected": -325.04541015625, |
|
"loss": 0.5851, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.2628583610057831, |
|
"rewards/margins": 0.3378385603427887, |
|
"rewards/rejected": -0.6006969213485718, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 0.8374771002355405, |
|
"grad_norm": 3.2142703533172607, |
|
"learning_rate": 3.9182830066782605e-08, |
|
"logits/chosen": -2.7356200218200684, |
|
"logits/rejected": -2.740725040435791, |
|
"logps/chosen": -303.8326721191406, |
|
"logps/rejected": -351.736083984375, |
|
"loss": 0.5753, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.28992363810539246, |
|
"rewards/margins": 0.3618486821651459, |
|
"rewards/rejected": -0.6517723798751831, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.8374771002355405, |
|
"eval_logits/chosen": -2.778296709060669, |
|
"eval_logits/rejected": -2.7567243576049805, |
|
"eval_logps/chosen": -308.9666442871094, |
|
"eval_logps/rejected": -320.12237548828125, |
|
"eval_loss": 0.5903262495994568, |
|
"eval_rewards/accuracies": 0.7020000219345093, |
|
"eval_rewards/chosen": -0.26229044795036316, |
|
"eval_rewards/margins": 0.3245540261268616, |
|
"eval_rewards/rejected": -0.5868445038795471, |
|
"eval_runtime": 691.7572, |
|
"eval_samples_per_second": 2.891, |
|
"eval_steps_per_second": 0.361, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.8400942161737766, |
|
"grad_norm": 5.404438018798828, |
|
"learning_rate": 3.79637678892577e-08, |
|
"logits/chosen": -2.737617015838623, |
|
"logits/rejected": -2.7435827255249023, |
|
"logps/chosen": -313.7263488769531, |
|
"logps/rejected": -326.2721862792969, |
|
"loss": 0.5958, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.21620874106884003, |
|
"rewards/margins": 0.29549044370651245, |
|
"rewards/rejected": -0.5116991996765137, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 0.8427113321120125, |
|
"grad_norm": 8.482666015625, |
|
"learning_rate": 3.6762410676094645e-08, |
|
"logits/chosen": -2.7493488788604736, |
|
"logits/rejected": -2.751436233520508, |
|
"logps/chosen": -342.2435302734375, |
|
"logps/rejected": -334.9501953125, |
|
"loss": 0.5649, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.22917640209197998, |
|
"rewards/margins": 0.40211135149002075, |
|
"rewards/rejected": -0.631287693977356, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 0.8453284480502486, |
|
"grad_norm": 21.451396942138672, |
|
"learning_rate": 3.557885874027497e-08, |
|
"logits/chosen": -2.7467381954193115, |
|
"logits/rejected": -2.7420356273651123, |
|
"logps/chosen": -307.3967590332031, |
|
"logps/rejected": -319.23785400390625, |
|
"loss": 0.626, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.2908255457878113, |
|
"rewards/margins": 0.24548819661140442, |
|
"rewards/rejected": -0.5363136529922485, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 0.8479455639884846, |
|
"grad_norm": 9.142580032348633, |
|
"learning_rate": 3.441321090804469e-08, |
|
"logits/chosen": -2.805671215057373, |
|
"logits/rejected": -2.7749440670013428, |
|
"logps/chosen": -311.969482421875, |
|
"logps/rejected": -301.92559814453125, |
|
"loss": 0.5872, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.28862375020980835, |
|
"rewards/margins": 0.3031871020793915, |
|
"rewards/rejected": -0.5918108820915222, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 0.8505626799267207, |
|
"grad_norm": 6.999141216278076, |
|
"learning_rate": 3.326556451066234e-08, |
|
"logits/chosen": -2.8003592491149902, |
|
"logits/rejected": -2.7750496864318848, |
|
"logps/chosen": -333.262451171875, |
|
"logps/rejected": -342.88970947265625, |
|
"loss": 0.5676, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.21939554810523987, |
|
"rewards/margins": 0.38298407196998596, |
|
"rewards/rejected": -0.602379560470581, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 0.8531797958649568, |
|
"grad_norm": 8.473172187805176, |
|
"learning_rate": 3.2136015376271946e-08, |
|
"logits/chosen": -2.7543041706085205, |
|
"logits/rejected": -2.7237446308135986, |
|
"logps/chosen": -310.47503662109375, |
|
"logps/rejected": -316.1898498535156, |
|
"loss": 0.6202, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.3483801782131195, |
|
"rewards/margins": 0.25630325078964233, |
|
"rewards/rejected": -0.6046834588050842, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 0.8557969118031928, |
|
"grad_norm": 6.828322887420654, |
|
"learning_rate": 3.102465782190106e-08, |
|
"logits/chosen": -2.765094041824341, |
|
"logits/rejected": -2.7622992992401123, |
|
"logps/chosen": -292.77264404296875, |
|
"logps/rejected": -306.03790283203125, |
|
"loss": 0.6049, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.26343613862991333, |
|
"rewards/margins": 0.2959148585796356, |
|
"rewards/rejected": -0.5593509674072266, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 0.8584140277414289, |
|
"grad_norm": 7.230039119720459, |
|
"learning_rate": 2.993158464558565e-08, |
|
"logits/chosen": -2.752277135848999, |
|
"logits/rejected": -2.7456305027008057, |
|
"logps/chosen": -313.83514404296875, |
|
"logps/rejected": -343.77923583984375, |
|
"loss": 0.6083, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.2221953421831131, |
|
"rewards/margins": 0.2806113660335541, |
|
"rewards/rejected": -0.5028067231178284, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 0.861031143679665, |
|
"grad_norm": 3.2468912601470947, |
|
"learning_rate": 2.8856887118621358e-08, |
|
"logits/chosen": -2.7951433658599854, |
|
"logits/rejected": -2.8030707836151123, |
|
"logps/chosen": -308.23077392578125, |
|
"logps/rejected": -336.6316223144531, |
|
"loss": 0.6066, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.3232649266719818, |
|
"rewards/margins": 0.30740997195243835, |
|
"rewards/rejected": -0.6306749582290649, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 0.863648259617901, |
|
"grad_norm": 6.59912109375, |
|
"learning_rate": 2.7800654977942482e-08, |
|
"logits/chosen": -2.7431418895721436, |
|
"logits/rejected": -2.7131383419036865, |
|
"logps/chosen": -301.9719543457031, |
|
"logps/rejected": -354.3257751464844, |
|
"loss": 0.5769, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.2676336467266083, |
|
"rewards/margins": 0.3562368154525757, |
|
"rewards/rejected": -0.6238704919815063, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.863648259617901, |
|
"eval_logits/chosen": -2.7770590782165527, |
|
"eval_logits/rejected": -2.755500555038452, |
|
"eval_logps/chosen": -309.4716491699219, |
|
"eval_logps/rejected": -320.77569580078125, |
|
"eval_loss": 0.5899637341499329, |
|
"eval_rewards/accuracies": 0.703000009059906, |
|
"eval_rewards/chosen": -0.2673403322696686, |
|
"eval_rewards/margins": 0.3260369896888733, |
|
"eval_rewards/rejected": -0.5933773517608643, |
|
"eval_runtime": 692.4414, |
|
"eval_samples_per_second": 2.888, |
|
"eval_steps_per_second": 0.361, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.8662653755561371, |
|
"grad_norm": 7.842947959899902, |
|
"learning_rate": 2.676297641862879e-08, |
|
"logits/chosen": -2.76792049407959, |
|
"logits/rejected": -2.7621943950653076, |
|
"logps/chosen": -265.380859375, |
|
"logps/rejected": -254.47140502929688, |
|
"loss": 0.5895, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.22911493480205536, |
|
"rewards/margins": 0.32636719942092896, |
|
"rewards/rejected": -0.5554821491241455, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 0.8688824914943732, |
|
"grad_norm": 13.967310905456543, |
|
"learning_rate": 2.5743938086541352e-08, |
|
"logits/chosen": -2.7548770904541016, |
|
"logits/rejected": -2.729977607727051, |
|
"logps/chosen": -309.2705383300781, |
|
"logps/rejected": -313.9998779296875, |
|
"loss": 0.603, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.29073604941368103, |
|
"rewards/margins": 0.31791952252388, |
|
"rewards/rejected": -0.6086556315422058, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 0.8714996074326092, |
|
"grad_norm": 11.057051658630371, |
|
"learning_rate": 2.474362507108757e-08, |
|
"logits/chosen": -2.814598560333252, |
|
"logits/rejected": -2.7810606956481934, |
|
"logps/chosen": -317.7953186035156, |
|
"logps/rejected": -332.5885314941406, |
|
"loss": 0.5725, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.25249534845352173, |
|
"rewards/margins": 0.38154152035713196, |
|
"rewards/rejected": -0.6340368390083313, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 0.8741167233708453, |
|
"grad_norm": 10.906637191772461, |
|
"learning_rate": 2.3762120898116495e-08, |
|
"logits/chosen": -2.774956226348877, |
|
"logits/rejected": -2.764927625656128, |
|
"logps/chosen": -322.2221984863281, |
|
"logps/rejected": -341.53216552734375, |
|
"loss": 0.6079, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.3415859639644623, |
|
"rewards/margins": 0.2912564277648926, |
|
"rewards/rejected": -0.6328424215316772, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 0.8767338393090814, |
|
"grad_norm": 6.918145656585693, |
|
"learning_rate": 2.2799507522944044e-08, |
|
"logits/chosen": -2.689883232116699, |
|
"logits/rejected": -2.6739673614501953, |
|
"logps/chosen": -313.18524169921875, |
|
"logps/rejected": -340.9402770996094, |
|
"loss": 0.5669, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.26544609665870667, |
|
"rewards/margins": 0.35022976994514465, |
|
"rewards/rejected": -0.6156758069992065, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 0.8793509552473174, |
|
"grad_norm": 10.59185791015625, |
|
"learning_rate": 2.1855865323510054e-08, |
|
"logits/chosen": -2.7279655933380127, |
|
"logits/rejected": -2.6860973834991455, |
|
"logps/chosen": -320.9715576171875, |
|
"logps/rejected": -355.20880126953125, |
|
"loss": 0.5657, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.2595919072628021, |
|
"rewards/margins": 0.40920543670654297, |
|
"rewards/rejected": -0.6687973141670227, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 0.8819680711855535, |
|
"grad_norm": 6.1795830726623535, |
|
"learning_rate": 2.0931273093666573e-08, |
|
"logits/chosen": -2.728386878967285, |
|
"logits/rejected": -2.7089622020721436, |
|
"logps/chosen": -283.88409423828125, |
|
"logps/rejected": -303.3033142089844, |
|
"loss": 0.5462, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.2573816776275635, |
|
"rewards/margins": 0.4072348475456238, |
|
"rewards/rejected": -0.6646164655685425, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 0.8845851871237895, |
|
"grad_norm": 6.445786476135254, |
|
"learning_rate": 2.002580803659873e-08, |
|
"logits/chosen": -2.747699022293091, |
|
"logits/rejected": -2.7049365043640137, |
|
"logps/chosen": -303.89813232421875, |
|
"logps/rejected": -318.79693603515625, |
|
"loss": 0.617, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.3385586738586426, |
|
"rewards/margins": 0.2693432867527008, |
|
"rewards/rejected": -0.607901930809021, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 0.8872023030620256, |
|
"grad_norm": 9.493855476379395, |
|
"learning_rate": 1.9139545758378256e-08, |
|
"logits/chosen": -2.770669460296631, |
|
"logits/rejected": -2.722433090209961, |
|
"logps/chosen": -311.3063659667969, |
|
"logps/rejected": -296.7181701660156, |
|
"loss": 0.5721, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.2407282292842865, |
|
"rewards/margins": 0.3538174629211426, |
|
"rewards/rejected": -0.5945457220077515, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 0.8898194190002617, |
|
"grad_norm": 8.795994758605957, |
|
"learning_rate": 1.8272560261650277e-08, |
|
"logits/chosen": -2.782130479812622, |
|
"logits/rejected": -2.757819652557373, |
|
"logps/chosen": -354.10919189453125, |
|
"logps/rejected": -333.00250244140625, |
|
"loss": 0.5608, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.22043180465698242, |
|
"rewards/margins": 0.3896182179450989, |
|
"rewards/rejected": -0.6100499629974365, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.8898194190002617, |
|
"eval_logits/chosen": -2.773853302001953, |
|
"eval_logits/rejected": -2.7519986629486084, |
|
"eval_logps/chosen": -309.8929748535156, |
|
"eval_logps/rejected": -321.31964111328125, |
|
"eval_loss": 0.5895980000495911, |
|
"eval_rewards/accuracies": 0.7020000219345093, |
|
"eval_rewards/chosen": -0.27155351638793945, |
|
"eval_rewards/margins": 0.3272639214992523, |
|
"eval_rewards/rejected": -0.5988174676895142, |
|
"eval_runtime": 692.3174, |
|
"eval_samples_per_second": 2.889, |
|
"eval_steps_per_second": 0.361, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.8924365349384977, |
|
"grad_norm": 7.369442462921143, |
|
"learning_rate": 1.742492393945427e-08, |
|
"logits/chosen": -2.7513797283172607, |
|
"logits/rejected": -2.710066318511963, |
|
"logps/chosen": -323.8204650878906, |
|
"logps/rejected": -317.6787109375, |
|
"loss": 0.568, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.2688294053077698, |
|
"rewards/margins": 0.36988669633865356, |
|
"rewards/rejected": -0.6387161016464233, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 0.8950536508767338, |
|
"grad_norm": 7.45905876159668, |
|
"learning_rate": 1.6596707569179302e-08, |
|
"logits/chosen": -2.791177749633789, |
|
"logits/rejected": -2.7743191719055176, |
|
"logps/chosen": -325.4018249511719, |
|
"logps/rejected": -326.23291015625, |
|
"loss": 0.5784, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.28556251525878906, |
|
"rewards/margins": 0.3457964062690735, |
|
"rewards/rejected": -0.6313589215278625, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 0.8976707668149699, |
|
"grad_norm": 6.628225326538086, |
|
"learning_rate": 1.5787980306653848e-08, |
|
"logits/chosen": -2.75858736038208, |
|
"logits/rejected": -2.7154600620269775, |
|
"logps/chosen": -316.15985107421875, |
|
"logps/rejected": -336.3743896484375, |
|
"loss": 0.5708, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.24860498309135437, |
|
"rewards/margins": 0.3617299795150757, |
|
"rewards/rejected": -0.6103349924087524, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 0.9002878827532059, |
|
"grad_norm": 10.542095184326172, |
|
"learning_rate": 1.499880968037165e-08, |
|
"logits/chosen": -2.752002477645874, |
|
"logits/rejected": -2.733220100402832, |
|
"logps/chosen": -292.7621765136719, |
|
"logps/rejected": -285.80218505859375, |
|
"loss": 0.5813, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.22942595183849335, |
|
"rewards/margins": 0.32194358110427856, |
|
"rewards/rejected": -0.5513694882392883, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 0.902904998691442, |
|
"grad_norm": 5.9859395027160645, |
|
"learning_rate": 1.4229261585852803e-08, |
|
"logits/chosen": -2.77447772026062, |
|
"logits/rejected": -2.7663679122924805, |
|
"logps/chosen": -305.6563415527344, |
|
"logps/rejected": -314.01043701171875, |
|
"loss": 0.5806, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.22854971885681152, |
|
"rewards/margins": 0.3463636040687561, |
|
"rewards/rejected": -0.5749133825302124, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 0.9055221146296781, |
|
"grad_norm": 9.172728538513184, |
|
"learning_rate": 1.3479400280141883e-08, |
|
"logits/chosen": -2.74762225151062, |
|
"logits/rejected": -2.7340774536132812, |
|
"logps/chosen": -290.8319396972656, |
|
"logps/rejected": -326.6239929199219, |
|
"loss": 0.5852, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.2622153162956238, |
|
"rewards/margins": 0.35056665539741516, |
|
"rewards/rejected": -0.6127818822860718, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 0.9081392305679141, |
|
"grad_norm": 8.79883098602295, |
|
"learning_rate": 1.2749288376442042e-08, |
|
"logits/chosen": -2.7586569786071777, |
|
"logits/rejected": -2.730827569961548, |
|
"logps/chosen": -337.0930480957031, |
|
"logps/rejected": -317.09912109375, |
|
"loss": 0.5455, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.19493858516216278, |
|
"rewards/margins": 0.4252621531486511, |
|
"rewards/rejected": -0.6202007532119751, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 0.9107563465061502, |
|
"grad_norm": 11.71596622467041, |
|
"learning_rate": 1.2038986838887127e-08, |
|
"logits/chosen": -2.792734384536743, |
|
"logits/rejected": -2.77490234375, |
|
"logps/chosen": -288.8994445800781, |
|
"logps/rejected": -313.22430419921875, |
|
"loss": 0.6242, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.30519285798072815, |
|
"rewards/margins": 0.2674819231033325, |
|
"rewards/rejected": -0.5726747512817383, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 0.9133734624443863, |
|
"grad_norm": 6.5518951416015625, |
|
"learning_rate": 1.1348554977451131e-08, |
|
"logits/chosen": -2.805830478668213, |
|
"logits/rejected": -2.7894272804260254, |
|
"logps/chosen": -327.4478759765625, |
|
"logps/rejected": -324.9560546875, |
|
"loss": 0.582, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.2583698630332947, |
|
"rewards/margins": 0.3414859175682068, |
|
"rewards/rejected": -0.5998557806015015, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 0.9159905783826223, |
|
"grad_norm": 5.205156326293945, |
|
"learning_rate": 1.06780504429958e-08, |
|
"logits/chosen": -2.7797505855560303, |
|
"logits/rejected": -2.7590694427490234, |
|
"logps/chosen": -325.8748779296875, |
|
"logps/rejected": -310.8509521484375, |
|
"loss": 0.6008, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.27413299679756165, |
|
"rewards/margins": 0.3151172399520874, |
|
"rewards/rejected": -0.5892502665519714, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.9159905783826223, |
|
"eval_logits/chosen": -2.775543689727783, |
|
"eval_logits/rejected": -2.7539024353027344, |
|
"eval_logps/chosen": -309.8999938964844, |
|
"eval_logps/rejected": -321.37445068359375, |
|
"eval_loss": 0.5894958972930908, |
|
"eval_rewards/accuracies": 0.703499972820282, |
|
"eval_rewards/chosen": -0.2716234028339386, |
|
"eval_rewards/margins": 0.3277418315410614, |
|
"eval_rewards/rejected": -0.599365234375, |
|
"eval_runtime": 692.3998, |
|
"eval_samples_per_second": 2.889, |
|
"eval_steps_per_second": 0.361, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.9186076943208584, |
|
"grad_norm": 6.786498069763184, |
|
"learning_rate": 1.0027529222456754e-08, |
|
"logits/chosen": -2.7301533222198486, |
|
"logits/rejected": -2.702810764312744, |
|
"logps/chosen": -296.23834228515625, |
|
"logps/rejected": -315.268310546875, |
|
"loss": 0.5539, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.2563706934452057, |
|
"rewards/margins": 0.3883191645145416, |
|
"rewards/rejected": -0.6446898579597473, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 0.9212248102590945, |
|
"grad_norm": 9.892511367797852, |
|
"learning_rate": 9.397045634168766e-09, |
|
"logits/chosen": -2.8002243041992188, |
|
"logits/rejected": -2.7856602668762207, |
|
"logps/chosen": -308.3498229980469, |
|
"logps/rejected": -351.95831298828125, |
|
"loss": 0.57, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.21786804497241974, |
|
"rewards/margins": 0.41681188344955444, |
|
"rewards/rejected": -0.634679913520813, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 0.9238419261973305, |
|
"grad_norm": 12.571949005126953, |
|
"learning_rate": 8.78665232332998e-09, |
|
"logits/chosen": -2.724975347518921, |
|
"logits/rejected": -2.708922863006592, |
|
"logps/chosen": -277.4271545410156, |
|
"logps/rejected": -300.2417297363281, |
|
"loss": 0.6055, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.3028232455253601, |
|
"rewards/margins": 0.2615777850151062, |
|
"rewards/rejected": -0.5644010305404663, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 0.9264590421355666, |
|
"grad_norm": 7.908664703369141, |
|
"learning_rate": 8.196400257606206e-09, |
|
"logits/chosen": -2.772461414337158, |
|
"logits/rejected": -2.7343640327453613, |
|
"logps/chosen": -328.0716247558594, |
|
"logps/rejected": -358.15655517578125, |
|
"loss": 0.577, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.2738083302974701, |
|
"rewards/margins": 0.3519710600376129, |
|
"rewards/rejected": -0.625779390335083, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 0.9290761580738026, |
|
"grad_norm": 5.722252368927002, |
|
"learning_rate": 7.626338722875075e-09, |
|
"logits/chosen": -2.7591617107391357, |
|
"logits/rejected": -2.780594825744629, |
|
"logps/chosen": -298.6004943847656, |
|
"logps/rejected": -326.13287353515625, |
|
"loss": 0.5986, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.2562271058559418, |
|
"rewards/margins": 0.3074356019496918, |
|
"rewards/rejected": -0.5636627078056335, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 0.9316932740120387, |
|
"grad_norm": 8.03117847442627, |
|
"learning_rate": 7.0765153191106875e-09, |
|
"logits/chosen": -2.781140089035034, |
|
"logits/rejected": -2.7692975997924805, |
|
"logps/chosen": -295.3600158691406, |
|
"logps/rejected": -291.2763366699219, |
|
"loss": 0.5659, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.2436678409576416, |
|
"rewards/margins": 0.39614516496658325, |
|
"rewards/rejected": -0.6398130655288696, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 0.9343103899502748, |
|
"grad_norm": 7.668455600738525, |
|
"learning_rate": 6.54697595640899e-09, |
|
"logits/chosen": -2.7558670043945312, |
|
"logits/rejected": -2.7410783767700195, |
|
"logps/chosen": -333.0140075683594, |
|
"logps/rejected": -347.9772033691406, |
|
"loss": 0.5718, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.23105120658874512, |
|
"rewards/margins": 0.38840624690055847, |
|
"rewards/rejected": -0.6194573640823364, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 0.9369275058885108, |
|
"grad_norm": 7.808078765869141, |
|
"learning_rate": 6.037764851154425e-09, |
|
"logits/chosen": -2.7314181327819824, |
|
"logits/rejected": -2.7231030464172363, |
|
"logps/chosen": -305.7143249511719, |
|
"logps/rejected": -345.88983154296875, |
|
"loss": 0.5699, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.22997505962848663, |
|
"rewards/margins": 0.37085598707199097, |
|
"rewards/rejected": -0.6008309721946716, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 0.9395446218267469, |
|
"grad_norm": 9.760852813720703, |
|
"learning_rate": 5.548924522327747e-09, |
|
"logits/chosen": -2.7540392875671387, |
|
"logits/rejected": -2.7462592124938965, |
|
"logps/chosen": -308.9768981933594, |
|
"logps/rejected": -327.16802978515625, |
|
"loss": 0.5826, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.299643337726593, |
|
"rewards/margins": 0.3448326587677002, |
|
"rewards/rejected": -0.6444759368896484, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 0.942161737764983, |
|
"grad_norm": 11.123191833496094, |
|
"learning_rate": 5.080495787955691e-09, |
|
"logits/chosen": -2.734261989593506, |
|
"logits/rejected": -2.717097043991089, |
|
"logps/chosen": -269.73223876953125, |
|
"logps/rejected": -300.8177490234375, |
|
"loss": 0.585, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.254092276096344, |
|
"rewards/margins": 0.306030809879303, |
|
"rewards/rejected": -0.560123085975647, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.942161737764983, |
|
"eval_logits/chosen": -2.776420831680298, |
|
"eval_logits/rejected": -2.7549078464508057, |
|
"eval_logps/chosen": -309.95306396484375, |
|
"eval_logps/rejected": -321.4418029785156, |
|
"eval_loss": 0.5895029306411743, |
|
"eval_rewards/accuracies": 0.7020000219345093, |
|
"eval_rewards/chosen": -0.27215421199798584, |
|
"eval_rewards/margins": 0.3278846740722656, |
|
"eval_rewards/rejected": -0.6000389456748962, |
|
"eval_runtime": 692.4927, |
|
"eval_samples_per_second": 2.888, |
|
"eval_steps_per_second": 0.361, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.944778853703219, |
|
"grad_norm": 7.403170585632324, |
|
"learning_rate": 4.632517761702814e-09, |
|
"logits/chosen": -2.7008776664733887, |
|
"logits/rejected": -2.6773476600646973, |
|
"logps/chosen": -289.5223083496094, |
|
"logps/rejected": -309.5367431640625, |
|
"loss": 0.5795, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.308106005191803, |
|
"rewards/margins": 0.33852237462997437, |
|
"rewards/rejected": -0.6466284394264221, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 0.9473959696414551, |
|
"grad_norm": 9.613285064697266, |
|
"learning_rate": 4.205027849605358e-09, |
|
"logits/chosen": -2.738858699798584, |
|
"logits/rejected": -2.726569414138794, |
|
"logps/chosen": -294.84014892578125, |
|
"logps/rejected": -290.58770751953125, |
|
"loss": 0.5959, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.292969286441803, |
|
"rewards/margins": 0.3049730956554413, |
|
"rewards/rejected": -0.5979424715042114, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 0.9500130855796912, |
|
"grad_norm": 4.820310115814209, |
|
"learning_rate": 3.798061746947995e-09, |
|
"logits/chosen": -2.785492420196533, |
|
"logits/rejected": -2.767252206802368, |
|
"logps/chosen": -311.9582214355469, |
|
"logps/rejected": -305.7359924316406, |
|
"loss": 0.5893, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.31197255849838257, |
|
"rewards/margins": 0.3228316307067871, |
|
"rewards/rejected": -0.6348041296005249, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 0.9526302015179272, |
|
"grad_norm": 5.795242786407471, |
|
"learning_rate": 3.411653435283157e-09, |
|
"logits/chosen": -2.7570109367370605, |
|
"logits/rejected": -2.7326931953430176, |
|
"logps/chosen": -313.0288391113281, |
|
"logps/rejected": -286.85894775390625, |
|
"loss": 0.5868, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.23662319779396057, |
|
"rewards/margins": 0.32369619607925415, |
|
"rewards/rejected": -0.5603194236755371, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 0.9552473174561633, |
|
"grad_norm": 8.141414642333984, |
|
"learning_rate": 3.0458351795936698e-09, |
|
"logits/chosen": -2.800523281097412, |
|
"logits/rejected": -2.7803540229797363, |
|
"logps/chosen": -287.27178955078125, |
|
"logps/rejected": -296.94482421875, |
|
"loss": 0.5557, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.20535226166248322, |
|
"rewards/margins": 0.4138811230659485, |
|
"rewards/rejected": -0.6192333102226257, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 0.9578644333943994, |
|
"grad_norm": 10.963499069213867, |
|
"learning_rate": 2.700637525598598e-09, |
|
"logits/chosen": -2.7325665950775146, |
|
"logits/rejected": -2.742112636566162, |
|
"logps/chosen": -318.7773742675781, |
|
"logps/rejected": -340.5607604980469, |
|
"loss": 0.6213, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.2901912331581116, |
|
"rewards/margins": 0.23596885800361633, |
|
"rewards/rejected": -0.5261600613594055, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 0.9604815493326354, |
|
"grad_norm": 5.604915618896484, |
|
"learning_rate": 2.3760892972027324e-09, |
|
"logits/chosen": -2.8125240802764893, |
|
"logits/rejected": -2.794743061065674, |
|
"logps/chosen": -320.9376525878906, |
|
"logps/rejected": -314.6265869140625, |
|
"loss": 0.6086, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.3326115012168884, |
|
"rewards/margins": 0.2905888855457306, |
|
"rewards/rejected": -0.6232004165649414, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 0.9630986652708715, |
|
"grad_norm": 8.076900482177734, |
|
"learning_rate": 2.0722175940897645e-09, |
|
"logits/chosen": -2.730006694793701, |
|
"logits/rejected": -2.7527151107788086, |
|
"logps/chosen": -304.4130554199219, |
|
"logps/rejected": -333.45281982421875, |
|
"loss": 0.5561, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.2793710231781006, |
|
"rewards/margins": 0.40125495195388794, |
|
"rewards/rejected": -0.6806259751319885, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 0.9657157812091076, |
|
"grad_norm": 5.261369705200195, |
|
"learning_rate": 1.7890477894593748e-09, |
|
"logits/chosen": -2.7596428394317627, |
|
"logits/rejected": -2.73931622505188, |
|
"logps/chosen": -363.08984375, |
|
"logps/rejected": -348.8448486328125, |
|
"loss": 0.5621, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.23692288994789124, |
|
"rewards/margins": 0.40229707956314087, |
|
"rewards/rejected": -0.6392199993133545, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 0.9683328971473436, |
|
"grad_norm": 7.541417598724365, |
|
"learning_rate": 1.5266035279088708e-09, |
|
"logits/chosen": -2.6856465339660645, |
|
"logits/rejected": -2.6826679706573486, |
|
"logps/chosen": -347.5863952636719, |
|
"logps/rejected": -356.30120849609375, |
|
"loss": 0.567, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.2766670286655426, |
|
"rewards/margins": 0.367573082447052, |
|
"rewards/rejected": -0.6442400813102722, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.9683328971473436, |
|
"eval_logits/chosen": -2.7754881381988525, |
|
"eval_logits/rejected": -2.753868341445923, |
|
"eval_logps/chosen": -310.11712646484375, |
|
"eval_logps/rejected": -321.65545654296875, |
|
"eval_loss": 0.5893409252166748, |
|
"eval_rewards/accuracies": 0.7014999985694885, |
|
"eval_rewards/chosen": -0.2737952172756195, |
|
"eval_rewards/margins": 0.32838013768196106, |
|
"eval_rewards/rejected": -0.6021752953529358, |
|
"eval_runtime": 692.7848, |
|
"eval_samples_per_second": 2.887, |
|
"eval_steps_per_second": 0.361, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.9709500130855797, |
|
"grad_norm": 11.719736099243164, |
|
"learning_rate": 1.2849067234584621e-09, |
|
"logits/chosen": -2.714137315750122, |
|
"logits/rejected": -2.7111401557922363, |
|
"logps/chosen": -280.48919677734375, |
|
"logps/rejected": -300.55706787109375, |
|
"loss": 0.607, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.2787315249443054, |
|
"rewards/margins": 0.30049681663513184, |
|
"rewards/rejected": -0.5792283415794373, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 0.9735671290238157, |
|
"grad_norm": 12.492560386657715, |
|
"learning_rate": 1.0639775577218625e-09, |
|
"logits/chosen": -2.719714403152466, |
|
"logits/rejected": -2.667534589767456, |
|
"logps/chosen": -295.1371765136719, |
|
"logps/rejected": -294.61932373046875, |
|
"loss": 0.5762, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.27527686953544617, |
|
"rewards/margins": 0.36209625005722046, |
|
"rewards/rejected": -0.637373149394989, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 0.9761842449620518, |
|
"grad_norm": 7.440390110015869, |
|
"learning_rate": 8.638344782207485e-10, |
|
"logits/chosen": -2.725163459777832, |
|
"logits/rejected": -2.7303969860076904, |
|
"logps/chosen": -296.50689697265625, |
|
"logps/rejected": -305.67706298828125, |
|
"loss": 0.5767, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.22756004333496094, |
|
"rewards/margins": 0.3577590882778168, |
|
"rewards/rejected": -0.5853191018104553, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 0.9788013609002879, |
|
"grad_norm": 10.965612411499023, |
|
"learning_rate": 6.844941968447149e-10, |
|
"logits/chosen": -2.7626724243164062, |
|
"logits/rejected": -2.7460460662841797, |
|
"logps/chosen": -316.35015869140625, |
|
"logps/rejected": -349.7431945800781, |
|
"loss": 0.5453, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.2542489767074585, |
|
"rewards/margins": 0.45952582359313965, |
|
"rewards/rejected": -0.7137748003005981, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 0.9814184768385239, |
|
"grad_norm": 5.883279323577881, |
|
"learning_rate": 5.25971688455612e-10, |
|
"logits/chosen": -2.7904438972473145, |
|
"logits/rejected": -2.775864362716675, |
|
"logps/chosen": -316.23297119140625, |
|
"logps/rejected": -347.6502685546875, |
|
"loss": 0.5698, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.2520793080329895, |
|
"rewards/margins": 0.36716121435165405, |
|
"rewards/rejected": -0.6192405223846436, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 0.98403559277676, |
|
"grad_norm": 4.377948760986328, |
|
"learning_rate": 3.882801896372967e-10, |
|
"logits/chosen": -2.785407543182373, |
|
"logits/rejected": -2.785416841506958, |
|
"logps/chosen": -311.1086120605469, |
|
"logps/rejected": -308.876220703125, |
|
"loss": 0.6124, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.2892715036869049, |
|
"rewards/margins": 0.29838478565216064, |
|
"rewards/rejected": -0.5876562595367432, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 0.9866527087149961, |
|
"grad_norm": 8.081770896911621, |
|
"learning_rate": 2.714311975902661e-10, |
|
"logits/chosen": -2.7383980751037598, |
|
"logits/rejected": -2.710829257965088, |
|
"logps/chosen": -330.71771240234375, |
|
"logps/rejected": -337.7955627441406, |
|
"loss": 0.5649, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.25471562147140503, |
|
"rewards/margins": 0.3609776496887207, |
|
"rewards/rejected": -0.6156932711601257, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 0.9892698246532321, |
|
"grad_norm": 7.887190818786621, |
|
"learning_rate": 1.754344691717591e-10, |
|
"logits/chosen": -2.761021852493286, |
|
"logits/rejected": -2.7340810298919678, |
|
"logps/chosen": -295.04718017578125, |
|
"logps/rejected": -336.95147705078125, |
|
"loss": 0.6306, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.27056482434272766, |
|
"rewards/margins": 0.2132827490568161, |
|
"rewards/rejected": -0.4838475286960602, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 0.9918869405914682, |
|
"grad_norm": 7.817293643951416, |
|
"learning_rate": 1.0029802008096333e-10, |
|
"logits/chosen": -2.7683863639831543, |
|
"logits/rejected": -2.7289211750030518, |
|
"logps/chosen": -316.55340576171875, |
|
"logps/rejected": -334.72845458984375, |
|
"loss": 0.5639, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.25747808814048767, |
|
"rewards/margins": 0.4023471474647522, |
|
"rewards/rejected": -0.6598252654075623, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 0.9945040565297043, |
|
"grad_norm": 6.426971435546875, |
|
"learning_rate": 4.602812418974533e-11, |
|
"logits/chosen": -2.791513442993164, |
|
"logits/rejected": -2.7664811611175537, |
|
"logps/chosen": -328.2163391113281, |
|
"logps/rejected": -337.54974365234375, |
|
"loss": 0.5834, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.2448674440383911, |
|
"rewards/margins": 0.3448673486709595, |
|
"rewards/rejected": -0.5897347927093506, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.9945040565297043, |
|
"eval_logits/chosen": -2.7742366790771484, |
|
"eval_logits/rejected": -2.7524607181549072, |
|
"eval_logps/chosen": -310.13330078125, |
|
"eval_logps/rejected": -321.6666259765625, |
|
"eval_loss": 0.5893096923828125, |
|
"eval_rewards/accuracies": 0.7024999856948853, |
|
"eval_rewards/chosen": -0.2739570438861847, |
|
"eval_rewards/margins": 0.32832974195480347, |
|
"eval_rewards/rejected": -0.6022867560386658, |
|
"eval_runtime": 692.7928, |
|
"eval_samples_per_second": 2.887, |
|
"eval_steps_per_second": 0.361, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.9971211724679403, |
|
"grad_norm": 7.6028289794921875, |
|
"learning_rate": 1.2629313018819309e-11, |
|
"logits/chosen": -2.7530319690704346, |
|
"logits/rejected": -2.7311320304870605, |
|
"logps/chosen": -300.90142822265625, |
|
"logps/rejected": -311.88006591796875, |
|
"loss": 0.5936, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.2676599621772766, |
|
"rewards/margins": 0.3127606511116028, |
|
"rewards/rejected": -0.5804205536842346, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 0.9997382884061764, |
|
"grad_norm": 10.209754943847656, |
|
"learning_rate": 1.0437535929996855e-13, |
|
"logits/chosen": -2.765655279159546, |
|
"logits/rejected": -2.7465381622314453, |
|
"logps/chosen": -334.4398498535156, |
|
"logps/rejected": -327.4457702636719, |
|
"loss": 0.5626, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.2764059007167816, |
|
"rewards/margins": 0.397102028131485, |
|
"rewards/rejected": -0.6735079288482666, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 3821, |
|
"total_flos": 0.0, |
|
"train_loss": 0.6164219083351729, |
|
"train_runtime": 73481.1174, |
|
"train_samples_per_second": 0.832, |
|
"train_steps_per_second": 0.052 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 3821, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|