|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.9964868029907215, |
|
"eval_steps": 800, |
|
"global_step": 2079, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0014413115935501305, |
|
"grad_norm": 15.202939063397405, |
|
"learning_rate": 4.807692307692308e-10, |
|
"logits/chosen": -2.3378124237060547, |
|
"logits/rejected": -2.341672897338867, |
|
"logps/chosen": -1.0059865713119507, |
|
"logps/rejected": -1.105405569076538, |
|
"loss": 1.6556, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -2.0119731426239014, |
|
"rewards/margins": 0.19883811473846436, |
|
"rewards/rejected": -2.210811138153076, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.014413115935501306, |
|
"grad_norm": 18.061978045212722, |
|
"learning_rate": 4.807692307692308e-09, |
|
"logits/chosen": -2.356367826461792, |
|
"logits/rejected": -2.3451521396636963, |
|
"logps/chosen": -1.0228126049041748, |
|
"logps/rejected": -1.1430484056472778, |
|
"loss": 1.6323, |
|
"rewards/accuracies": 0.5694444179534912, |
|
"rewards/chosen": -2.0456252098083496, |
|
"rewards/margins": 0.24047136306762695, |
|
"rewards/rejected": -2.2860968112945557, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.02882623187100261, |
|
"grad_norm": 17.723319596995733, |
|
"learning_rate": 9.615384615384615e-09, |
|
"logits/chosen": -2.3264236450195312, |
|
"logits/rejected": -2.321986198425293, |
|
"logps/chosen": -1.0446507930755615, |
|
"logps/rejected": -1.1442738771438599, |
|
"loss": 1.6729, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -2.089301586151123, |
|
"rewards/margins": 0.19924603402614594, |
|
"rewards/rejected": -2.2885477542877197, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.04323934780650392, |
|
"grad_norm": 17.07010517991476, |
|
"learning_rate": 1.442307692307692e-08, |
|
"logits/chosen": -2.3456313610076904, |
|
"logits/rejected": -2.3424785137176514, |
|
"logps/chosen": -1.0158333778381348, |
|
"logps/rejected": -1.076974630355835, |
|
"loss": 1.7109, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -2.0316667556762695, |
|
"rewards/margins": 0.12228262424468994, |
|
"rewards/rejected": -2.15394926071167, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.05765246374200522, |
|
"grad_norm": 19.711953891202494, |
|
"learning_rate": 1.923076923076923e-08, |
|
"logits/chosen": -2.383465528488159, |
|
"logits/rejected": -2.3750338554382324, |
|
"logps/chosen": -1.1377735137939453, |
|
"logps/rejected": -1.221296787261963, |
|
"loss": 1.6828, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -2.2755470275878906, |
|
"rewards/margins": 0.167046457529068, |
|
"rewards/rejected": -2.442593574523926, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.07206557967750653, |
|
"grad_norm": 15.368731865288492, |
|
"learning_rate": 2.403846153846154e-08, |
|
"logits/chosen": -2.3631155490875244, |
|
"logits/rejected": -2.362963914871216, |
|
"logps/chosen": -1.0241036415100098, |
|
"logps/rejected": -1.1317743062973022, |
|
"loss": 1.6525, |
|
"rewards/accuracies": 0.546875, |
|
"rewards/chosen": -2.0482072830200195, |
|
"rewards/margins": 0.2153414785861969, |
|
"rewards/rejected": -2.2635486125946045, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.08647869561300783, |
|
"grad_norm": 15.486802435760401, |
|
"learning_rate": 2.884615384615384e-08, |
|
"logits/chosen": -2.3361105918884277, |
|
"logits/rejected": -2.327380657196045, |
|
"logps/chosen": -0.9968592524528503, |
|
"logps/rejected": -1.0975861549377441, |
|
"loss": 1.6565, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -1.9937185049057007, |
|
"rewards/margins": 0.2014540731906891, |
|
"rewards/rejected": -2.1951723098754883, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.10089181154850914, |
|
"grad_norm": 15.988415966234422, |
|
"learning_rate": 3.365384615384615e-08, |
|
"logits/chosen": -2.3774499893188477, |
|
"logits/rejected": -2.3742191791534424, |
|
"logps/chosen": -1.028954267501831, |
|
"logps/rejected": -1.1373963356018066, |
|
"loss": 1.6496, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -2.057908535003662, |
|
"rewards/margins": 0.21688416600227356, |
|
"rewards/rejected": -2.2747926712036133, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.11530492748401044, |
|
"grad_norm": 13.627900414661896, |
|
"learning_rate": 3.846153846153846e-08, |
|
"logits/chosen": -2.3636672496795654, |
|
"logits/rejected": -2.354912757873535, |
|
"logps/chosen": -0.9835589528083801, |
|
"logps/rejected": -1.1169239282608032, |
|
"loss": 1.6095, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -1.9671179056167603, |
|
"rewards/margins": 0.2667301595211029, |
|
"rewards/rejected": -2.2338478565216064, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.12971804341951176, |
|
"grad_norm": 16.99221012864124, |
|
"learning_rate": 4.326923076923077e-08, |
|
"logits/chosen": -2.3509373664855957, |
|
"logits/rejected": -2.3414111137390137, |
|
"logps/chosen": -1.0289192199707031, |
|
"logps/rejected": -1.1351473331451416, |
|
"loss": 1.6614, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -2.0578384399414062, |
|
"rewards/margins": 0.21245631575584412, |
|
"rewards/rejected": -2.270294666290283, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.14413115935501306, |
|
"grad_norm": 16.26579840133319, |
|
"learning_rate": 4.807692307692308e-08, |
|
"logits/chosen": -2.4182028770446777, |
|
"logits/rejected": -2.416335105895996, |
|
"logps/chosen": -0.9977607727050781, |
|
"logps/rejected": -1.108969807624817, |
|
"loss": 1.637, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -1.9955215454101562, |
|
"rewards/margins": 0.22241799533367157, |
|
"rewards/rejected": -2.217939615249634, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.15854427529051437, |
|
"grad_norm": 14.431674139311319, |
|
"learning_rate": 5.288461538461538e-08, |
|
"logits/chosen": -2.342700719833374, |
|
"logits/rejected": -2.3403000831604004, |
|
"logps/chosen": -1.0405100584030151, |
|
"logps/rejected": -1.1639328002929688, |
|
"loss": 1.6325, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -2.0810201168060303, |
|
"rewards/margins": 0.24684572219848633, |
|
"rewards/rejected": -2.3278656005859375, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.17295739122601567, |
|
"grad_norm": 16.881846104086076, |
|
"learning_rate": 5.769230769230768e-08, |
|
"logits/chosen": -2.3760740756988525, |
|
"logits/rejected": -2.373129367828369, |
|
"logps/chosen": -1.0364916324615479, |
|
"logps/rejected": -1.1324373483657837, |
|
"loss": 1.67, |
|
"rewards/accuracies": 0.596875011920929, |
|
"rewards/chosen": -2.0729832649230957, |
|
"rewards/margins": 0.19189123809337616, |
|
"rewards/rejected": -2.2648746967315674, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.18737050716151699, |
|
"grad_norm": 16.764431844922484, |
|
"learning_rate": 6.25e-08, |
|
"logits/chosen": -2.3209102153778076, |
|
"logits/rejected": -2.3239667415618896, |
|
"logps/chosen": -1.0940515995025635, |
|
"logps/rejected": -1.1949011087417603, |
|
"loss": 1.6633, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -2.188103199005127, |
|
"rewards/margins": 0.20169904828071594, |
|
"rewards/rejected": -2.3898022174835205, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.20178362309701828, |
|
"grad_norm": 17.534779544810593, |
|
"learning_rate": 6.73076923076923e-08, |
|
"logits/chosen": -2.3762125968933105, |
|
"logits/rejected": -2.368044376373291, |
|
"logps/chosen": -1.0029666423797607, |
|
"logps/rejected": -1.1249053478240967, |
|
"loss": 1.6237, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -2.0059332847595215, |
|
"rewards/margins": 0.2438771277666092, |
|
"rewards/rejected": -2.2498106956481934, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.2161967390325196, |
|
"grad_norm": 15.578504627710455, |
|
"learning_rate": 7.211538461538461e-08, |
|
"logits/chosen": -2.3589887619018555, |
|
"logits/rejected": -2.3546345233917236, |
|
"logps/chosen": -1.0512168407440186, |
|
"logps/rejected": -1.1491758823394775, |
|
"loss": 1.6633, |
|
"rewards/accuracies": 0.5718749761581421, |
|
"rewards/chosen": -2.102433681488037, |
|
"rewards/margins": 0.19591817259788513, |
|
"rewards/rejected": -2.298351764678955, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.2306098549680209, |
|
"grad_norm": 13.745585175489111, |
|
"learning_rate": 7.692307692307692e-08, |
|
"logits/chosen": -2.338444232940674, |
|
"logits/rejected": -2.332979679107666, |
|
"logps/chosen": -1.0473906993865967, |
|
"logps/rejected": -1.1564788818359375, |
|
"loss": 1.6513, |
|
"rewards/accuracies": 0.6031249761581421, |
|
"rewards/chosen": -2.0947813987731934, |
|
"rewards/margins": 0.21817633509635925, |
|
"rewards/rejected": -2.312957763671875, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.2450229709035222, |
|
"grad_norm": 16.783418396767676, |
|
"learning_rate": 8.173076923076923e-08, |
|
"logits/chosen": -2.3806934356689453, |
|
"logits/rejected": -2.3792760372161865, |
|
"logps/chosen": -1.0662988424301147, |
|
"logps/rejected": -1.1184349060058594, |
|
"loss": 1.7353, |
|
"rewards/accuracies": 0.5093749761581421, |
|
"rewards/chosen": -2.1325976848602295, |
|
"rewards/margins": 0.1042722687125206, |
|
"rewards/rejected": -2.2368698120117188, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.2594360868390235, |
|
"grad_norm": 15.562730291374017, |
|
"learning_rate": 8.653846153846154e-08, |
|
"logits/chosen": -2.3370161056518555, |
|
"logits/rejected": -2.3294992446899414, |
|
"logps/chosen": -1.0367413759231567, |
|
"logps/rejected": -1.1586549282073975, |
|
"loss": 1.6251, |
|
"rewards/accuracies": 0.6156250238418579, |
|
"rewards/chosen": -2.0734827518463135, |
|
"rewards/margins": 0.24382701516151428, |
|
"rewards/rejected": -2.317309856414795, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.2738492027745248, |
|
"grad_norm": 14.57246304002355, |
|
"learning_rate": 9.134615384615383e-08, |
|
"logits/chosen": -2.355874538421631, |
|
"logits/rejected": -2.357952833175659, |
|
"logps/chosen": -1.0316553115844727, |
|
"logps/rejected": -1.1332082748413086, |
|
"loss": 1.6605, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -2.0633106231689453, |
|
"rewards/margins": 0.20310597121715546, |
|
"rewards/rejected": -2.266416549682617, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.2882623187100261, |
|
"grad_norm": 14.510113595673776, |
|
"learning_rate": 9.615384615384616e-08, |
|
"logits/chosen": -2.3815228939056396, |
|
"logits/rejected": -2.377211332321167, |
|
"logps/chosen": -1.0085281133651733, |
|
"logps/rejected": -1.0985215902328491, |
|
"loss": 1.6684, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -2.0170562267303467, |
|
"rewards/margins": 0.17998693883419037, |
|
"rewards/rejected": -2.1970431804656982, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.30267543464552743, |
|
"grad_norm": 14.940800895121608, |
|
"learning_rate": 9.999971806320255e-08, |
|
"logits/chosen": -2.4093306064605713, |
|
"logits/rejected": -2.4097609519958496, |
|
"logps/chosen": -1.0589462518692017, |
|
"logps/rejected": -1.1346651315689087, |
|
"loss": 1.695, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -2.1178925037384033, |
|
"rewards/margins": 0.15143761038780212, |
|
"rewards/rejected": -2.2693302631378174, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.31708855058102875, |
|
"grad_norm": 15.468071809971288, |
|
"learning_rate": 9.998985060913876e-08, |
|
"logits/chosen": -2.327671527862549, |
|
"logits/rejected": -2.3280539512634277, |
|
"logps/chosen": -1.0390589237213135, |
|
"logps/rejected": -1.1213579177856445, |
|
"loss": 1.6904, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -2.078117847442627, |
|
"rewards/margins": 0.1645977944135666, |
|
"rewards/rejected": -2.242715835571289, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.33150166651653007, |
|
"grad_norm": 18.885553561709102, |
|
"learning_rate": 9.996588949457546e-08, |
|
"logits/chosen": -2.3791205883026123, |
|
"logits/rejected": -2.3730788230895996, |
|
"logps/chosen": -1.156124472618103, |
|
"logps/rejected": -1.2356293201446533, |
|
"loss": 1.6937, |
|
"rewards/accuracies": 0.559374988079071, |
|
"rewards/chosen": -2.312248945236206, |
|
"rewards/margins": 0.15900969505310059, |
|
"rewards/rejected": -2.4712586402893066, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.34591478245203133, |
|
"grad_norm": 18.61654233250297, |
|
"learning_rate": 9.992784147488017e-08, |
|
"logits/chosen": -2.4054293632507324, |
|
"logits/rejected": -2.3909668922424316, |
|
"logps/chosen": -1.040718674659729, |
|
"logps/rejected": -1.1538527011871338, |
|
"loss": 1.6368, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -2.081437349319458, |
|
"rewards/margins": 0.22626809775829315, |
|
"rewards/rejected": -2.3077054023742676, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.36032789838753265, |
|
"grad_norm": 15.133106885435941, |
|
"learning_rate": 9.987571727694775e-08, |
|
"logits/chosen": -2.377009630203247, |
|
"logits/rejected": -2.371063232421875, |
|
"logps/chosen": -0.997736930847168, |
|
"logps/rejected": -1.1200191974639893, |
|
"loss": 1.6202, |
|
"rewards/accuracies": 0.621874988079071, |
|
"rewards/chosen": -1.995473861694336, |
|
"rewards/margins": 0.24456438422203064, |
|
"rewards/rejected": -2.2400383949279785, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.37474101432303397, |
|
"grad_norm": 15.527267890358452, |
|
"learning_rate": 9.98095315961762e-08, |
|
"logits/chosen": -2.38106369972229, |
|
"logits/rejected": -2.3770012855529785, |
|
"logps/chosen": -1.073089838027954, |
|
"logps/rejected": -1.1799663305282593, |
|
"loss": 1.6494, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -2.146179676055908, |
|
"rewards/margins": 0.2137528359889984, |
|
"rewards/rejected": -2.3599326610565186, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.3891541302585353, |
|
"grad_norm": 16.699106788545635, |
|
"learning_rate": 9.97293030923235e-08, |
|
"logits/chosen": -2.3734331130981445, |
|
"logits/rejected": -2.36216402053833, |
|
"logps/chosen": -1.0048857927322388, |
|
"logps/rejected": -1.0962402820587158, |
|
"loss": 1.6741, |
|
"rewards/accuracies": 0.5718749761581421, |
|
"rewards/chosen": -2.0097715854644775, |
|
"rewards/margins": 0.18270887434482574, |
|
"rewards/rejected": -2.1924805641174316, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.40356724619403656, |
|
"grad_norm": 14.595168551654872, |
|
"learning_rate": 9.963505438424693e-08, |
|
"logits/chosen": -2.340841293334961, |
|
"logits/rejected": -2.3415005207061768, |
|
"logps/chosen": -1.0379191637039185, |
|
"logps/rejected": -1.1280016899108887, |
|
"loss": 1.6851, |
|
"rewards/accuracies": 0.5406249761581421, |
|
"rewards/chosen": -2.075838327407837, |
|
"rewards/margins": 0.18016524612903595, |
|
"rewards/rejected": -2.2560033798217773, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.4179803621295379, |
|
"grad_norm": 14.286732447718073, |
|
"learning_rate": 9.952681204352607e-08, |
|
"logits/chosen": -2.361560821533203, |
|
"logits/rejected": -2.3513660430908203, |
|
"logps/chosen": -1.0380117893218994, |
|
"logps/rejected": -1.1370676755905151, |
|
"loss": 1.6637, |
|
"rewards/accuracies": 0.565625011920929, |
|
"rewards/chosen": -2.076023578643799, |
|
"rewards/margins": 0.198111891746521, |
|
"rewards/rejected": -2.2741353511810303, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.4323934780650392, |
|
"grad_norm": 17.31273729578293, |
|
"learning_rate": 9.94046065869715e-08, |
|
"logits/chosen": -2.377479314804077, |
|
"logits/rejected": -2.375476360321045, |
|
"logps/chosen": -1.0271109342575073, |
|
"logps/rejected": -1.1700676679611206, |
|
"loss": 1.5942, |
|
"rewards/accuracies": 0.628125011920929, |
|
"rewards/chosen": -2.0542218685150146, |
|
"rewards/margins": 0.2859136462211609, |
|
"rewards/rejected": -2.340135335922241, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.4468065940005405, |
|
"grad_norm": 17.694546366405458, |
|
"learning_rate": 9.926847246802116e-08, |
|
"logits/chosen": -2.3561387062072754, |
|
"logits/rejected": -2.3444766998291016, |
|
"logps/chosen": -1.0410211086273193, |
|
"logps/rejected": -1.1159262657165527, |
|
"loss": 1.6942, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -2.0820422172546387, |
|
"rewards/margins": 0.14981010556221008, |
|
"rewards/rejected": -2.2318525314331055, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.4612197099360418, |
|
"grad_norm": 13.051339803328997, |
|
"learning_rate": 9.911844806702691e-08, |
|
"logits/chosen": -2.3585753440856934, |
|
"logits/rejected": -2.360156297683716, |
|
"logps/chosen": -1.015515923500061, |
|
"logps/rejected": -1.1353641748428345, |
|
"loss": 1.6286, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -2.031031847000122, |
|
"rewards/margins": 0.23969626426696777, |
|
"rewards/rejected": -2.270728349685669, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.4756328258715431, |
|
"grad_norm": 17.002852190341585, |
|
"learning_rate": 9.895457568043387e-08, |
|
"logits/chosen": -2.3824462890625, |
|
"logits/rejected": -2.3757641315460205, |
|
"logps/chosen": -1.059061050415039, |
|
"logps/rejected": -1.14574134349823, |
|
"loss": 1.6835, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -2.118122100830078, |
|
"rewards/margins": 0.17336080968379974, |
|
"rewards/rejected": -2.29148268699646, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.4900459418070444, |
|
"grad_norm": 16.276382330404722, |
|
"learning_rate": 9.877690150885587e-08, |
|
"logits/chosen": -2.324713945388794, |
|
"logits/rejected": -2.314767599105835, |
|
"logps/chosen": -1.0457204580307007, |
|
"logps/rejected": -1.135799527168274, |
|
"loss": 1.6763, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -2.0914409160614014, |
|
"rewards/margins": 0.18015804886817932, |
|
"rewards/rejected": -2.271599054336548, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.5044590577425457, |
|
"grad_norm": 14.679321409845278, |
|
"learning_rate": 9.858547564404998e-08, |
|
"logits/chosen": -2.368298292160034, |
|
"logits/rejected": -2.3589999675750732, |
|
"logps/chosen": -1.0575425624847412, |
|
"logps/rejected": -1.1802635192871094, |
|
"loss": 1.6339, |
|
"rewards/accuracies": 0.578125, |
|
"rewards/chosen": -2.1150851249694824, |
|
"rewards/margins": 0.24544170498847961, |
|
"rewards/rejected": -2.3605270385742188, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.518872173678047, |
|
"grad_norm": 16.288849210972156, |
|
"learning_rate": 9.838035205479418e-08, |
|
"logits/chosen": -2.3341236114501953, |
|
"logits/rejected": -2.328613042831421, |
|
"logps/chosen": -0.9657120704650879, |
|
"logps/rejected": -1.0940418243408203, |
|
"loss": 1.6196, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -1.9314241409301758, |
|
"rewards/margins": 0.25665926933288574, |
|
"rewards/rejected": -2.1880836486816406, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.5332852896135484, |
|
"grad_norm": 15.065053010351129, |
|
"learning_rate": 9.816158857167196e-08, |
|
"logits/chosen": -2.3553214073181152, |
|
"logits/rejected": -2.3543648719787598, |
|
"logps/chosen": -1.017580509185791, |
|
"logps/rejected": -1.093390703201294, |
|
"loss": 1.706, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -2.035161018371582, |
|
"rewards/margins": 0.15162022411823273, |
|
"rewards/rejected": -2.186781406402588, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.5476984055490496, |
|
"grad_norm": 15.268674336756646, |
|
"learning_rate": 9.7929246870768e-08, |
|
"logits/chosen": -2.3563642501831055, |
|
"logits/rejected": -2.357172727584839, |
|
"logps/chosen": -1.0474622249603271, |
|
"logps/rejected": -1.1527016162872314, |
|
"loss": 1.6593, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -2.0949244499206543, |
|
"rewards/margins": 0.2104784995317459, |
|
"rewards/rejected": -2.305403232574463, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.5621115214845509, |
|
"grad_norm": 19.51913775076441, |
|
"learning_rate": 9.768339245627993e-08, |
|
"logits/chosen": -2.329598903656006, |
|
"logits/rejected": -2.3325648307800293, |
|
"logps/chosen": -1.0032579898834229, |
|
"logps/rejected": -1.1267921924591064, |
|
"loss": 1.6287, |
|
"rewards/accuracies": 0.596875011920929, |
|
"rewards/chosen": -2.0065159797668457, |
|
"rewards/margins": 0.24706879258155823, |
|
"rewards/rejected": -2.253584384918213, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.5765246374200522, |
|
"grad_norm": 15.08719846804436, |
|
"learning_rate": 9.742409464205059e-08, |
|
"logits/chosen": -2.364119052886963, |
|
"logits/rejected": -2.3581573963165283, |
|
"logps/chosen": -1.054837942123413, |
|
"logps/rejected": -1.1783701181411743, |
|
"loss": 1.6358, |
|
"rewards/accuracies": 0.590624988079071, |
|
"rewards/chosen": -2.109675884246826, |
|
"rewards/margins": 0.2470642328262329, |
|
"rewards/rejected": -2.3567402362823486, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.5909377533555535, |
|
"grad_norm": 16.155157647324575, |
|
"learning_rate": 9.715142653202644e-08, |
|
"logits/chosen": -2.347181558609009, |
|
"logits/rejected": -2.342615842819214, |
|
"logps/chosen": -1.017263650894165, |
|
"logps/rejected": -1.1102826595306396, |
|
"loss": 1.6768, |
|
"rewards/accuracies": 0.6031249761581421, |
|
"rewards/chosen": -2.03452730178833, |
|
"rewards/margins": 0.18603798747062683, |
|
"rewards/rejected": -2.2205653190612793, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.6053508692910549, |
|
"grad_norm": 14.897089823744135, |
|
"learning_rate": 9.68654649996473e-08, |
|
"logits/chosen": -2.364981174468994, |
|
"logits/rejected": -2.3646998405456543, |
|
"logps/chosen": -1.0181089639663696, |
|
"logps/rejected": -1.1212923526763916, |
|
"loss": 1.6626, |
|
"rewards/accuracies": 0.590624988079071, |
|
"rewards/chosen": -2.0362179279327393, |
|
"rewards/margins": 0.206366628408432, |
|
"rewards/rejected": -2.242584705352783, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.6197639852265562, |
|
"grad_norm": 15.109629627010106, |
|
"learning_rate": 9.656629066617335e-08, |
|
"logits/chosen": -2.351111650466919, |
|
"logits/rejected": -2.3459696769714355, |
|
"logps/chosen": -1.1007968187332153, |
|
"logps/rejected": -1.1891463994979858, |
|
"loss": 1.6834, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -2.2015936374664307, |
|
"rewards/margins": 0.17669954895973206, |
|
"rewards/rejected": -2.3782927989959717, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.6341771011620575, |
|
"grad_norm": 16.01041357452403, |
|
"learning_rate": 9.62539878779556e-08, |
|
"logits/chosen": -2.3512957096099854, |
|
"logits/rejected": -2.3472342491149902, |
|
"logps/chosen": -1.0058082342147827, |
|
"logps/rejected": -1.1037191152572632, |
|
"loss": 1.6651, |
|
"rewards/accuracies": 0.559374988079071, |
|
"rewards/chosen": -2.0116164684295654, |
|
"rewards/margins": 0.19582167267799377, |
|
"rewards/rejected": -2.2074382305145264, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.6485902170975588, |
|
"grad_norm": 18.411662730620584, |
|
"learning_rate": 9.592864468265604e-08, |
|
"logits/chosen": -2.3800835609436035, |
|
"logits/rejected": -2.3797011375427246, |
|
"logps/chosen": -1.0755730867385864, |
|
"logps/rejected": -1.1656855344772339, |
|
"loss": 1.6784, |
|
"rewards/accuracies": 0.5718749761581421, |
|
"rewards/chosen": -2.151146173477173, |
|
"rewards/margins": 0.18022510409355164, |
|
"rewards/rejected": -2.3313710689544678, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.6630033330330601, |
|
"grad_norm": 17.51219332799835, |
|
"learning_rate": 9.559035280442441e-08, |
|
"logits/chosen": -2.3352417945861816, |
|
"logits/rejected": -2.3331692218780518, |
|
"logps/chosen": -1.0036710500717163, |
|
"logps/rejected": -1.0872585773468018, |
|
"loss": 1.6865, |
|
"rewards/accuracies": 0.559374988079071, |
|
"rewards/chosen": -2.0073421001434326, |
|
"rewards/margins": 0.16717498004436493, |
|
"rewards/rejected": -2.1745171546936035, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.6774164489685613, |
|
"grad_norm": 18.31866820732837, |
|
"learning_rate": 9.523920761803823e-08, |
|
"logits/chosen": -2.3979227542877197, |
|
"logits/rejected": -2.399036407470703, |
|
"logps/chosen": -1.0747919082641602, |
|
"logps/rejected": -1.1746306419372559, |
|
"loss": 1.6553, |
|
"rewards/accuracies": 0.628125011920929, |
|
"rewards/chosen": -2.1495838165283203, |
|
"rewards/margins": 0.1996772736310959, |
|
"rewards/rejected": -2.3492612838745117, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.6918295649040627, |
|
"grad_norm": 16.80271538537987, |
|
"learning_rate": 9.487530812201383e-08, |
|
"logits/chosen": -2.35792875289917, |
|
"logits/rejected": -2.3569393157958984, |
|
"logps/chosen": -1.0264079570770264, |
|
"logps/rejected": -1.1486434936523438, |
|
"loss": 1.6324, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -2.0528159141540527, |
|
"rewards/margins": 0.24447116255760193, |
|
"rewards/rejected": -2.2972869873046875, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.706242680839564, |
|
"grad_norm": 16.799352219592777, |
|
"learning_rate": 9.449875691069571e-08, |
|
"logits/chosen": -2.356339931488037, |
|
"logits/rejected": -2.354175567626953, |
|
"logps/chosen": -1.0335304737091064, |
|
"logps/rejected": -1.1673954725265503, |
|
"loss": 1.6051, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -2.067060947418213, |
|
"rewards/margins": 0.26773008704185486, |
|
"rewards/rejected": -2.3347909450531006, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.7206557967750653, |
|
"grad_norm": 15.404244347962265, |
|
"learning_rate": 9.410966014533195e-08, |
|
"logits/chosen": -2.3478922843933105, |
|
"logits/rejected": -2.3435702323913574, |
|
"logps/chosen": -1.053039312362671, |
|
"logps/rejected": -1.1690478324890137, |
|
"loss": 1.6495, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -2.106078624725342, |
|
"rewards/margins": 0.23201718926429749, |
|
"rewards/rejected": -2.3380956649780273, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.7350689127105666, |
|
"grad_norm": 15.81308480269748, |
|
"learning_rate": 9.37081275241442e-08, |
|
"logits/chosen": -2.3459486961364746, |
|
"logits/rejected": -2.339306592941284, |
|
"logps/chosen": -1.0136808156967163, |
|
"logps/rejected": -1.1138548851013184, |
|
"loss": 1.667, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -2.0273616313934326, |
|
"rewards/margins": 0.20034781098365784, |
|
"rewards/rejected": -2.2277097702026367, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.7494820286460679, |
|
"grad_norm": 14.877032985004409, |
|
"learning_rate": 9.329427225140042e-08, |
|
"logits/chosen": -2.3370301723480225, |
|
"logits/rejected": -2.3319363594055176, |
|
"logps/chosen": -1.0117393732070923, |
|
"logps/rejected": -1.1295689344406128, |
|
"loss": 1.6432, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -2.0234787464141846, |
|
"rewards/margins": 0.23565927147865295, |
|
"rewards/rejected": -2.2591378688812256, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.7638951445815693, |
|
"grad_norm": 16.317618504393014, |
|
"learning_rate": 9.286821100549906e-08, |
|
"logits/chosen": -2.336864471435547, |
|
"logits/rejected": -2.329371929168701, |
|
"logps/chosen": -0.9821737408638, |
|
"logps/rejected": -1.1123030185699463, |
|
"loss": 1.6226, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -1.9643474817276, |
|
"rewards/margins": 0.26025891304016113, |
|
"rewards/rejected": -2.2246060371398926, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.7783082605170706, |
|
"grad_norm": 16.918699303271303, |
|
"learning_rate": 9.243006390607402e-08, |
|
"logits/chosen": -2.3681960105895996, |
|
"logits/rejected": -2.3686928749084473, |
|
"logps/chosen": -1.0731232166290283, |
|
"logps/rejected": -1.2046077251434326, |
|
"loss": 1.6286, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -2.1462464332580566, |
|
"rewards/margins": 0.2629690170288086, |
|
"rewards/rejected": -2.4092154502868652, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.7927213764525718, |
|
"grad_norm": 17.901668830250117, |
|
"learning_rate": 9.197995448012912e-08, |
|
"logits/chosen": -2.3749890327453613, |
|
"logits/rejected": -2.368088960647583, |
|
"logps/chosen": -1.0722578763961792, |
|
"logps/rejected": -1.2028658390045166, |
|
"loss": 1.6224, |
|
"rewards/accuracies": 0.6031249761581421, |
|
"rewards/chosen": -2.1445157527923584, |
|
"rewards/margins": 0.26121601462364197, |
|
"rewards/rejected": -2.405731678009033, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.8071344923880731, |
|
"grad_norm": 15.889671449808617, |
|
"learning_rate": 9.151800962721217e-08, |
|
"logits/chosen": -2.320263147354126, |
|
"logits/rejected": -2.3110299110412598, |
|
"logps/chosen": -1.0240787267684937, |
|
"logps/rejected": -1.1282823085784912, |
|
"loss": 1.6579, |
|
"rewards/accuracies": 0.578125, |
|
"rewards/chosen": -2.0481574535369873, |
|
"rewards/margins": 0.20840716361999512, |
|
"rewards/rejected": -2.2565646171569824, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.8215476083235744, |
|
"grad_norm": 16.160221475349292, |
|
"learning_rate": 9.104435958363807e-08, |
|
"logits/chosen": -2.3726491928100586, |
|
"logits/rejected": -2.3696436882019043, |
|
"logps/chosen": -1.0209132432937622, |
|
"logps/rejected": -1.134126901626587, |
|
"loss": 1.6464, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -2.0418264865875244, |
|
"rewards/margins": 0.22642748057842255, |
|
"rewards/rejected": -2.268253803253174, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.8359607242590757, |
|
"grad_norm": 16.09504542028388, |
|
"learning_rate": 9.055913788577128e-08, |
|
"logits/chosen": -2.3402140140533447, |
|
"logits/rejected": -2.334770679473877, |
|
"logps/chosen": -1.0541826486587524, |
|
"logps/rejected": -1.1505852937698364, |
|
"loss": 1.6795, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -2.108365297317505, |
|
"rewards/margins": 0.19280506670475006, |
|
"rewards/rejected": -2.301170587539673, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.8503738401945771, |
|
"grad_norm": 18.45826863343491, |
|
"learning_rate": 9.006248133237782e-08, |
|
"logits/chosen": -2.3699214458465576, |
|
"logits/rejected": -2.361508846282959, |
|
"logps/chosen": -1.037255048751831, |
|
"logps/rejected": -1.155447006225586, |
|
"loss": 1.6428, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -2.074510097503662, |
|
"rewards/margins": 0.2363840639591217, |
|
"rewards/rejected": -2.310894012451172, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.8647869561300784, |
|
"grad_norm": 18.434587269982643, |
|
"learning_rate": 8.955452994605753e-08, |
|
"logits/chosen": -2.3500571250915527, |
|
"logits/rejected": -2.338733196258545, |
|
"logps/chosen": -1.0794237852096558, |
|
"logps/rejected": -1.170361042022705, |
|
"loss": 1.6733, |
|
"rewards/accuracies": 0.5718749761581421, |
|
"rewards/chosen": -2.1588475704193115, |
|
"rewards/margins": 0.1818745732307434, |
|
"rewards/rejected": -2.34072208404541, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.8792000720655797, |
|
"grad_norm": 14.237081246848815, |
|
"learning_rate": 8.903542693376747e-08, |
|
"logits/chosen": -2.3270299434661865, |
|
"logits/rejected": -2.3305177688598633, |
|
"logps/chosen": -0.9713711738586426, |
|
"logps/rejected": -1.1125681400299072, |
|
"loss": 1.6091, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -1.9427423477172852, |
|
"rewards/margins": 0.2823939025402069, |
|
"rewards/rejected": -2.2251362800598145, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.893613188001081, |
|
"grad_norm": 17.400582788834974, |
|
"learning_rate": 8.850531864644748e-08, |
|
"logits/chosen": -2.3322553634643555, |
|
"logits/rejected": -2.321770668029785, |
|
"logps/chosen": -0.9585525393486023, |
|
"logps/rejected": -1.0878236293792725, |
|
"loss": 1.6235, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -1.9171050786972046, |
|
"rewards/margins": 0.2585422694683075, |
|
"rewards/rejected": -2.175647258758545, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.9080263039365822, |
|
"grad_norm": 18.38773462583586, |
|
"learning_rate": 8.796435453775943e-08, |
|
"logits/chosen": -2.3591456413269043, |
|
"logits/rejected": -2.3641350269317627, |
|
"logps/chosen": -1.0922317504882812, |
|
"logps/rejected": -1.245233416557312, |
|
"loss": 1.5961, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -2.1844635009765625, |
|
"rewards/margins": 0.30600348114967346, |
|
"rewards/rejected": -2.490466833114624, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.9224394198720836, |
|
"grad_norm": 17.0793455640924, |
|
"learning_rate": 8.741268712195164e-08, |
|
"logits/chosen": -2.362234115600586, |
|
"logits/rejected": -2.3535900115966797, |
|
"logps/chosen": -0.9950187802314758, |
|
"logps/rejected": -1.1404359340667725, |
|
"loss": 1.5986, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -1.9900375604629517, |
|
"rewards/margins": 0.29083460569381714, |
|
"rewards/rejected": -2.280871868133545, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.9368525358075849, |
|
"grad_norm": 18.356266999768685, |
|
"learning_rate": 8.685047193086053e-08, |
|
"logits/chosen": -2.3747830390930176, |
|
"logits/rejected": -2.3743832111358643, |
|
"logps/chosen": -1.0230966806411743, |
|
"logps/rejected": -1.1178253889083862, |
|
"loss": 1.6728, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -2.0461933612823486, |
|
"rewards/margins": 0.18945762515068054, |
|
"rewards/rejected": -2.2356507778167725, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.9512656517430862, |
|
"grad_norm": 16.97821645636938, |
|
"learning_rate": 8.627786747006144e-08, |
|
"logits/chosen": -2.3651280403137207, |
|
"logits/rejected": -2.3614325523376465, |
|
"logps/chosen": -1.028911828994751, |
|
"logps/rejected": -1.1648304462432861, |
|
"loss": 1.6105, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -2.057823657989502, |
|
"rewards/margins": 0.2718368470668793, |
|
"rewards/rejected": -2.3296608924865723, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.9656787676785875, |
|
"grad_norm": 18.242383473952547, |
|
"learning_rate": 8.569503517418104e-08, |
|
"logits/chosen": -2.3506455421447754, |
|
"logits/rejected": -2.346644401550293, |
|
"logps/chosen": -1.038861870765686, |
|
"logps/rejected": -1.1740354299545288, |
|
"loss": 1.6204, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -2.077723741531372, |
|
"rewards/margins": 0.27034711837768555, |
|
"rewards/rejected": -2.3480708599090576, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.9800918836140888, |
|
"grad_norm": 15.51076376279893, |
|
"learning_rate": 8.510213936138402e-08, |
|
"logits/chosen": -2.3083348274230957, |
|
"logits/rejected": -2.3014862537384033, |
|
"logps/chosen": -0.9869492650032043, |
|
"logps/rejected": -1.0866016149520874, |
|
"loss": 1.6735, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -1.9738985300064087, |
|
"rewards/margins": 0.19930467009544373, |
|
"rewards/rejected": -2.173203229904175, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.9945049995495902, |
|
"grad_norm": 17.843639653030788, |
|
"learning_rate": 8.449934718704685e-08, |
|
"logits/chosen": -2.3410897254943848, |
|
"logits/rejected": -2.334183692932129, |
|
"logps/chosen": -1.02655827999115, |
|
"logps/rejected": -1.1379454135894775, |
|
"loss": 1.6428, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -2.0531165599823, |
|
"rewards/margins": 0.22277435660362244, |
|
"rewards/rejected": -2.275890827178955, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 1.0089181154850915, |
|
"grad_norm": 18.24062737002371, |
|
"learning_rate": 8.388682859663152e-08, |
|
"logits/chosen": -2.3235275745391846, |
|
"logits/rejected": -2.323727607727051, |
|
"logps/chosen": -1.0423524379730225, |
|
"logps/rejected": -1.1892979145050049, |
|
"loss": 1.6146, |
|
"rewards/accuracies": 0.590624988079071, |
|
"rewards/chosen": -2.084704875946045, |
|
"rewards/margins": 0.2938912510871887, |
|
"rewards/rejected": -2.3785958290100098, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.0233312314205927, |
|
"grad_norm": 18.30818756183919, |
|
"learning_rate": 8.326475627777277e-08, |
|
"logits/chosen": -2.3337440490722656, |
|
"logits/rejected": -2.3330025672912598, |
|
"logps/chosen": -1.0714682340621948, |
|
"logps/rejected": -1.2082436084747314, |
|
"loss": 1.6339, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -2.1429364681243896, |
|
"rewards/margins": 0.27355074882507324, |
|
"rewards/rejected": -2.416487216949463, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 1.037744347356094, |
|
"grad_norm": 17.423864156378112, |
|
"learning_rate": 8.26333056115922e-08, |
|
"logits/chosen": -2.373300075531006, |
|
"logits/rejected": -2.3668229579925537, |
|
"logps/chosen": -1.0338383913040161, |
|
"logps/rejected": -1.1421548128128052, |
|
"loss": 1.6639, |
|
"rewards/accuracies": 0.559374988079071, |
|
"rewards/chosen": -2.0676767826080322, |
|
"rewards/margins": 0.21663276851177216, |
|
"rewards/rejected": -2.2843096256256104, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 1.0521574632915953, |
|
"grad_norm": 16.635043052348962, |
|
"learning_rate": 8.1992654623253e-08, |
|
"logits/chosen": -2.3428361415863037, |
|
"logits/rejected": -2.33913516998291, |
|
"logps/chosen": -1.009476900100708, |
|
"logps/rejected": -1.1869137287139893, |
|
"loss": 1.559, |
|
"rewards/accuracies": 0.621874988079071, |
|
"rewards/chosen": -2.018953800201416, |
|
"rewards/margins": 0.35487350821495056, |
|
"rewards/rejected": -2.3738274574279785, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 1.0665705792270967, |
|
"grad_norm": 19.25205105759611, |
|
"learning_rate": 8.134298393176915e-08, |
|
"logits/chosen": -2.301328420639038, |
|
"logits/rejected": -2.2953743934631348, |
|
"logps/chosen": -0.9850282669067383, |
|
"logps/rejected": -1.131919264793396, |
|
"loss": 1.6056, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -1.9700565338134766, |
|
"rewards/margins": 0.2937820851802826, |
|
"rewards/rejected": -2.263838529586792, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 1.080983695162598, |
|
"grad_norm": 16.04856542856117, |
|
"learning_rate": 8.068447669908356e-08, |
|
"logits/chosen": -2.306058168411255, |
|
"logits/rejected": -2.294712781906128, |
|
"logps/chosen": -1.06520676612854, |
|
"logps/rejected": -1.1720651388168335, |
|
"loss": 1.6557, |
|
"rewards/accuracies": 0.565625011920929, |
|
"rewards/chosen": -2.13041353225708, |
|
"rewards/margins": 0.21371681988239288, |
|
"rewards/rejected": -2.344130277633667, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 1.0953968110980994, |
|
"grad_norm": 14.81697278342191, |
|
"learning_rate": 8.001731857842906e-08, |
|
"logits/chosen": -2.317549705505371, |
|
"logits/rejected": -2.3219799995422363, |
|
"logps/chosen": -1.0585771799087524, |
|
"logps/rejected": -1.1321176290512085, |
|
"loss": 1.7105, |
|
"rewards/accuracies": 0.528124988079071, |
|
"rewards/chosen": -2.117154359817505, |
|
"rewards/margins": 0.14708088338375092, |
|
"rewards/rejected": -2.264235258102417, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 1.1098099270336006, |
|
"grad_norm": 19.08608533403698, |
|
"learning_rate": 7.934169766198712e-08, |
|
"logits/chosen": -2.347382068634033, |
|
"logits/rejected": -2.3347859382629395, |
|
"logps/chosen": -0.9919846653938293, |
|
"logps/rejected": -1.155458688735962, |
|
"loss": 1.5702, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.9839693307876587, |
|
"rewards/margins": 0.32694780826568604, |
|
"rewards/rejected": -2.310917377471924, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 1.1242230429691018, |
|
"grad_norm": 21.782769163652045, |
|
"learning_rate": 7.86578044278589e-08, |
|
"logits/chosen": -2.3568646907806396, |
|
"logits/rejected": -2.350098133087158, |
|
"logps/chosen": -1.0653258562088013, |
|
"logps/rejected": -1.2129188776016235, |
|
"loss": 1.6052, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -2.1306517124176025, |
|
"rewards/margins": 0.29518604278564453, |
|
"rewards/rejected": -2.425837755203247, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 1.1386361589046032, |
|
"grad_norm": 20.459222597520984, |
|
"learning_rate": 7.796583168636375e-08, |
|
"logits/chosen": -2.3612263202667236, |
|
"logits/rejected": -2.3560619354248047, |
|
"logps/chosen": -1.0090010166168213, |
|
"logps/rejected": -1.1769835948944092, |
|
"loss": 1.5759, |
|
"rewards/accuracies": 0.6343749761581421, |
|
"rewards/chosen": -2.0180020332336426, |
|
"rewards/margins": 0.3359653949737549, |
|
"rewards/rejected": -2.3539671897888184, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 1.1530492748401044, |
|
"grad_norm": 16.695222101185497, |
|
"learning_rate": 7.726597452568007e-08, |
|
"logits/chosen": -2.3381145000457764, |
|
"logits/rejected": -2.3316009044647217, |
|
"logps/chosen": -1.0254031419754028, |
|
"logps/rejected": -1.16634202003479, |
|
"loss": 1.6068, |
|
"rewards/accuracies": 0.590624988079071, |
|
"rewards/chosen": -2.0508062839508057, |
|
"rewards/margins": 0.2818780839443207, |
|
"rewards/rejected": -2.33268404006958, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.1674623907756059, |
|
"grad_norm": 17.52985696830486, |
|
"learning_rate": 7.655843025684402e-08, |
|
"logits/chosen": -2.3598532676696777, |
|
"logits/rejected": -2.362898349761963, |
|
"logps/chosen": -1.044235348701477, |
|
"logps/rejected": -1.1720434427261353, |
|
"loss": 1.6296, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -2.088470697402954, |
|
"rewards/margins": 0.25561633706092834, |
|
"rewards/rejected": -2.3440868854522705, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 1.181875506711107, |
|
"grad_norm": 14.910484844275423, |
|
"learning_rate": 7.584339835812151e-08, |
|
"logits/chosen": -2.3223514556884766, |
|
"logits/rejected": -2.323925495147705, |
|
"logps/chosen": -1.0323957204818726, |
|
"logps/rejected": -1.1369130611419678, |
|
"loss": 1.6678, |
|
"rewards/accuracies": 0.5718749761581421, |
|
"rewards/chosen": -2.064791440963745, |
|
"rewards/margins": 0.20903484523296356, |
|
"rewards/rejected": -2.2738261222839355, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 1.1962886226466085, |
|
"grad_norm": 16.58257930662513, |
|
"learning_rate": 7.512108041876924e-08, |
|
"logits/chosen": -2.2956104278564453, |
|
"logits/rejected": -2.298205852508545, |
|
"logps/chosen": -0.9996700286865234, |
|
"logps/rejected": -1.1152664422988892, |
|
"loss": 1.6512, |
|
"rewards/accuracies": 0.578125, |
|
"rewards/chosen": -1.9993400573730469, |
|
"rewards/margins": 0.23119251430034637, |
|
"rewards/rejected": -2.2305328845977783, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 1.2107017385821097, |
|
"grad_norm": 16.103489416598062, |
|
"learning_rate": 7.439168008220056e-08, |
|
"logits/chosen": -2.333143949508667, |
|
"logits/rejected": -2.327017068862915, |
|
"logps/chosen": -1.0302656888961792, |
|
"logps/rejected": -1.1976699829101562, |
|
"loss": 1.5731, |
|
"rewards/accuracies": 0.621874988079071, |
|
"rewards/chosen": -2.0605313777923584, |
|
"rewards/margins": 0.3348085880279541, |
|
"rewards/rejected": -2.3953399658203125, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 1.225114854517611, |
|
"grad_norm": 17.820096880219356, |
|
"learning_rate": 7.365540298857215e-08, |
|
"logits/chosen": -2.3323662281036377, |
|
"logits/rejected": -2.3332276344299316, |
|
"logps/chosen": -1.0587284564971924, |
|
"logps/rejected": -1.2181167602539062, |
|
"loss": 1.5796, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -2.1174569129943848, |
|
"rewards/margins": 0.3187769949436188, |
|
"rewards/rejected": -2.4362335205078125, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 1.2395279704531124, |
|
"grad_norm": 18.066090520662634, |
|
"learning_rate": 7.291245671680781e-08, |
|
"logits/chosen": -2.3100619316101074, |
|
"logits/rejected": -2.3028578758239746, |
|
"logps/chosen": -0.9891204833984375, |
|
"logps/rejected": -1.1562236547470093, |
|
"loss": 1.5852, |
|
"rewards/accuracies": 0.5843750238418579, |
|
"rewards/chosen": -1.978240966796875, |
|
"rewards/margins": 0.3342065215110779, |
|
"rewards/rejected": -2.3124473094940186, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 1.2539410863886136, |
|
"grad_norm": 16.723867521490277, |
|
"learning_rate": 7.216305072607568e-08, |
|
"logits/chosen": -2.3490469455718994, |
|
"logits/rejected": -2.351792812347412, |
|
"logps/chosen": -1.0800100564956665, |
|
"logps/rejected": -1.2314789295196533, |
|
"loss": 1.6035, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -2.160020112991333, |
|
"rewards/margins": 0.30293765664100647, |
|
"rewards/rejected": -2.4629578590393066, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 1.268354202324115, |
|
"grad_norm": 19.696376219540245, |
|
"learning_rate": 7.14073962967353e-08, |
|
"logits/chosen": -2.361971855163574, |
|
"logits/rejected": -2.3552968502044678, |
|
"logps/chosen": -1.1068134307861328, |
|
"logps/rejected": -1.2376269102096558, |
|
"loss": 1.6428, |
|
"rewards/accuracies": 0.565625011920929, |
|
"rewards/chosen": -2.2136268615722656, |
|
"rewards/margins": 0.2616268992424011, |
|
"rewards/rejected": -2.4752538204193115, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 1.2827673182596162, |
|
"grad_norm": 18.939981579389148, |
|
"learning_rate": 7.064570647077124e-08, |
|
"logits/chosen": -2.34350848197937, |
|
"logits/rejected": -2.335470676422119, |
|
"logps/chosen": -1.1084269285202026, |
|
"logps/rejected": -1.230513095855713, |
|
"loss": 1.6428, |
|
"rewards/accuracies": 0.6031249761581421, |
|
"rewards/chosen": -2.2168538570404053, |
|
"rewards/margins": 0.24417249858379364, |
|
"rewards/rejected": -2.461026191711426, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 1.2971804341951176, |
|
"grad_norm": 18.47019854160618, |
|
"learning_rate": 6.987819599173006e-08, |
|
"logits/chosen": -2.3356449604034424, |
|
"logits/rejected": -2.331501007080078, |
|
"logps/chosen": -1.0205782651901245, |
|
"logps/rejected": -1.1818567514419556, |
|
"loss": 1.588, |
|
"rewards/accuracies": 0.596875011920929, |
|
"rewards/chosen": -2.041156530380249, |
|
"rewards/margins": 0.322556734085083, |
|
"rewards/rejected": -2.363713502883911, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.3115935501306188, |
|
"grad_norm": 21.06251591954156, |
|
"learning_rate": 6.910508124417765e-08, |
|
"logits/chosen": -2.3116612434387207, |
|
"logits/rejected": -2.311708927154541, |
|
"logps/chosen": -1.0073387622833252, |
|
"logps/rejected": -1.1689893007278442, |
|
"loss": 1.5949, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -2.0146775245666504, |
|
"rewards/margins": 0.32330113649368286, |
|
"rewards/rejected": -2.3379786014556885, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 1.32600666606612, |
|
"grad_norm": 15.75888959059691, |
|
"learning_rate": 6.832658019269373e-08, |
|
"logits/chosen": -2.2905359268188477, |
|
"logits/rejected": -2.285813808441162, |
|
"logps/chosen": -1.017747402191162, |
|
"logps/rejected": -1.1801689863204956, |
|
"loss": 1.5957, |
|
"rewards/accuracies": 0.6031249761581421, |
|
"rewards/chosen": -2.035494804382324, |
|
"rewards/margins": 0.3248431086540222, |
|
"rewards/rejected": -2.360337972640991, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 1.3404197820016215, |
|
"grad_norm": 16.36860064354464, |
|
"learning_rate": 6.75429123204211e-08, |
|
"logits/chosen": -2.3322787284851074, |
|
"logits/rejected": -2.325899600982666, |
|
"logps/chosen": -1.0550917387008667, |
|
"logps/rejected": -1.2269432544708252, |
|
"loss": 1.5757, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -2.1101834774017334, |
|
"rewards/margins": 0.34370261430740356, |
|
"rewards/rejected": -2.4538865089416504, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 1.354832897937123, |
|
"grad_norm": 15.89341720744674, |
|
"learning_rate": 6.675429856718652e-08, |
|
"logits/chosen": -2.302473306655884, |
|
"logits/rejected": -2.292829990386963, |
|
"logps/chosen": -0.9993384480476379, |
|
"logps/rejected": -1.1607972383499146, |
|
"loss": 1.5858, |
|
"rewards/accuracies": 0.609375, |
|
"rewards/chosen": -1.9986768960952759, |
|
"rewards/margins": 0.3229173719882965, |
|
"rewards/rejected": -2.321594476699829, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 1.3692460138726241, |
|
"grad_norm": 16.669054151143325, |
|
"learning_rate": 6.596096126721123e-08, |
|
"logits/chosen": -2.273181200027466, |
|
"logits/rejected": -2.2777457237243652, |
|
"logps/chosen": -1.0447285175323486, |
|
"logps/rejected": -1.2103157043457031, |
|
"loss": 1.5821, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -2.0894570350646973, |
|
"rewards/margins": 0.3311743438243866, |
|
"rewards/rejected": -2.4206314086914062, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 1.3836591298081253, |
|
"grad_norm": 15.868141254654335, |
|
"learning_rate": 6.516312408642804e-08, |
|
"logits/chosen": -2.322033405303955, |
|
"logits/rejected": -2.3260583877563477, |
|
"logps/chosen": -1.0269404649734497, |
|
"logps/rejected": -1.217023491859436, |
|
"loss": 1.543, |
|
"rewards/accuracies": 0.621874988079071, |
|
"rewards/chosen": -2.0538809299468994, |
|
"rewards/margins": 0.3801659941673279, |
|
"rewards/rejected": -2.434046983718872, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 1.3980722457436268, |
|
"grad_norm": 19.7395273688106, |
|
"learning_rate": 6.436101195942312e-08, |
|
"logits/chosen": -2.3190536499023438, |
|
"logits/rejected": -2.321190357208252, |
|
"logps/chosen": -1.0408755540847778, |
|
"logps/rejected": -1.1574127674102783, |
|
"loss": 1.6495, |
|
"rewards/accuracies": 0.596875011920929, |
|
"rewards/chosen": -2.0817511081695557, |
|
"rewards/margins": 0.23307427763938904, |
|
"rewards/rejected": -2.3148255348205566, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 1.412485361679128, |
|
"grad_norm": 17.85424182086385, |
|
"learning_rate": 6.35548510260201e-08, |
|
"logits/chosen": -2.2950663566589355, |
|
"logits/rejected": -2.290828227996826, |
|
"logps/chosen": -1.015590786933899, |
|
"logps/rejected": -1.1845998764038086, |
|
"loss": 1.5815, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -2.031181573867798, |
|
"rewards/margins": 0.33801814913749695, |
|
"rewards/rejected": -2.369199752807617, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 1.4268984776146292, |
|
"grad_norm": 17.0591983972092, |
|
"learning_rate": 6.274486856752442e-08, |
|
"logits/chosen": -2.3268628120422363, |
|
"logits/rejected": -2.3215243816375732, |
|
"logps/chosen": -1.054785132408142, |
|
"logps/rejected": -1.2332737445831299, |
|
"loss": 1.5786, |
|
"rewards/accuracies": 0.6031249761581421, |
|
"rewards/chosen": -2.109570264816284, |
|
"rewards/margins": 0.35697704553604126, |
|
"rewards/rejected": -2.4665474891662598, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 1.4413115935501306, |
|
"grad_norm": 15.976591290404047, |
|
"learning_rate": 6.193129294264568e-08, |
|
"logits/chosen": -2.3251538276672363, |
|
"logits/rejected": -2.319453477859497, |
|
"logps/chosen": -1.0316834449768066, |
|
"logps/rejected": -1.2238515615463257, |
|
"loss": 1.549, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -2.0633668899536133, |
|
"rewards/margins": 0.3843366503715515, |
|
"rewards/rejected": -2.4477031230926514, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.455724709485632, |
|
"grad_norm": 17.378099075031535, |
|
"learning_rate": 6.111435352311653e-08, |
|
"logits/chosen": -2.3224568367004395, |
|
"logits/rejected": -2.318516254425049, |
|
"logps/chosen": -1.044806718826294, |
|
"logps/rejected": -1.204319715499878, |
|
"loss": 1.5956, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -2.089613437652588, |
|
"rewards/margins": 0.3190259337425232, |
|
"rewards/rejected": -2.408639430999756, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 1.4701378254211332, |
|
"grad_norm": 18.355317239262256, |
|
"learning_rate": 6.02942806290257e-08, |
|
"logits/chosen": -2.337299346923828, |
|
"logits/rejected": -2.334476947784424, |
|
"logps/chosen": -1.0204999446868896, |
|
"logps/rejected": -1.182180404663086, |
|
"loss": 1.5882, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -2.0409998893737793, |
|
"rewards/margins": 0.3233610987663269, |
|
"rewards/rejected": -2.364360809326172, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 1.4845509413566345, |
|
"grad_norm": 16.458819438737027, |
|
"learning_rate": 5.947130546388376e-08, |
|
"logits/chosen": -2.307170867919922, |
|
"logits/rejected": -2.297262668609619, |
|
"logps/chosen": -1.1198623180389404, |
|
"logps/rejected": -1.2803127765655518, |
|
"loss": 1.6069, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -2.239724636077881, |
|
"rewards/margins": 0.32090049982070923, |
|
"rewards/rejected": -2.5606255531311035, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 1.4989640572921359, |
|
"grad_norm": 18.315663658527253, |
|
"learning_rate": 5.864566004943983e-08, |
|
"logits/chosen": -2.3090689182281494, |
|
"logits/rejected": -2.299919605255127, |
|
"logps/chosen": -1.1342939138412476, |
|
"logps/rejected": -1.2915699481964111, |
|
"loss": 1.5918, |
|
"rewards/accuracies": 0.6156250238418579, |
|
"rewards/chosen": -2.268587827682495, |
|
"rewards/margins": 0.3145517408847809, |
|
"rewards/rejected": -2.5831398963928223, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 1.513377173227637, |
|
"grad_norm": 18.253777248388865, |
|
"learning_rate": 5.78175771602676e-08, |
|
"logits/chosen": -2.3258557319641113, |
|
"logits/rejected": -2.329089641571045, |
|
"logps/chosen": -1.0340855121612549, |
|
"logps/rejected": -1.1988188028335571, |
|
"loss": 1.5903, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -2.0681710243225098, |
|
"rewards/margins": 0.3294665813446045, |
|
"rewards/rejected": -2.3976376056671143, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 1.5277902891631383, |
|
"grad_norm": 20.03722300524917, |
|
"learning_rate": 5.6987290258139073e-08, |
|
"logits/chosen": -2.269885301589966, |
|
"logits/rejected": -2.2610838413238525, |
|
"logps/chosen": -1.0655957460403442, |
|
"logps/rejected": -1.2299748659133911, |
|
"loss": 1.5939, |
|
"rewards/accuracies": 0.621874988079071, |
|
"rewards/chosen": -2.1311914920806885, |
|
"rewards/margins": 0.3287580609321594, |
|
"rewards/rejected": -2.4599497318267822, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 1.5422034050986397, |
|
"grad_norm": 19.363745969848598, |
|
"learning_rate": 5.6155033426204615e-08, |
|
"logits/chosen": -2.3013463020324707, |
|
"logits/rejected": -2.30194091796875, |
|
"logps/chosen": -1.1020151376724243, |
|
"logps/rejected": -1.2730225324630737, |
|
"loss": 1.58, |
|
"rewards/accuracies": 0.6031249761581421, |
|
"rewards/chosen": -2.2040302753448486, |
|
"rewards/margins": 0.34201496839523315, |
|
"rewards/rejected": -2.5460450649261475, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 1.5566165210341412, |
|
"grad_norm": 21.243971440197193, |
|
"learning_rate": 5.532104130299771e-08, |
|
"logits/chosen": -2.306084632873535, |
|
"logits/rejected": -2.3026065826416016, |
|
"logps/chosen": -1.1136653423309326, |
|
"logps/rejected": -1.253650426864624, |
|
"loss": 1.6339, |
|
"rewards/accuracies": 0.5843750238418579, |
|
"rewards/chosen": -2.2273306846618652, |
|
"rewards/margins": 0.27996987104415894, |
|
"rewards/rejected": -2.507300853729248, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 1.5710296369696424, |
|
"grad_norm": 18.884950972549078, |
|
"learning_rate": 5.448554901628333e-08, |
|
"logits/chosen": -2.3047351837158203, |
|
"logits/rejected": -2.30297589302063, |
|
"logps/chosen": -1.057666301727295, |
|
"logps/rejected": -1.2256438732147217, |
|
"loss": 1.5844, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -2.11533260345459, |
|
"rewards/margins": 0.3359553813934326, |
|
"rewards/rejected": -2.4512877464294434, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 1.5854427529051436, |
|
"grad_norm": 16.26327515212116, |
|
"learning_rate": 5.364879211676816e-08, |
|
"logits/chosen": -2.3229575157165527, |
|
"logits/rejected": -2.322633743286133, |
|
"logps/chosen": -1.0644395351409912, |
|
"logps/rejected": -1.2588599920272827, |
|
"loss": 1.5435, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -2.1288790702819824, |
|
"rewards/margins": 0.3888412117958069, |
|
"rewards/rejected": -2.5177199840545654, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 1.599855868840645, |
|
"grad_norm": 16.929494402078088, |
|
"learning_rate": 5.281100651169175e-08, |
|
"logits/chosen": -2.3269693851470947, |
|
"logits/rejected": -2.329103946685791, |
|
"logps/chosen": -1.1110026836395264, |
|
"logps/rejected": -1.3049942255020142, |
|
"loss": 1.5754, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -2.2220053672790527, |
|
"rewards/margins": 0.3879828453063965, |
|
"rewards/rejected": -2.6099884510040283, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 1.6142689847761464, |
|
"grad_norm": 19.384751167038143, |
|
"learning_rate": 5.197242839831706e-08, |
|
"logits/chosen": -2.2902255058288574, |
|
"logits/rejected": -2.2878143787384033, |
|
"logps/chosen": -1.0505023002624512, |
|
"logps/rejected": -1.2497543096542358, |
|
"loss": 1.5559, |
|
"rewards/accuracies": 0.640625, |
|
"rewards/chosen": -2.1010046005249023, |
|
"rewards/margins": 0.39850395917892456, |
|
"rewards/rejected": -2.4995086193084717, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 1.6286821007116477, |
|
"grad_norm": 21.020671773840373, |
|
"learning_rate": 5.1133294197339274e-08, |
|
"logits/chosen": -2.3327059745788574, |
|
"logits/rejected": -2.3221957683563232, |
|
"logps/chosen": -1.0784157514572144, |
|
"logps/rejected": -1.2418811321258545, |
|
"loss": 1.6035, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -2.1568315029144287, |
|
"rewards/margins": 0.3269307017326355, |
|
"rewards/rejected": -2.483762264251709, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 1.6430952166471489, |
|
"grad_norm": 21.249031332264607, |
|
"learning_rate": 5.029384048623153e-08, |
|
"logits/chosen": -2.2892603874206543, |
|
"logits/rejected": -2.2838594913482666, |
|
"logps/chosen": -1.1238863468170166, |
|
"logps/rejected": -1.2816271781921387, |
|
"loss": 1.5968, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -2.247772693634033, |
|
"rewards/margins": 0.3154818117618561, |
|
"rewards/rejected": -2.5632543563842773, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 1.6575083325826503, |
|
"grad_norm": 18.66086972186176, |
|
"learning_rate": 4.9454303932546675e-08, |
|
"logits/chosen": -2.28279447555542, |
|
"logits/rejected": -2.2724807262420654, |
|
"logps/chosen": -1.0907418727874756, |
|
"logps/rejected": -1.2298866510391235, |
|
"loss": 1.6405, |
|
"rewards/accuracies": 0.578125, |
|
"rewards/chosen": -2.181483745574951, |
|
"rewards/margins": 0.2782895267009735, |
|
"rewards/rejected": -2.459773302078247, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 1.6719214485181515, |
|
"grad_norm": 19.50349240348182, |
|
"learning_rate": 4.861492122719338e-08, |
|
"logits/chosen": -2.319563388824463, |
|
"logits/rejected": -2.3177480697631836, |
|
"logps/chosen": -1.0951299667358398, |
|
"logps/rejected": -1.260750651359558, |
|
"loss": 1.6022, |
|
"rewards/accuracies": 0.5843750238418579, |
|
"rewards/chosen": -2.1902599334716797, |
|
"rewards/margins": 0.3312414586544037, |
|
"rewards/rejected": -2.521501302719116, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 1.6863345644536527, |
|
"grad_norm": 17.58127266536524, |
|
"learning_rate": 4.777592901770575e-08, |
|
"logits/chosen": -2.327413558959961, |
|
"logits/rejected": -2.3294601440429688, |
|
"logps/chosen": -1.0109418630599976, |
|
"logps/rejected": -1.214444637298584, |
|
"loss": 1.5519, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -2.021883726119995, |
|
"rewards/margins": 0.4070053994655609, |
|
"rewards/rejected": -2.428889274597168, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 1.7007476803891541, |
|
"grad_norm": 16.893442050436466, |
|
"learning_rate": 4.693756384152529e-08, |
|
"logits/chosen": -2.290790557861328, |
|
"logits/rejected": -2.2821555137634277, |
|
"logps/chosen": -1.0620388984680176, |
|
"logps/rejected": -1.2741947174072266, |
|
"loss": 1.5403, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -2.124077796936035, |
|
"rewards/margins": 0.42431193590164185, |
|
"rewards/rejected": -2.548389434814453, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 1.7151607963246556, |
|
"grad_norm": 16.76150597577845, |
|
"learning_rate": 4.610006205931365e-08, |
|
"logits/chosen": -2.334803342819214, |
|
"logits/rejected": -2.3295693397521973, |
|
"logps/chosen": -1.1866618394851685, |
|
"logps/rejected": -1.3234022855758667, |
|
"loss": 1.6392, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -2.373323678970337, |
|
"rewards/margins": 0.2734811305999756, |
|
"rewards/rejected": -2.6468045711517334, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 1.7295739122601568, |
|
"grad_norm": 24.57055189161366, |
|
"learning_rate": 4.526365978831551e-08, |
|
"logits/chosen": -2.3130276203155518, |
|
"logits/rejected": -2.30517578125, |
|
"logps/chosen": -1.1128777265548706, |
|
"logps/rejected": -1.3150999546051025, |
|
"loss": 1.556, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -2.225755453109741, |
|
"rewards/margins": 0.40444430708885193, |
|
"rewards/rejected": -2.630199909210205, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.743987028195658, |
|
"grad_norm": 19.26814679538138, |
|
"learning_rate": 4.442859283578981e-08, |
|
"logits/chosen": -2.312147617340088, |
|
"logits/rejected": -2.3039205074310303, |
|
"logps/chosen": -1.0945560932159424, |
|
"logps/rejected": -1.2648680210113525, |
|
"loss": 1.6149, |
|
"rewards/accuracies": 0.596875011920929, |
|
"rewards/chosen": -2.1891121864318848, |
|
"rewards/margins": 0.3406239151954651, |
|
"rewards/rejected": -2.529736042022705, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 1.7584001441311594, |
|
"grad_norm": 18.13222142013933, |
|
"learning_rate": 4.359509663252864e-08, |
|
"logits/chosen": -2.289947986602783, |
|
"logits/rejected": -2.2836596965789795, |
|
"logps/chosen": -1.0912672281265259, |
|
"logps/rejected": -1.261278748512268, |
|
"loss": 1.5891, |
|
"rewards/accuracies": 0.6156250238418579, |
|
"rewards/chosen": -2.1825344562530518, |
|
"rewards/margins": 0.3400228023529053, |
|
"rewards/rejected": -2.522557497024536, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 1.7728132600666606, |
|
"grad_norm": 19.057526927248425, |
|
"learning_rate": 4.276340616648198e-08, |
|
"logits/chosen": -2.341885566711426, |
|
"logits/rejected": -2.3356499671936035, |
|
"logps/chosen": -1.10612154006958, |
|
"logps/rejected": -1.2711408138275146, |
|
"loss": 1.6144, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -2.21224308013916, |
|
"rewards/margins": 0.33003857731819153, |
|
"rewards/rejected": -2.5422816276550293, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 1.7872263760021618, |
|
"grad_norm": 19.76084929838562, |
|
"learning_rate": 4.193375591650758e-08, |
|
"logits/chosen": -2.3344829082489014, |
|
"logits/rejected": -2.3287951946258545, |
|
"logps/chosen": -1.1671698093414307, |
|
"logps/rejected": -1.3440189361572266, |
|
"loss": 1.6093, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -2.3343396186828613, |
|
"rewards/margins": 0.353698194026947, |
|
"rewards/rejected": -2.688037872314453, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 1.8016394919376633, |
|
"grad_norm": 19.067146028274564, |
|
"learning_rate": 4.110637978626415e-08, |
|
"logits/chosen": -2.298180341720581, |
|
"logits/rejected": -2.2934188842773438, |
|
"logps/chosen": -1.030287504196167, |
|
"logps/rejected": -1.2465605735778809, |
|
"loss": 1.5146, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -2.060575008392334, |
|
"rewards/margins": 0.43254607915878296, |
|
"rewards/rejected": -2.4931211471557617, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 1.8160526078731647, |
|
"grad_norm": 18.276378668755576, |
|
"learning_rate": 4.0281511038266867e-08, |
|
"logits/chosen": -2.234718084335327, |
|
"logits/rejected": -2.2318148612976074, |
|
"logps/chosen": -1.0859392881393433, |
|
"logps/rejected": -1.2924591302871704, |
|
"loss": 1.5609, |
|
"rewards/accuracies": 0.6031249761581421, |
|
"rewards/chosen": -2.1718785762786865, |
|
"rewards/margins": 0.41303977370262146, |
|
"rewards/rejected": -2.584918260574341, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 1.830465723808666, |
|
"grad_norm": 17.470784593739236, |
|
"learning_rate": 3.9459382228123475e-08, |
|
"logits/chosen": -2.279468059539795, |
|
"logits/rejected": -2.273711919784546, |
|
"logps/chosen": -1.0365493297576904, |
|
"logps/rejected": -1.2447311878204346, |
|
"loss": 1.5556, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -2.073098659515381, |
|
"rewards/margins": 0.41636401414871216, |
|
"rewards/rejected": -2.489462375640869, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 1.844878839744167, |
|
"grad_norm": 21.830692496447263, |
|
"learning_rate": 3.864022513896989e-08, |
|
"logits/chosen": -2.2853286266326904, |
|
"logits/rejected": -2.2701587677001953, |
|
"logps/chosen": -1.0575942993164062, |
|
"logps/rejected": -1.2254334688186646, |
|
"loss": 1.6005, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -2.1151885986328125, |
|
"rewards/margins": 0.3356781005859375, |
|
"rewards/rejected": -2.450866937637329, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 1.8592919556796685, |
|
"grad_norm": 20.0916366903334, |
|
"learning_rate": 3.782427071612339e-08, |
|
"logits/chosen": -2.3116753101348877, |
|
"logits/rejected": -2.306715488433838, |
|
"logps/chosen": -1.1340314149856567, |
|
"logps/rejected": -1.3019399642944336, |
|
"loss": 1.5867, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -2.2680628299713135, |
|
"rewards/margins": 0.33581703901290894, |
|
"rewards/rejected": -2.603879928588867, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 1.87370507161517, |
|
"grad_norm": 22.477485924506297, |
|
"learning_rate": 3.7011749001972174e-08, |
|
"logits/chosen": -2.3057870864868164, |
|
"logits/rejected": -2.3034915924072266, |
|
"logps/chosen": -1.053118348121643, |
|
"logps/rejected": -1.2349039316177368, |
|
"loss": 1.5867, |
|
"rewards/accuracies": 0.559374988079071, |
|
"rewards/chosen": -2.106236696243286, |
|
"rewards/margins": 0.36357131600379944, |
|
"rewards/rejected": -2.4698078632354736, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 1.888118187550671, |
|
"grad_norm": 20.557013864835106, |
|
"learning_rate": 3.620288907111931e-08, |
|
"logits/chosen": -2.277376651763916, |
|
"logits/rejected": -2.272871255874634, |
|
"logps/chosen": -1.096543312072754, |
|
"logps/rejected": -1.3053501844406128, |
|
"loss": 1.5318, |
|
"rewards/accuracies": 0.590624988079071, |
|
"rewards/chosen": -2.193086624145508, |
|
"rewards/margins": 0.4176138937473297, |
|
"rewards/rejected": -2.6107003688812256, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 1.9025313034861724, |
|
"grad_norm": 27.1695631827936, |
|
"learning_rate": 3.539791896579978e-08, |
|
"logits/chosen": -2.317373752593994, |
|
"logits/rejected": -2.318577289581299, |
|
"logps/chosen": -1.2034056186676025, |
|
"logps/rejected": -1.344125747680664, |
|
"loss": 1.6377, |
|
"rewards/accuracies": 0.5718749761581421, |
|
"rewards/chosen": -2.406811237335205, |
|
"rewards/margins": 0.28144046664237976, |
|
"rewards/rejected": -2.688251495361328, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 1.9169444194216738, |
|
"grad_norm": 25.106064057973505, |
|
"learning_rate": 3.459706563158828e-08, |
|
"logits/chosen": -2.279590129852295, |
|
"logits/rejected": -2.281261682510376, |
|
"logps/chosen": -1.1769063472747803, |
|
"logps/rejected": -1.3924826383590698, |
|
"loss": 1.5341, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -2.3538126945495605, |
|
"rewards/margins": 0.4311525821685791, |
|
"rewards/rejected": -2.7849652767181396, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 1.931357535357175, |
|
"grad_norm": 20.192509452290462, |
|
"learning_rate": 3.380055485341644e-08, |
|
"logits/chosen": -2.314013957977295, |
|
"logits/rejected": -2.3160252571105957, |
|
"logps/chosen": -1.1351264715194702, |
|
"logps/rejected": -1.3126869201660156, |
|
"loss": 1.5828, |
|
"rewards/accuracies": 0.621874988079071, |
|
"rewards/chosen": -2.2702529430389404, |
|
"rewards/margins": 0.35512077808380127, |
|
"rewards/rejected": -2.6253738403320312, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 1.9457706512926762, |
|
"grad_norm": 22.1205875163306, |
|
"learning_rate": 3.300861119191718e-08, |
|
"logits/chosen": -2.2895724773406982, |
|
"logits/rejected": -2.283412456512451, |
|
"logps/chosen": -1.179337739944458, |
|
"logps/rejected": -1.3338556289672852, |
|
"loss": 1.6304, |
|
"rewards/accuracies": 0.528124988079071, |
|
"rewards/chosen": -2.358675479888916, |
|
"rewards/margins": 0.30903515219688416, |
|
"rewards/rejected": -2.6677112579345703, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 1.9601837672281777, |
|
"grad_norm": 21.26891098809936, |
|
"learning_rate": 3.2221457920114213e-08, |
|
"logits/chosen": -2.307619094848633, |
|
"logits/rejected": -2.3046841621398926, |
|
"logps/chosen": -1.1182931661605835, |
|
"logps/rejected": -1.3411715030670166, |
|
"loss": 1.5205, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -2.236586332321167, |
|
"rewards/margins": 0.4457565248012543, |
|
"rewards/rejected": -2.682343006134033, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 1.974596883163679, |
|
"grad_norm": 23.3986392290044, |
|
"learning_rate": 3.143931696047454e-08, |
|
"logits/chosen": -2.302565813064575, |
|
"logits/rejected": -2.298037528991699, |
|
"logps/chosen": -1.0839837789535522, |
|
"logps/rejected": -1.2788712978363037, |
|
"loss": 1.559, |
|
"rewards/accuracies": 0.6156250238418579, |
|
"rewards/chosen": -2.1679675579071045, |
|
"rewards/margins": 0.38977518677711487, |
|
"rewards/rejected": -2.5577425956726074, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 1.9890099990991803, |
|
"grad_norm": 19.075694699589782, |
|
"learning_rate": 3.066240882234186e-08, |
|
"logits/chosen": -2.306809663772583, |
|
"logits/rejected": -2.3090083599090576, |
|
"logps/chosen": -1.150748372077942, |
|
"logps/rejected": -1.330487847328186, |
|
"loss": 1.5692, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -2.301496744155884, |
|
"rewards/margins": 0.35947883129119873, |
|
"rewards/rejected": -2.660975694656372, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 2.0034231150346815, |
|
"grad_norm": 23.08357458694508, |
|
"learning_rate": 2.989095253976816e-08, |
|
"logits/chosen": -2.2911369800567627, |
|
"logits/rejected": -2.2887818813323975, |
|
"logps/chosen": -1.1655315160751343, |
|
"logps/rejected": -1.3231830596923828, |
|
"loss": 1.6272, |
|
"rewards/accuracies": 0.546875, |
|
"rewards/chosen": -2.3310630321502686, |
|
"rewards/margins": 0.3153030276298523, |
|
"rewards/rejected": -2.6463661193847656, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 2.017836230970183, |
|
"grad_norm": 21.786843412845027, |
|
"learning_rate": 2.912516560976146e-08, |
|
"logits/chosen": -2.2617886066436768, |
|
"logits/rejected": -2.261368989944458, |
|
"logps/chosen": -1.116999864578247, |
|
"logps/rejected": -1.3585065603256226, |
|
"loss": 1.5173, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -2.233999729156494, |
|
"rewards/margins": 0.4830136299133301, |
|
"rewards/rejected": -2.717013120651245, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 2.0322493469056844, |
|
"grad_norm": 19.872912648108493, |
|
"learning_rate": 2.836526393096661e-08, |
|
"logits/chosen": -2.3144338130950928, |
|
"logits/rejected": -2.319342613220215, |
|
"logps/chosen": -1.127329707145691, |
|
"logps/rejected": -1.3289254903793335, |
|
"loss": 1.5402, |
|
"rewards/accuracies": 0.628125011920929, |
|
"rewards/chosen": -2.254659414291382, |
|
"rewards/margins": 0.40319204330444336, |
|
"rewards/rejected": -2.657850980758667, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 2.0466624628411854, |
|
"grad_norm": 22.10407026857419, |
|
"learning_rate": 2.7611461742797165e-08, |
|
"logits/chosen": -2.2922112941741943, |
|
"logits/rejected": -2.2878568172454834, |
|
"logps/chosen": -1.0672378540039062, |
|
"logps/rejected": -1.2899413108825684, |
|
"loss": 1.5126, |
|
"rewards/accuracies": 0.6156250238418579, |
|
"rewards/chosen": -2.1344757080078125, |
|
"rewards/margins": 0.44540706276893616, |
|
"rewards/rejected": -2.5798826217651367, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 2.061075578776687, |
|
"grad_norm": 26.207768824418398, |
|
"learning_rate": 2.686397156503445e-08, |
|
"logits/chosen": -2.2948415279388428, |
|
"logits/rejected": -2.28835129737854, |
|
"logps/chosen": -1.1063997745513916, |
|
"logps/rejected": -1.3052228689193726, |
|
"loss": 1.5589, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -2.212799549102783, |
|
"rewards/margins": 0.39764639735221863, |
|
"rewards/rejected": -2.610445737838745, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 2.075488694712188, |
|
"grad_norm": 18.99932149970658, |
|
"learning_rate": 2.6123004137912084e-08, |
|
"logits/chosen": -2.2723312377929688, |
|
"logits/rejected": -2.276716709136963, |
|
"logps/chosen": -1.0470964908599854, |
|
"logps/rejected": -1.2561558485031128, |
|
"loss": 1.5356, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -2.0941929817199707, |
|
"rewards/margins": 0.4181187152862549, |
|
"rewards/rejected": -2.5123116970062256, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 2.089901810647689, |
|
"grad_norm": 19.445114453376085, |
|
"learning_rate": 2.5388768362701585e-08, |
|
"logits/chosen": -2.2706756591796875, |
|
"logits/rejected": -2.269131898880005, |
|
"logps/chosen": -1.1902254819869995, |
|
"logps/rejected": -1.351431131362915, |
|
"loss": 1.6073, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -2.380450963973999, |
|
"rewards/margins": 0.3224112391471863, |
|
"rewards/rejected": -2.70286226272583, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 2.1043149265831906, |
|
"grad_norm": 22.70265803179129, |
|
"learning_rate": 2.466147124281703e-08, |
|
"logits/chosen": -2.3346049785614014, |
|
"logits/rejected": -2.3269667625427246, |
|
"logps/chosen": -1.1868515014648438, |
|
"logps/rejected": -1.3827440738677979, |
|
"loss": 1.5644, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -2.3737030029296875, |
|
"rewards/margins": 0.39178499579429626, |
|
"rewards/rejected": -2.7654881477355957, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 2.118728042518692, |
|
"grad_norm": 25.431369552773468, |
|
"learning_rate": 2.3941317825454278e-08, |
|
"logits/chosen": -2.287153720855713, |
|
"logits/rejected": -2.274724006652832, |
|
"logps/chosen": -1.1501365900039673, |
|
"logps/rejected": -1.3252675533294678, |
|
"loss": 1.599, |
|
"rewards/accuracies": 0.596875011920929, |
|
"rewards/chosen": -2.3002731800079346, |
|
"rewards/margins": 0.35026198625564575, |
|
"rewards/rejected": -2.6505351066589355, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 2.1331411584541935, |
|
"grad_norm": 38.861924452847305, |
|
"learning_rate": 2.322851114378203e-08, |
|
"logits/chosen": -2.2646145820617676, |
|
"logits/rejected": -2.2705867290496826, |
|
"logps/chosen": -1.2125260829925537, |
|
"logps/rejected": -1.4090855121612549, |
|
"loss": 1.5981, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -2.4250521659851074, |
|
"rewards/margins": 0.39311888813972473, |
|
"rewards/rejected": -2.8181710243225098, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 2.1475542743896945, |
|
"grad_norm": 20.599427677239603, |
|
"learning_rate": 2.252325215970059e-08, |
|
"logits/chosen": -2.2515275478363037, |
|
"logits/rejected": -2.24314022064209, |
|
"logps/chosen": -1.1347332000732422, |
|
"logps/rejected": -1.3541853427886963, |
|
"loss": 1.5426, |
|
"rewards/accuracies": 0.609375, |
|
"rewards/chosen": -2.2694664001464844, |
|
"rewards/margins": 0.438904345035553, |
|
"rewards/rejected": -2.7083706855773926, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 2.161967390325196, |
|
"grad_norm": 20.697243890138434, |
|
"learning_rate": 2.182573970718449e-08, |
|
"logits/chosen": -2.279026746749878, |
|
"logits/rejected": -2.2784788608551025, |
|
"logps/chosen": -1.1145248413085938, |
|
"logps/rejected": -1.3219712972640991, |
|
"loss": 1.5631, |
|
"rewards/accuracies": 0.578125, |
|
"rewards/chosen": -2.2290496826171875, |
|
"rewards/margins": 0.4148930013179779, |
|
"rewards/rejected": -2.6439425945281982, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 2.1763805062606973, |
|
"grad_norm": 20.97814093763114, |
|
"learning_rate": 2.113617043622536e-08, |
|
"logits/chosen": -2.2447619438171387, |
|
"logits/rejected": -2.2397830486297607, |
|
"logps/chosen": -1.108572006225586, |
|
"logps/rejected": -1.312126874923706, |
|
"loss": 1.5638, |
|
"rewards/accuracies": 0.596875011920929, |
|
"rewards/chosen": -2.217144012451172, |
|
"rewards/margins": 0.4071098268032074, |
|
"rewards/rejected": -2.624253749847412, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 2.1907936221961988, |
|
"grad_norm": 19.658252029005208, |
|
"learning_rate": 2.045473875739001e-08, |
|
"logits/chosen": -2.286835193634033, |
|
"logits/rejected": -2.284726619720459, |
|
"logps/chosen": -1.1268645524978638, |
|
"logps/rejected": -1.3589181900024414, |
|
"loss": 1.5125, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -2.2537291049957275, |
|
"rewards/margins": 0.4641071856021881, |
|
"rewards/rejected": -2.717836380004883, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 2.2052067381316998, |
|
"grad_norm": 19.675863885214547, |
|
"learning_rate": 1.9781636787010503e-08, |
|
"logits/chosen": -2.296203851699829, |
|
"logits/rejected": -2.292480230331421, |
|
"logps/chosen": -1.1581227779388428, |
|
"logps/rejected": -1.3830742835998535, |
|
"loss": 1.5552, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -2.3162455558776855, |
|
"rewards/margins": 0.4499031603336334, |
|
"rewards/rejected": -2.766148567199707, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 2.219619854067201, |
|
"grad_norm": 26.028820150112818, |
|
"learning_rate": 1.911705429302038e-08, |
|
"logits/chosen": -2.2454471588134766, |
|
"logits/rejected": -2.2483785152435303, |
|
"logps/chosen": -1.1285746097564697, |
|
"logps/rejected": -1.2919931411743164, |
|
"loss": 1.5857, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -2.2571492195129395, |
|
"rewards/margins": 0.3268371522426605, |
|
"rewards/rejected": -2.583986282348633, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 2.2340329700027026, |
|
"grad_norm": 23.71926436834239, |
|
"learning_rate": 1.8461178641453617e-08, |
|
"logits/chosen": -2.2616686820983887, |
|
"logits/rejected": -2.2652456760406494, |
|
"logps/chosen": -1.1020487546920776, |
|
"logps/rejected": -1.310429573059082, |
|
"loss": 1.5808, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -2.2040975093841553, |
|
"rewards/margins": 0.41676193475723267, |
|
"rewards/rejected": -2.620859146118164, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 2.2484460859382036, |
|
"grad_norm": 18.888058220721906, |
|
"learning_rate": 1.781419474362017e-08, |
|
"logits/chosen": -2.2560315132141113, |
|
"logits/rejected": -2.2563912868499756, |
|
"logps/chosen": -1.120178461074829, |
|
"logps/rejected": -1.3521924018859863, |
|
"loss": 1.5308, |
|
"rewards/accuracies": 0.628125011920929, |
|
"rewards/chosen": -2.240356922149658, |
|
"rewards/margins": 0.46402817964553833, |
|
"rewards/rejected": -2.7043848037719727, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 2.262859201873705, |
|
"grad_norm": 18.99138375232662, |
|
"learning_rate": 1.7176285003974033e-08, |
|
"logits/chosen": -2.2571425437927246, |
|
"logits/rejected": -2.253202199935913, |
|
"logps/chosen": -1.1062372922897339, |
|
"logps/rejected": -1.313024640083313, |
|
"loss": 1.5606, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -2.2124745845794678, |
|
"rewards/margins": 0.413574755191803, |
|
"rewards/rejected": -2.626049280166626, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 2.2772723178092065, |
|
"grad_norm": 24.162865311479557, |
|
"learning_rate": 1.6547629268687786e-08, |
|
"logits/chosen": -2.2994749546051025, |
|
"logits/rejected": -2.296318531036377, |
|
"logps/chosen": -1.0731937885284424, |
|
"logps/rejected": -1.3190656900405884, |
|
"loss": 1.5119, |
|
"rewards/accuracies": 0.6468750238418579, |
|
"rewards/chosen": -2.1463875770568848, |
|
"rewards/margins": 0.49174371361732483, |
|
"rewards/rejected": -2.6381313800811768, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 2.291685433744708, |
|
"grad_norm": 18.432849736683174, |
|
"learning_rate": 1.59284047749485e-08, |
|
"logits/chosen": -2.2636983394622803, |
|
"logits/rejected": -2.2557337284088135, |
|
"logps/chosen": -1.0886359214782715, |
|
"logps/rejected": -1.2910807132720947, |
|
"loss": 1.5641, |
|
"rewards/accuracies": 0.628125011920929, |
|
"rewards/chosen": -2.177271842956543, |
|
"rewards/margins": 0.4048894941806793, |
|
"rewards/rejected": -2.5821614265441895, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 2.306098549680209, |
|
"grad_norm": 28.646123727089137, |
|
"learning_rate": 1.5318786100989188e-08, |
|
"logits/chosen": -2.229341506958008, |
|
"logits/rejected": -2.226560115814209, |
|
"logps/chosen": -1.2336177825927734, |
|
"logps/rejected": -1.4262335300445557, |
|
"loss": 1.6148, |
|
"rewards/accuracies": 0.578125, |
|
"rewards/chosen": -2.467235565185547, |
|
"rewards/margins": 0.3852314352989197, |
|
"rewards/rejected": -2.8524670600891113, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 2.3205116656157103, |
|
"grad_norm": 23.756121348250495, |
|
"learning_rate": 1.471894511686988e-08, |
|
"logits/chosen": -2.2284324169158936, |
|
"logits/rejected": -2.2255947589874268, |
|
"logps/chosen": -1.1893842220306396, |
|
"logps/rejected": -1.3409416675567627, |
|
"loss": 1.6367, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -2.3787684440612793, |
|
"rewards/margins": 0.30311447381973267, |
|
"rewards/rejected": -2.6818833351135254, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 2.3349247815512117, |
|
"grad_norm": 18.450150129405873, |
|
"learning_rate": 1.4129050936022214e-08, |
|
"logits/chosen": -2.2338924407958984, |
|
"logits/rejected": -2.235215663909912, |
|
"logps/chosen": -1.0769164562225342, |
|
"logps/rejected": -1.2985079288482666, |
|
"loss": 1.5409, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -2.1538329124450684, |
|
"rewards/margins": 0.44318294525146484, |
|
"rewards/rejected": -2.597015857696533, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 2.3493378974867127, |
|
"grad_norm": 22.180084405255627, |
|
"learning_rate": 1.3549269867571222e-08, |
|
"logits/chosen": -2.2351133823394775, |
|
"logits/rejected": -2.2372500896453857, |
|
"logps/chosen": -1.1330866813659668, |
|
"logps/rejected": -1.2997318506240845, |
|
"loss": 1.6214, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -2.2661733627319336, |
|
"rewards/margins": 0.33329010009765625, |
|
"rewards/rejected": -2.599463701248169, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 2.363751013422214, |
|
"grad_norm": 23.08714654459471, |
|
"learning_rate": 1.2979765369447742e-08, |
|
"logits/chosen": -2.304003953933716, |
|
"logits/rejected": -2.2949726581573486, |
|
"logps/chosen": -1.1455012559890747, |
|
"logps/rejected": -1.3875641822814941, |
|
"loss": 1.5371, |
|
"rewards/accuracies": 0.628125011920929, |
|
"rewards/chosen": -2.2910025119781494, |
|
"rewards/margins": 0.48412585258483887, |
|
"rewards/rejected": -2.7751283645629883, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 2.3781641293577156, |
|
"grad_norm": 30.56182243031503, |
|
"learning_rate": 1.2420698002304608e-08, |
|
"logits/chosen": -2.2411041259765625, |
|
"logits/rejected": -2.2343127727508545, |
|
"logps/chosen": -1.0859107971191406, |
|
"logps/rejected": -1.3196165561676025, |
|
"loss": 1.5388, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -2.1718215942382812, |
|
"rewards/margins": 0.4674110412597656, |
|
"rewards/rejected": -2.639233112335205, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 2.392577245293217, |
|
"grad_norm": 19.77198047003492, |
|
"learning_rate": 1.1872225384249768e-08, |
|
"logits/chosen": -2.268101215362549, |
|
"logits/rejected": -2.2637829780578613, |
|
"logps/chosen": -1.1163004636764526, |
|
"logps/rejected": -1.3505176305770874, |
|
"loss": 1.5169, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -2.2326009273529053, |
|
"rewards/margins": 0.46843448281288147, |
|
"rewards/rejected": -2.701035261154175, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 2.406990361228718, |
|
"grad_norm": 26.906205506300168, |
|
"learning_rate": 1.1334502146408881e-08, |
|
"logits/chosen": -2.2429723739624023, |
|
"logits/rejected": -2.249293804168701, |
|
"logps/chosen": -1.1734583377838135, |
|
"logps/rejected": -1.3377552032470703, |
|
"loss": 1.6096, |
|
"rewards/accuracies": 0.590624988079071, |
|
"rewards/chosen": -2.346916675567627, |
|
"rewards/margins": 0.3285936415195465, |
|
"rewards/rejected": -2.6755104064941406, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 2.4214034771642194, |
|
"grad_norm": 21.73816659360824, |
|
"learning_rate": 1.0807679889330163e-08, |
|
"logits/chosen": -2.314985990524292, |
|
"logits/rejected": -2.320690870285034, |
|
"logps/chosen": -1.17433762550354, |
|
"logps/rejected": -1.3522727489471436, |
|
"loss": 1.5944, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -2.34867525100708, |
|
"rewards/margins": 0.35587045550346375, |
|
"rewards/rejected": -2.704545497894287, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 2.435816593099721, |
|
"grad_norm": 17.239308701432627, |
|
"learning_rate": 1.0291907140243538e-08, |
|
"logits/chosen": -2.2565197944641113, |
|
"logits/rejected": -2.255737781524658, |
|
"logps/chosen": -1.1245791912078857, |
|
"logps/rejected": -1.4125820398330688, |
|
"loss": 1.4673, |
|
"rewards/accuracies": 0.6468750238418579, |
|
"rewards/chosen": -2.2491583824157715, |
|
"rewards/margins": 0.5760055184364319, |
|
"rewards/rejected": -2.8251640796661377, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 2.450229709035222, |
|
"grad_norm": 21.369654725894584, |
|
"learning_rate": 9.787329311186249e-09, |
|
"logits/chosen": -2.252303123474121, |
|
"logits/rejected": -2.251774787902832, |
|
"logps/chosen": -1.1287000179290771, |
|
"logps/rejected": -1.3461166620254517, |
|
"loss": 1.5545, |
|
"rewards/accuracies": 0.6156250238418579, |
|
"rewards/chosen": -2.2574000358581543, |
|
"rewards/margins": 0.43483343720436096, |
|
"rewards/rejected": -2.6922333240509033, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 2.4646428249707233, |
|
"grad_norm": 25.680472794698755, |
|
"learning_rate": 9.294088658006916e-09, |
|
"logits/chosen": -2.2721753120422363, |
|
"logits/rejected": -2.2618608474731445, |
|
"logps/chosen": -1.1408545970916748, |
|
"logps/rejected": -1.366431474685669, |
|
"loss": 1.5555, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -2.2817091941833496, |
|
"rewards/margins": 0.45115384459495544, |
|
"rewards/rejected": -2.732862949371338, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 2.4790559409062247, |
|
"grad_norm": 20.80236487452411, |
|
"learning_rate": 8.812324240259094e-09, |
|
"logits/chosen": -2.2599918842315674, |
|
"logits/rejected": -2.2533061504364014, |
|
"logps/chosen": -1.1435985565185547, |
|
"logps/rejected": -1.3751742839813232, |
|
"loss": 1.5389, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -2.2871971130371094, |
|
"rewards/margins": 0.4631514549255371, |
|
"rewards/rejected": -2.7503485679626465, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 2.493469056841726, |
|
"grad_norm": 25.297955693939965, |
|
"learning_rate": 8.342171881996351e-09, |
|
"logits/chosen": -2.269395112991333, |
|
"logits/rejected": -2.267338514328003, |
|
"logps/chosen": -1.1785120964050293, |
|
"logps/rejected": -1.3562462329864502, |
|
"loss": 1.6033, |
|
"rewards/accuracies": 0.590624988079071, |
|
"rewards/chosen": -2.3570241928100586, |
|
"rewards/margins": 0.355468213558197, |
|
"rewards/rejected": -2.7124924659729004, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 2.507882172777227, |
|
"grad_norm": 24.06865322162579, |
|
"learning_rate": 7.883764133479137e-09, |
|
"logits/chosen": -2.260371685028076, |
|
"logits/rejected": -2.2534215450286865, |
|
"logps/chosen": -1.130081295967102, |
|
"logps/rejected": -1.3861533403396606, |
|
"loss": 1.4917, |
|
"rewards/accuracies": 0.6656249761581421, |
|
"rewards/chosen": -2.260162591934204, |
|
"rewards/margins": 0.5121440887451172, |
|
"rewards/rejected": -2.7723066806793213, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 2.5222952887127286, |
|
"grad_norm": 29.75935812876475, |
|
"learning_rate": 7.43723023380502e-09, |
|
"logits/chosen": -2.2067666053771973, |
|
"logits/rejected": -2.208773136138916, |
|
"logps/chosen": -1.1877186298370361, |
|
"logps/rejected": -1.4029791355133057, |
|
"loss": 1.5553, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -2.3754372596740723, |
|
"rewards/margins": 0.430520623922348, |
|
"rewards/rejected": -2.8059582710266113, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 2.53670840464823, |
|
"grad_norm": 24.2432673255774, |
|
"learning_rate": 7.002696074472075e-09, |
|
"logits/chosen": -2.2512130737304688, |
|
"logits/rejected": -2.2531332969665527, |
|
"logps/chosen": -1.2248094081878662, |
|
"logps/rejected": -1.4335352182388306, |
|
"loss": 1.5688, |
|
"rewards/accuracies": 0.578125, |
|
"rewards/chosen": -2.4496188163757324, |
|
"rewards/margins": 0.4174516797065735, |
|
"rewards/rejected": -2.867070436477661, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 2.551121520583731, |
|
"grad_norm": 32.01658470543389, |
|
"learning_rate": 6.580284163886369e-09, |
|
"logits/chosen": -2.2607645988464355, |
|
"logits/rejected": -2.2610065937042236, |
|
"logps/chosen": -1.1927731037139893, |
|
"logps/rejected": -1.3909296989440918, |
|
"loss": 1.5668, |
|
"rewards/accuracies": 0.590624988079071, |
|
"rewards/chosen": -2.3855462074279785, |
|
"rewards/margins": 0.39631372690200806, |
|
"rewards/rejected": -2.7818593978881836, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 2.5655346365192324, |
|
"grad_norm": 24.419915253157857, |
|
"learning_rate": 6.1701135928230566e-09, |
|
"logits/chosen": -2.217277765274048, |
|
"logits/rejected": -2.209423303604126, |
|
"logps/chosen": -1.2151906490325928, |
|
"logps/rejected": -1.427695870399475, |
|
"loss": 1.5543, |
|
"rewards/accuracies": 0.5843750238418579, |
|
"rewards/chosen": -2.4303812980651855, |
|
"rewards/margins": 0.4250105321407318, |
|
"rewards/rejected": -2.85539174079895, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 2.579947752454734, |
|
"grad_norm": 22.58314758967658, |
|
"learning_rate": 5.7723000008510655e-09, |
|
"logits/chosen": -2.2694671154022217, |
|
"logits/rejected": -2.2696220874786377, |
|
"logps/chosen": -1.168027639389038, |
|
"logps/rejected": -1.3549962043762207, |
|
"loss": 1.5926, |
|
"rewards/accuracies": 0.6031249761581421, |
|
"rewards/chosen": -2.336055278778076, |
|
"rewards/margins": 0.3739371597766876, |
|
"rewards/rejected": -2.7099924087524414, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 2.5943608683902353, |
|
"grad_norm": 22.781044887360306, |
|
"learning_rate": 5.386955543730798e-09, |
|
"logits/chosen": -2.277388334274292, |
|
"logits/rejected": -2.2686190605163574, |
|
"logps/chosen": -1.2046597003936768, |
|
"logps/rejected": -1.4461263418197632, |
|
"loss": 1.5418, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -2.4093194007873535, |
|
"rewards/margins": 0.4829334318637848, |
|
"rewards/rejected": -2.8922526836395264, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 2.6087739843257363, |
|
"grad_norm": 22.99449695923957, |
|
"learning_rate": 5.014188861794e-09, |
|
"logits/chosen": -2.2212021350860596, |
|
"logits/rejected": -2.2196457386016846, |
|
"logps/chosen": -1.1851980686187744, |
|
"logps/rejected": -1.4349489212036133, |
|
"loss": 1.5106, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -2.370396137237549, |
|
"rewards/margins": 0.4995017945766449, |
|
"rewards/rejected": -2.8698978424072266, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 2.6231871002612377, |
|
"grad_norm": 21.158930640881984, |
|
"learning_rate": 4.654105049314744e-09, |
|
"logits/chosen": -2.2831361293792725, |
|
"logits/rejected": -2.2893922328948975, |
|
"logps/chosen": -1.1905128955841064, |
|
"logps/rejected": -1.392458200454712, |
|
"loss": 1.5859, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -2.381025791168213, |
|
"rewards/margins": 0.40389055013656616, |
|
"rewards/rejected": -2.784916400909424, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 2.637600216196739, |
|
"grad_norm": 24.606900180349317, |
|
"learning_rate": 4.3068056248801496e-09, |
|
"logits/chosen": -2.260871410369873, |
|
"logits/rejected": -2.2557454109191895, |
|
"logps/chosen": -1.1808732748031616, |
|
"logps/rejected": -1.4025046825408936, |
|
"loss": 1.5385, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -2.3617465496063232, |
|
"rewards/margins": 0.44326257705688477, |
|
"rewards/rejected": -2.805009365081787, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 2.65201333213224, |
|
"grad_norm": 21.13626030836664, |
|
"learning_rate": 3.972388502769225e-09, |
|
"logits/chosen": -2.298476457595825, |
|
"logits/rejected": -2.2920804023742676, |
|
"logps/chosen": -1.2038078308105469, |
|
"logps/rejected": -1.3969953060150146, |
|
"loss": 1.5752, |
|
"rewards/accuracies": 0.6031249761581421, |
|
"rewards/chosen": -2.4076156616210938, |
|
"rewards/margins": 0.38637492060661316, |
|
"rewards/rejected": -2.7939906120300293, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 2.6664264480677415, |
|
"grad_norm": 23.476816797872775, |
|
"learning_rate": 3.650947965347817e-09, |
|
"logits/chosen": -2.2797365188598633, |
|
"logits/rejected": -2.2747490406036377, |
|
"logps/chosen": -1.1963701248168945, |
|
"logps/rejected": -1.4419893026351929, |
|
"loss": 1.4998, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -2.392740249633789, |
|
"rewards/margins": 0.4912383556365967, |
|
"rewards/rejected": -2.8839786052703857, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 2.680839564003243, |
|
"grad_norm": 28.256187183267656, |
|
"learning_rate": 3.342574636487583e-09, |
|
"logits/chosen": -2.3183302879333496, |
|
"logits/rejected": -2.3189597129821777, |
|
"logps/chosen": -1.2193528413772583, |
|
"logps/rejected": -1.4082263708114624, |
|
"loss": 1.5796, |
|
"rewards/accuracies": 0.6031249761581421, |
|
"rewards/chosen": -2.4387056827545166, |
|
"rewards/margins": 0.37774714827537537, |
|
"rewards/rejected": -2.816452741622925, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 2.6952526799387444, |
|
"grad_norm": 19.684709175702448, |
|
"learning_rate": 3.0473554560163207e-09, |
|
"logits/chosen": -2.254714012145996, |
|
"logits/rejected": -2.2444214820861816, |
|
"logps/chosen": -1.1542867422103882, |
|
"logps/rejected": -1.377029538154602, |
|
"loss": 1.5415, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -2.3085734844207764, |
|
"rewards/margins": 0.44548529386520386, |
|
"rewards/rejected": -2.754059076309204, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 2.709665795874246, |
|
"grad_norm": 24.08431048004274, |
|
"learning_rate": 2.7653736552070207e-09, |
|
"logits/chosen": -2.2782135009765625, |
|
"logits/rejected": -2.276923179626465, |
|
"logps/chosen": -1.2209516763687134, |
|
"logps/rejected": -1.4482202529907227, |
|
"loss": 1.538, |
|
"rewards/accuracies": 0.590624988079071, |
|
"rewards/chosen": -2.4419033527374268, |
|
"rewards/margins": 0.45453739166259766, |
|
"rewards/rejected": -2.8964405059814453, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 2.724078911809747, |
|
"grad_norm": 17.160087536859805, |
|
"learning_rate": 2.496708733312419e-09, |
|
"logits/chosen": -2.250776767730713, |
|
"logits/rejected": -2.253812551498413, |
|
"logps/chosen": -1.1638703346252441, |
|
"logps/rejected": -1.3798881769180298, |
|
"loss": 1.5446, |
|
"rewards/accuracies": 0.621874988079071, |
|
"rewards/chosen": -2.3277406692504883, |
|
"rewards/margins": 0.43203577399253845, |
|
"rewards/rejected": -2.7597763538360596, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 2.7384920277452482, |
|
"grad_norm": 18.244927534685523, |
|
"learning_rate": 2.241436435151717e-09, |
|
"logits/chosen": -2.2549357414245605, |
|
"logits/rejected": -2.247612714767456, |
|
"logps/chosen": -1.1582852602005005, |
|
"logps/rejected": -1.3766599893569946, |
|
"loss": 1.5527, |
|
"rewards/accuracies": 0.559374988079071, |
|
"rewards/chosen": -2.316570520401001, |
|
"rewards/margins": 0.4367493987083435, |
|
"rewards/rejected": -2.7533199787139893, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 2.7529051436807492, |
|
"grad_norm": 16.475242116483138, |
|
"learning_rate": 1.9996287297558866e-09, |
|
"logits/chosen": -2.241720199584961, |
|
"logits/rejected": -2.246184825897217, |
|
"logps/chosen": -1.1753349304199219, |
|
"logps/rejected": -1.398506760597229, |
|
"loss": 1.5477, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -2.3506698608398438, |
|
"rewards/margins": 0.44634366035461426, |
|
"rewards/rejected": -2.797013521194458, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 2.7673182596162507, |
|
"grad_norm": 18.756801068057744, |
|
"learning_rate": 1.7713537900772957e-09, |
|
"logits/chosen": -2.2873311042785645, |
|
"logits/rejected": -2.285597562789917, |
|
"logps/chosen": -1.2065943479537964, |
|
"logps/rejected": -1.3886728286743164, |
|
"loss": 1.587, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -2.4131886959075928, |
|
"rewards/margins": 0.3641572594642639, |
|
"rewards/rejected": -2.777345657348633, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 2.781731375551752, |
|
"grad_norm": 18.75587536733683, |
|
"learning_rate": 1.5566759737697998e-09, |
|
"logits/chosen": -2.252821922302246, |
|
"logits/rejected": -2.252249240875244, |
|
"logps/chosen": -1.1472349166870117, |
|
"logps/rejected": -1.3485777378082275, |
|
"loss": 1.5582, |
|
"rewards/accuracies": 0.6156250238418579, |
|
"rewards/chosen": -2.2944698333740234, |
|
"rewards/margins": 0.40268588066101074, |
|
"rewards/rejected": -2.697155475616455, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 2.7961444914872535, |
|
"grad_norm": 18.83657032008189, |
|
"learning_rate": 1.3556558050442425e-09, |
|
"logits/chosen": -2.27396821975708, |
|
"logits/rejected": -2.266453504562378, |
|
"logps/chosen": -1.153480052947998, |
|
"logps/rejected": -1.3870432376861572, |
|
"loss": 1.5257, |
|
"rewards/accuracies": 0.6031249761581421, |
|
"rewards/chosen": -2.306960105895996, |
|
"rewards/margins": 0.46712619066238403, |
|
"rewards/rejected": -2.7740864753723145, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 2.810557607422755, |
|
"grad_norm": 19.634521530754597, |
|
"learning_rate": 1.1683499576049583e-09, |
|
"logits/chosen": -2.2552907466888428, |
|
"logits/rejected": -2.255131959915161, |
|
"logps/chosen": -1.1509824991226196, |
|
"logps/rejected": -1.3625354766845703, |
|
"loss": 1.543, |
|
"rewards/accuracies": 0.640625, |
|
"rewards/chosen": -2.3019649982452393, |
|
"rewards/margins": 0.4231061041355133, |
|
"rewards/rejected": -2.7250709533691406, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 2.824970723358256, |
|
"grad_norm": 22.498945774440706, |
|
"learning_rate": 9.948112386716167e-10, |
|
"logits/chosen": -2.2837812900543213, |
|
"logits/rejected": -2.2745299339294434, |
|
"logps/chosen": -1.220226764678955, |
|
"logps/rejected": -1.4314284324645996, |
|
"loss": 1.569, |
|
"rewards/accuracies": 0.565625011920929, |
|
"rewards/chosen": -2.44045352935791, |
|
"rewards/margins": 0.4224032759666443, |
|
"rewards/rejected": -2.862856864929199, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 2.8393838392937574, |
|
"grad_norm": 24.02219360016628, |
|
"learning_rate": 8.350885740913416e-10, |
|
"logits/chosen": -2.224419116973877, |
|
"logits/rejected": -2.2149837017059326, |
|
"logps/chosen": -1.1606011390686035, |
|
"logps/rejected": -1.3387001752853394, |
|
"loss": 1.6133, |
|
"rewards/accuracies": 0.559374988079071, |
|
"rewards/chosen": -2.321202278137207, |
|
"rewards/margins": 0.3561980724334717, |
|
"rewards/rejected": -2.6774003505706787, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 2.8537969552292584, |
|
"grad_norm": 19.84204643186706, |
|
"learning_rate": 6.89226994544978e-10, |
|
"logits/chosen": -2.223024845123291, |
|
"logits/rejected": -2.2192695140838623, |
|
"logps/chosen": -1.1890778541564941, |
|
"logps/rejected": -1.3559348583221436, |
|
"loss": 1.6171, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -2.3781557083129883, |
|
"rewards/margins": 0.33371374011039734, |
|
"rewards/rejected": -2.711869716644287, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 2.86821007116476, |
|
"grad_norm": 20.65249363397335, |
|
"learning_rate": 5.572676228516038e-10, |
|
"logits/chosen": -2.255366802215576, |
|
"logits/rejected": -2.2476673126220703, |
|
"logps/chosen": -1.1339585781097412, |
|
"logps/rejected": -1.3980591297149658, |
|
"loss": 1.4961, |
|
"rewards/accuracies": 0.653124988079071, |
|
"rewards/chosen": -2.2679171562194824, |
|
"rewards/margins": 0.5282012224197388, |
|
"rewards/rejected": -2.7961182594299316, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 2.882623187100261, |
|
"grad_norm": 21.582370970938786, |
|
"learning_rate": 4.3924766237473656e-10, |
|
"logits/chosen": -2.2555174827575684, |
|
"logits/rejected": -2.247621536254883, |
|
"logps/chosen": -1.1424418687820435, |
|
"logps/rejected": -1.3766818046569824, |
|
"loss": 1.531, |
|
"rewards/accuracies": 0.6156250238418579, |
|
"rewards/chosen": -2.284883737564087, |
|
"rewards/margins": 0.4684801697731018, |
|
"rewards/rejected": -2.753363609313965, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 2.8970363030357626, |
|
"grad_norm": 25.67561175147071, |
|
"learning_rate": 3.35200386533574e-10, |
|
"logits/chosen": -2.2250311374664307, |
|
"logits/rejected": -2.2280611991882324, |
|
"logps/chosen": -1.181894063949585, |
|
"logps/rejected": -1.3828670978546143, |
|
"loss": 1.565, |
|
"rewards/accuracies": 0.621874988079071, |
|
"rewards/chosen": -2.36378812789917, |
|
"rewards/margins": 0.40194636583328247, |
|
"rewards/rejected": -2.7657341957092285, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 2.911449418971264, |
|
"grad_norm": 21.660548030642744, |
|
"learning_rate": 2.4515512942220874e-10, |
|
"logits/chosen": -2.27579927444458, |
|
"logits/rejected": -2.2686378955841064, |
|
"logps/chosen": -1.2043073177337646, |
|
"logps/rejected": -1.3992283344268799, |
|
"loss": 1.5841, |
|
"rewards/accuracies": 0.596875011920929, |
|
"rewards/chosen": -2.4086146354675293, |
|
"rewards/margins": 0.38984209299087524, |
|
"rewards/rejected": -2.7984566688537598, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 2.925862534906765, |
|
"grad_norm": 24.50163023857697, |
|
"learning_rate": 1.691372775394717e-10, |
|
"logits/chosen": -2.2493457794189453, |
|
"logits/rejected": -2.251462936401367, |
|
"logps/chosen": -1.2009613513946533, |
|
"logps/rejected": -1.3668345212936401, |
|
"loss": 1.6163, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -2.4019227027893066, |
|
"rewards/margins": 0.33174630999565125, |
|
"rewards/rejected": -2.7336690425872803, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 2.9402756508422665, |
|
"grad_norm": 23.84173235916362, |
|
"learning_rate": 1.0716826263165724e-10, |
|
"logits/chosen": -2.291029691696167, |
|
"logits/rejected": -2.289228916168213, |
|
"logps/chosen": -1.17218816280365, |
|
"logps/rejected": -1.440246343612671, |
|
"loss": 1.4873, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -2.3443763256073, |
|
"rewards/margins": 0.5361161828041077, |
|
"rewards/rejected": -2.880492687225342, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 2.954688766777768, |
|
"grad_norm": 21.3140792744408, |
|
"learning_rate": 5.926555565031743e-11, |
|
"logits/chosen": -2.2876641750335693, |
|
"logits/rejected": -2.289773464202881, |
|
"logps/chosen": -1.216587781906128, |
|
"logps/rejected": -1.4193012714385986, |
|
"loss": 1.5845, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -2.433175563812256, |
|
"rewards/margins": 0.40542715787887573, |
|
"rewards/rejected": -2.8386025428771973, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 2.969101882713269, |
|
"grad_norm": 22.106407972159015, |
|
"learning_rate": 2.544266182662458e-11, |
|
"logits/chosen": -2.2547993659973145, |
|
"logits/rejected": -2.2469217777252197, |
|
"logps/chosen": -1.1249706745147705, |
|
"logps/rejected": -1.3703702688217163, |
|
"loss": 1.515, |
|
"rewards/accuracies": 0.628125011920929, |
|
"rewards/chosen": -2.249941349029541, |
|
"rewards/margins": 0.49079880118370056, |
|
"rewards/rejected": -2.7407405376434326, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 2.9835149986487703, |
|
"grad_norm": 22.35894660462506, |
|
"learning_rate": 5.709116863872321e-12, |
|
"logits/chosen": -2.2706878185272217, |
|
"logits/rejected": -2.2676730155944824, |
|
"logps/chosen": -1.1365437507629395, |
|
"logps/rejected": -1.3011773824691772, |
|
"loss": 1.6093, |
|
"rewards/accuracies": 0.5843750238418579, |
|
"rewards/chosen": -2.273087501525879, |
|
"rewards/margins": 0.3292676508426666, |
|
"rewards/rejected": -2.6023547649383545, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 2.9964868029907215, |
|
"step": 2079, |
|
"total_flos": 0.0, |
|
"train_loss": 1.6015657603367983, |
|
"train_runtime": 23310.5572, |
|
"train_samples_per_second": 2.857, |
|
"train_steps_per_second": 0.089 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 2079, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 100, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|