|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 50, |
|
"global_step": 352, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.028409090909090908, |
|
"grad_norm": 31.782667280345194, |
|
"learning_rate": 1.3888888888888888e-07, |
|
"logits/chosen": -2.8591694831848145, |
|
"logits/rejected": -2.6428685188293457, |
|
"logps/chosen": -390.5384216308594, |
|
"logps/rejected": -607.8155517578125, |
|
"loss": 0.6897, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": 0.0022137626074254513, |
|
"rewards/margins": 0.013292843475937843, |
|
"rewards/rejected": -0.011079080402851105, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.056818181818181816, |
|
"grad_norm": 18.732725603737485, |
|
"learning_rate": 2.7777777777777776e-07, |
|
"logits/chosen": -2.8422160148620605, |
|
"logits/rejected": -2.694746494293213, |
|
"logps/chosen": -328.73382568359375, |
|
"logps/rejected": -775.1841430664062, |
|
"loss": 0.556, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": 0.08411312848329544, |
|
"rewards/margins": 0.6615578532218933, |
|
"rewards/rejected": -0.5774446725845337, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.08522727272727272, |
|
"grad_norm": 5.623138797985183, |
|
"learning_rate": 4.1666666666666667e-07, |
|
"logits/chosen": -2.8572211265563965, |
|
"logits/rejected": -2.6727840900421143, |
|
"logps/chosen": -296.5441589355469, |
|
"logps/rejected": -1129.43017578125, |
|
"loss": 0.2375, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.3573785722255707, |
|
"rewards/margins": 4.872136116027832, |
|
"rewards/rejected": -4.5147576332092285, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.11363636363636363, |
|
"grad_norm": 1.8250836888812216, |
|
"learning_rate": 4.998023493068254e-07, |
|
"logits/chosen": -2.813652753829956, |
|
"logits/rejected": -2.606616258621216, |
|
"logps/chosen": -323.4709167480469, |
|
"logps/rejected": -2253.32470703125, |
|
"loss": 0.0646, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.23284561932086945, |
|
"rewards/margins": 15.6871337890625, |
|
"rewards/rejected": -15.4542875289917, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.14204545454545456, |
|
"grad_norm": 1.2011523237420525, |
|
"learning_rate": 4.975823666181255e-07, |
|
"logits/chosen": -2.769263744354248, |
|
"logits/rejected": -2.4520652294158936, |
|
"logps/chosen": -483.3968200683594, |
|
"logps/rejected": -4912.21728515625, |
|
"loss": 0.0079, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.2076160907745361, |
|
"rewards/margins": 41.37514877319336, |
|
"rewards/rejected": -42.582763671875, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.14204545454545456, |
|
"eval_logits/chosen": -2.8844106197357178, |
|
"eval_logits/rejected": -2.4010605812072754, |
|
"eval_logps/chosen": -508.0701904296875, |
|
"eval_logps/rejected": -5776.9990234375, |
|
"eval_loss": 0.005187372677028179, |
|
"eval_rewards/accuracies": 0.9959677457809448, |
|
"eval_rewards/chosen": -1.4108844995498657, |
|
"eval_rewards/margins": 49.95121765136719, |
|
"eval_rewards/rejected": -51.36210250854492, |
|
"eval_runtime": 197.1461, |
|
"eval_samples_per_second": 19.808, |
|
"eval_steps_per_second": 0.314, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.17045454545454544, |
|
"grad_norm": 0.511680193897244, |
|
"learning_rate": 4.929173350101024e-07, |
|
"logits/chosen": -2.8946661949157715, |
|
"logits/rejected": -2.239091396331787, |
|
"logps/chosen": -522.5350341796875, |
|
"logps/rejected": -6444.09033203125, |
|
"loss": 0.0077, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": -1.6264005899429321, |
|
"rewards/margins": 56.385963439941406, |
|
"rewards/rejected": -58.012359619140625, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.19886363636363635, |
|
"grad_norm": 0.2533965055751647, |
|
"learning_rate": 4.858533249305336e-07, |
|
"logits/chosen": -2.7772889137268066, |
|
"logits/rejected": -1.765015959739685, |
|
"logps/chosen": -555.8414306640625, |
|
"logps/rejected": -6928.0419921875, |
|
"loss": 0.0037, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.7668079137802124, |
|
"rewards/margins": 61.386253356933594, |
|
"rewards/rejected": -63.1530647277832, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.22727272727272727, |
|
"grad_norm": 5.78873598153483, |
|
"learning_rate": 4.764600984163808e-07, |
|
"logits/chosen": -2.8613221645355225, |
|
"logits/rejected": -1.6336866617202759, |
|
"logps/chosen": -575.8856201171875, |
|
"logps/rejected": -8230.505859375, |
|
"loss": 0.003, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.6956470012664795, |
|
"rewards/margins": 73.87208557128906, |
|
"rewards/rejected": -75.56773376464844, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.2556818181818182, |
|
"grad_norm": 0.05441796989473776, |
|
"learning_rate": 4.6483042014491527e-07, |
|
"logits/chosen": -2.866097927093506, |
|
"logits/rejected": -1.7091907262802124, |
|
"logps/chosen": -556.7591552734375, |
|
"logps/rejected": -7405.0654296875, |
|
"loss": 0.0112, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.946458101272583, |
|
"rewards/margins": 65.7303466796875, |
|
"rewards/rejected": -67.67679595947266, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.2840909090909091, |
|
"grad_norm": 1.0199863397394129, |
|
"learning_rate": 4.510791413176912e-07, |
|
"logits/chosen": -2.6974339485168457, |
|
"logits/rejected": -0.42245426774024963, |
|
"logps/chosen": -575.6227416992188, |
|
"logps/rejected": -9247.611328125, |
|
"loss": 0.0031, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.0615272521972656, |
|
"rewards/margins": 83.35676574707031, |
|
"rewards/rejected": -85.41829681396484, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.2840909090909091, |
|
"eval_logits/chosen": -2.4172048568725586, |
|
"eval_logits/rejected": 0.017224134877324104, |
|
"eval_logps/chosen": -573.0657348632812, |
|
"eval_logps/rejected": -8932.8349609375, |
|
"eval_loss": 0.001179259386844933, |
|
"eval_rewards/accuracies": 1.0, |
|
"eval_rewards/chosen": -2.060839891433716, |
|
"eval_rewards/margins": 80.85962677001953, |
|
"eval_rewards/rejected": -82.92045593261719, |
|
"eval_runtime": 196.8335, |
|
"eval_samples_per_second": 19.839, |
|
"eval_steps_per_second": 0.315, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.3125, |
|
"grad_norm": 0.0720935751683844, |
|
"learning_rate": 4.353420654246546e-07, |
|
"logits/chosen": -2.386807441711426, |
|
"logits/rejected": -0.0012192248832434416, |
|
"logps/chosen": -584.6697998046875, |
|
"logps/rejected": -7824.6484375, |
|
"loss": 0.0034, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.0924859046936035, |
|
"rewards/margins": 70.16132354736328, |
|
"rewards/rejected": -72.2538070678711, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.3409090909090909, |
|
"grad_norm": 0.13283878655315423, |
|
"learning_rate": 4.177746070897592e-07, |
|
"logits/chosen": -2.329523801803589, |
|
"logits/rejected": 0.7127262949943542, |
|
"logps/chosen": -563.5079956054688, |
|
"logps/rejected": -7580.98974609375, |
|
"loss": 0.007, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.8199115991592407, |
|
"rewards/margins": 67.61772155761719, |
|
"rewards/rejected": -69.43761444091797, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.3693181818181818, |
|
"grad_norm": 0.24292729812234612, |
|
"learning_rate": 3.9855025724292763e-07, |
|
"logits/chosen": -2.526569366455078, |
|
"logits/rejected": 1.3170499801635742, |
|
"logps/chosen": -577.057861328125, |
|
"logps/rejected": -8586.48046875, |
|
"loss": 0.0012, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.0660626888275146, |
|
"rewards/margins": 77.39335632324219, |
|
"rewards/rejected": -79.45941925048828, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.3977272727272727, |
|
"grad_norm": 0.9183173633670716, |
|
"learning_rate": 3.7785886977585555e-07, |
|
"logits/chosen": -2.464595317840576, |
|
"logits/rejected": 1.4278135299682617, |
|
"logps/chosen": -540.9710083007812, |
|
"logps/rejected": -10038.2763671875, |
|
"loss": 0.0016, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.749346375465393, |
|
"rewards/margins": 92.21351623535156, |
|
"rewards/rejected": -93.96287536621094, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.42613636363636365, |
|
"grad_norm": 0.6993679548084264, |
|
"learning_rate": 3.5590478660213206e-07, |
|
"logits/chosen": -2.191847562789917, |
|
"logits/rejected": 1.6170787811279297, |
|
"logps/chosen": -609.4722900390625, |
|
"logps/rejected": -9242.5185546875, |
|
"loss": 0.0016, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": -2.3140909671783447, |
|
"rewards/margins": 82.9913101196289, |
|
"rewards/rejected": -85.30540466308594, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.42613636363636365, |
|
"eval_logits/chosen": -2.19913911819458, |
|
"eval_logits/rejected": 1.8917714357376099, |
|
"eval_logps/chosen": -571.1802368164062, |
|
"eval_logps/rejected": -8522.7255859375, |
|
"eval_loss": 0.0007830065442249179, |
|
"eval_rewards/accuracies": 1.0, |
|
"eval_rewards/chosen": -2.041984796524048, |
|
"eval_rewards/margins": 76.77738952636719, |
|
"eval_rewards/rejected": -78.81936645507812, |
|
"eval_runtime": 195.9934, |
|
"eval_samples_per_second": 19.924, |
|
"eval_steps_per_second": 0.316, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.45454545454545453, |
|
"grad_norm": 1.8645892720332131, |
|
"learning_rate": 3.3290481963801696e-07, |
|
"logits/chosen": -2.4417898654937744, |
|
"logits/rejected": 1.9294321537017822, |
|
"logps/chosen": -544.4041748046875, |
|
"logps/rejected": -8048.359375, |
|
"loss": 0.0004, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.7426624298095703, |
|
"rewards/margins": 72.57603454589844, |
|
"rewards/rejected": -74.31868743896484, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.48295454545454547, |
|
"grad_norm": 0.026359625456043548, |
|
"learning_rate": 3.0908610963322626e-07, |
|
"logits/chosen": -2.6247737407684326, |
|
"logits/rejected": 1.3285863399505615, |
|
"logps/chosen": -582.7529907226562, |
|
"logps/rejected": -8232.6865234375, |
|
"loss": 0.0027, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.9513471126556396, |
|
"rewards/margins": 73.0924072265625, |
|
"rewards/rejected": -75.04375457763672, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.5113636363636364, |
|
"grad_norm": 0.023938485830486744, |
|
"learning_rate": 2.846838829972671e-07, |
|
"logits/chosen": -2.2369513511657715, |
|
"logits/rejected": 2.0209977626800537, |
|
"logps/chosen": -574.5399780273438, |
|
"logps/rejected": -7650.44775390625, |
|
"loss": 0.0056, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.136356830596924, |
|
"rewards/margins": 68.23828887939453, |
|
"rewards/rejected": -70.37464904785156, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.5397727272727273, |
|
"grad_norm": 0.22173170076093188, |
|
"learning_rate": 2.5993912877423147e-07, |
|
"logits/chosen": -1.6255722045898438, |
|
"logits/rejected": 1.8459784984588623, |
|
"logps/chosen": -532.0623168945312, |
|
"logps/rejected": -8512.8662109375, |
|
"loss": 0.0009, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.864465355873108, |
|
"rewards/margins": 76.34809875488281, |
|
"rewards/rejected": -78.21255493164062, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.5681818181818182, |
|
"grad_norm": 0.7897968463196222, |
|
"learning_rate": 2.3509621870754504e-07, |
|
"logits/chosen": -1.029783010482788, |
|
"logits/rejected": 2.887779474258423, |
|
"logps/chosen": -551.5975952148438, |
|
"logps/rejected": -8021.84912109375, |
|
"loss": 0.0015, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.8646786212921143, |
|
"rewards/margins": 72.0453872680664, |
|
"rewards/rejected": -73.91007232666016, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.5681818181818182, |
|
"eval_logits/chosen": -0.8412286043167114, |
|
"eval_logits/rejected": 2.5732452869415283, |
|
"eval_logps/chosen": -564.5499877929688, |
|
"eval_logps/rejected": -8622.9443359375, |
|
"eval_loss": 0.0007432692218571901, |
|
"eval_rewards/accuracies": 1.0, |
|
"eval_rewards/chosen": -1.9756826162338257, |
|
"eval_rewards/margins": 77.84588623046875, |
|
"eval_rewards/rejected": -79.8215560913086, |
|
"eval_runtime": 196.9649, |
|
"eval_samples_per_second": 19.826, |
|
"eval_steps_per_second": 0.315, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.5965909090909091, |
|
"grad_norm": 0.4603520409149027, |
|
"learning_rate": 2.1040049389819624e-07, |
|
"logits/chosen": -0.8681282997131348, |
|
"logits/rejected": 2.764690399169922, |
|
"logps/chosen": -578.1253662109375, |
|
"logps/rejected": -8362.7060546875, |
|
"loss": 0.0032, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.8748207092285156, |
|
"rewards/margins": 75.43318939208984, |
|
"rewards/rejected": -77.30801391601562, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.625, |
|
"grad_norm": 2.35697501687302, |
|
"learning_rate": 1.8609584188988133e-07, |
|
"logits/chosen": -1.0785493850708008, |
|
"logits/rejected": 2.1844732761383057, |
|
"logps/chosen": -606.7586059570312, |
|
"logps/rejected": -7175.0869140625, |
|
"loss": 0.0036, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.4215800762176514, |
|
"rewards/margins": 62.83292770385742, |
|
"rewards/rejected": -65.25450897216797, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.6534090909090909, |
|
"grad_norm": 22.84103924366115, |
|
"learning_rate": 1.624222881090439e-07, |
|
"logits/chosen": -1.365192174911499, |
|
"logits/rejected": 1.863221526145935, |
|
"logps/chosen": -607.694091796875, |
|
"logps/rejected": -8029.53759765625, |
|
"loss": 0.0058, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": -2.130295515060425, |
|
"rewards/margins": 71.30926513671875, |
|
"rewards/rejected": -73.43955993652344, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.6818181818181818, |
|
"grad_norm": 0.4015693433898369, |
|
"learning_rate": 1.3961362544602212e-07, |
|
"logits/chosen": -1.195718765258789, |
|
"logits/rejected": 2.2943453788757324, |
|
"logps/chosen": -563.174560546875, |
|
"logps/rejected": -6934.9736328125, |
|
"loss": 0.0015, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.8716214895248413, |
|
"rewards/margins": 61.537391662597656, |
|
"rewards/rejected": -63.409019470214844, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.7102272727272727, |
|
"grad_norm": 0.304169201785893, |
|
"learning_rate": 1.1789510538684522e-07, |
|
"logits/chosen": -1.2848708629608154, |
|
"logits/rejected": 2.0380046367645264, |
|
"logps/chosen": -532.541015625, |
|
"logps/rejected": -8912.1201171875, |
|
"loss": 0.0016, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.8050743341445923, |
|
"rewards/margins": 80.02030944824219, |
|
"rewards/rejected": -81.82538604736328, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.7102272727272727, |
|
"eval_logits/chosen": -1.2004659175872803, |
|
"eval_logits/rejected": 2.2580416202545166, |
|
"eval_logps/chosen": -549.9619750976562, |
|
"eval_logps/rejected": -7654.19775390625, |
|
"eval_loss": 0.0007973507163114846, |
|
"eval_rewards/accuracies": 1.0, |
|
"eval_rewards/chosen": -1.829802393913269, |
|
"eval_rewards/margins": 68.30428314208984, |
|
"eval_rewards/rejected": -70.13408660888672, |
|
"eval_runtime": 195.5911, |
|
"eval_samples_per_second": 19.965, |
|
"eval_steps_per_second": 0.317, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.7386363636363636, |
|
"grad_norm": 0.5568394356105595, |
|
"learning_rate": 9.748121349736891e-08, |
|
"logits/chosen": -1.168460488319397, |
|
"logits/rejected": 2.0924360752105713, |
|
"logps/chosen": -588.483642578125, |
|
"logps/rejected": -7967.40478515625, |
|
"loss": 0.0008, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.046555995941162, |
|
"rewards/margins": 70.84620666503906, |
|
"rewards/rejected": -72.89276885986328, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.7670454545454546, |
|
"grad_norm": 0.07097046676874795, |
|
"learning_rate": 7.857355122839673e-08, |
|
"logits/chosen": -1.3962290287017822, |
|
"logits/rejected": 1.8883154392242432, |
|
"logps/chosen": -566.6204833984375, |
|
"logps/rejected": -7627.25634765625, |
|
"loss": 0.0008, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.8577096462249756, |
|
"rewards/margins": 67.72525024414062, |
|
"rewards/rejected": -69.58296203613281, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.7954545454545454, |
|
"grad_norm": 0.05520595932113315, |
|
"learning_rate": 6.135884496044244e-08, |
|
"logits/chosen": -1.3116520643234253, |
|
"logits/rejected": 2.1938819885253906, |
|
"logps/chosen": -557.5162353515625, |
|
"logps/rejected": -7626.7578125, |
|
"loss": 0.0013, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.7834333181381226, |
|
"rewards/margins": 68.03524017333984, |
|
"rewards/rejected": -69.81867980957031, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.8238636363636364, |
|
"grad_norm": 0.05646545972920247, |
|
"learning_rate": 4.600710195020982e-08, |
|
"logits/chosen": -1.1199910640716553, |
|
"logits/rejected": 2.300516128540039, |
|
"logps/chosen": -577.44482421875, |
|
"logps/rejected": -7700.6279296875, |
|
"loss": 0.0024, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.8830159902572632, |
|
"rewards/margins": 68.71068572998047, |
|
"rewards/rejected": -70.59370422363281, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.8522727272727273, |
|
"grad_norm": 0.5065392892121787, |
|
"learning_rate": 3.2669931390104374e-08, |
|
"logits/chosen": -1.1116163730621338, |
|
"logits/rejected": 2.1315221786499023, |
|
"logps/chosen": -531.3294677734375, |
|
"logps/rejected": -8432.990234375, |
|
"loss": 0.0008, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.7507492303848267, |
|
"rewards/margins": 76.37090301513672, |
|
"rewards/rejected": -78.12163543701172, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.8522727272727273, |
|
"eval_logits/chosen": -0.9558252096176147, |
|
"eval_logits/rejected": 2.2921457290649414, |
|
"eval_logps/chosen": -557.7684936523438, |
|
"eval_logps/rejected": -8073.39990234375, |
|
"eval_loss": 0.0005955722881481051, |
|
"eval_rewards/accuracies": 1.0, |
|
"eval_rewards/chosen": -1.907867670059204, |
|
"eval_rewards/margins": 72.41824340820312, |
|
"eval_rewards/rejected": -74.32611846923828, |
|
"eval_runtime": 196.4288, |
|
"eval_samples_per_second": 19.88, |
|
"eval_steps_per_second": 0.316, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.8806818181818182, |
|
"grad_norm": 0.11084443137467888, |
|
"learning_rate": 2.147904716149135e-08, |
|
"logits/chosen": -0.904153048992157, |
|
"logits/rejected": 2.1967928409576416, |
|
"logps/chosen": -541.1990966796875, |
|
"logps/rejected": -8187.125, |
|
"loss": 0.002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.7913249731063843, |
|
"rewards/margins": 73.07491302490234, |
|
"rewards/rejected": -74.86624145507812, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.9090909090909091, |
|
"grad_norm": 0.06043015851279257, |
|
"learning_rate": 1.254496706805433e-08, |
|
"logits/chosen": -1.1182914972305298, |
|
"logits/rejected": 2.186739206314087, |
|
"logps/chosen": -570.1544189453125, |
|
"logps/rejected": -8264.416015625, |
|
"loss": 0.0005, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.8748136758804321, |
|
"rewards/margins": 74.24952697753906, |
|
"rewards/rejected": -76.12433624267578, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.9375, |
|
"grad_norm": 0.14908479424865714, |
|
"learning_rate": 5.955921395237318e-09, |
|
"logits/chosen": -0.9981291890144348, |
|
"logits/rejected": 2.14846134185791, |
|
"logps/chosen": -528.0042114257812, |
|
"logps/rejected": -8246.408203125, |
|
"loss": 0.0004, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.727360486984253, |
|
"rewards/margins": 74.25918579101562, |
|
"rewards/rejected": -75.98654174804688, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.9659090909090909, |
|
"grad_norm": 0.18250364966205163, |
|
"learning_rate": 1.7769815745066474e-09, |
|
"logits/chosen": -1.248327612876892, |
|
"logits/rejected": 2.005960702896118, |
|
"logps/chosen": -543.0258178710938, |
|
"logps/rejected": -7556.515625, |
|
"loss": 0.0008, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.7404506206512451, |
|
"rewards/margins": 67.73704528808594, |
|
"rewards/rejected": -69.47749328613281, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.9943181818181818, |
|
"grad_norm": 1.0466321976770805, |
|
"learning_rate": 4.9417557483610875e-11, |
|
"logits/chosen": -0.9777113199234009, |
|
"logits/rejected": 2.094377040863037, |
|
"logps/chosen": -576.5343017578125, |
|
"logps/rejected": -8160.74853515625, |
|
"loss": 0.0029, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": -2.0226657390594482, |
|
"rewards/margins": 73.10923767089844, |
|
"rewards/rejected": -75.13190460205078, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.9943181818181818, |
|
"eval_logits/chosen": -0.908790111541748, |
|
"eval_logits/rejected": 2.3223326206207275, |
|
"eval_logps/chosen": -561.191650390625, |
|
"eval_logps/rejected": -8146.36962890625, |
|
"eval_loss": 0.0005959240952506661, |
|
"eval_rewards/accuracies": 1.0, |
|
"eval_rewards/chosen": -1.9420990943908691, |
|
"eval_rewards/margins": 73.11370849609375, |
|
"eval_rewards/rejected": -75.0558090209961, |
|
"eval_runtime": 196.8349, |
|
"eval_samples_per_second": 19.839, |
|
"eval_steps_per_second": 0.315, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 352, |
|
"total_flos": 0.0, |
|
"train_loss": 0.0465552191166022, |
|
"train_runtime": 10297.8682, |
|
"train_samples_per_second": 4.37, |
|
"train_steps_per_second": 0.034 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 352, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|